added convert_pickle.py

This commit is contained in:
Shautvast 2025-02-02 15:40:49 +01:00
parent 69d518e975
commit 221e5aa058
4 changed files with 55 additions and 48 deletions

22
convert_pickle.py Normal file
View file

@ -0,0 +1,22 @@
import pickle
import gzip
import json


def convert_data(data):
    """Convert one MNIST split into a JSON-serializable list of records.

    Parameters
    ----------
    data : tuple
        ``(features, labels)`` — ``features`` is a sequence of per-sample
        numpy arrays and ``labels`` a parallel sequence of integer classes.

    Returns
    -------
    list of dict
        One ``{"x": <pixel values as list>, "y": <label as int>}`` record
        per sample, in the original order.
    """
    features, labels = data
    # zip keeps features and labels aligned without manual indexing;
    # int() drops the numpy scalar type so json.dump can serialize it.
    return [{"x": x.tolist(), "y": int(y)} for x, y in zip(features, labels)]


def main():
    """Load mnist.pkl.gz and write each split to its own JSON file."""
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # acceptable only because mnist.pkl.gz is a trusted local artifact.
    with gzip.open("mnist.pkl.gz", "rb") as f:
        # encoding="latin1" is required to unpickle the Python-2-era
        # MNIST dump under Python 3.
        training_data, validation_data, test_data = pickle.load(f, encoding="latin1")

    # One stanza per split; output names must match what the loader expects.
    for filename, split in (
        ("training_data.json", training_data),
        ("validation_data.json", validation_data),
        ("test_data.json", test_data),
    ):
        with open(filename, "w") as out:
            json.dump(convert_data(split), out)


if __name__ == "__main__":
    main()

View file

@ -1,4 +1,5 @@
use std::fmt::Debug;
use nalgebra::DMatrix;
use rand::prelude::*;
use serde::Deserialize;
@ -10,8 +11,8 @@ pub fn load_data() -> (Data<f64, OneHotVector>, Data<f64, OneHotVector>) {
// this is transformed to:
// Data : Vec<DataLine>
// DataLine {inputs: Vec<pixels as f64>, label: f64}
let raw_training_data: Vec<RawData> = serde_json::from_slice(include_bytes!("data/training.json")).unwrap();
let raw_test_data: Vec<RawData> = serde_json::from_slice(include_bytes!("data/test.json")).unwrap();
let raw_training_data: Vec<RawData> = serde_json::from_slice(include_bytes!("data/training_data.json")).unwrap();
let raw_test_data: Vec<RawData> = serde_json::from_slice(include_bytes!("data/test_data.json")).unwrap();
let train = vectorize(raw_training_data);
let test = vectorize(raw_test_data);
@ -19,10 +20,10 @@ pub fn load_data() -> (Data<f64, OneHotVector>, Data<f64, OneHotVector>) {
(Data(train), Data(test))
}
fn vectorize(raw_training_data: Vec<RawData>) -> Vec<DataLine<f64, OneHotVector>>{
fn vectorize(raw_training_data: Vec<RawData>) -> Vec<DataLine<f64, OneHotVector>> {
let mut result = Vec::new();
for line in raw_training_data {
result.push(DataLine { inputs: line.x, label: onehot(line.y) });
result.push(DataLine { inputs: DMatrix::from_vec(line.x.len(), 1, line.x), label: onehot(line.y) });
}
result
}
@ -37,7 +38,7 @@ struct RawData {
/// Y is type of output
#[derive(Debug, Clone)]
pub struct DataLine<X, Y> where X: Clone, Y: Clone {
pub inputs: Vec<X>,
pub inputs: DMatrix<X>,
pub label: Y,
}
@ -64,7 +65,7 @@ impl OneHotVector {
}
#[derive(Debug, Clone)]
pub struct Data<X, Y>(pub Vec<DataLine<X, Y>>) where X: Clone, Y: Clone ;
pub struct Data<X, Y>(pub Vec<DataLine<X, Y>>) where X: Clone, Y: Clone;
impl<X, Y> Data<X, Y> where X: Clone, Y: Clone {
pub fn shuffle(&mut self) {

View file

@ -1,16 +1,12 @@
use mnist_rs::dataloader::load_data;
use std::time::Instant;
fn main() {
let mut net = mnist_rs::net::Network::gaussian(vec![784, 30, 10]);
let (training_data, test_data) = load_data();
net.sgd(training_data, 30, 1, 0.01, Some(test_data));
// let sizes = vec![5,3,2];
// let net = mnist_rs::net::Network::from(sizes);
// println!("biases {:?}", net.biases.iter().map(|b|b.shape()).collect::<Vec<(usize,usize)>>());
// println!("weights {:?}", net.weights.iter().map(|b|b.shape()).collect::<Vec<(usize,usize)>>());
let t0 = Instant::now();
net.sgd(training_data, 30, 10, 3.0, Some(test_data));
println!("{}", t0.elapsed().as_millis());
}

View file

@ -48,17 +48,17 @@ impl Network {
}
}
fn feed_forward(&self, input: Vec<f64>) -> Vec<f64> {
fn feed_forward(&self, input: &DMatrix<f64>) -> DMatrix<f64> {
self.feed_forward_activation(input, sigmoid_inplace)
}
fn feed_forward_activation(&self, input: Vec<f64>, activation: fn(&mut f64)) -> Vec<f64> {
let mut a = DMatrix::from_vec(input.len(), 1, input);
fn feed_forward_activation(&self, input: &DMatrix<f64>, activation: fn(&mut f64)) -> DMatrix<f64> {
let mut a = input.clone();
for (b, w) in zip(&self.biases, &self.weights) {
a = b.clone()+ w * a;
a = b + w * a;
a.apply(activation);
}
a.column(0).iter().copied().collect()
a
}
pub fn sgd(&mut self, mut training_data: Data<f64, OneHotVector>, epochs: usize, minibatch_size: usize, eta: f64, test_data: Option<Data<f64, OneHotVector>>) {
@ -84,10 +84,8 @@ impl Network {
fn update_mini_batch(&mut self, mini_batch: &[DataLine<f64, OneHotVector>], eta: f64) {
let (mut nabla_b, mut nabla_w) = self.zero_gradient();
for line in mini_batch.iter() {
let (delta_nabla_b, delta_nabla_w) = self.backprop(line.inputs.to_vec(), &line.label);
let (delta_nabla_b, delta_nabla_w) = self.backprop(&line.inputs, &line.label);
// nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
// nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
nabla_b = zip(&nabla_b, &delta_nabla_b).map(|(nb, dnb)| nb.add(dnb)).collect();
nabla_w = zip(&nabla_w, &delta_nabla_w).map(|(nw, dnw)| nw.add(dnw)).collect();
}
@ -105,7 +103,7 @@ impl Network {
/// neuron in the final layer has the highest activation.
fn evaluate(&self, test_data: &Data<f64, OneHotVector>) -> usize {
let test_results: Vec<(usize, usize)> = test_data.0.iter()
.map(|line| (argmax(self.feed_forward(line.inputs.clone())), line.label.val))
.map(|line| (argmax(self.feed_forward(&line.inputs)), line.label.val))
.collect();
test_results.into_iter().filter(|(x, y)| *x == *y).count()
@ -115,30 +113,27 @@ impl Network {
/// gradient for the cost function C_x. `nabla_b` and
/// `nabla_w` are layer-by-layer lists of matrices, similar
/// to `self.biases` and `self.weights`.
fn backprop(&self, x: Vec<f64>, y: &OneHotVector) -> (Vec<DMatrix<f64>>, Vec<DMatrix<f64>>) {
fn backprop(&self, x: &DMatrix<f64>, y: &OneHotVector) -> (Vec<DMatrix<f64>>, Vec<DMatrix<f64>>) {
let (mut nabla_b, mut nabla_w) = self.zero_gradient();
// feedforward
let mut activation = DMatrix::from_vec(x.len(), 1, x);
let mut activation = x.clone();
let mut activations = vec![activation.clone()];
let mut zs = vec![];
for (b, w) in zip(&self.biases, &self.weights) {
let z = (w * &activation)+b.clone();
let z = (w * activation) + b;
zs.push(z.clone());
activation = z.map(sigmoid);
activations.push(activation.clone());
}
// backward pass
// delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
let delta: DMatrix<f64> = cost_derivative(&activations[activations.len() - 1], y).component_mul(&zs[zs.len() - 1].map(sigmoid_prime));
// println!("delta {:?}", delta);
let index = nabla_b.len() - 1;
nabla_b[index] = delta.clone();
let index = nabla_w.len() - 1;
let ac = &activations[activations.len() - 2].transpose();
nabla_w[index] = &delta * ac;
nabla_w[index] = &delta * (&activations[activations.len() - 2].transpose());
let lens_zs = zs.len();
for l in 2..self.num_layers {
let z = &zs[lens_zs - l];
@ -164,32 +159,25 @@ impl Network {
.collect();
(nabla_b, nabla_w)
}
}
fn cost_derivative(output_activations: &DMatrix<f64>, y: &OneHotVector) -> DMatrix<f64> {
// output_activations - y
// println!("output {:?}", output_activations);
// println!("expected {:?}", y);
let shape = output_activations.shape();
let t = DMatrix::from_iterator(shape.0, shape.1, output_activations.iter().enumerate()
.map(|(index, a)| a - y.get(index)));
// println!("t {:?}",t);
t
DMatrix::from_iterator(shape.0, shape.1, output_activations.iter().enumerate()
.map(|(index, a)| a - y.get(index)))
}
fn argmax(val: Vec<f64>) -> usize {
/// index of max value
/// only meaningful for single row or column matrix
fn argmax(val: DMatrix<f64>) -> usize {
let mut max = 0.0;
let mut index = 0;
for (i, x) in val.iter().enumerate() {
// print!("{},",x);
if *x > max {
index = i;
max = *x;
}
}
// println!();
index
}
@ -247,7 +235,7 @@ mod test {
#[test]
fn test_argmax() {
assert_eq!(5, argmax(vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 4.0, 3.0, 2.0, 1.0]));
assert_eq!(5, argmax(DMatrix::from_vec(10, 1, vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 4.0, 3.0, 2.0, 1.0])));
}
#[test]
@ -262,15 +250,15 @@ mod test {
// 2 layers of 2 units
let mut net = Network::ones(vec![2, 2]);
let prediction = net.feed_forward_activation(vec![2.0, 2.0], |a| {});
assert_eq!(prediction, vec![5.0, 5.0])
let prediction = net.feed_forward_activation(&DMatrix::from_vec(2, 1, vec![2.0, 2.0]), |a| {});
assert_eq!(prediction, DMatrix::from_vec(2, 1, vec![5.0, 5.0]))
}
#[test]
fn test_sgd() {
// 2 layers of 2 units
let mut net = Network::ones(vec![2, 2]);
let data = Data(vec![DataLine { inputs: vec![1.0, 1.0], label: OneHotVector::new(1) }]);
let data = Data(vec![DataLine { inputs: DMatrix::from_vec(2, 1, vec![1.0, 1.0]), label: OneHotVector::new(1) }]);
net.sgd(data, 1, 1, 0.001, None);
println!("{:?}", net);
}