diff --git a/convert_pickle.py b/convert_pickle.py
new file mode 100644
index 0000000..b69cf53
--- /dev/null
+++ b/convert_pickle.py
@@ -0,0 +1,22 @@
+import pickle
+import gzip
+import json
+
+# Load the data from the .pkl.gz file
+with gzip.open("mnist.pkl.gz", "rb") as f:
+    training_data, validation_data, test_data = pickle.load(f, encoding="latin1")
+
+# Define a helper function to convert the data into JSON serializable format
+def convert_data(data):
+    features, labels = data
+    return [{"x": features[i].tolist(), "y": int(labels[i])} for i in range(len(features))]
+
+# Convert and save to JSON
+with open("training_data.json", "w") as train_json:
+    json.dump(convert_data(training_data), train_json)
+
+with open("validation_data.json", "w") as val_json:
+    json.dump(convert_data(validation_data), val_json)
+
+with open("test_data.json", "w") as test_json:
+    json.dump(convert_data(test_data), test_json)
\ No newline at end of file
diff --git a/src/dataloader.rs b/src/dataloader.rs
index c91d644..4787e63 100644
--- a/src/dataloader.rs
+++ b/src/dataloader.rs
@@ -1,4 +1,5 @@
 use std::fmt::Debug;
+use nalgebra::DMatrix;
 use rand::prelude::*;
 use serde::Deserialize;
 
@@ -10,8 +11,8 @@ pub fn load_data() -> (Data<f64, OneHotVector>, Data<f64, OneHotVector>) {
     // this is transformed to:
     // Data : Vec<DataLine>
    // DataLine {inputs: Vec<f64>, label: f64}
-    let raw_training_data: Vec<RawData> = serde_json::from_slice(include_bytes!("data/training.json")).unwrap();
-    let raw_test_data: Vec<RawData> = serde_json::from_slice(include_bytes!("data/test.json")).unwrap();
+    let raw_training_data: Vec<RawData> = serde_json::from_slice(include_bytes!("data/training_data.json")).unwrap();
+    let raw_test_data: Vec<RawData> = serde_json::from_slice(include_bytes!("data/test_data.json")).unwrap();
 
     let train = vectorize(raw_training_data);
     let test = vectorize(raw_test_data);
@@ -19,10 +20,10 @@ pub fn load_data() -> (Data<f64, OneHotVector>, Data<f64, OneHotVector>) {
     (Data(train), Data(test))
 }
 
-fn vectorize(raw_training_data: Vec<RawData>) -> Vec<DataLine<f64, OneHotVector>>{
+fn vectorize(raw_training_data: Vec<RawData>) -> Vec<DataLine<f64, OneHotVector>> {
     let mut result = Vec::new();
     for line in raw_training_data {
-        result.push(DataLine { inputs: line.x, label: onehot(line.y) });
+        result.push(DataLine { inputs: DMatrix::from_vec(line.x.len(), 1, line.x), label: onehot(line.y) });
     }
     result
 }
@@ -37,7 +38,7 @@ struct RawData {
 /// Y is type of output
 #[derive(Debug, Clone)]
 pub struct DataLine<X, Y> where X: Clone, Y: Clone {
-    pub inputs: Vec<X>,
+    pub inputs: DMatrix<X>,
     pub label: Y,
 }
 
@@ -64,7 +65,7 @@ impl OneHotVector {
 }
 
 #[derive(Debug, Clone)]
-pub struct Data<X, Y>(pub Vec<DataLine<X, Y>>) where X: Clone, Y: Clone ;
+pub struct Data<X, Y>(pub Vec<DataLine<X, Y>>) where X: Clone, Y: Clone;
 
 impl<X, Y> Data<X, Y> where X: Clone, Y: Clone {
     pub fn shuffle(&mut self) {
diff --git a/src/main.rs b/src/main.rs
index a3881b4..7240a8c 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,16 +1,12 @@
 use mnist_rs::dataloader::load_data;
+use std::time::Instant;
 
 fn main() {
     let mut net = mnist_rs::net::Network::gaussian(vec![784, 30, 10]);
 
     let (training_data, test_data) = load_data();
 
-    net.sgd(training_data, 30, 1, 0.01, Some(test_data));
-
-
-    // let sizes = vec![5,3,2];
-    // let net = mnist_rs::net::Network::from(sizes);
-    // println!("biases {:?}", net.biases.iter().map(|b|b.shape()).collect::<Vec<_>>());
-    // println!("weights {:?}", net.weights.iter().map(|b|b.shape()).collect::<Vec<_>>());
-
+    let t0 = Instant::now();
+    net.sgd(training_data, 30, 10, 3.0, Some(test_data));
+    println!("{}", t0.elapsed().as_millis());
 }
\ No newline at end of file
diff --git a/src/net.rs b/src/net.rs
index b91bc2e..7a7089d 100644
--- a/src/net.rs
+++ b/src/net.rs
@@ -48,17 +48,17 @@ impl Network {
         }
     }
 
-    fn feed_forward(&self, input: Vec<f64>) -> Vec<f64> {
+    fn feed_forward(&self, input: &DMatrix<f64>) -> DMatrix<f64> {
         self.feed_forward_activation(input, sigmoid_inplace)
     }
 
-    fn feed_forward_activation(&self, input: Vec<f64>, activation: fn(&mut f64)) -> Vec<f64> {
-        let mut a = DMatrix::from_vec(input.len(), 1, input);
+    fn feed_forward_activation(&self, input: &DMatrix<f64>, activation: fn(&mut f64)) -> DMatrix<f64> {
+        let mut a = input.clone();
         for (b, w) in zip(&self.biases, &self.weights) {
-            a = b.clone()+ w * a;
+            a = b + w * a;
             a.apply(activation);
         }
-        a.column(0).iter().copied().collect()
+        a
     }
 
     pub fn sgd(&mut self, mut training_data: Data<f64, OneHotVector>, epochs: usize, minibatch_size: usize, eta: f64, test_data: Option<Data<f64, OneHotVector>>) {
@@ -82,12 +82,10 @@ impl Network {
     /// The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``
     /// is the learning rate.
     fn update_mini_batch(&mut self, mini_batch: &[DataLine<f64, OneHotVector>], eta: f64) {
-        let (mut nabla_b, mut nabla_w) = self.zero_gradient();
+        let (mut nabla_b, mut nabla_w) = self.zero_gradient();
         for line in mini_batch.iter() {
-            let (delta_nabla_b, delta_nabla_w) = self.backprop(line.inputs.to_vec(), &line.label);
+            let (delta_nabla_b, delta_nabla_w) = self.backprop(&line.inputs, &line.label);
 
-            // nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
-            // nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
             nabla_b = zip(&nabla_b, &delta_nabla_b).map(|(nb, dnb)| nb.add(dnb)).collect();
             nabla_w = zip(&nabla_w, &delta_nabla_w).map(|(nw, dnw)| nw.add(dnw)).collect();
         }
@@ -105,7 +103,7 @@ impl Network {
     /// neuron in the final layer has the highest activation.
     fn evaluate(&self, test_data: &Data<f64, OneHotVector>) -> usize {
         let test_results: Vec<(usize, usize)> = test_data.0.iter()
-            .map(|line| (argmax(self.feed_forward(line.inputs.clone())), line.label.val))
+            .map(|line| (argmax(self.feed_forward(&line.inputs)), line.label.val))
             .collect();
 
         test_results.into_iter().filter(|(x, y)| *x == *y).count()
@@ -115,30 +113,27 @@ impl Network {
     /// Return a tuple `(nabla_b, nabla_w)` representing the
     /// gradient for the cost function C_x. `nabla_b` and
     /// `nabla_w` are layer-by-layer lists of matrices, similar
     /// to `self.biases` and `self.weights`.
-    fn backprop(&self, x: Vec<f64>, y: &OneHotVector) -> (Vec<DMatrix<f64>>, Vec<DMatrix<f64>>) {
-        let (mut nabla_b, mut nabla_w) = self.zero_gradient();
+    fn backprop(&self, x: &DMatrix<f64>, y: &OneHotVector) -> (Vec<DMatrix<f64>>, Vec<DMatrix<f64>>) {
+        let (mut nabla_b, mut nabla_w) = self.zero_gradient();
 
         // feedforward
-        let mut activation = DMatrix::from_vec(x.len(), 1, x);
+        let mut activation = x.clone();
         let mut activations = vec![activation.clone()];
         let mut zs = vec![];
         for (b, w) in zip(&self.biases, &self.weights) {
-            let z = (w * &activation)+b.clone();
+            let z = (w * activation) + b;
             zs.push(z.clone());
             activation = z.map(sigmoid);
             activations.push(activation.clone());
         }
 
         // backward pass
-        // delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
         let delta: DMatrix<f64> = cost_derivative(&activations[activations.len() - 1], y).component_mul(&zs[zs.len() - 1].map(sigmoid_prime));
-        // println!("delta {:?}", delta);
         let index = nabla_b.len() - 1;
         nabla_b[index] = delta.clone();
         let index = nabla_w.len() - 1;
-        let ac = &activations[activations.len() - 2].transpose();
-        nabla_w[index] = &delta * ac;
+        nabla_w[index] = &delta * (&activations[activations.len() - 2].transpose());
         let lens_zs = zs.len();
         for l in 2..self.num_layers {
             let z = &zs[lens_zs - l];
@@ -164,32 +159,25 @@ impl Network {
             .collect();
         (nabla_b, nabla_w)
     }
-
 }
 
 fn cost_derivative(output_activations: &DMatrix<f64>, y: &OneHotVector) -> DMatrix<f64> {
-    // output_activations - y
-    // println!("output {:?}", output_activations);
-    // println!("expected {:?}", y);
-
     let shape = output_activations.shape();
-    let t = DMatrix::from_iterator(shape.0, shape.1, output_activations.iter().enumerate()
-        .map(|(index, a)| a - y.get(index)));
-    // println!("t {:?}",t);
-    t
+    DMatrix::from_iterator(shape.0, shape.1, output_activations.iter().enumerate()
+        .map(|(index, a)| a - y.get(index)))
 }
 
-fn argmax(val: Vec<f64>) -> usize {
+/// index of max value
+/// only meaningful for single row or column matrix
+fn argmax(val: DMatrix<f64>) -> usize {
     let mut max = 0.0;
     let mut index = 0;
     for (i, x) in val.iter().enumerate() {
-        // print!("{},",x);
         if *x > max {
             index = i;
             max = *x;
         }
     }
-    // println!();
     index
 }
@@ -247,7 +235,7 @@ mod test {
 
     #[test]
     fn test_argmax() {
-        assert_eq!(5, argmax(vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 4.0, 3.0, 2.0, 1.0]));
+        assert_eq!(5, argmax(DMatrix::from_vec(10, 1, vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 4.0, 3.0, 2.0, 1.0])));
     }
 
     #[test]
@@ -262,15 +250,15 @@ mod test {
         // 2 layers of 2 units
         let mut net = Network::ones(vec![2, 2]);
 
-        let prediction = net.feed_forward_activation(vec![2.0, 2.0], |a| {});
-        assert_eq!(prediction, vec![5.0, 5.0])
+        let prediction = net.feed_forward_activation(&DMatrix::from_vec(2, 1, vec![2.0, 2.0]), |a| {});
+        assert_eq!(prediction, DMatrix::from_vec(2, 1, vec![5.0, 5.0]))
     }
 
     #[test]
     fn test_sgd() {
         // 2 layers of 2 units
         let mut net = Network::ones(vec![2, 2]);
-        let data = Data(vec![DataLine { inputs: vec![1.0, 1.0], label: OneHotVector::new(1) }]);
+        let data = Data(vec![DataLine { inputs: DMatrix::from_vec(2, 1, vec![1.0, 1.0]), label: OneHotVector::new(1) }]);
         net.sgd(data, 1, 1, 0.001, None);
         println!("{:?}", net);
     }