added convert_pickle.py

2025-02-02 15:40:49 +01:00 · 2025-02-02 15:40:49 +01:00 · 221e5aa058
commit 221e5aa058
parent 69d518e975
4 changed files with 55 additions and 48 deletions
--- a/convert_pickle.py
+++ b/convert_pickle.py
@ -0,0 +1,22 @@
+import pickle
+import gzip
+import json
+
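+# Load the (training, validation, test) splits from the gzipped pickle. pickle.load can execute arbitrary code, so only use a trusted mnist.pkl.gz.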
+with gzip.open("mnist.pkl.gz", "rb") as f:
+    training_data, validation_data, test_data = pickle.load(f, encoding="latin1")
+
+# Helper: turn a (features, labels) pair into a JSON-serializable list of {"x": [...], "y": int} records, one per sample
+def convert_data(data):
+    features, labels = data
+    return [{"x": features[i].tolist(), "y": int(labels[i])} for i in range(len(features))]
+
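+# Convert each split and write it to its own JSON file in the current working directory (dataloader.rs embeds training_data.json and test_data.json from its data/ directory)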
+with open("training_data.json", "w") as train_json:
+    json.dump(convert_data(training_data), train_json)
+
+with open("validation_data.json", "w") as val_json:
+    json.dump(convert_data(validation_data), val_json)
+
+with open("test_data.json", "w") as test_json:
+    json.dump(convert_data(test_data), test_json)
--- a/src/dataloader.rs
+++ b/src/dataloader.rs
@ -1,4 +1,5 @@
 use std::fmt::Debug;
+use nalgebra::DMatrix;

 use rand::prelude::*;
 use serde::Deserialize;
@ -10,8 +11,8 @@ pub fn load_data() -> (Data<f64, OneHotVector>, Data<f64, OneHotVector>) {
    // this is transformed to:
    // Data : Vec<DataLine>
    // DataLine {inputs: Vec<pixels as f64>, label: f64}
-    let raw_training_data: Vec<RawData> = serde_json::from_slice(include_bytes!("data/training.json")).unwrap();
-    let raw_test_data: Vec<RawData> = serde_json::from_slice(include_bytes!("data/test.json")).unwrap();
+    let raw_training_data: Vec<RawData> = serde_json::from_slice(include_bytes!("data/training_data.json")).unwrap();
+    let raw_test_data: Vec<RawData> = serde_json::from_slice(include_bytes!("data/test_data.json")).unwrap();

    let train = vectorize(raw_training_data);
    let test = vectorize(raw_test_data);
@ -19,10 +20,10 @@ pub fn load_data() -> (Data<f64, OneHotVector>, Data<f64, OneHotVector>) {
    (Data(train), Data(test))
 }

-fn vectorize(raw_training_data: Vec<RawData>) -> Vec<DataLine<f64, OneHotVector>>{
+fn vectorize(raw_training_data: Vec<RawData>) -> Vec<DataLine<f64, OneHotVector>> {
    let mut result = Vec::new();
    for line in raw_training_data {
-        result.push(DataLine { inputs: line.x, label: onehot(line.y) });
+        result.push(DataLine { inputs: DMatrix::from_vec(line.x.len(), 1, line.x), label: onehot(line.y) });
    }
    result
 }
@ -37,7 +38,7 @@ struct RawData {
 /// Y is type of output
 #[derive(Debug, Clone)]
 pub struct DataLine<X, Y> where X: Clone, Y: Clone {
-    pub inputs: Vec<X>,
+    pub inputs: DMatrix<X>,
    pub label: Y,
 }

@ -64,7 +65,7 @@ impl OneHotVector {
 }

 #[derive(Debug, Clone)]
-pub struct Data<X, Y>(pub Vec<DataLine<X, Y>>) where X: Clone, Y: Clone ;
+pub struct Data<X, Y>(pub Vec<DataLine<X, Y>>) where X: Clone, Y: Clone;

 impl<X, Y> Data<X, Y> where X: Clone, Y: Clone {
    pub fn shuffle(&mut self) {
--- a/src/main.rs
+++ b/src/main.rs
@ -1,16 +1,12 @@
 use mnist_rs::dataloader::load_data;
+use std::time::Instant;

 fn main() {
    let mut net = mnist_rs::net::Network::gaussian(vec![784, 30, 10]);
    let (training_data, test_data) = load_data();

-    net.sgd(training_data, 30, 1, 0.01, Some(test_data));
-
-
-    // let sizes = vec![5,3,2];
-    // let net = mnist_rs::net::Network::from(sizes);
-    // println!("biases {:?}", net.biases.iter().map(|b|b.shape()).collect::<Vec<(usize,usize)>>());
-    // println!("weights {:?}", net.weights.iter().map(|b|b.shape()).collect::<Vec<(usize,usize)>>());
-
+    let t0 = Instant::now();

+    net.sgd(training_data, 30, 10, 3.0, Some(test_data));
+    println!("{}", t0.elapsed().as_millis());
 }
--- a/src/net.rs
+++ b/src/net.rs
@ -48,17 +48,17 @@ impl Network {
        }
    }

-    fn feed_forward(&self, input: Vec<f64>) -> Vec<f64> {
+    fn feed_forward(&self, input: &DMatrix<f64>) -> DMatrix<f64> {
        self.feed_forward_activation(input, sigmoid_inplace)
    }

-    fn feed_forward_activation(&self, input: Vec<f64>, activation: fn(&mut f64)) -> Vec<f64> {
-        let mut a = DMatrix::from_vec(input.len(), 1, input);
+    fn feed_forward_activation(&self, input: &DMatrix<f64>, activation: fn(&mut f64)) -> DMatrix<f64> {
+        let mut a = input.clone();
        for (b, w) in zip(&self.biases, &self.weights) {
-            a = b.clone()+ w * a;
+            a = b + w * a;
            a.apply(activation);
        }
-        a.column(0).iter().copied().collect()
+        a
    }

    pub fn sgd(&mut self, mut training_data: Data<f64, OneHotVector>, epochs: usize, minibatch_size: usize, eta: f64, test_data: Option<Data<f64, OneHotVector>>) {
@ -82,12 +82,10 @@ impl Network {
    /// The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``
    /// is the learning rate.
    fn update_mini_batch(&mut self, mini_batch: &[DataLine<f64, OneHotVector>], eta: f64) {
-        let  (mut nabla_b, mut nabla_w) = self.zero_gradient();
+        let (mut nabla_b, mut nabla_w) = self.zero_gradient();
        for line in mini_batch.iter() {
-            let (delta_nabla_b, delta_nabla_w) = self.backprop(line.inputs.to_vec(), &line.label);
+            let (delta_nabla_b, delta_nabla_w) = self.backprop(&line.inputs, &line.label);

-            // nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
-            // nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
            nabla_b = zip(&nabla_b, &delta_nabla_b).map(|(nb, dnb)| nb.add(dnb)).collect();
            nabla_w = zip(&nabla_w, &delta_nabla_w).map(|(nw, dnw)| nw.add(dnw)).collect();
        }
@ -105,7 +103,7 @@ impl Network {
    /// neuron in the final layer has the highest activation.
    fn evaluate(&self, test_data: &Data<f64, OneHotVector>) -> usize {
        let test_results: Vec<(usize, usize)> = test_data.0.iter()
-            .map(|line| (argmax(self.feed_forward(line.inputs.clone())), line.label.val))
+            .map(|line| (argmax(self.feed_forward(&line.inputs)), line.label.val))
            .collect();

        test_results.into_iter().filter(|(x, y)| *x == *y).count()
@ -115,30 +113,27 @@ impl Network {
    /// gradient for the cost function C_x.  `nabla_b` and
    /// `nabla_w` are layer-by-layer lists of matrices, similar
    /// to `self.biases` and `self.weights`.
-    fn backprop(&self, x: Vec<f64>, y: &OneHotVector) -> (Vec<DMatrix<f64>>, Vec<DMatrix<f64>>) {
-        let  (mut nabla_b, mut nabla_w) = self.zero_gradient();
+    fn backprop(&self, x: &DMatrix<f64>, y: &OneHotVector) -> (Vec<DMatrix<f64>>, Vec<DMatrix<f64>>) {
+        let (mut nabla_b, mut nabla_w) = self.zero_gradient();

        // feedforward
-        let mut activation = DMatrix::from_vec(x.len(), 1, x);
+        let mut activation = x.clone();
        let mut activations = vec![activation.clone()];
        let mut zs = vec![];

        for (b, w) in zip(&self.biases, &self.weights) {
-            let z = (w * &activation)+b.clone();
+            let z = (w * activation) + b;
            zs.push(z.clone());
            activation = z.map(sigmoid);
            activations.push(activation.clone());
        }
        // backward pass
-        // delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
        let delta: DMatrix<f64> = cost_derivative(&activations[activations.len() - 1], y).component_mul(&zs[zs.len() - 1].map(sigmoid_prime));
-        // println!("delta {:?}", delta);
        let index = nabla_b.len() - 1;
        nabla_b[index] = delta.clone();

        let index = nabla_w.len() - 1;
-        let ac = &activations[activations.len() - 2].transpose();
-        nabla_w[index] = &delta * ac;
+        nabla_w[index] = &delta * (&activations[activations.len() - 2].transpose());
        let lens_zs = zs.len();
        for l in 2..self.num_layers {
            let z = &zs[lens_zs - l];
@ -164,32 +159,25 @@ impl Network {
            .collect();
        (nabla_b, nabla_w)
    }
-
 }

 fn cost_derivative(output_activations: &DMatrix<f64>, y: &OneHotVector) -> DMatrix<f64> {
-    // output_activations - y
-    // println!("output {:?}", output_activations);
-    // println!("expected {:?}", y);
-
    let shape = output_activations.shape();
-    let t = DMatrix::from_iterator(shape.0, shape.1, output_activations.iter().enumerate()
-        .map(|(index, a)| a - y.get(index)));
-    // println!("t {:?}",t);
-    t
+     DMatrix::from_iterator(shape.0, shape.1, output_activations.iter().enumerate()
+        .map(|(index, a)| a - y.get(index)))
 }

-fn argmax(val: Vec<f64>) -> usize {
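+/// Returns the position (in `val.iter()` order) of the first largest element, or 0 when no element is greater than 0.0.
+/// The position is only meaningful as an index for a single-row or single-column matrix.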
+fn argmax(val: DMatrix<f64>) -> usize {
    let mut max = 0.0;
    let mut index = 0;
    for (i, x) in val.iter().enumerate() {
-        // print!("{},",x);
        if *x > max {
            index = i;
            max = *x;
        }
    }
-    // println!();
    index
 }

@ -247,7 +235,7 @@ mod test {

    #[test]
    fn test_argmax() {
-        assert_eq!(5, argmax(vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 4.0, 3.0, 2.0, 1.0]));
+        assert_eq!(5, argmax(DMatrix::from_vec(10, 1, vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 4.0, 3.0, 2.0, 1.0])));
    }

    #[test]
@ -262,15 +250,15 @@ mod test {
        // 2 layers of 2 units
        let mut net = Network::ones(vec![2, 2]);

-        let prediction = net.feed_forward_activation(vec![2.0, 2.0], |a| {});
-        assert_eq!(prediction, vec![5.0, 5.0])
+        let prediction = net.feed_forward_activation(&DMatrix::from_vec(2, 1, vec![2.0, 2.0]), |a| {});
+        assert_eq!(prediction, DMatrix::from_vec(2, 1, vec![5.0, 5.0]))
    }

    #[test]
    fn test_sgd() {
        // 2 layers of 2 units
        let mut net = Network::ones(vec![2, 2]);
-        let data = Data(vec![DataLine { inputs: vec![1.0, 1.0], label: OneHotVector::new(1) }]);
+        let data = Data(vec![DataLine { inputs: DMatrix::from_vec(2, 1, vec![1.0, 1.0]), label: OneHotVector::new(1) }]);
        net.sgd(data, 1, 1, 0.001, None);
        println!("{:?}", net);
    }