added convert_pickle.py
This commit is contained in:
parent
69d518e975
commit
221e5aa058
4 changed files with 55 additions and 48 deletions
22
convert_pickle.py
Normal file
22
convert_pickle.py
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
import pickle
|
||||
import gzip
|
||||
import json
|
||||
|
||||
def convert_data(data):
    """Convert one dataset split into a JSON-serializable list of samples.

    Args:
        data: A ``(features, labels)`` pair of equal-length sequences. Each
            feature row must provide ``.tolist()`` (e.g. a NumPy array row)
            and each label must be convertible with ``int()``.

    Returns:
        A list with one ``{"x": <feature list>, "y": <int label>}`` dict per
        sample, in input order.

    Raises:
        ValueError: If ``features`` and ``labels`` differ in length.
    """
    features, labels = data
    if len(features) != len(labels):
        raise ValueError(
            f"features/labels length mismatch: {len(features)} != {len(labels)}"
        )
    # Pair rows with labels directly instead of indexing both by position.
    return [{"x": x.tolist(), "y": int(y)} for x, y in zip(features, labels)]


def main():
    """Read ``mnist.pkl.gz`` and write each of its three splits to a JSON file."""
    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only run this on an archive obtained from a trusted source.
    with gzip.open("mnist.pkl.gz", "rb") as f:
        training_data, validation_data, test_data = pickle.load(f, encoding="latin1")

    # Output files are written to the current working directory.
    outputs = (
        ("training_data.json", training_data),
        ("validation_data.json", validation_data),
        ("test_data.json", test_data),
    )
    for path, split in outputs:
        with open(path, "w") as out:
            json.dump(convert_data(split), out)


# Guard the conversion so importing this file has no side effects.
if __name__ == "__main__":
    main()
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
use std::fmt::Debug;
|
||||
use nalgebra::DMatrix;
|
||||
|
||||
use rand::prelude::*;
|
||||
use serde::Deserialize;
|
||||
|
|
@ -10,8 +11,8 @@ pub fn load_data() -> (Data<f64, OneHotVector>, Data<f64, OneHotVector>) {
|
|||
// this is transformed to:
|
||||
// Data : Vec<DataLine>
|
||||
// DataLine {inputs: Vec<pixels as f64>, label: f64}
|
||||
let raw_training_data: Vec<RawData> = serde_json::from_slice(include_bytes!("data/training.json")).unwrap();
|
||||
let raw_test_data: Vec<RawData> = serde_json::from_slice(include_bytes!("data/test.json")).unwrap();
|
||||
let raw_training_data: Vec<RawData> = serde_json::from_slice(include_bytes!("data/training_data.json")).unwrap();
|
||||
let raw_test_data: Vec<RawData> = serde_json::from_slice(include_bytes!("data/test_data.json")).unwrap();
|
||||
|
||||
let train = vectorize(raw_training_data);
|
||||
let test = vectorize(raw_test_data);
|
||||
|
|
@ -19,10 +20,10 @@ pub fn load_data() -> (Data<f64, OneHotVector>, Data<f64, OneHotVector>) {
|
|||
(Data(train), Data(test))
|
||||
}
|
||||
|
||||
fn vectorize(raw_training_data: Vec<RawData>) -> Vec<DataLine<f64, OneHotVector>>{
|
||||
fn vectorize(raw_training_data: Vec<RawData>) -> Vec<DataLine<f64, OneHotVector>> {
|
||||
let mut result = Vec::new();
|
||||
for line in raw_training_data {
|
||||
result.push(DataLine { inputs: line.x, label: onehot(line.y) });
|
||||
result.push(DataLine { inputs: DMatrix::from_vec(line.x.len(), 1, line.x), label: onehot(line.y) });
|
||||
}
|
||||
result
|
||||
}
|
||||
|
|
@ -37,7 +38,7 @@ struct RawData {
|
|||
/// Y is type of output
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DataLine<X, Y> where X: Clone, Y: Clone {
|
||||
pub inputs: Vec<X>,
|
||||
pub inputs: DMatrix<X>,
|
||||
pub label: Y,
|
||||
}
|
||||
|
||||
|
|
@ -64,7 +65,7 @@ impl OneHotVector {
|
|||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Data<X, Y>(pub Vec<DataLine<X, Y>>) where X: Clone, Y: Clone ;
|
||||
pub struct Data<X, Y>(pub Vec<DataLine<X, Y>>) where X: Clone, Y: Clone;
|
||||
|
||||
impl<X, Y> Data<X, Y> where X: Clone, Y: Clone {
|
||||
pub fn shuffle(&mut self) {
|
||||
|
|
|
|||
12
src/main.rs
12
src/main.rs
|
|
@ -1,16 +1,12 @@
|
|||
use mnist_rs::dataloader::load_data;
|
||||
use std::time::Instant;
|
||||
|
||||
fn main() {
|
||||
let mut net = mnist_rs::net::Network::gaussian(vec![784, 30, 10]);
|
||||
let (training_data, test_data) = load_data();
|
||||
|
||||
net.sgd(training_data, 30, 1, 0.01, Some(test_data));
|
||||
|
||||
|
||||
// let sizes = vec![5,3,2];
|
||||
// let net = mnist_rs::net::Network::from(sizes);
|
||||
// println!("biases {:?}", net.biases.iter().map(|b|b.shape()).collect::<Vec<(usize,usize)>>());
|
||||
// println!("weights {:?}", net.weights.iter().map(|b|b.shape()).collect::<Vec<(usize,usize)>>());
|
||||
|
||||
let t0 = Instant::now();
|
||||
|
||||
net.sgd(training_data, 30, 10, 3.0, Some(test_data));
|
||||
println!("{}", t0.elapsed().as_millis());
|
||||
}
|
||||
56
src/net.rs
56
src/net.rs
|
|
@ -48,17 +48,17 @@ impl Network {
|
|||
}
|
||||
}
|
||||
|
||||
fn feed_forward(&self, input: Vec<f64>) -> Vec<f64> {
|
||||
fn feed_forward(&self, input: &DMatrix<f64>) -> DMatrix<f64> {
|
||||
self.feed_forward_activation(input, sigmoid_inplace)
|
||||
}
|
||||
|
||||
fn feed_forward_activation(&self, input: Vec<f64>, activation: fn(&mut f64)) -> Vec<f64> {
|
||||
let mut a = DMatrix::from_vec(input.len(), 1, input);
|
||||
fn feed_forward_activation(&self, input: &DMatrix<f64>, activation: fn(&mut f64)) -> DMatrix<f64> {
|
||||
let mut a = input.clone();
|
||||
for (b, w) in zip(&self.biases, &self.weights) {
|
||||
a = b.clone()+ w * a;
|
||||
a = b + w * a;
|
||||
a.apply(activation);
|
||||
}
|
||||
a.column(0).iter().copied().collect()
|
||||
a
|
||||
}
|
||||
|
||||
pub fn sgd(&mut self, mut training_data: Data<f64, OneHotVector>, epochs: usize, minibatch_size: usize, eta: f64, test_data: Option<Data<f64, OneHotVector>>) {
|
||||
|
|
@ -82,12 +82,10 @@ impl Network {
|
|||
/// The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``
|
||||
/// is the learning rate.
|
||||
fn update_mini_batch(&mut self, mini_batch: &[DataLine<f64, OneHotVector>], eta: f64) {
|
||||
let (mut nabla_b, mut nabla_w) = self.zero_gradient();
|
||||
let (mut nabla_b, mut nabla_w) = self.zero_gradient();
|
||||
for line in mini_batch.iter() {
|
||||
let (delta_nabla_b, delta_nabla_w) = self.backprop(line.inputs.to_vec(), &line.label);
|
||||
let (delta_nabla_b, delta_nabla_w) = self.backprop(&line.inputs, &line.label);
|
||||
|
||||
// nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
|
||||
// nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
|
||||
nabla_b = zip(&nabla_b, &delta_nabla_b).map(|(nb, dnb)| nb.add(dnb)).collect();
|
||||
nabla_w = zip(&nabla_w, &delta_nabla_w).map(|(nw, dnw)| nw.add(dnw)).collect();
|
||||
}
|
||||
|
|
@ -105,7 +103,7 @@ impl Network {
|
|||
/// neuron in the final layer has the highest activation.
|
||||
fn evaluate(&self, test_data: &Data<f64, OneHotVector>) -> usize {
|
||||
let test_results: Vec<(usize, usize)> = test_data.0.iter()
|
||||
.map(|line| (argmax(self.feed_forward(line.inputs.clone())), line.label.val))
|
||||
.map(|line| (argmax(self.feed_forward(&line.inputs)), line.label.val))
|
||||
.collect();
|
||||
|
||||
test_results.into_iter().filter(|(x, y)| *x == *y).count()
|
||||
|
|
@ -115,30 +113,27 @@ impl Network {
|
|||
/// gradient for the cost function C_x. `nabla_b` and
|
||||
/// `nabla_w` are layer-by-layer lists of matrices, similar
|
||||
/// to `self.biases` and `self.weights`.
|
||||
fn backprop(&self, x: Vec<f64>, y: &OneHotVector) -> (Vec<DMatrix<f64>>, Vec<DMatrix<f64>>) {
|
||||
let (mut nabla_b, mut nabla_w) = self.zero_gradient();
|
||||
fn backprop(&self, x: &DMatrix<f64>, y: &OneHotVector) -> (Vec<DMatrix<f64>>, Vec<DMatrix<f64>>) {
|
||||
let (mut nabla_b, mut nabla_w) = self.zero_gradient();
|
||||
|
||||
// feedforward
|
||||
let mut activation = DMatrix::from_vec(x.len(), 1, x);
|
||||
let mut activation = x.clone();
|
||||
let mut activations = vec![activation.clone()];
|
||||
let mut zs = vec![];
|
||||
|
||||
for (b, w) in zip(&self.biases, &self.weights) {
|
||||
let z = (w * &activation)+b.clone();
|
||||
let z = (w * activation) + b;
|
||||
zs.push(z.clone());
|
||||
activation = z.map(sigmoid);
|
||||
activations.push(activation.clone());
|
||||
}
|
||||
// backward pass
|
||||
// delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
|
||||
let delta: DMatrix<f64> = cost_derivative(&activations[activations.len() - 1], y).component_mul(&zs[zs.len() - 1].map(sigmoid_prime));
|
||||
// println!("delta {:?}", delta);
|
||||
let index = nabla_b.len() - 1;
|
||||
nabla_b[index] = delta.clone();
|
||||
|
||||
let index = nabla_w.len() - 1;
|
||||
let ac = &activations[activations.len() - 2].transpose();
|
||||
nabla_w[index] = &delta * ac;
|
||||
nabla_w[index] = &delta * (&activations[activations.len() - 2].transpose());
|
||||
let lens_zs = zs.len();
|
||||
for l in 2..self.num_layers {
|
||||
let z = &zs[lens_zs - l];
|
||||
|
|
@ -164,32 +159,25 @@ impl Network {
|
|||
.collect();
|
||||
(nabla_b, nabla_w)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
fn cost_derivative(output_activations: &DMatrix<f64>, y: &OneHotVector) -> DMatrix<f64> {
|
||||
// output_activations - y
|
||||
// println!("output {:?}", output_activations);
|
||||
// println!("expected {:?}", y);
|
||||
|
||||
let shape = output_activations.shape();
|
||||
let t = DMatrix::from_iterator(shape.0, shape.1, output_activations.iter().enumerate()
|
||||
.map(|(index, a)| a - y.get(index)));
|
||||
// println!("t {:?}",t);
|
||||
t
|
||||
DMatrix::from_iterator(shape.0, shape.1, output_activations.iter().enumerate()
|
||||
.map(|(index, a)| a - y.get(index)))
|
||||
}
|
||||
|
||||
fn argmax(val: Vec<f64>) -> usize {
|
||||
/// index of max value
|
||||
/// only meaningful for single row or column matrix
|
||||
fn argmax(val: DMatrix<f64>) -> usize {
|
||||
let mut max = 0.0;
|
||||
let mut index = 0;
|
||||
for (i, x) in val.iter().enumerate() {
|
||||
// print!("{},",x);
|
||||
if *x > max {
|
||||
index = i;
|
||||
max = *x;
|
||||
}
|
||||
}
|
||||
// println!();
|
||||
index
|
||||
}
|
||||
|
||||
|
|
@ -247,7 +235,7 @@ mod test {
|
|||
|
||||
#[test]
fn test_argmax() {
    // Column vector: the maximum (5.0) sits at index 5.
    assert_eq!(5, argmax(DMatrix::from_vec(10, 1, vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 4.0, 3.0, 2.0, 1.0])));
    // Single-row matrix: the same contract applies.
    assert_eq!(1, argmax(DMatrix::from_vec(1, 3, vec![0.1, 0.9, 0.5])));
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -262,15 +250,15 @@ mod test {
|
|||
// 2 layers of 2 units
|
||||
let mut net = Network::ones(vec![2, 2]);
|
||||
|
||||
let prediction = net.feed_forward_activation(vec![2.0, 2.0], |a| {});
|
||||
assert_eq!(prediction, vec![5.0, 5.0])
|
||||
let prediction = net.feed_forward_activation(&DMatrix::from_vec(2, 1, vec![2.0, 2.0]), |a| {});
|
||||
assert_eq!(prediction, DMatrix::from_vec(2, 1, vec![5.0, 5.0]))
|
||||
}
|
||||
|
||||
/// Smoke test: one epoch of SGD over a single sample must run to completion.
/// Nothing is asserted; the trained network is only printed for inspection.
#[test]
fn test_sgd() {
    // 2 layers of 2 units
    let mut net = Network::ones(vec![2, 2]);
    let sample = DataLine { inputs: DMatrix::from_vec(2, 1, vec![1.0, 1.0]), label: OneHotVector::new(1) };
    net.sgd(Data(vec![sample]), 1, 1, 0.001, None);
    println!("{:?}", net);
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue