added convert_pickle.py
This commit is contained in:
parent
69d518e975
commit
221e5aa058
4 changed files with 55 additions and 48 deletions
22
convert_pickle.py
Normal file
22
convert_pickle.py
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
import pickle
|
||||||
|
import gzip
|
||||||
|
import json
|
||||||
|
|
||||||
|
# Load the data from the .pkl.gz file
|
||||||
|
# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only run this script against an mnist.pkl.gz obtained from a trusted source.
with gzip.open("mnist.pkl.gz", "rb") as f:
    # The archive unpickles to three splits: training, validation and test.
    # encoding="latin1" is presumably needed because the archive was pickled
    # under Python 2 -- TODO confirm against the source of the file.
    training_data, validation_data, test_data = pickle.load(f, encoding="latin1")
|
||||||
|
|
||||||
|
# Define a helper function to convert the data into JSON serializable format
|
||||||
|
def convert_data(data):
    """Turn one ``(features, labels)`` split into a JSON-serializable list.

    Args:
        data: A 2-tuple ``(features, labels)``. Each feature row must provide
            ``.tolist()`` and each label must be convertible with ``int()``.

    Returns:
        A list with one ``{"x": <feature list>, "y": <int label>}`` dict per
        sample, in the original order.

    Raises:
        ValueError: If ``features`` and ``labels`` differ in length.
    """
    features, labels = data
    # Fail loudly on a mismatch instead of silently dropping samples.
    if len(features) != len(labels):
        raise ValueError(
            "features and labels differ in length: %d != %d"
            % (len(features), len(labels))
        )
    return [{"x": row.tolist(), "y": int(label)} for row, label in zip(features, labels)]
|
||||||
|
|
||||||
|
# Convert and save to JSON
|
||||||
|
# Write each split to its own JSON file, in the order training, validation, test.
for out_path, split in (
    ("training_data.json", training_data),
    ("validation_data.json", validation_data),
    ("test_data.json", test_data),
):
    with open(out_path, "w") as out_file:
        json.dump(convert_data(split), out_file)
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
|
use nalgebra::DMatrix;
|
||||||
|
|
||||||
use rand::prelude::*;
|
use rand::prelude::*;
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
|
|
@ -10,8 +11,8 @@ pub fn load_data() -> (Data<f64, OneHotVector>, Data<f64, OneHotVector>) {
|
||||||
// this is transformed to:
|
// this is transformed to:
|
||||||
// Data : Vec<DataLine>
|
// Data : Vec<DataLine>
|
||||||
// DataLine {inputs: Vec<pixels as f64>, label: f64}
|
// DataLine {inputs: Vec<pixels as f64>, label: f64}
|
||||||
let raw_training_data: Vec<RawData> = serde_json::from_slice(include_bytes!("data/training.json")).unwrap();
|
let raw_training_data: Vec<RawData> = serde_json::from_slice(include_bytes!("data/training_data.json")).unwrap();
|
||||||
let raw_test_data: Vec<RawData> = serde_json::from_slice(include_bytes!("data/test.json")).unwrap();
|
let raw_test_data: Vec<RawData> = serde_json::from_slice(include_bytes!("data/test_data.json")).unwrap();
|
||||||
|
|
||||||
let train = vectorize(raw_training_data);
|
let train = vectorize(raw_training_data);
|
||||||
let test = vectorize(raw_test_data);
|
let test = vectorize(raw_test_data);
|
||||||
|
|
@ -19,10 +20,10 @@ pub fn load_data() -> (Data<f64, OneHotVector>, Data<f64, OneHotVector>) {
|
||||||
(Data(train), Data(test))
|
(Data(train), Data(test))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn vectorize(raw_training_data: Vec<RawData>) -> Vec<DataLine<f64, OneHotVector>>{
|
fn vectorize(raw_training_data: Vec<RawData>) -> Vec<DataLine<f64, OneHotVector>> {
|
||||||
let mut result = Vec::new();
|
let mut result = Vec::new();
|
||||||
for line in raw_training_data {
|
for line in raw_training_data {
|
||||||
result.push(DataLine { inputs: line.x, label: onehot(line.y) });
|
result.push(DataLine { inputs: DMatrix::from_vec(line.x.len(), 1, line.x), label: onehot(line.y) });
|
||||||
}
|
}
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
@ -37,7 +38,7 @@ struct RawData {
|
||||||
/// Y is type of output
|
/// Y is type of output
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct DataLine<X, Y> where X: Clone, Y: Clone {
|
pub struct DataLine<X, Y> where X: Clone, Y: Clone {
|
||||||
pub inputs: Vec<X>,
|
pub inputs: DMatrix<X>,
|
||||||
pub label: Y,
|
pub label: Y,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -64,7 +65,7 @@ impl OneHotVector {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Data<X, Y>(pub Vec<DataLine<X, Y>>) where X: Clone, Y: Clone ;
|
pub struct Data<X, Y>(pub Vec<DataLine<X, Y>>) where X: Clone, Y: Clone;
|
||||||
|
|
||||||
impl<X, Y> Data<X, Y> where X: Clone, Y: Clone {
|
impl<X, Y> Data<X, Y> where X: Clone, Y: Clone {
|
||||||
pub fn shuffle(&mut self) {
|
pub fn shuffle(&mut self) {
|
||||||
|
|
|
||||||
12
src/main.rs
12
src/main.rs
|
|
@ -1,16 +1,12 @@
|
||||||
use mnist_rs::dataloader::load_data;
|
use mnist_rs::dataloader::load_data;
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let mut net = mnist_rs::net::Network::gaussian(vec![784, 30, 10]);
|
let mut net = mnist_rs::net::Network::gaussian(vec![784, 30, 10]);
|
||||||
let (training_data, test_data) = load_data();
|
let (training_data, test_data) = load_data();
|
||||||
|
|
||||||
net.sgd(training_data, 30, 1, 0.01, Some(test_data));
|
let t0 = Instant::now();
|
||||||
|
|
||||||
|
|
||||||
// let sizes = vec![5,3,2];
|
|
||||||
// let net = mnist_rs::net::Network::from(sizes);
|
|
||||||
// println!("biases {:?}", net.biases.iter().map(|b|b.shape()).collect::<Vec<(usize,usize)>>());
|
|
||||||
// println!("weights {:?}", net.weights.iter().map(|b|b.shape()).collect::<Vec<(usize,usize)>>());
|
|
||||||
|
|
||||||
|
|
||||||
|
net.sgd(training_data, 30, 10, 3.0, Some(test_data));
|
||||||
|
println!("{}", t0.elapsed().as_millis());
|
||||||
}
|
}
|
||||||
52
src/net.rs
52
src/net.rs
|
|
@ -48,17 +48,17 @@ impl Network {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn feed_forward(&self, input: Vec<f64>) -> Vec<f64> {
|
fn feed_forward(&self, input: &DMatrix<f64>) -> DMatrix<f64> {
|
||||||
self.feed_forward_activation(input, sigmoid_inplace)
|
self.feed_forward_activation(input, sigmoid_inplace)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn feed_forward_activation(&self, input: Vec<f64>, activation: fn(&mut f64)) -> Vec<f64> {
|
fn feed_forward_activation(&self, input: &DMatrix<f64>, activation: fn(&mut f64)) -> DMatrix<f64> {
|
||||||
let mut a = DMatrix::from_vec(input.len(), 1, input);
|
let mut a = input.clone();
|
||||||
for (b, w) in zip(&self.biases, &self.weights) {
|
for (b, w) in zip(&self.biases, &self.weights) {
|
||||||
a = b.clone()+ w * a;
|
a = b + w * a;
|
||||||
a.apply(activation);
|
a.apply(activation);
|
||||||
}
|
}
|
||||||
a.column(0).iter().copied().collect()
|
a
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn sgd(&mut self, mut training_data: Data<f64, OneHotVector>, epochs: usize, minibatch_size: usize, eta: f64, test_data: Option<Data<f64, OneHotVector>>) {
|
pub fn sgd(&mut self, mut training_data: Data<f64, OneHotVector>, epochs: usize, minibatch_size: usize, eta: f64, test_data: Option<Data<f64, OneHotVector>>) {
|
||||||
|
|
@ -84,10 +84,8 @@ impl Network {
|
||||||
fn update_mini_batch(&mut self, mini_batch: &[DataLine<f64, OneHotVector>], eta: f64) {
|
fn update_mini_batch(&mut self, mini_batch: &[DataLine<f64, OneHotVector>], eta: f64) {
|
||||||
let (mut nabla_b, mut nabla_w) = self.zero_gradient();
|
let (mut nabla_b, mut nabla_w) = self.zero_gradient();
|
||||||
for line in mini_batch.iter() {
|
for line in mini_batch.iter() {
|
||||||
let (delta_nabla_b, delta_nabla_w) = self.backprop(line.inputs.to_vec(), &line.label);
|
let (delta_nabla_b, delta_nabla_w) = self.backprop(&line.inputs, &line.label);
|
||||||
|
|
||||||
// nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
|
|
||||||
// nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
|
|
||||||
nabla_b = zip(&nabla_b, &delta_nabla_b).map(|(nb, dnb)| nb.add(dnb)).collect();
|
nabla_b = zip(&nabla_b, &delta_nabla_b).map(|(nb, dnb)| nb.add(dnb)).collect();
|
||||||
nabla_w = zip(&nabla_w, &delta_nabla_w).map(|(nw, dnw)| nw.add(dnw)).collect();
|
nabla_w = zip(&nabla_w, &delta_nabla_w).map(|(nw, dnw)| nw.add(dnw)).collect();
|
||||||
}
|
}
|
||||||
|
|
@ -105,7 +103,7 @@ impl Network {
|
||||||
/// neuron in the final layer has the highest activation.
|
/// neuron in the final layer has the highest activation.
|
||||||
fn evaluate(&self, test_data: &Data<f64, OneHotVector>) -> usize {
|
fn evaluate(&self, test_data: &Data<f64, OneHotVector>) -> usize {
|
||||||
let test_results: Vec<(usize, usize)> = test_data.0.iter()
|
let test_results: Vec<(usize, usize)> = test_data.0.iter()
|
||||||
.map(|line| (argmax(self.feed_forward(line.inputs.clone())), line.label.val))
|
.map(|line| (argmax(self.feed_forward(&line.inputs)), line.label.val))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
test_results.into_iter().filter(|(x, y)| *x == *y).count()
|
test_results.into_iter().filter(|(x, y)| *x == *y).count()
|
||||||
|
|
@ -115,30 +113,27 @@ impl Network {
|
||||||
/// gradient for the cost function C_x. `nabla_b` and
|
/// gradient for the cost function C_x. `nabla_b` and
|
||||||
/// `nabla_w` are layer-by-layer lists of matrices, similar
|
/// `nabla_w` are layer-by-layer lists of matrices, similar
|
||||||
/// to `self.biases` and `self.weights`.
|
/// to `self.biases` and `self.weights`.
|
||||||
fn backprop(&self, x: Vec<f64>, y: &OneHotVector) -> (Vec<DMatrix<f64>>, Vec<DMatrix<f64>>) {
|
fn backprop(&self, x: &DMatrix<f64>, y: &OneHotVector) -> (Vec<DMatrix<f64>>, Vec<DMatrix<f64>>) {
|
||||||
let (mut nabla_b, mut nabla_w) = self.zero_gradient();
|
let (mut nabla_b, mut nabla_w) = self.zero_gradient();
|
||||||
|
|
||||||
// feedforward
|
// feedforward
|
||||||
let mut activation = DMatrix::from_vec(x.len(), 1, x);
|
let mut activation = x.clone();
|
||||||
let mut activations = vec![activation.clone()];
|
let mut activations = vec![activation.clone()];
|
||||||
let mut zs = vec![];
|
let mut zs = vec![];
|
||||||
|
|
||||||
for (b, w) in zip(&self.biases, &self.weights) {
|
for (b, w) in zip(&self.biases, &self.weights) {
|
||||||
let z = (w * &activation)+b.clone();
|
let z = (w * activation) + b;
|
||||||
zs.push(z.clone());
|
zs.push(z.clone());
|
||||||
activation = z.map(sigmoid);
|
activation = z.map(sigmoid);
|
||||||
activations.push(activation.clone());
|
activations.push(activation.clone());
|
||||||
}
|
}
|
||||||
// backward pass
|
// backward pass
|
||||||
// delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
|
|
||||||
let delta: DMatrix<f64> = cost_derivative(&activations[activations.len() - 1], y).component_mul(&zs[zs.len() - 1].map(sigmoid_prime));
|
let delta: DMatrix<f64> = cost_derivative(&activations[activations.len() - 1], y).component_mul(&zs[zs.len() - 1].map(sigmoid_prime));
|
||||||
// println!("delta {:?}", delta);
|
|
||||||
let index = nabla_b.len() - 1;
|
let index = nabla_b.len() - 1;
|
||||||
nabla_b[index] = delta.clone();
|
nabla_b[index] = delta.clone();
|
||||||
|
|
||||||
let index = nabla_w.len() - 1;
|
let index = nabla_w.len() - 1;
|
||||||
let ac = &activations[activations.len() - 2].transpose();
|
nabla_w[index] = &delta * (&activations[activations.len() - 2].transpose());
|
||||||
nabla_w[index] = &delta * ac;
|
|
||||||
let lens_zs = zs.len();
|
let lens_zs = zs.len();
|
||||||
for l in 2..self.num_layers {
|
for l in 2..self.num_layers {
|
||||||
let z = &zs[lens_zs - l];
|
let z = &zs[lens_zs - l];
|
||||||
|
|
@ -164,32 +159,25 @@ impl Network {
|
||||||
.collect();
|
.collect();
|
||||||
(nabla_b, nabla_w)
|
(nabla_b, nabla_w)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn cost_derivative(output_activations: &DMatrix<f64>, y: &OneHotVector) -> DMatrix<f64> {
|
fn cost_derivative(output_activations: &DMatrix<f64>, y: &OneHotVector) -> DMatrix<f64> {
|
||||||
// output_activations - y
|
|
||||||
// println!("output {:?}", output_activations);
|
|
||||||
// println!("expected {:?}", y);
|
|
||||||
|
|
||||||
let shape = output_activations.shape();
|
let shape = output_activations.shape();
|
||||||
let t = DMatrix::from_iterator(shape.0, shape.1, output_activations.iter().enumerate()
|
DMatrix::from_iterator(shape.0, shape.1, output_activations.iter().enumerate()
|
||||||
.map(|(index, a)| a - y.get(index)));
|
.map(|(index, a)| a - y.get(index)))
|
||||||
// println!("t {:?}",t);
|
|
||||||
t
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn argmax(val: Vec<f64>) -> usize {
|
/// index of max value
|
||||||
|
/// only meaningful for single row or column matrix
|
||||||
|
fn argmax(val: DMatrix<f64>) -> usize {
|
||||||
let mut max = 0.0;
|
let mut max = 0.0;
|
||||||
let mut index = 0;
|
let mut index = 0;
|
||||||
for (i, x) in val.iter().enumerate() {
|
for (i, x) in val.iter().enumerate() {
|
||||||
// print!("{},",x);
|
|
||||||
if *x > max {
|
if *x > max {
|
||||||
index = i;
|
index = i;
|
||||||
max = *x;
|
max = *x;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// println!();
|
|
||||||
index
|
index
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -247,7 +235,7 @@ mod test {
|
||||||
|
|
||||||
/// `argmax` picks the position of the single largest entry in a column.
#[test]
fn test_argmax() {
    let column = DMatrix::from_vec(10, 1, vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 4.0, 3.0, 2.0, 1.0]);
    assert_eq!(5, argmax(column));
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
@ -262,15 +250,15 @@ mod test {
|
||||||
// 2 layers of 2 units
|
// 2 layers of 2 units
|
||||||
let mut net = Network::ones(vec![2, 2]);
|
let mut net = Network::ones(vec![2, 2]);
|
||||||
|
|
||||||
let prediction = net.feed_forward_activation(vec![2.0, 2.0], |a| {});
|
let prediction = net.feed_forward_activation(&DMatrix::from_vec(2, 1, vec![2.0, 2.0]), |a| {});
|
||||||
assert_eq!(prediction, vec![5.0, 5.0])
|
assert_eq!(prediction, DMatrix::from_vec(2, 1, vec![5.0, 5.0]))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Smoke test: one epoch of `sgd` over a single sample runs to completion.
#[test]
fn test_sgd() {
    // 2 layers of 2 units
    let mut net = Network::ones(vec![2, 2]);

    let sample = DataLine {
        inputs: DMatrix::from_vec(2, 1, vec![1.0, 1.0]),
        label: OneHotVector::new(1),
    };
    let data = Data(vec![sample]);

    net.sgd(data, 1, 1, 0.001, None);
    println!("{:?}", net);
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue