bugs fixed, improved loading
This commit is contained in:
parent
068cf2a1d1
commit
8acf2a11d5
5 changed files with 158 additions and 76 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
|
@ -1,4 +1,4 @@
|
||||||
/target
|
/target
|
||||||
*.iml
|
*.iml
|
||||||
.idea
|
.idea
|
||||||
src/data/training.json
|
src/data/training.jsonde
|
||||||
|
|
@ -11,3 +11,5 @@ to do:
|
||||||
* train using actual training data
|
* train using actual training data
|
||||||
* evaluate with test/validation data
|
* evaluate with test/validation data
|
||||||
* make more efficient
|
* make more efficient
|
||||||
|
|
||||||
|
training_data/test_data not included
|
||||||
|
|
|
||||||
|
|
@ -1,62 +1,72 @@
|
||||||
use std::iter::zip;
|
use std::fmt::Debug;
|
||||||
|
|
||||||
use rand::prelude::*;
|
use rand::prelude::*;
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
|
|
||||||
pub fn load_data() -> Data<f32, OneHotVector> {
|
pub fn load_data() -> (Data<f64, OneHotVector>, Data<f64, OneHotVector>) {
|
||||||
// the mnist data is structured as
|
// the mnist data is structured as
|
||||||
// x: [[[pixels]],[[pixels]], etc],
|
// x: [[[pixels]],[[pixels]], etc],
|
||||||
// y: [label1, label2, etc]
|
// y: [label1, label2, etc]
|
||||||
// this is transformed to:
|
// this is transformed to:
|
||||||
// Data : Vec<DataLine>
|
// Data : Vec<DataLine>
|
||||||
// DataLine {inputs: Vec<pixels as f32>, label: f32}
|
// DataLine {inputs: Vec<pixels as f64>, label: f64}
|
||||||
let raw_data: RawData = serde_json::from_slice(include_bytes!("data/unittest.json")).unwrap();
|
let raw_training_data: Vec<RawData> = serde_json::from_slice(include_bytes!("data/training.json")).unwrap();
|
||||||
let mut vec = Vec::new();
|
let raw_test_data: Vec<RawData> = serde_json::from_slice(include_bytes!("data/test.json")).unwrap();
|
||||||
for (x, y) in zip(raw_data.x, raw_data.y) {
|
|
||||||
vec.push(DataLine { inputs: x, label: onehot(y) });
|
|
||||||
}
|
|
||||||
|
|
||||||
Data(vec)
|
let train = vectorize(raw_training_data);
|
||||||
|
let test = vectorize(raw_test_data);
|
||||||
|
|
||||||
|
(Data(train), Data(test))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn vectorize(raw_training_data: Vec<RawData>) -> Vec<DataLine<f64, OneHotVector>>{
|
||||||
|
let mut result = Vec::new();
|
||||||
|
for line in raw_training_data {
|
||||||
|
result.push(DataLine { inputs: line.x, label: onehot(line.y) });
|
||||||
|
}
|
||||||
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
struct RawData {
|
struct RawData {
|
||||||
x: Vec<Vec<f32>>,
|
x: Vec<f64>,
|
||||||
y: Vec<u8>,
|
y: u8,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// X is type of input
|
/// X is type of input
|
||||||
/// Y is type of output
|
/// Y is type of output
|
||||||
pub struct DataLine<X, Y> {
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct DataLine<X, Y> where X: Clone, Y: Clone {
|
||||||
pub inputs: Vec<X>,
|
pub inputs: Vec<X>,
|
||||||
pub label: Y,
|
pub label: Y,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// simple way to encode a onehot vector. An object that returns 1.0 if you get the 'right' index, or 0.0 otherwise
|
||||||
pub struct OneHotVector{
|
#[derive(Debug, Clone)]
|
||||||
pub val: usize
|
pub struct OneHotVector {
|
||||||
|
pub val: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl OneHotVector{
|
impl OneHotVector {
|
||||||
fn new(val: usize) -> Self{
|
pub fn new(val: usize) -> Self {
|
||||||
Self{
|
Self {
|
||||||
val
|
val
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get(&self, index: usize) -> f32{
|
pub fn get(&self, index: usize) -> f64 {
|
||||||
if self.val == index {
|
if self.val == index {
|
||||||
1.0
|
1.0
|
||||||
} else {
|
} else {
|
||||||
0.0
|
0.0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Data<X, Y>(pub Vec<DataLine<X, Y>>);
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct Data<X, Y>(pub Vec<DataLine<X, Y>>) where X: Clone, Y: Clone ;
|
||||||
|
|
||||||
impl<X, Y> Data<X, Y> {
|
impl<X, Y> Data<X, Y> where X: Clone, Y: Clone {
|
||||||
pub fn shuffle(&mut self) {
|
pub fn shuffle(&mut self) {
|
||||||
let mut rng = thread_rng();
|
let mut rng = thread_rng();
|
||||||
self.0.shuffle(&mut rng);
|
self.0.shuffle(&mut rng);
|
||||||
|
|
@ -66,7 +76,7 @@ impl<X, Y> Data<X, Y> {
|
||||||
self.0.len()
|
self.0.len()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_empty(&self, ) -> bool{
|
pub fn is_empty(&self) -> bool {
|
||||||
self.0.is_empty()
|
self.0.is_empty()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -77,7 +87,6 @@ impl<X, Y> Data<X, Y> {
|
||||||
batches.push(&self.0[offset..offset + batch_size]);
|
batches.push(&self.0[offset..offset + batch_size]);
|
||||||
offset += batch_size;
|
offset += batch_size;
|
||||||
}
|
}
|
||||||
batches.push(&self.0[offset..self.0.len()]);
|
|
||||||
batches
|
batches
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,10 @@
|
||||||
use mnist_rs::dataloader::load_data;
|
use mnist_rs::dataloader::load_data;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let mut net = mnist_rs::net::Network::from(vec![784, 30, 10]);
|
let mut net = mnist_rs::net::Network::gaussian(vec![784, 30, 10]);
|
||||||
let training_data = load_data();
|
let (training_data, test_data) = load_data();
|
||||||
|
|
||||||
net.sgd(training_data, 30, 10, 3.0, &None);
|
net.sgd(training_data, 30, 1, 0.01, Some(test_data));
|
||||||
|
|
||||||
|
|
||||||
// let sizes = vec![5,3,2];
|
// let sizes = vec![5,3,2];
|
||||||
|
|
|
||||||
161
src/net.rs
161
src/net.rs
|
|
@ -1,5 +1,5 @@
|
||||||
use std::iter::zip;
|
use std::iter::zip;
|
||||||
use std::ops::Add;
|
use std::ops::{Add, Sub};
|
||||||
|
|
||||||
use nalgebra::DMatrix;
|
use nalgebra::DMatrix;
|
||||||
use rand::prelude::*;
|
use rand::prelude::*;
|
||||||
|
|
@ -12,8 +12,8 @@ use crate::mat::add;
|
||||||
pub struct Network {
|
pub struct Network {
|
||||||
_sizes: Vec<usize>,
|
_sizes: Vec<usize>,
|
||||||
num_layers: usize,
|
num_layers: usize,
|
||||||
pub biases: Vec<DMatrix<f32>>,
|
pub biases: Vec<DMatrix<f64>>,
|
||||||
pub weights: Vec<DMatrix<f32>>,
|
pub weights: Vec<DMatrix<f64>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Network {
|
impl Network {
|
||||||
|
|
@ -27,25 +27,42 @@ impl Network {
|
||||||
/// layer is assumed to be an input layer, and by convention we
|
/// layer is assumed to be an input layer, and by convention we
|
||||||
/// won't set any biases for those neurons, since biases are only
|
/// won't set any biases for those neurons, since biases are only
|
||||||
/// ever used in computing the outputs from later layers.
|
/// ever used in computing the outputs from later layers.
|
||||||
pub fn from(sizes: Vec<usize>) -> Self {
|
pub fn gaussian(sizes: Vec<usize>) -> Self {
|
||||||
Self {
|
Self {
|
||||||
_sizes: sizes.clone(),
|
_sizes: sizes.clone(),
|
||||||
num_layers: sizes.len(),
|
num_layers: sizes.len(),
|
||||||
biases: biases(sizes[1..].to_vec()),
|
biases: biases(sizes[1..].to_vec(), |size: &usize| random_matrix(*size, 1)),
|
||||||
weights: weights(zip(sizes[..sizes.len() - 1].to_vec(), sizes[1..].to_vec()).collect()),
|
weights: weights(zip(sizes[..sizes.len() - 1].to_vec(), sizes[1..].to_vec()).collect(),
|
||||||
|
|size| random_matrix(size.1, size.0)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn feed_forward(&self, input: Vec<f32>) -> Vec<f32> {
|
/// Creates a network where all weights and biases are set to 1.0
|
||||||
|
/// This is for testing the software itself
|
||||||
|
pub fn ones(sizes: Vec<usize>) -> Self {
|
||||||
|
Self {
|
||||||
|
_sizes: sizes.clone(),
|
||||||
|
num_layers: sizes.len(),
|
||||||
|
biases: biases(sizes[1..].to_vec(), |size: &usize| DMatrix::from_fn(*size, 1, |_, _| 1.0)),
|
||||||
|
weights: weights(zip(sizes[..sizes.len() - 1].to_vec(), sizes[1..].to_vec()).collect(),
|
||||||
|
|shape| DMatrix::from_fn(shape.1, shape.0, |_, _| 1.0)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn feed_forward(&self, input: Vec<f64>) -> Vec<f64> {
|
||||||
|
self.feed_forward_activation(input, sigmoid_inplace)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn feed_forward_activation(&self, input: Vec<f64>, activation: fn(&mut f64)) -> Vec<f64> {
|
||||||
let mut a = DMatrix::from_vec(input.len(), 1, input);
|
let mut a = DMatrix::from_vec(input.len(), 1, input);
|
||||||
for (b, w) in zip(&self.biases, &self.weights) {
|
for (b, w) in zip(&self.biases, &self.weights) {
|
||||||
a = add(b.clone(), w * a).unwrap();
|
a = add(b.clone(), w * a).unwrap();
|
||||||
a.apply(sigmoid_inplace);
|
a.apply(activation);
|
||||||
}
|
}
|
||||||
a.column(1).iter().copied().collect()
|
a.column(0).iter().copied().collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn sgd(&mut self, mut training_data: Data<f32, OneHotVector>, epochs: usize, minibatch_size: usize, eta: f32, test_data: &Option<Data<f32, OneHotVector>>) {
|
pub fn sgd(&mut self, mut training_data: Data<f64, OneHotVector>, epochs: usize, minibatch_size: usize, eta: f64, test_data: Option<Data<f64, OneHotVector>>) {
|
||||||
for j in 0..epochs {
|
for j in 0..epochs {
|
||||||
training_data.shuffle();
|
training_data.shuffle();
|
||||||
let mini_batches = training_data.as_batches(minibatch_size);
|
let mini_batches = training_data.as_batches(minibatch_size);
|
||||||
|
|
@ -53,7 +70,7 @@ impl Network {
|
||||||
self.update_mini_batch(mini_batch, eta);
|
self.update_mini_batch(mini_batch, eta);
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(test_data) = test_data {
|
if let Some(test_data) = &test_data {
|
||||||
println!("Epoch {}: {} / {}", j, self.evaluate(test_data), test_data.len());
|
println!("Epoch {}: {} / {}", j, self.evaluate(test_data), test_data.len());
|
||||||
} else {
|
} else {
|
||||||
println!("Epoch {} complete", j);
|
println!("Epoch {} complete", j);
|
||||||
|
|
@ -65,50 +82,54 @@ impl Network {
|
||||||
/// gradient descent using backpropagation to a single mini batch.
|
/// gradient descent using backpropagation to a single mini batch.
|
||||||
/// The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``
|
/// The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``
|
||||||
/// is the learning rate.
|
/// is the learning rate.
|
||||||
fn update_mini_batch(&mut self, mini_batch: &[DataLine<f32, OneHotVector>], eta: f32) {
|
fn update_mini_batch(&mut self, mini_batch: &[DataLine<f64, OneHotVector>], eta: f64) {
|
||||||
let mut nabla_b: Vec<DMatrix<f32>> = self.biases.iter()
|
let mut nabla_b: Vec<DMatrix<f64>> = self.biases.iter()
|
||||||
.map(|b| b.shape())
|
.map(|b| b.shape())
|
||||||
.map(|s| DMatrix::zeros(s.0, s.1))
|
.map(|s| DMatrix::zeros(s.0, s.1))
|
||||||
.collect();
|
.collect();
|
||||||
let mut nabla_w: Vec<DMatrix<f32>> = self.weights.iter()
|
let mut nabla_w: Vec<DMatrix<f64>> = self.weights.iter()
|
||||||
.map(|w| w.shape())
|
.map(|w| w.shape())
|
||||||
.map(|s| DMatrix::zeros(s.0, s.1))
|
.map(|s| DMatrix::zeros(s.0, s.1))
|
||||||
.collect();
|
.collect();
|
||||||
for line in mini_batch.iter() {
|
for line in mini_batch.iter() {
|
||||||
let (delta_nabla_b, delta_nabla_w) = self.backprop(line.inputs.to_vec(), &line.label);
|
let (delta_nabla_b, delta_nabla_w) = self.backprop(line.inputs.to_vec(), &line.label);
|
||||||
|
|
||||||
|
// nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
|
||||||
|
// nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
|
||||||
nabla_b = zip(&nabla_b, &delta_nabla_b).map(|(nb, dnb)| nb.add(dnb)).collect();
|
nabla_b = zip(&nabla_b, &delta_nabla_b).map(|(nb, dnb)| nb.add(dnb)).collect();
|
||||||
nabla_w = zip(&nabla_w, &delta_nabla_w).map(|(nw, dnw)| nw.add(dnw)).collect();
|
nabla_w = zip(&nabla_w, &delta_nabla_w).map(|(nw, dnw)| nw.add(dnw)).collect();
|
||||||
}
|
}
|
||||||
|
|
||||||
self.weights = zip(&self.weights, &nabla_w)
|
self.weights = zip(&self.weights, &nabla_w)
|
||||||
.map(|(w, nw)| (w.add_scalar(-eta / mini_batch.len() as f32)).component_mul(nw)).collect();
|
.map(|(w, nw)| w.sub(nw.scale(eta / mini_batch.len() as f64))).collect();
|
||||||
|
|
||||||
self.biases = zip(&self.biases, &nabla_b)
|
self.biases = zip(&self.biases, &nabla_b)
|
||||||
.map(|(b, nb)| (b.add_scalar(-eta / mini_batch.len() as f32)).component_mul(nb)).collect();
|
.map(|(b, nb)| b.sub(nb.scale(eta / mini_batch.len() as f64))).collect();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the number of test inputs for which the neural
|
/// Return the number of test inputs for which the neural
|
||||||
/// network outputs the correct result. Note that the neural
|
/// network outputs the correct result. Note that the neural
|
||||||
/// network's output is assumed to be the index of whichever
|
/// network's output is assumed to be the index of whichever
|
||||||
/// neuron in the final layer has the highest activation.
|
/// neuron in the final layer has the highest activation.
|
||||||
fn evaluate(&self, test_data: &Data<f32, OneHotVector>) -> usize {
|
fn evaluate(&self, test_data: &Data<f64, OneHotVector>) -> usize {
|
||||||
let test_results: Vec<(usize, usize)> = test_data.0.iter()
|
let test_results: Vec<(usize, usize)> = test_data.0.iter()
|
||||||
.map(|line| (argmax(self.feed_forward(line.inputs.clone())), line.label.val))
|
.map(|line| (argmax(self.feed_forward(line.inputs.clone())), line.label.val))
|
||||||
.collect();
|
.collect();
|
||||||
test_results.into_iter().filter(|(x, y)| x == y).count()
|
|
||||||
|
test_results.into_iter().filter(|(x, y)| *x == *y).count()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return a tuple `(nabla_b, nabla_w)` representing the
|
/// Return a tuple `(nabla_b, nabla_w)` representing the
|
||||||
/// gradient for the cost function C_x. `nabla_b` and
|
/// gradient for the cost function C_x. `nabla_b` and
|
||||||
/// `nabla_w` are layer-by-layer lists of matrices, similar
|
/// `nabla_w` are layer-by-layer lists of matrices, similar
|
||||||
/// to `self.biases` and `self.weights`.
|
/// to `self.biases` and `self.weights`.
|
||||||
fn backprop(&self, x: Vec<f32>, y: &OneHotVector) -> (Vec<DMatrix<f32>>, Vec<DMatrix<f32>>) {
|
fn backprop(&self, x: Vec<f64>, y: &OneHotVector) -> (Vec<DMatrix<f64>>, Vec<DMatrix<f64>>) {
|
||||||
// zero_grad ie. set gradient to zero
|
// zero_grad ie. set gradient to zero
|
||||||
let mut nabla_b: Vec<DMatrix<f32>> = self.biases.iter()
|
let mut nabla_b: Vec<DMatrix<f64>> = self.biases.iter()
|
||||||
.map(|b| b.shape())
|
.map(|b| b.shape())
|
||||||
.map(|s| DMatrix::zeros(s.0, s.1))
|
.map(|s| DMatrix::zeros(s.0, s.1))
|
||||||
.collect();
|
.collect();
|
||||||
let mut nabla_w: Vec<DMatrix<f32>> = self.weights.iter()
|
let mut nabla_w: Vec<DMatrix<f64>> = self.weights.iter()
|
||||||
.map(|w| w.shape())
|
.map(|w| w.shape())
|
||||||
.map(|s| DMatrix::zeros(s.0, s.1))
|
.map(|s| DMatrix::zeros(s.0, s.1))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
@ -119,14 +140,15 @@ impl Network {
|
||||||
let mut zs = vec![];
|
let mut zs = vec![];
|
||||||
|
|
||||||
for (b, w) in zip(&self.biases, &self.weights) {
|
for (b, w) in zip(&self.biases, &self.weights) {
|
||||||
let z = add(w * &activation, b.clone()).unwrap();
|
let z = (w * &activation)+b.clone();
|
||||||
zs.push(z.clone());
|
zs.push(z.clone());
|
||||||
activation = z.map(sigmoid);
|
activation = z.map(sigmoid);
|
||||||
activations.push(activation.clone());
|
activations.push(activation.clone());
|
||||||
}
|
}
|
||||||
// backward pass
|
// backward pass
|
||||||
// delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
|
// delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
|
||||||
let delta: DMatrix<f32> = self.cost_derivative(&activations[activations.len() - 1], y).component_mul(&zs[zs.len() - 1].map(sigmoid_prime));
|
let delta: DMatrix<f64> = cost_derivative(&activations[activations.len() - 1], y).component_mul(&zs[zs.len() - 1].map(sigmoid_prime));
|
||||||
|
// println!("delta {:?}", delta);
|
||||||
let index = nabla_b.len() - 1;
|
let index = nabla_b.len() - 1;
|
||||||
nabla_b[index] = delta.clone();
|
nabla_b[index] = delta.clone();
|
||||||
|
|
||||||
|
|
@ -136,75 +158,124 @@ impl Network {
|
||||||
let lens_zs = zs.len();
|
let lens_zs = zs.len();
|
||||||
for l in 2..self.num_layers {
|
for l in 2..self.num_layers {
|
||||||
let z = &zs[lens_zs - l];
|
let z = &zs[lens_zs - l];
|
||||||
let sp = z.map(sigmoid_prime);
|
|
||||||
let weight = self.weights[self.weights.len() - l + 1].transpose();
|
let weight = self.weights[self.weights.len() - l + 1].transpose();
|
||||||
let delta2 = (weight * &delta).component_mul(&sp);
|
let delta = (weight * &delta).component_mul(&z.map(sigmoid_prime));
|
||||||
let len_nb = nabla_b.len();
|
let len_nb = nabla_b.len();
|
||||||
nabla_b[len_nb - l] = delta2.clone();
|
nabla_b[len_nb - l] = delta.clone();
|
||||||
let len_nw = nabla_w.len();
|
let len_nw = nabla_w.len();
|
||||||
nabla_w[len_nw - l] = delta2 * activations[activations.len() - l - 1].transpose();
|
nabla_w[len_nw - l] = delta * activations[activations.len() - l - 1].transpose();
|
||||||
}
|
}
|
||||||
|
|
||||||
(nabla_b, nabla_w)
|
(nabla_b, nabla_w)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn cost_derivative(&self, output_activations: &DMatrix<f32>, y: &OneHotVector) -> DMatrix<f32> {
|
|
||||||
// output_activations - y
|
|
||||||
let shape = output_activations.shape();
|
|
||||||
DMatrix::from_iterator(shape.0, shape.1, output_activations.iter().enumerate()
|
|
||||||
.map(|(index, a)| a - y.get(index)))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn argmax(val: Vec<f32>) -> usize {
|
fn cost_derivative(output_activations: &DMatrix<f64>, y: &OneHotVector) -> DMatrix<f64> {
|
||||||
|
// output_activations - y
|
||||||
|
// println!("output {:?}", output_activations);
|
||||||
|
// println!("expected {:?}", y);
|
||||||
|
|
||||||
|
let shape = output_activations.shape();
|
||||||
|
let t = DMatrix::from_iterator(shape.0, shape.1, output_activations.iter().enumerate()
|
||||||
|
.map(|(index, a)| a - y.get(index)));
|
||||||
|
// println!("t {:?}",t);
|
||||||
|
t
|
||||||
|
}
|
||||||
|
|
||||||
|
fn argmax(val: Vec<f64>) -> usize {
|
||||||
let mut max = 0.0;
|
let mut max = 0.0;
|
||||||
let mut index = 0;
|
let mut index = 0;
|
||||||
for (i, x) in val.iter().enumerate() {
|
for (i, x) in val.iter().enumerate() {
|
||||||
|
// print!("{},",x);
|
||||||
if *x > max {
|
if *x > max {
|
||||||
index = i;
|
index = i;
|
||||||
max = *x;
|
max = *x;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// println!();
|
||||||
index
|
index
|
||||||
}
|
}
|
||||||
|
|
||||||
fn biases(sizes: Vec<usize>) -> Vec<DMatrix<f32>> {
|
fn biases(sizes: Vec<usize>, init: fn(&usize) -> DMatrix<f64>) -> Vec<DMatrix<f64>> {
|
||||||
sizes.iter().map(|size| random_matrix(*size, 1)).collect()
|
sizes.iter().map(init).collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn weights(sizes: Vec<(usize, usize)>) -> Vec<DMatrix<f32>> {
|
fn weights(sizes: Vec<(usize, usize)>, init: fn(&(usize, usize)) -> DMatrix<f64>) -> Vec<DMatrix<f64>> {
|
||||||
sizes.iter().map(|size| random_matrix(size.1, size.0)).collect()
|
sizes.iter().map(init).collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn random_matrix(rows: usize, cols: usize) -> DMatrix<f32> {
|
fn random_matrix(rows: usize, cols: usize) -> DMatrix<f64> {
|
||||||
let normal: Normal<f32> = Normal::new(0.0, 1.0).unwrap();
|
let normal: Normal<f64> = Normal::new(0.0, 1.0).unwrap();
|
||||||
|
|
||||||
DMatrix::from_fn(rows, cols, |_, _| normal.sample(&mut thread_rng()))
|
DMatrix::from_fn(rows, cols, |_, _| normal.sample(&mut thread_rng()))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn sigmoid_inplace(val: &mut f32) {
|
fn sigmoid_inplace(val: &mut f64) {
|
||||||
*val = sigmoid(*val);
|
*val = sigmoid(*val);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn sigmoid(val: f32) -> f32 {
|
fn sigmoid(val: f64) -> f64 {
|
||||||
1.0 / (1.0 + (-val).exp())
|
1.0 / (1.0 + (-val).exp())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Derivative of the sigmoid function.
|
/// Derivative of the sigmoid function.
|
||||||
fn sigmoid_prime(val: f32) -> f32 {
|
fn sigmoid_prime(val: f64) -> f64 {
|
||||||
sigmoid(val) * (1.0 - sigmoid(val))
|
sigmoid(val) * (1.0 - sigmoid(val))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
|
use std::convert::identity;
|
||||||
use nalgebra::DMatrix;
|
use nalgebra::DMatrix;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_sigmoid() {
|
fn test_sigmoid() {
|
||||||
let mut mat: DMatrix<f32> = DMatrix::from_vec(1, 1, vec![0.0]);
|
let mut mat: DMatrix<f64> = DMatrix::from_vec(1, 1, vec![0.0]);
|
||||||
mat.apply(sigmoid_inplace);
|
mat.apply(sigmoid_inplace);
|
||||||
assert_eq!(mat.get(0), Some(&0.5));
|
assert_eq!(mat.get(0), Some(&0.5));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_sigmoid_inplace() {
|
||||||
|
let mut v = 10.0;
|
||||||
|
sigmoid_inplace(&mut v);
|
||||||
|
assert_eq!(0.9999546, v);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_sigmoid_prime() {
|
||||||
|
assert_eq!(0.19661193324148185, sigmoid_prime(1.0))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_argmax() {
|
||||||
|
assert_eq!(5, argmax(vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 4.0, 3.0, 2.0, 1.0]));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_cost_derivative() {
|
||||||
|
let matrix = DMatrix::from_vec(4, 1, vec![0.0, 1.0, 2.0, -2.0]);
|
||||||
|
let delta = cost_derivative(&matrix, &OneHotVector::new(1));
|
||||||
|
assert_eq!(delta, DMatrix::from_vec(4, 1, vec![0.0, 0.0, 2.0, -2.0]));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_feedforward() {
|
||||||
|
// 2 layers of 2 units
|
||||||
|
let mut net = Network::ones(vec![2, 2]);
|
||||||
|
|
||||||
|
let prediction = net.feed_forward_activation(vec![2.0, 2.0], |a| {});
|
||||||
|
assert_eq!(prediction, vec![5.0, 5.0])
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_sgd() {
|
||||||
|
// 2 layers of 2 units
|
||||||
|
let mut net = Network::ones(vec![2, 2]);
|
||||||
|
let data = Data(vec![DataLine { inputs: vec![1.0, 1.0], label: OneHotVector::new(1) }]);
|
||||||
|
net.sgd(data, 1, 1, 0.001, None);
|
||||||
|
println!("{:?}", net);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Loading…
Add table
Reference in a new issue