/// <summary>
/// Construct a deep belief neural network. One hidden layer, and one restricted
/// Boltzmann machine wrapping that layer, is created per entry in
/// <paramref name="hidden"/>; an output layer is then attached to the last hidden layer.
/// </summary>
/// <param name="inputCount">The input count.</param>
/// <param name="hidden">The counts for the hidden layers. Must contain at least one entry.</param>
/// <param name="outputCount">The output neuron count.</param>
/// <exception cref="System.ArgumentNullException">Thrown when <paramref name="hidden"/> is null.</exception>
/// <exception cref="System.ArgumentException">Thrown when <paramref name="hidden"/> is empty.</exception>
public DeepBeliefNetwork(int inputCount, int[] hidden, int outputCount)
{
    if (hidden == null)
    {
        throw new System.ArgumentNullException("hidden");
    }

    if (hidden.Length == 0)
    {
        // The output layer takes its input size from the last hidden layer, so
        // without this guard the lookup below would index hidden[-1].
        throw new System.ArgumentException("At least one hidden layer count is required.", "hidden");
    }

    _layers = new HiddenLayer[hidden.Length];
    _rbm = new RestrictedBoltzmannMachine[hidden.Length];

    for (var i = 0; i < _rbm.Length; i++)
    {
        // The first hidden layer is sized from the network input; every later
        // layer is sized from the hidden layer that precedes it.
        int inputSize = i == 0 ? inputCount : hidden[i - 1];

        _layers[i] = new HiddenLayer(this, inputSize, hidden[i]);
        _rbm[i] = new RestrictedBoltzmannMachine(_layers[i]);
    }

    _outputLayer = new DeepLayer(this, hidden[hidden.Length - 1], outputCount);
    Random = new MersenneTwisterGenerateRandom();
}
/// <summary>
/// Sample the visible (input) neurons from a set of hidden (output) samples. For every
/// visible neuron the mean is computed with <c>PropDown</c>, and a sample is then drawn
/// from that mean via <c>rbm.binomial</c>.
/// </summary>
/// <param name="rbm">The RBM to use.</param>
/// <param name="sampleH0">Hidden (h) samples.</param>
/// <param name="mean">Output: Visible (v) mean, one entry per visible neuron.</param>
/// <param name="sample">Output: Visible (v) sample, one entry per visible neuron.</param>
public void SampleVH(RestrictedBoltzmannMachine rbm, double[] sampleH0, double[] mean, double[] sample)
{
    for (int visible = 0; visible < rbm.VisibleCount; visible++)
    {
        // Mean of this visible neuron, given the hidden samples and its bias.
        double probability = PropDown(rbm, sampleH0, visible, rbm.BiasV[visible]);
        mean[visible] = probability;

        // Draw a single-trial sample using that mean.
        sample[visible] = rbm.binomial(1, probability);
    }
}
/// <summary>
/// Sample the hidden (output) neurons from the visible (input) values. For every hidden
/// neuron the mean is computed with <c>PropUp</c> using that neuron's weight row and
/// bias, and a sample is then drawn from that mean via <c>rbm.binomial</c>.
/// </summary>
/// <param name="rbm">The RBM to use.</param>
/// <param name="v0Sample">The input to the layer.</param>
/// <param name="mean">Output: mean value of each hidden neuron.</param>
/// <param name="sample">Output: sample, based on mean.</param>
public void SampleHV(RestrictedBoltzmannMachine rbm, double[] v0Sample, double[] mean, double[] sample)
{
    for (int hidden = 0; hidden < rbm.HiddenCount; hidden++)
    {
        double[] incomingWeights = rbm.Layer.Weights[hidden];
        double hiddenBias = rbm.BiasH[hidden];

        // Mean of this hidden neuron, given the visible values.
        double probability = PropUp(rbm, v0Sample, incomingWeights, hiddenBias);
        mean[hidden] = probability;

        // Draw a single-trial sample using that mean.
        sample[hidden] = rbm.binomial(1, probability);
    }
}
/// <summary>
/// Propagate downward, from hidden to visible: estimate the mean of a single visible
/// neuron as the sigmoid of the weighted sum of the hidden values plus a bias.
/// </summary>
/// <param name="rbm">The RBM to use.</param>
/// <param name="h">The hidden neurons.</param>
/// <param name="i">The visible neuron to use (second index into the weight matrix).</param>
/// <param name="b">Bias value.</param>
/// <returns>The estimated mean.</returns>
public double PropDown(RestrictedBoltzmannMachine rbm, double[] h, int i, double b)
{
    double activation = 0.0;

    // Weights are indexed [hidden][visible], so walk the column for visible neuron i.
    int hidden = 0;
    while (hidden < rbm.HiddenCount)
    {
        activation += rbm.Layer.Weights[hidden][i] * h[hidden];
        hidden++;
    }

    return RestrictedBoltzmannMachine.Sigmoid(activation + b);
}
/// <summary>
/// Propagate upward, from visible to hidden: estimate the mean of a single hidden
/// neuron as the sigmoid of the weighted sum of the visible values plus a bias.
/// </summary>
/// <param name="rbm">The RBM to use; supplies the number of visible neurons to sum over.</param>
/// <param name="v">The input (v), visible neurons.</param>
/// <param name="w">The weights, one per visible neuron.</param>
/// <param name="b">The bias.</param>
/// <returns>The mean.</returns>
public double PropUp(RestrictedBoltzmannMachine rbm, double[] v, double[] w, double b)
{
    double activation = 0.0;

    int visible = 0;
    while (visible < rbm.VisibleCount)
    {
        activation += w[visible] * v[visible];
        visible++;
    }

    return RestrictedBoltzmannMachine.Sigmoid(activation + b);
}
/// <summary>
/// Perform contrastive divergence, also known as the up-down algorithm, for a single
/// training pattern. The hidden neurons are first sampled from the input (positive
/// phase), then <paramref name="k"/> Gibbs steps are run (negative phase), and finally
/// the weights and both bias vectors of the RBM are adjusted in place.
/// </summary>
/// <param name="rbm">The RBM to use. Its weights, hidden biases and visible biases are modified.</param>
/// <param name="input">The input training pattern.</param>
/// <param name="lr">The learning rate.</param>
/// <param name="k">The number of cycles. If this is not positive, no Gibbs step runs and the
/// negative-phase values used in the update remain zero.</param>
public void ContrastiveDivergence(RestrictedBoltzmannMachine rbm, double[] input, double lr, int k)
{
    int hiddenCount = rbm.HiddenCount;
    int visibleCount = rbm.VisibleCount;

    // Positive phase (P): mean and sample of the hidden (H) neurons only.
    double[] meanPH = new double[hiddenCount];
    double[] samplePH = new double[hiddenCount];

    // Negative phase (N): means and samples of both visible (V) and hidden (H) neurons.
    double[] meansNV = new double[visibleCount];
    double[] samplesNV = new double[visibleCount];
    double[] meansNH = new double[hiddenCount];
    double[] samplesNH = new double[hiddenCount];

    // Fill meanPH and samplePH from the training pattern.
    SampleHV(rbm, input, meanPH, samplePH);

    // The first Gibbs step starts from the positive-phase hidden sample; every later
    // step starts from the hidden sample produced by the step before it.
    double[] chainState = samplePH;
    for (int step = 0; step < k; step++)
    {
        GibbsHVH(rbm, chainState, meansNV, samplesNV, meansNH, samplesNH);
        chainState = samplesNH;
    }

    // Adjust the weights and hidden biases from the difference between the positive
    // and negative phase statistics, scaled by the learning rate and the input length.
    for (int h = 0; h < hiddenCount; h++)
    {
        double[] weightRow = rbm.Layer.Weights[h];
        for (int v = 0; v < visibleCount; v++)
        {
            double positive = meanPH[h] * input[v];
            double negative = meansNH[h] * samplesNV[v];
            weightRow[v] += lr * (positive - negative) / input.Length;
        }

        rbm.BiasH[h] += lr * (samplePH[h] - meansNH[h]) / input.Length;
    }

    // Adjust the visible biases from the difference between the input and its reconstruction sample.
    for (int v = 0; v < visibleCount; v++)
    {
        rbm.BiasV[v] += lr * (input[v] - samplesNV[v]) / input.Length;
    }
}
/// <summary>
/// Perform one step of Gibbs sampling: hidden to visible, then visible back to hidden.
/// The visible sample produced by the first half is the input to the second half.
/// </summary>
/// <param name="rbm">The RBM to use.</param>
/// <param name="sampleH0">The hidden samples to start from.</param>
/// <param name="meansNV">Output: means for the visible (v) neurons.</param>
/// <param name="samplesNV">Output: samples for the visible (v) neurons.</param>
/// <param name="meansNH">Output: means for the hidden (h) neurons.</param>
/// <param name="samplesNH">Output: samples for the hidden (h) neurons.</param>
public void GibbsHVH(
    RestrictedBoltzmannMachine rbm,
    double[] sampleH0,
    double[] meansNV,
    double[] samplesNV,
    double[] meansNH,
    double[] samplesNH)
{
    // Down: reconstruct the visible neurons from the given hidden samples.
    SampleVH(rbm, sampleH0, meansNV, samplesNV);

    // Up: re-sample the hidden neurons from that visible reconstruction.
    SampleHV(rbm, samplesNV, meansNH, samplesNH);
}
/// <summary>
/// Estimate the mean of a hidden neuron in an RBM (the upward pass, visible to hidden).
/// The result is the sigmoid of the bias plus the sum of each visible value times its weight.
/// </summary>
/// <param name="rbm">The RBM to use; supplies the number of visible neurons to sum over.</param>
/// <param name="v">The input (v), visible neurons.</param>
/// <param name="w">The weights, one per visible neuron.</param>
/// <param name="b">The bias.</param>
/// <returns>The mean.</returns>
// NOTE(review): a PropUp with this exact signature also appears earlier in this view.
// If both belong to the same class the duplicate will not compile - confirm and remove one.
public double PropUp(RestrictedBoltzmannMachine rbm, double[] v, double[] w, double b)
{
    double weightedInput = 0.0;
    for (int index = 0; index < rbm.VisibleCount; index++)
    {
        double contribution = w[index] * v[index];
        weightedInput += contribution;
    }

    weightedInput += b;
    double mean = RestrictedBoltzmannMachine.Sigmoid(weightedInput);
    return mean;
}
/// <summary>
/// Estimate the mean of a visible neuron in an RBM (the downward pass, hidden to visible).
/// The result is the sigmoid of the bias plus the sum of each hidden value times the
/// weight that connects it to the chosen visible neuron.
/// </summary>
/// <param name="rbm">The RBM to use.</param>
/// <param name="h">The hidden neurons.</param>
/// <param name="i">The visible neuron to use (second index into the weight matrix).</param>
/// <param name="b">Bias value.</param>
/// <returns>The estimated mean.</returns>
// NOTE(review): a PropDown with this exact signature also appears earlier in this view.
// If both belong to the same class the duplicate will not compile - confirm and remove one.
public double PropDown(RestrictedBoltzmannMachine rbm, double[] h, int i, double b)
{
    double weightedInput = 0.0;
    for (int index = 0; index < rbm.HiddenCount; index++)
    {
        double contribution = rbm.Layer.Weights[index][i] * h[index];
        weightedInput += contribution;
    }

    weightedInput += b;
    double mean = RestrictedBoltzmannMachine.Sigmoid(weightedInput);
    return mean;
}