/// <summary>
/// Pushes the pending parameter updates for this node through the supplied trainer and then
/// asks every upstream node to update itself as well.
/// </summary>
/// <param name="properties">Network training properties.</param>
/// <param name="networkTrainer">Network training method.</param>
public override void Update(NetworkTrainingProperties properties, INetworkTrainer networkTrainer)
{
    // Regularisation factor: lambda divided by the configured sequence length.
    double decay = (properties.Lambda / (int)properties[nameof(GatedRecurrentGenerator.SequenceLength)]);

    if (!Constrained)
    {
        // Gate and recurrent parameters are keyed by (node, node, parameter name).
        Rx = networkTrainer.Update(NodeId, NodeId, nameof(Rx), Rx, DRx, properties);
        Rh = networkTrainer.Update(NodeId, NodeId, nameof(Rh), Rh, DRh, properties);
        Zx = networkTrainer.Update(NodeId, NodeId, nameof(Zx), Zx, DZx, properties);
        Zh = networkTrainer.Update(NodeId, NodeId, nameof(Zh), Zh, DZh, properties);
        Hh = networkTrainer.Update(NodeId, NodeId, nameof(Hh), Hh, DHh, properties);
    }

    for (int i = 0; i < In.Count; i++)
    {
        var incoming = In[i];

        // NOTE(review): Delta is overwritten on every pass, so the 1/Examples scaling and the
        // regularisation terms compound from one edge to the next.
        Delta = (1.0 / properties.Examples) * Delta;

        if (!incoming.Source.IsBias)
        {
            Delta += (decay * incoming.Weight);
        }

        if (!Constrained)
        {
            incoming.Weight = networkTrainer.Update(
                incoming.ParentId,
                incoming.ChildId,
                nameof(Edge.Weight),
                incoming.Weight,
                Delta,
                properties);
        }

        // Continue upstream.
        incoming.Source.Update(properties, networkTrainer);
    }
}
/// <summary>
/// Runs the inherited error computation for this node and reports the resulting delta.
/// </summary>
/// <param name="t">The error from the next layer.</param>
/// <param name="properties">Network training properties object.</param>
/// <returns>The value of <c>Delta</c> after the base implementation has run.</returns>
public override double Error(double t, NetworkTrainingProperties properties)
{
    // TODO: Return the correct error.
    // The value returned by the base call is discarded; Delta is read afterwards instead.
    base.Error(t, properties);

    double current = this.Delta;
    return current;
}
/// <summary>
/// Computes the error derivative (<see cref="Neuron.Delta"/>) of this node and forwards it to
/// every upstream node.
/// </summary>
/// <param name="t">The error value handed to this node.</param>
/// <param name="properties">Network training properties; supplies the example count and the sparsity settings.</param>
/// <returns>The updated <see cref="Neuron.Delta"/> of this node.</returns>
public override double Error(double t, NetworkTrainingProperties properties)
{
    // Remember the previous delta before it is overwritten.
    _DeltaL = Delta;
    Mu = (1.0 / properties.Examples) * Output;

    bool hasNoOutgoingEdges = Out.Count == 0;
    if (hasNoOutgoingEdges)
    {
        delta = -(t - Output);
        Delta = delta;
    }
    else
    {
        if (In.Count > 0 && Out.Count > 0)
        {
            // Node with edges on both sides: add the sparsity divergence term.
            double slope = ActivationFunction.Derivative(Input);
            double penalty = AutoencoderNeuron.Divergence(
                (double)properties[nameof(AutoencoderGenerator.Sparsity)],
                (double)properties[nameof(AutoencoderGenerator.SparsityWeight)],
                Mu);

            delta = (Out.Sum(outgoing => outgoing.Weight * t) + penalty) * slope;
        }

        Delta = Out.Sum(outgoing => outgoing.Target.delta * Output);
    }

    // An empty In list simply skips the loop.
    for (int i = 0; i < In.Count; i++)
    {
        In[i].Source.Error(Delta, properties);
    }

    return Delta;
}
/// <summary>
/// Records the node's current values under the active time step so they can be read back
/// when the error derivatives are computed.
/// </summary>
/// <param name="properties">Network training properties object; the time step is read from its <c>TimeStepLabel</c> entry.</param>
public void State(NetworkTrainingProperties properties)
{
    int step = (int)properties[TimeStepLabel];

    StatesH[step] = Output;
    StatesHP[step] = HtP;
    StatesI[step] = Input;
    StatesR[step] = R;
    StatesZ[step] = Z;
}
/// <summary>
/// Clears this neuron's state: zeroes <c>H</c>, replaces <c>DeltaH</c> with a zero vector sized
/// to the configured sequence length, then runs the inherited reset.
/// </summary>
/// <param name="properties">Network training properties.</param>
public override void Reset(NetworkTrainingProperties properties)
{
    this.H = 0;

    int sequenceLength = (int)properties[nameof(GatedRecurrentGenerator.SequenceLength)];
    this.DeltaH = Vector.Zeros(sequenceLength);

    base.Reset(properties);
}
/// <summary>
/// Clears this neuron's state: zeroes the scalar state and gradient members, replaces the
/// <c>DeltaH</c> and <c>StatesH</c> dictionaries with empty ones, then runs the inherited reset.
/// </summary>
/// <param name="properties">Network training properties.</param>
public override void Reset(NetworkTrainingProperties properties)
{
    // Scalar state values.
    H = 0;
    HtP = 0;

    // D-prefixed gradient members.
    DRx = 0;
    DRh = 0;
    DZx = 0;
    DZh = 0;
    DHh = 0;

    // Per-time-step lookups start empty again.
    DeltaH = new Dictionary<int, double>();
    StatesH = new Dictionary<int, double>();

    base.Reset(properties);
}
/// <summary>
/// Computes this node's error terms for the active time step from the recorded state values,
/// refreshes the gate gradient members and forwards the resulting delta to every upstream node.
/// </summary>
/// <param name="t">The error from the next layer.</param>
/// <param name="properties">Network training properties object; supplies the time step and sequence length.</param>
/// <returns>The updated <c>Delta</c> of this node.</returns>
public override double Error(double t, NetworkTrainingProperties properties)
{
    // TODO: Return the correct error.
    _DeltaL = Delta;

    int step = (int)properties[TimeStepLabel];

    // Recorded output for this step; 0 when nothing was recorded.
    double hidden;
    if (!StatesH.TryGetValue(step, out hidden))
    {
        hidden = 0;
    }

    if (Out.Count == 0)
    {
        delta = -(t - hidden);
        Delta = delta;
    }
    else
    {
        if (In.Count > 0 && Out.Count > 0)
        {
            int sequenceLength = (int)properties[nameof(GatedRecurrentGenerator.SequenceLength)];

            // Recorded output of the previous step; 0 when nothing was recorded.
            double previousHidden;
            if (!StatesH.TryGetValue(step - 1, out previousHidden))
            {
                previousHidden = 0;
            }

            double recordedInput = StatesI[step];
            double resetValue = StatesR[step];
            double updateValue = StatesZ[step];

            // seq mod
            double sequenceScale = (1.0 / sequenceLength);

            // dyhh = delta(htm1) = 1-Z, dyhz = delta(Z) = HtP
            double dyhh = (1.0 - updateValue);
            // Not used below; the lookup is kept so a missing entry still surfaces here.
            double dyhz = StatesHP[step];

            double candidateSlope = ActivationFunction.Derivative(recordedInput + resetValue * (previousHidden * Hh));

            // Local term first, then add whatever was stored for the following time step.
            DHh = ((candidateSlope * dyhh) * previousHidden);
            double accumulated = DeltaH.GetValueOrDefault(step + 1, 0) + DHh;
            DeltaH[step] = accumulated;
            DHh = accumulated;

            double resetSlope = ResetGate.Derivative((Rx * recordedInput) + (Rh * hidden) + Rb);
            double updateSlope = UpdateGate.Derivative((Zx * recordedInput) + (Zh * hidden) + Zb);

            DRx = (sequenceScale * (resetSlope * recordedInput));
            DRh = (sequenceScale * (resetSlope * hidden));
            DZx = (sequenceScale * (updateSlope * recordedInput));
            DZh = (sequenceScale * (updateSlope * hidden));

            delta = Out.Sum(outgoing => outgoing.Weight * t);
        }

        Delta = Out.Sum(outgoing => outgoing.Target.delta * hidden);
    }

    // An empty In list simply skips the loop.
    for (int i = 0; i < In.Count; i++)
    {
        In[i].Source.Error(Delta, properties);
    }

    return Delta;
}
/// <summary>
/// Builds and trains an autoencoder network on <paramref name="X"/>.
/// </summary>
/// <param name="X">Matrix of training data; each row is fed forward and also used as its own target.</param>
/// <param name="Y">Not used by this method.</param>
/// <returns>The trained <c>AutoencoderModel</c>.</returns>
public override ISequenceModel Generate(Matrix X, Matrix Y)
{
    // Dense autoencoders learn an approximation of the identity function, so labels are ignored
    // and the output layer has as many nodes as X has columns.
    this.Preprocess(X);

    // Hidden layer defaults to 2/3 of the input width.
    if (this.Density <= 0)
    {
        this.Density = (int)System.Math.Ceiling(X.Cols * (2.0 / 3.0));
    }

    if (this.MaxIterations <= 0)
    {
        // because Seth said so...
        this.MaxIterations = 400;
    }

    int[] hiddenLayout = new int[] { this.Density };
    Network network = Network.New().Create(
        X.Cols,
        X.Cols,
        this.Activation,
        this.OutputFunction,
        (i, j) => new AutoencoderNeuron(),
        epsilon: this.Epsilon,
        hiddenLayers: hiddenLayout);

    var model = new AutoencoderModel
    {
        Descriptor = Descriptor,
        NormalizeFeatures = base.NormalizeFeatures,
        FeatureNormalizer = base.FeatureNormalizer,
        FeatureProperties = base.FeatureProperties,
        Network = network,
        OutputFunction = this.OutputFunction
    };

    OnModelChanged(this, ModelEventArgs.Make(model, "Initialized"));

    NetworkTrainingProperties properties = NetworkTrainingProperties.Create(
        network,
        X.Rows,
        X.Cols,
        this.LearningRate,
        this.Lambda,
        this.MaxIterations,
        new { this.Density, this.Sparsity, this.SparsityWeight });

    for (int epoch = 0; epoch < this.MaxIterations; epoch++)
    {
        properties.Iteration = epoch;

        for (int row = 0; row < X.Rows; row++)
        {
            network.Forward(X[row, VectorType.Row]);
            network.Back(X[row, VectorType.Row], properties);
        }

        // Report progress once per pass over the data.
        var progress = String.Format("Run ({0}/{1}): {2}", epoch, MaxIterations, network.Cost);
        OnModelChanged(this, ModelEventArgs.Make(model, progress));
    }

    return model;
}
/// <summary>
/// Updates the weight of every incoming edge through the supplied trainer and then asks each
/// upstream node to update itself as well.
/// </summary>
/// <param name="properties">Network training properties.</param>
/// <param name="networkTrainer">Network training method.</param>
public override void Update(NetworkTrainingProperties properties, INetworkTrainer networkTrainer)
{
    for (int i = 0; i < In.Count; i++)
    {
        var incoming = In[i];

        // NOTE(review): Delta is overwritten on every pass, so the 1/Examples scaling and the
        // regularisation terms compound from one edge to the next.
        Delta = (1.0 / properties.Examples) * Delta;

        // The first edge (index 0) receives no regularisation term - presumably the bias edge; confirm.
        bool regularise = i > 0;
        if (regularise)
        {
            Delta += ((properties.Lambda / properties.Examples) * incoming.Weight);
        }

        if (!Constrained)
        {
            incoming.Weight = networkTrainer.Update(
                incoming.ParentId,
                incoming.ChildId,
                nameof(Edge.Weight),
                incoming.Weight,
                Delta,
                properties);
        }

        // Continue upstream.
        incoming.Source.Update(properties, networkTrainer);
    }
}
/// <summary>
/// Applies a learning-rate-scaled step to the weight of every incoming edge, asks each upstream
/// node to update itself, and finally zeroes <c>Mu</c>.
/// </summary>
/// <param name="properties">Network training properties.</param>
public override void Update(NetworkTrainingProperties properties)
{
    for (int i = 0; i < this.In.Count; i++)
    {
        var incoming = this.In[i];

        // NOTE(review): Delta is overwritten on every pass, so the 1/Examples scaling and the
        // regularisation terms compound from one edge to the next.
        this.Delta = (1.0 / properties.Examples) * this.Delta;

        // The first edge (index 0) receives no regularisation term - presumably the bias edge; confirm.
        if (i > 0)
        {
            this.Delta += (properties.Lambda / properties.Examples) * incoming.Weight;
        }

        if (!this.Constrained)
        {
            incoming.Weight -= properties.LearningRate * this.Delta;
        }

        // Continue upstream.
        incoming.Source.Update(properties);
    }

    this.Mu = 0;
}
/// <summary>
/// Walks the incoming edges, moving each weight against the current delta by the configured
/// learning rate, recurses into the source nodes and resets <c>Mu</c> to zero afterwards.
/// </summary>
/// <param name="properties">Network training properties.</param>
public override void Update(NetworkTrainingProperties properties)
{
    for (var index = 0; index < In.Count; index++)
    {
        var link = In[index];

        // NOTE(review): Delta is overwritten on every pass, so the 1/Examples scaling and the
        // regularisation terms compound from one edge to the next.
        Delta = (1.0 / properties.Examples) * Delta;

        // Edge 0 is left out of regularisation - presumably the bias edge; confirm.
        bool isFirstEdge = index == 0;
        if (!isFirstEdge)
        {
            Delta = Delta + ((properties.Lambda / properties.Examples) * link.Weight);
        }

        if (!Constrained)
        {
            // using stochastic gradient descent averaged over training examples.
            double adjustment = properties.LearningRate * Delta;
            link.Weight = link.Weight - adjustment;
        }

        link.Source.Update(properties);
    }

    Mu = 0;
}
/// <summary>
/// Calculates the error derivative (<see cref="Neuron.Delta" />) of this node, including the
/// sparsity divergence term for nodes with edges on both sides, and forwards it upstream.
/// </summary>
/// <param name="t">The error value handed to this node.</param>
/// <param name="properties">Training properties; supplies the example count and the sparsity settings.</param>
/// <returns>The updated <see cref="Neuron.Delta" /> of this node.</returns>
public override double Error(double t, NetworkTrainingProperties properties)
{
    // Remember the previous delta before it is overwritten.
    this._DeltaL = this.Delta;
    this.Mu = (1.0 / properties.Examples) * this.Output;

    if (this.Out.Count == 0)
    {
        // No outgoing edges: the delta is the difference between the output and t.
        this.delta = -(t - this.Output);
        this.Delta = this.delta;
    }
    else
    {
        if (this.In.Count > 0 && this.Out.Count > 0)
        {
            double slope = this.ActivationFunction.Derivative(this.Input);
            double penalty = Divergence(
                (double)properties[nameof(AutoencoderGenerator.Sparsity)],
                (double)properties[nameof(AutoencoderGenerator.SparsityWeight)],
                this.Mu);

            double weightedError = this.Out.Sum(outgoing => outgoing.Weight * t);
            this.delta = (weightedError + penalty) * slope;
        }

        this.Delta = this.Out.Sum(outgoing => outgoing.Target.delta * this.Output);
    }

    // An empty In list simply skips the loop.
    for (int i = 0; i < this.In.Count; i++)
    {
        this.In[i].Source.Error(this.Delta, properties);
    }

    return this.Delta;
}
/// <summary>
/// Generates a GRU neural network model for predicting sequences.
/// </summary>
/// <param name="X">Matrix of training data.</param>
/// <param name="Y">Matrix of matching sequence labels.</param>
/// <returns>GatedRecurrentModel.</returns>
public ISequenceModel Generate(Matrix X, Matrix Y)
{
    this.Preprocess(X);

    // because Seth said so...
    if (MaxIterations <= 0)
    {
        MaxIterations = 500;
    }

    Network network = Network.New().Create(X.Cols, Y.Cols, Activation, OutputFunction,
        fnNodeInitializer: (i, j) => new RecurrentNeuron()
        {
            ActivationFunction = this.Activation,
            ResetGate = this.ResetGate,
            MemoryGate = this.UpdateGate,
            DeltaH = Vector.Zeros(this.SequenceLength)
        },
        epsilon: Epsilon);

    var model = new GatedRecurrentModel
    {
        Descriptor = Descriptor,
        NormalizeFeatures = base.NormalizeFeatures,
        FeatureNormalizer = base.FeatureNormalizer,
        FeatureProperties = base.FeatureProperties,
        Network = network,
        OutputFunction = this.OutputFunction
    };

    OnModelChanged(this, ModelEventArgs.Make(model, "Initialized"));

    NetworkTrainingProperties properties = NetworkTrainingProperties.Create(network, X.Rows, X.Cols, this.LearningRate, this.Lambda, this.MaxIterations, new { this.SequenceLength });

    Vector loss = Vector.Zeros(MaxIterations);

    // Pair every input row with its label row.
    var tuples = X.GetRows().Select((s, si) => new Tuple<Vector, Vector>(s, Y[si]));

    for (int pass = 0; pass < MaxIterations; pass++)
    {
        properties.Iteration = pass;

        tuples.Batch(SequenceLength, (idx, items) =>
        {
            network.ResetStates(properties);

            // Materialise the batch once so every element is visited exactly once without
            // re-enumerating the source for each Count()/ElementAt() call.
            var steps = items.ToList();

            // Iterate over the items of this batch. The previous loop tested and incremented
            // the batch index (idx) while always reading element 0.
            for (int i = 0; i < steps.Count; i++)
            {
                network.Forward(steps[i].Item1);
                network.Back(steps[i].Item2, properties);
            }
        }, asParallel: false);

        loss[pass] = network.Cost;

        var output = String.Format("Run ({0}/{1}): {2}", pass, MaxIterations, network.Cost);
        OnModelChanged(this, ModelEventArgs.Make(model, output));
    }

    return model;
}
/// <summary>
/// Zeroes <c>Mu</c> and then performs the inherited reset.
/// </summary>
/// <param name="properties">Network training properties.</param>
public override void Reset(NetworkTrainingProperties properties)
{
    Mu = 0;

    base.Reset(properties);
}
/// <summary>
/// Builds an autoencoder network sized from <paramref name="X"/> and <paramref name="Y"/> and
/// trains it with an RMSProp trainer until the iteration limit is reached or the loss is
/// reported as minimised.
/// </summary>
/// <param name="X">Matrix of training inputs; one row is fed forward per step.</param>
/// <param name="Y">Matrix of targets; the matching row is back-propagated per step.</param>
/// <returns>The trained <c>AutoencoderModel</c>.</returns>
public override ISequenceModel Generate(Matrix X, Matrix Y)
{
    // Autoencoders learn an approximation of the identity function.
    this.Preprocess(X);

    // Hidden layer defaults to 2/3 of the input width.
    if (this.Density <= 0)
    {
        this.Density = (int)System.Math.Ceiling(X.Cols * (2.0 / 3.0));
    }

    if (this.MaxIterations <= 0)
    {
        this.MaxIterations = 400;
    }

    // Output nodes get the identity function; every other node gets no explicit function here.
    var identity = new Ident();
    int[] hiddenLayout = new int[] { this.Density };

    Network network = Network.New().Create(
        X.Cols,
        Y.Cols,
        this.Activation,
        this.OutputFunction,
        (i, j, type) => new AutoencoderNeuron { ActivationFunction = (type == NodeType.Output ? identity : null) },
        epsilon: this.Epsilon,
        hiddenLayers: hiddenLayout);

    // because Geoffrey Hinton :) ...
    INetworkTrainer trainer = new RMSPropTrainer();

    var model = new AutoencoderModel
    {
        Descriptor = Descriptor,
        NormalizeFeatures = base.NormalizeFeatures,
        FeatureNormalizer = base.FeatureNormalizer,
        FeatureProperties = base.FeatureProperties,
        Network = network
    };

    OnModelChanged(this, ModelEventArgs.Make(model, "Initialized"));

    NetworkTrainingProperties properties = NetworkTrainingProperties.Create(
        network,
        X.Rows,
        Y.Cols,
        this.LearningRate,
        this.Lambda,
        this.MaxIterations,
        new { this.Density, this.Sparsity, this.SparsityWeight });

    Vector loss = Vector.Zeros(this.MaxIterations);

    for (int epoch = 0; epoch < this.MaxIterations; epoch++)
    {
        properties.Iteration = epoch;
        network.ResetStates(properties);

        for (int row = 0; row < X.Rows; row++)
        {
            network.Forward(X[row, VectorType.Row]);
            network.Back(Y[row, VectorType.Row], properties, trainer);

            // Accumulate the cost of every example into this epoch's slot.
            loss[epoch] += network.Cost;
        }

        var progress = $"Run ({epoch}/{MaxIterations}): {network.Cost}";
        OnModelChanged(this, ModelEventArgs.Make(model, progress));

        if (this.LossMinimized(loss, epoch))
        {
            break;
        }
    }

    return model;
}
/// <summary>
/// Updates the weights using the supplied (<see cref="NetworkTrainingProperties" />).
/// Currently this only forwards to the inherited implementation.
/// </summary>
/// <param name="properties">Network training properties.</param>
public override void Update(NetworkTrainingProperties properties)
    => base.Update(properties); // TODO: Update recurrent weights.
/// <summary>
/// Weight update driven by the supplied (<see cref="NetworkTrainingProperties"/>); at present it
/// delegates entirely to the base class.
/// </summary>
/// <param name="properties">Network training properties.</param>
public override void Update(NetworkTrainingProperties properties)
    => base.Update(properties); // TODO: Update recurrent weights.
/// <summary>
/// Applies an update using Adam to theta w.r.t the gradient for the specified node in the layer.
/// Two running averages are kept per parameter: one of the gradient (weighted by <c>Beta</c>)
/// and one of the squared gradient (weighted by <c>Gamma</c>). No bias correction is applied here.
/// </summary>
/// <param name="sourceId">Source node identifier.</param>
/// <param name="targetId">Target node identifier.</param>
/// <param name="paramName">Name of the theta parameter being optimized.</param>
/// <param name="theta">Current theta or weight value.</param>
/// <param name="gradient">Current gradient value.</param>
/// <param name="properties">Networking training properties instance.</param>
/// <returns>The updated theta value.</returns>
public double Update(int sourceId, int targetId, string paramName, double theta, double gradient, NetworkTrainingProperties properties)
{
    // One slot per (source, target, parameter) combination.
    string key = $"{sourceId}:{targetId}:{paramName}";

    double gradientAverage = (Beta * Mu.GetValueOrDefault(key, 0.0)) + ((1.0 - Beta) * gradient);
    Mu[key] = gradientAverage;

    double squaredAverage = (Gamma * Tau.GetValueOrDefault(key, 0.0)) + ((1.0 - Gamma) * (gradient * gradient));
    Tau[key] = squaredAverage;

    // Epsilon keeps the denominator away from zero.
    double denominator = System.Math.Sqrt(squaredAverage) + properties.Epsilon;
    return theta - (properties.LearningRate * gradientAverage / denominator);
}
/// <summary>
/// Returns the neuron to its initial state: <c>H</c> becomes zero, <c>DeltaH</c> becomes a zero
/// vector whose length is the configured sequence length, and the inherited reset runs last.
/// </summary>
/// <param name="properties">Network training properties.</param>
public override void Reset(NetworkTrainingProperties properties)
{
    H = 0;

    var length = (int)properties[nameof(GatedRecurrentGenerator.SequenceLength)];
    DeltaH = Vector.Zeros(length);

    base.Reset(properties);
}
/// <summary>
/// Applies an update using AdaGrad to theta w.r.t the gradient for the specified node in the layer.
/// The squared gradients seen so far are summed per parameter and used to scale the step.
/// </summary>
/// <param name="sourceId">Source node identifier.</param>
/// <param name="targetId">Target node identifier.</param>
/// <param name="paramName">Name of the theta parameter being optimized.</param>
/// <param name="theta">Current theta or weight value.</param>
/// <param name="gradient">Current gradient value.</param>
/// <param name="properties">Networking training properties instance.</param>
/// <returns>The updated theta value.</returns>
public double Update(int sourceId, int targetId, string paramName, double theta, double gradient, NetworkTrainingProperties properties)
{
    // One slot per (source, target, parameter) combination.
    string key = $"{sourceId}:{targetId}:{paramName}";

    double squaredSum = Mu.GetValueOrDefault(key, 0) + System.Math.Pow(gradient, 2.0);
    Mu[key] = squaredSum;

    // Epsilon keeps the denominator away from zero.
    double denominator = System.Math.Sqrt(squaredSum) + properties.Epsilon;
    return theta - properties.LearningRate * gradient / denominator;
}
/// <summary>
/// Generates a GRU neural network model for predicting sequences.
/// </summary>
/// <param name="X">Matrix of training data.</param>
/// <param name="Y">Matrix of matching sequence labels.</param>
/// <returns>GatedRecurrentModel.</returns>
public override ISequenceModel Generate(Matrix X, Matrix Y)
{
    this.Preprocess(X);

    // because Seth said so...
    if (MaxIterations <= 0)
    {
        MaxIterations = 500;
    }

    Network network = Network.New().Create(X.Cols, Y.Cols, Activation, OutputFunction,
        fnNodeInitializer: (i, j, type) =>
        {
            // Hidden and output nodes are recurrent; every other node is a plain neuron.
            if (type == NodeType.Hidden || type == NodeType.Output)
            {
                return new RecurrentNeuron()
                {
                    ActivationFunction = this.Activation,
                    ResetGate = this.ResetGate,
                    UpdateGate = this.UpdateGate
                };
            }
            else
            {
                return new Neuron();
            }
        },
        epsilon: Epsilon,
        lossFunction: new CrossEntropyLoss());

    var model = new GatedRecurrentModel
    {
        Descriptor = Descriptor,
        NormalizeFeatures = base.NormalizeFeatures,
        FeatureNormalizer = base.FeatureNormalizer,
        FeatureProperties = base.FeatureProperties,
        Network = network,
        OutputFunction = this.OutputFunction
    };

    OnModelChanged(this, ModelEventArgs.Make(model, "Initialized"));

    NetworkTrainingProperties properties = NetworkTrainingProperties.Create(network, X.Rows, X.Cols, this.LearningRate, this.Lambda, this.MaxIterations, new { this.SequenceLength });

    INetworkTrainer trainer = new GradientDescentTrainer();

    Vector loss = Vector.Zeros(MaxIterations);

    // Collects the network output of every forward step; it is not read back in this method.
    Matrix Yt = Matrix.Zeros(Y.Rows, Y.Cols);

    // Pair every input row with its label row.
    var tuples = X.GetRows().Select((s, si) => new Tuple<Vector, Vector>(s, Y[si]));

    for (int pass = 0; pass < MaxIterations; pass++)
    {
        properties.Iteration = pass;

        tuples.Batch(SequenceLength, (idx, items) =>
        {
            network.ResetStates(properties);

            // Materialise the batch once instead of re-enumerating it for each Count/ElementAt call.
            var steps = Enumerable.ToList<Tuple<Vector, Vector>>(items);

            // Forward pass over the sequence, recording each recurrent node's state per time step.
            for (int i = 0; i < steps.Count; i++)
            {
                properties[RecurrentNeuron.TimeStepLabel] = i;
                network.Forward(steps[i].Item1);

                foreach (RecurrentNeuron node in network.GetVertices().OfType<RecurrentNeuron>())
                {
                    if (node.IsHidden || node.IsOutput)
                    {
                        node.State(properties);
                    }
                }

                Yt[idx + i] = network.Output();
            }

            // Backward pass walks the same sequence from the last time step to the first.
            for (int i = steps.Count - 1; i >= 0; i--)
            {
                properties[RecurrentNeuron.TimeStepLabel] = i;
                network.Back(steps[i].Item2, properties, trainer);
                loss[pass] += network.Cost;
            }
        }, asParallel: false);

        var output = String.Format("Run ({0}/{1}): {2}", pass, MaxIterations, network.Cost);
        OnModelChanged(this, ModelEventArgs.Make(model, output));

        if (this.LossMinimized(loss, pass))
        {
            break;
        }
    }

    return model;
}
/// <summary>
/// Applies an update using gradient descent to theta w.r.t the gradient for the specified node in the layer.
/// The identifiers and parameter name are accepted for interface compatibility and are not used here.
/// </summary>
/// <param name="sourceId">Source node identifier.</param>
/// <param name="targetId">Target node identifier.</param>
/// <param name="paramName">Name of the theta parameter being optimized.</param>
/// <param name="theta">Current theta or weight value.</param>
/// <param name="gradient">Current gradient value.</param>
/// <param name="properties">Networking training properties instance; supplies the learning rate.</param>
/// <returns>Theta moved against the gradient by the learning rate.</returns>
public double Update(int sourceId, int targetId, string paramName, double theta, double gradient, NetworkTrainingProperties properties)
{
    double step = properties.LearningRate * gradient;
    return theta - step;
}
/// <summary>
/// Applies an update using accelerated gradient descent to theta w.r.t the gradient for the specified node in the layer.
/// A velocity is kept per parameter in <c>Omega</c>: velocity = momentum * previous velocity - learning rate * gradient,
/// and theta is moved by that velocity.
/// </summary>
/// <param name="sourceId">Source node identifier.</param>
/// <param name="targetId">Target node identifier.</param>
/// <param name="paramName">Name of the theta parameter being optimized.</param>
/// <param name="theta">Current theta or weight value.</param>
/// <param name="gradient">Current gradient value.</param>
/// <param name="properties">Networking training properties instance; supplies the momentum and learning rate.</param>
/// <returns>The updated theta value.</returns>
public double Update(int sourceId, int targetId, string paramName, double theta, double gradient, NetworkTrainingProperties properties)
{
    // One slot per (source, target, parameter) combination.
    string label = $"{sourceId}:{targetId}:{paramName}";

    double velocity = properties.Momentum * this.Omega.GetValueOrDefault(label, 0) - properties.LearningRate * gradient;
    this.Omega[label] = velocity;

    // The velocity already carries the negative gradient step, so it is ADDED to theta.
    // Subtracting it (as before) moved theta along +gradient, i.e. uphill, which disagreed
    // with the plain gradient-descent trainer's theta - learningRate * gradient.
    return theta + velocity;
}
/// <summary>
/// Delegates the error computation to the base class and hands back this node's delta.
/// </summary>
/// <param name="t">The error from the next layer.</param>
/// <param name="properties">Network training properties object.</param>
/// <returns>The value of <c>Delta</c> after the base implementation has run.</returns>
public override double Error(double t, NetworkTrainingProperties properties)
{
    // TODO: Return the correct error.
    // The value returned by the base call is discarded; Delta is read afterwards instead.
    base.Error(t, properties);

    return Delta;
}