private void GetOrCreateModel()
{
    Directory.CreateDirectory(ModelDirectory);

    // create the inputs
    Axis axis = new Axis("inputAxis");
    var features = Variable.InputVariable(new[] { Vocab.CharCount }, DataType.Float, "features", new List<Axis> { axis, Axis.DefaultBatchAxis() });
    var labels = Variable.InputVariable(new[] { Vocab.CharCount }, DataType.Float, "labels", new List<Axis> { axis, Axis.DefaultBatchAxis() });

    if (TryGetModel(out string filename))
    {
        // load the previous model and use its features;
        // the labels should be identical to before
        Model = Function.Load(filename, Device);
        Inputs = new IOPair<Variable>(Model.Arguments[0], labels);
        Console.WriteLine($"Loaded {Path.GetFileName(filename)}");
    }
    else
    {
        // create a new model from the features
        Model = features;
        for (int i = 0; i < Layers; i++)
        {
            Model = Stabilizer.Build(Model, Device);
            Model = LSTM.Build(Model, HiddenDimensions, Device);
        }
        Model = Dense.Build(Model, Vocab.CharCount, Device);
        Inputs = new IOPair<Variable>(features, labels);
    }
}
/// <summary>
/// Initialize a neural network.
/// </summary>
/// <param name="fileName">The file name of the model to import.</param>
public NeuralNetwork(string fileName)
{
    layers = new List<Layer>();

    // using ensures the stream is closed even if a read throws
    using (var file = new BinaryReader(new FileStream(fileName, FileMode.Open)))
    {
        int numLayers = file.ReadInt32();
        for (int i = 0; i < numLayers; i++)
        {
            int layerType = file.ReadInt32();
            Layer layer;
            switch (layerType)
            {
                case 0:
                case 3:
                    layer = new Dense(file);
                    break;
                case 1:
                    layer = new SimpleRNN(file);
                    break;
                case 2:
                    layer = new LSTM(file);
                    break;
                default:
                    throw new ArgumentException("Unsupported/invalid layer type");
            }
            layers.Add(layer);
        }
    }
}
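The constructor above implies a simple binary layout: an Int32 layer count, then for each layer an Int32 type code followed by the layer's own payload. A matching writer might look like the sketch below; `Layer.TypeCode` and `Layer.Save(BinaryWriter)` are hypothetical members invented here for illustration, not part of the source.

// Sketch of a matching serializer. TypeCode and Save(BinaryWriter) are
// hypothetical members mirroring the codes the constructor switches on
// (0/3 = Dense, 1 = SimpleRNN, 2 = LSTM).
public void Save(string fileName)
{
    using (var file = new BinaryWriter(new FileStream(fileName, FileMode.Create)))
    {
        file.Write(layers.Count);           // layer count header
        foreach (Layer layer in layers)
        {
            file.Write(layer.TypeCode);     // hypothetical: the Int32 the reader switches on
            layer.Save(file);               // hypothetical: layer-specific payload
        }
    }
}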
private async void DeletePaper(PaperDataListItem paperListItem)
{
    if (Messages.ShowQuestion("Are you sure you want to delete this Paper?", "Hold On",
        MessageBoxButtons.YesNo, MessageBoxIcon.Exclamation) == DialogResult.No)
    {
        return;
    }

    try
    {
        // RemovePaper persists the papers as well, so it belongs inside the try
        bool isDeleted = await GeneralManager.RemovePaper(paperListItem.PaperCode);
        if (isDeleted)
        {
            containerFlowPanel.Controls.Remove(paperListItem);
            if (GeneralManager.GetExamPapers.GetPapers.Count == 0)
            {
                if (!containerFlowPanel.Controls.Contains(emptyListLabel))
                {
                    containerFlowPanel.Controls.Add(emptyListLabel);
                }
                emptyListLabel.Visible = true;
            }
            await LSTM.SavePapers(GeneralManager.GetExamPapers);
        }
    }
    catch (Exception ex)
    {
        Messages.ShowError(ex.Message);
    }
}
public void LSTM_Test_AGate()
{
    // define values and variables
    Variable x = Variable.InputVariable(new int[] { 2 }, DataType.Float, "input");
    Variable ht_1 = Variable.InputVariable(new int[] { 3 }, DataType.Float, "prevOutput");
    Variable ct_1 = Variable.InputVariable(new int[] { 3 }, DataType.Float, "prevCellState");
    // Variable ht = Variable.InputVariable(new int[] { 3 }, DataType.Float, "output");

    // data 01
    var x1Values = Value.CreateBatch<float>(new int[] { 2 }, new float[] { 1f, 2f }, device);
    var ct_1Values = Value.CreateBatch<float>(new int[] { 3 }, new float[] { 0f, 0f, 0f }, device);
    var ht_1Values = Value.CreateBatch<float>(new int[] { 3 }, new float[] { 0f, 0f, 0f }, device);

    // data 02
    var x2Values = Value.CreateBatch<float>(new NDShape(1, 2), new float[] { 3f, 4f }, device);

    uint seed = 1;

    // inputs for evaluation
    var inV = new Dictionary<Variable, Value>();
    inV.Add(x, x1Values);
    inV.Add(ht_1, ht_1Values);
    inV.Add(ct_1, ct_1Values);

    // evaluate the forget gate
    var lstmCell = new LSTM();
    var fGate = (Function)lstmCell.AGate(x, ht_1, ct_1, DataType.Float, false, false, device, ref seed, "ForgetGate");

    // set up weights
    var ftparam = fGate.Inputs.Where(l => l.Uid.StartsWith("Parameter")).ToList();
    var pa11 = new Parameter(ftparam[0]);
    pa11.SetValue(new NDArrayView(pa11.Shape, new float[] { 0.16f, 0.17f, 0.18f }, device));
    var ws00 = new Parameter(ftparam[1]);
    // column-major order
    ws00.SetValue(new NDArrayView(ws00.Shape, new float[] { 0.01f, 0.03f, 0.05f, 0.02f, 0.04f, 0.06f }, device));
    var us22 = new Parameter(ftparam[2]);
    us22.SetValue(new NDArrayView(us22.Shape, new float[] { 0.07f, 0.10f, 0.13f, 0.08f, 0.11f, 0.14f, 0.09f, 0.12f, 0.15f }, device));

    // evaluate the model after the weights are set up
    var outFt = new Dictionary<Variable, Value>();
    outFt.Add(fGate, null);
    fGate.Evaluate(inV, outFt, device);
    var resulft = outFt[fGate].GetDenseData<float>(fGate);

    Assert.Equal(0.5523079f, resulft[0][0]);
    Assert.Equal(0.5695462f, resulft[0][1]);
    Assert.Equal(0.5866176f, resulft[0][2]);
}
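The expected gate values can be verified by hand: with the previous output and cell state at zero, the forget gate reduces to sigmoid(W·x + b). A standalone check, independent of CNTK (the class and method names here are illustrative):

// Standalone check of the expected forget-gate values: with the previous
// output at zero, the gate is sigmoid(W * x + b) using the weights set above.
using System;

class ForgetGateCheck
{
    static double Sigmoid(double v) => 1.0 / (1.0 + Math.Exp(-v));

    static void Main()
    {
        double[] x = { 1.0, 2.0 };
        // the column-major data above, viewed here as rows of a 3x2 matrix
        double[,] W = { { 0.01, 0.02 }, { 0.03, 0.04 }, { 0.05, 0.06 } };
        double[] b = { 0.16, 0.17, 0.18 };

        for (int row = 0; row < 3; row++)
        {
            double z = W[row, 0] * x[0] + W[row, 1] * x[1] + b[row];
            Console.WriteLine(Sigmoid(z)); // 0.5523079, 0.5695462, 0.5866176
        }
    }
}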
public async static Task Initialize()
{
    GetExamPapers = await LSTM.LoadPapers();
    if (GetExamPapers == null)
    {
        GetExamPapers = new ExamPapers(new List<Paper>());
    }
}
private async Task LoadAllTemplates()
{
    var objectDatas = await LSTM.LoadTemplateListItemsAsync();
    for (int i = 0; i < objectDatas.Count; i++)
    {
        TemplateListItem templateListItem = TemplateListItem.Create(objectDatas[i]);
        templateListItem.OnSelectedChangedEvent += TemplateSelect;
        templateListItem.OnPinnedChangedEvent += TemplatePin;
        TemplateListItems.Add(templateListItem);
        templatesLayoutPanel.Controls.Add(templateListItem);
    }
}
private Func<Variable, Function> CreateModel(int numOutputDimension, int numLstmLayer, int numHiddenDimension)
{
    return (input) =>
    {
        Function model = input;
        for (int i = 0; i < numLstmLayer; i++)
        {
            model = Stabilizer.Build(model, device);
            model = LSTM.Build(model, numHiddenDimension, device);
        }
        model = Dense.Build(model, numOutputDimension, device);
        return model;
    };
}
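A minimal usage sketch for the factory above, assuming a CNTK `device` as in the other examples; the dimensions and variable names are illustrative:

// Build a 2-layer LSTM model over a 26-dimensional input (illustrative numbers).
var createModel = CreateModel(numOutputDimension: 26, numLstmLayer: 2, numHiddenDimension: 256);
Variable input = Variable.InputVariable(new int[] { 26 }, DataType.Float, "features");
Function model = createModel(input); // stabilizer -> LSTM stack -> dense head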
public ILayer CreateProduct(IKernelDescriptor descriptor)
{
    if (descriptor is LSTM rnn)
    {
        return new LSTMLayer(rnn.Units, rnn.InputDim, rnn.Activation, rnn.RecurrentActivation);
    }
    return null;
}
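A usage sketch for this factory, assuming the LSTM descriptor constructor used in ReadDescriptors further below; the `factory` instance and the activation arguments are illustrative assumptions:

// Create a concrete LSTM layer from a descriptor (illustrative values).
IKernelDescriptor descriptor = new LSTM(64, 32, new TanH(), new Sigmoid());
ILayer layer = factory.CreateProduct(descriptor); // returns null for any non-LSTM descriptor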
private async void SaveTemplateItems()
{
    // await the background save so exceptions are not silently dropped
    await Task.Run(() =>
    {
        List<TemplateListItem.ObjectData> templateListItemsObjects = new List<TemplateListItem.ObjectData>();
        for (int i = 0; i < templatesLayoutPanel.Controls.Count; i++)
        {
            TemplateListItem templateListItem = (TemplateListItem)templatesLayoutPanel.Controls[i];
            TemplateListItem.ObjectData objectData = templateListItem.GetObjectData();
            objectData.ListIndex = i;
            templateListItemsObjects.Add(objectData);
        }
        LSTM.SaveTemplateListItems(templateListItemsObjects);
    });
}
public async static Task<bool> RemovePaper(int paperCode)
{
    Paper paper = GetExamPapers.GetPapers.Find(x => x.Code == paperCode);
    if (paper == null)
    {
        return false;
    }
    bool isSuccess = GetExamPapers.GetPapers.Remove(paper);
    if (isSuccess)
    {
        // persist only when the list actually changed
        await LSTM.SavePapers(GetExamPapers);
    }
    return isSuccess;
}
public Encoder(int sequenceLength, int vocabularySize, int wordVectorSize, int hiddenSize)
    : base(sequenceLength, hiddenSize)
{
    this.embedding = new Embedding(sequenceLength, vocabularySize, wordVectorSize, (fanIn, fanOut) => 0.01 * Initializers.LeCunNormal(fanIn));
    this.recurrent = new LSTM(wordVectorSize, hiddenSize, sequenceLength, false, false, (fanIn, fanOut) => Initializers.LeCunNormal(fanIn));

    // concatenate the embedding and LSTM weights into one flat parameter vector
    this.weights = new double[this.embedding.Weights.Length + this.recurrent.Weights.Length];
    Array.Copy(this.embedding.Weights, 0, this.weights, 0, this.embedding.Weights.Length);
    Array.Copy(this.recurrent.Weights, 0, this.weights, this.embedding.Weights.Length, this.recurrent.Weights.Length);
}
/// <summary>
/// Creates the recurrence network based on an LSTM cell.
/// </summary>
/// <param name="input">Input variable.</param>
/// <param name="outputDim">Dimension of the LSTM output.</param>
/// <param name="cellDim">Dimension of the LSTM cell.</param>
/// <param name="dataType">Type of data.</param>
/// <param name="device">Device where computing will happen.</param>
/// <param name="returnSequence">Determines whether the return value is the full sequence or only its last element.</param>
/// <param name="actFun">Type of activation function for the cell-state update.</param>
/// <param name="usePeephole">Include peephole connections in the gates.</param>
/// <param name="useStabilizer">Use self-stabilization for the output.</param>
/// <param name="seed">Random seed.</param>
/// <returns>The recurrent Function.</returns>
public static Function RecurrenceLSTM(Variable input, int outputDim, int cellDim, DataType dataType, DeviceDescriptor device,
    bool returnSequence = false, Activation actFun = Activation.TanH, bool usePeephole = true, bool useStabilizer = true, uint seed = 1)
{
    if (outputDim <= 0 || cellDim <= 0)
    {
        throw new Exception("Dimensions of the LSTM cell cannot be zero.");
    }

    // prepare output and cell dimensions
    NDShape hShape = new int[] { outputDim };
    NDShape cShape = new int[] { cellDim };

    // define previous output and previous cell state as placeholders, which will be replaced with past values later
    var dh = Variable.PlaceholderVariable(hShape, input.DynamicAxes);
    var dc = Variable.PlaceholderVariable(cShape, input.DynamicAxes);

    // create the LSTM cell
    var lstmCell = new LSTM(input, dh, dc, dataType, actFun, usePeephole, useStabilizer, seed, device);

    // get actual values of the output and the cell state
    var actualDh = CNTKLib.PastValue(lstmCell.H);
    var actualDc = CNTKLib.PastValue(lstmCell.C);

    // form the recurrence loop by replacing the dh and dc placeholders with actualDh and actualDc
    lstmCell.H.ReplacePlaceholders(new Dictionary<Variable, Variable> { { dh, actualDh }, { dc, actualDc } });

    // For a stacked LSTM (more than one LSTM layer in the network), the last LSTM must return
    // only the last element of the sequence; otherwise the full sequence is returned.
    if (returnSequence)
    {
        return lstmCell.H;
    }
    return CNTKLib.SequenceLast(lstmCell.H);
}
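A usage sketch stacking two of these recurrences, following the comment above: every layer except the last returns the full sequence. Shapes and variable names are illustrative assumptions; `device` is a CNTK DeviceDescriptor as in the other examples.

// Two stacked LSTM recurrences (illustrative dimensions).
Variable features = Variable.InputVariable(new int[] { 10 }, DataType.Float, "features");
Function layer1 = RecurrenceLSTM(features, 32, 32, DataType.Float, device, returnSequence: true);
Function layer2 = RecurrenceLSTM(layer1, 32, 32, DataType.Float, device, returnSequence: false);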
public void RnnLSTMRandomTest()
{
    Python.Initialize();
    Chainer.Initialize();

    Real[,] input = { { 1.0f }, { 3.0f }, { 5.0f }, { 7.0f }, { 9.0f } };
    Real[,] teach = { { 3.0f }, { 5.0f }, { 7.0f }, { 9.0f }, { 11.0f } };

    Real[,] input2 = { { 3.0f }, { 5.0f }, { 7.0f }, { 9.0f }, { 11.0f } };
    Real[,] teach2 = { { 5.0f }, { 7.0f }, { 9.0f }, { 11.0f }, { 13.0f } };

    int outputCount = 1;
    int inputCount = 1;
    int hiddenCount = 2;

    Real[,] upwardInit = Initializer.GetRandomValues<Real[,]>(hiddenCount, hiddenCount);
    Real[,] lateralInit = Initializer.GetRandomValues<Real[,]>(hiddenCount, hiddenCount);
    Real[,,] biasInit = Initializer.GetRandomValues<Real[,,]>(1, hiddenCount, 1);
    Real[,,] forgetBiasInit = Initializer.GetRandomValues<Real[,,]>(1, hiddenCount, 1);

    Real[,] w1 = Initializer.GetRandomValues<Real[,]>(hiddenCount, inputCount);
    Real[] b1 = Initializer.GetRandomValues<Real[]>(hiddenCount);

    // Chainer
    Linear<Real> cLinear1 = new Linear<Real>(inputCount, hiddenCount, false, w1, b1);
    NChainer.LSTM<Real> cLstm = new NChainer.LSTM<Real>(hiddenCount, hiddenCount, lateralInit, upwardInit, biasInit, forgetBiasInit);

    Real[,] w2 = Initializer.GetRandomValues<Real[,]>(outputCount, hiddenCount);
    Real[] b2 = Initializer.GetRandomValues<Real[]>(outputCount);
    Linear<Real> cLinear2 = new Linear<Real>(hiddenCount, outputCount, false, w2, b2);

    Variable<Real> cX1 = new Variable<Real>(input);
    Variable<Real> cY11 = cLinear1.Forward(cX1);
    Variable<Real> cY12 = cLstm.Forward(cY11);
    Variable<Real> cY13 = cLinear2.Forward(cY12);
    Variable<Real> cT = new Variable<Real>(teach);
    Variable<Real> cLoss = new NChainer.MeanSquaredError<Real>().Forward(cY13, cT);
    cLoss.Backward();

    // KelpNet
    CL.Linear<Real> linear1 = new CL.Linear<Real>(inputCount, hiddenCount, false, w1, b1);
    LSTM<Real> lstm = new LSTM<Real>(hiddenCount, hiddenCount, lateralInit, upwardInit, biasInit, forgetBiasInit);
    CL.Linear<Real> linear2 = new CL.Linear<Real>(hiddenCount, outputCount, false, w2, b2);

    NdArray<Real> x1 = new NdArray<Real>(input, asBatch: true);
    NdArray<Real> y11 = linear1.Forward(x1)[0];
    NdArray<Real> y12 = lstm.Forward(y11)[0];
    NdArray<Real> y13 = linear2.Forward(y12)[0];
    NdArray<Real> t = new NdArray<Real>(teach, asBatch: true);
    NdArray<Real> loss = new MeanSquaredError<Real>().Evaluate(y13, t);
    y13.Backward();

    Real[] cY11data = ((Real[,])cY11.Data).Flatten();
    Real[] cY12data = ((Real[,])cY12.Data).Flatten();
    Real[] cY13data = ((Real[,])cY13.Data).Flatten();
    Real[] cXgrad = ((Real[,])cX1.Grad).Flatten();

    Real[] cupwardWGrad = ((Real[,])cLstm.upward.W.Grad).Flatten();
    Real[] cupwardbGrad = (Real[])cLstm.upward.b.Grad;

    // set the tolerance
    Real delta = 0.00001f;

    // y11
    Assert.AreEqual(cY11data.Length, y11.Data.Length);
    for (int i = 0; i < cY11data.Length; i++)
    {
        Assert.AreEqual(cY11data[i], y11.Data[i], delta);
    }

    // y12
    Assert.AreEqual(cY12data.Length, y12.Data.Length);
    for (int i = 0; i < cY12data.Length; i++)
    {
        Assert.AreEqual(cY12data[i], y12.Data[i], delta);
    }

    // y13
    Assert.AreEqual(cY13data.Length, y13.Data.Length);
    for (int i = 0; i < cY13data.Length; i++)
    {
        Assert.AreEqual(cY13data[i], y13.Data[i], delta);
    }

    // relax the tolerance
    delta = 0.0001f;

    // loss
    Assert.AreEqual(cLoss.Data[0], loss.Data[0], delta);

    // x.Grad
    Assert.AreEqual(cXgrad.Length, x1.Grad.Length);
    for (int i = 0; i < cXgrad.Length; i++)
    {
        Assert.AreEqual(cXgrad[i], x1.Grad[i], delta);
    }

    Real[] cWgrad11 = ((Real[,])cLinear1.W.Grad).Flatten();
    Real[] cbgrad11 = (Real[])cLinear1.b.Grad;

    // W.grad
    Assert.AreEqual(cWgrad11.Length, linear1.Weight.Grad.Length);
    for (int i = 0; i < linear1.Weight.Grad.Length; i++)
    {
        Assert.AreEqual(cWgrad11[i], linear1.Weight.Grad[i], delta);
    }

    // b.grad
    Assert.AreEqual(cbgrad11.Length, linear1.Bias.Grad.Length);
    for (int i = 0; i < linear1.Bias.Grad.Length; i++)
    {
        Assert.AreEqual(cbgrad11[i], linear1.Bias.Grad[i], delta);
    }

    Real[] cWgrad12 = ((Real[,])cLinear2.W.Grad).Flatten();
    Real[] cbgrad12 = (Real[])cLinear2.b.Grad;

    // W.grad
    Assert.AreEqual(cWgrad12.Length, linear2.Weight.Grad.Length);
    for (int i = 0; i < linear2.Weight.Grad.Length; i++)
    {
        Assert.AreEqual(cWgrad12[i], linear2.Weight.Grad[i], delta);
    }

    // b.grad
    Assert.AreEqual(cbgrad12.Length, linear2.Bias.Grad.Length);
    for (int i = 0; i < linear2.Bias.Grad.Length; i++)
    {
        Assert.AreEqual(cbgrad12[i], linear2.Bias.Grad[i], delta);
    }

    // W.grad
    int wLen = lstm.upward.Weight.Grad.Length;
    Assert.AreEqual(cupwardWGrad.Length, lstm.upward.Weight.Grad.Length);
    for (int i = 0; i < wLen; i++)
    {
        Assert.AreEqual(cupwardWGrad[i], lstm.upward.Weight.Grad[i], delta);
    }

    // b.grad
    int bLen = lstm.upward.Bias.Length;
    Assert.AreEqual(cupwardbGrad.Length, lstm.upward.Bias.Grad.Length);
    for (int i = 0; i < bLen; i++)
    {
        Assert.AreEqual(cupwardbGrad[i], lstm.upward.Bias.Grad[i], delta);
    }

    // second pass
    Variable<Real> cX2 = new Variable<Real>(input2);
    Variable<Real> cY21 = cLinear1.Forward(cX2);
    Variable<Real> cY22 = cLstm.Forward(cY21);
    Variable<Real> cY23 = cLinear2.Forward(cY22);
    Variable<Real> cT2 = new Variable<Real>(teach2);
    Variable<Real> cLoss2 = new NChainer.MeanSquaredError<Real>().Forward(cY23, cT2);

    // KelpNet
    NdArray<Real> x2 = new NdArray<Real>(input2, asBatch: true);
    NdArray<Real> y21 = linear1.Forward(x2)[0];
    NdArray<Real> y22 = lstm.Forward(y21)[0];
    NdArray<Real> y23 = linear2.Forward(y22)[0];
    NdArray<Real> t2 = new NdArray<Real>(teach2, asBatch: true);
    NdArray<Real> loss2 = new MeanSquaredError<Real>().Evaluate(y23, t2);

    Assert.AreEqual(cLoss2.Data[0], loss2.Data[0], delta);

    // run Backward
    cLoss2.Backward();
    y23.Backward();

    Real[] cYdata21 = ((Real[,])cY21.Data).Flatten();
    Real[] cYdata22 = ((Real[,])cY22.Data).Flatten();
    Real[] cYdata23 = ((Real[,])cY23.Data).Flatten();
    Real[] cXgrad2 = ((Real[,])cX2.Grad).Flatten();

    Real[] cupwardWGrad2 = ((Real[,])cLstm.upward.W.Grad).Flatten();
    Real[] cupwardbGrad2 = (Real[])cLstm.upward.b.Grad;
    Real[] clateralWGrad = ((Real[,])cLstm.lateral.W.Grad).Flatten();

    // y21
    Assert.AreEqual(cYdata21.Length, y21.Data.Length);
    for (int i = 0; i < cYdata21.Length; i++)
    {
        Assert.AreEqual(cYdata21[i], y21.Data[i], delta);
    }

    // y22
    Assert.AreEqual(cYdata22.Length, y22.Data.Length);
    for (int i = 0; i < cYdata22.Length; i++)
    {
        Assert.AreEqual(cYdata22[i], y22.Data[i], delta);
    }

    // y23
    Assert.AreEqual(cYdata23.Length, y23.Data.Length);
    for (int i = 0; i < cYdata23.Length; i++)
    {
        Assert.AreEqual(cYdata23[i], y23.Data[i], delta);
    }

    // x.Grad
    Assert.AreEqual(cXgrad2.Length, x2.Grad.Length);
    for (int i = 0; i < cXgrad2.Length; i++)
    {
        Assert.AreEqual(cXgrad2[i], x2.Grad[i], delta);
    }

    // the error is fairly large here because the values pass through many stages
    delta = 1.0f;

    Real[] cWgrad22 = ((Real[,])cLinear2.W.Grad).Flatten();
    Real[] cbgrad22 = (Real[])cLinear2.b.Grad;

    // W.grad
    Assert.AreEqual(cWgrad22.Length, linear2.Weight.Grad.Length);
    for (int i = 0; i < linear2.Weight.Grad.Length; i++)
    {
        Assert.AreEqual(cWgrad22[i], linear2.Weight.Grad[i], delta);
    }

    // b.grad
    Assert.AreEqual(cbgrad22.Length, linear2.Bias.Grad.Length);
    for (int i = 0; i < linear2.Bias.Grad.Length; i++)
    {
        Assert.AreEqual(cbgrad22[i], linear2.Bias.Grad[i], delta);
    }

    delta = 2.0f;

    // W.grad
    Assert.AreEqual(clateralWGrad.Length, lstm.lateral.Weight.Grad.Length);
    for (int i = 0; i < clateralWGrad.Length; i++)
    {
        Assert.AreEqual(clateralWGrad[i], lstm.lateral.Weight.Grad[i], delta);
    }

    for (int i = 0; i < wLen; i++)
    {
        Assert.AreEqual(cupwardWGrad2[i], lstm.upward.Weight.Grad[i], delta);
    }

    // b.grad
    for (int i = 0; i < bLen; i++)
    {
        Assert.AreEqual(cupwardbGrad2[i], lstm.upward.Bias.Grad[i], delta);
    }

    delta = 20.0f;

    Real[] cWgrad21 = ((Real[,])cLinear1.W.Grad).Flatten();
    Real[] cbgrad21 = (Real[])cLinear1.b.Grad;

    // W.grad
    Assert.AreEqual(cWgrad21.Length, linear1.Weight.Grad.Length);
    for (int i = 0; i < linear1.Weight.Grad.Length; i++)
    {
        Assert.AreEqual(cWgrad21[i], linear1.Weight.Grad[i], delta);
    }

    // b.grad
    Assert.AreEqual(cbgrad21.Length, linear1.Bias.Grad.Length);
    for (int i = 0; i < linear1.Bias.Grad.Length; i++)
    {
        Assert.AreEqual(cbgrad21[i], linear1.Bias.Grad[i], delta);
    }
}
private async void finishPaperBtn_Click(object sender, EventArgs e)
{
    if (paperCodeField.Text == "")
    {
        Messages.ShowError("Paper code cannot be empty");
        return;
    }
    int paperCode = (int)paperCodeField.IntegerValue;
    if (!isEditingPaper && GeneralManager.GetExamPapers != null && GeneralManager.GetExamPapers.GetPapers.Exists(x => x.Code == paperCode))
    {
        Messages.ShowError("Paper code already exists");
        return;
    }

    string paperTitle = paperTitleField.Text;
    if (paperTitle == "")
    {
        Messages.ShowError("Paper title cannot be empty");
        return;
    }
    if (!isEditingPaper && GeneralManager.GetExamPapers != null && GeneralManager.GetExamPapers.GetPapers.Exists(x => x.Title == paperTitle))
    {
        Messages.ShowError("Paper with that title already exists");
        return;
    }

    PaperDirection paperDirection = (PaperDirection)paperDirectionField.SelectedValue;

    if (paperFieldsCountField.Text == "")
    {
        Messages.ShowError("Paper fields cannot be empty");
        return;
    }
    if (paperOptionsCountField.Text == "")
    {
        Messages.ShowError("Paper options cannot be empty");
        return;
    }
    int totalFields = (int)paperFieldsCountField.IntegerValue;
    int totalOptions = (int)paperOptionsCountField.IntegerValue;
    if (totalOptions < 2)
    {
        Messages.ShowError("There must be at least two options per field.");
        return;
    }

    Paper newPaper = new Paper(paperCode, paperTitle, totalFields, totalOptions, paperDirection);
    if (!isEditingPaper)
    {
        GeneralManager.GetExamPapers.GetPapers.Add(newPaper);
        PaperDataListItem paperDataListItem = PaperDataListItem.Create(newPaper);
        containerFlowPanel.Controls.Add(paperDataListItem);
        emptyListLabel.Visible = false;
    }
    else
    {
        var oldCode = paperToEdit.Code;
        paperToEdit.Reset(newPaper);
        isEditingPaper = false;
        for (int i = 0; i < containerFlowPanel.Controls.Count; i++)
        {
            PaperDataListItem paperDataListItem = (PaperDataListItem)containerFlowPanel.Controls[i];
            if (paperDataListItem.PaperCode == oldCode)
            {
                paperDataListItem.PaperCode = paperCode;
                paperDataListItem.PaperTitle = paperTitle;
            }
        }
    }

    papersListTable.Visible = true;
    paperConfigurationPanel.Visible = false;
    papersListTable.Dock = DockStyle.Fill;
    await LSTM.SavePapers(GeneralManager.GetExamPapers);
}
public void LSTMRandomTest()
{
    Python.Initialize();
    Chainer.Initialize();

    int batchCount = Mother.Dice.Next(1, 5);
    int inputCount = Mother.Dice.Next(1, 5);
    int outputCount = Mother.Dice.Next(1, 5);

    Real[,] input = Initializer.GetRandomValues<Real[,]>(batchCount, inputCount);
    Real[,] dummyGy = Initializer.GetRandomValues<Real[,]>(batchCount, outputCount);

    Real[,] upwardInit = Initializer.GetRandomValues<Real[,]>(outputCount, inputCount);
    Real[,] lateralInit = Initializer.GetRandomValues<Real[,]>(outputCount, outputCount);
    Real[,,] biasInit = Initializer.GetRandomValues<Real[,,]>(1, outputCount, 1);
    Real[,,] forgetBiasInit = Initializer.GetRandomValues<Real[,,]>(1, outputCount, 1);

    // Chainer
    NChainer.LSTM<Real> clstm = new NChainer.LSTM<Real>(inputCount, outputCount, lateralInit, upwardInit, biasInit, forgetBiasInit);

    Variable<Real> cX = new Variable<Real>(input);
    Variable<Real> cY = clstm.Forward(cX);
    cY.Grad = dummyGy;
    cY.Backward();

    // KelpNet
    LSTM<Real> lstm = new LSTM<Real>(inputCount, outputCount, lateralInit, upwardInit, biasInit, forgetBiasInit);

    NdArray<Real> x = new NdArray<Real>(input, asBatch: true);
    NdArray<Real> y = lstm.Forward(x)[0];
    y.Grad = dummyGy.Flatten();
    y.Backward();

    // set the tolerance
    Real delta = 0.00001f;

    Real[] cYdata = ((Real[,])cY.Data).Flatten();
    Real[] cXgrad = ((Real[,])cX.Grad).Flatten();

    Real[] cupwardWGrad = ((Real[,])clstm.upward.W.Grad).Flatten();
    Real[] cupwardbGrad = (Real[])clstm.upward.b.Grad;

    // y
    Assert.AreEqual(cYdata.Length, y.Data.Length);
    for (int i = 0; i < cYdata.Length; i++)
    {
        Assert.AreEqual(cYdata[i], y.Data[i], delta);
    }

    // x.Grad
    Assert.AreEqual(cXgrad.Length, x.Grad.Length);
    for (int i = 0; i < cXgrad.Length; i++)
    {
        Assert.AreEqual(cXgrad[i], x.Grad[i], delta);
    }

    // W.grad
    int wLen = lstm.upward.Weight.Grad.Length;
    Assert.AreEqual(cupwardWGrad.Length, lstm.upward.Weight.Grad.Length);
    for (int i = 0; i < wLen; i++)
    {
        Assert.AreEqual(cupwardWGrad[i], lstm.upward.Weight.Grad[i], delta);
    }

    // b.grad
    int bLen = lstm.upward.Bias.Length;
    Assert.AreEqual(cupwardbGrad.Length, lstm.upward.Bias.Grad.Length);
    for (int i = 0; i < bLen; i++)
    {
        Assert.AreEqual(cupwardbGrad[i], lstm.upward.Bias.Grad[i], delta);
    }

    //////////////////////////////////////////////////////////////////////////
    // on the first pass, lateral has no gradient yet                        //
    //////////////////////////////////////////////////////////////////////////
    //Real[] clateralWGrad = Real.ToRealArray((Real[,])clstm.lateral.W.Grad);
    //Assert.AreEqual(clateralWGrad.Length, lstm.lateral.Weight.Grad.Length);
    //for (int i = 0; i < wLen; i++)
    //{
    //    Assert.AreEqual(clateralWGrad[i], lstm.lateral.Weight.Grad[i], delta);
    //}

    ///////////////
    // second pass
    ///////////////
    Real[,] input2 = Initializer.GetRandomValues<Real[,]>(batchCount, inputCount);
    Real[,] dummyGy2 = Initializer.GetRandomValues<Real[,]>(batchCount, outputCount);

    // Chainer
    Variable<Real> cX2 = new Variable<Real>(input2);
    Variable<Real> cY2 = clstm.Forward(cX2);
    cY2.Grad = dummyGy2;
    cY2.Backward();

    // KelpNet
    NdArray<Real> x2 = new NdArray<Real>(input2, asBatch: true);
    NdArray<Real> y2 = lstm.Forward(x2)[0];
    y2.Grad = dummyGy2.Flatten();
    y2.Backward();

    Real[] cYdata2 = ((Real[,])cY2.Data).Flatten();
    Real[] cXgrad2 = ((Real[,])cX2.Grad).Flatten();

    Real[] cupwardWGrad2 = ((Real[,])clstm.upward.W.Grad).Flatten();
    Real[] cupwardbGrad2 = (Real[])clstm.upward.b.Grad;
    Real[] clateralWGrad = ((Real[,])clstm.lateral.W.Grad).Flatten();

    // y
    Assert.AreEqual(cYdata2.Length, y2.Data.Length);
    for (int i = 0; i < cYdata2.Length; i++)
    {
        Assert.AreEqual(cYdata2[i], y2.Data[i], delta);
    }

    // x.Grad
    Assert.AreEqual(cXgrad2.Length, x2.Grad.Length);
    for (int i = 0; i < cXgrad2.Length; i++)
    {
        Assert.AreEqual(cXgrad2[i], x2.Grad[i], delta);
    }

    // W.grad
    Assert.AreEqual(clateralWGrad.Length, lstm.lateral.Weight.Grad.Length);
    for (int i = 0; i < clateralWGrad.Length; i++)
    {
        Assert.AreEqual(clateralWGrad[i], lstm.lateral.Weight.Grad[i], delta);
    }

    // the error is larger here because the values pass through many stages
    delta = 1.0f;

    for (int i = 0; i < wLen; i++)
    {
        Assert.AreEqual(cupwardWGrad2[i], lstm.upward.Weight.Grad[i], delta);
    }

    // b.grad
    for (int i = 0; i < bLen; i++)
    {
        Assert.AreEqual(cupwardbGrad2[i], lstm.upward.Bias.Grad[i], delta);
    }
}
public bool CreateLayer(int nCount, ELayerType type, ActivationSettings activationSettings)
{
    Layer.Utility.Layer layer;
    switch (type)
    {
        case ELayerType.AveragePooling:
            layer = new AveragePooling(nCount, Layers.Count, activationSettings);
            break;
        case ELayerType.AverageUnpooling:
            layer = new AverageUnpooling(nCount, Layers.Count, activationSettings);
            break;
        case ELayerType.Convolutional:
            layer = new Convolutional(nCount, Layers.Count, activationSettings);
            break;
        case ELayerType.Deconvolutional:
            layer = new Deconvolutional(nCount, Layers.Count, activationSettings);
            break;
        case ELayerType.Dropout:
            layer = new Dropout(nCount, Layers.Count, activationSettings);
            break;
        case ELayerType.FullyConnected:
            layer = new FullyConnected(nCount, Layers.Count, activationSettings);
            break;
        case ELayerType.GatedRecurrent:
            layer = new GatedRecurrent(nCount, Layers.Count, activationSettings);
            break;
        case ELayerType.LSTM:
            layer = new LSTM(nCount, Layers.Count, activationSettings);
            break;
        case ELayerType.MaxPooling:
            layer = new MaxPooling(nCount, Layers.Count, activationSettings);
            break;
        case ELayerType.MaxUnpooling:
            layer = new MaxUnpooling(nCount, Layers.Count, activationSettings);
            break;
        case ELayerType.Recurrent:
            layer = new Recurrent(nCount, Layers.Count, activationSettings);
            break;
        default: // includes ELayerType.Invalid
            throw new ArgumentException("Invalid \"type\" argument.");
    }
    Layers.Add(layer);
    return true;
}
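A brief usage sketch of the factory above; the `network` instance, the layer sizes, and the assumption that ActivationSettings is default-constructible are all illustrative:

// Stack an LSTM layer and a fully connected head (illustrative values).
var activation = new ActivationSettings();   // assumed default-constructible
network.CreateLayer(128, ELayerType.LSTM, activation);
network.CreateLayer(10, ELayerType.FullyConnected, activation);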
private List<IKernelDescriptor> ReadDescriptors(JObject model)
{
    List<IKernelDescriptor> dscps = model.SelectToken("descriptors").Select(layer =>
    {
        IKernelDescriptor descriptor = null;
        String layerName = (String)layer.SelectToken("layer");

        switch (layerName)
        {
            case "AvgPooling1D":
                descriptor = new AvgPooling1D(
                    (int)layer.SelectToken("padding"),
                    (int)layer.SelectToken("stride"),
                    (int)layer.SelectToken("kernel_size"));
                break;
            case "GlobalAveragePooling1D":
                descriptor = new GlobalAvgPooling1D();
                break;
            case "AvgPooling2D":
                descriptor = new AvgPooling2D(
                    (int)layer.SelectToken("padding_vl"),
                    (int)layer.SelectToken("padding_hz"),
                    (int)layer.SelectToken("stride_vl"),
                    (int)layer.SelectToken("stride_hz"),
                    (int)layer.SelectToken("kernel_height"),
                    (int)layer.SelectToken("kernel_width"));
                break;
            case "GlobalAveragePooling2D":
                descriptor = new GlobalAvgPooling2D();
                break;
            case "BatchNormalization":
                descriptor = new BatchNormalization(
                    (int)layer.SelectToken("epsilon"));
                break;
            case "Cropping1D":
                descriptor = new Cropping1D(
                    (int)layer.SelectToken("trimBegin"),
                    (int)layer.SelectToken("trimEnd"));
                break;
            case "Cropping2D":
                descriptor = new Cropping2D(
                    (int)layer.SelectToken("topTrim"),
                    (int)layer.SelectToken("bottomTrim"),
                    (int)layer.SelectToken("leftTrim"),
                    (int)layer.SelectToken("rightTrim"));
                break;
            case "MaxPooling1D":
                descriptor = new MaxPooling1D(
                    (int)layer.SelectToken("padding"),
                    (int)layer.SelectToken("stride"),
                    (int)layer.SelectToken("kernel_size"));
                break;
            case "GlobalMaxPooling1D":
                descriptor = new GlobalMaxPooling1D();
                break;
            case "MaxPooling2D":
                descriptor = new MaxPooling2D(
                    (int)layer.SelectToken("padding_vl"),
                    (int)layer.SelectToken("padding_hz"),
                    (int)layer.SelectToken("stride_vl"),
                    (int)layer.SelectToken("stride_hz"),
                    (int)layer.SelectToken("kernel_height"),
                    (int)layer.SelectToken("kernel_width"));
                break;
            case "GlobalMaxPooling2D":
                descriptor = new GlobalMaxPooling2D();
                break;
            case "Convolution1D":
                descriptor = new Convolution1D(
                    (int)layer.SelectToken("padding"),
                    (int)layer.SelectToken("stride"),
                    (int)layer.SelectToken("kernel_size"),
                    (int)layer.SelectToken("kernel_num"));
                break;
            case "Convolution2D":
                descriptor = new Convolution2D(
                    (int)layer.SelectToken("padding_vl"),
                    (int)layer.SelectToken("padding_hz"),
                    (int)layer.SelectToken("stride_vl"),
                    (int)layer.SelectToken("stride_hz"),
                    (int)layer.SelectToken("kernel_height"),
                    (int)layer.SelectToken("kernel_width"),
                    (int)layer.SelectToken("kernel_num"));
                break;
            case "Dense2D":
                descriptor = new Dense2D((int)layer.SelectToken("units"));
                break;
            case "Input2D":
                descriptor = new Input2D(
                    (int)layer.SelectToken("height"),
                    (int)layer.SelectToken("width"),
                    (int)layer.SelectToken("channel"),
                    (int)layer.SelectToken("batch"));
                break;
            case "Bias2D":
                descriptor = new Bias2D();
                break;
            case "Permute":
                descriptor = new Permute(
                    (int)layer.SelectToken("dim1"),
                    (int)layer.SelectToken("dim2"),
                    (int)layer.SelectToken("dim3"));
                break;
            case "Reshape":
                descriptor = new Reshape2D(
                    (int)layer.SelectToken("height"),
                    (int)layer.SelectToken("width"),
                    (int)layer.SelectToken("channel"),
                    1);
                break;
            case "RepeatVector":
                descriptor = new RepeatVector(
                    (int)layer.SelectToken("num"));
                break;
            case "SimpleRNN":
                descriptor = new SimpleRNN(
                    (int)layer.SelectToken("units"),
                    (int)layer.SelectToken("input_dim"),
                    ANR((string)layer.SelectToken("activation")));
                break;
            case "LSTM":
                descriptor = new LSTM(
                    (int)layer.SelectToken("units"),
                    (int)layer.SelectToken("input_dim"),
                    ANR((string)layer.SelectToken("activation")),
                    ANR((string)layer.SelectToken("rec_act")));
                break;
            case "GRU":
                descriptor = new GRU(
                    (int)layer.SelectToken("units"),
                    (int)layer.SelectToken("input_dim"),
                    ANR((string)layer.SelectToken("activation")),
                    ANR((string)layer.SelectToken("rec_act")));
                break;
            case "ELu":
                descriptor = new ELu(1);
                break;
            case "HardSigmoid":
                descriptor = new HardSigmoid();
                break;
            case "ReLu":
                descriptor = new ReLu();
                break;
            case "Sigmoid":
                descriptor = new Sigmoid();
                break;
            case "Flatten":
                descriptor = new Flatten();
                break;
            case "Softmax":
                descriptor = new Softmax();
                break;
            case "SoftPlus":
                descriptor = new SoftPlus();
                break;
            case "SoftSign":
                descriptor = new Softsign();
                break;
            case "TanH":
                descriptor = new TanH();
                break;
            default:
                throw new Exception("Unknown layer type!");
        }

        return descriptor;
    }).ToList();

    return dscps;
}
public void LSTM_Test_Output_CellState_Result()
{
    // define values and variables
    Variable x = Variable.InputVariable(new int[] { 2 }, DataType.Float, "input");
    Variable ht_1 = Variable.InputVariable(new int[] { 3 }, DataType.Float, "prevOutput");
    Variable ct_1 = Variable.InputVariable(new int[] { 3 }, DataType.Float, "prevCellState");
    // Variable ht = Variable.InputVariable(new int[] { 3 }, DataType.Float, "output");

    // data 01
    var x1Values = Value.CreateBatch<float>(new NDShape(1, 2), new float[] { 1f, 2f }, device);
    var ct_1Values = Value.CreateBatch<float>(new NDShape(1, 3), new float[] { 0f, 0f, 0f }, device);
    var ht_1Values = Value.CreateBatch<float>(new NDShape(1, 3), new float[] { 0f, 0f, 0f }, device);

    uint seed = 1;
    var lstmCell = new LSTM();
    var ct = lstmCell.CellState(x, ht_1, ct_1, DataType.Float, Activation.TanH, false, false, device, ref seed);
    var ht = lstmCell.CellOutput(x, ht_1, ct, DataType.Float, device, false, false, Activation.TanH, ref seed);

    var ft = ht.Inputs.Where(l => l.Uid.StartsWith("Parameter")).ToList();
    var pCount = ft.Sum(p => p.Shape.TotalSize);
    var consts = ht.Inputs.Where(l => l.Uid.StartsWith("Constant")).ToList();
    var inp = ht.Inputs.Where(l => l.Uid.StartsWith("Input")).ToList();

    // bias parameters
    var bs = ft.Where(p => p.Name.Contains("_b")).ToList();
    var pa = new Parameter(bs[0]);
    pa.SetValue(new NDArrayView(pa.Shape, new float[] { 0.16f, 0.17f, 0.18f }, device));
    var pa1 = new Parameter(bs[1]);
    pa1.SetValue(new NDArrayView(pa1.Shape, new float[] { 0.16f, 0.17f, 0.18f }, device));
    var pa2 = new Parameter(bs[2]);
    pa2.SetValue(new NDArrayView(pa2.Shape, new float[] { 0.16f, 0.17f, 0.18f }, device));
    var pa3 = new Parameter(bs[3]);
    pa3.SetValue(new NDArrayView(pa3.Shape, new float[] { 0.16f, 0.17f, 0.18f }, device));

    // set values of the weight parameters
    var ws = ft.Where(p => p.Name.Contains("_w")).ToList();
    var ws0 = new Parameter(ws[0]);
    var ws1 = new Parameter(ws[1]);
    var ws2 = new Parameter(ws[2]);
    var ws3 = new Parameter(ws[3]);
    ws0.SetValue(new NDArrayView(ws0.Shape, new float[] { 0.01f, 0.03f, 0.05f, 0.02f, 0.04f, 0.06f }, device));
    ws1.SetValue(new NDArrayView(ws1.Shape, new float[] { 0.01f, 0.03f, 0.05f, 0.02f, 0.04f, 0.06f }, device));
    ws2.SetValue(new NDArrayView(ws2.Shape, new float[] { 0.01f, 0.03f, 0.05f, 0.02f, 0.04f, 0.06f }, device));
    ws3.SetValue(new NDArrayView(ws3.Shape, new float[] { 0.01f, 0.03f, 0.05f, 0.02f, 0.04f, 0.06f }, device));

    // set values of the update parameters
    var us = ft.Where(p => p.Name.Contains("_u")).ToList();
    var us0 = new Parameter(us[0]);
    var us1 = new Parameter(us[1]);
    var us2 = new Parameter(us[2]);
    var us3 = new Parameter(us[3]);
    us0.SetValue(new NDArrayView(us0.Shape, new float[] { 0.07f, 0.10f, 0.13f, 0.08f, 0.11f, 0.14f, 0.09f, 0.12f, 0.15f }, device));
    us1.SetValue(new NDArrayView(us1.Shape, new float[] { 0.07f, 0.10f, 0.13f, 0.08f, 0.11f, 0.14f, 0.09f, 0.12f, 0.15f }, device));
    us2.SetValue(new NDArrayView(us2.Shape, new float[] { 0.07f, 0.10f, 0.13f, 0.08f, 0.11f, 0.14f, 0.09f, 0.12f, 0.15f }, device));
    us3.SetValue(new NDArrayView(us3.Shape, new float[] { 0.07f, 0.10f, 0.13f, 0.08f, 0.11f, 0.14f, 0.09f, 0.12f, 0.15f }, device));

    // evaluate the model after the weights are set up
    var inV = new Dictionary<Variable, Value>();
    inV.Add(x, x1Values);
    inV.Add(ht_1, ht_1Values);
    inV.Add(ct_1, ct_1Values);

    // evaluate the output when the previous values are zero
    var outV11 = new Dictionary<Variable, Value>();
    outV11.Add(ht, null);
    ht.Evaluate(inV, outV11, device);

    // test result values
    var result = outV11[ht].GetDenseData<float>(ht);
    Assert.Equal(0.06286035f, result[0][0]);
    Assert.Equal(0.0878196657f, result[0][1]);
    Assert.Equal(0.114274316f, result[0][2]);

    // evaluate the cell state
    var outV = new Dictionary<Variable, Value>();
    outV.Add(ct, null);
    ct.Evaluate(inV, outV, device);
    var resultc = outV[ct].GetDenseData<float>(ct);
    Assert.Equal(0.114309236f, resultc[0][0]);
    Assert.Equal(0.15543206f, resultc[0][1]);
    Assert.Equal(0.197323829f, resultc[0][2]);

    // evaluate a second value, feeding the results back as the previous state
    // data 02
    var x2Values = Value.CreateBatch<float>(new NDShape(1, 2), new float[] { 3f, 4f }, device);
    ct_1Values = Value.CreateBatch<float>(new NDShape(1, 3), new float[] { resultc[0][0], resultc[0][1], resultc[0][2] }, device);
    ht_1Values = Value.CreateBatch<float>(new NDShape(1, 3), new float[] { result[0][0], result[0][1], result[0][2] }, device);

    inV = new Dictionary<Variable, Value>();
    inV.Add(x, x2Values);
    inV.Add(ht_1, ht_1Values);
    inV.Add(ct_1, ct_1Values);

    // evaluate the output for the second input
    outV11 = new Dictionary<Variable, Value>();
    outV11.Add(ht, null);
    ht.Evaluate(inV, outV11, device);

    // test result values
    result = outV11[ht].GetDenseData<float>(ht);
    Assert.Equal(0.128203362f, result[0][0]);
    Assert.Equal(0.206633776f, result[0][1]);
    Assert.Equal(0.288335562f, result[0][2]);

    // evaluate the cell state
    outV = new Dictionary<Variable, Value>();
    outV.Add(ct, null);
    ct.Evaluate(inV, outV, device);
    resultc = outV[ct].GetDenseData<float>(ct);
    Assert.Equal(0.227831185f, resultc[0][0]);
    Assert.Equal(0.3523231f, resultc[0][1]);
    Assert.Equal(0.4789199f, resultc[0][2]);
}
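The first set of expected values can also be reproduced by hand. All four gates share the same weights in this test, so with zero previous state the pre-activation z = W·x + b is identical for every gate, giving ct = sigmoid(z)·tanh(z) and ht = sigmoid(z)·tanh(ct). A standalone check (class and method names are illustrative):

// Standalone check of the first expected ht/ct values: with zero previous
// state, ct = sigmoid(z) * tanh(z) and ht = sigmoid(z) * tanh(ct), where
// z = W * x + b is the same for every gate because all gates share the
// same weights in the test above.
using System;

class CellStateCheck
{
    static double Sigmoid(double v) => 1.0 / (1.0 + Math.Exp(-v));

    static void Main()
    {
        double[] x = { 1.0, 2.0 };
        double[,] W = { { 0.01, 0.02 }, { 0.03, 0.04 }, { 0.05, 0.06 } };
        double[] b = { 0.16, 0.17, 0.18 };

        for (int row = 0; row < 3; row++)
        {
            double z = W[row, 0] * x[0] + W[row, 1] * x[1] + b[row];
            double ct = Sigmoid(z) * Math.Tanh(z);  // 0.1143092, 0.1554321, 0.1973238
            double ht = Sigmoid(z) * Math.Tanh(ct); // 0.0628604, 0.0878197, 0.1142743
            Console.WriteLine($"ct={ct}, ht={ht}");
        }
    }
}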
static void SetParams<T>(Function<T> func, NpzDictionary modelData) where T : unmanaged, IComparable<T>
{
    if (func is Linear<T> linear)
    {
        linear.Weight.Data = modelData[func.Name + "/W.npy"].FlattenEx<T>();
        if (linear.Bias != null)
        {
            linear.Bias.Data = modelData[func.Name + "/b.npy"].FlattenEx<T>();
        }
    }
    else if (func is Convolution2D<T> conv2D)
    {
        conv2D.Weight.Data = modelData[func.Name + "/W.npy"].FlattenEx<T>();
        if (conv2D.Bias != null)
        {
            conv2D.Bias.Data = modelData[func.Name + "/b.npy"].FlattenEx<T>();
        }
    }
    else if (func is Deconvolution2D<T> deconv2D)
    {
        deconv2D.Weight.Data = modelData[func.Name + "/W.npy"].FlattenEx<T>();
        if (deconv2D.Bias != null)
        {
            deconv2D.Bias.Data = modelData[func.Name + "/b.npy"].FlattenEx<T>();
        }
    }
    else if (func is EmbedID<T> embed)
    {
        embed.Weight.Data = modelData[func.Name + "/W.npy"].FlattenEx<T>();
    }
    else if (func is BatchNormalization<T> bn)
    {
        bn.Beta.Data = modelData[func.Name + "/beta.npy"].FlattenEx<T>();
        bn.Gamma.Data = modelData[func.Name + "/gamma.npy"].FlattenEx<T>();
        if (bn.Train)
        {
            if (modelData.ContainsKey(func.Name + "/avg_mean.npy"))
            {
                bn.AvgMean.Data = modelData[func.Name + "/avg_mean.npy"].FlattenEx<T>();
            }
            if (modelData.ContainsKey(func.Name + "/avg_var.npy"))
            {
                bn.AvgVar.Data = modelData[func.Name + "/avg_var.npy"].FlattenEx<T>();
            }
        }
    }
    else if (func is MultiplyScale<T> scale)
    {
        scale.Weight.Data = modelData[func.Name + "/W.npy"].FlattenEx<T>();
        if (scale.BiasTerm)
        {
            scale.Bias.Data = modelData[func.Name + "/bias/b.npy"].FlattenEx<T>();
        }
    }
    else if (func is LSTM<T> lstm)
    {
        lstm.lateral.Weight.Data = modelData[func.Name + "/lateral/W.npy"].FlattenEx<T>();
        lstm.upward.Weight.Data = modelData[func.Name + "/upward/W.npy"].FlattenEx<T>();
        lstm.upward.Bias.Data = modelData[func.Name + "/upward/b.npy"].FlattenEx<T>();
    }
}
static void SetParams(Function func, NpzDictionary modelData)
{
    if (func is Linear linear)
    {
        Array.Copy(Real.ToRealArray(modelData[func.Name + "/W.npy"]), linear.Weight.Data, linear.Weight.Data.Length);
        if (!linear.NoBias)
        {
            Array.Copy(Real.ToRealArray(modelData[func.Name + "/b.npy"]), linear.Bias.Data, linear.Bias.Data.Length);
        }
    }
    else if (func is Convolution2D conv2D)
    {
        Array.Copy(Real.ToRealArray(modelData[func.Name + "/W.npy"]), conv2D.Weight.Data, conv2D.Weight.Data.Length);
        if (!conv2D.NoBias)
        {
            Array.Copy(Real.ToRealArray(modelData[func.Name + "/b.npy"]), conv2D.Bias.Data, conv2D.Bias.Data.Length);
        }
    }
    else if (func is Deconvolution2D deconv2D)
    {
        Array.Copy(Real.ToRealArray(modelData[func.Name + "/W.npy"]), deconv2D.Weight.Data, deconv2D.Weight.Data.Length);
        if (!deconv2D.NoBias)
        {
            Array.Copy(Real.ToRealArray(modelData[func.Name + "/b.npy"]), deconv2D.Bias.Data, deconv2D.Bias.Data.Length);
        }
    }
    else if (func is EmbedID embed)
    {
        Array.Copy(Real.ToRealArray(modelData[func.Name + "/W.npy"]), embed.Weight.Data, embed.Weight.Data.Length);
    }
    else if (func is BatchNormalization bn)
    {
        Array.Copy(Real.ToRealArray(modelData[func.Name + "/beta.npy"]), bn.Beta.Data, bn.Beta.Data.Length);
        Array.Copy(Real.ToRealArray(modelData[func.Name + "/gamma.npy"]), bn.Gamma.Data, bn.Gamma.Data.Length);
        if (bn.Train)
        {
            if (modelData.ContainsKey(func.Name + "/avg_mean.npy"))
            {
                Array.Copy(Real.ToRealArray(modelData[func.Name + "/avg_mean.npy"]), bn.AvgMean.Data, bn.AvgMean.Data.Length);
            }
            if (modelData.ContainsKey(func.Name + "/avg_var.npy"))
            {
                Array.Copy(Real.ToRealArray(modelData[func.Name + "/avg_var.npy"]), bn.AvgVar.Data, bn.AvgVar.Data.Length);
            }
        }
    }
    else if (func is MultiplyScale scale)
    {
        Array.Copy(Real.ToRealArray(modelData[func.Name + "/W.npy"]), scale.Weight.Data, scale.Weight.Data.Length);
        if (scale.BiasTerm)
        {
            Array.Copy(Real.ToRealArray(modelData[func.Name + "/bias/b.npy"]), scale.Bias.Data, scale.Bias.Data.Length);
        }
    }
    else if (func is LSTM lstm)
    {
        Real[] lateral = Real.ToRealArray(modelData[func.Name + "/lateral/W.npy"]);
        Real[] upwardW = Real.ToRealArray(modelData[func.Name + "/upward/W.npy"]);
        Real[] upwardb = Real.ToRealArray(modelData[func.Name + "/upward/b.npy"]);
        Array.Copy(lateral, 0, lstm.lateral.Weight.Data, 0, lstm.lateral.Weight.Data.Length);
        Array.Copy(upwardW, 0, lstm.upward.Weight.Data, 0, lstm.upward.Weight.Data.Length);
        Array.Copy(upwardb, 0, lstm.upward.Bias.Data, 0, lstm.upward.Bias.Data.Length);
    }
}
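A usage sketch applying SetParams across a model: how the NpzDictionary is constructed and the `functionStack` collection are assumptions here, not part of the source; the function names must match the key prefixes inside the .npz archive.

// Copy pretrained Chainer weights into each function of a network
// (hypothetical NpzDictionary construction and function collection).
NpzDictionary modelData = new NpzDictionary("model.npz");
foreach (Function func in functionStack)
{
    SetParams(func, modelData);
}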