public void TestAddTensorTensorBP()
{
    // Verifies the backward pass of element-wise tensor addition:
    // d(A+B)/dA = d(A+B)/dB = 1, so each input's gradient equals the
    // upstream gradient, which is seeded here from the sum's own weights.
    TensorAllocator.InitDevices(ProcessorTypeEnums.CPU, new int[] { 0 });
    var graph = new ComputeGraphTensor(new WeightTensorFactory(), 0, true);

    // Second ctor argument is presumably the initial fill value:
    // tensorA all 1.0, tensorB all 2.0 — TODO confirm against WeightTensor ctor.
    var tensorA = new WeightTensor(new long[2] { 2, 2 }, 1, 0, name: "tensorA", isTrainable: true);
    var tensorB = new WeightTensor(new long[2] { 2, 2 }, 2, 0, name: "tensorB", isTrainable: true);

    var tensorSum = graph.Add(tensorA, tensorB);

    // Seed the upstream gradient with the sum's weights (all 3.0), then backprop.
    tensorSum.CopyWeightsToGradients(tensorSum);
    graph.Backward();

    float gA = tensorA.GetGradientAt(new long[] { 1, 1 });
    float gB = tensorB.GetGradientAt(new long[] { 1, 1 });

    // Compare with a tolerance instead of exact float equality (==):
    // bitwise float comparison is brittle and AreEqual gives a useful
    // failure message with expected/actual values.
    Assert.AreEqual(3.0f, gA, 1e-6f);
    Assert.AreEqual(3.0f, gB, 1e-6f);
}
public static IWeightTensor BuildSrcTgtMask(IComputeGraph g, int srcPaddedLength, int tgtPaddedLength, List<int> tgtOriginalLengths, List<int> srcOriginalLengths, int deviceId)
{
    // Builds one attention mask plane per batch item, shaped
    // [batch, tgtPaddedLength, srcPaddedLength]: positions where both the
    // target row and source column fall inside the sequence's real length
    // get 0.0; padded positions keep a large negative value so they vanish
    // after a subsequent softmax.
    int batchSize = tgtOriginalLengths.Count;
    float[] maskBuf = new float[batchSize * tgtPaddedLength * srcPaddedLength];
    Array.Fill(maskBuf, -99999999.0f);

    for (int b = 0; b < batchSize; b++)
    {
        int planeBase = b * tgtPaddedLength * srcPaddedLength;
        int tgtLen = tgtOriginalLengths[b];
        int srcLen = srcOriginalLengths[b];

        for (int t = 0; t < tgtLen; t++)
        {
            int rowBase = planeBase + t * srcPaddedLength;
            for (int s = 0; s < srcLen; s++)
            {
                maskBuf[rowBase + s] = 0.0f;
            }
        }
    }

    WeightTensor tensor = new WeightTensor(new long[] { batchSize, tgtPaddedLength, srcPaddedLength }, deviceId, $"SrcTgtMask_{deviceId}", isTrainable: false);
    tensor.SetWeightArray(maskBuf);
    return tensor;
}
private void UpdateWeightsTensor(WeightTensor m, int batchSize, float step_size, float clipval, float regc, int rowId)
{
    // Applies an Adadelta/RMSProp-style update to a single row of the weight
    // tensor (sparse per-row update, e.g. for embedding rows).
    Tensor TWeight = m.TWeight.Narrow(0, rowId, 1);
    Tensor TGradient = m.TGradient.Narrow(0, rowId, 1);
    Tensor TCash = m.TCash.Narrow(0, rowId, 1);
    Tensor TLrW = m.TLrW.Narrow(0, rowId, 1);

    try
    {
        // Average the accumulated gradient over the batch before clipping.
        if (batchSize != 1)
        {
            Ops.Mul(TGradient, TGradient, 1.0f / batchSize);
        }

        Ops.Clamp(TGradient, TGradient, -clipval, clipval);

        // Update the squared-gradient cache and rescale the gradient by it.
        Ops.UpdateCash(TCash, TCash, TGradient, decay_rate);
        Ops.UpdateDelta(TGradient, TGradient, TCash, smooth_eps);

        // Per-row learning-rate accumulator, then the actual weight step
        // (with weight decay via -regc).
        Ops.UpdateCash(TLrW, TLrW, TGradient, lr_decay_rate);
        Ops.UpdateWeight2(TWeight, TWeight, TGradient, TLrW, -step_size, -regc);
    }
    finally
    {
        // Narrow() returns new Tensor views that must be disposed; use
        // finally so an exception in any Ops call cannot leak them.
        TWeight.Dispose();
        TGradient.Dispose();
        TCash.Dispose();
        TLrW.Dispose();
    }
}
// Builds an LSTM decoder cell with attention, choosing the tensor backend
// (WeightTensor for CUDA, WeightMatrix otherwise) based on archType.
public LSTMAttentionDecoderCell(int batchSize, int hdim, int dim, ArchTypeEnums archType, int deviceId)
{
    // Context vector is twice the hidden size — presumably the concatenation
    // of forward/backward encoder states; TODO confirm against the encoder.
    int contextSize = hdim * 2;
    this.hdim = hdim;
    this.dim = dim;
    m_deviceId = deviceId;
    m_batchSize = batchSize;

    if (archType == ArchTypeEnums.GPU_CUDA)
    {
        // Single fused weight mapping [input ; hidden ; context] -> 4 LSTM gates.
        Wxhc = new WeightTensor(dim + hdim + contextSize, hdim * 4, deviceId, true);
        // NOTE(review): the bias/state ctors pass an extra leading 0
        // (presumably an initial fill value) before deviceId — a different
        // overload than Wxhc; confirm against the WeightTensor constructors.
        b = new WeightTensor(1, hdim * 4, 0, deviceId);

        // Hidden state (ht) and cell state (ct), one row per batch item.
        this.ht = new WeightTensor(batchSize, hdim, 0, deviceId);
        this.ct = new WeightTensor(batchSize, hdim, 0, deviceId);
    }
    else
    {
        // CPU path mirrors the GPU layout with WeightMatrix.
        Wxhc = new WeightMatrix(dim + hdim + contextSize, hdim * 4, true);
        b = new WeightMatrix(1, hdim * 4, 0);

        this.ht = new WeightMatrix(batchSize, hdim, 0);
        this.ct = new WeightMatrix(batchSize, hdim, 0);
    }

    // Layer norm over the fused gate pre-activations and over the hidden state.
    layerNorm1 = new LayerNormalization(hdim * 4, archType, deviceId);
    layerNorm2 = new LayerNormalization(hdim, archType, deviceId);
}
// Builds a tensor-backend LSTM decoder cell with attention; every parameter
// is registered under "{name}.<member>" so it can be tracked/serialized.
public LSTMAttentionDecoderCell(string name, int batchSize, int hdim, int dim, int contextSize, int deviceId)
{
    m_name = name;
    m_hdim = hdim;
    m_dim = dim;
    m_deviceId = deviceId;
    m_batchSize = batchSize;

    // Single fused weight mapping [input ; hidden ; context] -> 4 LSTM gates,
    // initialized with normal-distributed values (normal: true).
    m_Wxhc = new WeightTensor(new long[2] { dim + hdim + contextSize, hdim * 4 }, deviceId, normal: true, name: $"{name}.{nameof(m_Wxhc)}", isTrainable: true);
    // NOTE(review): the bias/state ctors pass a leading 0 (presumably an
    // initial fill value) before deviceId — a different overload than
    // m_Wxhc; confirm against the WeightTensor constructor signatures.
    m_b = new WeightTensor(new long[2] { 1, hdim * 4 }, 0, deviceId, name: $"{name}.{nameof(m_b)}", isTrainable: true);

    // Hidden and cell state, one row per batch item.
    Hidden = new WeightTensor(new long[2] { batchSize, hdim }, 0, deviceId, name: $"{name}.{nameof(Hidden)}", isTrainable: true);
    Cell = new WeightTensor(new long[2] { batchSize, hdim }, 0, deviceId, name: $"{name}.{nameof(Cell)}", isTrainable: true);

    // Layer norm over the fused gate pre-activations and over the cell output.
    layerNorm1 = new LayerNormalization($"{name}.{nameof(layerNorm1)}", hdim * 4, deviceId);
    layerNorm2 = new LayerNormalization($"{name}.{nameof(layerNorm2)}", hdim, deviceId);
}
public static IWeightTensor BuildPadSelfMask(IComputeGraph g, int paddedLength, List<int> originalLengths, int deviceId)
{
    // Self-attention padding mask of shape [batch, paddedLength, paddedLength]:
    // positions where both query and key indices fall inside the sequence's
    // real length get 0.0; everything else keeps -1e30 so a subsequent
    // softmax drives it to zero.
    var buf = new float[originalLengths.Count * paddedLength * paddedLength];
    // Array.Fill instead of a manual loop — idiomatic, and consistent with
    // BuildSrcTgtMask's initialization of its mask buffer.
    Array.Fill(buf, -1e30f);

    for (var k = 0; k < originalLengths.Count; k++)
    {
        int planeBase = k * paddedLength * paddedLength;
        int len = originalLengths[k];

        for (var i = 0; i < len; i++)
        {
            int rowBase = planeBase + i * paddedLength;
            for (var j = 0; j < len; j++)
            {
                buf[rowBase + j] = 0.0f;
            }
        }
    }

    var tensor = new WeightTensor(new long[] { originalLengths.Count, paddedLength, paddedLength }, 0.0f, deviceId, $"TriMask_{deviceId}", false);
    tensor.SetWeightArray(buf);
    return tensor;
}
public static void ScatterFill(IWeightTensor res, float val, IWeightTensor indices, int dim)
{
    // Thin wrapper over Ops.ScatterFill: writes 'val' into 'res' at the
    // positions selected by 'indices' along dimension 'dim'.
    var targetTensor = res as WeightTensor;
    var indexTensor = indices as WeightTensor;

    Ops.ScatterFill(targetTensor.TWeight, val, dim, indexTensor.TWeight);
}
public static WeightTensor BuildPositionWeightTensor(int row, int column, int deviceId, string name = "", bool isTrainable = false)
{
    // Builds the fixed sinusoidal positional-encoding table from
    // "Attention Is All You Need": for each position p, the first half of
    // the row holds sin over geometrically spaced timescales, the second
    // half holds the matching cos.
    Logger.WriteLine($"Building position weights tensor. Row = '{row}', Column = '{column}', DeviceId = '{deviceId}', Name = '{name}', Trainable = '{isTrainable}'");

    WeightTensor t = new WeightTensor(new long[2] { row, column }, deviceId, name: name, isTrainable: isTrainable, needGradient: isTrainable);

    float[] posWeights = new float[row * column];

    float numTimescales = (float)column / 2;
    // Guard against division by zero when column == 2 (numTimescales == 1),
    // which previously produced Infinity. This mirrors the reference
    // tensor2tensor implementation: log(10000) / max(num_timescales - 1, 1).
    float logTimescaleIncrement = (float)(Math.Log(10000.0f) / Math.Max(numTimescales - 1.0f, 1.0f));

    for (int p = 0; p < row; ++p)
    {
        for (int i = 0; i < numTimescales; i++)
        {
            float v = (float)(p * Math.Exp(i * -logTimescaleIncrement));

            posWeights[p * column + i] = (float)Math.Sin(v);
            posWeights[p * column + (int)numTimescales + i] = (float)Math.Cos(v);
        }
    }

    // NOTE(review): assumes 'column' is even; with an odd column the middle
    // slot is written twice (sin then cos). Confirm callers always pass an
    // even embedding size.
    t.TWeight.CopyFrom(posWeights);

    return t;
}
// Softmax over 'w' with optional backprop. When inPlace is true the result
// shares the input's weight storage (no extra allocation); when
// runGradients is false the forward result is produced but no backward
// step is registered.
public IWeightTensor Softmax(IWeightTensor w, bool runGradients = true, bool inPlace = false)
{
    WeightTensor m = w as WeightTensor;
    WeightTensor res = null;
    if (inPlace)
    {
        // Reuse the input's weight buffer; 'res' aliases 'm's weights.
        res = m.CopyWeightsRef($"{GetHashString(w.Name)}.Softmax");
    }
    else
    {
        res = m_weightTensorFactory.CreateWeightTensor(m.Sizes, m_deviceId, name: $"{GetHashString(w.Name)}.Softmax");
    }
    VisualizeNodes(w, res);

    Ops.Softmax(res.TWeight, m.TWeight);
    if (m_needsBackprop && runGradients)
    {
        Action backward = () =>
        {
            if (inPlace)
            {
                // In-place mode: make the input's gradient alias the
                // output's so AddSoftmaxGradient accumulates into shared
                // storage — order matters, this must precede the call below.
                m.TGradient = res.TGradient.CopyRef();
            }
            m.AddSoftmaxGradient(res, inPlace);
            // The forward result is no longer needed once its gradient has
            // been folded back into 'm'.
            res.Dispose();
        };
        m_backprop.Add(backward);
    }

    return(res);
}
private WeightTensor BuildRandomLabelTensor(int batchSize, int categoryNum, string name)
{
    // Creates a [batchSize, 1] tensor whose entries are random class
    // indices drawn uniformly from [0, categoryNum).
    var labelTensor = new WeightTensor(new long[] { batchSize, 1 }, 1, 0, name: name, isTrainable: false);

    var labels = new float[batchSize];
    for (int row = 0; row < labels.Length; row++)
    {
        labels[row] = rnd.Next(0, categoryNum);
    }

    labelTensor.SetWeightArray(labels);
    return labelTensor;
}
private WeightTensor BuildRandomTensor(long[] shape, string name, bool isTrainable = true)
{
    // Creates a tensor of the requested shape filled with uniform random
    // values in [0, 1).
    var tensor = new WeightTensor(shape, 1, 0, name: name, isTrainable: isTrainable);

    var count = tensor.ElementCount;
    var values = new float[count];
    for (var idx = 0; idx < count; idx++)
    {
        values[idx] = (float)rnd.NextDouble();
    }

    tensor.SetWeightArray(values);
    return tensor;
}
private void UpdateWeightsTensor(WeightTensor m, int batchSize, float step_size, float regc, int iter)
{
    // Applies a fused RMSProp step to the entire tensor in one kernel call;
    // the optimizer state is fetched from the name-keyed cache.
    try
    {
        Ops.RMSProp(m.TWeight, m.TGradient, m_cacheName2V[m.Name], batchSize, step_size, m_clipval, regc, m_decayRate, m_smoothEps);
    }
    catch (Exception ex)
    {
        // Log and rethrow; the bare 'throw' preserves the stack trace.
        Logger.WriteLine(Logger.Level.err, $"Exception: '{ex.Message}'");
        Logger.WriteLine(Logger.Level.err, $"Call stack: '{ex.StackTrace}'");
        throw;
    }
}