Example #1
    public void TestAddTensorTensorBP()
    {
        TensorAllocator.InitDevices(ProcessorTypeEnums.CPU, new int[] { 0 });

        var graph = new ComputeGraphTensor(new WeightTensorFactory(), 0, true);

        var tensorA = new WeightTensor(new long[2] { 2, 2 }, 1, 0, name: "tensorA", isTrainable: true);
        var tensorB = new WeightTensor(new long[2] { 2, 2 }, 2, 0, name: "tensorB", isTrainable: true);

        var tensorSum = graph.Add(tensorA, tensorB);

        // Seed the backward pass: use the sum's weights (1 + 2 = 3 in every cell) as its gradient.
        tensorSum.CopyWeightsToGradients(tensorSum);

        graph.Backward();

        // d(A+B)/dA = d(A+B)/dB = 1, so each input receives the seeded gradient of 3.0f.
        float gA = tensorA.GetGradientAt(new long[] { 1, 1 });
        float gB = tensorB.GetGradientAt(new long[] { 1, 1 });

        Assert.IsTrue(gA == 3.0f);
        Assert.IsTrue(gB == 3.0f);
    }
Example #2
        public static IWeightTensor BuildSrcTgtMask(IComputeGraph g, int srcPaddedLength, int tgtPaddedLength, List<int> tgtOriginalLengths, List<int> srcOriginalLengths, int deviceId)
        {
            // Start with a large negative value everywhere; positions left untouched are
            // effectively masked out once the scores go through Softmax.
            float[] buf = new float[tgtOriginalLengths.Count * tgtPaddedLength * srcPaddedLength];
            Array.Fill(buf, -99999999.0f);

            for (int k = 0; k < tgtOriginalLengths.Count; k++) // batch size
            {
                int offset_k = k * (tgtPaddedLength * srcPaddedLength);
                for (int i = 0; i < tgtOriginalLengths[k]; i++) // valid (non-padded) target positions
                {
                    int offset_k_i = offset_k + i * srcPaddedLength;
                    for (int j = 0; j < srcOriginalLengths[k]; j++) // valid (non-padded) source positions
                    {
                        buf[offset_k_i + j] = 0.0f;
                    }
                }
            }

            WeightTensor tensor = new WeightTensor(new long[] { tgtOriginalLengths.Count, tgtPaddedLength, srcPaddedLength }, deviceId, $"SrcTgtMask_{deviceId}", isTrainable: false);

            tensor.SetWeightArray(buf);

            return tensor;
        }
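
A hedged usage sketch follows (not part of the original sample): it shows how a mask built this way is typically consumed. `attnScores` stands for a hypothetical tensor of raw attention scores with the same { batch, tgtPaddedLength, srcPaddedLength } shape as the mask, and only the Add and Softmax graph operations already shown in these samples are assumed to exist on IComputeGraph.

        public static IWeightTensor ApplySrcTgtMask(IComputeGraph g, IWeightTensor attnScores, IWeightTensor srcTgtMask)
        {
            // Padded positions carry a large negative score, so they collapse to ~0 probability after Softmax.
            IWeightTensor maskedScores = g.Add(attnScores, srcTgtMask);

            return g.Softmax(maskedScores);
        }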
Example #3
        private void UpdateWeightsTensor(WeightTensor m, int batchSize, float step_size, float clipval, float regc, int rowId)
        {
            Tensor TWeight   = m.TWeight.Narrow(0, rowId, 1);
            Tensor TGradient = m.TGradient.Narrow(0, rowId, 1);
            Tensor TCash     = m.TCash.Narrow(0, rowId, 1);
            Tensor TLrW      = m.TLrW.Narrow(0, rowId, 1);

            if (batchSize != 1)
            {
                // Average the accumulated gradient over the batch.
                Ops.Mul(TGradient, TGradient, 1.0f / batchSize);
            }

            Ops.Clamp(TGradient, TGradient, -clipval, clipval);
            Ops.UpdateCash(TCash, TCash, TGradient, decay_rate);

            Ops.UpdateDelta(TGradient, TGradient, TCash, smooth_eps);

            Ops.UpdateCash(TLrW, TLrW, TGradient, lr_decay_rate);

            //    Ops.AddMul(TLrW, TLrW, TGradient, TGradient);

            Ops.UpdateWeight2(TWeight, TWeight, TGradient, TLrW, -step_size, -regc);

            //      Ops.UpdateWeight3(TWeight, TWeight, TGradient, -step_size, -regc);

            TWeight.Dispose();
            TGradient.Dispose();
            TCash.Dispose();
            TLrW.Dispose();
        }
        public LSTMAttentionDecoderCell(int batchSize, int hdim, int dim, ArchTypeEnums archType, int deviceId)
        {
            int contextSize = hdim * 2;

            this.hdim  = hdim;
            this.dim   = dim;
            m_deviceId = deviceId;

            m_batchSize = batchSize;

            if (archType == ArchTypeEnums.GPU_CUDA)
            {
                Wxhc = new WeightTensor(dim + hdim + contextSize, hdim * 4, deviceId, true);
                b    = new WeightTensor(1, hdim * 4, 0, deviceId);

                this.ht = new WeightTensor(batchSize, hdim, 0, deviceId);
                this.ct = new WeightTensor(batchSize, hdim, 0, deviceId);
            }
            else
            {
                Wxhc = new WeightMatrix(dim + hdim + contextSize, hdim * 4, true);
                b    = new WeightMatrix(1, hdim * 4, 0);

                this.ht = new WeightMatrix(batchSize, hdim, 0);
                this.ct = new WeightMatrix(batchSize, hdim, 0);
            }

            layerNorm1 = new LayerNormalization(hdim * 4, archType, deviceId);
            layerNorm2 = new LayerNormalization(hdim, archType, deviceId);
        }
        public LSTMAttentionDecoderCell(string name, int batchSize, int hdim, int dim, int contextSize, int deviceId)
        {
            m_name      = name;
            m_hdim      = hdim;
            m_dim       = dim;
            m_deviceId  = deviceId;
            m_batchSize = batchSize;

            m_Wxhc = new WeightTensor(new long[2] { dim + hdim + contextSize, hdim * 4 }, deviceId, normal: true, name: $"{name}.{nameof(m_Wxhc)}", isTrainable: true);
            m_b = new WeightTensor(new long[2] { 1, hdim * 4 }, 0, deviceId, name: $"{name}.{nameof(m_b)}", isTrainable: true);

            Hidden = new WeightTensor(new long[2] { batchSize, hdim }, 0, deviceId, name: $"{name}.{nameof(Hidden)}", isTrainable: true);
            Cell = new WeightTensor(new long[2] { batchSize, hdim }, 0, deviceId, name: $"{name}.{nameof(Cell)}", isTrainable: true);

            layerNorm1 = new LayerNormalization($"{name}.{nameof(layerNorm1)}", hdim * 4, deviceId);
            layerNorm2 = new LayerNormalization($"{name}.{nameof(layerNorm2)}", hdim, deviceId);
        }
Example #6
        public static IWeightTensor BuildPadSelfMask(IComputeGraph g, int paddedLength, List<int> originalLengths, int deviceId)
        {
            var buf = new float[originalLengths.Count * paddedLength * paddedLength];

            // Fill with a large negative value so that padded positions vanish after Softmax.
            for (var i = 0; i < buf.Length; i++)
            {
                buf[i] = -1e30f;
            }

            for (var k = 0; k < originalLengths.Count; k++)
            {
                for (var i = 0; i < originalLengths[k]; i++)
                {
                    for (var j = 0; j < originalLengths[k]; j++)
                    {
                        // ReSharper disable once ArrangeRedundantParentheses
                        buf[k * (paddedLength * paddedLength) + i * paddedLength + j] = 0.0f;
                    }
                }
            }

            var tensor = new WeightTensor(new long[] { originalLengths.Count, paddedLength, paddedLength }, 0.0f, deviceId, $"TriMask_{deviceId}", false);

            tensor.SetWeightArray(buf);

            return tensor;
        }
Example #7
        public static void ScatterFill(IWeightTensor res, float val, IWeightTensor indices, int dim)
        {
            WeightTensor i = indices as WeightTensor;
            WeightTensor r = res as WeightTensor;

            // Write `val` into `res` at the positions given by `indices`, along dimension `dim`.
            Ops.ScatterFill(r.TWeight, val, dim, i.TWeight);
        }
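
A hedged sketch (not from the original library code): assuming the WeightTensor constructor used elsewhere in these samples (shape, initial value, deviceId, name, isTrainable), a { batchSize, 1 } tensor of label indices, such as the one produced by BuildRandomLabelTensor in a later sample, can be expanded into a one-hot matrix by scattering 1.0f along dimension 1. The method name BuildOneHot and its parameters are placeholders.

        public static IWeightTensor BuildOneHot(IWeightTensor labelIdx, int batchSize, int categoryNum, int deviceId)
        {
            // Start from an all-zero { batchSize, categoryNum } matrix, then set a single 1.0f per row
            // at the column index stored in labelIdx.
            var oneHot = new WeightTensor(new long[] { batchSize, categoryNum }, 0, deviceId, name: "oneHot", isTrainable: false);
            ScatterFill(oneHot, 1.0f, labelIdx, 1);

            return oneHot;
        }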
Example #8
        public static WeightTensor BuildPositionWeightTensor(int row, int column, int deviceId, string name = "", bool isTrainable = false)
        {
            Logger.WriteLine($"Building position weights tensor. Row = '{row}', Column = '{column}', DeviceId = '{deviceId}', Name = '{name}', Trainable = '{isTrainable}'");

            WeightTensor t = new WeightTensor(new long[2] { row, column }, deviceId, name: name, isTrainable: isTrainable, needGradient: isTrainable);

            float[] posWeights = new float[row * column];

            float numTimescales         = (float)column / 2;
            float logTimescaleIncrement = (float)(Math.Log(10000.0f) / (numTimescales - 1.0f));

            for (int p = 0; p < row; ++p)
            {
                for (int i = 0; i < numTimescales; i++)
                {
                    float v = (float)(p * Math.Exp(i * -logTimescaleIncrement));

                    posWeights[p * column + i] = (float)Math.Sin(v);
                    posWeights[p * column + (int)numTimescales + i] = (float)Math.Cos(v);
                }
            }

            t.TWeight.CopyFrom(posWeights);

            return t;
        }
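
Assuming column is even so that numTimescales = column / 2 (an assumption; the code does not guard against odd widths), the loops above fill row p of posWeights with the standard sinusoidal position encoding:

    PE(p, i)                 = sin(p * exp(-i * ln(10000) / (numTimescales - 1)))
    PE(p, i + numTimescales) = cos(p * exp(-i * ln(10000) / (numTimescales - 1)))

Each timescale therefore occupies one sine column in the first half of the row and the matching cosine column in the second half.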
Example #9
        public IWeightTensor Softmax(IWeightTensor w, bool runGradients = true, bool inPlace = false)
        {
            WeightTensor m   = w as WeightTensor;
            WeightTensor res = null;

            if (inPlace)
            {
                // Reuse the input's weight storage instead of allocating a new output tensor.
                res = m.CopyWeightsRef($"{GetHashString(w.Name)}.Softmax");
            }
            else
            {
                res = m_weightTensorFactory.CreateWeightTensor(m.Sizes, m_deviceId, name: $"{GetHashString(w.Name)}.Softmax");
            }

            VisualizeNodes(w, res);

            Ops.Softmax(res.TWeight, m.TWeight);
            if (m_needsBackprop && runGradients)
            {
                Action backward = () =>
                {
                    if (inPlace)
                    {
                        m.TGradient = res.TGradient.CopyRef();
                    }

                    m.AddSoftmaxGradient(res, inPlace);
                    res.Dispose();
                };
                m_backprop.Add(backward);
            }

            return res;
        }
Example #10
    private WeightTensor BuildRandomLabelTensor(int batchSize, int categoryNum, string name)
    {
        var tensorIdx = new WeightTensor(new long[] { batchSize, 1 }, 1, 0, name: name, isTrainable: false);

        //Build ground truth labels
        float[] arrayIdx = new float[batchSize];
        for (int i = 0; i < batchSize; i++)
        {
            arrayIdx[i] = rnd.Next(0, categoryNum);
        }
        tensorIdx.SetWeightArray(arrayIdx);

        return tensorIdx;
    }
Example #11
    private WeightTensor BuildRandomTensor(long[] shape, string name, bool isTrainable = true)
    {
        var tensorA = new WeightTensor(shape, 1, 0, name: name, isTrainable: isTrainable);

        //Build test data and ground truth data
        float[] arrayA = new float[tensorA.ElementCount];
        for (int i = 0; i < tensorA.ElementCount; i++)
        {
            arrayA[i] = (float)rnd.NextDouble();
        }
        tensorA.SetWeightArray(arrayA);

        return tensorA;
    }
Example #12
        private void UpdateWeightsTensor(WeightTensor m, int batchSize, float step_size, float regc, int iter)
        {
            try
            {
                Ops.RMSProp(m.TWeight, m.TGradient, m_cacheName2V[m.Name], batchSize, step_size, m_clipval, regc, m_decayRate, m_smoothEps);
            }
            catch (Exception err)
            {
                Logger.WriteLine(Logger.Level.err, $"Exception: '{err.Message}'");
                Logger.WriteLine(Logger.Level.err, $"Call stack: '{err.StackTrace}'");

                throw;
            }
        }
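
For orientation only: assuming Ops.RMSProp implements the standard RMSProp rule with L2 weight decay (an assumption about the fused kernel, which is not shown in the sample), a scalar sketch of the per-weight update would look like this. It operates on plain floats and is not the actual tensor kernel.

        private static (float weight, float cache) RmsPropStep(float weight, float grad, float cache, int batchSize, float stepSize, float clipVal, float regc, float decayRate, float smoothEps)
        {
            float g = Math.Clamp(grad / batchSize, -clipVal, clipVal);     // batch-average and clip the gradient
            cache = decayRate * cache + (1.0f - decayRate) * g * g;        // running average of squared gradients
            weight -= stepSize * g / ((float)Math.Sqrt(cache) + smoothEps) // RMS-scaled gradient step
                      + stepSize * regc * weight;                          // plus L2 weight decay
            return (weight, cache);
        }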