예제 #1
0
        /// <summary>
        /// Stacks the given matrices vertically (row-wise concatenation) into one new matrix.
        /// Assumes every input has the same number of columns — TODO confirm with callers
        /// (the code simply keeps the last matrix's column count).
        /// </summary>
        /// <param name="wl">Matrices to stack; each element must actually be a WeightMatrix.</param>
        /// <param name="bp">Not read in this method; the backward pass is gated by this.needs_backprop instead.</param>
        /// <returns>A new matrix whose rows are the inputs' rows in list order.</returns>
        public virtual IWeightMatrix ConcatRows(List <IWeightMatrix> wl, bool bp = true)
        {
            List <WeightMatrix> twl = new List <WeightMatrix>();
            int sx = 0;
            int sy = 0;

            // Total rows = sum of input rows; columns taken from the (last) input.
            foreach (IWeightMatrix item in wl)
            {
                WeightMatrix m = item as WeightMatrix;
                sx += m.Rows;
                sy  = m.Columns;

                twl.Add(m);
            }

            var res = weightMatrixFactory.CreateWeightMatrix(sx, sy);

            int startIdx = 0;

            // Forward: with row-major storage, stacking rows is a flat block copy per input.
            for (var i = 0; i < twl.Count; i++)
            {
                Array.Copy(twl[i].Weight, 0, res.Weight, startIdx, twl[i].Weight.Length);
                startIdx += twl[i].Weight.Length;
            }

            if (this.needs_backprop)
            {
                Action backward = () =>
                {
                    // Backward: slice res.Gradient back into each input's Gradient.
                    // NOTE: this closure reuses the captured startIdx, so it is reset here.
                    startIdx = 0;
                    for (var i = 0; i < twl.Count; i++)
                    {
                        var k        = 0;
                        var n        = twl[i].Gradient.Length;
                        var moreItem = (n % Vector <float> .Count);
                        var Gradient = twl[i].Gradient;

                        // SIMD over whole Vector<float>-sized chunks.
                        while (k < n - moreItem)
                        {
                            var vecResG = new Vector <float>(res.Gradient, startIdx + k);
                            var vecM1G  = new Vector <float>(Gradient, k);
                            vecM1G += vecResG;
                            vecM1G.CopyTo(Gradient, k);

                            k += Vector <float> .Count;
                        }

                        // Scalar tail for the remaining elements.
                        while (k < n)
                        {
                            Gradient[k] += res.Gradient[startIdx + k];
                            k++;
                        }

                        startIdx += n;
                    }
                };
                this.backprop.Add(backward);
            }

            return(res);
        }
예제 #2
0
        /// <summary>
        /// Numerically stable softmax over ALL elements of src (treated as one flat buffer,
        /// not per-row): subtract the global max, exponentiate, then normalize by the sum.
        /// Main loops are SIMD (Vector&lt;float&gt;) with scalar tails for the remainder.
        /// Registers the standard softmax backward pass when needs_backprop is set.
        /// </summary>
        public virtual IWeightMatrix Softmax(IWeightMatrix src)
        {
            WeightMatrix m = src as WeightMatrix;

            var res = weightMatrixFactory.CreateWeightMatrix(m.Rows, m.Columns); // probability volume

            var maxval   = -999999.0f; // sentinel lower bound; assumes weights stay above this — TODO use float.MinValue
            var n        = m.Weight.Length;
            var moreItem = (n % Vector <float> .Count);

            var k         = 0;
            var vecMaxVal = new Vector <float>(maxval);

            // Pass 1a: SIMD lane-wise running maximum.
            while (k < n - moreItem)
            {
                var vecMW = new Vector <float>(m.Weight, k);
                vecMaxVal = Vector.Max(vecMW, vecMaxVal);

                k += Vector <float> .Count;
            }

            // Horizontal reduction of the SIMD lanes into a scalar max.
            for (int i = 0; i < Vector <float> .Count; i++)
            {
                if (vecMaxVal[i] > maxval)
                {
                    maxval = vecMaxVal[i];
                }
            }


            // Pass 1b: scalar tail of the max scan.
            while (k < n)
            {
                if (m.Weight[k] > maxval)
                {
                    maxval = m.Weight[k];
                }

                k++;
            }


            double s = 0.0;

            // Pass 2: exp(x - max) into res.Weight, accumulating the normalizer s.
            k         = 0;
            vecMaxVal = new Vector <float>(maxval);
            while (k < n - moreItem)
            {
                var vecMW = new Vector <float>(m.Weight, k);
                var vecV  = FastExp(vecMW - vecMaxVal);
                vecV.CopyTo(res.Weight, k);

                // Dot with One = horizontal sum of the lane values.
                s += Vector.Dot(vecV, Vector <float> .One);

                k += Vector <float> .Count;
            }

            k = n - moreItem;
            while (k < n)
            {
                float v = FastExp(m.Weight[k] - maxval);
                res.Weight[k] = (float)v;
                s            += v;

                k++;
            }

            // Pass 3: divide everything by the normalizer so the result sums to 1.
            k = 0;
            var vecS = new Vector <float>((float)s);

            while (k < n - moreItem)
            {
                var vecResW = new Vector <float>(res.Weight, k);
                vecResW = vecResW / vecS;
                vecResW.CopyTo(res.Weight, k);

                k += Vector <float> .Count;
            }

            while (k < n)
            {
                float v = (float)(res.Weight[k] / s);
                res.Weight[k] = v;
                k++;
            }

            if (this.needs_backprop)
            {
                Action backward = () =>
                {
                    // Standard softmax gradient: dL/dx_i = y_i * (g_i - sum_j g_j * y_j),
                    // computed here as two passes (accumulate v = g*y, then subtract ss*y).
                    double ss = 0.0;
                    for (int i = 0; i < n; i++)
                    {
                        var v = res.Gradient[i] * res.Weight[i];
                        m.Gradient[i] += v;

                        ss += v;
                    }
                    for (int i = 0; i < n; i++)
                    {
                        m.Gradient[i] = (float)(m.Gradient[i] - ss * res.Weight[i]);
                    }
                };
                this.backprop.Add(backward);
            }

            return(res);
        }
예제 #3
0
        /// <summary>
        /// Concatenates the given matrices horizontally (side by side, column-wise).
        /// Assumes every input has the same number of rows — TODO confirm with callers
        /// (the code simply keeps the last matrix's row count).
        /// Registers a backward pass that scatters the result gradient back per row-slice.
        /// </summary>
        public virtual IWeightMatrix ConcatColumns(params IWeightMatrix[] wl)
        {
            List <WeightMatrix> twl = new List <WeightMatrix>();
            int sx = 0;
            int sy = 0;

            // Rows taken from the (last) input; columns accumulate across inputs.
            foreach (IWeightMatrix item in wl)
            {
                WeightMatrix m = item as WeightMatrix;
                sx  = m.Rows;
                sy += m.Columns;

                twl.Add(m);
            }

            var res = weightMatrixFactory.CreateWeightMatrix(sx, sy);

            // Forward: for each output row, copy each input's row segment in order.
            for (var i = 0; i < sx; i++)
            {
                int startIdx = 0;
                for (var j = 0; j < twl.Count; j++)
                {
                    Array.Copy(twl[j].Weight, i * twl[j].Columns, res.Weight, i * res.Columns + startIdx, twl[j].Columns);
                    startIdx += twl[j].Columns;
                }
            }

            if (this.needs_backprop)
            {
                Action backward = () =>
                {
                    // Backward: add the matching slice of res.Gradient into each input's
                    // Gradient; SIMD over whole chunks of a row segment, scalar tail after.
                    for (var i = 0; i < sx; i++)
                    {
                        int startIdx = 0;
                        for (var j = 0; j < twl.Count; j++)
                        {
                            var k         = 0;
                            var tw_j      = twl[j];
                            var moreItem  = (tw_j.Columns % Vector <float> .Count);
                            var offsetM1  = i * tw_j.Columns;           // row start in the input
                            var offsetRes = i * res.Columns + startIdx; // matching slice start in the result

                            while (k < tw_j.Columns - moreItem)
                            {
                                var vecResG = new Vector <float>(res.Gradient, offsetRes + k);
                                var vecM1G  = new Vector <float>(tw_j.Gradient, offsetM1 + k);
                                vecM1G += vecResG;
                                vecM1G.CopyTo(tw_j.Gradient, offsetM1 + k);

                                k += Vector <float> .Count;
                            }

                            while (k < twl[j].Columns)
                            {
                                tw_j.Gradient[offsetM1 + k] += res.Gradient[offsetRes + k];
                                k++;
                            }

                            startIdx += tw_j.Columns;
                        }
                    }
                };
                this.backprop.Add(backward);
            }
            return(res);
        }
예제 #4
0
        /// <summary>
        /// Layer normalization applied independently to each row of src:
        /// out = alpha * (x - mean) / sqrt(eps + var) + beta, where mean/var are per-row.
        /// alpha (scale) and beta (shift) are learned element-wise parameters and are
        /// assumed to have the same shape as src — TODO confirm with callers.
        /// </summary>
        /// <param name="src">Input matrix.</param>
        /// <param name="alpha">Per-element gain.</param>
        /// <param name="beta">Per-element bias.</param>
        /// <param name="eps">Variance floor for numerical stability.</param>
        public IWeightMatrix LayerNorm(IWeightMatrix src, IWeightMatrix alpha, IWeightMatrix beta, float eps = 1e-09f)
        {
            WeightMatrix srcM   = src as WeightMatrix;
            WeightMatrix alphaM = alpha as WeightMatrix;
            WeightMatrix betaM  = beta as WeightMatrix;

            int rows = srcM.Rows;
            int cols = srcM.Columns;

            var res = weightMatrixFactory.CreateWeightMatrix(rows, cols);

            for (int j = 0; j < rows; j++)
            {
                int baseIdx = j * cols;

                // Row mean.
                var sum = 0.0f;
                for (int i = 0; i < cols; i++)
                {
                    sum += srcM.Weight[baseIdx + i];
                }

                float mean  = sum / cols;
                float sqSum = 0.0f;

                // Row variance (sum of squared deviations).
                for (int i = 0; i < cols; i++)
                {
                    float ex = srcM.Weight[baseIdx + i] - mean;
                    sqSum += ex * ex;
                }

                float sigma = (float)Math.Sqrt(eps + sqSum / cols);

                // Normalize, then scale by alpha and shift by beta.
                for (int i = 0; i < cols; i++)
                {
                    float t = alphaM.Weight[baseIdx + i] * ((srcM.Weight[baseIdx + i] - mean) / sigma);
                    t += betaM.Weight[baseIdx + i];

                    res.Weight[baseIdx + i] = t;
                }
            }


            if (this.needs_backprop)
            {
                Action backward = () =>
                {
                    for (int j = 0; j < rows; j++)
                    {
                        float sum_x     = 0.0f; // sum of inputs (to recompute mean)
                        float sum_adj   = 0.0f; // sum of output gradients
                        float sum_adj_x = 0.0f; // sum of grad * alpha * x_hat (scaled normalized values)
                        float sum_sqr   = 0.0f; // sum of squared deviations (to recompute sigma)
                        int   baseIdx   = j * cols;

                        for (int i = 0; i < cols; i++)
                        {
                            sum_x     += srcM.Weight[baseIdx + i];
                            // (res - beta) recovers alpha * x_hat from the stored forward output.
                            sum_adj_x += res.Gradient[baseIdx + i] * (res.Weight[baseIdx + i] - betaM.Weight[baseIdx + i]);
                            sum_adj   += res.Gradient[baseIdx + i];
                        }

                        float mean = sum_x / cols;

                        for (int i = 0; i < cols; i++)
                        {
                            float ex = srcM.Weight[baseIdx + i] - mean;
                            sum_sqr += ex * ex;
                        }

                        float sigma = (float)Math.Sqrt(eps + sum_sqr / cols);

                        for (int i = 0; i < cols; i++)
                        {
                            float grad_x = 0.0f;

                            // Reconstruct x_hat from outputs; divides by alpha — assumes
                            // alpha is never zero here, TODO confirm.
                            float x_hat = (res.Weight[baseIdx + i] - betaM.Weight[baseIdx + i]) / alphaM.Weight[baseIdx + i];

                            grad_x += cols * res.Gradient[baseIdx + i];
                            grad_x -= sum_adj;
                            grad_x -= sum_adj_x * x_hat;
                            grad_x /= cols * sigma;

                            srcM.Gradient[baseIdx + i]   += alphaM.Weight[baseIdx + i] * grad_x;
                            alphaM.Gradient[baseIdx + i] += res.Gradient[baseIdx + i] * x_hat;
                            betaM.Gradient[baseIdx + i]  += res.Gradient[baseIdx + i];
                        }
                    }
                };
                this.backprop.Add(backward);
            }


            return(res);
        }
예제 #5
0
        /// <summary>
        /// Forward-only softmax over ALL elements of src (same computation as Softmax:
        /// subtract global max, FastExp, normalize; SIMD main loops with scalar tails).
        /// Unlike Softmax, this intentionally registers NO backward action: the caller is
        /// expected to set gradients on src directly from the returned probabilities,
        /// exploiting the simple softmax+cross-entropy gradient (p - target).
        /// </summary>
        public virtual IWeightMatrix SoftmaxWithCrossEntropy(IWeightMatrix src)
        {
            WeightMatrix m = src as WeightMatrix;

            var res = weightMatrixFactory.CreateWeightMatrix(m.Rows, m.Columns); // probability volume

            var maxval   = -999999.0f; // sentinel lower bound; assumes weights stay above this — TODO use float.MinValue
            var n        = m.Weight.Length;
            var moreItem = (n % Vector <float> .Count);

            var k         = 0;
            var vecMaxVal = new Vector <float>(maxval);

            // Pass 1a: SIMD lane-wise running maximum.
            while (k < n - moreItem)
            {
                var vecMW = new Vector <float>(m.Weight, k);
                vecMaxVal = Vector.Max(vecMW, vecMaxVal);

                k += Vector <float> .Count;
            }

            // Horizontal reduction of the SIMD lanes into a scalar max.
            for (int i = 0; i < Vector <float> .Count; i++)
            {
                if (vecMaxVal[i] > maxval)
                {
                    maxval = vecMaxVal[i];
                }
            }


            // Pass 1b: scalar tail of the max scan.
            while (k < n)
            {
                if (m.Weight[k] > maxval)
                {
                    maxval = m.Weight[k];
                }

                k++;
            }


            double s = 0.0;

            // Pass 2: exp(x - max) into res.Weight, accumulating the normalizer s.
            k         = 0;
            vecMaxVal = new Vector <float>(maxval);
            while (k < n - moreItem)
            {
                var vecMW = new Vector <float>(m.Weight, k);
                var vecV  = FastExp(vecMW - vecMaxVal);
                vecV.CopyTo(res.Weight, k);

                // Dot with One = horizontal sum of the lane values.
                s += Vector.Dot(vecV, Vector <float> .One);

                k += Vector <float> .Count;
            }

            k = n - moreItem;
            while (k < n)
            {
                float v = FastExp(m.Weight[k] - maxval);
                res.Weight[k] = (float)v;
                s            += v;

                k++;
            }


            // Pass 3: divide everything by the normalizer so the result sums to 1.
            k = 0;
            var vecS = new Vector <float>((float)s);

            while (k < n - moreItem)
            {
                var vecResW = new Vector <float>(res.Weight, k);
                vecResW = vecResW / vecS;
                vecResW.CopyTo(res.Weight, k);

                k += Vector <float> .Count;
            }

            while (k < n)
            {
                float v = (float)(res.Weight[k] / s);
                res.Weight[k] = v;
                k++;
            }


            // no backward pass here needed
            // since we will use the computed probabilities outside
            // to set gradients directly on m
            return(res);
        }
예제 #6
0
        /// <summary>
        /// Fused multiply-add: res = m1 * m2 + m3 (matrix product plus element-wise add).
        /// m3 is assumed to have the result's shape (m1.Rows x m2.Columns) — TODO confirm.
        /// Inner loops run over whole Vector&lt;float&gt; chunks with scalar tails; the
        /// commented-out Parallel.For lines show a previously-tried row parallelization.
        /// Registers a backward pass accumulating into m1, m2 and m3 gradients.
        /// </summary>
        public virtual WeightMatrix MulAdd(WeightMatrix m1, WeightMatrix m2, WeightMatrix m3)
        {
            var n          = m1.Rows;
            var d          = m2.Columns;
            var res        = weightMatrixFactory.CreateWeightMatrix(n, d);
            var moreItemsD = (d % Vector <float> .Count);

            //            Parallel.For(0, m1.Rows, i =>
            for (int i = 0; i < m1.Rows; i++)
            {
                // loop over rows of m1

                var m1BaseIndex    = d * i;          // start of row i in res (and m3)
                var m1ColBaseIndex = m1.Columns * i; // start of row i in m1

                // Seed the output row with m3's row, then accumulate the product into it.
                Array.Copy(m3.Weight, m1BaseIndex, res.Weight, m1BaseIndex, d);

                for (var k = 0; k < m1.Columns; k++)
                { // dot product loop
                    var j           = 0;
                    var m1w         = m1.Weight[m1ColBaseIndex + k];
                    var m2BaseIndex = m2.Columns * k;

                    // res[i, j..] += m1[i,k] * m2[k, j..] — SIMD over the row of m2.
                    while (j < d - moreItemsD)
                    {
                        int offset = m1BaseIndex + j;

                        var vecM2W       = new Vector <float>(m2.Weight, m2BaseIndex + j);
                        var vecResWeight = new Vector <float>(res.Weight, offset);
                        vecResWeight += m1w * vecM2W;

                        vecResWeight.CopyTo(res.Weight, offset);

                        j += Vector <float> .Count;
                    }

                    // Scalar tail.
                    while (j < d)
                    {
                        res.Weight[m1BaseIndex + j] += m1w * m2.Weight[m2BaseIndex + j];
                        j++;
                    }
                }
            }//);

            if (this.needs_backprop)
            {
                Action backward = () =>
                {
                    //                    Parallel.For(0, m1.Rows, i =>
                    for (int i = 0; i < m1.Rows; i++)
                    {
                        // loop over rows of m1

                        var resBaseIndex = d * i;
                        var m1BaseIndex  = m1.Columns * i;

                        // m3 gradient: the add is element-wise, so it just receives res.Gradient.
                        var j = 0;
                        while (j < d - moreItemsD)
                        {
                            int offset  = resBaseIndex + j;
                            var vecResG = new Vector <float>(res.Gradient, offset);
                            var vecM3G  = new Vector <float>(m3.Gradient, offset);
                            vecM3G += vecResG;
                            vecM3G.CopyTo(m3.Gradient, offset);

                            j += Vector <float> .Count;
                        }

                        while (j < d)
                        {
                            int offset = resBaseIndex + j;
                            m3.Gradient[offset] += res.Gradient[offset];

                            j++;
                        }

                        // loop over cols of m2
                        for (var k = 0; k < m1.Columns; k++)
                        {
                            var m1GIndex     = m1BaseIndex + k;
                            var m2GBaseIndex = m2.Columns * k;
                            var m1G          = 0.0f; // accumulates dG[i,·] · m2[k,·] for m1's gradient
                            var m1W          = m1.Weight[m1GIndex];

                            j = 0;
                            while (j < d - moreItemsD)
                            {
                                int m2Index = m2GBaseIndex + j;
                                int offset  = resBaseIndex + j;
                                var vecResG = new Vector <float>(res.Gradient, offset);
                                var vecM2W  = new Vector <float>(m2.Weight, m2Index);
                                var vecM2G  = new Vector <float>(m2.Gradient, m2Index);

                                // dL/dm1[i,k] += sum_j m2[k,j] * dG[i,j]
                                m1G += Vector.Dot(vecM2W, vecResG);

                                // dL/dm2[k,j] += m1[i,k] * dG[i,j]
                                vecM2G += m1W * vecResG;
                                vecM2G.CopyTo(m2.Gradient, m2Index);


                                j += Vector <float> .Count;
                            }

                            while (j < d)
                            {
                                int m2Index = m2GBaseIndex + j;
                                var b       = res.Gradient[resBaseIndex + j];

                                m1G += m2.Weight[m2Index] * b;
                                m2.Gradient[m2Index] += m1W * b;
                                j++;
                            }

                            m1.Gradient[m1GIndex] += m1G;
                        }
                    }//);
                };
                this.backprop.Add(backward);
            }
            return(res);
        }
예제 #7
0
        /// <summary>
        /// Applies the optimizer update to a matrix on the CPU. If no individual rows
        /// were flagged in RowToBeUpdated, the whole weight buffer is updated in one
        /// call; otherwise only the flagged rows are updated (sparse update), and the
        /// flag set is cleared afterwards.
        /// </summary>
        private void UpdateWeightsCPU(float step_size, float regc, float clipval, Vector <float> vecMaxClipval, Vector <float> vecMinClipval, WeightMatrix m)
        {
            // Dense path: no dirty-row bookkeeping, so sweep the entire buffer.
            if (m.RowToBeUpdated.Count == 0)
            {
                UpdateWeights(step_size, regc, clipval, m, vecMaxClipval, vecMinClipval, m.Weight.Length, 0);
                return;
            }

            // Sparse path: update one row-sized span per flagged row.
            foreach (var kv in m.RowToBeUpdated)
            {
                UpdateWeights(step_size, regc, clipval, m, vecMaxClipval, vecMinClipval, m.Columns, kv.Key * m.Columns);
            }

            m.RowToBeUpdated.Clear();
        }
예제 #8
0
        /// <summary>
        /// RMSProp-style weight update with gradient clipping and L2 regularization
        /// over elements [i, n) of matrix m. The SIMD loop and the scalar tail apply
        /// the same formula: clip gradient, update the running squared-gradient cache
        /// (m.Cash), scale by 1/sqrt(cache + eps), compute a per-weight learning rate
        /// via ComputeLearningRate (state kept in m.LrW), then apply the delta.
        /// Relies on class fields vecDecayRate / vecSmoothEPS / decay_rate / smooth_eps.
        /// NOTE(review): the SIMD path does not zero the processed gradients — presumably
        /// the caller resets m.Gradient elsewhere; confirm.
        /// </summary>
        /// <param name="step_size">Base learning rate.</param>
        /// <param name="regc">L2 regularization coefficient.</param>
        /// <param name="clipval">Symmetric gradient clip bound (scalar tail only; the SIMD path uses the vec bounds).</param>
        /// <param name="m">Matrix whose Weight / Gradient / Cash / LrW buffers are updated in place.</param>
        /// <param name="vecMaxClipval">Vectorized +clip bound for the SIMD path.</param>
        /// <param name="vecMinClipval">Vectorized -clip bound for the SIMD path.</param>
        /// <param name="n">Exclusive end index of the span to update.</param>
        /// <param name="i">Inclusive start index of the span to update.</param>
        private void UpdateWeights(float step_size, float regc, float clipval, WeightMatrix m, Vector <float> vecMaxClipval, Vector <float> vecMinClipval, int n, int i)
        {
            var s         = m.Cash;
            var l         = m.LrW;
            var vecBaseLR = new Vector <float>(step_size);

            var moreItems = (n % Vector <float> .Count);

            // SIMD path over whole Vector<float> chunks.
            while (i < n - moreItems)
            {
                var vecMDWI = new Vector <float>(m.Gradient, i);

                // Clamp the gradient to [vecMinClipval, vecMaxClipval].
                vecMDWI = Vector.Min(vecMDWI, vecMaxClipval);
                vecMDWI = Vector.Max(vecMDWI, vecMinClipval);

                // Exponential moving average of squared gradients (RMSProp cache).
                var vecS = new Vector <float>(s, i);
                vecS = vecS * vecDecayRate + (Vector <float> .One - vecDecayRate) * vecMDWI * vecMDWI;
                vecS.CopyTo(s, i);

                // Normalized gradient, then per-weight adaptive learning rate.
                var vecMDWIDelta = vecMDWI / Vector.SquareRoot(vecS + vecSmoothEPS);
                var vecLRWeight  = new Vector <float>(l, i);
                var vecLR        = ComputeLearningRate(vecMDWIDelta, ref vecLRWeight, vecBaseLR);
                vecLRWeight.CopyTo(l, i);

                // Step plus L2 decay, applied in place.
                var vecMW    = new Vector <float>(m.Weight, i);
                var vecDelta = -vecLR * vecMDWIDelta - regc * vecMW;

                vecMW += vecDelta;
                vecMW.CopyTo(m.Weight, i);

                i += Vector <float> .Count;
            }

            // Scalar tail: same update, element at a time.
            while (i < n)
            {
                // rmsprop adaptive learning rate
                var mdwi = m.Gradient[i];
                // gradient clip
                if (mdwi > clipval)
                {
                    mdwi = clipval;
                }
                if (mdwi < -clipval)
                {
                    mdwi = -clipval;
                }

                s[i] = (float)(s[i] * decay_rate + (1.0 - decay_rate) * mdwi * mdwi);

                var wDelta = (float)(mdwi / Math.Sqrt(s[i] + smooth_eps));
                var lr     = ComputeLearningRate(wDelta, l, i, step_size);

                var delta = (float)(-lr * wDelta - regc * m.Weight[i]);

                // update (and regularize)
                m.Weight[i] += delta;


                i++;
            }
        }
예제 #9
0
        /// <summary>
        /// Copies the weight values of src into this matrix's Weight buffer.
        /// Gradients and optimizer state are not touched. Assumes this matrix's
        /// buffer is at least as large as src's — TODO confirm with callers.
        /// </summary>
        public void CopyWeights(IWeightMatrix src)
        {
            var source = src as WeightMatrix;

            Array.Copy(source.Weight, Weight, source.Weight.Length);
        }
예제 #10
0
        /// <summary>
        /// Runs one LSTM time step. Computes the input / forget / output gates and the
        /// candidate cell value from the dense input, the previous hidden state and a
        /// context vector (plus optional sparse features when sdim &gt; 0), then produces
        /// the new cell state and hidden state. The four gate computations are
        /// independent, so they run concurrently via Parallel.Invoke. Updates this.ht
        /// and this.ct in place and returns the new hidden state.
        /// </summary>
        /// <param name="sparseInput">Sparse feature input; only used when sdim &gt; 0.</param>
        /// <param name="context">Context vector (presumably an attention output — confirm).</param>
        /// <param name="input">Dense input for this time step.</param>
        /// <param name="innerGraph">Compute graph used to build/record the operations.</param>
        /// <returns>The new hidden state (also stored in this.ht).</returns>
        public WeightMatrix Step(SparseWeightMatrix sparseInput, WeightMatrix context, WeightMatrix input, IComputeGraph innerGraph)
        {
            var hidden_prev = ht;
            var cell_prev   = ct;

            var          cell        = this;
            WeightMatrix input_gate  = null;
            WeightMatrix forget_gate = null;
            WeightMatrix output_gate = null;
            WeightMatrix cell_write  = null;

            // The four gates only read shared state, so compute them in parallel.
            Parallel.Invoke(
                () =>
            {
                // Input gate: sigmoid(Wix*x + Wih*h + WiC*c [+ WiS*sparse] + bi)
                var h0  = innerGraph.mul(input, cell.Wix);
                var h1  = innerGraph.mul(hidden_prev, cell.Wih);
                var h11 = innerGraph.mul(context, cell.WiC);

                if (sdim > 0)
                {
                    var h111   = innerGraph.mul(sparseInput, cell.WiS);
                    input_gate = innerGraph.addsigmoid(h0, h1, h11, h111, cell.bi);
                }
                else
                {
                    input_gate = innerGraph.addsigmoid(h0, h1, h11, cell.bi);
                }
            },
                () =>
            {
                // Forget gate: sigmoid(Wfx*x + Wfh*h + WfC*c [+ WfS*sparse] + bf)
                var h2  = innerGraph.mul(input, cell.Wfx);
                var h3  = innerGraph.mul(hidden_prev, cell.Wfh);
                var h33 = innerGraph.mul(context, cell.WfC);

                if (sdim > 0)
                {
                    var h333    = innerGraph.mul(sparseInput, cell.WfS);
                    forget_gate = innerGraph.addsigmoid(h3, h2, h33, h333, cell.bf);
                }
                else
                {
                    forget_gate = innerGraph.addsigmoid(h3, h2, h33, cell.bf);
                }
            },
                () =>
            {
                // Output gate: sigmoid(Wox*x + Woh*h + WoC*c [+ WoS*sparse] + bo)
                var h4  = innerGraph.mul(input, cell.Wox);
                var h5  = innerGraph.mul(hidden_prev, cell.Woh);
                var h55 = innerGraph.mul(context, cell.WoC);

                if (sdim > 0)
                {
                    var h555    = innerGraph.mul(sparseInput, cell.WoS);
                    output_gate = innerGraph.addsigmoid(h5, h4, h55, h555, cell.bo);
                }
                else
                {
                    output_gate = innerGraph.addsigmoid(h5, h4, h55, cell.bo);
                }
            },
                () =>
            {
                // Candidate cell value: tanh(Wcx*x + Wch*h + WcC*c [+ WcS*sparse] + bc)
                var h6  = innerGraph.mul(input, cell.Wcx);
                var h7  = innerGraph.mul(hidden_prev, cell.Wch);
                var h77 = innerGraph.mul(context, cell.WcC);

                if (sdim > 0)
                {
                    var h777   = innerGraph.mul(sparseInput, cell.WcS);
                    cell_write = innerGraph.addtanh(h7, h6, h77, h777, cell.bc);
                }
                else
                {
                    cell_write = innerGraph.addtanh(h7, h6, h77, cell.bc);
                }
            });

            // compute new cell activation
            var retain_cell = innerGraph.eltmul(forget_gate, cell_prev); // what do we keep from cell
            var write_cell  = innerGraph.eltmul(input_gate, cell_write); // what do we write to cell
            var cell_d      = innerGraph.add(retain_cell, write_cell);   // new cell contents

            // compute hidden state as gated, saturated cell activations
            var hidden_d = innerGraph.eltmul(output_gate, innerGraph.tanh(cell_d));

            this.ht = hidden_d;
            this.ct = cell_d;

            return(ht);
        }