        /// <summary>Creates a random context matrix.</summary>
        /// <remarks>
        /// Creates a random context matrix.  The matrix is numRows x
        /// (2 * numCols).  It can be appended to the end of either a
        /// unary or binary transform matrix to produce a transform matrix
        /// that uses context words.
        /// </remarks>
        private SimpleMatrix RandomContextMatrix()
        {
            SimpleMatrix matrix = new SimpleMatrix(numRows, numCols * 2);

            matrix.InsertIntoThis(0, 0, identity.Scale(op.trainOptions.scalingForInit * 0.1));
            matrix.InsertIntoThis(0, numCols, identity.Scale(op.trainOptions.scalingForInit * 0.1));
            matrix = matrix.Plus(SimpleMatrix.Random(numRows, numCols * 2, -1.0 / Math.Sqrt((double)numCols * 100.0), 1.0 / Math.Sqrt((double)numCols * 100.0), rand));
            return matrix;
        }
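        // Sketch only (not from the original source): how a context-augmented
        // binary transform is applied.  Its columns are laid out as
        // [ W_left | W_right | bias | W_prev | W_next ], so the matching input
        // stacks both children, a 1 for the bias, and the two context word
        // vectors.  NeuralUtils.Concatenate and NeuralUtils.ElementwiseApplyTanh
        // are assumed to exist in this port as in the Java original.
        private static SimpleMatrix ContextComposeSketch(SimpleMatrix W, SimpleMatrix leftChild, SimpleMatrix rightChild, SimpleMatrix prevWord, SimpleMatrix nextWord)
        {
            SimpleMatrix bias = new SimpleMatrix(1, 1);
            bias.Set(0, 0, 1.0);
            // (numCols * 4 + 1) x 1 input against the numRows x (numCols * 4 + 1) transform
            SimpleMatrix input = NeuralUtils.Concatenate(leftChild, rightChild, bias, prevWord, nextWord);
            return NeuralUtils.ElementwiseApplyTanh(W.Mult(input));
        }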
        private static double ScaleAndRegularize(IDictionary<string, SimpleMatrix> derivatives, IDictionary<string, SimpleMatrix> currentMatrices, double scale, double regCost, bool activeMatricesOnly, bool dropBiasColumn)
        {
            double cost = 0.0;

            // the regularization cost
            foreach (KeyValuePair<string, SimpleMatrix> entry in currentMatrices)
            {
                // Indexing a missing key would throw, so use TryGetValue and let
                // D stay null when this matrix has no derivative yet.
                derivatives.TryGetValue(entry.Key, out SimpleMatrix D);
                if (activeMatricesOnly && D == null)
                {
                    // Fill in an empty matrix so the length of theta can match.
                    // TODO: might want to allow for sparse parameter vectors
                    derivatives[entry.Key] = new SimpleMatrix(entry.Value.NumRows(), entry.Value.NumCols());
                    continue;
                }
                SimpleMatrix regMatrix = entry.Value;
                if (dropBiasColumn)
                {
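                    // Copy, then zero out the bias column so it is exempt from the penalty.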
                    regMatrix = new SimpleMatrix(regMatrix);
                    regMatrix.InsertIntoThis(0, regMatrix.NumCols() - 1, new SimpleMatrix(regMatrix.NumRows(), 1));
                }
                D = D.Scale(scale).Plus(regMatrix.Scale(regCost));
                derivatives[entry.Key] = D;
                cost += regMatrix.ElementMult(regMatrix).ElementSum() * regCost / 2.0;
            }
            return cost;
        }
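        // A scalar check of the regularization algebra used above (hypothetical
        // values, not part of the original source): the loop adds the penalty
        // (regCost / 2) * ||W||_F^2 to the cost, and its gradient regCost * W
        // to each scaled derivative.
        private static void RegularizationSketch()
        {
            double w = 3.0, regCost = 0.001;
            double costContribution = regCost / 2.0 * w * w; // (regCost / 2) * w^2 = 0.0045
            double gradContribution = regCost * w;           // d/dw of the above  = 0.003
            System.Console.WriteLine(costContribution + " " + gradContribution);
        }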
        public virtual void AddRandomBinaryMatrix(string leftBasic, string rightBasic)
        {
            if (binaryTransform.Get(leftBasic, rightBasic) != null)
            {
                return;
            }
            ++numBinaryMatrices;
            // scoring matrix
            SimpleMatrix score = SimpleMatrix.Random(1, numCols, -1.0 / Math.Sqrt((double)numCols), 1.0 / Math.Sqrt((double)numCols), rand);

            binaryScore.Put(leftBasic, rightBasic, score.Scale(op.trainOptions.scalingForInit));
            SimpleMatrix binary;

            if (op.trainOptions.useContextWords)
            {
                binary = new SimpleMatrix(numRows, numCols * 4 + 1);
                // skip the bias column at index numCols * 2, then append the context block
                binary.InsertIntoThis(0, numCols * 2 + 1, RandomContextMatrix());
            }
            else
            {
                binary = new SimpleMatrix(numRows, numCols * 2 + 1);
            }
            SimpleMatrix left  = RandomTransformBlock();
            SimpleMatrix right = RandomTransformBlock();

            binary.InsertIntoThis(0, 0, left);
            binary.InsertIntoThis(0, numCols, right);
            binaryTransform.Put(leftBasic, rightBasic, binary.Scale(op.trainOptions.scalingForInit));
        }
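        // Sketch only (not from the original source): how a binary transform
        // built above is consumed when composing two child node vectors into a
        // parent.  ConcatenateWithBias appends a 1 to match the bias column;
        // it and ElementwiseApplyTanh are assumed as in the Java original.
        private SimpleMatrix ComposeSketch(string leftBasic, string rightBasic, SimpleMatrix leftVector, SimpleMatrix rightVector)
        {
            SimpleMatrix W = binaryTransform.Get(leftBasic, rightBasic);
            // [left ; right ; 1] lines up with the [ W_left | W_right | bias ] columns
            SimpleMatrix childVec = NeuralUtils.ConcatenateWithBias(leftVector, rightVector);
            return NeuralUtils.ElementwiseApplyTanh(W.Mult(childVec));
        }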
        public virtual void AddRandomUnaryMatrix(string childBasic)
        {
            if (unaryTransform.ContainsKey(childBasic))
            {
                return;
            }
            ++numUnaryMatrices;
            // scoring matrix
            SimpleMatrix score = SimpleMatrix.Random(1, numCols, -1.0 / Math.Sqrt((double)numCols), 1.0 / Math.Sqrt((double)numCols), rand);

            unaryScore[childBasic] = score.Scale(op.trainOptions.scalingForInit);
            SimpleMatrix transform;

            if (op.trainOptions.useContextWords)
            {
                transform = new SimpleMatrix(numRows, numCols * 3 + 1);
                // skip the bias column at index numCols, then append the context block
                transform.InsertIntoThis(0, numCols + 1, RandomContextMatrix());
            }
            else
            {
                transform = new SimpleMatrix(numRows, numCols + 1);
            }
            SimpleMatrix unary = RandomTransformBlock();

            transform.InsertIntoThis(0, 0, unary);
            unaryTransform[childBasic] = transform.Scale(op.trainOptions.scalingForInit);
        }
        internal virtual SimpleMatrix RandomTransformMatrix()
        {
            SimpleMatrix binary = new SimpleMatrix(numHid, numHid * 2 + 1);

            // bias column values are initialized to zero
            binary.InsertIntoThis(0, 0, RandomTransformBlock());
            binary.InsertIntoThis(0, numHid, RandomTransformBlock());
            return binary.Scale(op.trainOptions.scalingForInit);
        }
        /// <summary>Returns a matrix of the right size for either binary or unary (terminal) classification.</summary>
        internal virtual SimpleMatrix RandomClassificationMatrix()
        {
            SimpleMatrix score = new SimpleMatrix(numClasses, numHid + 1);
            double       range = 1.0 / (Math.Sqrt((double)numHid));

            score.InsertIntoThis(0, 0, SimpleMatrix.Random(numClasses, numHid, -range, range, rand));
            // bias column goes from 0 to 1 initially
            score.InsertIntoThis(0, numHid, SimpleMatrix.Random(numClasses, 1, 0.0, 1.0, rand));
            return score.Scale(op.trainOptions.scalingForInit);
        }
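        // Sketch only (not from the original source): scoring a node vector with
        // a classification matrix, whose columns are [ W_class | bias ].  Softmax
        // is the method shown below; ConcatenateWithBias is assumed as in the
        // Java original's NeuralUtils.
        internal static SimpleMatrix PredictSketch(SimpleMatrix classification, SimpleMatrix nodeVector)
        {
            SimpleMatrix scores = classification.Mult(NeuralUtils.ConcatenateWithBias(nodeVector)); // numClasses x 1
            return NeuralUtils.Softmax(scores); // class probabilities summing to 1
        }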
        private static SimpleTensor GetTensorGradient(SimpleMatrix deltaFull, SimpleMatrix leftVector, SimpleMatrix rightVector)
        {
            int          size  = deltaFull.GetNumElements();
            SimpleTensor Wt_df = new SimpleTensor(size * 2, size * 2, size);
            // TODO: combine this concatenation with computeTensorDeltaDown?
            SimpleMatrix fullVector = NeuralUtils.Concatenate(leftVector, rightVector);

            for (int slice = 0; slice < size; ++slice)
            {
                Wt_df.SetSlice(slice, fullVector.Scale(deltaFull.Get(slice)).Mult(fullVector.Transpose()));
            }
            return Wt_df;
        }
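        // Sketch only (not from the original source): the forward-pass tensor
        // term the gradient above belongs to.  For slice k the forward value is
        // [a;b]^T V_k [a;b]; with upstream delta d_k, its derivative w.r.t. V_k
        // is d_k * [a;b] * [a;b]^T, exactly what GetTensorGradient builds.
        private static double TensorSliceForwardSketch(SimpleTensor Wt, int slice, SimpleMatrix leftVector, SimpleMatrix rightVector)
        {
            SimpleMatrix ab = NeuralUtils.Concatenate(leftVector, rightVector);
            return ab.Transpose().Mult(Wt.GetSlice(slice)).Mult(ab).Get(0); // scalar [a;b]^T V_k [a;b]
        }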
        private static SimpleMatrix ComputeTensorDeltaDown(SimpleMatrix deltaFull, SimpleMatrix leftVector, SimpleMatrix rightVector, SimpleMatrix W, SimpleTensor Wt)
        {
            SimpleMatrix WTDelta       = W.Transpose().Mult(deltaFull);
            SimpleMatrix WTDeltaNoBias = WTDelta.ExtractMatrix(0, deltaFull.NumRows() * 2, 0, 1);
            int          size          = deltaFull.GetNumElements();
            SimpleMatrix deltaTensor   = new SimpleMatrix(size * 2, 1);
            SimpleMatrix fullVector    = NeuralUtils.Concatenate(leftVector, rightVector);

            for (int slice = 0; slice < size; ++slice)
            {
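                // For f(x) = x^T V_k x the gradient w.r.t. x is (V_k + V_k^T) x,
                // since x appears on both sides of the bilinear form; the slice's
                // upstream delta d_k scales that contribution.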
                SimpleMatrix scaledFullVector = fullVector.Scale(deltaFull.Get(slice));
                deltaTensor = deltaTensor.Plus(Wt.GetSlice(slice).Plus(Wt.GetSlice(slice).Transpose()).Mult(scaledFullVector));
            }
            return deltaTensor.Plus(WTDeltaNoBias);
        }
        /// <summary>Applies softmax to all of the elements of the matrix.</summary>
        /// <remarks>
        /// Applies softmax to all of the elements of the matrix.  The
        /// elements of the returned matrix sum to 1.  If your matrix is
        /// not already a vector, be sure this is what you actually want.
        /// </remarks>
        public static SimpleMatrix Softmax(SimpleMatrix input)
        {
            SimpleMatrix output = new SimpleMatrix(input);

            for (int i = 0; i < output.NumRows(); ++i)
            {
                for (int j = 0; j < output.NumCols(); ++j)
                {
                    output.Set(i, j, Math.Exp(output.Get(i, j)));
                }
            }
            double sum = output.ElementSum();

            // safe as long as at least one element's exp does not underflow to 0
            return output.Scale(1.0 / sum);
        }
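        // A numerically safer variant (a sketch, not part of the original):
        // Math.Exp overflows to infinity for large entries, so subtract the
        // maximum element first.  This leaves the result unchanged because
        // exp(x - m) / sum(exp(x - m)) == exp(x) / sum(exp(x)).
        public static SimpleMatrix StableSoftmax(SimpleMatrix input)
        {
            SimpleMatrix output = new SimpleMatrix(input);
            double max = output.Get(0, 0);
            for (int i = 0; i < output.NumRows(); ++i)
            {
                for (int j = 0; j < output.NumCols(); ++j)
                {
                    max = Math.Max(max, output.Get(i, j));
                }
            }
            for (int i = 0; i < output.NumRows(); ++i)
            {
                for (int j = 0; j < output.NumCols(); ++j)
                {
                    output.Set(i, j, Math.Exp(output.Get(i, j) - max));
                }
            }
            return output.Scale(1.0 / output.ElementSum());
        }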
        private static double ScaleAndRegularize(TwoDimensionalMap<string, string, SimpleMatrix> derivatives, TwoDimensionalMap<string, string, SimpleMatrix> currentMatrices, double scale, double regCost, bool dropBiasColumn)
        {
            double cost = 0.0;

            // the regularization cost
            foreach (TwoDimensionalMap.Entry<string, string, SimpleMatrix> entry in currentMatrices)
            {
                SimpleMatrix D         = derivatives.Get(entry.GetFirstKey(), entry.GetSecondKey());
                SimpleMatrix regMatrix = entry.GetValue();
                if (dropBiasColumn)
                {
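                    // Copy, then zero out the bias column so it is exempt from the penalty.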
                    regMatrix = new SimpleMatrix(regMatrix);
                    regMatrix.InsertIntoThis(0, regMatrix.NumCols() - 1, new SimpleMatrix(regMatrix.NumRows(), 1));
                }
                D = D.Scale(scale).Plus(regMatrix.Scale(regCost));
                derivatives.Put(entry.GetFirstKey(), entry.GetSecondKey(), D);
                cost += regMatrix.ElementMult(regMatrix).ElementSum() * regCost / 2.0;
            }
            return cost;
        }
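        // Sketch of a typical call site (the derivative-map and option names
        // here are hypothetical, not from the original source): transform
        // matrices are conventionally regularized without their bias column.
        //
        //     double regCost = ScaleAndRegularize(binaryTransformDerivatives, binaryTransform,
        //                                         scale, options.regTransformCost, true);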