/// <summary>Creates a random context matrix.</summary>
/// <remarks>
/// Creates a random context matrix.  This will be numRows x
/// 2*numCols big.  These can be appended to the end of either a
/// unary or binary transform matrix to get the transform matrix
/// which uses context words.
/// </remarks>
private SimpleMatrix RandomContextMatrix()
{
    SimpleMatrix matrix = new SimpleMatrix(numRows, numCols * 2);
    matrix.InsertIntoThis(0, 0, identity.Scale(op.trainOptions.scalingForInit * 0.1));
    matrix.InsertIntoThis(0, numCols, identity.Scale(op.trainOptions.scalingForInit * 0.1));
    matrix = matrix.Plus(SimpleMatrix.Random(numRows, numCols * 2, -1.0 / Math.Sqrt((double)numCols * 100.0), 1.0 / Math.Sqrt((double)numCols * 100.0), rand));
    return matrix;
}
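// In math terms, the initialization above amounts to (with d = numCols and
// s = op.trainOptions.scalingForInit):
//
//   W_context = 0.1 * s * [ I | I ] + U(-1 / (10 * sqrt(d)), 1 / (10 * sqrt(d)))
//
// since sqrt(d * 100) = 10 * sqrt(d) in the bounds passed to SimpleMatrix.Random.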
private static double ScaleAndRegularize(IDictionary<string, SimpleMatrix> derivatives, IDictionary<string, SimpleMatrix> currentMatrices, double scale, double regCost, bool activeMatricesOnly, bool dropBiasColumn)
{
    double cost = 0.0;  // the regularization cost
    foreach (KeyValuePair<string, SimpleMatrix> entry in currentMatrices)
    {
        SimpleMatrix D = derivatives[entry.Key];
        if (activeMatricesOnly && D == null)
        {
            // Fill in an empty matrix so the length of theta can match.
            // TODO: might want to allow for sparse parameter vectors
            derivatives[entry.Key] = new SimpleMatrix(entry.Value.NumRows(), entry.Value.NumCols());
            continue;
        }
        SimpleMatrix regMatrix = entry.Value;
        if (dropBiasColumn)
        {
            // Zero out the last column so the bias term is not regularized.
            regMatrix = new SimpleMatrix(regMatrix);
            regMatrix.InsertIntoThis(0, regMatrix.NumCols() - 1, new SimpleMatrix(regMatrix.NumRows(), 1));
        }
        D = D.Scale(scale).Plus(regMatrix.Scale(regCost));
        derivatives[entry.Key] = D;
        cost += regMatrix.ElementMult(regMatrix).ElementSum() * regCost / 2.0;
    }
    return cost;
}
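// For each parameter matrix W with accumulated gradient D, the loop above computes
//
//   D' = scale * D + regCost * W~        cost += (regCost / 2) * ||W~||_F^2
//
// where W~ is W with its bias column zeroed out when dropBiasColumn is set,
// so the bias term is excluded from the L2 penalty.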
public virtual void AddRandomBinaryMatrix(string leftBasic, string rightBasic)
{
    if (binaryTransform.Get(leftBasic, rightBasic) != null)
    {
        return;
    }
    ++numBinaryMatrices;
    // scoring matrix
    SimpleMatrix score = SimpleMatrix.Random(1, numCols, -1.0 / Math.Sqrt((double)numCols), 1.0 / Math.Sqrt((double)numCols), rand);
    binaryScore.Put(leftBasic, rightBasic, score.Scale(op.trainOptions.scalingForInit));
    SimpleMatrix binary;
    if (op.trainOptions.useContextWords)
    {
        binary = new SimpleMatrix(numRows, numCols * 4 + 1);
        // leave room for bias term
        binary.InsertIntoThis(0, numCols * 2 + 1, RandomContextMatrix());
    }
    else
    {
        binary = new SimpleMatrix(numRows, numCols * 2 + 1);
    }
    SimpleMatrix left = RandomTransformMatrix();
    SimpleMatrix right = RandomTransformMatrix();
    binary.InsertIntoThis(0, 0, left);
    binary.InsertIntoThis(0, numCols, right);
    binaryTransform.Put(leftBasic, rightBasic, binary.Scale(op.trainOptions.scalingForInit));
}
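// Column layout of the resulting binary transform (d = numCols):
//
//   [ W_left (d) | W_right (d) | bias (1) | W_context_left (d) | W_context_right (d) ]
//
// The two context blocks come from RandomContextMatrix and are only present
// when op.trainOptions.useContextWords is set; otherwise the matrix ends at
// the (zero-initialized) bias column.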
public virtual void AddRandomUnaryMatrix(string childBasic)
{
    if (unaryTransform[childBasic] != null)
    {
        return;
    }
    ++numUnaryMatrices;
    // scoring matrix
    SimpleMatrix score = SimpleMatrix.Random(1, numCols, -1.0 / Math.Sqrt((double)numCols), 1.0 / Math.Sqrt((double)numCols), rand);
    unaryScore[childBasic] = score.Scale(op.trainOptions.scalingForInit);
    SimpleMatrix transform;
    if (op.trainOptions.useContextWords)
    {
        transform = new SimpleMatrix(numRows, numCols * 3 + 1);
        // leave room for bias term
        transform.InsertIntoThis(0, numCols + 1, RandomContextMatrix());
    }
    else
    {
        transform = new SimpleMatrix(numRows, numCols + 1);
    }
    SimpleMatrix unary = RandomTransformMatrix();
    transform.InsertIntoThis(0, 0, unary);
    unaryTransform[childBasic] = transform.Scale(op.trainOptions.scalingForInit);
}
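// The unary transform uses the analogous layout (d = numCols):
//
//   [ W (d) | bias (1) | W_context_left (d) | W_context_right (d) ]
//
// with the context blocks again present only when useContextWords is set.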
internal virtual SimpleMatrix RandomTransformMatrix()
{
    SimpleMatrix binary = new SimpleMatrix(numHid, numHid * 2 + 1);
    // bias column values are initialized zero
    binary.InsertIntoThis(0, 0, RandomTransformBlock());
    binary.InsertIntoThis(0, numHid, RandomTransformBlock());
    return binary.Scale(op.trainOptions.scalingForInit);
}
/// <summary>Returns a matrix of the right size for either binary or unary (terminal) classification.</summary>
internal virtual SimpleMatrix RandomClassificationMatrix()
{
    SimpleMatrix score = new SimpleMatrix(numClasses, numHid + 1);
    double range = 1.0 / (Math.Sqrt((double)numHid));
    score.InsertIntoThis(0, 0, SimpleMatrix.Random(numClasses, numHid, -range, range, rand));
    // bias column goes from 0 to 1 initially
    score.InsertIntoThis(0, numHid, SimpleMatrix.Random(numClasses, 1, 0.0, 1.0, rand));
    return score.Scale(op.trainOptions.scalingForInit);
}
private static SimpleTensor GetTensorGradient(SimpleMatrix deltaFull, SimpleMatrix leftVector, SimpleMatrix rightVector)
{
    int size = deltaFull.GetNumElements();
    SimpleTensor Wt_df = new SimpleTensor(size * 2, size * 2, size);
    // TODO: combine this concatenation with computeTensorDeltaDown?
    SimpleMatrix fullVector = NeuralUtils.Concatenate(leftVector, rightVector);
    for (int slice = 0; slice < size; ++slice)
    {
        Wt_df.SetSlice(slice, fullVector.Scale(deltaFull.Get(slice)).Mult(fullVector.Transpose()));
    }
    return Wt_df;
}
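// With c = [a; b] the concatenation of the child vectors and delta_k the k-th
// element of deltaFull, each slice set above is the outer product
//
//   dE/dV[k] = delta_k * c * c^T
//
// i.e. the gradient of the slice score c^T V[k] c scaled by the incoming delta.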
private static SimpleMatrix ComputeTensorDeltaDown(SimpleMatrix deltaFull, SimpleMatrix leftVector, SimpleMatrix rightVector, SimpleMatrix W, SimpleTensor Wt)
{
    SimpleMatrix WTDelta = W.Transpose().Mult(deltaFull);
    SimpleMatrix WTDeltaNoBias = WTDelta.ExtractMatrix(0, deltaFull.NumRows() * 2, 0, 1);
    int size = deltaFull.GetNumElements();
    SimpleMatrix deltaTensor = new SimpleMatrix(size * 2, 1);
    SimpleMatrix fullVector = NeuralUtils.Concatenate(leftVector, rightVector);
    for (int slice = 0; slice < size; ++slice)
    {
        SimpleMatrix scaledFullVector = fullVector.Scale(deltaFull.Get(slice));
        deltaTensor = deltaTensor.Plus(Wt.GetSlice(slice).Plus(Wt.GetSlice(slice).Transpose()).Mult(scaledFullVector));
    }
    return deltaTensor.Plus(WTDeltaNoBias);
}
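// The value returned above combines the matrix path and the tensor path
// (again with c = [a; b]):
//
//   delta_down = (W^T * delta)[bias row dropped] + sum_k delta_k * (V[k] + V[k]^T) * c
//
// The symmetrized (V[k] + V[k]^T) factor arises because c appears on both
// sides of the slice score c^T V[k] c.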
/// <summary>Applies softmax to all of the elements of the matrix.</summary>
/// <remarks>
/// Applies softmax to all of the elements of the matrix.  The return
/// matrix will have all of its elements sum to 1.  If your matrix is
/// not already a vector, be sure this is what you actually want.
/// </remarks>
public static SimpleMatrix Softmax(SimpleMatrix input)
{
    SimpleMatrix output = new SimpleMatrix(input);
    for (int i = 0; i < output.NumRows(); ++i)
    {
        for (int j = 0; j < output.NumCols(); ++j)
        {
            output.Set(i, j, Math.Exp(output.Get(i, j)));
        }
    }
    // will be safe, since exp should never return 0
    double sum = output.ElementSum();
    return output.Scale(1.0 / sum);
}
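// Note: while exp never returns 0, Math.Exp can overflow to infinity for large
// inputs. A common remedy, sketched below as a hypothetical variant (not part
// of the original API), is to subtract the maximum element before exponentiating;
// softmax is invariant under a constant shift, so the result is unchanged but
// the largest exponent becomes exp(0) = 1.
public static SimpleMatrix StableSoftmax(SimpleMatrix input)
{
    SimpleMatrix output = new SimpleMatrix(input);
    // find the maximum element so every entry can be shifted by it
    double max = output.Get(0, 0);
    for (int i = 0; i < output.NumRows(); ++i)
    {
        for (int j = 0; j < output.NumCols(); ++j)
        {
            max = Math.Max(max, output.Get(i, j));
        }
    }
    for (int i = 0; i < output.NumRows(); ++i)
    {
        for (int j = 0; j < output.NumCols(); ++j)
        {
            output.Set(i, j, Math.Exp(output.Get(i, j) - max));
        }
    }
    double sum = output.ElementSum();  // >= 1, since the max entry contributes exp(0) = 1
    return output.Scale(1.0 / sum);
}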
// TwoDimensionalMap analogue of ScaleAndRegularize above; same scaling and
// regularization math, keyed by (leftBasic, rightBasic) pairs.
private static double ScaleAndRegularize(TwoDimensionalMap<string, string, SimpleMatrix> derivatives, TwoDimensionalMap<string, string, SimpleMatrix> currentMatrices, double scale, double regCost, bool dropBiasColumn)
{
    double cost = 0.0;  // the regularization cost
    foreach (TwoDimensionalMap.Entry<string, string, SimpleMatrix> entry in currentMatrices)
    {
        SimpleMatrix D = derivatives.Get(entry.GetFirstKey(), entry.GetSecondKey());
        SimpleMatrix regMatrix = entry.GetValue();
        if (dropBiasColumn)
        {
            // Zero out the last column so the bias term is not regularized.
            regMatrix = new SimpleMatrix(regMatrix);
            regMatrix.InsertIntoThis(0, regMatrix.NumCols() - 1, new SimpleMatrix(regMatrix.NumRows(), 1));
        }
        D = D.Scale(scale).Plus(regMatrix.Scale(regCost));
        derivatives.Put(entry.GetFirstKey(), entry.GetSecondKey(), D);
        cost += regMatrix.ElementMult(regMatrix).ElementSum() * regCost / 2.0;
    }
    return cost;
}