public AbstractEvaluate(RNNOptions options)
{
    // the matrix will be [gold][predicted]
    // TODO: make this an option
    this.op = options;
    this.Reset();
}
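// Illustrative note (the matrix name here is hypothetical; the original
// comment only fixes the ordering): with [gold][predicted] indexing, a
// confusion-matrix cell cm[2][3] counts nodes whose gold label is 2 but
// which the model predicted as 3.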
/// <summary>
/// Expected arguments are
/// <c>-gold gold -predicted predicted</c>
/// For example <br />
/// <c>java edu.stanford.nlp.sentiment.ExternalEvaluate -gold annotatedTrees.txt -predicted predictedTrees.txt</c>
/// </summary>
public static void Main(string[] args)
{
    RNNOptions curOptions = new RNNOptions();
    string goldPath = null;
    string predictedPath = null;
    for (int argIndex = 0; argIndex < args.Length;)
    {
        if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-gold"))
        {
            goldPath = args[argIndex + 1];
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-predicted"))
        {
            predictedPath = args[argIndex + 1];
            argIndex += 2;
        }
        else
        {
            int newArgIndex = curOptions.SetOption(args, argIndex);
            if (newArgIndex == argIndex)
            {
                throw new ArgumentException("Unknown argument " + args[argIndex]);
            }
            argIndex = newArgIndex;
        }
    }
    if (goldPath == null)
    {
        log.Info("goldPath not set. Exit.");
        System.Environment.Exit(-1);
    }
    if (predictedPath == null)
    {
        log.Info("predictedPath not set. Exit.");
        System.Environment.Exit(-1);
    }
    // filterUnknown is not supported because we would need to know which
    // gold sentences were removed in order to remove them from predicted as well
    IList<Tree> goldTrees = SentimentUtils.ReadTreesWithGoldLabels(goldPath);
    IList<Tree> predictedTrees = SentimentUtils.ReadTreesWithPredictedLabels(predictedPath);
    Edu.Stanford.Nlp.Sentiment.ExternalEvaluate evaluator = new Edu.Stanford.Nlp.Sentiment.ExternalEvaluate(curOptions, predictedTrees);
    evaluator.Eval(goldTrees);
    evaluator.PrintSummary();
}
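// A programmatic sketch of the same evaluation the Main above performs
// (file names here are illustrative):
//
//   IList<Tree> gold = SentimentUtils.ReadTreesWithGoldLabels("gold.txt");
//   IList<Tree> predicted = SentimentUtils.ReadTreesWithPredictedLabels("predicted.txt");
//   ExternalEvaluate evaluator = new ExternalEvaluate(new RNNOptions(), predicted);
//   evaluator.Eval(gold);
//   evaluator.PrintSummary();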
private SentimentModel(TwoDimensionalMap<string, string, SimpleMatrix> binaryTransform, TwoDimensionalMap<string, string, SimpleTensor> binaryTensors, TwoDimensionalMap<string, string, SimpleMatrix> binaryClassification, IDictionary<string, SimpleMatrix> unaryClassification, IDictionary<string, SimpleMatrix> wordVectors, RNNOptions op)
{
    this.op = op;
    this.binaryTransform = binaryTransform;
    this.binaryTensors = binaryTensors;
    this.binaryClassification = binaryClassification;
    this.unaryClassification = unaryClassification;
    this.wordVectors = wordVectors;
    this.numClasses = op.numClasses;
    if (op.numHid <= 0)
    {
        // infer the hidden size from the first word vector; all word
        // vectors are assumed to have the same dimension
        int nh = 0;
        foreach (SimpleMatrix wv in wordVectors.Values)
        {
            nh = wv.GetNumElements();
            break;
        }
        this.numHid = nh;
    }
    else
    {
        this.numHid = op.numHid;
    }
    this.numBinaryMatrices = binaryTransform.Size();
    binaryTransformSize = numHid * (2 * numHid + 1);
    if (op.useTensors)
    {
        binaryTensorSize = numHid * numHid * numHid * 4;
    }
    else
    {
        binaryTensorSize = 0;
    }
    binaryClassificationSize = (op.combineClassification) ? 0 : numClasses * (numHid + 1);
    numUnaryMatrices = unaryClassification.Count;
    unaryClassificationSize = numClasses * (numHid + 1);
    rand = new Random(op.randomSeed);
    identity = SimpleMatrix.Identity(numHid);
}
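// Worked example of the size bookkeeping above (illustrative only):
// with numHid = 25 and numClasses = 5,
//   binaryTransformSize     = 25 * (2 * 25 + 1) = 1275   (W is numHid x (2*numHid + 1), including the bias column)
//   binaryTensorSize        = 25 * 25 * 25 * 4  = 62500  (the tensor is (2*numHid) x (2*numHid) x numHid)
//   unaryClassificationSize = 5 * (25 + 1)      = 130    (Wcat is numClasses x (numHid + 1))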
/// <summary>The traditional way of initializing an empty model suitable for training.</summary>
public SentimentModel(RNNOptions op, IList<Tree> trainingTrees)
{
    this.op = op;
    rand = new Random(op.randomSeed);
    if (op.randomWordVectors)
    {
        InitRandomWordVectors(trainingTrees);
    }
    else
    {
        ReadWordVectors();
    }
    if (op.numHid > 0)
    {
        this.numHid = op.numHid;
    }
    else
    {
        // infer the hidden size from the first word vector
        int size = 0;
        foreach (SimpleMatrix vector in wordVectors.Values)
        {
            size = vector.GetNumElements();
            break;
        }
        this.numHid = size;
    }
    TwoDimensionalSet<string, string> binaryProductions = TwoDimensionalSet.HashSet();
    if (op.simplifiedModel)
    {
        binaryProductions.Add(string.Empty, string.Empty);
    }
    else
    {
        // TODO: figure out what binary productions we have in these trees.
        // Note: the current sentiment training data does not actually
        // have any constituent labels
        throw new NotSupportedException("Not yet implemented");
    }
    ICollection<string> unaryProductions = Generics.NewHashSet();
    if (op.simplifiedModel)
    {
        unaryProductions.Add(string.Empty);
    }
    else
    {
        // TODO: figure out what unary productions we have in these trees
        // (preterminals only, after the collapsing)
        throw new NotSupportedException("Not yet implemented");
    }
    this.numClasses = op.numClasses;
    identity = SimpleMatrix.Identity(numHid);
    binaryTransform = TwoDimensionalMap.TreeMap();
    binaryTensors = TwoDimensionalMap.TreeMap();
    binaryClassification = TwoDimensionalMap.TreeMap();
    // When making a flat model (no semantic untying) the
    // basicCategory function will return the same basic category for
    // all labels, so all entries will map to the same matrix
    foreach (Pair<string, string> binary in binaryProductions)
    {
        string left = BasicCategory(binary.first);
        string right = BasicCategory(binary.second);
        if (binaryTransform.Contains(left, right))
        {
            continue;
        }
        binaryTransform.Put(left, right, RandomTransformMatrix());
        if (op.useTensors)
        {
            binaryTensors.Put(left, right, RandomBinaryTensor());
        }
        if (!op.combineClassification)
        {
            binaryClassification.Put(left, right, RandomClassificationMatrix());
        }
    }
    numBinaryMatrices = binaryTransform.Size();
    binaryTransformSize = numHid * (2 * numHid + 1);
    if (op.useTensors)
    {
        binaryTensorSize = numHid * numHid * numHid * 4;
    }
    else
    {
        binaryTensorSize = 0;
    }
    binaryClassificationSize = (op.combineClassification) ? 0 : numClasses * (numHid + 1);
    unaryClassification = Generics.NewTreeMap();
    // When making a flat model (no semantic untying) the
    // basicCategory function will return the same basic category for
    // all labels, so all entries will map to the same matrix
    foreach (string unary in unaryProductions)
    {
        // a foreach iteration variable cannot be reassigned in C#,
        // so the basic category goes into a local
        string basicUnary = BasicCategory(unary);
        if (unaryClassification.Contains(basicUnary))
        {
            continue;
        }
        unaryClassification[basicUnary] = RandomClassificationMatrix();
    }
    numUnaryMatrices = unaryClassification.Count;
    unaryClassificationSize = numClasses * (numHid + 1);
}
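// A minimal construction sketch for a trainable model, assuming the default
// simplified flat model ("train.txt" is a hypothetical labeled sentiment treebank):
//
//   RNNOptions opts = new RNNOptions();
//   IList<Tree> trees = SentimentUtils.ReadTreesWithGoldLabels("train.txt");
//   SentimentModel model = new SentimentModel(opts, trees);
//
// With op.simplifiedModel, every production maps through BasicCategory to the
// ("", "") key, so all nodes share one transform matrix (and one tensor, if enabled).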
/*
 * // An example of how you could read in old models with readObject to fix the serialization
 * // You would first read in the old model, then reserialize it
 * private void readObject(ObjectInputStream in)
 *   throws IOException, ClassNotFoundException
 * {
 *   ObjectInputStream.GetField fields = in.readFields();
 *   binaryTransform = ErasureUtils.uncheckedCast(fields.get("binaryTransform", null));
 *
 *   // transform binaryTensors
 *   binaryTensors = TwoDimensionalMap.treeMap();
 *   TwoDimensionalMap<String, String, edu.stanford.nlp.rnn.SimpleTensor> oldTensors = ErasureUtils.uncheckedCast(fields.get("binaryTensors", null));
 *   for (String first : oldTensors.firstKeySet()) {
 *     for (String second : oldTensors.get(first).keySet()) {
 *       binaryTensors.put(first, second, new SimpleTensor(oldTensors.get(first, second).slices));
 *     }
 *   }
 *
 *   binaryClassification = ErasureUtils.uncheckedCast(fields.get("binaryClassification", null));
 *   unaryClassification = ErasureUtils.uncheckedCast(fields.get("unaryClassification", null));
 *   wordVectors = ErasureUtils.uncheckedCast(fields.get("wordVectors", null));
 *
 *   if (fields.defaulted("numClasses")) {
 *     throw new RuntimeException();
 *   }
 *   numClasses = fields.get("numClasses", 0);
 *
 *   if (fields.defaulted("numHid")) {
 *     throw new RuntimeException();
 *   }
 *   numHid = fields.get("numHid", 0);
 *
 *   if (fields.defaulted("numBinaryMatrices")) {
 *     throw new RuntimeException();
 *   }
 *   numBinaryMatrices = fields.get("numBinaryMatrices", 0);
 *
 *   if (fields.defaulted("binaryTransformSize")) {
 *     throw new RuntimeException();
 *   }
 *   binaryTransformSize = fields.get("binaryTransformSize", 0);
 *
 *   if (fields.defaulted("binaryTensorSize")) {
 *     throw new RuntimeException();
 *   }
 *   binaryTensorSize = fields.get("binaryTensorSize", 0);
 *
 *   if (fields.defaulted("binaryClassificationSize")) {
 *     throw new RuntimeException();
 *   }
 *   binaryClassificationSize = fields.get("binaryClassificationSize", 0);
 *
 *   if (fields.defaulted("numUnaryMatrices")) {
 *     throw new RuntimeException();
 *   }
 *   numUnaryMatrices = fields.get("numUnaryMatrices", 0);
 *
 *   if (fields.defaulted("unaryClassificationSize")) {
 *     throw new RuntimeException();
 *   }
 *   unaryClassificationSize = fields.get("unaryClassificationSize", 0);
 *
 *   rand = ErasureUtils.uncheckedCast(fields.get("rand", null));
 *   op = ErasureUtils.uncheckedCast(fields.get("op", null));
 *   op.classNames = op.DEFAULT_CLASS_NAMES;
 *   op.equivalenceClasses = op.APPROXIMATE_EQUIVALENCE_CLASSES;
 *   op.equivalenceClassNames = op.DEFAULT_EQUIVALENCE_CLASS_NAMES;
 * }
 */
/// <summary>
/// Given single matrices and sets of options, create the
/// corresponding SentimentModel.
/// </summary>
/// <remarks>
/// Given single matrices and sets of options, create the
/// corresponding SentimentModel. Useful for creating a Java version
/// of a model trained in some other manner, such as using the
/// original Matlab code.
/// </remarks>
internal static Edu.Stanford.Nlp.Sentiment.SentimentModel ModelFromMatrices(SimpleMatrix W, SimpleMatrix Wcat, SimpleTensor Wt, IDictionary<string, SimpleMatrix> wordVectors, RNNOptions op)
{
    if (!op.combineClassification || !op.simplifiedModel)
    {
        throw new ArgumentException("Can only create a model using this method if combineClassification and simplifiedModel are turned on");
    }
    TwoDimensionalMap<string, string, SimpleMatrix> binaryTransform = TwoDimensionalMap.TreeMap();
    binaryTransform.Put(string.Empty, string.Empty, W);
    TwoDimensionalMap<string, string, SimpleTensor> binaryTensors = TwoDimensionalMap.TreeMap();
    binaryTensors.Put(string.Empty, string.Empty, Wt);
    TwoDimensionalMap<string, string, SimpleMatrix> binaryClassification = TwoDimensionalMap.TreeMap();
    IDictionary<string, SimpleMatrix> unaryClassification = Generics.NewTreeMap();
    unaryClassification[string.Empty] = Wcat;
    return new Edu.Stanford.Nlp.Sentiment.SentimentModel(binaryTransform, binaryTensors, binaryClassification, unaryClassification, wordVectors, op);
}
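// A usage sketch for ModelFromMatrices; the shapes follow the size bookkeeping
// in the private constructor above (numHid = 25, numClasses = 5 are illustrative):
//
//   SimpleMatrix W = new SimpleMatrix(25, 51);     // numHid x (2*numHid + 1), bias in the last column
//   SimpleMatrix Wcat = new SimpleMatrix(5, 26);   // numClasses x (numHid + 1)
//   SimpleMatrix[] slices = new SimpleMatrix[25];  // numHid slices, each (2*numHid) x (2*numHid)
//   for (int i = 0; i < slices.Length; ++i)
//   {
//       slices[i] = new SimpleMatrix(50, 50);
//   }
//   SimpleTensor Wt = new SimpleTensor(slices);    // the slice-array constructor used in the Matlab-conversion Main below
//   RNNOptions op = new RNNOptions();
//   op.combineClassification = true;               // required by ModelFromMatrices
//   op.simplifiedModel = true;                     // required by ModelFromMatrices
//   SentimentModel model = SentimentModel.ModelFromMatrices(W, Wcat, Wt, wordVectors, op);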
public ExternalEvaluate(RNNOptions op, IList<Tree> predictedTrees)
    : base(op)
{
    this.predicted = predictedTrees;
}
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    string basePath = "/user/socherr/scr/projects/semComp/RNTN/src/params/";
    int numSlices = 25;
    bool useEscapedParens = false;
    for (int argIndex = 0; argIndex < args.Length;)
    {
        if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-slices"))
        {
            numSlices = System.Convert.ToInt32(args[argIndex + 1]);
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-path"))
        {
            basePath = args[argIndex + 1];
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-useEscapedParens"))
        {
            useEscapedParens = true;
            argIndex += 1;
        }
        else
        {
            log.Info("Unknown argument " + args[argIndex]);
            System.Environment.Exit(2);
        }
    }
    SimpleMatrix[] slices = new SimpleMatrix[numSlices];
    for (int i = 0; i < numSlices; ++i)
    {
        slices[i] = LoadMatrix(basePath + "bin/Wt_" + (i + 1) + ".bin", basePath + "Wt_" + (i + 1) + ".txt");
    }
    SimpleTensor tensor = new SimpleTensor(slices);
    log.Info("W tensor size: " + tensor.NumRows() + "x" + tensor.NumCols() + "x" + tensor.NumSlices());
    SimpleMatrix W = LoadMatrix(basePath + "bin/W.bin", basePath + "W.txt");
    log.Info("W matrix size: " + W.NumRows() + "x" + W.NumCols());
    SimpleMatrix Wcat = LoadMatrix(basePath + "bin/Wcat.bin", basePath + "Wcat.txt");
    log.Info("W cat size: " + Wcat.NumRows() + "x" + Wcat.NumCols());
    SimpleMatrix combinedWV = LoadMatrix(basePath + "bin/Wv.bin", basePath + "Wv.txt");
    log.Info("Word matrix size: " + combinedWV.NumRows() + "x" + combinedWV.NumCols());
    File vocabFile = new File(basePath + "vocab_1.txt");
    if (!vocabFile.Exists())
    {
        vocabFile = new File(basePath + "words.txt");
    }
    IList<string> lines = Generics.NewArrayList();
    foreach (string line in IOUtils.ReadLines(vocabFile))
    {
        lines.Add(line.Trim());
    }
    log.Info("Lines in vocab file: " + lines.Count);
    IDictionary<string, SimpleMatrix> wordVectors = Generics.NewTreeMap();
    for (int i = 0; i < lines.Count && i < combinedWV.NumCols(); ++i)
    {
        string[] pieces = lines[i].Split(" +");
        if (pieces.Length != 1)
        {
            // skip blank lines and multi-token lines
            continue;
        }
        wordVectors[pieces[0]] = combinedWV.ExtractMatrix(0, numSlices, i, i + 1);
        if (pieces[0].Equals("UNK"))
        {
            wordVectors[SentimentModel.UnknownWord] = wordVectors["UNK"];
        }
    }
    // If there is no ",", we first try to look for an HTML escaping,
    // then fall back to "." as better than just a random word vector.
    // Same for "``" and ";"
    CopyWordVector(wordVectors, "&#44;", ",");
    CopyWordVector(wordVectors, ".", ",");
    CopyWordVector(wordVectors, "&#59;", ";");
    CopyWordVector(wordVectors, ".", ";");
    CopyWordVector(wordVectors, "&#96;&#96;", "``");
    CopyWordVector(wordVectors, "''", "``");
    if (useEscapedParens)
    {
        ReplaceWordVector(wordVectors, "(", "-LRB-");
        ReplaceWordVector(wordVectors, ")", "-RRB-");
    }
    RNNOptions op = new RNNOptions();
    op.numHid = numSlices;
    op.lowercaseWordVectors = false;
    if (Wcat.NumRows() == 2)
    {
        op.classNames = new string[] { "Negative", "Positive" };
        op.equivalenceClasses = new int[][] { new int[] { 0 }, new int[] { 1 } };  // TODO: set to null once old models are updated
        op.numClasses = 2;
    }
    if (!wordVectors.Contains(SentimentModel.UnknownWord))
    {
        wordVectors[SentimentModel.UnknownWord] = SimpleMatrix.Random(numSlices, 1, -0.00001, 0.00001, new Random());
    }
    SentimentModel model = SentimentModel.ModelFromMatrices(W, Wcat, tensor, wordVectors, op);
    model.SaveSerialized("matlab.ser.gz");
}
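// For reference, plausible sketches of the two helpers used above; the real
// implementations live elsewhere in this class, so these only document the
// intended semantics:
//
//   static void CopyWordVector(IDictionary<string, SimpleMatrix> wordVectors, string source, string target)
//   {
//       // copy source's vector to target only if target is missing and source exists
//       if (wordVectors.Contains(target) || !wordVectors.Contains(source))
//       {
//           return;
//       }
//       wordVectors[target] = new SimpleMatrix(wordVectors[source]);
//   }
//
//   static void ReplaceWordVector(IDictionary<string, SimpleMatrix> wordVectors, string source, string target)
//   {
//       // overwrite target's vector with source's, if source exists
//       if (!wordVectors.Contains(source))
//       {
//           return;
//       }
//       wordVectors[target] = wordVectors[source];
//   }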
/// <summary>Trains a sentiment model.</summary>
/// <remarks>
/// Trains a sentiment model.
/// The -trainPath argument points to a labeled sentiment treebank.
/// The trees in this data will be used to train the model parameters (and also to seed the model vocabulary).
/// The -devPath argument points to a second labeled sentiment treebank.
/// The trees in this data will be used to periodically evaluate the performance of the model.
/// We won't train on this data; it will only be used to test how well the model generalizes to unseen data.
/// The -model argument specifies where to save the learned sentiment model.
/// </remarks>
/// <param name="args">Command line arguments</param>
public static void Main(string[] args)
{
    RNNOptions op = new RNNOptions();
    string trainPath = "sentimentTreesDebug.txt";
    string devPath = null;
    bool runGradientCheck = false;
    bool runTraining = false;
    bool filterUnknown = false;
    string modelPath = null;
    for (int argIndex = 0; argIndex < args.Length;)
    {
        if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-train"))
        {
            runTraining = true;
            argIndex++;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-gradientcheck"))
        {
            runGradientCheck = true;
            argIndex++;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-trainpath"))
        {
            trainPath = args[argIndex + 1];
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-devpath"))
        {
            devPath = args[argIndex + 1];
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-model"))
        {
            modelPath = args[argIndex + 1];
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-filterUnknown"))
        {
            filterUnknown = true;
            argIndex++;
        }
        else
        {
            int newArgIndex = op.SetOption(args, argIndex);
            if (newArgIndex == argIndex)
            {
                throw new ArgumentException("Unknown argument " + args[argIndex]);
            }
            argIndex = newArgIndex;
        }
    }
    // read in the trees
    IList<Tree> trainingTrees = SentimentUtils.ReadTreesWithGoldLabels(trainPath);
    log.Info("Read in " + trainingTrees.Count + " training trees");
    if (filterUnknown)
    {
        trainingTrees = SentimentUtils.FilterUnknownRoots(trainingTrees);
        log.Info("Filtered training trees: " + trainingTrees.Count);
    }
    IList<Tree> devTrees = null;
    if (devPath != null)
    {
        devTrees = SentimentUtils.ReadTreesWithGoldLabels(devPath);
        log.Info("Read in " + devTrees.Count + " dev trees");
        if (filterUnknown)
        {
            devTrees = SentimentUtils.FilterUnknownRoots(devTrees);
            log.Info("Filtered dev trees: " + devTrees.Count);
        }
    }
    // TODO: binarize the trees, then collapse the unary chains.
    // Collapsed unary chains always have the label of the top node in
    // the chain.
    // Note: the sentiment training data already has this done.
    // However, when we handle trees given to us from the Stanford Parser,
    // we will have to perform this step.
    // build an uninitialized SentimentModel from the binary productions
    log.Info("Sentiment model options:\n" + op);
    SentimentModel model = new SentimentModel(op, trainingTrees);
    if (op.trainOptions.initialMatrixLogPath != null)
    {
        StringUtils.PrintToFile(new File(op.trainOptions.initialMatrixLogPath), model.ToString(), false, false, "utf-8");
    }
    // TODO: need to handle unk rules somehow... at test time the tree
    // structures might have something that we never saw at training
    // time. For example, we could put a threshold on all of the
    // rules at training time, and anything that doesn't meet that
    // threshold goes into the unk.
    // Perhaps we could also use some
    // component of the accepted training rules to build up the "unk"
    // parameter in case there are no rules that don't meet the
    // threshold.
    if (runGradientCheck)
    {
        RunGradientCheck(model, trainingTrees);
    }
    if (runTraining)
    {
        Train(model, modelPath, trainingTrees, devTrees);
        model.SaveSerialized(modelPath);
    }
}
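// An invocation sketch, assuming this Main lives in the usual
// edu.stanford.nlp.sentiment.SentimentTraining class (file names are illustrative):
//
//   java edu.stanford.nlp.sentiment.SentimentTraining -train \
//       -trainPath train.txt -devPath dev.txt -model model.ser.gz
//
// Passing -gradientcheck instead of -train runs RunGradientCheck on the
// training trees without training or saving a model.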