/// <summary>
/// Evaluates a saved sentiment model against a labeled treebank.
/// Expected arguments are <code> -model model -treebank treebank </code>
/// <br />
/// For example <br />
/// <code>
/// java edu.stanford.nlp.sentiment.Evaluate
///   edu/stanford/nlp/models/sentiment/sentiment.ser.gz
///   /u/nlp/data/sentiment/trees/dev.txt
/// </code>
/// </summary>
/// <remarks>
/// Other arguments are available, for example <code> -numClasses</code>.
/// See RNNOptions.java, RNNTestOptions.java and RNNTrainOptions.java for
/// more arguments.  Any flag not recognized here is forwarded to the
/// loaded model's option parser; a flag unknown there is an error.
/// </remarks>
public static void Main(string[] args)
{
    string modelPath = null;
    string treePath = null;
    bool filterUnknown = false;
    // Flags not handled here are collected and applied to the model options
    // after the model has been deserialized.
    IList<string> remainingArgs = Generics.NewArrayList();
    for (int argIndex = 0; argIndex < args.Length;)
    {
        if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-model"))
        {
            modelPath = args[argIndex + 1];
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-treebank"))
        {
            treePath = args[argIndex + 1];
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-filterUnknown"))
        {
            filterUnknown = true;
            argIndex++;
        }
        else
        {
            remainingArgs.Add(args[argIndex]);
            argIndex++;
        }
    }
    // Fail fast with a clear message instead of passing null paths to the
    // loaders below (mirrors the validation in ExternalEvaluate.Main).
    if (modelPath == null)
    {
        log.Info("modelPath not set. Exit.");
        System.Environment.Exit(-1);
    }
    if (treePath == null)
    {
        log.Info("treePath not set. Exit.");
        System.Environment.Exit(-1);
    }
    string[] newArgs = new string[remainingArgs.Count];
    Sharpen.Collections.ToArray(remainingArgs, newArgs);
    SentimentModel model = SentimentModel.LoadSerialized(modelPath);
    // Apply the leftover flags to the loaded model's options; SetOption
    // returns the index of the next unconsumed argument, so no progress
    // means the flag was not recognized.
    for (int argIndex = 0; argIndex < newArgs.Length;)
    {
        int newIndex = model.op.SetOption(newArgs, argIndex);
        if (argIndex == newIndex)
        {
            log.Info("Unknown argument " + newArgs[argIndex]);
            throw new ArgumentException("Unknown argument " + newArgs[argIndex]);
        }
        argIndex = newIndex;
    }
    IList<Tree> trees = SentimentUtils.ReadTreesWithGoldLabels(treePath);
    if (filterUnknown)
    {
        // Drop trees whose root has no usable gold sentiment label.
        trees = SentimentUtils.FilterUnknownRoots(trees);
    }
    Edu.Stanford.Nlp.Sentiment.Evaluate eval = new Edu.Stanford.Nlp.Sentiment.Evaluate(model);
    eval.Eval(trees);
    eval.PrintSummary();
}
/// <summary>
/// Reads annotations from the given filename using the requested input format.
/// Each returned Annotation holds exactly one sentence.
/// </summary>
public static IList<Annotation> GetAnnotations(StanfordCoreNLP tokenizer, SentimentPipeline.Input inputFormat, string filename, bool filterUnknown)
{
    switch (inputFormat)
    {
        case SentimentPipeline.Input.Text:
        {
            // Raw text: run the tokenizer pipeline over the whole file, then
            // wrap every detected sentence in its own single-sentence Annotation.
            string fileContents = IOUtils.SlurpFileNoExceptions(filename);
            Annotation document = new Annotation(fileContents);
            tokenizer.Annotate(document);
            IList<Annotation> results = Generics.NewArrayList();
            foreach (ICoreMap sentence in document.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                Annotation wrapped = new Annotation(sentence.Get(typeof(CoreAnnotations.TextAnnotation)));
                wrapped.Set(typeof(CoreAnnotations.SentencesAnnotation), Java.Util.Collections.SingletonList(sentence));
                results.Add(wrapped);
            }
            return (results);
        }
        case SentimentPipeline.Input.Trees:
        {
            // Already-parsed trees: either read with gold labels (so unlabeled
            // roots can be filtered out) or load the treebank as-is.
            IList<Tree> parsedTrees;
            if (filterUnknown)
            {
                parsedTrees = SentimentUtils.ReadTreesWithGoldLabels(filename);
                parsedTrees = SentimentUtils.FilterUnknownRoots(parsedTrees);
            }
            else
            {
                MemoryTreebank treebank = new MemoryTreebank("utf-8");
                treebank.LoadPath(filename, null);
                parsedTrees = new List<Tree>(treebank);
            }
            IList<Annotation> results = Generics.NewArrayList();
            foreach (Tree parsedTree in parsedTrees)
            {
                // Build a one-sentence Annotation carrying the tree itself plus
                // the sentence text reconstructed from the tree's leaves.
                ICoreMap sentence = new Annotation(SentenceUtils.ListToString(parsedTree.Yield()));
                sentence.Set(typeof(TreeCoreAnnotations.TreeAnnotation), parsedTree);
                IList<ICoreMap> singleSentence = Java.Util.Collections.SingletonList(sentence);
                Annotation document = new Annotation(string.Empty);
                document.Set(typeof(CoreAnnotations.SentencesAnnotation), singleSentence);
                results.Add(document);
            }
            return (results);
        }
        default:
        {
            throw new ArgumentException("Unknown format " + inputFormat);
        }
    }
}
/// <summary>
/// Expected arguments are
/// <c>-gold gold -predicted predicted</c>
/// For example <br />
/// <c>java edu.stanford.nlp.sentiment.ExternalEvaluate annotatedTrees.txt predictedTrees.txt</c>
/// </summary>
public static void Main(string[] args)
{
    RNNOptions curOptions = new RNNOptions();
    string goldPath = null;
    string predictedPath = null;
    int argIndex = 0;
    while (argIndex < args.Length)
    {
        if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-gold"))
        {
            goldPath = args[argIndex + 1];
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-predicted"))
        {
            predictedPath = args[argIndex + 1];
            argIndex += 2;
        }
        else
        {
            // Anything else must be an RNNOptions flag; no progress from
            // SetOption means the flag is unrecognized.
            int consumedUpTo = curOptions.SetOption(args, argIndex);
            if (consumedUpTo == argIndex)
            {
                throw new ArgumentException("Unknown argument " + args[argIndex]);
            }
            argIndex = consumedUpTo;
        }
    }
    // Both tree files are required; bail out with a clear message otherwise.
    if (goldPath == null)
    {
        log.Info("goldPath not set. Exit.");
        System.Environment.Exit(-1);
    }
    if (predictedPath == null)
    {
        log.Info("predictedPath not set. Exit.");
        System.Environment.Exit(-1);
    }
    // filterUnknown not supported because I'd need to know which sentences
    // are removed to remove them from predicted
    IList<Tree> goldTrees = SentimentUtils.ReadTreesWithGoldLabels(goldPath);
    IList<Tree> predictedTrees = SentimentUtils.ReadTreesWithPredictedLabels(predictedPath);
    Edu.Stanford.Nlp.Sentiment.ExternalEvaluate evaluator = new Edu.Stanford.Nlp.Sentiment.ExternalEvaluate(curOptions, predictedTrees);
    evaluator.Eval(goldTrees);
    evaluator.PrintSummary();
}
/// <summary>Trains a sentiment model.</summary>
/// <remarks>
/// Trains a sentiment model.
/// The -trainPath argument points to a labeled sentiment treebank.
/// The trees in this data will be used to train the model parameters (also to seed the model vocabulary).
/// The -devPath argument points to a second labeled sentiment treebank.
/// The trees in this data will be used to periodically evaluate the performance of the model.
/// We won't train on this data; it will only be used to test how well the model generalizes to unseen data.
/// The -model argument specifies where to save the learned sentiment model.
/// </remarks>
/// <param name="args">Command line arguments</param>
public static void Main(string[] args)
{
    RNNOptions op = new RNNOptions();
    // Default is a small debugging treebank in the working directory;
    // real runs should pass -trainpath explicitly.
    string trainPath = "sentimentTreesDebug.txt";
    string devPath = null;
    bool runGradientCheck = false;
    bool runTraining = false;
    bool filterUnknown = false;
    string modelPath = null;
    for (int argIndex = 0; argIndex < args.Length;)
    {
        if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-train"))
        {
            runTraining = true;
            argIndex++;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-gradientcheck"))
        {
            runGradientCheck = true;
            argIndex++;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-trainpath"))
        {
            trainPath = args[argIndex + 1];
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-devpath"))
        {
            devPath = args[argIndex + 1];
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-model"))
        {
            modelPath = args[argIndex + 1];
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-filterUnknown"))
        {
            filterUnknown = true;
            argIndex++;
        }
        else
        {
            // Remaining flags go to the shared RNN options; SetOption making
            // no progress means the flag is unrecognized.
            int newArgIndex = op.SetOption(args, argIndex);
            if (newArgIndex == argIndex)
            {
                throw new ArgumentException("Unknown argument " + args[argIndex]);
            }
            argIndex = newArgIndex;
        }
    }
    // Fail fast: without -model, training would crash in SaveSerialized only
    // AFTER the (potentially long) training run completed.
    if (runTraining && modelPath == null)
    {
        log.Info("modelPath not set but -train was requested. Exit.");
        System.Environment.Exit(-1);
    }
    // read in the trees
    IList<Tree> trainingTrees = SentimentUtils.ReadTreesWithGoldLabels(trainPath);
    log.Info("Read in " + trainingTrees.Count + " training trees");
    if (filterUnknown)
    {
        trainingTrees = SentimentUtils.FilterUnknownRoots(trainingTrees);
        log.Info("Filtered training trees: " + trainingTrees.Count);
    }
    IList<Tree> devTrees = null;
    if (devPath != null)
    {
        devTrees = SentimentUtils.ReadTreesWithGoldLabels(devPath);
        log.Info("Read in " + devTrees.Count + " dev trees");
        if (filterUnknown)
        {
            devTrees = SentimentUtils.FilterUnknownRoots(devTrees);
            log.Info("Filtered dev trees: " + devTrees.Count);
        }
    }
    // TODO: binarize the trees, then collapse the unary chains.
    // Collapsed unary chains always have the label of the top node in
    // the chain
    // Note: the sentiment training data already has this done.
    // However, when we handle trees given to us from the Stanford Parser,
    // we will have to perform this step
    // build an uninitialized SentimentModel from the binary productions
    log.Info("Sentiment model options:\n" + op);
    SentimentModel model = new SentimentModel(op, trainingTrees);
    if (op.trainOptions.initialMatrixLogPath != null)
    {
        // Dump the freshly-initialized parameter matrices for later inspection.
        StringUtils.PrintToFile(new File(op.trainOptions.initialMatrixLogPath), model.ToString(), false, false, "utf-8");
    }
    // TODO: need to handle unk rules somehow... at test time the tree
    // structures might have something that we never saw at training
    // time.  for example, we could put a threshold on all of the
    // rules at training time and anything that doesn't meet that
    // threshold goes into the unk.  perhaps we could also use some
    // component of the accepted training rules to build up the "unk"
    // parameter in case there are no rules that don't meet the
    // threshold
    if (runGradientCheck)
    {
        RunGradientCheck(model, trainingTrees);
    }
    if (runTraining)
    {
        Train(model, modelPath, trainingTrees, devTrees);
        model.SaveSerialized(modelPath);
    }
}