/// <summary>
/// Runs a CoreNLP pipeline over <paramref name="text"/> and serializes the resulting
/// annotation to <paramref name="processedFile"/>, unless that file already exists.
/// </summary>
/// <remarks>
/// An <c>IOException</c> raised while processing or writing is not propagated; its
/// stack trace is printed instead.
/// </remarks>
/// <param name="processedFile">Destination file. When it already exists, nothing is done.</param>
/// <param name="coreNLPProps">Properties used to construct the pipeline.</param>
/// <param name="text">Text handed to the pipeline.</param>
public static void ProcessCoreNLPIfDoesNotExist(File processedFile, Properties coreNLPProps, string text)
{
    if (processedFile.Exists())
    {
        return;
    }
    try
    {
        StanfordCoreNLP coreNLP = new StanfordCoreNLP(coreNLPProps);
        Annotation processedAnnotation = coreNLP.Process(text);
        //this document holds the split for paragraphs.
        ProtobufAnnotationSerializer pas = new ProtobufAnnotationSerializer(true);
        OutputStream fos = new BufferedOutputStream(new FileOutputStream(processedFile.GetAbsolutePath()));
        try
        {
            pas.Write(processedAnnotation, fos);
        }
        finally
        {
            // The stream was previously never closed, leaking the file handle and
            // leaving any still-buffered bytes unwritten. Close it on every path.
            fos.Close();
        }
    }
    catch (IOException e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
    }
}
// TODO: make this into a unit test
/// <summary>
/// Test QuoteAttributionAnnotator on a chapter of PP: builds a pipeline ending in the
/// <c>quote</c> and <c>quoteattribution</c> annotators, runs it over the text held in
/// <c>test</c> (defined outside this method), and prints every quotation together with
/// its predicted mention and speaker.
/// </summary>
/// <param name="familyFile">Value for <c>quoteattribution.familyWordsFile</c>.</param>
/// <param name="animateFile">Value for <c>quoteattribution.animacyWordsFile</c>.</param>
/// <param name="genderFile">Value for <c>quoteattribution.genderNamesFile</c>.</param>
/// <param name="charactersFile">Value for <c>quoteattribution.charactersPath</c>.</param>
/// <param name="modelFile">Value for <c>quoteattribution.modelPath</c>.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.TypeLoadException"/>
public static void TestPP(string familyFile, string animateFile, string genderFile, string charactersFile, string modelFile)
{
    // Configure the annotator chain and point the quote attribution annotator at its resources.
    Properties settings = new Properties();
    settings.SetProperty("annotators", "tokenize, ssplit, pos, lemma, ner, depparse, quote, quoteattribution");
    settings.SetProperty("quoteattribution.familyWordsFile", familyFile);
    settings.SetProperty("quoteattribution.animacyWordsFile", animateFile);
    settings.SetProperty("quoteattribution.genderNamesFile", genderFile);
    settings.SetProperty("quoteattribution.charactersPath", charactersFile);
    settings.SetProperty("quoteattribution.modelPath", modelFile);

    StanfordCoreNLP pipeline = new StanfordCoreNLP(settings);
    Annotation annotated = pipeline.Process(test);
    IList<ICoreMap> foundQuotes = annotated.Get(typeof(CoreAnnotations.QuotationsAnnotation));

    foreach (ICoreMap current in foundQuotes)
    {
        System.Console.Out.WriteLine("Quote: " + current.Get(typeof(CoreAnnotations.TextAnnotation)));

        // Report the predicted mention, plus the sieve that produced it, when present.
        var mention = current.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation));
        if (mention == null)
        {
            System.Console.Out.WriteLine("Predicted Mention: none");
        }
        else
        {
            var mentionSieve = current.Get(typeof(QuoteAttributionAnnotator.MentionSieveAnnotation));
            System.Console.Out.WriteLine("Predicted Mention: " + mention + " Predictor: " + mentionSieve);
        }

        // Same report for the predicted speaker.
        var speaker = current.Get(typeof(QuoteAttributionAnnotator.SpeakerAnnotation));
        if (speaker == null)
        {
            System.Console.Out.WriteLine("Predicted Speaker: none");
        }
        else
        {
            var speakerSieve = current.Get(typeof(QuoteAttributionAnnotator.SpeakerSieveAnnotation));
            System.Console.Out.WriteLine("Predicted Speaker: " + speaker + " Predictor: " + speakerSieve);
        }

        System.Console.Out.WriteLine("====");
    }

    System.Console.Out.WriteLine("Finished");
}
/// <summary>Runs the tree-based sentiment model on some text.</summary>
/// <remarks>
/// Recognized flags (matched case-insensitively): <c>-sentimentModel</c>,
/// <c>-parserModel</c>, <c>-file</c>, <c>-fileList</c>, <c>-stdin</c>, <c>-input</c>,
/// <c>-output</c> (comma-separated list), <c>-filterUnknown</c>, <c>-tlppClass</c> and
/// <c>-help</c>. Exactly one of <c>-file</c>, <c>-fileList</c> or <c>-stdin</c> must be given.
/// </remarks>
/// <param name="args">Command-line arguments.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.ArgumentException">
/// Thrown for an unknown argument, or when zero or more than one input source is specified.
/// </exception>
public static void Main(string[] args)
{
    string parserModel = null;
    string sentimentModel = null;
    string filename = null;
    string fileList = null;
    bool stdin = false;
    bool filterUnknown = false;
    IList<SentimentPipeline.Output> outputFormats = Java.Util.Collections.SingletonList(SentimentPipeline.Output.Root);
    SentimentPipeline.Input inputFormat = SentimentPipeline.Input.Text;
    string tlppClass = DefaultTlppClass;

    // Each branch advances argIndex itself: by 2 for flags that take a value, by 1 otherwise.
    for (int argIndex = 0; argIndex < args.Length;)
    {
        string flag = args[argIndex];
        if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-sentimentModel"))
        {
            sentimentModel = args[argIndex + 1];
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-parserModel"))
        {
            parserModel = args[argIndex + 1];
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-file"))
        {
            filename = args[argIndex + 1];
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-fileList"))
        {
            fileList = args[argIndex + 1];
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-stdin"))
        {
            stdin = true;
            argIndex++;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-input"))
        {
            inputFormat = SentimentPipeline.Input.ValueOf(args[argIndex + 1].ToUpper());
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-output"))
        {
            string[] formats = args[argIndex + 1].Split(",");
            outputFormats = new List<SentimentPipeline.Output>();
            foreach (string format in formats)
            {
                outputFormats.Add(SentimentPipeline.Output.ValueOf(format.ToUpper()));
            }
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-filterUnknown"))
        {
            filterUnknown = true;
            argIndex++;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-tlppClass"))
        {
            tlppClass = args[argIndex + 1];
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-help"))
        {
            Help();
            System.Environment.Exit(0);
        }
        else
        {
            // Report the offending argument itself. The previous code used
            // args[argIndex + 1], which named the wrong token and indexed past the
            // end of the array when the unknown argument came last.
            log.Info("Unknown argument " + flag);
            Help();
            throw new ArgumentException("Unknown argument " + flag);
        }
    }

    // We construct two pipelines.  One handles tokenization, if
    // necessary.  The other takes tokenized sentences and converts
    // them to sentiment trees.
    Properties pipelineProps = new Properties();
    Properties tokenizerProps = null;
    if (sentimentModel != null)
    {
        pipelineProps.SetProperty("sentiment.model", sentimentModel);
    }
    if (parserModel != null)
    {
        pipelineProps.SetProperty("parse.model", parserModel);
    }
    if (inputFormat == SentimentPipeline.Input.Trees)
    {
        // Tree input: no tokenizer pipeline is built (tokenizerProps stays null).
        pipelineProps.SetProperty("annotators", "binarizer, sentiment");
        pipelineProps.SetProperty("customAnnotatorClass.binarizer", "edu.stanford.nlp.pipeline.BinarizerAnnotator");
        pipelineProps.SetProperty("binarizer.tlppClass", tlppClass);
        pipelineProps.SetProperty("enforceRequirements", "false");
    }
    else
    {
        pipelineProps.SetProperty("annotators", "parse, sentiment");
        pipelineProps.SetProperty("parse.binaryTrees", "true");
        pipelineProps.SetProperty("parse.buildgraphs", "false");
        pipelineProps.SetProperty("enforceRequirements", "false");
        tokenizerProps = new Properties();
        tokenizerProps.SetProperty("annotators", "tokenize, ssplit");
    }
    if (stdin && tokenizerProps != null)
    {
        tokenizerProps.SetProperty(StanfordCoreNLP.NewlineSplitterProperty, "true");
    }

    // Exactly one input source must have been selected.
    int count = 0;
    if (filename != null)
    {
        count++;
    }
    if (fileList != null)
    {
        count++;
    }
    if (stdin)
    {
        count++;
    }
    if (count > 1)
    {
        throw new ArgumentException("Please only specify one of -file, -fileList or -stdin");
    }
    if (count == 0)
    {
        throw new ArgumentException("Please specify either -file, -fileList or -stdin");
    }

    StanfordCoreNLP tokenizer = (tokenizerProps == null) ? null : new StanfordCoreNLP(tokenizerProps);
    StanfordCoreNLP pipeline = new StanfordCoreNLP(pipelineProps);

    if (filename != null)
    {
        // Process a file.  The pipeline will do tokenization, which
        // means it will split it into sentences as best as possible
        // with the tokenizer.
        IList<Annotation> annotations = GetAnnotations(tokenizer, inputFormat, filename, filterUnknown);
        foreach (Annotation annotation in annotations)
        {
            pipeline.Annotate(annotation);
            foreach (ICoreMap sentence in annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                System.Console.Out.WriteLine(sentence);
                OutputTree(System.Console.Out, sentence, outputFormats);
            }
        }
    }
    else if (fileList != null)
    {
        // Process multiple files.  The pipeline will do tokenization,
        // which means it will split it into sentences as best as
        // possible with the tokenizer.  Output will go to filename.out
        // for each file.
        foreach (string file in fileList.Split(","))
        {
            IList<Annotation> annotations = GetAnnotations(tokenizer, inputFormat, file, filterUnknown);
            FileOutputStream fout = new FileOutputStream(file + ".out");
            try
            {
                TextWriter pout = new TextWriter(fout);
                foreach (Annotation annotation in annotations)
                {
                    pipeline.Annotate(annotation);
                    foreach (ICoreMap sentence in annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)))
                    {
                        pout.WriteLine(sentence);
                        OutputTree(pout, sentence, outputFormats);
                    }
                }
                pout.Flush();
            }
            finally
            {
                // Release the output file even when annotation or output fails part-way.
                fout.Close();
            }
        }
    }
    else
    {
        // Process stdin.  Each line will be treated as a single sentence.
        // NOTE(review): tokenizer is null when the input format is Trees, so combining
        // -stdin with tree input dereferences null below -- confirm the intended behavior.
        log.Info("Reading in text from stdin.");
        log.Info("Please enter one sentence per line.");
        log.Info("Processing will end when EOF is reached.");
        BufferedReader reader = IOUtils.ReaderFromStdin("utf-8");
        for (string line; (line = reader.ReadLine()) != null;)
        {
            line = line.Trim();
            if (!line.IsEmpty())
            {
                Annotation annotation = tokenizer.Process(line);
                pipeline.Annotate(annotation);
                foreach (ICoreMap sentence in annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)))
                {
                    OutputTree(System.Console.Out, sentence, outputFormats);
                }
            }
            else
            {
                // Output blank lines for blank lines so the tool can be
                // used for line-by-line text processing
                System.Console.Out.WriteLine();
            }
        }
    }
}