/// <summary>
/// Runs a Stanford CoreNLP pipeline over <paramref name="text"/> and prints, per sentence,
/// the Token-POS-NER triple for each token and (when available) the constituency parse.
/// </summary>
/// <param name="option">Comma-separated annotator list passed as the "annotators" property.</param>
/// <param name="text">Raw text to annotate.</param>
/// <param name="disableLogging">When true, clears the Redwood logging configuration first.</param>
public static void Execute(string option, string text, bool disableLogging = true)
{
    if (disableLogging)
    {
        RedwoodConfiguration.current().clear().apply();
    }

    var jarRoot = @"../../../data/paket-files/stanford-corenlp-3.9.1-models/";
    var props = new Properties();
    props.setProperty("annotators", option);
    props.setProperty("ner.useSUTime", "0");

    // We should change current directory, so StanfordCoreNLP could find all the model files automatically.
    // BUGFIX: restore the working directory in a finally block — previously an exception during
    // pipeline construction left the process in the model directory.
    var curDir = Environment.CurrentDirectory;
    StanfordNLP.StanfordCoreNLP pipeline;
    Directory.SetCurrentDirectory(jarRoot);
    try
    {
        pipeline = new StanfordNLP.StanfordCoreNLP(props);
    }
    finally
    {
        Directory.SetCurrentDirectory(curDir);
    }

    // Annotate the input text.
    var annotation = new StanfordNLP.Annotation(text);
    pipeline.annotate(annotation);

    // java.lang.Class keys used to read results back out of the annotation.
    var sentencesAnnotation = new CoreAnnotations.SentencesAnnotation().getClass();
    var tokensAnnotation = new CoreAnnotations.TokensAnnotation().getClass();
    var posAnnotation = new CoreAnnotations.PartOfSpeechAnnotation().getClass();
    var nerAnnotation = new CoreAnnotations.NamedEntityTagAnnotation().getClass();
    var deparseAnnotation = new TreeCoreAnnotations.TreeAnnotation().getClass();

    var sentences = annotation.get(sentencesAnnotation) as ArrayList;
    foreach (CoreMap sentence in sentences.toArray())
    {
        var tokens = (ArrayList)sentence.get(tokensAnnotation);
        Console.WriteLine("Token-POS-NER: ");
        foreach (CoreLabel token in tokens)
        {
            Console.Write($"{token.value()}-{token.get(posAnnotation)}-{token.get(nerAnnotation)} ");
        }
        Console.WriteLine("\n\n\n");

        // The tree is only present when a parse annotator was requested in `option`.
        var parsedText = (Tree)sentence.get(deparseAnnotation);
        if (parsedText != null)
        {
            Console.WriteLine("Parsed Text: ");
            new TreePrint("penn,typedDependenciesCollapsed").printTree(parsedText);
        }
    }
}
/// <summary> /// /// </summary> /// <param name="phrase"></param> /// <returns></returns> public StanfordDocumentFacade Annotate(string phrase) { if (!this.Initialized) { return(null); } try { var annotation = new edu.stanford.nlp.pipeline.Annotation(phrase); _pipeLine.annotate(annotation); return(new StanfordDocumentFacade(annotation, phrase)); } catch (Exception ex) { throw ex; } }
/// <summary>
/// Runs sentiment analysis over <paramref name="text"/> and returns the predicted sentiment
/// class (0 = Very Negative .. 4 = Very Positive) of the longest sentence.
/// </summary>
/// <param name="text">Raw text to analyze.</param>
/// <returns>The sentiment class of the longest sentence, or -1 when the text has no sentences.</returns>
public int SentiAnalysis(string text)
{
    // Annotate the input.
    var annotation = new edu.stanford.nlp.pipeline.Annotation(text);
    pipeline.annotate(annotation);

    using (var stream = new ByteArrayOutputStream())
    {
        // prettyPrint output is discarded; kept for parity with the original behavior.
        pipeline.prettyPrint(annotation, new PrintWriter(stream));

        int mainSentiment = 0;
        int longest = 0;
        bool anySentence = false;

        var sentences = annotation.get(new CoreAnnotations.SentencesAnnotation().getClass()) as ArrayList;
        foreach (CoreMap sentence in sentences)
        {
            Tree tree = (Tree)sentence.get(typeof(SentimentCoreAnnotations.SentimentAnnotatedTree));
            int sentiment = edu.stanford.nlp.neural.rnn.RNNCoreAnnotations.getPredictedClass(tree);
            String partText = sentence.ToString();

            // BUGFIX: the original returned `sentiment` inside this loop, so only the first
            // sentence was ever considered and the longest-sentence tracking was dead code.
            // Track the sentiment of the longest sentence and return it after the loop.
            if (partText.Length > longest)
            {
                mainSentiment = sentiment;
                longest = partText.Length;
            }
            anySentence = true;
        }

        if (anySentence)
        {
            return mainSentiment;
        }
    }
    // No sentences found.
    return -1;
}
/// <summary>
/// Accepts a document (text) file, lets CoreNLP process (annotate) it
/// and returns an adapter that allows iterating the output.
///
/// The text file content is normalized before annotation, and the normalized
/// text is carried by the returned facade.
/// </summary>
/// <param name="file">Text file to annotate.</param>
/// <returns>A facade over the annotated document, or null when the pipeline is not initialized.</returns>
public StanfordDocumentFacade Annotate(FileInfo file)
{
    if (!this.Initialized)
    {
        return null;
    }
    // BUGFIX: the previous `catch (Exception ex) { throw ex; }` reset the stack trace while
    // adding no handling; the pure-rethrow catch has been removed so exceptions propagate intact.
    string data;
    NormalizeFile(file, out data);
    var annotation = new edu.stanford.nlp.pipeline.Annotation(data);
    _pipeLine.annotate(annotation);
    return new StanfordDocumentFacade(annotation, data);
}
/// <summary>
/// Splits a document into sentences using CoreNLP and returns them as a
/// JSON-serialized list of Text objects, one per sentence.
/// </summary>
/// <param name="doc">Document whose RawText is split.</param>
/// <returns>JSON array of sentence Text objects.</returns>
public string SplitSentences(Models.Text doc)
{
    edu.stanford.nlp.pipeline.Annotation document = new edu.stanford.nlp.pipeline.Annotation(doc.RawText);
    PipelineDispenser.GetNewPipeline().annotate(document);

    var results = new List<Models.Text>();
    var sentences = JavaExtensions.ToList<CoreMap>((java.util.List)document.get(typeof(SentencesAnnotation)));
    foreach (CoreMap sentence in sentences)
    {
        // Each sentence becomes its own Text object carrying the sentence text.
        var item = new Text();
        item.RawText = (string)sentence.get(typeof(TextAnnotation));
        results.Add(item);
    }
    return JsonConvert.SerializeObject(results);
}
/// <summary>
/// Annotates the raw text of a document and returns CoreNLP's entity mentions
/// as a JSON array of annotations (begin/end character offsets plus NER type).
/// </summary>
/// <param name="doc">Document whose RawText is scanned for entity mentions.</param>
/// <returns>JSON-serialized list of mention annotations.</returns>
public string SuggestEntityMentions(Models.Text doc)
{
    edu.stanford.nlp.pipeline.Annotation document = new edu.stanford.nlp.pipeline.Annotation(doc.RawText);
    PipelineDispenser.GetNewPipeline().annotate(document);

    var mentions = JavaExtensions.ToList<CoreMap>((java.util.List)document.get(typeof(MentionsAnnotation)));
    var results = new List<Bean.Annotation>();
    foreach (CoreMap mention in mentions)
    {
        // Character offsets come back as java.lang.Integer and must be unboxed explicitly.
        var item = new Bean.Annotation
        {
            begin = ((Integer)mention.get(typeof(CharacterOffsetBeginAnnotation))).intValue(),
            end = ((Integer)mention.get(typeof(CharacterOffsetEndAnnotation))).intValue(),
            type = (string)mention.get(typeof(NamedEntityTagAnnotation))
        };
        results.Add(item);
    }
    return JsonConvert.SerializeObject(results);
}
/// <summary>
/// Annotates a user query with the supplied CoreNLP pipeline and returns the
/// pretty-printed annotation output as a string.
/// </summary>
/// <param name="sent">The user query text.</param>
/// <param name="pipeline1">An already-configured CoreNLP pipeline.</param>
/// <returns>The pipeline's pretty-print output for the annotated query.</returns>
public static String Parse_Data(String sent, StanfordCoreNLP pipeline1)
{
    // Annotate the query text.
    var annotation = new edu.stanford.nlp.pipeline.Annotation(sent);
    pipeline1.annotate(annotation);

    // Capture the pretty-printed result via an in-memory java stream.
    string output;
    using (var stream = new ByteArrayOutputStream())
    {
        pipeline1.prettyPrint(annotation, new PrintWriter(stream));
        System.Console.WriteLine(" it's stanford time ");
        output = stream.toString();
        stream.close();
    }
    return output;
}
/// <summary>
/// Runs CoreNLP sentiment analysis over every input segment and emits one
/// 10-column string row per sentence: [0] sentence/segment number,
/// [1] classification label, [2] predicted class probability, [3] predicted class,
/// [4]-[8] per-class prediction probabilities, [9] optional sentence text.
/// Blank segments still produce an empty row so output stays aligned with input.
/// </summary>
/// <param name="Input">Payload carrying the segments to analyze.</param>
/// <returns>A new Payload with sentence-level rows and matching segment numbers/IDs.</returns>
public Payload RunPlugin(Payload Input)
{
    Payload pData = new Payload();
    pData.FileID = Input.FileID;

    // When the caller supplies segment IDs we copy them per output row;
    // otherwise the (empty) list is shared up front.
    bool trackSegmentID = false;
    if (Input.SegmentID.Count > 0)
    {
        trackSegmentID = true;
    }
    else
    {
        pData.SegmentID = Input.SegmentID;
    }

    for (int i = 0; i < Input.StringList.Count; i++)
    {
        //seems to prematurely exit sometimes. checking to see what might cause that -- maybe blank docs?
        if (!string.IsNullOrEmpty(Input.StringList[i]) && !string.IsNullOrWhiteSpace(Input.StringList[i]))
        {
            Annotation annotation = new edu.stanford.nlp.pipeline.Annotation();
            ArrayList sentences = new ArrayList();
            List<double> SentimentValues = new List<double>();

            // Annotate this segment and pull out its sentences.
            annotation = new edu.stanford.nlp.pipeline.Annotation(Input.StringList[i]);
            pipeline.annotate(annotation);
            sentences = annotation.get(new CoreAnnotations.SentencesAnnotation().getClass()) as ArrayList;

            int SentenceCount = 0;
            foreach (CoreMap sentence in sentences)
            {
                SentenceCount++;
                Tree tree = sentence.get(new SentimentCoreAnnotations.SentimentAnnotatedTree().getClass()) as Tree;

                //add this sentence to our overall list of sentiment scores
                SentimentValues.Add(RNNCoreAnnotations.getPredictedClass(tree));

                string[] OutputString_SentenceLevel = new string[10] { "", "", "", "", "", "", "", "", "", "" };
                string Classification = GetClassification((double)RNNCoreAnnotations.getPredictedClass(tree));

                //this pulls out the prediction probabilites for each class
                // The java list renders as "[p0, p1, ...]"; strip brackets and split on commas.
                string Predictions = RNNCoreAnnotations.getPredictionsAsStringList(tree).ToString();
                string[] Predictions_Split = Predictions.Replace("[", "").Replace("]", "").Split(',');

                if (useBuiltInSentenceSplitter)
                {
                    OutputString_SentenceLevel[0] = SentenceCount.ToString();
                }
                else
                {
                    //if we're using an external sentence tokenizer, then every segment is
                    //going to be treated as its own sentence.
                    OutputString_SentenceLevel[0] = (i + 1).ToString();
                }
                OutputString_SentenceLevel[1] = Classification;
                OutputString_SentenceLevel[2] = RNNCoreAnnotations.getPredictedClassProb(tree.label()).ToString();
                OutputString_SentenceLevel[3] = RNNCoreAnnotations.getPredictedClass(tree).ToString();
                OutputString_SentenceLevel[4] = Predictions_Split[0];
                OutputString_SentenceLevel[5] = Predictions_Split[1];
                OutputString_SentenceLevel[6] = Predictions_Split[2];
                OutputString_SentenceLevel[7] = Predictions_Split[3];
                OutputString_SentenceLevel[8] = Predictions_Split[4];
                if (includeSentenceText)
                {
                    OutputString_SentenceLevel[9] = sentence.ToString();
                }

                pData.StringArrayList.Add(OutputString_SentenceLevel);
                pData.SegmentNumber.Add(Input.SegmentNumber[i]);
                if (trackSegmentID)
                {
                    pData.SegmentID.Add(Input.SegmentID[i]);
                }
            }
        }
        else
        {
            // Blank/whitespace segment: emit an empty row so rows stay aligned with input segments.
            pData.StringArrayList.Add(new string[10] { "", "", "", "", "", "", "", "", "", "" });
            pData.SegmentNumber.Add(Input.SegmentNumber[i]);
            if (trackSegmentID)
            {
                pData.SegmentID.Add(Input.SegmentID[i]);
            }
        }
    }
    return(pData);
}
/// <summary>
/// Rebuilds the document text token by token, replacing words unknown to the
/// main word-vector model with the closest match from the spelling model
/// (when one exists). Tokens are re-joined with single spaces.
///
/// The idea (word-vector spelling correction): a spelling transformation
/// vector built from pairs of correct/incorrect spellings lets us map an
/// incorrectly spelled word vector back to the nearest correct word.
/// </summary>
/// <param name="doc">Document whose RawText is spell-corrected.</param>
/// <returns>The space-joined corrected text.</returns>
public string SpellCorrect(Models.Text doc)
{
    string fulltext = doc.RawText;

    // These next two lines really should not be done per call. They should be moved to startup.
    var distance = new Distance(AppDomain.CurrentDomain.BaseDirectory + "\\wordvec\\my_output_model.bin");
    var spellingDistance = new Distance(AppDomain.CurrentDomain.BaseDirectory + "\\wordvec\\spelling_model.bin");

    // Tokenize the document with CoreNLP so we can correct individual words.
    edu.stanford.nlp.pipeline.Annotation document = new edu.stanford.nlp.pipeline.Annotation(fulltext);
    PipelineDispenser.GetNewPipeline().annotate(document);

    var correctedWords = new List<string>();
    foreach (CoreMap sentence in JavaExtensions.ToList<CoreMap>((java.util.List)document.get(typeof(SentencesAnnotation))))
    {
        foreach (CoreLabel token in JavaExtensions.ToList<CoreMap>((java.util.List)sentence.get(typeof(TokensAnnotation))))
        {
            // Look the token up in both the normal word space and the spelling word space.
            BestWord[] knownMatches = distance.Search(token.word());
            BestWord[] spellingMatches = spellingDistance.Search(token.word());

            // If the main vector space knows the word, assume it is spelled right.
            // Otherwise assume a spelling mistake and take the spelling model's best hit, if any.
            string chosen = token.word();
            if (knownMatches.Length == 0 && spellingMatches.Length != 0)
            {
                chosen = spellingMatches[0].Word;
            }
            correctedWords.Add(chosen);
        }
    }

    // NOTE: a single space is not always the appropriate whitespace when
    // reconstructing the document, exactly as in the original implementation.
    return string.Join(" ", correctedWords);
}
/// <summary>
/// Background-worker entry point: loads a CoreNLP sentiment pipeline, analyzes every
/// .txt file in the selected folder, and writes two CSV outputs — one row per file
/// (mean/SD sentiment) and one row per sentence.
/// </summary>
/// <param name="sender">The BackgroundWorker raising the event.</param>
/// <param name="e">Carries the DictionaryData argument (folder and output locations).</param>
private void BgWorkerClean_DoWork(object sender, DoWorkEventArgs e)
{
    DictionaryData DictData = (DictionaryData)e.Argument;

    //report what we're working on
    FilenameLabel.Invoke((MethodInvoker) delegate
    {
        FilenameLabel.Text = "Loading CoreNLP models... please wait...";
    });

    //largely taken from here: https://github.com/sergey-tihon/Stanford.NLP.NET/issues/39
    var jarRoot = @"stanford-corenlp-full-2018-02-27\";
    var props = new java.util.Properties();
    props.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
    props.setProperty("sutime.binders", "0");

    // Switch into the model folder so CoreNLP can find its model files.
    // NOTE(review): curDir is captured but the working directory is never restored
    // afterwards in this method — confirm whether that is intentional.
    var curDir = Environment.CurrentDirectory;
    Directory.SetCurrentDirectory(Path.Combine(Path.GetDirectoryName(AppDomain.CurrentDomain.BaseDirectory), jarRoot));
    var pipeline = new StanfordCoreNLP(props);

    //selects the text encoding based on user selection
    Encoding SelectedEncoding = null;
    this.Invoke((MethodInvoker) delegate()
    {
        SelectedEncoding = Encoding.GetEncoding(EncodingDropdown.SelectedItem.ToString());
    });

    //get the list of files
    var SearchDepth = SearchOption.TopDirectoryOnly;
    if (ScanSubfolderCheckbox.Checked)
    {
        SearchDepth = SearchOption.AllDirectories;
    }
    var files = Directory.EnumerateFiles(DictData.TextFileFolder, "*.txt", SearchDepth);

    //try
    //{

    //open up the output file (file-level results)
    using (StreamWriter outputFile = new StreamWriter(new FileStream(DictData.OutputFileLocation, FileMode.Create), SelectedEncoding))
    {
        // second output file: sentence-level results
        using (StreamWriter outputFileSentences = new StreamWriter(new FileStream(AddSuffix(DictData.OutputFileLocation, "_Sentences"), FileMode.Create), SelectedEncoding))
        {
            //write the header row to the output file
            StringBuilder HeaderString = new StringBuilder();
            HeaderString.Append("\"Filename\",\"Sentences\",\"Classification\",\"Classification_M\",\"Classification_SD\"");
            outputFile.WriteLine(HeaderString.ToString());

            StringBuilder HeaderStringSentence = new StringBuilder();
            HeaderStringSentence.Append("\"Filename\",\"SentNumber\",\"SentenceText\",\"Classification\",\"Class_Prob\",\"Class_Number\"");
            outputFileSentences.WriteLine(HeaderStringSentence.ToString());

            foreach (string fileName in files)
            {
                //set up our variables to report
                string Filename_Clean = Path.GetFileName(fileName);
                // NOTE(review): DictionaryResults is declared but never used in this method.
                Dictionary<string, int> DictionaryResults = new Dictionary<string, int>();

                //report what we're working on
                FilenameLabel.Invoke((MethodInvoker) delegate
                {
                    FilenameLabel.Text = "Analyzing: " + Filename_Clean;
                });

                //read in and trim the text file (no case conversion happens here)
                string InputText = System.IO.File.ReadAllText(fileName, SelectedEncoding).Trim();

                // ---- Analyze the text ----
                var annotation = new edu.stanford.nlp.pipeline.Annotation(InputText);
                pipeline.annotate(annotation);

                List<double> SentimentValues = new List<double>();
                var sentences = annotation.get(new CoreAnnotations.SentencesAnnotation().getClass()) as ArrayList;

                int SentenceCount = 0;
                foreach (CoreMap sentence in sentences)
                {
                    SentenceCount++;
                    Tree tree = sentence.get(new SentimentCoreAnnotations.SentimentAnnotatedTree().getClass()) as Tree;

                    //add this sentence to our overall list of sentiment scores
                    SentimentValues.Add(RNNCoreAnnotations.getPredictedClass(tree));

                    // ---- Write sentence-level output row ----
                    string[] OutputString_SentenceLevel = new string[6];
                    string Classification = GetClassification((double)RNNCoreAnnotations.getPredictedClass(tree));
                    OutputString_SentenceLevel[0] = "\"" + Filename_Clean + "\"";
                    OutputString_SentenceLevel[1] = SentenceCount.ToString();
                    // embedded quotes are doubled for CSV escaping
                    OutputString_SentenceLevel[2] = "\"" + sentence.ToString().Replace("\"", "\"\"") + "\"";
                    OutputString_SentenceLevel[3] = Classification;
                    OutputString_SentenceLevel[4] = RNNCoreAnnotations.getPredictedClassProb(tree.label()).ToString();
                    OutputString_SentenceLevel[5] = RNNCoreAnnotations.getPredictedClass(tree).ToString();
                    outputFileSentences.WriteLine(String.Join(",", OutputString_SentenceLevel));
                }

                //write output at the file level: sentence count, mean classification, mean, SD
                string[] OutputString = new string[5];
                OutputString[0] = "\"" + Filename_Clean + "\"";
                OutputString[1] = SentenceCount.ToString();
                OutputString[2] = GetClassification(SentimentValues.Average());
                OutputString[3] = SentimentValues.Average().ToString();
                OutputString[4] = StandardDeviation(SentimentValues).ToString();
                outputFile.WriteLine(String.Join(",", OutputString));
            }

        //this is the closing bracket for the sentence-level "using" filestream
        }

    //this is the closing bracket for the document-level "using" filestream
    }

    //}
    //catch
    //{
    //    MessageBox.Show("Senti-Gent encountered an issue somewhere while trying to analyze your texts. The most common cause of this is trying to open your output file while Senti-Gent is still running. Did any of your input files move, or is your output file being opened/modified by another application?", "Error while analyzing", MessageBoxButtons.OK, MessageBoxIcon.Error);
    //}
}