/// <summary>
/// Demo entry point: builds a CoreNLP pipeline, annotates one document and prints a
/// selection of the resulting annotations (token, sentence text, POS/NER tags, parses,
/// KBP relations, entity mentions, coreference chains and quotes) to standard output.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
public static void Main(string[] args)
{
    // Pipeline configuration: the annotators to run, plus the neural coref algorithm.
    Properties pipelineProps = new Properties();
    pipelineProps.SetProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,depparse,coref,kbp,quote");
    pipelineProps.SetProperty("coref.algorithm", "neural");
    StanfordCoreNLP nlp = new StanfordCoreNLP(pipelineProps);

    // NOTE(review): `text` is not declared in this method; it is expected to come from
    // the enclosing class - confirm.
    CoreDocument doc = new CoreDocument(text);
    nlp.Annotate(doc);

    var stdout = System.Console.Out;

    // Token at index 10 (zero-based, i.e. the 11th token of the document).
    CoreLabel sampleToken = doc.Tokens()[10];
    stdout.WriteLine("Example: token");
    stdout.WriteLine(sampleToken);
    stdout.WriteLine();

    // Text of the first sentence.
    var allSentences = doc.Sentences();
    string firstSentenceText = allSentences[0].Text();
    stdout.WriteLine("Example: sentence");
    stdout.WriteLine(firstSentenceText);
    stdout.WriteLine();

    // Everything below labelled "second sentence" refers to index 1.
    CoreSentence secondSentence = allSentences[1];

    // Part-of-speech tags of the second sentence.
    IList<string> partOfSpeechTags = secondSentence.PosTags();
    stdout.WriteLine("Example: pos tags");
    stdout.WriteLine(partOfSpeechTags);
    stdout.WriteLine();

    // Named-entity tags of the second sentence.
    IList<string> namedEntityTags = secondSentence.NerTags();
    stdout.WriteLine("Example: ner tags");
    stdout.WriteLine(namedEntityTags);
    stdout.WriteLine();

    // Constituency parse of the second sentence.
    Tree constituencyTree = secondSentence.ConstituencyParse();
    stdout.WriteLine("Example: constituency parse");
    stdout.WriteLine(constituencyTree);
    stdout.WriteLine();

    // Dependency parse of the second sentence.
    SemanticGraph dependencyGraph = secondSentence.DependencyParse();
    stdout.WriteLine("Example: dependency parse");
    stdout.WriteLine(dependencyGraph);
    stdout.WriteLine();

    // KBP relations found in the fifth sentence (index 4); only the first one is printed.
    IList<RelationTriple> fifthSentenceRelations = allSentences[4].Relations();
    stdout.WriteLine("Example: relation");
    stdout.WriteLine(fifthSentenceRelations[0]);
    stdout.WriteLine();

    // Entity mentions of the second sentence.
    IList<CoreEntityMention> mentions = secondSentence.EntityMentions();
    stdout.WriteLine("Example: entity mentions");
    stdout.WriteLine(mentions);
    stdout.WriteLine();

    // Coreference between entity mentions: second mention (index 1) of the fourth sentence (index 3).
    CoreEntityMention mention = allSentences[3].EntityMentions()[1];
    stdout.WriteLine("Example: original entity mention");
    stdout.WriteLine(mention);
    stdout.WriteLine("Example: canonical entity mention");
    stdout.WriteLine(mention.CanonicalEntityMention().Get());
    stdout.WriteLine();

    // Document-wide coreference chains.
    IDictionary<int, CorefChain> chainsById = doc.CorefChains();
    stdout.WriteLine("Example: coref chains for document");
    stdout.WriteLine(chainsById);
    stdout.WriteLine();

    // First quote found in the document.
    IList<CoreQuote> allQuotes = doc.Quotes();
    CoreQuote firstQuote = allQuotes[0];
    stdout.WriteLine("Example: quote");
    stdout.WriteLine(firstQuote);
    stdout.WriteLine();

    // Original speaker of the quote; Speaker() yields an optional that is unwrapped with Get().
    stdout.WriteLine("Example: original speaker of quote");
    stdout.WriteLine(firstQuote.Speaker().Get());
    stdout.WriteLine();

    // Canonical speaker of the quote, unwrapped the same way.
    stdout.WriteLine("Example: canonical speaker of quote");
    stdout.WriteLine(firstQuote.CanonicalSpeaker().Get());
    stdout.WriteLine();
}
/// <summary>
/// Decides whether two entity mentions refer to the same entity using only their surface
/// form (tokens and text), without consulting entity links.
/// </summary>
/// <remarks>
/// The decision proceeds in this order:
/// 1. Person mentions with at least two tokens each and the same last token (compared
///    case-insensitively): <c>true</c> if <c>FirstNameMatch</c> accepts the lower-cased first
///    tokens; <c>false</c> if it rejects them and both mentions have exactly two tokens.
/// 2. Otherwise the larger of the two directional <c>ApproximateEntityMatchScore</c> values is
///    used: exactly 1.0 is a match, below 0.34 is a mismatch.
/// 3. Person mentions scoring above 0.49 match when a single-token mention equals the first
///    or last token of the other (multi-token) mention, or when the score exceeds 0.65.
/// 4. Organization mentions match when the score exceeds 0.79.
/// The entity type is taken from <paramref name="emOne"/> only.
/// </remarks>
/// <param name="emOne">First mention; its entity type drives the type-specific rules.</param>
/// <param name="emTwo">Second mention.</param>
/// <returns><c>true</c> if the mentions are judged to denote the same entity.</returns>
protected internal virtual bool SameEntityWithoutLinking(CoreEntityMention emOne, CoreEntityMention emTwo)
{
    string type = emOne.EntityType();

    // Static string.Equals is null-safe, so a mention without an entity type falls through to
    // the generic score rules instead of throwing, and both type tests compare the same way.
    bool isPerson = string.Equals(type, NerPerson);
    bool isOrganization = string.Equals(type, NerOrganization);

    // Fetch the token lists once instead of on every access.
    var tokensOne = emOne.Tokens();
    var tokensTwo = emTwo.Tokens();
    int countOne = tokensOne.Count;
    int countTwo = tokensTwo.Count;

    // Lower-casing is culture-invariant so the outcome does not depend on the thread culture.
    if (isPerson && countOne >= 2 && countTwo >= 2
        && tokensOne[countOne - 1].Word().ToLowerInvariant().Equals(tokensTwo[countTwo - 1].Word().ToLowerInvariant()))
    {
        string firstNameOne = tokensOne[0].Word().ToLowerInvariant();
        string firstNameTwo = tokensTwo[0].Word().ToLowerInvariant();
        if (FirstNameMatch(firstNameOne, firstNameTwo))
        {
            return true;
        }
        // Plain "first last" names that share a surname but not a first name are different people.
        if (countOne == 2 && countTwo == 2)
        {
            return false;
        }
    }

    // Proper match score: the better of the two directions.
    string textOne = emOne.Text();
    string textTwo = emTwo.Text();
    double matchScore = Math.Max(ApproximateEntityMatchScore(textOne, textTwo), ApproximateEntityMatchScore(textTwo, textOne));

    // Some simple cases
    if (matchScore == 1.0)
    {
        return true;
    }
    if (matchScore < 0.34)
    {
        return false;
    }

    if (isPerson && matchScore > 0.49)
    {
        // Both entities are more than one character
        if (Math.Min(textOne.Length, textTwo.Length) > 1)
        {
            // The two cases are mutually exclusive: exactly one mention is a single token
            // and the other has several.
            bool oneIsSingle = countOne == 1 && countTwo > 1;
            bool twoIsSingle = countTwo == 1 && countOne > 1;
            if (oneIsSingle || twoIsSingle)
            {
                var fullName = oneIsSingle ? tokensTwo : tokensOne;
                string loneWord = (oneIsSingle ? tokensOne : tokensTwo)[0].Word();

                // Last names match
                if (Sharpen.Runtime.EqualsIgnoreCase(fullName[fullName.Count - 1].Word(), loneWord))
                {
                    return true;
                }
                // First names match
                if (Sharpen.Runtime.EqualsIgnoreCase(fullName[0].Word(), loneWord))
                {
                    return true;
                }
            }
        }
        if (matchScore > 0.65)
        {
            return true;
        }
    }

    if (isOrganization && matchScore > 0.79)
    {
        return true;
    }
    return false;
}
/// <summary>
/// Copies the DATE, TIME, LOCATION, ORGANIZATION and URL entity mentions of an annotated
/// document into the matching per-type collection (<c>DateList</c>, <c>TimeList</c>,
/// <c>LocList</c>, <c>OrgList</c>, <c>URLList</c>), keyed by the integer value of the Lucene
/// document's "id" field.
/// </summary>
/// <remarks>
/// The first mention of a type for a given id creates the entry; every further mention is
/// appended to the existing entry separated by ", ". Mentions of any other entity type are
/// ignored.
/// </remarks>
/// <param name="coredoc">Annotated CoreNLP document; nothing happens when it is null.</param>
/// <param name="document">Lucene document whose "id" field supplies the collection key.</param>
private static void ExtractNERTags(CoreDocument coredoc, Lucene.Net.Documents.Document document)
{
    if (coredoc == null)
    {
        return;
    }

    var nerList = coredoc.entityMentions();
    // Guard against a missing mention list (e.g. no NER annotation on the document) -
    // treated the same as an empty list.
    if (nerList == null)
    {
        return;
    }

    int mentionCount = nerList.size();
    for (int j = 0; j < mentionCount; j++)
    {
        CoreEntityMention em = (CoreEntityMention)nerList.get(j);
        string mentionText = em.text();

        // The id is looked up only inside a matching case, so documents without any
        // relevant mention never touch the "id" field.
        switch (em.entityType())
        {
            case "DATE":
            {
                var key = document.GetField("id").GetInt32Value().Value;
                if (!DateList.ContainsKey(key))
                {
                    DateList.Add(key, mentionText);
                }
                else
                {
                    DateList.TryUpdate(key, DateList[key] + ", " + mentionText);
                }
                break;
            }
            case "TIME":
            {
                var key = document.GetField("id").GetInt32Value().Value;
                if (!TimeList.ContainsKey(key))
                {
                    TimeList.Add(key, mentionText);
                }
                else
                {
                    TimeList.TryUpdate(key, TimeList[key] + ", " + mentionText);
                }
                break;
            }
            case "LOCATION":
            {
                var key = document.GetField("id").GetInt32Value().Value;
                if (!LocList.ContainsKey(key))
                {
                    LocList.Add(key, mentionText);
                }
                else
                {
                    LocList.TryUpdate(key, LocList[key] + ", " + mentionText);
                }
                break;
            }
            case "ORGANIZATION":
            {
                var key = document.GetField("id").GetInt32Value().Value;
                if (!OrgList.ContainsKey(key))
                {
                    OrgList.Add(key, mentionText);
                }
                else
                {
                    OrgList.TryUpdate(key, OrgList[key] + ", " + mentionText);
                }
                break;
            }
            case "URL":
            {
                var key = document.GetField("id").GetInt32Value().Value;
                if (!URLList.ContainsKey(key))
                {
                    URLList.Add(key, mentionText);
                }
                else
                {
                    // Append to the URL entry itself; the previous code read OrgList here,
                    // which mixed organization text into URLs or failed on a missing key.
                    URLList.TryUpdate(key, URLList[key] + ", " + mentionText);
                }
                break;
            }
            default:
            {
                break;
            }
        }
    }
}