// static demo class
/// <summary>
/// Command-line demo: annotates the text read from <c>args[1]</c> with a CoreNLP pipeline,
/// compiles every line of the rules file <c>args[0]</c> into a <c>TokenSequencePattern</c>,
/// and prints, for each sentence, its tokens and every non-overlapping pattern match
/// together with all of the match's groups.
/// </summary>
/// <param name="args">
/// <c>args[0]</c>: rules file, one pattern per line;
/// <c>args[1]</c>: file whose text is annotated;
/// <c>args[2]</c> (optional): output file; when absent the output goes to the console.
/// </param>
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    if (args.Length < 2)
    {
        System.Console.Error.WriteLine("TokensRegexMatcher rules file [outFile]");
        return;
    }
    string rules = args[0];
    PrintWriter @out;
    if (args.Length > 2)
    {
        @out = new PrintWriter(args[2]);
    }
    else
    {
        @out = new PrintWriter(System.Console.Out);
    }
    try
    {
        StanfordCoreNLP pipeline = new StanfordCoreNLP(PropertiesUtils.AsProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
        Annotation annotation = new Annotation(IOUtils.SlurpFileNoExceptions(args[1]));
        pipeline.Annotate(annotation);

        // Load lines of file as TokenSequencePatterns
        IList<TokenSequencePattern> tokenSequencePatterns = new List<TokenSequencePattern>();
        foreach (string line in ObjectBank.GetLineIterator(rules))
        {
            TokenSequencePattern pattern = TokenSequencePattern.Compile(line);
            tokenSequencePatterns.Add(pattern);
        }

        // The pattern list does not change while sentences are processed, so the combined
        // matcher is built once here instead of once per sentence.
        MultiPatternMatcher<ICoreMap> multiMatcher = TokenSequencePattern.GetMultiPatternMatcher(tokenSequencePatterns);

        IList<ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
        int i = 0;
        foreach (ICoreMap sentence in sentences)
        {
            IList<CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
            @out.Println("Sentence #" + ++i);
            @out.Print(" Tokens:");
            foreach (CoreLabel token in tokens)
            {
                @out.Print(' ');
                @out.Print(token.ToShortString("Text", "PartOfSpeech", "NamedEntityTag"));
            }
            @out.Println();

            IList<ISequenceMatchResult<ICoreMap>> answers = multiMatcher.FindNonOverlapping(tokens);
            int j = 0;
            foreach (ISequenceMatchResult<ICoreMap> matched in answers)
            {
                @out.Println(" Match #" + ++j);
                // Group 0 is included, and the bound is inclusive, so every group up to
                // GroupCount() is printed.
                for (int k = 0; k <= matched.GroupCount(); k++)
                {
                    @out.Println(" group " + k + " = " + matched.Group(k));
                }
            }
        }
    }
    finally
    {
        // Flush even when annotation or matching throws, so output produced so far is not lost.
        @out.Flush();
    }
}
/// <summary>
/// Stores the supplied sentences, sentence ids, patterns, label and filtering options on the
/// instance, and builds a single multi-pattern matcher from the keys of
/// <paramref name="patterns"/>.
/// </summary>
/// <param name="sents">Data instances keyed by string (presumably sentence id; confirm against callers).</param>
/// <param name="sentids">List of string ids kept on the instance (presumably the sentences to process; confirm against callers).</param>
/// <param name="patterns">Token-sequence patterns mapped to their associated values; the keys feed the matcher.</param>
/// <param name="label">Label string kept on the instance.</param>
/// <param name="removeStopWordsFromSelectedPhrases">Option flag, stored as given.</param>
/// <param name="removePhrasesWithStopWords">Option flag, stored as given.</param>
/// <param name="cv">Shared constants and variables, stored in <c>constVars</c>.</param>
public ApplyPatternsMulti(IDictionary<string, DataInstance> sents, IList<string> sentids, IDictionary<TokenSequencePattern, E> patterns, string label, bool removeStopWordsFromSelectedPhrases, bool removePhrasesWithStopWords, ConstantsAndVariables cv)
{
    // Input data.
    this.sents = sents;
    this.sentids = sentids;
    this.patterns = patterns;

    // One matcher that covers every pattern key at once.
    ICollection<TokenSequencePattern> patternKeys = patterns.Keys;
    multiPatternMatcher = TokenSequencePattern.GetMultiPatternMatcher(patternKeys);

    // Labelling and filtering configuration.
    this.label = label;
    this.constVars = cv;
    this.removeStopWordsFromSelectedPhrases = removeStopWordsFromSelectedPhrases;
    this.removePhrasesWithStopWords = removePhrasesWithStopWords;
}
// static main only
/// <summary>
/// Self-contained demo: annotates a fixed two-sentence text, compiles two hard-coded
/// TokensRegex patterns with named groups <c>$who</c> and <c>$age</c>, and writes each
/// sentence's tokens and non-overlapping matches to the console.
/// </summary>
/// <param name="args">Not used.</param>
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    string[] patternSources = new string[]
    {
        "(?$who [ ner: PERSON]+ ) /is/ (?$age [ pos: CD ] )",
        "(?$who [ ner: PERSON]+ ) /'s/ /age/ /is/ (?$age [ pos: CD ] )"
    };

    // Run the annotation pipeline over the sample text.
    StanfordCoreNLP pipeline = new StanfordCoreNLP(PropertiesUtils.AsProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
    Annotation document = new Annotation("Casey is 21. Sally Atkinson's age is 30.");
    pipeline.Annotate(document);
    IList<ICoreMap> sentences = document.Get(typeof(CoreAnnotations.SentencesAnnotation));

    // Compile each pattern source, then combine them into one matcher.
    IList<TokenSequencePattern> compiledPatterns = new List<TokenSequencePattern>();
    for (int p = 0; p < patternSources.Length; p++)
    {
        compiledPatterns.Add(TokenSequencePattern.Compile(patternSources[p]));
    }
    MultiPatternMatcher<ICoreMap> matcher = TokenSequencePattern.GetMultiPatternMatcher(compiledPatterns);

    for (int s = 0; s < sentences.Count; s++)
    {
        IList<CoreLabel> tokens = sentences[s].Get(typeof(CoreAnnotations.TokensAnnotation));

        // Sentence header and token dump; numbering is 1-based.
        System.Console.Out.WriteLine("Sentence #" + (s + 1));
        System.Console.Out.Write(" Tokens:");
        foreach (CoreLabel tok in tokens)
        {
            System.Console.Out.Write(' ');
            System.Console.Out.Write(tok.ToShortString("Text", "PartOfSpeech", "NamedEntityTag"));
        }
        System.Console.Out.WriteLine();

        // Report the whole match plus the two named groups for every hit.
        IList<ISequenceMatchResult<ICoreMap>> hits = matcher.FindNonOverlapping(tokens);
        for (int m = 0; m < hits.Count; m++)
        {
            ISequenceMatchResult<ICoreMap> hit = hits[m];
            System.Console.Out.WriteLine(" Match #" + (m + 1));
            System.Console.Out.WriteLine(" match: " + hit.Group(0));
            System.Console.Out.WriteLine(" who: " + hit.Group("$who"));
            System.Console.Out.WriteLine(" age: " + hit.Group("$age"));
        }
    }
}