示例#1
0
        // static demo class
        /// <summary>
        /// Command-line demo: annotates a text file with a <c>StanfordCoreNLP</c> pipeline, compiles every
        /// line of a rules file into a <c>TokenSequencePattern</c>, and prints the tokens and the
        /// non-overlapping pattern matches (with all their groups) for each sentence.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c> is the rules file, <c>args[1]</c> is the text file to annotate, and the optional
        /// <c>args[2]</c> is the output file. Without <c>args[2]</c> the report is written to the console.
        /// With fewer than two arguments a usage line is written to standard error and nothing else happens.
        /// </param>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            if (args.Length < 2)
            {
                System.Console.Error.WriteLine("TokensRegexMatcher rules file [outFile]");
                return;
            }
            string      rules = args[0];
            PrintWriter @out;

            if (args.Length > 2)
            {
                @out = new PrintWriter(args[2]);
            }
            else
            {
                @out = new PrintWriter(System.Console.Out);
            }
            // NOTE(review): the writer is only flushed, never closed; confirm which close/dispose member
            // this PrintWriter port exposes before releasing the output file explicitly.
            try
            {
                StanfordCoreNLP pipeline   = new StanfordCoreNLP(PropertiesUtils.AsProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
                Annotation      annotation = new Annotation(IOUtils.SlurpFileNoExceptions(args[1]));

                pipeline.Annotate(annotation);
                // Load lines of file as TokenSequencePatterns
                IList <TokenSequencePattern> tokenSequencePatterns = new List <TokenSequencePattern>();

                foreach (string line in ObjectBank.GetLineIterator(rules))
                {
                    TokenSequencePattern pattern = TokenSequencePattern.Compile(line);
                    tokenSequencePatterns.Add(pattern);
                }
                // The pattern list is fixed from here on, so build the combined matcher once instead of
                // rebuilding it for every sentence.
                MultiPatternMatcher <ICoreMap> multiMatcher = TokenSequencePattern.GetMultiPatternMatcher(tokenSequencePatterns);
                IList <ICoreMap>               sentences    = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
                int i = 0;

                foreach (ICoreMap sentence in sentences)
                {
                    IList <CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
                    @out.Println("Sentence #" + ++i);
                    @out.Print("  Tokens:");
                    foreach (CoreLabel token in tokens)
                    {
                        @out.Print(' ');
                        @out.Print(token.ToShortString("Text", "PartOfSpeech", "NamedEntityTag"));
                    }
                    @out.Println();
                    IList <ISequenceMatchResult <ICoreMap> > answers = multiMatcher.FindNonOverlapping(tokens);
                    int j = 0;
                    foreach (ISequenceMatchResult <ICoreMap> matched in answers)
                    {
                        @out.Println("  Match #" + ++j);
                        // Group 0 is printed too, hence the inclusive upper bound.
                        for (int k = 0; k <= matched.GroupCount(); k++)
                        {
                            @out.Println("    group " + k + " = " + matched.Group(k));
                        }
                    }
                }
            }
            finally
            {
                // Flush even when annotation or matching throws, so output produced so far is not lost.
                @out.Flush();
            }
        }
 /// <summary>
 /// Stores the supplied sentences, sentence ids, patterns, label, stop-word flags and constants on
 /// this instance, and builds the multi-pattern matcher from the keys of <paramref name="patterns"/>.
 /// </summary>
 /// <param name="sents">Map from string keys to <c>DataInstance</c> values; kept as-is.</param>
 /// <param name="sentids">List of sentence ids; kept as-is.</param>
 /// <param name="patterns">Map whose keys are the token sequence patterns fed to the matcher.</param>
 /// <param name="label">Label stored on this instance.</param>
 /// <param name="removeStopWordsFromSelectedPhrases">Flag stored on this instance.</param>
 /// <param name="removePhrasesWithStopWords">Flag stored on this instance.</param>
 /// <param name="cv">Constants and variables object stored in <c>constVars</c>.</param>
 public ApplyPatternsMulti(
     IDictionary <string, DataInstance> sents,
     IList <string> sentids,
     IDictionary <TokenSequencePattern, E> patterns,
     string label,
     bool removeStopWordsFromSelectedPhrases,
     bool removePhrasesWithStopWords,
     ConstantsAndVariables cv)
 {
     // Inputs that are kept verbatim.
     this.sents    = sents;
     this.sentids  = sentids;
     this.patterns = patterns;
     this.label    = label;

     // Stop-word handling switches.
     this.removeStopWordsFromSelectedPhrases = removeStopWordsFromSelectedPhrases;
     this.removePhrasesWithStopWords         = removePhrasesWithStopWords;

     this.constVars = cv;

     // One combined matcher over every pattern key.
     this.multiPatternMatcher = TokenSequencePattern.GetMultiPatternMatcher(patterns.Keys);
 }
        // static main only
        /// <summary>
        /// Self-contained demo: annotates a fixed two-sentence text, compiles two hard-coded token
        /// sequence patterns, and writes each sentence's tokens and its non-overlapping matches
        /// (whole match plus the <c>$who</c> and <c>$age</c> groups) to the console.
        /// </summary>
        /// <param name="args">Not used.</param>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            System.IO.TextWriter console = System.Console.Out;

            StanfordCoreNLP nlp = new StanfordCoreNLP(PropertiesUtils.AsProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
            Annotation      doc = new Annotation("Casey is 21. Sally Atkinson's age is 30.");

            nlp.Annotate(doc);
            IList <ICoreMap> sentenceList = doc.Get(typeof(CoreAnnotations.SentencesAnnotation));

            // Compile each pattern source in declaration order.
            string[] patternSources =
            {
                "(?$who [ ner: PERSON]+ ) /is/ (?$age [ pos: CD ] )",
                "(?$who [ ner: PERSON]+ ) /'s/ /age/ /is/ (?$age [ pos: CD ] )"
            };
            IList <TokenSequencePattern> compiled = new List <TokenSequencePattern>();
            foreach (string source in patternSources)
            {
                compiled.Add(TokenSequencePattern.Compile(source));
            }
            MultiPatternMatcher <ICoreMap> matcher = TokenSequencePattern.GetMultiPatternMatcher(compiled);

            int sentenceNumber = 0;
            foreach (ICoreMap current in sentenceList)
            {
                sentenceNumber++;
                IList <CoreLabel> labels = current.Get(typeof(CoreAnnotations.TokensAnnotation));

                // Header, then every token on one line, each preceded by a single space.
                console.WriteLine("Sentence #" + sentenceNumber);
                console.Write("  Tokens:");
                foreach (CoreLabel label in labels)
                {
                    console.Write(' ');
                    console.Write(label.ToShortString("Text", "PartOfSpeech", "NamedEntityTag"));
                }
                console.WriteLine();

                int matchNumber = 0;
                foreach (ISequenceMatchResult <ICoreMap> hit in matcher.FindNonOverlapping(labels))
                {
                    matchNumber++;
                    console.WriteLine("  Match #" + matchNumber);
                    console.WriteLine("    match: " + hit.Group(0));
                    console.WriteLine("      who: " + hit.Group("$who"));
                    console.WriteLine("      age: " + hit.Group("$age"));
                }
            }
        }