Exemplo n.º 1
0
        /// <summary>
        /// Shows alternative ways of invoking the parser: on text that is already
        /// tokenized, and on raw text that is tokenized as a single sentence.
        /// </summary>
        /// <remarks>
        /// The first parse is printed with <c>PennPrint</c>. The second parse is
        /// converted to typed dependencies, which are written to the console, and is
        /// then printed again through a <c>TreePrint</c> object. The format string
        /// passed to the <c>TreePrint</c> constructor selects which results are
        /// printed. Per the original note, output can also be captured by handing a
        /// writer to <c>TreePrint.PrintTree</c> (that overload is not used here).
        /// The sample sentences are English.
        /// </remarks>
        /// <param name="lp">Parser applied to both sample sentences.</param>
        public static void DemoAPI(LexicalizedParser lp)
        {
            // Option 1: parse a list of words that is already correctly tokenized.
            string[] tokens = { "This", "is", "an", "easy", "sentence", "." };
            IList <CoreLabel> preTokenized = SentenceUtils.ToCoreLabelList(tokens);
            Tree tree = lp.Apply(preTokenized);

            tree.PennPrint();
            System.Console.Out.WriteLine();

            // Option 2: load an explicit tokenizer and run it over raw text.
            string rawSentence = "This is another sentence.";
            ITokenizerFactory <CoreLabel> factory = PTBTokenizer.Factory(new CoreLabelTokenFactory(), string.Empty);
            ITokenizer <CoreLabel> tokenizer = factory.GetTokenizer(new StringReader(rawSentence));
            IList <CoreLabel> tokenizedWords = tokenizer.Tokenize();

            tree = lp.Apply(tokenizedWords);

            // The language pack comes from the parser itself
            // (PennTreebankLanguagePack for English, per the original note).
            ITreebankLanguagePack languagePack = lp.TreebankLanguagePack();
            IGrammaticalStructureFactory structureFactory = languagePack.GrammaticalStructureFactory();
            GrammaticalStructure structure = structureFactory.NewGrammaticalStructure(tree);
            IList <TypedDependency> dependencies = structure.TypedDependenciesCCprocessed();

            System.Console.Out.WriteLine(dependencies);
            System.Console.Out.WriteLine();

            // Option 3: a TreePrint object can print both trees and dependencies.
            TreePrint printer = new TreePrint("penn,typedDependenciesCollapsed");

            printer.PrintTree(tree);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Splits <paramref name="str"/> on runs of whitespace and converts the
        /// resulting tokens with the list-based <c>StringToIOB</c> overload.
        /// </summary>
        /// <param name="str">Text to tokenize; leading and trailing whitespace is trimmed before splitting.</param>
        /// <param name="segMarker">Segmentation marker character, forwarded unchanged to the list-based overload.</param>
        /// <returns>The value returned by the list-based overload, which is called with <c>false</c> as its third argument.</returns>
        public static IList <CoreLabel> StringToIOB(string str, char segMarker)
        {
            // Whitespace tokenization. "\\s+" is a regular expression, so it must go
            // through Regex.Split: string.Split(string) treats its argument as a
            // literal separator and would never match ordinary whitespace.
            string[] tokens = System.Text.RegularExpressions.Regex.Split(str.Trim(), "\\s+");
            IList <CoreLabel> toks = SentenceUtils.ToCoreLabelList(tokens);

            return(StringToIOB(toks, segMarker, false));
        }
Exemplo n.º 3
0
        /// <summary>
        /// Checks that a trigger built from a single token-sequence pattern reports
        /// no pattern for two inputs that lack most of the pattern's words, and one
        /// pattern for an input containing all of them.
        /// </summary>
        public virtual void TestSimpleTrigger()
        {
            IList <TokenSequencePattern> compiledPatterns = new List <TokenSequencePattern>
            {
                TokenSequencePattern.Compile("which word should be matched")
            };
            MultiPatternMatcher.ISequencePatternTrigger <ICoreMap> patternTrigger =
                new MultiPatternMatcher.BasicSequencePatternTrigger <ICoreMap>(new CoreMapNodePatternTrigger(compiledPatterns));

            // Input sharing no word with the pattern: nothing is triggered.
            ICollection <SequencePattern <ICoreMap> > hits = patternTrigger.Apply(SentenceUtils.ToCoreLabelList("one", "two", "three"));
            NUnit.Framework.Assert.AreEqual(0, hits.Count);

            // Input with only the first word of the pattern: still nothing is triggered.
            hits = patternTrigger.Apply(SentenceUtils.ToCoreLabelList("which"));
            NUnit.Framework.Assert.AreEqual(0, hits.Count);

            // Input with every word of the pattern: exactly one pattern is triggered.
            hits = patternTrigger.Apply(SentenceUtils.ToCoreLabelList("which", "word", "should", "be", "matched"));
            NUnit.Framework.Assert.AreEqual(1, hits.Count);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Checks the string form of an <c>Annotation</c> built from a list of
        /// sentences: first when the sentence carries only tokens, then after a
        /// text annotation has been set on that sentence.
        /// </summary>
        public virtual void TestFromList()
        {
            IList <CoreLabel> tokens = SentenceUtils.ToCoreLabelList("This", "is", "a", "test", ".");
            ICoreMap singleSentence = new ArrayCoreMap();

            singleSentence.Set(typeof(CoreAnnotations.TokensAnnotation), tokens);

            IList <ICoreMap> sentenceList = Generics.NewArrayList();

            sentenceList.Add(singleSentence);

            // Only tokens are set: the expected string has the tokens separated by spaces.
            Annotation fromTokens = new Annotation(sentenceList);

            NUnit.Framework.Assert.AreEqual("This is a test .", fromTokens.ToString());

            // Once the sentence has a text annotation, the expected string is that text.
            singleSentence.Set(typeof(CoreAnnotations.TextAnnotation), "This is a test.");
            Annotation fromText = new Annotation(sentenceList);

            NUnit.Framework.Assert.AreEqual("This is a test.", fromText.ToString());
        }
 /// <summary>
 /// Builds the parser input for tree <paramref name="t"/> according to the
 /// test options.
 /// </summary>
 /// <remarks>
 /// <list type="bullet">
 /// <item>Without <c>forceTags</c>, the plain words of the tree are used.</item>
 /// <item>With <c>forceTags</c> and <c>preTag</c>, the words are run through the tagger first.</item>
 /// <item>With <c>forceTags</c> and <c>noFunctionalForcing</c>, the tree's tagged yield is used
 /// after cutting each tag at its first <c>-</c>.</item>
 /// <item>With <c>forceTags</c> alone, the tree's tagged yield is used unchanged.</item>
 /// </list>
 /// </remarks>
 /// <param name="t">Tree from which the sentence is taken.</param>
 /// <returns>The sentence converted to a list of <c>CoreLabel</c>.</returns>
 private IList <CoreLabel> GetInputSentence(Tree t)
 {
     // No forced tags: parse from the bare words.
     if (!op.testOptions.forceTags)
     {
         return(SentenceUtils.ToCoreLabelList(t.YieldWords()));
     }

     // Forced tags produced by the tagger.
     if (op.testOptions.preTag)
     {
         IList <TaggedWord> guessed = tagger.Apply(t.YieldWords());
         if (op.testOptions.verbose)
         {
             log.Info("Guess tags: " + Arrays.ToString(Sharpen.Collections.ToArray(guessed)));
             log.Info("Gold tags: " + t.LabeledYield().ToString());
         }
         return(SentenceUtils.ToCoreLabelList(guessed));
     }

     // Forced tags from the tree, used as they are.
     if (!op.testOptions.noFunctionalForcing)
     {
         return(SentenceUtils.ToCoreLabelList(t.TaggedYield()));
     }

     // Forced tags from the tree, keeping only the part of each tag before the first '-'.
     List <IHasWord> taggedWords = t.TaggedYield();
     foreach (IHasWord entry in taggedWords)
     {
         IHasTag tagged = (IHasTag)entry;
         string baseTag = tagged.Tag().Split("-")[0];
         tagged.SetTag(baseTag);
     }
     return(SentenceUtils.ToCoreLabelList(taggedWords));
 }