/// <summary>
/// Demonstrates calling the parser on already tokenized text and on raw text
/// that is first tokenized as a single sentence.
/// </summary>
/// <remarks>
/// Three usages are shown in order: parsing a pre-tokenized word array and
/// printing the tree with <c>PennPrint</c>; tokenizing a raw string through an
/// explicit <c>PTBTokenizer</c> factory, parsing it and printing its
/// CC-processed typed dependencies; and printing that same parse through a
/// <c>TreePrint</c> created with <c>"penn,typedDependenciesCollapsed"</c>.
/// The options given when creating the <c>TreePrint</c> determine what is
/// printed. This code is for English.
/// </remarks>
/// <param name="lp">
/// Parser applied to both sentences; it also supplies the treebank language
/// pack used to build the grammatical structure.
/// </param>
public static void DemoAPI(LexicalizedParser lp)
{
    // This option shows parsing a list of correctly tokenized words
    string[] sent = new string[] { "This", "is", "an", "easy", "sentence", "." };
    IList<CoreLabel> rawWords = SentenceUtils.ToCoreLabelList(sent);
    Tree parse = lp.Apply(rawWords);
    parse.PennPrint();
    System.Console.Out.WriteLine();

    // This option shows loading and using an explicit tokenizer
    string sent2 = "This is another sentence.";
    ITokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer.Factory(new CoreLabelTokenFactory(), string.Empty);
    ITokenizer<CoreLabel> tok = tokenizerFactory.GetTokenizer(new StringReader(sent2));
    IList<CoreLabel> rawWords2 = tok.Tokenize();
    parse = lp.Apply(rawWords2);

    // PennTreebankLanguagePack for English
    ITreebankLanguagePack tlp = lp.TreebankLanguagePack();
    IGrammaticalStructureFactory gsf = tlp.GrammaticalStructureFactory();
    GrammaticalStructure gs = gsf.NewGrammaticalStructure(parse);
    IList<TypedDependency> tdl = gs.TypedDependenciesCCprocessed();

    // WriteLine(object) would only print the list object's own ToString()
    // (the type name for BCL collections), so join the elements explicitly
    // to show the dependencies themselves in a bracketed list.
    System.Console.Out.WriteLine("[" + string.Join(", ", tdl) + "]");
    System.Console.Out.WriteLine();

    // You can also use a TreePrint object to print trees and dependencies
    TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed");
    tp.PrintTree(parse);
}
/// <summary>
/// Tokenizes <paramref name="str"/> on runs of whitespace and forwards the
/// resulting tokens to the list-based <c>StringToIOB</c> overload.
/// </summary>
/// <param name="str">Raw text; leading and trailing whitespace is trimmed before splitting.</param>
/// <param name="segMarker">Segmentation marker character, passed through unchanged to the overload.</param>
/// <returns>Whatever the list-based overload returns when called with <c>false</c> as its third argument.</returns>
public static IList<CoreLabel> StringToIOB(string str, char segMarker)
{
    // Whitespace tokenization. string.Split(string) treats its argument as a
    // literal separator, so the "\\s+" pattern has to go through Regex.Split
    // to act as a whitespace regular expression.
    string[] words = System.Text.RegularExpressions.Regex.Split(str.Trim(), "\\s+");
    IList<CoreLabel> toks = SentenceUtils.ToCoreLabelList(words);
    return StringToIOB(toks, segMarker, false);
}
/// <summary>
/// Builds a sequence-pattern trigger from a single compiled pattern and checks
/// how many patterns it reports for three different token lists.
/// </summary>
public virtual void TestSimpleTrigger()
{
    // One pattern only, so the trigger can report at most one candidate.
    IList<TokenSequencePattern> compiledPatterns = new List<TokenSequencePattern>
    {
        TokenSequencePattern.Compile("which word should be matched")
    };
    CoreMapNodePatternTrigger nodeTrigger = new CoreMapNodePatternTrigger(compiledPatterns);
    MultiPatternMatcher.ISequencePatternTrigger<ICoreMap> sequenceTrigger =
        new MultiPatternMatcher.BasicSequencePatternTrigger<ICoreMap>(nodeTrigger);

    // Tokens that share nothing with the pattern: nothing is expected.
    ICollection<SequencePattern<ICoreMap>> candidates =
        sequenceTrigger.Apply(SentenceUtils.ToCoreLabelList("one", "two", "three"));
    NUnit.Framework.Assert.AreEqual(0, candidates.Count);

    // Only the first word of the pattern is present: still nothing expected.
    candidates = sequenceTrigger.Apply(SentenceUtils.ToCoreLabelList("which"));
    NUnit.Framework.Assert.AreEqual(0, candidates.Count);

    // Every word of the pattern is present: exactly one pattern expected.
    candidates = sequenceTrigger.Apply(
        SentenceUtils.ToCoreLabelList("which", "word", "should", "be", "matched"));
    NUnit.Framework.Assert.AreEqual(1, candidates.Count);
}
/// <summary>
/// Checks <c>Annotation.ToString()</c> for an annotation built from a sentence
/// list: first with only tokens set on the sentence, then after a text
/// annotation has been set on it as well.
/// </summary>
public virtual void TestFromList()
{
    // A single sentence that initially carries tokens only.
    IList<CoreLabel> tokens = SentenceUtils.ToCoreLabelList("This", "is", "a", "test", ".");
    ICoreMap onlySentence = new ArrayCoreMap();
    onlySentence.Set(typeof(CoreAnnotations.TokensAnnotation), tokens);

    IList<ICoreMap> sentenceList = Generics.NewArrayList();
    sentenceList.Add(onlySentence);

    // With tokens only, the expected text has a space before the final period.
    Annotation fromTokens = new Annotation(sentenceList);
    NUnit.Framework.Assert.AreEqual("This is a test .", fromTokens.ToString());

    // Once the sentence has explicit text, a fresh annotation is expected to use it.
    onlySentence.Set(typeof(CoreAnnotations.TextAnnotation), "This is a test.");
    Annotation fromText = new Annotation(sentenceList);
    NUnit.Framework.Assert.AreEqual("This is a test.", fromText.ToString());
}
/// <summary>
/// Builds the parser input for a tree, choosing between plain words, tagger
/// output, and the tree's own tagged yield based on <c>op.testOptions</c>.
/// </summary>
/// <param name="t">Tree whose yield supplies the words (and possibly tags).</param>
/// <returns>The selected word sequence converted to a list of <c>CoreLabel</c>.</returns>
private IList<CoreLabel> GetInputSentence(Tree t)
{
    // Tags are not forced: hand over the bare words.
    if (!op.testOptions.forceTags)
    {
        return SentenceUtils.ToCoreLabelList(t.YieldWords());
    }

    // Forced tags come from the tagger when pre-tagging is enabled.
    if (op.testOptions.preTag)
    {
        IList<TaggedWord> guessed = tagger.Apply(t.YieldWords());
        if (op.testOptions.verbose)
        {
            log.Info("Guess tags: " + Arrays.ToString(Sharpen.Collections.ToArray(guessed)));
            log.Info("Gold tags: " + t.LabeledYield().ToString());
        }
        return SentenceUtils.ToCoreLabelList(guessed);
    }

    // Otherwise the tags come from the tree itself, used as they are ...
    if (!op.testOptions.noFunctionalForcing)
    {
        return SentenceUtils.ToCoreLabelList(t.TaggedYield());
    }

    // ... or with everything from the first '-' onward cut off each tag.
    List<IHasWord> treeTagged = t.TaggedYield();
    foreach (IHasWord entry in treeTagged)
    {
        IHasTag tagHolder = (IHasTag)entry;
        string fullTag = tagHolder.Tag();
        tagHolder.SetTag(fullTag.Split("-")[0]);
    }
    return SentenceUtils.ToCoreLabelList(treeTagged);
}