/// <summary>
/// Demo: tags every sentence of an input file with a maxent POS tagger, then
/// shows how to inspect individual word/tag pairs by printing the adjectives
/// of a hard-coded sentence.
/// </summary>
/// <param name="args">args[0] = tagger model file; args[1] = file to tag</param>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    if (args.Length != 2)
    {
        log.Info("usage: java TaggerDemo2 modelFile fileToTag");
        return;
    }
    MaxentTagger tagger = new MaxentTagger(args[0]);
    // "untokenizable=noneKeep" keeps untokenizable characters in the output
    // instead of reporting or deleting them.
    ITokenizerFactory <CoreLabel> ptbTokenizerFactory = PTBTokenizer.Factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep");
    BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
    PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.Console.Out, "utf-8"));
    // DocumentPreprocessor splits the character stream into sentences using
    // the tokenizer factory configured below.
    DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
    documentPreprocessor.SetTokenizerFactory(ptbTokenizerFactory);
    foreach (IList <IHasWord> sentence in documentPreprocessor)
    {
        IList <TaggedWord> tSentence = tagger.TagSentence(sentence);
        pw.Println(SentenceUtils.ListToString(tSentence, false));
    }
    // print the adjectives in one more sentence. This shows how to get at words and tags in a tagged sentence.
    IList <IHasWord> sent = SentenceUtils.ToWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", ".");
    IList <TaggedWord> taggedSent = tagger.TagSentence(sent);
    foreach (TaggedWord tw in taggedSent)
    {
        // Penn Treebank adjective tags all start with "JJ" (JJ, JJR, JJS).
        if (tw.Tag().StartsWith("JJ"))
        {
            pw.Println(tw.Word());
        }
    }
    pw.Close();
}
/// <summary>Perform (possibly destructive) operations on the tree.</summary>
/// <remarks>Perform (possibly destructive) operations on the tree. Do a top-down DFS on the tree.</remarks>
public virtual void VisitTree(Tree tree)
{
    if (tree == null)
    {
        return;
    }
    // If this subtree's surface string is a known multi-word expression,
    // flatten it: relabel the node and attach its preterminals directly.
    string surfaceForm = SentenceUtils.ListToString(tree.Yield());
    if (mweDictionary.Contains(surfaceForm))
    {
        IList <Tree> preterminals = GetPreterminalSubtrees(tree);
        tree.SetValue("MW" + tree.Value());
        tree.SetChildren(preterminals);
        // Bottom out of the recursion
        return;
    }
    foreach (Tree child in tree.Children())
    {
        // Only phrasal trees can have yields > 1!!
        if (child.IsPhrasal())
        {
            VisitTree(child);
        }
    }
}
/// <summary>Whitespace-tokenizes a string and converts the tokens to IOB-labeled CoreLabels.</summary>
/// <param name="str">Input string; leading/trailing whitespace is ignored</param>
/// <param name="segMarker">Character marking segment boundaries</param>
public static IList <CoreLabel> StringToIOB(string str, char segMarker)
{
    // Whitespace tokenization
    string[] whitespaceToks = str.Trim().Split("\\s+");
    IList <CoreLabel> labels = SentenceUtils.ToCoreLabelList(whitespaceToks);
    return StringToIOB(labels, segMarker, false);
}
/// <summary>Writes each sentence to the given file, wrapped in &lt;s&gt; ... &lt;/s&gt; markers.</summary>
/// <param name="sentences">Sentences to print</param>
/// <param name="filename">Output file path</param>
/// <remarks>Sentences longer than maxSentenceLength are still printed, but logged as warnings.</remarks>
public virtual void PrintSentences(IEnumerable <IList <IHasWord> > sentences, string filename)
{
    PrintWriter pw = null;
    try
    {
        pw = IOUtils.GetPrintWriter(filename);
        foreach (IList <IHasWord> sentence in sentences)
        {
            pw.Print("<s> ");
            // Note: Use <s sentence-id > to identify sentences
            string sentString = SentenceUtils.ListToString(sentence);
            if (sentence.Count > maxSentenceLength)
            {
                logger.Warning("Sentence length=" + sentence.Count + " is longer than maximum set length " + maxSentenceLength);
                logger.Warning("Long Sentence: " + sentString);
            }
            pw.Print(sentString);
            pw.Println(" </s>");
        }
    }
    catch (IOException ex)
    {
        throw new Exception(ex);
    }
    finally
    {
        // Close in finally: the original only closed the writer on the success
        // path, leaking the file handle whenever an exception was thrown.
        if (pw != null)
        {
            pw.Close();
        }
    }
}
/// <summary>
/// demoAPI demonstrates other ways of calling the parser with
/// already tokenized text, or in some cases, raw text that needs to
/// be tokenized as a single sentence.
/// </summary>
/// <remarks>
/// demoAPI demonstrates other ways of calling the parser with
/// already tokenized text, or in some cases, raw text that needs to
/// be tokenized as a single sentence. Output is handled with a
/// TreePrint object. Note that the options used when creating the
/// TreePrint can determine what results to print out. Once again,
/// one can capture the output by passing a PrintWriter to
/// TreePrint.printTree. This code is for English.
/// </remarks>
public static void DemoAPI(LexicalizedParser lp)
{
    // This option shows parsing a list of correctly tokenized words
    string[] tokens = new string[] { "This", "is", "an", "easy", "sentence", "." };
    IList <CoreLabel> labeledTokens = SentenceUtils.ToCoreLabelList(tokens);
    Tree tree = lp.Apply(labeledTokens);
    tree.PennPrint();
    System.Console.Out.WriteLine();
    // This option shows loading and using an explicit tokenizer
    string rawSentence = "This is another sentence.";
    ITokenizerFactory <CoreLabel> tokenizerFactory = PTBTokenizer.Factory(new CoreLabelTokenFactory(), string.Empty);
    ITokenizer <CoreLabel> tokenizer = tokenizerFactory.GetTokenizer(new StringReader(rawSentence));
    IList <CoreLabel> tokenized = tokenizer.Tokenize();
    tree = lp.Apply(tokenized);
    ITreebankLanguagePack tlp = lp.TreebankLanguagePack();
    // PennTreebankLanguagePack for English
    IGrammaticalStructureFactory gsf = tlp.GrammaticalStructureFactory();
    GrammaticalStructure structure = gsf.NewGrammaticalStructure(tree);
    IList <TypedDependency> dependencies = structure.TypedDependenciesCCprocessed();
    System.Console.Out.WriteLine(dependencies);
    System.Console.Out.WriteLine();
    // You can also use a TreePrint object to print trees and dependencies
    TreePrint printer = new TreePrint("penn,typedDependenciesCollapsed");
    printer.PrintTree(tree);
}
/// <summary>Demo: POS-tags input text and collects (word, tag) pairs per token.</summary>
static void Main(string[] args)
{
    // Loading POS Tagger
    var tagger = new MaxentTagger(@"Resources/english-bidirectional-distsim.tagger");
    // Text for tagging
    var text = "hello how are you?";
    IList <Tuple <string, string> > tagged = new List <Tuple <string, string> >();
    var sentences = MaxentTagger.tokenizeText(new StringReader(text)).toArray();
    foreach (ArrayList sentence in sentences)
    {
        var taggedSentence = tagger.tagSentence(sentence);
        System.Console.WriteLine(SentenceUtils.listToString(taggedSentence, false));
        // Materialize the tagged words once; the original called toArray()
        // on every loop iteration, which was accidentally O(n^2).
        var taggedWords = taggedSentence.toArray();
        for (int i = 0; i < taggedWords.Length; i++)
        {
            // The tag follows the LAST '/', so words that themselves contain
            // a '/' are not truncated (the original Split('/') mishandled them).
            var wordAndTag = taggedWords[i].ToString();
            int sep = wordAndTag.LastIndexOf('/');
            tagged.Add(Tuple.Create(wordAndTag.Substring(0, sep), wordAndTag.Substring(sep + 1)));
        }
    }
}
/// <summary>Demo: POS-tags a sentence and stores each word with its tag type.</summary>
static void Main()
{
    // NOTE(review): hard-coded absolute path; should be made configurable.
    var jarRoot = @"C:\Users\Burds\Downloads\Stanford.NLP.NET-master (1)\Stanford.NLP.NET-master\samples\Stanford.NLP.POSTagger.CSharp\bin\Debug\stanford-postagger-2018-02-27";
    var modelsDirectory = jarRoot + @"\models";
    // Loading POS Tagger
    var tagger = new MaxentTagger(modelsDirectory + @"\english-left3words-distsim.tagger");
    // Text for tagging
    var text = "This is a test sentence.";
    var sentences = MaxentTagger.tokenizeText(new StringReader(text)).toArray();
    foreach (ArrayList sentence in sentences)
    {
        var taggedSentence = tagger.tagSentence(sentence);
        Console.WriteLine(SentenceUtils.listToString(taggedSentence, false));
        var data = new List <DataClass>();
        // Iterate over ALL tokens: the original loop bound was size() - 1,
        // which silently dropped the last word of every sentence.
        for (int i = 0; i < taggedSentence.size(); i++)
        {
            string wordAndTag = taggedSentence.get(i).ToString();
            data.Add(new DataClass
            {
                SWord = sentence.get(i).ToString(),
                // The tag follows the LAST '/' (words may themselves contain '/').
                WType = wordAndTag.Substring(wordAndTag.LastIndexOf("/") + 1)
            });
        }
    }
}
/// <summary>Parses a simple sentence, both from pre-tokenized words and from raw text via an explicit tokenizer.</summary>
public void ParseEasySentence()
{
    // Parse a list of correctly tokenized words.
    var words = new[] { "This", "is", "an", "easy", "sentence", "." };
    var parse = _lp.apply(SentenceUtils.toCoreLabelList(words));
    Assert.NotNull(parse);
    parse.pennPrint();
    // Parse raw text by loading and using an explicit tokenizer.
    var rawText = "This is another sentence.";
    var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    using var rawTextReader = new StringReader(rawText);
    var tokens = tokenizerFactory.getTokenizer(rawTextReader).tokenize();
    parse = _lp.apply(tokens);
    Assert.NotNull(parse);
    var tlp = new PennTreebankLanguagePack();
    var structure = tlp.grammaticalStructureFactory().newGrammaticalStructure(parse);
    var dependencies = structure.typedDependenciesCCprocessed();
    TestContext.Out.WriteLine($"\n{dependencies}\n");
    // A TreePrint can render both the tree and its collapsed dependencies.
    var treePrinter = new TreePrint("penn,typedDependenciesCollapsed");
    Assert.NotNull(treePrinter);
    treePrinter.printTree(parse);
}
/// <summary>Make a new Annotation from a List of tokenized sentences.</summary>
public Annotation(IList <ICoreMap> sentences)
    : base()
{
    this.Set(typeof(CoreAnnotations.SentencesAnnotation), sentences);
    IList <CoreLabel> allTokens = new List <CoreLabel>();
    StringBuilder fullText = new StringBuilder();
    foreach (ICoreMap sentence in sentences)
    {
        IList <CoreLabel> sentenceTokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
        Sharpen.Collections.AddAll(allTokens, sentenceTokens);
        if (sentence.ContainsKey(typeof(CoreAnnotations.TextAnnotation)))
        {
            fullText.Append(sentence.Get(typeof(CoreAnnotations.TextAnnotation)));
        }
        else
        {
            // If there is no text in the sentence, fake it as best as we can
            // by joining the sentence's tokens, separated by newlines.
            if (fullText.Length > 0)
            {
                fullText.Append('\n');
            }
            fullText.Append(SentenceUtils.ListToString(sentenceTokens));
        }
    }
    this.Set(typeof(CoreAnnotations.TokensAnnotation), allTokens);
    this.Set(typeof(CoreAnnotations.TextAnnotation), fullText.ToString());
}
/// <summary>Returns the string associated with the input parse tree.</summary>
/// <remarks>
/// Returns the string associated with the input parse tree. Traces and
/// ATB-specific escape sequences (e.g., "-RRB-" for ")") are removed.
/// </remarks>
/// <param name="t">- A parse tree</param>
/// <returns>The yield of the input parse tree</returns>
public static string FlattenTree(Tree t)
{
    // Prune away empty elements/traces before reading off the yield.
    Tree pruned = t.Prune(emptyFilter, tf);
    return SentenceUtils.ListToString(pruned.Yield());
}
/// <summary>
/// Reads each configured input file line by line, escapes and lexically maps
/// tokens containing Arabic characters, and writes the result to outFileName.
/// </summary>
/// <remarks>All I/O errors are reported to stderr rather than propagated.</remarks>
public virtual void Build()
{
    LineNumberReader infile = null;
    PrintWriter outfile = null;
    string currentInfile = string.Empty;
    try
    {
        outfile = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFileName), "UTF-8")));
        foreach (File path in pathsToData)
        {
            infile = new LineNumberReader(new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8")));
            currentInfile = path.GetPath();
            while (infile.Ready())
            {
                List <Word> sent = SentenceUtils.ToUntaggedList(infile.ReadLine().Split("\\s+"));
                foreach (Word token in sent)
                {
                    // Only escape/map tokens that actually contain Arabic characters.
                    Matcher hasArabic = utf8ArabicChart.Matcher(token.Word());
                    if (hasArabic.Find())
                    {
                        token.SetWord(escaper.Apply(token.Word()));
                        token.SetWord(lexMapper.Map(null, token.Word()));
                    }
                }
                outfile.Println(SentenceUtils.ListToString(sent));
            }
            // NOTE: Java-style %d/%s format specifiers — presumably handled by a
            // Sharpen string.Format shim; verify before changing.
            toStringBuffer.Append(string.Format(" Read %d input lines from %s", infile.GetLineNumber(), path.GetPath()));
            // Close each reader as soon as its file is done: the original closed
            // only the last reader (and only on the success path), leaking one
            // file handle per input file.
            infile.Close();
        }
    }
    catch (UnsupportedEncodingException e)
    {
        System.Console.Error.Printf("%s: Filesystem does not support UTF-8 output\n", this.GetType().FullName);
        Sharpen.Runtime.PrintStackTrace(e);
    }
    catch (FileNotFoundException)
    {
        System.Console.Error.Printf("%s: Could not open %s for writing\n", this.GetType().FullName, outFileName);
    }
    catch (IOException)
    {
        System.Console.Error.Printf("%s: Error reading from %s (line %d)\n", this.GetType().FullName, currentInfile, infile.GetLineNumber());
    }
    catch (Exception e)
    {
        System.Console.Error.Printf("%s: Input sentence from %s contains token mapped to null (line %d)\n", this.GetType().FullName, currentInfile, infile.GetLineNumber());
        Sharpen.Runtime.PrintStackTrace(e);
    }
    finally
    {
        if (outfile != null)
        {
            outfile.Close();
        }
    }
}
/// <summary>Tags every sentence read from the given reader and writes the tagged text to the test log.</summary>
private void TagReader(Reader reader)
{
    var tokenized = MaxentTagger.tokenizeText(reader).toArray();
    Assert.NotNull(tokenized);
    foreach (ArrayList tokens in tokenized)
    {
        var taggedTokens = _tagger.tagSentence(tokens);
        TestContext.Out.WriteLine(SentenceUtils.listToString(taggedTokens, false));
    }
}
/// <summary>Reads an annotation from the given filename using the requested input.</summary>
/// <param name="tokenizer">Pipeline used to tokenize/split raw text (only used for Input.Text)</param>
/// <param name="inputFormat">Whether the file contains raw text or serialized trees</param>
/// <param name="filename">File to read</param>
/// <param name="filterUnknown">Tree input only: read gold labels and drop trees with unknown roots</param>
/// <returns>A list of Annotations, each wrapping a single sentence (or tree)</returns>
public static IList <Annotation> GetAnnotations(StanfordCoreNLP tokenizer, SentimentPipeline.Input inputFormat, string filename, bool filterUnknown)
{
    switch (inputFormat)
    {
        case SentimentPipeline.Input.Text:
        {
            // Raw text: run the tokenizer pipeline over the whole file, then
            // re-wrap each resulting sentence in its own single-sentence Annotation.
            string text = IOUtils.SlurpFileNoExceptions(filename);
            Annotation annotation = new Annotation(text);
            tokenizer.Annotate(annotation);
            IList <Annotation> annotations = Generics.NewArrayList();
            foreach (ICoreMap sentence in annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                Annotation nextAnnotation = new Annotation(sentence.Get(typeof(CoreAnnotations.TextAnnotation)));
                nextAnnotation.Set(typeof(CoreAnnotations.SentencesAnnotation), Java.Util.Collections.SingletonList(sentence));
                annotations.Add(nextAnnotation);
            }
            return (annotations);
        }
        case SentimentPipeline.Input.Trees:
        {
            IList <Tree> trees;
            if (filterUnknown)
            {
                trees = SentimentUtils.ReadTreesWithGoldLabels(filename);
                trees = SentimentUtils.FilterUnknownRoots(trees);
            }
            else
            {
                MemoryTreebank treebank = new MemoryTreebank("utf-8");
                treebank.LoadPath(filename, null);
                trees = new List <Tree>(treebank);
            }
            IList <Annotation> annotations = Generics.NewArrayList();
            foreach (Tree tree in trees)
            {
                // The sentence text is reconstructed from the tree's yield.
                ICoreMap sentence = new Annotation(SentenceUtils.ListToString(tree.Yield()));
                sentence.Set(typeof(TreeCoreAnnotations.TreeAnnotation), tree);
                IList <ICoreMap> sentences = Java.Util.Collections.SingletonList(sentence);
                Annotation annotation = new Annotation(string.Empty);
                annotation.Set(typeof(CoreAnnotations.SentencesAnnotation), sentences);
                annotations.Add(annotation);
            }
            return (annotations);
        }
        default:
        {
            throw new ArgumentException("Unknown format " + inputFormat);
        }
    }
}
/// <summary>Tags the sentence s by running maxent model.</summary>
/// <remarks>
/// Tags the sentence s by running maxent model. Returns a sentence (List) of
/// TaggedWord objects.
/// </remarks>
/// <param name="s">Input sentence (List). This isn't changed.</param>
/// <param name="reuseTags">If true, tags already present on input words seed the inference</param>
/// <returns>Tagged sentence</returns>
public virtual List <TaggedWord> TagSentence <_T0>(IList <_T0> s, bool reuseTags)
    where _T0 : IHasWord
{
    this.origWords = new List <IHasWord>(s);
    int sz = s.Count;
    // sz + 1 leaves room for the end-of-sentence marker appended below.
    this.sent = new List <string>(sz + 1);
    foreach (IHasWord value1 in s)
    {
        if (maxentTagger.wordFunction != null)
        {
            // Apply the tagger's configured word-normalization function.
            sent.Add(maxentTagger.wordFunction.Apply(value1.Word()));
        }
        else
        {
            sent.Add(value1.Word());
        }
    }
    sent.Add(Edu.Stanford.Nlp.Tagger.Common.Tagger.EosWord);
    if (reuseTags)
    {
        // Collect any pre-existing tags (null where a word has none).
        this.originalTags = new List <string>(sz + 1);
        foreach (IHasWord value in s)
        {
            if (value is IHasTag)
            {
                originalTags.Add(((IHasTag)value).Tag());
            }
            else
            {
                originalTags.Add(null);
            }
        }
        originalTags.Add(Edu.Stanford.Nlp.Tagger.Common.Tagger.EosTag);
    }
    size = sz + 1;
    if (Verbose)
    {
        log.Info("Sentence is " + SentenceUtils.ListToString(sent, false, tagSeparator));
    }
    Init();
    List <TaggedWord> result = TestTagInference();
    if (maxentTagger.wordFunction != null)
    {
        // Restore the original (un-normalized) word forms on the output.
        for (int j = 0; j < sz; ++j)
        {
            result[j].SetWord(s[j].Word());
        }
    }
    return (result);
}
/// <summary>Converts a parse tree into a string of tokens.</summary>
/// <remarks>
/// Converts a parse tree into a string of tokens. Each token is a word and
/// its POS tag separated by the delimiter specified by <code>separator</code>
/// </remarks>
/// <param name="t">- A parse tree</param>
/// <param name="removeEscaping">- If true, remove LDC escape characters. Otherwise, leave them.</param>
/// <param name="separator">Word/tag separator</param>
/// <returns>A string of tagged words</returns>
public static string TaggedStringFromTree(Tree t, bool removeEscaping, string separator)
{
    Tree pruned = t.Prune(emptyFilter, tf);
    IList <CoreLabel> taggedTokens = pruned.TaggedLabeledYield();
    foreach (CoreLabel token in taggedTokens)
    {
        // Optionally strip LDC escape sequences from each surface form.
        string surface = removeEscaping ? UnEscape(token.Word()) : token.Word();
        token.SetWord(surface);
        token.SetValue(surface);
    }
    return SentenceUtils.ListToString(taggedTokens, false, separator);
}
/// <summary>A single-pattern trigger fires only when all of its trigger words appear in the sequence.</summary>
public virtual void TestSimpleTrigger()
{
    IList <TokenSequencePattern> patterns = new List <TokenSequencePattern>();
    patterns.Add(TokenSequencePattern.Compile("which word should be matched"));
    MultiPatternMatcher.ISequencePatternTrigger <ICoreMap> trigger = new MultiPatternMatcher.BasicSequencePatternTrigger <ICoreMap>(new CoreMapNodePatternTrigger(patterns));
    // Unrelated words: nothing is triggered.
    ICollection <SequencePattern <ICoreMap> > fired = trigger.Apply(SentenceUtils.ToCoreLabelList("one", "two", "three"));
    NUnit.Framework.Assert.AreEqual(0, fired.Count);
    // A single word from the pattern is not enough to trigger it.
    fired = trigger.Apply(SentenceUtils.ToCoreLabelList("which"));
    NUnit.Framework.Assert.AreEqual(0, fired.Count);
    // The complete word sequence triggers the pattern.
    fired = trigger.Apply(SentenceUtils.ToCoreLabelList("which", "word", "should", "be", "matched"));
    NUnit.Framework.Assert.AreEqual(1, fired.Count);
}
/// <summary>The initial parser state has one preterminal (tag over word) per input token.</summary>
public virtual void TestInitialStateFromTagged()
{
    string[] words = new string[] { "This", "is", "a", "short", "test", "." };
    string[] tags = new string[] { "DT", "VBZ", "DT", "JJ", "NN", "." };
    NUnit.Framework.Assert.AreEqual(words.Length, tags.Length);
    IList <TaggedWord> taggedSentence = SentenceUtils.ToTaggedList(Arrays.AsList(words), Arrays.AsList(tags));
    State state = ShiftReduceParser.InitialStateFromTaggedSentence(taggedSentence);
    for (int idx = 0; idx < words.Length; ++idx)
    {
        // Each entry is a preterminal: a tag node with exactly one word child.
        NUnit.Framework.Assert.AreEqual(tags[idx], state.sentence[idx].Value());
        NUnit.Framework.Assert.AreEqual(1, state.sentence[idx].Children().Length);
        NUnit.Framework.Assert.AreEqual(words[idx], state.sentence[idx].Children()[0].Value());
    }
}
/// <summary>Applying three shift transitions advances the token position by three.</summary>
public virtual void TestTransition()
{
    string[] words = new string[] { "This", "is", "a", "short", "test", "." };
    string[] tags = new string[] { "DT", "VBZ", "DT", "JJ", "NN", "." };
    NUnit.Framework.Assert.AreEqual(words.Length, tags.Length);
    IList <TaggedWord> taggedSentence = SentenceUtils.ToTaggedList(Arrays.AsList(words), Arrays.AsList(tags));
    State state = ShiftReduceParser.InitialStateFromTaggedSentence(taggedSentence);
    ShiftTransition shift = new ShiftTransition();
    int shiftCount = 3;
    for (int applied = 0; applied < shiftCount; ++applied)
    {
        state = shift.Apply(state);
    }
    NUnit.Framework.Assert.AreEqual(3, state.tokenPosition);
}
/// <summary>Annotation text is rebuilt from tokens when absent, and used verbatim when present.</summary>
public virtual void TestFromList()
{
    IList <ICoreMap> sentences = Generics.NewArrayList();
    ICoreMap sentence = new ArrayCoreMap();
    IList <CoreLabel> tokens = SentenceUtils.ToCoreLabelList("This", "is", "a", "test", ".");
    sentence.Set(typeof(CoreAnnotations.TokensAnnotation), tokens);
    sentences.Add(sentence);
    // No TextAnnotation set: the text is reconstructed by joining the tokens.
    Annotation annotation = new Annotation(sentences);
    NUnit.Framework.Assert.AreEqual("This is a test .", annotation.ToString());
    // With TextAnnotation set, that text is used verbatim instead.
    sentence.Set(typeof(CoreAnnotations.TextAnnotation), "This is a test.");
    annotation = new Annotation(sentences);
    NUnit.Framework.Assert.AreEqual("This is a test.", annotation.ToString());
}
/// <summary>
/// Post-processes a multi-word-expression subtree: purely numeric/punctuation
/// yields are collapsed into a single leaf; otherwise the node is relabeled
/// with the MWE phrasal prefix.
/// </summary>
private Tree PostProcessMWE(Tree t)
{
    // Yield string with all whitespace removed.
    string compactYield = SentenceUtils.ListToString(t.Yield()).ReplaceAll("\\s+", string.Empty);
    if (!compactYield.Matches("[\\d\\p{Punct}]*"))
    {
        t.SetValue(MwePhrasal + t.Value());
        return t;
    }
    // Digits/punctuation only: replace the subtree with a single-leaf node.
    IList <Tree> kids = new List <Tree>();
    kids.Add(treeFactory.NewLeaf(compactYield));
    return treeFactory.NewTreeNode(t.Value(), kids);
}
/// <summary>Splits XML input on the given element delimiter and checks each extracted sentence against the expected strings.</summary>
private static void CompareXMLResults(string input, string element, params string[] expectedResults)
{
    DocumentPreprocessor document = new DocumentPreprocessor(new BufferedReader(new StringReader(input)), DocumentPreprocessor.DocType.Xml);
    document.SetElementDelimiter(element);
    List <string> actual = new List <string>();
    foreach (IList <IHasWord> sentence in document)
    {
        actual.Add(SentenceUtils.ListToString(sentence));
    }
    NUnit.Framework.Assert.AreEqual(expectedResults.Length, actual.Count);
    for (int idx = 0; idx < actual.Count; ++idx)
    {
        NUnit.Framework.Assert.AreEqual(expectedResults[idx], actual[idx]);
    }
}
/// <summary>Tags each sentence of the input file with a maxent model and prints the tagged text to stdout.</summary>
/// <param name="args">args[0] = tagger model file; args[1] = file to tag</param>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    if (args.Length != 2)
    {
        log.Info("usage: java TaggerDemo modelFile fileToTag");
        return;
    }
    MaxentTagger tagger = new MaxentTagger(args[0]);
    IList <IList <IHasWord> > sentences = MaxentTagger.TokenizeText(new BufferedReader(new FileReader(args[1])));
    foreach (IList <IHasWord> tokens in sentences)
    {
        IList <TaggedWord> taggedTokens = tagger.TagSentence(tokens);
        System.Console.Out.WriteLine(SentenceUtils.ListToString(taggedTokens, false));
    }
}
// static methods
/// <summary>Recursively sets each non-leaf node's label from labelMap, keyed by the node's yield string.</summary>
/// <param name="tree">Tree to relabel (modified in place); leaves are left untouched</param>
/// <param name="labelMap">Map from yield text to the label to assign</param>
/// <param name="missing">Policy for yields that have no entry in labelMap</param>
/// <param name="defaultLabel">Label assigned under MissingLabels.Default</param>
/// <param name="unknowns">Collects unmatched yield strings (Default mode only)</param>
public static void SetLabels(Tree tree, IDictionary <string, string> labelMap, ParseAndSetLabels.MissingLabels missing, string defaultLabel, ICollection <string> unknowns)
{
    if (tree.IsLeaf())
    {
        return;
    }
    string text = SentenceUtils.ListToString(tree.Yield());
    // Use TryGetValue: this code was ported from Java, where map.get() returns
    // null for a missing key. The C# dictionary indexer throws
    // KeyNotFoundException instead, so the original lookup crashed before the
    // MissingLabels fallback could ever run.
    string label;
    if (labelMap.TryGetValue(text, out label) && label != null)
    {
        tree.Label().SetValue(label);
    }
    else
    {
        switch (missing)
        {
            case ParseAndSetLabels.MissingLabels.Fail:
            {
                throw new Exception("No label for '" + text + "'");
            }
            case ParseAndSetLabels.MissingLabels.Default:
            {
                tree.Label().SetValue(defaultLabel);
                unknowns.Add(text);
                break;
            }
            case ParseAndSetLabels.MissingLabels.KeepOriginal:
            {
                // do nothing
                break;
            }
            default:
            {
                throw new ArgumentException("Unknown MissingLabels mode " + missing);
            }
        }
    }
    foreach (Tree child in tree.Children())
    {
        SetLabels(child, labelMap, missing, defaultLabel, unknowns);
    }
}
/// <summary>A BinaryTransition records its head side on the resulting stack node.</summary>
public virtual void TestBinarySide()
{
    string[] words = new string[] { "This", "is", "a", "short", "test", "." };
    string[] tags = new string[] { "DT", "VBZ", "DT", "JJ", "NN", "." };
    NUnit.Framework.Assert.AreEqual(words.Length, tags.Length);
    IList <TaggedWord> taggedSentence = SentenceUtils.ToTaggedList(Arrays.AsList(words), Arrays.AsList(tags));
    State state = ShiftReduceParser.InitialStateFromTaggedSentence(taggedSentence);
    ShiftTransition shift = new ShiftTransition();
    // Two shifts put two items on the stack for the binary transition to combine.
    state = shift.Apply(shift.Apply(state));
    BinaryTransition rightHeaded = new BinaryTransition("NP", BinaryTransition.Side.Right);
    State afterRight = rightHeaded.Apply(state);
    NUnit.Framework.Assert.AreEqual(BinaryTransition.Side.Right, ShiftReduceUtils.GetBinarySide(afterRight.stack.Peek()));
    BinaryTransition leftHeaded = new BinaryTransition("NP", BinaryTransition.Side.Left);
    State afterLeft = leftHeaded.Apply(state);
    NUnit.Framework.Assert.AreEqual(BinaryTransition.Side.Left, ShiftReduceUtils.GetBinarySide(afterLeft.stack.Peek()));
}
/// <summary>Checks ATB tokenization of each input line against its reference tokenization.</summary>
public virtual void TestArabicTokenizer()
{
    System.Diagnostics.Debug.Assert((untokInputs.Length == tokReferences.Length));
    ITokenizerFactory <CoreLabel> tf = ArabicTokenizer.AtbFactory();
    tf.SetOptions("removeProMarker");
    tf.SetOptions("removeSegMarker");
    tf.SetOptions("removeMorphMarker");
    for (int i = 0; i < untokInputs.Length; ++i)
    {
        string line = untokInputs[i];
        ITokenizer <CoreLabel> tokenizer = tf.GetTokenizer(new StringReader(line));
        IList <CoreLabel> tokens = tokenizer.Tokenize();
        string tokenizedLine = SentenceUtils.ListToString(tokens);
        string reference = tokReferences[i];
        // NUnit's Assert.AreEqual signature is (expected, actual, message). The
        // original used JUnit's (message, expected, actual) order, so the failure
        // message was compared as the expected value and the assertion could
        // never pass. Sibling tests in this file use the correct NUnit order.
        NUnit.Framework.Assert.AreEqual(reference, tokenizedLine, "Tokenization deviates from reference");
    }
}
/// <summary>
/// Checks that the Spanish tokenize/cleanxml/ssplit pipeline splits each test
/// text into exactly two sentences and tokenizes the dateline correctly.
/// </summary>
public virtual void TestSpanishDatelineSeparation()
{
    // The boundaryMultiTokenRegex matches dateline prefixes such as
    // "CIUDAD, 12 mes (Agencia) --" so the dateline separates from the body.
    Properties props = PropertiesUtils.AsProperties("annotators", "tokenize, cleanxml, ssplit", "tokenize.language", "es", "tokenize.options", "tokenizeNLs,ptb3Escaping=true", "ssplit.newlineIsSentenceBreak", "two", "ssplit.boundaryMultiTokenRegex" , "/\\*NL\\*/ /\\p{Lu}[-\\p{L}]+/+ ( /,/ /[-\\p{L}]+/+ )? " + "( /,/ /[1-3]?[0-9]/ /\\p{Ll}{3,3}/ )? /=LRB=/ /\\p{Lu}\\p{L}+/ /=RRB=/ /--/");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    NUnit.Framework.Assert.AreEqual(dateLineSpanishTexts.Length, dateLineSpanishTokens.Length, "Bad test data");
    for (int i = 0; i < dateLineSpanishTexts.Length; i++)
    {
        Annotation document1 = new Annotation(dateLineSpanishTexts[i]);
        pipeline.Annotate(document1);
        IList <ICoreMap> sentences = document1.Get(typeof(CoreAnnotations.SentencesAnnotation));
        // Each text must split into exactly two sentences (dateline + body).
        NUnit.Framework.Assert.AreEqual(2, sentences.Count, "For " + dateLineSpanishTexts[i] + " annotation is " + document1);
        IList <CoreLabel> sentenceOneTokens = sentences[0].Get(typeof(CoreAnnotations.TokensAnnotation));
        string sentenceOne = SentenceUtils.ListToString(sentenceOneTokens);
        NUnit.Framework.Assert.AreEqual(dateLineSpanishTokens[i], sentenceOne, "Bad tokens in dateline");
    }
}
/// <summary>Evaluates a guess tree against a gold tree, warning when their yields differ in length.</summary>
public override void Evaluate(Tree guess, Tree gold, PrintWriter pw)
{
    // Guard: evaluation needs both trees.
    if (gold == null || guess == null)
    {
        System.Console.Error.Printf("%s: Cannot compare against a null gold or guess tree!\n", this.GetType().FullName);
        return;
    }
    if (guess.Yield().Count != gold.Yield().Count)
    {
        log.Info("Warning: yield differs:");
        log.Info("Guess: " + SentenceUtils.ListToString(guess.Yield()));
        log.Info("Gold: " + SentenceUtils.ListToString(gold.Yield()));
    }
    base.Evaluate(guess, gold, pw);
}
/// <summary>POS-tags the text and returns the words whose tag appears in the given tag list.</summary>
/// <param name="text">Raw input text</param>
/// <param name="tag">POS tags to keep (e.g. "JJ", "NN")</param>
public List <string> FindTag(string text, List <string> tag)
{
    List <string> matches = new List <string>();
    object[] sentences = MaxentTagger.tokenizeText(new java.io.StringReader(text)).toArray();
    foreach (ArrayList sentence in sentences)
    {
        List taggedSentence = this.tagger.tagSentence(sentence);
        foreach (CoreLabel token in SentenceUtils.toCoreLabelList(taggedSentence).toArray())
        {
            if (tag.Contains(token.tag()))
            {
                matches.Add(token.word());
            }
        }
    }
    return matches;
}
/// <summary>
/// Lemmatizes the input text, POS-tags the lemmatized form, and returns the
/// tokens tagged as verbs (any tag starting with VB), space-separated in
/// "word/TAG" form.
/// </summary>
/// <param name="text">Raw input text</param>
/// <returns>Space-separated "word/TAG" tokens whose tag contains "VB"</returns>
public string send(string text)
{
    // Split on simple punctuation/whitespace, then lemmatize each word.
    string[] exampleWords = text.Split( new char[] { ' ', ',', '.', ')', '(' }, StringSplitOptions.RemoveEmptyEntries);
    ILemmatizer lmtz = new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.English);
    StringBuilder sb = new StringBuilder();
    foreach (string word in exampleWords)
    {
        sb.Append(LemmatizeOne(lmtz, word) + " ");
    }
    string finalstring = sb.ToString();
    // NOTE(review): hard-coded local path — the tagger model location should be
    // configurable, and the tagger is reloaded on every call (expensive).
    var jarRoot = @"E:\stanford-postagger-full-2015-12-09\stanford-postagger-full-2015-12-09";
    var modelsDirectory = jarRoot + @"\models";
    // Loading POS Tagger
    var tagger = new MaxentTagger(modelsDirectory + @"\wsj-0-18-bidirectional-nodistsim.tagger");
    // Text for tagging
    StringBuilder str = new StringBuilder();
    var sentences = MaxentTagger.tokenizeText(new java.io.StringReader(finalstring)).toArray();
    foreach (ArrayList sentence in sentences)
    {
        var taggedSentence = tagger.tagSentence(sentence);
        string sent = SentenceUtils.listToString(taggedSentence, false);
        String[] tokens = sent.Split(' ');
        for (int i = 0; i < tokens.Length; i++)
        {
            // Keep any verb form: VB, VBD, VBG, VBN, VBP, VBZ all contain "/VB".
            if (tokens[i].Contains("/VB"))
            {
                str.Append(tokens[i] + " ");
            }
        }
    }
    return (str.ToString());
}
/// <summary>
/// With ssplit.newlineIsSentenceBreak=two, only a blank line breaks sentences,
/// and tokenizeNLs keeps the newline tokens in the token stream.
/// </summary>
public virtual void TestTwoNewlineIsSentenceBreakTokenizeNLs()
{
    string text = "This is \none sentence\n\nThis is not another.";
    Properties props = PropertiesUtils.AsProperties("annotators", "tokenize, ssplit", "tokenize.language", "en", "tokenize.options", "tokenizeNLs,invertible,ptb3Escaping=true", "ssplit.newlineIsSentenceBreak", "two");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation document1 = new Annotation(text);
    pipeline.Annotate(document1);
    // A single newline does not split; only the blank line does, giving two sentences.
    IList <ICoreMap> sentences = document1.Get(typeof(CoreAnnotations.SentencesAnnotation));
    NUnit.Framework.Assert.AreEqual(2, sentences.Count);
    // make sure that there are the correct # of tokens (does contain NL tokens)
    IList <CoreLabel> allTokens = document1.Get(typeof(CoreAnnotations.TokensAnnotation));
    NUnit.Framework.Assert.AreEqual(9, allTokens.Count);
    IList <CoreLabel> secondSentenceTokens = sentences[1].Get(typeof(CoreAnnotations.TokensAnnotation));
    string secondSentence = SentenceUtils.ListToString(secondSentenceTokens);
    NUnit.Framework.Assert.AreEqual("This is not another .", secondSentence, "Bad tokens in sentence");
}