/// <summary>
/// Runs <c>DoOneSentence</c> over copies of the given tokens and writes the resulting NER tag and
/// normalized NER value back onto the original tokens.
/// </summary>
/// <param name="tokens">Tokens to annotate; their NER-related annotations are updated in place.</param>
private void AnnotateTokens <Token>(IList <Token> tokens)
            where Token : CoreLabel
        {
            // Make a copy of the tokens before annotating because QuantifiableEntityNormalizer may change the POS too
            IList <CoreLabel> words = new List <CoreLabel>();

            foreach (CoreLabel token in tokens)
            {
                CoreLabel word = new CoreLabel();
                word.SetWord(token.Word());
                word.SetNER(token.Ner());
                word.SetTag(token.Tag());
                // copy fields potentially set by SUTime
                NumberSequenceClassifier.TransferAnnotations(token, word);
                words.Add(word);
            }
            DoOneSentence(words);
            // TODO: If collapsed is set, tokens for entities are collapsed into one node; then
            // words.Count != tokens.Count and the positional copy-back below does not work.
            for (int i = 0; i < words.Count; i++)
            {
                string ner = words[i].Ner();
                tokens[i].SetNER(ner);
                tokens[i].Set(typeof(CoreAnnotations.NormalizedNamedEntityTagAnnotation), words[i].Get(typeof(CoreAnnotations.NormalizedNamedEntityTagAnnotation)));
            }
        }
        /// <summary>
        /// Builds a <see cref="CoreLabel"/> for one piece of text, recording the original text,
        /// the (possibly rewritten) word/value and the character offsets.
        /// </summary>
        /// <param name="tokenText">Raw text of the token; always stored as the original text.</param>
        /// <param name="doNormalization">Whether space normalization may be applied (also requires <c>normalizeSpace</c>).</param>
        /// <param name="charOffsetBegin">Value stored as the character begin offset.</param>
        /// <param name="charOffsetEnd">Value stored as the character end offset.</param>
        /// <returns>The newly created token.</returns>
        private CoreLabel MakeXmlToken(string tokenText, bool doNormalization, int charOffsetBegin, int charOffsetEnd)
        {
            CoreLabel xmlToken = new CoreLabel();
            xmlToken.SetOriginalText(tokenText);

            bool isSeparator = separatorPattern.Matcher(tokenText).Matches();
            if (isSeparator)
            {
                // Map to CoreNLP newline token
                tokenText = AbstractTokenizer.NewlineToken;
            }
            else if (doNormalization && normalizeSpace)
            {
                // change space to non-breaking space
                tokenText = tokenText.Replace(' ', '\u00A0');
            }

            xmlToken.SetWord(tokenText);
            xmlToken.SetValue(tokenText);
            xmlToken.Set(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation), charOffsetBegin);
            xmlToken.Set(typeof(CoreAnnotations.CharacterOffsetEndAnnotation), charOffsetEnd);

            if (Verbose)
            {
                log.Info("Adding token " + xmlToken.ToShorterString());
            }
            return xmlToken;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Tags the given tokens (with the loaded tagger when <c>preTag</c> is set, otherwise via a parse)
        /// and returns one stemmed <see cref="CoreLabel"/> per tagged word.
        /// </summary>
        /// <remarks>
        /// Only works on English, as it is hard coded for using the
        /// Morphology class, which is English-only.
        /// </remarks>
        /// <param name="tokens">The words to tag and stem.</param>
        /// <returns>Labels carrying word and tag, each passed through <c>Morphology.Stem</c>.</returns>
        public virtual IList <CoreLabel> Lemmatize <_T0>(IList <_T0> tokens)
            where _T0 : IHasWord
        {
            IList <TaggedWord> taggedWords;

            if (!GetOp().testOptions.preTag)
            {
                // No pre-tagging requested: take the tags from the parse tree.
                Tree parsed = Parse(tokens);
                taggedWords = parsed.TaggedYield();
            }
            else
            {
                IFunction <IList <IHasWord>, IList <TaggedWord> > posTagger = LoadTagger();
                taggedWords = posTagger.Apply(tokens);
            }

            Morphology        stemmer = new Morphology();
            IList <CoreLabel> result  = Generics.NewArrayList();

            foreach (TaggedWord taggedWord in taggedWords)
            {
                CoreLabel stemmed = new CoreLabel();
                stemmed.SetWord(taggedWord.Word());
                stemmed.SetTag(taggedWord.Tag());
                stemmer.Stem(stemmed);
                result.Add(stemmed);
            }
            return result;
        }
Exemplo n.º 4
0
        /// <summary>Creates a dependency between two words given only their text.</summary>
        /// <param name="regent">Text of the governor; must not be null.</param>
        /// <param name="dependent">Text of the dependent; must not be null.</param>
        /// <exception cref="ArgumentException">If either argument is null.</exception>
        public UnnamedDependency(string regent, string dependent)
        {
            // We store the text of the labels separately because it looks like
            // it is possible for an object to request a hash code using itself
            // in a partially reconstructed state when unserializing.  For
            // example, a TreeGraphNode might ask for the hash code of an
            // UnnamedDependency, which then uses an unfilled member of the same
            // TreeGraphNode to get the hash code.  Keeping the text of the
            // labels breaks that possible cycle.
            bool missingArgument = regent == null || dependent == null;
            if (missingArgument)
            {
                throw new ArgumentException("governor or dependent cannot be null");
            }

            // Governor label: value and word both carry the regent text.
            CoreLabel governorLabel = new CoreLabel();
            governorLabel.SetValue(regent);
            governorLabel.SetWord(regent);
            this.regent = governorLabel;

            // Dependent label, built the same way.
            CoreLabel dependentLabel = new CoreLabel();
            dependentLabel.SetValue(dependent);
            dependentLabel.SetWord(dependent);
            this.dependent = dependentLabel;

            regentText    = regent;
            dependentText = dependent;
        }
Exemplo n.º 5
0
 /// <summary>
 /// Walks the datums from <paramref name="wordStartIndex"/> to the end of the list and stamps each
 /// with its offset inside the word and with the word's text.
 /// </summary>
 /// <remarks>
 /// This needs to be post hoc because the CoreLabel lists coming from testing data sets
 /// are pre-segmented (so treating each of those CoreLabels as a "word" lets
 /// us cheat and get 100% classification accuracy by just looking at whether
 /// we're at the beginning of a "word").
 /// </remarks>
 /// <param name="iobList">Datums produced so far; entries from <paramref name="wordStartIndex"/> on are updated.</param>
 /// <param name="currentWord">Text set as the word of every updated datum.</param>
 /// <param name="wordStartIndex">Index in <paramref name="iobList"/> of the first datum to update.</param>
 private static void FillInWordStatistics(IList <CoreLabel> iobList, string currentWord, int wordStartIndex)
 {
     int position = wordStartIndex;
     while (position < iobList.Count)
     {
         CoreLabel datum = iobList[position];
         // Index is relative to the start of the word, not to the list.
         datum.SetIndex(position - wordStartIndex);
         datum.SetWord(currentWord);
         position++;
     }
 }
        /// <summary>Creates a label whose word and original text are <paramref name="word"/> and whose NER tag is <paramref name="ner"/>.</summary>
        /// <param name="word">Text used for both the word and the original text.</param>
        /// <param name="ner">NER tag to set.</param>
        /// <returns>The new label.</returns>
        private CoreLabel MkLabel(string word, string ner)
        {
            CoreLabel result = new CoreLabel();
            result.SetWord(word);
            result.SetOriginalText(word);
            result.SetNER(ner);
            return result;
        }
Exemplo n.º 7
0
        /// <summary>Creates a label whose word, value, text annotation and value annotation are all <paramref name="token"/>.</summary>
        /// <param name="token">Text to store on the label.</param>
        /// <returns>The new label.</returns>
        private static CoreLabel InitCoreLabel(string token)
        {
            CoreLabel result = new CoreLabel();
            result.SetWord(token);
            result.SetValue(token);
            // The annotations are also set explicitly by key, as in the setter calls above.
            result.Set(typeof(CoreAnnotations.TextAnnotation), token);
            result.Set(typeof(CoreAnnotations.ValueAnnotation), token);
            return result;
        }
Exemplo n.º 8
0
        /// <summary>Copies <paramref name="cl"/> and overrides the copy's text and span with the given part.</summary>
        /// <param name="cl">Label to copy from.</param>
        /// <param name="part">Text used as word, value and original text of the copy.</param>
        /// <param name="beginPosition">Begin position set on the copy.</param>
        /// <param name="endPosition">End position set on the copy.</param>
        /// <returns>The modified copy.</returns>
        private static CoreLabel CopyCoreLabel(CoreLabel cl, string part, int beginPosition, int endPosition)
        {
            CoreLabel piece = new CoreLabel(cl);
            piece.SetWord(part);
            piece.SetValue(part);
            piece.SetBeginPosition(beginPosition);
            piece.SetEndPosition(endPosition);
            piece.Set(typeof(CoreAnnotations.OriginalTextAnnotation), part);
            return piece;
        }
Exemplo n.º 9
0
        /// <summary>Creates a label with the given text as word and value.</summary>
        /// <param name="gloss">Text of the word.</param>
        /// <param name="index">Index to set on the label; negative values leave the index unset.</param>
        /// <returns>The new label.</returns>
        protected internal virtual CoreLabel MkWord(string gloss, int index)
        {
            CoreLabel word = new CoreLabel();
            word.SetWord(gloss);
            word.SetValue(gloss);
            if (index < 0)
            {
                return word;
            }
            word.SetIndex(index);
            return word;
        }
        /// <summary>Create a dummy word, just with a given word at a given index.</summary>
        /// <remarks>Mostly useful for making semantic graphs.</remarks>
        /// <param name="gloss">Text used as both word and value.</param>
        /// <param name="index">Index to set on the label; negative values leave the index unset.</param>
        /// <returns>The new label.</returns>
        public static CoreLabel MkWord(string gloss, int index)
        {
            CoreLabel word = new CoreLabel();
            word.SetWord(gloss);
            word.SetValue(gloss);
            if (index < 0)
            {
                return word;
            }
            word.SetIndex(index);
            return word;
        }
        /// <summary>
        /// Builds a one-element token list used as test input.
        /// </summary>
        /// <remarks>
        /// Arbitrary test input.  We just need to segment something on multiple threads to reproduce
        /// the issue.
        /// </remarks>
        /// <returns>A list holding a single token with fixed word, value and annotations.</returns>
        private static IList <CoreLabel> CreateTestTokens()
        {
            const string text = "你好,世界";

            CoreLabel single = new CoreLabel();
            single.SetWord(text);
            single.SetValue(text);
            single.Set(typeof(CoreAnnotations.ChineseSegAnnotation), "1");
            single.Set(typeof(CoreAnnotations.AnswerAnnotation), "0");

            IList <CoreLabel> result = new List <CoreLabel> { single };
            return result;
        }
Exemplo n.º 12
0
        /// <summary>
        /// Builds the initial <c>State</c> from tagged words: one preterminal (tag node over word node) per input word.
        /// </summary>
        /// <param name="words">
        /// Tagged words. A <see cref="CoreLabel"/> is used directly (and re-indexed in place);
        /// anything else must implement <c>IHasTag</c> and is copied into a fresh label.
        /// </param>
        /// <returns>A state holding the preterminal trees in input order.</returns>
        /// <exception cref="ArgumentException">If a word is not taggable or its tag is null.</exception>
        public static State InitialStateFromTaggedSentence <_T0>(IList <_T0> words)
            where _T0 : IHasWord
        {
            IList <Tree> preterminals = Generics.NewArrayList();

            for (int index = 0; index < words.Count; ++index)
            {
                IHasWord  current  = words[index];
                CoreLabel existing = current as CoreLabel;
                CoreLabel wordLabel;
                string    tag;

                if (existing != null)
                {
                    wordLabel = existing;
                    tag       = existing.Tag();
                }
                else
                {
                    wordLabel = new CoreLabel();
                    wordLabel.SetValue(current.Word());
                    wordLabel.SetWord(current.Word());
                    IHasTag tagged = current as IHasTag;
                    if (tagged == null)
                    {
                        throw new ArgumentException("Expected tagged words");
                    }
                    tag = tagged.Tag();
                    wordLabel.SetTag(tag);
                }

                if (tag == null)
                {
                    throw new ArgumentException("Input word not tagged");
                }

                CoreLabel tagLabel = new CoreLabel();
                tagLabel.SetValue(tag);

                // Index from 1.  Tools downstream from the parser expect that
                // Internally this parser uses the index, so we have to
                // overwrite incorrect indices if the label is already indexed
                int oneBasedIndex = index + 1;
                wordLabel.SetIndex(oneBasedIndex);
                tagLabel.SetIndex(oneBasedIndex);

                LabeledScoredTreeNode wordNode = new LabeledScoredTreeNode(wordLabel);
                LabeledScoredTreeNode tagNode  = new LabeledScoredTreeNode(tagLabel);
                tagNode.AddChild(wordNode);

                // TODO: can we get away with not setting these on the wordLabel?
                wordLabel.Set(typeof(TreeCoreAnnotations.HeadWordLabelAnnotation), wordLabel);
                wordLabel.Set(typeof(TreeCoreAnnotations.HeadTagLabelAnnotation), tagLabel);
                tagLabel.Set(typeof(TreeCoreAnnotations.HeadWordLabelAnnotation), wordLabel);
                tagLabel.Set(typeof(TreeCoreAnnotations.HeadTagLabelAnnotation), tagLabel);

                preterminals.Add(tagNode);
            }
            return new State(preterminals);
        }
Exemplo n.º 13
0
 /// <summary>Splits a compound marked by the lexer.</summary>
 /// <remarks>
 /// Removes the parent annotation from <paramref name="cl"/>, pads every dash in its word with spaces,
 /// splits the result, and appends one copy of the label per part to <c>compoundBuffer</c>.
 /// </remarks>
 /// <param name="cl">The compound label; its parent annotation is removed.</param>
 /// <returns>The result of <c>compoundBuffer.Remove(0)</c> (presumably the first buffered label; confirm against the buffer's type).</returns>
 private CoreLabel ProcessCompound(CoreLabel cl)
 {
     cl.Remove(typeof(CoreAnnotations.ParentAnnotation));
     string[] pieces = cl.Word().ReplaceAll("-", " - ").Split("\\s+");
     for (int i = 0; i < pieces.Length; i++)
     {
         string    piece      = pieces[i];
         CoreLabel pieceLabel = new CoreLabel(cl);
         pieceLabel.SetWord(piece);
         pieceLabel.SetValue(piece);
         pieceLabel.Set(typeof(CoreAnnotations.OriginalTextAnnotation), piece);
         compoundBuffer.Add(pieceLabel);
     }
     return compoundBuffer.Remove(0);
 }
        /// <summary>
        /// Exercises <c>CoreLabel.SetWord</c> against an <c>ArrayCoreMap</c> snapshot, asserting the expected
        /// equality and hash-code results as the word and the lemma are changed.
        /// </summary>
        public virtual void TestCoreLabelSetWordBehavior()
        {
            CoreLabel foo = new CoreLabel();

            foo.Set(typeof(CoreAnnotations.TextAnnotation), "foo");
            foo.Set(typeof(CoreAnnotations.PartOfSpeechAnnotation), "B");
            foo.Set(typeof(CoreAnnotations.LemmaAnnotation), "fool");
            // Scenario under test: the lemma gets removed together with the word.
            // Snapshot the fully annotated label so later states can be compared against it.
            ArrayCoreMap copy = new ArrayCoreMap(foo);

            NUnit.Framework.Assert.AreEqual(copy, foo);
            // Setting the same word again: the label must still equal the snapshot.
            foo.SetWord("foo");
            NUnit.Framework.Assert.AreEqual(copy, foo);
            // Setting a different word: the label must differ from the snapshot (lemma removed).
            foo.SetWord("bar");
            NUnit.Framework.Assert.IsFalse(copy.Equals(foo));
            // Restoring the word alone is not enough: the lemma is expected to stay removed.
            foo.SetWord("foo");
            NUnit.Framework.Assert.IsFalse(copy.Equals(foo));
            // Re-adding the lemma brings the label back to the snapshot state.
            foo.Set(typeof(CoreAnnotations.LemmaAnnotation), "fool");
            NUnit.Framework.Assert.AreEqual(copy, foo);
            // Hash code is consistent: equal to the snapshot's now, and expected to differ
            // once the word has been changed (even after changing it back).
            int hashCode = foo.GetHashCode();

            NUnit.Framework.Assert.AreEqual(copy.GetHashCode(), hashCode);
            foo.SetWord("bar");
            NUnit.Framework.Assert.IsFalse(hashCode == foo.GetHashCode());
            foo.SetWord("foo");
            NUnit.Framework.Assert.IsFalse(hashCode == foo.GetHashCode());
            // Hash code doesn't care between a value of null and the key not existing:
            // the lemma reads as null here, and neither removing the key nor setting it to
            // null may change the hash code.
            NUnit.Framework.Assert.IsTrue(foo.Lemma() == null);
            int lemmalessHashCode = foo.GetHashCode();

            foo.Remove(typeof(CoreAnnotations.LemmaAnnotation));
            NUnit.Framework.Assert.AreEqual(lemmalessHashCode, foo.GetHashCode());
            foo.SetLemma(null);
            NUnit.Framework.Assert.AreEqual(lemmalessHashCode, foo.GetHashCode());
            // Restoring the lemma must restore the original hash code.
            foo.SetLemma("fool");
            NUnit.Framework.Assert.AreEqual(hashCode, foo.GetHashCode());
            // Check equals: put the label back into the lemma-less state, snapshot it, and
            // verify that explicitly removing the lemma key afterwards keeps the two equal.
            foo.SetWord("bar");
            foo.SetWord("foo");
            ArrayCoreMap nulledCopy = new ArrayCoreMap(foo);

            NUnit.Framework.Assert.AreEqual(nulledCopy, foo);
            foo.Remove(typeof(CoreAnnotations.LemmaAnnotation));
            NUnit.Framework.Assert.AreEqual(nulledCopy, foo);
        }
 /// <summary>
 /// Test fixture setup: writes the shared pattern file on first use, then rebuilds
 /// <c>sentences</c> and <c>NERsentences</c> from the <c>words</c>, <c>tags</c> and <c>ner</c> arrays.
 /// </summary>
 public virtual void SetUp()
 {
     lock (typeof(RegexNERSequenceClassifierTest))
     {
         // The pattern file is created only while tempFile is still null.
         if (tempFile == null)
         {
             tempFile = File.CreateTempFile("regexnertest.patterns", "txt");
             FileWriter fout = new FileWriter(tempFile);
             try
             {
                 BufferedWriter bout = new BufferedWriter(fout);
                 bout.Write("sausage\tfood\n");
                 bout.Write("(avocet|curlew)(s?)\tshorebird\n");
                 bout.Write("shoreline park\tpark\n");
                 bout.Flush();
             }
             finally
             {
                 // Close the writer even when a write or flush fails, so the handle is not leaked.
                 fout.Close();
             }
         }
     }
     sentences    = new List <IList <CoreLabel> >();
     NERsentences = new List <IList <CoreLabel> >();
     // The three parallel arrays must describe the same number of sentences.
     NUnit.Framework.Assert.AreEqual(words.Length, tags.Length);
     NUnit.Framework.Assert.AreEqual(words.Length, ner.Length);
     for (int snum = 0; snum < words.Length; ++snum)
     {
         string[] wordPieces = words[snum].Split(" ");
         string[] tagPieces  = tags[snum].Split(" ");
         string[] nerPieces  = ner[snum].Split(" ");
         NUnit.Framework.Assert.AreEqual(wordPieces.Length, tagPieces.Length);
         NUnit.Framework.Assert.AreEqual(wordPieces.Length, nerPieces.Length, "Input " + snum + " " + words[snum] + " of different length than " + ner[snum]);
         IList <CoreLabel> sentence    = new List <CoreLabel>();
         IList <CoreLabel> NERsentence = new List <CoreLabel>();
         for (int wnum = 0; wnum < wordPieces.Length; ++wnum)
         {
             // Plain token: word and tag only.
             CoreLabel token = new CoreLabel();
             token.SetWord(wordPieces[wnum]);
             token.SetTag(tagPieces[wnum]);
             sentence.Add(token);
             // Same token, additionally carrying the NER label from the ner array.
             CoreLabel NERtoken = new CoreLabel();
             NERtoken.SetWord(wordPieces[wnum]);
             NERtoken.SetTag(tagPieces[wnum]);
             NERtoken.SetNER(nerPieces[wnum]);
             NERsentence.Add(NERtoken);
         }
         sentences.Add(sentence);
         NERsentences.Add(NERsentence);
     }
 }
Exemplo n.º 16
0
        /// <summary>
        /// Convert a list of tokens to a list of character datums suitable for labeling in an IOB
        /// segmentation model.
        /// </summary>
        /// <param name="tokenList">Tokens to convert.</param>
        /// <param name="segMarker">Segmentation marker character; passed as a string to <c>GetTokenType</c>.</param>
        /// <param name="applyRewriteRules">add rewrite labels (for training data)</param>
        /// <param name="stripRewrites">
        /// revert training data to old Green and DeNero model (remove
        /// rewrite labels but still rewrite to try to preserve raw text)
        /// </param>
        /// <param name="tf">a TokenizerFactory returning ArabicTokenizers (for determining original segment boundaries)</param>
        /// <param name="origText">the original string before tokenization (for determining original segment boundaries)</param>
        /// <returns>The datums produced for all tokens, including boundary datums between words.</returns>
        public static IList <CoreLabel> StringToIOB(IList <CoreLabel> tokenList, char segMarker, bool applyRewriteRules, bool stripRewrites, ITokenizerFactory <CoreLabel> tf, string origText)
        {
            IList <CoreLabel> iobList = new List <CoreLabel>(tokenList.Count * 7 + tokenList.Count);
            string markerText      = segMarker.ToString();
            bool   boundaryPending = false;
            string previousToken   = string.Empty;
            string currentWord     = string.Empty;
            int    wordStartIndex  = 0;

            foreach (CoreLabel cl in tokenList)
            {
                if (boundaryPending)
                {
                    // The previous word is complete: back-fill its statistics, then emit a boundary datum.
                    FillInWordStatistics(iobList, currentWord, wordStartIndex);
                    currentWord = string.Empty;
                    // The next word starts right after the boundary datum added below.
                    wordStartIndex = iobList.Count + 1;
                    CoreLabel boundaryDatum = CreateDatum(cl, BoundaryChar, BoundarySymbol);
                    iobList.Add(boundaryDatum);
                    boundaryDatum.SetIndex(0);
                    boundaryDatum.SetWord(string.Empty);
                    boundaryPending = false;
                }

                // What type of token is this
                string             rawToken = cl.Word();
                IOBUtils.TokenType tokType  = GetTokenType(rawToken, markerText);
                string             token    = StripSegmentationMarkers(rawToken, tokType);
                System.Diagnostics.Debug.Assert(token.Length != 0);

                if (!ShouldNotSegment(token))
                {
                    // Iterate over the characters in the token
                    TokenToDatums(iobList, cl, token, tokType, cl, previousToken, applyRewriteRules, stripRewrites, tf, origText);
                    boundaryPending = (tokType == IOBUtils.TokenType.BeginMarker || tokType == IOBUtils.TokenType.NoMarker);
                }
                else
                {
                    iobList.Add(CreateDatum(cl, token, NosegSymbol));
                    boundaryPending = true;
                }

                currentWord  += token;
                previousToken = token;
            }

            // Back-fill statistics for the final word.
            FillInWordStatistics(iobList, currentWord, wordStartIndex);
            return iobList;
        }
Exemplo n.º 17
0
        /// <summary>Splits a compound marked by the lexer.</summary>
        /// <remarks>
        /// Removes the parent annotation from <paramref name="cl"/>, pads dashes (as matched by <c>pDash</c>)
        /// with spaces, splits with <c>pSpace</c>, and appends one copy of the label per part to
        /// <c>compoundBuffer</c>. Each copy's begin/end position is offset from the compound's begin
        /// position by the summed length of the preceding parts.
        /// </remarks>
        /// <param name="cl">The compound label; its parent annotation is removed.</param>
        /// <returns>The result of <c>compoundBuffer.Remove(0)</c> (presumably the first buffered label; confirm against the buffer's type).</returns>
        private CoreLabel ProcessCompound(CoreLabel cl)
        {
            cl.Remove(typeof(CoreAnnotations.ParentAnnotation));
            string[] pieces   = pSpace.Split(pDash.Matcher(cl.Word()).ReplaceAll(" - "));
            int      consumed = 0;

            foreach (string piece in pieces)
            {
                int       begin      = cl.BeginPosition() + consumed;
                CoreLabel pieceLabel = new CoreLabel(cl);
                pieceLabel.SetWord(piece);
                pieceLabel.SetValue(piece);
                pieceLabel.SetBeginPosition(begin);
                pieceLabel.SetEndPosition(begin + piece.Length);
                pieceLabel.Set(typeof(CoreAnnotations.OriginalTextAnnotation), piece);
                compoundBuffer.Add(pieceLabel);
                consumed += piece.Length;
            }
            return compoundBuffer.Remove(0);
        }
Exemplo n.º 18
0
        /// <summary>
        /// Segments <paramref name="line"/> into IOB-labeled datums and turns each token span into a
        /// <see cref="CoreLabel"/> carrying its text, original text and character offsets.
        /// </summary>
        /// <param name="line">The text to segment.</param>
        /// <returns>One token per span reported by <c>IOBUtils.TokenSpansForIOB</c>.</returns>
        public virtual IList <CoreLabel> SegmentStringToTokenList(string line)
        {
            IList <CoreLabel> result  = CollectionUtils.MakeList();
            IList <CoreLabel> labeled = SegmentStringToIOB(line);

            foreach (IntPair span in IOBUtils.TokenSpansForIOB(labeled))
            {
                CoreLabel segment = new CoreLabel();
                string    surface = IOBUtils.IOBToString(labeled, prefixMarker, suffixMarker, span.GetSource(), span.GetTarget());
                segment.SetWord(surface);
                segment.SetValue(surface);
                segment.Set(typeof(CoreAnnotations.TextAnnotation), surface);
                segment.Set(typeof(CoreAnnotations.ArabicSegAnnotation), "1");

                // Character offsets: begin of the span's first datum, end of the datum before the span's target.
                CoreLabel firstDatum = labeled[span.GetSource()];
                CoreLabel lastDatum  = labeled[span.GetTarget() - 1];
                int       start      = firstDatum.BeginPosition();
                int       end        = lastDatum.EndPosition();

                segment.SetOriginalText(Sharpen.Runtime.Substring(line, start, end));
                segment.Set(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation), start);
                segment.Set(typeof(CoreAnnotations.CharacterOffsetEndAnnotation), end);
                result.Add(segment);
            }
            return result;
        }
Exemplo n.º 19
0
        /// <summary>Creates a dependency between two words given only their text.</summary>
        /// <param name="regent">Text of the governor; must not be null.</param>
        /// <param name="dependent">Text of the dependent; must not be null.</param>
        /// <exception cref="ArgumentException">If either argument is null.</exception>
        public UnnamedDependency(string regent, string dependent)
        {
            bool missingArgument = regent == null || dependent == null;
            if (missingArgument)
            {
                throw new ArgumentException("governor or dependent cannot be null");
            }

            // Governor label: value and word both carry the regent text.
            var governorLabel = new CoreLabel();
            governorLabel.SetValue(regent);
            governorLabel.SetWord(regent);
            this._regent = governorLabel;

            // Dependent label, built the same way.
            var dependentLabel = new CoreLabel();
            dependentLabel.SetValue(dependent);
            dependentLabel.SetWord(dependent);
            this._dependent = dependentLabel;

            // The raw texts are also kept alongside the labels.
            RegentText    = regent;
            DependentText = dependent;
        }
Exemplo n.º 20
0
 /// <summary>
 /// Builds a fresh <see cref="CoreLabel"/> describing the wrapped parse node.
 /// </summary>
 /// <returns>
 /// For a leaf: a label with word, value and begin/end positions taken from the parse.
 /// Otherwise: a label with category and value set to the parse type, plus the tag when the depth is 1.
 /// </returns>
 public override ILabel Label()
 {
     // TODO: move this CoreLabel construction logic somewhere appropriate
     var label = new CoreLabel();
     if (!this.parse.IsLeaf)
     {
         label.SetCategory(this.parse.Type);
         label.SetValue(this.parse.Type);
         bool hasDepthOne = this.Depth() == 1;
         if (hasDepthOne)
         {
             label.SetTag(this.parse.Type);
         }
         return label;
     }

     label.SetWord(this.parse.Value);
     label.SetBeginPosition(this.parse.Span.Start);
     label.SetEndPosition(this.parse.Span.End);
     label.SetValue(this.parse.Value);
     return label;
 }
Exemplo n.º 21
0
        /// <summary>
        /// Builds a fresh <see cref="CoreLabel"/> describing the wrapped parse node.
        /// </summary>
        /// <returns>
        /// For a leaf: a label with word, value and begin/end positions taken from the parse.
        /// Otherwise: a label with category and value set to the parse type, plus the tag when the depth is 1.
        /// </returns>
        public override ILabel Label()
        {
            // TODO: move this CoreLabel construction logic somewhere appropriate
            var label = new CoreLabel();

            if (!this.parse.IsLeaf)
            {
                label.SetCategory(this.parse.Type);
                label.SetValue(this.parse.Type);
                bool hasDepthOne = this.Depth() == 1;
                if (hasDepthOne)
                {
                    label.SetTag(this.parse.Type);
                }
                return label;
            }

            label.SetWord(this.parse.Value);
            label.SetBeginPosition(this.parse.Span.Start);
            label.SetEndPosition(this.parse.Span.End);
            label.SetValue(this.parse.Value);
            return label;
        }
 /// <summary>
 /// Normalizes a whole tree: prunes and splices it with the configured filters, moves morphological
 /// analyses from leaf values into the label's original text, copies preterminal values into tags,
 /// wraps leaves hanging directly under phrasal nodes in a DUMMYTAG node, applies the optional
 /// PRD-verb and NP-subject rewrites, and finally ensures the tree is rooted at <c>rootLabel</c>.
 /// </summary>
 /// <param name="tree">The tree to normalize.</param>
 /// <param name="tf">Factory used for pruning, splicing and for creating new nodes.</param>
 /// <returns>
 /// The normalized tree, or null when stripping empty enclosing nodes leaves nothing
 /// (readers such as PennTreeReader will then try to read the next tree).
 /// </returns>
 public override Tree NormalizeWholeTree(Tree tree, ITreeFactory tf)
 {
     tree = tree.Prune(emptyFilter, tf).SpliceOut(aOverAFilter, tf);
     foreach (Tree t in tree)
     {
         if (t.IsLeaf())
         {
             //Strip off morphological analyses and place them in the OriginalTextAnnotation, which is
             //specified by HasContext.
             if (t.Value().Contains(MorphoFeatureSpecification.MorphoMark))
             {
                 string[] toks = t.Value().Split(MorphoFeatureSpecification.MorphoMark);
                 if (toks.Length != 2)
                 {
                     // .NET composite formatting needs {n} placeholders; Java-style %s is printed literally.
                     log.Err(string.Format("{0}: Word contains malformed morph annotation: {1}", this.GetType().FullName, t.Value()));
                 }
                 else if (t.Label() is CoreLabel)
                 {
                     CoreLabel cl = (CoreLabel)t.Label();
                     cl.SetValue(string.Intern(toks[0].Trim()));
                     cl.SetWord(string.Intern(toks[0].Trim()));
                     Pair <string, string> lemmaMorph = MorphoFeatureSpecification.SplitMorphString(toks[0], toks[1]);
                     string lemma         = lemmaMorph.First();
                     string morphAnalysis = lemmaMorph.Second();
                     if (lemma.Equals(toks[0]))
                     {
                         // Lemma equals the surface form: keep the raw analysis string.
                         cl.SetOriginalText(string.Intern(toks[1].Trim()));
                     }
                     else
                     {
                         // TODO(spenceg): Does this help?
                         string newLemma = lexMapper.Map(null, lemma);
                         if (newLemma == null || newLemma.Trim().IsEmpty())
                         {
                             // Mapping produced nothing usable: fall back to the unmapped lemma.
                             newLemma = lemma;
                         }
                         string newMorphAnalysis = newLemma + MorphoFeatureSpecification.LemmaMark + morphAnalysis;
                         cl.SetOriginalText(string.Intern(newMorphAnalysis));
                     }
                 }
                 else
                 {
                     log.Error(string.Format("{0}: Cannot store morph analysis in non-CoreLabel: {1}", this.GetType().FullName, t.Label().GetType().FullName));
                 }
             }
         }
         else if (t.IsPreTerminal())
         {
             if (t.Value() == null || t.Value().IsEmpty())
             {
                 log.Warn(string.Format("{0}: missing tag for {1}", this.GetType().FullName, t.PennString()));
             }
             else if (t.Label() is IHasTag)
             {
                 ((IHasTag)t.Label()).SetTag(t.Value());
             }
         }
         else
         {
             //Phrasal nodes
             // there are some nodes "/" missing preterminals.  We'll splice in a tag for these.
             int          nk      = t.NumChildren();
             IList <Tree> newKids = new List <Tree>(nk);
             for (int j = 0; j < nk; j++)
             {
                 Tree child = t.GetChild(j);
                 if (child.IsLeaf())
                 {
                     log.Warn(string.Format("{0}: Splicing in DUMMYTAG for {1}", this.GetType().FullName, t.ToString()));
                     newKids.Add(tf.NewTreeNode("DUMMYTAG", Java.Util.Collections.SingletonList(child)));
                 }
                 else
                 {
                     newKids.Add(child);
                 }
             }
             t.SetChildren(newKids);
         }
     }
     //Every node in the tree has now been processed
     //
     // Additional processing for specific phrasal annotations
     //
     // special global coding for moving PRD annotation from constituent to verb tag.
     if (markPRDverb)
     {
         TregexMatcher m     = prdVerbPattern.Matcher(tree);
         Tree          match = null;
         while (m.Find())
         {
             // Only rewrite when the matched node differs from the previous match,
             // so the suffix is not appended twice in a row to the same node.
             if (m.GetMatch() != match)
             {
                 match = m.GetMatch();
                 match.Label().SetValue(match.Label().Value() + "-PRDverb");
                 Tree prd = m.GetNode("prd");
                 prd.Label().SetValue(base.NormalizeNonterminal(prd.Label().Value()));
             }
         }
     }
     //Mark *only* subjects in verb-initial clauses
     if (retainNPSbj)
     {
         TregexMatcher m = npSbjPattern.Matcher(tree);
         while (m.Find())
         {
             Tree match = m.GetMatch();
             match.Label().SetValue("NP");
         }
     }
     if (tree.IsPreTerminal())
     {
         // The whole tree is a bare tag: bad!
         string val = tree.Label().Value();
         if (val.Equals("CC") || val.StartsWith("PUNC") || val.Equals("CONJ"))
         {
             log.Warn(string.Format("{0}: Bare tagged word being wrapped in FRAG {1}", this.GetType().FullName, tree.PennString()));
             tree = tf.NewTreeNode("FRAG", Java.Util.Collections.SingletonList(tree));
         }
         else
         {
             log.Warn(string.Format("{0}: Bare tagged word {1}", this.GetType().FullName, tree.PennString()));
         }
     }
     //Add start symbol so that the root has only one sub-state. Escape any enclosing brackets.
     //If the "tree" consists entirely of enclosing brackets e.g. ((())) then this method
     //will return null. In this case, readers e.g. PennTreeReader will try to read the next tree.
     while (tree != null && (tree.Value() == null || tree.Value().IsEmpty()) && tree.NumChildren() <= 1)
     {
         tree = tree.FirstChild();
     }
     if (tree != null && !tree.Value().Equals(rootLabel))
     {
         tree = tf.NewTreeNode(rootLabel, Java.Util.Collections.SingletonList(tree));
     }
     return tree;
 }
        /// <summary>
        /// Rewrites the leaves of a tree in place, optionally replacing each surface form with its lemma
        /// and/or appending lemma and morphological analysis to the leaf text.
        /// </summary>
        /// <remarks>
        /// Increments <c>nTokens</c> for every leaf and <c>nMorphAnalyses</c> for every leaf that has a
        /// non-empty original text when <paramref name="addMorphoToLeaves"/> is set.
        /// </remarks>
        /// <param name="tree">Tree whose yield is rewritten; all leaf labels must be <see cref="CoreLabel"/>s.</param>
        /// <param name="lemmasAsLeaves">If true, word and value of each leaf are replaced by the lemma.</param>
        /// <param name="addMorphoToLeaves">
        /// If true, word and value become value + MorphoMark + lemma + LemmaMark + analysis, where the
        /// analysis is the label's original text (or <c>NoAnalysis</c> when that is missing or empty).
        /// </param>
        /// <exception cref="ArgumentException">If a leaf label is not a <see cref="CoreLabel"/>.</exception>
        public static void MungeLeaves(Tree tree, bool lemmasAsLeaves, bool addMorphoToLeaves)
        {
            IList <ILabel> labels = tree.Yield();

            foreach (ILabel label in labels)
            {
                ++nTokens;
                if (!(label is CoreLabel))
                {
                    throw new ArgumentException("Only works with CoreLabels trees");
                }
                CoreLabel coreLabel = (CoreLabel)label;
                string    lemma     = coreLabel.Lemma();
                //PTB escaping since we're going to put this in the leaf
                if (lemma == null)
                {
                    // No lemma, so just add the surface form
                    lemma = coreLabel.Word();
                }
                else if (lemma.Equals("("))
                {
                    lemma = "-LRB-";
                }
                else if (lemma.Equals(")"))
                {
                    lemma = "-RRB-";
                }
                if (lemmasAsLeaves)
                {
                    coreLabel.SetWord(lemma);
                    coreLabel.SetValue(lemma);
                    // Set the lemma after the word: changing the word may drop an existing lemma.
                    coreLabel.SetLemma(lemma);
                }
                if (addMorphoToLeaves)
                {
                    string morphStr = coreLabel.OriginalText();
                    if (morphStr == null || morphStr.Equals(string.Empty))
                    {
                        morphStr = MorphoFeatureSpecification.NoAnalysis;
                    }
                    else
                    {
                        ++nMorphAnalyses;
                    }
                    // Normalize punctuation analyses
                    if (morphStr.StartsWith("PONCT"))
                    {
                        morphStr = "PUNC";
                    }
                    // .NET composite formatting needs {n} placeholders; Java-style %s is printed literally.
                    string newLeaf = string.Format("{0}{1}{2}{3}{4}", coreLabel.Value(), MorphoFeatureSpecification.MorphoMark, lemma, MorphoFeatureSpecification.LemmaMark, morphStr);
                    coreLabel.SetValue(newLeaf);
                    coreLabel.SetWord(newLeaf);
                }
            }
        }
Exemplo n.º 24
0
        /// <summary>
        /// Builds an <c>Annotation</c> holding exactly one sentence from parallel per-token lists.
        /// </summary>
        /// <remarks>
        /// Each token receives its word, value, lemma, POS tag, NER tag, document id, sentence
        /// index, 1-based index and token span. Character positions are computed as if the
        /// words were separated by exactly one character. The graph returned by
        /// <paramref name="tree"/> is stored under the basic, collapsed and CC-processed
        /// dependency keys; the graph returned by <paramref name="maltTree"/> is stored under
        /// the alternative dependency key.
        /// </remarks>
        /// <param name="docid">Document id; "???" is used when absent.</param>
        /// <param name="sentenceIndex">Sentence index; -1 is used when absent.</param>
        /// <param name="gloss">Sentence text; the two-character escapes <c>\n</c> and <c>\t</c> are expanded.</param>
        /// <param name="tree">Builds the main dependency graph from the tokens.</param>
        /// <param name="maltTree">Builds the alternative dependency graph from the tokens.</param>
        /// <param name="words">Token surface forms.</param>
        /// <param name="lemmas">Lemmas, parallel to <paramref name="words"/>.</param>
        /// <param name="pos">POS tags, parallel to <paramref name="words"/>.</param>
        /// <param name="ner">NER tags, parallel to <paramref name="words"/>.</param>
        /// <param name="sentenceid">Sentence id, used only in error messages ("???" when absent).</param>
        /// <returns>The document annotation wrapping the single sentence.</returns>
        /// <exception cref="System.ArgumentException">
        /// If <paramref name="lemmas"/>, <paramref name="pos"/> or <paramref name="ner"/> differs in length from <paramref name="words"/>.
        /// </exception>
        private static Annotation ParseSentence(Optional<string> docid, Optional<int> sentenceIndex, string gloss, Func<IList<CoreLabel>, SemanticGraph> tree, Func<IList<CoreLabel>, SemanticGraph> maltTree, IList<string> words, IList<string> lemmas, IList<string> pos, IList<string> ner, Optional<string> sentenceid)
        {
            // Every per-token column must line up with the words (checked in the order lemmas, pos, ner).
            foreach (IList<string> column in new IList<string>[] { lemmas, pos, ner })
            {
                if (column.Count != words.Count)
                {
                    throw new ArgumentException("Array lengths don't match: " + words.Count + " vs " + column.Count + " (sentence " + sentenceid.OrElse("???") + ")");
                }
            }

            // Token layer
            IList<CoreLabel> tokens = new List<CoreLabel>(words.Count);
            int charCursor = 0;
            for (int position = 0; position < words.Count; ++position)
            {
                string surface = words[position];
                CoreLabel token = new CoreLabel(12);
                token.SetWord(surface);
                token.SetValue(surface);
                token.SetBeginPosition(charCursor);
                token.SetEndPosition(charCursor + surface.Length);
                // step over the word plus one separator character
                charCursor += surface.Length + 1;
                token.SetLemma(lemmas[position]);
                token.SetTag(pos[position]);
                token.SetNER(ner[position]);
                token.Set(typeof(CoreAnnotations.DocIDAnnotation), docid.OrElse("???"));
                token.Set(typeof(CoreAnnotations.SentenceIndexAnnotation), sentenceIndex.OrElse(-1));
                token.Set(typeof(CoreAnnotations.IndexAnnotation), position + 1);
                token.Set(typeof(CoreAnnotations.TokenBeginAnnotation), position);
                token.Set(typeof(CoreAnnotations.TokenEndAnnotation), position + 1);
                tokens.Add(token);
            }

            // Expand the escaped newline / tab sequences in the gloss.
            gloss = gloss.Replace("\\n", "\n").Replace("\\t", "\t");

            // Sentence layer
            ICoreMap sentence = new ArrayCoreMap(16);
            sentence.Set(typeof(CoreAnnotations.TokensAnnotation), tokens);

            // One graph serves as the basic, collapsed and CC-processed dependencies.
            SemanticGraph dependencies = tree.Apply(tokens);
            sentence.Set(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation), dependencies);
            sentence.Set(typeof(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation), dependencies);
            sentence.Set(typeof(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation), dependencies);

            SemanticGraph alternativeDependencies = maltTree.Apply(tokens);
            sentence.Set(typeof(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation), alternativeDependencies);

            sentence.Set(typeof(CoreAnnotations.DocIDAnnotation), docid.OrElse("???"));
            sentence.Set(typeof(CoreAnnotations.SentenceIndexAnnotation), sentenceIndex.OrElse(-1));
            sentence.Set(typeof(CoreAnnotations.TextAnnotation), gloss);
            sentence.Set(typeof(CoreAnnotations.TokenBeginAnnotation), 0);
            sentence.Set(typeof(CoreAnnotations.TokenEndAnnotation), tokens.Count);

            // Document layer
            Annotation document = new Annotation(gloss);
            document.Set(typeof(CoreAnnotations.TokensAnnotation), tokens);
            document.Set(typeof(CoreAnnotations.SentencesAnnotation), Java.Util.Collections.SingletonList(sentence));
            document.Set(typeof(CoreAnnotations.DocIDAnnotation), docid.OrElse("???"));
            document.Set(typeof(CoreAnnotations.SentenceIndexAnnotation), sentenceIndex.OrElse(-1));
            return document;
        }
        /// <summary>
        /// Reads inline-labelled text, one entry per line, and tokenizes it into sentences.
        /// </summary>
        /// <remarks>
        /// A line is either <c>id&lt;TAB&gt;text</c> or just <c>text</c>; in the latter case the
        /// zero-based line number is used as the id. Tokens of the form <c>&lt;CATEGORY&gt;</c> and
        /// <c>&lt;/CATEGORY&gt;</c>, for the categories in <paramref name="categoriesAllowed"/>,
        /// are treated as markup: they switch the current label on and off and are not added
        /// to the sentence. The current label is reset to "O" at the start of every line but
        /// carries over from one sentence to the next within a line.
        /// </remarks>
        /// <param name="reader">Source of the input lines; read until <c>ReadLine()</c> returns null.</param>
        /// <param name="categoriesAllowed">Category names recognised as opening/closing label tokens.</param>
        /// <param name="setClassForTheseLabels">
        /// Optional map from label to annotation key; when the current label is a key of this
        /// map, the label is also stored on the token under the mapped key. May be null.
        /// </param>
        /// <param name="setGoldClass">Whether to store the current label as the gold answer on every token.</param>
        /// <param name="sentIDprefix">Prefix prepended to every line id.</param>
        /// <returns>One map per sentence, with text, tokens and a document id of the form <c>id-sentenceNumber</c>.</returns>
        /// <exception cref="System.IO.IOException"/>
        public static IList <ICoreMap> ParseFile(BufferedReader reader, ICollection <string> categoriesAllowed, IDictionary <string, Type> setClassForTheseLabels, bool setGoldClass, string sentIDprefix)
        {
            string       categoryAlternation = StringUtils.Join(categoriesAllowed, "|");
            Pattern      startingLabelToken  = Pattern.Compile("<(" + categoryAlternation + ")>");
            Pattern      endLabelToken       = Pattern.Compile("</(" + categoryAlternation + ")>");
            const string backgroundSymbol    = "O";
            // The tokenizer options are constant, so a single factory is shared by every line
            // instead of being rebuilt on each iteration.
            PTBTokenizer.PTBTokenizerFactory <CoreLabel> tokenizerFactory = PTBTokenizer.PTBTokenizerFactory.NewCoreLabelTokenizerFactory("ptb3Escaping=false,normalizeParentheses=false,escapeForwardSlashAsterisk=false");
            IList <ICoreMap> sentences = new List <ICoreMap>();
            int    lineNum = -1;
            string l;

            while ((l = reader.ReadLine()) != null)
            {
                lineNum++;
                // Split at the first tab only; the result always has one or two elements.
                string[] t = l.Split(new[] { '\t' }, 2);
                string   id;
                string   text;
                if (t.Length == 2)
                {
                    id   = t[0];
                    text = t[1];
                }
                else
                {
                    // No explicit id on this line: fall back to the line number.
                    text = t[0];
                    id   = lineNum.ToString();
                }
                id = sentIDprefix + id;
                DocumentPreprocessor dp = new DocumentPreprocessor(new StringReader(text));
                dp.SetTokenizerFactory(tokenizerFactory);
                // The label is deliberately scoped to the line, not the sentence, so a span
                // opened in one sentence stays open in the following ones.
                string label   = backgroundSymbol;
                int    sentNum = -1;
                foreach (IList <IHasWord> sentence in dp)
                {
                    sentNum++;
                    IList <string>    sentWords = new List <string>();
                    IList <CoreLabel> sent      = new List <CoreLabel>();
                    foreach (IHasWord tokw in sentence)
                    {
                        string  tok             = tokw.Word();
                        Matcher startingMatcher = startingLabelToken.Matcher(tok);
                        if (startingMatcher.Matches())
                        {
                            // Opening tag: switch to the named category and drop the tag token.
                            label = startingMatcher.Group(1);
                            continue;
                        }
                        if (endLabelToken.Matcher(tok).Matches())
                        {
                            // Closing tag: back to the background label, tag token dropped.
                            label = backgroundSymbol;
                            continue;
                        }
                        CoreLabel c = new CoreLabel();
                        c.SetWord(tok);
                        c.SetLemma(tok);
                        c.SetValue(tok);
                        c.Set(typeof(CoreAnnotations.TextAnnotation), tok);
                        c.Set(typeof(CoreAnnotations.OriginalTextAnnotation), tok);
                        if (setGoldClass)
                        {
                            c.Set(typeof(CoreAnnotations.GoldAnswerAnnotation), label);
                        }
                        // Single keyed lookup instead of a containment test followed by the indexer.
                        Type labelClass;
                        if (setClassForTheseLabels != null && setClassForTheseLabels.TryGetValue(label, out labelClass))
                        {
                            c.Set(labelClass, label);
                        }
                        sent.Add(c);
                        sentWords.Add(tok);
                    }
                    ICoreMap sentcm = new ArrayCoreMap();
                    // Join once rather than growing a string token by token.
                    sentcm.Set(typeof(CoreAnnotations.TextAnnotation), string.Join(" ", sentWords).Trim());
                    sentcm.Set(typeof(CoreAnnotations.TokensAnnotation), sent);
                    sentcm.Set(typeof(CoreAnnotations.DocIDAnnotation), id + "-" + sentNum);
                    sentences.Add(sentcm);
                }
            }
            return(sentences);
        }
Exemplo n.º 26
0
        /// <summary>
        /// Parses one ACE document and converts each of its sentences, together with the
        /// entity, relation and event mentions found in it, into a sentence-level map.
        /// </summary>
        /// <remarks>
        /// A "sentence" consisting of a single token that starts with <c>&lt;</c> and ends with
        /// <c>&gt;</c> is treated as an SGML tag and skipped, but its token still counts towards
        /// the running token offset passed to the entity and event mention converters.
        /// </remarks>
        /// <param name="prefix">
        /// prefix of ACE filename to read (e.g.
        /// "/u/mcclosky/scr/data/ACE2005/english_test/bc/CNN_CF_20030827.1630.01"
        /// ) (no ".apf.xml" extension)
        /// </param>
        /// <param name="corpus">Not referenced by this method.</param>
        /// <returns>the converted sentences, in document order</returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Org.Xml.Sax.SAXException"/>
        /// <exception cref="Javax.Xml.Parsers.ParserConfigurationException"/>
        private IList <ICoreMap> ReadDocument(string prefix, Annotation corpus)
        {
            logger.Info("Reading document: " + prefix);
            IList<ICoreMap> converted = new List<ICoreMap>();

            // ACE2004 documents need the version passed to the parser explicitly.
            AceDocument aceDocument = aceVersion.Equals("ACE2004")
                ? AceDocument.ParseDocument(prefix, false, aceVersion)
                : AceDocument.ParseDocument(prefix, false);
            string docId = aceDocument.GetId();

            // ACE entity mention id -> converted mention, consulted by the relation and event converters
            IDictionary<string, EntityMention> entityMentionMap = Generics.NewHashMap();

            // number of tokens in all sentences before the current one (skipped ones included)
            int tokenOffset = 0;

            for (int sentenceIndex = 0; sentenceIndex < aceDocument.GetSentenceCount(); sentenceIndex++)
            {
                IList<AceToken> aceTokens = aceDocument.GetSentence(sentenceIndex);
                IList<CoreLabel> words = new List<CoreLabel>();
                StringBuilder textContent = new StringBuilder();
                foreach (AceToken aceToken in aceTokens)
                {
                    // single space between tokens, none before the first
                    if (words.Count > 0)
                    {
                        textContent.Append(" ");
                    }
                    CoreLabel label = new CoreLabel();
                    label.SetWord(aceToken.GetLiteral());
                    label.Set(typeof(CoreAnnotations.ValueAnnotation), label.Word());
                    label.Set(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation), aceToken.GetByteStart());
                    label.Set(typeof(CoreAnnotations.CharacterOffsetEndAnnotation), aceToken.GetByteEnd());
                    words.Add(label);
                    textContent.Append(aceToken.GetLiteral());
                }

                // skip "sentences" that are really just SGML tags (which come from using the RobustTokenizer)
                if (words.Count == 1)
                {
                    string onlyWord = words[0].Word();
                    if (onlyWord.StartsWith("<") && onlyWord.EndsWith(">"))
                    {
                        tokenOffset += aceTokens.Count;
                        continue;
                    }
                }

                string sentenceText = textContent.ToString();
                ICoreMap sentence = new Annotation(sentenceText);
                sentence.Set(typeof(CoreAnnotations.DocIDAnnotation), docId);
                sentence.Set(typeof(CoreAnnotations.TokensAnnotation), words);
                logger.Info("Reading sentence: \"" + sentenceText + "\"");

                IList<AceEntityMention> entityMentions = aceDocument.GetEntityMentions(sentenceIndex);
                IList<AceRelationMention> relationMentions = aceDocument.GetRelationMentions(sentenceIndex);
                IList<AceEventMention> eventMentions = aceDocument.GetEventMentions(sentenceIndex);

                // entity mentions
                foreach (AceEntityMention aceEntityMention in entityMentions)
                {
                    // The coref id is the id of the first entity listing this mention; empty if none does.
                    string corefID = string.Empty;
                    foreach (string candidateId in aceDocument.GetKeySetEntities())
                    {
                        if (aceDocument.GetEntity(candidateId).GetMentions().Contains(aceEntityMention))
                        {
                            corefID = candidateId;
                            break;
                        }
                    }
                    EntityMention entityMention = ConvertAceEntityMention(aceEntityMention, docId, sentence, tokenOffset, corefID);
                    entityCounts.IncrementCount(entityMention.GetType());
                    logger.Info("CONVERTED MENTION HEAD SPAN: " + entityMention.GetHead());
                    logger.Info("CONVERTED ENTITY MENTION: " + entityMention);
                    AnnotationUtils.AddEntityMention(sentence, entityMention);
                    entityMentionMap[aceEntityMention.GetId()] = entityMention;
                }
                // TODO: make Entity objects as needed

                // relation mentions (the converter may return null, in which case nothing is recorded)
                foreach (AceRelationMention aceRelationMention in relationMentions)
                {
                    RelationMention relationMention = ConvertAceRelationMention(aceRelationMention, docId, sentence, entityMentionMap);
                    if (relationMention == null)
                    {
                        continue;
                    }
                    relationCounts.IncrementCount(relationMention.GetType());
                    logger.Info("CONVERTED RELATION MENTION: " + relationMention);
                    AnnotationUtils.AddRelationMention(sentence, relationMention);
                }
                // TODO: make Relation objects

                // event mentions (same null handling as relations)
                foreach (AceEventMention aceEventMention in eventMentions)
                {
                    EventMention eventMention = ConvertAceEventMention(aceEventMention, docId, sentence, entityMentionMap, tokenOffset);
                    if (eventMention == null)
                    {
                        continue;
                    }
                    eventCounts.IncrementCount(eventMention.GetType());
                    logger.Info("CONVERTED EVENT MENTION: " + eventMention);
                    AnnotationUtils.AddEventMention(sentence, eventMention);
                }
                // TODO: make Event objects

                converted.Add(sentence);
                tokenOffset += aceTokens.Count;
            }
            return converted;
        }
        /// <summary>
        /// Reads one sentence, with its entity and relation mentions, from the line iterator.
        /// </summary>
        /// <remarks>
        /// Lines are classified by the number of pieces they split into: one piece is counted
        /// as a blank line, three pieces form a relation between two previously seen entities,
        /// and nine pieces form a token line (Roth format). Lines with any other number of
        /// pieces are ignored. Reading stops after two blank lines or when the iterator is
        /// exhausted.
        /// </remarks>
        /// <param name="docId">Document id stored on the returned sentence.</param>
        /// <param name="lineIterator">
        /// Enumerator over the input lines. It is advanced only while the sentence is still
        /// open, so on return it is positioned on the last line this method processed.
        /// </param>
        /// <returns>The sentence annotation with text, value, tokens, sentence id and mentions set.</returns>
        private Annotation ReadSentence(string docId, IEnumerator <string> lineIterator)
        {
            Annotation sentence = new Annotation(string.Empty);

            sentence.Set(typeof(CoreAnnotations.DocIDAnnotation), docId);
            sentence.Set(typeof(MachineReadingAnnotations.EntityMentionsAnnotation), new List <EntityMention>());
            // we'll need to set things like the tokens and textContent after we've
            // fully read the sentence
            // contains the full text that we've read so far
            StringBuilder textContent = new StringBuilder();
            // how many tokens we've seen so far
            int tokenCount = 0;
            IList <CoreLabel> tokens = new List <CoreLabel>();
            // when we've seen two blank lines in a row, this sentence is over (one
            // blank line separates the sentence and the relations)
            int    numBlankLinesSeen = 0;
            string sentenceID        = null;
            // keeps tracks of entities we've seen so far for use by relations
            IDictionary <string, EntityMention> indexToEntityMention = new Dictionary <string, EntityMention>();

            // The counter must be tested BEFORE MoveNext(): MoveNext() advances the enumerator,
            // so evaluating it first would consume the line that follows the second blank line
            // without that line ever being processed here.
            while (numBlankLinesSeen < 2 && lineIterator.MoveNext())
            {
                string currentLine = lineIterator.Current;
                currentLine = currentLine.Replace("COMMA", ",");
                IList <string> pieces = StringUtils.Split(currentLine);
                string         identifier;
                int            size = pieces.Count;
                switch (size)
                {
                case 1:
                {
                    // blank line between sentences or relations
                    numBlankLinesSeen++;
                    break;
                }

                case 3:
                {
                    // relation: <index of arg 1> <index of arg 2> <type>
                    string type = pieces[2];
                    IList <ExtractionObject> args    = new List <ExtractionObject>();
                    EntityMention            entity1 = indexToEntityMention[pieces[0]];
                    EntityMention            entity2 = indexToEntityMention[pieces[1]];
                    args.Add(entity1);
                    args.Add(entity2);
                    // the relation span runs from the first argument's extent start to the
                    // second argument's extent end
                    Span span = new Span(entity1.GetExtentTokenStart(), entity2.GetExtentTokenEnd());
                    identifier = RelationMention.MakeUniqueId();
                    RelationMention relationMention = new RelationMention(identifier, sentence, span, type, null, args);
                    AnnotationUtils.AddRelationMention(sentence, relationMention);
                    break;
                }

                case 9:
                {
                    // token

                    /*
                     * Roth token lines look like this:
                     *
                     * 19 Peop 9 O NNP/NNP Jamal/Ghosheh O O O
                     */
                    // Entities may be multiple words joined by '/'; we split these up
                    IList <string> words = StringUtils.Split(pieces[5], "/");
                    string text = StringUtils.Join(words, " ");
                    identifier = "entity" + pieces[0] + '-' + pieces[2];
                    // entity type of the word/expression
                    string nerTag = GetNormalizedNERTag(pieces[1]);
                    // the sentence id is taken from the first token line
                    if (sentenceID == null)
                    {
                        sentenceID = pieces[0];
                    }
                    if (!nerTag.Equals("O"))
                    {
                        Span extentSpan = new Span(tokenCount, tokenCount + words.Count);
                        // Temporarily sets the head span to equal the extent span.
                        // This is so the entity has a head (in particular, getValue() works) even if preprocessSentences isn't called.
                        // The head span is later modified if preprocessSentences is called.
                        EntityMention entity = new EntityMention(identifier, sentence, extentSpan, extentSpan, nerTag, null, null);
                        AnnotationUtils.AddEntityMention(sentence, entity);
                        // we can get by using these indices as strings since we only use them
                        // as a hash key
                        string index = pieces[2];
                        indexToEntityMention[index] = entity;
                    }
                    foreach (string word in words)
                    {
                        CoreLabel label = new CoreLabel();
                        label.SetWord(word);
                        label.Set(typeof(CoreAnnotations.TextAnnotation), word);
                        label.Set(typeof(CoreAnnotations.ValueAnnotation), word);
                        // we don't set TokenBeginAnnotation or TokenEndAnnotation since we're
                        // not keeping track of character offsets
                        tokens.Add(label);
                    }
                    textContent.Append(text);
                    textContent.Append(' ');
                    tokenCount += words.Count;
                    break;
                }
                }
            }
            sentence.Set(typeof(CoreAnnotations.TextAnnotation), textContent.ToString());
            sentence.Set(typeof(CoreAnnotations.ValueAnnotation), textContent.ToString());
            sentence.Set(typeof(CoreAnnotations.TokensAnnotation), tokens);
            sentence.Set(typeof(CoreAnnotations.SentenceIDAnnotation), sentenceID);
            return(sentence);
        }
        /// <summary>Runs the configured parsers over an already tokenized sentence.</summary>
        /// <remarks>
        /// The tokens must already be normalized in the way the training treebank expects.
        /// The list may mix token types:
        /// <ul>
        /// <li>a token that implements HasWord contributes its word() value as the word to parse;</li>
        /// <li>a token that implements HasTag with a non-null, non-empty tag() strongly
        /// advises the parser to assign a part of speech tag that <i>begins</i> with that string.</li>
        /// </ul>
        /// When a word function is configured, the parsers work on copies of the tokens whose
        /// words have been passed through it; otherwise they work on a shallow copy of the list.
        /// A boundary token is always appended before parsing.
        /// </remarks>
        /// <param name="sentence">The sentence to parse</param>
        /// <returns>
        /// false as soon as one of the enabled parsers rejects the sentence, true otherwise
        /// </returns>
        /// <exception cref="System.NotSupportedException">
        /// If the sentence has zero length or is longer than the configured maximum length
        /// </exception>
        private bool ParseInternal <_T0>(IList <_T0> sentence)
            where _T0 : IHasWord
        {
            // Start every parse from a clean status.
            parseSucceeded = false;
            parseNoMemory = false;
            parseUnparsable = false;
            parseSkipped = false;
            parseFallback = false;
            whatFailed = null;
            addedPunct = false;
            originalSentence = sentence;

            int length = sentence.Count;
            if (length == 0)
            {
                parseSkipped = true;
                throw new NotSupportedException("Can't parse a zero-length sentence!");
            }

            // sentenceB is the working copy that is handed to the parsers.
            IList<IHasWord> sentenceB;
            if (op.wordFunction == null)
            {
                sentenceB = new List<IHasWord>(sentence);
            }
            else
            {
                // Copy each token first so the word function never modifies the caller's tokens.
                sentenceB = Generics.NewArrayList();
                foreach (IHasWord token in originalSentence)
                {
                    ILabel asLabel = token as ILabel;
                    if (asLabel != null)
                    {
                        IHasWord copy = asLabel.LabelFactory().NewLabel(asLabel) as IHasWord;
                        if (copy == null)
                        {
                            throw new AssertionError("This should have been a HasWord");
                        }
                        sentenceB.Add(copy);
                        continue;
                    }
                    IHasTag asTagged = token as IHasTag;
                    if (asTagged != null)
                    {
                        sentenceB.Add(new TaggedWord(token.Word(), asTagged.Tag()));
                    }
                    else
                    {
                        sentenceB.Add(new Word(token.Word()));
                    }
                }
                foreach (IHasWord copied in sentenceB)
                {
                    copied.SetWord(op.wordFunction.Apply(copied.Word()));
                }
            }

            if (op.testOptions.addMissingFinalPunctuation)
            {
                addedPunct = AddSentenceFinalPunctIfNeeded(sentenceB, length);
            }

            // The limit applies to the original length, not counting any punctuation just added.
            if (length > op.testOptions.maxLength)
            {
                parseSkipped = true;
                throw new NotSupportedException("Sentence too long: length " + length);
            }

            TreePrint treePrint = GetTreePrint();
            PrintWriter pwOut = op.tlpParams.Pw();

            // Append the boundary symbol, matching the token type of the input.
            IHasWord boundaryToken;
            if (sentence[0] is CoreLabel)
            {
                CoreLabel boundary = new CoreLabel();
                boundary.SetWord(LexiconConstants.Boundary);
                boundary.SetValue(LexiconConstants.Boundary);
                boundary.SetTag(LexiconConstants.BoundaryTag);
                // 1-based indexing used in the parser
                boundary.SetIndex(sentence.Count + 1);
                boundaryToken = boundary;
            }
            else
            {
                boundaryToken = new TaggedWord(LexiconConstants.Boundary, LexiconConstants.BoundaryTag);
            }
            sentenceB.Add(boundaryToken);

            if (Thread.Interrupted())
            {
                throw new RuntimeInterruptedException();
            }

            // PCFG pass
            if (op.doPCFG)
            {
                if (!pparser.Parse(sentenceB))
                {
                    return parseSucceeded;
                }
                if (op.testOptions.verbose)
                {
                    pwOut.Println("PParser output");
                    // printed without scores on nodes
                    treePrint.PrintTree(GetBestPCFGParse(false), pwOut);
                }
            }

            if (Thread.Interrupted())
            {
                throw new RuntimeInterruptedException();
            }

            // Dependency pass (not run when the fast factored option is on)
            if (op.doDep && !op.testOptions.useFastFactored)
            {
                if (!dparser.Parse(sentenceB))
                {
                    return parseSucceeded;
                }
                // cdm nov 2006: should move these printing bits to the main printing section,
                // so don't calculate the best parse twice!
                if (op.testOptions.verbose)
                {
                    pwOut.Println("DParser output");
                    treePrint.PrintTree(dparser.GetBestParse(), pwOut);
                }
            }

            if (Thread.Interrupted())
            {
                throw new RuntimeInterruptedException();
            }

            // Combined pass, only when both the PCFG and the dependency model are enabled
            if (op.doPCFG && op.doDep)
            {
                if (!bparser.Parse(sentenceB))
                {
                    return parseSucceeded;
                }
                parseSucceeded = true;
            }
            return true;
        }