Пример #1
0
 /// <summary>
 /// Removes from <c>op.trainOptions.splitters</c> every entry selected by
 /// <c>op.trainOptions.deleteSplitters</c>.
 /// </summary>
 /// <remarks>
 /// A delete entry that equals its own <c>tlp.BasicCategory</c> result removes
 /// every splitter that shares that basic category; any other delete entry
 /// removes only splitters equal to it. Nothing happens when
 /// <c>deleteSplitters</c> is null. When <c>op.testOptions.verbose</c> is set,
 /// the removed entries are logged.
 /// </remarks>
 /// <param name="tlp">Language pack used to reduce labels to their basic category.</param>
 /// <param name="op">Options holding the splitter collections and the verbose flag.</param>
 private static void RemoveDeleteSplittersFromSplitters(ITreebankLanguagePack tlp, Options op)
 {
     if (op.trainOptions.deleteSplitters == null)
     {
         return;
     }
     IList <string> deleted = new List <string>();
     foreach (string del in op.trainOptions.deleteSplitters)
     {
         string baseDel    = tlp.BasicCategory(del);
         bool   checkBasic = del.Equals(baseDel);
         // .NET enumerators have no Remove() and a collection must not be
         // modified while it is being enumerated, so gather the matches first
         // and remove them once the enumeration has finished.
         IList <string> matches = new List <string>();
         foreach (string elem in op.trainOptions.splitters)
         {
             string baseElem = tlp.BasicCategory(elem);
             if ((checkBasic && baseElem.Equals(baseDel)) || elem.Equals(del))
             {
                 matches.Add(elem);
             }
         }
         foreach (string match in matches)
         {
             op.trainOptions.splitters.Remove(match);
             deleted.Add(match);
         }
     }
     if (op.testOptions.verbose)
     {
         // Concatenating the list itself would print its type name, not its contents.
         log.Info("Removed from vertical splitters: [" + string.Join(", ", deleted) + "]");
     }
 }
Пример #2
0
 /// <summary>
 /// Reports whether any child of <paramref name="tree"/> carries a label value
 /// that <c>pattern</c> matches, either verbatim or after the value has been
 /// reduced with <c>tlp.BasicCategory</c>.
 /// </summary>
 /// <param name="tree">Tree whose direct children are inspected; may be null.</param>
 /// <returns>
 /// true as soon as one child matches; false when <paramref name="tree"/> is
 /// null or no child matches.
 /// </returns>
 public virtual bool Test(Tree tree)
 {
     if (tree == null)
     {
         return false;
     }
     foreach (Tree kid in tree.Children())
     {
         ILabel kidLabel = kid.Label();
         if (kidLabel == null)
         {
             // unlabeled child: nothing to match against
             continue;
         }
         string text = kidLabel.Value();
         if (text == null)
         {
             continue;
         }
         // Try the raw value first; only fall back to the basic category when
         // the raw value does not match.
         if (pattern.Matcher(text).Matches() || pattern.Matcher(tlp.BasicCategory(text)).Matches())
         {
             return true;
         }
     }
     return false;
 }
Пример #3
0
 /// <summary>
 /// Remove things like hyphenated functional tags and equals signs from the
 /// end of a node label.
 /// </summary>
 /// <remarks>
 /// The amount of cleanup is selected by <c>nodeCleanup</c>: 1 returns
 /// <c>tlp.CategoryAndFunction(label)</c>, 2 returns
 /// <c>tlp.BasicCategory(label)</c>, and any other value returns the label
 /// unchanged.
 /// </remarks>
 /// <param name="label">Label to clean; may be null.</param>
 /// <returns>The cleaned label, or <c>root</c> when <paramref name="label"/> is null.</returns>
 protected internal virtual string CleanUpLabel(string label)
 {
     if (label == null)
     {
         return root;
     }
     switch (nodeCleanup)
     {
         case 1:
             return tlp.CategoryAndFunction(label);

         case 2:
             return tlp.BasicCategory(label);

         default:
             return label;
     }
 }
 /// <summary>
 /// Builds a replacement label for <paramref name="tree"/> whose value is the
 /// basic category (per <c>tlp.BasicCategory</c>) of the tree's current label value.
 /// </summary>
 /// <param name="tree">Tree whose label is transformed.</param>
 /// <returns>
 /// A new label created by the current label's own factory, or null when the
 /// tree has no label.
 /// </returns>
 public override ILabel TransformNonterminalLabel(Tree tree)
 {
     ILabel current = tree.Label();
     if (current == null)
     {
         return null;
     }
     return current.LabelFactory().NewLabel(tlp.BasicCategory(current.Value()));
 }
        /// <summary>
        /// Maps a tag string to the value returned by <c>tlp.BasicCategory</c>.
        /// </summary>
        /// <param name="tagStr">Tag to project.</param>
        /// <returns>The result of <c>tlp.BasicCategory(tagStr)</c>.</returns>
        public virtual string Project(string tagStr)
        {
            return tlp.BasicCategory(tagStr);
        }
        // static only
        /// <summary>
        /// Counts how many spans are present in goldTree, including
        /// preterminals, but not present in guessTree, along with how many
        /// spans are present in guessTree and not goldTree.
        /// </summary>
        /// <remarks>
        /// Each missing or extra span counts as one error, so a mislabeled span
        /// or preterminal counts as two errors.
        /// <br />
        /// Constituents are passed through <c>SimplifyConstituents</c> before
        /// comparison, and preterminal tags are compared after reduction with
        /// <c>tlp.BasicCategory</c>. Only the first <c>min(gold, guess)</c>
        /// tagged words are compared.
        /// </remarks>
        /// <param name="tlp">Language pack used to reduce labels and tags.</param>
        /// <param name="goldTree">Reference tree.</param>
        /// <param name="guessTree">Tree being scored against the reference.</param>
        /// <returns>The total number of span and preterminal errors.</returns>
        public static int CountSpanErrors(ITreebankLanguagePack tlp, Tree goldTree, Tree guessTree)
        {
            ICollection <Constituent> rawGold         = goldTree.Constituents(LabeledConstituent.Factory());
            ICollection <Constituent> rawGuess        = guessTree.Constituents(LabeledConstituent.Factory());
            ICollection <Constituent> simplifiedGold  = SimplifyConstituents(tlp, rawGold);
            ICollection <Constituent> simplifiedGuess = SimplifyConstituents(tlp, rawGuess);

            int mismatches = 0;

            // gold spans that the guess lacks
            foreach (Constituent span in simplifiedGold)
            {
                if (simplifiedGuess.Contains(span))
                {
                    continue;
                }
                mismatches++;
            }
            // guess spans that the gold lacks
            foreach (Constituent span in simplifiedGuess)
            {
                if (simplifiedGold.Contains(span))
                {
                    continue;
                }
                mismatches++;
            }

            // The spans returned by Constituents() do not include the
            // preterminals, so those are counted separately here.
            IList <TaggedWord> goldYield  = goldTree.TaggedYield();
            IList <TaggedWord> guessYield = guessTree.TaggedYield();
            int shared = Math.Min(goldYield.Count, guessYield.Count);

            for (int pos = 0; pos < shared; pos++)
            {
                string goldTag  = tlp.BasicCategory(goldYield[pos].Tag());
                string guessTag = tlp.BasicCategory(guessYield[pos].Tag());
                if (goldTag.Equals(guessTag))
                {
                    continue;
                }
                // A differing tag is one span missing from the guess plus one
                // span missing from the gold, hence two errors.
                mismatches += 2;
            }
            return mismatches;
        }
Пример #7
0
        // pcfgPE.printGoodBad();
        /// <summary>
        /// Produces a copy of <paramref name="twList"/> in which every tag has
        /// been replaced by its <c>tlp.BasicCategory</c> value; words are kept as-is.
        /// </summary>
        /// <param name="twList">Tagged words to clean; not modified.</param>
        /// <param name="tlp">Language pack used to reduce each tag.</param>
        /// <returns>A new list of new <c>TaggedWord</c> objects, in the same order.</returns>
        private static IList <TaggedWord> CleanTags(IList <TaggedWord> twList, ITreebankLanguagePack tlp)
        {
            IList <TaggedWord> cleaned = new List <TaggedWord>(twList.Count);

            foreach (TaggedWord original in twList)
            {
                string word     = original.Word();
                string basicTag = tlp.BasicCategory(original.Tag());
                cleaned.Add(new TaggedWord(word, basicTag));
            }
            return cleaned;
        }
Пример #8
0
        /// <summary>
        /// Rebuilds <c>tagsToBaseTags</c> so that entry <c>i</c> holds the index
        /// that <c>tagIndex.AddToIndex</c> returns for the basic category of tag <c>i</c>.
        /// </summary>
        /// <param name="tlp">Language pack used to reduce each tag to its basic category.</param>
        private void PopulateTagsToBaseTags(ITreebankLanguagePack tlp)
        {
            // The size is sampled once; anything AddToIndex appends during the
            // loop is therefore not visited and gets no slot in the array.
            int originalCount = tagIndex.Size();

            tagsToBaseTags = new int[originalCount];
            for (int tagId = 0; tagId < originalCount; tagId++)
            {
                string baseTag = tlp.BasicCategory(tagIndex.Get(tagId));
                tagsToBaseTags[tagId] = tagIndex.AddToIndex(baseTag);
            }
        }
Пример #9
0
 /// <summary>
 /// When the basic category of <paramref name="t"/> is "PP", wraps the children
 /// lying between its leading preposition tags and trailing postposition tags
 /// in a newly created "NP" node.
 /// </summary>
 /// <remarks>
 /// Leading children whose basic category is in <c>prepositionTags</c> and
 /// trailing children whose basic category is in <c>postpositionTags</c> stay
 /// direct children of the PP. After the rewrite a message is written to
 /// standard output and the tree is printed via <c>PennPrint</c>.
 /// </remarks>
 /// <param name="t">Tree node to inspect and possibly restructure in place.</param>
 private void InsertNPinPP(Tree t)
 {
     if (!tlp.BasicCategory(t.Label().Value()).Equals("PP"))
     {
         return;
     }
     Tree[] kids  = t.Children();
     int    first = 0;
     int    last  = kids.Length - 1;

     // advance past leading prepositions: first ends on the first daughter of the new NP
     while (first < last && prepositionTags.Contains(tlp.BasicCategory(kids[first].Label().Value())))
     {
         first++;
     }
     // retreat past trailing postpositions: last ends on the last daughter of the new NP
     while (first < last && postpositionTags.Contains(tlp.BasicCategory(kids[last].Label().Value())))
     {
         last--;
     }
     if (first > last)
     {
         // there is no NP material!
         log.Info("##### Warning -- no NP material here!");
         return;
     }

     int    wrappedCount = last - first + 1;
     Tree[] wrapped      = new Tree[wrappedCount];
     System.Array.Copy(kids, first, wrapped, 0, wrappedCount);
     Tree np = t.TreeFactory().NewTreeNode(t.Label().LabelFactory().NewLabel("NP"), Arrays.AsList(wrapped));

     // New child array: kept prefix, then the NP, then the kept suffix.
     Tree[] rebuilt = new Tree[kids.Length - wrappedCount + 1];
     System.Array.Copy(kids, 0, rebuilt, 0, first + 1);
     rebuilt[first] = np; // overwrites the slot the prefix copy filled at index first
     System.Array.Copy(kids, last + 1, rebuilt, first + 1, kids.Length - last - 1);
     t.SetChildren(rebuilt);

     System.Console.Out.WriteLine("#### inserted NP in PP");
     t.PennPrint();
 }
Пример #10
0
        /// <summary>
        /// Makes sure <c>logProbs</c> holds the tag scores for <paramref name="word"/>.
        /// </summary>
        /// <remarks>
        /// Nothing is recomputed when <paramref name="word"/> equals <c>lastWord</c>.
        /// For a word listed in <c>functionWordTags</c>, every tag whose basic
        /// category equals the listed tag is set to 0 and every other tag to
        /// negative infinity. Otherwise the scores come from
        /// <c>scorer.LogProbabilityOf</c> on the word's features.
        /// </remarks>
        /// <param name="word">Index of the word in <c>wordIndex</c>.</param>
        /// <param name="subtractTagScore">
        /// When true, <c>Math.Log(tagDist.ProbabilityOf(tag))</c> is subtracted
        /// from each scored tag. It has no effect on the function-word path.
        /// </param>
        private void EnsureProbs(int word, bool subtractTagScore)
        {
            if (word == lastWord)
            {
                return;
            }
            lastWord = word;

            if (functionWordTags.Contains(wordIndex.Get(word)))
            {
                logProbs = new ClassicCounter <string>();
                string forcedTag = functionWordTags[wordIndex.Get(word)];
                foreach (string candidate in tagIndex.ObjectsList())
                {
                    bool   agrees  = ctlp.BasicCategory(candidate).Equals(forcedTag);
                    double logProb = agrees ? 0 : double.NegativeInfinity;
                    logProbs.SetCount(candidate, logProb);
                }
                return;
            }

            IDatum datum = new BasicDatum(featExtractor.MakeFeatures(wordIndex.Get(word)));

            logProbs = scorer.LogProbabilityOf(datum);
            if (!subtractTagScore)
            {
                return;
            }
            foreach (string scoredTag in logProbs.KeySet())
            {
                logProbs.IncrementCount(scoredTag, -Math.Log(tagDist.ProbabilityOf(scoredTag)));
            }
        }
            /// <summary>
            /// Collapses a node (a set, or a block whose members form a set) into a
            /// single representative object or a synthetic name.
            /// </summary>
            /// <remarks>
            /// A one-element set yields that element, recursing first when the
            /// element is itself a block. A larger set whose sampled element is a
            /// string not starting with '@' yields the string's basic category
            /// followed by "-" and the set's hash code. Every other case yields
            /// "@NodeSet-" followed by the set's hash code.
            /// </remarks>
            /// <param name="node">An <c>ISet</c> or an <c>IBlock</c>.</param>
            /// <returns>The representative object or generated name described above.</returns>
            /// <exception cref="Exception">
            /// <paramref name="node"/> is neither an <c>ISet</c> nor an <c>IBlock</c>.
            /// </exception>
            public virtual object ProcessNode(object node)
            {
                ISet s;

                if (node is ISet)
                {
                    s = (ISet)node;
                }
                else if (node is IBlock)
                {
                    s = ((IBlock)node).GetMembers();
                }
                else
                {
                    throw new Exception("Unexpected node class");
                }

                // Take the first element the set enumerates. Reading Current on
                // a fresh enumerator without MoveNext() is undefined (or throws),
                // so let foreach advance the enumerator for us. An empty set
                // leaves sampleNode null and falls through to the "@NodeSet-" name.
                object sampleNode = null;
                foreach (object member in s)
                {
                    sampleNode = member;
                    break;
                }

                if (s.Count == 1)
                {
                    if (sampleNode is IBlock)
                    {
                        return ProcessNode(sampleNode);
                    }
                    return sampleNode;
                }
                // nope there's a set of things
                if (sampleNode is string)
                {
                    string str = (string)sampleNode;
                    if (str[0] != '@')
                    {
                        // passive category...
                        return tlp.BasicCategory(str) + "-" + s.GetHashCode();
                    }
                }
                // TODO remove b/c there could be collisions
                return "@NodeSet-" + s.GetHashCode();
            }
Пример #12
0
        // only leaves NP-TMP and NP-ADV
        /// <summary>
        /// Reduces a label to its basic category, re-attaching "-TMP" or "-ADV"
        /// when the original label matched <c>TmpPattern</c> or <c>AdvPattern</c>.
        /// </summary>
        /// <param name="label">Label to clean; may be null.</param>
        /// <returns>
        /// The empty string for a null label; otherwise the basic category,
        /// suffixed with "-TMP" (checked first) or "-ADV" when applicable.
        /// </returns>
        protected internal virtual string CleanUpLabel(string label)
        {
            if (label == null)
            {
                // This shouldn't really happen, but can happen if there are
                // unlabeled nodes further down a tree, as apparently happens in
                // at least the 20100730 era American National Corpus.
                return string.Empty;
            }

            // Both patterns are tested against the full label, before it is reduced.
            bool isTemporal  = TmpPattern.Matcher(label).Matches();
            bool isAdverbial = AdvPattern.Matcher(label).Matches();

            string basic = tlp.BasicCategory(label);
            if (isTemporal)
            {
                return basic + "-TMP";
            }
            if (isAdverbial)
            {
                return basic + "-ADV";
            }
            return basic;
        }
Пример #13
0
 /// <summary>
 /// Recursively records, for every non-leaf node of <paramref name="t"/>, the
 /// list of its child labels under three keys: the node label alone, the node
 /// label plus parent label, and the node label plus parent and grandparent labels.
 /// </summary>
 /// <remarks>
 /// Preterminals are only counted when <c>doTags</c> is set, and go into the
 /// tag tables instead of the node tables. When <c>tlp</c> is non-null the
 /// parent and grandparent labels are reduced with <c>tlp.BasicCategory</c>
 /// before use.
 /// </remarks>
 /// <param name="gP">Label of the grandparent of <paramref name="t"/>.</param>
 /// <param name="p">Label of the parent of <paramref name="t"/>.</param>
 /// <param name="t">Subtree to process.</param>
 public virtual void ProcessTreeHelper(string gP, string p, Tree t)
 {
     if (t.IsLeaf() || (!doTags && t.IsPreTerminal()))
     {
         // stop at words/tags
         return;
     }
     IDictionary <string, ClassicCounter <IList <string> > >         nr;
     IDictionary <IList <string>, ClassicCounter <IList <string> > > pr;
     IDictionary <IList <string>, ClassicCounter <IList <string> > > gpr;
     if (t.IsPreTerminal())
     {
         nr  = tagNodeRules;
         pr  = tagPRules;
         gpr = tagGPRules;
     }
     else
     {
         nr  = nodeRules;
         pr  = pRules;
         gpr = gPRules;
     }
     string n = t.Label().Value();
     if (tlp != null)
     {
         p  = tlp.BasicCategory(p);
         gP = tlp.BasicCategory(gP);
     }
     IList <string> kidn = KidLabels(t);

     GetOrCreateRuleCounter(nr, n).IncrementCount(kidn);

     // NOTE(review): pr and gpr are keyed by list instances built fresh on
     // every call; unless those dictionaries were created with a content-based
     // comparer, a new key list never equals a stored one. Confirm where the
     // dictionaries are constructed.
     IList <string> pairStr = new List <string>(2);
     pairStr.Add(n);
     pairStr.Add(p);
     GetOrCreateRuleCounter(pr, pairStr).IncrementCount(kidn);

     IList <string> tripleStr = new List <string>(3);
     tripleStr.Add(n);
     tripleStr.Add(p);
     tripleStr.Add(gP);
     GetOrCreateRuleCounter(gpr, tripleStr).IncrementCount(kidn);

     foreach (Tree kid in t.Children())
     {
         ProcessTreeHelper(p, n, kid);
     }
 }

 /// <summary>
 /// Returns the counter stored under <paramref name="key"/>, creating and
 /// storing an empty one when the key is absent or maps to null.
 /// </summary>
 /// <remarks>
 /// Uses <c>TryGetValue</c> because the dictionary indexer throws on a
 /// missing key instead of returning null.
 /// </remarks>
 private static ClassicCounter <IList <string> > GetOrCreateRuleCounter <TKey>(IDictionary <TKey, ClassicCounter <IList <string> > > table, TKey key)
 {
     ClassicCounter <IList <string> > counter;
     if (!table.TryGetValue(key, out counter) || counter == null)
     {
         counter    = new ClassicCounter <IList <string> >();
         table[key] = counter;
     }
     return counter;
 }
        public virtual bool Parse <_T0>(IList <_T0> sentence)
            where _T0 : IHasWord
        {
            if (op.testOptions.verbose)
            {
                Timing.Tick("Starting dependency parse.");
            }
            this.sentence = sentence;
            int length = sentence.Count;

            if (length > arraySize)
            {
                if (length > op.testOptions.maxLength + 1 || length >= myMaxLength)
                {
                    throw new OutOfMemoryException("Refusal to create such large arrays.");
                }
                else
                {
                    try
                    {
                        CreateArrays(length + 1);
                    }
                    catch (OutOfMemoryException e)
                    {
                        myMaxLength = length;
                        if (arraySize > 0)
                        {
                            try
                            {
                                CreateArrays(arraySize);
                            }
                            catch (OutOfMemoryException)
                            {
                                throw new Exception("CANNOT EVEN CREATE ARRAYS OF ORIGINAL SIZE!!! " + arraySize);
                            }
                        }
                        throw;
                    }
                    arraySize = length + 1;
                    if (op.testOptions.verbose)
                    {
                        log.Info("Created dparser arrays of size " + arraySize);
                    }
                }
            }
            if (op.testOptions.verbose)
            {
                log.Info("Initializing...");
            }
            // map to words
            words = new int[length];
            int numTags = dg.NumTagBins();

            //tagIndex.size();
            //System.out.println("\nNumTags: "+numTags);
            //System.out.println(tagIndex);
            bool[][] hasTag = new bool[length][];
            for (int i = 0; i < length; i++)
            {
                //if (wordIndex.contains(sentence.get(i).toString()))
                words[i] = wordIndex.AddToIndex(sentence[i].Word());
            }
            //else
            //words[i] = wordIndex.indexOf(Lexicon.UNKNOWN_WORD);
            for (int head = 0; head < length; head++)
            {
                for (int tag = 0; tag < numTags; tag++)
                {
                    Arrays.Fill(iScoreH[head][tag], float.NegativeInfinity);
                    Arrays.Fill(oScoreH[head][tag], float.NegativeInfinity);
                }
            }
            for (int head_1 = 0; head_1 < length; head_1++)
            {
                for (int loc = 0; loc <= length; loc++)
                {
                    rawDistance[head_1][loc] = (head_1 >= loc ? head_1 - loc : loc - head_1 - 1);
                    binDistance[head_1][loc] = dg.DistanceBin(rawDistance[head_1][loc]);
                }
            }
            if (Thread.Interrupted())
            {
                throw new RuntimeInterruptedException();
            }
            // do tags
            for (int start = 0; start + 1 <= length; start++)
            {
                //Force tags
                string trueTagStr = null;
                if (sentence[start] is IHasTag)
                {
                    trueTagStr = ((IHasTag)sentence[start]).Tag();
                    if (string.Empty.Equals(trueTagStr))
                    {
                        trueTagStr = null;
                    }
                }
                //Word context (e.g., morphosyntactic info)
                string wordContextStr = null;
                if (sentence[start] is IHasContext)
                {
                    wordContextStr = ((IHasContext)sentence[start]).OriginalText();
                    if (string.Empty.Equals(wordContextStr))
                    {
                        wordContextStr = null;
                    }
                }
                int word = words[start];
                for (IEnumerator <IntTaggedWord> taggingI = lex.RuleIteratorByWord(word, start, wordContextStr); taggingI.MoveNext();)
                {
                    IntTaggedWord tagging = taggingI.Current;
                    if (trueTagStr != null)
                    {
                        if (!tlp.BasicCategory(tagging.TagString(tagIndex)).Equals(trueTagStr))
                        {
                            continue;
                        }
                    }
                    float score = lex.Score(tagging, start, wordIndex.Get(tagging.word), wordContextStr);
                    //iScoreH[start][tag][start] = (op.dcTags ? (float)op.testOptions.depWeight*score : 0.0f);
                    if (score > float.NegativeInfinity)
                    {
                        int tag = tagging.tag;
                        iScoreH[start][dg.TagBin(tag)][start]     = 0.0f;
                        iScoreH[start][dg.TagBin(tag)][start + 1] = 0.0f;
                    }
                }
            }
            for (int hWord = 0; hWord < length; hWord++)
            {
                for (int hTag = 0; hTag < numTags; hTag++)
                {
                    hasTag[hWord][hTag] = (iScoreH[hWord][hTag][hWord] + iScoreH[hWord][hTag][hWord + 1] > float.NegativeInfinity);
                    Arrays.Fill(headStop[hWord][hTag], float.NegativeInfinity);
                    for (int aWord = 0; aWord < length; aWord++)
                    {
                        for (int dist = 0; dist < dg.NumDistBins(); dist++)
                        {
                            Arrays.Fill(headScore[dist][hWord][hTag][aWord], float.NegativeInfinity);
                        }
                    }
                }
            }
            // score and cache all pairs -- headScores and stops
            //int hit = 0;
            for (int hWord_1 = 0; hWord_1 < length; hWord_1++)
            {
                for (int hTag = 0; hTag < numTags; hTag++)
                {
                    //Arrays.fill(headStopL[hWord][hTag], Float.NEGATIVE_INFINITY);
                    //Arrays.fill(headStopR[hWord][hTag], Float.NEGATIVE_INFINITY);
                    //Arrays.fill(headStop[hWord][hTag], Float.NEGATIVE_INFINITY);
                    if (!hasTag[hWord_1][hTag])
                    {
                        continue;
                    }
                    for (int split = 0; split <= length; split++)
                    {
                        if (split <= hWord_1)
                        {
                            headStop[hWord_1][hTag][split] = (float)dg.ScoreTB(words[hWord_1], hTag, -2, -2, false, hWord_1 - split);
                        }
                        else
                        {
                            //System.out.println("headstopL " + hWord +" " + hTag + " " + split + " " + headStopL[hWord][hTag][split]); // debugging
                            headStop[hWord_1][hTag][split] = (float)dg.ScoreTB(words[hWord_1], hTag, -2, -2, true, split - hWord_1 - 1);
                        }
                    }
                    //System.out.println("headstopR " + hWord +" " + hTag + " " + split + " " + headStopR[hWord][hTag][split]); // debugging
                    //hit++;
                    //Timing.tick("hWord: "+hWord+" hTag: "+hTag+" piddle count: "+hit);
                    for (int aWord = 0; aWord < length; aWord++)
                    {
                        if (aWord == hWord_1)
                        {
                            continue;
                        }
                        // can't be argument of yourself
                        bool leftHeaded = hWord_1 < aWord;
                        int  start_1;
                        int  end;
                        if (leftHeaded)
                        {
                            start_1 = hWord_1 + 1;
                            end     = aWord + 1;
                        }
                        else
                        {
                            start_1 = aWord + 1;
                            end     = hWord_1 + 1;
                        }
                        for (int aTag = 0; aTag < numTags; aTag++)
                        {
                            if (!hasTag[aWord][aTag])
                            {
                                continue;
                            }
                            for (int split_1 = start_1; split_1 < end; split_1++)
                            {
                                // Moved this stuff out two loops- GMA
                                //              for (int split = 0; split <= length; split++) {
                                // if leftHeaded, go from hWord+1 to aWord
                                // else go from aWord+1 to hWord
                                //              if ((leftHeaded && (split <= hWord || split > aWord)) ||
                                //                      ((!leftHeaded) && (split <= aWord || split > hWord)))
                                //                continue;
                                int headDistance = rawDistance[hWord_1][split_1];
                                int binDist      = binDistance[hWord_1][split_1];
                                headScore[binDist][hWord_1][hTag][aWord][aTag] = (float)dg.ScoreTB(words[hWord_1], hTag, words[aWord], aTag, leftHeaded, headDistance);
                                //hit++;
                                // skip other splits with same binDist
                                while (split_1 + 1 < end && binDistance[hWord_1][split_1 + 1] == binDist)
                                {
                                    split_1++;
                                }
                            }
                        }
                    }
                }
            }
            // end split
            // end aTag
            // end aWord
            // end hTag
            // end hWord
            if (op.testOptions.verbose)
            {
                Timing.Tick("done.");
                // displayHeadScores();
                log.Info("Starting insides...");
            }
            // do larger spans
            for (int diff = 2; diff <= length; diff++)
            {
                if (Thread.Interrupted())
                {
                    throw new RuntimeInterruptedException();
                }
                for (int start_1 = 0; start_1 + diff <= length; start_1++)
                {
                    int end = start_1 + diff;
                    // left extension
                    int endHead = end - 1;
                    for (int endTag = 0; endTag < numTags; endTag++)
                    {
                        if (!hasTag[endHead][endTag])
                        {
                            continue;
                        }
                        // bestScore is max for iScoreH
                        float bestScore = float.NegativeInfinity;
                        for (int argHead = start_1; argHead < endHead; argHead++)
                        {
                            for (int argTag = 0; argTag < numTags; argTag++)
                            {
                                if (!hasTag[argHead][argTag])
                                {
                                    continue;
                                }
                                float argLeftScore = iScoreH[argHead][argTag][start_1];
                                if (argLeftScore == float.NegativeInfinity)
                                {
                                    continue;
                                }
                                float stopLeftScore = headStop[argHead][argTag][start_1];
                                if (stopLeftScore == float.NegativeInfinity)
                                {
                                    continue;
                                }
                                for (int split = argHead + 1; split < end; split++)
                                {
                                    // short circuit if dependency is impossible
                                    float depScore = headScore[binDistance[endHead][split]][endHead][endTag][argHead][argTag];
                                    if (depScore == float.NegativeInfinity)
                                    {
                                        continue;
                                    }
                                    float score = iScoreH[endHead][endTag][split] + argLeftScore + iScoreH[argHead][argTag][split] + depScore + stopLeftScore + headStop[argHead][argTag][split];
                                    if (score > bestScore)
                                    {
                                        bestScore = score;
                                    }
                                }
                            }
                        }
                        // end for split
                        // sum for iScoreHSum
                        // end for argTag : tags
                        // end for argHead
                        iScoreH[endHead][endTag][start_1] = bestScore;
                    }
                    // end for endTag : tags
                    // right extension
                    int startHead = start_1;
                    for (int startTag = 0; startTag < numTags; startTag++)
                    {
                        if (!hasTag[startHead][startTag])
                        {
                            continue;
                        }
                        // bestScore is max for iScoreH
                        float bestScore = float.NegativeInfinity;
                        for (int argHead = start_1 + 1; argHead < end; argHead++)
                        {
                            for (int argTag = 0; argTag < numTags; argTag++)
                            {
                                if (!hasTag[argHead][argTag])
                                {
                                    continue;
                                }
                                float argRightScore = iScoreH[argHead][argTag][end];
                                if (argRightScore == float.NegativeInfinity)
                                {
                                    continue;
                                }
                                float stopRightScore = headStop[argHead][argTag][end];
                                if (stopRightScore == float.NegativeInfinity)
                                {
                                    continue;
                                }
                                for (int split = start_1 + 1; split <= argHead; split++)
                                {
                                    // short circuit if dependency is impossible
                                    float depScore = headScore[binDistance[startHead][split]][startHead][startTag][argHead][argTag];
                                    if (depScore == float.NegativeInfinity)
                                    {
                                        continue;
                                    }
                                    float score = iScoreH[startHead][startTag][split] + iScoreH[argHead][argTag][split] + argRightScore + depScore + stopRightScore + headStop[argHead][argTag][split];
                                    if (score > bestScore)
                                    {
                                        bestScore = score;
                                    }
                                }
                            }
                        }
                        // sum for iScoreHSum
                        // end for argTag: tags
                        // end for argHead
                        iScoreH[startHead][startTag][end] = bestScore;
                    }
                }
            }
            // end for startTag: tags
            // end for start
            // end for diff (i.e., span)
            int goalTag = dg.TagBin(tagIndex.IndexOf(LexiconConstants.BoundaryTag));

            if (op.testOptions.verbose)
            {
                Timing.Tick("done.");
                log.Info("Dep  parsing " + length + " words (incl. stop): insideScore " + (iScoreH[length - 1][goalTag][0] + iScoreH[length - 1][goalTag][length]));
            }
            if (!op.doPCFG)
            {
                return(HasParse());
            }
            if (op.testOptions.verbose)
            {
                log.Info("Starting outsides...");
            }
            oScoreH[length - 1][goalTag][0]      = 0.0f;
            oScoreH[length - 1][goalTag][length] = 0.0f;
            for (int diff_1 = length; diff_1 > 1; diff_1--)
            {
                if (Thread.Interrupted())
                {
                    throw new RuntimeInterruptedException();
                }
                for (int start_1 = 0; start_1 + diff_1 <= length; start_1++)
                {
                    int end = start_1 + diff_1;
                    // left half
                    int endHead = end - 1;
                    for (int endTag = 0; endTag < numTags; endTag++)
                    {
                        if (!hasTag[endHead][endTag])
                        {
                            continue;
                        }
                        for (int argHead = start_1; argHead < endHead; argHead++)
                        {
                            for (int argTag = 0; argTag < numTags; argTag++)
                            {
                                if (!hasTag[argHead][argTag])
                                {
                                    continue;
                                }
                                for (int split = argHead; split <= endHead; split++)
                                {
                                    float subScore   = (oScoreH[endHead][endTag][start_1] + headScore[binDistance[endHead][split]][endHead][endTag][argHead][argTag] + headStop[argHead][argTag][start_1] + headStop[argHead][argTag][split]);
                                    float scoreRight = (subScore + iScoreH[argHead][argTag][start_1] + iScoreH[argHead][argTag][split]);
                                    float scoreMid   = (subScore + iScoreH[argHead][argTag][start_1] + iScoreH[endHead][endTag][split]);
                                    float scoreLeft  = (subScore + iScoreH[argHead][argTag][split] + iScoreH[endHead][endTag][split]);
                                    if (scoreRight > oScoreH[endHead][endTag][split])
                                    {
                                        oScoreH[endHead][endTag][split] = scoreRight;
                                    }
                                    if (scoreMid > oScoreH[argHead][argTag][split])
                                    {
                                        oScoreH[argHead][argTag][split] = scoreMid;
                                    }
                                    if (scoreLeft > oScoreH[argHead][argTag][start_1])
                                    {
                                        oScoreH[argHead][argTag][start_1] = scoreLeft;
                                    }
                                }
                            }
                        }
                    }
                    // right half
                    int startHead = start_1;
                    for (int startTag = 0; startTag < numTags; startTag++)
                    {
                        if (!hasTag[startHead][startTag])
                        {
                            continue;
                        }
                        for (int argHead = startHead + 1; argHead < end; argHead++)
                        {
                            for (int argTag = 0; argTag < numTags; argTag++)
                            {
                                if (!hasTag[argHead][argTag])
                                {
                                    continue;
                                }
                                for (int split = startHead + 1; split <= argHead; split++)
                                {
                                    float subScore   = (oScoreH[startHead][startTag][end] + headScore[binDistance[startHead][split]][startHead][startTag][argHead][argTag] + headStop[argHead][argTag][split] + headStop[argHead][argTag][end]);
                                    float scoreLeft  = (subScore + iScoreH[argHead][argTag][split] + iScoreH[argHead][argTag][end]);
                                    float scoreMid   = (subScore + iScoreH[startHead][startTag][split] + iScoreH[argHead][argTag][end]);
                                    float scoreRight = (subScore + iScoreH[startHead][startTag][split] + iScoreH[argHead][argTag][split]);
                                    if (scoreLeft > oScoreH[startHead][startTag][split])
                                    {
                                        oScoreH[startHead][startTag][split] = scoreLeft;
                                    }
                                    if (scoreMid > oScoreH[argHead][argTag][split])
                                    {
                                        oScoreH[argHead][argTag][split] = scoreMid;
                                    }
                                    if (scoreRight > oScoreH[argHead][argTag][end])
                                    {
                                        oScoreH[argHead][argTag][end] = scoreRight;
                                    }
                                }
                            }
                        }
                    }
                }
            }
            if (op.testOptions.verbose)
            {
                Timing.Tick("done.");
                log.Info("Starting half-filters...");
            }
            for (int loc_1 = 0; loc_1 <= length; loc_1++)
            {
                for (int head_2 = 0; head_2 < length; head_2++)
                {
                    Arrays.Fill(iPossibleByL[loc_1][head_2], false);
                    Arrays.Fill(iPossibleByR[loc_1][head_2], false);
                    Arrays.Fill(oPossibleByL[loc_1][head_2], false);
                    Arrays.Fill(oPossibleByR[loc_1][head_2], false);
                }
            }
            if (Thread.Interrupted())
            {
                throw new RuntimeInterruptedException();
            }
            for (int head_3 = 0; head_3 < length; head_3++)
            {
                for (int tag = 0; tag < numTags; tag++)
                {
                    if (!hasTag[head_3][tag])
                    {
                        continue;
                    }
                    for (int start_1 = 0; start_1 <= head_3; start_1++)
                    {
                        for (int end = head_3 + 1; end <= length; end++)
                        {
                            if (iScoreH[head_3][tag][start_1] + iScoreH[head_3][tag][end] > float.NegativeInfinity && oScoreH[head_3][tag][start_1] + oScoreH[head_3][tag][end] > float.NegativeInfinity)
                            {
                                iPossibleByR[end][head_3][tag]     = true;
                                iPossibleByL[start_1][head_3][tag] = true;
                                oPossibleByR[end][head_3][tag]     = true;
                                oPossibleByL[start_1][head_3][tag] = true;
                            }
                        }
                    }
                }
            }
            if (op.testOptions.verbose)
            {
                Timing.Tick("done.");
            }
            return(HasParse());
        }
        /// <summary>
        /// Recursively builds a normalized copy of <paramref name="tree"/>.
        /// </summary>
        /// <remarks>
        /// In order, the visible steps are:
        /// a start-symbol node is replaced by the transform of its first child;
        /// a leaf is copied through the tree's own factory;
        /// the node label is reduced to <c>tlp.BasicCategory</c>;
        /// WH-categories are rewritten according to the bits of <c>whOption</c>;
        /// punctuation preterminals are dropped when <c>deletePunct</c> is set;
        /// a unary NP-over-NP is collapsed when <c>fixCollinsBaseNP</c> is set;
        /// PRT is relabelled ADVP;
        /// finally the children are transformed and any that come back null are dropped.
        /// </remarks>
        /// <param name="tree">The tree to transform; may be null.</param>
        /// <returns>
        /// The transformed tree, or null when <paramref name="tree"/> is null, is a deleted
        /// punctuation preterminal, or has no children left after transformation.
        /// </returns>
        public virtual Tree TransformTree(Tree tree)
        {
            if (tree == null)
            {
                return null;
            }
            ITreeFactory tf = tree.TreeFactory();
            string       s  = tree.Value();

            if (tlp.IsStartSymbol(s))
            {
                // Skip the start-symbol wrapper and transform what is underneath it.
                return TransformTree(tree.FirstChild());
            }
            if (tree.IsLeaf())
            {
                return tf.NewLeaf(tree.Label());
            }
            s = tlp.BasicCategory(s);
            // Ordinal comparison: category names are identifiers, not linguistic text.
            if (((whOption & 1) != 0) && s.StartsWith("WH", System.StringComparison.Ordinal))
            {
                s = Sharpen.Runtime.Substring(s, 2);
            }
            if ((whOption & 2) != 0)
            {
                s = s.ReplaceAll("^WP", "PRP");
                // does both WP and WP$ !!
                s = s.ReplaceAll("^WDT", "DT");
                s = s.ReplaceAll("^WRB", "RB");
            }
            if (((whOption & 4) != 0) && s.StartsWith("WH", System.StringComparison.Ordinal))
            {
                s = Sharpen.Runtime.Substring(s, 2);
            }
            // wsg2010: Might need a better way to deal with tag ambiguity. This still doesn't handle the
            // case where the GOLD tree does not label a punctuation mark as such (common in French), and
            // the guess tree does.
            if (deletePunct && tree.IsPreTerminal() && (tlp.IsEvalBIgnoredPunctuationTag(s) || tlp.IsPunctuationWord(tree.FirstChild().Value())))
            {
                return null;
            }
            // remove the extra NPs inserted in the collinsBaseNP option
            if (fixCollinsBaseNP && s.Equals("NP"))
            {
                Tree[] kids = tree.Children();
                if (kids.Length == 1 && tlp.BasicCategory(kids[0].Value()).Equals("NP"))
                {
                    return TransformTree(kids[0]);
                }
            }
            // Magerman erased this distinction, and everyone else has followed like sheep...
            if (s.Equals("PRT"))
            {
                s = "ADVP";
            }

            // Fetch the child array once; the previous loop bound referred to an
            // undeclared variable and re-fetched the array on every iteration.
            Tree[]       childTrees = tree.Children();
            IList <Tree> children   = new List <Tree>(childTrees.Length);

            foreach (Tree child in childTrees)
            {
                Tree newChild = TransformTree(child);
                if (newChild != null)
                {
                    children.Add(newChild);
                }
            }
            if (children.Count == 0)
            {
                // Every child was deleted, so this node disappears as well.
                return null;
            }
            Tree node = tf.NewTreeNode(tree.Label(), children);

            node.SetValue(s);
            return node;
        }
Пример #16
0
 /// <summary>
 /// Returns whatever <c>tlp.BasicCategory</c> produces for the given string.
 /// </summary>
 /// <param name="in">The string handed to <c>tlp.BasicCategory</c>.</param>
 /// <returns>The result of <c>tlp.BasicCategory</c>.</returns>
 public virtual string Apply(string @in)
 {
     string basicCategory = tlp.BasicCategory(@in);
     return basicCategory;
 }
Пример #17
0
        /// <summary>
        /// Called by determineHead and may be overridden in subclasses
        /// if special treatment is necessary for particular categories.
        /// </summary>
        /// <remarks>
        /// The rule set is looked up in <c>nonTerminalInfo</c> by the basic category of
        /// <paramref name="t"/> (with one leading '@' removed). Each rule is tried in order via
        /// <c>TraverseLocate</c>, and only the last rule is tried with the last-resort flag set.
        /// When no rule set exists, <c>defaultRule</c> is used if it is non-null.
        /// </remarks>
        /// <param name="t">The tree to determine the head daughter of</param>
        /// <param name="parent">The parent of t (or may be null); not read by this implementation</param>
        /// <returns>The head daughter of t</returns>
        /// <exception cref="System.ArgumentException">
        /// No rule exists for the category and <c>defaultRule</c> is null.
        /// </exception>
        protected internal virtual Tree DetermineNonTrivialHead(Tree t, Tree parent)
        {
            Tree   theHead   = null;
            string motherCat = tlp.BasicCategory(t.Label().Value());

            // Ordinal comparison: category names are identifiers, not linguistic text.
            if (motherCat.StartsWith("@", System.StringComparison.Ordinal))
            {
                motherCat = Sharpen.Runtime.Substring(motherCat, 1);
            }
            if (Debug)
            {
                log.Info("Looking for head of " + t.Label() + "; value is |" + t.Label().Value() + "|, " + " baseCat is |" + motherCat + '|');
            }
            // We know we have nonterminals underneath
            // (a bit of a Penn Treebank assumption, but).
            // Look at label.
            // An earlier version special-cased a trailing POS daughter here; that appears to be
            // redundant in the Collins case since the rule already would do that.

            // A dictionary indexer throws on a missing key instead of yielding null, which
            // would make the fallback below unreachable. TryGetValue leaves 'how' null
            // both for a missing key and for a key explicitly mapped to null.
            string[][] how;
            nonTerminalInfo.TryGetValue(motherCat, out how);
            Tree[] kids = t.Children();
            if (how == null)
            {
                if (Debug)
                {
                    // motherCat can be empty (e.g. the label was just "@"), so guard the index.
                    string firstChar = motherCat.Length > 0 ? motherCat[0].ToString() : string.Empty;
                    log.Info("Warning: No rule found for " + motherCat + " (first char: " + firstChar + ')');
                    log.Info("Known nonterms are: " + nonTerminalInfo.Keys);
                }
                if (defaultRule != null)
                {
                    if (Debug)
                    {
                        log.Info("  Using defaultRule");
                    }
                    return TraverseLocate(kids, defaultRule, true);
                }
                else
                {
                    // TreePrint because TreeGraphNode only prints the node number,
                    // doesn't print the tree structure
                    TreePrint    printer = new TreePrint("penn");
                    StringWriter buffer  = new StringWriter();
                    printer.PrintTree(t, new PrintWriter(buffer));
                    // TODO: we could get really fancy and define our own
                    // exception class to represent this
                    throw new ArgumentException("No head rule defined for " + motherCat + " using " + this.GetType() + " in " + buffer.ToString());
                }
            }
            for (int i = 0; i < how.Length; i++)
            {
                // Only the final rule is allowed to act as a last resort.
                bool lastResort = (i == how.Length - 1);
                theHead = TraverseLocate(kids, how[i], lastResort);
                if (theHead != null)
                {
                    break;
                }
            }
            if (Debug)
            {
                // theHead is still null when no rule located a head (or the rule list is
                // empty); do not dereference it just to produce a log line.
                log.Info("  Chose " + (theHead == null ? null : theHead.Label()));
            }
            return theHead;
        }
Пример #18
0
        /* some documentation for Roger's convenience
         * {pcfg,dep,combo}{PE,DE,TE} are precision/dep/tagging evals for the models
         *
         * parser is the PCFG parser
         * dparser is the dependency parser
         * bparser is the combining parser
         *
         * during testing:
         * tree is the test tree (gold tree)
         * binaryTree is the gold tree binarized
         * tree2b is the best PCFG parse, binarized
         * tree2 is the best PCFG parse (debinarized)
         * tree3 is the dependency parse, binarized
         * tree3db is the dependency parse, debinarized
         * tree4 is the best combo parse, binarized and then debinarized
         * tree4b is the best combo parse, binarized
         */
        public static void Main(string[] args)
        {
            Options op = new Options(new EnglishTreebankParserParams());

            // op.tlpParams may be changed to something else later, so don't use it till
            // after options are parsed.
            StringUtils.LogInvocationString(log, args);
            string path          = "/u/nlp/stuff/corpora/Treebank3/parsed/mrg/wsj";
            int    trainLow      = 200;
            int    trainHigh     = 2199;
            int    testLow       = 2200;
            int    testHigh      = 2219;
            string serializeFile = null;
            int    i             = 0;

            while (i < args.Length && args[i].StartsWith("-"))
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-path") && (i + 1 < args.Length))
                {
                    path = args[i + 1];
                    i   += 2;
                }
                else
                {
                    if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-train") && (i + 2 < args.Length))
                    {
                        trainLow  = System.Convert.ToInt32(args[i + 1]);
                        trainHigh = System.Convert.ToInt32(args[i + 2]);
                        i        += 3;
                    }
                    else
                    {
                        if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-test") && (i + 2 < args.Length))
                        {
                            testLow  = System.Convert.ToInt32(args[i + 1]);
                            testHigh = System.Convert.ToInt32(args[i + 2]);
                            i       += 3;
                        }
                        else
                        {
                            if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-serialize") && (i + 1 < args.Length))
                            {
                                serializeFile = args[i + 1];
                                i            += 2;
                            }
                            else
                            {
                                if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-tLPP") && (i + 1 < args.Length))
                                {
                                    try
                                    {
                                        op.tlpParams = (ITreebankLangParserParams)System.Activator.CreateInstance(Sharpen.Runtime.GetType(args[i + 1]));
                                    }
                                    catch (TypeLoadException e)
                                    {
                                        log.Info("Class not found: " + args[i + 1]);
                                        throw new Exception(e);
                                    }
                                    catch (InstantiationException e)
                                    {
                                        log.Info("Couldn't instantiate: " + args[i + 1] + ": " + e.ToString());
                                        throw new Exception(e);
                                    }
                                    catch (MemberAccessException e)
                                    {
                                        log.Info("illegal access" + e);
                                        throw new Exception(e);
                                    }
                                    i += 2;
                                }
                                else
                                {
                                    if (args[i].Equals("-encoding"))
                                    {
                                        // sets encoding for TreebankLangParserParams
                                        op.tlpParams.SetInputEncoding(args[i + 1]);
                                        op.tlpParams.SetOutputEncoding(args[i + 1]);
                                        i += 2;
                                    }
                                    else
                                    {
                                        i = op.SetOptionOrWarn(args, i);
                                    }
                                }
                            }
                        }
                    }
                }
            }
            // System.out.println(tlpParams.getClass());
            ITreebankLanguagePack tlp = op.tlpParams.TreebankLanguagePack();

            op.trainOptions.sisterSplitters = Generics.NewHashSet(Arrays.AsList(op.tlpParams.SisterSplitters()));
            //    BinarizerFactory.TreeAnnotator.setTreebankLang(tlpParams);
            PrintWriter pw = op.tlpParams.Pw();

            op.testOptions.Display();
            op.trainOptions.Display();
            op.Display();
            op.tlpParams.Display();
            // setup tree transforms
            Treebank       trainTreebank = op.tlpParams.MemoryTreebank();
            MemoryTreebank testTreebank  = op.tlpParams.TestMemoryTreebank();

            // Treebank blippTreebank = ((EnglishTreebankParserParams) tlpParams).diskTreebank();
            // String blippPath = "/afs/ir.stanford.edu/data/linguistic-data/BLLIP-WSJ/";
            // blippTreebank.loadPath(blippPath, "", true);
            Timing.StartTime();
            log.Info("Reading trees...");
            testTreebank.LoadPath(path, new NumberRangeFileFilter(testLow, testHigh, true));
            if (op.testOptions.increasingLength)
            {
                testTreebank.Sort(new TreeLengthComparator());
            }
            trainTreebank.LoadPath(path, new NumberRangeFileFilter(trainLow, trainHigh, true));
            Timing.Tick("done.");
            log.Info("Binarizing trees...");
            TreeAnnotatorAndBinarizer binarizer;

            if (!op.trainOptions.leftToRight)
            {
                binarizer = new TreeAnnotatorAndBinarizer(op.tlpParams, op.forceCNF, !op.trainOptions.OutsideFactor(), true, op);
            }
            else
            {
                binarizer = new TreeAnnotatorAndBinarizer(op.tlpParams.HeadFinder(), new LeftHeadFinder(), op.tlpParams, op.forceCNF, !op.trainOptions.OutsideFactor(), true, op);
            }
            CollinsPuncTransformer collinsPuncTransformer = null;

            if (op.trainOptions.collinsPunc)
            {
                collinsPuncTransformer = new CollinsPuncTransformer(tlp);
            }
            ITreeTransformer debinarizer      = new Debinarizer(op.forceCNF);
            IList <Tree>     binaryTrainTrees = new List <Tree>();

            if (op.trainOptions.selectiveSplit)
            {
                op.trainOptions.splitters = ParentAnnotationStats.GetSplitCategories(trainTreebank, op.trainOptions.tagSelectiveSplit, 0, op.trainOptions.selectiveSplitCutOff, op.trainOptions.tagSelectiveSplitCutOff, op.tlpParams.TreebankLanguagePack());
                if (op.trainOptions.deleteSplitters != null)
                {
                    IList <string> deleted = new List <string>();
                    foreach (string del in op.trainOptions.deleteSplitters)
                    {
                        string baseDel    = tlp.BasicCategory(del);
                        bool   checkBasic = del.Equals(baseDel);
                        for (IEnumerator <string> it = op.trainOptions.splitters.GetEnumerator(); it.MoveNext();)
                        {
                            string elem     = it.Current;
                            string baseElem = tlp.BasicCategory(elem);
                            bool   delStr   = checkBasic && baseElem.Equals(baseDel) || elem.Equals(del);
                            if (delStr)
                            {
                                it.Remove();
                                deleted.Add(elem);
                            }
                        }
                    }
                    log.Info("Removed from vertical splitters: " + deleted);
                }
            }
            if (op.trainOptions.selectivePostSplit)
            {
                ITreeTransformer myTransformer = new TreeAnnotator(op.tlpParams.HeadFinder(), op.tlpParams, op);
                Treebank         annotatedTB   = trainTreebank.Transform(myTransformer);
                op.trainOptions.postSplitters = ParentAnnotationStats.GetSplitCategories(annotatedTB, true, 0, op.trainOptions.selectivePostSplitCutOff, op.trainOptions.tagSelectivePostSplitCutOff, op.tlpParams.TreebankLanguagePack());
            }
            if (op.trainOptions.hSelSplit)
            {
                binarizer.SetDoSelectiveSplit(false);
                foreach (Tree tree in trainTreebank)
                {
                    if (op.trainOptions.collinsPunc)
                    {
                        tree = collinsPuncTransformer.TransformTree(tree);
                    }
                    //tree.pennPrint(tlpParams.pw());
                    tree = binarizer.TransformTree(tree);
                }
                //binaryTrainTrees.add(tree);
                binarizer.SetDoSelectiveSplit(true);
            }
            foreach (Tree tree_1 in trainTreebank)
            {
                if (op.trainOptions.collinsPunc)
                {
                    tree_1 = collinsPuncTransformer.TransformTree(tree_1);
                }
                tree_1 = binarizer.TransformTree(tree_1);
                binaryTrainTrees.Add(tree_1);
            }
            if (op.testOptions.verbose)
            {
                binarizer.DumpStats();
            }
            IList <Tree> binaryTestTrees = new List <Tree>();

            foreach (Tree tree_2 in testTreebank)
            {
                if (op.trainOptions.collinsPunc)
                {
                    tree_2 = collinsPuncTransformer.TransformTree(tree_2);
                }
                tree_2 = binarizer.TransformTree(tree_2);
                binaryTestTrees.Add(tree_2);
            }
            Timing.Tick("done.");
            // binarization
            BinaryGrammar      bg = null;
            UnaryGrammar       ug = null;
            IDependencyGrammar dg = null;
            // DependencyGrammar dgBLIPP = null;
            ILexicon        lex        = null;
            IIndex <string> stateIndex = new HashIndex <string>();
            // extract grammars
            IExtractor <Pair <UnaryGrammar, BinaryGrammar> > bgExtractor = new BinaryGrammarExtractor(op, stateIndex);

            //Extractor bgExtractor = new SmoothedBinaryGrammarExtractor();//new BinaryGrammarExtractor();
            // Extractor lexExtractor = new LexiconExtractor();
            //Extractor dgExtractor = new DependencyMemGrammarExtractor();
            if (op.doPCFG)
            {
                log.Info("Extracting PCFG...");
                Pair <UnaryGrammar, BinaryGrammar> bgug = null;
                if (op.trainOptions.cheatPCFG)
                {
                    IList <Tree> allTrees = new List <Tree>(binaryTrainTrees);
                    Sharpen.Collections.AddAll(allTrees, binaryTestTrees);
                    bgug = bgExtractor.Extract(allTrees);
                }
                else
                {
                    bgug = bgExtractor.Extract(binaryTrainTrees);
                }
                bg = bgug.second;
                bg.SplitRules();
                ug = bgug.first;
                ug.PurgeRules();
                Timing.Tick("done.");
            }
            log.Info("Extracting Lexicon...");
            IIndex <string> wordIndex = new HashIndex <string>();
            IIndex <string> tagIndex  = new HashIndex <string>();

            lex = op.tlpParams.Lex(op, wordIndex, tagIndex);
            lex.InitializeTraining(binaryTrainTrees.Count);
            lex.Train(binaryTrainTrees);
            lex.FinishTraining();
            Timing.Tick("done.");
            if (op.doDep)
            {
                log.Info("Extracting Dependencies...");
                binaryTrainTrees.Clear();
                IExtractor <IDependencyGrammar> dgExtractor = new MLEDependencyGrammarExtractor(op, wordIndex, tagIndex);
                // dgBLIPP = (DependencyGrammar) dgExtractor.extract(new ConcatenationIterator(trainTreebank.iterator(),blippTreebank.iterator()),new TransformTreeDependency(tlpParams,true));
                // DependencyGrammar dg1 = dgExtractor.extract(trainTreebank.iterator(), new TransformTreeDependency(op.tlpParams, true));
                //dgBLIPP=(DependencyGrammar)dgExtractor.extract(blippTreebank.iterator(),new TransformTreeDependency(tlpParams));
                //dg = (DependencyGrammar) dgExtractor.extract(new ConcatenationIterator(trainTreebank.iterator(),blippTreebank.iterator()),new TransformTreeDependency(tlpParams));
                // dg=new DependencyGrammarCombination(dg1,dgBLIPP,2);
                dg = dgExtractor.Extract(binaryTrainTrees);
                //uses information whether the words are known or not, discards unknown words
                Timing.Tick("done.");
                //System.out.print("Extracting Unknown Word Model...");
                //UnknownWordModel uwm = (UnknownWordModel)uwmExtractor.extract(binaryTrainTrees);
                //Timing.tick("done.");
                System.Console.Out.Write("Tuning Dependency Model...");
                dg.Tune(binaryTestTrees);
                //System.out.println("TUNE DEPS: "+tuneDeps);
                Timing.Tick("done.");
            }
            BinaryGrammar      boundBG = bg;
            UnaryGrammar       boundUG = ug;
            IGrammarProjection gp      = new NullGrammarProjection(bg, ug);

            // serialization
            if (serializeFile != null)
            {
                log.Info("Serializing parser...");
                LexicalizedParser parser = new LexicalizedParser(lex, bg, ug, dg, stateIndex, wordIndex, tagIndex, op);
                parser.SaveParserToSerialized(serializeFile);
                Timing.Tick("done.");
            }
            // test: pcfg-parse and output
            ExhaustivePCFGParser parser_1 = null;

            if (op.doPCFG)
            {
                parser_1 = new ExhaustivePCFGParser(boundBG, boundUG, lex, op, stateIndex, wordIndex, tagIndex);
            }
            ExhaustiveDependencyParser dparser = ((op.doDep && !op.testOptions.useFastFactored) ? new ExhaustiveDependencyParser(dg, lex, op, wordIndex, tagIndex) : null);
            IScorer scorer = (op.doPCFG ? new TwinScorer(new ProjectionScorer(parser_1, gp, op), dparser) : null);
            //Scorer scorer = parser;
            BiLexPCFGParser bparser = null;

            if (op.doPCFG && op.doDep)
            {
                bparser = (op.testOptions.useN5) ? new BiLexPCFGParser.N5BiLexPCFGParser(scorer, parser_1, dparser, bg, ug, dg, lex, op, gp, stateIndex, wordIndex, tagIndex) : new BiLexPCFGParser(scorer, parser_1, dparser, bg, ug, dg, lex, op, gp, stateIndex
                                                                                                                                                                                                    , wordIndex, tagIndex);
            }
            Evalb        pcfgPE         = new Evalb("pcfg  PE", true);
            Evalb        comboPE        = new Evalb("combo PE", true);
            AbstractEval pcfgCB         = new Evalb.CBEval("pcfg  CB", true);
            AbstractEval pcfgTE         = new TaggingEval("pcfg  TE");
            AbstractEval comboTE        = new TaggingEval("combo TE");
            AbstractEval pcfgTEnoPunct  = new TaggingEval("pcfg nopunct TE");
            AbstractEval comboTEnoPunct = new TaggingEval("combo nopunct TE");
            AbstractEval depTE          = new TaggingEval("depnd TE");
            AbstractEval depDE          = new UnlabeledAttachmentEval("depnd DE", true, null, tlp.PunctuationWordRejectFilter());
            AbstractEval comboDE        = new UnlabeledAttachmentEval("combo DE", true, null, tlp.PunctuationWordRejectFilter());

            if (op.testOptions.evalb)
            {
                EvalbFormatWriter.InitEVALBfiles(op.tlpParams);
            }
            // int[] countByLength = new int[op.testOptions.maxLength+1];
            // Use a reflection ruse, so one can run this without needing the
            // tagger.  Using a function rather than a MaxentTagger means we
            // can distribute a version of the parser that doesn't include the
            // entire tagger.
            IFunction <IList <IHasWord>, List <TaggedWord> > tagger = null;

            if (op.testOptions.preTag)
            {
                try
                {
                    Type[]   argsClass = new Type[] { typeof(string) };
                    object[] arguments = new object[] { op.testOptions.taggerSerializedFile };
                    tagger = (IFunction <IList <IHasWord>, List <TaggedWord> >)Sharpen.Runtime.GetType("edu.stanford.nlp.tagger.maxent.MaxentTagger").GetConstructor(argsClass).NewInstance(arguments);
                }
                catch (Exception e)
                {
                    log.Info(e);
                    log.Info("Warning: No pretagging of sentences will be done.");
                }
            }
            for (int tNum = 0; tNum < ttSize; tNum++)
            {
                Tree tree        = testTreebank[tNum];
                int  testTreeLen = tree_2.Yield().Count;
                if (testTreeLen > op.testOptions.maxLength)
                {
                    continue;
                }
                Tree binaryTree = binaryTestTrees[tNum];
                // countByLength[testTreeLen]++;
                System.Console.Out.WriteLine("-------------------------------------");
                System.Console.Out.WriteLine("Number: " + (tNum + 1));
                System.Console.Out.WriteLine("Length: " + testTreeLen);
                //tree.pennPrint(pw);
                // System.out.println("XXXX The binary tree is");
                // binaryTree.pennPrint(pw);
                //System.out.println("Here are the tags in the lexicon:");
                //System.out.println(lex.showTags());
                //System.out.println("Here's the tagnumberer:");
                //System.out.println(Numberer.getGlobalNumberer("tags").toString());
                long timeMil1 = Runtime.CurrentTimeMillis();
                Timing.Tick("Starting parse.");
                if (op.doPCFG)
                {
                    //log.info(op.testOptions.forceTags);
                    if (op.testOptions.forceTags)
                    {
                        if (tagger != null)
                        {
                            //System.out.println("Using a tagger to set tags");
                            //System.out.println("Tagged sentence as: " + tagger.processSentence(cutLast(wordify(binaryTree.yield()))).toString(false));
                            parser_1.Parse(AddLast(tagger.Apply(CutLast(Wordify(binaryTree.Yield())))));
                        }
                        else
                        {
                            //System.out.println("Forcing tags to match input.");
                            parser_1.Parse(CleanTags(binaryTree.TaggedYield(), tlp));
                        }
                    }
                    else
                    {
                        // System.out.println("XXXX Parsing " + binaryTree.yield());
                        parser_1.Parse(binaryTree.YieldHasWord());
                    }
                }
                //Timing.tick("Done with pcfg phase.");
                if (op.doDep)
                {
                    dparser.Parse(binaryTree.YieldHasWord());
                }
                //Timing.tick("Done with dependency phase.");
                bool bothPassed = false;
                if (op.doPCFG && op.doDep)
                {
                    bothPassed = bparser.Parse(binaryTree.YieldHasWord());
                }
                //Timing.tick("Done with combination phase.");
                long timeMil2 = Runtime.CurrentTimeMillis();
                long elapsed  = timeMil2 - timeMil1;
                log.Info("Time: " + ((int)(elapsed / 100)) / 10.00 + " sec.");
                //System.out.println("PCFG Best Parse:");
                Tree tree2b = null;
                Tree tree2  = null;
                //System.out.println("Got full best parse...");
                if (op.doPCFG)
                {
                    tree2b = parser_1.GetBestParse();
                    tree2  = debinarizer.TransformTree(tree2b);
                }
                //System.out.println("Debinarized parse...");
                //tree2.pennPrint();
                //System.out.println("DepG Best Parse:");
                Tree tree3   = null;
                Tree tree3db = null;
                if (op.doDep)
                {
                    tree3 = dparser.GetBestParse();
                    // was: but wrong Tree tree3db = debinarizer.transformTree(tree2);
                    tree3db = debinarizer.TransformTree(tree3);
                    tree3.PennPrint(pw);
                }
                //tree.pennPrint();
                //((Tree)binaryTrainTrees.get(tNum)).pennPrint();
                //System.out.println("Combo Best Parse:");
                Tree tree4 = null;
                if (op.doPCFG && op.doDep)
                {
                    try
                    {
                        tree4 = bparser.GetBestParse();
                        if (tree4 == null)
                        {
                            tree4 = tree2b;
                        }
                    }
                    catch (ArgumentNullException)
                    {
                        log.Info("Blocked, using PCFG parse!");
                        tree4 = tree2b;
                    }
                }
                if (op.doPCFG && !bothPassed)
                {
                    tree4 = tree2b;
                }
                //tree4.pennPrint();
                if (op.doDep)
                {
                    depDE.Evaluate(tree3, binaryTree, pw);
                    depTE.Evaluate(tree3db, tree_2, pw);
                }
                ITreeTransformer tc      = op.tlpParams.Collinizer();
                ITreeTransformer tcEvalb = op.tlpParams.CollinizerEvalb();
                if (op.doPCFG)
                {
                    // System.out.println("XXXX Best PCFG was: ");
                    // tree2.pennPrint();
                    // System.out.println("XXXX Transformed best PCFG is: ");
                    // tc.transformTree(tree2).pennPrint();
                    //System.out.println("True Best Parse:");
                    //tree.pennPrint();
                    //tc.transformTree(tree).pennPrint();
                    pcfgPE.Evaluate(tc.TransformTree(tree2), tc.TransformTree(tree_2), pw);
                    pcfgCB.Evaluate(tc.TransformTree(tree2), tc.TransformTree(tree_2), pw);
                    Tree tree4b = null;
                    if (op.doDep)
                    {
                        comboDE.Evaluate((bothPassed ? tree4 : tree3), binaryTree, pw);
                        tree4b = tree4;
                        tree4  = debinarizer.TransformTree(tree4);
                        if (op.nodePrune)
                        {
                            NodePruner np = new NodePruner(parser_1, debinarizer);
                            tree4 = np.Prune(tree4);
                        }
                        //tree4.pennPrint();
                        comboPE.Evaluate(tc.TransformTree(tree4), tc.TransformTree(tree_2), pw);
                    }
                    //pcfgTE.evaluate(tree2, tree);
                    pcfgTE.Evaluate(tcEvalb.TransformTree(tree2), tcEvalb.TransformTree(tree_2), pw);
                    pcfgTEnoPunct.Evaluate(tc.TransformTree(tree2), tc.TransformTree(tree_2), pw);
                    if (op.doDep)
                    {
                        comboTE.Evaluate(tcEvalb.TransformTree(tree4), tcEvalb.TransformTree(tree_2), pw);
                        comboTEnoPunct.Evaluate(tc.TransformTree(tree4), tc.TransformTree(tree_2), pw);
                    }
                    System.Console.Out.WriteLine("PCFG only: " + parser_1.ScoreBinarizedTree(tree2b, 0));
                    //tc.transformTree(tree2).pennPrint();
                    tree2.PennPrint(pw);
                    if (op.doDep)
                    {
                        System.Console.Out.WriteLine("Combo: " + parser_1.ScoreBinarizedTree(tree4b, 0));
                        // tc.transformTree(tree4).pennPrint(pw);
                        tree4.PennPrint(pw);
                    }
                    System.Console.Out.WriteLine("Correct:" + parser_1.ScoreBinarizedTree(binaryTree, 0));

                    /*
                     * if (parser.scoreBinarizedTree(tree2b,true) < parser.scoreBinarizedTree(binaryTree,true)) {
                     * System.out.println("SCORE INVERSION");
                     * parser.validateBinarizedTree(binaryTree,0);
                     * }
                     */
                    tree_2.PennPrint(pw);
                }
                // end if doPCFG
                if (op.testOptions.evalb)
                {
                    if (op.doPCFG && op.doDep)
                    {
                        EvalbFormatWriter.WriteEVALBline(tcEvalb.TransformTree(tree_2), tcEvalb.TransformTree(tree4));
                    }
                    else
                    {
                        if (op.doPCFG)
                        {
                            EvalbFormatWriter.WriteEVALBline(tcEvalb.TransformTree(tree_2), tcEvalb.TransformTree(tree2));
                        }
                        else
                        {
                            if (op.doDep)
                            {
                                EvalbFormatWriter.WriteEVALBline(tcEvalb.TransformTree(tree_2), tcEvalb.TransformTree(tree3db));
                            }
                        }
                    }
                }
            }
            // end for each tree in test treebank
            if (op.testOptions.evalb)
            {
                EvalbFormatWriter.CloseEVALBfiles();
            }
            // op.testOptions.display();
            if (op.doPCFG)
            {
                pcfgPE.Display(false, pw);
                System.Console.Out.WriteLine("Grammar size: " + stateIndex.Size());
                pcfgCB.Display(false, pw);
                if (op.doDep)
                {
                    comboPE.Display(false, pw);
                }
                pcfgTE.Display(false, pw);
                pcfgTEnoPunct.Display(false, pw);
                if (op.doDep)
                {
                    comboTE.Display(false, pw);
                    comboTEnoPunct.Display(false, pw);
                }
            }
            if (op.doDep)
            {
                depTE.Display(false, pw);
                depDE.Display(false, pw);
            }
            if (op.doPCFG && op.doDep)
            {
                comboDE.Display(false, pw);
            }
        }
        /// <summary>
        /// Produces a fresh collection in which each input constituent is replaced by a
        /// <c>LabeledConstituent</c> with the same start and end but whose label is the
        /// result of <c>tlp.BasicCategory</c> applied to the original label value.
        /// </summary>
        /// <param name="tlp">Language pack used to map each label value to its basic category.</param>
        /// <param name="constituents">Constituents to convert; every element is expected to be a <c>LabeledConstituent</c>.</param>
        /// <returns>A newly allocated <c>HashSet</c> containing the converted constituents.</returns>
        /// <exception cref="AssertionError">Thrown when an element is not a <c>LabeledConstituent</c>.</exception>
        public static ICollection <Constituent> SimplifyConstituents(ITreebankLanguagePack tlp, ICollection <Constituent> constituents)
        {
            ICollection <Constituent> simplified = new HashSet <Constituent>();
            foreach (Constituent candidate in constituents)
            {
                // Only labeled constituents carry a category to simplify; anything else is a programming error.
                LabeledConstituent source = candidate as LabeledConstituent;
                if (source == null)
                {
                    throw new AssertionError("Unexpected constituent type " + candidate.GetType());
                }
                // Same span, label reduced to its basic category.
                simplified.Add(
                    new LabeledConstituent(
                        source.Start(),
                        source.End(),
                        tlp.BasicCategory(source.Value())));
            }
            return simplified;
        }