/// <summary>
        /// Recursively walks <paramref name="tree"/> and appends the dependencies found
        /// beneath it to <paramref name="depList"/>.
        /// </summary>
        /// <remarks>
        /// The dependencies are expressed in terms of the original tag set, not the
        /// reduced (projected) tag set. For a node with more than one child only the
        /// first two children are examined. Three entries are appended per such node:
        /// the head/argument dependency, then a left and a right STOP dependency for
        /// the argument.
        /// </remarks>
        /// <param name="tree">The subtree to process; its labels are cast to IHasTag and IHasWord.</param>
        /// <param name="depList">The list that receives the dependencies.</param>
        /// <param name="loc">Position of the first word covered by <paramref name="tree"/>.</param>
        /// <param name="wordIndex">Index used to map words to ints; unseen words map to the unknown-word entry.</param>
        /// <param name="tagIndex">Index used to map tags to ints.</param>
        /// <returns>The head position and the end position (exclusive) of the span covered by the subtree.</returns>
        protected internal static MLEDependencyGrammar.EndHead TreeToDependencyHelper(Tree tree, IList <IntDependency> depList, int loc, IIndex <string> wordIndex, IIndex <string> tagIndex)
        {
            // A leaf or preterminal covers exactly one position and is its own head.
            if (tree.IsLeaf() || tree.IsPreTerminal())
            {
                MLEDependencyGrammar.EndHead leafSpan = new MLEDependencyGrammar.EndHead();
                leafSpan.head = loc;
                leafSpan.end  = loc + 1;
                return leafSpan;
            }

            Tree[] children = tree.Children();
            // Unary node: nothing to add here, just pass the child's result up.
            if (children.Length == 1)
            {
                return TreeToDependencyHelper(children[0], depList, loc, wordIndex, tagIndex);
            }

            // Left child first, then the right child starting where the left one ended.
            MLEDependencyGrammar.EndHead span = TreeToDependencyHelper(children[0], depList, loc, wordIndex, tagIndex);
            int leftHeadPos = span.head;
            int splitPos    = span.end;

            span = TreeToDependencyHelper(children[1], depList, splitPos, wordIndex, tagIndex);
            int endPos       = span.end;
            int rightHeadPos = span.head;

            string parentTag  = ((IHasTag)tree.Label()).Tag();
            string leftTag    = ((IHasTag)children[0].Label()).Tag();
            string rightTag   = ((IHasTag)children[1].Label()).Tag();
            string parentWord = ((IHasWord)tree.Label()).Word();
            string leftWord   = ((IHasWord)children[0].Label()).Word();
            string rightWord  = ((IHasWord)children[1].Label()).Word();

            // The node is left-headed when its head word is the left child's head word.
            bool leftHeaded = parentWord.Equals(leftWord);

            string argTag;
            string argWord;
            int    headPos;
            int    argPos;
            int    depDistance;
            int    stopLeftDistance;
            int    stopRightDistance;

            if (leftHeaded)
            {
                argTag            = rightTag;
                argWord           = rightWord;
                headPos           = leftHeadPos;
                argPos            = rightHeadPos;
                depDistance       = splitPos - headPos - 1;
                stopLeftDistance  = argPos - splitPos;
                stopRightDistance = endPos - argPos - 1;
            }
            else
            {
                argTag            = leftTag;
                argWord           = leftWord;
                headPos           = rightHeadPos;
                argPos            = leftHeadPos;
                depDistance       = headPos - splitPos;
                stopLeftDistance  = argPos - loc;
                stopRightDistance = splitPos - argPos - 1;
            }

            int headTagId = tagIndex.IndexOf(parentTag);
            int argTagId  = tagIndex.IndexOf(argTag);

            // Words missing from the index are replaced by the unknown-word entry.
            int headWordId;
            if (wordIndex.Contains(parentWord))
            {
                headWordId = wordIndex.IndexOf(parentWord);
            }
            else
            {
                headWordId = wordIndex.IndexOf(LexiconConstants.UnknownWord);
            }
            int argWordId;
            if (wordIndex.Contains(argWord))
            {
                argWordId = wordIndex.IndexOf(argWord);
            }
            else
            {
                argWordId = wordIndex.IndexOf(LexiconConstants.UnknownWord);
            }

            depList.Add(new IntDependency(headWordId, headTagId, argWordId, argTagId, leftHeaded, depDistance));
            depList.Add(new IntDependency(argWordId, argTagId, IntTaggedWord.StopWordInt, IntTaggedWord.StopTagInt, false, stopLeftDistance));
            depList.Add(new IntDependency(argWordId, argTagId, IntTaggedWord.StopWordInt, IntTaggedWord.StopTagInt, true, stopRightDistance));

            // Reuse the right child's result: its end is already this node's end.
            span.head = headPos;
            return span;
        }
 /// <summary>
 /// Adds the counts for one observed dependency. The dependency arg is still in
 /// the full tag space.
 /// </summary>
 /// <remarks>
 /// A dependency with a null head or a null arg is ignored. Argument counts are
 /// collected only when the arg is not the STOP word; stop counts are always
 /// collected.
 /// </remarks>
 /// <param name="dependency">An observed dependency</param>
 /// <param name="count">The weight of the dependency</param>
 protected internal virtual void ExpandDependency(IntDependency dependency, double count)
 {
     bool isComplete = !(dependency.head == null || dependency.arg == null);

     if (isComplete)
     {
         bool argIsStop = dependency.arg.word == IntTaggedWord.StopWordInt;
         if (!argIsStop)
         {
             // Real arguments are counted under the valence-binned distance.
             ExpandArg(dependency, ValenceBin(dependency.distance), count);
         }
         // Stop statistics use the distance bin and ask for the wildcard counts too.
         ExpandStop(dependency, DistanceBin(dependency.distance), count, true);
     }
 }
Exemplo n.º 3
0
        /// <summary>
        /// This is a custom interner that simultaneously creates and interns
        /// an IntDependency.
        /// </summary>
        /// <remarks>
        /// The head and arg are interned through <c>itwInterner</c> first. When
        /// <c>expandDependencyMap</c> is null no dependency interning takes place and the
        /// freshly built dependency is returned as is.
        /// </remarks>
        /// <param name="headTW">The head of the dependency.</param>
        /// <param name="argTW">The argument of the dependency.</param>
        /// <param name="leftHeaded">The direction flag stored in the dependency.</param>
        /// <param name="dist">The distance stored in the dependency.</param>
        /// <returns>An interned IntDependency</returns>
        protected internal virtual IntDependency Intern(IntTaggedWord headTW, IntTaggedWord argTW, bool leftHeaded, short dist)
        {
            IDictionary <IntDependency, IntDependency> map = expandDependencyMap;
            IntDependency candidate = new IntDependency(itwInterner.Intern(headTW), itwInterner.Intern(argTW), leftHeaded, dist);

            if (map == null)
            {
                return candidate;
            }

            // The IDictionary indexer throws KeyNotFoundException for a missing key
            // (unlike Java's Map.get, which returns null), so probe with TryGetValue.
            IntDependency canonical;
            if (map.TryGetValue(candidate, out canonical) && canonical != null)
            {
                return canonical;
            }

            // First time this dependency is seen: it becomes its own canonical instance.
            map[candidate] = candidate;
            return candidate;
        }
        /// <summary>Add this dependency with the given count to the grammar.</summary>
        /// <remarks>
        /// This is the main entry point of MLEDependencyGrammarExtractor.
        /// The dependency is represented in the full tag space. When the grammar is
        /// not directional, a copy with the direction flag cleared is counted instead
        /// of the dependency that was passed in.
        /// </remarks>
        /// <param name="dependency">The dependency to add.</param>
        /// <param name="count">The weight with which to add it.</param>
        public virtual void AddRule(IntDependency dependency, double count)
        {
            IntDependency toCount = directional
                ? dependency
                : new IntDependency(dependency.head, dependency.arg, false, dependency.distance);

            ExpandDependency(toCount, count);
        }
        /// <summary>
        /// Populates data in this DependencyGrammar from the character stream
        /// given by the Reader r.
        /// </summary>
        /// <remarks>
        /// Reading stops at end of input or at the first empty line. Each line holds one
        /// rule made of space-separated fields (double-quote quoting, backslash escaping):
        /// field 0 is the head, field 2 the argument, field 3 the direction ("left" means
        /// left-headed), field 4 the distance and field 5 the count. Lines before a
        /// <c>BEGIN_STOP</c> marker are argument rules; lines after it are stop rules.
        /// Numbers are parsed with the invariant culture so that the file is read the same
        /// way regardless of the machine's regional settings.
        /// </remarks>
        /// <param name="in">The reader to take the rules from.</param>
        /// <exception cref="System.IO.IOException">
        /// If a line cannot be parsed; the message carries the line number and the line text.
        /// </exception>
        public override void ReadData(BufferedReader @in)
        {
            string Left    = "left";
            int    lineNum = 1;
            // all lines have one rule per line
            bool doingStop = false;
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            for (string line = @in.ReadLine(); line != null && line.Length > 0; line = @in.ReadLine())
            {
                try
                {
                    if (line.Equals("BEGIN_STOP"))
                    {
                        doingStop = true;
                        // The marker occupies a line too; count it so that later error
                        // messages report the true line number.
                        lineNum++;
                        continue;
                    }
                    // split on spaces, quote with doublequote, and escape with backslash
                    string[] fields = StringUtils.SplitOnCharWithQuoting(line, ' ', '\"', '\\');
                    short         distance       = (short)System.Convert.ToInt32(fields[4], invariant);
                    IntTaggedWord tempHead       = new IntTaggedWord(fields[0], '/', wordIndex, tagIndex);
                    IntTaggedWord tempArg        = new IntTaggedWord(fields[2], '/', wordIndex, tagIndex);
                    IntDependency tempDependency = new IntDependency(tempHead, tempArg, fields[3].Equals(Left), distance);
                    double        count          = double.Parse(fields[5], invariant);
                    if (doingStop)
                    {
                        ExpandStop(tempDependency, distance, count, false);
                    }
                    else
                    {
                        ExpandArg(tempDependency, distance, count);
                    }
                }
                catch (Exception e)
                {
                    // Wrap any failure (too few fields, bad number, ...) with the position in the file.
                    IOException ioe = new IOException("Error on line " + lineNum + ": " + line);
                    ioe.InitCause(e);
                    throw ioe;
                }
                lineNum++;
            }
        }
        /// <summary>Collect stop-related counts for a dependency.</summary>
        /// <remarks>
        /// When the arg is the STOP word, the (head, STOP) events are counted. When
        /// <paramref name="wildForStop"/> is set or the arg is not the STOP word, the
        /// (head, wildcard) events are counted. Each event is counted twice: once with the
        /// tag-binned head word/tag pair and once with the cached head built from the tag alone.
        /// </remarks>
        /// <param name="dependency">The dependency to count.</param>
        /// <param name="distBinDist">The distance stored with the counted events.</param>
        /// <param name="count">The weight with which to add this dependency</param>
        /// <param name="wildForStop">Whether a STOP arg also contributes to the wildcard counts.</param>
        private void ExpandStop(IntDependency dependency, short distBinDist, double count, bool wildForStop)
        {
            IntTaggedWord cachedHeadT = GetCachedITW(dependency.head.tag);
            IntTaggedWord binnedHead  = new IntTaggedWord(dependency.head.word, TagBin(dependency.head.tag));
            IntTaggedWord binnedArg   = new IntTaggedWord(dependency.arg.word, TagBin(dependency.arg.tag));
            bool          direction   = dependency.leftHeaded;
            bool          argIsStop   = binnedArg.word == IntTaggedWord.StopWordInt;

            if (argIsStop)
            {
                stopCounter.IncrementCount(Intern(binnedHead, binnedArg, direction, distBinDist), count);
                stopCounter.IncrementCount(Intern(cachedHeadT, binnedArg, direction, distBinDist), count);
            }
            if (wildForStop || !argIsStop)
            {
                stopCounter.IncrementCount(Intern(binnedHead, wildTW, direction, distBinDist), count);
                stopCounter.IncrementCount(Intern(cachedHeadT, wildTW, direction, distBinDist), count);
            }
        }
        /// <summary>
        /// Return the probability (as a real number between 0 and 1) of stopping
        /// rather than generating another argument at this position.
        /// </summary>
        /// <remarks>
        /// The word-level estimate is smoothed towards the tag-level estimate with weight
        /// <c>smooth_stop</c>. When there are no tag-level counts at all, the tag-level
        /// estimate defaults to 1.0.
        /// </remarks>
        /// <param name="dependency">
        /// The dependency used as the basis for stopping on.
        /// Tags are assumed to be in the TagProjection space.
        /// </param>
        /// <returns>The smoothed probability of stopping</returns>
        protected internal virtual double GetStopProb(IntDependency dependency)
        {
            short         distBin     = DistanceBin(dependency.distance);
            IntTaggedWord unknownHead = new IntTaggedWord(-1, dependency.head.tag);
            IntTaggedWord anyHead     = new IntTaggedWord(IntTaggedWord.AnyWordInt, dependency.head.tag);

            // Stop counts for the exact head and for the tag-only head ...
            double stopForWordHead = stopCounter.GetCount(new IntDependency(dependency.head, stopTW, dependency.leftHeaded, distBin));
            double stopForTagHead  = stopCounter.GetCount(new IntDependency(unknownHead, stopTW, dependency.leftHeaded, distBin));
            // ... and the matching totals (wildcard arg).
            double totalForWordHead = stopCounter.GetCount(new IntDependency(dependency.head, wildTW, dependency.leftHeaded, distBin));
            double totalForTagHead  = stopCounter.GetCount(new IntDependency(anyHead, wildTW, dependency.leftHeaded, distBin));

            double tagLevelStopProb;
            if (totalForTagHead > 0.0)
            {
                tagLevelStopProb = stopForTagHead / totalForTagHead;
            }
            else
            {
                tagLevelStopProb = 1.0;
            }

            return (stopForWordHead + smooth_stop * tagLevelStopProb) / (totalForWordHead + smooth_stop);
        }
        /// <summary>Collect counts for a non-STOP dependent.</summary>
        /// <remarks>
        /// The dependency arg is still in the full tag space. Counts are added for every
        /// combination of {tag-binned head, cached tag-derived head} with
        /// {tag-binned arg, cached tag-derived arg, wildcard}, plus directionless,
        /// distance-free counts with a wildcard head. When <c>useSmoothTagProjection</c>
        /// is set, the same pattern is repeated with projected tags. Finally the word
        /// token counter is advanced by one.
        /// </remarks>
        /// <param name="dependency">A non-stop dependency</param>
        /// <param name="valBinDist">A binned distance</param>
        /// <param name="count">The weight with which to add this dependency</param>
        private void ExpandArg(IntDependency dependency, short valBinDist, double count)
        {
            IntTaggedWord cachedHeadT = GetCachedITW(dependency.head.tag);
            IntTaggedWord cachedArgT  = GetCachedITW(dependency.arg.tag);
            IntTaggedWord binnedHead  = new IntTaggedWord(dependency.head.word, TagBin(dependency.head.tag));
            IntTaggedWord binnedArg   = new IntTaggedWord(dependency.arg.word, TagBin(dependency.arg.tag));
            bool          direction   = dependency.leftHeaded;

            // argCounter stores stuff in both the original and the reduced tag space???
            IntTaggedWord[] heads = { binnedHead, cachedHeadT };
            IntTaggedWord[] args  = { binnedArg, cachedArgT, wildTW };
            foreach (IntTaggedWord a in args)
            {
                foreach (IntTaggedWord h in heads)
                {
                    argCounter.IncrementCount(Intern(h, a, direction, valBinDist), count);
                }
            }
            // the WILD head stats are always directionless and not useDistance!
            argCounter.IncrementCount(Intern(wildTW, binnedArg, false, (short)-1), count);
            argCounter.IncrementCount(Intern(wildTW, cachedArgT, false, (short)-1), count);

            if (useSmoothTagProjection)
            {
                // added stuff to do more smoothing.  CDM Jan 2007
                IntTaggedWord projHead    = new IntTaggedWord(dependency.head.word, TagProject(dependency.head.tag));
                IntTaggedWord projHeadAny = new IntTaggedWord(IntTaggedWord.AnyWordInt, TagProject(dependency.head.tag));
                IntTaggedWord projArg     = new IntTaggedWord(dependency.arg.word, TagProject(dependency.arg.tag));
                IntTaggedWord projArgAny  = new IntTaggedWord(IntTaggedWord.AnyWordInt, TagProject(dependency.arg.tag));

                IntTaggedWord[] projHeads = { projHead, projHeadAny };
                IntTaggedWord[] projArgs  = { projArg, projArgAny, wildTW };
                foreach (IntTaggedWord a in projArgs)
                {
                    foreach (IntTaggedWord h in projHeads)
                    {
                        argCounter.IncrementCount(Intern(h, a, direction, valBinDist), count);
                    }
                }
                // the WILD head stats are always directionless and not useDistance!
                argCounter.IncrementCount(Intern(wildTW, projArg, false, (short)-1), count);
                argCounter.IncrementCount(Intern(wildTW, projArgAny, false, (short)-1), count);
                // Note: this last entry is built from the HEAD word with the any-tag marker.
                argCounter.IncrementCount(Intern(wildTW, new IntTaggedWord(dependency.head.word, IntTaggedWord.AnyTagInt), false, (short)-1), count);
            }
            numWordTokens++;
        }
Exemplo n.º 9
0
 /// <summary>
 /// Scores a dependency as the dependency weight from the test options times the
 /// natural log of the probability given by ProbTBwithSimWords.
 /// </summary>
 /// <param name="dependency">The dependency to score.</param>
 /// <returns>The weighted log probability.</returns>
 public override double ScoreTB(IntDependency dependency)
 {
     double weight  = op.testOptions.depWeight;
     double logProb = Math.Log(ProbTBwithSimWords(dependency));

     return weight * logProb;
 }
Exemplo n.º 10
0
        /// <summary>
        /// Smooths the regular probability of a dependency (ProbTB) with a weighted average
        /// of the probabilities obtained by substituting similar words for the head and/or
        /// the argument.
        /// </summary>
        /// <remarks>
        /// Each similar-word entry is a triple: <c>first</c> is used as the word id,
        /// <c>second</c> is compared against the basic category of a tag, and <c>third</c>
        /// is turned into a weight of <c>exp(-50 * third)</c>. For every tag whose basic
        /// category matches, the substituted dependency is scored with ProbTB. When the
        /// argument is substituted, the score is divided by the lexicon probability of the
        /// substituted argument and the final average is multiplied by the lexicon
        /// probability of the real argument. The result is interpolated with the regular
        /// probability using the head count and <c>simSmooth</c>. Several events are tallied
        /// in <c>statsCounter</c> along the way.
        /// </remarks>
        /// <param name="dep">The dependency to score.</param>
        /// <returns>
        /// The regular probability when no similar words are known for either side;
        /// otherwise the smoothed probability.
        /// </returns>
        private double ProbSimilarWordAvg(IntDependency dep)
        {
            double regProb = ProbTB(dep);

            statsCounter.IncrementCount("total");
            // NOTE(review): the null checks below assume these lookups yield null for a
            // missing key; confirm that the map types do not throw instead.
            IList <Triple <int, string, double> > sim2arg  = simArgMap[new Pair <int, string>(dep.arg.word, StringBasicCategory(dep.arg.tag))];
            IList <Triple <int, string, double> > sim2head = simHeadMap[new Pair <int, string>(dep.head.word, StringBasicCategory(dep.head.tag))];

            if (sim2head == null && sim2arg == null)
            {
                return(regProb);
            }
            double sumScores  = 0;
            double sumWeights = 0;

            if (sim2head == null)
            {
                // Only the argument has similar words: substitute the argument.
                statsCounter.IncrementCount("aSim");
                foreach (Triple <int, string, double> simArg in sim2arg)
                {
                    //double weight = 1 - simArg.third;
                    double weight = Math.Exp(-50 * simArg.third);
                    for (int tag = 0; tag < numT; tag++)
                    {
                        if (!StringBasicCategory(tag).Equals(simArg.second))
                        {
                            continue;
                        }
                        IntTaggedWord tempArg = new IntTaggedWord(simArg.first, tag);
                        IntDependency tempDep = new IntDependency(dep.head, tempArg, dep.leftHeaded, dep.distance);
                        double        probArg = Math.Exp(lex.Score(tempArg, 0, wordIndex.Get(tempArg.word), null));
                        if (probArg == 0.0)
                        {
                            // Would divide by zero below; skip this substitution.
                            continue;
                        }
                        sumScores  += ProbTB(tempDep) * weight / probArg;
                        sumWeights += weight;
                    }
                }
            }
            else
            {
                if (sim2arg == null)
                {
                    // Only the head has similar words: substitute the head.
                    statsCounter.IncrementCount("hSim");
                    foreach (Triple <int, string, double> simHead in sim2head)
                    {
                        //double weight = 1 - simHead.third;
                        double weight = Math.Exp(-50 * simHead.third);
                        for (int tag = 0; tag < numT; tag++)
                        {
                            if (!StringBasicCategory(tag).Equals(simHead.second))
                            {
                                continue;
                            }
                            IntTaggedWord tempHead = new IntTaggedWord(simHead.first, tag);
                            IntDependency tempDep  = new IntDependency(tempHead, dep.arg, dep.leftHeaded, dep.distance);
                            sumScores  += ProbTB(tempDep) * weight;
                            sumWeights += weight;
                        }
                    }
                }
                else
                {
                    // Both sides have similar words: substitute every (arg, head) combination.
                    statsCounter.IncrementCount("hSim");
                    statsCounter.IncrementCount("aSim");
                    statsCounter.IncrementCount("aSim&hSim");
                    foreach (Triple <int, string, double> simArg in sim2arg)
                    {
                        for (int aTag = 0; aTag < numT; aTag++)
                        {
                            if (!StringBasicCategory(aTag).Equals(simArg.second))
                            {
                                continue;
                            }
                            IntTaggedWord tempArg = new IntTaggedWord(simArg.first, aTag);
                            double        probArg = Math.Exp(lex.Score(tempArg, 0, wordIndex.Get(tempArg.word), null));
                            if (probArg == 0.0)
                            {
                                continue;
                            }
                            foreach (Triple <int, string, double> simHead in sim2head)
                            {
                                for (int hTag = 0; hTag < numT; hTag++)
                                {
                                    if (!StringBasicCategory(hTag).Equals(simHead.second))
                                    {
                                        continue;
                                    }
                                    // The substituted head takes the head tag that just passed the
                                    // category filter (the argument's tag was used here by mistake).
                                    IntTaggedWord tempHead = new IntTaggedWord(simHead.first, hTag);
                                    IntDependency tempDep  = new IntDependency(tempHead, tempArg, dep.leftHeaded, dep.distance);
                                    //double weight = (1-simHead.third) * (1-simArg.third);
                                    double weight = Math.Exp(-50 * simHead.third) * Math.Exp(-50 * simArg.third);
                                    sumScores  += ProbTB(tempDep) * weight / probArg;
                                    sumWeights += weight;
                                }
                            }
                        }
                    }
                }
            }
            IntDependency temp      = new IntDependency(dep.head, wildTW, dep.leftHeaded, dep.distance);
            double        countHead = argCounter.GetCount(temp);
            double        simProb;

            // NOTE(review): if no substitution contributed, sumWeights is 0 and the
            // divisions below give NaN, which then flows into the returned value.
            if (sim2arg == null)
            {
                simProb = sumScores / sumWeights;
            }
            else
            {
                // Undo the per-substitution division by the substituted argument's
                // lexicon probability, using the real argument's probability.
                double probArg = Math.Exp(lex.Score(dep.arg, 0, wordIndex.Get(dep.arg.word), null));
                simProb = probArg * sumScores / sumWeights;
            }
            if (simProb == 0)
            {
                statsCounter.IncrementCount("simProbZero");
            }
            if (regProb == 0)
            {
                //      log.info("zero reg prob");
                statsCounter.IncrementCount("regProbZero");
            }
            // Interpolate: the more often the head was seen, the more the regular estimate counts.
            double smoothProb = (countHead * regProb + simSmooth * simProb) / (countHead + simSmooth);

            if (smoothProb == 0)
            {
                //      log.info("zero smooth prob");
                statsCounter.IncrementCount("smoothProbZero");
            }
            return(smoothProb);
        }
Exemplo n.º 11
0
        /*
        ** An alternative kind of smoothing.
        ** The first one is "probSimilarWordAvg" implemented by Galen
        ** This one is trying to modify "probTB" in MLEDependencyGrammar using the simWords list we have
        ** -pichuan
        **
        ** Returns the stop probability when the arg is the stop marker (0.0 if the head
        ** is the root). Otherwise returns an interpolated argument probability multiplied
        ** by the probability of not stopping; the word-level estimate is additionally
        ** smoothed with counts gathered from words similar to the head.
        ** The result is 1.0 for pruned punctuation, and 0.0 when it is NaN or below
        ** MinProbability.
        **
        ** Side effect: diagnostic lines are written to the console on every call that
        ** reaches the argument-generation part.
        */
        private double ProbTBwithSimWords(IntDependency dependency)
        {
            // Direction only matters when the grammar is directional.
            bool          leftHeaded  = dependency.leftHeaded && directional;
            IntTaggedWord unknownHead = new IntTaggedWord(-1, dependency.head.tag);
            IntTaggedWord unknownArg  = new IntTaggedWord(-1, dependency.arg.tag);
            short         distance    = dependency.distance;
            // int hW = dependency.head.word;
            // int aW = dependency.arg.word;
            IntTaggedWord aTW = dependency.arg;
            // IntTaggedWord hTW = dependency.head;
            double pb_stop_hTWds = GetStopProb(dependency);
            bool   isRoot        = RootTW(dependency.head);

            // NOTE(review): -2 is presumably IntTaggedWord.StopWordInt; confirm.
            if (dependency.arg.word == -2)
            {
                // did we generate stop?
                if (isRoot)
                {
                    return(0.0);
                }
                return(pb_stop_hTWds);
            }
            double pb_go_hTWds = 1.0 - pb_stop_hTWds;

            // A root head always continues.
            if (isRoot)
            {
                pb_go_hTWds = 1.0;
            }
            // generate the argument
            int valenceBinDistance = ValenceBin(distance);
            // KEY:
            // c_     count of
            // p_     MLE prob of
            // pb_    MAP prob of
            // a      arg
            // h      head
            // T      tag
            // W      word
            // d      direction
            // ds     distance
            IntDependency temp       = new IntDependency(dependency.head, dependency.arg, leftHeaded, valenceBinDistance);
            double        c_aTW_hTWd = argCounter.GetCount(temp);

            temp = new IntDependency(dependency.head, unknownArg, leftHeaded, valenceBinDistance);
            double c_aT_hTWd = argCounter.GetCount(temp);

            temp = new IntDependency(dependency.head, wildTW, leftHeaded, valenceBinDistance);
            double c_hTWd = argCounter.GetCount(temp);

            temp = new IntDependency(unknownHead, dependency.arg, leftHeaded, valenceBinDistance);
            double c_aTW_hTd = argCounter.GetCount(temp);

            temp = new IntDependency(unknownHead, unknownArg, leftHeaded, valenceBinDistance);
            double c_aT_hTd = argCounter.GetCount(temp);

            temp = new IntDependency(unknownHead, wildTW, leftHeaded, valenceBinDistance);
            double c_hTd = argCounter.GetCount(temp);

            // Wildcard-head counts are looked up directionless and with distance -1.
            temp = new IntDependency(wildTW, dependency.arg, false, -1);
            double c_aTW = argCounter.GetCount(temp);

            temp = new IntDependency(wildTW, unknownArg, false, -1);
            double c_aT = argCounter.GetCount(temp);
            // do the magic
            double p_aTW_hTd = (c_hTd > 0.0 ? c_aTW_hTd / c_hTd : 0.0);
            double p_aT_hTd  = (c_hTd > 0.0 ? c_aT_hTd / c_hTd : 0.0);
            double p_aTW_aT  = (c_aTW > 0.0 ? c_aTW / c_aT : 1.0);
            double pb_aTW_hTWd;
            // = (c_aTW_hTWd + smooth_aTW_hTWd * p_aTW_hTd) / (c_hTWd + smooth_aTW_hTWd);
            double pb_aT_hTWd = (c_aT_hTWd + smooth_aT_hTWd * p_aT_hTd) / (c_hTWd + smooth_aT_hTWd);
            double score;
            // = (interp * pb_aTW_hTWd + (1.0 - interp) * p_aTW_aT * pb_aT_hTWd) * pb_go_hTWds;
            /* smooth by simWords -pichuan */
            // NOTE(review): the null checks below assume these lookups yield null for a
            // missing key; confirm that the map types do not throw instead.
            IList <Triple <int, string, double> > sim2arg  = simArgMap[new Pair <int, string>(dependency.arg.word, StringBasicCategory(dependency.arg.tag))];
            IList <Triple <int, string, double> > sim2head = simHeadMap[new Pair <int, string>(dependency.head.word, StringBasicCategory(dependency.head.tag))];
            // NOTE(review): simArg is filled below but never read afterwards in this method.
            IList <int> simArg  = new List <int>();
            IList <int> simHead = new List <int>();

            if (sim2arg != null)
            {
                foreach (Triple <int, string, double> t in sim2arg)
                {
                    simArg.Add(t.first);
                }
            }
            if (sim2head != null)
            {
                foreach (Triple <int, string, double> t in sim2head)
                {
                    simHead.Add(t.first);
                }
            }
            double cSim_aTW_hTd = 0;
            double cSim_hTd     = 0;

            // Sum the counts over all words similar to the head, keeping the head's tag.
            // NOTE(review): these lookups use the raw dependency.leftHeaded and
            // dependency.distance, whereas the lookups above use leftHeaded and
            // valenceBinDistance; confirm this is intended.
            foreach (int h in simHead)
            {
                IntTaggedWord hWord = new IntTaggedWord(h, dependency.head.tag);
                temp          = new IntDependency(hWord, dependency.arg, dependency.leftHeaded, dependency.distance);
                cSim_aTW_hTd += argCounter.GetCount(temp);
                temp          = new IntDependency(hWord, wildTW, dependency.leftHeaded, dependency.distance);
                cSim_hTd     += argCounter.GetCount(temp);
            }
            double pSim_aTW_hTd = (cSim_hTd > 0.0 ? cSim_aTW_hTd / cSim_hTd : 0.0);

            // P(Wa,Ta|Th)
            //if (simHead.size() > 0 && cSim_hTd == 0.0) {
            if (pSim_aTW_hTd > 0.0)
            {
                //System.out.println("# simHead("+dependency.head.word+"-"+wordNumberer.object(dependency.head.word)+") =\t"+cSim_hTd);
                System.Console.Out.WriteLine(dependency + "\t" + pSim_aTW_hTd);
            }
            //System.out.println(wordNumberer);
            //pb_aTW_hTWd = (c_aTW_hTWd + smooth_aTW_hTWd * pSim_aTW_hTd + smooth_aTW_hTWd * p_aTW_hTd) / (c_hTWd + smooth_aTW_hTWd + smooth_aTW_hTWd);
            //if (pSim_aTW_hTd > 0.0) {
            // Hard-coded smoothing weights for the similar-word and the tag-level estimates.
            // NOTE(review): the local smooth_aTW_hTWd has the same name as a non-local
            // variable assigned in Tune, so a tuned value would not be used here; confirm.
            double smoothSim_aTW_hTWd = 17.7;
            double smooth_aTW_hTWd    = 17.7 * 2;

            //smooth_aTW_hTWd = smooth_aTW_hTWd*2;
            pb_aTW_hTWd = (c_aTW_hTWd + smoothSim_aTW_hTWd * pSim_aTW_hTd + smooth_aTW_hTWd * p_aTW_hTd) / (c_hTWd + smoothSim_aTW_hTWd + smooth_aTW_hTWd);
            // Diagnostic dump of the smoothing computation, written on every call.
            System.Console.Out.WriteLine(dependency);
            System.Console.Out.WriteLine(c_aTW_hTWd + " + " + smoothSim_aTW_hTWd + " * " + pSim_aTW_hTd + " + " + smooth_aTW_hTWd + " * " + p_aTW_hTd);
            System.Console.Out.WriteLine("--------------------------------  = " + pb_aTW_hTWd);
            System.Console.Out.WriteLine(c_hTWd + " + " + smoothSim_aTW_hTWd + " + " + smooth_aTW_hTWd);
            System.Console.Out.WriteLine();
            //}
            //pb_aT_hTWd = (c_aT_hTWd + smooth_aT_hTWd * p_aT_hTd) / (c_hTWd + smooth_aT_hTWd);
            // Interpolate the word-level and tag-level estimates, then scale by the
            // probability of not stopping.
            score = (interp * pb_aTW_hTWd + (1.0 - interp) * p_aTW_aT * pb_aT_hTWd) * pb_go_hTWds;
            // Pruned punctuation arguments get probability 1.0 (checked only after the
            // computation and output above).
            if (op.testOptions.prunePunc && PruneTW(aTW))
            {
                return(1.0);
            }
            if (double.IsNaN(score))
            {
                score = 0.0;
            }
            //if (op.testOptions.rightBonus && ! dependency.leftHeaded)
            //  score -= 0.2;
            if (score < MinProbability)
            {
                score = 0.0;
            }
            return(score);
        }
Exemplo n.º 12
0
 /// <summary>Scores a dependency; the scoring scheme is supplied by the subclass.</summary>
 /// <remarks>
 /// NOTE(review): the "TB" suffix and the way Score calls the int overload with
 /// TagBin-ed tags suggest the dependency is expected to be in the tag-binned
 /// space already - confirm against the implementations.
 /// </remarks>
 /// <param name="arg1">The dependency to score.</param>
 /// <returns>The score of the dependency.</returns>
 public abstract double ScoreTB(IntDependency arg1);
Exemplo n.º 13
0
        /// <summary>
        /// Convenience overload: packs the given values into an IntDependency and
        /// scores it with ScoreTB(IntDependency).
        /// </summary>
        /// <remarks>
        /// NOTE(review): the earlier comment here read "this method tag bins", but no
        /// binning happens in this body; presumably the tags passed in are expected to
        /// be binned already.
        /// </remarks>
        public virtual double ScoreTB(int headWord, int headTag, int argWord, int argTag, bool leftHeaded, int dist)
        {
            return ScoreTB(new IntDependency(headWord, headTag, argWord, argTag, leftHeaded, dist));
        }
Exemplo n.º 14
0
 /// <summary>
 /// Scores a dependency after mapping its head and arg tags through TagBin.
 /// </summary>
 /// <param name="dependency">The dependency to score.</param>
 /// <returns>The result of ScoreTB on the tag-binned values.</returns>
 public virtual double Score(IntDependency dependency)
 {
     IntTaggedWord head = dependency.head;
     IntTaggedWord arg  = dependency.arg;

     return ScoreTB(
         head.word,
         TagBin(head.tag),
         arg.word,
         TagBin(arg.tag),
         dependency.leftHeaded,
         dependency.distance);
 }
        /// <summary>
        /// Looks up how often the head of the dependency was counted with any argument,
        /// for the dependency's direction and valence-binned distance.
        /// </summary>
        /// <param name="dependency">The dependency whose head is looked up; its head tag is tag-binned first.</param>
        /// <returns>The count stored in the argument counter for that head with a wildcard argument.</returns>
        public virtual double CountHistory(IntDependency dependency)
        {
            IntTaggedWord head    = dependency.head;
            IntDependency history = new IntDependency(
                head.word,
                TagBin(head.tag),
                wildTW.word,
                wildTW.tag,
                dependency.leftHeaded,
                ValenceBin(dependency.distance));

            return argCounter.GetCount(history);
        }
        /// <summary>
        /// Tune the smoothing and interpolation parameters of the dependency
        /// grammar based on a tuning treebank.
        /// </summary>
        /// <param name="trees">A Collection of Trees for setting parameters</param>
        public override void Tune(ICollection <Tree> trees)
        {
            IList <IntDependency> deps = new List <IntDependency>();

            foreach (Tree tree in trees)
            {
                Sharpen.Collections.AddAll(deps, TreeToDependencyList(tree, wordIndex, tagIndex));
            }
            double bestScore           = double.NegativeInfinity;
            double bestSmooth_stop     = 0.0;
            double bestSmooth_aTW_hTWd = 0.0;
            double bestSmooth_aT_hTWd  = 0.0;
            double bestInterp          = 0.0;

            log.Info("Tuning smooth_stop...");
            for (smooth_stop = 1.0 / 100.0; smooth_stop < 100.0; smooth_stop *= 1.25)
            {
                double totalScore = 0.0;
                foreach (IntDependency dep in deps)
                {
                    if (!RootTW(dep.head))
                    {
                        double stopProb = GetStopProb(dep);
                        if (!dep.arg.Equals(stopTW))
                        {
                            stopProb = 1.0 - stopProb;
                        }
                        if (stopProb > 0.0)
                        {
                            totalScore += Math.Log(stopProb);
                        }
                    }
                }
                if (totalScore > bestScore)
                {
                    bestScore       = totalScore;
                    bestSmooth_stop = smooth_stop;
                }
            }
            smooth_stop = bestSmooth_stop;
            log.Info("Tuning selected smooth_stop: " + smooth_stop);
            for (IEnumerator <IntDependency> iter = deps.GetEnumerator(); iter.MoveNext();)
            {
                IntDependency dep = iter.Current;
                if (dep.arg.Equals(stopTW))
                {
                    iter.Remove();
                }
            }
            log.Info("Tuning other parameters...");
            if (!useSmoothTagProjection)
            {
                bestScore = double.NegativeInfinity;
                for (smooth_aTW_hTWd = 0.5; smooth_aTW_hTWd < 100.0; smooth_aTW_hTWd *= 1.25)
                {
                    log.Info(".");
                    for (smooth_aT_hTWd = 0.5; smooth_aT_hTWd < 100.0; smooth_aT_hTWd *= 1.25)
                    {
                        for (interp = 0.02; interp < 1.0; interp += 0.02)
                        {
                            double totalScore = 0.0;
                            foreach (IntDependency dep in deps)
                            {
                                double score = Score(dep);
                                if (score > double.NegativeInfinity)
                                {
                                    totalScore += score;
                                }
                            }
                            if (totalScore > bestScore)
                            {
                                bestScore           = totalScore;
                                bestInterp          = interp;
                                bestSmooth_aTW_hTWd = smooth_aTW_hTWd;
                                bestSmooth_aT_hTWd  = smooth_aT_hTWd;
                                log.Info("Current best interp: " + interp + " with score " + totalScore);
                            }
                        }
                    }
                }
                smooth_aTW_hTWd = bestSmooth_aTW_hTWd;
                smooth_aT_hTWd  = bestSmooth_aT_hTWd;
                interp          = bestInterp;
            }
            else
            {
                // for useSmoothTagProjection
                double bestSmooth_aTW_aT  = 0.0;
                double bestSmooth_aTW_hTd = 0.0;
                double bestSmooth_aT_hTd  = 0.0;
                bestScore = double.NegativeInfinity;
                for (smooth_aTW_hTWd = 1.125; smooth_aTW_hTWd < 100.0; smooth_aTW_hTWd *= 1.5)
                {
                    log.Info("#");
                    for (smooth_aT_hTWd = 1.125; smooth_aT_hTWd < 100.0; smooth_aT_hTWd *= 1.5)
                    {
                        log.Info(":");
                        for (smooth_aTW_aT = 1.125; smooth_aTW_aT < 200.0; smooth_aTW_aT *= 1.5)
                        {
                            log.Info(".");
                            for (smooth_aTW_hTd = 1.125; smooth_aTW_hTd < 100.0; smooth_aTW_hTd *= 1.5)
                            {
                                for (smooth_aT_hTd = 1.125; smooth_aT_hTd < 100.0; smooth_aT_hTd *= 1.5)
                                {
                                    for (interp = 0.2; interp <= 0.8; interp += 0.02)
                                    {
                                        double totalScore = 0.0;
                                        foreach (IntDependency dep in deps)
                                        {
                                            double score = Score(dep);
                                            if (score > double.NegativeInfinity)
                                            {
                                                totalScore += score;
                                            }
                                        }
                                        if (totalScore > bestScore)
                                        {
                                            bestScore           = totalScore;
                                            bestInterp          = interp;
                                            bestSmooth_aTW_hTWd = smooth_aTW_hTWd;
                                            bestSmooth_aT_hTWd  = smooth_aT_hTWd;
                                            bestSmooth_aTW_aT   = smooth_aTW_aT;
                                            bestSmooth_aTW_hTd  = smooth_aTW_hTd;
                                            bestSmooth_aT_hTd   = smooth_aT_hTd;
                                            log.Info("Current best interp: " + interp + " with score " + totalScore);
                                        }
                                    }
                                }
                            }
                        }
                    }
                    log.Info();
                }
                smooth_aTW_hTWd = bestSmooth_aTW_hTWd;
                smooth_aT_hTWd  = bestSmooth_aT_hTWd;
                smooth_aTW_aT   = bestSmooth_aTW_aT;
                smooth_aTW_hTd  = bestSmooth_aTW_hTd;
                smooth_aT_hTd   = bestSmooth_aT_hTd;
                interp          = bestInterp;
            }
            log.Info("\nTuning selected smooth_aTW_hTWd: " + smooth_aTW_hTWd + " smooth_aT_hTWd: " + smooth_aT_hTWd + " interp: " + interp + " smooth_aTW_aT: " + smooth_aTW_aT + " smooth_aTW_hTd: " + smooth_aTW_hTd + " smooth_aT_hTd: " + smooth_aT_hTd);
        }
        /// <summary>
        /// Calculate the probability of a dependency as a real probability between
        /// 0 and 1 inclusive.
        /// </summary>
        /// <remarks>
        /// The result is the product of the probability of not stopping and an
        /// interpolation (weighted by <c>interp</c>) of two smoothed estimates:
        /// the argument tagged word given the head tagged word, and the argument
        /// tag given the head tagged word multiplied by the argument word given
        /// its tag.  Special cases, in the order they are checked:
        /// a STOP argument returns the stop probability (0.0 for a root head);
        /// when <c>op.testOptions.prunePunc</c> is set and <c>PruneTW</c> accepts
        /// the argument, 1.0 is returned; a NaN score or a score below
        /// <c>MinProbability</c> is returned as 0.0.
        /// </remarks>
        /// <param name="dependency">
        /// The dependency for which the probability is to be
        /// calculated.   The tags in this dependency are in the reduced
        /// TagProjection space.
        /// </param>
        /// <returns>The probability of the dependency</returns>
        protected internal virtual double ProbTB(IntDependency dependency)
        {
            // Direction is only taken into account when the grammar is directional.
            bool          leftHeaded = dependency.leftHeaded && directional;
            IntTaggedWord aTW        = dependency.arg;
            bool          isRoot     = RootTW(dependency.head);

            // A root head is given stop probability 0.0; GetStopProb is not consulted for it.
            double pb_stop_hTWds = isRoot ? 0.0 : GetStopProb(dependency);

            if (dependency.arg.word == IntTaggedWord.StopWordInt)
            {
                // did we generate stop?
                return(pb_stop_hTWds);
            }
            double pb_go_hTWds = 1.0 - pb_stop_hTWds;
            // generate the argument
            short binDistance = ValenceBin(dependency.distance);
            // KEY:
            // c_     count of (read as joint count of first and second)
            // p_     MLE prob of (or MAP if useSmoothTagProjection)
            // pb_    MAP prob of (read as prob of first given second thing)
            // a      arg
            // h      head
            // T      tag
            // PT     projected tag
            // W      word
            // d      direction
            // ds     distance (implicit: there when direction is mentioned!)
            IntTaggedWord anyHead   = new IntTaggedWord(IntTaggedWord.AnyWordInt, dependency.head.tag);
            IntTaggedWord anyArg    = new IntTaggedWord(IntTaggedWord.AnyWordInt, dependency.arg.tag);
            IntTaggedWord anyTagArg = new IntTaggedWord(dependency.arg.word, IntTaggedWord.AnyTagInt);

            // Counts conditioned on the full head tagged word.
            IntDependency temp       = new IntDependency(dependency.head, dependency.arg, leftHeaded, binDistance);
            double        c_aTW_hTWd = argCounter.GetCount(temp);

            temp = new IntDependency(dependency.head, anyArg, leftHeaded, binDistance);
            double c_aT_hTWd = argCounter.GetCount(temp);

            temp = new IntDependency(dependency.head, wildTW, leftHeaded, binDistance);
            double c_hTWd = argCounter.GetCount(temp);

            // Counts conditioned on the head tag only (head word wildcarded).
            temp = new IntDependency(anyHead, dependency.arg, leftHeaded, binDistance);
            double c_aTW_hTd = argCounter.GetCount(temp);

            temp = new IntDependency(anyHead, anyArg, leftHeaded, binDistance);
            double c_aT_hTd = argCounter.GetCount(temp);

            temp = new IntDependency(anyHead, wildTW, leftHeaded, binDistance);
            double c_hTd = argCounter.GetCount(temp);

            // for smooth tag projection; these stay NaN (and unread) unless
            // useSmoothTagProjection is set.
            double c_aPTW_hPTd = double.NaN;
            double c_aPT_hPTd  = double.NaN;
            double c_hPTd      = double.NaN;
            double c_aPTW_aPT  = double.NaN;
            double c_aPT       = double.NaN;

            if (useSmoothTagProjection)
            {
                short         aPT              = TagProject(dependency.arg.tag);
                short         hPT              = TagProject(dependency.head.tag);
                IntTaggedWord projectedArg     = new IntTaggedWord(dependency.arg.word, aPT);
                IntTaggedWord projectedAnyHead = new IntTaggedWord(IntTaggedWord.AnyWordInt, hPT);
                IntTaggedWord projectedAnyArg  = new IntTaggedWord(IntTaggedWord.AnyWordInt, aPT);
                temp        = new IntDependency(projectedAnyHead, projectedArg, leftHeaded, binDistance);
                c_aPTW_hPTd = argCounter.GetCount(temp);
                temp        = new IntDependency(projectedAnyHead, projectedAnyArg, leftHeaded, binDistance);
                c_aPT_hPTd  = argCounter.GetCount(temp);
                temp        = new IntDependency(projectedAnyHead, wildTW, leftHeaded, binDistance);
                c_hPTd      = argCounter.GetCount(temp);
                temp        = new IntDependency(wildTW, projectedArg, false, IntDependency.AnyDistanceInt);
                c_aPTW_aPT  = argCounter.GetCount(temp);
                temp        = new IntDependency(wildTW, projectedAnyArg, false, IntDependency.AnyDistanceInt);
                c_aPT       = argCounter.GetCount(temp);
            }
            // wild head is always directionless and no use distance
            temp = new IntDependency(wildTW, dependency.arg, false, IntDependency.AnyDistanceInt);
            double c_aTW = argCounter.GetCount(temp);

            temp = new IntDependency(wildTW, anyArg, false, IntDependency.AnyDistanceInt);
            double c_aT = argCounter.GetCount(temp);

            temp = new IntDependency(wildTW, anyTagArg, false, IntDependency.AnyDistanceInt);
            double c_aW = argCounter.GetCount(temp);
            // do the Bayesian magic
            // MLE probs
            double p_aTW_hTd;
            double p_aT_hTd;
            double p_aTW_aT;

            // backoffs either mle or themselves bayesian smoothed depending on useSmoothTagProjection
            if (useSmoothTagProjection)
            {
                double p_aPTW_aPT;
                if (useUnigramWordSmoothing)
                {
                    // NEED this 1.0 for unknown words!!!
                    double p_aW = c_aW > 0.0 ? (c_aW / numWordTokens) : 1.0;
                    p_aPTW_aPT = (c_aPTW_aPT + smooth_aPTW_aPT * p_aW) / (c_aPT + smooth_aPTW_aPT);
                }
                else
                {
                    // NEED this 1.0 for unknown words!!!
                    p_aPTW_aPT = c_aPTW_aPT > 0.0 ? (c_aPTW_aPT / c_aPT) : 1.0;
                }
                // Each tag-level estimate is smoothed towards its projected-tag counterpart.
                p_aTW_aT = (c_aTW + smooth_aTW_aT * p_aPTW_aPT) / (c_aT + smooth_aTW_aT);
                double p_aPTW_hPTd = c_hPTd > 0.0 ? (c_aPTW_hPTd / c_hPTd) : 0.0;
                p_aTW_hTd = (c_aTW_hTd + smooth_aTW_hTd * p_aPTW_hPTd) / (c_hTd + smooth_aTW_hTd);
                double p_aPT_hPTd = c_hPTd > 0.0 ? (c_aPT_hPTd / c_hPTd) : 0.0;
                p_aT_hTd = (c_aT_hTd + smooth_aT_hTd * p_aPT_hPTd) / (c_hTd + smooth_aT_hTd);
            }
            else
            {
                // here word generation isn't smoothed - can't get previously unseen word with tag.  Ugh.
                if (op.testOptions.useLexiconToScoreDependencyPwGt)
                {
                    // We don't know the position.  Now -1 means average over 0 and 1.
                    // Note: this reads dependency.leftHeaded directly, not the
                    // direction-gated local leftHeaded.
                    int position = dependency.leftHeaded ? 1 : -1;
                    p_aTW_aT = Math.Exp(lex.Score(dependency.arg, position, wordIndex.Get(dependency.arg.word), null));
                }
                else
                {
                    // Unseen tagged words get 1.0 so that only the tag-level term decides.
                    p_aTW_aT = c_aTW > 0.0 ? (c_aTW / c_aT) : 1.0;
                }
                p_aTW_hTd = c_hTd > 0.0 ? (c_aTW_hTd / c_hTd) : 0.0;
                p_aT_hTd  = c_hTd > 0.0 ? (c_aT_hTd / c_hTd) : 0.0;
            }
            // Smooth the head-word-conditioned estimates towards the head-tag-conditioned ones.
            double pb_aTW_hTWd = (c_aTW_hTWd + smooth_aTW_hTWd * p_aTW_hTd) / (c_hTWd + smooth_aTW_hTWd);
            double pb_aT_hTWd  = (c_aT_hTWd + smooth_aT_hTWd * p_aT_hTd) / (c_hTWd + smooth_aT_hTWd);
            double score       = (interp * pb_aTW_hTWd + (1.0 - interp) * p_aTW_aT * pb_aT_hTWd) * pb_go_hTWds;

            // Arguments selected by PruneTW are given probability 1.0 when punctuation pruning is on.
            if (op.testOptions.prunePunc && PruneTW(aTW))
            {
                return(1.0);
            }
            // A NaN (e.g. from a 0/0 in the smoothing ratios) is treated as probability zero.
            if (double.IsNaN(score))
            {
                score = 0.0;
            }
            //if (op.testOptions.rightBonus && ! dependency.leftHeaded)
            //  score -= 0.2;
            // Probabilities below the floor are clamped to zero.
            if (score < MinProbability)
            {
                score = 0.0;
            }
            return(score);
        }
 /// <summary>Score a tag binned dependency.</summary>
 /// <param name="dependency">The dependency to score; it is passed unchanged to <c>ProbTB</c>.</param>
 /// <returns>
 /// <c>op.testOptions.depWeight</c> multiplied by the natural logarithm of
 /// <c>ProbTB(dependency)</c>.
 /// </returns>
 public override double ScoreTB(IntDependency dependency)
 {
     // Read the weight first, then compute the probability, as the original expression did.
     double weight      = op.testOptions.depWeight;
     double probability = ProbTB(dependency);

     return weight * Math.Log(probability);
 }