コード例 #1
0
ファイル: MaximumEntropyParser.cs プロジェクト: shuk/OpenNlp
        // Methods ------------------------------

        /// <summary>
        /// Returns a parse for the specified parse of tokens.
        /// </summary>
        /// <param name="flatParse">
        /// A flat parse containing only tokens and a root node, p.
        /// </param>
        /// <param name="parseCount">
        /// the number of parses required
        /// </param>
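        /// <returns>
        /// An array of parses. When at least one complete parse was found, it holds up to <paramref name="parseCount"/>
        /// complete parses taken from the front of the sorted result set. When no complete parse was found, it holds a
        /// single element: the first derivation seen at the chunked stage (the flat chunks of the tokens), which is
        /// null if the search never reached that stage.
        /// </returns>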
        public virtual Parse[] FullParse(Parse flatParse, int parseCount)
        {
            if (CreateDerivationString)
            {
                flatParse.InitializeDerivationBuffer();
            }

            // Derivations carried over from the previous round, and the complete parses found so far.
            var oldDerivationsHeap = new Util.SortedSet <Parse>();
            var parses             = new Util.SortedSet <Parse>();

            // Each round advances every surviving derivation by one step; the number of rounds is capped
            // by a bound derived from the number of children of the flat parse.
            int derivationLength    = 0;
            int maxDerivationLength = 2 * flatParse.ChildCount + 3;

            oldDerivationsHeap.Add(flatParse);

            // Fallback result: the first derivation seen at round 2, returned if no complete parse is found.
            Parse  guessParse   = null;
            double bestComplete = -100000;              // approximating -infinity/0 in ln domain

            // Scratch buffers handed to AdvanceParses / AdvanceTop, sized to each model's outcome count.
            var buildProbabilities = new double[this.buildModel.OutcomeCount];
            var checkProbabilities = new double[this.checkModel.OutcomeCount];

            while (parses.Count < m && derivationLength < maxDerivationLength)
            {
                if (oldDerivationsHeap.Count == 0)
                {
                    // Nothing left to advance.
                    break;
                }

                var newDerivationsHeap   = new Util.TreeSet <Parse>();
                int derivationsProcessed = 0;

                foreach (Parse currentParse in oldDerivationsHeap)
                {
                    // Advance at most k derivations per round. The limit is tested BEFORE counting the
                    // current derivation so that exactly k (not k - 1) derivations are processed; counting
                    // first would advance nothing at all when k == 1.
                    if (derivationsProcessed >= k)
                    {
                        break;
                    }
                    derivationsProcessed++;

                    // This parse and the ones which follow will never win, stop advancing.
                    if (currentParse.Probability < bestComplete)
                    {
                        break;
                    }

                    if (guessParse == null && derivationLength == 2)
                    {
                        guessParse = currentParse;
                    }

                    Parse[] newDerivations;
                    if (derivationLength == 0)
                    {
                        // Round 0: tagging step.
                        newDerivations = AdvanceTags(currentParse);
                    }
                    else if (derivationLength == 1)
                    {
                        // Round 1: chunking step. Once the new heap already holds k entries, the probability
                        // of its last element is passed instead of the best complete score
                        // (presumably a minimum-probability cutoff; confirm against AdvanceChunks).
                        double threshold = newDerivationsHeap.Count < k
                            ? bestComplete
                            : newDerivationsHeap.Last().Probability;
                        newDerivations = AdvanceChunks(currentParse, threshold);
                    }
                    else
                    {
                        // Rounds 2 and later: build/check step.
                        newDerivations = AdvanceParses(currentParse, q, buildProbabilities, checkProbabilities);
                    }

                    if (newDerivations == null)
                    {
                        // This derivation could not be advanced; move on to the next one.
                        continue;
                    }

                    foreach (Parse derivation in newDerivations)
                    {
                        if (derivation.IsComplete)
                        {
                            AdvanceTop(derivation, buildProbabilities, checkProbabilities);
                            if (derivation.Probability > bestComplete)
                            {
                                bestComplete = derivation.Probability;
                            }
                            parses.Add(derivation);
                        }
                        else
                        {
                            newDerivationsHeap.Add(derivation);
                        }
                    }

                    // Keep the heap ordered: Last() above and the next round's enumeration rely on it.
                    newDerivationsHeap.Sort();
                }

                derivationLength++;
                oldDerivationsHeap = newDerivationsHeap;
            }

            parses.Sort();

            if (parses.Count == 0)
            {
                // No complete parse: fall back to the round-2 guess (null if that round was never reached).
                return(new Parse[] { guessParse });
            }

            if (parseCount == 1)
            {
                Parse topParse = parses.First();
                // Re-link children to their parents before handing the parse out.
                topParse.UpdateChildParents();
                return(new Parse[] { topParse });
            }

            // Pop parses off the front of the sorted set until enough have been collected.
            var topParses = new List <Parse>(parseCount);
            while (!parses.IsEmpty() && topParses.Count < parseCount)
            {
                Parse topParse = parses.First();
                topParse.UpdateChildParents();
                topParses.Add(topParse);
                parses.Remove(topParse);
            }
            return(topParses.ToArray());
        }
コード例 #2
0
        /// <summary>
        /// Returns a parse for the specified parse of tokens.
        /// </summary>
        /// <param name="flatParse">
        /// A flat parse containing only tokens and a root node, p.
        /// </param>
        /// <param name="parseCount">
        /// the number of parses required
        /// </param>
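        /// <returns>
        /// An array of parses. When at least one complete parse was found, it holds up to <paramref name="parseCount"/>
        /// complete parses taken from the front of the sorted result set. When no complete parse was found, it holds a
        /// single element: the first derivation seen at the chunked stage (the flat chunks of the tokens), which is
        /// null if the search never reached that stage.
        /// </returns>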
        public virtual Parse[] FullParse(Parse flatParse, int parseCount)
        {
            if (CreateDerivationString)
            {
                flatParse.InitializeDerivationBuffer();
            }

            // The working sets are instance members that persist between calls; reset them first.
            mOldDerivationsHeap.Clear();
            mNewDerivationsHeap.Clear();
            mParses.Clear();

            // Each round advances every surviving derivation by one step; the number of rounds is capped
            // by a bound derived from the number of children of the flat parse.
            int derivationLength    = 0;
            int maxDerivationLength = 2 * flatParse.ChildCount + 3;

            mOldDerivationsHeap.Add(flatParse);

            // Fallback result: the first derivation seen at round 2, returned if no complete parse is found.
            Parse  guessParse   = null;
            double bestComplete = -100000;              // approximating -infinity/0 in ln domain

            while (mParses.Count < M && derivationLength < maxDerivationLength)
            {
                mNewDerivationsHeap = new Util.TreeSet <Parse>();
                if (mOldDerivationsHeap.Count == 0)
                {
                    // Nothing left to advance.
                    break;
                }

                int derivationsProcessed = 0;

                foreach (Parse currentParse in mOldDerivationsHeap)
                {
                    // Advance at most K derivations per round. The limit is tested BEFORE counting the
                    // current derivation so that exactly K (not K - 1) derivations are processed; counting
                    // first would advance nothing at all when K == 1.
                    if (derivationsProcessed >= K)
                    {
                        break;
                    }
                    derivationsProcessed++;

                    // This parse and the ones which follow will never win, stop advancing.
                    if (currentParse.Probability < bestComplete)
                    {
                        break;
                    }

                    if (guessParse == null && derivationLength == 2)
                    {
                        guessParse = currentParse;
                    }

                    Parse[] newDerivations;
                    if (derivationLength == 0)
                    {
                        // Round 0: tagging step.
                        newDerivations = AdvanceTags(currentParse);
                    }
                    else if (derivationLength == 1)
                    {
                        // Round 1: chunking step. Once the new heap already holds K entries, the probability
                        // of its last element is passed instead of the best complete score
                        // (presumably a minimum-probability cutoff; confirm against AdvanceChunks).
                        double threshold = mNewDerivationsHeap.Count < K
                            ? bestComplete
                            : ((Parse)mNewDerivationsHeap.Last()).Probability;
                        newDerivations = AdvanceChunks(currentParse, threshold);
                    }
                    else
                    {
                        // Rounds 2 and later: build/check step.
                        newDerivations = AdvanceParses(currentParse, Q);
                    }

                    if (newDerivations == null)
                    {
                        // This derivation could not be advanced; move on to the next one.
                        continue;
                    }

                    foreach (Parse derivation in newDerivations)
                    {
                        if (derivation.IsComplete)
                        {
                            AdvanceTop(derivation);
                            if (derivation.Probability > bestComplete)
                            {
                                bestComplete = derivation.Probability;
                            }
                            mParses.Add(derivation);
                        }
                        else
                        {
                            mNewDerivationsHeap.Add(derivation);
                        }
                    }

                    // Keep the heap ordered: Last() above and the next round's enumeration rely on it.
                    mNewDerivationsHeap.Sort();
                }

                derivationLength++;
                mOldDerivationsHeap = mNewDerivationsHeap;
            }

            mParses.Sort();

            if (mParses.Count == 0)
            {
                // No complete parse: fall back to the round-2 guess (null if that round was never reached).
                return(new Parse[] { guessParse });
            }

            if (parseCount == 1)
            {
                Parse topParse = mParses.First();
                // Re-link children to their parents before handing the parse out.
                topParse.UpdateChildParents();
                return(new Parse[] { topParse });
            }

            // Pop parses off the front of the sorted set until enough have been collected.
            var topParses = new List <Parse>(parseCount);
            while (!mParses.IsEmpty() && topParses.Count < parseCount)
            {
                Parse topParse = mParses.First();
                topParse.UpdateChildParents();
                topParses.Add(topParse);
                mParses.Remove(topParse);
            }
            return(topParses.ToArray());
        }