// Methods ------------------------------

/// <summary>
/// Returns up to <paramref name="parseCount"/> parses for the specified flat parse of tokens.
/// </summary>
/// <remarks>
/// The search runs in rounds. In round 0 every derivation is advanced with
/// <c>AdvanceTags</c>, in round 1 with <c>AdvanceChunks</c>, and in every later round with
/// <c>AdvanceParses</c>. At most <c>k</c> derivations are advanced per round, and the search
/// stops once <c>m</c> complete parses have been collected, the round limit
/// (<c>2 * flatParse.ChildCount + 3</c>) is reached, or no derivations remain.
/// </remarks>
/// <param name="flatParse">
/// A flat parse containing only tokens and a root node, p.
/// </param>
/// <param name="parseCount">
/// The number of parses required.
/// </param>
/// <returns>
/// The complete parses taken from the front of the sorted result set, at most
/// <paramref name="parseCount"/> of them. If no complete parse was found, a single-element array
/// holding the first derivation examined in round 2 (per the original documentation, the flat
/// chunks of the tokens); that element is <c>null</c> when the search never reached round 2.
/// </returns>
public virtual Parse[] FullParse(Parse flatParse, int parseCount)
{
    if (CreateDerivationString)
    {
        flatParse.InitializeDerivationBuffer();
    }

    var oldDerivationsHeap = new Util.SortedSet<Parse>();
    var parses = new Util.SortedSet<Parse>();

    int derivationLength = 0;
    int maxDerivationLength = 2 * flatParse.ChildCount + 3;
    oldDerivationsHeap.Add(flatParse);
    Parse guessParse = null;
    double bestComplete = -100000; // approximating -infinity/0 in ln domain

    // Scratch buffers handed to AdvanceParses/AdvanceTop; sized from the models' outcome counts.
    var buildProbabilities = new double[this.buildModel.OutcomeCount];
    var checkProbabilities = new double[this.checkModel.OutcomeCount];

    while (parses.Count < m && derivationLength < maxDerivationLength)
    {
        var newDerivationsHeap = new Util.TreeSet<Parse>();
        if (oldDerivationsHeap.Count == 0)
        {
            // Nothing left to advance.
            break;
        }

        int derivationsProcessed = 0;
        foreach (Parse currentParse in oldDerivationsHeap)
        {
            // Test the cap BEFORE counting this derivation so that exactly k derivations
            // are advanced per round (incrementing first advanced only k - 1).
            if (derivationsProcessed >= k)
            {
                break;
            }
            derivationsProcessed++;

            if (currentParse.Probability < bestComplete)
            {
                // This parse and the ones which follow will never win, stop advancing.
                break;
            }

            // Remember the first derivation seen in round 2 as the fallback result.
            if (guessParse == null && derivationLength == 2)
            {
                guessParse = currentParse;
            }

            Parse[] newDerivations;
            if (derivationLength == 0)
            {
                newDerivations = AdvanceTags(currentParse);
            }
            else if (derivationLength == 1)
            {
                // While fewer than k candidates are queued, bound by the best complete parse;
                // afterwards bound by the last queued candidate's probability.
                // NOTE(review): presumably the bound is a minimum probability worth keeping - confirm against AdvanceChunks.
                double bound = newDerivationsHeap.Count < k
                    ? bestComplete
                    : newDerivationsHeap.Last().Probability;
                newDerivations = AdvanceChunks(currentParse, bound);
            }
            else
            {
                // derivationLength > 1
                newDerivations = AdvanceParses(currentParse, q, buildProbabilities, checkProbabilities);
            }

            if (newDerivations == null)
            {
                // The derivation could not be advanced; move on to the next one.
                continue;
            }

            foreach (Parse derivation in newDerivations)
            {
                if (derivation.IsComplete)
                {
                    AdvanceTop(derivation, buildProbabilities, checkProbabilities);
                    if (derivation.Probability > bestComplete)
                    {
                        bestComplete = derivation.Probability;
                    }
                    parses.Add(derivation);
                }
                else
                {
                    newDerivationsHeap.Add(derivation);
                }
            }

            // RN added sort
            newDerivationsHeap.Sort();
        }

        derivationLength++;
        oldDerivationsHeap = newDerivationsHeap;
    }

    // RN added sort
    parses.Sort();

    if (parses.Count == 0)
    {
        // No complete parse was found: fall back to the round-2 guess (may be null).
        return new Parse[] { guessParse };
    }

    if (parseCount == 1)
    {
        // RN added parent adjustment
        Parse topParse = parses.First();
        topParse.UpdateChildParents();
        return new Parse[] { topParse };
    }

    var topParses = new List<Parse>(parseCount);
    while (!parses.IsEmpty() && topParses.Count < parseCount)
    {
        Parse topParse = parses.First();
        // RN added parent adjustment
        topParse.UpdateChildParents();
        topParses.Add(topParse);
        parses.Remove(topParse);
    }
    return topParses.ToArray();
}
/// <summary>
/// Returns up to <paramref name="parseCount"/> parses for the specified flat parse of tokens.
/// </summary>
/// <remarks>
/// The search runs in rounds. In round 0 every derivation is advanced with
/// <c>AdvanceTags</c>, in round 1 with <c>AdvanceChunks</c>, and in every later round with
/// <c>AdvanceParses</c>. At most <c>K</c> derivations are advanced per round, and the search
/// stops once <c>M</c> complete parses have been collected, the round limit
/// (<c>2 * flatParse.ChildCount + 3</c>) is reached, or no derivations remain.
/// The working collections are instance fields that are cleared and reused on every call.
/// </remarks>
/// <param name="flatParse">
/// A flat parse containing only tokens and a root node, p.
/// </param>
/// <param name="parseCount">
/// The number of parses required.
/// </param>
/// <returns>
/// The complete parses taken from the front of the sorted result set, at most
/// <paramref name="parseCount"/> of them. If no complete parse was found, a single-element array
/// holding the first derivation examined in round 2 (per the original documentation, the flat
/// chunks of the tokens); that element is <c>null</c> when the search never reached round 2.
/// </returns>
public virtual Parse[] FullParse(Parse flatParse, int parseCount)
{
    if (CreateDerivationString)
    {
        flatParse.InitializeDerivationBuffer();
    }

    // Reset the working state left over from any previous call before seeding the search.
    mOldDerivationsHeap.Clear();
    mNewDerivationsHeap.Clear();
    mParses.Clear();

    int derivationLength = 0;
    int maxDerivationLength = 2 * flatParse.ChildCount + 3;
    mOldDerivationsHeap.Add(flatParse);
    Parse guessParse = null;
    double bestComplete = -100000; // approximating -infinity/0 in ln domain

    while (mParses.Count < M && derivationLength < maxDerivationLength)
    {
        mNewDerivationsHeap = new Util.TreeSet<Parse>();
        if (mOldDerivationsHeap.Count == 0)
        {
            // Nothing left to advance.
            break;
        }

        int derivationsProcessed = 0;
        foreach (Parse currentParse in mOldDerivationsHeap)
        {
            // Test the cap BEFORE counting this derivation so that exactly K derivations
            // are advanced per round (incrementing first advanced only K - 1).
            if (derivationsProcessed >= K)
            {
                break;
            }
            derivationsProcessed++;

            if (currentParse.Probability < bestComplete)
            {
                // This parse and the ones which follow will never win, stop advancing.
                break;
            }

            // Remember the first derivation seen in round 2 as the fallback result.
            if (guessParse == null && derivationLength == 2)
            {
                guessParse = currentParse;
            }

            Parse[] newDerivations;
            if (derivationLength == 0)
            {
                newDerivations = AdvanceTags(currentParse);
            }
            else if (derivationLength == 1)
            {
                // While fewer than K candidates are queued, bound by the best complete parse;
                // afterwards bound by the last queued candidate's probability.
                // NOTE(review): presumably the bound is a minimum probability worth keeping - confirm against AdvanceChunks.
                double bound = mNewDerivationsHeap.Count < K
                    ? bestComplete
                    : ((Parse)mNewDerivationsHeap.Last()).Probability;
                newDerivations = AdvanceChunks(currentParse, bound);
            }
            else
            {
                // derivationLength > 1
                newDerivations = AdvanceParses(currentParse, Q);
            }

            if (newDerivations == null)
            {
                // The derivation could not be advanced; move on to the next one.
                continue;
            }

            foreach (Parse derivation in newDerivations)
            {
                if (derivation.IsComplete)
                {
                    AdvanceTop(derivation);
                    if (derivation.Probability > bestComplete)
                    {
                        bestComplete = derivation.Probability;
                    }
                    mParses.Add(derivation);
                }
                else
                {
                    mNewDerivationsHeap.Add(derivation);
                }
            }

            // RN added sort
            mNewDerivationsHeap.Sort();
        }

        derivationLength++;
        mOldDerivationsHeap = mNewDerivationsHeap;
    }

    // RN added sort
    mParses.Sort();

    if (mParses.Count == 0)
    {
        // No complete parse was found: fall back to the round-2 guess (may be null).
        return new Parse[] { guessParse };
    }

    if (parseCount == 1)
    {
        // RN added parent adjustment
        Parse topParse = mParses.First();
        topParse.UpdateChildParents();
        return new Parse[] { topParse };
    }

    var topParses = new List<Parse>(parseCount);
    while (!mParses.IsEmpty() && topParses.Count < parseCount)
    {
        Parse topParse = mParses.First();
        // RN added parent adjustment
        topParse.UpdateChildParents();
        topParses.Add(topParse);
        mParses.Remove(topParse);
    }
    return topParses.ToArray();
}