///<summary>
///Advances the specified parse by labeling its first unlabeled child with each candidate build
///outcome, and returns the resulting parses. Build outcomes are tried in order of decreasing
///probability until their cumulative probability reaches the specified probability mass, Q.
///</summary>
///<param name="inputParse">
///The parse to advance.
///</param>
///<param name="Q">
///The amount of probability mass that should be accounted for by the advanced parses.
///</param>
///<returns>
///The advanced parses. The array is empty when no build outcome produced an acceptable parse.
///</returns>
private Parse[] AdvanceParses(Parse inputParse, double Q)
{
    // A check outcome (complete / incomplete) is only pursued when its probability exceeds this value.
    double q = 1 - Q;

    Parse lastStartNode = null;     // The closest previous node which has been labeled as a start node.
    int lastStartIndex = -1;        // The index of that node within children.
    string lastStartType = null;    // The constituent type started by that node.
    int advanceNodeIndex;           // The index of the node which will be labeled by this call.
    Parse advanceNode = null;       // The node which will be labeled by this call.

    Parse[] children = inputParse.GetChildren();
    int nodeCount = children.Length;

    // Find the first unlabeled child, remembering the most recent start label seen before it.
    // NOTE(review): if every child already has a label, advanceNodeIndex ends up equal to nodeCount;
    // callers presumably only pass parses with an unlabeled child - confirm.
    for (advanceNodeIndex = 0; advanceNodeIndex < nodeCount; advanceNodeIndex++)
    {
        advanceNode = children[advanceNodeIndex];
        if (advanceNode.Label == null)
        {
            break;
        }
        if (mStartTypeMap.ContainsKey(advanceNode.Label))
        {
            lastStartType = mStartTypeMap[advanceNode.Label];
            lastStartNode = advanceNode;
            lastStartIndex = advanceNodeIndex;
        }
    }

    List<Parse> newParsesList = new List<Parse>(mBuildModel.OutcomeCount);

    // Call the build model for the node being advanced.
    mBuildModel.Evaluate(mBuildContextGenerator.GetContext(children, advanceNodeIndex), mBuildProbabilities);

    double buildProbabilitiesSum = 0;
    while (buildProbabilitiesSum < Q)
    {
        // Select the most probable build outcome that has not been tried yet.
        int highestBuildProbabilityIndex = 0;
        for (int probabilityIndex = 1; probabilityIndex < mBuildProbabilities.Length; probabilityIndex++)
        {
            if (mBuildProbabilities[probabilityIndex] > mBuildProbabilities[highestBuildProbabilityIndex])
            {
                highestBuildProbabilityIndex = probabilityIndex;
            }
        }

        double highestBuildProbability = mBuildProbabilities[highestBuildProbabilityIndex];
        if (highestBuildProbability == 0)
        {
            // Every remaining outcome has already been consumed (tried outcomes are zeroed below).
            break;
        }

        mBuildProbabilities[highestBuildProbabilityIndex] = 0; // zero out so a new maximum can be found
        buildProbabilitiesSum += highestBuildProbability;
        string tag = mBuildModel.GetOutcomeName(highestBuildProbabilityIndex);

        if (highestBuildProbabilityIndex == mTopStartIndex)
        {
            // can't have top until complete
            continue;
        }

        if (mStartTypeMap.ContainsKey(tag))
        {
            // The node being advanced starts a new constituent: update last start.
            lastStartIndex = advanceNodeIndex;
            lastStartNode = advanceNode;
            lastStartType = mStartTypeMap[tag];
        }
        else if (mContinueTypeMap.ContainsKey(tag))
        {
            if (lastStartNode == null || lastStartType != mContinueTypeMap[tag])
            {
                // A continue label must match the type of the previous start.
                continue;
            }
        }

        Parse newParse1 = (Parse)inputParse.Clone();
        if (mCreateDerivationString)
        {
            newParse1.AppendDerivationBuffer(highestBuildProbabilityIndex.ToString(System.Globalization.CultureInfo.InvariantCulture));
            newParse1.AppendDerivationBuffer("-");
        }
        newParse1.SetChild(advanceNodeIndex, tag); // replace constituent labeled
        newParse1.AddProbability(System.Math.Log(highestBuildProbability));

        // Call the check model to decide whether the constituent is complete (reduce) or not (shift).
        mCheckModel.Evaluate(mCheckContextGenerator.GetContext(newParse1.GetChildren(), lastStartType, lastStartIndex, advanceNodeIndex), mCheckProbabilities);

        if (mCheckProbabilities[mCompleteIndex] > q)
        {
            // A reduce is likely. Work on a copy so newParse1 remains available for the shift case.
            Parse newParse2 = (Parse)newParse1.Clone();
            if (mCreateDerivationString)
            {
                newParse2.AppendDerivationBuffer("1");
                newParse2.AppendDerivationBuffer(".");
            }
            // Use the looked-up outcome index (not a literal) so the probability that is added
            // is the same one that was tested above.
            newParse2.AddProbability(System.Math.Log(mCheckProbabilities[mCompleteIndex]));

            // Collect the nodes spanning from the last start node to the node being advanced.
            int lastConstituentIndex = advanceNodeIndex - lastStartIndex;
            Parse[] constituent = new Parse[lastConstituentIndex + 1];
            constituent[0] = lastStartNode;
            constituent[lastConstituentIndex] = advanceNode;
            for (int constituentIndex = 1; constituentIndex < lastConstituentIndex; constituentIndex++)
            {
                constituent[constituentIndex] = children[constituentIndex + lastStartIndex];
            }

            // The constituent is flat when every node has the same type as its own head.
            // Checked in the order first, last, middle; stops at the first mismatch.
            bool isFlat = true;
            if (constituent[0].Type != constituent[0].Head.Type)
            {
                isFlat = false;
            }
            if (isFlat && constituent[lastConstituentIndex].Type != constituent[lastConstituentIndex].Head.Type)
            {
                isFlat = false;
            }
            for (int constituentIndex = 1; isFlat && constituentIndex < lastConstituentIndex; constituentIndex++)
            {
                if (constituent[constituentIndex].Type != constituent[constituentIndex].Head.Type)
                {
                    isFlat = false;
                }
            }

            if (!isFlat)
            {
                // flat chunks are done by chunker
                newParse2.Insert(new Parse(inputParse.Text, new Util.Span(lastStartNode.Span.Start, advanceNode.Span.End), lastStartType, mCheckProbabilities[mCompleteIndex], mHeadRules.GetHead(constituent, lastStartType)));
                newParsesList.Add(newParse2);
            }
        }

        if (mCheckProbabilities[mIncompleteIndex] > q)
        {
            // A shift is likely.
            if (mCreateDerivationString)
            {
                newParse1.AppendDerivationBuffer("0");
                newParse1.AppendDerivationBuffer(".");
            }
            if (advanceNodeIndex != nodeCount - 1)
            {
                // can't shift last element
                newParse1.AddProbability(System.Math.Log(mCheckProbabilities[mIncompleteIndex]));
                newParsesList.Add(newParse1);
            }
        }
    }

    return newParsesList.ToArray();
}