/// <summary>
/// Advances the specified parse by one decision on the first top-level node that is not yet
/// built: either a new constituent is built over that node (one new parse per selected build
/// outcome), or the node is marked as built and attached as a daughter or sister to nodes on
/// the right frontier of the parse.
/// </summary>
/// <param name="p">The parse to advance. It is cloned for every new derivation; it is only
/// modified directly (via <c>ExpandTopNode</c>) when a single non-POS-tag child remains.</param>
/// <param name="probMass">Probability mass threshold. Build outcomes are consumed in descending
/// probability until their sum reaches this value, and <c>1 - probMass</c> is the minimum
/// probability a build/attach decision needs in order to be explored.</param>
/// <returns>
/// <c>null</c> when the parse has no children after punctuation collapsing, or when its single
/// child is a POS tag; a one-element array containing <paramref name="p"/> when a single
/// non-POS-tag child remains; otherwise an array (possibly empty) of the newly derived parses.
/// </returns>
/// <exception cref="InvalidOperationException">No node could be selected for advancing.</exception>
protected override Parse[] AdvanceParses(Parse p, double probMass) {
    // Minimum probability a decision (build vs. attach, or a single attachment) must exceed.
    var q = 1 - probMass;
    // The index of the node which will be labeled in this iteration of advancing the parse.
    int advanceNodeIndex;
    // The node which will be labeled in this iteration of advancing the parse.
    Parse advanceNode = null;
    var originalChildren = p.Children;
    // Working view of the children; presumably punctuation nodes are folded away here
    // (see CollapsePunctuation) — indices into it are mapped back with MapParseIndex below.
    var children = CollapsePunctuation(originalChildren, punctSet);
    var numNodes = children.Length;
    if (numNodes == 0) {
        return(null);
    }
    if (numNodes == 1) {
        // Put sentence initial and final punctuation in the top node.
        if (children[0].IsPosTag) {
            return(null);
        }
        p.ExpandTopNode(children[0]);
        return(new[] { p });
    }
    // Determine which node needs to be advanced: the first one that is not built yet.
    // NOTE(review): if every node is already built, the loop runs off the end, leaving
    // advanceNodeIndex == numNodes while advanceNode still refers to the last child.
    for (advanceNodeIndex = 0; advanceNodeIndex < numNodes; advanceNodeIndex++) {
        advanceNode = children[advanceNodeIndex];
        if (!IsBuilt(advanceNode)) {
            break;
        }
    }
    if (advanceNode == null) {
        throw new InvalidOperationException("advanceNode is null.");
    }
    // Positions of the first node and of the advance node in the un-collapsed child array.
    var originalZeroIndex = MapParseIndex(0, children, originalChildren);
    var originalAdvanceIndex = MapParseIndex(advanceNodeIndex, children, originalChildren);
    var newParsesList = new List <Parse>();
    // Call the build model; the outcome probabilities are written into bProbs, which is not
    // declared in this method.
    buildModel.Eval(buildContextGenerator.GetContext(children, advanceNodeIndex), bProbs);
    // Probability that no further constituent is built over the node (attach instead).
    var doneProb = bProbs[doneIndex];
    Debug("adi=" + advanceNodeIndex + " " + advanceNode.Type + "." + advanceNode.Label + " " + advanceNode + " choose build=" + (1 - doneProb) + " attach=" + doneProb);
    // Advance builds.
    if (1 - doneProb > q) {
        double bprobSum = 0;
        while (bprobSum < probMass) {
            // Index of the largest build outcome that has not been advanced yet.
            var max = 0;
            for (var pi = 1; pi < bProbs.Length; pi++) { //for each build outcome
                if (bProbs[pi] > bProbs[max]) {
                    max = pi;
                }
            }
            // Every remaining outcome has been consumed (zeroed) or has no probability.
            if (bProbs[max].Equals(0d)) {
                break;
            }
            var bprob = bProbs[max];
            bProbs[max] = 0; //zero out so new max can be found
            bprobSum += bprob;
            var tag = buildModel.GetOutcome(max);
            // The DONE outcome still counts toward bprobSum but produces no build derivation.
            if (!tag.Equals(DONE)) {
                var newParse1 = (Parse)p.Clone();
                // New constituent of type 'tag' covering exactly the advance node's span.
                var newNode = new Parse(p.Text, advanceNode.Span, tag, bprob, advanceNode.Head);
                newParse1.Insert(newNode);
                newParse1.AddProbability(Math.Log(bprob));
                newParsesList.Add(newParse1);
                if (checkComplete) {
                    cProbs = checkModel.Eval(checkContextGenerator.GetContext(newNode, children, advanceNodeIndex, false));
                    Debug("building " + tag + " " + bprob + " c=" + cProbs[completeIndex]);
                    if (cProbs[completeIndex] > probMass) { //just complete advances
                        SetComplete(newNode);
                        newParse1.AddProbability(Math.Log(cProbs[completeIndex]));
                        Debug("Only advancing complete node");
                    } else if (1 - cProbs[completeIndex] > probMass) { //just incomplete advances
                        SetIncomplete(newNode);
                        newParse1.AddProbability(Math.Log(1 - cProbs[completeIndex]));
                        Debug("Only advancing incomplete node");
                    } else { //both complete and incomplete advance
                        Debug("Advancing both complete and incomplete nodes");
                        // newParse1 (already in the list) carries the "complete" reading ...
                        SetComplete(newNode);
                        newParse1.AddProbability(Math.Log(cProbs[completeIndex]));
                        // ... and a second, independently built derivation carries the
                        // "incomplete" reading.
                        var newParse2 = (Parse)p.Clone();
                        var newNode2 = new Parse(p.Text, advanceNode.Span, tag, bprob, advanceNode.Head);
                        newParse2.Insert(newNode2);
                        newParse2.AddProbability(Math.Log(bprob));
                        newParsesList.Add(newParse2);
                        newParse2.AddProbability(Math.Log(1 - cProbs[completeIndex]));
                        SetIncomplete(newNode2); //set incomplete for non-clone
                    }
                } else {
                    Debug("building " + tag + " " + bprob);
                }
            }
        }
    }
    // Advance attaches.
    if (doneProb > q) {
        var newParse1 = (Parse)p.Clone(); //clone parse
        // Mark the advance node as built, replacing the constituent being labeled so that a
        // new derivation is created.
        if (checkComplete) {
            if (IsComplete(advanceNode)) {
                newParse1.SetChild(originalAdvanceIndex, BUILT + "." + COMPLETE);
            } else {
                newParse1.SetChild(originalAdvanceIndex, BUILT + "." + INCOMPLETE);
            }
        } else {
            newParse1.SetChild(originalAdvanceIndex, BUILT);
        }
        newParse1.AddProbability(Math.Log(doneProb));
        if (advanceNodeIndex == 0) { //no attach if first node.
            newParsesList.Add(newParse1);
        } else {
            // Candidate attachment sites, taken from the original (un-cloned) parse.
            var rf = GetRightFrontier(p, punctSet);
            for (int fi = 0, fs = rf.Count; fi < fs; fi++) {
                var fn = rf[fi];
                // Attachment probabilities for this frontier node are written into aProbs.
                attachModel.Eval(attachContextGenerator.GetContext(children, advanceNodeIndex, rf, fi), aProbs);
                if (debugOn) {
                    Debug("Frontier node(" + fi + "): " + fn.Type + "." + fn.Label + " " + fn + " <- " + advanceNode.Type + " " + advanceNode + " d=" + aProbs[daughterAttachIndex] + " s=" + aProbs[sisterAttachIndex] + " ");
                }
                foreach (int attachment in attachments) {
                    var prob = aProbs[attachment];
                    // Try an attach if prob > threshold and
                    //  - if !checkComplete: prevent daughter attaching to a chunk;
                    //  - if checkComplete: prevent daughter attaching to a complete node or
                    //    sister attaching to an incomplete node.
                    if (prob > q && (
                            (!checkComplete && (attachment != daughterAttachIndex || !IsComplete(fn)))
                            ||
                            (checkComplete && ((attachment == daughterAttachIndex && !IsComplete(fn)) || (attachment == sisterAttachIndex && IsComplete(fn)))))) {
                        var newParse2 = newParse1.CloneRoot(fn, originalZeroIndex);
                        // Captured before the removals below; only consumed by the check model.
                        var newKids = CollapsePunctuation(newParse2.Children, punctSet);
                        // Remove the node from the top level since we are going to attach it
                        // (including punctuation). The same index is removed repeatedly because
                        // the remaining children shift down after each removal.
                        for (var ri = originalZeroIndex + 1; ri <= originalAdvanceIndex; ri++) {
                            newParse2.Remove(originalZeroIndex + 1);
                        }
                        // Right frontier of the clone; indexed in parallel with rf.
                        var crf = GetRightFrontier(newParse2, punctSet);
                        Parse updatedNode;
                        if (attachment == daughterAttachIndex) { //attach daughter
                            updatedNode = crf[fi];
                            updatedNode.Add(advanceNode, headRules);
                        } else { //attach sister
                            Parse psite;
                            if (fi + 1 < crf.Count) {
                                // Adjoin under the next node up the frontier.
                                psite = crf[fi + 1];
                                updatedNode = psite.AdJoin(advanceNode, headRules);
                            } else {
                                // No higher frontier node exists: adjoin at the root.
                                psite = newParse2;
                                updatedNode = psite.AdJoinRoot(advanceNode, headRules, originalZeroIndex);
                                newKids[0] = updatedNode;
                            }
                        }
                        // Update spans affected by the attachment.
                        for (var ni = fi + 1; ni < crf.Count; ni++) {
                            var node = crf[ni];
                            node.UpdateSpan();
                        }
                        newParse2.AddProbability(Math.Log(prob));
                        newParsesList.Add(newParse2);
                        if (checkComplete) {
                            cProbs = checkModel.Eval(checkContextGenerator.GetContext(updatedNode, newKids, advanceNodeIndex, true));
                            if (cProbs[completeIndex] > probMass) {
                                SetComplete(updatedNode);
                                newParse2.AddProbability(Math.Log(cProbs[completeIndex]));
                                Debug("Only advancing complete node");
                            } else if (1 - cProbs[completeIndex] > probMass) {
                                SetIncomplete(updatedNode);
                                newParse2.AddProbability(Math.Log(1 - cProbs[completeIndex]));
                                Debug("Only advancing incomplete node");
                            } else {
                                // Order matters: the node is marked complete before cloning so
                                // newParse3 is derived from the "complete" state; the node in
                                // newParse2 is then switched to incomplete.
                                SetComplete(updatedNode);
                                var newParse3 = newParse2.CloneRoot(updatedNode, originalZeroIndex);
                                newParse3.AddProbability(Math.Log(cProbs[completeIndex]));
                                newParsesList.Add(newParse3);
                                SetIncomplete(updatedNode);
                                newParse2.AddProbability(Math.Log(1 - cProbs[completeIndex]));
                                Debug("Advancing both complete and incomplete nodes; c=" + cProbs[completeIndex]);
                            }
                        }
                    } else {
                        Debug("Skipping " + fn.Type + "." + fn.Label + " " + fn + " daughter=" + (attachment == daughterAttachIndex) + " complete=" + IsComplete(fn) + " prob=" + prob);
                    }
                }
                // Nothing above the first incomplete frontier node is considered.
                if (checkComplete && !IsComplete(fn)) {
                    Debug("Stopping at incomplete node(" + fi + "): " + fn.Type + "." + fn.Label + " " + fn);
                    break;
                }
            }
        }
    }
    return(newParsesList.ToArray());
}
/// <summary>
/// Produces all events for the specified sentence chunks and adds them to the specified list.
/// </summary>
/// <remarks>
/// Only events matching the current <c>Type</c> (build, check or attach) are added, but the
/// tree bookkeeping is performed regardless of the type. The method mutates its input:
/// labels are set on the chunks and their ancestors, and entries of <paramref name="chunks"/>
/// are replaced by their parents as constituents are built.
/// </remarks>
/// <param name="newEvents">A list of events to be added to.</param>
/// <param name="chunks">Pre-chunked constituents of a sentence.</param>
/// <exception cref="InvalidOperationException">
/// The right frontier of the partially built parse and the one derived from the complete parse
/// differ in size, or no attachment site is found for a chunk.
/// </exception>
protected override void AddParseEvents(List<Event> newEvents, Parse[] chunks) {
    /* Frontier nodes built from nodes in a completed parse. Specifically,
     * they have all their children regardless of the stage of parsing. */
    var rightFrontier = new List<Parse>();
    var builtNodes = new List<Parse>();

    /* Nodes which characterize what the parse looks like to the parser as it is being built.
     * Specifically, these nodes don't have all their children attached like the parents of
     * the chunk nodes do. */
    var currentChunks = new Parse[chunks.Length];
    for (var ci = 0; ci < chunks.Length; ci++) {
        currentChunks[ci] = (Parse)chunks[ci].Clone();
        currentChunks[ci].PreviousPunctuationSet = chunks[ci].PreviousPunctuationSet;
        currentChunks[ci].NextPunctuationSet = chunks[ci].NextPunctuationSet;
        currentChunks[ci].Label = AbstractBottomUpParser.COMPLETE;
        chunks[ci].Label = AbstractBottomUpParser.COMPLETE;
    }

    for (var ci = 0; ci < chunks.Length; ci++) {
        var parent = chunks[ci].Parent;
        var prevParent = chunks[ci];

        // Build un-built parents.
        if (!chunks[ci].IsPosTag) {
            builtNodes.Add(chunks[ci]);
        }

        // Perform build stages: walk up through the ancestors that carry no label yet.
        while (parent.Type != AbstractBottomUpParser.TOP_NODE && parent.Label == null) {
            // A parent with the same type as the node below it does not get a build level.
            if (prevParent.Type != parent.Type) {
                if (Type == ParserEventTypeEnum.Build) {
                    newEvents.Add(new Event(parent.Type, buildContextGenerator.GetContext(currentChunks, ci)));
                }
                builtNodes.Add(parent);

                // Mirror the build in the partial parse: wrap the current chunk in a new node.
                var newParent = new Parse(currentChunks[ci].Text, currentChunks[ci].Span, parent.Type, 1, 0);
                newParent.Add(currentChunks[ci], Rules);
                newParent.PreviousPunctuationSet = currentChunks[ci].PreviousPunctuationSet;
                newParent.NextPunctuationSet = currentChunks[ci].NextPunctuationSet;
                currentChunks[ci].Parent = newParent;
                currentChunks[ci] = newParent;
                newParent.Label = Parser.BUILT;

                // See if the chunk is complete.
                if (LastChild(chunks[ci], parent)) {
                    if (Type == ParserEventTypeEnum.Check) {
                        newEvents.Add(new Event(AbstractBottomUpParser.COMPLETE, checkContextGenerator.GetContext(currentChunks[ci], currentChunks, ci, false)));
                    }
                    currentChunks[ci].Label = AbstractBottomUpParser.COMPLETE;
                    parent.Label = AbstractBottomUpParser.COMPLETE;
                } else {
                    if (Type == ParserEventTypeEnum.Check) {
                        newEvents.Add(new Event(AbstractBottomUpParser.INCOMPLETE, checkContextGenerator.GetContext(currentChunks[ci], currentChunks, ci, false)));
                    }
                    currentChunks[ci].Label = AbstractBottomUpParser.INCOMPLETE;
                    parent.Label = AbstractBottomUpParser.COMPLETE;
                }
                chunks[ci] = parent;
            }
            //TODO: Consider whether we need to set this label or train parses at all.
            // This overwrites whatever label was assigned to the parent just above.
            parent.Label = Parser.BUILT;
            prevParent = parent;
            parent = parent.Parent;
        }

        // Decide to attach.
        if (Type == ParserEventTypeEnum.Build) {
            newEvents.Add(new Event(Parser.DONE, buildContextGenerator.GetContext(currentChunks, ci)));
        }

        // Attach node.
        string attachType = null;
        /* Node selected for attachment. */
        Parse attachNode = null;
        var attachNodeIndex = -1;
        if (ci == 0) {
            // The first chunk has nothing to attach to; it is inserted under a fresh top node.
            var top = new Parse(currentChunks[ci].Text, new Span(0, currentChunks[ci].Text.Length), AbstractBottomUpParser.TOP_NODE, 1, 0);
            top.Insert(currentChunks[ci]);
        } else {
            /* Right frontier consisting of partially-built nodes based on the current state of the parse. */
            var currentRightFrontier = Parser.GetRightFrontier(currentChunks[0], Punctuation);
            if (currentRightFrontier.Count != rightFrontier.Count) {
                // List<T>.ToString() only yields the type name, so the nodes are joined
                // explicitly to keep the diagnostic useful.
                throw new InvalidOperationException(
                    "frontiers mis-aligned: " + currentRightFrontier.Count + " != " + rightFrontier.Count +
                    " [" + string.Join(", ", currentRightFrontier) + "]" +
                    " [" + string.Join(", ", rightFrontier) + "]");
            }
            var parents = GetNonAdjoinedParent(chunks[ci]);

            // Try daughters first. The two frontiers are walked in parallel: rightFrontier
            // supplies the node from the complete parse, currentRightFrontier its partial twin.
            for (var cfi = 0; cfi < currentRightFrontier.Count; cfi++) {
                var frontierNode = rightFrontier[cfi];
                var cfn = currentRightFrontier[cfi];
                if (!Parser.checkComplete || cfn.Label != AbstractBottomUpParser.COMPLETE) {
                    if (parents.ContainsKey(frontierNode) && attachNode == null && parents[frontierNode] == NonPunctChildCount(cfn)) {
                        attachType = Parser.ATTACH_DAUGHTER;
                        attachNodeIndex = cfi;
                        attachNode = cfn;
                        if (Type == ParserEventTypeEnum.Attach) {
                            newEvents.Add(new Event(attachType, attachContextGenerator.GetContext(currentChunks, ci, currentRightFrontier, attachNodeIndex)));
                        }
                    }
                }
            }

            // Try sisters, and generate non-attach events.
            for (var cfi = 0; cfi < currentRightFrontier.Count; cfi++) {
                var frontierNode = rightFrontier[cfi];
                var cfn = currentRightFrontier[cfi];
                if (attachNode == null && parents.ContainsKey(frontierNode.Parent) && frontierNode.Type.Equals(frontierNode.Parent.Type)) {
                    attachType = Parser.ATTACH_SISTER;
                    attachNode = cfn;
                    attachNodeIndex = cfi;
                    if (Type == ParserEventTypeEnum.Attach) {
                        newEvents.Add(new Event(Parser.ATTACH_SISTER, attachContextGenerator.GetContext(currentChunks, ci, currentRightFrontier, cfi)));
                    }
                    chunks[ci].Parent.Label = Parser.BUILT;
                } else if (cfi == attachNodeIndex) {
                    // Skip over the previously attached daughter (or sister).
                } else {
                    if (Type == ParserEventTypeEnum.Attach) {
                        newEvents.Add(new Event(Parser.NON_ATTACH, attachContextGenerator.GetContext(currentChunks, ci, currentRightFrontier, cfi)));
                    }
                }
                // Can't attach past the first incomplete node.
                if (Parser.checkComplete && cfn.Label.Equals(AbstractBottomUpParser.INCOMPLETE)) {
                    break;
                }
            }

            // Attach the node.
            if (attachNode != null) {
                if (attachType == Parser.ATTACH_DAUGHTER) {
                    var daughter = currentChunks[ci];
                    attachNode.Add(daughter, Rules);
                    daughter.Parent = attachNode;
                    if (LastChild(chunks[ci], rightFrontier[attachNodeIndex])) {
                        if (Type == ParserEventTypeEnum.Check) {
                            newEvents.Add(new Event(AbstractBottomUpParser.COMPLETE, checkContextGenerator.GetContext(attachNode, currentChunks, ci, true)));
                        }
                        attachNode.Label = AbstractBottomUpParser.COMPLETE;
                    } else {
                        // The attach node keeps its current label in this case.
                        if (Type == ParserEventTypeEnum.Check) {
                            newEvents.Add(new Event(AbstractBottomUpParser.INCOMPLETE, checkContextGenerator.GetContext(attachNode, currentChunks, ci, true)));
                        }
                    }
                } else if (attachType == Parser.ATTACH_SISTER) {
                    // The frontier slot moves up to the parent, which is the adjoined node
                    // in the complete parse.
                    var frontierNode = rightFrontier[attachNodeIndex];
                    rightFrontier[attachNodeIndex] = frontierNode.Parent;
                    var sister = currentChunks[ci];
                    var newParent = attachNode.Parent.AdJoin(sister, Rules);
                    newParent.Parent = attachNode.Parent;
                    attachNode.Parent = newParent;
                    sister.Parent = newParent;
                    // Keep currentChunks[0] pointing at the root of the partial parse.
                    if (Equals(attachNode, currentChunks[0])) {
                        currentChunks[0] = newParent;
                    }
                    if (LastChild(chunks[ci], rightFrontier[attachNodeIndex])) {
                        if (Type == ParserEventTypeEnum.Check) {
                            newEvents.Add(new Event(AbstractBottomUpParser.COMPLETE, checkContextGenerator.GetContext(newParent, currentChunks, ci, true)));
                        }
                        newParent.Label = AbstractBottomUpParser.COMPLETE;
                    } else {
                        if (Type == ParserEventTypeEnum.Check) {
                            newEvents.Add(new Event(AbstractBottomUpParser.INCOMPLETE, checkContextGenerator.GetContext(newParent, currentChunks, ci, true)));
                        }
                        newParent.Label = AbstractBottomUpParser.INCOMPLETE;
                    }
                }

                // Update the right frontier: drop every node below the attachment site in a
                // single operation. attachNodeIndex is a valid index here because attachNode
                // is only assigned together with it.
                rightFrontier.RemoveRange(0, attachNodeIndex);
            } else {
                throw new InvalidOperationException("No Attachment: " + chunks[ci]);
            }
        }

        // Nodes built for this chunk become the lowest part of the right frontier.
        rightFrontier.InsertRange(0, builtNodes);
        builtNodes.Clear();
    }
}