/// <summary>
/// Adds events for parsing (post tagging and chunking) to the specified list of events for the specified parse chunks.
/// </summary>
/// <param name="parseEvents">The list that receives the generated events for the specified chunks.</param>
/// <param name="chunks">The incomplete parses to be parsed.</param>
protected override void AddParseEvents(List<Event> parseEvents, Parse[] chunks) {
    var ci = 0;
    while (ci < chunks.Length) {
        var c = chunks[ci];
        var parent = c.Parent;
        if (parent != null) {
            var type = parent.Type;

            // Label this chunk as either starting or continuing its parent constituent.
            var outcome = FirstChild(c, parent)
                ? AbstractBottomUpParser.START + type
                : AbstractBottomUpParser.CONT + type;

            c.Label = outcome;
            if (Type == ParserEventTypeEnum.Build) {
                parseEvents.Add(new Event(outcome, bcg.GetContext(chunks, ci)));
            }

            // Scan left to the index just before the first sibling of c under parent.
            // Equals(a, b) is null-safe: a chunk's Parent may be null.
            var start = ci - 1;
            while (start >= 0 && Equals(chunks[start].Parent, parent)) {
                start--;
            }

            if (LastChild(c, parent)) {
                if (Type == ParserEventTypeEnum.Check) {
                    parseEvents.Add(new Event(AbstractBottomUpParser.COMPLETE, kcg.GetContext(chunks, type, start + 1, ci)));
                }

                // Perform the reduce: locate the first chunk belonging to parent.
                // FIX: compare the chunk's Parent against parent (the original code compared the
                // chunk itself via chunks[reduceStart].Equals(parent), which never matches, leaving
                // reduceStart at ci and mispositioning ci in the reduced chunk array).
                var reduceStart = ci;
                while (reduceStart >= 0 && Equals(chunks[reduceStart].Parent, parent)) {
                    reduceStart--;
                }
                reduceStart++;

                chunks = ReduceChunks(chunks, ref ci, parent);
                ci = reduceStart - 1; // ci will be incremented at end of loop
            } else {
                if (Type == ParserEventTypeEnum.Check) {
                    parseEvents.Add(new Event(AbstractBottomUpParser.INCOMPLETE, kcg.GetContext(chunks, type, start + 1, ci)));
                }
            }
        }
        ci++;
    }
}
/// <summary>
/// Advances the specified parse and returns an array of advanced parses whose probability accounts for
/// more than the specified amount of probability mass.
/// </summary>
/// <param name="p">The parse to advance.</param>
/// <param name="probMass">The amount of probability mass that should be accounted for by the advanced parses.</param>
/// <returns>The advanced parses, or <c>null</c> when the parse has no children after punctuation collapsing.</returns>
/// <exception cref="InvalidOperationException">Thrown when a reduce is attempted before any start node has been labeled.</exception>
protected override Parse[] AdvanceParses(Parse p, double probMass) {
    // A reduce/shift is only kept when its check probability exceeds the leftover mass q.
    var q = 1 - probMass;
    /** The closest previous node which has been labeled as a start node. */
    Parse lastStartNode = null;
    /** The index of the closest previous node which has been labeled as a start node. */
    var lastStartIndex = -1;
    /** The type of the closest previous node which has been labeled as a start node. */
    string lastStartType = null;
    /** The index of the node which will be labeled in this iteration of advancing the parse. */
    int advanceNodeIndex;
    /** The node which will be labeled in this iteration of advancing the parse. */
    Parse advanceNode = null;
    var originalChildren = p.Children;
    // Work over punctuation-collapsed children; indexes are mapped back to the
    // original children below when mutating the cloned parse.
    var children = CollapsePunctuation(originalChildren, punctSet);
    var numNodes = children.Length;
    if (numNodes == 0) {
        return (null);
    }
    // Determines which node needs to be labeled and prior labels: advance to the
    // first unlabeled child, remembering the most recent start-labeled node.
    for (advanceNodeIndex = 0; advanceNodeIndex < numNodes; advanceNodeIndex++) {
        advanceNode = children[advanceNodeIndex];
        if (advanceNode.Label == null) {
            break;
        }
        if (startTypeMap.ContainsKey(advanceNode.Label)) {
            lastStartType = startTypeMap[advanceNode.Label];
            lastStartNode = advanceNode;
            lastStartIndex = advanceNodeIndex;
        }
    }
    // Map the collapsed index back onto the original (uncollapsed) child list.
    var originalAdvanceIndex = MapParseIndex(advanceNodeIndex, children, originalChildren);
    var newParsesList = new List<Parse>(buildModel.GetNumOutcomes());
    // Call build: fills bProbs with a probability per build outcome.
    buildModel.Eval(buildContextGenerator.GetContext(children, advanceNodeIndex), bProbs);
    var bProbSum = 0d;
    // Consume build outcomes best-first until probMass is accounted for.
    while (bProbSum < probMass) {
        // The largest un-advanced labeling.
        var max = 0;
        for (var pi = 1; pi < bProbs.Length; pi++) { // for each build outcome
            if (bProbs[pi] > bProbs[max]) {
                max = pi;
            }
        }
        if (bProbs[max].Equals(0d)) {
            break;
        }
        var bProb = bProbs[max];
        bProbs[max] = 0; // zero out so new max can be found
        bProbSum += bProb;
        var tag = buildModel.GetOutcome(max);
        if (max == topStartIndex) { // can't have top until complete
            continue;
        }
        if (startTypeMap.ContainsKey(tag)) {
            // Update last start to the node being labeled in this iteration.
            lastStartIndex = advanceNodeIndex;
            lastStartNode = advanceNode;
            lastStartType = startTypeMap[tag];
        } else if (contTypeMap.ContainsKey(tag)) {
            if (lastStartNode == null || !lastStartType.Equals(contTypeMap[tag])) {
                continue; // Cont must match previous start or continue
            }
        }
        var newParse1 = (Parse) p.Clone(); // clone parse
        if (createDerivationString) {
            newParse1.Derivation.Append(max).Append("-");
        }
        newParse1.SetChild(originalAdvanceIndex, tag); // replace constituent being labeled to create new derivation
        newParse1.AddProbability(Math.Log(bProb));
        // Check: fills cProbs with complete/incomplete probabilities for the
        // candidate constituent spanning [lastStartIndex, advanceNodeIndex].
        checkModel.Eval(
            checkContextGenerator.GetContext(
                CollapsePunctuation(newParse1.Children, punctSet), lastStartType, lastStartIndex, advanceNodeIndex),
            cProbs);
        if (cProbs[completeIndex] > q) { // make sure a reduce is likely
            var newParse2 = (Parse) newParse1.Clone();
            if (createDerivationString) {
                newParse2.Derivation.Append(1).Append(".");
            }
            newParse2.AddProbability(Math.Log(cProbs[completeIndex]));
            // Gather the constituent's children; track whether they are all POS tags.
            var cons = new Parse[advanceNodeIndex - lastStartIndex + 1];
            var flat = true;
            if (lastStartNode == null) {
                throw new InvalidOperationException("lastStartNode is null.");
            }
            // first
            cons[0] = lastStartNode;
            flat &= cons[0].IsPosTag;
            // last
            cons[advanceNodeIndex - lastStartIndex] = advanceNode;
            flat &= cons[advanceNodeIndex - lastStartIndex].IsPosTag;
            // middle
            for (var ci = 1; ci < advanceNodeIndex - lastStartIndex; ci++) {
                cons[ci] = children[ci + lastStartIndex];
                flat &= cons[ci].IsPosTag;
            }
            if (!flat) { // flat chunks are done by chunker
                // NOTE(review): cProbs[1] below presumably equals cProbs[completeIndex]
                // (i.e. completeIndex == 1) — confirm against the field's initialization.
                if (lastStartIndex == 0 && advanceNodeIndex == numNodes - 1) {
                    // check for top node to include end and beginning punctuation
                    newParse2.Insert(new Parse(p.Text, p.Span, lastStartType, cProbs[1], headRules.GetHead(cons, lastStartType)));
                } else {
                    newParse2.Insert(new Parse(p.Text, new Span(lastStartNode.Span.Start, advanceNode.Span.End), lastStartType, cProbs[1], headRules.GetHead(cons, lastStartType)));
                }
                newParsesList.Add(newParse2);
            }
        }
        if (cProbs[incompleteIndex] > q) { // make sure a shift is likely
            if (createDerivationString) {
                newParse1.Derivation.Append(0).Append(".");
            }
            if (advanceNodeIndex != numNodes - 1) { // can't shift last element
                newParse1.AddProbability(Math.Log(cProbs[incompleteIndex]));
                newParsesList.Add(newParse1);
            }
        }
    }
    return (newParsesList.ToArray());
}