示例#1
0
        /// <summary>
        /// Advances the specified parse by one decision and returns the resulting derivations.
        /// The first top-level child (after <c>CollapsePunctuation</c>) for which <c>IsBuilt</c> is
        /// false is selected. The build model is evaluated for it: non-DONE outcomes produce new
        /// derivations in which a new node with that label is inserted, while the DONE outcome
        /// leads to marking the node as built and trying daughter/sister attachments along the
        /// right frontier using the attach model.
        /// </summary>
        /// <param name="p">
        /// The parse to advance. It is cloned for each new derivation, except when it has exactly one
        /// (non POS tag) child, in which case <c>ExpandTopNode</c> is called on it and it is returned itself.
        /// </param>
        /// <param name="probMass">
        /// Probability mass threshold. Build outcomes are consumed in descending probability until
        /// their sum reaches this value; <c>1 - probMass</c> is the minimum probability required for
        /// the build, done and attach decisions to be explored.
        /// </param>
        /// <returns>
        /// The advanced parses (possibly an empty array), or <c>null</c> when the parse has no children
        /// or its only child is a POS tag.
        /// </returns>
        /// <exception cref="InvalidOperationException">Thrown when the node selected to advance is null.</exception>
        protected override Parse[] AdvanceParses(Parse p, double probMass)
        {
            // Minimum probability a decision needs in order to be explored.
            var q = 1 - probMass;
            // The index of the node which will be labeled in this iteration of advancing the parse.
            int advanceNodeIndex;
            // The node which will be labeled in this iteration of advancing the parse.
            Parse advanceNode      = null;
            var   originalChildren = p.Children;
            var   children         = CollapsePunctuation(originalChildren, punctSet);
            var   numNodes         = children.Length;

            if (numNodes == 0)
            {
                return(null);
            }
            if (numNodes == 1)
            {
                //put sentence initial and final punct in top node
                if (children[0].IsPosTag)
                {
                    return(null);
                }
                // NOTE: p itself is modified and returned here; no clone is made.
                p.ExpandTopNode(children[0]);
                return(new[] { p });
            }
            //determines which node needs to be advanced.
            // If every child is built the loop runs to completion: advanceNode is then the last
            // child and advanceNodeIndex equals numNodes.
            for (advanceNodeIndex = 0; advanceNodeIndex < numNodes; advanceNodeIndex++)
            {
                advanceNode = children[advanceNodeIndex];
                if (!IsBuilt(advanceNode))
                {
                    break;
                }
            }

            if (advanceNode == null)
            {
                throw new InvalidOperationException("advanceNode is null.");
            }

            // Indexes into originalChildren (punctuation included) that correspond to the first
            // collapsed child and to the node being advanced -- presumably what MapParseIndex
            // computes; confirm against its definition.
            var originalZeroIndex    = MapParseIndex(0, children, originalChildren);
            var originalAdvanceIndex = MapParseIndex(advanceNodeIndex, children, originalChildren);
            var newParsesList        = new List <Parse>();

            //call build model
            // The outcome probabilities are read from bProbs after this call.
            buildModel.Eval(buildContextGenerator.GetContext(children, advanceNodeIndex), bProbs);
            var doneProb = bProbs[doneIndex];

            Debug("adi=" + advanceNodeIndex + " " + advanceNode.Type + "." + advanceNode.Label + " " + advanceNode + " choose build=" + (1 - doneProb) + " attach=" + doneProb);

            // advance builds: only explored when the combined probability of all non-DONE
            // outcomes exceeds the threshold.
            if (1 - doneProb > q)
            {
                double bprobSum = 0;
                // Take build outcomes from most to least probable until probMass is covered.
                while (bprobSum < probMass)
                {
                    // The largest unadvanced labeling.
                    var max = 0;
                    for (var pi = 1; pi < bProbs.Length; pi++)
                    {
                        //for each build outcome
                        if (bProbs[pi] > bProbs[max])
                        {
                            max = pi;
                        }
                    }
                    // Every remaining outcome has already been consumed (zeroed) or had zero probability.
                    if (bProbs[max].Equals(0d))
                    {
                        break;
                    }
                    var bprob = bProbs[max];
                    bProbs[max] = 0; //zero out so new max can be found
                    bprobSum   += bprob;
                    var tag = buildModel.GetOutcome(max);
                    // The DONE outcome still counts towards bprobSum but produces no build derivation;
                    // it is handled by the attach section below.
                    if (!tag.Equals(DONE))
                    {
                        var newParse1 = (Parse)p.Clone();
                        var newNode   = new Parse(p.Text, advanceNode.Span, tag, bprob, advanceNode.Head);
                        newParse1.Insert(newNode);
                        newParse1.AddProbability(Math.Log(bprob));
                        newParsesList.Add(newParse1);
                        if (checkComplete)
                        {
                            cProbs =
                                checkModel.Eval(checkContextGenerator.GetContext(newNode, children, advanceNodeIndex,
                                                                                 false));

                            Debug("building " + tag + " " + bprob + " c=" + cProbs[completeIndex]);

                            if (cProbs[completeIndex] > probMass)
                            {
                                //just complete advances
                                SetComplete(newNode);
                                newParse1.AddProbability(Math.Log(cProbs[completeIndex]));

                                Debug("Only advancing complete node");
                            }
                            else if (1 - cProbs[completeIndex] > probMass)
                            {
                                //just incomplete advances
                                SetIncomplete(newNode);
                                newParse1.AddProbability(Math.Log(1 - cProbs[completeIndex]));
                                Debug("Only advancing incomplete node");
                            }
                            else
                            {
                                //both complete and incomplete advance
                                Debug("Advancing both complete and incomplete nodes");
                                SetComplete(newNode);
                                newParse1.AddProbability(Math.Log(cProbs[completeIndex]));

                                // A second derivation is built from a fresh clone of p with its own
                                // node, so that it can carry the incomplete label independently.
                                var newParse2 = (Parse)p.Clone();
                                var newNode2  = new Parse(p.Text, advanceNode.Span, tag, bprob, advanceNode.Head);
                                newParse2.Insert(newNode2);
                                newParse2.AddProbability(Math.Log(bprob));
                                newParsesList.Add(newParse2);
                                newParse2.AddProbability(Math.Log(1 - cProbs[completeIndex]));
                                SetIncomplete(newNode2); //set incomplete for non-clone
                            }
                        }
                        else
                        {
                            Debug("building " + tag + " " + bprob);
                        }
                    }
                }
            }
            //advance attaches
            if (doneProb > q)
            {
                var newParse1 = (Parse)p.Clone();  //clone parse
                //mark nodes as built
                if (checkComplete)
                {
                    if (IsComplete(advanceNode))
                    {
                        newParse1.SetChild(originalAdvanceIndex, BUILT + "." + COMPLETE);
                        //replace constituent being labeled to create new derivation
                    }
                    else
                    {
                        newParse1.SetChild(originalAdvanceIndex, BUILT + "." + INCOMPLETE);
                        //replace constituent being labeled to create new derivation
                    }
                }
                else
                {
                    newParse1.SetChild(originalAdvanceIndex, BUILT);
                    //replace constituent being labeled to create new derivation
                }
                newParse1.AddProbability(Math.Log(doneProb));
                if (advanceNodeIndex == 0)
                {
                    //no attach if first node.
                    newParsesList.Add(newParse1);
                }
                else
                {
                    // Right frontier of the un-cloned parse; fn below is only used for the
                    // completeness tests, debug output and as the argument to CloneRoot.
                    var rf = GetRightFrontier(p, punctSet);
                    for (int fi = 0, fs = rf.Count; fi < fs; fi++)
                    {
                        var fn = rf[fi];
                        // The attach probabilities for this frontier node are read from aProbs after this call.
                        attachModel.Eval(attachContextGenerator.GetContext(children, advanceNodeIndex, rf, fi), aProbs);
                        if (debugOn)
                        {
                            //List cs = java.util.Arrays.asList(attachContextGenerator.getContext(children, advanceNodeIndex,rf,fi,punctSet));
                            Debug("Frontier node(" + fi + "): " + fn.Type + "." + fn.Label + " " + fn + " <- " +
                                  advanceNode.Type + " " + advanceNode + " d=" + aProbs[daughterAttachIndex] + " s=" +
                                  aProbs[sisterAttachIndex] + " ");
                        }
                        foreach (int attachment in attachments)
                        {
                            var prob = aProbs[attachment];
                            //should we try an attach if p > threshold and
                            // if !checkComplete then prevent daughter attaching to chunk
                            // if checkComplete then prevent daughter attaching to complete node or
                            //    sister attaching to an incomplete node
                            if (prob > q && (
                                    (!checkComplete && (attachment != daughterAttachIndex || !IsComplete(fn)))
                                    ||
                                    (checkComplete &&
                                     ((attachment == daughterAttachIndex && !IsComplete(fn)) ||
                                      (attachment == sisterAttachIndex && IsComplete(fn))))))
                            {
                                var newParse2 = newParse1.CloneRoot(fn, originalZeroIndex);
                                var newKids   = CollapsePunctuation(newParse2.Children, punctSet);
                                //remove node from top level since were going to attach it (including punct)
                                // Remove is called with the same index on every pass; this relies on each
                                // removal shifting the following children left -- confirm in Parse.Remove.
                                for (var ri = originalZeroIndex + 1; ri <= originalAdvanceIndex; ri++)
                                {
                                    //System.out.println(at"-removing "+(originalZeroIndex+1)+" "+newParse2.getChildren()[originalZeroIndex+1]);
                                    newParse2.Remove(originalZeroIndex + 1);
                                }
                                // Right frontier of the clone; entries are addressed with the same
                                // index fi that was used for the frontier of p.
                                var   crf = GetRightFrontier(newParse2, punctSet);
                                Parse updatedNode;
                                if (attachment == daughterAttachIndex)
                                {
                                    //attach daughter
                                    updatedNode = crf[fi];
                                    updatedNode.Add(advanceNode, headRules);
                                }
                                else
                                {
                                    //attach sister
                                    Parse psite;
                                    if (fi + 1 < crf.Count)
                                    {
                                        // The adjunction is performed on the next frontier entry.
                                        psite       = crf[fi + 1];
                                        updatedNode = psite.AdJoin(advanceNode, headRules);
                                    }
                                    else
                                    {
                                        // fi is the last frontier entry, so the adjunction happens at the
                                        // root of the clone and the first collapsed child is replaced.
                                        psite       = newParse2;
                                        updatedNode = psite.AdJoinRoot(advanceNode, headRules, originalZeroIndex);
                                        newKids[0]  = updatedNode;
                                    }
                                }
                                //update spans affected by attachment
                                for (var ni = fi + 1; ni < crf.Count; ni++)
                                {
                                    var node = crf[ni];
                                    node.UpdateSpan();
                                }
                                //if (debugOn) {System.out.print(ai+"-result: ");newParse2.show();System.out.println();}
                                newParse2.AddProbability(Math.Log(prob));
                                newParsesList.Add(newParse2);
                                if (checkComplete)
                                {
                                    cProbs =
                                        checkModel.Eval(checkContextGenerator.GetContext(updatedNode, newKids,
                                                                                         advanceNodeIndex, true));
                                    if (cProbs[completeIndex] > probMass)
                                    {
                                        //just complete advances
                                        SetComplete(updatedNode);
                                        newParse2.AddProbability(Math.Log(cProbs[completeIndex]));

                                        Debug("Only advancing complete node");
                                    }
                                    else if (1 - cProbs[completeIndex] > probMass)
                                    {
                                        //just incomplete advances
                                        SetIncomplete(updatedNode);
                                        newParse2.AddProbability(Math.Log(1 - cProbs[completeIndex]));
                                        Debug("Only advancing incomplete node");
                                    }
                                    else
                                    {
                                        //both complete and incomplete advance
                                        // Order matters: newParse3 is cloned while updatedNode is marked
                                        // complete; updatedNode is then switched to incomplete so that
                                        // newParse2 carries the incomplete alternative.
                                        SetComplete(updatedNode);
                                        var newParse3 = newParse2.CloneRoot(updatedNode, originalZeroIndex);
                                        newParse3.AddProbability(Math.Log(cProbs[completeIndex]));
                                        newParsesList.Add(newParse3);
                                        SetIncomplete(updatedNode);
                                        newParse2.AddProbability(Math.Log(1 - cProbs[completeIndex]));
                                        Debug("Advancing both complete and incomplete nodes; c=" + cProbs[completeIndex]);
                                    }
                                }
                            }
                            else
                            {
                                Debug("Skipping " + fn.Type + "." + fn.Label + " " + fn + " daughter=" +
                                      (attachment == daughterAttachIndex) + " complete=" + IsComplete(fn) +
                                      " prob=" + prob);
                            }
                        }
                        // With completeness checking on, frontier nodes past the first incomplete
                        // one are not considered as attachment sites.
                        if (checkComplete && !IsComplete(fn))
                        {
                            Debug("Stopping at incomplete node(" + fi + "): " + fn.Type + "." + fn.Label + " " + fn);
                            break;
                        }
                    }
                }
            }
            return(newParsesList.ToArray());
        }
        /// <summary>
        /// Produces all events for the specified sentence chunks and adds them to the specified list.
        /// </summary>
        /// <remarks>
        /// Only events matching the current <c>Type</c> (Build, Check or Attach) are added to
        /// <paramref name="newEvents"/>; the partial parse held in the local <c>currentChunks</c> array is
        /// advanced the same way regardless of <c>Type</c>. The <paramref name="chunks"/> array and the
        /// nodes it references are modified: labels are assigned to the chunks and their ancestors, and
        /// an entry is replaced by its parent whenever a level is built for it.
        /// </remarks>
        /// <param name="newEvents">A list of events to be added to.</param>
        /// <param name="chunks">Pre-chunked constituents of a sentence.</param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the right frontier computed from the partial parse and the one tracked from the
        /// completed parse differ in size, or when no attachment is found for a chunk other than the first.
        /// </exception>
        protected override void AddParseEvents(List <Event> newEvents, Parse[] chunks)
        {
            /* Frontier nodes built from node in a completed parse.  Specifically,
             * they have all their children regardless of the stage of parsing.*/

            var rightFrontier = new List <Parse>();
            // Nodes of the completed parse built while processing the current chunk; they are
            // pushed onto the front of rightFrontier at the end of each iteration.
            var builtNodes    = new List <Parse>();

            /* Nodes which characterize what the parse looks like to the parser as it's being built.
             * Specifically, these nodes don't have all their children attached like the parents of
             * the chunk nodes do.*/
            var currentChunks = new Parse[chunks.Length];

            // Start from clones of the chunks, labeling both the clone and the original as complete.
            for (var ci = 0; ci < chunks.Length; ci++)
            {
                currentChunks[ci] = (Parse)chunks[ci].Clone();
                currentChunks[ci].PreviousPunctuationSet = chunks[ci].PreviousPunctuationSet;
                currentChunks[ci].NextPunctuationSet     = chunks[ci].NextPunctuationSet;
                currentChunks[ci].Label = AbstractBottomUpParser.COMPLETE;
                chunks[ci].Label        = AbstractBottomUpParser.COMPLETE;
            }
            for (var ci = 0; ci < chunks.Length; ci++)
            {
                //System.err.println("parserEventStream.addParseEvents: chunks="+Arrays.asList(chunks));
                var parent     = chunks[ci].Parent;
                var prevParent = chunks[ci];

                //var off = 0;
                //build un-built parents
                if (!chunks[ci].IsPosTag)
                {
                    builtNodes.Add(chunks[ci]);
                    //builtNodes[off++] = chunks[ci];
                }

                //perform build stages
                // Walk up the ancestors in the completed parse until the top node or an
                // already-labeled ancestor is reached.
                while (parent.Type != AbstractBottomUpParser.TOP_NODE && parent.Label == null)
                {
                    // NOTE(review): the Label == null test repeats the loop condition, so the
                    // type comparison is the only effective filter here; same-type parents are
                    // skipped (only labeled BUILT below).
                    if (parent.Label == null && prevParent.Type != parent.Type)
                    {
                        //build level
                        // if (debug) System.err.println("Build: " + parent.Type + " for: " + currentChunks[ci]);
                        if (Type == ParserEventTypeEnum.Build)
                        {
                            newEvents.Add(new Event(parent.Type, buildContextGenerator.GetContext(currentChunks, ci)));
                        }
                        builtNodes.Add(parent);
                        //builtNodes[off++] = parent;
                        // Mirror the build in the partial parse: wrap the current node in a new
                        // parent of the same type that has only this one child.
                        var newParent = new Parse(currentChunks[ci].Text, currentChunks[ci].Span, parent.Type, 1, 0);
                        newParent.Add(currentChunks[ci], Rules);
                        newParent.PreviousPunctuationSet = currentChunks[ci].PreviousPunctuationSet;
                        newParent.NextPunctuationSet     = currentChunks[ci].NextPunctuationSet;
                        currentChunks[ci].Parent         = newParent;
                        currentChunks[ci] = newParent;
                        newParent.Label   = Parser.BUILT;

                        //see if chunk is complete
                        // NOTE(review): both branches assign COMPLETE to parent.Label, and that value
                        // is overwritten with BUILT at the end of this loop iteration before anything
                        // visible here reads it -- confirm whether these assignments are intended.
                        if (LastChild(chunks[ci], parent))
                        {
                            if (Type == ParserEventTypeEnum.Check)
                            {
                                newEvents.Add(new Event(AbstractBottomUpParser.COMPLETE,
                                                        checkContextGenerator.GetContext(currentChunks[ci], currentChunks, ci, false)));
                            }
                            currentChunks[ci].Label = AbstractBottomUpParser.COMPLETE;
                            parent.Label            = AbstractBottomUpParser.COMPLETE;
                        }
                        else
                        {
                            if (Type == ParserEventTypeEnum.Check)
                            {
                                newEvents.Add(new Event(AbstractBottomUpParser.INCOMPLETE,
                                                        checkContextGenerator.GetContext(currentChunks[ci], currentChunks, ci, false)));
                            }
                            currentChunks[ci].Label = AbstractBottomUpParser.INCOMPLETE;
                            parent.Label            = AbstractBottomUpParser.COMPLETE;
                        }

                        // The caller's array now refers to the built parent instead of the chunk.
                        chunks[ci] = parent;
                        //System.err.println("build: "+newParent+" for "+parent);
                    }
                    //TODO: Consider whether we need to set this label or train parses at all.

                    parent.Label = Parser.BUILT;
                    prevParent   = parent;
                    parent       = parent.Parent;
                }
                //decide to attach
                if (Type == ParserEventTypeEnum.Build)
                {
                    newEvents.Add(new Event(Parser.DONE, buildContextGenerator.GetContext(currentChunks, ci)));
                }
                //attach node
                string attachType = null;
                /* Node selected for attachment. */
                Parse attachNode      = null;
                var   attachNodeIndex = -1;
                if (ci == 0)
                {
                    // The first chunk has nothing to attach to; it is inserted into a new top node
                    // spanning the whole text. The local 'top' is not referenced again in this method.
                    var top = new Parse(currentChunks[ci].Text, new Span(0, currentChunks[ci].Text.Length),
                                        AbstractBottomUpParser.TOP_NODE, 1, 0);
                    top.Insert(currentChunks[ci]);
                }
                else
                {
                    /* Right frontier consisting of partially-built nodes based on current state of the parse.*/
                    var currentRightFrontier = Parser.GetRightFrontier(currentChunks[0], Punctuation);
                    // The two frontiers are walked with a shared index below, so they must have the same size.
                    if (currentRightFrontier.Count != rightFrontier.Count)
                    {
                        throw new InvalidOperationException("frontiers mis-aligned: " + currentRightFrontier.Count +
                                                            " != " + rightFrontier.Count + " " + currentRightFrontier +
                                                            " " + rightFrontier);
                        //System.exit(1);
                    }
                    var parents = GetNonAdjoinedParent(chunks[ci]);
                    //try daughters first.
                    for (var cfi = 0; cfi < currentRightFrontier.Count; cfi++)
                    {
                        var frontierNode = rightFrontier[cfi];
                        var cfn          = currentRightFrontier[cfi];
                        if (!Parser.checkComplete || cfn.Label != AbstractBottomUpParser.COMPLETE)
                        {
                            //if (debug) System.err.println("Looking at attachment site (" + cfi + "): " + cfn.Type + " ci=" + i + " cs=" + nonPunctChildCount(cfn) + ", " + cfn + " :for " + currentChunks[ci].Type + " " + currentChunks[ci] + " -> " + parents);
                            // Daughter attachment is chosen for the first frontier node that is a key
                            // in 'parents' and whose mapped value equals the number of non-punctuation
                            // children its partial counterpart currently has.
                            if (parents.ContainsKey(frontierNode) && attachNode == null && parents[frontierNode] == NonPunctChildCount(cfn))
                            {
                                attachType      = Parser.ATTACH_DAUGHTER;
                                attachNodeIndex = cfi;
                                attachNode      = cfn;
                                if (Type == ParserEventTypeEnum.Attach)
                                {
                                    newEvents.Add(new Event(attachType,
                                                            attachContextGenerator.GetContext(currentChunks, ci, currentRightFrontier,
                                                                                              attachNodeIndex)));
                                }
                                //System.err.println("daughter attach "+attachNode+" at "+fi);
                            }
                        } /* else {
                           * if (debug) System.err.println("Skipping (" + cfi + "): " + cfn.Type + "," + cfn.getPreviousPunctuationSet() + " " + cfn + " :for " + currentChunks[ci].Type + " " + currentChunks[ci] + " -> " + parents);
                           * }
                           * // Can't attach past first incomplete node.
                           * if (Parser.checkComplete && cfn.getLabel().equals(Parser.INCOMPLETE)) {
                           * if (debug) System.err.println("breaking on incomplete:" + cfn.Type + " " + cfn);
                           * break;
                           * }
                           */
                    }
                    //try sisters, and generate non-attach events.
                    for (var cfi = 0; cfi < currentRightFrontier.Count; cfi++)
                    {
                        var frontierNode = rightFrontier[cfi];
                        var cfn          = currentRightFrontier[cfi];
                        // NOTE(review): if 'parents' is a Dictionary, a null frontierNode.Parent would
                        // make ContainsKey throw -- confirm frontier nodes always have a parent.
                        if (attachNode == null && parents.ContainsKey(frontierNode.Parent) &&
                            frontierNode.Type.Equals(frontierNode.Parent.Type)
                            )
                        {
                            //&& frontierNode.Parent.getLabel() == null) {
                            attachType      = Parser.ATTACH_SISTER;
                            attachNode      = cfn;
                            attachNodeIndex = cfi;
                            if (Type == ParserEventTypeEnum.Attach)
                            {
                                newEvents.Add(new Event(Parser.ATTACH_SISTER,
                                                        attachContextGenerator.GetContext(currentChunks, ci, currentRightFrontier, cfi)));
                            }
                            chunks[ci].Parent.Label = Parser.BUILT;
                            //System.err.println("in search sister attach "+attachNode+" at "+cfi);
                        }
                        else if (cfi == attachNodeIndex)
                        {
                            //skip over previously attached daughter.
                        }
                        else
                        {
                            if (Type == ParserEventTypeEnum.Attach)
                            {
                                newEvents.Add(new Event(Parser.NON_ATTACH,
                                                        attachContextGenerator.GetContext(currentChunks, ci, currentRightFrontier, cfi)));
                            }
                        }
                        //Can't attach past first incomplete node.
                        // NOTE(review): cfn.Label is dereferenced without a null check; this would throw
                        // if a frontier node of the partial parse had no label -- confirm it cannot happen.
                        if (Parser.checkComplete && cfn.Label.Equals(AbstractBottomUpParser.INCOMPLETE))
                        {
                            //if (debug) System.err.println("breaking on incomplete:" + cfn.Type + " " + cfn);
                            break;
                        }
                    }
                    //attach Node
                    if (attachNode != null)
                    {
                        if (attachType == Parser.ATTACH_DAUGHTER)
                        {
                            var daughter = currentChunks[ci];
                            //if (debug) System.err.println("daughter attach a=" + attachNode.Type + ":" + attachNode + " d=" + daughter + " com=" + lastChild(chunks[ci], rightFrontier.get(attachNodeIndex)));
                            attachNode.Add(daughter, Rules);
                            daughter.Parent = attachNode;
                            if (LastChild(chunks[ci], rightFrontier[attachNodeIndex]))
                            {
                                if (Type == ParserEventTypeEnum.Check)
                                {
                                    newEvents.Add(new Event(AbstractBottomUpParser.COMPLETE,
                                                            checkContextGenerator.GetContext(attachNode, currentChunks, ci, true)));
                                }
                                attachNode.Label = AbstractBottomUpParser.COMPLETE;
                            }
                            else
                            {
                                // Unlike the sister case below, the label of attachNode is left
                                // unchanged when the daughter is not the last child.
                                if (Type == ParserEventTypeEnum.Check)
                                {
                                    newEvents.Add(new Event(AbstractBottomUpParser.INCOMPLETE,
                                                            checkContextGenerator.GetContext(attachNode, currentChunks, ci, true)));
                                }
                            }
                        }
                        else if (attachType == Parser.ATTACH_SISTER)
                        {
                            // The frontier entry of the completed parse moves up to its parent, which
                            // has the same type (checked when the sister site was selected).
                            var frontierNode = rightFrontier[attachNodeIndex];
                            rightFrontier[attachNodeIndex] = frontierNode.Parent;
                            var sister = currentChunks[ci];
                            //if (debug) System.err.println("sister attach a=" + attachNode.Type + ":" + attachNode + " s=" + sister + " ap=" + attachNode.Parent + " com=" + lastChild(chunks[ci], rightFrontier.get(attachNodeIndex)));
                            var newParent = attachNode.Parent.AdJoin(sister, Rules);

                            // Re-link the parent references around the node returned by AdJoin.
                            newParent.Parent  = attachNode.Parent;
                            attachNode.Parent = newParent;
                            sister.Parent     = newParent;
                            // Keep currentChunks[0] pointing at the root of the partial parse, since the
                            // right frontier is computed from it on the next iteration.
                            if (Equals(attachNode, currentChunks[0]))
                            {
                                currentChunks[0] = newParent;
                            }
                            if (LastChild(chunks[ci], rightFrontier[attachNodeIndex]))
                            {
                                if (Type == ParserEventTypeEnum.Check)
                                {
                                    newEvents.Add(new Event(AbstractBottomUpParser.COMPLETE,
                                                            checkContextGenerator.GetContext(newParent, currentChunks, ci, true)));
                                }
                                newParent.Label = AbstractBottomUpParser.COMPLETE;
                            }
                            else
                            {
                                if (Type == ParserEventTypeEnum.Check)
                                {
                                    newEvents.Add(new Event(AbstractBottomUpParser.INCOMPLETE,
                                                            checkContextGenerator.GetContext(newParent, currentChunks, ci, true)));
                                }
                                newParent.Label = AbstractBottomUpParser.INCOMPLETE;
                            }
                        }
                        //update right frontier
                        // Drop the entries in front of the attachment site.
                        for (var ni = 0; ni < attachNodeIndex; ni++)
                        {
                            //System.err.println("removing: "+rightFrontier.get(0));
                            rightFrontier.RemoveAt(0);
                        }
                    }
                    else
                    {
                        //System.err.println("No attachment!");
                        throw new InvalidOperationException("No Attachment: " + chunks[ci]);
                    }
                }
                // Nodes built for this chunk go to the front of the frontier for the next chunk.
                rightFrontier.InsertRange(0, builtNodes);
                builtNodes.Clear();
            }
        }