Esempio n. 1
0
        /// <summary>
        /// Adds events for parsing (post tagging and chunking) to the specified list of events for the specified parse chunks.
        /// </summary>
        /// <param name="parseEvents">The list that receives the generated events.</param>
        /// <param name="chunks">The incomplete parses to be parsed.</param>
        /// <remarks>
        /// Every chunk that has a parent is labeled with <c>START + type</c> or <c>CONT + type</c>, where
        /// <c>type</c> is the parent's type and the prefix depends on <c>FirstChild</c>. When the event type
        /// is <c>Build</c>, one event per labeled chunk is added; when it is <c>Check</c>, a <c>COMPLETE</c>
        /// or <c>INCOMPLETE</c> event is added depending on <c>LastChild</c>. After the last child of a parent
        /// has been seen, the working array is replaced by the result of <c>ReduceChunks</c> and scanning
        /// resumes just before the run of siblings that was reduced.
        /// </remarks>
        protected override void AddParseEvents(List <Event> parseEvents, Parse[] chunks)
        {
            var ci = 0;

            while (ci < chunks.Length)
            {
                var c      = chunks[ci];
                var parent = c.Parent;
                if (parent != null)
                {
                    var    type    = parent.Type;
                    string outcome = FirstChild(c, parent)
                        ? AbstractBottomUpParser.START + type
                        : AbstractBottomUpParser.CONT + type;

                    c.Label = outcome;
                    if (Type == ParserEventTypeEnum.Build)
                    {
                        parseEvents.Add(new Event(outcome, bcg.GetContext(chunks, ci)));
                    }

                    // Walk left over the preceding chunks that share this parent; afterwards start + 1 is the
                    // index of the first sibling. A preceding chunk without a parent ends the run instead of
                    // causing a NullReferenceException.
                    var start = ci - 1;
                    while (start >= 0 && chunks[start].Parent != null && chunks[start].Parent.Equals(parent))
                    {
                        start--;
                    }

                    if (LastChild(c, parent))
                    {
                        if (Type == ParserEventTypeEnum.Check)
                        {
                            parseEvents.Add(new Event(AbstractBottomUpParser.COMPLETE,
                                                      kcg.GetContext(chunks, type, start + 1, ci)));
                        }

                        // Perform the reduce. The sibling run begins at start + 1 (computed above), so the scan
                        // must resume at that slot in the reduced array. The previous code re-derived this index
                        // by comparing each chunk (rather than the chunk's parent) with the parent, which left
                        // ci pointing at its old position.
                        // NOTE(review): assumes ReduceChunks puts the parent at the first sibling's index — confirm.
                        chunks = ReduceChunks(chunks, ref ci, parent);
                        ci     = start; // (start + 1) - 1; ci will be incremented at end of loop
                    }
                    else
                    {
                        if (Type == ParserEventTypeEnum.Check)
                        {
                            parseEvents.Add(new Event(AbstractBottomUpParser.INCOMPLETE,
                                                      kcg.GetContext(chunks, type, start + 1, ci)));
                        }
                    }
                }
                ci++;
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Advances the specified parse by labeling its next unlabeled child and returns an array of the advanced
        /// parses whose build outcomes account for more than the specified amount of probability mass.
        /// </summary>
        /// <param name="p">The parse to advance.</param>
        /// <param name="probMass">The amount of probability mass that should be accounted for by the advanced parses.</param>
        /// <returns>
        /// The advanced parses, or <c>null</c> when <c>CollapsePunctuation</c> yields no children for <paramref name="p"/>.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// A reduce passes the check threshold but no start node has been recorded.
        /// </exception>
        protected override Parse[] AdvanceParses(Parse p, double probMass)
        {
            // A check outcome (complete / incomplete) is only followed when its probability exceeds this value.
            var threshold = 1 - probMass;

            var originalChildren = p.Children;
            var children         = CollapsePunctuation(originalChildren, punctSet);
            var numNodes         = children.Length;
            if (numNodes == 0)
            {
                return null;
            }

            // The closest previous node labeled as a start node, together with its index and its type.
            Parse  lastStartNode  = null;
            var    lastStartIndex = -1;
            string lastStartType  = null;

            // The node that gets labeled by this call and its index within the collapsed children.
            Parse advanceNode      = null;
            var   advanceNodeIndex = 0;

            // Find the first node without a label, remembering the most recent start label on the way.
            while (advanceNodeIndex < numNodes)
            {
                advanceNode = children[advanceNodeIndex];
                var label = advanceNode.Label;
                if (label == null)
                {
                    break;
                }
                if (startTypeMap.ContainsKey(label))
                {
                    lastStartType  = startTypeMap[label];
                    lastStartNode  = advanceNode;
                    lastStartIndex = advanceNodeIndex;
                }
                advanceNodeIndex++;
            }

            var originalAdvanceIndex = MapParseIndex(advanceNodeIndex, children, originalChildren);
            var advancedParses       = new List<Parse>(buildModel.GetNumOutcomes());

            // Evaluate the build model for the node being labeled; the distribution lands in bProbs.
            var buildContext = buildContextGenerator.GetContext(children, advanceNodeIndex);
            buildModel.Eval(buildContext, bProbs);

            var coveredMass = 0d;
            while (coveredMass < probMass)
            {
                // Pick the most probable build outcome that has not been consumed yet.
                var best = 0;
                for (var candidate = 1; candidate < bProbs.Length; candidate++)
                {
                    if (bProbs[candidate] > bProbs[best])
                    {
                        best = candidate;
                    }
                }

                var bestProb = bProbs[best];
                if (bestProb.Equals(0d))
                {
                    // Every outcome has been consumed (or had zero probability).
                    break;
                }

                bProbs[best] = 0; // consumed: the next pass finds the next largest outcome
                coveredMass += bestProb;
                var tag = buildModel.GetOutcome(best);

                if (best == topStartIndex)
                {
                    // can't have top until complete
                    continue;
                }

                if (startTypeMap.ContainsKey(tag))
                {
                    // This labeling starts a new constituent at the node being advanced.
                    // These assignments stay in effect for the following iterations of this loop.
                    lastStartIndex = advanceNodeIndex;
                    lastStartNode  = advanceNode;
                    lastStartType  = startTypeMap[tag];
                }
                else if (contTypeMap.ContainsKey(tag)
                         && (lastStartNode == null || !lastStartType.Equals(contTypeMap[tag])))
                {
                    // A continue label is only valid when it matches the type of the previous start.
                    continue;
                }

                // Clone the parse and relabel the advanced constituent to create the new derivation.
                var labeledParse = (Parse)p.Clone();
                if (createDerivationString)
                {
                    labeledParse.Derivation.Append(best).Append("-");
                }
                labeledParse.SetChild(originalAdvanceIndex, tag);
                labeledParse.AddProbability(Math.Log(bestProb));

                // Evaluate the check model on the relabeled parse; the distribution lands in cProbs.
                var relabeledChildren = CollapsePunctuation(labeledParse.Children, punctSet);
                var checkContext      = checkContextGenerator.GetContext(
                    relabeledChildren,
                    lastStartType,
                    lastStartIndex,
                    advanceNodeIndex);
                checkModel.Eval(checkContext, cProbs);

                if (cProbs[completeIndex] > threshold)
                {
                    // A reduce is likely enough: build the constituent on a separate clone.
                    var reducedParse = (Parse)labeledParse.Clone();
                    if (createDerivationString)
                    {
                        reducedParse.Derivation.Append(1).Append(".");
                    }
                    reducedParse.AddProbability(Math.Log(cProbs[completeIndex]));

                    var lastOffset = advanceNodeIndex - lastStartIndex;
                    var cons       = new Parse[lastOffset + 1];

                    if (lastStartNode == null)
                    {
                        throw new InvalidOperationException("lastStartNode is null.");
                    }

                    // The first and last members are the start node and the advanced node; the members in
                    // between come from the collapsed children. The constituent is "flat" only when every
                    // member is a POS tag.
                    cons[0]          = lastStartNode;
                    cons[lastOffset] = advanceNode;
                    var flat = lastStartNode.IsPosTag & advanceNode.IsPosTag;
                    for (var offset = 1; offset < lastOffset; offset++)
                    {
                        cons[offset] = children[lastStartIndex + offset];
                        flat        &= cons[offset].IsPosTag;
                    }

                    if (!flat)
                    {
                        // Flat chunks are done by the chunker, so only non-flat constituents are inserted.
                        // When the constituent runs from the first to the last node, the span of the whole
                        // parse is used so that leading and trailing punctuation is included.
                        var coversAllNodes = lastStartIndex == 0 && advanceNodeIndex == numNodes - 1;
                        var consSpan       = coversAllNodes
                            ? p.Span
                            : new Span(lastStartNode.Span.Start, advanceNode.Span.End);

                        // NOTE(review): the probability passed here is cProbs[1], not cProbs[completeIndex];
                        // presumably completeIndex is 1 — confirm.
                        reducedParse.Insert(new Parse(p.Text, consSpan, lastStartType, cProbs[1],
                                                      headRules.GetHead(cons, lastStartType)));
                        advancedParses.Add(reducedParse);
                    }
                }

                if (cProbs[incompleteIndex] > threshold)
                {
                    // A shift is likely enough.
                    if (createDerivationString)
                    {
                        labeledParse.Derivation.Append(0).Append(".");
                    }

                    if (advanceNodeIndex != numNodes - 1)
                    {
                        // The last element cannot be shifted, so it is kept only when more nodes follow.
                        labeledParse.AddProbability(Math.Log(cProbs[incompleteIndex]));
                        advancedParses.Add(labeledParse);
                    }
                }
            }

            return advancedParses.ToArray();
        }