Ejemplo n.º 1
0
        /// <summary>
        /// Asserts that a two-outcome model predicts the outcome of the given event and that the
        /// probability assigned to each outcome matches the expected split.
        /// </summary>
        /// <param name="model">The model under test.</param>
        /// <param name="ev">The event whose context is evaluated and whose outcome must be the best outcome.</param>
        /// <param name="higherProbability">
        /// The probability expected for the event's outcome; the other outcome is expected to
        /// receive <c>1.0 - higherProbability</c>.
        /// </param>
        internal static void TestModel(IMaxentModel model, Event ev, double higherProbability)
        {
            var distribution = model.Eval(ev.Context);
            var predicted    = model.GetBestOutcome(distribution);

            Assert.AreEqual(2, distribution.Length);
            Assert.AreEqual(ev.Outcome, predicted);

            // The slot that carries the event's outcome must hold the higher probability,
            // the other slot the remaining mass (both within a tolerance of 0.0001).
            for (var slot = 0; slot < 2; slot++)
            {
                var expected = ev.Outcome.Equals(model.GetOutcome(slot))
                    ? higherProbability
                    : 1.0 - higherProbability;

                Assert.AreEqual(expected, distribution[slot], 0.0001);
            }
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Creates a parser from a build model and a check model and precomputes the lookup
 /// structures derived from the build model's outcomes.
 /// </summary>
 /// <param name="buildModel">The model whose outcomes are scanned for start/continue labels.</param>
 /// <param name="checkModel">The model providing the complete/incomplete outcome indexes.</param>
 /// <param name="tagger">Passed through to the base constructor.</param>
 /// <param name="chunker">Passed through to the base constructor.</param>
 /// <param name="headRules">Passed through to the base constructor.</param>
 /// <param name="beamSize">Passed through to the base constructor.</param>
 /// <param name="advancePercentage">Passed through to the base constructor.</param>
 private Parser(IMaxentModel buildModel, IMaxentModel checkModel, IPOSTagger tagger, IChunker chunker,
                AbstractHeadRules headRules, int beamSize, double advancePercentage) :
     base(tagger, chunker, headRules, beamSize, advancePercentage)
 {
     this.buildModel = buildModel;
     this.checkModel = checkModel;

     var buildOutcomeCount = buildModel.GetNumOutcomes();

     // Probability buffers sized to the number of outcomes of each model.
     bProbs = new double[buildOutcomeCount];
     cProbs = new double[checkModel.GetNumOutcomes()];

     buildContextGenerator = new BuildContextGenerator();
     checkContextGenerator = new CheckContextGenerator();

     startTypeMap = new Dictionary<string, string>();
     contTypeMap  = new Dictionary<string, string>();

     // Map every start/continue outcome to the text that follows its prefix.
     // Outcome labels are identifiers, not linguistic text, so the prefix test is ordinal
     // rather than dependent on the current culture.
     for (var index = 0; index < buildOutcomeCount; index++)
     {
         var outcome = buildModel.GetOutcome(index);

         if (outcome.StartsWith(START, System.StringComparison.Ordinal))
         {
             startTypeMap[outcome] = outcome.Substring(START.Length);
         }
         else if (outcome.StartsWith(CONT, System.StringComparison.Ordinal))
         {
             contTypeMap[outcome] = outcome.Substring(CONT.Length);
         }
     }

     topStartIndex   = buildModel.GetIndex(TOP_START);
     completeIndex   = checkModel.GetIndex(COMPLETE);
     incompleteIndex = checkModel.GetIndex(INCOMPLETE);
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Evaluates the model on the events read from the "devset" file and asserts that the
        /// fraction of correctly predicted outcomes equals the expected accuracy.
        /// </summary>
        /// <param name="model">The model under test.</param>
        /// <param name="expecedAccuracy">The accuracy the model is expected to reach (tolerance .00001).</param>
        internal static void TestModel(IMaxentModel model, double expecedAccuracy) {
            var hits = 0;
            var seen = 0;

            foreach (var sample in readPpaFile("devset")) {
                var scores = model.Eval(sample.Context);

                // Index of the highest score; on ties the earliest index wins.
                var winner = 0;
                for (var candidate = 1; candidate < scores.Length; candidate++) {
                    if (scores[candidate] > scores[winner]) {
                        winner = candidate;
                    }
                }

                var predictedLabel = model.GetOutcome(winner);
                if (sample.Outcome.Equals(predictedLabel)) {
                    hits++;
                }

                seen++;
            }

            var accuracy = hits/(double) seen;

            Console.Out.WriteLine("Accuracy on PPA devSet: (" + hits + "/" + seen + ") " + accuracy);

            Assert.AreEqual(expecedAccuracy, accuracy, .00001);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Collects every outcome label of the model and asks the sequence codec created by
        /// <c>Factory</c> whether those outcomes are compatible.
        /// </summary>
        /// <param name="model">The model whose outcomes are checked.</param>
        /// <returns>The result of the codec's compatibility check.</returns>
        private bool IsModelValid(IMaxentModel model)
        {
            var labels = new string[model.GetNumOutcomes()];

            for (var index = 0; index < labels.Length; index++)
            {
                labels[index] = model.GetOutcome(index);
            }

            var codec = Factory.CreateSequenceCodec();

            return codec.AreOutcomesCompatible(labels);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Checks that the model has exactly as many outcomes as were expected and that every
        /// outcome of the model appears among the expected outcomes.
        /// </summary>
        /// <param name="model">The model.</param>
        /// <param name="expectedOutcomes">The expected outcomes.</param>
        /// <returns>
        /// <c>true</c> if the outcome counts match and each model outcome is contained in
        /// <paramref name="expectedOutcomes"/>; <c>false</c> otherwise.
        /// </returns>
        public static bool ValidateOutcomes(IMaxentModel model, params string[] expectedOutcomes) {
            var count = model.GetNumOutcomes();

            // A differing number of outcomes can never be a match.
            if (count != expectedOutcomes.Length) {
                return false;
            }

            for (var index = 0; index < count; index++) {
                var known = expectedOutcomes.Contains(model.GetOutcome(index));
                if (!known) {
                    return false;
                }
            }

            return true;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Verifies that the model's outcomes match the expected outcomes: the counts must be
        /// equal and every outcome reported by the model must be one of the expected outcomes.
        /// </summary>
        /// <param name="model">The model.</param>
        /// <param name="expectedOutcomes">The expected outcomes.</param>
        /// <returns>
        /// <c>true</c> if the counts are equal and no model outcome is missing from
        /// <paramref name="expectedOutcomes"/>; <c>false</c> otherwise.
        /// </returns>
        public static bool ValidateOutcomes(IMaxentModel model, params string[] expectedOutcomes)
        {
            var total = model.GetNumOutcomes();

            if (total != expectedOutcomes.Length)
            {
                return(false);
            }

            // Walk the model's outcomes until one is not among the expected ones.
            var checkedCount = 0;
            while (checkedCount < total && expectedOutcomes.Contains(model.GetOutcome(checkedCount)))
            {
                checkedCount++;
            }

            // Reaching the end means every outcome was found.
            return(checkedCount == total);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Runs the model over the events read from the "devset" file, writes the achieved
        /// accuracy to the debug output and asserts it against the expected value.
        /// </summary>
        /// <param name="model">The model under test.</param>
        /// <param name="expectedAccuracy">The accuracy the model is expected to reach (tolerance .00001).</param>
        private static void TestModel(IMaxentModel model, double expectedAccuracy)
        {
            var seen = 0;
            var hits = 0;

            foreach (var sample in PrepAttachDataUtility.ReadPpaFile(@"devset"))
            {
                var gold   = sample.Outcome;
                var scores = model.Eval(sample.Context);

                // Find the index of the largest score; the first maximum wins on ties.
                var argMax = 0;
                var cursor = 1;
                while (cursor < scores.Length)
                {
                    if (scores[cursor] > scores[argMax])
                    {
                        argMax = cursor;
                    }

                    cursor++;
                }

                if (gold.Equals(model.GetOutcome(argMax)))
                {
                    hits++;
                }

                seen++;
            }

            var accuracy = hits / (double)seen;

            Debug.WriteLine("Accuracy on PPA devset: (" + hits + "/" + seen + ") " + accuracy);

            Assert.AreEqual(expectedAccuracy, accuracy, .00001);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Measures how often the model's highest-scoring outcome equals the outcome of each
        /// event read from the "devset" file, prints the result and asserts the accuracy.
        /// </summary>
        /// <param name="model">The model under test.</param>
        /// <param name="expecedAccuracy">The accuracy the model is expected to reach (tolerance .00001).</param>
        internal static void TestModel(IMaxentModel model, double expecedAccuracy)
        {
            var events = readPpaFile("devset");

            var matched   = 0;
            var processed = 0;

            foreach (var item in events)
            {
                var probabilities = model.Eval(item.Context);

                // Track the position of the largest probability; a later entry only
                // replaces the current one when it is strictly greater.
                var top = 0;
                for (var k = 1; k < probabilities.Length; k++)
                {
                    top = probabilities[k] > probabilities[top] ? k : top;
                }

                var predictedLabel = model.GetOutcome(top);

                matched += item.Outcome.Equals(predictedLabel) ? 1 : 0;
                processed++;
            }

            var accuracy = matched / (double)processed;

            Console.Out.WriteLine("Accuracy on PPA devSet: (" + matched + "/" + processed + ") " + accuracy);

            Assert.AreEqual(expecedAccuracy, accuracy, .00001);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Advances the given parse by one step. The first child that is not yet built is
        /// selected, the build model is evaluated for it, and new parses are produced that
        /// either insert a new node over that child ("build") or mark it as built and attach
        /// it to nodes on the right frontier ("attach").
        /// </summary>
        /// <param name="p">The parse to advance. In the single-child case it is modified in place via <c>ExpandTopNode</c>.</param>
        /// <param name="probMass">
        /// The probability mass to account for. <c>1 - probMass</c> is used as the threshold for
        /// the build/attach decisions and <c>probMass</c> itself for the complete/incomplete decisions.
        /// </param>
        /// <returns>
        /// <c>null</c> when no children remain after collapsing punctuation, or when the only
        /// remaining child is a POS tag; a one-element array holding <paramref name="p"/> when the
        /// only remaining child is not a POS tag; otherwise the newly created parses (possibly none).
        /// </returns>
        /// <exception cref="InvalidOperationException">Thrown when no node to advance could be selected.</exception>
        protected override Parse[] AdvanceParses(Parse p, double probMass)
        {
            // Threshold an option's probability must exceed to be advanced.
            var q = 1 - probMass;
            // The index of the node which will be labeled in this iteration of advancing the parse.
            int advanceNodeIndex;
            // The node which will be labeled in this iteration of advancing the parse.
            Parse advanceNode      = null;
            var   originalChildren = p.Children;
            var   children         = CollapsePunctuation(originalChildren, punctSet);
            var   numNodes         = children.Length;

            if (numNodes == 0)
            {
                return(null);
            }
            if (numNodes == 1)
            {
                // Put sentence initial and final punctuation in the top node.
                if (children[0].IsPosTag)
                {
                    return(null);
                }
                p.ExpandTopNode(children[0]);
                return(new[] { p });
            }
            // Determine which node needs to be advanced: the first child that is not built.
            // NOTE(review): if every child is built, the loop ends with advanceNodeIndex == numNodes
            // while advanceNode still refers to the last child - confirm callers never reach that state.
            for (advanceNodeIndex = 0; advanceNodeIndex < numNodes; advanceNodeIndex++)
            {
                advanceNode = children[advanceNodeIndex];
                if (!IsBuilt(advanceNode))
                {
                    break;
                }
            }

            if (advanceNode == null)
            {
                throw new InvalidOperationException("advanceNode is null.");
            }

            // Translate indexes in the punctuation-collapsed child list back to the original child list.
            var originalZeroIndex    = MapParseIndex(0, children, originalChildren);
            var originalAdvanceIndex = MapParseIndex(advanceNodeIndex, children, originalChildren);
            var newParsesList        = new List <Parse>();

            // Call the build model; the result is written into the bProbs buffer.
            buildModel.Eval(buildContextGenerator.GetContext(children, advanceNodeIndex), bProbs);
            // Probability stored at doneIndex; it is treated below as the probability of
            // attaching instead of building.
            var doneProb = bProbs[doneIndex];

            Debug("adi=" + advanceNodeIndex + " " + advanceNode.Type + "." + advanceNode.Label + " " + advanceNode + " choose build=" + (1 - doneProb) + " attach=" + doneProb);

            // Advance builds: only when the probability of building exceeds the threshold.
            if (1 - doneProb > q)
            {
                double bprobSum = 0;
                // Take build outcomes in order of decreasing probability until the
                // accumulated probability reaches probMass.
                while (bprobSum < probMass)
                {
                    // The largest un-advanced labeling.
                    var max = 0;
                    for (var pi = 1; pi < bProbs.Length; pi++)
                    {
                        // For each build outcome.
                        if (bProbs[pi] > bProbs[max])
                        {
                            max = pi;
                        }
                    }
                    // Nothing but zero probabilities left: stop.
                    if (bProbs[max].Equals(0d))
                    {
                        break;
                    }
                    var bprob = bProbs[max];
                    bProbs[max] = 0; // Zero out so a new max can be found on the next pass.
                    bprobSum   += bprob;
                    var tag = buildModel.GetOutcome(max);
                    // The DONE outcome only contributes to the accumulated mass; it creates no node.
                    if (!tag.Equals(DONE))
                    {
                        // Insert a new node labeled with the outcome into a clone of the parse.
                        var newParse1 = (Parse)p.Clone();
                        var newNode   = new Parse(p.Text, advanceNode.Span, tag, bprob, advanceNode.Head);
                        newParse1.Insert(newNode);
                        newParse1.AddProbability(Math.Log(bprob));
                        newParsesList.Add(newParse1);
                        if (checkComplete)
                        {
                            // Ask the check model whether the new node is complete.
                            cProbs =
                                checkModel.Eval(checkContextGenerator.GetContext(newNode, children, advanceNodeIndex,
                                                                                 false));

                            Debug("building " + tag + " " + bprob + " c=" + cProbs[completeIndex]);

                            if (cProbs[completeIndex] > probMass)
                            {
                                // Just the complete variant advances.
                                SetComplete(newNode);
                                newParse1.AddProbability(Math.Log(cProbs[completeIndex]));

                                Debug("Only advancing complete node");
                            }
                            else if (1 - cProbs[completeIndex] > probMass)
                            {
                                // Just the incomplete variant advances.
                                SetIncomplete(newNode);
                                newParse1.AddProbability(Math.Log(1 - cProbs[completeIndex]));
                                Debug("Only advancing incomplete node");
                            }
                            else
                            {
                                // Both the complete and the incomplete variant advance:
                                // newParse1 carries the complete node, newParse2 the incomplete one.
                                Debug("Advancing both complete and incomplete nodes");
                                SetComplete(newNode);
                                newParse1.AddProbability(Math.Log(cProbs[completeIndex]));

                                var newParse2 = (Parse)p.Clone();
                                var newNode2  = new Parse(p.Text, advanceNode.Span, tag, bprob, advanceNode.Head);
                                newParse2.Insert(newNode2);
                                newParse2.AddProbability(Math.Log(bprob));
                                newParsesList.Add(newParse2);
                                newParse2.AddProbability(Math.Log(1 - cProbs[completeIndex]));
                                SetIncomplete(newNode2); // Set incomplete for the non-clone.
                            }
                        }
                        else
                        {
                            Debug("building " + tag + " " + bprob);
                        }
                    }
                }
            }
            // Advance attaches: only when the done probability exceeds the threshold.
            if (doneProb > q)
            {
                var newParse1 = (Parse)p.Clone();  // Clone the parse.
                // Mark the advanced node as built by replacing its label in the clone.
                if (checkComplete)
                {
                    if (IsComplete(advanceNode))
                    {
                        newParse1.SetChild(originalAdvanceIndex, BUILT + "." + COMPLETE);
                        // Replace constituent being labeled to create new derivation.
                    }
                    else
                    {
                        newParse1.SetChild(originalAdvanceIndex, BUILT + "." + INCOMPLETE);
                        // Replace constituent being labeled to create new derivation.
                    }
                }
                else
                {
                    newParse1.SetChild(originalAdvanceIndex, BUILT);
                    // Replace constituent being labeled to create new derivation.
                }
                newParse1.AddProbability(Math.Log(doneProb));
                if (advanceNodeIndex == 0)
                {
                    // No attach if first node; the marked clone itself is the result.
                    newParsesList.Add(newParse1);
                }
                else
                {
                    // Try to attach the advanced node to each node of the right frontier in turn.
                    var rf = GetRightFrontier(p, punctSet);
                    for (int fi = 0, fs = rf.Count; fi < fs; fi++)
                    {
                        var fn = rf[fi];
                        // The attach model writes its result into the aProbs buffer.
                        attachModel.Eval(attachContextGenerator.GetContext(children, advanceNodeIndex, rf, fi), aProbs);
                        if (debugOn)
                        {
                            Debug("Frontier node(" + fi + "): " + fn.Type + "." + fn.Label + " " + fn + " <- " +
                                  advanceNode.Type + " " + advanceNode + " d=" + aProbs[daughterAttachIndex] + " s=" +
                                  aProbs[sisterAttachIndex] + " ");
                        }
                        foreach (int attachment in attachments)
                        {
                            var prob = aProbs[attachment];
                            // Should we try an attach if p > threshold and
                            // if !checkComplete then prevent daughter attaching to chunk
                            // if checkComplete then prevent daughter attaching to complete node or
                            //    sister attaching to an incomplete node
                            if (prob > q && (
                                    (!checkComplete && (attachment != daughterAttachIndex || !IsComplete(fn)))
                                    ||
                                    (checkComplete &&
                                     ((attachment == daughterAttachIndex && !IsComplete(fn)) ||
                                      (attachment == sisterAttachIndex && IsComplete(fn))))))
                            {
                                var newParse2 = newParse1.CloneRoot(fn, originalZeroIndex);
                                var newKids   = CollapsePunctuation(newParse2.Children, punctSet);
                                // Remove the node from the top level since we are going to attach it
                                // (including punctuation): the child at originalZeroIndex + 1 is removed
                                // once per index between originalZeroIndex + 1 and originalAdvanceIndex.
                                for (var ri = originalZeroIndex + 1; ri <= originalAdvanceIndex; ri++)
                                {
                                    newParse2.Remove(originalZeroIndex + 1);
                                }
                                // Right frontier of the cloned parse; indexed with the same fi as rf.
                                var   crf = GetRightFrontier(newParse2, punctSet);
                                Parse updatedNode;
                                if (attachment == daughterAttachIndex)
                                {
                                    // Attach as daughter of the frontier node.
                                    updatedNode = crf[fi];
                                    updatedNode.Add(advanceNode, headRules);
                                }
                                else
                                {
                                    // Attach as sister: adjoin at the next frontier node if there is one,
                                    // otherwise at the root of the cloned parse.
                                    Parse psite;
                                    if (fi + 1 < crf.Count)
                                    {
                                        psite       = crf[fi + 1];
                                        updatedNode = psite.AdJoin(advanceNode, headRules);
                                    }
                                    else
                                    {
                                        psite       = newParse2;
                                        updatedNode = psite.AdJoinRoot(advanceNode, headRules, originalZeroIndex);
                                        newKids[0]  = updatedNode;
                                    }
                                }
                                // Update spans affected by the attachment.
                                for (var ni = fi + 1; ni < crf.Count; ni++)
                                {
                                    var node = crf[ni];
                                    node.UpdateSpan();
                                }
                                newParse2.AddProbability(Math.Log(prob));
                                newParsesList.Add(newParse2);
                                if (checkComplete)
                                {
                                    // Ask the check model whether the node changed by the attachment is complete.
                                    cProbs =
                                        checkModel.Eval(checkContextGenerator.GetContext(updatedNode, newKids,
                                                                                         advanceNodeIndex, true));
                                    if (cProbs[completeIndex] > probMass)
                                    {
                                        SetComplete(updatedNode);
                                        newParse2.AddProbability(Math.Log(cProbs[completeIndex]));

                                        Debug("Only advancing complete node");
                                    }
                                    else if (1 - cProbs[completeIndex] > probMass)
                                    {
                                        SetIncomplete(updatedNode);
                                        newParse2.AddProbability(Math.Log(1 - cProbs[completeIndex]));
                                        Debug("Only advancing incomplete node");
                                    }
                                    else
                                    {
                                        // Both variants advance: newParse3 is cloned while the node is
                                        // marked complete, then the node in newParse2 is marked incomplete.
                                        SetComplete(updatedNode);
                                        var newParse3 = newParse2.CloneRoot(updatedNode, originalZeroIndex);
                                        newParse3.AddProbability(Math.Log(cProbs[completeIndex]));
                                        newParsesList.Add(newParse3);
                                        SetIncomplete(updatedNode);
                                        newParse2.AddProbability(Math.Log(1 - cProbs[completeIndex]));
                                        Debug("Advancing both complete and incomplete nodes; c=" + cProbs[completeIndex]);
                                    }
                                }
                            }
                            else
                            {
                                Debug("Skipping " + fn.Type + "." + fn.Label + " " + fn + " daughter=" +
                                      (attachment == daughterAttachIndex) + " complete=" + IsComplete(fn) +
                                      " prob=" + prob);
                            }
                        }
                        // With completeness checking, frontier nodes beyond an incomplete node are not tried.
                        if (checkComplete && !IsComplete(fn))
                        {
                            Debug("Stopping at incomplete node(" + fi + "): " + fn.Type + "." + fn.Label + " " + fn);
                            break;
                        }
                    }
                }
            }
            return(newParsesList.ToArray());
        }
Ejemplo n.º 10
0
 /// <summary>
 /// Creates a parser from a build model and a check model, allocating the probability
 /// buffers and indexing the build model's start/continue outcomes.
 /// </summary>
 /// <param name="buildModel">The model whose outcomes are scanned for start/continue labels.</param>
 /// <param name="checkModel">The model providing the complete/incomplete outcome indexes.</param>
 /// <param name="tagger">Passed through to the base constructor.</param>
 /// <param name="chunker">Passed through to the base constructor.</param>
 /// <param name="headRules">Passed through to the base constructor.</param>
 /// <param name="beamSize">Passed through to the base constructor.</param>
 /// <param name="advancePercentage">Passed through to the base constructor.</param>
 private Parser(IMaxentModel buildModel, IMaxentModel checkModel, IPOSTagger tagger, IChunker chunker,
     AbstractHeadRules headRules, int beamSize, double advancePercentage) :
         base(tagger, chunker, headRules, beamSize, advancePercentage) {
     this.buildModel = buildModel;
     this.checkModel = checkModel;

     var numBuildOutcomes = buildModel.GetNumOutcomes();

     // One probability slot per outcome of the respective model.
     bProbs = new double[numBuildOutcomes];
     cProbs = new double[checkModel.GetNumOutcomes()];
     buildContextGenerator = new BuildContextGenerator();
     checkContextGenerator = new CheckContextGenerator();
     startTypeMap = new Dictionary<string, string>();
     contTypeMap = new Dictionary<string, string>();

     // Outcomes with the START or CONT prefix are mapped to the text after the prefix.
     // The labels are identifiers, so the prefix is compared ordinally instead of
     // using the culture-sensitive default of string.StartsWith.
     for (var boi = 0; boi < numBuildOutcomes; boi++) {
         var outcome = buildModel.GetOutcome(boi);
         if (outcome.StartsWith(START, System.StringComparison.Ordinal)) {
             startTypeMap[outcome] = outcome.Substring(START.Length);
         } else if (outcome.StartsWith(CONT, System.StringComparison.Ordinal)) {
             contTypeMap[outcome] = outcome.Substring(CONT.Length);
         }
     }

     topStartIndex = buildModel.GetIndex(TOP_START);
     completeIndex = checkModel.GetIndex(COMPLETE);
     incompleteIndex = checkModel.GetIndex(INCOMPLETE);
 }
Ejemplo n.º 11
0
        /// <summary>
        /// Runs a beam search over the given sequence and returns up to
        /// <paramref name="numSequences"/> of the resulting outcome sequences.
        /// </summary>
        /// <param name="numSequences">The maximum number of sequences to return.</param>
        /// <param name="sequence">The input sequence to label.</param>
        /// <param name="additionalContext">
        /// Extra context objects handed to the context generator; <c>null</c> is replaced by an empty array.
        /// </param>
        /// <param name="minSequenceScore">Sequences whose score is not greater than this value are dropped.</param>
        /// <param name="beamSearch">The context generator queried for every position of the sequence.</param>
        /// <param name="validator">Decides whether an outcome may extend the outcomes chosen so far.</param>
        /// <returns>The sequences taken from the final heap, in the order the heap yields them.</returns>
        public Sequence[] BestSequences(int numSequences, T[] sequence, object[] additionalContext,
                                        double minSequenceScore,
                                        IBeamSearchContextGenerator <T> beamSearch, ISequenceValidator <T> validator)
        {
            IHeap <Sequence> current   = new ListHeap <Sequence>(size);
            IHeap <Sequence> following = new ListHeap <Sequence>(size);

            current.Add(new Sequence());

            if (additionalContext == null)
            {
                additionalContext = new object[] {};
            }

            for (var position = 0; position < sequence.Length; position++)
            {
                // At most "size" sequences of the current heap are expanded.
                var limit = Math.Min(size, current.Size());

                for (var taken = 0; current.Size() > 0 && taken < limit; taken++)
                {
                    var parent   = current.Extract();
                    var history  = parent.Outcomes.ToArray();
                    var contexts = beamSearch.GetContext(position, sequence, history, additionalContext);

                    // Use cached scores when a cache exists and holds this context;
                    // otherwise evaluate the model and remember the result.
                    double[] scores = null;
                    if (contextsCache != null)
                    {
                        scores = (double[])contextsCache.Get(contexts);
                    }
                    if (scores == null)
                    {
                        scores = model.Eval(contexts, probs);
                        if (contextsCache != null)
                        {
                            contextsCache.Put(contexts, scores);
                        }
                    }

                    // Sort a copy of the scores to find the smallest score still inside the beam.
                    var ranked = new double[scores.Length];
                    Array.Copy(scores, ranked, scores.Length);
                    Array.Sort(ranked);

                    var threshold = ranked[Math.Max(0, scores.Length - size)];

                    for (var index = 0; index < scores.Length; index++)
                    {
                        if (scores[index] < threshold)
                        {
                            continue; // only advance first "size" outcomes
                        }

                        var label = model.GetOutcome(index);
                        if (!validator.ValidSequence(position, sequence, history, label))
                        {
                            continue;
                        }

                        var extended = new Sequence(parent, label, scores[index]);
                        if (extended.Score > minSequenceScore)
                        {
                            following.Add(extended);
                        }
                    }

                    if (following.Size() != 0)
                    {
                        continue;
                    }

                    // No sequence was advanced so far: retry with every valid outcome,
                    // ignoring the beam threshold.
                    for (var index = 0; index < scores.Length; index++)
                    {
                        var label = model.GetOutcome(index);
                        if (!validator.ValidSequence(position, sequence, history, label))
                        {
                            continue;
                        }

                        var extended = new Sequence(parent, label, scores[index]);
                        if (extended.Score > minSequenceScore)
                        {
                            following.Add(extended);
                        }
                    }
                }

                // The expanded heap becomes the current one; the old heap is cleared and reused.
                current.Clear();

                var recycled = current;
                current   = following;
                following = recycled;
            }

            var resultCount = Math.Min(numSequences, current.Size());
            var result      = new Sequence[resultCount];

            for (var slot = 0; slot < resultCount; slot++)
            {
                result[slot] = current.Extract();
            }

            return(result);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Advances the specified parse and returns the an array advanced parses whose probability accounts for
        /// more than the specified amount of probability mass.
        /// </summary>
        /// <param name="p">The parse to advance.</param>
        /// <param name="probMass">The amount of probability mass that should be accounted for by the advanced parses.</param>
        protected override Parse[] AdvanceParses(Parse p, double probMass)
        {
            var q = 1 - probMass;
            /** The closest previous node which has been labeled as a start node. */
            Parse lastStartNode = null;
            /** The index of the closest previous node which has been labeled as a start node. */
            var lastStartIndex = -1;
            /** The type of the closest previous node which has been labeled as a start node. */
            string lastStartType = null;
            /** The index of the node which will be labeled in this iteration of advancing the parse. */
            int advanceNodeIndex;
            /** The node which will be labeled in this iteration of advancing the parse. */
            Parse advanceNode      = null;
            var   originalChildren = p.Children;
            var   children         = CollapsePunctuation(originalChildren, punctSet);
            var   numNodes         = children.Length;

            if (numNodes == 0)
            {
                return(null);
            }
            //determines which node needs to be labeled and prior labels.
            for (advanceNodeIndex = 0; advanceNodeIndex < numNodes; advanceNodeIndex++)
            {
                advanceNode = children[advanceNodeIndex];
                if (advanceNode.Label == null)
                {
                    break;
                }
                if (startTypeMap.ContainsKey(advanceNode.Label))
                {
                    lastStartType  = startTypeMap[advanceNode.Label];
                    lastStartNode  = advanceNode;
                    lastStartIndex = advanceNodeIndex;
                    //System.err.println("lastStart "+i+" "+lastStart.label+" "+lastStart.prob);
                }
            }
            var originalAdvanceIndex = MapParseIndex(advanceNodeIndex, children, originalChildren);
            var newParsesList        = new List <Parse>(buildModel.GetNumOutcomes());

            //call build
            buildModel.Eval(buildContextGenerator.GetContext(children, advanceNodeIndex), bProbs);
            var bProbSum = 0d;

            while (bProbSum < probMass)
            {
                // The largest un-advanced labeling.
                var max = 0;
                for (var pi = 1; pi < bProbs.Length; pi++)
                {
                    //for each build outcome
                    if (bProbs[pi] > bProbs[max])
                    {
                        max = pi;
                    }
                }
                if (bProbs[max].Equals(0d))
                {
                    break;
                }
                var bProb = bProbs[max];
                bProbs[max] = 0; //zero out so new max can be found
                bProbSum   += bProb;
                var tag = buildModel.GetOutcome(max);
                //System.out.println("trying "+tag+" "+bprobSum+" lst="+lst);
                if (max == topStartIndex)
                {
                    // can't have top until complete
                    continue;
                }
                //System.err.println(i+" "+tag+" "+bprob);
                if (startTypeMap.ContainsKey(tag))
                {
                    //update last start
                    lastStartIndex = advanceNodeIndex;
                    lastStartNode  = advanceNode;
                    lastStartType  = startTypeMap[tag];
                }
                else if (contTypeMap.ContainsKey(tag))
                {
                    if (lastStartNode == null || !lastStartType.Equals(contTypeMap[tag]))
                    {
                        continue; //Cont must match previous start or continue
                    }
                }
                var newParse1 = (Parse)p.Clone();  //clone parse

                if (createDerivationString)
                {
                    newParse1.Derivation.Append(max).Append("-");
                }

                newParse1.SetChild(originalAdvanceIndex, tag); //replace constituent being labeled to create new derivation
                newParse1.AddProbability(Math.Log(bProb));

                //check
                //String[] context = checkContextGenerator.getContext(newParse1.getChildren(), lastStartType, lastStartIndex, advanceNodeIndex);
                checkModel.Eval(
                    checkContextGenerator.GetContext(
                        CollapsePunctuation(newParse1.Children, punctSet),
                        lastStartType,
                        lastStartIndex,
                        advanceNodeIndex),
                    cProbs);

                //System.out.println("check "+lastStartType+" "+cprobs[completeIndex]+" "+cprobs[incompleteIndex]+" "+tag+" "+java.util.Arrays.asList(context));

                if (cProbs[completeIndex] > q)
                {
                    //make sure a reduce is likely
                    var newParse2 = (Parse)newParse1.Clone();

                    if (createDerivationString)
                    {
                        newParse2.Derivation.Append(1).Append(".");
                    }

                    newParse2.AddProbability(Math.Log(cProbs[completeIndex]));
                    var cons = new Parse[advanceNodeIndex - lastStartIndex + 1];
                    var flat = true;

                    if (lastStartNode == null)
                    {
                        throw new InvalidOperationException("lastStartNode is null.");
                    }

                    //first
                    cons[0] = lastStartNode;
                    flat   &= cons[0].IsPosTag;
                    //last
                    cons[advanceNodeIndex - lastStartIndex] = advanceNode;
                    flat &= cons[advanceNodeIndex - lastStartIndex].IsPosTag;
                    //middle
                    for (var ci = 1; ci < advanceNodeIndex - lastStartIndex; ci++)
                    {
                        cons[ci] = children[ci + lastStartIndex];
                        flat    &= cons[ci].IsPosTag;
                    }
                    if (!flat)
                    {
                        //flat chunks are done by chunker
                        if (lastStartIndex == 0 && advanceNodeIndex == numNodes - 1)
                        {
                            //check for top node to include end and beginning punctuation
                            //System.err.println("ParserME.advanceParses: reducing entire span: "+new Span(lastStartNode.getSpan().getStart(), advanceNode.getSpan().getEnd())+" "+lastStartType+" "+java.util.Arrays.asList(children));
                            newParse2.Insert(new Parse(p.Text, p.Span, lastStartType, cProbs[1],
                                                       headRules.GetHead(cons, lastStartType)));
                        }
                        else
                        {
                            newParse2.Insert(new Parse(p.Text, new Span(lastStartNode.Span.Start, advanceNode.Span.End),
                                                       lastStartType, cProbs[1], headRules.GetHead(cons, lastStartType)));
                        }
                        newParsesList.Add(newParse2);
                    }
                }
                if (cProbs[incompleteIndex] > q)
                {
                    //make sure a shift is likely
                    if (createDerivationString)
                    {
                        newParse1.Derivation.Append(0).Append(".");
                    }

                    if (advanceNodeIndex != numNodes - 1)
                    {
                        //can't shift last element
                        newParse1.AddProbability(Math.Log(cProbs[incompleteIndex]));
                        newParsesList.Add(newParse1);
                    }
                }
            }
            return(newParsesList.ToArray());
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Checks whether the outcome labels of the given model are accepted by the
        /// sequence codec produced by <c>Factory.CreateSequenceCodec()</c>.
        /// </summary>
        /// <param name="model">The model whose outcome labels are inspected.</param>
        /// <returns>
        /// The value returned by the codec's <c>AreOutcomesCompatible</c> call for the
        /// model's outcome labels, listed in index order.
        /// </returns>
        private bool IsModelValid(IMaxentModel model) {
            // Gather every outcome label, keeping the model's own index order.
            var labels = new string[model.GetNumOutcomes()];
            var index  = 0;

            while (index < model.GetNumOutcomes()) {
                labels[index] = model.GetOutcome(index);
                index++;
            }

            // The codec decides whether this label set is usable.
            var codec = Factory.CreateSequenceCodec();

            return codec.AreOutcomesCompatible(labels);
        }