Ejemplo n.º 1
0
 /// <summary>
 /// Creates a parser from the given build and check models and precomputes the outcome lookups
 /// that are derived from them.
 /// </summary>
 /// <param name="buildModel">Model whose outcomes are scanned for <c>START</c>- and <c>CONT</c>-prefixed labels.</param>
 /// <param name="checkModel">Model from which the <c>COMPLETE</c> and <c>INCOMPLETE</c> outcome indexes are read.</param>
 /// <param name="tagger">Forwarded to the base constructor.</param>
 /// <param name="chunker">Forwarded to the base constructor.</param>
 /// <param name="headRules">Forwarded to the base constructor.</param>
 /// <param name="beamSize">Forwarded to the base constructor.</param>
 /// <param name="advancePercentage">Forwarded to the base constructor.</param>
 private Parser(IMaxentModel buildModel, IMaxentModel checkModel, IPOSTagger tagger, IChunker chunker,
                AbstractHeadRules headRules, int beamSize, double advancePercentage) :
     base(tagger, chunker, headRules, beamSize, advancePercentage)
 {
     this.buildModel       = buildModel;
     this.checkModel       = checkModel;

     // Scratch buffers sized to hold one probability per model outcome.
     bProbs                = new double[buildModel.GetNumOutcomes()];
     cProbs                = new double[checkModel.GetNumOutcomes()];

     buildContextGenerator = new BuildContextGenerator();
     checkContextGenerator = new CheckContextGenerator();

     // Map each START-/CONT-prefixed build outcome to the label that follows the prefix.
     startTypeMap          = new Dictionary <string, string>();
     contTypeMap           = new Dictionary <string, string>();
     for (int boi = 0, bon = buildModel.GetNumOutcomes(); boi < bon; boi++)
     {
         var outcome = buildModel.GetOutcome(boi);

         // Outcome labels are machine identifiers, so the prefix is matched ordinally; the
         // single-argument StartsWith(string) is culture-sensitive and may ignore characters
         // or vary with the current culture.
         if (outcome.StartsWith(START, System.StringComparison.Ordinal))
         {
             startTypeMap[outcome] = outcome.Substring(START.Length);
         }
         else if (outcome.StartsWith(CONT, System.StringComparison.Ordinal))
         {
             contTypeMap[outcome] = outcome.Substring(CONT.Length);
         }
     }

     // Cache the indexes of the outcomes that are looked up by position later.
     topStartIndex   = buildModel.GetIndex(TOP_START);
     completeIndex   = checkModel.GetIndex(COMPLETE);
     incompleteIndex = checkModel.GetIndex(INCOMPLETE);
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Collects every outcome of the given model and asks a sequence codec created by
        /// <c>Factory</c> whether those outcomes are compatible.
        /// </summary>
        /// <param name="model">The model whose outcomes are checked.</param>
        /// <returns>The result of the codec's <c>AreOutcomesCompatible</c> call.</returns>
        private bool IsModelValid(IMaxentModel model)
        {
            var labels = new string[model.GetNumOutcomes()];

            // Copy the outcomes out of the model in index order.
            for (var slot = 0; slot < model.GetNumOutcomes(); ++slot)
            {
                labels[slot] = model.GetOutcome(slot);
            }

            var codec = Factory.CreateSequenceCodec();

            return codec.AreOutcomesCompatible(labels);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Determines whether the outcomes of the given model match the expected outcomes.
        /// </summary>
        /// <param name="model">The model whose outcomes are inspected.</param>
        /// <param name="expectedOutcomes">The outcomes the model is expected to have.</param>
        /// <returns>
        /// <c>true</c> if the model has exactly as many outcomes as <paramref name="expectedOutcomes"/> and
        /// each model outcome appears in <paramref name="expectedOutcomes"/>; <c>false</c> otherwise.
        /// </returns>
        public static bool ValidateOutcomes(IMaxentModel model, params string[] expectedOutcomes) {
            // A differing number of outcomes can never be a match.
            if (model.GetNumOutcomes() != expectedOutcomes.Length) {
                return false;
            }

            var total = model.GetNumOutcomes();
            for (var index = 0; index < total; index++) {
                var outcome = model.GetOutcome(index);

                // Stop at the first model outcome that was not expected.
                if (!expectedOutcomes.Contains(outcome)) {
                    return false;
                }
            }

            return true;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Creates a parser from build, attach and check models and caches the outcome indexes
        /// and probability buffers derived from them.
        /// </summary>
        /// <param name="buildModel">Model providing the <c>DONE</c> outcome index.</param>
        /// <param name="attachModel">Model providing the sister/daughter attach outcome indexes.</param>
        /// <param name="checkModel">Model providing the <c>COMPLETE</c> outcome index.</param>
        /// <param name="tagger">Forwarded to the base constructor.</param>
        /// <param name="chunker">Forwarded to the base constructor.</param>
        /// <param name="headRules">Forwarded to the base constructor.</param>
        /// <param name="beamSize">Forwarded to the base constructor.</param>
        /// <param name="advancePercentage">Forwarded to the base constructor.</param>
        private Parser(
            IMaxentModel buildModel,
            IMaxentModel attachModel,
            IMaxentModel checkModel,
            IPOSTagger tagger,
            IChunker chunker,
            AbstractHeadRules headRules,
            int beamSize,
            double advancePercentage)
            : base(tagger, chunker, headRules, beamSize, advancePercentage)
        {
            // Models.
            this.checkModel  = checkModel;
            this.attachModel = attachModel;
            this.buildModel  = buildModel;

            // Context generators; the attach and check generators receive the punctuation set.
            buildContextGenerator  = new BuildContextGenerator();
            attachContextGenerator = new AttachContextGenerator(punctSet);
            checkContextGenerator  = new CheckContextGenerator(punctSet);

            // One probability slot per outcome of the corresponding model.
            var buildOutcomeCount = buildModel.GetNumOutcomes();
            bProbs = new double[buildOutcomeCount];
            var attachOutcomeCount = attachModel.GetNumOutcomes();
            aProbs = new double[attachOutcomeCount];
            var checkOutcomeCount = checkModel.GetNumOutcomes();
            cProbs = new double[checkOutcomeCount];

            // Outcome indexes that are looked up by position later.
            doneIndex           = buildModel.GetIndex(DONE);
            sisterAttachIndex   = attachModel.GetIndex(ATTACH_SISTER);
            daughterAttachIndex = attachModel.GetIndex(ATTACH_DAUGHTER);

            // NOTE(review): the NON_ATTACH lookup below is disabled; confirm whether it is still needed.
            // nonAttachIndex = attachModel.GetIndex(NON_ATTACH);

            // Daughter attachment is listed before sister attachment.
            attachments   = new[] { daughterAttachIndex, sisterAttachIndex };
            completeIndex = checkModel.GetIndex(COMPLETE);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Returns the outcomes of the underlying model, in model index order.
        /// </summary>
        /// <returns>A new array holding one entry per model outcome.</returns>
        public string[] GetOutcomes()
        {
            var count  = model.GetNumOutcomes();
            var labels = new string[count];

            var index = 0;
            while (index < labels.Length)
            {
                labels[index] = model.GetOutcome(index);
                index++;
            }

            return labels;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Verifies that the given model's outcomes match the expected outcomes.
        /// </summary>
        /// <param name="model">The model to inspect.</param>
        /// <param name="expectedOutcomes">The outcomes the model should have.</param>
        /// <returns>
        /// <c>true</c> when the model's outcome count equals the number of expected outcomes and every
        /// model outcome is found among <paramref name="expectedOutcomes"/>; <c>false</c> otherwise.
        /// </returns>
        public static bool ValidateOutcomes(IMaxentModel model, params string[] expectedOutcomes)
        {
            // Reject immediately when the sizes disagree.
            if (model.GetNumOutcomes() != expectedOutcomes.Length)
            {
                return false;
            }

            var remaining = model.GetNumOutcomes();
            var position  = 0;

            while (position < remaining)
            {
                var candidate = model.GetOutcome(position);
                var isKnown   = expectedOutcomes.Contains(candidate);

                // The first unexpected outcome ends the check.
                if (!isKnown)
                {
                    return false;
                }

                position++;
            }

            return true;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Initializes a beam search over the given model, optionally backed by a contexts cache.
        /// </summary>
        /// <param name="size">The size of the beam (k).</param>
        /// <param name="model">The model for assigning probabilities to the sequence outcomes.</param>
        /// <param name="cacheSize">Size of the cache; a cache is only created when this is greater than zero.</param>
        public BeamSearch(int size, IMaxentModel model, int cacheSize)
        {
            this.model = model;
            this.size  = size;

            // A non-positive cache size leaves the contexts cache unassigned.
            var cachingRequested = cacheSize > 0;
            if (cachingRequested)
            {
                contextsCache = new Cache(cacheSize);
            }

            // One probability slot per model outcome.
            var outcomeCount = model.GetNumOutcomes();
            probs = new double[outcomeCount];
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Initializes a beam search with a context generator, a model and a sequence validator,
        /// optionally backed by a contexts cache.
        /// </summary>
        /// <param name="size">The size of the beam (k).</param>
        /// <param name="cg">The context generator for the model.</param>
        /// <param name="model">The model for assigning probabilities to the sequence outcomes.</param>
        /// <param name="validator">The sequence validator.</param>
        /// <param name="cacheSize">Size of the cache; a cache is only created when this is greater than zero.</param>
        public BeamSearch(int size, IBeamSearchContextGenerator <T> cg, IMaxentModel model, ISequenceValidator <T> validator, int cacheSize)
        {
            this.size      = size;
            this.cg        = cg;
            this.validator = validator;
            this.model     = model;

            // A non-positive cache size leaves the contexts cache unassigned.
            var cachingRequested = cacheSize > 0;
            if (cachingRequested)
            {
                contextsCache = new Cache(cacheSize);
            }

            // One probability slot per model outcome.
            var outcomeCount = model.GetNumOutcomes();
            probs = new double[outcomeCount];
        }
Ejemplo n.º 9
0
 /// <summary>
 /// Creates a parser from the given build and check models and precomputes the outcome lookups
 /// that are derived from them.
 /// </summary>
 /// <param name="buildModel">Model whose outcomes are scanned for <c>START</c>- and <c>CONT</c>-prefixed labels.</param>
 /// <param name="checkModel">Model from which the <c>COMPLETE</c> and <c>INCOMPLETE</c> outcome indexes are read.</param>
 /// <param name="tagger">Forwarded to the base constructor.</param>
 /// <param name="chunker">Forwarded to the base constructor.</param>
 /// <param name="headRules">Forwarded to the base constructor.</param>
 /// <param name="beamSize">Forwarded to the base constructor.</param>
 /// <param name="advancePercentage">Forwarded to the base constructor.</param>
 private Parser(IMaxentModel buildModel, IMaxentModel checkModel, IPOSTagger tagger, IChunker chunker,
     AbstractHeadRules headRules, int beamSize, double advancePercentage) :
         base(tagger, chunker, headRules, beamSize, advancePercentage) {
     this.buildModel = buildModel;
     this.checkModel = checkModel;

     // Scratch buffers sized to hold one probability per model outcome.
     bProbs = new double[buildModel.GetNumOutcomes()];
     cProbs = new double[checkModel.GetNumOutcomes()];

     buildContextGenerator = new BuildContextGenerator();
     checkContextGenerator = new CheckContextGenerator();

     // Map each START-/CONT-prefixed build outcome to the label that follows the prefix.
     startTypeMap = new Dictionary<string, string>();
     contTypeMap = new Dictionary<string, string>();
     for (int boi = 0, bon = buildModel.GetNumOutcomes(); boi < bon; boi++) {
         var outcome = buildModel.GetOutcome(boi);

         // Outcome labels are machine identifiers, so the prefix is matched ordinally; the
         // single-argument StartsWith(string) is culture-sensitive and may ignore characters
         // or vary with the current culture.
         if (outcome.StartsWith(START, System.StringComparison.Ordinal)) {
             startTypeMap[outcome] = outcome.Substring(START.Length);
         } else if (outcome.StartsWith(CONT, System.StringComparison.Ordinal)) {
             contTypeMap[outcome] = outcome.Substring(CONT.Length);
         }
     }

     // Cache the indexes of the outcomes that are looked up by position later.
     topStartIndex = buildModel.GetIndex(TOP_START);
     completeIndex = checkModel.GetIndex(COMPLETE);
     incompleteIndex = checkModel.GetIndex(INCOMPLETE);
 }
Ejemplo n.º 10
0
        /// <summary>
        /// Builds a parser around the build, attach and check models, preparing the context
        /// generators, probability buffers and outcome indexes it relies on.
        /// </summary>
        /// <param name="buildModel">Model providing the <c>DONE</c> outcome index.</param>
        /// <param name="attachModel">Model providing the sister/daughter attach outcome indexes.</param>
        /// <param name="checkModel">Model providing the <c>COMPLETE</c> outcome index.</param>
        /// <param name="tagger">Forwarded to the base constructor.</param>
        /// <param name="chunker">Forwarded to the base constructor.</param>
        /// <param name="headRules">Forwarded to the base constructor.</param>
        /// <param name="beamSize">Forwarded to the base constructor.</param>
        /// <param name="advancePercentage">Forwarded to the base constructor.</param>
        private Parser(
            IMaxentModel buildModel,
            IMaxentModel attachModel,
            IMaxentModel checkModel,
            IPOSTagger tagger,
            IChunker chunker,
            AbstractHeadRules headRules,
            int beamSize,
            double advancePercentage) : base(tagger, chunker, headRules, beamSize, advancePercentage) {

            // Models.
            this.attachModel = attachModel;
            this.checkModel = checkModel;
            this.buildModel = buildModel;

            // Context generators; the attach and check generators receive the punctuation set.
            buildContextGenerator = new BuildContextGenerator();
            attachContextGenerator = new AttachContextGenerator(punctSet);
            checkContextGenerator = new CheckContextGenerator(punctSet);

            // One probability slot per outcome of the corresponding model.
            var buildOutcomeCount = buildModel.GetNumOutcomes();
            bProbs = new double[buildOutcomeCount];
            var attachOutcomeCount = attachModel.GetNumOutcomes();
            aProbs = new double[attachOutcomeCount];
            var checkOutcomeCount = checkModel.GetNumOutcomes();
            cProbs = new double[checkOutcomeCount];

            // Outcome indexes that are looked up by position later.
            doneIndex = buildModel.GetIndex(DONE);
            sisterAttachIndex = attachModel.GetIndex(ATTACH_SISTER);
            daughterAttachIndex = attachModel.GetIndex(ATTACH_DAUGHTER);

            // NOTE(review): the NON_ATTACH lookup below is disabled; confirm whether it is still needed.
            // nonAttachIndex = attachModel.GetIndex(NON_ATTACH);

            // Daughter attachment is listed before sister attachment.
            attachments = new[] {daughterAttachIndex, sisterAttachIndex};
            completeIndex = checkModel.GetIndex(COMPLETE);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Advances the specified parse and returns an array of advanced parses whose probability accounts for
        /// more than the specified amount of probability mass.
        /// </summary>
        /// <param name="p">The parse to advance.</param>
        /// <param name="probMass">The amount of probability mass that should be accounted for by the advanced parses.</param>
        /// <returns>
        /// <c>null</c> when <c>CollapsePunctuation</c> yields no children for <paramref name="p"/>; otherwise the
        /// (possibly empty) array of new parses created by this step.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the complete outcome's probability exceeds <c>1 - probMass</c> for a candidate but no
        /// start node has been recorded.
        /// </exception>
        protected override Parse[] AdvanceParses(Parse p, double probMass)
        {
            // Threshold that the complete/incomplete check probabilities must exceed below.
            var q = 1 - probMass;
            // The closest previous node which has been labeled as a start node.
            Parse lastStartNode = null;
            // The index of the closest previous node which has been labeled as a start node.
            var lastStartIndex = -1;
            // The type of the closest previous node which has been labeled as a start node.
            string lastStartType = null;
            // The index of the node which will be labeled in this iteration of advancing the parse.
            int advanceNodeIndex;
            // The node which will be labeled in this iteration of advancing the parse.
            Parse advanceNode      = null;
            var   originalChildren = p.Children;
            var   children         = CollapsePunctuation(originalChildren, punctSet);
            var   numNodes         = children.Length;

            if (numNodes == 0)
            {
                return(null);
            }
            // Determine which node needs to be labeled and record the most recent start label before it.
            // If every child already has a label the loop ends without breaking, leaving advanceNodeIndex
            // equal to numNodes and advanceNode set to the last child.
            for (advanceNodeIndex = 0; advanceNodeIndex < numNodes; advanceNodeIndex++)
            {
                advanceNode = children[advanceNodeIndex];
                if (advanceNode.Label == null)
                {
                    break;
                }
                if (startTypeMap.ContainsKey(advanceNode.Label))
                {
                    lastStartType  = startTypeMap[advanceNode.Label];
                    lastStartNode  = advanceNode;
                    lastStartIndex = advanceNodeIndex;
                }
            }
            // Index of the node to label within the original children (presumably MapParseIndex translates
            // from the collapsed array to the original one; helper not visible here).
            var originalAdvanceIndex = MapParseIndex(advanceNodeIndex, children, originalChildren);
            var newParsesList        = new List <Parse>(buildModel.GetNumOutcomes());

            // Evaluate the build model for the node being labeled; the probabilities land in bProbs.
            buildModel.Eval(buildContextGenerator.GetContext(children, advanceNodeIndex), bProbs);
            var bProbSum = 0d;

            // Try build outcomes from most to least probable until their summed probability reaches probMass.
            while (bProbSum < probMass)
            {
                // The largest un-advanced labeling.
                var max = 0;
                for (var pi = 1; pi < bProbs.Length; pi++)
                {
                    // For each build outcome, keep the index of the highest remaining probability.
                    if (bProbs[pi] > bProbs[max])
                    {
                        max = pi;
                    }
                }
                // Every remaining outcome has been zeroed out (or had zero probability): nothing left to try.
                if (bProbs[max].Equals(0d))
                {
                    break;
                }
                var bProb = bProbs[max];
                bProbs[max] = 0; // zero out so a new max can be found on the next pass
                bProbSum   += bProb;
                var tag = buildModel.GetOutcome(max);
                if (max == topStartIndex)
                {
                    // can't have top until complete
                    continue;
                }
                if (startTypeMap.ContainsKey(tag))
                {
                    // Update the last-start state to the node being labeled.
                    // NOTE(review): these locals persist across iterations of this while loop, so later
                    // candidate tags in the same call see the updated state - confirm this is intended.
                    lastStartIndex = advanceNodeIndex;
                    lastStartNode  = advanceNode;
                    lastStartType  = startTypeMap[tag];
                }
                else if (contTypeMap.ContainsKey(tag))
                {
                    if (lastStartNode == null || !lastStartType.Equals(contTypeMap[tag]))
                    {
                        continue; // a continue label must match the type of the previous start
                    }
                }
                var newParse1 = (Parse)p.Clone();  // work on a copy so p itself is not modified here

                if (createDerivationString)
                {
                    newParse1.Derivation.Append(max).Append("-");
                }

                newParse1.SetChild(originalAdvanceIndex, tag); // replace constituent being labeled to create new derivation
                newParse1.AddProbability(Math.Log(bProb));

                // Evaluate the check model for the newly labeled parse; the probabilities land in cProbs.
                checkModel.Eval(
                    checkContextGenerator.GetContext(
                        CollapsePunctuation(newParse1.Children, punctSet),
                        lastStartType,
                        lastStartIndex,
                        advanceNodeIndex),
                    cProbs);

                if (cProbs[completeIndex] > q)
                {
                    // The complete outcome is likely enough: build a second parse that reduces the span.
                    var newParse2 = (Parse)newParse1.Clone();

                    if (createDerivationString)
                    {
                        // NOTE(review): presumably 1 marks the complete decision in the derivation string - confirm.
                        newParse2.Derivation.Append(1).Append(".");
                    }

                    newParse2.AddProbability(Math.Log(cProbs[completeIndex]));
                    // Nodes from the last start node through the node being labeled, inclusive.
                    var cons = new Parse[advanceNodeIndex - lastStartIndex + 1];
                    // Stays true only while every node in the span reports IsPosTag.
                    var flat = true;

                    if (lastStartNode == null)
                    {
                        throw new InvalidOperationException("lastStartNode is null.");
                    }

                    // first
                    cons[0] = lastStartNode;
                    flat   &= cons[0].IsPosTag;
                    // last
                    cons[advanceNodeIndex - lastStartIndex] = advanceNode;
                    flat &= cons[advanceNodeIndex - lastStartIndex].IsPosTag;
                    // middle
                    for (var ci = 1; ci < advanceNodeIndex - lastStartIndex; ci++)
                    {
                        cons[ci] = children[ci + lastStartIndex];
                        flat    &= cons[ci].IsPosTag;
                    }
                    if (!flat)
                    {
                        // flat chunks are done by chunker
                        // NOTE(review): both branches pass cProbs[1] rather than cProbs[completeIndex] as the
                        // new node's probability - confirm index 1 is the intended outcome.
                        if (lastStartIndex == 0 && advanceNodeIndex == numNodes - 1)
                        {
                            // The span covers all collapsed children: use the whole span of p so the top node
                            // includes end and beginning punctuation.
                            newParse2.Insert(new Parse(p.Text, p.Span, lastStartType, cProbs[1],
                                                       headRules.GetHead(cons, lastStartType)));
                        }
                        else
                        {
                            // Otherwise the new node spans from the start node's start to the labeled node's end.
                            newParse2.Insert(new Parse(p.Text, new Span(lastStartNode.Span.Start, advanceNode.Span.End),
                                                       lastStartType, cProbs[1], headRules.GetHead(cons, lastStartType)));
                        }
                        newParsesList.Add(newParse2);
                    }
                }
                if (cProbs[incompleteIndex] > q)
                {
                    // The incomplete outcome is likely enough: keep the labeled parse without reducing.
                    if (createDerivationString)
                    {
                        // NOTE(review): presumably 0 marks the incomplete decision in the derivation string - confirm.
                        newParse1.Derivation.Append(0).Append(".");
                    }

                    if (advanceNodeIndex != numNodes - 1)
                    {
                        // can't shift last element
                        newParse1.AddProbability(Math.Log(cProbs[incompleteIndex]));
                        newParsesList.Add(newParse1);
                    }
                }
            }
            return(newParsesList.ToArray());
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Gathers all outcomes of the given model and checks them against a sequence codec
        /// obtained from <c>Factory</c>.
        /// </summary>
        /// <param name="model">The model whose outcomes are checked.</param>
        /// <returns>The result of the codec's <c>AreOutcomesCompatible</c> call.</returns>
        private bool IsModelValid(IMaxentModel model) {
            var labels = new string[model.GetNumOutcomes()];

            // Copy the outcomes out of the model in index order.
            var position = 0;
            while (position < model.GetNumOutcomes()) {
                labels[position] = model.GetOutcome(position);
                position++;
            }

            var codec = Factory.CreateSequenceCodec();
            return codec.AreOutcomesCompatible(labels);
        }