Пример #1
0
 /// <summary>
 /// Scores the final build and check decisions for a parse and labels it as the top node.
 /// </summary>
 /// <param name="inputParse">
 /// The parse to finish. Its probability is increased by the log of the top-start build outcome and
 /// the log of the complete check outcome, and its type is set to <c>TopNode</c>.
 /// </param>
 private void AdvanceTop(Parse inputParse)
 {
     // Build decision, evaluated at child position 0.
     var buildContext = mBuildContextGenerator.GetContext(inputParse.GetChildren(), 0);
     mBuildModel.Evaluate(buildContext, mBuildProbabilities);
     double topStartProbability = mBuildProbabilities[mTopStartIndex];
     inputParse.AddProbability(Math.Log(topStartProbability));

     // Check decision for a constituent of type TopNode.
     var checkContext = mCheckContextGenerator.GetContext(inputParse.GetChildren(), TopNode, 0, 0);
     mCheckModel.Evaluate(checkContext, mCheckProbabilities);
     double completeProbability = mCheckProbabilities[mCompleteIndex];
     inputParse.AddProbability(Math.Log(completeProbability));

     inputParse.Type = TopNode;
 }
Пример #2
0
 /// <summary>
 /// Scores the final build and check decisions for a parse and labels it as the top node.
 /// </summary>
 /// <param name="inputParse">The parse to finish; its probability and type are updated in place.</param>
 /// <param name="buildProbabilities">Buffer that receives the build model's outcome probabilities.</param>
 /// <param name="checkProbabilities">Buffer that receives the check model's outcome probabilities.</param>
 private void AdvanceTop(Parse inputParse, double[] buildProbabilities, double[] checkProbabilities)
 {
     // Build decision at child position 0: add log P(top start).
     var contextForBuild = buildContextGenerator.GetContext(inputParse.GetChildren(), 0);
     buildModel.Evaluate(contextForBuild, buildProbabilities);
     double logTopStart = Math.Log(buildProbabilities[topStartIndex]);
     inputParse.AddProbability(logTopStart);

     // Check decision for a TopNode constituent: add log P(complete).
     var contextForCheck = checkContextGenerator.GetContext(inputParse.GetChildren(), TopNode, 0, 0);
     checkModel.Evaluate(contextForCheck, checkProbabilities);
     double logComplete = Math.Log(checkProbabilities[completeIndex]);
     inputParse.AddProbability(logComplete);

     inputParse.Type = TopNode;
 }
Пример #3
0
        /// <summary>
        /// Appends one POS-tagging training event per token found in the supplied chunks.
        /// </summary>
        /// <param name="events">The list that receives the generated training events.</param>
        /// <param name="chunks">
        /// The chunk-level parses. A node that is itself a POS tag contributes itself; any other node
        /// contributes each of its direct children.
        /// </param>
        private void AddTagEvents(List <SharpEntropy.TrainingEvent> events, Parse[] chunks)
        {
            List <string> tokens     = new List <string>();
            List <string> predicates = new List <string>();

            foreach (Parse chunkParse in chunks)
            {
                if (chunkParse.IsPosTag)
                {
                    tokens.Add(chunkParse.ToString());
                    predicates.Add(chunkParse.Type);
                }
                else
                {
                    foreach (Parse tokenParse in chunkParse.GetChildren())
                    {
                        tokens.Add(tokenParse.ToString());
                        predicates.Add(tokenParse.Type);
                    }
                }
            }

            // Copy the lists once instead of once per token; the lists no longer change from here on.
            // NOTE(review): assumes GetContext does not modify the arrays it is given - confirm.
            string[] tokenArray = tokens.ToArray();
            string[] tagArray   = predicates.ToArray();

            for (int currentToken = 0; currentToken < tokenArray.Length; currentToken++)
            {
                events.Add(new SharpEntropy.TrainingEvent(tagArray[currentToken], mPosContextGenerator.GetContext(currentToken, tokenArray, tagArray, null)));
            }
        }
Пример #4
0
        /// <summary>
        /// Tags the words of a parse with the POS tagger and builds one new parse per returned tag sequence.
        /// </summary>
        /// <param name="inputParse">The parse whose children are the words to tag.</param>
        /// <returns>
        /// One clone of <paramref name="inputParse"/> per tag sequence, each with a tag node inserted for
        /// every word and the log of each tag probability added; empty when the tagger returns no sequences.
        /// </returns>
        private Parse[] AdvanceTags(Parse inputParse)
        {
            Parse[] wordNodes = inputParse.GetChildren();
            int     wordCount = wordNodes.Length;

            var words = new string[wordCount];
            for (int i = 0; i < wordCount; i++)
            {
                words[i] = wordNodes[i].ToString();
            }
            // Reused for every sequence; refilled by GetProbabilities below.
            var scores = new double[wordCount];

            Util.Sequence[] tagSequences  = posTagger.TopKSequences(words);
            int             sequenceCount = tagSequences.Length;
            if (sequenceCount == 0)
            {
                Console.Error.WriteLine("no tag sequence");
            }

            var taggedParses = new Parse[sequenceCount];
            for (int s = 0; s < sequenceCount; s++)
            {
                string[] tags = tagSequences[s].Outcomes.ToArray();
                tagSequences[s].GetProbabilities(scores);

                // Clone copies the top level only; the tag nodes are inserted into the clone.
                var tagged = (Parse)inputParse.Clone();
                taggedParses[s] = tagged;

                for (int w = 0; w < wordCount; w++)
                {
                    Parse  wordNode       = wordNodes[w];
                    double tagProbability = scores[w];
                    tagged.Insert(new Parse(wordNode.Text, wordNode.Span, tags[w], tagProbability));
                    tagged.AddProbability(Math.Log(tagProbability));
                }
            }
            return taggedParses;
        }
Пример #5
0
 /// <summary>
 /// Collects the initial chunks of a parse tree into a list, in left-to-right order.
 /// </summary>
 /// <remarks>
 /// A node is added as a chunk when it is a POS tag, or when every one of its children is a POS tag
 /// (which includes a non-tag node with no children). Otherwise each child is searched recursively.
 /// </remarks>
 /// <param name="inputParse">The node to examine.</param>
 /// <param name="initialChunks">The list that receives the chunks.</param>
 private static void GetInitialChunks(Parse inputParse, List <Parse> initialChunks)
 {
     if (inputParse.IsPosTag)
     {
         initialChunks.Add(inputParse);
         return;
     }

     Parse[] childNodes  = inputParse.GetChildren();
     bool    foundNonTag = false;
     foreach (Parse child in childNodes)
     {
         if (!child.IsPosTag)
         {
             foundNonTag = true;
             break;
         }
     }

     if (!foundNonTag)
     {
         initialChunks.Add(inputParse);
         return;
     }

     foreach (Parse child in childNodes)
     {
         GetInitialChunks(child, initialChunks);
     }
 }
Пример #6
0
        /// <summary>
        /// Renders a parse as a tree diagram in the Lithium control.
        /// </summary>
        /// <remarks>
        /// When the supplied parse has the parser's top-node type, its first child is displayed instead.
        /// </remarks>
        /// <param name="parse">The parse to display.</param>
        public void ShowParse(Parse parse)
        {
            this.lithiumControl.NewDiagram();

            // Skip the top wrapper node and show its first child.
            Parse displayed = parse.Type == MaximumEntropyParser.TopNode
                ? parse.GetChildren()[0]
                : parse;

            ShapeBase rootShape = this.lithiumControl.Root;
            rootShape.Text    = displayed.Type;
            rootShape.Visible = true;

            AddChildNodes(rootShape, displayed.GetChildren());
            rootShape.Expand();

            this.lithiumControl.DrawTree();
        }
Пример #7
0
        /// <summary>
        /// Returns the POS-tag nodes found beneath this node, in left-to-right order.
        /// </summary>
        /// <remarks>
        /// The search descends through every non-tag node and stops at each tag node, so tag nodes at any
        /// depth are returned, not only direct children.
        /// </remarks>
        /// <returns>
        /// The POS-tag nodes beneath this node; an empty array when there are none.
        /// </returns>
        public virtual Parse[] GetTagNodes()
        {
            var tags    = new List <Parse>();
            var initial = new List <Parse>(_parts);

            // A stack with nodes pushed in reverse order visits them in the same order as repeatedly
            // removing from and inserting at the front of a list, without shifting the list each time.
            var pending = new Stack <Parse>();
            for (int index = initial.Count - 1; index >= 0; index--)
            {
                pending.Push(initial[index]);
            }

            while (pending.Count != 0)
            {
                Parse currentParse = pending.Pop();
                if (currentParse.IsPosTag)
                {
                    tags.Add(currentParse);
                }
                else
                {
                    Parse[] kids = currentParse.GetChildren();
                    for (int index = kids.Length - 1; index >= 0; index--)
                    {
                        pending.Push(kids[index]);
                    }
                }
            }
            return tags.ToArray();
        }
Пример #8
0
        /// <summary>
        /// Appends one chunking training event per token found in the supplied chunks.
        /// </summary>
        /// <remarks>
        /// A node that is itself a POS tag gets the outcome <c>MaximumEntropyParser.OtherOutcome</c>. For any
        /// other node, its first child gets the start prefix plus the node's type and the remaining
        /// children get the continue prefix plus the node's type.
        /// </remarks>
        /// <param name="events">The list that receives the generated training events.</param>
        /// <param name="chunks">The chunk-level parses to turn into events.</param>
        private void AddChunkEvents(List <SharpEntropy.TrainingEvent> events, Parse[] chunks)
        {
            List <string> tokens     = new List <string>();
            List <string> tags       = new List <string>();
            List <string> predicates = new List <string>();

            foreach (Parse chunkParse in chunks)
            {
                if (chunkParse.IsPosTag)
                {
                    tokens.Add(chunkParse.ToString());
                    tags.Add(chunkParse.Type);
                    predicates.Add(MaximumEntropyParser.OtherOutcome);
                }
                else
                {
                    bool   isStart   = true;
                    string chunkType = chunkParse.Type;
                    foreach (Parse tokenParse in chunkParse.GetChildren())
                    {
                        tokens.Add(tokenParse.ToString());
                        tags.Add(tokenParse.Type);
                        if (isStart)
                        {
                            predicates.Add(MaximumEntropyParser.StartPrefix + chunkType);
                            isStart = false;
                        }
                        else
                        {
                            predicates.Add(MaximumEntropyParser.ContinuePrefix + chunkType);
                        }
                    }
                }
            }

            // Copy the lists once instead of three times per token; the lists no longer change from here on.
            // NOTE(review): assumes GetContext does not modify the arrays it is given - confirm.
            string[] tokenArray     = tokens.ToArray();
            string[] tagArray       = tags.ToArray();
            string[] predicateArray = predicates.ToArray();

            for (int currentToken = 0; currentToken < tokenArray.Length; currentToken++)
            {
                events.Add(new SharpEntropy.TrainingEvent(predicateArray[currentToken], mChunkContextGenerator.GetContext(currentToken, tokenArray, tagArray, predicateArray)));
            }
        }
Пример #9
0
 /// <summary>
 /// Tests whether a node is the first child of a parent node.
 /// </summary>
 /// <param name="child">The node to look for.</param>
 /// <param name="parent">The node whose children are inspected.</param>
 /// <returns>True when the parent's first child compares equal to <paramref name="child"/> with <c>==</c>.</returns>
 private static bool IsFirstChild(Parse child, Parse parent)
 {
     Parse firstKid = parent.GetChildren()[0];
     return firstKid == child;
 }
Пример #10
0
        /// <summary>
        /// Returns the top chunk sequences for the specified parse.
        /// </summary>
        /// <remarks>
        /// Each chunker sequence yields one clone of the input parse. A tag beginning with the start prefix
        /// opens a chunk, tags beginning with the continue prefix extend the current end position, and any
        /// other tag (or the end of the sequence) closes the open chunk and inserts it as a constituent.
        /// </remarks>
        /// <param name="inputParse">
        /// A pos-tag assigned parse.
        /// </param>
        /// <param name="minChunkScore">
        /// The minimum probability for an allowed chunk sequence.
        /// </param>
        /// <returns>
        /// The top chunk assignments to the specified parse, one new parse per chunker sequence.
        /// </returns>
        private Parse[] AdvanceChunks(Parse inputParse, double minChunkScore)
        {
            Parse[] children      = inputParse.GetChildren();
            var     words         = new string[children.Length];
            var     parseTags     = new string[words.Length];
            var     probabilities = new double[words.Length];

            for (int childParseIndex = 0, childParseCount = children.Length; childParseIndex < childParseCount; childParseIndex++)
            {
                Parse currentChildParse = children[childParseIndex];
                words[childParseIndex]     = currentChildParse.Head.ToString();
                parseTags[childParseIndex] = currentChildParse.Type;
            }

            // The threshold passed to the chunker is offset by the probability already on the input parse.
            Util.Sequence[] chunkerSequences = basalChunker.TopKSequences(words, parseTags, minChunkScore - inputParse.Probability);
            var             newParses        = new Parse[chunkerSequences.Length];

            for (int sequenceIndex = 0, sequenceCount = chunkerSequences.Length; sequenceIndex < sequenceCount; sequenceIndex++)
            {
                var newParse = (Parse)inputParse.Clone();                  // copies top level
                newParses[sequenceIndex] = newParse;
                if (CreateDerivationString)
                {
                    newParse.AppendDerivationBuffer(sequenceIndex.ToString(System.Globalization.CultureInfo.InvariantCulture));
                    newParse.AppendDerivationBuffer(".");
                }

                string[] tags = chunkerSequences[sequenceIndex].Outcomes.ToArray();
                chunkerSequences[sequenceIndex].GetProbabilities(probabilities);

                int    start = -1;
                int    end   = 0;
                string type  = null;                                       // type of the open chunk, null when none is open

                // One extra iteration past the last tag closes a chunk that runs to the end of the sequence.
                for (int tagIndex = 0; tagIndex <= tags.Length; tagIndex++)
                {
                    bool pastEnd = tagIndex == tags.Length;
                    if (!pastEnd)
                    {
                        newParse.AddProbability(Math.Log(probabilities[tagIndex]));
                    }

                    // Tag prefixes are identifiers, so compare them ordinally rather than by culture.
                    if (!pastEnd && tags[tagIndex].StartsWith(ContinuePrefix, StringComparison.Ordinal))
                    {
                        // A continue tag only moves the end of the chunk; the continue type map is not consulted.
                        end = tagIndex;
                        continue;
                    }

                    // Make the previous constituent if one is open.
                    if (type != null)
                    {
                        Parse startParse = children[start];
                        Parse endParse   = children[end];

                        var constituents = new Parse[end - start + 1];
                        Array.Copy(children, start, constituents, 0, constituents.Length);

                        newParse.Insert(new Parse(startParse.Text, new Util.Span(startParse.Span.Start, endParse.Span.End), type, 1, headRules.GetHead(constituents, type)));
                    }

                    if (!pastEnd)
                    {
                        // Update for the new constituent.
                        if (tags[tagIndex].StartsWith(StartPrefix, StringComparison.Ordinal))
                        {
                            // These are chunk tags, so the start type map is not consulted.
                            type  = tags[tagIndex].Substring(StartPrefix.Length);
                            start = tagIndex;
                            end   = tagIndex;
                        }
                        else
                        {
                            // Any other tag leaves no chunk open.
                            type = null;
                        }
                    }
                }
            }
            return newParses;
        }
Пример #11
0
        /// <summary>
        /// Advances the specified parse by labeling its first unlabeled child, and returns an array of advanced
        /// parses built from the build outcomes that account for the specified amount of probability mass, Q.
        /// </summary>
        /// <remarks>
        /// Build outcomes are tried from most to least probable until their summed probability reaches
        /// <paramref name="qParam"/> or only zero-probability outcomes remain. Each accepted labeling can
        /// produce a reduced parse (check outcome "complete") and a shifted parse (check outcome "incomplete"),
        /// each only when its check probability exceeds 1 - <paramref name="qParam"/>.
        /// </remarks>
        /// <param name="inputParse">
        /// The parse to advance.
        /// </param>
        /// <param name="qParam">
        /// The amount of probability mass that should be accounted for by the advanced parses.
        /// </param>
        /// <param name="buildProbabilities">
        /// Buffer filled by the build model. Entries are set to zero as outcomes are consumed, so its contents
        /// are not meaningful after the call.
        /// </param>
        /// <param name="checkProbabilities">
        /// Buffer filled by the check model for each candidate labeling.
        /// </param>
        /// <returns>
        /// The advanced parses; empty when no candidate labeling is accepted.
        /// </returns>
        private Parse[] AdvanceParses(Parse inputParse, double qParam, double[] buildProbabilities, double[] checkProbabilities)
        {
            double qOpp           = 1 - qParam;
            Parse  lastStartNode  = null;               // The closest previous node which has been labeled as a start node.
            int    lastStartIndex = -1;                 // The index of the closest previous node which has been labeled as a start node.
            string lastStartType  = null;               // The type of the closest previous node which has been labeled as a start node.
            int    advanceNodeIndex;                    // The index of the node which will be labeled in this iteration of advancing the parse.
            Parse  advanceNode = null;                  // The node which will be labeled in this iteration of advancing the parse.

            Parse[] children  = inputParse.GetChildren();
            int     nodeCount = children.Length;

            // Determine which node needs to be labeled, and the prior start label.
            for (advanceNodeIndex = 0; advanceNodeIndex < nodeCount; advanceNodeIndex++)
            {
                advanceNode = children[advanceNodeIndex];
                if (advanceNode.Label == null)
                {
                    break;
                }
                else if (startTypeMap.ContainsKey(advanceNode.Label))
                {
                    lastStartType  = startTypeMap[advanceNode.Label];
                    lastStartNode  = advanceNode;
                    lastStartIndex = advanceNodeIndex;
                }
            }
            var newParsesList = new List <Parse>(buildModel.OutcomeCount);

            // Call build.
            buildModel.Evaluate(buildContextGenerator.GetContext(children, advanceNodeIndex), buildProbabilities);
            double buildProbabilitiesSum = 0;

            while (buildProbabilitiesSum < qParam)
            {
                // Find the largest unadvanced labeling.
                int highestBuildProbabilityIndex = 0;
                for (int probabilityIndex = 1; probabilityIndex < buildProbabilities.Length; probabilityIndex++)
                {
                    if (buildProbabilities[probabilityIndex] > buildProbabilities[highestBuildProbabilityIndex])
                    {
                        highestBuildProbabilityIndex = probabilityIndex;
                    }
                }
                if (buildProbabilities[highestBuildProbabilityIndex] == 0)
                {
                    // Every remaining outcome has been consumed or has zero probability.
                    break;
                }

                double highestBuildProbability = buildProbabilities[highestBuildProbabilityIndex];

                buildProbabilities[highestBuildProbabilityIndex] = 0;                 // zero out so new max can be found
                buildProbabilitiesSum += highestBuildProbability;

                string tag = buildModel.GetOutcomeName(highestBuildProbabilityIndex);
                if (highestBuildProbabilityIndex == topStartIndex)
                {
                    // Can't have top until complete.
                    continue;
                }
                if (startTypeMap.ContainsKey(tag))
                {
                    // Update last start.
                    lastStartIndex = advanceNodeIndex;
                    lastStartNode  = advanceNode;
                    lastStartType  = startTypeMap[tag];
                }
                else if (continueTypeMap.ContainsKey(tag))
                {
                    if (lastStartNode == null || lastStartType != continueTypeMap[tag])
                    {
                        continue;                         // Cont must match previous start or continue
                    }
                }

                var newParse1 = (Parse)inputParse.Clone();
                if (CreateDerivationString)
                {
                    newParse1.AppendDerivationBuffer(highestBuildProbabilityIndex.ToString(System.Globalization.CultureInfo.InvariantCulture));
                    newParse1.AppendDerivationBuffer("-");
                }
                newParse1.SetChild(advanceNodeIndex, tag);                 // replace constituent labeled

                newParse1.AddProbability(Math.Log(highestBuildProbability));

                // Call check.
                checkModel.Evaluate(checkContextGenerator.GetContext(newParse1.GetChildren(), lastStartType, lastStartIndex, advanceNodeIndex), checkProbabilities);

                // Read the scores through the same indices the thresholds use, so the probability that is
                // tested is the probability that is added to the parse.
                double completeProbability   = checkProbabilities[completeIndex];
                double incompleteProbability = checkProbabilities[incompleteIndex];

                if (completeProbability > qOpp)
                {
                    // Make sure a reduce is likely.
                    var newParse2 = (Parse)newParse1.Clone();
                    if (CreateDerivationString)
                    {
                        newParse2.AppendDerivationBuffer("1");
                        newParse2.AppendDerivationBuffer(".");
                    }
                    newParse2.AddProbability(Math.Log(completeProbability));

                    int  lastOffset  = advanceNodeIndex - lastStartIndex;
                    var  constituent = new Parse[lastOffset + 1];
                    bool isFlat      = true;

                    // First node of the constituent.
                    constituent[0] = lastStartNode;
                    if (constituent[0].Type != constituent[0].Head.Type)
                    {
                        isFlat = false;
                    }
                    // Last node of the constituent.
                    constituent[lastOffset] = advanceNode;
                    if (isFlat && constituent[lastOffset].Type != constituent[lastOffset].Head.Type)
                    {
                        isFlat = false;
                    }
                    // Nodes in between.
                    for (int constituentIndex = 1; constituentIndex < lastOffset; constituentIndex++)
                    {
                        constituent[constituentIndex] = children[constituentIndex + lastStartIndex];
                        if (isFlat && constituent[constituentIndex].Type != constituent[constituentIndex].Head.Type)
                        {
                            isFlat = false;
                        }
                    }
                    if (!isFlat)
                    {
                        // Flat chunks are done by the chunker.
                        newParse2.Insert(new Parse(inputParse.Text, new Util.Span(lastStartNode.Span.Start, advanceNode.Span.End), lastStartType, completeProbability, headRules.GetHead(constituent, lastStartType)));
                        newParsesList.Add(newParse2);
                    }
                }
                if (incompleteProbability > qOpp)
                {
                    // Make sure a shift is likely.
                    if (CreateDerivationString)
                    {
                        newParse1.AppendDerivationBuffer("0");
                        newParse1.AppendDerivationBuffer(".");
                    }
                    if (advanceNodeIndex != nodeCount - 1)
                    {
                        // Can't shift the last element.
                        newParse1.AddProbability(Math.Log(incompleteProbability));
                        newParsesList.Add(newParse1);
                    }
                }
            }
            Parse[] newParses = newParsesList.ToArray();
            return newParses;
        }
Пример #12
0
 /// <summary>
 /// Tests whether a node is the first child of a parent node.
 /// </summary>
 /// <param name="child">The node to look for.</param>
 /// <param name="parent">The node whose children are inspected.</param>
 /// <returns>True when the parent's first child compares equal to <paramref name="child"/> with <c>==</c>.</returns>
 private static bool IsFirstChild(Parse child, Parse parent)
 {
     return parent.GetChildren()[0] == child;
 }
Пример #13
0
 /// <summary>
 /// Tests whether a node is the last child of a parent node.
 /// </summary>
 /// <param name="child">The node to look for.</param>
 /// <param name="parent">The node whose children are inspected.</param>
 /// <returns>True when the parent's last child compares equal to <paramref name="child"/> with <c>==</c>.</returns>
 private static bool IsLastChild(Parse child, Parse parent)
 {
     Parse[] siblings  = parent.GetChildren();
     int     lastIndex = siblings.Length - 1;
     return siblings[lastIndex] == child;
 }
Пример #14
0
		/// <summary>
		/// Scores the final build and check decisions for a parse and labels it as the top node.
		/// </summary>
		/// <param name="inputParse">The parse to finish; its probability and type are updated in place.</param>
		/// <param name="buildProbabilities">Buffer that receives the build model's outcome probabilities.</param>
		/// <param name="checkProbabilities">Buffer that receives the check model's outcome probabilities.</param>
		private void AdvanceTop(Parse inputParse, double[] buildProbabilities, double[] checkProbabilities)
		{
			// Build decision at child position 0: add log P(top start).
			var topBuildContext = buildContextGenerator.GetContext(inputParse.GetChildren(), 0);
			buildModel.Evaluate(topBuildContext, buildProbabilities);
			double topStartProbability = buildProbabilities[topStartIndex];
			inputParse.AddProbability(Math.Log(topStartProbability));

			// Check decision for a TopNode constituent: add log P(complete).
			var topCheckContext = checkContextGenerator.GetContext(inputParse.GetChildren(), TopNode, 0, 0);
			checkModel.Evaluate(topCheckContext, checkProbabilities);
			double completeProbability = checkProbabilities[completeIndex];
			inputParse.AddProbability(Math.Log(completeProbability));

			inputParse.Type = TopNode;
		}
Пример #15
0
 /// <summary>
 /// Tests whether a node is the last child of a parent node.
 /// </summary>
 /// <param name="child">The node to look for.</param>
 /// <param name="parent">The node whose children are inspected.</param>
 /// <returns>True when the parent's last child compares equal to <paramref name="child"/> with <c>==</c>.</returns>
 private static bool IsLastChild(Parse child, Parse parent)
 {
     Parse[] siblings = parent.GetChildren();
     Parse   lastKid  = siblings[siblings.Length - 1];
     return lastKid == child;
 }
Пример #16
0
 /// <summary>
 /// Collects the initial chunks of a parse tree into a list, in left-to-right order.
 /// </summary>
 /// <remarks>
 /// A node is added as a chunk when it is a POS tag, or when every one of its children is a POS tag
 /// (which includes a non-tag node with no children). Otherwise each child is searched recursively.
 /// </remarks>
 /// <param name="inputParse">The node to examine.</param>
 /// <param name="initialChunks">The list that receives the chunks.</param>
 private static void GetInitialChunks(Parse inputParse, List<Parse> initialChunks)
 {
     if (inputParse.IsPosTag)
     {
         initialChunks.Add(inputParse);
         return;
     }

     Parse[] childNodes = inputParse.GetChildren();

     // Index of the first child that is not a POS tag; equals the child count when there is none.
     int firstNonTag = 0;
     while (firstNonTag < childNodes.Length && childNodes[firstNonTag].IsPosTag)
     {
         firstNonTag++;
     }

     if (firstNonTag == childNodes.Length)
     {
         initialChunks.Add(inputParse);
         return;
     }

     foreach (Parse child in childNodes)
     {
         GetInitialChunks(child, initialChunks);
     }
 }
Пример #17
0
 /// <summary>
 /// Appends a bracketed rendering of a parse node and its descendants to a buffer.
 /// </summary>
 /// <remarks>
 /// Nodes whose type is not the token type are wrapped as "(TYPE ... )", with "#" and the node's entry
 /// from the parse map after the type when the map contains the node. Text of the node that lies between
 /// and after its children's spans is copied through unchanged.
 /// </remarks>
 /// <param name="p">The node to render.</param>
 /// <param name="buffer">The buffer that receives the text.</param>
 private void Show(Parse p, StringBuilder buffer)
 {
     int  cursor      = p.Span.Start;
     bool isBracketed = p.Type != MaximumEntropyParser.TokenNode;

     if (isBracketed)
     {
         buffer.Append("(").Append(p.Type);
         if (mParseMap.ContainsKey(p))
         {
             buffer.Append("#").Append(mParseMap[p].ToString());
         }
         buffer.Append(" ");
     }

     foreach (Parse child in p.GetChildren())
     {
         Util.Span childSpan = child.Span;
         if (cursor < childSpan.Start)
         {
             // Copy the text between the previous child and this one.
             buffer.Append(p.Text.Substring(cursor, childSpan.Start - cursor));
         }
         Show(child, buffer);
         cursor = childSpan.End;
     }

     // Copy whatever text remains after the last child.
     buffer.Append(p.Text.Substring(cursor, p.Span.End - cursor));

     if (isBracketed)
     {
         buffer.Append(")");
     }
 }
Пример #18
0
		/// <summary>
		/// Advances the specified parse by labeling its first unlabeled child, and returns an array of advanced
		/// parses built from the build outcomes that account for the specified amount of probability mass, Q.
		/// </summary>
		/// <remarks>
		/// Build outcomes are tried from most to least probable until their summed probability reaches
		/// <paramref name="qParam"/> or only zero-probability outcomes remain. Each accepted labeling can
		/// produce a reduced parse (check outcome "complete") and a shifted parse (check outcome "incomplete"),
		/// each only when its check probability exceeds 1 - <paramref name="qParam"/>.
		/// </remarks>
		/// <param name="inputParse">
		/// The parse to advance.
		/// </param>
		/// <param name="qParam">
		/// The amount of probability mass that should be accounted for by the advanced parses.
		/// </param>
		/// <param name="buildProbabilities">
		/// Buffer filled by the build model. Entries are set to zero as outcomes are consumed, so its contents
		/// are not meaningful after the call.
		/// </param>
		/// <param name="checkProbabilities">
		/// Buffer filled by the check model for each candidate labeling.
		/// </param>
		/// <returns>
		/// The advanced parses; empty when no candidate labeling is accepted.
		/// </returns>
		private Parse[] AdvanceParses(Parse inputParse, double qParam, double[] buildProbabilities, double[] checkProbabilities) 
		{
			double qOpp = 1 - qParam;
			Parse lastStartNode = null;		// The closest previous node which has been labeled as a start node.
			int lastStartIndex = -1;		// The index of the closest previous node which has been labeled as a start node.
			string lastStartType = null;	// The type of the closest previous node which has been labeled as a start node.
			int advanceNodeIndex;			// The index of the node which will be labeled in this iteration of advancing the parse.
			Parse advanceNode = null;		// The node which will be labeled in this iteration of advancing the parse.

			Parse[] children = inputParse.GetChildren();
			int nodeCount = children.Length;

			// Determine which node needs to be labeled, and the prior start label.
			for (advanceNodeIndex = 0; advanceNodeIndex < nodeCount; advanceNodeIndex++) 
			{
				advanceNode = children[advanceNodeIndex];
				if (advanceNode.Label == null) 
				{
					break;
				}
				else if (startTypeMap.ContainsKey(advanceNode.Label)) 
				{
					lastStartType = startTypeMap[advanceNode.Label];
					lastStartNode = advanceNode;
					lastStartIndex = advanceNodeIndex;
				}
			}
			var newParsesList = new List<Parse>(buildModel.OutcomeCount);

			// Call build.
			buildModel.Evaluate(buildContextGenerator.GetContext(children, advanceNodeIndex), buildProbabilities);
			double buildProbabilitiesSum = 0;
			while (buildProbabilitiesSum < qParam) 
			{
				// Find the largest unadvanced labeling.
				int highestBuildProbabilityIndex = 0;
				for (int probabilityIndex = 1; probabilityIndex < buildProbabilities.Length; probabilityIndex++) 
				{
					if (buildProbabilities[probabilityIndex] > buildProbabilities[highestBuildProbabilityIndex]) 
					{
						highestBuildProbabilityIndex = probabilityIndex;
					}
				}
				if (buildProbabilities[highestBuildProbabilityIndex] == 0) 
				{
					// Every remaining outcome has been consumed or has zero probability.
					break;
				}

				double highestBuildProbability = buildProbabilities[highestBuildProbabilityIndex];

				buildProbabilities[highestBuildProbabilityIndex] = 0; // zero out so new max can be found
				buildProbabilitiesSum += highestBuildProbability;

				string tag = buildModel.GetOutcomeName(highestBuildProbabilityIndex);
				if (highestBuildProbabilityIndex == topStartIndex) 
				{
					// Can't have top until complete.
					continue;
				}
				if (startTypeMap.ContainsKey(tag)) 
				{
					// Update last start.
					lastStartIndex = advanceNodeIndex;
					lastStartNode = advanceNode;
					lastStartType = startTypeMap[tag];
				}
				else if (continueTypeMap.ContainsKey(tag)) 
				{
					if (lastStartNode == null || lastStartType != continueTypeMap[tag]) 
					{
						continue; // Cont must match previous start or continue
					}
				}

				var newParse1 = (Parse) inputParse.Clone();
				if (CreateDerivationString)
				{
					newParse1.AppendDerivationBuffer(highestBuildProbabilityIndex.ToString(System.Globalization.CultureInfo.InvariantCulture));
					newParse1.AppendDerivationBuffer("-");
				}
				newParse1.SetChild(advanceNodeIndex, tag); // replace constituent labeled

				newParse1.AddProbability(Math.Log(highestBuildProbability));

				// Call check.
				checkModel.Evaluate(checkContextGenerator.GetContext(newParse1.GetChildren(), lastStartType, lastStartIndex, advanceNodeIndex), checkProbabilities);

				// Read the scores through the same indices the thresholds use, so the probability that is
				// tested is the probability that is added to the parse.
				double completeProbability = checkProbabilities[completeIndex];
				double incompleteProbability = checkProbabilities[incompleteIndex];

				if (completeProbability > qOpp) 
				{
					// Make sure a reduce is likely.
					var newParse2 = (Parse) newParse1.Clone();
					if (CreateDerivationString)
					{
						newParse2.AppendDerivationBuffer("1");
						newParse2.AppendDerivationBuffer(".");
					}
					newParse2.AddProbability(Math.Log(completeProbability));

					int lastOffset = advanceNodeIndex - lastStartIndex;
					var constituent = new Parse[lastOffset + 1];
					bool isFlat = true;

					// First node of the constituent.
					constituent[0] = lastStartNode;
					if (constituent[0].Type != constituent[0].Head.Type)
					{
						isFlat = false;
					}
					// Last node of the constituent.
					constituent[lastOffset] = advanceNode;
					if (isFlat && constituent[lastOffset].Type != constituent[lastOffset].Head.Type) 
					{
						isFlat = false;
					}
					// Nodes in between.
					for (int constituentIndex = 1; constituentIndex < lastOffset; constituentIndex++) 
					{
						constituent[constituentIndex] = children[constituentIndex + lastStartIndex];
						if (isFlat && constituent[constituentIndex].Type != constituent[constituentIndex].Head.Type) 
						{
							isFlat = false;
						}
					}
					if (!isFlat) 
					{
						// Flat chunks are done by the chunker.
						newParse2.Insert(new Parse(inputParse.Text, new Util.Span(lastStartNode.Span.Start, advanceNode.Span.End), lastStartType, completeProbability, headRules.GetHead(constituent, lastStartType)));
						newParsesList.Add(newParse2);
					}
				}
				if (incompleteProbability > qOpp) 
				{
					// Make sure a shift is likely.
					if (CreateDerivationString)
					{
						newParse1.AppendDerivationBuffer("0");
						newParse1.AppendDerivationBuffer(".");
					}
					if (advanceNodeIndex != nodeCount - 1) 
					{
						// Can't shift the last element.
						newParse1.AddProbability(Math.Log(incompleteProbability));
						newParsesList.Add(newParse1);
					}
				}
			}
			Parse[] newParses = newParsesList.ToArray();
			return newParses;
		}
Пример #19
0
		/// <summary>
		/// Assigns part-of-speech tags to the children of a parse, producing one new parse
		/// for every tag sequence returned by the POS tagger.
		/// </summary>
		/// <param name="inputParse">
		/// The parse to be tagged; it is cloned, not modified.
		/// </param>
		/// <returns>
		/// One tagged clone of <paramref name="inputParse"/> per tag sequence, in the order the
		/// tagger returned them. The array is empty when the tagger returns no sequence.
		/// </returns>
		private Parse[] AdvanceTags(Parse inputParse)
		{
			Parse[] wordParses = inputParse.GetChildren();
			string[] words = wordParses.Select(wordParse => wordParse.ToString()).ToArray();
			// Scratch buffer shared by all sequences; GetProbabilities refills it on each pass.
			double[] tagProbabilities = new double[words.Length];

			Util.Sequence[] candidates = posTagger.TopKSequences(words);
			if (candidates.Length == 0)
			{
				Console.Error.WriteLine("no tag sequence");
			}

			var taggedParses = new Parse[candidates.Length];
			for (int candidateIndex = 0; candidateIndex < candidates.Length; candidateIndex++)
			{
				Util.Sequence candidate = candidates[candidateIndex];
				string[] tags = candidate.Outcomes.ToArray();
				candidate.GetProbabilities(tagProbabilities);

				// Clone copies the top level of the input parse.
				Parse tagged = (Parse) inputParse.Clone();
				taggedParses[candidateIndex] = tagged;

				if (CreateDerivationString)
				{
					// Record which tag sequence this parse was derived from.
					tagged.AppendDerivationBuffer(candidateIndex.ToString(System.Globalization.CultureInfo.InvariantCulture));
					tagged.AppendDerivationBuffer(".");
				}

				for (int position = 0; position < words.Length; position++)
				{
					Parse source = wordParses[position];
					double tagProbability = tagProbabilities[position];

					// Insert a tag node over the word's span, then fold its log-probability into the parse.
					tagged.Insert(new Parse(source.Text, source.Span, tags[position], tagProbability));
					tagged.AddProbability(Math.Log(tagProbability));
				}
			}

			return taggedParses;
		}
Пример #20
0
		/// <summary>
		/// Returns the top chunk sequences for the specified parse.
		/// </summary>
		/// <param name="inputParse">
		/// A pos-tag assigned parse; it is cloned, not modified.
		/// </param>
		/// <param name="minChunkScore">
		/// The minimum probability for an allowed chunk sequence.
		/// </param>
		/// <returns>
		/// One clone of <paramref name="inputParse"/> per sequence returned by the chunker, with a
		/// constituent inserted for every chunk found in that sequence.
		/// </returns>
		private Parse[] AdvanceChunks(Parse inputParse, double minChunkScore)
		{
			Parse[] children = inputParse.GetChildren();
			var words = new string[children.Length];
			var parseTags = new string[words.Length];
			// Scratch buffer shared by all sequences; GetProbabilities refills it on each pass.
			var probabilities = new double[words.Length];
			for (int childIndex = 0; childIndex < children.Length; childIndex++)
			{
				Parse child = children[childIndex];
				words[childIndex] = child.Head.ToString();
				parseTags[childIndex] = child.Type;
			}

			// The threshold handed to the chunker is reduced by the probability this parse already carries.
			Util.Sequence[] chunkerSequences = basalChunker.TopKSequences(words, parseTags, minChunkScore - inputParse.Probability);
			var newParses = new Parse[chunkerSequences.Length];
			for (int sequenceIndex = 0; sequenceIndex < chunkerSequences.Length; sequenceIndex++)
			{
				// Clone copies the top level of the input parse.
				Parse newParse = (Parse) inputParse.Clone();
				newParses[sequenceIndex] = newParse;
				if (CreateDerivationString)
				{
					newParse.AppendDerivationBuffer(sequenceIndex.ToString(System.Globalization.CultureInfo.InvariantCulture));
					newParse.AppendDerivationBuffer(".");
				}

				string[] tags = chunkerSequences[sequenceIndex].Outcomes.ToArray();
				chunkerSequences[sequenceIndex].GetProbabilities(probabilities);

				int start = -1;
				int end = 0;
				string type = null; // type of the chunk currently open, or null when none is open

				// Iterate one step past the last tag so that a chunk still open at the end is emitted.
				for (int tagIndex = 0; tagIndex <= tags.Length; tagIndex++)
				{
					bool hasTag = tagIndex != tags.Length;
					if (hasTag)
					{
						newParse.AddProbability(Math.Log(probabilities[tagIndex]));
					}

					// Chunk tags are machine-generated labels, so prefixes are matched ordinally
					// rather than with the current culture's linguistic rules.
					if (hasTag && tags[tagIndex].StartsWith(ContinuePrefix, System.StringComparison.Ordinal))
					{
						// if continue just update end chunking tag don't use mContinueTypeMap
						end = tagIndex;
					}
					else
					{
						// make previous constituent if it exists
						if (type != null)
						{
							Parse startParse = children[start];
							Parse endParse = children[end];

							// The constituents of the chunk are exactly children[start..end].
							var constituents = new Parse[end - start + 1];
							System.Array.Copy(children, start, constituents, 0, constituents.Length);

							newParse.Insert(new Parse(startParse.Text, new Util.Span(startParse.Span.Start, endParse.Span.End), type, 1, headRules.GetHead(constituents, type)));
						}

						if (hasTag)
						{
							// update for new constituent
							if (tags[tagIndex].StartsWith(StartPrefix, System.StringComparison.Ordinal))
							{
								// don't use mStartTypeMap these are chunk tags
								type = tags[tagIndex].Substring(StartPrefix.Length);
								start = tagIndex;
								end = tagIndex;
							}
							else
							{
								// other
								type = null;
							}
						}
					}
				}
			}
			return newParses;
		}
Пример #21
0
        /// <summary>
        /// Parses the text in the input box and displays the resulting parse tree in the diagram control.
        /// Does nothing when the input box is empty.
        /// </summary>
        /// <remarks>
        /// The input box and parse button are disabled and the wait cursor is shown while the work runs.
        /// They are restored in a <c>finally</c> block so the form stays usable even when parsing or
        /// drawing throws; the exception itself still propagates to the caller.
        /// </remarks>
        private void ShowParse()
        {
            if (txtInput.Text.Length == 0)
            {
                return;
            }

            //prepare the UI
            txtInput.Enabled = false;
            btnParse.Enabled = false;
            this.Cursor = Cursors.WaitCursor;

            try
            {
                lithiumControl.NewDiagram();

                //do the parsing; the parser is created on first use and then reused
                if (mParser == null)
                {
                    mParser = new EnglishTreebankParser(mModelPath, true, false);
                }
                mParse = mParser.DoParse(txtInput.Text);

                //skip the artificial top node and display its first child instead
                if (mParse.Type == MaximumEntropyParser.TopNode)
                {
                    mParse = mParse.GetChildren()[0];
                }

                //display the parse result
                ShapeBase root = this.lithiumControl.Root;
                root.Text = mParse.Type;
                root.Visible = true;

                AddChildNodes(root, mParse.GetChildren());
                root.Expand();

                this.lithiumControl.DrawTree();
            }
            finally
            {
                //restore the UI, whether or not the parse succeeded
                this.Cursor = Cursors.Default;
                txtInput.Enabled = true;
                btnParse.Enabled = true;
            }
        }
 /// <summary>
 /// Adds two log-probabilities to the parse and then sets its type to <c>TopNode</c>:
 /// the build model's probability at <c>mTopStartIndex</c> and the check model's
 /// probability at <c>mCompleteIndex</c>.
 /// </summary>
 /// <param name="inputParse">The parse to update; it is modified in place.</param>
 private void AdvanceTop(Parse inputParse)
 {
     // Build step: evaluate the build model on the context generated at child index 0.
     var buildContext = mBuildContextGenerator.GetContext(inputParse.GetChildren(), 0);
     mBuildModel.Evaluate(buildContext, mBuildProbabilities);
     double topStartLogProbability = System.Math.Log(mBuildProbabilities[mTopStartIndex]);
     inputParse.AddProbability(topStartLogProbability);

     // Check step: evaluate the check model on the context generated for the TopNode type.
     var checkContext = mCheckContextGenerator.GetContext(inputParse.GetChildren(), TopNode, 0, 0);
     mCheckModel.Evaluate(checkContext, mCheckProbabilities);
     double completeLogProbability = System.Math.Log(mCheckProbabilities[mCompleteIndex]);
     inputParse.AddProbability(completeLogProbability);

     inputParse.Type = TopNode;
 }