/// <summary>
/// Promotes a completed parse to a top node: folds the build-model score for
/// starting TOP and the check-model score for a complete TOP into the parse
/// probability, then relabels the parse as TopNode.
/// </summary>
/// <param name="inputParse">The completed parse to promote.</param>
private void AdvanceTop(Parse inputParse)
{
    Parse[] children = inputParse.GetChildren();

    // Log-probability that the children begin a TOP constituent.
    mBuildModel.Evaluate(mBuildContextGenerator.GetContext(children, 0), mBuildProbabilities);
    inputParse.AddProbability(Math.Log(mBuildProbabilities[mTopStartIndex]));

    // Log-probability that the TOP constituent is complete.
    mCheckModel.Evaluate(mCheckContextGenerator.GetContext(children, TopNode, 0, 0), mCheckProbabilities);
    inputParse.AddProbability(Math.Log(mCheckProbabilities[mCompleteIndex]));

    inputParse.Type = TopNode;
}
/// <summary>
/// Promotes a completed parse to a top node, scoring it with the build and
/// check models via the supplied scratch probability buffers and assigning
/// it the TopNode type.
/// </summary>
/// <param name="inputParse">The completed parse to promote.</param>
/// <param name="buildProbabilities">Scratch buffer for build-model outcomes.</param>
/// <param name="checkProbabilities">Scratch buffer for check-model outcomes.</param>
private void AdvanceTop(Parse inputParse, double[] buildProbabilities, double[] checkProbabilities)
{
    Parse[] children = inputParse.GetChildren();

    // Fold in the probability that the children start a TOP constituent.
    buildModel.Evaluate(buildContextGenerator.GetContext(children, 0), buildProbabilities);
    inputParse.AddProbability(Math.Log(buildProbabilities[topStartIndex]));

    // Fold in the probability that the TOP constituent is complete.
    checkModel.Evaluate(checkContextGenerator.GetContext(children, TopNode, 0, 0), checkProbabilities);
    inputParse.AddProbability(Math.Log(checkProbabilities[completeIndex]));

    inputParse.Type = TopNode;
}
/// <summary>
/// Generates POS-tagging training events from the specified chunks and
/// appends them to the event list.
/// </summary>
/// <param name="events">The list that receives the new training events.</param>
/// <param name="chunks">
/// The chunk parses; POS-tag parses contribute themselves, other chunks are
/// flattened into their child tokens.
/// </param>
private void AddTagEvents(List<SharpEntropy.TrainingEvent> events, Parse[] chunks)
{
    List<string> tokens = new List<string>();
    List<string> predicates = new List<string>();

    // Collect every token and its POS-tag outcome in order.
    foreach (Parse chunkParse in chunks)
    {
        if (chunkParse.IsPosTag)
        {
            tokens.Add(chunkParse.ToString());
            predicates.Add(chunkParse.Type);
        }
        else
        {
            foreach (Parse tokenParse in chunkParse.GetChildren())
            {
                tokens.Add(tokenParse.ToString());
                predicates.Add(tokenParse.Type);
            }
        }
    }

    // Materialize the arrays once. The original rebuilt both arrays on every
    // iteration of the loop below, costing O(n^2) time and allocations; the
    // lists are fully populated here, so the snapshots are identical.
    string[] tokenArray = tokens.ToArray();
    string[] predicateArray = predicates.ToArray();
    for (int currentToken = 0; currentToken < tokenArray.Length; currentToken++)
    {
        events.Add(new SharpEntropy.TrainingEvent(
            predicateArray[currentToken],
            mPosContextGenerator.GetContext(currentToken, tokenArray, predicateArray, null)));
    }
}
///<summary>
///Advances the parse by assigning it POS tags and returns multiple tag sequences.
///</summary>
///<param name="inputParse">
///The parse to be tagged.
///</param>
///<returns>
///Parses with different pos-tag sequence assignments.
///</returns>
private Parse[] AdvanceTags(Parse inputParse)
{
    Parse[] children = inputParse.GetChildren();
    var words = children.Select(child => child.ToString()).ToArray();
    var probabilities = new double[words.Length];

    Util.Sequence[] tagSequences = posTagger.TopKSequences(words);
    if (tagSequences.Length == 0)
    {
        Console.Error.WriteLine("no tag sequence");
    }

    var newParses = new Parse[tagSequences.Length];
    for (int sequenceIndex = 0; sequenceIndex < tagSequences.Length; sequenceIndex++)
    {
        Util.Sequence sequence = tagSequences[sequenceIndex];
        string[] tags = sequence.Outcomes.ToArray();
        sequence.GetProbabilities(probabilities);

        // Clone copies only the top level; tagged tokens are inserted below.
        newParses[sequenceIndex] = (Parse)inputParse.Clone();

        for (int wordIndex = 0; wordIndex < words.Length; wordIndex++)
        {
            Parse wordParse = children[wordIndex];
            double wordProbability = probabilities[wordIndex];
            newParses[sequenceIndex].Insert(
                new Parse(wordParse.Text, wordParse.Span, tags[wordIndex], wordProbability));
            newParses[sequenceIndex].AddProbability(Math.Log(wordProbability));
        }
    }
    return (newParses);
}
/// <summary>
/// Recursively collects the lowest constituents whose children are all POS
/// tags (the "initial chunks") of the specified parse.
/// </summary>
/// <param name="inputParse">The parse to scan.</param>
/// <param name="initialChunks">Accumulator that receives the chunk parses.</param>
private static void GetInitialChunks(Parse inputParse, List<Parse> initialChunks)
{
    // A POS tag is itself a minimal chunk.
    if (inputParse.IsPosTag)
    {
        initialChunks.Add(inputParse);
        return;
    }

    Parse[] kids = inputParse.GetChildren();
    bool allKidsAreTags = true;
    foreach (Parse kid in kids)
    {
        if (!kid.IsPosTag)
        {
            allKidsAreTags = false;
            break;
        }
    }

    if (allKidsAreTags)
    {
        // Every child is a POS tag, so this node is a complete chunk.
        initialChunks.Add(inputParse);
    }
    else
    {
        // Mixed children: descend to find the lowest all-tag constituents.
        foreach (Parse kid in kids)
        {
            GetInitialChunks(kid, initialChunks);
        }
    }
}
/// <summary>
/// Shows the parse in the LithiumControl.
/// </summary>
/// <param name="parse">The parse to display.</param>
public void ShowParse(Parse parse)
{
    lithiumControl.NewDiagram();

    // Skip the artificial TOP node so the tree starts at the real root.
    if (parse.Type == MaximumEntropyParser.TopNode)
    {
        parse = parse.GetChildren()[0];
    }

    // Populate the diagram from the parse tree.
    ShapeBase root = lithiumControl.Root;
    root.Text = parse.Type;
    root.Visible = true;
    AddChildNodes(root, parse.GetChildren());
    root.Expand();

    lithiumControl.DrawTree();
}
/// <summary>
/// Returns the parse nodes which are children of this node and which are pos tags.
/// </summary>
/// <returns>
/// The POS-tag descendants of this node, in left-to-right order.
/// </returns>
public virtual Parse[] GetTagNodes()
{
    var tags = new List<Parse>();

    // Depth-first, left-to-right walk. A Stack replaces the original
    // list-as-deque (RemoveAt(0)/InsertRange(0, ...)), whose head
    // insertions/removals made the traversal O(n^2). Pushing in reverse
    // preserves the original visiting order exactly.
    var ordered = new List<Parse>(_parts);
    var nodes = new Stack<Parse>();
    for (int partIndex = ordered.Count - 1; partIndex >= 0; partIndex--)
    {
        nodes.Push(ordered[partIndex]);
    }

    while (nodes.Count != 0)
    {
        Parse currentParse = nodes.Pop();
        if (currentParse.IsPosTag)
        {
            tags.Add(currentParse);
        }
        else
        {
            Parse[] children = currentParse.GetChildren();
            for (int childIndex = children.Length - 1; childIndex >= 0; childIndex--)
            {
                nodes.Push(children[childIndex]);
            }
        }
    }
    return (tags.ToArray());
}
/// <summary>
/// Generates chunking training events from the specified chunks and appends
/// them to the event list. Each token receives a Start/Continue outcome for
/// its chunk type, or the Other outcome when the chunk is a bare POS tag.
/// </summary>
/// <param name="events">The list that receives the new training events.</param>
/// <param name="chunks">The chunk parses from which tokens, tags and outcomes are read.</param>
private void AddChunkEvents(List<SharpEntropy.TrainingEvent> events, Parse[] chunks)
{
    List<string> tokens = new List<string>();
    List<string> tags = new List<string>();
    List<string> predicates = new List<string>();

    foreach (Parse chunkParse in chunks)
    {
        if (chunkParse.IsPosTag)
        {
            // A bare POS tag is outside any chunk.
            tokens.Add(chunkParse.ToString());
            tags.Add(chunkParse.Type);
            predicates.Add(MaximumEntropyParser.OtherOutcome);
        }
        else
        {
            // First token of the chunk gets the Start outcome, the rest Continue.
            bool isStart = true;
            string chunkType = chunkParse.Type;
            foreach (Parse tokenParse in chunkParse.GetChildren())
            {
                tokens.Add(tokenParse.ToString());
                tags.Add(tokenParse.Type);
                if (isStart)
                {
                    predicates.Add(MaximumEntropyParser.StartPrefix + chunkType);
                    isStart = false;
                }
                else
                {
                    predicates.Add(MaximumEntropyParser.ContinuePrefix + chunkType);
                }
            }
        }
    }

    // Materialize the arrays once. The original rebuilt all three arrays on
    // every iteration below (O(n^2) time and allocations); the lists are
    // complete at this point, so the snapshots are identical.
    string[] tokenArray = tokens.ToArray();
    string[] tagArray = tags.ToArray();
    string[] predicateArray = predicates.ToArray();
    for (int currentToken = 0; currentToken < tokenArray.Length; currentToken++)
    {
        events.Add(new SharpEntropy.TrainingEvent(
            predicateArray[currentToken],
            mChunkContextGenerator.GetContext(currentToken, tokenArray, tagArray, predicateArray)));
    }
}
/// <summary>
/// Determines whether <paramref name="child"/> is the first child of
/// <paramref name="parent"/>.
/// </summary>
private static bool IsFirstChild(Parse child, Parse parent)
{
    // The first child occupies slot zero of the parent's child array.
    return (parent.GetChildren()[0] == child);
}
///<summary>
///Returns the top chunk sequences for the specified parse.
///</summary>
///<param name="inputParse">
///A pos-tag assigned parse.
///</param>
///<param name="minChunkScore">
///The minimum probability for an allowed chunk sequence.
///</param>
///<returns>
///The top chunk assignments to the specified parse, one advanced parse per
///chunker sequence.
///</returns>
private Parse[] AdvanceChunks(Parse inputParse, double minChunkScore)
{
    // Gather the chunker inputs: one head word and one POS tag per child.
    Parse[] children = inputParse.GetChildren();
    var words = new string[children.Length];
    var parseTags = new string[words.Length];
    var probabilities = new double[words.Length];
    for (int childParseIndex = 0, childParseCount = children.Length; childParseIndex < childParseCount; childParseIndex++)
    {
        Parse currentChildParse = children[childParseIndex];
        words[childParseIndex] = currentChildParse.Head.ToString();
        parseTags[childParseIndex] = currentChildParse.Type;
    }
    // The chunker score is relative to the parse so far, hence the adjustment
    // by the parse's accumulated (log) probability.
    Util.Sequence[] chunkerSequences = basalChunker.TopKSequences(words, parseTags, minChunkScore - inputParse.Probability);
    var newParses = new Parse[chunkerSequences.Length];
    for (int sequenceIndex = 0, sequenceCount = chunkerSequences.Length; sequenceIndex < sequenceCount; sequenceIndex++)
    {
        newParses[sequenceIndex] = (Parse)inputParse.Clone(); //copies top level
        if (CreateDerivationString)
        {
            newParses[sequenceIndex].AppendDerivationBuffer(sequenceIndex.ToString(System.Globalization.CultureInfo.InvariantCulture));
            newParses[sequenceIndex].AppendDerivationBuffer(".");
        }
        string[] tags = chunkerSequences[sequenceIndex].Outcomes.ToArray();
        chunkerSequences[sequenceIndex].GetProbabilities(probabilities);
        // State machine over the chunk tags: [start, end] delimit the chunk
        // being built, type is its label (null = not inside a chunk).
        int start = -1;
        int end = 0;
        string type = null;
        // Note: tagIndex runs one PAST the last tag so the final open chunk
        // is flushed on the extra iteration.
        for (int tagIndex = 0; tagIndex <= tags.Length; tagIndex++)
        {
            if (tagIndex != tags.Length)
            {
                newParses[sequenceIndex].AddProbability(Math.Log(probabilities[tagIndex]));
            }
            if (tagIndex != tags.Length && tags[tagIndex].StartsWith(ContinuePrefix))
            {
                // if continue just update end chunking tag don't use mContinueTypeMap
                end = tagIndex;
            }
            else
            {
                //make previous constituent if it exists
                if (type != null)
                {
                    Parse startParse = children[start];
                    Parse endParse = children[end];
                    var consitituents = new Parse[end - start + 1];
                    consitituents[0] = startParse;
                    if (end - start != 0)
                    {
                        consitituents[end - start] = endParse;
                        for (int constituentIndex = 1; constituentIndex < end - start; constituentIndex++)
                        {
                            consitituents[constituentIndex] = children[constituentIndex + start];
                        }
                    }
                    // Insert the completed chunk with probability 1; the tag
                    // probabilities were already folded in above.
                    newParses[sequenceIndex].Insert(new Parse(startParse.Text, new Util.Span(startParse.Span.Start, endParse.Span.End), type, 1, headRules.GetHead(consitituents, type)));
                }
                if (tagIndex != tags.Length)
                {
                    //update for new constituent
                    if (tags[tagIndex].StartsWith(StartPrefix))
                    {
                        // don't use mStartTypeMap these are chunk tags
                        type = tags[tagIndex].Substring(StartPrefix.Length);
                        start = tagIndex;
                        end = tagIndex;
                    }
                    else
                    {
                        // "other" tag: token is outside any chunk.
                        type = null;
                    }
                }
            }
        }
    }
    return (newParses);
}
///<summary>
///Advances the specified parse and returns an array of advanced parses whose
///probability accounts for more than the specified amount of probability mass, Q.
///</summary>
///<param name="inputParse">
///The parse to advance.
///</param>
///<param name="qParam">
///The amount of probability mass that should be accounted for by the advanced parses.
///</param>
///<param name="buildProbabilities">
///Scratch buffer for build-model outcome probabilities; entries are zeroed as
///each maximum is consumed.
///</param>
///<param name="checkProbabilities">
///Scratch buffer for check-model outcome probabilities.
///</param>
private Parse[] AdvanceParses(Parse inputParse, double qParam, double[] buildProbabilities, double[] checkProbabilities)
{
    double qOpp = 1 - qParam;
    Parse lastStartNode = null; // The closest previous node which has been labeled as a start node.
    int lastStartIndex = -1; // The index of the closest previous node which has been labeled as a start node.
    string lastStartType = null; // The type of the closest previous node which has been labeled as a start node.
    int advanceNodeIndex; // The index of the node which will be labeled in this iteration of advancing the parse.
    Parse advanceNode = null; // The node which will be labeled in this iteration of advancing the parse.
    Parse[] children = inputParse.GetChildren();
    int nodeCount = children.Length;

    // Determines which node needs to be labeled and records prior labels.
    // If every node is already labeled, advanceNodeIndex ends at nodeCount and
    // advanceNode remains the last child (presumably intentional — confirm).
    for (advanceNodeIndex = 0; advanceNodeIndex < nodeCount; advanceNodeIndex++)
    {
        advanceNode = children[advanceNodeIndex];
        if (advanceNode.Label == null)
        {
            break;
        }
        else if (startTypeMap.ContainsKey(advanceNode.Label))
        {
            lastStartType = startTypeMap[advanceNode.Label];
            lastStartNode = advanceNode;
            lastStartIndex = advanceNodeIndex;
        }
    }
    var newParsesList = new List<Parse>(buildModel.OutcomeCount);
    // Call build model for the node being advanced.
    buildModel.Evaluate(buildContextGenerator.GetContext(children, advanceNodeIndex), buildProbabilities);
    double buildProbabilitiesSum = 0;
    // Consume build outcomes from most to least probable until mass Q is covered.
    while (buildProbabilitiesSum < qParam)
    {
        // Find the largest unadvanced labeling.
        int highestBuildProbabilityIndex = 0;
        for (int probabilityIndex = 1; probabilityIndex < buildProbabilities.Length; probabilityIndex++)
        {
            // for each build outcome
            if (buildProbabilities[probabilityIndex] > buildProbabilities[highestBuildProbabilityIndex])
            {
                highestBuildProbabilityIndex = probabilityIndex;
            }
        }
        if (buildProbabilities[highestBuildProbabilityIndex] == 0)
        {
            break;
        }
        double highestBuildProbability = buildProbabilities[highestBuildProbabilityIndex];
        buildProbabilities[highestBuildProbabilityIndex] = 0; // zero out so new max can be found
        buildProbabilitiesSum += highestBuildProbability;
        string tag = buildModel.GetOutcomeName(highestBuildProbabilityIndex);
        if (highestBuildProbabilityIndex == topStartIndex)
        {
            // can't have top until complete
            continue;
        }
        if (startTypeMap.ContainsKey(tag))
        {
            // update last start
            lastStartIndex = advanceNodeIndex;
            lastStartNode = advanceNode;
            lastStartType = startTypeMap[tag];
        }
        else if (continueTypeMap.ContainsKey(tag))
        {
            if (lastStartNode == null || lastStartType != continueTypeMap[tag])
            {
                continue; // Cont must match previous start or continue
            }
        }
        var newParse1 = (Parse)inputParse.Clone(); // clone parse
        if (CreateDerivationString)
        {
            newParse1.AppendDerivationBuffer(highestBuildProbabilityIndex.ToString(System.Globalization.CultureInfo.InvariantCulture));
            newParse1.AppendDerivationBuffer("-");
        }
        newParse1.SetChild(advanceNodeIndex, tag); // replace constituent labeled
        newParse1.AddProbability(Math.Log(highestBuildProbability));
        // Check model decides between reduce (complete) and shift (incomplete).
        checkModel.Evaluate(checkContextGenerator.GetContext(newParse1.GetChildren(), lastStartType, lastStartIndex, advanceNodeIndex), checkProbabilities);
        Parse newParse2 = newParse1;
        if (checkProbabilities[completeIndex] > qOpp)
        {
            // make sure a reduce is likely
            newParse2 = (Parse)newParse1.Clone();
            if (CreateDerivationString)
            {
                newParse2.AppendDerivationBuffer("1");
                newParse2.AppendDerivationBuffer(".");
            }
            newParse2.AddProbability(System.Math.Log(checkProbabilities[1]));
            var constituent = new Parse[advanceNodeIndex - lastStartIndex + 1];
            bool isFlat = true;
            // first
            constituent[0] = lastStartNode;
            if (constituent[0].Type != constituent[0].Head.Type)
            {
                isFlat = false;
            }
            // last
            constituent[advanceNodeIndex - lastStartIndex] = advanceNode;
            if (isFlat && constituent[advanceNodeIndex - lastStartIndex].Type != constituent[advanceNodeIndex - lastStartIndex].Head.Type)
            {
                isFlat = false;
            }
            // middle
            for (int constituentIndex = 1; constituentIndex < advanceNodeIndex - lastStartIndex; constituentIndex++)
            {
                constituent[constituentIndex] = children[constituentIndex + lastStartIndex];
                if (isFlat && constituent[constituentIndex].Type != constituent[constituentIndex].Head.Type)
                {
                    isFlat = false;
                }
            }
            if (!isFlat)
            {
                // flat chunks are done by chunker
                newParse2.Insert(new Parse(inputParse.Text, new Util.Span(lastStartNode.Span.Start, advanceNode.Span.End), lastStartType, checkProbabilities[1], headRules.GetHead(constituent, lastStartType)));
                newParsesList.Add(newParse2);
            }
        }
        if (checkProbabilities[incompleteIndex] > qOpp)
        {
            // make sure a shift is likely
            if (CreateDerivationString)
            {
                newParse1.AppendDerivationBuffer("0");
                newParse1.AppendDerivationBuffer(".");
            }
            if (advanceNodeIndex != nodeCount - 1)
            {
                // can't shift last element
                newParse1.AddProbability(Math.Log(checkProbabilities[0]));
                newParsesList.Add(newParse1);
            }
        }
    }
    Parse[] newParses = newParsesList.ToArray();
    return (newParses);
}
/// <summary>
/// Determines whether <paramref name="child"/> is the first child of
/// <paramref name="parent"/>.
/// </summary>
private static bool IsFirstChild(Parse child, Parse parent)
{
    Parse[] siblings = parent.GetChildren();
    Parse firstChild = siblings[0];
    return firstChild == child;
}
/// <summary>
/// Determines whether <paramref name="child"/> is the last child of
/// <paramref name="parent"/>.
/// </summary>
private static bool IsLastChild(Parse child, Parse parent)
{
    Parse[] siblings = parent.GetChildren();
    int lastIndex = siblings.Length - 1;
    return siblings[lastIndex] == child;
}
/// <summary>
/// Determines whether <paramref name="child"/> is the last child of
/// <paramref name="parent"/>.
/// </summary>
private static bool IsLastChild(Parse child, Parse parent)
{
    Parse[] siblings = parent.GetChildren();
    Parse lastChild = siblings[siblings.Length - 1];
    return (lastChild == child);
}
/// <summary>
/// Recursively collects the lowest constituents whose children are all POS
/// tags (the "initial chunks") of the specified parse.
/// </summary>
/// <param name="inputParse">The parse to scan.</param>
/// <param name="initialChunks">Accumulator that receives the chunk parses.</param>
private static void GetInitialChunks(Parse inputParse, List<Parse> initialChunks)
{
    // A POS tag is itself a minimal chunk.
    if (inputParse.IsPosTag)
    {
        initialChunks.Add(inputParse);
        return;
    }

    Parse[] kids = inputParse.GetChildren();

    bool hasNonTagChild = false;
    for (int childIndex = 0; childIndex < kids.Length; childIndex++)
    {
        if (!kids[childIndex].IsPosTag)
        {
            hasNonTagChild = true;
            break;
        }
    }

    if (!hasNonTagChild)
    {
        // Every child is a POS tag, so this node is a complete chunk.
        initialChunks.Add(inputParse);
        return;
    }

    // Mixed children: descend to find the lowest all-tag constituents.
    for (int childIndex = 0; childIndex < kids.Length; childIndex++)
    {
        GetInitialChunks(kids[childIndex], initialChunks);
    }
}
/// <summary>
/// Recursively renders the parse into bracketed notation, restoring the
/// original surface text that lies between and around the constituents.
/// </summary>
/// <param name="p">The parse node to render.</param>
/// <param name="buffer">The buffer that receives the rendered text.</param>
private void Show(Parse p, StringBuilder buffer)
{
    int start = p.Span.Start;
    bool isToken = p.Type == MaximumEntropyParser.TokenNode;

    if (!isToken)
    {
        buffer.Append("(");
        buffer.Append(p.Type);
        // Annotate the node with its index when it has been mapped.
        if (mParseMap.ContainsKey(p))
        {
            buffer.Append("#" + mParseMap[p].ToString());
        }
        buffer.Append(" ");
    }

    foreach (Parse child in p.GetChildren())
    {
        Util.Span childSpan = child.Span;
        // Emit any raw text between the previous child and this one.
        if (start < childSpan.Start)
        {
            buffer.Append(p.Text.Substring(start, childSpan.Start - start));
        }
        Show(child, buffer);
        start = childSpan.End;
    }

    // Emit the trailing text after the last child (or the node's own text
    // when it has no children).
    buffer.Append(p.Text.Substring(start, p.Span.End - start));

    if (!isToken)
    {
        buffer.Append(")");
    }
}
///<summary>
///Advances the specified parse and returns an array of advanced parses whose
///probability accounts for more than the specified amount of probability mass, Q.
///</summary>
///<param name="inputParse">
///The parse to advance.
///</param>
///<param name="qParam">
///The amount of probability mass that should be accounted for by the advanced parses.
///</param>
///<param name="buildProbabilities">
///Scratch buffer for build-model outcome probabilities; entries are zeroed as
///each maximum is consumed.
///</param>
///<param name="checkProbabilities">
///Scratch buffer for check-model outcome probabilities.
///</param>
private Parse[] AdvanceParses(Parse inputParse, double qParam, double[] buildProbabilities, double[] checkProbabilities)
{
    double qOpp = 1 - qParam;
    Parse lastStartNode = null; // The closest previous node which has been labeled as a start node.
    int lastStartIndex = -1; // The index of the closest previous node which has been labeled as a start node.
    string lastStartType = null; // The type of the closest previous node which has been labeled as a start node.
    int advanceNodeIndex; // The index of the node which will be labeled in this iteration of advancing the parse.
    Parse advanceNode = null; // The node which will be labeled in this iteration of advancing the parse.
    Parse[] children = inputParse.GetChildren();
    int nodeCount = children.Length;

    // Determines which node needs to be labeled and records prior labels.
    // If every node is already labeled, advanceNodeIndex ends at nodeCount and
    // advanceNode remains the last child (presumably intentional — confirm).
    for (advanceNodeIndex = 0; advanceNodeIndex < nodeCount; advanceNodeIndex++)
    {
        advanceNode = children[advanceNodeIndex];
        if (advanceNode.Label == null)
        {
            break;
        }
        else if (startTypeMap.ContainsKey(advanceNode.Label))
        {
            lastStartType = startTypeMap[advanceNode.Label];
            lastStartNode = advanceNode;
            lastStartIndex = advanceNodeIndex;
        }
    }
    var newParsesList = new List<Parse>(buildModel.OutcomeCount);
    // Call build model for the node being advanced.
    buildModel.Evaluate(buildContextGenerator.GetContext(children, advanceNodeIndex), buildProbabilities);
    double buildProbabilitiesSum = 0;
    // Consume build outcomes from most to least probable until mass Q is covered.
    while (buildProbabilitiesSum < qParam)
    {
        // Find the largest unadvanced labeling.
        int highestBuildProbabilityIndex = 0;
        for (int probabilityIndex = 1; probabilityIndex < buildProbabilities.Length; probabilityIndex++)
        {
            // for each build outcome
            if (buildProbabilities[probabilityIndex] > buildProbabilities[highestBuildProbabilityIndex])
            {
                highestBuildProbabilityIndex = probabilityIndex;
            }
        }
        if (buildProbabilities[highestBuildProbabilityIndex] == 0)
        {
            break;
        }
        double highestBuildProbability = buildProbabilities[highestBuildProbabilityIndex];
        buildProbabilities[highestBuildProbabilityIndex] = 0; // zero out so new max can be found
        buildProbabilitiesSum += highestBuildProbability;
        string tag = buildModel.GetOutcomeName(highestBuildProbabilityIndex);
        if (highestBuildProbabilityIndex == topStartIndex)
        {
            // can't have top until complete
            continue;
        }
        if (startTypeMap.ContainsKey(tag))
        {
            // update last start
            lastStartIndex = advanceNodeIndex;
            lastStartNode = advanceNode;
            lastStartType = startTypeMap[tag];
        }
        else if (continueTypeMap.ContainsKey(tag))
        {
            if (lastStartNode == null || lastStartType != continueTypeMap[tag])
            {
                continue; // Cont must match previous start or continue
            }
        }
        var newParse1 = (Parse) inputParse.Clone(); // clone parse
        if (CreateDerivationString)
        {
            newParse1.AppendDerivationBuffer(highestBuildProbabilityIndex.ToString(System.Globalization.CultureInfo.InvariantCulture));
            newParse1.AppendDerivationBuffer("-");
        }
        newParse1.SetChild(advanceNodeIndex, tag); // replace constituent labeled
        newParse1.AddProbability(Math.Log(highestBuildProbability));
        // Check model decides between reduce (complete) and shift (incomplete).
        checkModel.Evaluate(checkContextGenerator.GetContext(newParse1.GetChildren(), lastStartType, lastStartIndex, advanceNodeIndex), checkProbabilities);
        Parse newParse2 = newParse1;
        if (checkProbabilities[completeIndex] > qOpp)
        {
            // make sure a reduce is likely
            newParse2 = (Parse) newParse1.Clone();
            if (CreateDerivationString)
            {
                newParse2.AppendDerivationBuffer("1");
                newParse2.AppendDerivationBuffer(".");
            }
            newParse2.AddProbability(System.Math.Log(checkProbabilities[1]));
            var constituent = new Parse[advanceNodeIndex - lastStartIndex + 1];
            bool isFlat = true;
            // first
            constituent[0] = lastStartNode;
            if (constituent[0].Type != constituent[0].Head.Type)
            {
                isFlat = false;
            }
            // last
            constituent[advanceNodeIndex - lastStartIndex] = advanceNode;
            if (isFlat && constituent[advanceNodeIndex - lastStartIndex].Type != constituent[advanceNodeIndex - lastStartIndex].Head.Type)
            {
                isFlat = false;
            }
            // middle
            for (int constituentIndex = 1; constituentIndex < advanceNodeIndex - lastStartIndex; constituentIndex++)
            {
                constituent[constituentIndex] = children[constituentIndex + lastStartIndex];
                if (isFlat && constituent[constituentIndex].Type != constituent[constituentIndex].Head.Type)
                {
                    isFlat = false;
                }
            }
            if (!isFlat)
            {
                // flat chunks are done by chunker
                newParse2.Insert(new Parse(inputParse.Text, new Util.Span(lastStartNode.Span.Start, advanceNode.Span.End), lastStartType, checkProbabilities[1], headRules.GetHead(constituent, lastStartType)));
                newParsesList.Add(newParse2);
            }
        }
        if (checkProbabilities[incompleteIndex] > qOpp)
        {
            // make sure a shift is likely
            if (CreateDerivationString)
            {
                newParse1.AppendDerivationBuffer("0");
                newParse1.AppendDerivationBuffer(".");
            }
            if (advanceNodeIndex != nodeCount - 1)
            {
                // can't shift last element
                newParse1.AddProbability(Math.Log(checkProbabilities[0]));
                newParsesList.Add(newParse1);
            }
        }
    }
    Parse[] newParses = newParsesList.ToArray();
    return newParses;
}
///<summary>
///Advances the parse by assigning it POS tags and returns multiple tag sequences.
///</summary>
///<param name="inputParse">
///The parse to be tagged.
///</param>
///<returns>
///Parses with different pos-tag sequence assignments.
///</returns>
private Parse[] AdvanceTags(Parse inputParse)
{
    Parse[] children = inputParse.GetChildren();
    var words = children.Select(child => child.ToString()).ToArray();
    var probabilities = new double[words.Length];

    Util.Sequence[] tagSequences = posTagger.TopKSequences(words);
    if (tagSequences.Length == 0)
    {
        Console.Error.WriteLine("no tag sequence");
    }

    var newParses = new Parse[tagSequences.Length];
    for (int sequenceIndex = 0; sequenceIndex < tagSequences.Length; sequenceIndex++)
    {
        Util.Sequence sequence = tagSequences[sequenceIndex];
        string[] tags = sequence.Outcomes.ToArray();
        sequence.GetProbabilities(probabilities);

        // Clone copies only the top level; tagged tokens are inserted below.
        newParses[sequenceIndex] = (Parse)inputParse.Clone();
        if (CreateDerivationString)
        {
            newParses[sequenceIndex].AppendDerivationBuffer(sequenceIndex.ToString(System.Globalization.CultureInfo.InvariantCulture));
            newParses[sequenceIndex].AppendDerivationBuffer(".");
        }

        for (int wordIndex = 0; wordIndex < words.Length; wordIndex++)
        {
            Parse wordParse = children[wordIndex];
            double wordProbability = probabilities[wordIndex];
            newParses[sequenceIndex].Insert(
                new Parse(wordParse.Text, wordParse.Span, tags[wordIndex], wordProbability));
            newParses[sequenceIndex].AddProbability(Math.Log(wordProbability));
        }
    }
    return newParses;
}
///<summary>
///Returns the top chunk sequences for the specified parse.
///</summary>
///<param name="inputParse">
///A pos-tag assigned parse.
///</param>
///<param name="minChunkScore">
///The minimum probability for an allowed chunk sequence.
///</param>
///<returns>
///The top chunk assignments to the specified parse, one advanced parse per
///chunker sequence.
///</returns>
private Parse[] AdvanceChunks(Parse inputParse, double minChunkScore)
{
    // Gather the chunker inputs: one head word and one POS tag per child.
    Parse[] children = inputParse.GetChildren();
    var words = new string[children.Length];
    var parseTags = new string[words.Length];
    var probabilities = new double[words.Length];
    for (int childParseIndex = 0, childParseCount = children.Length; childParseIndex < childParseCount; childParseIndex++)
    {
        Parse currentChildParse = children[childParseIndex];
        words[childParseIndex] = currentChildParse.Head.ToString();
        parseTags[childParseIndex] = currentChildParse.Type;
    }
    // The chunker score is relative to the parse so far, hence the adjustment
    // by the parse's accumulated (log) probability.
    Util.Sequence[] chunkerSequences = basalChunker.TopKSequences(words, parseTags, minChunkScore - inputParse.Probability);
    var newParses = new Parse[chunkerSequences.Length];
    for (int sequenceIndex = 0, sequenceCount = chunkerSequences.Length; sequenceIndex < sequenceCount; sequenceIndex++)
    {
        newParses[sequenceIndex] = (Parse) inputParse.Clone(); //copies top level
        if (CreateDerivationString)
        {
            newParses[sequenceIndex].AppendDerivationBuffer(sequenceIndex.ToString(System.Globalization.CultureInfo.InvariantCulture));
            newParses[sequenceIndex].AppendDerivationBuffer(".");
        }
        string[] tags = chunkerSequences[sequenceIndex].Outcomes.ToArray();
        chunkerSequences[sequenceIndex].GetProbabilities(probabilities);
        // State machine over the chunk tags: [start, end] delimit the chunk
        // being built, type is its label (null = not inside a chunk).
        int start = -1;
        int end = 0;
        string type = null;
        // Note: tagIndex runs one PAST the last tag so the final open chunk
        // is flushed on the extra iteration.
        for (int tagIndex = 0; tagIndex <= tags.Length; tagIndex++)
        {
            if (tagIndex != tags.Length)
            {
                newParses[sequenceIndex].AddProbability(Math.Log(probabilities[tagIndex]));
            }
            if (tagIndex != tags.Length && tags[tagIndex].StartsWith(ContinuePrefix))
            {
                // if continue just update end chunking tag don't use mContinueTypeMap
                end = tagIndex;
            }
            else
            {
                //make previous constituent if it exists
                if (type != null)
                {
                    Parse startParse = children[start];
                    Parse endParse = children[end];
                    var consitituents = new Parse[end - start + 1];
                    consitituents[0] = startParse;
                    if (end - start != 0)
                    {
                        consitituents[end - start] = endParse;
                        for (int constituentIndex = 1; constituentIndex < end - start; constituentIndex++)
                        {
                            consitituents[constituentIndex] = children[constituentIndex + start];
                        }
                    }
                    // Insert the completed chunk with probability 1; the tag
                    // probabilities were already folded in above.
                    newParses[sequenceIndex].Insert(new Parse(startParse.Text, new Util.Span(startParse.Span.Start, endParse.Span.End), type, 1, headRules.GetHead(consitituents, type)));
                }
                if (tagIndex != tags.Length)
                {
                    //update for new constituent
                    if (tags[tagIndex].StartsWith(StartPrefix))
                    {
                        // don't use mStartTypeMap these are chunk tags
                        type = tags[tagIndex].Substring(StartPrefix.Length);
                        start = tagIndex;
                        end = tagIndex;
                    }
                    else
                    {
                        // "other" tag: token is outside any chunk.
                        type = null;
                    }
                }
            }
        }
    }
    return newParses;
}
/// <summary>
/// Parses the text in the input box and renders the resulting tree in the
/// LithiumControl, disabling the input controls while the parse runs.
/// </summary>
private void ShowParse()
{
    if (txtInput.Text.Length == 0)
    {
        return;
    }

    // Lock down the UI while parsing runs.
    txtInput.Enabled = false;
    btnParse.Enabled = false;
    this.Cursor = Cursors.WaitCursor;
    lithiumControl.NewDiagram();

    // Parse the input, creating the parser lazily on first use.
    if (mParser == null)
    {
        mParser = new EnglishTreebankParser(mModelPath, true, false);
    }
    mParse = mParser.DoParse(txtInput.Text);

    // Skip the artificial TOP node so the tree starts at the real root.
    if (mParse.Type == MaximumEntropyParser.TopNode)
    {
        mParse = mParse.GetChildren()[0];
    }

    // Populate and draw the diagram.
    ShapeBase root = this.lithiumControl.Root;
    root.Text = mParse.Type;
    root.Visible = true;
    AddChildNodes(root, mParse.GetChildren());
    root.Expand();
    this.lithiumControl.DrawTree();

    // Restore the UI.
    this.Cursor = Cursors.Default;
    txtInput.Enabled = true;
    btnParse.Enabled = true;
}
/// <summary>
/// Promotes a completed parse to a top node: folds the build-model score for
/// starting TOP and the check-model score for a complete TOP into the parse
/// probability, then relabels the parse as TopNode.
/// </summary>
/// <param name="inputParse">The completed parse to promote.</param>
private void AdvanceTop(Parse inputParse)
{
    // Log-probability that the children begin a TOP constituent.
    mBuildModel.Evaluate(mBuildContextGenerator.GetContext(inputParse.GetChildren(), 0), mBuildProbabilities);
    inputParse.AddProbability(System.Math.Log(mBuildProbabilities[mTopStartIndex]));

    // Log-probability that the TOP constituent is complete.
    mCheckModel.Evaluate(mCheckContextGenerator.GetContext(inputParse.GetChildren(), TopNode, 0, 0), mCheckProbabilities);
    inputParse.AddProbability(System.Math.Log(mCheckProbabilities[mCompleteIndex]));

    inputParse.Type = TopNode;
}