/// <summary>
/// Recursively mirrors the parse tree below <paramref name="currentShape"/> in the diagram.
/// Non-token nodes become coloured shapes (dark goldenrod for POS tags, steel blue for other
/// constituents); token nodes become ivory leaf shapes labelled with the surface word.
/// </summary>
/// <param name="currentShape">The diagram shape to attach children to.</param>
/// <param name="childParses">The child parses to render beneath it.</param>
private void AddChildNodes(ShapeBase currentShape, Parse[] childParses)
{
    foreach (Parse childParse in childParses)
    {
        if (childParse.Type == MaximumEntropyParser.TokenNode)
        {
            // Leaf: pull the word out of the sentence text via the parse span.
            Span parseSpan = childParse.Span;
            string token = childParse.Text.Substring(parseSpan.Start, (parseSpan.End) - (parseSpan.Start));
            ShapeBase leafShape = currentShape.AddChild(token);
            leafShape.ShapeColor = Color.Ivory;
        }
        else
        {
            // Constituent or POS-tag node: colour by kind, then descend.
            ShapeBase childShape = currentShape.AddChild(childParse.Type);
            childShape.ShapeColor = childParse.IsPosTag ? Color.DarkGoldenrod : Color.SteelBlue;
            AddChildNodes(childShape, childParse.GetChildren());
            childShape.Expand();
        }
    }
}
/// <summary>
/// Adds two surrounding-context features for the constituent at the given relative index:
/// "s{index}=head|type|headType" and a head-word back-off variant "s{index}*=type|headType".
/// A null parse (off either end of the sentence) is encoded with the end-of-sentence marker.
/// </summary>
/// <param name="inputParse">The surrounding constituent, or null if outside the sentence.</param>
/// <param name="index">The position of the constituent relative to the focus.</param>
/// <param name="type">The type of the proposed constituent.</param>
/// <param name="features">The feature list being accumulated.</param>
private void Surround(Parse inputParse, int index, string type, List<string> features)
{
    string headPart;
    string headTypePart;
    if (inputParse == null)
    {
        headPart = mEndOfSentence;
        headTypePart = mEndOfSentence;
    }
    else
    {
        headPart = inputParse.Head.ToString();
        headTypePart = inputParse.Head.Type;
    }
    features.Add("s" + index + "=" + headPart + "|" + type + "|" + headTypePart);
    features.Add("s" + index + "*=" + type + "|" + headTypePart);
}
/// <summary>
/// Runs the English name finder over a parsed sentence, lazily creating the finder
/// from the "namefind" model directory on first use.
/// </summary>
/// <param name="sentenceParse">The parsed sentence to scan for named entities.</param>
/// <returns>The name finder's output for all seven entity models.</returns>
private string FindNames(OpenNLP.Tools.Parser.Parse sentenceParse)
{
    if (_nameFinder == null)
    {
        _nameFinder = new OpenNLP.Tools.NameFind.EnglishNameFinder(_modelPath + "namefind\\");
    }
    string[] models = { "date", "location", "money", "organization", "percentage", "person", "time" };
    return _nameFinder.GetNames(models, sentenceParse);
}
/// <summary>
/// Parses each input sentence and feeds the whole batch to the treebank coreference
/// linker, which is created lazily from the "coref" model directory on first use.
/// </summary>
/// <param name="sentences">The raw sentences to link.</param>
/// <returns>The linker's coreference output over all sentences.</returns>
private string IdentifyCoreferents(IEnumerable<string> sentences)
{
    if (_coreferenceFinder == null)
    {
        _coreferenceFinder = new OpenNLP.Tools.Lang.English.TreebankLinker(_modelPath + "coref");
    }
    var parsedSentences = new List<OpenNLP.Tools.Parser.Parse>();
    foreach (string sentence in sentences)
    {
        parsedSentences.Add(ParseSentence(sentence));
    }
    return _coreferenceFinder.GetCoreferenceParse(parsedSentences.ToArray());
}
/// <summary>
/// Encodes a single constituent as a feature string "{index}=[label|]type|head".
/// The label is included only for positions before the focus (index &lt; 0); a null parse
/// (outside the sentence) is encoded as "{index}=eos|eos|eos".
/// </summary>
/// <param name="inputParse">The constituent to encode, or null if off the sentence edge.</param>
/// <param name="index">The constituent's position relative to the focus.</param>
/// <returns>The encoded feature string.</returns>
private string MakeConstituent(Parse inputParse, int index)
{
    StringBuilder feature = new StringBuilder(20);
    feature.Append(index).Append("=");
    if (inputParse == null)
    {
        // Off the edge of the sentence: substitute the end-of-sentence marker throughout.
        feature.Append(mEndOfSentence).Append("|").Append(mEndOfSentence).Append("|").Append(mEndOfSentence);
    }
    else
    {
        if (index < 0)
        {
            feature.Append(inputParse.Label).Append("|");
        }
        feature.Append(inputParse.Type).Append("|").Append(inputParse.Head.ToString());
    }
    return feature.ToString();
}
/// <summary>
/// Shows the parse in the LithiumControl.
/// </summary>
/// <param name="parse">The parse to display</param>
public void ShowParse(Parse parse)
{
    lithiumControl.NewDiagram();

    // Skip the artificial TOP node so the first real constituent becomes the root.
    if (parse.Type == MaximumEntropyParser.TopNode)
    {
        parse = parse.GetChildren()[0];
    }

    // Display the parse result.
    ShapeBase root = lithiumControl.Root;
    root.Text = parse.Type;
    root.Visible = true;
    AddChildNodes(root, parse.GetChildren());
    root.Expand();

    lithiumControl.DrawTree();
}
/// <summary>
/// Adds the four pairwise constituent features ("cil", "ci*l", "cil*", "ci*l*") for the
/// first/second parse pair, covering every combination of including or backing off the
/// head word of each side.
/// </summary>
/// <param name="firstParse">The first constituent of the pair.</param>
/// <param name="secondParse">The second constituent of the pair.</param>
/// <param name="type">The type of the proposed constituent.</param>
/// <param name="features">The feature list being accumulated.</param>
private void CheckConstituent(Parse firstParse, Parse secondParse, string type, List<string> features)
{
    // "type|head" form of each side; the back-off variants use the bare type.
    string firstWithHead = firstParse.Type + "|" + firstParse.Head.ToString();
    string secondWithHead = secondParse.Type + "|" + secondParse.Head.ToString();
    features.Add("cil=" + type + "," + firstWithHead + "," + secondWithHead);
    features.Add("ci*l=" + type + "," + firstParse.Type + "," + secondWithHead);
    features.Add("cil*=" + type + "," + firstWithHead + "," + secondParse.Type);
    features.Add("ci*l*=" + type + "," + firstParse.Type + "," + secondParse.Type);
}
/// <summary>
/// Computes the head parses for this parse and its sub-parses and stores this information
/// in the parse data structure.
/// </summary>
/// <param name="rules">
/// The head rules which determine how the head of the parse is computed.
/// </param>
public virtual void UpdateHeads(IHeadRules rules)
{
    if (mParts == null || mParts.Count == 0)
    {
        // A node with no parts is its own head.
        mHead = this;
        return;
    }

    // Resolve heads bottom-up before asking the rules for this node's head.
    foreach (Parse currentParse in mParts)
    {
        currentParse.UpdateHeads(rules);
    }

    mHead = rules.GetHead(mParts.ToArray(), mType);
    if (mHead == null)
    {
        // The rules found no head; fall back to this node.
        mHead = this;
    }
}
/// <summary>
/// Returns the index of this specified child.
/// </summary>
/// <param name="child">
/// A child of this parse.
/// </param>
/// <returns>
/// the index of this specified child or -1 if the specified child is not a child of this parse.
/// </returns>
public int IndexOf(Parse child)
{
    // Delegates to List<T>.IndexOf, which compares via EqualityComparer<Parse>.Default.
    return mParts.IndexOf(child);
}
/// <summary>
/// Generates a Parse structure from the specified tree-bank style parse string.
/// </summary>
/// <param name="parse">
/// A tree-bank style parse string.
/// </param>
/// <returns>
/// a Parse structure for the specified tree-bank style parse string.
/// </returns>
public static Parse FromParseString(string parse)
{
    StringBuilder textBuffer = new StringBuilder();
    // Character offset (within the reconstructed sentence text) of the next token.
    int offset = 0;
    // Open constituents: (type, start offset) pushed at '(' and popped at ')'.
    Stack<Util.Pair<string, int>> openNodes = new Stack<Util.Pair<string, int>>();
    // Completed constituents and token spans, in discovery order.
    List<Util.Pair<string, Util.Span>> constituents = new List<Util.Pair<string, Util.Span>>();

    for (int position = 0; position < parse.Length; position++)
    {
        char currentChar = parse[position];
        if (currentChar == '(')
        {
            string rest = parse.Substring(position + 1);
            string type = GetType(rest);
            if (type == null)
            {
                throw new ParseException("null type for: " + rest);
            }
            string token = GetToken(rest);
            openNodes.Push(new Util.Pair<string, int>(type, offset));
            // "-NONE-" nodes carry no surface text, so they add no token span.
            if ((object) token != null && type != "-NONE-")
            {
                constituents.Add(new Util.Pair<string, Util.Span>(MaximumEntropyParser.TokenNode, new Util.Span(offset, offset + token.Length)));
                textBuffer.Append(token).Append(" ");
                offset += token.Length + 1;
            }
        }
        else if (currentChar == ')')
        {
            Util.Pair<string, int> node = openNodes.Pop();
            if (node.FirstValue != "-NONE-")
            {
                // offset - 1 trims the trailing space appended after the last token.
                constituents.Add(new Util.Pair<string, Util.Span>(node.FirstValue, new Util.Span(node.SecondValue, offset - 1)));
            }
        }
    }

    string text = textBuffer.ToString();
    Parse rootParse = new Parse(text, new Util.Span(0, text.Length), MaximumEntropyParser.TopNode, 1);
    foreach (Util.Pair<string, Util.Span> constituent in constituents)
    {
        // The top node already exists as the root; insert everything else beneath it.
        if (constituent.FirstValue != MaximumEntropyParser.TopNode)
        {
            rootParse.Insert(new Parse(text, constituent.SecondValue, constituent.FirstValue, 1));
        }
    }
    return rootParse;
}
/// <summary>
/// Creates a parse node covering <paramref name="span"/> of <paramref name="parseText"/>
/// with an explicitly supplied head parse; delegates everything else to the four-argument constructor.
/// </summary>
/// <param name="parseText">The full text this node is a parse of.</param>
/// <param name="span">The character span of this constituent within the text.</param>
/// <param name="type">The constituent (or POS tag) label.</param>
/// <param name="probability">The probability associated with this constituent.</param>
/// <param name="head">The head parse of this constituent.</param>
public Parse(string parseText, Util.Span span, string type, double probability, Parse head) : this(parseText, span, type, probability)
{
    Head = head;
}
// Methods ------------------------------

/// <summary>
/// Returns a parse for the specified parse of tokens.
/// </summary>
/// <param name="flatParse">
/// A flat parse containing only tokens and a root node, p.
/// </param>
/// <param name="parseCount">
/// the number of parses required
/// </param>
/// <returns>
/// A full parse of the specified tokens or the flat chunks of the tokens if a full parse could not be found.
/// </returns>
public virtual Parse[] FullParse(Parse flatParse, int parseCount)
{
    if (CreateDerivationString)
    {
        flatParse.InitializeDerivationBuffer();
    }
    // Beam search over derivations: oldDerivationsHeap is the current frontier,
    // parses accumulates completed parses.
    var oldDerivationsHeap = new Util.SortedSet<Parse>();
    var parses = new Util.SortedSet<Parse>();
    int derivationLength = 0;
    // Cap on the number of advance stages; presumably two build/check steps per child
    // plus the tag/chunk/top stages — TODO confirm the exact derivation of this bound.
    int maxDerivationLength = 2 * flatParse.ChildCount + 3;
    oldDerivationsHeap.Add(flatParse);
    Parse guessParse = null;
    double bestComplete = - 100000; //approximating -infinity/0 in ln domain
    var buildProbabilities = new double[this.buildModel.OutcomeCount];
    var checkProbabilities = new double[this.checkModel.OutcomeCount];
    // m and k are fields declared elsewhere — from usage here, m caps the number of
    // completed parses collected and k caps derivations advanced per stage (beam width);
    // q is the probability mass passed to AdvanceParses. TODO confirm against their declarations.
    while (parses.Count < m && derivationLength < maxDerivationLength)
    {
        var newDerivationsHeap = new Util.TreeSet<Parse>();
        if (oldDerivationsHeap.Count > 0)
        {
            int derivationsProcessed = 0;
            // for each derivation
            foreach (Parse currentParse in oldDerivationsHeap)
            {
                derivationsProcessed++;
                if (derivationsProcessed >= k)
                {
                    break;
                }
                //Parse currentParse = (Parse) pi.Current;
                if (currentParse.Probability < bestComplete) //this parse and the ones which follow will never win, stop advancing.
                {
                    break;
                }
                // Remember a fallback from the post-chunking stage in case no complete
                // parse is ever found.
                if (guessParse == null && derivationLength == 2)
                {
                    guessParse = currentParse;
                }
                Parse[] newDerivations = null;
                if (0 == derivationLength)
                {
                    // Stage 0: assign POS-tag sequences.
                    newDerivations = AdvanceTags(currentParse);
                }
                else if (derivationLength == 1)
                {
                    // Stage 1: chunk. Once the new heap holds k entries, only sequences
                    // that beat its current worst member are worth generating.
                    if (newDerivationsHeap.Count < k)
                    {
                        newDerivations = AdvanceChunks(currentParse, bestComplete);
                    }
                    else
                    {
                        newDerivations = AdvanceChunks(currentParse, newDerivationsHeap.Last().Probability);
                    }
                }
                else
                {
                    // derivationLength > 1: build/check stages.
                    newDerivations = AdvanceParses(currentParse, q, buildProbabilities, checkProbabilities);
                }
                if (newDerivations != null)
                {
                    for (int currentDerivation = 0, derivationCount = newDerivations.Length; currentDerivation < derivationCount; currentDerivation++)
                    {
                        if (newDerivations[currentDerivation].IsComplete)
                        {
                            // Score the root, track the best completed probability, and
                            // move the parse to the results set.
                            AdvanceTop(newDerivations[currentDerivation], buildProbabilities, checkProbabilities);
                            if (newDerivations[currentDerivation].Probability > bestComplete)
                            {
                                bestComplete = newDerivations[currentDerivation].Probability;
                            }
                            parses.Add(newDerivations[currentDerivation]);
                        }
                        else
                        {
                            newDerivationsHeap.Add(newDerivations[currentDerivation]);
                        }
                    }
                    //RN added sort
                    newDerivationsHeap.Sort();
                }
                else
                {
                    //Console.Error.WriteLine("Couldn't advance parse " + derivationLength + " stage " + derivationsProcessed + "!\n");
                }
            }
            derivationLength++;
            oldDerivationsHeap = newDerivationsHeap;
        }
        else
        {
            // Frontier exhausted before hitting the parse-count or length limits.
            break;
        }
    }
    //RN added sort
    parses.Sort();
    if (parses.Count == 0)
    {
        //Console.Error.WriteLine("Couldn't find parse for: " + flatParse);
        //oFullParse = (Parse) mOldDerivationsHeap.First();
        // No complete parse found: fall back to the best post-chunking derivation
        // (note: guessParse may still be null here).
        return new Parse[] {guessParse};
    }
    else if (parseCount == 1)
    {
        //RN added parent adjustment
        Parse topParse = parses.First();
        topParse.UpdateChildParents();
        return new Parse[] {topParse};
    }
    else
    {
        // Take up to parseCount parses in score order.
        var topParses = new List<Parse>(parseCount);
        while(!parses.IsEmpty() && topParses.Count < parseCount)
        {
            Parse topParse = parses.First();
            //RN added parent adjustment
            topParse.UpdateChildParents();
            topParses.Add(topParse);
            parses.Remove(topParse);
        }
        return topParses.ToArray();
    }
}
/// <summary>
/// Builds a flat "INC" parse over the (converted) tokens and asks the parser for the
/// requested number of full parses.
/// </summary>
/// <param name="tokens">The raw tokens of one sentence.</param>
/// <param name="requestedParses">How many parses to request from the parser.</param>
/// <returns>The parser's results, or null when the token sequence is empty.</returns>
private Parse[] DoParse(IEnumerable<string> tokens, int requestedParses)
{
    var lineBuilder = new System.Text.StringBuilder();
    var convertedTokens = new List<string>();
    foreach (string rawToken in tokens)
    {
        string convertedToken = ConvertToken(rawToken);
        convertedTokens.Add(convertedToken);
        lineBuilder.Append(convertedToken).Append(" ");
    }
    if (lineBuilder.Length == 0)
    {
        return null;
    }

    // Drop the trailing space appended after the last token.
    string text = lineBuilder.ToString(0, lineBuilder.Length - 1);
    var currentParse = new Parse(text, new Util.Span(0, text.Length), "INC", 1, null);

    // Insert one token node per word; offsets advance past the word and its separator.
    int start = 0;
    foreach (string token in convertedTokens)
    {
        currentParse.Insert(new Parse(text, new Util.Span(start, start + token.Length), MaximumEntropyParser.TokenNode, 0));
        start += token.Length + 1;
    }
    return _parser.FullParse(currentParse, requestedParses);
}
/// <summary>
/// Returns the predictive context used to determine how the constituent at the specified index
/// should be combined with other constituents.
/// </summary>
/// <param name="constituents">
/// The constituents which have yet to be combined into new constituents.
/// </param>
/// <param name="index">
/// The index of the constituent which is being considered.
/// </param>
/// <returns>
/// the context for building constituents at the specified index.
/// </returns>
public virtual string[] GetContext(Parse[] constituents, int index)
{
    List<string> features = new List<string>(100);
    int constituentCount = constituents.Length;

    //default
    features.Add("default");

    // Window of constituents around the focus position; null when off either end.
    Parse previousPreviousParse = null;
    Parse previousParse = null;
    Parse nextParse = null;
    Parse nextNextParse = null;
    if (index - 2 >= 0)
    {
        previousPreviousParse = constituents[index - 2];
    }
    if (index - 1 >= 0)
    {
        previousParse = constituents[index - 1];
    }
    Parse currentParse = constituents[index];
    if (index + 1 < constituentCount)
    {
        nextParse = constituents[index + 1];
    }
    if (index + 2 < constituentCount)
    {
        nextNextParse = constituents[index + 2];
    }

    // Encoded forms of each window position, with and without the head word (back-off).
    string previousPreviousConstituent = MakeConstituent(previousPreviousParse, -2);
    string previousConstituent = MakeConstituent(previousParse, -1);
    string currentConstituent = MakeConstituent(currentParse, 0);
    string nextConstituent = MakeConstituent(nextParse, 1);
    string nextNextConstituent = MakeConstituent(nextNextParse, 2);
    string previousPreviousConstituentBackOff = MakeConstituentBackOff(previousPreviousParse, -2);
    string previousConstituentBackOff = MakeConstituentBackOff(previousParse, -1);
    string currentConstituentBackOff = MakeConstituentBackOff(currentParse, 0);
    string nextConstituentBackOff = MakeConstituentBackOff(nextParse, 1);
    string nextNextConstituentBackOff = MakeConstituentBackOff(nextNextParse, 2);

    // cons(-2), cons(-1), cons(0), cons(1), cons(2)
    features.Add(previousPreviousConstituent);
    features.Add(previousPreviousConstituentBackOff);
    features.Add(previousConstituent);
    features.Add(previousConstituentBackOff);
    features.Add(currentConstituent);
    features.Add(currentConstituentBackOff);
    features.Add(nextConstituent);
    features.Add(nextConstituentBackOff);
    features.Add(nextNextConstituent);
    features.Add(nextNextConstituentBackOff);

    // cons(-1,0), cons(0,1)
    features.Add(previousConstituent + "," + currentConstituent);
    features.Add(previousConstituentBackOff + "," + currentConstituent);
    features.Add(previousConstituent + "," + currentConstituentBackOff);
    features.Add(previousConstituentBackOff + "," + currentConstituentBackOff);
    features.Add(currentConstituent + "," + nextConstituent);
    features.Add(currentConstituentBackOff + "," + nextConstituent);
    features.Add(currentConstituent + "," + nextConstituentBackOff);
    features.Add(currentConstituentBackOff + "," + nextConstituentBackOff);

    // cons3(-2,-1,0), cons3(-1,0,1), cons3(0,1,2)
    features.Add(previousPreviousConstituent + "," + previousConstituent + "," + currentConstituent);
    features.Add(previousPreviousConstituentBackOff + "," + previousConstituent + "," + currentConstituent);
    features.Add(previousPreviousConstituent + "," + previousConstituentBackOff + "," + currentConstituent);
    features.Add(previousPreviousConstituentBackOff + "," + previousConstituentBackOff + "," + currentConstituent);
    features.Add(previousPreviousConstituentBackOff + "," + previousConstituentBackOff + "," + currentConstituentBackOff);
    features.Add(previousConstituent + "," + currentConstituent + "," + nextConstituent);
    features.Add(previousConstituentBackOff + "," + currentConstituent + "," + nextConstituent);
    features.Add(previousConstituent + "," + currentConstituent + "," + nextConstituentBackOff);
    features.Add(previousConstituentBackOff + "," + currentConstituent + "," + nextConstituentBackOff);
    features.Add(previousConstituentBackOff + "," + currentConstituentBackOff + "," + nextConstituentBackOff);
    features.Add(currentConstituent + "," + nextConstituent + "," + nextNextConstituent);
    features.Add(currentConstituent + "," + nextConstituentBackOff + "," + nextNextConstituent);
    features.Add(currentConstituent + "," + nextConstituent + "," + nextNextConstituentBackOff);
    features.Add(currentConstituent + "," + nextConstituentBackOff + "," + nextNextConstituentBackOff);
    features.Add(currentConstituentBackOff + "," + nextConstituentBackOff + "," + nextNextConstituentBackOff);

    // punct: closing brackets/quotes look back for their opening mark, and a comma
    // looks back for a preceding comma.
    string currentParseWord = currentParse.ToString();
    if (currentParseWord == "-RRB-")
    {
        AddMatchFeature(constituents, index, "-LRB-", "bracketsmatch", features);
    }
    if (currentParseWord == "-RCB-")
    {
        AddMatchFeature(constituents, index, "-LCB-", "bracketsmatch", features);
    }
    if (currentParseWord == PartsOfSpeech.RightCloseDoubleQuote)
    {
        AddMatchFeature(constituents, index, PartsOfSpeech.LeftOpenDoubleQuote, "quotesmatch", features);
    }
    if (currentParseWord == "'")
    {
        AddMatchFeature(constituents, index, "`", "quotesmatch", features);
    }
    if (currentParseWord == PartsOfSpeech.Comma)
    {
        AddMatchFeature(constituents, index, PartsOfSpeech.Comma, "iscomma", features);
    }
    if (currentParseWord == PartsOfSpeech.SentenceFinalPunctuation && index == constituentCount - 1)
    {
        // Scan left for the nearest start-labeled constituent; the feature fires only
        // when that constituent is the very first one.
        for (int parseIndex = index - 1; parseIndex >= 0; parseIndex--)
        {
            Parse testParse = constituents[parseIndex];
            if (testParse.Label.StartsWith(MaximumEntropyParser.StartPrefix))
            {
                if (parseIndex == 0)
                {
                    features.Add("endofsentence");
                }
                break;
            }
        }
    }
    return features.ToArray();
}

/// <summary>
/// Scans left from <paramref name="index"/> for a constituent whose text equals
/// <paramref name="openingMark"/>; if one is found before any constituent whose label
/// carries the start prefix, adds <paramref name="featureName"/> to the feature list.
/// Factored out of GetContext, which previously repeated this loop verbatim for each
/// bracket/quote/comma pairing.
/// </summary>
private static void AddMatchFeature(Parse[] constituents, int index, string openingMark, string featureName, List<string> features)
{
    for (int parseIndex = index - 1; parseIndex >= 0; parseIndex--)
    {
        Parse testParse = constituents[parseIndex];
        if (testParse.ToString() == openingMark)
        {
            features.Add(featureName);
            break;
        }
        if (testParse.Label.StartsWith(MaximumEntropyParser.StartPrefix))
        {
            break;
        }
    }
}
/// <summary>
/// Finds named entities of the requested model types within a parsed sentence.
/// </summary>
/// <param name="models">
/// Names of the entity models to apply (e.g. "person", "date"); CreateModels is
/// presumably responsible for loading/caching them — confirm against its definition.
/// </param>
/// <param name="data">The parse to scan for names.</param>
/// <returns>The result of processing the parse with the requested models.</returns>
public string GetNames(string[] models, Parse data)
{
    // Ensure the requested models are available before processing the parse.
    CreateModels(models);
    return ProcessParse(models, data);
}
/// <summary>
/// Parses the text in the input box and renders the resulting tree in the
/// LithiumControl, disabling the input controls for the duration of the parse.
/// Does nothing when the input box is empty.
/// </summary>
private void ShowParse()
{
    if (txtInput.Text.Length == 0)
    {
        return;
    }

    // Prepare the UI: lock out input while parsing.
    txtInput.Enabled = false;
    btnParse.Enabled = false;
    Cursor = Cursors.WaitCursor;
    lithiumControl.NewDiagram();

    // Do the parsing, creating the treebank parser lazily on first use.
    if (mParser == null)
    {
        mParser = new EnglishTreebankParser(mModelPath, true, false);
    }
    mParse = mParser.DoParse(txtInput.Text);
    if (mParse.Type == MaximumEntropyParser.TopNode)
    {
        // Skip the artificial TOP node.
        mParse = mParse.GetChildren()[0];
    }

    // Display the parse result.
    ShapeBase root = lithiumControl.Root;
    root.Text = mParse.Type;
    root.Visible = true;
    AddChildNodes(root, mParse.GetChildren());
    root.Expand();
    lithiumControl.DrawTree();

    // Restore the UI.
    Cursor = Cursors.Default;
    txtInput.Enabled = true;
    btnParse.Enabled = true;
}
/// <summary>
/// Promotes a complete derivation to a TOP parse: evaluates the build and check models
/// over the parse's children, folds both resulting log-probabilities into the parse
/// score, and relabels the node as the top node type.
/// </summary>
/// <param name="inputParse">The complete parse to promote.</param>
/// <param name="buildProbabilities">Scratch array receiving the build model's outcome distribution.</param>
/// <param name="checkProbabilities">Scratch array receiving the check model's outcome distribution.</param>
private void AdvanceTop(Parse inputParse, double[] buildProbabilities, double[] checkProbabilities)
{
    // Probability that the root starts a TOP constituent.
    buildModel.Evaluate(buildContextGenerator.GetContext(inputParse.GetChildren(), 0), buildProbabilities);
    inputParse.AddProbability(Math.Log(buildProbabilities[topStartIndex]));
    // Probability that the TOP constituent is complete.
    checkModel.Evaluate(checkContextGenerator.GetContext(inputParse.GetChildren(), TopNode, 0, 0), checkProbabilities);
    inputParse.AddProbability(Math.Log(checkProbabilities[completeIndex]));
    inputParse.Type = TopNode;
}
/// <summary>
/// Creates a parse node covering <paramref name="span"/> of <paramref name="parseText"/>
/// with an explicitly supplied head parse; delegates everything else to the four-argument constructor.
/// </summary>
/// <param name="parseText">The full text this node is a parse of.</param>
/// <param name="span">The character span of this constituent within the text.</param>
/// <param name="type">The constituent (or POS tag) label.</param>
/// <param name="probability">The probability associated with this constituent.</param>
/// <param name="head">The head parse of this constituent, stored directly in the backing field.</param>
public Parse(string parseText, Util.Span span, string type, double probability, Parse head) : this(parseText, span, type, probability)
{
    mHead = head;
}
///<summary>
///Advances the specified parse and returns an array of advanced parses whose probability accounts for
///more than the specified amount of probability mass, Q.
///</summary>
///<param name="inputParse">
///The parse to advance.
///</param>
///<param name="qParam">
///The amount of probability mass that should be accounted for by the advanced parses.
///</param>
///<param name="buildProbabilities">
///Scratch array receiving the build model's outcome distribution; consumed destructively below.
///</param>
///<param name="checkProbabilities">
///Scratch array receiving the check model's outcome distribution.
///</param>
///<returns>
///The advanced parses produced by labeling the next unlabeled node.
///</returns>
private Parse[] AdvanceParses(Parse inputParse, double qParam, double[] buildProbabilities, double[] checkProbabilities)
{
    // A complete/incomplete probability must exceed 1 - Q for the corresponding
    // reduce/shift derivation to be generated.
    double qOpp = 1 - qParam;
    Parse lastStartNode = null; // The closest previous node which has been labeled as a start node.
    int lastStartIndex = -1; // The index of the closest previous node which has been labeled as a start node.
    string lastStartType = null; // The type of the closest previous node which has been labeled as a start node.
    int advanceNodeIndex; // The index of the node which will be labeled in this iteration of advancing the parse.
    Parse advanceNode = null; // The node which will be labeled in this iteration of advancing the parse.
    Parse[] children = inputParse.GetChildren();
    int nodeCount = children.Length;

    //determines which node needs to be labeled and prior labels.
    for (advanceNodeIndex = 0; advanceNodeIndex < nodeCount; advanceNodeIndex++)
    {
        advanceNode = children[advanceNodeIndex];
        if (advanceNode.Label == null)
        {
            break;
        }
        else if (startTypeMap.ContainsKey(advanceNode.Label))
        {
            lastStartType = startTypeMap[advanceNode.Label];
            lastStartNode = advanceNode;
            lastStartIndex = advanceNodeIndex;
        }
    }
    var newParsesList = new List<Parse>(buildModel.OutcomeCount);
    //call build
    buildModel.Evaluate(buildContextGenerator.GetContext(children, advanceNodeIndex), buildProbabilities);
    double buildProbabilitiesSum = 0;
    // Consume build outcomes best-first until Q of the probability mass is covered.
    while (buildProbabilitiesSum < qParam)
    {
        // The largest unadvanced labeling.
        int highestBuildProbabilityIndex = 0;
        for (int probabilityIndex = 1; probabilityIndex < buildProbabilities.Length; probabilityIndex++)
        {
            //for each build outcome
            if (buildProbabilities[probabilityIndex] > buildProbabilities[highestBuildProbabilityIndex])
            {
                highestBuildProbabilityIndex = probabilityIndex;
            }
        }
        if (buildProbabilities[highestBuildProbabilityIndex] == 0)
        {
            break;
        }
        double highestBuildProbability = buildProbabilities[highestBuildProbabilityIndex];
        buildProbabilities[highestBuildProbabilityIndex] = 0; //zero out so new max can be found
        buildProbabilitiesSum += highestBuildProbability;
        string tag = buildModel.GetOutcomeName(highestBuildProbabilityIndex);
        if (highestBuildProbabilityIndex == topStartIndex)
        {
            // can't have top until complete
            continue;
        }
        if (startTypeMap.ContainsKey(tag))
        {
            //update last start
            lastStartIndex = advanceNodeIndex;
            lastStartNode = advanceNode;
            lastStartType = startTypeMap[tag];
        }
        else if (continueTypeMap.ContainsKey(tag))
        {
            if (lastStartNode == null || lastStartType != continueTypeMap[tag])
            {
                continue; //Cont must match previous start or continue
            }
        }
        var newParse1 = (Parse) inputParse.Clone(); //clone parse
        if (CreateDerivationString)
        {
            newParse1.AppendDerivationBuffer(highestBuildProbabilityIndex.ToString(System.Globalization.CultureInfo.InvariantCulture));
            newParse1.AppendDerivationBuffer("-");
        }
        newParse1.SetChild(advanceNodeIndex, tag); //replace constituent labeled
        newParse1.AddProbability(Math.Log(highestBuildProbability));
        //check
        checkModel.Evaluate(checkContextGenerator.GetContext(newParse1.GetChildren(), lastStartType, lastStartIndex, advanceNodeIndex), checkProbabilities);
        Parse newParse2 = newParse1;
        if (checkProbabilities[completeIndex] > qOpp)
        {
            //make sure a reduce is likely
            newParse2 = (Parse) newParse1.Clone();
            if (CreateDerivationString)
            {
                newParse2.AppendDerivationBuffer("1");
                newParse2.AppendDerivationBuffer(".");
            }
            // NOTE(review): the literal indexes 1 and 0 below look like hard-coded
            // completeIndex/incompleteIndex — confirm against those field definitions.
            newParse2.AddProbability(System.Math.Log(checkProbabilities[1]));
            // Gather the [lastStartIndex, advanceNodeIndex] children that the proposed
            // reduce would combine, tracking whether the span is entirely flat.
            var constituent = new Parse[advanceNodeIndex - lastStartIndex + 1];
            bool isFlat = true;
            //first
            constituent[0] = lastStartNode;
            if (constituent[0].Type != constituent[0].Head.Type)
            {
                isFlat = false;
            }
            //last
            constituent[advanceNodeIndex - lastStartIndex] = advanceNode;
            if (isFlat && constituent[advanceNodeIndex - lastStartIndex].Type != constituent[advanceNodeIndex - lastStartIndex].Head.Type)
            {
                isFlat = false;
            }
            //middle
            for (int constituentIndex = 1; constituentIndex < advanceNodeIndex - lastStartIndex; constituentIndex++)
            {
                constituent[constituentIndex] = children[constituentIndex + lastStartIndex];
                if (isFlat && constituent[constituentIndex].Type != constituent[constituentIndex].Head.Type)
                {
                    isFlat = false;
                }
            }
            if (!isFlat)
            {
                //flat chunks are done by chunker
                newParse2.Insert(new Parse(inputParse.Text, new Util.Span(lastStartNode.Span.Start, advanceNode.Span.End), lastStartType, checkProbabilities[1], headRules.GetHead(constituent, lastStartType)));
                newParsesList.Add(newParse2);
            }
        }
        if (checkProbabilities[incompleteIndex] > qOpp)
        {
            //make sure a shift is likely
            if (CreateDerivationString)
            {
                newParse1.AppendDerivationBuffer("0");
                newParse1.AppendDerivationBuffer(".");
            }
            if (advanceNodeIndex != nodeCount - 1)
            {
                //can't shift last element
                newParse1.AddProbability(Math.Log(checkProbabilities[0]));
                newParsesList.Add(newParse1);
            }
        }
    }
    Parse[] newParses = newParsesList.ToArray();
    return newParses;
}
/// <summary>
/// Returns the deepest shared parent of this node and the specified node.
/// If the nodes are identical then their parent is returned.
/// If one node is the parent of the other then the parent node is returned.
/// </summary>
/// <param name="node">
/// The node from which parents are compared to this node's parents.
/// </param>
/// <returns>
/// the deepest shared parent of this node and the specified node, or null when none exists.
/// </returns>
public virtual Parse GetCommonParent(Parse node)
{
    if (this == node)
    {
        return Parent;
    }

    // Collect this node and every ancestor of it.
    Util.HashSet<Parse> ancestors = new Util.HashSet<Parse>();
    for (Parse current = this; current != null; current = current.Parent)
    {
        ancestors.Add(current);
    }

    // Walk upward from the other node until we hit one of those ancestors.
    for (Parse current = node; current != null; current = current.Parent)
    {
        if (ancestors.Contains(current))
        {
            return current;
        }
    }
    return null;
}
///<summary>
///Returns the top chunk sequences for the specified parse.
///</summary>
///<param name="inputParse">
///A pos-tag assigned parse.
///</param>
///<param name="minChunkScore">
///the minimum probability for an allowed chunk sequence.
///</param>
///<returns>
///The top chunk assignments to the specified parse.
///</returns>
private Parse[] AdvanceChunks(Parse inputParse, double minChunkScore)
{
    // chunk
    Parse[] children = inputParse.GetChildren();
    var words = new string[children.Length];
    var parseTags = new string[words.Length];
    var probabilities = new double[words.Length];
    // Collect each child's head word and POS tag as chunker input.
    for (int childParseIndex = 0, childParseCount = children.Length; childParseIndex < childParseCount; childParseIndex++)
    {
        Parse currentChildParse = children[childParseIndex];
        words[childParseIndex] = currentChildParse.Head.ToString();
        parseTags[childParseIndex] = currentChildParse.Type;
    }
    //System.Console.Error.WriteLine("adjusted min chunk score = " + (minChunkScore - inputParse.Probability));
    // The chunker threshold is relative to the parse's current score, so subtract it.
    Util.Sequence[] chunkerSequences = basalChunker.TopKSequences(words, parseTags, minChunkScore - inputParse.Probability);
    var newParses = new Parse[chunkerSequences.Length];
    for (int sequenceIndex = 0, sequenceCount = chunkerSequences.Length; sequenceIndex < sequenceCount; sequenceIndex++)
    {
        newParses[sequenceIndex] = (Parse) inputParse.Clone(); //copies top level
        if (CreateDerivationString)
        {
            newParses[sequenceIndex].AppendDerivationBuffer(sequenceIndex.ToString(System.Globalization.CultureInfo.InvariantCulture));
            newParses[sequenceIndex].AppendDerivationBuffer(".");
        }
        string[] tags = chunkerSequences[sequenceIndex].Outcomes.ToArray();
        chunkerSequences[sequenceIndex].GetProbabilities(probabilities);
        // Currently open chunk: [start, end] child indexes and its type (null = none open).
        int start = -1;
        int end = 0;
        string type = null;
        //System.Console.Error.Write("sequence " + sequenceIndex + " ");
        // Iterates one past the last tag so a chunk ending at the sentence end is closed.
        for (int tagIndex = 0; tagIndex <= tags.Length; tagIndex++)
        {
            //if (tagIndex != tags.Length)
            //{
            //    System.Console.Error.WriteLine(words[tagIndex] + " " + parseTags[tagIndex] + " " + tags[tagIndex] + " " + probabilities[tagIndex]);
            //}
            if (tagIndex != tags.Length)
            {
                newParses[sequenceIndex].AddProbability(Math.Log(probabilities[tagIndex]));
            }
            if (tagIndex != tags.Length && tags[tagIndex].StartsWith(ContinuePrefix))
            {
                // if continue just update end chunking tag don't use mContinueTypeMap
                end = tagIndex;
            }
            else
            {
                //make previous constituent if it exists
                if (type != null)
                {
                    //System.Console.Error.WriteLine("inserting tag " + tags[tagIndex]);
                    Parse startParse = children[start];
                    Parse endParse = children[end];
                    //System.Console.Error.WriteLine("Putting " + type + " at " + start + "," + end + " " + newParses[sequenceIndex].Probability);
                    var consitituents = new Parse[end - start + 1];
                    consitituents[0] = startParse;
                    //consitituents[0].Label = "Start-" + type;
                    if (end - start != 0)
                    {
                        consitituents[end - start] = endParse;
                        //consitituents[end - start].Label = "Cont-" + type;
                        for (int constituentIndex = 1; constituentIndex < end - start; constituentIndex++)
                        {
                            consitituents[constituentIndex] = children[constituentIndex + start];
                            //consitituents[constituentIndex].Label = "Cont-" + type;
                        }
                    }
                    // Wrap the collected span in a new constituent of the chunk type.
                    newParses[sequenceIndex].Insert(new Parse(startParse.Text, new Util.Span(startParse.Span.Start, endParse.Span.End), type, 1, headRules.GetHead(consitituents, type)));
                }
                if (tagIndex != tags.Length)
                {
                    //update for new constituent
                    if (tags[tagIndex].StartsWith(StartPrefix))
                    {
                        // don't use mStartTypeMap these are chunk tags
                        type = tags[tagIndex].Substring(StartPrefix.Length);
                        start = tagIndex;
                        end = tagIndex;
                    }
                    else
                    {
                        // other
                        type = null;
                    }
                }
            }
        }
        //newParses[sequenceIndex].Show();
        //System.Console.Out.WriteLine();
    }
    return newParses;
}
///<summary>
///Inserts the specified constituent into this parse based on its text span. This
///method assumes that the specified constituent can be inserted into this parse.
///</summary>
///<param name="constituent">
///The constituent to be inserted.
///</param>
///<exception cref="ParseException">
///Thrown when the constituent's span is not contained within this parse's span.
///</exception>
public virtual void Insert(Parse constituent)
{
    Util.Span constituentSpan = constituent.mSpan;
    if (mSpan.Contains(constituentSpan))
    {
        int currentPart;
        int partCount = mParts.Count;
        for (currentPart = 0; currentPart < partCount; currentPart++)
        {
            Parse subPart = mParts[currentPart];
            Util.Span subPartSpan = subPart.mSpan;
            // Parts are kept in span order; once a part starts after the new
            // constituent ends, the insertion point has been found.
            if (subPartSpan.Start > constituentSpan.End)
            {
                break;
            }
            // constituent Contains subPart
            else if (constituentSpan.Contains(subPartSpan))
            {
                // Re-parent the contained part under the new constituent, then step
                // the index back because the parts list shifted left.
                mParts.RemoveAt(currentPart);
                currentPart--;
                constituent.mParts.Add(subPart);
                subPart.Parent = constituent;
                partCount = mParts.Count;
            }
            else if (subPartSpan.Contains(constituentSpan))
            {
                //System.Console.WriteLine("Parse.insert:subPart contains con");
                // The new constituent nests inside an existing part: recurse into it.
                subPart.Insert(constituent);
                return;
            }
        }
        mParts.Insert(currentPart, constituent);
        constituent.Parent = this;
    }
    else
    {
        throw new ParseException("Inserting constituent not contained in the sentence!");
    }
}
///<summary>
///Advances the parse by assigning it POS tags and returns multiple tag sequences.
///</summary>
///<param name="inputParse">
///The parse to be tagged.
///</param>
///<returns>
///Parses with different pos-tag sequence assignments.
///</returns>
private Parse[] AdvanceTags(Parse inputParse)
{
    Parse[] children = inputParse.GetChildren();
    var words = children.Select(ch => ch.ToString()).ToArray();
    var probabilities = new double[words.Length];
    // Ask the tagger for its top-K tag sequences over the token words.
    Util.Sequence[] tagSequences = posTagger.TopKSequences(words);
    if (tagSequences.Length == 0)
    {
        Console.Error.WriteLine("no tag sequence");
    }
    var newParses = new Parse[tagSequences.Length];
    for (int tagSequenceIndex = 0; tagSequenceIndex < tagSequences.Length; tagSequenceIndex++)
    {
        string[] tags = tagSequences[tagSequenceIndex].Outcomes.ToArray();
        tagSequences[tagSequenceIndex].GetProbabilities(probabilities);
        newParses[tagSequenceIndex] = (Parse) inputParse.Clone(); //copies top level
        if (CreateDerivationString)
        {
            // Record which tag sequence this derivation originated from.
            newParses[tagSequenceIndex].AppendDerivationBuffer(tagSequenceIndex.ToString(System.Globalization.CultureInfo.InvariantCulture));
            newParses[tagSequenceIndex].AppendDerivationBuffer(".");
        }
        for (int wordIndex = 0; wordIndex < words.Length; wordIndex++)
        {
            Parse wordParse = children[wordIndex];
            //System.Console.Error.WriteLine("inserting tag " + tags[wordIndex]);
            double wordProbability = probabilities[wordIndex];
            // Insert a POS-tag node over each token and fold its log-probability
            // into the parse score.
            newParses[tagSequenceIndex].Insert(new Parse(wordParse.Text, wordParse.Span, tags[wordIndex], wordProbability));
            newParses[tagSequenceIndex].AddProbability(Math.Log(wordProbability));
            //newParses[tagSequenceIndex].Show();
        }
    }
    return newParses;
}
/// <summary>
/// Returns predictive context for deciding whether the specified constituents between the specified start and end index
/// can be combined to form a new constituent of the specified type.
/// </summary>
/// <param name="constituents">
/// The constituents which have yet to be combined into new constituents.
/// </param>
/// <param name="type">
/// The type of the new constituent proposed.
/// </param>
/// <param name="firstConstituent">
/// The first constituent of the proposed constituent.
/// </param>
/// <param name="lastConstituent">
/// The last constituent of the proposed constituent.
/// </param>
/// <returns>
/// The predictive context for deciding whether a new constituent should be created.
/// </returns>
public virtual string[] GetContext(Parse[] constituents, string type, int firstConstituent, int lastConstituent)
{
    int constituentCount = constituents.Length;
    var features = new List<string>(100);

    // default feature
    features.Add("default");

    Parse first = constituents[firstConstituent];
    Parse last = constituents[lastConstituent];
    CheckConstituent(first, "begin", type, features);
    CheckConstituent(last, "last", type, features);

    // production-rule feature: proposed type -> child types
    var production = new StringBuilder(20);
    production.Append(type).Append("->");
    for (int parseIndex = firstConstituent; parseIndex < lastConstituent; parseIndex++)
    {
        Parse inner = constituents[parseIndex];
        CheckConstituent(inner, last, type, features);
        production.Append(inner.Type).Append(",");
    }
    production.Append(last.Type);
    features.Add(production.ToString());

    // up to two constituents of surrounding context on each side (null = sentence edge)
    Parse twoBefore = firstConstituent - 2 >= 0 ? constituents[firstConstituent - 2] : null;
    Parse oneBefore = firstConstituent - 1 >= 0 ? constituents[firstConstituent - 1] : null;
    Parse oneAfter = lastConstituent + 1 < constituentCount ? constituents[lastConstituent + 1] : null;
    Parse twoAfter = lastConstituent + 2 < constituentCount ? constituents[lastConstituent + 2] : null;

    Surround(oneBefore, -1, type, features);
    Surround(twoBefore, -2, type, features);
    Surround(oneAfter, 1, type, features);
    Surround(twoAfter, 2, type, features);

    return features.ToArray();
}
/// <summary>
/// Tokenizes and parses each input line and returns the textual representation
/// of the requested number of parses for every line.
/// </summary>
/// <param name="lines">Sentences to parse, one per element.</param>
/// <param name="requestedParses">Number of alternative parses to produce per sentence.</param>
/// <returns>The concatenated parse output for all lines; blank lines produce "\r\n".</returns>
public string DoParse(string[] lines, int requestedParses)
{
    System.Text.StringBuilder parseStringBuilder = new System.Text.StringBuilder();
    foreach (string line in lines)
    {
        System.Text.StringBuilder lineBuilder = new System.Text.StringBuilder();
        string[] rawTokens = mTokenizer.Tokenize(line);
        ArrayList tokens = new ArrayList();
        foreach (string rawToken in rawTokens)
        {
            string convertedToken = ConvertToken(rawToken);
            tokens.Add(convertedToken);
            lineBuilder.Append(convertedToken).Append(" ");
        }
        if (lineBuilder.Length != 0)
        {
            // drop the trailing space appended after the last token
            string text = lineBuilder.ToString(0, lineBuilder.Length - 1);
            Parse currentParse = new Parse(text, new Util.Span(0, text.Length), "INC", 1, null);
            int start = 0;
            foreach (string token in tokens)
            {
                currentParse.Insert(new Parse(text, new Util.Span(start, start + token.Length), MaximumEntropyParser.TokenNode, 0));
                start += token.Length + 1; // +1 skips the separating space
            }
            Parse[] parses = mParser.FullParse(currentParse, requestedParses);
            foreach (Parse parse in parses)
            {
                if (requestedParses > 1)
                {
                    // when multiple parses are requested, prefix each with the
                    // sentence text and that parse's probability
                    parseStringBuilder.Append(currentParse.ToString() + " " + parse.Probability.ToString(System.Globalization.CultureInfo.InvariantCulture) + " ");
                }
                // BUG FIX: the previous version appended the whole accumulated
                // lineBuilder (tokenized text plus every earlier parse) once per
                // parse, duplicating output quadratically; each parse is now
                // emitted exactly once.
                parseStringBuilder.Append(parse.Show());
            }
        }
        else
        {
            parseStringBuilder.Append("\r\n");
        }
    }
    return parseStringBuilder.ToString();
}
/// <summary>
/// Adds the two positional constituent features for <paramref name="inputParse"/>:
/// the full feature "c{index}=type|head|proposedType" and the head-backed-off
/// feature "c{index}*=type|proposedType".
/// </summary>
private void CheckConstituent(Parse inputParse, string index, string type, List<string> features)
{
    string constituentType = inputParse.Type;
    features.Add("c" + index + "=" + constituentType + "|" + inputParse.Head.ToString() + "|" + type);
    features.Add("c" + index + "*=" + constituentType + "|" + type);
}
/// <summary>
/// Tokenizes a single line and returns the requested number of full parses,
/// or null when the line contains no tokens.
/// </summary>
public Parse[] DoParse(string line, int requestedParses)
{
    System.Text.StringBuilder textBuilder = new System.Text.StringBuilder();
    ArrayList tokens = new ArrayList();
    foreach (string rawToken in mTokenizer.Tokenize(line))
    {
        string converted = ConvertToken(rawToken);
        tokens.Add(converted);
        textBuilder.Append(converted).Append(" ");
    }

    // nothing to parse on an empty/whitespace line
    if (textBuilder.Length == 0)
    {
        return null;
    }

    // drop the trailing space appended after the last token
    string text = textBuilder.ToString(0, textBuilder.Length - 1);
    Parse sentenceParse = new Parse(text, new Util.Span(0, text.Length), "INC", 1, null);
    int offset = 0;
    foreach (string token in tokens)
    {
        sentenceParse.Insert(new Parse(text, new Util.Span(offset, offset + token.Length), MaximumEntropyParser.TokenNode, 0));
        offset += token.Length + 1; // +1 skips the separating space
    }
    return mParser.FullParse(sentenceParse, requestedParses);
}
/// <summary>
/// Determines the head constituent among <paramref name="constituents"/> for a
/// proposed constituent of the given type. NP/NX use a fixed Collins-style
/// search order; other types consult the head-rule table.
/// </summary>
/// <param name="constituents">Candidate child constituents.</param>
/// <param name="type">The type of the proposed parent constituent.</param>
/// <returns>The head parse, or null when the children are raw token nodes.</returns>
public virtual Parse GetHead(Parse[] constituents, string type)
{
    // token nodes have no syntactic head
    if (constituents[0].Type == MaximumEntropyParser.TokenNode)
    {
        return null;
    }
    if (type == "NP" || type == "NX")
    {
        // 1) rightmost nominal-ish tag
        Parse head = FindRightmostWithType(constituents, new string[] {"NN", "NNP", "NNPS", "NNS", "NX", "JJR", "POS"});
        if (head != null)
        {
            return head;
        }
        // 2) leftmost NP
        foreach (Parse constituent in constituents)
        {
            if (constituent.Type.Equals("NP"))
            {
                return constituent.Head;
            }
        }
        // 3) rightmost $/ADJP/PRN, then 4) rightmost JJ/JJS/RB/QP
        head = FindRightmostWithType(constituents, new string[] {"$", "ADJP", "PRN"});
        if (head != null)
        {
            return head;
        }
        head = FindRightmostWithType(constituents, new string[] {"JJ", "JJS", "RB", "QP"});
        if (head != null)
        {
            return head;
        }
        // 5) fall back to the last constituent
        return constituents[constituents.Length - 1].Head;
    }
    if (mHeadRules.ContainsKey(type))
    {
        HeadRule headRule = mHeadRules[type];
        Parse head = FindByTagPriority(constituents, headRule.Tags, headRule.LeftToRight);
        if (head != null)
        {
            return head;
        }
        // no rule tag matched: default to the first/last constituent per rule direction
        return headRule.LeftToRight ? constituents[0].Head : constituents[constituents.Length - 1].Head;
    }
    // unknown type: default to the last constituent
    return constituents[constituents.Length - 1].Head;
}

// Returns the head of the rightmost constituent whose type is any of the
// given tags, or null when none match.
private static Parse FindRightmostWithType(Parse[] constituents, string[] tags)
{
    for (int currentConstituent = constituents.Length - 1; currentConstituent >= 0; currentConstituent--)
    {
        foreach (string tag in tags)
        {
            if (constituents[currentConstituent].Type.Equals(tag))
            {
                return constituents[currentConstituent].Head;
            }
        }
    }
    return null;
}

// Scans tags in priority order; for each tag searches the constituents
// left-to-right or right-to-left and returns the first matching head.
// Note this differs from FindRightmostWithType: here the TAG order wins.
private static Parse FindByTagPriority(Parse[] constituents, string[] tags, bool leftToRight)
{
    foreach (string tag in tags)
    {
        if (leftToRight)
        {
            for (int currentConstituent = 0; currentConstituent < constituents.Length; currentConstituent++)
            {
                if (constituents[currentConstituent].Type.Equals(tag))
                {
                    return constituents[currentConstituent].Head;
                }
            }
        }
        else
        {
            for (int currentConstituent = constituents.Length - 1; currentConstituent >= 0; currentConstituent--)
            {
                if (constituents[currentConstituent].Type.Equals(tag))
                {
                    return constituents[currentConstituent].Head;
                }
            }
        }
    }
    return null;
}
/// <summary>
/// Returns the predictive context used to determine how the constituent at the specified index
/// should be combined with other constituents.
/// </summary>
/// <param name="constituents">
/// The constituents which have yet to be combined into new constituents.
/// </param>
/// <param name="index">
/// The index of the constituent which is being considered.
/// </param>
/// <returns>
/// the context for building constituents at the specified index.
/// </returns>
public virtual string[] GetContext(Parse[] constituents, int index)
{
    List<string> features = new List<string>(100);
    int constituentCount = constituents.Length;

    //default
    features.Add("default");

    // Gather a window of up to two constituents on either side of the one under
    // consideration; null marks a position outside the sentence.
    // cons(-2), cons(-1), cons(0), cons(1), cons(2)
    Parse previousPreviousParse = null;
    Parse previousParse = null;
    Parse currentParse = null;
    Parse nextParse = null;
    Parse nextNextParse = null;
    if (index - 2 >= 0)
    {
        previousPreviousParse = constituents[index - 2];
    }
    if (index - 1 >= 0)
    {
        previousParse = constituents[index - 1];
    }
    currentParse = constituents[index];
    if (index + 1 < constituentCount)
    {
        nextParse = constituents[index + 1];
    }
    if (index + 2 < constituentCount)
    {
        nextNextParse = constituents[index + 2];
    }

    // Full and backed-off single-constituent features for every window position.
    // cons(-2), cons(-1), cons(0), cons(1), cons(2)
    string previousPreviousConstituent = MakeConstituent(previousPreviousParse, - 2);
    string previousConstituent = MakeConstituent(previousParse, - 1);
    string currentConstituent = MakeConstituent(currentParse, 0);
    string nextConstituent = MakeConstituent(nextParse, 1);
    string nextNextConstituent = MakeConstituent(nextNextParse, 2);
    string previousPreviousConstituentBackOff = MakeConstituentBackOff(previousPreviousParse, - 2);
    string previousConstituentBackOff = MakeConstituentBackOff(previousParse, - 1);
    string currentConstituentBackOff = MakeConstituentBackOff(currentParse, 0);
    string nextConstituentBackOff = MakeConstituentBackOff(nextParse, 1);
    string nextNextConstituentBackOff = MakeConstituentBackOff(nextNextParse, 2);

    // unigram features: cons(-2), cons(-1), cons(0), cons(1), cons(2)
    features.Add(previousPreviousConstituent);
    features.Add(previousPreviousConstituentBackOff);
    features.Add(previousConstituent);
    features.Add(previousConstituentBackOff);
    features.Add(currentConstituent);
    features.Add(currentConstituentBackOff);
    features.Add(nextConstituent);
    features.Add(nextConstituentBackOff);
    features.Add(nextNextConstituent);
    features.Add(nextNextConstituentBackOff);

    // bigram features: cons(-1,0), cons(0,1) — every full/back-off combination
    features.Add(previousConstituent + "," + currentConstituent);
    features.Add(previousConstituentBackOff + "," + currentConstituent);
    features.Add(previousConstituent + "," + currentConstituentBackOff);
    features.Add(previousConstituentBackOff + "," + currentConstituentBackOff);
    features.Add(currentConstituent + "," + nextConstituent);
    features.Add(currentConstituentBackOff + "," + nextConstituent);
    features.Add(currentConstituent + "," + nextConstituentBackOff);
    features.Add(currentConstituentBackOff + "," + nextConstituentBackOff);

    // trigram features: cons3(-2,-1,0), cons3(-1,0,1), cons3(0,1,2)
    features.Add(previousPreviousConstituent + "," + previousConstituent + "," + currentConstituent);
    features.Add(previousPreviousConstituentBackOff + "," + previousConstituent + "," + currentConstituent);
    features.Add(previousPreviousConstituent + "," + previousConstituentBackOff + "," + currentConstituent);
    features.Add(previousPreviousConstituentBackOff + "," + previousConstituentBackOff + "," + currentConstituent);
    features.Add(previousPreviousConstituentBackOff + "," + previousConstituentBackOff + "," + currentConstituentBackOff);
    features.Add(previousConstituent + "," + currentConstituent + "," + nextConstituent);
    features.Add(previousConstituentBackOff + "," + currentConstituent + "," + nextConstituent);
    features.Add(previousConstituent + "," + currentConstituent + "," + nextConstituentBackOff);
    features.Add(previousConstituentBackOff + "," + currentConstituent + "," + nextConstituentBackOff);
    features.Add(previousConstituentBackOff + "," + currentConstituentBackOff + "," + nextConstituentBackOff);
    features.Add(currentConstituent + "," + nextConstituent + "," + nextNextConstituent);
    features.Add(currentConstituent + "," + nextConstituentBackOff + "," + nextNextConstituent);
    features.Add(currentConstituent + "," + nextConstituent + "," + nextNextConstituentBackOff);
    features.Add(currentConstituent + "," + nextConstituentBackOff + "," + nextNextConstituentBackOff);
    features.Add(currentConstituentBackOff + "," + nextConstituentBackOff + "," + nextNextConstituentBackOff);

    // punct: for closing punctuation, scan left within the current (not-yet-started)
    // constituent for the matching opener; stop at any already-started constituent.
    string currentParseWord = currentParse.ToString();
    if (currentParseWord == "-RRB-")
    {
        for (int parseIndex = index - 1; parseIndex >= 0; parseIndex--)
        {
            Parse testParse = constituents[parseIndex];
            if (testParse.ToString() == "-LRB-")
            {
                features.Add("bracketsmatch");
                break;
            }
            if (testParse.Label.StartsWith(MaximumEntropyParser.StartPrefix))
            {
                break;
            }
        }
    }
    if (currentParseWord == "-RCB-")
    {
        for (int parseIndex = index - 1; parseIndex >= 0; parseIndex--)
        {
            Parse testParse = constituents[parseIndex];
            if (testParse.ToString() == "-LCB-")
            {
                features.Add("bracketsmatch");
                break;
            }
            if (testParse.Label.StartsWith(MaximumEntropyParser.StartPrefix))
            {
                break;
            }
        }
    }
    if (currentParseWord == "''")
    {
        for (int parseIndex = index - 1; parseIndex >= 0; parseIndex--)
        {
            Parse testParse = constituents[parseIndex];
            if (testParse.ToString() == "``")
            {
                features.Add("quotesmatch");
                break;
            }
            if (testParse.Label.StartsWith(MaximumEntropyParser.StartPrefix))
            {
                break;
            }
        }
    }
    if (currentParseWord == "'")
    {
        for (int parseIndex = index - 1; parseIndex >= 0; parseIndex--)
        {
            Parse testParse = constituents[parseIndex];
            if (testParse.ToString() == "`")
            {
                features.Add("quotesmatch");
                break;
            }
            if (testParse.Label.StartsWith(MaximumEntropyParser.StartPrefix))
            {
                break;
            }
        }
    }
    if (currentParseWord == ",")
    {
        for (int parseIndex = index - 1; parseIndex >= 0; parseIndex--)
        {
            Parse testParse = constituents[parseIndex];
            if (testParse.ToString() == ",")
            {
                features.Add("iscomma");
                break;
            }
            if (testParse.Label.StartsWith(MaximumEntropyParser.StartPrefix))
            {
                break;
            }
        }
    }
    // sentence-final period: flag end-of-sentence when everything before it
    // belongs to a single constituent starting at position 0
    if (currentParseWord == (".") && index == constituentCount - 1)
    {
        for (int parseIndex = index - 1; parseIndex >= 0; parseIndex--)
        {
            Parse testParse = constituents[parseIndex];
            if (testParse.Label.StartsWith(MaximumEntropyParser.StartPrefix))
            {
                if (parseIndex == 0)
                {
                    features.Add("endofsentence");
                }
                break;
            }
        }
    }
    return features.ToArray();
}
// Runs every requested name-finder model over the tag (token) nodes of
// lineParse, inserts the detected name constituents back into the parse tree,
// and returns the tree's textual representation followed by "\r\n".
private string ProcessParse(string[] models, Parse lineParse)
{
    System.Text.StringBuilder output = new System.Text.StringBuilder();
    string[][] finderTags = new string[models.Length][];
    Dictionary<string, string>[] previousTokenMaps = CreatePreviousTokenMaps(models);
    Parse[] tokenParses = lineParse.GetTagNodes();
    string[] tokens = new string[tokenParses.Length];
    for (int currentToken = 0; currentToken < tokens.Length; currentToken++)
    {
        tokens[currentToken] = tokenParses[currentToken].ToString();
    }
    // run each finder over the token sequence, collecting per-token outcome tags
    for (int currentFinder = 0, finderCount = models.Length; currentFinder < finderCount; currentFinder++)
    {
        MaximumEntropyNameFinder finder = mFinders[models[currentFinder]];
        finderTags[currentFinder] = finder.Find(tokens, previousTokenMaps[currentFinder]);
    }
    UpdatePreviousTokenMaps(previousTokenMaps, tokens, finderTags);
    for (int currentFinder = 0, finderCount = models.Length; currentFinder < finderCount; currentFinder++)
    {
        // Assemble contiguous name spans (inclusive token indices) from the
        // Start/Continue/Other outcome tags; start == -1 means "not inside a name".
        int start = -1;
        List<Span> names = new List<Span>(5);
        for (int currentToken = 0, tokenCount = tokens.Length; currentToken < tokenCount; currentToken++)
        {
            // Start or Other both terminate any name currently in progress
            if ((finderTags[currentFinder][currentToken] == MaximumEntropyNameFinder.Start) || (finderTags[currentFinder][currentToken] == MaximumEntropyNameFinder.Other))
            {
                if (start != -1)
                {
                    names.Add(new Span(start, currentToken - 1));
                }
                start = -1;
            }
            // Start also opens a new name at this token
            if (finderTags[currentFinder][currentToken] == MaximumEntropyNameFinder.Start)
            {
                start = currentToken;
            }
        }
        // close a name that runs to the end of the sentence
        if (start != - 1)
        {
            names.Add(new Span(start, tokens.Length - 1));
        }
        AddNames(models[currentFinder], names, tokenParses, lineParse);
    }
    output.Append(lineParse.Show());
    output.Append("\r\n");
    return output.ToString();
}
/// <summary>
/// Identifies coreference relationships for parsed input. A null entry in
/// <paramref name="parsedSentences"/> acts as a document separator: mentions
/// accumulated so far are resolved and flushed to the output.
/// </summary>
/// <param name="parsedSentences">Array of parsed sentences.</param>
/// <returns>The textual coreference-annotated parse output.</returns>
public string GetCoreferenceParse(Parse[] parsedSentences)
{
    int sentenceNumber = 0;
    var document = new List<Mention>();
    var parses = new List<Parse>();
    var output = new StringBuilder();
    foreach (Parse lineParse in parsedSentences)
    {
        if (lineParse == null)
        {
            // document boundary: resolve the accumulated mentions and reset state
            DiscourseEntity[] entities = GetEntitiesFromMentions(document.ToArray());
            output.Append(new CoreferenceParse(parses, entities).Show());
            sentenceNumber = 0;
            document.Clear();
            parses.Clear();
        }
        else
        {
            parses.Add(lineParse);
            Mention[] extents = MentionFinder.GetMentions(new DefaultParse(lineParse, sentenceNumber));
            //construct new parses for mentions which don't have constituents.
            foreach (Mention mention in extents)
            {
                if (mention.Parse == null)
                {
                    // synthesize an NML constituent so the mention has a tree node
                    var snp = new Parse(lineParse.Text, mention.Span, "NML", 1.0);
                    lineParse.Insert(snp);
                    mention.Parse = new DefaultParse(snp, sentenceNumber);
                }
            }
            document.AddRange(extents);
            sentenceNumber++;
        }
    }
    // flush the final document if it was not terminated by a null sentinel
    if (document.Count > 0)
    {
        DiscourseEntity[] entities = GetEntitiesFromMentions(document.ToArray());
        //showEntities(entities);
        output.Append((new CoreferenceParse(parses, entities)).Show());
    }
    return output.ToString();
}
/// <summary>
/// Inserts a constituent labeled <paramref name="tag"/> into the parse tree for
/// each detected name span, when the span aligns cleanly with existing
/// constituent boundaries (or is a coordinated NP's trailing conjunct).
/// </summary>
private void AddNames(string tag, List<Span> names, Parse[] tokens, Parse lineParse)
{
    foreach (Span nameTokenSpan in names)
    {
        Parse startToken = tokens[nameTokenSpan.Start];
        Parse endToken = tokens[nameTokenSpan.End];
        Parse commonParent = startToken.GetCommonParent(endToken);
        if (commonParent == null)
        {
            continue;
        }
        Span nameSpan = new Span(startToken.Span.Start, endToken.Span.End);
        if (nameSpan.Equals(commonParent.Span))
        {
            // the name covers the parent exactly: wrap the whole constituent
            commonParent.Insert(new Parse(commonParent.Text, nameSpan, tag, 1.0));
            continue;
        }
        Parse[] kids = commonParent.GetChildren();
        bool crossesChild = false;
        foreach (Parse kid in kids)
        {
            if (nameSpan.Crosses(kid.Span))
            {
                crossesChild = true;
            }
        }
        if (!crossesChild)
        {
            // the name aligns with child boundaries: safe to insert directly
            commonParent.Insert(new Parse(commonParent.Text, nameSpan, tag, 1.0));
        }
        else if (commonParent.Type == "NP")
        {
            // coordinated NP special case: tag the whole parent when the name
            // covers the last grandchild of the first conjunct
            Parse[] grandKids = kids[0].GetChildren();
            if (grandKids.Length > 1 && nameSpan.Contains(grandKids[grandKids.Length - 1].Span))
            {
                commonParent.Insert(new Parse(commonParent.Text, commonParent.Span, tag, 1.0));
            }
        }
    }
}
// Recursively renders the subtree rooted at p into buffer using bracketed
// Treebank notation, appending "#n" entity ids for parses present in
// mParseMap, and preserving the original inter-token text from p.Text.
private void Show(Parse p, StringBuilder buffer)
{
    int position = p.Span.Start;
    bool isTokenNode = p.Type == MaximumEntropyParser.TokenNode;
    if (!isTokenNode)
    {
        buffer.Append("(").Append(p.Type);
        if (mParseMap.ContainsKey(p))
        {
            buffer.Append("#" + mParseMap[p].ToString());
        }
        buffer.Append(" ");
    }
    foreach (Parse child in p.GetChildren())
    {
        Util.Span childSpan = child.Span;
        // emit any raw text lying between the previous child and this one
        if (position < childSpan.Start)
        {
            buffer.Append(p.Text.Substring(position, childSpan.Start - position));
        }
        Show(child, buffer);
        position = childSpan.End;
    }
    // emit trailing text between the last child and the end of this span
    buffer.Append(p.Text.Substring(position, p.Span.End - position));
    if (!isTokenNode)
    {
        buffer.Append(")");
    }
}
/// <summary>
/// Creates a parse node of the given type covering the specified span of the
/// parse text. The new node starts as its own head, with no parent, no label,
/// and an empty child list.
/// </summary>
public Parse(string parseText, Util.Span span, string type, double probability)
{
    mText = parseText;
    mSpan = span;
    mType = type;
    mProbability = probability;
    mLabel = null;
    mParent = null;
    mHead = this;
    mParts = new List<Parse>();
}
/// <summary>
/// Returns the index of the specified child.
/// </summary>
/// <param name="child">
/// A child of this parse.
/// </param>
/// <returns>
/// the index of this specified child or -1 if the specified child is not a child of this parse.
/// </returns>
public int IndexOf(Parse child)
{
    return _parts.IndexOf(child);
}