/// <summary>
/// Copies particular labels from the current node to corresponding nodes in another tree. For every node returned
/// by <c>GetDescendants</c> for a label, the node in the other tree that is the lowest common ancestor of the same
/// first/last token numbers receives a copy of that label.
/// </summary>
/// <param name="n">Tree to which to copy labels</param>
/// <param name="labels">Labels to copy</param>
/// <exception cref="Exception">The other tree has no NomBank node covering a labeled node's token span</exception>
public void CopyLabelsTo(NomBankNode n, params NomBankNodeLabel[] labels)
{
    foreach (NomBankNodeLabel label in labels)
    {
        foreach (NomBankNode fromNode in GetDescendants(label))
        {
            int startToken = fromNode.FirstToken.TokenNumber;
            int endToken = fromNode.LastToken.TokenNumber;

            // the "as" cast yields null when no ancestor is found or when it is not a NomBankNode...fail with a
            // descriptive message instead of a NullReferenceException on the AddLabel call below
            NomBankNode toNode = n.GetLowestCommonAncestorOfTokens(startToken, endToken) as NomBankNode;
            if (toNode == null)
            {
                throw new Exception("Failed to find NomBank node covering tokens " + startToken + "-" + endToken + " in destination tree");
            }

            // each destination node gets its own copy of the label
            toNode.AddLabel(label.Copy(), true);
        }
    }
}
/// <summary>
/// Applies the current type to a node, or does nothing if the node already has the type. The node receives a copy
/// of the current label without hyphen indexes; hyphen indexes are then added back one by one, but only when the
/// node's surface text contains a hyphen or slash and the index is smaller than the number of hyphen/slash-separated parts.
/// </summary>
/// <param name="n">Node to apply type to</param>
/// <exception cref="Exception">A hyphen index that was already applied would be applied a second time</exception>
private void ApplyType(NomBankNode n)
{
    // don't reapply type
    if (n.HasLabel(_label))
    {
        return;
    }

    // add label without hyphen indexes, and don't bother synching with root collection (this collection will be added to the root)
    NomBankNodeLabel label = _label.Copy();
    label.HyphenIndexes.Clear();
    n.AddLabel(label, false);

    // only apply hyphen indexes to text with a hyphen or slash
    string nodeText = n.SurfaceText;
    char[] hyphenSlash = new char[] { '-', '/' };
    if (nodeText.IndexOfAny(hyphenSlash) == -1)
    {
        return;
    }

    // the number of parts depends only on the node text, so compute it once instead of once per hyphen index
    int numParts = nodeText.Split(hyphenSlash, StringSplitOptions.RemoveEmptyEntries).Length;

    // try to apply each hyphen index
    foreach (NomBankNodeLabel.HyphenationIndex hyphenIndex in _label.HyphenIndexes)
    {
        // get numeric hyphen index: drop the first character of the index's string form and parse the rest
        int index = int.Parse(hyphenIndex.ToString().Substring(1));

        // add index to node's label if applicable
        if (index < numParts)
        {
            if (_appliedIndexes.Contains(hyphenIndex))
            {
                throw new Exception("Hyphen index applied more than once");
            }

            label.AddHyphenIndex(hyphenIndex);
            _appliedIndexes.Add(hyphenIndex);
        }
    }
}
/// <summary>
/// Builds the training instance parse tree as a NomBank node. Depending on the flags set at construction time,
/// predicate tokens and/or support verb tokens are labeled on the returned tree.
/// </summary>
/// <param name="mrgFile">MRG file of tree</param>
/// <param name="sentenceNumber">Sentence number of tree</param>
/// <returns>Training instance parse tree as a NomBank node</returns>
protected override TreeBankNode GetTrainingInstanceParseTree(string mrgFile, int sentenceNumber)
{
    // fetch the tree the usual way and wrap it in a NomBank tree
    NomBankNode tree = new NomBankNode(base.GetTrainingInstanceParseTree(mrgFile, sentenceNumber));

    // nothing to label
    if (!_labelPredicates && !_labelSupportVerbs)
    {
        return tree;
    }

    /* The gold-parsed NomBank engine is used for marking because the automatic version won't know about all of
     * the markables: syntactic parse errors prevent some propositions from being included in the auto-parse
     * propositions file. */
    string goldMrgFile = _goldParsedNomBankEngine.GetFullMrgPath(mrgFile);

    foreach (NomBankNode token in tree.Tokens)
    {
        int tokenNumber = token.TokenNumber;

        // predicate label
        if (_labelPredicates && _goldParsedNomBankEngine.TokenIsMarkable(goldMrgFile, sentenceNumber, tokenNumber))
        {
            token.AddLabel(new NomBankNodeLabel(NomBankNodeLabel.NodeType.Predicate, 1), true);
        }

        // support verb label
        if (_labelSupportVerbs && _goldParsedNomBankEngine.TokenIsSupportVerb(goldMrgFile, sentenceNumber, tokenNumber))
        {
            token.AddLabel(new NomBankNodeLabel(NomBankNodeLabel.NodeType.Support, 1), true);
        }
    }

    return tree;
}
/// <summary>
/// Gets the next training instance for models built over NomBank. When the nodes of the current tree are used up,
/// the next valid NounInfo (moving on to the next noun when necessary) is located, its NomBank tree is loaded, and
/// the nodes of that tree that pass the filter become the new pool of instances.
/// </summary>
/// <returns>Next training instance, or null if there are no more nouns</returns>
public override ClassifiableEntity GetNextInstance()
{
    // keep loading trees until the node enumerator is able to advance
    while (!_nodes.MoveNext())
    {
        NomBankEngine engine = TreeBankEngine as NomBankEngine;

        // advance to the next valid NounInfo, switching nouns as needed
        while (true)
        {
            if (MoveToNextValidNounInfo(ref _nounInfo))
            {
                break;
            }

            // out of NounInfo for this noun...if there are no more nouns either, we're done
            if (!_nouns.MoveNext())
            {
                return null;
            }

            // position before the first NounInfo of the new noun
            _nounInfo = engine.GetNounInfo(_nouns.Current).GetEnumerator();
        }

        NomBankNode tree = engine.GetNomBankTree(_nounInfo.Current);

        // the node collection is reused across trees for better memory usage
        _filteredNodes.Clear();
        foreach (NomBankNode candidate in tree.AllNodes)
        {
            if (!Filter(candidate))
            {
                continue;
            }

            _filteredNodes.Add(candidate);
        }

        _nodes = _filteredNodes.GetEnumerator();
    }

    return _nodes.Current;
}
/// <summary>
/// Gets bracketed text for this node. A node with labels to show is rendered as "[labels text]", where multiple
/// labels are separated by slashes; a leaf contributes its surface text and any other node contributes the
/// bracketed text of its non-null children, separated by spaces.
/// </summary>
/// <param name="options">Bracketing options</param>
/// <returns>Bracketed text</returns>
public string GetBracketedText(params BracketedOutputOptions[] options)
{
    // collect the options into a set
    Set<BracketedOutputOptions> optionsSet = new Set<BracketedOutputOptions>();
    if (options != null)
    {
        optionsSet.AddRange(options);
    }

    // resolve each option once
    bool skipSupportVerbs = optionsSet.Contains(BracketedOutputOptions.IgnoreSupportVerbs);
    bool showArgumentFeatures = !optionsSet.Contains(BracketedOutputOptions.IgnoreArgumentFeatures);
    bool showHyphenIndexes = !optionsSet.Contains(BracketedOutputOptions.IgnoreHyphenIndexes);
    bool showPredicateFrame = optionsSet.Contains(BracketedOutputOptions.IncludePredicateFrame);
    bool showConfidence = !optionsSet.Contains(BracketedOutputOptions.IgnoreBracketProbabilities);

    // pick the labels that will appear in the bracket
    List<NomBankNodeLabel> labelsToBracket = new List<NomBankNodeLabel>();
    foreach (NomBankNodeLabel label in _labels)
    {
        if (!(skipSupportVerbs && label.Type == NomBankNodeLabel.NodeType.Support))
        {
            labelsToBracket.Add(label);
        }
    }

    StringBuilder text = new StringBuilder();

    // a bracket is opened only when there is at least one label to show
    bool bracketed = labelsToBracket.Count > 0;

    // whether a space must precede the next piece of text
    bool needSpace = bracketed;

    if (bracketed)
    {
        text.Append("[");

        for (int i = 0; i < labelsToBracket.Count; ++i)
        {
            NomBankNodeLabel label = labelsToBracket[i];

            // multiple labels are separated by a slash
            if (i > 0)
            {
                text.Append("/");
            }

            text.Append(label.ToString(showArgumentFeatures, showHyphenIndexes));

            // sense information for predicate labels
            if (label.IsPredicate && showPredicateFrame)
            {
                text.Append("." + Information.RoleSetId);
            }

            // confidence score
            if (showConfidence)
            {
                text.Append(" " + label.Confidence);
            }
        }
    }

    if (IsLeaf)
    {
        // leaves contribute their surface text
        if (needSpace)
        {
            text.Append(" ");
        }

        text.Append(SurfaceText);
    }
    else
    {
        // other nodes contribute the bracketed text of each non-null child
        IEnumerator<TreeBankNode> childEnum = Children;
        while (childEnum.MoveNext())
        {
            NomBankNode child = childEnum.Current as NomBankNode;
            if (child.IsNullElement)
            {
                continue;
            }

            if (needSpace)
            {
                text.Append(" ");
            }

            text.Append(child.GetBracketedText(options));
            needSpace = true;
        }
    }

    // close the bracket if one was opened above
    if (bracketed)
    {
        text.Append("]");
    }

    return text.ToString();
}
/// <summary>
/// Constructor. The new node starts out with an empty label set and an empty list of labeled node collections.
/// WARNING: an instance of any class derived from TreeBankNode (e.g., NomBankNode) is accepted, but the resulting
/// NomBankNode has only its TreeBankNode members instantiated. Co-index IDs and referents are lost.
/// </summary>
/// <param name="treeBankNode">TreeBankNode from which to construct this NomBankNode</param>
/// <param name="parent">Parent of this NomBankNode node</param>
protected NomBankNode(TreeBankNode treeBankNode, NomBankNode parent)
    : base(treeBankNode, parent, new TreeBankNodeConstructor(NomBankChildConstructor))
{
    _labeledNodeCollections = new List<NomBankLabeledNodeCollection>();
    _labels = new Set<NomBankNodeLabel>();
}
/// <summary>
/// Gets whether or not the current tree has the same argument labeling as another tree. For each label to check,
/// both trees must have the same number of nodes with that label, and every such node in one tree must cover the
/// same tokens as some node with that label in the other tree (checked in both directions).
/// </summary>
/// <param name="otherTree">Tree to compare the current one to</param>
/// <param name="ignoreNullNodes">Whether or not to ignore null nodes</param>
/// <param name="labelsToCheck">Node labels to check</param>
/// <returns>True if argument labelings are the same, false otherwise</returns>
/// <exception cref="Exception">The current node or the other tree is not a root node</exception>
public bool HasSameLabelingAs(NomBankNode otherTree, bool ignoreNullNodes, NomBankNodeLabel[] labelsToCheck)
{
    if (!IsRoot)
    {
        throw new Exception("Non-root node");
    }

    if (!otherTree.IsRoot)
    {
        throw new Exception("Non-root node");
    }

    // check all node labels
    foreach (NomBankNodeLabel label in labelsToCheck)
    {
        List<NomBankNode> nodes1 = GetDescendants(label);
        if (ignoreNullNodes)
        {
            // single pass, instead of shifting the list on every RemoveAt
            nodes1.RemoveAll(candidate => candidate.IsNullElement);
        }

        List<NomBankNode> nodes2 = otherTree.GetDescendants(label);
        if (ignoreNullNodes)
        {
            nodes2.RemoveAll(candidate => candidate.IsNullElement);
        }

        if (nodes1.Count != nodes2.Count)
        {
            return false;
        }

        // equal counts alone don't rule out several nodes matching the same counterpart, so check both directions
        if (!EachNodeHasTokenMatch(nodes1, nodes2) || !EachNodeHasTokenMatch(nodes2, nodes1))
        {
            return false;
        }
    }

    return true;
}

/// <summary>
/// Gets whether every node in one list covers the same tokens as at least one node in another list
/// </summary>
/// <param name="nodes">Nodes that each need a match</param>
/// <param name="candidates">Nodes to match against</param>
/// <returns>True if every node has a match (trivially true for an empty list), false otherwise</returns>
private static bool EachNodeHasTokenMatch(List<NomBankNode> nodes, List<NomBankNode> candidates)
{
    foreach (NomBankNode node in nodes)
    {
        bool matched = false;
        foreach (NomBankNode candidate in candidates)
        {
            if (node.CoversSameTokensAs(candidate))
            {
                matched = true;
                break;
            }
        }

        if (!matched)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Marks argument nodes from the current node in the corresponding parse from a different TreeBank. This is used when
/// transferring NomBank annotations to parse trees other than those distributed in the TreeBank (e.g., those produced
/// by an automatic syntactic parser).
/// </summary>
/// <param name="treeBankEngine">Initialized TreeBank engine from which to pull the parse tree to mark NomBank arguments within</param>
/// <returns>NomBank node, or null if all arguments couldn't be minimally transferred to the other parse tree. An argument
/// is minimally transferred if the corresponding node in the other parse tree subsumes precisely the same text as the node
/// in the current parse tree. Sometimes this is not possible due to parse errors. Null is also returned when no
/// labeled node collection with non-null nodes was transferred.</returns>
public NomBankNode MarkArgumentNodesIn(TreeBankEngine treeBankEngine)
{
    // arguments can only be marked starting from a root node
    if (!IsRoot)
    {
        throw new Exception("Must pass root node");
    }

    // locate the MRG file in the other tree bank and wrap its parse in a NomBank root
    string otherMrgFile = treeBankEngine.GetFullMrgPath(MrgFile);
    NomBankNode otherRoot = new NomBankNode(treeBankEngine.GetParseTree(otherMrgFile, SentenceNumber));

    // both parses must be of the same sentence
    if (otherRoot.SurfaceText != SurfaceText)
    {
        throw new Exception("Failed to get same parse tree");
    }

    // Add information to the new root. Leaf number and argument information are placeholders here - they are set at the end.
    string relativeMrgFile = otherMrgFile.Substring(treeBankEngine.MrgPath.Length).Trim(Path.DirectorySeparatorChar);
    NounInfo sourceInfo = Information;
    otherRoot.Information = new NounInfo(sourceInfo.Noun, relativeMrgFile, sourceInfo.SentenceNumber, -1, sourceInfo.RoleSetId, "");

    // transfer every labeled node collection
    foreach (NomBankLabeledNodeCollection sourceCollection in LabeledNodeCollections)
    {
        NomBankLabeledNodeCollection targetCollection = new NomBankLabeledNodeCollection(sourceCollection.Label.Copy());

        // single nodes
        foreach (NomBankNode sourceNode in sourceCollection.SingleNodes)
        {
            if (sourceNode.IsNullElement)
            {
                continue;
            }

            // find the argument node in the other parse tree...give up entirely if there is none
            NomBankNode targetNode = otherRoot.GetMinimallySubsumingNode(sourceNode.FirstToken, sourceNode.LastToken) as NomBankNode;
            if (targetNode == null)
            {
                return null;
            }

            targetCollection.AddSingleNode(targetNode);
        }

        // split arguments
        foreach (List<TreeBankNode> sourceSplit in sourceCollection.SplitNodes)
        {
            List<TreeBankNode> targetSplit = new List<TreeBankNode>();

            foreach (NomBankNode sourcePart in sourceSplit)
            {
                if (sourcePart.IsNullElement)
                {
                    continue;
                }

                // find this part of the split argument in the other tree
                NomBankNode targetPart = otherRoot.GetMinimallySubsumingNode(sourcePart.FirstToken, sourcePart.LastToken) as NomBankNode;
                if (targetPart == null)
                {
                    return null;
                }

                targetSplit.Add(targetPart);
            }

            if (targetSplit.Count == 1)
            {
                // only one part of the split argument was non-null, so add that node as a single node
                targetCollection.AddSingleNode(targetSplit[0]);
            }
            else if (targetSplit.Count >= 2)
            {
                // otherwise, add the split argument normally
                targetCollection.AddSplitNode(targetSplit);
            }
        }

        // every hyphen index on the label must have been applied
        if (targetCollection.Label.HyphenIndexes.Count != targetCollection.AppliedIndexes.Count)
        {
            throw new Exception("Not all hyphen indexes were applied");
        }

        // keep the collection only if it ended up with non-null nodes
        bool hasNodes = targetCollection.SingleNodes.Count > 0 || targetCollection.SplitNodes.Count > 0;
        if (hasNodes)
        {
            otherRoot.LabeledNodeCollections.Add(targetCollection);
        }
    }

    // nothing was transferred
    if (otherRoot.LabeledNodeCollections.Count == 0)
    {
        return null;
    }

    // fill in the leaf number and argument locations that were left as placeholders above
    otherRoot.Information.LeafNumber = otherRoot.PredicateNode.Leaves[0].LeafNumber;
    otherRoot.Information.LabeledNodeLocations = otherRoot.LabeledNodeLocations;

    return otherRoot;
}