/// <summary>
/// Determines whether at least one entry of this collection (single node or split node) has the same
/// token span as the given node. Single nodes are compared via CoversSameTokensAs; split nodes are
/// compared by their first and last token numbers only.
/// </summary>
/// <param name="node">Node whose token span is compared against the entries of this collection</param>
/// <returns>True as soon as one matching entry is found, false if there is none.</returns>
public bool AnyCoversSameTokenSpanAs(TreeBankNode node)
{
    // single nodes: let the node itself decide
    foreach (TreeBankNode candidate in _singleNodes)
    {
        if (candidate.CoversSameTokensAs(node))
        {
            return true;
        }
    }

    // boundaries of the given node, read once before scanning the split nodes
    int targetStart = node.FirstToken.TokenNumber;
    int targetEnd = node.LastToken.TokenNumber;

    foreach (List<TreeBankNode> parts in _splitNodes)
    {
        // a split node from a different sentence can never match
        if (!parts[0].InSameSentenceAs(node, true))
        {
            continue;
        }

        // the split node runs from the first token of its first part to the last token of its last part
        int start = parts[0].FirstToken.TokenNumber;
        int end = parts[parts.Count - 1].LastToken.TokenNumber;

        if (start == targetStart && end == targetEnd)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Builds a PropBankNode on top of an existing TreeBankNode, using PropBankChildConstructor to build the children.
/// WARNING: an instance of any TreeBankNode-derived class (e.g., PropBankNode) is accepted, but only the
/// TreeBankNode members are carried over into the resulting PropBankNode.
/// </summary>
/// <param name="treeBankNode">TreeBankNode this PropBankNode is constructed from</param>
/// <param name="parent">Parent of the new PropBank node</param>
protected PropBankNode(TreeBankNode treeBankNode, PropBankNode parent)
    : base(treeBankNode, parent, new TreeBankNodeConstructor(PropBankChildConstructor))
{
    // PropBank-specific state starts out empty
    _labeledNodeCollections = new List<PropBankLabeledNodeCollection>();
    _information = null;
    _label = null;
}
/// <summary>
/// Click handler that clears the output list and then, on a separate thread, loads every sentence of every
/// indexed MRG file and calls Test() on its parse tree. Controls are disabled through SetEnabled while the
/// thread body runs.
/// </summary>
/// <param name="sender">Event sender</param>
/// <param name="e">Event arguments</param>
private void testTBIndexBtn_Click(object sender, EventArgs e)
{
    ClearList();

    ThreadStart work = delegate()
    {
        SetEnabled(false);

        foreach (string file in _treeBankEngine.IndexedMrgFiles)
        {
            int sentenceCount = 0;

            foreach (int sentenceNumber in _treeBankEngine.GetSentenceNumbers(file))
            {
                TreeBankNode tree = _treeBankEngine.GetParseTree(file, sentenceNumber);
                tree.Test();
                sentenceCount++;

                // in search mode each tree is sent to the output (where the filter is applied)
                if (search.Checked)
                {
                    AppendOutput(tree);
                }
            }

            // outside search mode only a per-file sentence count is shown
            if (!search.Checked)
            {
                AppendOutput(file + ": " + sentenceCount + " sentences", true);
            }
        }

        SetEnabled(true);
    };

    Thread worker = new Thread(work);
    worker.Start();
}
/// <summary>
/// Renders a TreeBank node as text and appends it to the output list, unless a filter expression is set
/// and the rendered text does not match it.
/// </summary>
/// <param name="n">Node to render and append</param>
private void AppendOutput(TreeBankNode n)
{
    string line;

    if (n is PropBankNode)
    {
        // PropBank nodes are always shown in bracketed form
        PropBankNode propBankNode = (PropBankNode)n;
        line = n.FullLocation + ": " + propBankNode.GetBracketedText(PropBankNode.BracketedOutputOptions.IgnoreBracketProbabilities);
    }
    else if (displayParseTrees.Checked)
    {
        // location on its own line, followed by the bracketed parse
        line = n.FullLocation + Environment.NewLine + n.GetBracketedText(useCategoryMnemonic.Checked, displayPhraseHeads.Checked);
    }
    else
    {
        line = n.FullLocation + ": " + n.GetSurfaceText(categoryTags.Checked, useCategoryMnemonic.Checked);
    }

    // with no filter everything passes; otherwise the rendered text has to match
    bool passesFilter = _filterRE == null || _filterRE.Match(line).Success;
    if (passesFilter)
    {
        AppendOutput(line, false);
    }
}
/// <summary>
/// Creates a graph from a node, prompting the user for the output file. If no Dot instance exists yet, one is
/// created from the configured Dot path; when that fails the user is asked to locate the Dot executable and
/// the chosen path is saved to the settings.
/// </summary>
/// <param name="n">Node to create the graph from</param>
private void CreateGraph(TreeBankNode n)
{
    saveFileDialog.Title = "Select output path for graph";
    if (saveFileDialog.ShowDialog() != DialogResult.OK)
    {
        // user cancelled the save dialog: nothing to do
        return;
    }

    if (_dot == null)
    {
        try
        {
            _dot = new Dot(Settings.Default.DotPath);
        }
        catch (Exception)
        {
            // configured path did not work: ask the user for the executable
            openFileDialog.Title = "Select path to Dot executable";
            bool pathChosen = openFileDialog.ShowDialog() == DialogResult.OK && File.Exists(openFileDialog.FileName);
            if (!pathChosen)
            {
                throw new Exception("Dot path not specified. Cannot create graph.");
            }

            _dot = new Dot(openFileDialog.FileName);

            // remember the working path for next time
            Settings.Default.DotPath = _dot.DotPath;
            Settings.Default.Save();
        }
    }

    if (outputFormat.SelectedItem == null)
    {
        MessageBox.Show("No output format selected. Using PNG.");
        outputFormat.SelectedItem = Dot.OutputFormat.PNG;
    }

    Dot.OutputFormat format = (Dot.OutputFormat)outputFormat.SelectedItem;
    _dot.CreateGraph(n, format, saveFileDialog.FileName);
}
/// <summary>
/// Searches the children of a node for the first one matching the current rule. Categories are tried in
/// search-list order; for each category the children are scanned in the rule's direction.
/// </summary>
/// <param name="node">Node whose children should be searched</param>
/// <param name="excludeNodes">Nodes that must not be returned (may be null to exclude nothing)</param>
/// <returns>First matching child that is not excluded, or null if no child matches</returns>
public TreeBankNode Run(TreeBankNode node, Set<TreeBankNode> excludeNodes)
{
    // collect the children left to right...
    List<TreeBankNode> candidates = new List<TreeBankNode>();
    int index = 0;
    while (index < node.ChildCount)
    {
        candidates.Add(node.GetChild(index));
        index++;
    }

    // ...and flip them for a right-to-left search
    if (_direction == SearchDirection.RightToLeft)
    {
        candidates.Reverse();
    }

    // earlier categories in the search list take priority over child position
    foreach (string categoryName in _searchList)
    {
        TreeBankEngine.SyntacticCategory wanted = TreeBankEngine.GetSyntacticCategory(categoryName);

        foreach (TreeBankNode candidate in candidates)
        {
            if (candidate.Category == wanted && (excludeNodes == null || !excludeNodes.Contains(candidate)))
            {
                return candidate;
            }
        }
    }

    return null;
}
/// <summary>
/// Gets the next training node. Advances through the filtered nodes of the current sentence first, then
/// through the sentences of the current MRG file, and finally through the remaining MRG files.
/// </summary>
/// <returns>Next node accepted by Filter, or null once MoveToNextValidMrgFile reports that no file is left</returns>
public override ClassifiableEntity GetNextInstance()
{
    // try to move to next node in current sentence; this loops (rather than branching once) because a
    // freshly loaded sentence may contain no node that passes the filter
    while (!_nodeEnum.MoveNext())
    {
        // try to move to next sentence in current MRG file
        while (!_sentEnum.MoveNext())
        {
            // try to move to next MRG file...if there are none, we're done
            if (!MoveToNextValidMrgFile())
            {
                return(null);
            }

            // start at first sentence of next MRG file
            _sentEnum = TreeBankEngine.GetSentenceNumbers(CurrentMrgFile).GetEnumerator();
        }

        // filter all nodes in the tree, keeping the good ones
        TreeBankNode root = GetTrainingInstanceParseTree(CurrentMrgFile, _sentEnum.Current);
        List <TreeBankNode> filteredNodes = new List <TreeBankNode>();
        foreach (TreeBankNode n in root.AllNodes)
        {
            if (Filter(n))
            {
                filteredNodes.Add(n);
            }
        }

        // the new enumerator is positioned before its first element; the loop condition advances it
        _nodeEnum = filteredNodes.GetEnumerator();
    }

    return(_nodeEnum.Current);
}
/// <summary>
/// Builds a root PropBankNode (no parent) from a root TreeBankNode. WARNING: an instance of any
/// TreeBankNode-derived class (e.g., PropBankNode) is accepted, but only the TreeBankNode members are carried
/// over into the resulting PropBankNode.
/// </summary>
/// <param name="treeBankNode">Root TreeBankNode this PropBankNode is constructed from</param>
public PropBankNode(TreeBankNode treeBankNode)
    : this(treeBankNode, null)
{
    // note: this check runs after the chained constructor has already executed
    if (treeBankNode.IsRoot)
    {
        return;
    }

    throw new Exception("Can only create PropBankNodes from root TreeBankNodes");
}
/// <summary>
/// Child-construction callback: wraps a TreeBankNode in a PropBankNode attached to the given parent.
/// </summary>
/// <param name="treeBankNode">TreeBankNode to wrap</param>
/// <param name="parent">Parent node; must be a PropBankNode</param>
/// <returns>Newly constructed PropBankNode</returns>
private static TreeBankNode PropBankChildConstructor(TreeBankNode treeBankNode, TreeBankNode parent)
{
    if (parent is PropBankNode)
    {
        return new PropBankNode(treeBankNode, (PropBankNode)parent);
    }

    // null parents and parents of any other node type are rejected
    throw new Exception("Must have a PropBankNode");
}
/// <summary>
/// Adds a single node to this collection after applying the collection's label to it.
/// </summary>
/// <param name="singleNode">Single node to add; must be a non-null PropBankNode</param>
/// <exception cref="ArgumentException">Thrown if the node is null or is not a PropBankNode</exception>
public override void AddSingleNode(TreeBankNode singleNode)
{
    // fail with a descriptive error instead of a NullReferenceException when the cast does not apply
    PropBankNode propBankNode = singleNode as PropBankNode;
    if (propBankNode == null)
    {
        throw new ArgumentException("Only non-null PropBankNode instances can be added to this collection", "singleNode");
    }

    // apply label to node
    propBankNode.SetLabel(_label, false);

    base.AddSingleNode(propBankNode);
}
/// <summary>
/// Resolves a comma-separated list of node locations against a root node, yielding the parts of a split node.
/// </summary>
/// <param name="root">Root node the locations are resolved against</param>
/// <param name="splitNodeLocations">Comma-separated node locations that together form the split node</param>
/// <returns>Resolved nodes, in the order their locations were listed</returns>
private static List<TreeBankNode> GetSplitNode(TreeBankNode root, string splitNodeLocations)
{
    string[] locations = splitNodeLocations.Split(',');

    List<TreeBankNode> parts = new List<TreeBankNode>();
    for (int i = 0; i < locations.Length; ++i)
    {
        parts.Add(root.GetNode(locations[i]));
    }

    // a split node needs at least two parts
    if (parts.Count > 1)
    {
        return parts;
    }

    throw new Exception("Invalid split node locations");
}
/// <summary>
/// Resolves a location series label against a root node and adds the resulting nodes to a collection.
/// The label is split on '*'; a piece containing ',' is added as a split node, any other piece as a single node.
/// </summary>
/// <param name="root">Root node the locations are resolved against</param>
/// <param name="locationLabel">Location series label</param>
/// <param name="nodeCollection">Collection that receives the nodes</param>
public static void AddNodesToCollection(TreeBankNode root, string locationLabel, LabeledNodeCollection nodeCollection)
{
    foreach (string location in locationLabel.Split('*'))
    {
        if (!location.Contains(","))
        {
            // plain location: one node
            nodeCollection.AddSingleNode(root.GetNode(location));
        }
        else
        {
            // comma-separated locations: one split node
            nodeCollection.AddSplitNode(GetSplitNode(root, location));
        }
    }
}
/// <summary>
/// Gets the training instance parse tree as a NomBank node. Depending on the _labelPredicates and
/// _labelSupportVerbs flags, tokens that the gold-parsed NomBank engine reports as markable or as support
/// verbs receive a Predicate or Support label.
/// </summary>
/// <param name="mrgFile">MRG file of tree</param>
/// <param name="sentenceNumber">Sentence number of tree</param>
/// <returns>Training instance parse tree as a NomBank node</returns>
protected override TreeBankNode GetTrainingInstanceParseTree(string mrgFile, int sentenceNumber)
{
    // start from the regular parse tree and wrap it as a NomBank tree
    NomBankNode nomBankTree = new NomBankNode(base.GetTrainingInstanceParseTree(mrgFile, sentenceNumber));

    // nothing to label
    if (!_labelPredicates && !_labelSupportVerbs)
    {
        return nomBankTree;
    }

    /* the gold NomBank engine is used for marking because the automatic version won't know about all of the
     * markables: syntactic parse errors prevent some propositions from being included in the auto-parse
     * propositions file */
    string goldFile = _goldParsedNomBankEngine.GetFullMrgPath(mrgFile);

    foreach (NomBankNode token in nomBankTree.Tokens)
    {
        int number = token.TokenNumber;

        if (_labelPredicates && _goldParsedNomBankEngine.TokenIsMarkable(goldFile, sentenceNumber, number))
        {
            token.AddLabel(new NomBankNodeLabel(NomBankNodeLabel.NodeType.Predicate, 1), true);
        }

        if (_labelSupportVerbs && _goldParsedNomBankEngine.TokenIsSupportVerb(goldFile, sentenceNumber, number))
        {
            token.AddLabel(new NomBankNodeLabel(NomBankNodeLabel.NodeType.Support, 1), true);
        }
    }

    return nomBankTree;
}
/// <summary>
/// Builds the labeled predicate tree for a PropBank propositions entry: loads the parse tree named by the
/// entry, attaches one labeled node collection per label in the entry, and verifies that a predicate leaf
/// carries the entry's leaf number.
/// </summary>
/// <param name="vi">VerbInfo specifying tree to look up</param>
/// <returns>PropBankNode</returns>
public PropBankNode GetPropBankTree(VerbInfo vi)
{
    PropBankNode predTree = new PropBankNode(GetParseTree(vi.File, vi.SentenceNumber));
    predTree.Information = vi;

    // entries are space-delimited; the columns inside an entry are dash-delimited
    foreach (string entry in vi.LabeledNodeLocations.Split(' '))
    {
        string[] columns = entry.Split('-');

        // column 1: label type
        PropBankNodeLabel.NodeType type = PropBankNodeLabel.GetNodeType(columns[1]);

        // column 2 (optional): label feature
        PropBankNodeLabel.NodeFeature feature = PropBankNodeLabel.NodeFeature.None;
        if (columns.Length > 2)
        {
            // the feature is sometimes the actual preposition, which does not parse; PRP was added for that case
            if (!PropBankNodeLabel.TryGetNodeFeature(columns[2], out feature))
            {
                feature = PropBankNodeLabel.GetNodeFeature("PRP");
            }

            // more than three columns is not expected
            if (columns.Length > 3)
            {
                throw new Exception("Missed feature");
            }
        }

        // column 0: node locations the label applies to
        PropBankLabeledNodeCollection collection = new PropBankLabeledNodeCollection(new PropBankNodeLabel(type, feature, 1));
        AddNodesToCollection(predTree, columns[0], collection);

        // register the collection on the root
        predTree.LabeledNodeCollections.Add(collection);
    }

    // sanity check: some predicate leaf must carry the leaf number from the propositions file entry
    bool leafFound = false;
    foreach (PropBankNode predicate in predTree.PredicateNodes)
    {
        foreach (PropBankNode leaf in predicate.Leaves)
        {
            if (leaf.LeafNumber == vi.LeafNumber)
            {
                leafFound = true;
                break;
            }
        }

        if (leafFound)
        {
            break;
        }
    }

    if (!leafFound)
    {
        throw new Exception("Mismatch between VerbInfo predicate leaf number and actual predicate leaf number");
    }

    return predTree;
}
/// <summary>
/// Takes a node out of this collection's single nodes. Split nodes are not touched.
/// </summary>
/// <param name="singleNode">Node to remove from the single nodes</param>
public virtual void RemoveSingleNode(TreeBankNode singleNode)
{
    _singleNodes.Remove(singleNode);
}
/// <summary>
/// Gets nodes within this collection
/// </summary>
/// <param name="includeNullElementNodes">Whether or not to include null-element nodes</param>
/// <param name="includeSplitNodes">Whether or not to include nodes from a split node</param>
/// <param name="headSplitNodesOnly">If including split nodes, this specifies whether or not to only include the head node
/// of the split node. The head node is defined as the node that is the semantic head of the LCA of all nodes
/// in the split node. The head is only added if one of the split node's parts is an ancestor of it.</param>
/// <param name="includeSingleNodes">Whether or not to include single nodes</param>
/// <param name="excludeSingleNodesWhenMultiple">Whether or not to exclude single nodes if there are more than one</param>
/// <returns>Set of nodes</returns>
/// <exception cref="Exception">Thrown if a restricting flag is set while the corresponding include flag is not</exception>
public Set<TreeBankNode> GetNodes(bool includeNullElementNodes, bool includeSplitNodes, bool headSplitNodesOnly, bool includeSingleNodes, bool excludeSingleNodesWhenMultiple)
{
    // reject flag combinations that contradict each other
    if (!includeSplitNodes && headSplitNodesOnly)
    {
        throw new Exception("Inconsistent parameters. Cannot restrict split nodes to head nodes if we're not including split nodes to begin with.");
    }

    if (!includeSingleNodes && excludeSingleNodesWhenMultiple)
    {
        throw new Exception("Inconsistent parameters. Cannot restrict single nodes when not including single nodes to begin with.");
    }

    Set<TreeBankNode> nodes = new Set<TreeBankNode>();

    // add split nodes
    if (includeSplitNodes)
    {
        foreach (List<TreeBankNode> splitNode in _splitNodes)
        {
            // LCA of the split node's parts; only tracked when restricting to head nodes
            TreeBankNode lca = null;

            foreach (TreeBankNode node in splitNode)
            {
                // skip null elements unless they were requested
                if (!includeNullElementNodes && node.IsNullElement)
                {
                    continue;
                }

                if (headSplitNodesOnly)
                {
                    lca = lca == null ? node : lca.GetLowestCommonAncestor(node);
                }
                else
                {
                    nodes.Add(node);
                }
            }

            // add the head of the LCA node if it's covered by the split node
            if (lca != null)
            {
                TreeBankNode lcaHead = lca.SemanticHeadToken;
                if (includeNullElementNodes || !lcaHead.IsNullElement)
                {
                    foreach (TreeBankNode node in splitNode)
                    {
                        if (node.IsAncestorOf(lcaHead))
                        {
                            nodes.Add(lcaHead);
                            break;
                        }
                    }
                }
            }
        }
    }

    // add single nodes; when excluding multiples they are only used if there is exactly one
    if (includeSingleNodes && (!excludeSingleNodesWhenMultiple || _singleNodes.Count == 1))
    {
        foreach (TreeBankNode node in _singleNodes)
        {
            if (includeNullElementNodes || !node.IsNullElement)
            {
                nodes.Add(node);
            }
        }
    }

    return nodes;
}
/// <summary>
/// Gets spans for a set of argument nodes, indexed by sentence. Null-element nodes are ignored; the remaining
/// nodes are grouped by sentence number and each contiguous group becomes one span, running from the first
/// token of the group's first node to the last token of its last node.
/// </summary>
/// <param name="argNodes">Argument nodes; all must come from the same MRG file</param>
/// <returns>Contiguous spans, indexed by sentence</returns>
/// <exception cref="Exception">Thrown on an MRG file mismatch, or if no non-null-element node remains</exception>
private Dictionary<int, List<Span>> GetSpans(List<TreeBankNode> argNodes)
{
    // make sure all nodes come from the same source document as the first node
    foreach (TreeBankNode node in argNodes)
    {
        if (node.MrgFile != argNodes[0].MrgFile)
        {
            throw new Exception("MRG file mismatch");
        }
    }

    // work on a copy with the null-element nodes dropped in a single pass.
    // NOTE(review): no sorting happens here; ordering is left to TreeBankNode.GetContiguousNodes - confirm
    List<TreeBankNode> allNodes = new List<TreeBankNode>(argNodes);
    allNodes.RemoveAll(delegate(TreeBankNode node) { return node.IsNullElement; });

    if (allNodes.Count == 0)
    {
        throw new Exception("Invalid node list");
    }

    // group nodes by sentence
    Dictionary<int, List<TreeBankNode>> sentenceNodes = new Dictionary<int, List<TreeBankNode>>();
    foreach (TreeBankNode node in allNodes)
    {
        sentenceNodes.EnsureContainsKey(node.SentenceNumber, typeof(List<TreeBankNode>));
        sentenceNodes[node.SentenceNumber].Add(node);
    }

    // create one span per set of contiguous nodes in each sentence
    Dictionary<int, List<Span>> sentenceSpans = new Dictionary<int, List<Span>>();
    foreach (KeyValuePair<int, List<TreeBankNode>> sentence in sentenceNodes)
    {
        List<Span> spans = new List<Span>();
        foreach (List<TreeBankNode> nodes in TreeBankNode.GetContiguousNodes(sentence.Value))
        {
            spans.Add(new Span(nodes[0].FirstToken.TokenNumber, nodes[nodes.Count - 1].LastToken.TokenNumber));
        }

        sentenceSpans.Add(sentence.Key, spans);
    }

    return sentenceSpans;
}
/// <summary>
/// Puts a node into this collection's single nodes. Derived collections may override this to process
/// the node before it is stored.
/// </summary>
/// <param name="singleNode">Node to store as a single node</param>
public virtual void AddSingleNode(TreeBankNode singleNode)
{
    _singleNodes.Add(singleNode);
}
/// <summary>
/// Applies this collection's type to the node (viewed as a NomBankNode) and then stores it as a single node
/// via the base implementation.
/// </summary>
/// <param name="singleNode">Single node to add</param>
public override void AddSingleNode(TreeBankNode singleNode)
{
    // ApplyType receives null when the node is not a NomBankNode
    NomBankNode nomBankNode = singleNode as NomBankNode;
    ApplyType(nomBankNode);

    base.AddSingleNode(singleNode);
}
/// <summary>
/// Checks whether a node is among this collection's single nodes. Split nodes are not consulted.
/// </summary>
/// <param name="singleNode">Node to look for</param>
/// <returns>True if the node is one of the single nodes, false otherwise</returns>
public virtual bool ContainsSingleNode(TreeBankNode singleNode)
{
    bool found = _singleNodes.Contains(singleNode);
    return found;
}
/// <summary>
/// Builds a NomBankNode on top of an existing TreeBankNode, using NomBankChildConstructor to build the children.
/// WARNING: an instance of any TreeBankNode-derived class (e.g., NomBankNode) is accepted, but only the
/// TreeBankNode members are carried over into the resulting NomBankNode. Co-index IDs and referents are lost.
/// </summary>
/// <param name="treeBankNode">TreeBankNode this NomBankNode is constructed from</param>
/// <param name="parent">Parent of the new NomBankNode</param>
protected NomBankNode(TreeBankNode treeBankNode, NomBankNode parent)
    : base(treeBankNode, parent, new TreeBankNodeConstructor(NomBankChildConstructor))
{
    // NomBank-specific state starts out empty
    _labeledNodeCollections = new List<NomBankLabeledNodeCollection>();
    _labels = new Set<NomBankNodeLabel>();
}