/// <summary>
        /// Determines whether any entry of this collection has the same token span as the given node. Single nodes
        /// are compared with CoversSameTokensAs; split nodes are compared by their first and last token numbers.
        /// </summary>
        /// <param name="node">Node whose token span is compared against the entries of this collection</param>
        /// <returns>True if at least one single or split node matches the given node's span, false otherwise.</returns>
        public bool AnyCoversSameTokenSpanAs(TreeBankNode node)
        {
            // single nodes can be compared directly
            foreach (TreeBankNode candidate in _singleNodes)
            {
                if (candidate.CoversSameTokensAs(node))
                {
                    return true;
                }
            }

            // boundaries of the given node's span, read once for all split-node comparisons
            int targetFirst = node.FirstToken.TokenNumber;
            int targetLast = node.LastToken.TokenNumber;

            foreach (List<TreeBankNode> parts in _splitNodes)
            {
                TreeBankNode firstPart = parts[0];

                // a split node from a different sentence is never a match
                if (!firstPart.InSameSentenceAs(node, true))
                {
                    continue;
                }

                // a split node's span runs from the first token of its first part to the last token of its last part
                int spanFirst = firstPart.FirstToken.TokenNumber;
                int spanLast = parts[parts.Count - 1].LastToken.TokenNumber;

                if (spanFirst == targetFirst && spanLast == targetLast)
                {
                    return true;
                }
            }

            return false;
        }
Example #2
0
 /// <summary>
 /// Builds a PropBankNode from a TreeBankNode and a parent PropBankNode. PropBankChildConstructor is handed to
 /// the base constructor as the node constructor delegate. WARNING:  an instance of any class derived from
 /// TreeBankNode (e.g., PropBankNode) is accepted, but only its TreeBankNode members are carried over; the
 /// PropBank-specific members of the new node start out empty.
 /// </summary>
 /// <param name="treeBankNode">TreeBankNode from which to construct this PropBankNode</param>
 /// <param name="parent">Parent of this PropBank node</param>
 protected PropBankNode(TreeBankNode treeBankNode, PropBankNode parent)
     : base(treeBankNode, parent, new TreeBankNodeConstructor(PropBankChildConstructor))
 {
     // PropBank-specific state: an empty list of labeled node collections, no information, no label
     _labeledNodeCollections = new List<PropBankLabeledNodeCollection>();
     _information = null;
     _label = null;
 }
Example #3
0
        /// <summary>
        /// Handles a click on the TreeBank index test button. Clears the output list, then starts a thread that
        /// fetches the parse tree of every sentence in every indexed MRG file and calls Test() on it. Depending on
        /// the search check box, either each tree or a per-file sentence count is appended to the output.
        /// </summary>
        /// <param name="sender">Event source (not used)</param>
        /// <param name="e">Event arguments (not used)</param>
        private void testTBIndexBtn_Click(object sender, EventArgs e)
        {
            ClearList();

            Thread t = new Thread(new ThreadStart(delegate()
            {
                SetEnabled(false);
                try
                {
                    foreach (string mrgFile in _treeBankEngine.IndexedMrgFiles)
                    {
                        int sentsInFile = 0;
                        foreach (int sentNum in _treeBankEngine.GetSentenceNumbers(mrgFile))
                        {
                            TreeBankNode root = _treeBankEngine.GetParseTree(mrgFile, sentNum);
                            root.Test();

                            ++sentsInFile;

                            // if we're searching, hand the tree to AppendOutput, which applies the filter
                            if (search.Checked)
                            {
                                AppendOutput(root);
                            }
                        }

                        // if we're not searching, just display the number of sentences in the file
                        if (!search.Checked)
                        {
                            AppendOutput(mrgFile + ":  " + sentsInFile + " sentences", true);
                        }
                    }
                }
                finally
                {
                    // always undo SetEnabled(false), even when loading or testing a tree throws
                    SetEnabled(true);
                }
            }));

            t.Start();
        }
Example #4
0
        /// <summary>
        /// Formats a TreeBank node as text and appends it to the output list, unless a filter expression is set
        /// and the text does not match it. PropBank nodes are always shown as bracketed text; other nodes are shown
        /// as bracketed or surface text depending on the display check boxes.
        /// </summary>
        /// <param name="n">Node to display</param>
        private void AppendOutput(TreeBankNode n)
        {
            string line;

            PropBankNode propBankNode = n as PropBankNode;
            if (propBankNode != null)
            {
                line = n.FullLocation + ":  " + propBankNode.GetBracketedText(PropBankNode.BracketedOutputOptions.IgnoreBracketProbabilities);
            }
            else if (displayParseTrees.Checked)
            {
                line = n.FullLocation + Environment.NewLine + n.GetBracketedText(useCategoryMnemonic.Checked, displayPhraseHeads.Checked);
            }
            else
            {
                line = n.FullLocation + ":  " + n.GetSurfaceText(categoryTags.Checked, useCategoryMnemonic.Checked);
            }

            // with no filter everything is shown; otherwise only text the filter matches
            if (_filterRE == null || _filterRE.Match(line).Success)
            {
                AppendOutput(line, false);
            }
        }
        /// <summary>
        /// Creates a graph from a node, prompting the user for the file to save it to. Does nothing if the save
        /// dialog is not confirmed. On first use the Dot wrapper is created from the stored Dot path; if that
        /// fails, the user is asked for the Dot executable and the chosen path is saved to the settings.
        /// </summary>
        /// <param name="n">Node to create the graph from</param>
        /// <exception cref="Exception">Thrown when no existing Dot executable is chosen</exception>
        private void CreateGraph(TreeBankNode n)
        {
            saveFileDialog.Title = "Select output path for graph";
            if (saveFileDialog.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            // create the Dot wrapper lazily
            if (_dot == null)
            {
                try
                {
                    _dot = new Dot(Settings.Default.DotPath);
                }
                catch (Exception)
                {
                    // the stored path did not work, so ask the user for the executable
                    openFileDialog.Title = "Select path to Dot executable";
                    bool executableChosen = openFileDialog.ShowDialog() == DialogResult.OK && File.Exists(openFileDialog.FileName);
                    if (!executableChosen)
                    {
                        throw new Exception("Dot path not specified. Cannot create graph.");
                    }

                    _dot = new Dot(openFileDialog.FileName);

                    // remember the working path for next time
                    Settings.Default.DotPath = _dot.DotPath;
                    Settings.Default.Save();
                }
            }

            // fall back to PNG when no format has been picked
            if (outputFormat.SelectedItem == null)
            {
                MessageBox.Show("No output format selected. Using PNG.");
                outputFormat.SelectedItem = Dot.OutputFormat.PNG;
            }

            Dot.OutputFormat format = (Dot.OutputFormat)outputFormat.SelectedItem;
            _dot.CreateGraph(n, format, saveFileDialog.FileName);
        }
Example #6
0
        /// <summary>
        /// Searches the children of a node for the first match of the current rule. Categories are tried in the
        /// order of the search list, and for each category the children are scanned in the search direction, so an
        /// earlier category wins over an earlier child.
        /// </summary>
        /// <param name="node">Node whose children should be searched</param>
        /// <param name="excludeNodes">Nodes to skip during the search; may be null to skip none</param>
        /// <returns>First child that matches the current rule, or null if no child matches</returns>
        public TreeBankNode Run(TreeBankNode node, Set<TreeBankNode> excludeNodes)
        {
            // collect the children left to right, then flip the list for a right-to-left search
            List<TreeBankNode> orderedChildren = new List<TreeBankNode>();
            for (int childIndex = 0; childIndex < node.ChildCount; ++childIndex)
            {
                orderedChildren.Add(node.GetChild(childIndex));
            }

            if (_direction == SearchDirection.RightToLeft)
            {
                orderedChildren.Reverse();
            }

            foreach (string categoryName in _searchList)
            {
                TreeBankEngine.SyntacticCategory wanted = TreeBankEngine.GetSyntacticCategory(categoryName);

                foreach (TreeBankNode candidate in orderedChildren)
                {
                    // accept the first child of the wanted category that is not excluded
                    if (candidate.Category == wanted && (excludeNodes == null || !excludeNodes.Contains(candidate)))
                    {
                        return candidate;
                    }
                }
            }

            return null;
        }
        /// <summary>
        /// Gets the next training node. Advances through the filtered nodes of the current sentence; when they are
        /// exhausted, moves on to the next sentence, and when the sentences of the current MRG file are exhausted,
        /// to the next valid MRG file.
        /// </summary>
        /// <returns>Next node that passes Filter, or null when there are no more MRG files</returns>
        public override ClassifiableEntity GetNextInstance()
        {
            // try to move to next node in current sentence; the loop repeats for sentences with no filtered nodes
            while (!_nodeEnum.MoveNext())
            {
                // try to move to next sentence in current MRG file
                while (!_sentEnum.MoveNext())
                {
                    // try to move to next MRG file...if there are none, we're done
                    if (!MoveToNextValidMrgFile())
                    {
                        return(null);
                    }

                    // start at first sentence of next MRG file
                    _sentEnum = TreeBankEngine.GetSentenceNumbers(CurrentMrgFile).GetEnumerator();
                }

                // filter all nodes in the tree, keeping the good ones
                TreeBankNode        root          = GetTrainingInstanceParseTree(CurrentMrgFile, _sentEnum.Current);
                List <TreeBankNode> filteredNodes = new List <TreeBankNode>();
                foreach (TreeBankNode n in root.AllNodes)
                {
                    if (Filter(n))
                    {
                        filteredNodes.Add(n);
                    }
                }

                // enumerate the kept nodes; the loop condition above advances to the first one
                _nodeEnum = filteredNodes.GetEnumerator();
            }

            return(_nodeEnum.Current);
        }
Example #8
0
 /// <summary>
 /// Builds a PropBankNode from a root TreeBankNode by chaining to the two-argument constructor with a null
 /// parent. WARNING:  an instance of any class derived from TreeBankNode (e.g., PropBankNode) is accepted, but
 /// the resulting PropBankNode has only its TreeBankNode members instantiated.
 /// </summary>
 /// <param name="treeBankNode">Root TreeBankNode from which to construct this PropBankNode</param>
 /// <exception cref="Exception">Thrown when the given node is not a root node</exception>
 public PropBankNode(TreeBankNode treeBankNode)
     : this(treeBankNode, null)
 {
     // this check runs after the chained constructor has completed
     bool builtFromRoot = treeBankNode.IsRoot;
     if (builtFromRoot)
     {
         return;
     }

     throw new Exception("Can only create PropBankNodes from root TreeBankNodes");
 }
Example #9
0
        /// <summary>
        /// Node constructor callback that builds a PropBankNode from a TreeBankNode under a PropBankNode parent
        /// </summary>
        /// <param name="treeBankNode">TreeBankNode to construct a PropBankNode from</param>
        /// <param name="parent">Parent node; must be a PropBankNode</param>
        /// <returns>Constructed PropBankNode</returns>
        /// <exception cref="Exception">Thrown when the parent is null or is not a PropBankNode</exception>
        private static TreeBankNode PropBankChildConstructor(TreeBankNode treeBankNode, TreeBankNode parent)
        {
            // the cast yields null both for a null parent and for a parent of another type
            PropBankNode propBankParent = parent as PropBankNode;
            if (propBankParent == null)
            {
                throw new Exception("Must have a PropBankNode");
            }

            return new PropBankNode(treeBankNode, propBankParent);
        }
        /// <summary>
        /// Adds a single node to this list after applying this collection's label to it
        /// </summary>
        /// <param name="singleNode">Single node to add; must be a PropBankNode</param>
        /// <exception cref="System.ArgumentException">Thrown when the node is null or is not a PropBankNode</exception>
        public override void AddSingleNode(TreeBankNode singleNode)
        {
            // the label can only be applied to PropBank nodes, so reject anything else up front instead of
            // failing with a NullReferenceException on the cast result
            PropBankNode propBankNode = singleNode as PropBankNode;
            if (propBankNode == null)
            {
                throw new System.ArgumentException("Single node must be a non-null PropBankNode", "singleNode");
            }

            // apply label to node
            propBankNode.SetLabel(_label, false);

            base.AddSingleNode(propBankNode);
        }
        /// <summary>
        /// Looks up the nodes that make up a split node
        /// </summary>
        /// <param name="root">Root node to look the parts up in</param>
        /// <param name="splitNodeLocations">Comma-separated node locations that form the split node</param>
        /// <returns>Parts of the split node, in the order their locations are listed</returns>
        /// <exception cref="Exception">Thrown when fewer than two locations are given</exception>
        private static List<TreeBankNode> GetSplitNode(TreeBankNode root, string splitNodeLocations)
        {
            string[] locations = splitNodeLocations.Split(',');
            List<TreeBankNode> parts = new List<TreeBankNode>(locations.Length);

            for (int i = 0; i < locations.Length; ++i)
            {
                parts.Add(root.GetNode(locations[i]));
            }

            // a split node needs at least two parts
            if (parts.Count < 2)
            {
                throw new Exception("Invalid split node locations");
            }

            return parts;
        }
 /// <summary>
 /// Adds the nodes named by a location series label to a node collection. The label is split on '*'; each
 /// piece that contains a ',' is added as a split node, every other piece as a single node.
 /// </summary>
 /// <param name="root">Root node to get nodes from</param>
 /// <param name="locationLabel">Location series label</param>
 /// <param name="nodeCollection">Collection to add nodes to</param>
 public static void AddNodesToCollection(TreeBankNode root, string locationLabel, LabeledNodeCollection nodeCollection)
 {
     foreach (string location in locationLabel.Split('*'))
     {
         // no comma means the piece names exactly one node
         bool namesSingleNode = location.IndexOf(',') < 0;
         if (namesSingleNode)
         {
             nodeCollection.AddSingleNode(root.GetNode(location));
         }
         else
         {
             nodeCollection.AddSplitNode(GetSplitNode(root, location));
         }
     }
 }
        /// <summary>
        /// Gets the training instance parse tree as a NomBank node. When predicate and/or support verb labeling is
        /// switched on, the tokens that the gold-parsed NomBank engine reports as markable or as support verbs
        /// are labeled accordingly.
        /// </summary>
        /// <param name="mrgFile">MRG file of tree</param>
        /// <param name="sentenceNumber">Sentence number of tree</param>
        /// <returns>Training instance parse tree as a NomBank node</returns>
        protected override TreeBankNode GetTrainingInstanceParseTree(string mrgFile, int sentenceNumber)
        {
            // fetch the tree as usual and wrap it in a NomBank tree
            NomBankNode nomBankTree = new NomBankNode(base.GetTrainingInstanceParseTree(mrgFile, sentenceNumber));

            // nothing to label
            if (!_labelPredicates && !_labelSupportVerbs)
            {
                return nomBankTree;
            }

            /* the gold NomBank engine is used for marking because the automatic version won't know about all of the
             * markables: syntactic parse errors keep some propositions out of the auto-parse propositions file */
            string goldMrgFile = _goldParsedNomBankEngine.GetFullMrgPath(mrgFile);

            foreach (NomBankNode token in nomBankTree.Tokens)
            {
                int tokenNumber = token.TokenNumber;

                if (_labelPredicates && _goldParsedNomBankEngine.TokenIsMarkable(goldMrgFile, sentenceNumber, tokenNumber))
                {
                    token.AddLabel(new NomBankNodeLabel(NomBankNodeLabel.NodeType.Predicate, 1), true);
                }

                if (_labelSupportVerbs && _goldParsedNomBankEngine.TokenIsSupportVerb(goldMrgFile, sentenceNumber, tokenNumber))
                {
                    token.AddLabel(new NomBankNodeLabel(NomBankNodeLabel.NodeType.Support, 1), true);
                }
            }

            return nomBankTree;
        }
        /// <summary>
        /// Builds the predicate tree for a PropBank propositions entry: fetches the parse tree, wraps it in a
        /// PropBankNode, adds one labeled node collection per label of the entry, and checks that a predicate leaf
        /// carries the entry's leaf number.
        /// </summary>
        /// <param name="vi">VerbInfo specifying tree to look up</param>
        /// <returns>PropBankNode at the root of the labeled tree</returns>
        /// <exception cref="Exception">Thrown when a label has more than three columns, or when no predicate leaf
        /// has the leaf number given by the VerbInfo</exception>
        public PropBankNode GetPropBankTree(VerbInfo vi)
        {
            PropBankNode predTree = new PropBankNode(GetParseTree(vi.File, vi.SentenceNumber));

            predTree.Information = vi;

            // labels are separated by spaces, the columns of each label by dashes
            foreach (string entry in vi.LabeledNodeLocations.Split(' '))
            {
                string[] columns = entry.Split('-');

                // second column: label type
                PropBankNodeLabel.NodeType type = PropBankNodeLabel.GetNodeType(columns[1]);

                // optional third column: label feature
                PropBankNodeLabel.NodeFeature feature = PropBankNodeLabel.NodeFeature.None;
                if (columns.Length > 2)
                {
                    // the column sometimes holds the actual preposition, in which case parsing fails and the
                    // PRP feature, added for this case, is used instead
                    if (!PropBankNodeLabel.TryGetNodeFeature(columns[2], out feature))
                    {
                        feature = PropBankNodeLabel.GetNodeFeature("PRP");
                    }

                    if (columns.Length > 3)
                    {
                        throw new Exception("Missed feature");
                    }
                }

                // first column: locations of the nodes that receive the label
                PropBankLabeledNodeCollection collection = new PropBankLabeledNodeCollection(new PropBankNodeLabel(type, feature, 1));
                AddNodesToCollection(predTree, columns[0], collection);

                predTree.LabeledNodeCollections.Add(collection);
            }

            // one of the predicate leaves must have the leaf number from the propositions file entry
            bool leafFound = false;
            foreach (PropBankNode predicateNode in predTree.PredicateNodes)
            {
                foreach (PropBankNode leaf in predicateNode.Leaves)
                {
                    if (leaf.LeafNumber == vi.LeafNumber)
                    {
                        leafFound = true;
                        break;
                    }
                }

                if (leafFound)
                {
                    break;
                }
            }

            if (leafFound)
            {
                return predTree;
            }

            throw new Exception("Mismatch between VerbInfo predicate leaf number and actual predicate leaf number");
        }
 /// <summary>
 /// Removes a single node from this collection. Derived classes may override this method.
 /// </summary>
 /// <param name="singleNode">Single node to remove</param>
 public virtual void RemoveSingleNode(TreeBankNode singleNode)
 {
     // only the single-node container is touched; split nodes are not affected
     _singleNodes.Remove(singleNode);
 }
        /// <summary>
        /// Gets nodes within this collection
        /// </summary>
        /// <param name="includeNullElementNodes">Whether or not to include null-element nodes</param>
        /// <param name="includeSplitNodes">Whether or not to include nodes from a split node</param>
        /// <param name="headSplitNodesOnly">If including split nodes, this specifies whether or not to only include the head node
        /// of the split node. The head node is defined as the node that is the semantic head of the LCA of all nodes
        /// in the split node. Null-element parts are left out of the LCA unless null-element nodes are included.</param>
        /// <param name="includeSingleNodes">Whether or not to include single nodes</param>
        /// <param name="excludeSingleNodesWhenMultiple">Whether or not to exclude single nodes if there are more than one</param>
        /// <returns>Set of nodes</returns>
        /// <exception cref="Exception">Thrown when headSplitNodesOnly is set without includeSplitNodes, or when
        /// excludeSingleNodesWhenMultiple is set without includeSingleNodes</exception>
        public Set<TreeBankNode> GetNodes(bool includeNullElementNodes,
                                          bool includeSplitNodes,
                                          bool headSplitNodesOnly,
                                          bool includeSingleNodes,
                                          bool excludeSingleNodesWhenMultiple)
        {
            if (!includeSplitNodes && headSplitNodesOnly)
            {
                throw new Exception("Inconsistent parameters. Cannot restrict split nodes to head nodes if we're not including split nodes to begin with.");
            }

            if (!includeSingleNodes && excludeSingleNodesWhenMultiple)
            {
                throw new Exception("Inconsistent parameters. Cannot restrict single nodes when not including single nodes to begin with.");
            }

            Set<TreeBankNode> nodes = new Set<TreeBankNode>();

            // add split nodes
            if (includeSplitNodes)
            {
                foreach (List<TreeBankNode> splitNode in _splitNodes)
                {
                    // LCA of the parts of this split node; stays null unless only head nodes are requested
                    TreeBankNode lca = null;
                    foreach (TreeBankNode node in splitNode)
                    {
                        if (includeNullElementNodes || !node.IsNullElement)
                        {
                            // track the LCA of all split nodes if we're including only head split nodes
                            if (headSplitNodesOnly)
                            {
                                if (lca == null)
                                {
                                    lca = node;
                                }
                                else
                                {
                                    lca = lca.GetLowestCommonAncestor(node);
                                }
                            }
                            // otherwise, simply add the node to the set
                            else
                            {
                                nodes.Add(node);
                            }
                        }
                    }

                    // add the head of the LCA node if it's covered by the split node
                    if (lca != null)
                    {
                        TreeBankNode lcaHead = lca.SemanticHeadToken;
                        if (includeNullElementNodes || !lcaHead.IsNullElement)
                        {
                            // the head counts as covered when one of the parts is an ancestor of it
                            foreach (TreeBankNode node in splitNode)
                            {
                                if (node.IsAncestorOf(lcaHead))
                                {
                                    nodes.Add(lcaHead);
                                    break;
                                }
                            }
                        }
                    }
                }
            }

            // add single nodes, unless they are to be left out because there is more than one of them
            if (includeSingleNodes)
            {
                if (!excludeSingleNodesWhenMultiple || _singleNodes.Count == 1)
                {
                    foreach (TreeBankNode node in _singleNodes)
                    {
                        if (includeNullElementNodes || !node.IsNullElement)
                        {
                            nodes.Add(node);
                        }
                    }
                }
            }

            return nodes;
        }
        /// <summary>
        /// Gets spans for a set of argument nodes, indexed by sentence. Null-element nodes are ignored; the
        /// remaining nodes are grouped by sentence number and each group of contiguous nodes, as returned by
        /// TreeBankNode.GetContiguousNodes, becomes one span.
        /// </summary>
        /// <param name="argNodes">Argument nodes, all from the same MRG file</param>
        /// <returns>Contiguous spans, indexed by sentence</returns>
        /// <exception cref="Exception">Thrown when the nodes come from different MRG files, or when no
        /// non-null-element node is given</exception>
        private Dictionary<int, List<Span>> GetSpans(List<TreeBankNode> argNodes)
        {
            // make sure all nodes come from the same source document, keeping only the non-null-element ones
            // (filtering while copying avoids repeated RemoveAt calls on the list afterwards)
            List<TreeBankNode> allNodes = new List<TreeBankNode>();

            foreach (TreeBankNode node in argNodes)
            {
                if (node.MrgFile != argNodes[0].MrgFile)
                {
                    throw new Exception("MRG file mismatch");
                }

                if (!node.IsNullElement)
                {
                    allNodes.Add(node);
                }
            }

            if (allNodes.Count == 0)
            {
                throw new Exception("Invalid node list");
            }

            // group nodes by sentence, in the order given (this method does not sort them)
            Dictionary<int, List<TreeBankNode>> sentenceNodes = new Dictionary<int, List<TreeBankNode>>();

            foreach (TreeBankNode node in allNodes)
            {
                sentenceNodes.EnsureContainsKey(node.SentenceNumber, typeof(List<TreeBankNode>));
                sentenceNodes[node.SentenceNumber].Add(node);
            }

            // create spans for each sentence
            Dictionary<int, List<Span>> sentenceSpans = new Dictionary<int, List<Span>>();

            foreach (KeyValuePair<int, List<TreeBankNode>> sentence in sentenceNodes)
            {
                // one span per set of contiguous nodes, from the first token of the first node to the last token
                // of the last node
                List<Span> spans = new List<Span>();
                foreach (List<TreeBankNode> nodes in TreeBankNode.GetContiguousNodes(sentence.Value))
                {
                    spans.Add(new Span(nodes[0].FirstToken.TokenNumber, nodes[nodes.Count - 1].LastToken.TokenNumber));
                }

                sentenceSpans.Add(sentence.Key, spans);
            }

            return sentenceSpans;
        }
 /// <summary>
 /// Adds a single node to this collection. Derived classes may override this method.
 /// </summary>
 /// <param name="singleNode">Single node to add</param>
 public virtual void AddSingleNode(TreeBankNode singleNode)
 {
     // the node is stored as given; no validation is performed here
     _singleNodes.Add(singleNode);
 }
        /// <summary>
        /// Adds a single node to this collection, first passing it to ApplyType as a NomBankNode
        /// </summary>
        /// <param name="singleNode">Single node to add</param>
        public override void AddSingleNode(TreeBankNode singleNode)
        {
            // ApplyType receives null when the node is not a NomBankNode
            NomBankNode nomBankNode = singleNode as NomBankNode;
            ApplyType(nomBankNode);

            base.AddSingleNode(singleNode);
        }
 /// <summary>
 /// Gets whether or not a single node is contained in this collection. Only the single nodes are checked;
 /// nodes that are part of a split node are not considered. Derived classes may override this method.
 /// </summary>
 /// <param name="singleNode">Single node to check for</param>
 /// <returns>True if node is contained and false otherwise</returns>
 public virtual bool ContainsSingleNode(TreeBankNode singleNode)
 {
     return(_singleNodes.Contains(singleNode));
 }
Example #21
0
 /// <summary>
 /// Builds a NomBankNode from a TreeBankNode and a parent NomBankNode. NomBankChildConstructor is handed to the
 /// base constructor as the node constructor delegate. WARNING:  an instance of any class derived from
 /// TreeBankNode (e.g., NomBankNode) is accepted, but the resulting NomBankNode has only its TreeBankNode members
 /// instantiated. Co-index IDs and referents are lost.
 /// </summary>
 /// <param name="treeBankNode">TreeBankNode from which to construct this NomBankNode</param>
 /// <param name="parent">Parent of this NomBankNode node</param>
 protected NomBankNode(TreeBankNode treeBankNode, NomBankNode parent)
     : base(treeBankNode, parent, new TreeBankNodeConstructor(NomBankChildConstructor))
 {
     // NomBank-specific state starts out empty
     _labeledNodeCollections = new List<NomBankLabeledNodeCollection>();
     _labels = new Set<NomBankNodeLabel>();
 }