예제 #1
0
 /// <summary>
 /// Copies particular labels from the current node to corresponding nodes in another tree. For each node under
 /// the current node that is returned by GetDescendants for a label, the corresponding node is the lowest common
 /// ancestor, in the other tree, of that node's first and last token numbers.
 /// </summary>
 /// <param name="n">Tree to which to copy labels</param>
 /// <param name="labels">Labels to copy</param>
 /// <exception cref="Exception">No NomBank node covering a labeled node's tokens could be obtained from the other tree</exception>
 public void CopyLabelsTo(NomBankNode n, params NomBankNodeLabel[] labels)
 {
     foreach (NomBankNodeLabel label in labels)
     {
         foreach (NomBankNode fromNode in GetDescendants(label))
         {
             int startToken = fromNode.FirstToken.TokenNumber;
             int endToken   = fromNode.LastToken.TokenNumber;

             // the "as" cast yields null when the lookup returns nothing or a node that is not a NomBankNode;
             // fail with a descriptive message instead of a bare NullReferenceException on the next line
             NomBankNode toNode = n.GetLowestCommonAncestorOfTokens(startToken, endToken) as NomBankNode;
             if (toNode == null)
             {
                 throw new Exception("Failed to find NomBank node covering tokens " + startToken + "-" + endToken + " in target tree");
             }

             // each target node gets its own copy of the label
             toNode.AddLabel(label.Copy(), true);
         }
     }
 }
        /// <summary>
        /// Applies the current label to a node, or does nothing if the node already has it. The node receives a copy
        /// of the label with its hyphen indexes cleared; hyphen indexes of the current label are then added back only
        /// if the node's surface text contains a hyphen or slash, and only for indexes whose numeric value is smaller
        /// than the number of non-empty hyphen/slash-separated parts of that text.
        /// </summary>
        /// <param name="n">Node to apply type to</param>
        /// <exception cref="Exception">A hyphen index that applies to this node has already been applied</exception>
        private void ApplyType(NomBankNode n)
        {
            // don't reapply type
            if (n.HasLabel(_label))
            {
                return;
            }

            // add label without hyphen indexes, and don't bother synching with root collection (this collection will be added to the root)
            NomBankNodeLabel label = _label.Copy();

            label.HyphenIndexes.Clear();
            n.AddLabel(label, false);

            // only apply hyphen indexes to text with a hyphen or slash
            string nodeText    = n.SurfaceText;
            char[] hyphenSlash = new char[] { '-', '/' };

            if (nodeText.IndexOfAny(hyphenSlash) == -1)
            {
                return;
            }

            // the number of parts does not depend on the hyphen index, so compute it once instead of once per index
            int numParts = nodeText.Split(hyphenSlash, StringSplitOptions.RemoveEmptyEntries).Length;

            // try to apply each hyphen index
            foreach (NomBankNodeLabel.HyphenationIndex hyphenIndex in _label.HyphenIndexes)
            {
                // get numeric hyphen index: everything after the first character of the index's string form
                int index = int.Parse(hyphenIndex.ToString().Substring(1));

                // the index only applies if the text has a part at that position
                if (index >= numParts)
                {
                    continue;
                }

                if (_appliedIndexes.Contains(hyphenIndex))
                {
                    throw new Exception("Hyphen index applied more than once");
                }

                label.AddHyphenIndex(hyphenIndex);
                _appliedIndexes.Add(hyphenIndex);
            }
        }
        /// <summary>
        /// Builds the parse tree for a training instance as a NomBank node. When predicate and/or support verb
        /// labeling is switched on, tokens are labeled according to the gold-parsed NomBank engine.
        /// </summary>
        /// <param name="mrgFile">MRG file of tree</param>
        /// <param name="sentenceNumber">Sentence number of tree</param>
        /// <returns>Training instance parse tree as a NomBank node</returns>
        protected override TreeBankNode GetTrainingInstanceParseTree(string mrgFile, int sentenceNumber)
        {
            // wrap the regular parse tree in a NomBank tree
            NomBankNode tree = new NomBankNode(base.GetTrainingInstanceParseTree(mrgFile, sentenceNumber));

            // no labeling requested...the plain tree is the result
            if (!_labelPredicates && !_labelSupportVerbs)
            {
                return tree;
            }

            /* Markables are always looked up in the gold-parsed NomBank engine: the automatic version misses some of
             * them because syntactic parse errors keep certain propositions out of the auto-parse propositions file. */
            string goldPath = _goldParsedNomBankEngine.GetFullMrgPath(mrgFile);

            foreach (NomBankNode token in tree.Tokens)
            {
                int number = token.TokenNumber;

                // predicate label
                if (_labelPredicates && _goldParsedNomBankEngine.TokenIsMarkable(goldPath, sentenceNumber, number))
                {
                    token.AddLabel(new NomBankNodeLabel(NomBankNodeLabel.NodeType.Predicate, 1), true);
                }

                // support verb label
                if (_labelSupportVerbs && _goldParsedNomBankEngine.TokenIsSupportVerb(goldPath, sentenceNumber, number))
                {
                    token.AddLabel(new NomBankNodeLabel(NomBankNodeLabel.NodeType.Support, 1), true);
                }
            }

            return tree;
        }
        /// <summary>
        /// Gets the next training instance for models built over NomBank. Nodes come from the filtered node list of
        /// the current NomBank tree; when that list is used up, the tree of the next valid NounInfo is loaded and
        /// filtered, moving on to further nouns as required.
        /// </summary>
        /// <returns>Next training instance, or null once there are no more nouns</returns>
        public override ClassifiableEntity GetNextInstance()
        {
            // as long as the current node list is used up, load the nodes of another tree
            while (!_nodes.MoveNext())
            {
                NomBankEngine engine = TreeBankEngine as NomBankEngine;

                // advance to the next valid NounInfo, switching nouns whenever the current one has none left
                while (!MoveToNextValidNounInfo(ref _nounInfo))
                {
                    if (_nouns.MoveNext())
                    {
                        // position before the first NounInfo of the new noun
                        _nounInfo = engine.GetNounInfo(_nouns.Current).GetEnumerator();
                    }
                    else
                    {
                        // out of nouns...nothing left to return
                        return null;
                    }
                }

                NomBankNode treeRoot = engine.GetNomBankTree(_nounInfo.Current);

                // the same list is cleared and refilled for every tree to keep memory usage down
                _filteredNodes.Clear();
                foreach (NomBankNode candidate in treeRoot.AllNodes)
                {
                    bool keep = Filter(candidate);
                    if (keep)
                    {
                        _filteredNodes.Add(candidate);
                    }
                }

                _nodes = _filteredNodes.GetEnumerator();
            }

            return _nodes.Current;
        }
예제 #5
0
        /// <summary>
        /// Gets bracketed text for this node. A node with labels to show is rendered as "[labels text]", where
        /// multiple labels are separated by a slash; a leaf contributes its surface text and any other node
        /// contributes the space-separated bracketed text of its non-null children.
        /// </summary>
        /// <param name="options">Bracketing options (may be null or empty)</param>
        /// <returns>Bracketed text</returns>
        public string GetBracketedText(params BracketedOutputOptions[] options)
        {
            // get set of options...built once for the whole subtree rather than once per visited node
            Set <BracketedOutputOptions> optionsSet = new Set <BracketedOutputOptions>();

            if (options != null)
            {
                optionsSet.AddRange(options);
            }

            // a single builder is shared by the whole traversal so child text isn't re-copied at every tree level
            StringBuilder text = new StringBuilder();

            AppendBracketedText(text, optionsSet);

            return(text.ToString());
        }

        /// <summary>
        /// Appends the bracketed text of this node (and, recursively, of its non-null children) to a builder
        /// </summary>
        /// <param name="text">Builder to append to</param>
        /// <param name="optionsSet">Bracketing options in effect</param>
        private void AppendBracketedText(StringBuilder text, Set <BracketedOutputOptions> optionsSet)
        {
            // whether or not we have bracketed this node
            bool bracketed = false;

            // whether or not we should prepend a space before adding any more text
            bool prependSpace = false;

            // get labels to bracket for this node
            List <NomBankNodeLabel> labelsToBracket = new List <NomBankNodeLabel>();
            bool dontBracketSupportVerbs            = optionsSet.Contains(BracketedOutputOptions.IgnoreSupportVerbs);

            foreach (NomBankNodeLabel label in _labels)
            {
                if (!dontBracketSupportVerbs || label.Type != NomBankNodeLabel.NodeType.Support)
                {
                    labelsToBracket.Add(label);
                }
            }

            // add bracketing with labels
            if (labelsToBracket.Count > 0)
            {
                text.Append("[");

                // multiple labels are separated by a slash
                bool prependSlash = false;
                foreach (NomBankNodeLabel label in labelsToBracket)
                {
                    if (prependSlash)
                    {
                        text.Append("/");
                    }

                    // get label string
                    text.Append(label.ToString(!optionsSet.Contains(BracketedOutputOptions.IgnoreArgumentFeatures),
                                               !optionsSet.Contains(BracketedOutputOptions.IgnoreHyphenIndexes)));

                    // add sense information to predicate labels
                    if (label.IsPredicate && optionsSet.Contains(BracketedOutputOptions.IncludePredicateFrame))
                    {
                        text.Append("." + Information.RoleSetId);
                    }

                    // add confidence score
                    if (!optionsSet.Contains(BracketedOutputOptions.IgnoreBracketProbabilities))
                    {
                        text.Append(" " + label.Confidence);
                    }

                    prependSlash = true;
                }

                bracketed = prependSpace = true;
            }

            // check for leaf
            if (IsLeaf)
            {
                if (prependSpace)
                {
                    text.Append(" ");
                }

                text.Append(SurfaceText);
            }
            // add bracketed text for each child
            else
            {
                IEnumerator <TreeBankNode> childEnum = Children;
                while (childEnum.MoveNext())
                {
                    // direct cast: a child of the wrong type fails here with InvalidCastException instead of
                    // turning into null and failing later with a NullReferenceException
                    NomBankNode child = (NomBankNode)childEnum.Current;

                    if (!child.IsNullElement)
                    {
                        if (prependSpace)
                        {
                            text.Append(" ");
                        }

                        child.AppendBracketedText(text, optionsSet);
                        prependSpace = true;
                    }
                }
            }

            // add closing bracket if we started one above
            if (bracketed)
            {
                text.Append("]");
            }
        }
예제 #6
0
 /// <summary>
 /// Builds a NomBankNode from a TreeBankNode, with child nodes created through NomBankChildConstructor. The new
 /// node starts out with an empty label set and an empty list of labeled node collections. WARNING:  an instance
 /// of any class derived from TreeBankNode (e.g., NomBankNode) is accepted, but the resulting NomBankNode only has
 /// its TreeBankNode members instantiated. Co-index IDs and referents are lost.
 /// </summary>
 /// <param name="treeBankNode">TreeBankNode from which to construct this NomBankNode</param>
 /// <param name="parent">Parent of this NomBankNode node</param>
 protected NomBankNode(TreeBankNode treeBankNode, NomBankNode parent)
     : base(treeBankNode, parent, new TreeBankNodeConstructor(NomBankChildConstructor))
 {
     // NomBank-specific state starts out empty
     _labeledNodeCollections = new List <NomBankLabeledNodeCollection>();
     _labels                 = new Set <NomBankNodeLabel>();
 }
예제 #7
0
        /// <summary>
        /// Determines whether the current tree and another tree are labeled identically with respect to a set of
        /// labels. For every label, both trees must contain the same number of nodes carrying it, and each such node
        /// in one tree must cover the same tokens as some node carrying that label in the other tree.
        /// </summary>
        /// <param name="otherTree">Tree to compare the current one to</param>
        /// <param name="ignoreNullNodes">Whether or not to ignore null nodes</param>
        /// <param name="labelsToCheck">Node labels to check</param>
        /// <returns>True if argument labelings are the same, false otherwise</returns>
        /// <exception cref="Exception">The current node or the other tree is not a root node</exception>
        public bool HasSameLabelingAs(NomBankNode otherTree, bool ignoreNullNodes, NomBankNodeLabel[] labelsToCheck)
        {
            // the comparison is only defined between two root nodes
            if (!IsRoot || !otherTree.IsRoot)
            {
                throw new Exception("Non-root node");
            }

            foreach (NomBankNodeLabel label in labelsToCheck)
            {
                // nodes with this label in the current tree
                List <NomBankNode> ours = GetDescendants(label);
                if (ignoreNullNodes)
                {
                    ours.RemoveAll(node => node.IsNullElement);
                }

                // nodes with this label in the other tree
                List <NomBankNode> theirs = otherTree.GetDescendants(label);
                if (ignoreNullNodes)
                {
                    theirs.RemoveAll(node => node.IsNullElement);
                }

                // differing counts can never match up
                if (ours.Count != theirs.Count)
                {
                    return false;
                }

                // every node of ours needs a counterpart of theirs covering the same tokens...
                bool oursMatched = ours.TrueForAll(mine => theirs.Exists(other => mine.CoversSameTokensAs(other)));
                if (!oursMatched)
                {
                    return false;
                }

                // ...and the other way around
                bool theirsMatched = theirs.TrueForAll(other => ours.Exists(mine => other.CoversSameTokensAs(mine)));
                if (!theirsMatched)
                {
                    return false;
                }
            }

            return true;
        }
예제 #8
0
        /// <summary>
        /// Marks argument nodes from the current node in the corresponding parse from a different TreeBank. This is used when
        /// transferring NomBank annotations to parse trees other than those distributed in the TreeBank (e.g., those produced
        /// by an automatic syntactic parser). The current node must be a root; the other parse tree is looked up using the
        /// current node's MRG file and sentence number and must have the same surface text.
        /// </summary>
        /// <param name="treeBankEngine">Initialized TreeBank engine from which to pull the parse tree to mark NomBank arguments within</param>
        /// <returns>NomBank node, or null if all arguments couldn't be minimally transferred to the other parse tree. An argument
        /// is minimally transferred if the corresponding node in the other parse tree subsumes precisely the same text as the node
        /// in the current parse tree. Sometimes this is not possible due to parse errors. Null is also returned if no labeled
        /// node collection with non-null nodes was transferred.</returns>
        /// <exception cref="Exception">The current node is not a root, the other parse tree has different surface text, or
        /// the number of applied hyphen indexes of a transferred collection differs from the number on its label</exception>
        public NomBankNode MarkArgumentNodesIn(TreeBankEngine treeBankEngine)
        {
            // make sure we're marking arguments using a root node
            if (!IsRoot)
            {
                throw new Exception("Must pass root node");
            }

            // get mrg file in other tree bank
            string treeBankMrgFile = treeBankEngine.GetFullMrgPath(MrgFile);

            // need a NomBank root to mark arguments within
            NomBankNode nbRoot = new NomBankNode(treeBankEngine.GetParseTree(treeBankMrgFile, SentenceNumber));

            // make sure we got the right sentence
            if (nbRoot.SurfaceText != SurfaceText)
            {
                throw new Exception("Failed to get same parse tree");
            }

            // Add information to root. Ignore leaf number and argument information - we'll set them at the end.
            // The MRG file is stored relative to the engine's MRG path (path prefix and surrounding separators removed).
            treeBankMrgFile = treeBankMrgFile.Substring(treeBankEngine.MrgPath.Length).Trim(Path.DirectorySeparatorChar);
            NounInfo currInfo = Information;

            nbRoot.Information = new NounInfo(currInfo.Noun, treeBankMrgFile, currInfo.SentenceNumber, -1, currInfo.RoleSetId, "");

            // transfer all argument node lists
            foreach (NomBankLabeledNodeCollection corefList in LabeledNodeCollections)
            {
                // new node list, carrying its own copy of the label
                NomBankLabeledNodeCollection otherCorefList = new NomBankLabeledNodeCollection(corefList.Label.Copy());

                // get single nodes (null elements are skipped)
                foreach (NomBankNode singleNode in corefList.SingleNodes)
                {
                    if (!singleNode.IsNullElement)
                    {
                        // get argument node from other parse tree...give up on the whole transfer if there is none
                        NomBankNode otherArgNode = nbRoot.GetMinimallySubsumingNode(singleNode.FirstToken, singleNode.LastToken) as NomBankNode;
                        if (otherArgNode == null)
                        {
                            return(null);
                        }

                        otherCorefList.AddSingleNode(otherArgNode);
                    }
                }

                // get split arguments
                foreach (List <TreeBankNode> splitNode in corefList.SplitNodes)
                {
                    List <TreeBankNode> otherSplitArg = new List <TreeBankNode>();

                    // get each node in the split argument (null elements are skipped)
                    foreach (NomBankNode node in splitNode)
                    {
                        if (!node.IsNullElement)
                        {
                            // get split node in other tree...give up on the whole transfer if there is none
                            NomBankNode otherSplitArgNode = nbRoot.GetMinimallySubsumingNode(node.FirstToken, node.LastToken) as NomBankNode;
                            if (otherSplitArgNode == null)
                            {
                                return(null);
                            }

                            otherSplitArg.Add(otherSplitArgNode);
                        }
                    }

                    // if only one node of the split arg was non-null, add that node as a single node
                    if (otherSplitArg.Count == 1)
                    {
                        otherCorefList.AddSingleNode(otherSplitArg.First());
                    }
                    // otherwise, add the split arg normally (a split arg with no non-null nodes is dropped)
                    else if (otherSplitArg.Count >= 2)
                    {
                        otherCorefList.AddSplitNode(otherSplitArg);
                    }
                }

                // make sure all hyphen indexes were applied (checked even if no nodes were added to the new list)
                if (otherCorefList.Label.HyphenIndexes.Count != otherCorefList.AppliedIndexes.Count)
                {
                    throw new Exception("Not all hyphen indexes were applied");
                }

                // add coref list if we found non-null nodes
                if (otherCorefList.SingleNodes.Count > 0 || otherCorefList.SplitNodes.Count > 0)
                {
                    nbRoot.LabeledNodeCollections.Add(otherCorefList);
                }
            }

            // return null if we didn't find any argument node lists with non-null nodes
            if (nbRoot.LabeledNodeCollections.Count == 0)
            {
                return(null);
            }

            // set leaf number and argument locations in the information object
            // (the leaf number is that of the first leaf under the predicate node)
            nbRoot.Information.LeafNumber           = nbRoot.PredicateNode.Leaves[0].LeafNumber;
            nbRoot.Information.LabeledNodeLocations = nbRoot.LabeledNodeLocations;

            return(nbRoot);
        }