Exemplo n.º 1
0
 /// <summary>
 /// Builds a PropBankNode from an existing TreeBankNode, handing PropBankChildConstructor to the base
 /// constructor as the node-constructor delegate.
 /// WARNING:  an instance of any class derived from TreeBankNode (e.g., PropBankNode) is accepted, but the
 /// resulting PropBankNode will have only its TreeBankNode members instantiated.
 /// </summary>
 /// <param name="treeBankNode">TreeBankNode from which to construct this PropBankNode</param>
 /// <param name="parent">Parent of this PropBank node</param>
 protected PropBankNode(TreeBankNode treeBankNode, PropBankNode parent)
     : base(treeBankNode, parent, new TreeBankNodeConstructor(PropBankChildConstructor))
 {
     // PropBank-specific state starts out empty:  no labeled node collections, no information, no label
     _labeledNodeCollections = new List<PropBankLabeledNodeCollection>();
     _information = null;
     _label = null;
 }
Exemplo n.º 2
0
        /// <summary>
        /// Gets bracketed text for this node. If the node has a label, the text is wrapped as "[label ...]";
        /// leaves contribute their surface text and internal nodes contribute the space-separated bracketed
        /// text of their non-null children.
        /// </summary>
        /// <param name="options">Bracketing options. IncludePredicateFrame appends the role set ID after a predicate
        /// label; IgnoreBracketProbabilities suppresses the label confidence. May be null or empty.</param>
        /// <returns>Bracketed text for the subtree rooted at this node</returns>
        /// <exception cref="InvalidCastException">A child of this node is not a PropBankNode</exception>
        public string GetBracketedText(params BracketedOutputOptions[] options)
        {
            // get set of options
            Set <BracketedOutputOptions> optionsSet = new Set <BracketedOutputOptions>();

            if (options != null)
            {
                optionsSet.AddRange(options);
            }

            StringBuilder text = new StringBuilder();

            bool bracketed    = false;
            bool prependSpace = false;

            // add label if we have one
            if (_label != null)
            {
                text.Append("[" + _label.ToString(true));

                // append predicate sense if needed
                if (_label.IsPredicate && optionsSet.Contains(BracketedOutputOptions.IncludePredicateFrame))
                {
                    text.Append(Information.RoleSetId);
                }

                // add label probability if needed
                if (!optionsSet.Contains(BracketedOutputOptions.IgnoreBracketProbabilities))
                {
                    text.Append(" " + _label.Confidence);
                }

                bracketed = prependSpace = true;
            }

            // check for leaf
            if (IsLeaf)
            {
                text.Append((prependSpace ? " " : "") + SurfaceText);
            }
            // add children
            else
            {
                IEnumerator <TreeBankNode> children = Children;
                while (children.MoveNext())
                {
                    // direct cast rather than "as":  a child of the wrong type now fails with an InvalidCastException
                    // naming the offending type instead of an uninformative NullReferenceException on the next line
                    PropBankNode child = (PropBankNode)children.Current;
                    if (!child.IsNullElement)
                    {
                        text.Append((prependSpace ? " " : "") + child.GetBracketedText(options));
                        prependSpace = true;
                    }
                }
            }

            // close the bracket only if one was opened for a label
            string bracketedText = text.ToString() + (bracketed ? "]" : "");

            return(bracketedText);
        }
        /// <summary>
        /// Adds a single node to this list, first applying this collection's label to the node
        /// </summary>
        /// <param name="singleNode">Single node to add. Must be a non-null PropBankNode.</param>
        /// <exception cref="System.ArgumentNullException">singleNode is null</exception>
        /// <exception cref="System.ArgumentException">singleNode is not a PropBankNode</exception>
        public override void AddSingleNode(TreeBankNode singleNode)
        {
            // fail with a descriptive argument exception instead of a NullReferenceException further down
            if (singleNode == null)
            {
                throw new System.ArgumentNullException("singleNode");
            }

            PropBankNode propBankNode = singleNode as PropBankNode;
            if (propBankNode == null)
            {
                throw new System.ArgumentException("Node must be a PropBankNode", "singleNode");
            }

            // apply label to node
            propBankNode.SetLabel(_label, false);

            base.AddSingleNode(propBankNode);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Gets the next training instance for models built over PropBank. Nodes of the current tree are handed
        /// out one at a time; when they run out, the next valid VerbInfo (moving on to following verbs as
        /// needed) supplies a new tree whose nodes are passed through Filter.
        /// </summary>
        /// <returns>Next training instance, or null when there are no more verbs</returns>
        public override ClassifiableEntity GetNextInstance()
        {
            while (true)
            {
                // a node is still pending in the current tree...hand it out
                if (_nodeEnum.MoveNext())
                {
                    return _nodeEnum.Current;
                }

                PropBankEngine engine = TreeBankEngine as PropBankEngine;

                // advance to the next valid VerbInfo
                while (!MoveToNextValidVerbInfo(ref _verbInfoEnum))
                {
                    // no verbs left means no instances left
                    if (!_verbEnum.MoveNext())
                    {
                        return null;
                    }

                    // position before the first VerbInfo of the new verb
                    _verbInfoEnum = engine.GetVerbInfo(_verbEnum.Current).GetEnumerator();
                }

                // keep only the nodes of the new tree that pass the filter
                PropBankNode tree = engine.GetPropBankTree(_verbInfoEnum.Current);
                List<PropBankNode> accepted = new List<PropBankNode>();
                foreach (PropBankNode candidate in tree.AllNodes)
                {
                    if (Filter(candidate))
                    {
                        accepted.Add(candidate);
                    }
                }

                // loop back around to pull the first accepted node (if any)
                _nodeEnum = accepted.GetEnumerator();
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Gets full bracketing of a sentence. The result is a table with one column per token, columns
        /// separated by '|':  the first line holds the token surface text and each following line holds the
        /// labels the tokens receive with respect to one predicate of the sentence.
        /// </summary>
        /// <param name="mrgFile">MRG file to get sentence from</param>
        /// <param name="sentNum">Sentence number</param>
        /// <returns>Full bracketing, or just the surface text of the sentence if it has no predicates</returns>
        /// <exception cref="Exception">The predicate trees of the sentence disagree on the number of tokens</exception>
        public string GetFullBracketing(string mrgFile, int sentNum)
        {
            // make sure we have predicates for the sentence...if we don't, simply return the surface text. an empty
            // list is treated like a missing one, since there would be no tree to read the tokens from.
            List <VerbInfo> sentVerbInfo;

            if (!TryGetVerbInfoForSentence(mrgFile, sentNum, out sentVerbInfo) || sentVerbInfo == null || sentVerbInfo.Count == 0)
            {
                return(GetParseTree(mrgFile, sentNum).SurfaceText);
            }

            // lists of tokens and labels for each token across all predicates
            List <string> tokens = null;
            Dictionary <int, List <string> > tokenLabels = null;

            // read each predicate for the sentence
            foreach (VerbInfo vi in sentVerbInfo)
            {
                PropBankNode predTree = GetPropBankTree(vi);

                // init lists on first predicate
                if (tokens == null)
                {
                    tokens      = new List <string>();
                    tokenLabels = new Dictionary <int, List <string> >();

                    foreach (TreeBankNode token in predTree.Tokens)
                    {
                        tokens.Add(token.SurfaceText);
                        tokenLabels.Add(token.TokenNumber, new List <string>());
                    }
                }
                // sanity check on number of tokens
                else if (tokenLabels.Count != predTree.Tokens.Length)
                {
                    throw new Exception("Token count mismatch");
                }

                // collect labels for tokens w.r.t. current predicate
                foreach (PropBankNode token in predTree.Tokens)
                {
                    // build a comma-separated label from every labeled node in the token's ancestor sequence
                    StringBuilder tokenLabel = new StringBuilder();
                    foreach (PropBankNode node in token.Ancestors)
                    {
                        PropBankNodeLabel label = node.Label;
                        if (label == null)
                        {
                            continue;
                        }

                        string typeStr = label.Type.ToString();

                        if (label.IsArgument)
                        {
                            // first and last character of the type name
                            tokenLabel.Append(typeStr[0]).Append(typeStr[typeStr.Length - 1]).Append(',');
                        }
                        else if (label.IsModifier)
                        {
                            // first character of the type name plus the feature
                            tokenLabel.Append(typeStr[0]).Append('-').Append(label.Feature.ToString()).Append(',');
                        }
                        else if (label.IsPredicate)
                        {
                            tokenLabel.Append("Predicate,");
                        }
                    }

                    tokenLabels[token.TokenNumber].Add(tokenLabel.ToString().Trim(','));
                }
            }

            // build format string:  one right-aligned column per token, wide enough for the token and all of its labels.
            // token numbers are used as zero-based column positions here and below.
            StringBuilder formatBuilder = new StringBuilder();

            for (int tokenNum = 0; tokenNum < tokens.Count; ++tokenNum)
            {
                // get maximum label length
                int maxLabelLen = tokens[tokenNum].Length;
                foreach (string label in tokenLabels[tokenNum])
                {
                    if (label.Length > maxLabelLen)
                    {
                        maxLabelLen = label.Length;
                    }
                }

                if (tokenNum > 0)
                {
                    formatBuilder.Append('|');
                }

                formatBuilder.Append('{').Append(tokenNum).Append(',').Append(maxLabelLen).Append('}');
            }

            string format = formatBuilder.ToString();

            // start with surface text
            StringBuilder bracketedText = new StringBuilder(string.Format(format, tokens.ToArray()));

            // add each predicate line, each on its own line below the surface text
            int numPreds = sentVerbInfo.Count;

            for (int lineNum = 0; lineNum < numPreds; ++lineNum)
            {
                // gather labels by column position rather than by dictionary enumeration order, which is not
                // guaranteed to match the positional placeholders in the format string
                string[] lineLabels = new string[tokens.Count];
                for (int tokenNum = 0; tokenNum < tokens.Count; ++tokenNum)
                {
                    lineLabels[tokenNum] = tokenLabels[tokenNum][lineNum];
                }

                // append bracketed line
                bracketedText.Append(Environment.NewLine);
                bracketedText.Append(string.Format(format, lineLabels));
            }

            return(bracketedText.ToString());
        }
Exemplo n.º 6
0
        /// <summary>
        /// Gets a predicate tree for a PropBank propositions entry. The parse tree for the entry's sentence is
        /// wrapped in a PropBankNode, and one labeled node collection is added to the root for each
        /// space-delimited item in the entry's labeled node locations.
        /// </summary>
        /// <param name="vi">VerbInfo specifying tree to look up</param>
        /// <returns>PropBankNode</returns>
        /// <exception cref="Exception">A label has more than three dash-delimited columns, or no predicate leaf
        /// carries the leaf number given by the VerbInfo</exception>
        public PropBankNode GetPropBankTree(VerbInfo vi)
        {
            TreeBankNode parse = GetParseTree(vi.File, vi.SentenceNumber);
            PropBankNode predTree = new PropBankNode(parse);
            predTree.Information = vi;

            // each space-delimited item has the dash-delimited form location-type[-feature]
            foreach (string labeledLocation in vi.LabeledNodeLocations.Split(' '))
            {
                string[] parts = labeledLocation.Split('-');

                PropBankNodeLabel.NodeType nodeType = PropBankNodeLabel.GetNodeType(parts[1]);

                // the feature column is optional
                PropBankNodeLabel.NodeFeature nodeFeature = PropBankNodeLabel.NodeFeature.None;
                if (parts.Length > 2)
                {
                    // the column sometimes holds the actual preposition instead of a feature name...fall back
                    // on PRP, which was added for this case
                    if (!PropBankNodeLabel.TryGetNodeFeature(parts[2], out nodeFeature))
                    {
                        nodeFeature = PropBankNodeLabel.GetNodeFeature("PRP");
                    }

                    if (parts.Length > 3)
                    {
                        throw new Exception("Missed feature");
                    }
                }

                // collect the nodes at the given location under the new label and register them on the root
                PropBankLabeledNodeCollection collection = new PropBankLabeledNodeCollection(new PropBankNodeLabel(nodeType, nodeFeature, 1));
                AddNodesToCollection(predTree, parts[0], collection);
                predTree.LabeledNodeCollections.Add(collection);
            }

            // one of the predicate leaves must have the leaf number from the propositions file entry
            bool leafMatched = false;
            foreach (PropBankNode predicateNode in predTree.PredicateNodes)
            {
                foreach (PropBankNode leaf in predicateNode.Leaves)
                {
                    leafMatched = leaf.LeafNumber == vi.LeafNumber;
                    if (leafMatched)
                    {
                        break;
                    }
                }

                if (leafMatched)
                {
                    break;
                }
            }

            if (leafMatched)
            {
                return predTree;
            }

            throw new Exception("Mismatch between VerbInfo predicate leaf number and actual predicate leaf number");
        }
Exemplo n.º 7
0
        /// <summary>
        /// Builds the mapping from verbs to their morphological variants. Variants are identified by looking
        /// at the marked predicates throughout PropBank. For example, the verb "join" will have many different surface
        /// realizations in the TreeBank:  "joins", "joined", etc. The mapping is loaded from the file at
        /// MorphMapIndexPath if it exists; otherwise it is computed from all verbs and saved to that path. The reverse
        /// mapping (variant to base verb) is rebuilt in either case.
        /// </summary>
        /// <exception cref="Exception">A morphological variant maps to more than one base verb</exception>
        private void BuildMorphMap()
        {
            if (File.Exists(MorphMapIndexPath))
            {
                // load verb morphs from saved file. the using block closes the reader even if parsing throws, so
                // the file handle is not leaked.
                using (StreamReader file = new StreamReader(MorphMapIndexPath))
                {
                    // the first line is parsed as the initial capacity of the dictionary
                    _verbMorphs = new Dictionary <string, Set <string> >(int.Parse(file.ReadLine()));

                    // the remaining lines come in pairs:  a verb, then its '|'-delimited variants
                    string line;
                    while (file.TryReadLine(out line))
                    {
                        _verbMorphs.Add(line, new Set <string>(file.ReadLine().Split('|')));
                    }
                }
            }
            else
            {
                // process all verbs
                _verbMorphs = new Dictionary <string, Set <string> >();
                foreach (string verb in AllVerbs)
                {
                    // get each predicate tree, checking for morphological variants of the verb
                    Set <string> variants = new Set <string>(false);
                    foreach (VerbInfo vi in GetVerbInfo(verb))
                    {
                        PropBankNode predicateTree = GetPropBankTree(vi);

                        // get variant:  surface text of all predicate nodes, space-separated
                        StringBuilder variantBuilder = new StringBuilder();
                        foreach (PropBankNode predNode in predicateTree.PredicateNodes)
                        {
                            variantBuilder.Append((variantBuilder.Length > 0 ? " " : "") + predNode.SurfaceText);
                        }

                        // normalize case
                        string variant = variantBuilder.ToString().ToLower();

                        // add if we found a variant
                        if (variant != verb)
                        {
                            variants.Add(variant);
                        }
                    }

                    // add variants if we found any
                    if (variants.Count > 0)
                    {
                        _verbMorphs.Add(verb, variants);
                    }
                }

                // save verb morphs collection, writing each set of variants as a '|'-delimited line
                _verbMorphs.Save(MorphMapIndexPath, x => x, x =>
                {
                    StringBuilder morphBuilder = new StringBuilder();
                    foreach (string morph in x)
                    {
                        // '|' is the delimiter, so it must not occur within a variant
                        morph.Disallow('|');
                        morphBuilder.Append((morphBuilder.Length > 0 ? "|" : "") + morph);
                    }

                    return(morphBuilder.ToString());
                }, DictionaryExtensions.Sort.None, false, true, "", Environment.NewLine, true);
            }

            // map variants back to base verbs
            _morphVerb = new Dictionary <string, string>();
            foreach (string verb in _verbMorphs.Keys)
            {
                foreach (string morph in _verbMorphs[verb])
                {
                    // make sure morphs have a single base verb
                    string currBase;
                    if (!_morphVerb.TryGetValue(morph, out currBase))
                    {
                        _morphVerb.Add(morph, verb);
                    }
                    else if (currBase != verb)
                    {
                        throw new Exception("Multiple base verbs");
                    }
                }
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Gets whether or not the current tree has the same argument labeling as another tree. For each
        /// requested type, both trees must have the same number of nodes of that type, and every such node in
        /// one tree must cover the same tokens as some node of that type in the other tree.
        /// </summary>
        /// <param name="otherTree">Tree to compare the current one to</param>
        /// <param name="ignoreNullNodes">Whether or not to ignore null nodes</param>
        /// <param name="argumentTypesToCheck">Argument types to check</param>
        /// <returns>True if argument labelings are the same, false otherwise</returns>
        /// <exception cref="Exception">Either tree is not a root node</exception>
        public bool HasSameLabelingAs(PropBankNode otherTree, bool ignoreNullNodes, params PropBankNodeLabel.NodeType[] argumentTypesToCheck)
        {
            // the comparison is only defined between two roots
            if (!IsRoot || !otherTree.IsRoot)
            {
                throw new Exception("Non-root node");
            }

            foreach (PropBankNodeLabel.NodeType type in argumentTypesToCheck)
            {
                // nodes of this type in the current tree
                List<PropBankNode> mine = GetDescendants(type);
                if (ignoreNullNodes)
                {
                    mine.RemoveAll(node => node.IsNullElement);
                }

                // nodes of this type in the other tree
                List<PropBankNode> theirs = otherTree.GetDescendants(type);
                if (ignoreNullNodes)
                {
                    theirs.RemoveAll(node => node.IsNullElement);
                }

                if (mine.Count != theirs.Count)
                {
                    return false;
                }

                // every node of the current tree needs a counterpart in the other tree...
                if (!mine.TrueForAll(a => theirs.Exists(b => a.CoversSameTokensAs(b))))
                {
                    return false;
                }

                // ...and every node of the other tree needs a counterpart in the current one
                if (!theirs.TrueForAll(b => mine.Exists(a => b.CoversSameTokensAs(a))))
                {
                    return false;
                }
            }

            return true;
        }
Exemplo n.º 9
0
        /// <summary>
        /// Marks argument nodes from the current node in the corresponding parse from a different TreeBank. This is used when
        /// transferring PropBank annotations to parse trees other than those distributed in the TreeBank (e.g., those produced
        /// by an automatic syntactic parser). Must be called on a root node.
        /// </summary>
        /// <param name="treeBankEngine">Initialized TreeBank engine from which to pull the parse tree to mark PropBank arguments within</param>
        /// <returns>PropBank node, or null if all arguments couldn't be minimally transferred to the other parse tree. An argument
        /// is minimally transferred if the corresponding node in the other parse tree subsumes precisely the same text as the node in the
        /// current parse tree. Sometimes this is not possible due to parse errors. Null is also returned if no labeled node
        /// collection with non-null nodes could be transferred.</returns>
        /// <exception cref="Exception">This node is not a root, or the surface text of the sentence in the other TreeBank
        /// differs from the surface text of this tree</exception>
        public PropBankNode MarkArgumentNodesIn(TreeBankEngine treeBankEngine)
        {
            if (!IsRoot)
            {
                throw new Exception("Attempted to transform non-root node");
            }

            // get mrg file in other tree bank:  only the part of this tree's MRG path after the last directory separator is used
            string treeBankMrgFile = treeBankEngine.GetFullMrgPath(MrgFile.Substring(MrgFile.LastIndexOf(Path.DirectorySeparatorChar) + 1));

            // need a PropBank root to mark arguments within
            PropBankNode pbRoot = new PropBankNode(treeBankEngine.GetParseTree(treeBankMrgFile, SentenceNumber));

            // make sure we got the right sentence
            if (pbRoot.SurfaceText != SurfaceText)
            {
                throw new Exception("Failed to convert root to Charniak-parsed version");
            }

            // Add information to root. Ignore leaf number and argument info for now - we'll set them at the end.
            // The first MrgPath.Length characters are dropped from the full path - presumably making it relative to the
            // other engine's MRG directory (TODO confirm).
            treeBankMrgFile = treeBankMrgFile.Substring(treeBankEngine.MrgPath.Length);
            VerbInfo pbInfo = Information;

            pbRoot.Information = new VerbInfo(pbInfo.Verb, treeBankMrgFile, pbInfo.SentenceNumber,
                                              -1, pbInfo.Tagger, pbInfo.RoleSetId,
                                              pbInfo.VForm, pbInfo.VTense, pbInfo.VAspect,
                                              pbInfo.VPerson, pbInfo.VVoice, "");

            // transfer all argument node lists
            foreach (PropBankLabeledNodeCollection nodeCollection in LabeledNodeCollections)
            {
                // new node collection carrying a copy of the original label (same type, feature and confidence)
                PropBankLabeledNodeCollection otherNodeCollection = new PropBankLabeledNodeCollection(new PropBankNodeLabel(nodeCollection.Label.Type, nodeCollection.Label.Feature, nodeCollection.Label.Confidence));

                // get single nodes, skipping null elements
                foreach (PropBankNode singleNode in nodeCollection.SingleNodes)
                {
                    if (!singleNode.IsNullElement)
                    {
                        // get argument node from other parse tree...give up on the whole transfer if there is none
                        PropBankNode otherArgNode = (PropBankNode)pbRoot.GetMinimallySubsumingNode(singleNode.FirstToken, singleNode.LastToken);
                        if (otherArgNode == null)
                        {
                            return(null);
                        }

                        otherNodeCollection.AddSingleNode(otherArgNode);
                    }
                }

                // get split arguments
                foreach (List <TreeBankNode> splitArg in nodeCollection.SplitNodes)
                {
                    List <TreeBankNode> otherSplitArg = new List <TreeBankNode>();

                    // get each node in the split argument, skipping null elements
                    foreach (PropBankNode splitArgNode in splitArg)
                    {
                        if (!splitArgNode.IsNullElement)
                        {
                            // get split node in other tree...give up on the whole transfer if there is none
                            PropBankNode otherSplitArgNode = (PropBankNode)pbRoot.GetMinimallySubsumingNode(splitArgNode.FirstToken, splitArgNode.LastToken);
                            if (otherSplitArgNode == null)
                            {
                                return(null);
                            }

                            otherSplitArg.Add(otherSplitArgNode);
                        }
                    }

                    // if only one node of the split arg was non-null, add that node as a single
                    if (otherSplitArg.Count == 1)
                    {
                        otherNodeCollection.AddSingleNode(otherSplitArg.First());
                    }
                    // otherwise, add the split arg normally (a split arg with no non-null nodes is dropped)
                    else if (otherSplitArg.Count >= 2)
                    {
                        otherNodeCollection.AddSplitNode(otherSplitArg);
                    }
                }

                // add the new collection to the other root if we found non-null nodes
                if (otherNodeCollection.SingleNodes.Count > 0 || otherNodeCollection.SplitNodes.Count > 0)
                {
                    pbRoot.LabeledNodeCollections.Add(otherNodeCollection);
                }
            }

            // return null if we didn't find any argument node lists with non-null nodes
            if (pbRoot.LabeledNodeCollections.Count == 0)
            {
                return(null);
            }

            // set leaf number and argument information, which were left as placeholders above
            // NOTE(review): this assumes at least one predicate node with a leaf was transferred - confirm
            pbRoot.Information.LeafNumber           = pbRoot.PredicateNodes.First().Leaves[0].LeafNumber;
            pbRoot.Information.LabeledNodeLocations = pbRoot.LabeledNodeLocations;

            return(pbRoot);
        }