/// <summary>
/// Creates a PropBankNode from an existing TreeBankNode. WARNING: an instance of any class derived from
/// TreeBankNode (e.g., PropBankNode) is accepted, but the resulting PropBankNode only has its TreeBankNode
/// members instantiated; the PropBank-specific members start out empty.
/// </summary>
/// <param name="treeBankNode">TreeBankNode from which to construct this PropBankNode</param>
/// <param name="parent">Parent of this PropBank node</param>
protected PropBankNode(TreeBankNode treeBankNode, PropBankNode parent)
    : base(treeBankNode, parent, new TreeBankNodeConstructor(PropBankChildConstructor))
{
    // PropBank-specific state: no labeled node collections, no verb information, no label
    _labeledNodeCollections = new List<PropBankLabeledNodeCollection>();
    _information = null;
    _label = null;
}
/// <summary>
/// Gets bracketed text for this node. A labeled node is rendered as "[" + label (optionally followed by the
/// predicate role set ID and the label confidence) + content + "]"; an unlabeled node is rendered as its
/// content only. The content is the surface text for a leaf, or the space-separated bracketed text of all
/// non-null children otherwise.
/// </summary>
/// <param name="options">Bracketing options (may be null or empty)</param>
/// <returns>Bracketed text</returns>
/// <exception cref="InvalidCastException">A child of this node is not a PropBankNode</exception>
public string GetBracketedText(params BracketedOutputOptions[] options)
{
    // get set of options
    Set<BracketedOutputOptions> optionsSet = new Set<BracketedOutputOptions>();
    if (options != null)
    {
        optionsSet.AddRange(options);
    }

    StringBuilder text = new StringBuilder();
    bool bracketed = false;
    bool prependSpace = false;

    // add label if we have one
    if (_label != null)
    {
        text.Append('[').Append(_label.ToString(true));

        // append predicate sense if needed
        if (_label.IsPredicate && optionsSet.Contains(BracketedOutputOptions.IncludePredicateFrame))
        {
            text.Append(Information.RoleSetId);
        }

        // add label probability if needed
        if (!optionsSet.Contains(BracketedOutputOptions.IgnoreBracketProbabilities))
        {
            text.Append(' ').Append(_label.Confidence);
        }

        bracketed = prependSpace = true;
    }

    if (IsLeaf)
    {
        // leaf: emit the surface text
        if (prependSpace)
        {
            text.Append(' ');
        }

        text.Append(SurfaceText);
    }
    else
    {
        // internal node: emit each non-null child, making sure the enumerator gets disposed
        using (IEnumerator<TreeBankNode> children = Children)
        {
            while (children.MoveNext())
            {
                // direct cast: a non-PropBank child fails here with a clear InvalidCastException
                // instead of a NullReferenceException on the next line
                PropBankNode child = (PropBankNode)children.Current;
                if (!child.IsNullElement)
                {
                    if (prependSpace)
                    {
                        text.Append(' ');
                    }

                    text.Append(child.GetBracketedText(options));
                    prependSpace = true;
                }
            }
        }
    }

    // close the bracket opened for the label, if any
    if (bracketed)
    {
        text.Append(']');
    }

    return text.ToString();
}
/// <summary>
/// Adds a single node to this list, applying this collection's label to the node first.
/// </summary>
/// <param name="singleNode">Single node to add. Must be a PropBankNode.</param>
/// <exception cref="ArgumentNullException"><paramref name="singleNode"/> is null</exception>
/// <exception cref="ArgumentException"><paramref name="singleNode"/> is not a PropBankNode</exception>
public override void AddSingleNode(TreeBankNode singleNode)
{
    if (singleNode == null)
    {
        throw new ArgumentNullException("singleNode");
    }

    // only PropBank nodes can carry a PropBank label
    PropBankNode propBankNode = singleNode as PropBankNode;
    if (propBankNode == null)
    {
        throw new ArgumentException("Expected a PropBankNode but received " + singleNode.GetType().FullName, "singleNode");
    }

    // apply label to node
    propBankNode.SetLabel(_label, false);

    base.AddSingleNode(propBankNode);
}
/// <summary>
/// Gets the next training instance for models built over PropBank. Instances are the nodes of each
/// predicate tree that pass <c>Filter</c>; trees are visited verb by verb and, within a verb, VerbInfo by
/// VerbInfo.
/// </summary>
/// <returns>Next training instance, or null when there are no more verbs to process</returns>
public override ClassifiableEntity GetNextInstance()
{
    // refill the node enumerator from the next predicate tree until it yields a node
    while (!_nodeEnum.MoveNext())
    {
        PropBankEngine engine = TreeBankEngine as PropBankEngine;

        // advance to the next valid VerbInfo, switching to the next verb whenever the current one is used up
        while (!MoveToNextValidVerbInfo(ref _verbInfoEnum))
        {
            // no verbs left means no instances left
            if (!_verbEnum.MoveNext())
            {
                return null;
            }

            // position before the first VerbInfo of the new current verb
            _verbInfoEnum = engine.GetVerbInfo(_verbEnum.Current).GetEnumerator();
        }

        // keep only the nodes of the predicate tree that pass the filter
        PropBankNode tree = engine.GetPropBankTree(_verbInfoEnum.Current);
        List<PropBankNode> kept = new List<PropBankNode>();
        foreach (PropBankNode candidate in tree.AllNodes)
        {
            if (!Filter(candidate))
            {
                continue;
            }

            kept.Add(candidate);
        }

        _nodeEnum = kept.GetEnumerator();
    }

    return _nodeEnum.Current;
}
/// <summary>
/// Gets full bracketing of a sentence: a first line with the sentence tokens, followed by one line per
/// predicate holding the labels of each token with respect to that predicate. Columns are separated by '|'
/// and each column is as wide as its token or its longest label, whichever is longer.
/// </summary>
/// <param name="mrgFile">MRG file to get sentence from</param>
/// <param name="sentNum">Sentence number</param>
/// <returns>Full bracketing, or just the surface text of the sentence if it has no predicates</returns>
/// <exception cref="Exception">The predicate trees of the sentence disagree on the number of tokens</exception>
public string GetFullBracketing(string mrgFile, int sentNum)
{
    // make sure we have predicates for the sentence...if we don't (or the list is empty), simply return the surface text
    List<VerbInfo> sentVerbInfo;
    if (!TryGetVerbInfoForSentence(mrgFile, sentNum, out sentVerbInfo) || sentVerbInfo == null || sentVerbInfo.Count == 0)
    {
        return GetParseTree(mrgFile, sentNum).SurfaceText;
    }

    // lists of tokens and labels for each token (keyed by token number) across all predicates
    List<string> tokens = null;
    Dictionary<int, List<string>> tokenLabels = null;

    // read each predicate for the sentence
    foreach (VerbInfo vi in sentVerbInfo)
    {
        PropBankNode predTree = GetPropBankTree(vi);

        if (tokens == null)
        {
            // init lists on first predicate
            tokens = new List<string>();
            tokenLabels = new Dictionary<int, List<string>>();
            foreach (TreeBankNode token in predTree.Tokens)
            {
                tokens.Add(token.SurfaceText);
                tokenLabels.Add(token.TokenNumber, new List<string>());
            }
        }
        else if (tokenLabels.Count != predTree.Tokens.Length)
        {
            // sanity check on number of tokens
            throw new Exception("Token count mismatch");
        }

        // collect labels for tokens w.r.t. current predicate
        foreach (PropBankNode token in predTree.Tokens)
        {
            // build a comma-separated label from every labeled node yielded by token.Ancestors
            StringBuilder tokenLabel = new StringBuilder();
            foreach (PropBankNode node in token.Ancestors)
            {
                PropBankNodeLabel label = node.Label;
                if (label == null)
                {
                    continue;
                }

                string typeStr = label.Type.ToString();
                if (label.IsArgument)
                {
                    // first and last character of the type name
                    tokenLabel.Append(typeStr[0]).Append(typeStr[typeStr.Length - 1]).Append(',');
                }
                else if (label.IsModifier)
                {
                    // first character of the type name plus the modifier feature
                    tokenLabel.Append(typeStr[0]).Append('-').Append(label.Feature.ToString()).Append(',');
                }
                else if (label.IsPredicate)
                {
                    tokenLabel.Append("Predicate,");
                }
            }

            tokenLabels[token.TokenNumber].Add(tokenLabel.ToString().Trim(','));
        }
    }

    // build format string with one right-aligned column per token
    StringBuilder formatBuilder = new StringBuilder();
    for (int tokenNum = 0; tokenNum < tokens.Count; ++tokenNum)
    {
        // get maximum label length
        int maxLabelLen = tokens[tokenNum].Length;
        foreach (string label in tokenLabels[tokenNum])
        {
            if (label.Length > maxLabelLen)
            {
                maxLabelLen = label.Length;
            }
        }

        if (tokenNum > 0)
        {
            formatBuilder.Append('|');
        }

        formatBuilder.Append('{').Append(tokenNum).Append(',').Append(maxLabelLen).Append('}');
    }

    string format = formatBuilder.ToString();

    // start with surface text
    StringBuilder bracketedText = new StringBuilder(string.Format(format, tokens.ToArray()));

    // add each predicate line, each preceded by a newline
    int numPreds = sentVerbInfo.Count;
    for (int lineNum = 0; lineNum < numPreds; ++lineNum)
    {
        // Walk tokens by number rather than enumerating the dictionary keys: the format string is
        // positional by token number, and Dictionary does not guarantee any enumeration order.
        List<string> lineLabels = new List<string>(tokens.Count);
        for (int tokenNum = 0; tokenNum < tokens.Count; ++tokenNum)
        {
            lineLabels.Add(tokenLabels[tokenNum][lineNum]);
        }

        bracketedText.Append(Environment.NewLine);
        bracketedText.Append(string.Format(format, lineLabels.ToArray()));
    }

    return bracketedText.ToString();
}
/// <summary>
/// Builds the predicate tree for a PropBank propositions entry: the parse tree of the entry's sentence is
/// wrapped in a PropBankNode, and one labeled node collection is attached to the root for every label listed
/// in the entry.
/// </summary>
/// <param name="vi">VerbInfo specifying tree to look up</param>
/// <returns>PropBankNode</returns>
/// <exception cref="Exception">A label has more than three dash-delimited columns, or no predicate leaf
/// carries the leaf number given in <paramref name="vi"/></exception>
public PropBankNode GetPropBankTree(VerbInfo vi)
{
    TreeBankNode parse = GetParseTree(vi.File, vi.SentenceNumber);
    PropBankNode predTree = new PropBankNode(parse);
    predTree.Information = vi;

    // labels are separated by spaces; the columns within a label are separated by dashes
    foreach (string labelEntry in vi.LabeledNodeLocations.Split(' '))
    {
        string[] columns = labelEntry.Split('-');

        // second column: label type
        PropBankNodeLabel.NodeType type = PropBankNodeLabel.GetNodeType(columns[1]);

        // optional third column: label feature
        PropBankNodeLabel.NodeFeature feature = PropBankNodeLabel.NodeFeature.None;
        if (columns.Length > 2)
        {
            // the feature is sometimes the actual preposition, in which case the lookup fails
            // and we fall back to PRP, which we have added for this case
            if (!PropBankNodeLabel.TryGetNodeFeature(columns[2], out feature))
            {
                feature = PropBankNodeLabel.GetNodeFeature("PRP");
            }

            if (columns.Length > 3)
            {
                throw new Exception("Missed feature");
            }
        }

        // first column: node locations, which are gathered into a new labeled collection on the root
        PropBankLabeledNodeCollection collection = new PropBankLabeledNodeCollection(new PropBankNodeLabel(type, feature, 1));
        AddNodesToCollection(predTree, columns[0], collection);
        predTree.LabeledNodeCollections.Add(collection);
    }

    // make sure one of the predicate leaves has the leaf number from the propositions file entry
    bool leafMatched = false;
    foreach (PropBankNode predicateNode in predTree.PredicateNodes)
    {
        foreach (PropBankNode leaf in predicateNode.Leaves)
        {
            if (leaf.LeafNumber != vi.LeafNumber)
            {
                continue;
            }

            leafMatched = true;
            break;
        }

        if (leafMatched)
        {
            break;
        }
    }

    if (!leafMatched)
    {
        throw new Exception("Mismatch between VerbInfo predicate leaf number and actual predicate leaf number");
    }

    return predTree;
}
/// <summary>
/// Builds the mapping from verbs to their morphological variants, and the reverse mapping from variants to
/// base verbs. Variants are identified by looking at the marked predicates throughout PropBank. For example,
/// the verb "join" will have many different surface realizations in the TreeBank: "joins", "joined", etc.
/// The verb-to-variants mapping is loaded from the index file at MorphMapIndexPath if that file exists;
/// otherwise it is computed from all verbs and saved to that path.
/// </summary>
/// <exception cref="Exception">A morphological variant maps to more than one base verb</exception>
private void BuildMorphMap()
{
    if (File.Exists(MorphMapIndexPath))
    {
        // load verb morphs from saved file, making sure the file handle is released when done
        using (StreamReader file = new StreamReader(MorphMapIndexPath))
        {
            // first line holds the number of entries; each entry is a verb line followed by a '|'-delimited variants line
            _verbMorphs = new Dictionary<string, Set<string>>(int.Parse(file.ReadLine()));

            string line;
            while (file.TryReadLine(out line))
            {
                _verbMorphs.Add(line, new Set<string>(file.ReadLine().Split('|')));
            }
        }
    }
    else
    {
        // process all verbs
        _verbMorphs = new Dictionary<string, Set<string>>();
        foreach (string verb in AllVerbs)
        {
            // get each predicate tree, checking for morphological variants of the verb
            Set<string> variants = new Set<string>(false);
            foreach (VerbInfo vi in GetVerbInfo(verb))
            {
                PropBankNode predicateTree = GetPropBankTree(vi);

                // the variant is the space-joined surface text of all predicate nodes
                StringBuilder variantBuilder = new StringBuilder();
                foreach (PropBankNode predNode in predicateTree.PredicateNodes)
                {
                    if (variantBuilder.Length > 0)
                    {
                        variantBuilder.Append(' ');
                    }

                    variantBuilder.Append(predNode.SurfaceText);
                }

                // normalize case
                string variant = variantBuilder.ToString().ToLower();

                // add if we found a variant
                if (variant != verb)
                {
                    variants.Add(variant);
                }
            }

            // add variants if we found any
            if (variants.Count > 0)
            {
                _verbMorphs.Add(verb, variants);
            }
        }

        // save verb morphs collection, writing each variant set as a '|'-delimited line
        _verbMorphs.Save(MorphMapIndexPath, x => x, x =>
        {
            StringBuilder morphBuilder = new StringBuilder();
            foreach (string morph in x)
            {
                // '|' is the delimiter, so it must not occur within a morph
                morph.Disallow('|');
                morphBuilder.Append((morphBuilder.Length > 0 ? "|" : "") + morph);
            }

            return morphBuilder.ToString();
        }, DictionaryExtensions.Sort.None, false, true, "", Environment.NewLine, true);
    }

    // map variants back to base verbs
    _morphVerb = new Dictionary<string, string>();
    foreach (KeyValuePair<string, Set<string>> verbEntry in _verbMorphs)
    {
        foreach (string morph in verbEntry.Value)
        {
            // make sure morphs have a single base verb
            string currBase;
            if (!_morphVerb.TryGetValue(morph, out currBase))
            {
                _morphVerb.Add(morph, verbEntry.Key);
            }
            else if (currBase != verbEntry.Key)
            {
                throw new Exception("Multiple base verbs");
            }
        }
    }
}
/// <summary>
/// Gets whether or not the current tree has the same argument labeling as another tree. For every given
/// node type, both trees must have the same number of nodes of that type, and every such node in one tree
/// must cover the same tokens as some node of that type in the other tree.
/// </summary>
/// <param name="otherTree">Tree to compare the current one to</param>
/// <param name="ignoreNullNodes">Whether or not to ignore null nodes</param>
/// <param name="argumentTypesToCheck">Argument types to check</param>
/// <returns>True if argument labelings are the same, false otherwise</returns>
/// <exception cref="Exception">Either tree is not a root node</exception>
public bool HasSameLabelingAs(PropBankNode otherTree, bool ignoreNullNodes, params PropBankNodeLabel.NodeType[] argumentTypesToCheck)
{
    // both trees must be roots (the current tree is checked first)
    if (!IsRoot || !otherTree.IsRoot)
    {
        throw new Exception("Non-root node");
    }

    foreach (PropBankNodeLabel.NodeType type in argumentTypesToCheck)
    {
        // nodes of the current type in this tree
        List<PropBankNode> mine = GetDescendants(type);
        if (ignoreNullNodes)
        {
            mine.RemoveAll(n => n.IsNullElement);
        }

        // nodes of the current type in the other tree
        List<PropBankNode> theirs = otherTree.GetDescendants(type);
        if (ignoreNullNodes)
        {
            theirs.RemoveAll(n => n.IsNullElement);
        }

        if (mine.Count != theirs.Count)
        {
            return false;
        }

        // every node of this tree needs a counterpart in the other tree...
        foreach (PropBankNode myNode in mine)
        {
            if (!theirs.Exists(theirNode => myNode.CoversSameTokensAs(theirNode)))
            {
                return false;
            }
        }

        // ...and every node of the other tree needs a counterpart in this tree
        foreach (PropBankNode theirNode in theirs)
        {
            if (!mine.Exists(myNode => theirNode.CoversSameTokensAs(myNode)))
            {
                return false;
            }
        }
    }

    return true;
}
/// <summary>
/// Marks argument nodes from the current node in the corresponding parse from a different TreeBank. This is
/// used when transferring PropBank annotations to parse trees other than those distributed in the TreeBank
/// (e.g., those produced by an automatic syntactic parser).
/// </summary>
/// <param name="treeBankEngine">Initialized TreeBank engine from which to pull the parse tree to mark PropBank arguments within</param>
/// <returns>PropBank node, or null if all arguments couldn't be minimally transferred to the other parse tree. An
/// argument is minimally transferred if the corresponding node in the other parse tree subsumes precisely the same
/// text as the node in the current parse tree. Sometimes this is not possible due to parse errors.</returns>
/// <exception cref="Exception">This node is not a root, or the sentence pulled from the other TreeBank has
/// different surface text</exception>
public PropBankNode MarkArgumentNodesIn(TreeBankEngine treeBankEngine)
{
    if (!IsRoot)
    {
        throw new Exception("Attempted to transform non-root node");
    }

    // locate the same MRG file (by file name) in the other tree bank
    int fileNameStart = MrgFile.LastIndexOf(Path.DirectorySeparatorChar) + 1;
    string otherMrgFile = treeBankEngine.GetFullMrgPath(MrgFile.Substring(fileNameStart));

    // need a PropBank root to mark arguments within
    PropBankNode otherRoot = new PropBankNode(treeBankEngine.GetParseTree(otherMrgFile, SentenceNumber));

    // make sure we got the right sentence
    if (otherRoot.SurfaceText != SurfaceText)
    {
        throw new Exception("Failed to convert root to Charniak-parsed version");
    }

    // Add information to root. Leaf number (-1) and argument info ("") are placeholders - they are set at the end.
    otherMrgFile = otherMrgFile.Substring(treeBankEngine.MrgPath.Length);
    VerbInfo info = Information;
    otherRoot.Information = new VerbInfo(info.Verb, otherMrgFile, info.SentenceNumber, -1, info.Tagger, info.RoleSetId, info.VForm, info.VTense, info.VAspect, info.VPerson, info.VVoice, "");

    // transfer all argument node lists
    foreach (PropBankLabeledNodeCollection sourceCollection in LabeledNodeCollections)
    {
        // new node collection carrying a copy of the source label
        PropBankLabeledNodeCollection targetCollection = new PropBankLabeledNodeCollection(new PropBankNodeLabel(sourceCollection.Label.Type, sourceCollection.Label.Feature, sourceCollection.Label.Confidence));

        // transfer single nodes, skipping null elements
        foreach (PropBankNode sourceNode in sourceCollection.SingleNodes)
        {
            if (sourceNode.IsNullElement)
            {
                continue;
            }

            // get argument node from other parse tree...give up entirely if there is none
            PropBankNode targetNode = (PropBankNode)otherRoot.GetMinimallySubsumingNode(sourceNode.FirstToken, sourceNode.LastToken);
            if (targetNode == null)
            {
                return null;
            }

            targetCollection.AddSingleNode(targetNode);
        }

        // transfer split arguments
        foreach (List<TreeBankNode> sourceSplit in sourceCollection.SplitNodes)
        {
            List<TreeBankNode> targetSplit = new List<TreeBankNode>();

            // transfer each non-null node of the split argument
            foreach (PropBankNode sourcePart in sourceSplit)
            {
                if (sourcePart.IsNullElement)
                {
                    continue;
                }

                // get split node in other tree...give up entirely if there is none
                PropBankNode targetPart = (PropBankNode)otherRoot.GetMinimallySubsumingNode(sourcePart.FirstToken, sourcePart.LastToken);
                if (targetPart == null)
                {
                    return null;
                }

                targetSplit.Add(targetPart);
            }

            if (targetSplit.Count >= 2)
            {
                // add the split arg normally
                targetCollection.AddSplitNode(targetSplit);
            }
            else if (targetSplit.Count == 1)
            {
                // only one node of the split arg was non-null, so add that node as a single
                targetCollection.AddSingleNode(targetSplit[0]);
            }
        }

        // add coref list if we found non-null nodes
        bool hasSingles = targetCollection.SingleNodes.Count > 0;
        if (hasSingles || targetCollection.SplitNodes.Count > 0)
        {
            otherRoot.LabeledNodeCollections.Add(targetCollection);
        }
    }

    // return null if we didn't find any argument node lists with non-null nodes
    if (otherRoot.LabeledNodeCollections.Count == 0)
    {
        return null;
    }

    // set leaf number and argument information
    otherRoot.Information.LeafNumber = otherRoot.PredicateNodes.First().Leaves[0].LeafNumber;
    otherRoot.Information.LabeledNodeLocations = otherRoot.LabeledNodeLocations;

    return otherRoot;
}