/// <summary>
/// Builds a PropBankNode on top of an existing TreeBankNode. WARNING: an instance of any class derived from
/// TreeBankNode (e.g., PropBankNode) is accepted, but the resulting PropBankNode only has its TreeBankNode
/// members instantiated - the PropBank-specific members start out empty.
/// </summary>
/// <param name="treeBankNode">TreeBankNode from which to construct this PropBankNode</param>
/// <param name="parent">Parent of this PropBank node</param>
protected PropBankNode(TreeBankNode treeBankNode, PropBankNode parent)
    : base(treeBankNode, parent, new TreeBankNodeConstructor(PropBankChildConstructor))
{
    // PropBank-specific state: no labeled node collections, no proposition information and no label yet
    _labeledNodeCollections = new List<PropBankLabeledNodeCollection>();
    _information = null;
    _label = null;
}
/// <summary>
/// Loads a VerbInfo list from a file starting at a specific position
/// </summary>
/// <param name="file">File to load list from</param>
/// <param name="position">Position (within the file's underlying stream) of the line holding the list</param>
/// <returns>List of VerbInfo, one per entry on the line (empty if the line has no entries)</returns>
/// <exception cref="Exception">
/// Thrown if there is no line at the given position, the line is malformed, a gold-tagged verb has no
/// frame, or an entry refers to a file that does not exist under MrgPath.
/// </exception>
private List<VerbInfo> LoadVerbInfo(string file, long position)
{
    // read line at specified position...the using block releases the file handle even if seeking or reading throws
    string infoLine;
    using (StreamReader loadFile = new StreamReader(file))
    {
        loadFile.BaseStream.Position = position;
        infoLine = loadFile.ReadLine();
    }

    // ReadLine returns null when the position is at or beyond the end of the file
    if (infoLine == null)
    {
        throw new Exception("No VerbInfo line at position " + position + " in file: " + file);
    }

    // parse line...pipe-delimited series of verb entries, each enclosed by pipes, with a pipe as the final character
    List<VerbInfo> viList = new List<VerbInfo>();
    int pipeLoc = infoLine.IndexOf('|');
    while (pipeLoc != infoLine.Length - 1)
    {
        // get entry line
        int nextPipeLoc = infoLine.IndexOf('|', pipeLoc + 1);
        if (nextPipeLoc < 0)
        {
            // without this check Substring would fail below with an uninformative negative-length error
            throw new Exception("Malformed VerbInfo line (missing terminating pipe) at position " + position + " in file: " + file);
        }

        string entry = infoLine.Substring(pipeLoc + 1, nextPipeLoc - pipeLoc - 1);

        // parse verb info object
        VerbInfo vi = new VerbInfo(entry);

        // attach frame if we can find one for the verb
        Frame frame;
        if (_verbFrame.TryGetValue(vi.Verb, out frame))
        {
            vi.VerbFrame = frame;
        }
        // if we didn't get a frame for the verb, throw exception if we're using a gold-standard tagger
        // (non-gold taggers might identify predicates that propbank doesn't know about)
        else if (vi.Tagger == "gold")
        {
            throw new Exception("Failed to get PropBank frame for verb: " + vi.Verb);
        }

        // set full file path
        vi.File = Path.Combine(MrgPath, vi.File);
        if (!File.Exists(vi.File))
        {
            throw new Exception("Invalid PropBank file: " + vi.File);
        }

        viList.Add(vi);

        // start at next pipe
        pipeLoc = nextPipeLoc;
    }

    return viList;
}
/// <summary>
/// Builds the predicate tree described by a PropBank propositions entry: the parse tree of the entry's
/// sentence, with one labeled node collection added per label in the entry.
/// </summary>
/// <param name="vi">VerbInfo specifying tree to look up</param>
/// <returns>PropBankNode</returns>
public PropBankNode GetPropBankTree(VerbInfo vi)
{
    TreeBankNode parseTree = GetParseTree(vi.File, vi.SentenceNumber);
    PropBankNode root = new PropBankNode(parseTree);
    root.Information = vi;

    // labels are separated by spaces, and the columns within a label by dashes
    foreach (string labelEntry in vi.LabeledNodeLocations.Split(' '))
    {
        string[] columns = labelEntry.Split('-');

        // second column holds the label type
        PropBankNodeLabel.NodeType nodeType = PropBankNodeLabel.GetNodeType(columns[1]);

        // optional third column holds the label feature
        PropBankNodeLabel.NodeFeature nodeFeature = PropBankNodeLabel.NodeFeature.None;
        if (columns.Length > 2)
        {
            // the feature is sometimes the actual preposition, which is not a known feature...fall back to
            // PRP, which we have added for this case
            if (!PropBankNodeLabel.TryGetNodeFeature(columns[2], out nodeFeature))
            {
                nodeFeature = PropBankNodeLabel.GetNodeFeature("PRP");
            }

            // anything beyond three columns is something we don't know how to interpret
            if (columns.Length > 3)
            {
                throw new Exception("Missed feature");
            }
        }

        // collect the nodes named by the first column under the new label and register them on the root
        PropBankNodeLabel nodeLabel = new PropBankNodeLabel(nodeType, nodeFeature, 1);
        PropBankLabeledNodeCollection collection = new PropBankLabeledNodeCollection(nodeLabel);
        AddNodesToCollection(root, columns[0], collection);
        root.LabeledNodeCollections.Add(collection);
    }

    // sanity check: the leaf number from the propositions file entry must belong to one of the predicate leaves
    if (!ContainsPredicateLeaf(root, vi))
    {
        throw new Exception("Mismatch between VerbInfo predicate leaf number and actual predicate leaf number");
    }

    return root;
}

/// <summary>
/// Checks whether any leaf of any predicate node in a tree has the leaf number given by a propositions entry
/// </summary>
/// <param name="root">Tree whose predicate nodes are searched</param>
/// <param name="vi">Propositions entry supplying the leaf number to look for</param>
/// <returns>True if a matching leaf exists, false otherwise</returns>
private static bool ContainsPredicateLeaf(PropBankNode root, VerbInfo vi)
{
    foreach (PropBankNode predicate in root.PredicateNodes)
    {
        foreach (PropBankNode leaf in predicate.Leaves)
        {
            if (leaf.LeafNumber == vi.LeafNumber)
            {
                return true;
            }
        }
    }

    return false;
}
/// <summary>
/// Loads the propositions file. If all four index files (verb info, verb info positions, mrg-sentence info,
/// mrg-sentence info positions) already exist, only the position indexes are read back from disk; otherwise
/// the propositions file is parsed and all four index files are (re)written.
/// </summary>
/// <param name="propsPath">Path to prop.txt file</param>
/// <exception cref="Exception">Thrown if the propositions file does not exist</exception>
private void LoadProps(string propsPath)
{
    if (!File.Exists(propsPath))
    {
        throw new Exception("Invalid PropBank propositions file: \"" + propsPath + "\"");
    }

    // reuse existing index files if present
    if (File.Exists(VerbInfoPath) &&
        File.Exists(VerbInfoFilePositionPath) &&
        File.Exists(MrgSentenceInfoPath) &&
        File.Exists(MrgSentenceInfoFilePositionsPath))
    {
        LoadPropsIndexPositions();
    }
    else
    {
        BuildPropsIndex(propsPath);
    }
}

/// <summary>
/// Reads the verb and mrg-sentence position indexes back from their existing position files
/// </summary>
private void LoadPropsIndexPositions()
{
    // load verb info positions
    _verbInfoFilePosition = new Dictionary<string, long>();
    using (StreamReader positionsFile = new StreamReader(VerbInfoFilePositionPath))
    {
        string line;
        while ((line = positionsFile.ReadLine()) != null)
        {
            // format: position verb
            int spaceLoc = line.IndexOf(' ');
            _verbInfoFilePosition.Add(line.Substring(spaceLoc + 1), long.Parse(line.Substring(0, spaceLoc)));
        }
    }

    // load mrg-sentence verb info positions
    _mrgSentInfoFilePosition = new Dictionary<string, Dictionary<int, long>>();
    using (StreamReader positionsFile = new StreamReader(MrgSentenceInfoFilePositionsPath))
    {
        string line;
        while ((line = positionsFile.ReadLine()) != null)
        {
            // each line lists a MRG file then a series of pipe-delimited sentence-position pairs...get MRG file first
            int pipeLoc = line.IndexOf('|');
            string mrgFile = line.Substring(0, pipeLoc);
            Dictionary<int, long> sentPositions = new Dictionary<int, long>();
            _mrgSentInfoFilePosition.Add(mrgFile, sentPositions);

            // get sentence-position pairs...the line ends with a pipe, which terminates the loop
            while (pipeLoc != line.Length - 1)
            {
                // find next pipe and space
                int nextPipe = line.IndexOf('|', pipeLoc + 1);
                int spaceLoc = line.IndexOf(' ', pipeLoc + 1);

                // get sentence and position
                int sent = int.Parse(line.Substring(pipeLoc + 1, spaceLoc - pipeLoc - 1));
                long position = long.Parse(line.Substring(spaceLoc + 1, nextPipe - spaceLoc - 1));

                // add to index
                sentPositions.Add(sent, position);

                pipeLoc = nextPipe;
            }
        }
    }
}

/// <summary>
/// Parses the propositions file, writes the verb and mrg-sentence index files, and records the stream position
/// of every written VerbInfo list in memory and in the two position files
/// </summary>
/// <param name="propsPath">Path to prop.txt file</param>
private void BuildPropsIndex(string propsPath)
{
    // maps each verb to a list of VerbInfo objects, each of which stores an annotation instance
    Dictionary<string, List<VerbInfo>> verbInfo = new Dictionary<string, List<VerbInfo>>();

    // maps each mrg file and sentence number to a list of VerbInfo objects for that sentence
    Dictionary<string, Dictionary<int, List<VerbInfo>>> mrgSentInfo = new Dictionary<string, Dictionary<int, List<VerbInfo>>>();

    // read propositions file, one VerbInfo per line
    using (StreamReader propFile = new StreamReader(propsPath))
    {
        string propLine;
        while ((propLine = propFile.ReadLine()) != null)
        {
            VerbInfo vi = new VerbInfo(propLine);

            // add to mapping from verbs to their information
            // NOTE(review): EnsureContainsKey is defined elsewhere; presumably it adds a new instance of the given type when the key is missing - confirm
            verbInfo.EnsureContainsKey(vi.Verb, typeof(List<VerbInfo>));
            verbInfo[vi.Verb].Add(vi);

            // add to mapping from file-sentence pairs to their information
            mrgSentInfo.EnsureContainsKey(vi.File, typeof(Dictionary<int, List<VerbInfo>>));
            mrgSentInfo[vi.File].EnsureContainsKey(vi.SentenceNumber, typeof(List<VerbInfo>));
            mrgSentInfo[vi.File][vi.SentenceNumber].Add(vi);
        }
    }

    // write verb index to disk and record file positions of verb info lists
    _verbInfoFilePosition = new Dictionary<string, long>();
    using (FileStream saveStream = new FileStream(VerbInfoPath, FileMode.Create))
    {
        foreach (KeyValuePair<string, List<VerbInfo>> verbEntry in verbInfo)
        {
            // save position of VerbInfo list before it is written
            _verbInfoFilePosition.Add(verbEntry.Key, saveStream.Position);
            WriteVerbInfoList(verbEntry.Value, saveStream);
        }
    }

    // save file positions...format: position verb
    using (StreamWriter verbInfoPositionFile = new StreamWriter(VerbInfoFilePositionPath))
    {
        foreach (KeyValuePair<string, long> verbPosition in _verbInfoFilePosition)
        {
            verbInfoPositionFile.WriteLine(verbPosition.Value + " " + verbPosition.Key);
        }
    }

    // save mrg-sentence info
    _mrgSentInfoFilePosition = new Dictionary<string, Dictionary<int, long>>();
    using (FileStream saveStream = new FileStream(MrgSentenceInfoPath, FileMode.Create))
    {
        foreach (KeyValuePair<string, Dictionary<int, List<VerbInfo>>> mrgEntry in mrgSentInfo)
        {
            // add each sentence, recording position
            Dictionary<int, long> sentInfoPosition = new Dictionary<int, long>();
            foreach (KeyValuePair<int, List<VerbInfo>> sentEntry in mrgEntry.Value)
            {
                // add index of sentence to file position
                sentInfoPosition.Add(sentEntry.Key, saveStream.Position);

                // write VerbInfo list for sentence
                WriteVerbInfoList(sentEntry.Value, saveStream);
            }

            _mrgSentInfoFilePosition.Add(mrgEntry.Key, sentInfoPosition);
        }
    }

    // save file positions for MRG file index...format: mrgFile|sentence position|sentence position|...|
    using (StreamWriter mrgSentInfoPositionsFile = new StreamWriter(MrgSentenceInfoFilePositionsPath))
    {
        foreach (KeyValuePair<string, Dictionary<int, long>> mrgPositions in _mrgSentInfoFilePosition)
        {
            mrgSentInfoPositionsFile.Write(mrgPositions.Key);
            foreach (KeyValuePair<int, long> sentPosition in mrgPositions.Value)
            {
                mrgSentInfoPositionsFile.Write("|" + sentPosition.Key + " " + sentPosition.Value);
            }

            // trailing pipe marks the end of the pairs when the line is read back
            mrgSentInfoPositionsFile.WriteLine("|");
        }
    }
}
/// <summary>
/// Transfers the argument nodes of the current (root) node onto the corresponding parse from a different TreeBank.
/// This is used when moving PropBank annotations to parse trees other than those distributed in the TreeBank
/// (e.g., those produced by an automatic syntactic parser).
/// </summary>
/// <param name="treeBankEngine">Initialized TreeBank engine from which to pull the parse tree to mark PropBank arguments within</param>
/// <returns>PropBank node, or null if all arguments couldn't be minimally transferred to the other parse tree. An argument
/// is minimally transferred if the corresponding node in the other parse tree subsumes precisely the same text as the node in the
/// current parse tree. Sometimes this is not possible due to parse errors.</returns>
public PropBankNode MarkArgumentNodesIn(TreeBankEngine treeBankEngine)
{
    if (!IsRoot)
    {
        throw new Exception("Attempted to transform non-root node");
    }

    // look up the mrg file with the same file name in the other tree bank
    int fileNameStart = MrgFile.LastIndexOf(Path.DirectorySeparatorChar) + 1;
    string otherMrgFile = treeBankEngine.GetFullMrgPath(MrgFile.Substring(fileNameStart));

    // wrap the other parse of this sentence in a PropBank root so arguments can be marked within it
    PropBankNode otherRoot = new PropBankNode(treeBankEngine.GetParseTree(otherMrgFile, SentenceNumber));

    // both parses must cover the same sentence text
    if (otherRoot.SurfaceText != SurfaceText)
    {
        throw new Exception("Failed to convert root to Charniak-parsed version");
    }

    // Attach information to the other root, with the mrg file expressed relative to the other engine's mrg path.
    // Leaf number (-1) and argument info ("") are placeholders that get filled in at the end.
    otherMrgFile = otherMrgFile.Substring(treeBankEngine.MrgPath.Length);
    VerbInfo sourceInfo = Information;
    otherRoot.Information = new VerbInfo(
        sourceInfo.Verb,
        otherMrgFile,
        sourceInfo.SentenceNumber,
        -1,
        sourceInfo.Tagger,
        sourceInfo.RoleSetId,
        sourceInfo.VForm,
        sourceInfo.VTense,
        sourceInfo.VAspect,
        sourceInfo.VPerson,
        sourceInfo.VVoice,
        "");

    // transfer every labeled node collection
    foreach (PropBankLabeledNodeCollection sourceCollection in LabeledNodeCollections)
    {
        // fresh collection carrying a copy of the source label
        PropBankLabeledNodeCollection transferred = new PropBankLabeledNodeCollection(
            new PropBankNodeLabel(sourceCollection.Label.Type, sourceCollection.Label.Feature, sourceCollection.Label.Confidence));

        // single nodes
        foreach (PropBankNode singleNode in sourceCollection.SingleNodes)
        {
            // null elements are skipped
            if (singleNode.IsNullElement)
            {
                continue;
            }

            // find the node in the other parse covering the same token span...give up entirely if there is none
            PropBankNode counterpart = (PropBankNode)otherRoot.GetMinimallySubsumingNode(singleNode.FirstToken, singleNode.LastToken);
            if (counterpart == null)
            {
                return null;
            }

            transferred.AddSingleNode(counterpart);
        }

        // split arguments
        foreach (List<TreeBankNode> splitArg in sourceCollection.SplitNodes)
        {
            List<TreeBankNode> transferredParts = new List<TreeBankNode>();

            foreach (PropBankNode part in splitArg)
            {
                // null elements are skipped
                if (part.IsNullElement)
                {
                    continue;
                }

                // find the node in the other parse covering the same token span...give up entirely if there is none
                PropBankNode counterpart = (PropBankNode)otherRoot.GetMinimallySubsumingNode(part.FirstToken, part.LastToken);
                if (counterpart == null)
                {
                    return null;
                }

                transferredParts.Add(counterpart);
            }

            if (transferredParts.Count == 1)
            {
                // only one part of the split arg was non-null, so add that node as a single
                transferred.AddSingleNode(transferredParts[0]);
            }
            else if (transferredParts.Count > 1)
            {
                // otherwise, add the split arg normally
                transferred.AddSplitNode(transferredParts);
            }
        }

        // keep the collection only if it ended up with non-null nodes
        bool hasNodes = transferred.SingleNodes.Count > 0 || transferred.SplitNodes.Count > 0;
        if (hasNodes)
        {
            otherRoot.LabeledNodeCollections.Add(transferred);
        }
    }

    // nothing could be transferred at all
    if (otherRoot.LabeledNodeCollections.Count == 0)
    {
        return null;
    }

    // fill in the placeholders: leaf number comes from the first leaf of the first predicate node
    otherRoot.Information.LeafNumber = otherRoot.PredicateNodes.First().Leaves[0].LeafNumber;
    otherRoot.Information.LabeledNodeLocations = otherRoot.LabeledNodeLocations;

    return otherRoot;
}