Exemple #1
0
 /// <summary>
 /// Builds a PropBankNode on top of an existing TreeBankNode. WARNING:  an instance of any class derived from
 /// TreeBankNode (e.g., PropBankNode) is accepted, but the resulting PropBankNode only has its TreeBankNode
 /// members instantiated - the PropBank-specific members start out empty.
 /// </summary>
 /// <param name="treeBankNode">TreeBankNode from which to construct this PropBankNode</param>
 /// <param name="parent">Parent of this PropBank node</param>
 protected PropBankNode(TreeBankNode treeBankNode, PropBankNode parent)
     : base(treeBankNode, parent, new TreeBankNodeConstructor(PropBankChildConstructor))
 {
     // PropBank-specific state:  no labeled node collections, no verb information, and no label yet
     _labeledNodeCollections = new List<PropBankLabeledNodeCollection>();
     _information            = null;
     _label                  = null;
 }
        /// <summary>
        /// Loads a VerbInfo list from a file starting at a specific position. The line found at that position is
        /// expected to be a pipe-delimited series of verb entries that ends with a pipe.
        /// </summary>
        /// <param name="file">File to load list from</param>
        /// <param name="position">Position (offset into the underlying stream) to start at</param>
        /// <returns>List of VerbInfo</returns>
        /// <exception cref="Exception">
        /// Thrown if no line can be read at the given position, if the line is not terminated by a pipe, if a
        /// gold-tagged verb has no PropBank frame, or if an entry refers to a MRG file that does not exist.
        /// </exception>
        private List<VerbInfo> LoadVerbInfo(string file, long position)
        {
            // read line at specified position...the using block releases the file handle even if the seek or read throws
            string infoLine;
            using (StreamReader loadFile = new StreamReader(file))
            {
                loadFile.BaseStream.Position = position;
                infoLine = loadFile.ReadLine();
            }

            // ReadLine returns null when there is nothing left to read at the requested position
            if (infoLine == null)
            {
                throw new Exception("No VerbInfo list found at position " + position + " in file:  " + file);
            }

            // parse line...pipe-delimited series of verb entries
            List<VerbInfo> viList  = new List<VerbInfo>();
            int            pipeLoc = infoLine.IndexOf('|');

            // the final pipe is the last character of the line, so stop once we reach it
            while (pipeLoc != infoLine.Length - 1)
            {
                // get entry line
                int nextPipeLoc = infoLine.IndexOf('|', pipeLoc + 1);
                if (nextPipeLoc < 0)
                {
                    // without this check the Substring call below fails with an unhelpful negative-length error
                    throw new Exception("Malformed VerbInfo list (missing terminating pipe) at position " + position + " in file:  " + file);
                }

                string entry = infoLine.Substring(pipeLoc + 1, nextPipeLoc - pipeLoc - 1);

                // parse verb info object
                VerbInfo vi = new VerbInfo(entry);

                // attach frame if we can find one for the verb
                Frame frame;
                if (_verbFrame.TryGetValue(vi.Verb, out frame))
                {
                    vi.VerbFrame = frame;
                }
                // if we didn't get a frame for the verb, throw exception if we're using a gold-standard tagger (non-gold taggers might identify predicates that propbank doesn't know about)
                else if (vi.Tagger == "gold")
                {
                    throw new Exception("Failed to get PropBank frame for verb:  " + vi.Verb);
                }

                // set full file path
                vi.File = Path.Combine(MrgPath, vi.File);
                if (!File.Exists(vi.File))
                {
                    throw new Exception("Invalid PropBank file:  " + vi.File);
                }

                viList.Add(vi);

                // start at next pipe
                pipeLoc = nextPipeLoc;
            }

            return viList;
        }
        /// <summary>
        /// Builds the predicate tree described by a PropBank propositions entry:  the parse tree of the entry's
        /// sentence, wrapped as a PropBankNode, with one labeled node collection per label in the entry.
        /// </summary>
        /// <param name="vi">VerbInfo specifying tree to look up</param>
        /// <returns>PropBankNode</returns>
        public PropBankNode GetPropBankTree(VerbInfo vi)
        {
            TreeBankNode sentenceParse = GetParseTree(vi.File, vi.SentenceNumber);
            PropBankNode root          = new PropBankNode(sentenceParse);

            root.Information = vi;

            // labels are separated by spaces, and the columns within a label are separated by dashes
            foreach (string labelEntry in vi.LabeledNodeLocations.Split(' '))
            {
                string[] columns = labelEntry.Split('-');

                // second column holds the label type
                PropBankNodeLabel.NodeType nodeType = PropBankNodeLabel.GetNodeType(columns[1]);

                // optional third column holds the label feature
                PropBankNodeLabel.NodeFeature nodeFeature = PropBankNodeLabel.NodeFeature.None;
                if (columns.Length > 2)
                {
                    // the feature column sometimes contains the actual preposition, which won't parse...fall back to
                    // the PRP feature that was added for this case
                    if (!PropBankNodeLabel.TryGetNodeFeature(columns[2], out nodeFeature))
                    {
                        nodeFeature = PropBankNodeLabel.GetNodeFeature("PRP");
                    }

                    // anything beyond a third column is unexpected
                    if (columns.Length > 3)
                    {
                        throw new Exception("Missed feature");
                    }
                }

                // first column holds the node locations...collect those nodes under the new label
                PropBankNodeLabel             nodeLabel  = new PropBankNodeLabel(nodeType, nodeFeature, 1);
                PropBankLabeledNodeCollection collection = new PropBankLabeledNodeCollection(nodeLabel);
                AddNodesToCollection(root, columns[0], collection);

                // register the collection on the root
                root.LabeledNodeCollections.Add(collection);
            }

            // the propositions entry's leaf number must belong to one of the predicate leaves
            if (!HasPredicateLeaf(root, vi))
            {
                throw new Exception("Mismatch between VerbInfo predicate leaf number and actual predicate leaf number");
            }

            return root;
        }

        /// <summary>
        /// Checks whether any leaf under any predicate node of a tree carries the leaf number of a VerbInfo
        /// </summary>
        /// <param name="predTree">Tree whose predicate nodes are searched</param>
        /// <param name="vi">VerbInfo supplying the leaf number to look for</param>
        /// <returns>True if a matching leaf exists, false otherwise</returns>
        private static bool HasPredicateLeaf(PropBankNode predTree, VerbInfo vi)
        {
            foreach (PropBankNode predicate in predTree.PredicateNodes)
            {
                foreach (PropBankNode predicateLeaf in predicate.Leaves)
                {
                    if (predicateLeaf.LeafNumber == vi.LeafNumber)
                    {
                        return true;
                    }
                }
            }

            return false;
        }
        /// <summary>
        /// Loads the propositions file. If all four index files (verb info, verb info positions, MRG-sentence info,
        /// and MRG-sentence info positions) already exist, only the two position tables are read back into memory.
        /// Otherwise the propositions file is parsed and all four index files are (re)written.
        /// </summary>
        /// <param name="propsPath">Path to prop.txt file</param>
        /// <exception cref="Exception">Thrown if the propositions file does not exist</exception>
        private void LoadProps(string propsPath)
        {
            if (!File.Exists(propsPath))
            {
                throw new Exception("Invalid PropBank propositions file:  \"" + propsPath + "\"");
            }

            // reuse existing index files if present
            if (File.Exists(VerbInfoPath) &&
                File.Exists(VerbInfoFilePositionPath) &&
                File.Exists(MrgSentenceInfoPath) &&
                File.Exists(MrgSentenceInfoFilePositionsPath))
            {
                // load verb info positions...using blocks make sure the file is released even if a line fails to parse
                _verbInfoFilePosition = new Dictionary<string, long>();
                using (StreamReader positionsFile = new StreamReader(VerbInfoFilePositionPath))
                {
                    string line;
                    while ((line = positionsFile.ReadLine()) != null)
                    {
                        // format:  position verb
                        int spaceLoc = line.IndexOf(' ');
                        _verbInfoFilePosition.Add(line.Substring(spaceLoc + 1), long.Parse(line.Substring(0, spaceLoc)));
                    }
                }

                // load mrg-sentence verb info positions
                _mrgSentInfoFilePosition = new Dictionary<string, Dictionary<int, long>>();
                using (StreamReader positionsFile = new StreamReader(MrgSentenceInfoFilePositionsPath))
                {
                    string line;
                    while ((line = positionsFile.ReadLine()) != null)
                    {
                        // each line lists a MRG file then a series of pipe-delimited sentence-position pairs...get MRG file first
                        int    pipeLoc = line.IndexOf('|');
                        string mrgFile = line.Substring(0, pipeLoc);

                        Dictionary<int, long> sentPositions = new Dictionary<int, long>();
                        _mrgSentInfoFilePosition.Add(mrgFile, sentPositions);

                        // get sentence-position pairs...the line ends with a pipe, so stop when we reach it
                        while (pipeLoc != line.Length - 1)
                        {
                            // find next pipe and space
                            int nextPipe = line.IndexOf('|', pipeLoc + 1);
                            int spaceLoc = line.IndexOf(' ', pipeLoc + 1);

                            // get sentence and position
                            int  sent     = int.Parse(line.Substring(pipeLoc + 1, spaceLoc - pipeLoc - 1));
                            long position = long.Parse(line.Substring(spaceLoc + 1, nextPipe - spaceLoc - 1));

                            // add to index
                            sentPositions.Add(sent, position);

                            pipeLoc = nextPipe;
                        }
                    }
                }

                return;
            }

            // maps each verb to a list of VerbInfo objects, each of which stores an annotation instance
            Dictionary<string, List<VerbInfo>> verbInfo = new Dictionary<string, List<VerbInfo>>();

            // maps each mrg file and sentence number to a list of VerbInfo objects for that sentence
            Dictionary<string, Dictionary<int, List<VerbInfo>>> mrgSentInfo = new Dictionary<string, Dictionary<int, List<VerbInfo>>>();

            // read propositions file...one VerbInfo per line
            using (StreamReader propFile = new StreamReader(propsPath))
            {
                string propLine;
                while ((propLine = propFile.ReadLine()) != null)
                {
                    VerbInfo vi = new VerbInfo(propLine);

                    // add to mapping from verbs to their information
                    verbInfo.EnsureContainsKey(vi.Verb, typeof(List<VerbInfo>));
                    verbInfo[vi.Verb].Add(vi);

                    // add to mapping from file-sentence pairs to their information
                    mrgSentInfo.EnsureContainsKey(vi.File, typeof(Dictionary<int, List<VerbInfo>>));
                    mrgSentInfo[vi.File].EnsureContainsKey(vi.SentenceNumber, typeof(List<VerbInfo>));
                    mrgSentInfo[vi.File][vi.SentenceNumber].Add(vi);
                }
            }

            // write verb index to disk and record file positions of verb info lists
            _verbInfoFilePosition = new Dictionary<string, long>();
            using (FileStream saveStream = new FileStream(VerbInfoPath, FileMode.Create))
            {
                foreach (KeyValuePair<string, List<VerbInfo>> verbEntry in verbInfo)
                {
                    // save position of VerbInfo list before it is written
                    _verbInfoFilePosition.Add(verbEntry.Key, saveStream.Position);

                    WriteVerbInfoList(verbEntry.Value, saveStream);
                }
            }

            // save file positions (format:  position verb)
            using (StreamWriter verbInfoPositionFile = new StreamWriter(VerbInfoFilePositionPath))
            {
                foreach (KeyValuePair<string, long> positionEntry in _verbInfoFilePosition)
                {
                    verbInfoPositionFile.WriteLine(positionEntry.Value + " " + positionEntry.Key);
                }
            }

            // save mrg-sentence info
            _mrgSentInfoFilePosition = new Dictionary<string, Dictionary<int, long>>();
            using (FileStream saveStream = new FileStream(MrgSentenceInfoPath, FileMode.Create))
            {
                foreach (KeyValuePair<string, Dictionary<int, List<VerbInfo>>> mrgEntry in mrgSentInfo)
                {
                    // add each sentence, recording position
                    Dictionary<int, long> sentInfoPosition = new Dictionary<int, long>();
                    foreach (KeyValuePair<int, List<VerbInfo>> sentEntry in mrgEntry.Value)
                    {
                        // add index of sentence to file position
                        sentInfoPosition.Add(sentEntry.Key, saveStream.Position);

                        // write VerbInfo list for sentence
                        WriteVerbInfoList(sentEntry.Value, saveStream);
                    }

                    _mrgSentInfoFilePosition.Add(mrgEntry.Key, sentInfoPosition);
                }
            }

            // save file positions for MRG file index (format:  mrgFile|sentence position|sentence position|...|)
            using (StreamWriter mrgSentInfoPositionsFile = new StreamWriter(MrgSentenceInfoFilePositionsPath))
            {
                foreach (KeyValuePair<string, Dictionary<int, long>> mrgEntry in _mrgSentInfoFilePosition)
                {
                    mrgSentInfoPositionsFile.Write(mrgEntry.Key);
                    foreach (KeyValuePair<int, long> sentEntry in mrgEntry.Value)
                    {
                        mrgSentInfoPositionsFile.Write("|" + sentEntry.Key + " " + sentEntry.Value);
                    }

                    // trailing pipe marks the end of the line for the reader above
                    mrgSentInfoPositionsFile.WriteLine("|");
                }
            }
        }
Exemple #5
0
        /// <summary>
        /// Re-creates this node's argument annotations on the corresponding parse tree from a different TreeBank. This is
        /// how PropBank annotations get transferred to parse trees other than those distributed in the TreeBank (e.g.,
        /// trees produced by an automatic syntactic parser). Must be called on a root node.
        /// </summary>
        /// <param name="treeBankEngine">Initialized TreeBank engine from which to pull the parse tree to mark PropBank arguments within</param>
        /// <returns>PropBank node, or null if all arguments couldn't be minimally transferred to the other parse tree. An argument
        /// is minimally transferred if the corresponding node in the other parse tree subsumes precisely the same text as the node in the
        /// current parse tree. Sometimes this is not possible due to parse errors. Null is also returned if no labeled node
        /// collection ends up with any non-null node.</returns>
        public PropBankNode MarkArgumentNodesIn(TreeBankEngine treeBankEngine)
        {
            if (!IsRoot)
            {
                throw new Exception("Attempted to transform non-root node");
            }

            // look up the MRG file of the same name (directory part dropped) in the other tree bank
            int    fileNameStart = MrgFile.LastIndexOf(Path.DirectorySeparatorChar) + 1;
            string otherMrgFile  = treeBankEngine.GetFullMrgPath(MrgFile.Substring(fileNameStart));

            // wrap the other parse of this sentence in a PropBank root that will receive the arguments
            PropBankNode otherRoot = new PropBankNode(treeBankEngine.GetParseTree(otherMrgFile, SentenceNumber));

            // both parses must cover the same surface text
            if (otherRoot.SurfaceText != SurfaceText)
            {
                throw new Exception("Failed to convert root to Charniak-parsed version");
            }

            // Copy verb information to the new root, with the MRG path stripped of the engine's MrgPath prefix. The leaf
            // number (-1) and labeled node locations ("") are placeholders that get filled in at the end.
            otherMrgFile = otherMrgFile.Substring(treeBankEngine.MrgPath.Length);
            VerbInfo sourceInfo = Information;

            otherRoot.Information = new VerbInfo(sourceInfo.Verb, otherMrgFile, sourceInfo.SentenceNumber,
                                                 -1, sourceInfo.Tagger, sourceInfo.RoleSetId,
                                                 sourceInfo.VForm, sourceInfo.VTense, sourceInfo.VAspect,
                                                 sourceInfo.VPerson, sourceInfo.VVoice, "");

            // transfer every labeled node collection
            foreach (PropBankLabeledNodeCollection sourceCollection in LabeledNodeCollections)
            {
                // empty collection carrying a copy of the source label
                PropBankLabeledNodeCollection targetCollection = new PropBankLabeledNodeCollection(new PropBankNodeLabel(sourceCollection.Label.Type, sourceCollection.Label.Feature, sourceCollection.Label.Confidence));

                // single nodes
                foreach (PropBankNode sourceNode in sourceCollection.SingleNodes)
                {
                    // null elements are not transferred
                    if (sourceNode.IsNullElement)
                    {
                        continue;
                    }

                    // find the node in the other tree covering the same token span...give up entirely if there is none
                    PropBankNode targetNode = (PropBankNode)otherRoot.GetMinimallySubsumingNode(sourceNode.FirstToken, sourceNode.LastToken);
                    if (targetNode == null)
                    {
                        return null;
                    }

                    targetCollection.AddSingleNode(targetNode);
                }

                // split arguments
                foreach (List<TreeBankNode> sourceSplit in sourceCollection.SplitNodes)
                {
                    List<TreeBankNode> targetSplit = new List<TreeBankNode>();

                    // transfer each piece of the split argument
                    foreach (PropBankNode sourcePiece in sourceSplit)
                    {
                        // null elements are not transferred
                        if (sourcePiece.IsNullElement)
                        {
                            continue;
                        }

                        // find the piece in the other tree...give up entirely if there is none
                        PropBankNode targetPiece = (PropBankNode)otherRoot.GetMinimallySubsumingNode(sourcePiece.FirstToken, sourcePiece.LastToken);
                        if (targetPiece == null)
                        {
                            return null;
                        }

                        targetSplit.Add(targetPiece);
                    }

                    // two or more surviving pieces are still a split argument
                    if (targetSplit.Count >= 2)
                    {
                        targetCollection.AddSplitNode(targetSplit);
                    }
                    // if only one piece was non-null, add that node as a single
                    else if (targetSplit.Count == 1)
                    {
                        targetCollection.AddSingleNode(targetSplit[0]);
                    }
                }

                // keep the collection only if something was transferred into it
                bool transferredAny = targetCollection.SingleNodes.Count > 0 || targetCollection.SplitNodes.Count > 0;
                if (transferredAny)
                {
                    otherRoot.LabeledNodeCollections.Add(targetCollection);
                }
            }

            // nothing to return if no collection received any non-null node
            if (otherRoot.LabeledNodeCollections.Count == 0)
            {
                return null;
            }

            // fill in the placeholders:  leaf number of the first predicate node's first leaf, then the node locations
            otherRoot.Information.LeafNumber           = otherRoot.PredicateNodes.First().Leaves[0].LeafNumber;
            otherRoot.Information.LabeledNodeLocations = otherRoot.LabeledNodeLocations;

            return otherRoot;
        }