/// <summary>
        /// Advances to the next node that passes the instance filter and returns it
        /// </summary>
        /// <returns>Next filtered node, or null once no valid MRG file remains</returns>
        public override ClassifiableEntity GetNextInstance()
        {
            for (;;)
            {
                // hand out the next filtered node of the current sentence, if one is left
                if (_nodeEnum.MoveNext())
                {
                    return _nodeEnum.Current;
                }

                // current sentence is used up...step to the next sentence, opening further MRG files as needed
                while (!_sentEnum.MoveNext())
                {
                    // running out of valid MRG files ends the stream of instances
                    if (!MoveToNextValidMrgFile())
                    {
                        return null;
                    }

                    // begin with the first sentence of the newly selected MRG file
                    _sentEnum = TreeBankEngine.GetSentenceNumbers(CurrentMrgFile).GetEnumerator();
                }

                // gather the nodes of this sentence's tree that the filter accepts
                TreeBankNode parseTree = GetTrainingInstanceParseTree(CurrentMrgFile, _sentEnum.Current);
                List<TreeBankNode> acceptedNodes = new List<TreeBankNode>();
                foreach (TreeBankNode candidate in parseTree.AllNodes)
                {
                    if (!Filter(candidate))
                    {
                        continue;
                    }

                    acceptedNodes.Add(candidate);
                }

                // an empty list simply sends the loop on to the following sentence
                _nodeEnum = acceptedNodes.GetEnumerator();
            }
        }
        /// <summary>
        /// Searches the children of a node for the first one matching the current rule. The categories in the search
        /// list are tried in order; for each category the children are scanned in the configured search direction.
        /// </summary>
        /// <param name="node">Node whose children should be searched</param>
        /// <param name="excludeNodes">Nodes that must not be returned (null to exclude nothing)</param>
        /// <returns>First matching child that is not excluded, or null if there is none</returns>
        public TreeBankNode Run(TreeBankNode node, Set<TreeBankNode> excludeNodes)
        {
            // lay the children out in scan order: child-index order, or mirrored for a right-to-left search
            int childCount = node.ChildCount;
            bool mirrored = _direction == SearchDirection.RightToLeft;
            TreeBankNode[] scanOrder = new TreeBankNode[childCount];
            for (int i = 0; i < childCount; ++i)
            {
                scanOrder[mirrored ? childCount - 1 - i : i] = node.GetChild(i);
            }

            // earlier categories in the search list take priority over later ones
            foreach (string categoryName in _searchList)
            {
                TreeBankEngine.SyntacticCategory wanted = TreeBankEngine.GetSyntacticCategory(categoryName);
                foreach (TreeBankNode candidate in scanOrder)
                {
                    if (candidate.Category != wanted)
                    {
                        continue;
                    }

                    // skip children the caller asked us to ignore
                    if (excludeNodes != null && excludeNodes.Contains(candidate))
                    {
                        continue;
                    }

                    return candidate;
                }
            }

            return null;
        }
        /// <summary>
        /// Blanks the saved Dot, TreeBank and TreeBank index paths, sets the _dot and _treeBankEngine fields to null, and
        /// disables the MRG file, sentence and view-tree controls while re-enabling the load button
        /// </summary>
        /// <param name="sender">Event sender (not used)</param>
        /// <param name="e">Event arguments (not used)</param>
        private void resetMachineSpecificPathsMenuItem_Click(object sender, EventArgs e)
        {
            // wipe the persisted machine-specific paths
            var settings = Settings.Default;
            settings.DotPath = "";
            settings.TreeBankPath = "";
            settings.TreeBankIndexPath = "";
            settings.Save();

            // forget whatever was loaded from the old paths
            _dot = null;
            _treeBankEngine = null;

            // nothing is loaded any more, so only the load button stays usable
            viewTree.Enabled = false;
            sentence.Enabled = false;
            mrgFile.Enabled = false;
            loadTreeBank.Enabled = true;
        }
 /// <summary>
 /// Creates a graph from the tree definition entered in the treeDef control. An empty definition is ignored, and
 /// any exception is shown to the user in a message box.
 /// </summary>
 /// <param name="sender">Event sender (not used)</param>
 /// <param name="e">Event arguments (not used)</param>
 private void createGraphFromDef_Click(object sender, EventArgs e)
 {
     try
     {
         string definition = treeDef.Text;

         // nothing typed in, nothing to draw
         if (definition == "")
         {
             return;
         }

         CreateGraph(TreeBankEngine.ExtractNode(definition, true));
     }
     catch (Exception ex)
     {
         MessageBox.Show("Error:  " + ex);
     }
 }
Beispiel #5
0
        /// <summary>
        /// Constructor
        /// </summary>
        /// <param name="treeBankEngine">TreeBank engine to draw training nodes from</param>
        /// <param name="instanceFilter">Instance filter to apply to training instances</param>
        /// <param name="sections">TreeBank sections to draw instances from (null for all sections)</param>
        /// <exception cref="ArgumentNullException">Thrown if <paramref name="treeBankEngine"/> is null</exception>
        /// <exception cref="ArgumentException">Thrown if <paramref name="sections"/> is non-null but empty</exception>
        public TreeBankInstanceProvider(TreeBankEngine treeBankEngine, InstanceFilterDelegate instanceFilter, Set <int> sections)
            : base(instanceFilter)
        {
            // validate everything before touching any field; both exception types derive from Exception, so
            // callers that catch Exception keep working
            if (treeBankEngine == null)
            {
                throw new ArgumentNullException("treeBankEngine", "TreeBank engine cannot be null");
            }

            // null means "all sections"; an empty set would select nothing at all
            if (sections != null && sections.Count == 0)
            {
                throw new ArgumentException("It makes no sense to restrict TreeBank training sections to the empty set", "sections");
            }

            _treeBankEngine = treeBankEngine;
            _sections = sections;
        }
Beispiel #6
0
        /// <summary>
        /// Loads the TreeBank from its fixed location beneath the root of the current directory, lists every indexed MRG
        /// file (with its sentence count) in the MRG file combo box, then enables the TreeBank group box and disables the
        /// load button
        /// </summary>
        /// <param name="sender">Event sender (not used)</param>
        /// <param name="e">Event arguments (not used)</param>
        private void loadTbBtn_Click(object sender, EventArgs e)
        {
            string root = Directory.GetDirectoryRoot(".");

            // Path.Combine supplies the separator when the root does not end in one (a UNC root such as
            // \\server\share), where plain string concatenation would glue "NLP" onto the share name
            string mrgDirectory   = Path.Combine(root, @"NLP\Resources\PennTreeBank_3\PARSED\MRG\WSJ");
            string indexDirectory = Path.Combine(root, @"NLP\Resources\Indexes\treebank_index");

            _treeBankEngine = new TreeBankEngine(mrgDirectory, indexDirectory);

            // populate MRG file combo box
            foreach (string mrgPath in _treeBankEngine.IndexedMrgFiles)
            {
                mrgFileCombo.Items.Add(Path.GetFileName(mrgPath) + " (" + _treeBankEngine.GetSentenceNumbers(mrgPath).Count + " sentences)");
            }

            treeBankGroupBox.Enabled = true;
            loadTbBtn.Enabled        = false;
        }
        /// <summary>
        /// Loads the TreeBank engine from the saved TreeBank and index directories when both exist; otherwise asks the
        /// user for the two directories and saves the answers. On success the MRG file list is refilled and the MRG file,
        /// sentence and view-tree controls are enabled. If the user backs out of a prompt, nothing changes. Any exception
        /// is reported in a message box.
        /// </summary>
        /// <param name="sender">Event sender (not used)</param>
        /// <param name="e">Event arguments (not used)</param>
        private void loadTreeBank_Click(object sender, EventArgs e)
        {
            try
            {
                // stays null unless one of the branches below actually builds an engine
                TreeBankEngine engine = null;

                if (Directory.Exists(Settings.Default.TreeBankPath) &&
                    Directory.Exists(Settings.Default.TreeBankIndexPath))
                {
                    engine = new TreeBankEngine(Settings.Default.TreeBankPath,
                                                Settings.Default.TreeBankIndexPath);
                }
                else
                {
                    folderBrowserDialog.Description = "Select TreeBank MRG directory path";
                    if (folderBrowserDialog.ShowDialog() == DialogResult.OK &&
                        Directory.Exists(folderBrowserDialog.SelectedPath))
                    {
                        string treeBankPath = folderBrowserDialog.SelectedPath;

                        folderBrowserDialog.Description = "Select TreeBank index directory";
                        if (folderBrowserDialog.ShowDialog() == DialogResult.OK)
                        {
                            string treeBankIndexPath = folderBrowserDialog.SelectedPath;
                            engine = new TreeBankEngine(treeBankPath, treeBankIndexPath);

                            // remember the directories so the prompts are skipped next time
                            Settings.Default.TreeBankPath      = treeBankPath;
                            Settings.Default.TreeBankIndexPath = treeBankIndexPath;
                            Settings.Default.Save();
                        }
                    }
                }

                // The user cancelled a prompt (or picked a missing directory). Leave the form exactly as it was
                // instead of dereferencing a null engine and reporting a misleading load failure.
                if (engine == null)
                {
                    return;
                }

                _treeBankEngine = engine;

                mrgFile.Items.Clear();
                foreach (string mrgFilePath in _treeBankEngine.IndexedMrgFiles)
                {
                    mrgFile.Items.Add(Path.GetFileNameWithoutExtension(mrgFilePath));
                }

                loadTreeBank.Enabled = false;
                mrgFile.Enabled      = sentence.Enabled = viewTree.Enabled = true;
            }
            catch (Exception ex)
            {
                MessageBox.Show("Failed to load TreeBank index:  " + ex);
            }
        }
Beispiel #8
0
        /// <summary>
        /// Advances the MRG file enumerator to the next file permitted by the training section restriction (if any)
        /// </summary>
        /// <returns>True if the enumerator now rests on a permitted MRG file, false if the files ran out first</returns>
        protected bool MoveToNextValidMrgFile()
        {
            while (_fileEnum.MoveNext())
            {
                // without a restriction every file qualifies; otherwise the file's section must be in the allowed set
                if (_sections == null || _sections.Contains(TreeBankEngine.GetSectionNumber(_fileEnum.Current)))
                {
                    return true;
                }
            }

            // enumerator is exhausted
            return false;
        }
        /// <summary>
        /// Advances a NounInfo enumerator to the next entry permitted by the TreeBank section constraint (if any)
        /// </summary>
        /// <param name="nounInfoEnum">NounInfo enumerator to advance (passed by reference so the caller sees the new position)</param>
        /// <returns>True if the enumerator now rests on a permitted NounInfo, false if the entries ran out first</returns>
        private bool MoveToNextValidNounInfo(ref List<NounInfo>.Enumerator nounInfoEnum)
        {
            while (nounInfoEnum.MoveNext())
            {
                // no section constraint...any entry will do
                if (Sections == null)
                {
                    return true;
                }

                // keep the entry only if its file belongs to one of the allowed sections
                if (Sections.Contains(TreeBankEngine.GetSectionNumber(nounInfoEnum.Current.File)))
                {
                    return true;
                }
            }

            // out of NounInfo
            return false;
        }
 /// <summary>
 /// Supplies the parse tree whose nodes serve as training instances. This implementation returns the tree given by
 /// TreeBankEngine.GetParseTree; the method is virtual, so derived classes may supply a different tree.
 /// </summary>
 /// <param name="mrgFile">MRG file that contains the tree</param>
 /// <param name="sentenceNumber">Number of the sentence whose tree is wanted</param>
 /// <returns>Parse tree for the given sentence</returns>
 protected virtual TreeBankNode GetTrainingInstanceParseTree(string mrgFile, int sentenceNumber)
 {
     TreeBankNode parseTree = TreeBankEngine.GetParseTree(mrgFile, sentenceNumber);

     return parseTree;
 }
 /// <summary>
 /// Constructor. All arguments are handed unchanged to the base class constructor.
 /// </summary>
 /// <param name="treeBankEngine">TreeBank engine that parse tree nodes are drawn from</param>
 /// <param name="instanceFilter">Filter applied to the nodes</param>
 /// <param name="sections">Sections that nodes are drawn from</param>
 public TreeBankParseTreeInstanceProvider(
     TreeBankEngine treeBankEngine,
     InstanceFilterDelegate instanceFilter,
     Set<int> sections)
     : base(treeBankEngine, instanceFilter, sections)
 {
     // no state of its own to set up
 }
Beispiel #12
0
        /// <summary>
        /// Transfers the PropBank argument annotations of this root node onto the parse of the same sentence held by a
        /// different TreeBank (e.g., one whose trees were produced by an automatic syntactic parser).
        /// </summary>
        /// <param name="treeBankEngine">Initialized TreeBank engine that supplies the parse tree to mark PropBank arguments within</param>
        /// <returns>Root of the other parse tree with the arguments marked. Null is returned if some non-null argument node has
        /// no minimally subsuming node in the other tree, or if no labeled node collection ended up with any nodes.</returns>
        public PropBankNode MarkArgumentNodesIn(TreeBankEngine treeBankEngine)
        {
            // only a root node can be transferred
            if (!IsRoot)
            {
                throw new Exception("Attempted to transform non-root node");
            }

            // look the file up in the other TreeBank using only what follows the last directory separator
            int fileNameStart = MrgFile.LastIndexOf(Path.DirectorySeparatorChar) + 1;
            string otherMrgPath = treeBankEngine.GetFullMrgPath(MrgFile.Substring(fileNameStart));

            // wrap the other parse of this sentence in a PropBank root
            PropBankNode otherRoot = new PropBankNode(treeBankEngine.GetParseTree(otherMrgPath, SentenceNumber));

            // both parses have to cover exactly the same text
            if (otherRoot.SurfaceText != SurfaceText)
            {
                throw new Exception("Failed to convert root to Charniak-parsed version");
            }

            // Copy the verb information, storing the MRG file as the part of the path that follows the engine's MRG path.
            // The -1 and "" arguments are placeholders (presumably leaf number and labeled node locations); both
            // properties are assigned at the end of this method.
            string relativeMrgFile = otherMrgPath.Substring(treeBankEngine.MrgPath.Length);
            VerbInfo sourceInfo = Information;

            otherRoot.Information = new VerbInfo(
                sourceInfo.Verb, relativeMrgFile, sourceInfo.SentenceNumber,
                -1, sourceInfo.Tagger, sourceInfo.RoleSetId,
                sourceInfo.VForm, sourceInfo.VTense, sourceInfo.VAspect,
                sourceInfo.VPerson, sourceInfo.VVoice, "");

            // rebuild every labeled node collection against the other tree
            foreach (PropBankLabeledNodeCollection sourceCollection in LabeledNodeCollections)
            {
                // fresh collection carrying a copy of the label's type, feature and confidence
                PropBankNodeLabel copiedLabel = new PropBankNodeLabel(
                    sourceCollection.Label.Type,
                    sourceCollection.Label.Feature,
                    sourceCollection.Label.Confidence);
                PropBankLabeledNodeCollection transferred = new PropBankLabeledNodeCollection(copiedLabel);

                // single nodes
                foreach (PropBankNode sourceSingle in sourceCollection.SingleNodes)
                {
                    // null elements are not transferred
                    if (sourceSingle.IsNullElement)
                    {
                        continue;
                    }

                    // find the counterpart in the other tree...give up entirely if there is none
                    PropBankNode otherSingle = (PropBankNode)otherRoot.GetMinimallySubsumingNode(sourceSingle.FirstToken, sourceSingle.LastToken);
                    if (otherSingle == null)
                    {
                        return null;
                    }

                    transferred.AddSingleNode(otherSingle);
                }

                // split arguments
                foreach (List<TreeBankNode> sourceSplit in sourceCollection.SplitNodes)
                {
                    List<TreeBankNode> transferredSplit = new List<TreeBankNode>();

                    foreach (PropBankNode sourcePart in sourceSplit)
                    {
                        if (sourcePart.IsNullElement)
                        {
                            continue;
                        }

                        // find the counterpart of this part in the other tree...give up entirely if there is none
                        PropBankNode otherPart = (PropBankNode)otherRoot.GetMinimallySubsumingNode(sourcePart.FirstToken, sourcePart.LastToken);
                        if (otherPart == null)
                        {
                            return null;
                        }

                        transferredSplit.Add(otherPart);
                    }

                    // two or more surviving parts stay a split argument; a lone survivor is added as a single node;
                    // nothing is added when every part was a null element
                    if (transferredSplit.Count >= 2)
                    {
                        transferred.AddSplitNode(transferredSplit);
                    }
                    else if (transferredSplit.Count == 1)
                    {
                        transferred.AddSingleNode(transferredSplit[0]);
                    }
                }

                // keep the collection only if something was transferred into it
                if (transferred.SingleNodes.Count > 0 || transferred.SplitNodes.Count > 0)
                {
                    otherRoot.LabeledNodeCollections.Add(transferred);
                }
            }

            // nothing at all was transferred
            if (otherRoot.LabeledNodeCollections.Count == 0)
            {
                return null;
            }

            // now that the arguments are marked, fill in the values left as placeholders above
            otherRoot.Information.LeafNumber           = otherRoot.PredicateNodes.First().Leaves[0].LeafNumber;
            otherRoot.Information.LabeledNodeLocations = otherRoot.LabeledNodeLocations;

            return otherRoot;
        }
Beispiel #13
0
        /// <summary>
        /// Transfers the NomBank argument annotations of this root node onto the parse of the same sentence held by a
        /// different TreeBank (e.g., one whose trees were produced by an automatic syntactic parser).
        /// </summary>
        /// <param name="treeBankEngine">Initialized TreeBank engine that supplies the parse tree to mark NomBank arguments within</param>
        /// <returns>Root of the other parse tree with the arguments marked. Null is returned if some non-null argument node has
        /// no minimally subsuming NomBank node in the other tree, or if no labeled node collection ended up with any nodes.</returns>
        public NomBankNode MarkArgumentNodesIn(TreeBankEngine treeBankEngine)
        {
            // arguments can only be marked starting from a root node
            if (!IsRoot)
            {
                throw new Exception("Must pass root node");
            }

            // locate the same MRG file in the other TreeBank
            string otherMrgPath = treeBankEngine.GetFullMrgPath(MrgFile);

            // wrap the other parse of this sentence in a NomBank root
            NomBankNode otherRoot = new NomBankNode(treeBankEngine.GetParseTree(otherMrgPath, SentenceNumber));

            // both parses have to cover exactly the same text
            if (otherRoot.SurfaceText != SurfaceText)
            {
                throw new Exception("Failed to get same parse tree");
            }

            // Copy the noun information, storing the MRG file as the part of the path that follows the engine's MRG path
            // with directory separators trimmed from both ends. The -1 and "" arguments are placeholders (presumably
            // leaf number and labeled node locations); both properties are assigned at the end of this method.
            string relativeMrgFile = otherMrgPath.Substring(treeBankEngine.MrgPath.Length).Trim(Path.DirectorySeparatorChar);
            NounInfo sourceInfo = Information;

            otherRoot.Information = new NounInfo(
                sourceInfo.Noun, relativeMrgFile, sourceInfo.SentenceNumber,
                -1, sourceInfo.RoleSetId, "");

            // rebuild every labeled node collection against the other tree
            foreach (NomBankLabeledNodeCollection sourceCollection in LabeledNodeCollections)
            {
                // fresh collection carrying a copy of the label
                NomBankLabeledNodeCollection transferred = new NomBankLabeledNodeCollection(sourceCollection.Label.Copy());

                // single nodes
                foreach (NomBankNode sourceSingle in sourceCollection.SingleNodes)
                {
                    // null elements are not transferred
                    if (sourceSingle.IsNullElement)
                    {
                        continue;
                    }

                    // find the counterpart in the other tree...give up entirely if there is none
                    NomBankNode otherSingle = otherRoot.GetMinimallySubsumingNode(sourceSingle.FirstToken, sourceSingle.LastToken) as NomBankNode;
                    if (otherSingle == null)
                    {
                        return null;
                    }

                    transferred.AddSingleNode(otherSingle);
                }

                // split arguments
                foreach (List<TreeBankNode> sourceSplit in sourceCollection.SplitNodes)
                {
                    List<TreeBankNode> transferredSplit = new List<TreeBankNode>();

                    foreach (NomBankNode sourcePart in sourceSplit)
                    {
                        if (sourcePart.IsNullElement)
                        {
                            continue;
                        }

                        // find the counterpart of this part in the other tree...give up entirely if there is none
                        NomBankNode otherPart = otherRoot.GetMinimallySubsumingNode(sourcePart.FirstToken, sourcePart.LastToken) as NomBankNode;
                        if (otherPart == null)
                        {
                            return null;
                        }

                        transferredSplit.Add(otherPart);
                    }

                    // two or more surviving parts stay a split argument; a lone survivor is added as a single node;
                    // nothing is added when every part was a null element
                    if (transferredSplit.Count >= 2)
                    {
                        transferred.AddSplitNode(transferredSplit);
                    }
                    else if (transferredSplit.Count == 1)
                    {
                        transferred.AddSingleNode(transferredSplit[0]);
                    }
                }

                // every hyphen index on the label must have been applied by now
                if (transferred.Label.HyphenIndexes.Count != transferred.AppliedIndexes.Count)
                {
                    throw new Exception("Not all hyphen indexes were applied");
                }

                // keep the collection only if something was transferred into it
                if (transferred.SingleNodes.Count > 0 || transferred.SplitNodes.Count > 0)
                {
                    otherRoot.LabeledNodeCollections.Add(transferred);
                }
            }

            // nothing at all was transferred
            if (otherRoot.LabeledNodeCollections.Count == 0)
            {
                return null;
            }

            // now that the arguments are marked, fill in the values left as placeholders above
            otherRoot.Information.LeafNumber           = otherRoot.PredicateNode.Leaves[0].LeafNumber;
            otherRoot.Information.LabeledNodeLocations = otherRoot.LabeledNodeLocations;

            return otherRoot;
        }