/// <summary>
/// Produces the next filtered parse tree node, advancing through nodes, then sentences, then MRG files as each
/// level is exhausted.
/// </summary>
/// <returns>Next node that passed the filter, or null once no valid MRG files remain</returns>
public override ClassifiableEntity GetNextInstance()
{
    for (;;)
    {
        // hand out the next node of the current sentence if one is left
        if (_nodeEnum.MoveNext())
        {
            return _nodeEnum.Current;
        }

        // current sentence is used up: advance to the next sentence, opening further MRG files as needed
        while (!_sentEnum.MoveNext())
        {
            // no more valid MRG files means there is nothing left to provide
            if (!MoveToNextValidMrgFile())
            {
                return null;
            }

            // position before the first sentence of the newly selected MRG file
            _sentEnum = TreeBankEngine.GetSentenceNumbers(CurrentMrgFile).GetEnumerator();
        }

        // collect the nodes of this sentence's tree that pass the filter
        TreeBankNode tree = GetTrainingInstanceParseTree(CurrentMrgFile, _sentEnum.Current);
        List<TreeBankNode> kept = new List<TreeBankNode>();
        foreach (TreeBankNode candidate in tree.AllNodes)
        {
            if (!Filter(candidate))
            {
                continue;
            }

            kept.Add(candidate);
        }

        // the kept list may be empty, in which case the loop simply moves on to the next sentence
        _nodeEnum = kept.GetEnumerator();
    }
}
/// <summary>
/// Searches the children of a node for the first one matching the current rule. Categories are tried in the order
/// they appear in the search list; for each category the children are scanned in the configured direction.
/// </summary>
/// <param name="node">Node whose children should be searched</param>
/// <param name="excludeNodes">Nodes that must not be returned (null to exclude nothing)</param>
/// <returns>First matching child that is not excluded, or null if there is none</returns>
public TreeBankNode Run(TreeBankNode node, Set<TreeBankNode> excludeNodes)
{
    // gather the children in left-to-right order
    List<TreeBankNode> candidates = new List<TreeBankNode>();
    int index = 0;
    while (index < node.ChildCount)
    {
        candidates.Add(node.GetChild(index));
        ++index;
    }

    // flip the scan order for right-to-left searches
    if (_direction == SearchDirection.RightToLeft)
    {
        candidates.Reverse();
    }

    // earlier entries in the search list take priority over later ones
    foreach (string categoryName in _searchList)
    {
        TreeBankEngine.SyntacticCategory wanted = TreeBankEngine.GetSyntacticCategory(categoryName);

        foreach (TreeBankNode candidate in candidates)
        {
            if (candidate.Category != wanted)
            {
                continue;
            }

            if (excludeNodes != null && excludeNodes.Contains(candidate))
            {
                continue;
            }

            return candidate;
        }
    }

    return null;
}
/// <summary>
/// Clears the stored Dot, TreeBank and TreeBank index paths, saves the settings, discards the objects built from
/// those paths and returns the form to its "TreeBank not loaded" state.
/// </summary>
/// <param name="sender">Event sender (unused)</param>
/// <param name="e">Event arguments (unused)</param>
private void resetMachineSpecificPathsMenuItem_Click(object sender, EventArgs e)
{
    // blank out every machine-specific path and persist the change
    Settings settings = Settings.Default;
    settings.DotPath = "";
    settings.TreeBankPath = "";
    settings.TreeBankIndexPath = "";
    settings.Save();

    // drop the objects that were created from the old paths
    _dot = null;
    _treeBankEngine = null;

    // disable the TreeBank-dependent controls (same order a chained assignment applies them: right to left)
    viewTree.Enabled = false;
    sentence.Enabled = false;
    mrgFile.Enabled = false;

    // allow the TreeBank to be loaded again
    loadTreeBank.Enabled = true;
}
/// <summary>
/// Builds a graph from the tree definition typed into the definition box. Nothing happens when the box is empty;
/// any exception raised along the way is shown in a message box.
/// </summary>
/// <param name="sender">Event sender (unused)</param>
/// <param name="e">Event arguments (unused)</param>
private void createGraphFromDef_Click(object sender, EventArgs e)
{
    try
    {
        string definition = treeDef.Text;

        // nothing to draw without a definition
        if (definition == "")
        {
            return;
        }

        CreateGraph(TreeBankEngine.ExtractNode(definition, true));
    }
    catch (Exception ex)
    {
        MessageBox.Show("Error: " + ex);
    }
}
/// <summary>
/// Constructor
/// </summary>
/// <param name="treeBankEngine">TreeBank engine to draw training nodes from</param>
/// <param name="instanceFilter">Instance filter to apply to training instances</param>
/// <param name="sections">TreeBank sections to draw instances from (null for all sections)</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="treeBankEngine"/> is null</exception>
/// <exception cref="ArgumentException">Thrown when <paramref name="sections"/> is non-null but empty</exception>
public TreeBankInstanceProvider(TreeBankEngine treeBankEngine, InstanceFilterDelegate instanceFilter, Set<int> sections)
    : base(instanceFilter)
{
    // validate before touching any state; the argument exceptions derive from Exception, so existing
    // catch (Exception) handlers continue to work
    if (treeBankEngine == null)
    {
        throw new ArgumentNullException("treeBankEngine", "TreeBank engine cannot be null");
    }

    // null means "all sections"; an empty set would select nothing at all
    if (sections != null && sections.Count == 0)
    {
        throw new ArgumentException("It makes no sense to restrict TreeBank training sections to the empty set", "sections");
    }

    _treeBankEngine = treeBankEngine;
    _sections = sections;
}
/// <summary>
/// Creates the TreeBank engine from fixed locations under the root of the current directory's drive, lists every
/// indexed MRG file (with its sentence count) in the combo box, then enables the TreeBank group and disables the
/// load button.
/// </summary>
/// <param name="sender">Event sender (unused)</param>
/// <param name="e">Event arguments (unused)</param>
private void loadTbBtn_Click(object sender, EventArgs e)
{
    // both resource directories are addressed relative to the root of the current directory
    string driveRoot = Directory.GetDirectoryRoot(".");
    string mrgDirectory = driveRoot + @"NLP\Resources\PennTreeBank_3\PARSED\MRG\WSJ";
    string indexDirectory = driveRoot + @"NLP\Resources\Indexes\treebank_index";

    _treeBankEngine = new TreeBankEngine(mrgDirectory, indexDirectory);

    // one combo box entry per indexed MRG file
    foreach (string mrgPath in _treeBankEngine.IndexedMrgFiles)
    {
        string fileName = Path.GetFileName(mrgPath);
        mrgFileCombo.Items.Add(fileName + " (" + _treeBankEngine.GetSentenceNumbers(mrgPath).Count + " sentences)");
    }

    treeBankGroupBox.Enabled = true;
    loadTbBtn.Enabled = false;
}
/// <summary>
/// Loads the TreeBank engine and fills the MRG file list. The saved TreeBank and index paths are used when both
/// directories exist; otherwise the user is asked to pick them, and the chosen paths are saved once the engine has
/// been created from them. If the user cancels either folder dialog the handler returns without changing the form
/// and without showing an error.
/// </summary>
/// <param name="sender">Event sender (unused)</param>
/// <param name="e">Event arguments (unused)</param>
private void loadTreeBank_Click(object sender, EventArgs e)
{
    try
    {
        if (Directory.Exists(Settings.Default.TreeBankPath) && Directory.Exists(Settings.Default.TreeBankIndexPath))
        {
            // saved paths are still valid on this machine
            _treeBankEngine = new TreeBankEngine(Settings.Default.TreeBankPath, Settings.Default.TreeBankIndexPath);
        }
        else
        {
            // ask for the MRG directory; cancelling is not an error, there is simply nothing to load
            folderBrowserDialog.Description = "Select TreeBank MRG directory path";
            if (folderBrowserDialog.ShowDialog() != DialogResult.OK || !Directory.Exists(folderBrowserDialog.SelectedPath))
            {
                return;
            }

            string treeBankPath = folderBrowserDialog.SelectedPath;

            // ask for the index directory; again, cancelling just abandons the load
            folderBrowserDialog.Description = "Select TreeBank index directory";
            if (folderBrowserDialog.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            string treeBankIndexPath = folderBrowserDialog.SelectedPath;

            _treeBankEngine = new TreeBankEngine(treeBankPath, treeBankIndexPath);

            // remember the paths only after the engine was created successfully from them
            Settings.Default.TreeBankPath = treeBankPath;
            Settings.Default.TreeBankIndexPath = treeBankIndexPath;
            Settings.Default.Save();
        }

        // an engine is guaranteed to exist here, so the list is only cleared when it can be repopulated
        mrgFile.Items.Clear();
        foreach (string mrgFilePath in _treeBankEngine.IndexedMrgFiles)
        {
            mrgFile.Items.Add(Path.GetFileNameWithoutExtension(mrgFilePath));
        }

        loadTreeBank.Enabled = false;
        mrgFile.Enabled = sentence.Enabled = viewTree.Enabled = true;
    }
    catch (Exception ex)
    {
        MessageBox.Show("Failed to load TreeBank index: " + ex);
    }
}
/// <summary>
/// Advances the MRG file enumerator to the next file allowed by the section restriction. Without a restriction
/// the very next file is accepted.
/// </summary>
/// <returns>True if the enumerator now rests on an acceptable MRG file, false if the files ran out first</returns>
protected bool MoveToNextValidMrgFile()
{
    while (_fileEnum.MoveNext())
    {
        // no restriction: every file is acceptable
        if (_sections == null)
        {
            return true;
        }

        // restricted: accept only files whose section number is in the set
        if (_sections.Contains(TreeBankEngine.GetSectionNumber(_fileEnum.Current)))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Advances a NounInfo enumerator to the next entry allowed by the TreeBank section constraint. Without a
/// constraint the very next entry is accepted.
/// </summary>
/// <param name="nounInfoEnum">NounInfo enumerator to advance (passed by reference so the caller sees the new position)</param>
/// <returns>True if the enumerator now rests on an acceptable NounInfo, false if the entries ran out first</returns>
private bool MoveToNextValidNounInfo(ref List<NounInfo>.Enumerator nounInfoEnum)
{
    while (nounInfoEnum.MoveNext())
    {
        // no constraint: every entry is acceptable
        if (Sections == null)
        {
            return true;
        }

        // constrained: accept only entries whose file belongs to one of the allowed sections
        if (Sections.Contains(TreeBankEngine.GetSectionNumber(nounInfoEnum.Current.File)))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Fetches the parse tree whose nodes serve as training instances. This implementation returns the tree exactly
/// as the TreeBank engine supplies it; the method is virtual so derived providers can supply a different tree.
/// </summary>
/// <param name="mrgFile">MRG file containing the tree</param>
/// <param name="sentenceNumber">Sentence number of the tree within the MRG file</param>
/// <returns>Training parse tree</returns>
protected virtual TreeBankNode GetTrainingInstanceParseTree(string mrgFile, int sentenceNumber)
{
    TreeBankNode parseTree = TreeBankEngine.GetParseTree(mrgFile, sentenceNumber);
    return parseTree;
}
/// <summary>
/// Constructor. Performs no work of its own; all three arguments are forwarded unchanged to the base constructor.
/// </summary>
/// <param name="treeBankEngine">TreeBank engine to draw parse tree nodes from</param>
/// <param name="instanceFilter">Instance filter to apply to nodes</param>
/// <param name="sections">Sections to draw nodes from</param>
public TreeBankParseTreeInstanceProvider(
    TreeBankEngine treeBankEngine,
    InstanceFilterDelegate instanceFilter,
    Set<int> sections)
    : base(treeBankEngine, instanceFilter, sections)
{
}
/// <summary>
/// Marks argument nodes from the current node in the corresponding parse from a different TreeBank. This is used when
/// transferring PropBank annotations to parse trees other than those distributed in the TreeBank (e.g., those produced
/// by an automatic syntactic parser).
/// </summary>
/// <param name="treeBankEngine">Initialized TreeBank engine from which to pull the parse tree to mark PropBank arguments within</param>
/// <returns>PropBank node, or null if all arguments couldn't be minimally transferred to the other parse tree. An argument
/// is minimally transferred if the corresponding node in the other parse tree subsumes precisely the same text as the node in the
/// current parse tree. Sometimes this is not possible due to parse errors. Null is also returned when no labeled node
/// collection with non-null nodes could be transferred.</returns>
public PropBankNode MarkArgumentNodesIn(TreeBankEngine treeBankEngine)
{
    if (!IsRoot)
    {
        throw new Exception("Attempted to transform non-root node");
    }

    // locate the MRG file with the same file name in the other TreeBank
    int fileNameStart = MrgFile.LastIndexOf(Path.DirectorySeparatorChar) + 1;
    string otherMrgFile = treeBankEngine.GetFullMrgPath(MrgFile.Substring(fileNameStart));

    // wrap the other parse of this sentence in a PropBank root so arguments can be marked within it
    PropBankNode otherRoot = new PropBankNode(treeBankEngine.GetParseTree(otherMrgFile, SentenceNumber));

    // both parses must cover exactly the same text
    if (otherRoot.SurfaceText != SurfaceText)
    {
        throw new Exception("Failed to convert root to Charniak-parsed version");
    }

    // copy the verb information, storing the MRG path relative to the other TreeBank's MRG directory;
    // leaf number (-1) and labeled node locations ("") are placeholders that get filled in at the end
    string relativeMrgFile = otherMrgFile.Substring(treeBankEngine.MrgPath.Length);
    VerbInfo sourceInfo = Information;
    otherRoot.Information = new VerbInfo(
        sourceInfo.Verb,
        relativeMrgFile,
        sourceInfo.SentenceNumber,
        -1,
        sourceInfo.Tagger,
        sourceInfo.RoleSetId,
        sourceInfo.VForm,
        sourceInfo.VTense,
        sourceInfo.VAspect,
        sourceInfo.VPerson,
        sourceInfo.VVoice,
        "");

    // transfer every labeled node collection
    foreach (PropBankLabeledNodeCollection sourceCollection in LabeledNodeCollections)
    {
        // fresh collection carrying a copy of the source label
        PropBankLabeledNodeCollection targetCollection = new PropBankLabeledNodeCollection(
            new PropBankNodeLabel(sourceCollection.Label.Type, sourceCollection.Label.Feature, sourceCollection.Label.Confidence));

        // single nodes: null elements are skipped, everything else must be found in the other tree
        foreach (PropBankNode singleNode in sourceCollection.SingleNodes)
        {
            if (singleNode.IsNullElement)
            {
                continue;
            }

            PropBankNode otherArgNode = (PropBankNode)otherRoot.GetMinimallySubsumingNode(singleNode.FirstToken, singleNode.LastToken);
            if (otherArgNode == null)
            {
                return null;
            }

            targetCollection.AddSingleNode(otherArgNode);
        }

        // split arguments: transfer each non-null part
        foreach (List<TreeBankNode> sourceSplit in sourceCollection.SplitNodes)
        {
            List<TreeBankNode> targetSplit = new List<TreeBankNode>();

            foreach (PropBankNode part in sourceSplit)
            {
                if (part.IsNullElement)
                {
                    continue;
                }

                PropBankNode otherPart = (PropBankNode)otherRoot.GetMinimallySubsumingNode(part.FirstToken, part.LastToken);
                if (otherPart == null)
                {
                    return null;
                }

                targetSplit.Add(otherPart);
            }

            if (targetSplit.Count == 1)
            {
                // only one part of the split argument was non-null: add that node as a single node
                targetCollection.AddSingleNode(targetSplit[0]);
            }
            else if (targetSplit.Count >= 2)
            {
                // a genuine split argument
                targetCollection.AddSplitNode(targetSplit);
            }
        }

        // keep the collection only if it ended up with at least one node
        bool hasNodes = targetCollection.SingleNodes.Count > 0 || targetCollection.SplitNodes.Count > 0;
        if (hasNodes)
        {
            otherRoot.LabeledNodeCollections.Add(targetCollection);
        }
    }

    // nothing was transferred at all
    if (otherRoot.LabeledNodeCollections.Count == 0)
    {
        return null;
    }

    // now that the nodes are marked, fill in the placeholders left above
    otherRoot.Information.LeafNumber = otherRoot.PredicateNodes.First().Leaves[0].LeafNumber;
    otherRoot.Information.LabeledNodeLocations = otherRoot.LabeledNodeLocations;

    return otherRoot;
}
/// <summary>
/// Marks argument nodes from the current node in the corresponding parse from a different TreeBank. This is used when
/// transferring NomBank annotations to parse trees other than those distributed in the TreeBank (e.g., those produced
/// by an automatic syntactic parser).
/// </summary>
/// <param name="treeBankEngine">Initialized TreeBank engine from which to pull the parse tree to mark NomBank arguments within</param>
/// <returns>NomBank node, or null if all arguments couldn't be minimally transferred to the other parse tree. An argument
/// is minimally transferred if the corresponding node in the other parse tree subsumes precisely the same text as the node
/// in the current parse tree. Sometimes this is not possible due to parse errors. Null is also returned when no labeled
/// node collection with non-null nodes could be transferred.</returns>
public NomBankNode MarkArgumentNodesIn(TreeBankEngine treeBankEngine)
{
    // arguments can only be marked starting from a root node
    if (!IsRoot)
    {
        throw new Exception("Must pass root node");
    }

    // locate the corresponding MRG file in the other TreeBank
    string otherMrgFile = treeBankEngine.GetFullMrgPath(MrgFile);

    // wrap the other parse of this sentence in a NomBank root so arguments can be marked within it
    NomBankNode otherRoot = new NomBankNode(treeBankEngine.GetParseTree(otherMrgFile, SentenceNumber));

    // both parses must cover exactly the same text
    if (otherRoot.SurfaceText != SurfaceText)
    {
        throw new Exception("Failed to get same parse tree");
    }

    // copy the noun information, storing the MRG path relative to the other TreeBank's MRG directory with any
    // leading/trailing directory separators removed; leaf number (-1) and labeled node locations ("") are
    // placeholders that get filled in at the end
    string relativeMrgFile = otherMrgFile.Substring(treeBankEngine.MrgPath.Length).Trim(Path.DirectorySeparatorChar);
    NounInfo sourceInfo = Information;
    otherRoot.Information = new NounInfo(
        sourceInfo.Noun,
        relativeMrgFile,
        sourceInfo.SentenceNumber,
        -1,
        sourceInfo.RoleSetId,
        "");

    // transfer every labeled node collection
    foreach (NomBankLabeledNodeCollection sourceCollection in LabeledNodeCollections)
    {
        // fresh collection carrying a copy of the source label
        NomBankLabeledNodeCollection targetCollection = new NomBankLabeledNodeCollection(sourceCollection.Label.Copy());

        // single nodes: null elements are skipped, everything else must be found in the other tree
        foreach (NomBankNode singleNode in sourceCollection.SingleNodes)
        {
            if (singleNode.IsNullElement)
            {
                continue;
            }

            NomBankNode otherArgNode = otherRoot.GetMinimallySubsumingNode(singleNode.FirstToken, singleNode.LastToken) as NomBankNode;
            if (otherArgNode == null)
            {
                return null;
            }

            targetCollection.AddSingleNode(otherArgNode);
        }

        // split arguments: transfer each non-null part
        foreach (List<TreeBankNode> sourceSplit in sourceCollection.SplitNodes)
        {
            List<TreeBankNode> targetSplit = new List<TreeBankNode>();

            foreach (NomBankNode part in sourceSplit)
            {
                if (part.IsNullElement)
                {
                    continue;
                }

                NomBankNode otherPart = otherRoot.GetMinimallySubsumingNode(part.FirstToken, part.LastToken) as NomBankNode;
                if (otherPart == null)
                {
                    return null;
                }

                targetSplit.Add(otherPart);
            }

            if (targetSplit.Count == 1)
            {
                // only one part of the split argument was non-null: add that node as a single node
                targetCollection.AddSingleNode(targetSplit[0]);
            }
            else if (targetSplit.Count >= 2)
            {
                // a genuine split argument
                targetCollection.AddSplitNode(targetSplit);
            }
        }

        // every hyphen index on the label must have been applied while adding nodes
        if (targetCollection.Label.HyphenIndexes.Count != targetCollection.AppliedIndexes.Count)
        {
            throw new Exception("Not all hyphen indexes were applied");
        }

        // keep the collection only if it ended up with at least one node
        bool hasNodes = targetCollection.SingleNodes.Count > 0 || targetCollection.SplitNodes.Count > 0;
        if (hasNodes)
        {
            otherRoot.LabeledNodeCollections.Add(targetCollection);
        }
    }

    // nothing was transferred at all
    if (otherRoot.LabeledNodeCollections.Count == 0)
    {
        return null;
    }

    // now that the nodes are marked, fill in the placeholders left above
    otherRoot.Information.LeafNumber = otherRoot.PredicateNode.Leaves[0].LeafNumber;
    otherRoot.Information.LabeledNodeLocations = otherRoot.LabeledNodeLocations;

    return otherRoot;
}