/// <summary>
        /// Collapses per-character preterminals below a node back into one word leaf.
        /// </summary>
        /// <remarks>
        /// For a pre-preterminal node whose first child's label value satisfies
        /// <c>Matches(".*_.")</c>, the values of each child's first child are
        /// concatenated and the node's children are replaced by a single new leaf
        /// carrying that string. Every other node is left alone and its children
        /// are visited recursively. The tree is modified in place.
        /// </remarks>
        /// <param name="tree">Node to process.</param>
        /// <returns>The same <paramref name="tree"/> instance.</returns>
        public virtual Tree UntransformTree(Tree tree)
        {
            ITreeFactory leafFactory = tree.TreeFactory();

            if (!tree.IsPrePreTerminal())
            {
                // Interior node: descend into every child.
                foreach (Tree subtree in tree.Children())
                {
                    UntransformTree(subtree);
                }
                return tree;
            }

            if (tree.FirstChild().Label().Value().Matches(".*_."))
            {
                StringBuilder joined = new StringBuilder();
                foreach (Tree charNode in tree.Children())
                {
                    joined.Append(charNode.FirstChild().Label().Value());
                }
                Tree wordLeaf = leafFactory.NewLeaf(joined.ToString());
                tree.SetChildren(Java.Util.Collections.SingletonList(wordLeaf));
            }
            return tree;
        }
        /// <summary>Read parse trees from a Reader.</summary>
        /// <remarks>
        /// The XML document is parsed inside the constructor and its
        /// <c>NodeSent</c> elements are stored for later use. Parse and I/O
        /// failures are not rethrown; they are only logged / printed.
        /// </remarks>
        /// <param name="filename">Name used in the log message written when parsing fails.</param>
        /// <param name="in">
        /// The
        /// <c>Reader</c>
        /// </param>
        /// <param name="simplifiedTagset">
        /// If <c>true</c>, convert part-of-speech labels to a
        /// simplified version of the EAGLES tagset, where the tags do not
        /// include extensive morphological analysis
        /// </param>
        /// <param name="aggressiveNormalization">
        /// Perform aggressive "normalization"
        /// on the trees read from the provided corpus documents:
        /// split multi-word tokens into their constituent words (and
        /// infer parts of speech of the constituent words).
        /// </param>
        /// <param name="retainNER">
        /// Retain NER information in preterminals (for later
        /// use in <c>MultiWordPreprocessor</c>) and add NER-specific
        /// parents to single-word NE tokens
        /// </param>
        /// <param name="detailedAnnotations">
        /// Retain detailed tree node annotations. These
        /// annotations on parse tree constituents may be useful for
        /// e.g. training a parser.
        /// </param>
        public SpanishXMLTreeReader(string filename, Reader @in, bool simplifiedTagset, bool aggressiveNormalization, bool retainNER, bool detailedAnnotations)
        {
            // The language pack supplies the encoding used to wrap the reader.
            ITreebankLanguagePack languagePack = new SpanishTreebankLanguagePack();

            this.simplifiedTagset = simplifiedTagset;
            this.detailedAnnotations = detailedAnnotations;
            stream = new ReaderInputStream(@in, languagePack.GetEncoding());
            treeFactory = new LabeledScoredTreeFactory();
            treeNormalizer = new SpanishTreeNormalizer(simplifiedTagset, aggressiveNormalization, retainNER);

            DocumentBuilder builder = XMLUtils.GetXmlParser();
            try
            {
                IDocument document = builder.Parse(stream);
                IElement documentRoot = document.GetDocumentElement();
                sentences = documentRoot.GetElementsByTagName(NodeSent);
                sentIdx = 0;
            }
            catch (SAXException parseFailure)
            {
                log.Info("Parse exception while reading " + filename);
                Sharpen.Runtime.PrintStackTrace(parseFailure);
            }
            catch (IOException ioFailure)
            {
                Sharpen.Runtime.PrintStackTrace(ioFailure);
            }
        }
Пример #3
0
 /// <summary>Creates the form for the given tree node.</summary>
 /// <param name="theNode">Node stored in <c>TheNode</c>.</param>
 public frmExpData(TreeNodeEx theNode)
 {
     InitializeComponent();

     treeFactory = new TreeFactory();

     // Append "-" and the project name to the existing window caption.
     Text = Text + "-" + Globals.Projectname;

     TheNode = theNode;
 }
Пример #4
0
        /// <summary>
        /// Creates the scan form for a template tree node.
        /// </summary>
        /// <remarks>
        /// If <c>InitializeComponent</c> throws, a message box is shown and the
        /// constructor returns early, leaving the remaining members unset.
        /// Otherwise the status labels are filled and the stored file status for
        /// the node's tree path is looked up; when it is "4" or "5" the user is
        /// informed, and initialization then continues as before.
        /// </remarks>
        /// <param name="node">Node whose tree path is looked up and whose files are collected.</param>
        /// <param name="PartentForm">Form stored in <c>_parentForm</c>.</param>
        /// <param name="Itree">Tree factory stored in <c>treeFactory</c>.</param>
        public FrmScan(TreeNodeEx node, Form PartentForm, ITreeFactory Itree)
        {
            treeFactory = Itree;
            try
            {
                InitializeComponent();
            }
            catch
            {
                // Best effort: any designer failure is reported to the user as a
                // missing scanner control (see the message text) and the form is abandoned.
                MessageBox.Show("没有安装扫描控件,请打开安装包安装!");
                this.DialogResult = DialogResult.OK;
                return;
            }
            tssLabel1.Text = "就绪";
            tssLabel2.Text = Globals.AppTitle;
            tssLabel3.Text = Globals.LoginUser;

            // Values are embedded in a quoted SQL literal, so single quotes must be
            // doubled; otherwise a path containing an apostrophe breaks (or alters) the query.
            string strSql = "select filestatus from T_FileList where ProjectNO='" + EscapeSqlLiteral(Globals.ProjectNO) + "' and treepath='" + EscapeSqlLiteral(Functions.OpeartPath(node)) + "'";
            object tmp    = Digi.DBUtility.DbHelperOleDb.GetSingle(strSql);

            if (tmp != null && (tmp.ToString() == "4" || tmp.ToString() == "5"))
            {
                // Status 4/5: the message tells the user the template is already submitted.
                // NOTE(review): unlike the catch branch above there is no return here,
                // so initialization continues — confirm that this is intended.
                MessageBox.Show("该模板已提交,不能扫描或编辑之下的文件。");
                this.DialogResult = DialogResult.OK;
            }
            NewNode = node;
            getAllFileFromTemplateNode(node);
            _parentForm = PartentForm;
            FileID      = Guid.NewGuid().ToString();
        }

        /// <summary>Converts a value to text and doubles single quotes for use inside a quoted SQL literal.</summary>
        /// <param name="value">Value to embed; null becomes an empty string, as with string concatenation.</param>
        /// <returns>The escaped text.</returns>
        private static string EscapeSqlLiteral(object value)
        {
            return value == null ? string.Empty : value.ToString().Replace("'", "''");
        }
Пример #5
0
 /// <summary>Creates the file registration form.</summary>
 /// <param name="siVisale">Flag forwarded to <c>SetControls</c>.</param>
 /// <param name="Itree">Tree factory stored in <c>treeFactory</c>.</param>
 public FileRegistInfo(bool siVisale, ITreeFactory Itree)
 {
     InitializeComponent();

     // Configure the controls first, then store the factory before BindUnit runs.
     SetControls(siVisale);
     treeFactory = Itree;

     BindUnit();
 }
Пример #6
0
 /// <summary>Creates the add-template form.</summary>
 /// <param name="Node">Not used by this constructor.</param>
 /// <param name="ParentForm">Form stored in <c>_parentForm</c>.</param>
 /// <param name="Itree">Tree factory stored in <c>treeFactory</c>.</param>
 /// <param name="fileOrFlord_flg">Initial checked state of <c>checkBox1</c>.</param>
 public frmAddTemplet(TreeNodeEx Node, Form ParentForm, ITreeFactory Itree, bool fileOrFlord_flg)
 {
     InitializeComponent();

     _parentForm = ParentForm;
     treeFactory = Itree;

     // The check box exists only after InitializeComponent has run.
     checkBox1.Checked = fileOrFlord_flg;
 }
Пример #7
0
 /// <summary>Creates an annotator from a head finder, language parameters and options.</summary>
 /// <param name="hf">Head finder stored in <c>hf</c>.</param>
 /// <param name="tlpp">Language-specific parser parameters stored in <c>tlpParams</c>.</param>
 /// <param name="op">Options; only <c>op.trainOptions</c> is kept.</param>
 public TreeAnnotator(IHeadFinder hf, ITreebankLangParserParams tlpp, Options op)
 {
     tf = new LabeledScoredTreeFactory();

     this.hf = hf;
     tlpParams = tlpp;
     trainOptions = op.trainOptions;
 }
 /// <summary>
 /// Normalizes a complete tree: removes filtered nodes, rewrites preterminal
 /// tags, separates morphological annotations from words and adds the root label.
 /// </summary>
 /// <remarks>
 /// Steps, in order: prune with <c>emptyFilter</c>, splice out with
 /// <c>aOverAFilter</c>, set each preterminal's value (and tag, when the label
 /// is an <c>IHasTag</c>) to <c>NormalizePreterminal</c>'s result, split leaf
 /// values on <c>MorphoFeatureSpecification.MorphoMark</c>, strip enclosing
 /// nodes with empty values, and finally wrap the tree in a <c>rootLabel</c>
 /// node unless it already carries that value.
 /// </remarks>
 /// <param name="tree">Root of the tree to normalize; must not be null.</param>
 /// <param name="tf">Factory passed to pruning/splicing and used to build the new root node.</param>
 /// <returns>The normalized tree, or null when nothing is left of it.</returns>
 public override Tree NormalizeWholeTree(Tree tree, ITreeFactory tf)
 {
     // Prune may return null when every node is filtered out; calling SpliceOut on
     // that result would throw instead of reporting "no tree" to the caller.
     tree = tree.Prune(emptyFilter, tf);
     if (tree == null)
     {
         return(null);
     }
     tree = tree.SpliceOut(aOverAFilter, tf);
     if (tree == null)
     {
         // Defensive: the enumeration below cannot run on a null tree.
         return(null);
     }
     foreach (Tree t in tree)
     {
         //Map punctuation tags back like the PTB
         if (t.IsPreTerminal())
         {
             string posStr = NormalizePreterminal(t);
             t.SetValue(posStr);
             if (t.Label() is IHasTag)
             {
                 ((IHasTag)t.Label()).SetTag(posStr);
             }
             continue;
         }
         if (!t.IsLeaf() || !t.Value().Contains(MorphoFeatureSpecification.MorphoMark))
         {
             continue;
         }
         //Strip off morphological analyses and place them in the OriginalTextAnnotation, which is
         //specified by HasContext.
         string[] toks = t.Value().Split(MorphoFeatureSpecification.MorphoMark);
         if (toks.Length != 2)
         {
             System.Console.Error.Printf("%s: Word contains malformed morph annotation: %s%n", this.GetType().FullName, t.Value());
         }
         else
         {
             if (t.Label() is CoreLabel)
             {
                 CoreLabel coreLabel = (CoreLabel)t.Label();
                 string    word      = string.Intern(toks[0].Trim());
                 coreLabel.SetValue(word);
                 coreLabel.SetWord(word);
                 coreLabel.SetOriginalText(string.Intern(toks[1].Trim()));
             }
             else
             {
                 System.Console.Error.Printf("%s: Cannot store morph analysis in non-CoreLabel: %s%n", this.GetType().FullName, t.Label().GetType().FullName);
             }
         }
     }
     //Add start symbol so that the root has only one sub-state. Escape any enclosing brackets.
     //If the "tree" consists entirely of enclosing brackets e.g. ((())) then this method
     //will return null. In this case, readers e.g. PennTreeReader will try to read the next tree.
     while (tree != null && (tree.Value() == null || tree.Value().Equals(string.Empty)) && tree.NumChildren() <= 1)
     {
         tree = tree.FirstChild();
     }
     //Ensure that the tree has a top-level unary rewrite.
     //object.Equals is null-safe: an unlabeled node with several children is wrapped
     //in the root label instead of throwing on tree.Value().
     if (tree != null && !object.Equals(tree.Value(), rootLabel))
     {
         tree = tf.NewTreeNode(rootLabel, Collections.SingletonList(tree));
     }
     return(tree);
 }
Пример #9
0
 /// <summary>
 /// Runs every normalizer in <c>tns</c> over the tree, feeding each one the
 /// result of the previous one.
 /// </summary>
 /// <param name="tree">Tree handed to the first normalizer.</param>
 /// <param name="tf">Factory forwarded unchanged to every normalizer.</param>
 /// <returns>The result returned by the last normalizer (the input if <c>tns</c> is empty).</returns>
 public override Tree NormalizeWholeTree(Tree tree, ITreeFactory tf)
 {
     Tree current = tree;
     foreach (TreeNormalizer normalizer in tns)
     {
         current = normalizer.NormalizeWholeTree(current, tf);
     }
     return current;
 }
Пример #10
0
 /// <summary>Creates the registration search form.</summary>
 /// <param name="t">Tree view stored in <c>tree</c>.</param>
 /// <param name="r">File status stored in <c>treeEnum</c>.</param>
 /// <param name="_parentForm">Main form stored in <c>ParentForm</c>.</param>
 /// <param name="Itree">Tree factory stored in <c>treeFactory</c>.</param>
 public frmRegistSearch(TreeView t, FileStatus r, frmFileMain _parentForm, ITreeFactory Itree)
 {
     InitializeComponent();

     // React to edits of the search title text box.
     txtSearchTitle._TextBox.TextChanged += txtSearchTitle_TextChanged;

     treeFactory = Itree;
     ParentForm = _parentForm;
     treeEnum = r;
     tree = t;
 }
 /// <summary>Creates a dependency parser from a grammar, lexicon, options and indices.</summary>
 /// <param name="dg">Dependency grammar stored in <c>dg</c>.</param>
 /// <param name="lex">Lexicon stored in <c>lex</c>.</param>
 /// <param name="op">Options; also the source of the language pack (<c>op.Langpack()</c>).</param>
 /// <param name="wordIndex">Word index stored in <c>wordIndex</c>.</param>
 /// <param name="tagIndex">Tag index stored in <c>tagIndex</c>.</param>
 public ExhaustiveDependencyParser(IDependencyGrammar dg, ILexicon lex, Options op, IIndex <string> wordIndex, IIndex <string> tagIndex)
 {
     tf = new LabeledScoredTreeFactory();

     // Model components supplied by the caller.
     this.dg = dg;
     this.lex = lex;
     this.wordIndex = wordIndex;
     this.tagIndex = tagIndex;

     // Options and the language pack derived from them.
     this.op = op;
     tlp = op.Langpack();
 }
 /// <summary>Creates a multi-word processor bound to its enclosing <c>AnCoraProcessor</c>.</summary>
 /// <param name="_enclosing">Enclosing processor instance.</param>
 /// <param name="tnf">Factory for tree normalizers; one normalizer is created immediately.</param>
 /// <param name="tf">Tree factory stored in <c>tf</c>.</param>
 /// <param name="ner">Flag stored in <c>ner</c>.</param>
 public MultiWordProcessor(AnCoraProcessor _enclosing, IFactory <TreeNormalizer> tnf, ITreeFactory tf, bool ner)
 {
     this._enclosing = _enclosing;
     this.tf = tf;
     this.ner = ner;

     // NB: TreeNormalizer is not thread-safe, so the factory is accepted and
     // kept, and this instance creates its own normalizer from it.
     this.tnf = tnf;
     tn = tnf.Create();
 }
        /// <summary>Creates a node with the given parent and value.</summary>
        /// <param name="parent">Parent stored in <c>_parent</c>.</param>
        /// <param name="value">Value stored in <c>_value</c>.</param>
        /// <param name="treeFactory">Factory that creates this node's child collection.</param>
        /// <exception cref="ArgumentNullException"><paramref name="treeFactory"/> is null.</exception>
        public TreeNode(ITreeNode <T> parent, T value, ITreeFactory <T> treeFactory)
        {
            if (treeFactory == null)
            {
                throw new ArgumentNullException(nameof(treeFactory));
            }

            // The child collection is created for this node before parent and value are stored.
            Children = treeFactory.CreateCollection(this);

            _parent = parent;
            _value = value;
        }
 /// <summary>Normalize a whole tree (TueBa-D/Z adaptation).</summary>
 /// <remarks>
 /// Only repairs non-unary roots: when the node's label value equals
 /// <c>root</c> and it has more than one child, all children are moved under a
 /// new node (also labelled <c>root</c>) which becomes the single child.
 /// Nothing else is changed. The new node comes from the tree's own factory;
 /// <paramref name="tf"/> is not used.
 /// </remarks>
 /// <param name="tree">Root of the tree; modified in place.</param>
 /// <param name="tf">Unused.</param>
 /// <returns>The same <paramref name="tree"/> instance.</returns>
 public override Tree NormalizeWholeTree(Tree tree, ITreeFactory tf)
 {
     // we just want the non-unary root fixed.
     if (!tree.Label().Value().Equals(root) || tree.Children().Length <= 1)
     {
         return tree;
     }

     Tree wrapper = tree.TreeFactory().NewTreeNode(root, tree.GetChildrenAsList());
     tree.SetChildren(new Tree[1]);
     tree.SetChild(0, wrapper);
     return tree;
 }
Пример #15
0
 /// <summary>
 /// Transforms a tree, keeping a node whose label text starts with "ROOT" intact.
 /// </summary>
 /// <param name="tree">Tree to transform.</param>
 /// <param name="tf">Factory used to build the result.</param>
 /// <returns>
 /// For a "ROOT" node: a new node with the same label over the transformed first
 /// child. Otherwise the result of <c>TransformNode</c> on the tree itself.
 /// </returns>
 internal virtual Tree TransformRoot(Tree tree, ITreeFactory tf)
 {
     // XXXX TODO: use tlp and don't assume 1 daughter of ROOT!
     // if (tlp.isStartSymbol(tlp.basicCategory(tree.label().value())))
     bool isRoot = tree.Label().ToString().StartsWith("ROOT");
     if (!isRoot)
     {
         return TransformNode(tree, tf);
     }

     // leave the root intact: only its first daughter is transformed
     var rootLabel = tree.Label();
     Tree body = TransformNode(tree.Children()[0], tf);
     return tf.NewTreeNode(rootLabel, Java.Util.Collections.SingletonList(body));
 }
 /// <summary>Creates the raw-tree normalizer for its enclosing <c>ATBArabicDataset</c>.</summary>
 /// <param name="_enclosing">Enclosing dataset; its <c>encoding</c> selects the Buckwalter mapping.</param>
 /// <param name="outFile">Writer stored in <c>outfile</c>.</param>
 /// <param name="flatFile">Writer stored in <c>flatFile</c>.</param>
 public ArabicRawTreeNormalizer(ATBArabicDataset _enclosing, PrintWriter outFile, PrintWriter flatFile)
 {
     this._enclosing = _enclosing;

     // UTF-8 datasets use the default mapping, all others the Buckwalter(true) variant.
     if (_enclosing.encoding == Dataset.Encoding.Utf8)
     {
         encodingMap = new Buckwalter();
     }
     else
     {
         encodingMap = new Buckwalter(true);
     }

     outfile = outFile;
     this.flatFile = flatFile;

     nullFilter = new ArabicTreeNormalizer.ArabicEmptyFilter();
     aOverAFilter = new BobChrisTreeNormalizer.AOverAFilter();
     tf = new LabeledScoredTreeFactory();
     tlp = new ArabicTreebankLanguagePack();
 }
Пример #17
0
 // todo [cdm 2015]: These next two methods duplicate the Tree.valueOf methods!
 /// <summary>Simple tree reading utility method.</summary>
 /// <remarks>
 /// Given a tree formatted as a PTB string, returns a Tree made by a specific
 /// TreeFactory. Only the first tree in the string is read.
 /// </remarks>
 /// <param name="ptbTreeString">Tree in PTB bracket notation.</param>
 /// <param name="treeFactory">Factory handed to the <c>PennTreeReader</c> that builds the tree.</param>
 /// <returns>Whatever <c>PennTreeReader.ReadTree</c> returns for the string.</returns>
 /// <exception cref="InvalidOperationException">
 /// Reading failed with an <c>IOException</c>; the original exception is the inner exception.
 /// </exception>
 public static Tree ReadTree(string ptbTreeString, ITreeFactory treeFactory)
 {
     try
     {
         PennTreeReader ptr = new PennTreeReader(new StringReader(ptbTreeString), treeFactory);
         return(ptr.ReadTree());
     }
     catch (IOException ex)
     {
         // System.Exception has no constructor taking only an exception; wrap with
         // message + inner exception so the cause and its stack trace are kept.
         throw new InvalidOperationException(ex.Message, ex);
     }
 }
        /// <summary>
        /// Normalize a whole tree -- one can assume that this is the root.
        /// </summary>
        /// <remarks>
        /// The tree is first pruned with <c>emptyFilter</c> (described by the
        /// original author as deleting empty elements, i.e. ones with nonterminal
        /// tag label '-NONE-'), then unary A over A nodes are spliced out with
        /// <c>aOverAFilter</c>. The input is assumed to be non-null, but the
        /// result may be null if there are no real words.
        /// </remarks>
        /// <param name="tree">Root of the tree to normalize; must not be null.</param>
        /// <param name="tf">Factory forwarded to pruning and splicing.</param>
        /// <returns>The normalized tree, or null when pruning leaves nothing.</returns>
        public override Tree NormalizeWholeTree(Tree tree, ITreeFactory tf)
        {
            Tree pruned = tree.Prune(emptyFilter, tf);

            return pruned == null ? null : pruned.SpliceOut(aOverAFilter, tf);
        }
        //  static Set preterminals = new HashSet();
        /// <summary>
        /// Builds a copy of the tree in which every word is split into one
        /// preterminal per character.
        /// </summary>
        /// <remarks>
        /// For a preterminal with tag <c>T</c> and word <c>w</c>, the result is a
        /// node <c>T</c> whose children are preterminals over the single
        /// characters of <c>w</c>, tagged <c>T_S</c> (one-character word),
        /// <c>T_B</c> (first), <c>T_E</c> (last) or <c>T_M</c> (in between).
        /// Other nodes are rebuilt with the same label value over their
        /// transformed children.
        /// </remarks>
        /// <param name="tree">Tree to transform; it is not modified.</param>
        /// <returns>The newly built tree.</returns>
        public override Tree TransformTree(Tree tree)
        {
            ITreeFactory tf  = tree.TreeFactory();
            string       tag = tree.Label().Value();

            if (tree.IsPreTerminal())
            {
                string       word        = tree.FirstChild().Label().Value();
                IList <Tree> newPreterms = new List <Tree>();
                // Iterate over the characters of the word itself; the previous bound
                // ("size") was not declared anywhere in this method.
                for (int i = 0; i < word.Length; i++)
                {
                    Tree   newLeaf = tf.NewLeaf(word[i].ToString());
                    string suffix;
                    if (word.Length == 1)
                    {
                        suffix = "_S";
                    }
                    else if (i == 0)
                    {
                        suffix = "_B";
                    }
                    else if (i == word.Length - 1)
                    {
                        suffix = "_E";
                    }
                    else
                    {
                        suffix = "_M";
                    }
                    newPreterms.Add(tf.NewTreeNode(tag + suffix, Java.Util.Collections.SingletonList <Tree>(newLeaf)));
                }
                return(tf.NewTreeNode(tag, newPreterms));
            }

            IList <Tree> newChildren = new List <Tree>();
            foreach (Tree child in tree.Children())
            {
                newChildren.Add(TransformTree(child));
            }
            return(tf.NewTreeNode(tag, newChildren));
        }
Пример #20
0
 /// <summary>
 /// Normalizes every node label of a tree and then the tree as a whole.
 /// </summary>
 /// <remarks>
 /// Leaf values go through <c>NormalizeTerminal</c>, all other values through
 /// <c>NormalizeNonterminal</c>; labels are updated in place. Afterwards the
 /// tree is passed to <c>NormalizeWholeTree</c>.
 /// </remarks>
 /// <param name="tree">Tree whose labels are rewritten.</param>
 /// <param name="tn">Normalizer applied to the labels and to the whole tree.</param>
 /// <param name="tf">Factory forwarded to <c>NormalizeWholeTree</c>.</param>
 /// <returns>The result of <c>tn.NormalizeWholeTree</c>.</returns>
 public static Tree NormalizeTree(Tree tree, TreeNormalizer tn, ITreeFactory tf)
 {
     foreach (Tree node in tree)
     {
         bool leaf = node.IsLeaf();
         var label = node.Label();
         var normalized = leaf
             ? tn.NormalizeTerminal(label.Value())
             : tn.NormalizeNonterminal(label.Value());
         label.SetValue(normalized);
     }
     return tn.NormalizeWholeTree(tree, tf);
 }
Пример #21
0
        /// <summary>Creates the main view and wires its menu and tree events.</summary>
        /// <param name="treeFactory">Tree factory; must not be null.</param>
        /// <param name="saveService">Save service; must not be null.</param>
        /// <param name="provider">File view provider; must not be null.</param>
        /// <exception cref="ArgumentNullException">Any argument is null (checked after the designer code has run).</exception>
        public MainView(ITreeFactory treeFactory, ISaveInformationService saveService, FileViewProvider provider)
        {
            InitializeComponent();

            if (treeFactory is null)
            {
                throw new ArgumentNullException(nameof(treeFactory));
            }
            _treeFactory = treeFactory;

            if (saveService is null)
            {
                throw new ArgumentNullException(nameof(saveService));
            }
            _saveService = saveService;

            if (provider is null)
            {
                throw new ArgumentNullException(nameof(provider));
            }
            _provider = provider;

            // Event wiring.
            tsmOpen.Click += TsmOpenClick;
            tsmSave.Click += TsmSaveClick;
            treeView1.BeforeSelect += TreeViewBeforeSelect;

            // Initial label texts.
            label1.Text = "Total size: 0";
            label2.Text = "Selected files: 0";
        }
Пример #22
0
 /// <summary>
 /// Normalize a whole tree -- one can assume that this is the
 /// root.
 /// </summary>
 /// <remarks>
 /// Steps, in order: make sure the tree is rooted in the language pack's start
 /// symbol (via <c>FixNonUnaryRoot</c> for unlabeled roots, or by adding a new
 /// root node), prune with <c>emptyFilter</c> and splice out with
 /// <c>aOverAFilter</c>, optionally insert NPs in PPs, and give the value
 /// "DUMMY" to phrasal nodes whose value is null or empty.
 /// The tree must not be null; null is returned when pruning leaves nothing.
 /// </remarks>
 /// <param name="tree">Root of the tree to normalize; must not be null.</param>
 /// <param name="tf">Factory used for new nodes and forwarded to pruning/splicing.</param>
 /// <returns>The normalized tree, or null when nothing is left of it.</returns>
 public override Tree NormalizeWholeTree(Tree tree, ITreeFactory tf)
 {
     // add an extra root to non-unary roots
     if (tree.Value() == null)
     {
         tree = FixNonUnaryRoot(tree, tf);
     }
     else
     {
         if (!tree.Value().Equals(tlp.StartSymbol()))
         {
             tree = tf.NewTreeNode(tlp.StartSymbol(), Java.Util.Collections.SingletonList(tree));
         }
     }
     // Prune may return null when every node is filtered out; stop here instead of
     // dereferencing the missing tree.
     tree = tree.Prune(emptyFilter, tf);
     if (tree == null)
     {
         return(null);
     }
     tree = tree.SpliceOut(aOverAFilter, tf);
     if (tree == null)
     {
         // Defensive: the steps below cannot run on a null tree.
         return(null);
     }
     // insert NPs in PPs if you're supposed to do that
     if (insertNPinPP)
     {
         InsertNPinPPall(tree);
     }
     foreach (Tree t in tree)
     {
         if (t.IsLeaf() || t.IsPreTerminal())
         {
             continue;
         }
         if (t.Value() == null || t.Value().Equals(string.Empty))
         {
             t.SetValue("DUMMY");
         }
         // there's also a '--' category
         if (t.Value().Matches("--.*"))
         {
             continue;
         }
         // fix a bug in the ACL08 German tiger treebank
         // NOTE(review): this branch cannot be reached as written, because null/empty
         // values were replaced by "DUMMY" above. Left unchanged so that the produced
         // labels stay the same; confirm whether the DL correction should run first.
         string cat = t.Value();
         if (cat == null || cat.Equals(string.Empty))
         {
             if (t.NumChildren() == 3 && t.FirstChild().Label().Value().Equals("NN") && t.GetChild(1).Label().Value().Equals("$."))
             {
                 log.Info("Correcting treebank error: giving phrase label DL to " + t);
                 t.Label().SetValue("DL");
             }
         }
     }
     return(tree);
 }
 /// <summary>
 /// Normalizes a complete tree: removes filtered nodes, strips enclosing
 /// unlabeled nodes and adds the start symbol as root.
 /// </summary>
 /// <param name="tree">Root of the tree to normalize; must not be null.</param>
 /// <param name="tf">Factory forwarded to pruning/splicing and used to build the new root node.</param>
 /// <returns>The normalized tree, or null when nothing is left of it.</returns>
 public override Tree NormalizeWholeTree(Tree tree, ITreeFactory tf)
 {
     // Prune may return null when every node is filtered out; calling SpliceOut on
     // that result would throw instead of reporting "no tree" to the caller.
     tree = tree.Prune(hebrewEmptyFilter, tf);
     if (tree == null)
     {
         return(null);
     }
     tree = tree.SpliceOut(aOverAFilter, tf);
     //Add start symbol so that the root has only one sub-state. Escape any enclosing brackets.
     //If the "tree" consists entirely of enclosing brackets e.g. ((())) then this method
     //will return null. In this case, readers e.g. PennTreeReader will try to read the next tree.
     while (tree != null && (tree.Value() == null || tree.Value().Equals(string.Empty)) && tree.NumChildren() <= 1)
     {
         tree = tree.FirstChild();
     }
     // object.Equals is null-safe: an unlabeled node with several children is wrapped
     // in the start symbol instead of throwing on tree.Value().
     if (tree != null && !object.Equals(tree.Value(), tlp.StartSymbol()))
     {
         tree = tf.NewTreeNode(tlp.StartSymbol(), Collections.SingletonList(tree));
     }
     return(tree);
 }
Пример #24
0
 /// <summary>Build a custom binarizer for Trees.</summary>
 /// <remarks>All arguments are stored as given; the tree factory is always a
 /// <c>LabeledScoredTreeFactory</c> over a <c>CategoryWordTagFactory</c>.</remarks>
 /// <param name="hf">the HeadFinder to use in binarization</param>
 /// <param name="tlp">the TreebankLanguagePack to use</param>
 /// <param name="insideFactor">whether to do inside markovization</param>
 /// <param name="markovFactor">whether to markovize the binary rules</param>
 /// <param name="markovOrder">the markov order to use; only relevant with markovFactor=true</param>
 /// <param name="useWrappingLabels">whether to use state names (labels) that allow wrapping from right to left</param>
 /// <param name="unaryAtTop">
 /// Whether to actually materialize the unary that rewrites
 /// a passive state to the active rule at the top of an original local
 /// tree.  This is used only when compaction is happening
 /// </param>
 /// <param name="selectiveSplitThreshold">if selective split is used, this will be the threshold used to decide which state splits to keep</param>
 /// <param name="markFinalStates">whether or not to make the state names (labels) of the final active states distinctive</param>
 /// <param name="simpleLabels">stored in the <c>simpleLabels</c> field; its effect is not visible from this constructor</param>
 /// <param name="noRebinarization">if true, a node which already has exactly two children is not altered</param>
 public TreeBinarizer(IHeadFinder hf, ITreebankLanguagePack tlp, bool insideFactor, bool markovFactor, int markovOrder, bool useWrappingLabels, bool unaryAtTop, double selectiveSplitThreshold, bool markFinalStates, bool simpleLabels, bool noRebinarization)
 {
     // Collaborators.
     this.hf = hf;
     this.tlp = tlp;
     tf = new LabeledScoredTreeFactory(new CategoryWordTagFactory());

     // Markovization settings.
     this.insideFactor = insideFactor;
     this.markovFactor = markovFactor;
     this.markovOrder = markovOrder;

     // Labelling and structure settings.
     this.useWrappingLabels = useWrappingLabels;
     this.unaryAtTop = unaryAtTop;
     this.markFinalStates = markFinalStates;
     this.simpleLabels = simpleLabels;
     this.noRebinarization = noRebinarization;

     this.selectiveSplitThreshold = selectiveSplitThreshold;
 }
Пример #25
0
        /// <summary>
        /// Rebuilds a subtree, moving punctuation found at the edges of each child
        /// up to this level and dropping punctuation at the edges of this node.
        /// </summary>
        /// <remarks>
        /// Leaves and preterminals are copied. For any other node, each child's
        /// preterminals (from <c>PreTerms</c>) are inspected: those at the front
        /// that satisfy <c>IsPunc</c> are placed before the transformed child and
        /// those at the back after it. A transformed child without children is
        /// omitted. Finally, punctuation at the start and end of the collected
        /// child list is removed.
        /// </remarks>
        /// <param name="tree">Subtree to transform.</param>
        /// <param name="tf">Factory used to build all new nodes.</param>
        /// <returns>The newly built subtree.</returns>
        internal virtual Tree TransformNode(Tree tree, ITreeFactory tf)
        {
            if (tree.IsLeaf())
            {
                return tf.NewLeaf(tree.Label());
            }
            if (tree.IsPreTerminal())
            {
                Tree leafCopy = tf.NewLeaf(tree.Children()[0].Label());
                return tf.NewTreeNode(tree.Label(), Java.Util.Collections.SingletonList(leafCopy));
            }

            LinkedList <Tree> rebuilt = new LinkedList <Tree>();

            // promote lower punctuation
            foreach (Tree kid in tree.GetChildrenAsList())
            {
                LinkedList <Tree> kidPreTerms = PreTerms(kid);

                // Punctuation at the front of the child goes before it.
                while (!kidPreTerms.IsEmpty() && IsPunc(kidPreTerms.GetFirst()))
                {
                    rebuilt.Add(kidPreTerms.GetFirst());
                    kidPreTerms.RemoveFirst();
                }

                Tree transformedKid = TransformNode(kid, tf);
                if (transformedKid.Children().Length > 0)
                {
                    rebuilt.Add(transformedKid);
                }

                // Punctuation at the back of the child goes after it, in original order.
                LinkedList <Tree> trailing = new LinkedList <Tree>();
                while (!kidPreTerms.IsEmpty() && IsPunc(kidPreTerms.GetLast()))
                {
                    trailing.AddFirst(kidPreTerms.GetLast());
                    kidPreTerms.RemoveLast();
                }
                Sharpen.Collections.AddAll(rebuilt, trailing);
            }

            // remove local punctuation
            while (!rebuilt.IsEmpty() && IsPunc(rebuilt.GetFirst()))
            {
                rebuilt.RemoveFirst();
            }
            while (!rebuilt.IsEmpty() && IsPunc(rebuilt.GetLast()))
            {
                rebuilt.RemoveLast();
            }
            return tf.NewTreeNode(tree.Label(), rebuilt);
        }
Пример #26
0
        /// <summary>
        /// Builds one tree per batch of data and adds each to the batch list.
        /// </summary>
        /// <remarks>
        /// The batches come from <c>GetBatchData</c>. For every batch a new tree
        /// is requested from the factory, the batch items are added one by one
        /// (reporting progress after each item) and the finished tree is passed
        /// to <c>AddToBatchList</c>.
        /// </remarks>
        /// <param name="factory">Factory that creates the trees.</param>
        /// <param name="degree">Degree passed to <c>factory.GetTree</c>.</param>
        /// <typeparam name="T">
        /// Element type of the data (described by the original author as string/double).
        /// </typeparam>
        public void BuildBatchTree <T>(ITreeFactory factory, int degree)
        {
            List <List <T> > batches = this.GetBatchData <T>();

            foreach (List <T> batch in batches)
            {
                ITree <T> batchTree = factory.GetTree <T>(degree);

                int total = batch.Count;
                for (int position = 0; position < total; position++)
                {
                    batchTree.Add(batch[position]);
                    this.CalculateProgress(position, total);
                }

                this.AddToBatchList(batchTree);
            }
        }
Пример #27
0
        /// <summary>
        /// Gives a root node the start symbol, repairing roots with more than one child.
        /// </summary>
        /// <remarks>
        /// If the node has exactly two children, the first phrasal and the last
        /// tagged as sentence-final punctuation, the punctuation is moved to the
        /// end of the first child and the node itself is relabelled with the start
        /// symbol. Otherwise the node is relabelled <c>nonUnaryRoot</c> and wrapped
        /// in a new start-symbol node.
        /// </remarks>
        /// <param name="t">Root node to fix; modified in place.</param>
        /// <param name="tf">Factory used to create the new root when wrapping is needed.</param>
        /// <returns>The node that is now the root.</returns>
        private Tree FixNonUnaryRoot(Tree t, ITreeFactory tf)
        {
            IList <Tree> kids = t.GetChildrenAsList();

            if (kids.Count == 2 && t.FirstChild().IsPhrasal() && tlp.IsSentenceFinalPunctuationTag(t.LastChild().Value()))
            {
                IList <Tree> grandKids = t.FirstChild().GetChildrenAsList();
                grandKids.Add(t.LastChild());
                t.FirstChild().SetChildren(grandKids);
                // Remove the last child by position. IList<T>.Remove takes an item,
                // not an index, so RemoveAt is the call that expresses this.
                kids.RemoveAt(kids.Count - 1);
                t.SetChildren(kids);
                t.SetValue(tlp.StartSymbol());
            }
            else
            {
                t.SetValue(nonUnaryRoot);
                t = tf.NewTreeNode(tlp.StartSymbol(), Java.Util.Collections.SingletonList(t));
            }
            return(t);
        }
Пример #28
0
 /// <summary>Creates the combined annotator/binarizer from head finders, language parameters and options.</summary>
 /// <param name="annotationHF">Head finder given to the annotator.</param>
 /// <param name="binarizationHF">Head finder given to the binarizer.</param>
 /// <param name="tlpParams">Language-specific parameters; also the source of the language pack.</param>
 /// <param name="forceCNF">Flag stored in <c>forceCNF</c>.</param>
 /// <param name="insideFactor">Forwarded to the <c>TreeBinarizer</c>.</param>
 /// <param name="doSubcategorization">If true a <c>TreeAnnotator</c> is used, otherwise a <c>TreeNullAnnotator</c>.</param>
 /// <param name="op">Options; <c>op.trainOptions</c> drives the remaining configuration.</param>
 public TreeAnnotatorAndBinarizer(IHeadFinder annotationHF, IHeadFinder binarizationHF, ITreebankLangParserParams tlpParams, bool forceCNF, bool insideFactor, bool doSubcategorization, Options op)
 {
     trainOptions = op.trainOptions;

     if (doSubcategorization)
     {
         annotator = new TreeAnnotator(annotationHF, tlpParams, op);
     }
     else
     {
         annotator = new TreeAnnotatorAndBinarizer.TreeNullAnnotator(annotationHF);
     }

     binarizer = new TreeBinarizer(
         binarizationHF,
         tlpParams.TreebankLanguagePack(),
         insideFactor,
         trainOptions.markovFactor,
         trainOptions.markovOrder,
         trainOptions.CompactGrammar() > 0,
         trainOptions.CompactGrammar() > 1,
         trainOptions.HselCut,
         trainOptions.markFinalStates,
         trainOptions.simpleBinarizedLabels,
         trainOptions.noRebinarization);

     // Optional components are null when the matching option is off.
     postSplitter = trainOptions.selectivePostSplit ? new PostSplitter(tlpParams, op) : null;

     tf = new LabeledScoredTreeFactory(new CategoryWordTagFactory());
     tlp = tlpParams.TreebankLanguagePack();
     this.forceCNF = forceCNF;

     annotatedRuleCounts = trainOptions.printAnnotatedRuleCounts ? new ClassicCounter <Tree>() : null;
     annotatedStateCounts = trainOptions.printAnnotatedStateCounts ? new ClassicCounter <string>() : null;
 }
Пример #29
0
        // Nested nominal group containing period punctuation
        // Match boundaries for subtrees created
        // Match candidate preposition
        // Headed by a group that was generated from
        // multi-word token expansion and that we
        // wish to expand further
        // With an NP on the left (-> this is a
        // prep. phrase) and not preceded by any
        // other prepositions
        // Match candidate preposition
        // Which is the first child in a group that
        // was generated from multi-word token
        // expansion and that we wish to expand
        // further
        // With an NP on the left (-> this is a
        // prep. phrase) and not preceded by any
        // other prepositions
        // In one of our expanded phrases (match
        // bounds of this expanded phrase; these form
        // the left edge of first new subtree and the
        // right edge of the second new subtree)
        // Fetch more bounds: node to immediate left
        // of cc is the right edge of the first new
        // subtree, and node to right of cc is the
        // left edge of the second new subtree
        //
        // NB: left1 may be the same as right1; likewise
        // for the second tree
        // "en opinion del X," "además del Y"
        // -> "(en opinion de) (el X)," "(además de) (el Y)"
        // "del X al Y"
        // ---------
        // Final cleanup operations
        // Should be first-ish
        // Should not happen until the last moment! The function words
        // being targeted have weaker "scope" than others earlier
        // targeted, and so we don't want to clump things around them
        // until we know we have the right to clump
        // Verb phrase-related cleanup.. order is important!
        // Fixes for specific common phrases
        // Lastly..
        //
        // These final fixes are not at all linguistically motivated -- just need to make the trees less dirty
        /// <summary>
        /// Recognize candidate patterns for expansion in the given tree and
        /// perform the expansions.
        /// </summary>
        /// <remarks>
        /// Runs <c>firstStepExpansions</c> repeatedly until the tree stops
        /// changing, then <c>intermediateExpansions</c>, then whole-tree
        /// normalization, then <c>finalCleanup</c>. See the class documentation
        /// for more information.
        /// </remarks>
        /// <param name="t">Tree to expand.</param>
        /// <param name="tn">Normalizer applied between the intermediate and final steps.</param>
        /// <param name="tf">Factory forwarded to the normalizer.</param>
        /// <returns>The tree after all steps.</returns>
        public virtual Tree ExpandPhrases(Tree t, TreeNormalizer tn, ITreeFactory tf)
        {
            // Repeat the first-step patterns until a pass changes nothing. This is
            // needed for nested expressions like "para tratar de regresar al
            // empleo." The passes produce many "intermediate" tree structures
            // which are cleaned up below.
            while (true)
            {
                Tree snapshot = t.DeepCopy();
                t = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ProcessPatternsOnTree(firstStepExpansions, t);
                if (t.Equals(snapshot))
                {
                    break;
                }
            }

            // Now clean up intermediate tree structures
            t = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ProcessPatternsOnTree(intermediateExpansions, t);

            // Normalize first to allow for contraction expansion, etc.
            t = tn.NormalizeWholeTree(t, tf);

            // Final cleanup
            return Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ProcessPatternsOnTree(finalCleanup, t);
        }
Пример #30
0
        /// <summary>
        /// Read parse trees from a Reader.
        /// </summary>
        /// <remarks>
        /// If the first token starts with <c>*x*x*x</c>, tokens are consumed until
        /// four tokens with that prefix have been read or the tokenizer is exhausted.
        /// </remarks>
        /// <param name="input">The Reader</param>
        /// <param name="tf">TreeFactory -- factory to create some kind of Tree</param>
        /// <param name="tn">the method of normalizing trees</param>
        /// <param name="st">Tokenizer that divides up Reader</param>
        public PennTreeReader(TextReader input, ITreeFactory tf, TreeNormalizer tn, ITokenizer<string> st)
        {
            reader = input;
            treeFactory = tf;
            treeNormalizer = tn;
            tokenizer = st;

            // check for whacked out headers still present in Brown corpus in Treebank 3
            string head = null;
            if (st.HasNext())
            {
                head = st.Peek();
            }
            if (head == null || !head.StartsWith("*x*x*x"))
            {
                return;
            }

            int markersSeen = 0;
            while (markersSeen < 4 && st.HasNext())
            {
                string token = st.Next();
                if (token != null && token.StartsWith("*x*x*x"))
                {
                    markersSeen++;
                }
            }
        }
        /// <summary>Read parse trees from a Reader.</summary>
        /// <remarks>
        /// The XML document is parsed inside the constructor and its
        /// <c>NodeSent</c> elements are stored for later use. Any exception
        /// raised while parsing is printed and not rethrown.
        /// </remarks>
        /// <param name="in">Reader</param>
        /// <param name="tf">TreeFactory -- factory to create some kind of Tree</param>
        /// <param name="tn">the method of normalizing trees</param>
        public FrenchXMLTreeReader(Reader @in, ITreeFactory tf, TreeNormalizer tn)
        {
            // The language pack supplies the encoding used to wrap the reader.
            ITreebankLanguagePack languagePack = new FrenchTreebankLanguagePack();

            stream = new ReaderInputStream(@in, languagePack.GetEncoding());
            treeFactory = tf;
            treeNormalizer = tn;

            DocumentBuilder builder = XMLUtils.GetXmlParser();
            try
            {
                IDocument document = builder.Parse(stream);
                IElement documentRoot = document.GetDocumentElement();
                sentences = documentRoot.GetElementsByTagName(NodeSent);
                sentIdx = 0;
            }
            catch (Exception failure)
            {
                Sharpen.Runtime.PrintStackTrace(failure);
            }
        }
 /// <summary>Creates the controller with its repository and tree factory.</summary>
 /// <param name="treeRepo">Repository stored in <c>_treeRepo</c>.</param>
 /// <param name="treeFactory">Factory stored in <c>_treeFactory</c>.</param>
 public TreeController(IGenericRepository<ITree, Guid> treeRepo, ITreeFactory treeFactory)
 {
     _treeFactory = treeFactory;
     _treeRepo = treeRepo;
 }
Пример #33
0
 /// <summary>
 /// Read parse trees from a Reader, tokenizing it with a new
 /// <c>PennTreebankTokenizer</c>.
 /// </summary>
 /// <remarks>Delegates to the four-argument constructor.</remarks>
 /// <param name="input">The Reader; also the source for the tokenizer</param>
 /// <param name="tf">TreeFactory -- factory to create some kind of Tree</param>
 /// <param name="tn">the method of normalizing trees</param>
 public PennTreeReader(TextReader input, ITreeFactory tf, TreeNormalizer tn) :
     this(input, tf, tn, new PennTreebankTokenizer(input))
 {
 }
Пример #34
0
         /// <summary>
 /// Read parse trees from a <c>Reader</c>, tokenizing it with a new
 /// <c>PennTreebankTokenizer</c> and passing no tree normalizer (null).
 /// </summary>
 /// <remarks>Delegates to the four-argument constructor.</remarks>
 /// <param name="input">the Reader; also the source for the tokenizer</param>
 /// <param name="tf">TreeFactory -- factory to create some kind of Tree</param>
 public PennTreeReader(TextReader input, ITreeFactory tf) :
     this(input, tf, null, new PennTreebankTokenizer(input))
 {
 }
Пример #35
0
 /// <summary>
 /// Normalize a whole tree -- one can assume that this is the
 /// root.  The tree is pruned with <c>emptyFilter</c> (described by the
 /// original author as deleting empty elements, i.e. ones with nonterminal
 /// tag label '-NONE-') and unary A over A nodes are spliced out with
 /// <c>aOverAFilter</c>.
 /// </summary>
 /// <param name="tree">Root of the tree to normalize; must not be null.</param>
 /// <param name="tf">Factory forwarded to pruning and splicing.</param>
 /// <returns>The normalized tree, or null when pruning leaves nothing.</returns>
 public override Tree NormalizeWholeTree(Tree tree, ITreeFactory tf)
 {
     // Prune may return null when every node is filtered out; return that to the
     // caller instead of dereferencing it.
     Tree pruned = tree.Prune(emptyFilter.Test, tf);
     return pruned == null ? null : pruned.SpliceOut(aOverAFilter.Test, tf);
 }
Пример #36
0
 /// <summary>
 /// Specify your own <c>TreeFactory</c>;
 /// uses a <c>PennTreebankTokenizer</c>, and a default <c>TreeNormalizer</c>.
 /// </summary>
 /// <remarks>Delegates to the two-argument constructor with a new <c>TreeNormalizer</c>.</remarks>
 /// <param name="tf">The TreeFactory to use in building Tree objects to return</param>
 public PennTreeReaderFactory(ITreeFactory tf) :
     this(tf, new TreeNormalizer())
 {
 }
Пример #37
0
 /// <summary>
 /// Normalize a whole tree -- this method assumes that the argument
 /// that it is passed is the root of a complete <c>Tree</c>.
 /// It is normally implemented as a Tree-walking routine.
 /// This method may return <c>null</c>. This is interpreted to
 /// mean that this is a tree that should not be included in further
 /// processing.  PennTreeReader recognizes this return value, and
 /// asks for another Tree from the input Reader.
 /// </summary>
 /// <remarks>This base implementation returns the tree unchanged.</remarks>
 /// <param name="tree">The tree to be normalized</param>
 /// <param name="tf">the TreeFactory to create new nodes (if needed); unused here</param>
 /// <returns>the normalized tree</returns>
 public virtual Tree NormalizeWholeTree(Tree tree, ITreeFactory tf)
 {
     return tree;
 }
Пример #38
0
 /// <summary>
 /// Specify your own <c>TreeFactory</c> and <c>TreeNormalizer</c>;
 /// uses a <c>PennTreebankTokenizer</c>.
 /// </summary>
 /// <param name="tf">The TreeFactory to use in building Tree objects to return</param>
 /// <param name="tn">The TreeNormalizer to use</param>
 public PennTreeReaderFactory(ITreeFactory tf, TreeNormalizer tn)
 {
     // Both collaborators are stored as given.
     this.tn = tn;
     this.tf = tf;
 }