Exemple #1
0
 /// <summary>Reads every parse tree available from a Reader into this collection.</summary>
 /// <remarks>
 /// Trees are read one at a time until the tree reader returns <code>null</code>.
 /// Each tree may optionally be encased in parens to allow for Penn
 /// Treebank style trees.
 /// An <code>IOException</code> raised while reading is logged and not rethrown.
 /// </remarks>
 /// <param name="r">
 /// The reader to read trees from.  It is handed to the tree reader as given,
 /// so wrap it in a buffered reader beforehand if buffering is wanted.
 /// </param>
 /// <param name="id">
 /// An ID for where these trees come from (arbitrary, but typically
 /// something like a filename).  May be <code>null</code>, in which case no
 /// document ID is recorded on the trees.
 /// </param>
 public void Load(Reader r, string id)
 {
     try
     {
         // could throw an IO exception?
         ITreeReader reader    = TreeReaderFactory().NewTreeReader(r);
         int         treeCount = 0;
         Tree        tree      = reader.ReadTree();
         while (tree != null)
         {
             // Record the origin on the root label so the tree can be traced back later.
             IHasIndex indexed = tree.Label() as IHasIndex;
             if (indexed != null)
             {
                 if (id != null)
                 {
                     indexed.SetDocID(id);
                 }
                 indexed.SetSentIndex(treeCount);
             }
             parseTrees.Add(tree);
             treeCount++;
             tree = reader.ReadTree();
         }
     }
     catch (IOException e)
     {
         log.Info("load IO Exception: " + e);
     }
 }
            /// <summary>
            /// Reads the next tree from the current tree reader, moving on to
            /// further files when the current one has no more trees.
            /// </summary>
            /// <remarks>
            /// When the tree's label implements <code>IHasIndex</code>, the label is
            /// stamped with the running line id (which is then incremented) and the
            /// name of the current file.
            /// An <code>IOException</code> is reported on standard error and rethrown
            /// wrapped in an <code>Exception</code>.
            /// </remarks>
            /// <returns>The next tree, or <code>null</code> when no tree could be read.</returns>
            private Tree PrimeNextTree()
            {
                Tree t = null;

                try
                {
                    t = this.tr.ReadTree();
                    // Keep advancing while the current file is exhausted, so that a file
                    // yielding no trees does not end the iteration while later files remain.
                    // NOTE(review): assumes PrimeNextFile() returns false once no files are left - confirm.
                    while (t == null && this.PrimeNextFile())
                    {
                        t = this.tr.ReadTree();
                    }
                    //Associate this tree with a file and line number
                    IHasIndex lab = (t == null) ? null : t.Label() as IHasIndex;
                    if (lab != null)
                    {
                        lab.SetSentIndex(this.curLineId++);
                        lab.SetDocID(this.currentFile.GetName());
                    }
                }
                catch (IOException e)
                {
                    System.Console.Error.Printf("%s: Error reading from file %s:%n%s%n", this.GetType().FullName, this.currentFile.GetPath(), e.ToString());
                    throw new Exception(e);
                }
                return t;
            }
Exemple #3
0
                // end class DependentPuncWordRejectFilter
                // extra class guarantees correct lazy loading (Bloch p.194)
                /// <summary>Orders two dependencies by the index of their dependents.</summary>
                /// <remarks>
                /// Both dependents are cast to <code>IHasIndex</code>; a dependent of any
                /// other type makes the cast fail.
                /// </remarks>
                /// <param name="dep1">The first dependency.</param>
                /// <param name="dep2">The second dependency.</param>
                /// <returns>
                /// A negative value, zero, or a positive value when the dependent index of
                /// <paramref name="dep1"/> is respectively less than, equal to, or greater
                /// than that of <paramref name="dep2"/>.
                /// </returns>
                public virtual int Compare(IDependency dep1, IDependency dep2)
                {
                    IHasIndex dep1lab = (IHasIndex)dep1.Dependent();
                    IHasIndex dep2lab = (IHasIndex)dep2.Dependent();
                    int       dep1idx = dep1lab.Index();
                    int       dep2idx = dep2lab.Index();

                    // CompareTo instead of subtraction: a difference of two ints can
                    // overflow and report the wrong sign for widely separated values.
                    return dep1idx.CompareTo(dep2idx);
                }
        /// <summary>
        /// Consumes tokens from the tokenizer until one complete parenthesized tree
        /// has been assembled, and returns that tree.
        /// </summary>
        /// <remarks>
        /// A left parenthesis opens a new node (labelled by the following token, or
        /// unlabelled when another left parenthesis follows), a right parenthesis
        /// closes the current node, and any other token becomes a leaf of the
        /// current node.  Leaves are numbered from 1 in the order they are read.
        /// </remarks>
        /// <returns>
        /// The completed tree once a right parenthesis empties the stack, or
        /// <code>null</code> when the tokens run out, when an unmatched right
        /// parenthesis is met, or when a token appears outside of any tree.
        /// </returns>
        /// <exception cref="Java.Util.NoSuchElementException"/>
        private Tree GetTreeFromInputStream()
        {
            int wordIndex = 1;

            // FSA
            while (tokenizer.MoveNext())
            {
                string token = tokenizer.Current;
                switch (token)
                {
                case leftParen:
                {
                    // cdm 20100225: This next line used to have "" instead of null, but the traditional and current tree normalizers depend on the label being null not "" when there is no label on a tree (like the outermost English PTB level)
                    string label = null;
                    if (!tokenizer.Peek().Equals(leftParen))
                    {
                        // Consume the token that follows the parenthesis and use it as the
                        // label.  Reading Current without advancing would yield the left
                        // parenthesis itself, since that is the token being handled here.
                        tokenizer.MoveNext();
                        label = tokenizer.Current;
                    }
                    if (rightParen.Equals(label))
                    {
                        //Skip past empty trees
                        continue;
                    }
                    if (treeNormalizer != null)
                    {
                        label = treeNormalizer.NormalizeNonterminal(label);
                    }
                    if (label != null)
                    {
                        label = StarPattern.Matcher(label).ReplaceAll("*");
                        label = SlashPattern.Matcher(label).ReplaceAll("/");
                    }
                    Tree newTree = treeFactory.NewTreeNode(label, null);
                    // dtrs are added below
                    if (currentTree == null)
                    {
                        stack.Add(newTree);
                    }
                    else
                    {
                        currentTree.AddChild(newTree);
                        stack.Add(currentTree);
                    }
                    currentTree = newTree;
                    break;
                }

                case rightParen:
                {
                    if (stack.IsEmpty())
                    {
                        // Warn that file has too many right parentheses
                        log.Info("PennTreeReader: warning: file has extra non-matching right parenthesis [ignored]");
                        goto label_break;
                    }
                    //Accept
                    currentTree = stack.Remove(stack.Count - 1);
                    // i.e., stack.pop()
                    if (stack.IsEmpty())
                    {
                        return currentTree;
                    }
                    break;
                }

                default:
                {
                    if (currentTree == null)
                    {
                        // A careful Reader should warn here, but it's kind of useful to
                        // suppress this because then the TreeReader doesn't print a ton of
                        // messages if there is a README file in a directory of Trees.
                        // log.info("PennTreeReader: warning: file has extra token not in a s-expression tree: " + token + " [ignored]");
                        goto label_break;
                    }
                    string terminal = (treeNormalizer == null) ? token : treeNormalizer.NormalizeTerminal(token);
                    terminal = StarPattern.Matcher(terminal).ReplaceAll("*");
                    terminal = SlashPattern.Matcher(terminal).ReplaceAll("/");
                    Tree leaf = treeFactory.NewLeaf(terminal);
                    if (leaf.Label() is IHasIndex)
                    {
                        IHasIndex hi = (IHasIndex)leaf.Label();
                        hi.SetIndex(wordIndex);
                    }
                    if (leaf.Label() is IHasWord)
                    {
                        IHasWord hw = (IHasWord)leaf.Label();
                        hw.SetWord(leaf.Label().Value());
                    }
                    if (leaf.Label() is IHasTag)
                    {
                        // The tag is taken from the label of the node the leaf hangs under.
                        IHasTag ht = (IHasTag)leaf.Label();
                        ht.SetTag(currentTree.Label().Value());
                    }
                    wordIndex++;
                    currentTree.AddChild(leaf);
                    // cdm: Note: this implementation just isn't as efficient as the old recursive descent parser (see 2008 code), where all the daughters are gathered before the tree is made....
                    break;
                }
                }
            }
            label_break :;
            //Reject
            if (currentTree != null)
            {
                log.Info("PennTreeReader: warning: incomplete tree (extra left parentheses in input): " + currentTree);
            }
            return null;
        }
Exemple #5
0
        /// <summary>Load a collection of parse trees from the file of given name.</summary>
        /// <remarks>
        /// Load a collection of parse trees from the file of given name.
        /// Each tree may optionally be encased in parens to allow for Penn
        /// Treebank style trees.
        /// This method implements the <code>FileProcessor</code> interface.
        /// When an SRL map is configured and one of its keys is a suffix of the
        /// file's absolute path, the SRL entries stored for each sentence index are
        /// applied to the corresponding tree after it has been added.
        /// An <code>IOException</code> is rethrown as a <code>RuntimeIOException</code>;
        /// the tree reader is closed in every case.
        /// </remarks>
        /// <param name="file">file to load trees from</param>
        public void ProcessFile(File file)
        {
            ITreeReader tr = null;
            // SRL entries for this file, selected by filename suffix (null when none apply)
            CollectionValuedMap <int, string> fileSrlMap = null;

            if (this.srlMap != null)
            {
                // there must be a better way ...
                string filename = file.GetAbsolutePath();
                foreach (string suffix in this.srlMap.Keys)
                {
                    if (filename.EndsWith(suffix, System.StringComparison.Ordinal))
                    {
                        fileSrlMap = this.srlMap[suffix];
                        break;
                    }
                }
                if (fileSrlMap == null)
                {
                    log.Info("could not find SRL entries for file: " + file);
                }
            }
            try
            {
                // maybe print file name to stdout to get some feedback
                // could throw an IO exception if can't open for reading
                tr = TreeReaderFactory().NewTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(file), Encoding())));
                int  sentIndex = 0;
                Tree pt;
                while ((pt = tr.ReadTree()) != null)
                {
                    if (pt.Label() is IHasIndex)
                    {
                        // so we can trace where this tree came from
                        IHasIndex hi = (IHasIndex)pt.Label();
                        hi.SetDocID(file.GetName());
                        hi.SetSentIndex(sentIndex);
                    }
                    // Look the entries up before adding, so a failing lookup leaves the collection untouched.
                    ICollection <string> srls = (fileSrlMap == null) ? null : fileSrlMap[sentIndex];
                    parseTrees.Add(pt);
                    if (srls != null)
                    {
                        AnnotateSrl(pt, srls);
                    }
                    sentIndex++;
                }
            }
            catch (IOException e)
            {
                throw new RuntimeIOException("MemoryTreebank.processFile IOException in file " + file, e);
            }
            finally
            {
                IOUtils.CloseIgnoringExceptions(tr);
            }
        }

        /// <summary>
        /// Applies SRL entries to a tree: flags each entry's verb terminal as a
        /// predicate and records each argument's type on the node it points at.
        /// </summary>
        /// <param name="pt">The tree whose node labels are annotated in place.</param>
        /// <param name="srls">The SRL entries for this tree, one whitespace-separated line each.</param>
        private static void AnnotateSrl(Tree pt, ICollection <string> srls)
        {
            foreach (string srl in srls)
            {
                // Fields read: [0] terminal index of the verb, [4..] argument specs of the form locs-type.
                // A null separator splits on whitespace; string.Split would treat "\\s+" as literal text.
                string[] bits      = srl.Split((char[])null, System.StringSplitOptions.RemoveEmptyEntries);
                int      verbIndex = System.Convert.ToInt32(bits[0]);
                Tree     verb      = Edu.Stanford.Nlp.Trees.Trees.GetTerminal(pt, verbIndex);
                ((CoreLabel)verb.Label()).Set(typeof(CoreAnnotations.CoNLLPredicateAnnotation), true);
                for (int i = 4; i < bits.Length; i++)
                {
                    string[] argParts = bits[i].Split('-');
                    string   locs     = argParts[0];
                    string   argType  = argParts[1];
                    if (argType.Equals("rel"))
                    {
                        continue;
                    }
                    // Locations are separated by '*' or ','; each one is terminal:height.
                    foreach (string loc in locs.Split(new char[] { '*', ',' }, System.StringSplitOptions.RemoveEmptyEntries))
                    {
                        string[] locParts = loc.Split(':');
                        int      term     = System.Convert.ToInt32(locParts[0]);
                        int      height   = System.Convert.ToInt32(locParts[1]);
                        // Start at the preterminal and climb 'height' levels towards the root.
                        Tree t1 = Edu.Stanford.Nlp.Trees.Trees.GetPreTerminal(pt, term);
                        for (int j = 0; j < height; j++)
                        {
                            t1 = t1.Parent(pt);
                        }
                        IDictionary <int, string> roleMap = ((CoreLabel)t1.Label()).Get(typeof(CoreAnnotations.CoNLLSRLAnnotation));
                        if (roleMap == null)
                        {
                            roleMap = Generics.NewHashMap();
                            ((CoreLabel)t1.Label()).Set(typeof(CoreAnnotations.CoNLLSRLAnnotation), roleMap);
                        }
                        roleMap[verbIndex] = argType;
                    }
                }
            }
        }