// We don't use valueOf because we sometimes use trees such as
 // (bar (foo (foo 1))), and the default valueOf uses a
 // TreeNormalizer that removes nodes from such a tree
 /// <summary>Reads a single tree from Penn-treebank-style bracketed text.</summary>
 /// <remarks>
 /// The reader is constructed directly with a <c>LabeledScoredTreeFactory</c> and no
 /// explicit normalizer argument, rather than going through a valueOf-style helper.
 /// </remarks>
 /// <param name="s">The bracketed tree text to parse.</param>
 /// <returns>The result of one <c>ReadTree()</c> call on the text.</returns>
 /// <exception cref="System.Exception">
 /// Thrown when reading raises an <c>IOException</c>; the original error is kept as the inner exception.
 /// </exception>
 public static Tree TreeFromString(string s)
 {
     try
     {
         ITreeReader tr = new PennTreeReader(new StringReader(s), new LabeledScoredTreeFactory());
         return(tr.ReadTree());
     }
     catch (IOException e)
     {
         // Exception has no constructor taking only an exception: pass the message
         // explicitly and chain the cause so the original stack trace is not lost.
         throw new Exception(e.Message, e);
     }
 }
        /// <summary>Builds subject/object training pairs from every tree found under a directory.</summary>
        /// <remarks>
        /// Recursively visits the files matched by the "mrg" argument (for example, the WSJ section of
        /// the Penn Treebank), reads each tree in turn, and collects the extractions found in it.
        /// A tree that fails during processing has its stack trace printed and is skipped.
        /// </remarks>
        /// <param name="name">The name of the directory, used only in the log track label.</param>
        /// <param name="directory">The directory whose files are read.</param>
        /// <returns>
        /// One entry per successfully processed tree: the sentence paired with the collection of
        /// subject/object span pairs extracted from it.
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        private static IList <Pair <ICoreMap, ICollection <Pair <Span, Span> > > > ProcessDirectory(string name, File directory)
        {
            Redwood.Util.ForceTrack("Processing " + name);
            // Collect the treebank files, then allocate the result list
            IEnumerable <File> treebankFiles = IOUtils.IterFilesRecursive(directory, "mrg");
            int treeCount = 0;
            IList <Pair <ICoreMap, ICollection <Pair <Span, Span> > > > dataset = new List <Pair <ICoreMap, ICollection <Pair <Span, Span> > > >(1024);

            foreach (File treebankFile in treebankFiles)
            {
                ITreeReader treeSource = new PennTreeReader(IOUtils.ReaderFromFile(treebankFile));
                // Read until the reader reports no more trees; a failure inside the body only skips that tree
                for (Tree current = treeSource.ReadTree(); current != null; current = treeSource.ReadTree())
                {
                    try
                    {
                        // Index the tree and compute its spans before extracting anything
                        current.IndexSpans();
                        current.SetSpans();
                        // Leaves become the token list (the trace/-NONE- filter from the Java source is disabled)
                        IList <CoreLabel>       leafTokens   = current.GetLeaves().Stream().Map(null).Collect(Collectors.ToList());
                        SemanticGraph           dependencies = Parse(current);
                        IDictionary <int, Span> traceTargets = FindTraceTargets(current);
                        IDictionary <int, int>  traceSources = FindTraceSources(current);
                        // Wrap tokens and graph in a sentence object and annotate it
                        ICoreMap sentenceMap = new _ArrayCoreMap_325(leafTokens, dependencies, 4);
                        natlog.DoOneSentence(null, sentenceMap);
                        // Record the extractions for this sentence
                        ICollection <Pair <Span, Span> > extractions = SubjectObjectPairs(dependencies, leafTokens, traceTargets, traceSources);
                        dataset.Add(Pair.MakePair(sentenceMap, extractions));
                        // Progress report every 100 trees
                        treeCount++;
                        if (treeCount % 100 == 0)
                        {
                            string paddedCount = new DecimalFormat("00000").Format(treeCount);
                            Redwood.Util.Log("[" + paddedCount + "] " + CountDatums(dataset) + " known extractions");
                        }
                    }
                    catch (Exception failure)
                    {
                        Sharpen.Runtime.PrintStackTrace(failure);
                    }
                }
            }
            // Final summary and end of the log track
            Redwood.Util.Log(treeCount + " trees processed yielding " + CountDatums(dataset) + " known extractions");
            Redwood.Util.EndTrack("Processing " + name);
            return(dataset);
        }
        /// <summary>Builds a semantic graph from a fixed, hard-coded parse tree.</summary>
        /// <returns>
        /// The graph produced by <c>SemanticGraphFactory.MakeFromTree</c> for that tree, using
        /// <c>Mode.Basic</c> and <c>Extras.Maximal</c>.
        /// </returns>
        /// <exception cref="System.Exception">
        /// Thrown when reading the tree raises an <c>IOException</c>; the original error is kept as the inner exception.
        /// </exception>
        private static SemanticGraph MakeGraph()
        {
            Tree tree;

            try
            {
                tree = new PennTreeReader(new StringReader("(S1 (S (S (S (NP (DT The) (NN CD14) (NN LPS) (NN receptor)) (VP (VBZ is) (, ,) (ADVP (RB however)) (, ,) (ADVP (RB up)) (VP (VBN regulated) (PRN (-LRB- -LRB-) (FRAG (RB not) (ADJP (RB down) (VBN regulated))) (-RRB- -RRB-)) (PP (IN in) (NP (JJ tolerant) (NNS cells)))))) (, ,) (CC and) (S (NP (NN LPS)) (VP (MD can) (, ,) (PP (IN in) (NP (NN fact))) (, ,) (ADVP (RB still)) (VP (VB lead) (PP (TO to) (NP (NP (NN activation)) (PP (IN of) (NP (JJ tolerant) (NNS cells))))) (SBAR (IN as) (S (VP (VBN evidenced) (PP (IN by) (NP (NP (NN mobilization)) (PP (IN of) (NP (DT the) (NN transcription) (NN factor) (NP (NP (JJ nuclear) (NN factor) (NN kappa) (NN B)) (PRN (-LRB- -LRB-) (NP (NN NF-kappa) (NN B)) (-RRB- -RRB-)))))))))))))) (. .)))"
                                                           ), new LabeledScoredTreeFactory()).ReadTree();
            }
            catch (IOException e)
            {
                // The tree should parse correctly, so an I/O failure here is unexpected.
                // Exception has no constructor taking only an exception: pass the message
                // explicitly and chain the cause so the original stack trace is not lost.
                throw new Exception(e.Message, e);
            }
            return(SemanticGraphFactory.MakeFromTree(tree, SemanticGraphFactory.Mode.Basic, GrammaticalStructure.Extras.Maximal));
        }
        /// <summary>
        /// Checks that several trees can be read one after another from a single string, that the
        /// backslash-escaped <c>*</c> and <c>/</c> in the input come back unescaped, and that a
        /// read past the last tree yields null.
        /// </summary>
        public virtual void TestRead()
        {
            PennTreeReader treeReader = new PennTreeReader(new StringReader("(1 (2 This)) (3 (4 is) (5 a)) (6 (\\* small) (7 \\/test))"));
            string[] expectedTrees = { "(1 (2 This))", "(3 (4 is) (5 a))", "(6 (* small) (7 /test))" };

            // Each expected string must match the next tree delivered by the reader, in order.
            foreach (string expectedTree in expectedTrees)
            {
                Tree parsed = treeReader.ReadTree();
                NUnit.Framework.Assert.IsTrue(parsed != null);
                NUnit.Framework.Assert.AreEqual(expectedTree, parsed.ToString());
            }

            // Nothing is left in the input, so one more read must not return a tree.
            Tree trailing = treeReader.ReadTree();
            NUnit.Framework.Assert.IsFalse(trailing != null);
        }
        // Example #5
        // 0
        /// <summary>Creates a tree reader over the given input, configured from this factory's flags.</summary>
        /// <remarks>
        /// When <c>noNormalization</c> is set the reader uses a plain <c>TreeNormalizer</c>; otherwise it
        /// uses an <c>ArabicTreeNormalizer</c> built from the retain/change flags. When <c>filterX</c> is
        /// set the reader is additionally wrapped in a <c>FilteringTreeReader</c> with an <c>XFilter</c>.
        /// </remarks>
        /// <param name="in">The character source; it is passed both to the reader and to its tokenizer.</param>
        /// <returns>The configured reader, filtered or not depending on <c>filterX</c>.</returns>
        public virtual ITreeReader NewTreeReader(Reader @in)
        {
            ITreeReader baseReader = noNormalization
                ? new PennTreeReader(@in, new LabeledScoredTreeFactory(), new TreeNormalizer(), new ArabicTreebankTokenizer(@in))
                : new PennTreeReader(@in, new LabeledScoredTreeFactory(), new ArabicTreeNormalizer(retainNPTmp, retainPRD, changeNoLabels, retainNPSbj, retainPPClr), new ArabicTreebankTokenizer(@in));

            if (!filterX)
            {
                return(baseReader);
            }
            return(new FilteringTreeReader(baseReader, new ArabicTreeReaderFactory.XFilter()));
        }
        /// <summary>Reads one serialized document from a text stream and rebuilds it as an annotation.</summary>
        /// <remarks>
        /// The stream is consumed in this order:
        /// the coref chains (via <c>LoadCorefChains</c>); one line holding the old-format coref graph
        /// as space-separated integers in groups of four (two for the source tuple, two for the
        /// destination tuple), or an empty line; then zero or more sentences. Each sentence is a
        /// parse-tree line, three dependency graphs (via <c>LoadDependencyGraph</c>), and token lines
        /// terminated by an empty line or the end of the stream.
        /// </remarks>
        /// <param name="is">
        /// The stream to read. When <c>compress</c> is set and the stream is not already a
        /// <c>GZIPInputStream</c>, it is wrapped in one first.
        /// </param>
        /// <returns>The reconstructed document paired with the (possibly wrapped) input stream.</returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="RuntimeIOException">
        /// Thrown when the stream ends before the coref graph line, or when that line does not
        /// contain a multiple of four fields.
        /// </exception>
        public override Pair <Annotation, InputStream> Read(InputStream @is)
        {
            if (compress && !(@is is GZIPInputStream))
            {
                @is = new GZIPInputStream(@is);
            }
            BufferedReader reader = new BufferedReader(new InputStreamReader(@is));
            Annotation     doc    = new Annotation(string.Empty);
            string         line;
            // read the coref graph (new format)
            IDictionary <int, CorefChain> chains = LoadCorefChains(reader);

            if (chains != null)
            {
                doc.Set(typeof(CorefCoreAnnotations.CorefChainAnnotation), chains);
            }
            // read the coref graph (old format)
            line = reader.ReadLine();
            if (line == null)
            {
                // ReadLine signals end of input with null (see the sentence loop below); calling Trim()
                // on it would fail with a null dereference, so report the truncated input explicitly.
                throw new RuntimeIOException("ERROR: Unexpected end of input while reading the serialized coref graph");
            }
            line = line.Trim();
            if (line.Length > 0)
            {
                string[] bits = line.Split(" ");
                if (bits.Length % 4 != 0)
                {
                    throw new RuntimeIOException("ERROR: Incorrect format for the serialized coref graph: " + line);
                }
                IList <Pair <IntTuple, IntTuple> > corefGraph = new List <Pair <IntTuple, IntTuple> >();
                // every four fields form one edge: two ints for the source, two for the destination
                for (int i = 0; i < bits.Length; i += 4)
                {
                    IntTuple src = new IntTuple(2);
                    IntTuple dst = new IntTuple(2);
                    src.Set(0, System.Convert.ToInt32(bits[i]));
                    src.Set(1, System.Convert.ToInt32(bits[i + 1]));
                    dst.Set(0, System.Convert.ToInt32(bits[i + 2]));
                    dst.Set(1, System.Convert.ToInt32(bits[i + 3]));
                    corefGraph.Add(new Pair <IntTuple, IntTuple>(src, dst));
                }
                doc.Set(typeof(CorefCoreAnnotations.CorefGraphAnnotation), corefGraph);
            }
            // read individual sentences
            IList <ICoreMap> sentences = new List <ICoreMap>();

            while ((line = reader.ReadLine()) != null)
            {
                ICoreMap sentence = new Annotation(string.Empty);
                // first line is the parse tree. construct it with CoreLabels in Tree nodes
                Tree tree = new PennTreeReader(new StringReader(line), new LabeledScoredTreeFactory(CoreLabel.Factory())).ReadTree();
                sentence.Set(typeof(TreeCoreAnnotations.TreeAnnotation), tree);
                // read the dependency graphs; the read order (collapsed, uncollapsed, cc) must match the stream
                AnnotationSerializer.IntermediateSemanticGraph intermCollapsedDeps   = LoadDependencyGraph(reader);
                AnnotationSerializer.IntermediateSemanticGraph intermUncollapsedDeps = LoadDependencyGraph(reader);
                AnnotationSerializer.IntermediateSemanticGraph intermCcDeps          = LoadDependencyGraph(reader);
                // the remaining lines until empty line are tokens
                IList <CoreLabel> tokens = new List <CoreLabel>();
                while ((line = reader.ReadLine()) != null)
                {
                    if (line.Length == 0)
                    {
                        break;
                    }
                    CoreLabel token = LoadToken(line, haveExplicitAntecedent);
                    tokens.Add(token);
                }
                sentence.Set(typeof(CoreAnnotations.TokensAnnotation), tokens);
                // convert the intermediate graphs to actual SemanticGraphs now that the tokens are known
                SemanticGraph collapsedDeps = intermCollapsedDeps.ConvertIntermediateGraph(tokens);
                sentence.Set(typeof(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation), collapsedDeps);
                SemanticGraph uncollapsedDeps = intermUncollapsedDeps.ConvertIntermediateGraph(tokens);
                sentence.Set(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation), uncollapsedDeps);
                SemanticGraph ccDeps = intermCcDeps.ConvertIntermediateGraph(tokens);
                sentence.Set(typeof(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation), ccDeps);
                sentences.Add(sentence);
            }
            doc.Set(typeof(CoreAnnotations.SentencesAnnotation), sentences);
            return(Pair.MakePair(doc, @is));
        }