Example #1
        public override RawCorefSample read()
        {
            RawCorefSample sample = samples.read();

            if (sample != null)
            {
                IList<Parse> enhancedParses = new List<Parse>();

                IList<string[]> sentences = sample.Texts;

                foreach (var sentence in sentences)
                {
                    Parse p = FullParseCorefEnhancerStream.createIncompleteParse(sentence);
                    p.Type = AbstractBottomUpParser.TOP_NODE;

                    Parse[] parseTokens = p.Children;

                    // POS-tag the sentence so each token can be wrapped in a tag node
                    string[] tags = posTagger.tag(sentence);

                    for (int i = 0; i < parseTokens.Length; i++)
                    {
                        p.insert(new Parse(p.Text, parseTokens[i].Span, tags[i], 1d, parseTokens[i].HeadIndex));
                    }

                    // chunk the tagged sentence and insert the resulting NP nodes into the parse
                    Span[] chunks = chunker.chunkAsSpans(sentence, tags);

                    foreach (Span chunk in chunks)
                    {
                        if ("NP".Equals(chunk.Type))
                        {
                            p.insert(new Parse(p.Text, new Span(0, 0), chunk.Type, 1d, p.HeadIndex));
                        }
                    }

                    enhancedParses.Add(p);
                }

                sample.Parses = enhancedParses;

                return sample;
            }
            else
            {
                return null;
            }
        }
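
A minimal usage sketch for this read() override, assuming the enclosing class is an ObjectStream<RawCorefSample> decorator; the class name ShallowParseCorefEnhancerStream and its constructor arguments below are assumptions, not taken from the snippet itself:

            // Hypothetical wiring: posTagger, chunker and an upstream
            // ObjectStream<RawCorefSample> named rawSamples are assumed to
            // have been constructed elsewhere from trained models.
            var enhancer = new ShallowParseCorefEnhancerStream(posTagger, chunker, rawSamples);

            RawCorefSample enhanced;
            while ((enhanced = enhancer.read()) != null)
            {
                // Each sample returned by the override above now carries one
                // shallow parse per sentence in its Parses list.
                Console.WriteLine(enhanced.Parses.Count);
            }
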
Example #2
        public override ObjectStream<CorefSample> create(string[] args)
        {
            Parameters @params = ArgumentParser.parse(args, typeof(Parameters));

            ParserModel parserModel = (new ParserModelLoader()).load(@params.ParserModel);
            Parser      parser      = ParserFactory.create(parserModel);

            TokenizerModel tokenizerModel = (new TokenizerModelLoader()).load(@params.TokenizerModel);
            Tokenizer      tokenizer      = new TokenizerME(tokenizerModel);

            ObjectStream<string> mucDocStream = new FileToStringSampleStream(
                new DirectorySampleStream(@params.Data, new FileFilterAnonymousInnerClassHelper(this), false),
                Charset.forName("UTF-8"));

            ObjectStream<RawCorefSample> rawSamples = new MucCorefSampleStream(tokenizer, mucDocStream);

            ObjectStream<RawCorefSample> parsedSamples = new FullParseCorefEnhancerStream(parser, rawSamples);


            // How to load all these name finder models?
            // Let's make a param per model; not that nice, but ok!

            IDictionary<string, Jfile> modelFileTagMap = new Dictionary<string, Jfile>();

            modelFileTagMap["person"]       = @params.PersonModel;
            modelFileTagMap["organization"] = @params.OrganizationModel;

            IList<TokenNameFinder> nameFinders = new List<TokenNameFinder>();
            IList<string>          tags        = new List<string>();

            foreach (KeyValuePair<string, Jfile> entry in modelFileTagMap)
            {
                nameFinders.Add(new NameFinderME((new TokenNameFinderModelLoader()).load(entry.Value)));
                tags.Add(entry.Key);
            }

            return new MucMentionInserterStream(new NameFinderCorefEnhancerStream(nameFinders.ToArray(), tags.ToArray(), parsedSamples));
        }
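
A hedged sketch of how the stream returned by create() could be consumed; the names factory and args are placeholders for an instance of the surrounding factory class and its command-line arguments (both are assumptions, not shown above):

            // Hypothetical call site: drain the CorefSample stream produced above.
            ObjectStream<CorefSample> samples = factory.create(args);

            CorefSample corefSample;
            int count = 0;
            while ((corefSample = samples.read()) != null)
            {
                count++;
            }

            Console.WriteLine(count + " coref samples read");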