/// <summary>
/// Reads the next sample from the wrapped stream and attaches one shallow parse per sentence.
/// </summary>
/// <remarks>
/// For each sentence an incomplete parse is created from the tokens and typed as the top node,
/// one node per token is inserted carrying that token's POS tag, and every chunk whose type is
/// "NP" is inserted as a constituent spanning the tokens of that chunk.
/// </remarks>
/// <returns>
/// The sample returned by the wrapped stream with its parses set, or <c>null</c> when the
/// wrapped stream returns <c>null</c>.
/// </returns>
public override RawCorefSample read()
{
    RawCorefSample sample = samples.read();
    if (sample == null)
    {
        return null;
    }

    IList<Parse> enhancedParses = new List<Parse>();

    foreach (string[] sentence in sample.Texts)
    {
        Parse p = FullParseCorefEnhancerStream.createIncompleteParse(sentence);
        p.Type = AbstractBottomUpParser.TOP_NODE;

        // Token nodes are read before any insert below, so index i lines up with tags[i].
        Parse[] parseTokens = p.Children;

        // Insert one POS-tag node per token into the incomplete parse.
        string[] tags = posTagger.tag(sentence);
        for (int i = 0; i < parseTokens.Length; i++)
        {
            p.insert(new Parse(p.Text, parseTokens[i].Span, tags[i], 1d, parseTokens[i].HeadIndex));
        }

        // Insert a constituent for every noun-phrase chunk.
        Span[] chunks = chunker.chunkAsSpans(sentence, tags);
        foreach (Span chunk in chunks)
        {
            if (!"NP".Equals(chunk.Type))
            {
                continue;
            }

            // The chunk span indexes tokens; the parse needs the span of the covered text.
            // NOTE(review): assumes chunk.End is exclusive, as in OpenNLP's chunkAsSpans - confirm for this port.
            Span chunkTextSpan = new Span(
                parseTokens[chunk.Start].Span.Start,
                parseTokens[chunk.End - 1].Span.End);

            p.insert(new Parse(p.Text, chunkTextSpan, chunk.Type, 1d, p.HeadIndex));
        }

        enhancedParses.Add(p);
    }

    sample.Parses = enhancedParses;
    return sample;
}
/// <summary>
/// Assembles the coreference sample stream described by the given command line arguments.
/// </summary>
/// <param name="args">Arguments that <c>ArgumentParser.parse</c> turns into a <c>Parameters</c> instance.</param>
/// <returns>
/// A <c>MucMentionInserterStream</c> over samples that were tokenized, fully parsed and then
/// enhanced by the "person" and "organization" name finders.
/// </returns>
public override ObjectStream<CorefSample> create(string[] args)
{
    Parameters parameters = ArgumentParser.parse(args, typeof(Parameters));

    // Parser and tokenizer are built from the model files named in the parameters.
    ParserModel parserModel = new ParserModelLoader().load(parameters.ParserModel);
    Parser parser = ParserFactory.create(parserModel);

    TokenizerModel tokenizerModel = new TokenizerModelLoader().load(parameters.TokenizerModel);
    Tokenizer tokenizer = new TokenizerME(tokenizerModel);

    // Files under the data location are read as UTF-8 strings, one string per file.
    var documentFiles = new DirectorySampleStream(
        parameters.Data,
        new FileFilterAnonymousInnerClassHelper(this),
        false);
    ObjectStream<string> mucDocStream = new FileToStringSampleStream(documentFiles, Charset.forName("UTF-8"));

    ObjectStream<RawCorefSample> rawSamples = new MucCorefSampleStream(tokenizer, mucDocStream);
    ObjectStream<RawCorefSample> parsedSamples = new FullParseCorefEnhancerStream(parser, rawSamples);

    // There is one parameter per name finder model; each model file is keyed by its tag string.
    var modelFilesByTag = new Dictionary<string, Jfile>
    {
        { "person", parameters.PersonModel },
        { "organization", parameters.OrganizationModel }
    };

    var finders = new List<TokenNameFinder>();
    var finderTags = new List<string>();
    foreach (KeyValuePair<string, Jfile> pair in modelFilesByTag)
    {
        var finderModel = new TokenNameFinderModelLoader().load(pair.Value);
        finders.Add(new NameFinderME(finderModel));
        finderTags.Add(pair.Key);
    }

    var namedSamples = new NameFinderCorefEnhancerStream(
        finders.ToArray(),
        finderTags.ToArray(),
        parsedSamples);

    return new MucMentionInserterStream(namedSamples);
}