/// <summary>
/// Reads untokenized lines from standard input, tokenizes them and writes the
/// whitespace-joined token text to standard output, tracking throughput.
/// </summary>
internal void process()
{
    // Fix: Console.OpenStandardInput is a method; the original passed the method
    // group (no parentheses), which does not convert to a Stream. Invoke it.
    ObjectStream<string> untokenizedLineStream =
        new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput()));
    ObjectStream<string> tokenizedLineStream =
        new WhitespaceTokenStream(new TokenizerStream(tokenizer, untokenizedLineStream));

    // "sent" is the unit label the monitor prints. NOTE(review): System.err is
    // presumably a Java-compat shim for stderr — confirm against project shims.
    PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
    perfMon.start();

    try
    {
        string tokenizedLine;
        while ((tokenizedLine = tokenizedLineStream.read()) != null)
        {
            Console.WriteLine(tokenizedLine);
            perfMon.incrementCounter();
        }
    }
    catch (IOException e)
    {
        // Delegate stdin I/O failures to the shared CLI handler.
        CmdLineUtil.handleStdinIoError(e);
    }

    perfMon.stopAndPrintFinalResult();
}
/// <summary>
/// Opens the configured sentiment-model file and returns it as a stream of
/// <see cref="DocumentSample"/> instances. Any previously opened stream is
/// released first via <see cref="Dispose"/>.
/// </summary>
public IObjectStream<DocumentSample> GetSentimentModelStream()
{
    // Release any stream opened by an earlier call before reopening the file.
    Dispose();

    var modelFilePath = Path.Combine(
        AppConfig.GetAppBasePath(),
        AppConfig.GetConfigValue("SENTIMENT_MODEL_FILE"));

    fileStream = new FileStream(modelFilePath, FileMode.Open, FileAccess.Read);
    plainTextStream = new PlainTextByLineStream(fileStream);
    return new DocumentSampleStream(plainTextStream);
}
/// <summary>
/// Verifies that ChunkSampleStream splits "word tag pred" lines into two
/// sentence samples separated by a blank line, and reports end of stream.
/// </summary>
public void TestReadingEvents()
{
    // Build two sample sentences of three "word{s}{t} tag{s}{t} pred{s}{t}"
    // lines each, separated by a blank line (the sentence delimiter).
    var corpus = new StringBuilder();
    for (var sentence = 1; sentence <= 2; sentence++)
    {
        for (var token = 1; token <= 3; token++)
        {
            corpus.Append($"word{sentence}{token} tag{sentence}{token} pred{sentence}{token}");
            corpus.Append('\n');
        }
        if (sentence == 1)
        {
            // Start next sample sentence.
            corpus.Append('\n');
        }
    }

    var lineStream = new PlainTextByLineStream(
        new MemoryStream(Encoding.UTF8.GetBytes(corpus.ToString())));
    var chunkStream = new ChunkSampleStream(lineStream);

    // Each Read() must yield one fully populated sentence, in order.
    for (var sentence = 1; sentence <= 2; sentence++)
    {
        var sample = chunkStream.Read();
        for (var token = 1; token <= 3; token++)
        {
            var index = token - 1;
            Assert.AreEqual($"word{sentence}{token}", sample.Sentence[index]);
            Assert.AreEqual($"tag{sentence}{token}", sample.Tags[index]);
            Assert.AreEqual($"pred{sentence}{token}", sample.Preds[index]);
        }
    }

    // The stream must be exhausted after the two samples.
    Assert.Null(chunkStream.Read());
}
/// <summary>
/// Creates a <see cref="ChunkSample"/> stream from the training-data file
/// named by the CLI arguments.
/// </summary>
/// <param name="args">Raw command-line arguments parsed into <c>Parameters</c>.</param>
public override ObjectStream<ChunkSample> create(string[] args)
{
    // Parse the CLI arguments and verify the data file is readable.
    Parameters parameters = ArgumentParser.parse(args, typeof(Parameters));
    CmdLineUtil.checkInputFile("Data", parameters.Data);

    FileInputStream dataIn = CmdLineUtil.openInFile(parameters.Data);
    ObjectStream<string> lines = new PlainTextByLineStream(dataIn.Channel, parameters.Encoding);
    return new ChunkSampleStream(lines);
}
/// <summary>
/// Creates an Arvores Deitadas <see cref="NameSample"/> stream from the
/// data file named by the CLI arguments.
/// </summary>
/// <param name="args">Raw command-line arguments parsed into <c>Parameters</c>.</param>
public override ObjectStream<NameSample> create(string[] args)
{
    // Parse the CLI arguments and remember the corpus language.
    Parameters parameters = ArgumentParser.parse(args, typeof(Parameters));
    language = parameters.Lang;

    FileInputStream dataIn = CmdLineUtil.openInFile(parameters.Data);
    ObjectStream<string> lines = new PlainTextByLineStream(dataIn.Channel, parameters.Encoding);
    return new ADNameSampleStream(lines, parameters.SplitHyphenatedTokens.Value);
}
/// <summary>
/// Creates an Arvores Deitadas <see cref="POSSample"/> stream from the
/// data file named by the CLI arguments.
/// </summary>
/// <param name="args">Raw command-line arguments parsed into <c>Parameters</c>.</param>
public override ObjectStream<POSSample> create(string[] args)
{
    // Parse the CLI arguments and remember the corpus language.
    Parameters parameters = ArgumentParser.parse(args, typeof(Parameters));
    language = parameters.Lang;

    FileInputStream dataIn = CmdLineUtil.openInFile(parameters.Data);
    ObjectStream<string> lines = new PlainTextByLineStream(dataIn.Channel, parameters.Encoding);
    return new ADPOSSampleStream(lines, parameters.ExpandME.Value, parameters.IncludeFeatures.Value);
}
/// <summary>
/// CLI entry point: loads a chunker model from args[0], then reads
/// POS-tagged lines from stdin and prints the chunked sample for each.
/// </summary>
/// <param name="args">Exactly one argument: the chunker model file path.</param>
public override void run(string[] args)
{
    if (args.Length != 1)
    {
        Console.WriteLine(Help);
        return;
    }

    ChunkerModel model = (new ChunkerModelLoader()).load(new File(args[0]));
    ChunkerME chunker = new ChunkerME(model, ChunkerME.DEFAULT_BEAM_SIZE);

    // Fix: Console.OpenStandardInput is a method; the original passed the method
    // group (no parentheses), which does not convert to a Stream. Invoke it.
    ObjectStream<string> lineStream =
        new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput()));

    // NOTE(review): System.err is presumably a Java-compat shim for stderr.
    PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
    perfMon.start();

    try
    {
        string line;
        while ((line = lineStream.read()) != null)
        {
            POSSample posSample;
            try
            {
                posSample = POSSample.parse(line);
            }
            catch (InvalidFormatException)
            {
                // Report the malformed line and keep processing the rest.
                Console.Error.WriteLine("Invalid format:");
                Console.Error.WriteLine(line);
                continue;
            }

            string[] chunks = chunker.chunk(posSample.Sentence, posSample.Tags);
            Console.WriteLine((new ChunkSample(posSample.Sentence, posSample.Tags, chunks)).nicePrint());
            perfMon.incrementCounter();
        }
    }
    catch (IOException e)
    {
        CmdLineUtil.handleStdinIoError(e);
    }

    perfMon.stopAndPrintFinalResult();
}
/// <summary>
/// Verifies that PlainTextByLineStream splits on both '\n' and "\r\n"
/// line terminators and signals end of input afterwards.
/// </summary>
public void TestLineSegmentation()
{
    // Mix of '\n' and "\r\n" terminators; both must be treated as line breaks.
    var testString = new StringBuilder();
    testString.Append("line1");
    testString.Append('\n');
    testString.Append("line2");
    testString.Append('\n');
    testString.Append("line3");
    testString.Append("\r\n");
    testString.Append("line4");
    testString.Append('\n');

    var stream = new PlainTextByLineStream(
        new MemoryStream(Encoding.UTF8.GetBytes(testString.ToString())));

    Assert.AreEqual("line1", stream.Read());
    Assert.AreEqual("line2", stream.Read());
    Assert.AreEqual("line3", stream.Read());
    Assert.AreEqual("line4", stream.Read());

    // Fix: also assert end-of-stream after the last line, matching the
    // convention of the other stream tests in this file.
    Assert.Null(stream.Read());
}
/// <summary>
/// Verifies that only the first sample after a blank line (a document
/// boundary) carries the ClearAdaptiveData flag.
/// </summary>
public void TestClearAdaptiveData()
{
    // Three names, a blank line (document boundary), then one more name.
    var trainingData = new StringBuilder();
    trainingData.Append("a\n");
    trainingData.Append("b\n");
    trainingData.Append("c\n");
    trainingData.Append("\n");
    trainingData.Append("d\n");

    var lineStream = new PlainTextByLineStream(new StringReader(trainingData.ToString()));
    var trainingStream = new NameSampleStream(lineStream);

    // Samples within the first document keep adaptive data ...
    for (var i = 0; i < 3; i++)
    {
        Assert.False(trainingStream.Read().ClearAdaptiveData);
    }

    // ... the first sample after the blank line requests a reset.
    Assert.True(trainingStream.Read().ClearAdaptiveData);

    Assert.Null(trainingStream.Read());
}
/// <summary>
/// Creates a CoNLL-X <see cref="POSSample"/> stream from the data file named
/// by the CLI arguments. The corpus is always read as UTF-8.
/// </summary>
/// <param name="args">Raw command-line arguments parsed into <c>Parameters</c>.</param>
public override ObjectStream<POSSample> create(string[] args)
{
    Parameters parameters = ArgumentParser.parse(args, typeof(Parameters));
    try
    {
        ObjectStream<string> lines = new PlainTextByLineStream(
            new InputStreamReader(CmdLineUtil.openInFile(parameters.Data), "UTF-8"));
        return new ConllXPOSSampleStream(lines);
    }
    catch (UnsupportedEncodingException e)
    {
        // This shouldn't happen: UTF-8 support is mandatory.
        throw new TerminateToolException(-1, "UTF-8 encoding is not supported: " + e.Message, e);
    }
}
/// <summary>
/// CLI entry point: loads a POS model from args[0], then reads raw sentences
/// from stdin, whitespace-tokenizes and tags them, and prints each sample.
/// </summary>
/// <param name="args">Exactly one argument: the POS model file path.</param>
public override void run(string[] args)
{
    if (args.Length != 1)
    {
        Console.WriteLine(Help);
        return;
    }

    POSModel model = (new POSModelLoader()).load(new File(args[0]));
    POSTaggerME tagger = new POSTaggerME(model);

    // Fix: Console.OpenStandardInput is a method; the original passed the method
    // group (no parentheses), which does not convert to a Stream. Invoke it.
    ObjectStream<string> lineStream =
        new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput()));

    // NOTE(review): System.err is presumably a Java-compat shim for stderr.
    PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
    perfMon.start();

    try
    {
        string line;
        while ((line = lineStream.read()) != null)
        {
            string[] whitespaceTokenizerLine = WhitespaceTokenizer.INSTANCE.tokenize(line);
            string[] tags = tagger.tag(whitespaceTokenizerLine);

            POSSample sample = new POSSample(whitespaceTokenizerLine, tags);
            Console.WriteLine(sample.ToString());
            perfMon.incrementCounter();
        }
    }
    catch (IOException e)
    {
        CmdLineUtil.handleStdinIoError(e);
    }

    perfMon.stopAndPrintFinalResult();
}
/// <summary>
/// Creates an Arvores Deitadas <see cref="ChunkSample"/> stream from the data
/// file named by the CLI arguments, optionally limited to a sentence window.
/// </summary>
/// <param name="args">Raw command-line arguments parsed into <c>Parameters</c>.</param>
public override ObjectStream<ChunkSample> create(string[] args)
{
    // Parse the CLI arguments and remember the corpus language.
    Parameters parameters = ArgumentParser.parse(args, typeof(Parameters));
    language = parameters.Lang;

    FileInputStream dataIn = CmdLineUtil.openInFile(parameters.Data);
    ObjectStream<string> lines = new PlainTextByLineStream(dataIn.Channel, parameters.Encoding);

    ADChunkSampleStream samples = new ADChunkSampleStream(lines);

    // Optional window bounds: only apply values the user actually supplied.
    if (parameters.Start.HasValue && parameters.Start.Value > -1)
    {
        samples.Start = parameters.Start.Value;
    }
    if (parameters.End.HasValue && parameters.End.Value > -1)
    {
        samples.End = parameters.End.Value;
    }

    return samples;
}
/// <summary>
/// CLI entry point: loads a detokenization dictionary from args[0], then
/// reads tokenized lines from stdin and prints the detokenized text.
/// </summary>
/// <param name="args">Exactly one argument: the detokenizer dictionary file path.</param>
public override void run(string[] args)
{
    if (args.Length != 1)
    {
        Console.WriteLine(Help);
        return;
    }

    Detokenizer detokenizer = new DictionaryDetokenizer(
        (new DetokenizationDictionaryLoader()).load(new File(args[0])));

    // Fix: Console.OpenStandardInput is a method; the original passed the method
    // group (no parentheses), which does not convert to a Stream. Invoke it.
    ObjectStream<string> tokenizedLineStream =
        new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput()));

    // NOTE(review): System.err is presumably a Java-compat shim for stderr.
    PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
    perfMon.start();

    try
    {
        string tokenizedLine;
        while ((tokenizedLine = tokenizedLineStream.read()) != null)
        {
            // White space tokenize line, then let the dictionary rejoin it.
            string[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(tokenizedLine);
            Console.WriteLine(detokenizer.detokenize(tokens, null));
            perfMon.incrementCounter();
        }
    }
    catch (IOException e)
    {
        CmdLineUtil.handleStdinIoError(e);
    }

    perfMon.stopAndPrintFinalResult();
}
/// <summary>
/// CLI entry point: loads a parser model (last argument) and parses sentences
/// read from stdin. Options: -bs beam size, -k number of parses to show,
/// -ap advance percentage.
/// </summary>
/// <param name="args">Options followed by the parser model file path.</param>
public override void run(string[] args)
{
    if (args.Length < 1)
    {
        Console.WriteLine(Help);
        return;
    }

    // The model file is always the last argument; options precede it.
    ParserModel model = (new ParserModelLoader()).load(new File(args[args.Length - 1]));

    int? beamSize = CmdLineUtil.getIntParameter("-bs", args);
    if (beamSize == null)
    {
        beamSize = AbstractBottomUpParser.defaultBeamSize;
    }

    // -k: how many parses to emit; when absent only the best parse is shown.
    int? numParses = CmdLineUtil.getIntParameter("-k", args);
    bool showTopK;
    if (numParses == null)
    {
        numParses = 1;
        showTopK = false;
    }
    else
    {
        showTopK = true;
    }

    double? advancePercentage = CmdLineUtil.getDoubleParameter("-ap", args);
    if (advancePercentage == null)
    {
        advancePercentage = AbstractBottomUpParser.defaultAdvancePercentage;
    }

    opennlp.tools.parser.Parser parser =
        ParserFactory.create(model, beamSize.Value, advancePercentage.Value);

    // Fix: Console.OpenStandardInput is a method; the original passed the method
    // group (no parentheses), which does not convert to a Stream. Invoke it.
    ObjectStream<string> lineStream =
        new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput()));

    // NOTE(review): System.err is presumably a Java-compat shim for stderr.
    PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
    perfMon.start();

    try
    {
        string line;
        while ((line = lineStream.read()) != null)
        {
            if (line.Length == 0)
            {
                // Preserve blank lines so sentence grouping survives in the output.
                Console.WriteLine();
            }
            else
            {
                Parse[] parses = parseLine(line, parser, numParses.Value);
                for (int pi = 0, pn = parses.Length; pi < pn; pi++)
                {
                    if (showTopK)
                    {
                        // Prefix each parse with its rank and probability.
                        Console.Write(pi + " " + parses[pi].Prob + " ");
                    }
                    parses[pi].show();
                    perfMon.incrementCounter();
                }
            }
        }
    }
    catch (IOException e)
    {
        CmdLineUtil.handleStdinIoError(e);
    }

    perfMon.stopAndPrintFinalResult();
}
/// <summary>
/// CLI entry point: loads coreference models from the directory in args[0],
/// then reads one parse tree per line from stdin. A blank line ends a
/// document: entities are resolved and the annotated parses displayed.
/// </summary>
/// <param name="args">Exactly one argument: the coref model directory.</param>
public override void run(string[] args)
{
    if (args.Length != 1)
    {
        Console.WriteLine(Help);
        return;
    }

    TreebankLinker treebankLinker;
    try
    {
        treebankLinker = new TreebankLinker(args[0], LinkerMode.TEST);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "Failed to load all coreferencer models!", e);
    }

    // Fix: Console.OpenStandardInput is a method; the original passed the method
    // group (no parentheses), which does not convert to a Stream. Invoke it.
    ObjectStream<string> lineStream =
        new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput()));

    // NOTE(review): System.err is presumably a Java-compat shim for stderr.
    PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "parses");
    perfMon.start();

    try
    {
        int sentenceNumber = 0;
        IList<Mention> document = new List<Mention>();
        IList<Parse> parses = new List<Parse>();

        string line;
        while ((line = lineStream.read()) != null)
        {
            if (line.Equals(""))
            {
                // Blank line = end of document: resolve entities, display the
                // annotated parses, then reset state for the next document.
                DiscourseEntity[] entities = treebankLinker.getEntities(document.ToArray());
                (new CorefParse(this, parses, entities)).show();
                sentenceNumber = 0;
                document.Clear();
                parses.Clear();
            }
            else
            {
                Parse p = Parse.parseParse(line);
                parses.Add(p);
                Mention[] extents =
                    treebankLinker.MentionFinder.getMentions(new DefaultParse(p, sentenceNumber));

                // Construct new parses for mentions which don't have constituents.
                for (int ei = 0, en = extents.Length; ei < en; ei++)
                {
                    if (extents[ei].Parse == null)
                    {
                        // Not sure how to get head index, but it's not used at this point.
                        Parse snp = new Parse(p.Text, extents[ei].Span, "NML", 1.0, 0);
                        p.insert(snp);
                        extents[ei].Parse = new DefaultParse(snp, sentenceNumber);
                    }
                }

                document.AddRange(Arrays.asList(extents));
                sentenceNumber++;
            }
            perfMon.incrementCounter();
        }
    }
    catch (IOException e)
    {
        CmdLineUtil.handleStdinIoError(e);
    }

    perfMon.stopAndPrintFinalResult();
}
/// <summary>
/// CLI entry point: loads one name-finder model per argument, then reads
/// whitespace-tokenized lines from stdin and prints each line annotated with
/// the detected (non-overlapping) name spans. A blank line starts a new
/// document and clears the finders' adaptive data.
/// </summary>
/// <param name="args">One or more name-finder model file paths.</param>
public override void run(string[] args)
{
    if (args.Length == 0)
    {
        Console.WriteLine(Help);
        return;
    }

    // One name finder per model file given on the command line.
    NameFinderME[] nameFinders = new NameFinderME[args.Length];
    for (int i = 0; i < nameFinders.Length; i++)
    {
        TokenNameFinderModel model = (new TokenNameFinderModelLoader()).load(new File(args[i]));
        nameFinders[i] = new NameFinderME(model);
    }

    // Fix: Console.OpenStandardInput is a method; the original passed the method
    // group (no parentheses), which does not convert to a Stream. Invoke it.
    ObjectStream<string> untokenizedLineStream =
        new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput()));

    // NOTE(review): System.err is presumably a Java-compat shim for stderr.
    PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
    perfMon.start();

    try
    {
        string line;
        while ((line = untokenizedLineStream.read()) != null)
        {
            string[] whitespaceTokenizerLine = WhitespaceTokenizer.INSTANCE.tokenize(line);

            // A new line indicates a new document;
            // adaptive data must be cleared for a new document.
            if (whitespaceTokenizerLine.Length == 0)
            {
                foreach (NameFinderME nameFinder in nameFinders)
                {
                    nameFinder.clearAdaptiveData();
                }
            }

            IList<Span> names = new List<Span>();
            foreach (TokenNameFinder nameFinder in nameFinders)
            {
                Collections.addAll(names, nameFinder.find(whitespaceTokenizerLine));
            }

            // Simple way to drop intersecting spans, otherwise the
            // NameSample is invalid.
            Span[] reducedNames = NameFinderME.dropOverlappingSpans(names.ToArray());

            NameSample nameSample = new NameSample(whitespaceTokenizerLine, reducedNames, false);
            Console.WriteLine(nameSample.ToString());
            perfMon.incrementCounter();
        }
    }
    catch (IOException e)
    {
        CmdLineUtil.handleStdinIoError(e);
    }

    perfMon.stopAndPrintFinalResult();
}