Exemplo n.º 1
0
        /// <summary>
        /// Reads lines from standard input, passes them through a <c>TokenizerStream</c>
        /// built on the <c>tokenizer</c> member and a <c>WhitespaceTokenStream</c>, and
        /// writes every resulting line to standard output.
        /// </summary>
        /// <remarks>
        /// An <c>IOException</c> raised while reading is handed to
        /// <c>CmdLineUtil.handleStdinIoError</c>. The performance monitor is stopped and
        /// its final result printed after the read loop finishes.
        /// </remarks>
        internal void process()
        {
            ObjectStream<string> rawLines = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));
            ObjectStream<string> tokenizedLines = new WhitespaceTokenStream(new TokenizerStream(tokenizer, rawLines));

            var monitor = new PerformanceMonitor(System.err, "sent");
            monitor.start();

            try
            {
                // read() returning null marks the end of the input.
                for (string current = tokenizedLines.read(); current != null; current = tokenizedLines.read())
                {
                    Console.WriteLine(current);
                    monitor.incrementCounter();
                }
            }
            catch (IOException e)
            {
                CmdLineUtil.handleStdinIoError(e);
            }

            monitor.stopAndPrintFinalResult();
        }
        /// <summary>
        /// Opens the file named by the <c>SENTIMENT_MODEL_FILE</c> configuration value,
        /// resolved against the application base path, and exposes it as a stream of
        /// document samples.
        /// </summary>
        /// <returns>A <c>DocumentSampleStream</c> wrapping a line stream over that file.</returns>
        public IObjectStream<DocumentSample> GetSentimentModelStream()
        {
            // Dispose() runs first, before the stream fields are reassigned below.
            Dispose();

            string baseDirectory = AppConfig.GetAppBasePath();
            string modelFileName = AppConfig.GetConfigValue("SENTIMENT_MODEL_FILE");
            string path = Path.Combine(baseDirectory, modelFileName);

            // Both streams are kept in fields rather than locals.
            fileStream = new FileStream(path, FileMode.Open, FileAccess.Read);
            plainTextStream = new PlainTextByLineStream(fileStream);

            return new DocumentSampleStream(plainTextStream);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Feeds two three-token sentences, separated by an empty line, through a
        /// <c>ChunkSampleStream</c> and checks the word, tag and prediction of every
        /// token, then checks that one more read returns null.
        /// </summary>
        public void TestReadingEvents()
        {
            // Each token line is "word tag pred"; the lone '\n' after the third line
            // starts the next sample sentence.
            var sample = new StringBuilder()
                .Append("word11 tag11 pred11").Append('\n')
                .Append("word12 tag12 pred12").Append('\n')
                .Append("word13 tag13 pred13").Append('\n')
                .Append('\n')
                .Append("word21 tag21 pred21").Append('\n')
                .Append("word22 tag22 pred22").Append('\n')
                .Append("word23 tag23 pred23").Append('\n');

            var lineStream = new PlainTextByLineStream(new MemoryStream(Encoding.UTF8.GetBytes(sample.ToString())));
            var chunkStream = new ChunkSampleStream(lineStream);

            // Expected values for the first sample, indexed by token position.
            string[] firstWords = { "word11", "word12", "word13" };
            string[] firstTags = { "tag11", "tag12", "tag13" };
            string[] firstPreds = { "pred11", "pred12", "pred13" };

            var firstSample = chunkStream.Read();
            for (int i = 0; i < firstWords.Length; i++)
            {
                Assert.AreEqual(firstWords[i], firstSample.Sentence[i]);
                Assert.AreEqual(firstTags[i], firstSample.Tags[i]);
                Assert.AreEqual(firstPreds[i], firstSample.Preds[i]);
            }

            // Expected values for the second sample, indexed by token position.
            string[] secondWords = { "word21", "word22", "word23" };
            string[] secondTags = { "tag21", "tag22", "tag23" };
            string[] secondPreds = { "pred21", "pred22", "pred23" };

            ChunkSample secondSample = chunkStream.Read();
            for (int i = 0; i < secondWords.Length; i++)
            {
                Assert.AreEqual(secondWords[i], secondSample.Sentence[i]);
                Assert.AreEqual(secondTags[i], secondSample.Tags[i]);
                Assert.AreEqual(secondPreds[i], secondSample.Preds[i]);
            }

            // Nothing is left after the second sample.
            Assert.Null(chunkStream.Read());
        }
Exemplo n.º 4
0
        /// <summary>
        /// Builds a chunk-sample stream from command-line arguments.
        /// </summary>
        /// <param name="args">Arguments parsed into a <c>Parameters</c> instance.</param>
        /// <returns>A <c>ChunkSampleStream</c> reading lines from the <c>Data</c> file.</returns>
        public override ObjectStream<ChunkSample> create(string[] args)
        {
            Parameters parsed = ArgumentParser.parse(args, typeof(Parameters));

            // The input file is checked before it is opened.
            CmdLineUtil.checkInputFile("Data", parsed.Data);
            FileInputStream input = CmdLineUtil.openInFile(parsed.Data);

            ObjectStream<string> lines = new PlainTextByLineStream(input.Channel, parsed.Encoding);
            return new ChunkSampleStream(lines);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Reads two samples of three tokens each from a <c>ChunkSampleStream</c> and
        /// checks the word, tag and prediction of every token, then checks that a third
        /// read returns null.
        /// </summary>
        public void TestReadingEvents() {
            // The empty entry yields the blank line that starts the next sample sentence.
            string[] inputLines = {
                "word11 tag11 pred11",
                "word12 tag12 pred12",
                "word13 tag13 pred13",
                "",
                "word21 tag21 pred21",
                "word22 tag22 pred22",
                "word23 tag23 pred23"
            };
            // Every line, including the last one, ends with '\n'.
            string text = string.Join("\n", inputLines) + "\n";

            var stringStream = new PlainTextByLineStream(new MemoryStream(Encoding.UTF8.GetBytes(text)));
            var chunkStream = new ChunkSampleStream(stringStream);

            // One row per token: { word, tag, pred }.
            string[][] expectedFirst = {
                new[] { "word11", "tag11", "pred11" },
                new[] { "word12", "tag12", "pred12" },
                new[] { "word13", "tag13", "pred13" }
            };
            string[][] expectedSecond = {
                new[] { "word21", "tag21", "pred21" },
                new[] { "word22", "tag22", "pred22" },
                new[] { "word23", "tag23", "pred23" }
            };

            // read first sample
            var firstSample = chunkStream.Read();
            for (int token = 0; token < expectedFirst.Length; token++) {
                Assert.AreEqual(expectedFirst[token][0], firstSample.Sentence[token]);
                Assert.AreEqual(expectedFirst[token][1], firstSample.Tags[token]);
                Assert.AreEqual(expectedFirst[token][2], firstSample.Preds[token]);
            }

            // read second sample
            ChunkSample secondSample = chunkStream.Read();
            for (int token = 0; token < expectedSecond.Length; token++) {
                Assert.AreEqual(expectedSecond[token][0], secondSample.Sentence[token]);
                Assert.AreEqual(expectedSecond[token][1], secondSample.Tags[token]);
                Assert.AreEqual(expectedSecond[token][2], secondSample.Preds[token]);
            }

            Assert.Null(chunkStream.Read());
        }
        /// <summary>
        /// Builds a name-sample stream from command-line arguments.
        /// </summary>
        /// <param name="args">Arguments parsed into a <c>Parameters</c> instance.</param>
        /// <returns>
        /// An <c>ADNameSampleStream</c> over the lines of the <c>Data</c> file, configured
        /// with the <c>SplitHyphenatedTokens</c> value.
        /// </returns>
        public override ObjectStream<NameSample> create(string[] args)
        {
            Parameters parsed = ArgumentParser.parse(args, typeof(Parameters));

            // The language is stored in a member before the data file is opened.
            language = parsed.Lang;

            FileInputStream input = CmdLineUtil.openInFile(parsed.Data);
            ObjectStream<string> lines = new PlainTextByLineStream(input.Channel, parsed.Encoding);

            return new ADNameSampleStream(lines, parsed.SplitHyphenatedTokens.Value);
        }
        /// <summary>
        /// Builds a POS-sample stream from command-line arguments.
        /// </summary>
        /// <param name="args">Arguments parsed into a <c>Parameters</c> instance.</param>
        /// <returns>
        /// An <c>ADPOSSampleStream</c> over the lines of the <c>Data</c> file, configured
        /// with the <c>ExpandME</c> and <c>IncludeFeatures</c> values.
        /// </returns>
        public override ObjectStream<POSSample> create(string[] args)
        {
            Parameters parsed = ArgumentParser.parse(args, typeof(Parameters));

            // The language is stored in a member before the data file is opened.
            language = parsed.Lang;

            FileInputStream input = CmdLineUtil.openInFile(parsed.Data);
            ObjectStream<string> lines = new PlainTextByLineStream(input.Channel, parsed.Encoding);

            return new ADPOSSampleStream(lines, parsed.ExpandME.Value, parsed.IncludeFeatures.Value);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Loads a chunker model from the single argument, parses each standard-input
        /// line as a <c>POSSample</c>, chunks it and prints the result.
        /// </summary>
        /// <param name="args">
        /// Exactly one element, the model file path; any other count prints <c>Help</c>.
        /// </param>
        public override void run(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine(Help);
                return;
            }

            ChunkerModel model = new ChunkerModelLoader().load(new File(args[0]));
            var chunker = new ChunkerME(model, ChunkerME.DEFAULT_BEAM_SIZE);

            ObjectStream<string> lines = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));

            var monitor = new PerformanceMonitor(System.err, "sent");
            monitor.start();

            try
            {
                // read() returning null marks the end of the input.
                for (string line = lines.read(); line != null; line = lines.read())
                {
                    POSSample posSample;
                    try
                    {
                        posSample = POSSample.parse(line);
                    }
                    catch (InvalidFormatException)
                    {
                        // A line that cannot be parsed is reported and skipped; it is not counted.
                        Console.Error.WriteLine("Invalid format:");
                        Console.Error.WriteLine(line);
                        continue;
                    }

                    string[] chunks = chunker.chunk(posSample.Sentence, posSample.Tags);
                    var chunked = new ChunkSample(posSample.Sentence, posSample.Tags, chunks);
                    Console.WriteLine(chunked.nicePrint());

                    monitor.incrementCounter();
                }
            }
            catch (IOException e)
            {
                CmdLineUtil.handleStdinIoError(e);
            }

            monitor.stopAndPrintFinalResult();
        }
        /// <summary>
        /// Checks that <c>PlainTextByLineStream</c> returns four lines in order from input
        /// where three lines end with "\n" and one ends with "\r\n".
        /// </summary>
        public void TestLineSegmentation() {
            // line3 is terminated by "\r\n"; every other line by "\n".
            const string text = "line1" + "\n" + "line2" + "\n" + "line3" + "\r\n" + "line4" + "\n";

            var stream = new PlainTextByLineStream(new MemoryStream(Encoding.UTF8.GetBytes(text)));

            foreach (string expected in new[] { "line1", "line2", "line3", "line4" }) {
                Assert.AreEqual(expected, stream.Read());
            }
        }
Exemplo n.º 10
0
        /// <summary>
        /// Checks the <c>ClearAdaptiveData</c> flag of samples read from a
        /// <c>NameSampleStream</c>. The first three samples are expected to have it unset,
        /// the fourth to have it set, and a fifth read is expected to return null.
        /// </summary>
        public void TestClearAdaptiveData()
        {
            // Input is "a", "b", "c", an empty line, then "d".
            var trainingData = new StringBuilder()
                .Append("a\n")
                .Append("b\n")
                .Append("c\n")
                .Append("\n")
                .Append("d\n");

            var lineStream = new PlainTextByLineStream(new StringReader(trainingData.ToString()));
            var sampleStream = new NameSampleStream(lineStream);

            for (int read = 0; read < 3; read++)
            {
                Assert.False(sampleStream.Read().ClearAdaptiveData);
            }

            Assert.True(sampleStream.Read().ClearAdaptiveData);
            Assert.Null(sampleStream.Read());
        }
Exemplo n.º 11
0
        /// <summary>
        /// Builds a CoNLL-X POS-sample stream from command-line arguments, reading the
        /// <c>Data</c> file through a reader created with the "UTF-8" encoding name.
        /// </summary>
        /// <param name="args">Arguments parsed into a <c>Parameters</c> instance.</param>
        /// <returns>A <c>ConllXPOSSampleStream</c> over the lines of the data file.</returns>
        /// <exception cref="TerminateToolException">
        /// Thrown with code -1 when an <c>UnsupportedEncodingException</c> is raised.
        /// </exception>
        public override ObjectStream<POSSample> create(string[] args)
        {
            Parameters parsed = ArgumentParser.parse(args, typeof(Parameters));

            try
            {
                var reader = new InputStreamReader(CmdLineUtil.openInFile(parsed.Data), "UTF-8");
                ObjectStream<string> lines = new PlainTextByLineStream(reader);

                return new ConllXPOSSampleStream(lines);
            }
            catch (UnsupportedEncodingException e)
            {
                // Not expected to happen.
                throw new TerminateToolException(-1, "UTF-8 encoding is not supported: " + e.Message, e);
            }
        }
Exemplo n.º 12
0
        /// <summary>
        /// Reads four lines from a <c>PlainTextByLineStream</c> over input that mixes "\n"
        /// and "\r\n" terminators and checks each returned line.
        /// </summary>
        public void TestLineSegmentation()
        {
            // Only line3 uses the "\r\n" terminator.
            var input = new StringBuilder()
                .Append("line1").Append('\n')
                .Append("line2").Append('\n')
                .Append("line3").Append("\r\n")
                .Append("line4").Append('\n');

            byte[] encoded = Encoding.UTF8.GetBytes(input.ToString());
            var stream = new PlainTextByLineStream(new MemoryStream(encoded));

            string[] expectedLines = { "line1", "line2", "line3", "line4" };
            for (int i = 0; i < expectedLines.Length; i++)
            {
                Assert.AreEqual(expectedLines[i], stream.Read());
            }
        }
Exemplo n.º 13
0
        /// <summary>
        /// Loads a POS model from the single argument, whitespace-tokenizes every
        /// standard-input line, tags the tokens and prints the resulting <c>POSSample</c>.
        /// </summary>
        /// <param name="args">
        /// Exactly one element, the model file path; any other count prints <c>Help</c>.
        /// </param>
        public override void run(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine(Help);
                return;
            }

            POSModel model = new POSModelLoader().load(new File(args[0]));
            var tagger = new POSTaggerME(model);

            ObjectStream<string> lines = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));

            var monitor = new PerformanceMonitor(System.err, "sent");
            monitor.start();

            try
            {
                // read() returning null marks the end of the input.
                for (string line = lines.read(); line != null; line = lines.read())
                {
                    string[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(line);
                    string[] tags = tagger.tag(tokens);

                    var sample = new POSSample(tokens, tags);
                    Console.WriteLine(sample.ToString());

                    monitor.incrementCounter();
                }
            }
            catch (IOException e)
            {
                CmdLineUtil.handleStdinIoError(e);
            }

            monitor.stopAndPrintFinalResult();
        }
        /// <summary>
        /// Builds an AD chunk-sample stream from command-line arguments and applies the
        /// optional <c>Start</c> and <c>End</c> values to it.
        /// </summary>
        /// <param name="args">Arguments parsed into a <c>Parameters</c> instance.</param>
        /// <returns>The configured <c>ADChunkSampleStream</c>.</returns>
        public override ObjectStream<ChunkSample> create(string[] args)
        {
            Parameters parsed = ArgumentParser.parse(args, typeof(Parameters));

            // The language is stored in a member before the data file is opened.
            language = parsed.Lang;

            FileInputStream input = CmdLineUtil.openInFile(parsed.Data);
            ObjectStream<string> lines = new PlainTextByLineStream(input.Channel, parsed.Encoding);

            var sampleStream = new ADChunkSampleStream(lines);

            // Start and End are applied only when present and greater than -1.
            if (parsed.Start != null && parsed.Start > -1)
            {
                sampleStream.Start = parsed.Start.Value;
            }
            if (parsed.End != null && parsed.End > -1)
            {
                sampleStream.End = parsed.End.Value;
            }

            return sampleStream;
        }
Exemplo n.º 15
0
        /// <summary>
        /// Loads a detokenization dictionary from the single argument, whitespace-tokenizes
        /// every standard-input line and prints its detokenized form.
        /// </summary>
        /// <param name="args">
        /// Exactly one element, the dictionary file path; any other count prints <c>Help</c>.
        /// </param>
        public override void run(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine(Help);
                return;
            }

            var dictionary = new DetokenizationDictionaryLoader().load(new File(args[0]));
            Detokenizer detokenizer = new DictionaryDetokenizer(dictionary);

            ObjectStream<string> lines = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));

            var monitor = new PerformanceMonitor(System.err, "sent");
            monitor.start();

            try
            {
                // read() returning null marks the end of the input.
                for (string tokenizedLine = lines.read(); tokenizedLine != null; tokenizedLine = lines.read())
                {
                    // Split the line on whitespace before detokenizing it.
                    string[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(tokenizedLine);

                    Console.WriteLine(detokenizer.detokenize(tokens, null));

                    monitor.incrementCounter();
                }
            }
            catch (IOException e)
            {
                CmdLineUtil.handleStdinIoError(e);
            }

            monitor.stopAndPrintFinalResult();
        }
Exemplo n.º 16
0
        /// <summary>
        /// Loads a parser model, reads sentences from standard input one per line and
        /// shows the parses produced for each.
        /// </summary>
        /// <param name="args">
        /// At least one element; the last one is the model file path. The options
        /// <c>-bs</c>, <c>-k</c> and <c>-ap</c> are looked up in the same array. An empty
        /// array prints <c>Help</c>.
        /// </param>
        public override void run(string[] args)
        {
            if (args.Length < 1)
            {
                Console.WriteLine(Help);
                return;
            }

            ParserModel model = new ParserModelLoader().load(new File(args[args.Length - 1]));

            // A missing -bs falls back to the parser's default beam size.
            int? beamSize = CmdLineUtil.getIntParameter("-bs", args) ?? AbstractBottomUpParser.defaultBeamSize;

            // Without -k a single parse is produced and the index/probability prefix is omitted.
            int? requestedParses = CmdLineUtil.getIntParameter("-k", args);
            bool showTopK = requestedParses != null;
            int? numParses = requestedParses ?? 1;

            // A missing -ap falls back to the parser's default advance percentage.
            double? advancePercentage = CmdLineUtil.getDoubleParameter("-ap", args) ?? AbstractBottomUpParser.defaultAdvancePercentage;

            opennlp.tools.parser.Parser parser = ParserFactory.create(model, beamSize.Value, advancePercentage.Value);

            ObjectStream<string> lines = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));

            var monitor = new PerformanceMonitor(System.err, "sent");
            monitor.start();

            try
            {
                // read() returning null marks the end of the input.
                for (string line = lines.read(); line != null; line = lines.read())
                {
                    if (line.Length == 0)
                    {
                        // Empty input lines are echoed as empty output lines.
                        Console.WriteLine();
                        continue;
                    }

                    Parse[] parses = parseLine(line, parser, numParses.Value);

                    for (int index = 0; index < parses.Length; index++)
                    {
                        if (showTopK)
                        {
                            Console.Write(index + " " + parses[index].Prob + " ");
                        }

                        parses[index].show();

                        // The counter advances once per parse shown, not once per input line.
                        monitor.incrementCounter();
                    }
                }
            }
            catch (IOException e)
            {
                CmdLineUtil.handleStdinIoError(e);
            }

            monitor.stopAndPrintFinalResult();
        }
        /// <summary>
        /// Reads samples from a <c>NameSampleStream</c> and checks that
        /// <c>ClearAdaptiveData</c> is false for the first three, true for the fourth, and
        /// that the stream then returns null.
        /// </summary>
        public void TestClearAdaptiveData() {
            // An empty line sits between "c" and "d".
            const string trainingData = "a\n" + "b\n" + "c\n" + "\n" + "d\n";

            var lineStream = new PlainTextByLineStream(new StringReader(trainingData));
            var samples = new NameSampleStream(lineStream);

            bool[] expectedFlags = { false, false, false };
            foreach (bool expected in expectedFlags) {
                // Assert.False is used for every entry because all expected flags are false.
                Assert.False(samples.Read().ClearAdaptiveData);
            }

            Assert.True(samples.Read().ClearAdaptiveData);
            Assert.Null(samples.Read());
        }
Exemplo n.º 18
0
        /// <summary>
        /// Builds a <c>TreebankLinker</c> from the single argument, reads one parse per
        /// standard-input line and, at every empty line, resolves and shows the entities
        /// of the document collected so far.
        /// </summary>
        /// <param name="args">
        /// Exactly one element, passed to the <c>TreebankLinker</c> constructor; any other
        /// count prints <c>Help</c>.
        /// </param>
        /// <exception cref="TerminateToolException">
        /// Thrown with code -1 when constructing the linker raises an <c>IOException</c>.
        /// </exception>
        public override void run(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine(Help);
                return;
            }

            TreebankLinker linker;
            try
            {
                linker = new TreebankLinker(args[0], LinkerMode.TEST);
            }
            catch (IOException e)
            {
                throw new TerminateToolException(-1, "Failed to load all coreferencer models!", e);
            }

            ObjectStream<string> lines = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));

            var monitor = new PerformanceMonitor(System.err, "parses");
            monitor.start();

            try
            {
                int sentenceNumber = 0;
                IList<Mention> document = new List<Mention>();
                IList<Parse> parses = new List<Parse>();

                // NOTE(review): entities are resolved only when an empty line is read, so a
                // document not followed by an empty line is never passed to getEntities —
                // confirm this is intended.
                for (string line = lines.read(); line != null; line = lines.read())
                {
                    if (line.Length != 0)
                    {
                        Parse parse = Parse.parseParse(line);
                        parses.Add(parse);

                        Mention[] extents = linker.MentionFinder.getMentions(new DefaultParse(parse, sentenceNumber));

                        // Construct new parses for mentions which don't have constituents.
                        for (int index = 0; index < extents.Length; index++)
                        {
                            if (extents[index].Parse == null)
                            {
                                // Not sure how to get the head index, but it is not used at this point.
                                Parse inserted = new Parse(parse.Text, extents[index].Span, "NML", 1.0, 0);
                                parse.insert(inserted);
                                extents[index].Parse = new DefaultParse(inserted, sentenceNumber);
                            }
                        }

                        document.AddRange(Arrays.asList(extents));
                        sentenceNumber++;
                    }
                    else
                    {
                        // Empty line: show the collected document, then reset for the next one.
                        DiscourseEntity[] entities = linker.getEntities(document.ToArray());
                        new CorefParse(this, parses, entities).show();

                        sentenceNumber = 0;
                        document.Clear();
                        parses.Clear();
                    }

                    // Every line read, empty or not, advances the counter.
                    monitor.incrementCounter();
                }
            }
            catch (IOException e)
            {
                CmdLineUtil.handleStdinIoError(e);
            }

            monitor.stopAndPrintFinalResult();
        }
Exemplo n.º 19
0
        /// <summary>
        /// Loads one name-finder model per argument, whitespace-tokenizes every
        /// standard-input line, runs all name finders on it and prints the resulting
        /// <c>NameSample</c>.
        /// </summary>
        /// <param name="args">
        /// Model file paths, one per name finder; an empty array prints <c>Help</c>.
        /// </param>
        public override void run(string[] args)
        {
            if (args.Length == 0)
            {
                Console.WriteLine(Help);
                return;
            }

            var nameFinders = new NameFinderME[args.Length];
            for (int i = 0; i < args.Length; i++)
            {
                TokenNameFinderModel model = new TokenNameFinderModelLoader().load(new File(args[i]));
                nameFinders[i] = new NameFinderME(model);
            }

            ObjectStream<string> lines = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));

            var monitor = new PerformanceMonitor(System.err, "sent");
            monitor.start();

            try
            {
                // read() returning null marks the end of the input.
                for (string line = lines.read(); line != null; line = lines.read())
                {
                    string[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(line);

                    // A line without tokens indicates a new document, so adaptive data is
                    // cleared on every finder. The line is still processed and printed below.
                    if (tokens.Length == 0)
                    {
                        foreach (NameFinderME nameFinder in nameFinders)
                        {
                            nameFinder.clearAdaptiveData();
                        }
                    }

                    IList<Span> names = new List<Span>();
                    foreach (TokenNameFinder nameFinder in nameFinders)
                    {
                        Collections.addAll(names, nameFinder.find(tokens));
                    }

                    // Simple way to drop intersecting spans; otherwise the NameSample is invalid.
                    Span[] reducedNames = NameFinderME.dropOverlappingSpans(names.ToArray());

                    var nameSample = new NameSample(tokens, reducedNames, false);
                    Console.WriteLine(nameSample.ToString());

                    monitor.incrementCounter();
                }
            }
            catch (IOException e)
            {
                CmdLineUtil.handleStdinIoError(e);
            }

            monitor.stopAndPrintFinalResult();
        }