Example #1
        public void TestOnlyWithNamesWithTypes()
        {
            using (var file = Tests.OpenFile("opennlp/tools/namefind/OnlyWithNamesWithTypes.train")) {
                var sampleStream = new NameSampleStream(new PlainTextByLineStream(file));

                var param = new TrainingParameters();
                param.Set(Parameters.Iterations, "70");
                param.Set(Parameters.Cutoff, "1");

                var model = NameFinderME.Train(
                    "en",
                    sampleStream,
                    param,
                    new TokenNameFinderFactory(null, new Dictionary <string, object>()));

                var nameFinder = new NameFinderME(model);

                // now test if it can detect the sample sentences
                var sentence = WhitespaceTokenizer.Instance.Tokenize(
                    "Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman Robert Aderholt " +
                    "Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander");

                var names = nameFinder.Find(sentence);

                Assert.AreEqual(new Span(0, 2, "person"), names[0]);
                Assert.AreEqual(new Span(2, 4, "person"), names[1]);
                Assert.AreEqual(new Span(4, 6, "person"), names[2]);
                Assert.False(HasOtherAsOutcome(model));
            }
        }
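The .train files consumed through NameSampleStream in these tests follow the standard OpenNLP name-annotation format: whitespace-separated tokens with <START:type> ... <END> markers around each name, one sentence per line, and an empty line between documents. An illustrative line (not the actual contents of OnlyWithNamesWithTypes.train) would look like:

    <START:person> Neil Abercrombie <END> <START:person> Anibal Acevedo-Vila <END> met reporters .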
Example #2
        public void TestOnlyWithEntitiesWithTypes()
        {
            using (var file = Tests.OpenFile("opennlp/tools/namefind/OnlyWithEntitiesWithTypes.train")) {
                var sampleStream = new NameSampleStream(new PlainTextByLineStream(file));

                var param = new TrainingParameters();
                param.Set(Parameters.Iterations, "70");
                param.Set(Parameters.Cutoff, "1");

                var model = NameFinderME.Train(
                    "en",
                    sampleStream,
                    param,
                    new TokenNameFinderFactory(null, new Dictionary <string, object>()));

                var nameFinder = new NameFinderME(model);

                // now test if it can detect the sample sentences
                var sentence = WhitespaceTokenizer.Instance.Tokenize("NATO United States Barack Obama");

                var names = nameFinder.Find(sentence);

                Assert.AreEqual(new Span(0, 1, "organization"), names[0]);
                Assert.AreEqual(new Span(1, 3, "location"), names[1]);
                Assert.AreEqual(new Span(3, 5, "person"), names[2]);
                Assert.False(HasOtherAsOutcome(model));
            }
        }
Example #3
        public void TestNameFinderWithMultipleTypes()
        {
            using (var file = Tests.OpenFile("opennlp/tools/namefind/voa1.train")) {
                var sampleStream = new NameSampleStream(new PlainTextByLineStream(file));

                var param = new TrainingParameters();
                param.Set(Parameters.Iterations, "70");
                param.Set(Parameters.Cutoff, "1");

                var model = NameFinderME.Train(
                    "en",
                    sampleStream,
                    param,
                    new TokenNameFinderFactory(null, new Dictionary <string, object>()));

                var nameFinder = new NameFinderME(model);

                // now test if it can detect the sample sentences
                var sentence = new [] { "U", ".", "S", ".", "President", "Barack", "Obama", "has",
                                        "arrived", "in", "South", "Korea", ",", "where", "he", "is", "expected", "to",
                                        "show", "solidarity", "with", "the", "country", "'", "s", "president", "in",
                                        "demanding", "North", "Korea", "move", "toward", "ending", "its", "nuclear",
                                        "weapons", "programs", "." };

                var names = nameFinder.Find(sentence);

                Assert.AreEqual(4, names.Length);
                Assert.AreEqual(new Span(0, 4, "location"), names[0]);
                Assert.AreEqual(new Span(5, 7, "person"), names[1]);
                Assert.AreEqual(new Span(10, 12, "location"), names[2]);
                Assert.AreEqual(new Span(28, 30, "location"), names[3]);

                /*
                 * These asserts are not needed because the equality comparer handles the Type
                 * assertEquals("location", names1[0].getType());
                 * assertEquals("person", names1[1].getType());
                 * assertEquals("location", names1[2].getType());
                 * assertEquals("location", names1[3].getType());
                 */

                sentence = new[] {
                    "Scott", "Snyder", "is", "the", "director", "of", "the",
                    "Center", "for", "U", ".", "S", ".", "Korea", "Policy", "."
                };

                names = nameFinder.Find(sentence);

                Assert.AreEqual(2, names.Length);
                Assert.AreEqual(new Span(0, 2, "person"), names[0]);
                Assert.AreEqual(new Span(7, 15, "organization"), names[1]);

                /*
                 *
                 * assertEquals("person", names2[0].getType());
                 * assertEquals("organization", names2[1].getType());
                 *
                 */
            }
        }
Example #4
        public NameFinder(DETECTOR_TYPE finderType = DETECTOR_TYPE.Person)
        {
            string modelPath = "";

            switch (finderType)
            {
            //case DETECTOR_TYPE.Date:
            //    this.nameFinder = new NameFinderME(new TokenNameFinderModel(new FileStream(Environment.CurrentDirectory + DATE_MODEL_PATH, FileMode.Open, FileAccess.Read)));
            //    break;
            //case DETECTOR_TYPE.Location:
            //    this.nameFinder = new NameFinderME(new TokenNameFinderModel(new FileStream(Environment.CurrentDirectory + LOCATION_MODEL_PATH, FileMode.Open, FileAccess.Read)));
            //    break;
            //case DETECTOR_TYPE.Money:
            //    this.nameFinder = new NameFinderME(new TokenNameFinderModel(new FileStream(Environment.CurrentDirectory + MONEY_MODEL_PATH, FileMode.Open, FileAccess.Read)));
            //    break;
            //case DETECTOR_TYPE.Organization:
            //    this.nameFinder = new NameFinderME(new TokenNameFinderModel(new FileStream(Environment.CurrentDirectory + ORGANIZATION_MODEL_PATH, FileMode.Open, FileAccess.Read)));
            //    break;
            //case DETECTOR_TYPE.Percentage:
            //    this.nameFinder = new NameFinderME(new TokenNameFinderModel(new FileStream(Environment.CurrentDirectory + PERCENTAGE_MODEL_PATH, FileMode.Open, FileAccess.Read)));
            //    break;
            //case DETECTOR_TYPE.Person:
            //    this.nameFinder = new NameFinderME(new TokenNameFinderModel(new FileStream(Environment.CurrentDirectory + PERSON_MODEL_PATH, FileMode.Open, FileAccess.Read)));
            //    break;
            //case DETECTOR_TYPE.Time:
            //    this.nameFinder = new NameFinderME(new TokenNameFinderModel(new FileStream(Environment.CurrentDirectory + TIME_MODEL_PATH, FileMode.Open, FileAccess.Read)));
            //    break;
            case DETECTOR_TYPE.Date:
                modelPath = DATE_MODEL_PATH;
                break;

            case DETECTOR_TYPE.Location:
                modelPath = LOCATION_MODEL_PATH;
                break;

            case DETECTOR_TYPE.Money:
                modelPath = MONEY_MODEL_PATH;
                break;

            case DETECTOR_TYPE.Organization:
                modelPath = ORGANIZATION_MODEL_PATH;
                break;

            case DETECTOR_TYPE.Percentage:
                modelPath = PERCENTAGE_MODEL_PATH;
                break;

            case DETECTOR_TYPE.Person:
                modelPath = PERSON_MODEL_PATH;
                break;

            case DETECTOR_TYPE.Time:
                modelPath = TIME_MODEL_PATH;
                break;
            }

            this.nameFinder = new NameFinderME(new TokenNameFinderModel(new FileStream(Environment.CurrentDirectory + modelPath, FileMode.Open, FileAccess.Read)));
        }
Example #5
 public ILexer InitNow()
 {
     Console.WriteLine("Loading...");
     _tokenizer = prepareTokenizer();
     _nameFinder = prepareNameFinder();
     _locationFinder = prepareLocationFinder();
     _timeFinder = prepareTimeFinder();
     return this;
 }
Example #6
        public void TestNameFinder()
        {
            using (var file = Tests.OpenFile("opennlp/tools/namefind/AnnotatedSentences.txt")) {
                var sampleStream = new NameSampleStream(new PlainTextByLineStream(file, "ISO-8859-1"));

                var param = new TrainingParameters();
                param.Set(Parameters.Iterations, "70");
                param.Set(Parameters.Cutoff, "1");

                var model = NameFinderME.Train(
                    "en",
                    sampleStream,
                    param,
                    new TokenNameFinderFactory(null, new Dictionary <string, object>()));

                var nameFinder = new NameFinderME(model);

                // now test if it can detect the sample sentences
                var sentence = new[] {
                    "Alisa",
                    "appreciated",
                    "the",
                    "hint",
                    "and",
                    "enjoyed",
                    "a",
                    "delicious",
                    "traditional",
                    "meal."
                };

                var names = nameFinder.Find(sentence);

                Assert.AreEqual(1, names.Length);
                Assert.AreEqual(new Span(0, 1, Type), names[0]);

                sentence = new[] {
                    "Hi",
                    "Mike",
                    ",",
                    "it's",
                    "Stefanie",
                    "Schmidt",
                    "."
                };

                names = nameFinder.Find(sentence);

                Assert.AreEqual(2, names.Length);
                Assert.AreEqual(new Span(1, 2, Type), names[0]);
                Assert.AreEqual(new Span(4, 6, Type), names[1]);
            }
        }
Example #7
        public void MultithreadingTest()
        {
            const int threadCount = 100;

            // The expensive part of the code is to load the model!
            // but the model file can be shared.

            var fileStream = Tests.OpenFile(fileName);

            modelFile = new TokenNameFinderModel(fileStream);

            var fileContents = File.ReadAllText(Tests.GetFullPath("/opennlp/tools/sentdetect/Sentences.txt"));
            var sentences    = fileContents.Split(new [] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
            var rnd          = new Random();

            var count = 0;

            var delegates = new List <ThreadStart>(threadCount);

            for (var i = 0; i < threadCount; i++)
            {
                delegates.Add(() => {
                    // Use ONE NameFinderME instance per thread !

                    var nameFinder = new NameFinderME(modelFile);
                    var tokens     = WhitespaceTokenizer.Instance.Tokenize(sentences[rnd.Next(0, sentences.Length)]); // Random.Next's upper bound is exclusive

                    Thread.Sleep(rnd.Next(100, 300));

                    var names = nameFinder.Find(tokens);

                    Interlocked.Add(ref count, names.Length); // atomic: the counter is shared across threads
                });
            }

            var threads = delegates.Select(d => new CrossThreadTestRunner(d)).ToList();

            foreach (var thread in threads)
            {
                thread.Start();
            }

            foreach (var thread in threads)
            {
                thread.Join();
            }

            Assert.That(count, Is.GreaterThan(0));
        }
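The comments in this test capture the intended pattern: the TokenNameFinderModel is expensive to load but read-only, so it can be shared, while each thread needs its own NameFinderME because the finder keeps adaptive state between calls. A minimal sketch of packaging that rule, assuming the same SharpNL types used above (the wrapper class itself is hypothetical; ThreadLocal comes from System.Threading):

        public sealed class ThreadSafeNameFinder
        {
            private readonly ThreadLocal<NameFinderME> finder;

            public ThreadSafeNameFinder(TokenNameFinderModel model)
            {
                // The model is immutable and cheap to share; each thread lazily
                // creates its own NameFinderME instance on first use.
                finder = new ThreadLocal<NameFinderME>(() => new NameFinderME(model));
            }

            public Span[] Find(string[] tokens)
            {
                return finder.Value.Find(tokens);
            }
        }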
Example #8
        public override void run(string[] args)
        {
            if (args.Length == 0)
            {
                Console.WriteLine(Help);
            }
            else
            {
                NameFinderME[] nameFinders = new NameFinderME[args.Length];

                for (int i = 0; i < nameFinders.Length; i++)
                {
                    TokenNameFinderModel model = (new TokenNameFinderModelLoader()).load(new File(args[i]));
                    nameFinders[i] = new NameFinderME(model);
                }

                ObjectStream <string> untokenizedLineStream = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput()));

                PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
                perfMon.start();

                try
                {
                    string line;
                    while ((line = untokenizedLineStream.read()) != null)
                    {
                        string[] whitespaceTokenizerLine = WhitespaceTokenizer.INSTANCE.tokenize(line);

                        // A new line indicates a new document,
                        // adaptive data must be cleared for a new document

                        if (whitespaceTokenizerLine.Length == 0)
                        {
                            foreach (NameFinderME nameFinder in nameFinders)
                            {
                                nameFinder.clearAdaptiveData();
                            }
                        }

                        IList <Span> names = new List <Span>();

                        foreach (TokenNameFinder nameFinder in nameFinders)
                        {
                            Collections.addAll(names, nameFinder.find(whitespaceTokenizerLine));
                        }

                        // Simple way to drop intersecting spans, otherwise the
                        // NameSample is invalid
                        Span[] reducedNames = NameFinderME.dropOverlappingSpans(names.ToArray());

                        NameSample nameSample = new NameSample(whitespaceTokenizerLine, reducedNames, false);

                        Console.WriteLine(nameSample.ToString());

                        perfMon.incrementCounter();
                    }
                }
                catch (IOException e)
                {
                    CmdLineUtil.handleStdinIoError(e);
                }

                perfMon.stopAndPrintFinalResult();
            }
        }
Example #9
        public Dictionary <string, List <string> > Main(string line)
        {
            //debug sentence
            // line = "Show me the sales of Kean Cola .25ltr Bottle in Nicosia from January 2017 to October 2017 as a line chart.";
            matchedWords?.Clear();
            nounPhrases?.Clear();
            nouns?.Clear();
            adjectivePhrases?.Clear();
            verbPhrases?.Clear();
            InputStream modelIn = new FileInputStream(HttpRuntime.AppDomainAppPath + "\\Models\\en-parser-chunking.bin");

            InputStream modelIn1  = new FileInputStream(HttpRuntime.AppDomainAppPath + "\\Models\\en-ner-date.bin");
            InputStream modelIn2  = new FileInputStream(HttpRuntime.AppDomainAppPath + "\\Models\\en-token.bin");
            ParserModel model     = new ParserModel(modelIn);
            var         myParser  = ParserFactory.create(model);
            var         topParses = ParserTool.parseLine(line, myParser, 1);

            foreach (var p in topParses)
            {
                GetSentenceParts(p);
            }


            try
            {
                TokenizerModel       model1 = new TokenizerModel(modelIn2);
                TokenNameFinderModel model2 = new TokenNameFinderModel(modelIn1);

                Tokenizer tokenizer  = new TokenizerME(model1);
                var       nameFinder = new NameFinderME(model2);

                var tokens    = tokenizer.tokenize(line);
                var nameSpans = nameFinder.find(tokens);

                var array = Span.spansToStrings(nameSpans, tokens);

                //
                //                foreach (var v in array)
                //                {
                //                    System.Diagnostics.Debug.WriteLine(v);
                //                }

                dates = new HashSet <string>(array);



                PrintSets();
//                System.Diagnostics.Debug.WriteLine("\nProcessing Presentation type");
//
//                if (nouns.Contains("table"))
//                {
//                    matchedWords.Add(new Tuple<string, string>("PRESENTATION_TYPE", "table"));
//                }
//                if (nounPhrases.Contains("bar chart"))
//                {
//                    matchedWords.Add(new Tuple<string, string>("PRESENTATION_TYPE", "bar chart"));
//                }
//                if (nounPhrases.Contains("line chart"))
//                {
//                    matchedWords.Add(new Tuple<string, string>("PRESENTATION_TYPE", "line chart"));
//                }
                //TODO IF NO OPTION IS FOUND ASK THE USER TO GIVE YOU ONE. IMPLEMENT IT IN THE WEB VERSION SOON

                System.Diagnostics.Debug.WriteLine("\nProcessing Dates");

                if (dates.Count == 2)
                {
                    if (dates.ElementAt(0).contains("from"))
                    {
                        var           a       = dates.ElementAt(0).replace("from", "");
                        List <string> newList = new List <string>();
                        newList.Add("START_PERIOD");
                        matchedWords.Add(a, newList);
                        newList = new List <string>();
                        newList.Add("END_PERIOD");
                        //todo fix when the date is the same here
                        matchedWords.Add(dates.ElementAt(1), newList);
                    }
                    else
                    {
                        List <string> newList = new List <string>();
                        newList.Add("START_PERIOD");
                        matchedWords.Add(dates.ElementAt(0), newList);
                        newList = new List <string>();
                        newList.Add("END_PERIOD");
                        //todo fix when the date is the same here
                        matchedWords.Add(dates.ElementAt(1), newList);
                    }
                }

                if (dates.Count == 1)
                {
                    if (dates.ElementAt(0).contains("from"))
                    {
                        var a   = dates.ElementAt(0).replace("from", "");
                        var dts = a.Split(new[] { " to " }, StringSplitOptions.None);

                        List <string> newList = new List <string>();
                        newList.Add("START_PERIOD");
                        matchedWords.Add(dts[0], newList);
                        newList = new List <string>();
                        newList.Add("END_PERIOD");
                        //todo fix when the date is the same here
                        matchedWords.Add(dts[1], newList);
                    }
                    else
                    {
                        List <string> newList = new List <string>();
                        newList.Add("START_PERIOD");

                        newList.Add("END_PERIOD");
                        //todo fix when the date is the same here
                        matchedWords.Add(dates.ElementAt(0), newList);
                    }
                }

                System.Diagnostics.Debug.WriteLine("\nProcessing noun phrases");

                //                var manager = new Manager();
                //                var serializer = new XmlSerializer(typeof(Manager.language));
                //                var loadStream = new FileStream("file2.xml", FileMode.Open, FileAccess.Read);
                //                var loadedObject = (Manager.language) serializer.Deserialize(loadStream);


                var doc = new XmlDocument();
//                System.Diagnostics.Debug.WriteLine(HttpRuntime.AppDomainAppPath);
//                System.Diagnostics.Debug.WriteLine(HttpRuntime.AppDomainAppPath);
//                System.Diagnostics.Debug.WriteLine(HttpRuntime.AppDomainAppPath);
//                System.Diagnostics.Debug.WriteLine(HttpRuntime.AppDomainAppPath);
                doc.Load(HttpRuntime.AppDomainAppPath + "\\file2.xml");


                var root = doc.SelectSingleNode("*");
                FindMatchingNodesFromXml(root, nounPhrases);


                foreach (var item in nouns.ToList())
                {
                    foreach (var VARIABLE in matchedWords)
                    {
                        if (VARIABLE.Key.Contains(item))
                        {
                            nouns.Remove(item);    //Will work!
                        }
                    }
                }

                FindMatchingNodesFromXml(root, verbPhrases);
                // FindMatchingNodesFromXml(root, nouns);



                System.Diagnostics.Debug.WriteLine("\nProcessing verb phrases ");


                System.Diagnostics.Debug.WriteLine("\nProcessing nouns ");



                // construct the dictionary object and open it
                var directory = Directory.GetCurrentDirectory() + "\\wordnet\\";
                foreach (var variable in matchedWords)
                {
                    System.Diagnostics.Debug.WriteLine(variable.Value + "\t\t" + variable.Key);
                }

                foreach (var variable in matchedWords)
                {
                    string a = variable.Key;
                    if (line.Contains(a))
                    {
                        line = line.replace(a, "");
                    }
                }

                foreach (var variable in stopWordsofwordnet)
                {
                    string a = " " + variable.toLowerCase() + " ";
                    if (line.Contains(a))
                    {
                        line = line.replace(a, " ");
                    }
                }
                if (line.contains("."))
                {
                    line = line.replace(".", "");
                }
                if (line.contains("-"))
                {
                    line = line.replace("-", " ");
                }
                System.Diagnostics.Debug.WriteLine("/////////////");
                System.Diagnostics.Debug.WriteLine("SECOND PARSE STRING " + line);
                System.Diagnostics.Debug.WriteLine("/////////////");
                line      = line.Trim();
                topParses = ParserTool.parseLine(line, myParser, 1);
                nounPhrases?.Clear();
                dates?.Clear();
                verbPhrases?.Clear();
                nouns?.Clear();
                foreach (var p in topParses)
                {
                    //p.show();
                    GetSentenceParts(p);
                }

                FindMatchingNodesFromXml(root, nounPhrases);



                foreach (var item in nouns.ToList())
                {
                    foreach (var VARIABLE in matchedWords)
                    {
                        if (VARIABLE.Key.Contains(item))
                        {
                            nouns.Remove(item);    //Will work!
                        }
                    }
                }
                FindMatchingNodesFromXml(root, verbPhrases);
                FindMatchingNodesFromXml(root, nouns);


                tokens    = tokenizer.tokenize(line);
                nameSpans = nameFinder.find(tokens);

                array = Span.spansToStrings(nameSpans, tokens);
                dates = new HashSet <string>(array);



                PrintSets();

                System.Diagnostics.Debug.WriteLine("\nProcessing Dates");


                if (dates.Count == 2)
                {
                    if (dates.ElementAt(0).contains("from"))
                    {
                        var           a       = dates.ElementAt(0).replace("from", "");
                        List <string> newList = new List <string>();
                        newList.Add("START_PERIOD");
                        matchedWords.Add(a, newList);
                        newList = new List <string>();
                        newList.Add("END_PERIOD");
                        //todo fix when the date is the same here
                        matchedWords.Add(dates.ElementAt(1), newList);
                    }
                    else
                    {
                        List <string> newList = new List <string>();
                        newList.Add("START_PERIOD");
                        matchedWords.Add(dates.ElementAt(0), newList);
                        newList = new List <string>();
                        newList.Add("END_PERIOD");
                        //todo fix when the date is the same here
                        matchedWords.Add(dates.ElementAt(1), newList);
                    }
                }

                if (dates.Count == 1)
                {
                    if (dates.ElementAt(0).contains("from"))
                    {
                        var a   = dates.ElementAt(0).replace("from", "");
                        var dts = a.Split(new[] { " to " }, StringSplitOptions.None);

                        List <string> newList = new List <string>();
                        newList.Add("START_PERIOD");
                        matchedWords.Add(dts[0], newList);
                        newList = new List <string>();
                        newList.Add("END_PERIOD");
                        //todo fix when the date is the same here
                        matchedWords.Add(dts[1], newList);
                    }
                    else
                    {
                        List <string> newList = new List <string>();
                        newList.Add("START_PERIOD");

                        newList.Add("END_PERIOD");
                        //todo fix when the date is the same here
                        matchedWords.Add(dates.ElementAt(0), newList);
                    }
                }

                System.Diagnostics.Debug.WriteLine("\nProcessing noun phrases");

                //                var manager = new Manager();
                //                var serializer = new XmlSerializer(typeof(Manager.language));
                //                var loadStream = new FileStream("file2.xml", FileMode.Open, FileAccess.Read);
                //                var loadedObject = (Manager.language) serializer.Deserialize(loadStream);



                FindMatchingNodesFromXml(root, nounPhrases);
                FindMatchingNodesFromXml(root, verbPhrases);
                FindMatchingNodesFromXml(root, nouns);

                foreach (var variable in matchedWords)
                {
                    System.Diagnostics.Debug.WriteLine(variable.Value + "\t\t" + variable.Key);
                }

                doc = null;
                GC.Collect();
                GC.WaitForPendingFinalizers();
                //MATCHING WITH WORD NET
                System.Diagnostics.Debug.WriteLine(directory);
                //                var wordNet = new WordNetEngine();
                //
                //                wordNet.AddDataSource(new StreamReader(Path.Combine(directory, "data.adj")), PartOfSpeech.Adjective);
                //                wordNet.AddDataSource(new StreamReader(Path.Combine(directory, "data.adv")), PartOfSpeech.Adverb);
                //                wordNet.AddDataSource(new StreamReader(Path.Combine(directory, "data.noun")), PartOfSpeech.Noun);
                //                wordNet.AddDataSource(new StreamReader(Path.Combine(directory, "data.verb")), PartOfSpeech.Verb);
                //
                //                wordNet.AddIndexSource(new StreamReader(Path.Combine(directory, "index.adj")), PartOfSpeech.Adjective);
                //                wordNet.AddIndexSource(new StreamReader(Path.Combine(directory, "index.adv")), PartOfSpeech.Adverb);
                //                wordNet.AddIndexSource(new StreamReader(Path.Combine(directory, "index.noun")), PartOfSpeech.Noun);
                //                wordNet.AddIndexSource(new StreamReader(Path.Combine(directory, "index.verb")), PartOfSpeech.Verb);
                //
                //                System.Diagnostics.Debug.WriteLine("Loading database...");
                //                wordNet.Load();
                //                System.Diagnostics.Debug.WriteLine("Load completed.");
                //                while (true)
                //                {
                //                    System.Diagnostics.Debug.WriteLine("\nType first word");
                //
                //                    var word = System.Diagnostics.Debug.ReadLine();
                //                    var synSetList = wordNet.GetSynSets(word);
                //
                //                    if (synSetList.Count == 0) System.Diagnostics.Debug.WriteLine($"No SynSet found for '{word}'");
                //
                //                    foreach (var synSet in synSetList)
                //                    {
                //                        var words = string.Join(", ", synSet.Words);
                //
                //                        System.Diagnostics.Debug.WriteLine($"\nWords: {words}");
                //                    }
                //                }
            }
            catch (IOException e)
            {
                e.printStackTrace();
            }
            finally
            {
                if (modelIn != null)
                {
                    try
                    {
                        modelIn.close();   // the parser model stream was never closed above
                    }
                    catch (IOException e)
                    {
                    }
                }

                if (modelIn1 != null)
                {
                    try
                    {
                        modelIn1.close();
                    }
                    catch (IOException e)
                    {
                    }
                }

                if (modelIn2 != null)
                {
                    try
                    {
                        modelIn2.close();
                    }
                    catch (IOException e)
                    {
                    }
                }



                //            truncateLists(ref nounPhrases);
                //            truncateLists(ref nouns);
                //            truncateLists(ref dates);
                //            truncateLists(ref verbPhrases);
            }



            return(matchedWords);
        }
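Stripped of the XML matching, duplicate passes, and WordNet scaffolding, the NER core of this example reduces to three calls: tokenize the line, run the en-ner-date model, and turn the resulting spans back into strings. A condensed sketch using the same lowercase, Java-mapped OpenNLP calls and model paths that appear above:

            // Condensed date-extraction core (same calls as the example above).
            InputStream tokenStream = new FileInputStream(HttpRuntime.AppDomainAppPath + "\\Models\\en-token.bin");
            InputStream dateStream  = new FileInputStream(HttpRuntime.AppDomainAppPath + "\\Models\\en-ner-date.bin");

            var tokenizer  = new TokenizerME(new TokenizerModel(tokenStream));
            var dateFinder = new NameFinderME(new TokenNameFinderModel(dateStream));

            var tokens    = tokenizer.tokenize(line);
            var nameSpans = dateFinder.find(tokens);
            var dates     = new HashSet<string>(Span.spansToStrings(nameSpans, tokens));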
Example #10
        public NameFinder(FileStream modelStream)
        {
            TokenNameFinderModel model = new TokenNameFinderModel(modelStream);

            this.nameFinder = new NameFinderME(model);
        }
Example #11
 public NameFinder(TokenNameFinderModel model)
 {
     this.nameFinder = new NameFinderME(model);
 }
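Both this constructor and the FileStream overload in Example #10 are thin wrappers over NameFinderME. Passing in a TokenNameFinderModel is the variant to prefer when several finders should reuse one loaded model, since loading the model is the expensive step (compare Example #7). A brief usage sketch (the model file name is illustrative):

     using (var stream = new FileStream("en-ner-person.bin", FileMode.Open, FileAccess.Read))
     {
         var model = new TokenNameFinderModel(stream);    // load once
         var personFinder  = new NameFinder(model);       // this constructor
         var anotherFinder = new NameFinder(model);       // reuses the already loaded model
     }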
Example #12
        // Constructors and finalizers:
        private Repository()
        {
            _assemblyName = Regex.Match(_assemblyFullName, "^(.*?),.*$").Result("$1");

            _rootDrive = ("/usr/project/xtmp/dp195/Poetix18/").Replace(@"\", Dsc);
            _nlpFolder = ("rhetorica/nlp/").Replace(@"\", Dsc);

            _openNlpModelsFolder = ("OpenNLP/models/").Replace(@"\", Dsc);
            _openNlpModelsPath   = RootDrive + _nlpFolder + _openNlpModelsFolder;

            _wordNetFolder = ("WordNet_3/").Replace(@"\", Dsc);
            _wordNetPath   = RootDrive + _nlpFolder + _wordNetFolder;

            _grammarFolder = ("StanfordParser/grammar/").Replace(@"\", Dsc);
            _grammarPath   = RootDrive + _nlpFolder + _grammarFolder;

            _dataFolder   = ("data/").Replace(@"\", Dsc);
            _nlpTextsPath = RootDrive + _dataFolder;

            string[] localTextDirectoryParts =
            {
                CurrentAssemblyDirectoryPath,
                "..",                        "..","..", "data"
                //"..", "..", "text"
            };
            _localTextPath = Path.Combine(localTextDirectoryParts) + "/"; // For development use

            // WordNet engine:
            Console.Write("Loading WordNet engine.... ");
            _wordNetEngine = new WordNetEngine(WordNetPath, true);
            Console.WriteLine("Done.");

            // OpenNLP sentence detector:
            Console.Write("Loading OpenNLP sentence detector.... ");
            java.io.FileInputStream modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-sent.bin");
            _sentenceModel = new SentenceModel(modelInputStream);
            modelInputStream.close();
            _sentenceDetector = new SentenceDetectorME(_sentenceModel);
            Console.WriteLine("Done.");

            // OpenNLP tokenizer:
            Console.Write("Loading OpenNLP tokenizer.... ");
            modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-token.bin");
            _tokenizerModel  = new opennlp.tools.tokenize.TokenizerModel(modelInputStream);
            modelInputStream.close();
            _tokenizer = new opennlp.tools.tokenize.TokenizerME(_tokenizerModel);
            Console.WriteLine("Done.");

            // OpenNLP name finder:
            Console.Write("Loading OpenNLP name finder.... ");
            modelInputStream      = new java.io.FileInputStream(OpenNlpModelsPath + "en-ner-person.bin");
            _tokenNameFinderModel = new TokenNameFinderModel(modelInputStream);
            modelInputStream.close();
            _nameFinder = new NameFinderME(_tokenNameFinderModel);
            Console.WriteLine("Done.");

            // OpenNLP POS tagger:
            Console.Write("Loading OpenNLP POS tagger.... ");
            modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-pos-maxent.bin");
            _posModel        = new POSModel(modelInputStream);
            modelInputStream.close();
            _tagger = new POSTaggerME(_posModel);
            Console.WriteLine("Done.");

            // OpenNLP chunker:
            Console.Write("Loading OpenNLP chunker.... ");
            modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-chunker.bin");
            _chunkerModel    = new ChunkerModel(modelInputStream);
            modelInputStream.close();
            _chunker = new ChunkerME(_chunkerModel);
            Console.WriteLine("Done.");

            // OpenNLP parser:
            if (_loadParser)
            {
                Console.Write("Loading OpenNLP parser.... ");
                modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-parser-chunking.bin");
                _parserModel     = new ParserModel(modelInputStream);
                modelInputStream.close();
                _parser = ParserFactory.create(_parserModel);
                Console.WriteLine("Done.");
            }

            // Stanford parser:
            //_stanfordParser = new LexicalizedParser(GrammarPath + "englishPCFG.ser.gz"); // Obsolete method
            _stanfordParser = LexicalizedParser.loadModel(GrammarPath + "englishPCFG.ser.gz");

            // Porter stemmer:
            _porterStemmer = new PorterStemmer();
        }
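The constructor above only loads the models. A helper on the same class could chain them for person-name extraction; the method below is hypothetical, but sentDetect, tokenize, find and clearAdaptiveData are the standard OpenNLP calls exposed by the java.io-based types loaded here:

        // Hypothetical helper: run the loaded pipeline over a block of text.
        public Span[][] FindPersonNames(string text)
        {
            var sentences = _sentenceDetector.sentDetect(text);
            var result = new Span[sentences.Length][];
            for (int i = 0; i < sentences.Length; i++)
            {
                var tokens = _tokenizer.tokenize(sentences[i]);
                result[i] = _nameFinder.find(tokens);   // en-ner-person model loaded above
            }
            _nameFinder.clearAdaptiveData();            // reset adaptive state after the document
            return result;
        }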