/// <summary>
/// Loads a token name-finder model from the configured NLP model directory and
/// registers a <see cref="NameFinderME"/> built from it in <c>nameFinders</c>.
/// </summary>
/// <param name="modelFile">File name of the model, relative to the NLP models folder.</param>
private void LoadModel(string modelFile)
{
    var modelPath = Path.Combine(configuration.Resources, configuration.NlpModels, modelFile);
    using (var modelStream = new FileStream(modelPath, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        nameFinders.Add(new NameFinderME(new TokenNameFinderModel(modelStream)));
    }
}
/// <summary>
/// Loads the name-finder model from <c>_nameFinderModelPath</c> and builds a
/// <see cref="NameFinderME"/> from it.
/// </summary>
/// <returns>A ready-to-use name finder.</returns>
private NameFinderME PrepareNameFinder()
{
    var modelInputStream = new FileInputStream(_nameFinderModelPath); //load the name model into a stream
    try
    {
        var model = new TokenNameFinderModel(modelInputStream); //load the model
        return new NameFinderME(model); //create the namefinder
    }
    finally
    {
        // Fix: the stream was previously never closed, leaking a file handle
        // on every call (the sibling PrepareNameFinder overload does close it).
        modelInputStream.close();
    }
}
/// <summary>
/// Returns the NER tagger model registered under <paramref name="modelName"/>,
/// loading it through <paramref name="loader"/> and caching it on first use.
/// </summary>
/// <param name="modelName">Resource name of the model file.</param>
/// <param name="loader">Resource loader used to open the model stream on a cache miss.</param>
public static TokenNameFinderModel GetNERTaggerModel(string modelName, IResourceLoader loader)
{
    bool cached = nerModels.TryGetValue(modelName, out TokenNameFinderModel model) && model != null;
    if (cached)
    {
        return model;
    }

    // Cache miss: read the model through the loader and remember it.
    using (Stream resource = loader.OpenResource(modelName))
    {
        model = new TokenNameFinderModel(new ikvm.io.InputStreamWrapper(resource));
    }
    nerModels[modelName] = model;
    return model;
}
/// <summary>
/// Verifies that one shared <see cref="TokenNameFinderModel"/> can safely serve
/// many concurrent <see cref="NameFinderME"/> instances (one finder per thread).
/// </summary>
public void MultithreadingTest()
{
    const int threadCount = 100;

    // The expensive part of the code is to load the model!
    // but the model file can be shared.
    var fileStream = Tests.OpenFile(fileName);
    modelFile = new TokenNameFinderModel(fileStream);

    var fileContents = File.ReadAllText(Tests.GetFullPath("/opennlp/tools/sentdetect/Sentences.txt"));
    var sentences = fileContents.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
    var rnd = new Random();
    var count = 0;
    var delegates = new List<ThreadStart>(threadCount);
    for (var i = 0; i < threadCount; i++)
    {
        // Fix: System.Random is not thread-safe; the original called rnd.Next
        // from all worker threads concurrently. Draw the values here, on the
        // main thread, while the delegates are being built.
        var sentence = sentences[rnd.Next(0, sentences.Length - 1)];
        var sleepMs = rnd.Next(100, 300);
        delegates.Add(() =>
        {
            // Use ONE NameFinderME instance per thread !
            var nameFinder = new NameFinderME(modelFile);
            var tokens = WhitespaceTokenizer.Instance.Tokenize(sentence);
            Thread.Sleep(sleepMs);
            var names = nameFinder.Find(tokens);
            // Fix: "count += names.Length" is a non-atomic read-modify-write;
            // with 100 threads, updates could be lost and the final assertion
            // could observe a wrong total.
            Interlocked.Add(ref count, names.Length);
        });
    }
    var threads = delegates.Select(d => new CrossThreadTestRunner(d)).ToList();
    foreach (var thread in threads)
    {
        thread.Start();
    }
    foreach (var thread in threads)
    {
        thread.Join();
    }
    Assert.That(count, Is.GreaterThan(0));
}
/// <summary>
/// Builds a <see cref="NameFinderME"/> from the model file at
/// <c>_nameFinderModelPath</c>. The model stream is closed once the model
/// data has been read.
/// </summary>
private NameFinderME PrepareNameFinder()
{
    FileInputStream stream = new FileInputStream(_nameFinderModelPath);
    TokenNameFinderModel finderModel = new TokenNameFinderModel(stream);
    stream.close();
    return new NameFinderME(finderModel);
}
/// <summary>
/// Checks whether the model's sequence outcomes include the "other" outcome
/// (<see cref="NameFinderME.Other"/>).
/// </summary>
/// <param name="nameFinderModel">The model whose outcomes are inspected.</param>
private static bool HasOtherAsOutcome(TokenNameFinderModel nameFinderModel)
{
    foreach (var outcome in nameFinderModel.NameFinderSequenceModel.GetOutcomes())
    {
        if (outcome.Equals(NameFinderME.Other))
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Creates the NER tagger operation, wrapping the supplied OpenNLP model in a
/// <see cref="NameFinderME"/> used for all subsequent tagging calls.
/// </summary>
/// <param name="model">A loaded token name-finder model.</param>
public NLPNERTaggerOp(TokenNameFinderModel model)
{
    this.nameFinder = new NameFinderME(model);
}
/// <summary>
/// Command-line entry point: loads one name-finder model per argument, reads
/// untokenized lines from stdin, tags each line with every finder, drops
/// overlapping spans and prints the resulting NameSample to stdout. With no
/// arguments it prints the help text instead.
/// </summary>
/// <param name="args">Paths of the token name-finder model files to load.</param>
public override void run(string[] args)
{
    if (args.Length == 0)
    {
        Console.WriteLine(Help);
    }
    else
    {
        // One NameFinderME per model file given on the command line.
        NameFinderME[] nameFinders = new NameFinderME[args.Length];
        for (int i = 0; i < nameFinders.Length; i++)
        {
            TokenNameFinderModel model = (new TokenNameFinderModelLoader()).load(new File(args[i]));
            nameFinders[i] = new NameFinderME(model);
        }
        // Read stdin line by line; performance counters go to Java's System.err.
        ObjectStream<string> untokenizedLineStream = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));
        PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
        perfMon.start();
        try
        {
            string line;
            while ((line = untokenizedLineStream.read()) != null)
            {
                string[] whitespaceTokenizerLine = WhitespaceTokenizer.INSTANCE.tokenize(line);
                // A new line indicates a new document,
                // adaptive data must be cleared for a new document
                if (whitespaceTokenizerLine.Length == 0)
                {
                    foreach (NameFinderME nameFinder in nameFinders)
                    {
                        nameFinder.clearAdaptiveData();
                    }
                }
                // Collect the spans found by every finder for this line.
                IList<Span> names = new List<Span>();
                foreach (TokenNameFinder nameFinder in nameFinders)
                {
                    Collections.addAll(names, nameFinder.find(whitespaceTokenizerLine));
                }
                // Simple way to drop intersecting spans, otherwise the
                // NameSample is invalid
                Span[] reducedNames = NameFinderME.dropOverlappingSpans(names.ToArray());
                NameSample nameSample = new NameSample(whitespaceTokenizerLine, reducedNames, false);
                Console.WriteLine(nameSample.ToString());
                perfMon.incrementCounter();
            }
        }
        catch (IOException e)
        {
            CmdLineUtil.handleStdinIoError(e);
        }
        perfMon.stopAndPrintFinalResult();
    }
}
/// <summary>
/// Runs the full NLP pipeline over one user query: parses the sentence,
/// extracts dates / noun phrases / verb phrases, matches them against the
/// vocabulary in file2.xml, strips matched words and stop words from the query,
/// then re-parses the reduced sentence and matches again. Returns the
/// accumulated map of matched text to semantic labels (e.g. START_PERIOD /
/// END_PERIOD).
/// </summary>
/// <param name="line">The raw query sentence.</param>
/// <returns>Matched words/phrases mapped to their semantic role labels.</returns>
public Dictionary <string, List <string> > Main(string line)
{
    //debug sentence
    // line = "Show me the sales of Kean Cola .25ltr Bottle in Nicosia from January 2017 to October 2017 as a line chart.";

    // Reset all per-call state held in fields (hence the null-conditionals).
    matchedWords?.Clear();
    nounPhrases?.Clear();
    nouns?.Clear();
    adjectivePhrases?.Clear();
    verbPhrases?.Clear();

    // Model streams: parser-chunker, date NER, and tokenizer models.
    // NOTE(review): modelIn is never closed in the finally block below — only
    // modelIn1 and modelIn2 are. Likely a file-handle leak; confirm and fix.
    InputStream modelIn = new FileInputStream(HttpRuntime.AppDomainAppPath + "\\Models\\en-parser-chunking.bin");
    InputStream modelIn1 = new FileInputStream(HttpRuntime.AppDomainAppPath + "\\Models\\en-ner-date.bin");
    InputStream modelIn2 = new FileInputStream(HttpRuntime.AppDomainAppPath + "\\Models\\en-token.bin");
    ParserModel model = new ParserModel(modelIn);
    var myParser = ParserFactory.create(model);

    // First parse: collect sentence parts (nouns, noun/verb/adjective phrases).
    var topParses = ParserTool.parseLine(line, myParser, 1);
    foreach (var p in topParses)
    {
        GetSentenceParts(p);
    }
    try
    {
        TokenizerModel model1 = new TokenizerModel(modelIn2);
        TokenNameFinderModel model2 = new TokenNameFinderModel(modelIn1);
        Tokenizer tokenizer = new TokenizerME(model1);
        var nameFinder = new NameFinderME(model2);

        // Tokenize and run date NER over the original sentence.
        var tokens = tokenizer.tokenize(line);
        var nameSpans = nameFinder.find(tokens);
        var array = Span.spansToStrings(nameSpans, tokens);
        //
        // foreach (var v in array)
        // {
        //     System.Diagnostics.Debug.WriteLine(v);
        // }
        dates = new HashSet <string>(array);
        PrintSets();
        // System.Diagnostics.Debug.WriteLine("\nProcessing Presentation type");
        //
        // if (nouns.Contains("table"))
        // {
        //     matchedWords.Add(new Tuple<string, string>("PRESENTATION_TYPE", "table"));
        // }
        // if (nounPhrases.Contains("bar chart"))
        // {
        //     matchedWords.Add(new Tuple<string, string>("PRESENTATION_TYPE", "bar chart"));
        // }
        // if (nounPhrases.Contains("line chart"))
        // {
        //     matchedWords.Add(new Tuple<string, string>("PRESENTATION_TYPE", "line chart"));
        // }
        //TODO IF NO OPTION IS FOUND ASK THE USER TO GIVE YOU ONE. IMPLEMENT IT IN THE WEB VERSION SOON

        // Two date spans: treat them as the start and end of a reporting period.
        System.Diagnostics.Debug.WriteLine("\nProcessing Dates");
        if (dates.Count == 2)
        {
            if (dates.ElementAt(0).contains("from"))
            {
                var a = dates.ElementAt(0).replace("from", "");
                List <string> newList = new List <string>();
                newList.Add("START_PERIOD");
                matchedWords.Add(a, newList);
                newList = new List <string>();
                newList.Add("END_PERIOD");
                //todo fix when the date is the same here
                matchedWords.Add(dates.ElementAt(1), newList);
            }
            else
            {
                List <string> newList = new List <string>();
                newList.Add("START_PERIOD");
                matchedWords.Add(dates.ElementAt(0), newList);
                newList = new List <string>();
                newList.Add("END_PERIOD");
                //todo fix when the date is the same here
                matchedWords.Add(dates.ElementAt(1), newList);
            }
        }
        // One date span: either "from X to Y" fused into a single span, or a
        // single date acting as both start and end of the period.
        if (dates.Count == 1)
        {
            if (dates.ElementAt(0).contains("from"))
            {
                var a = dates.ElementAt(0).replace("from", "");
                var dts = a.Split(new[] { " to " }, StringSplitOptions.None);
                List <string> newList = new List <string>();
                newList.Add("START_PERIOD");
                matchedWords.Add(dts[0], newList);
                newList = new List <string>();
                newList.Add("END_PERIOD");
                //todo fix when the date is the same here
                matchedWords.Add(dts[1], newList);
            }
            else
            {
                List <string> newList = new List <string>();
                newList.Add("START_PERIOD");
                newList.Add("END_PERIOD");
                //todo fix when the date is the same here
                matchedWords.Add(dates.ElementAt(0), newList);
            }
        }
        System.Diagnostics.Debug.WriteLine("\nProcessing noun phrases");
        // var manager = new Manager();
        // var serializer = new XmlSerializer(typeof(Manager.language));
        // var loadStream = new FileStream("file2.xml", FileMode.Open, FileAccess.Read);
        // var loadedObject = (Manager.language) serializer.Deserialize(loadStream);

        // Load the vocabulary XML and match the extracted phrases against it.
        var doc = new XmlDocument();
        doc.Load(HttpRuntime.AppDomainAppPath + "\\file2.xml");
        var root = doc.SelectSingleNode("*");
        FindMatchingNodesFromXml(root, nounPhrases);
        // Drop nouns already covered by a matched (longer) entry.
        foreach (var item in nouns.ToList())
        {
            foreach (var VARIABLE in matchedWords)
            {
                if (VARIABLE.Key.Contains(item))
                {
                    nouns.Remove(item); //Will work!
                }
            }
        }
        FindMatchingNodesFromXml(root, verbPhrases);
        // FindMatchingNodesFromXml(root, nouns);
        System.Diagnostics.Debug.WriteLine("\nProcessing verb phrases ");
        System.Diagnostics.Debug.WriteLine("\nProcessing nouns ");
        // construct the dictionary object and open it
        var directory = Directory.GetCurrentDirectory() + "\\wordnet\\";
        foreach (var variable in matchedWords)
        {
            System.Diagnostics.Debug.WriteLine(variable.Value + "\t\t" + variable.Key);
        }

        // Remove everything already matched from the query text...
        foreach (var variable in matchedWords)
        {
            string a = variable.Key;
            if (line.Contains(a))
            {
                line = line.replace(a, "");
            }
        }
        // ...then remove stop words and stray punctuation before the re-parse.
        foreach (var variable in stopWordsofwordnet)
        {
            string a = " " + variable.toLowerCase() + " ";
            if (line.Contains(a))
            {
                line = line.replace(a, " ");
            }
        }
        if (line.contains("."))
        {
            line = line.replace(".", "");
        }
        if (line.contains("-"))
        {
            line = line.replace("-", " ");
        }
        System.Diagnostics.Debug.WriteLine("/////////////");
        System.Diagnostics.Debug.WriteLine("SECOND PARSE STRING " + line);
        System.Diagnostics.Debug.WriteLine("/////////////");
        line = line.Trim();

        // Second pass over the reduced sentence: re-parse, re-extract, re-match.
        topParses = ParserTool.parseLine(line, myParser, 1);
        nounPhrases?.Clear();
        dates?.Clear();
        verbPhrases?.Clear();
        nouns?.Clear();
        foreach (var p in topParses)
        {
            //p.show();
            GetSentenceParts(p);
        }
        FindMatchingNodesFromXml(root, nounPhrases);
        foreach (var item in nouns.ToList())
        {
            foreach (var VARIABLE in matchedWords)
            {
                if (VARIABLE.Key.Contains(item))
                {
                    nouns.Remove(item); //Will work!
                }
            }
        }
        FindMatchingNodesFromXml(root, verbPhrases);
        FindMatchingNodesFromXml(root, nouns);

        // Re-run date NER on the reduced sentence.
        tokens = tokenizer.tokenize(line);
        nameSpans = nameFinder.find(tokens);
        array = Span.spansToStrings(nameSpans, tokens);
        dates = new HashSet <string>(array);
        PrintSets();
        // Same two-date / one-date handling as in the first pass.
        // NOTE(review): matchedWords.Add(...) throws ArgumentException if the
        // same date text was already added in the first pass — confirm
        // duplicates cannot reach this point.
        System.Diagnostics.Debug.WriteLine("\nProcessing Dates");
        if (dates.Count == 2)
        {
            if (dates.ElementAt(0).contains("from"))
            {
                var a = dates.ElementAt(0).replace("from", "");
                List <string> newList = new List <string>();
                newList.Add("START_PERIOD");
                matchedWords.Add(a, newList);
                newList = new List <string>();
                newList.Add("END_PERIOD");
                //todo fix when the date is the same here
                matchedWords.Add(dates.ElementAt(1), newList);
            }
            else
            {
                List <string> newList = new List <string>();
                newList.Add("START_PERIOD");
                matchedWords.Add(dates.ElementAt(0), newList);
                newList = new List <string>();
                newList.Add("END_PERIOD");
                //todo fix when the date is the same here
                matchedWords.Add(dates.ElementAt(1), newList);
            }
        }
        if (dates.Count == 1)
        {
            if (dates.ElementAt(0).contains("from"))
            {
                var a = dates.ElementAt(0).replace("from", "");
                var dts = a.Split(new[] { " to " }, StringSplitOptions.None);
                List <string> newList = new List <string>();
                newList.Add("START_PERIOD");
                matchedWords.Add(dts[0], newList);
                newList = new List <string>();
                newList.Add("END_PERIOD");
                //todo fix when the date is the same here
                matchedWords.Add(dts[1], newList);
            }
            else
            {
                List <string> newList = new List <string>();
                newList.Add("START_PERIOD");
                newList.Add("END_PERIOD");
                //todo fix when the date is the same here
                matchedWords.Add(dates.ElementAt(0), newList);
            }
        }
        System.Diagnostics.Debug.WriteLine("\nProcessing noun phrases");
        // var manager = new Manager();
        // var serializer = new XmlSerializer(typeof(Manager.language));
        // var loadStream = new FileStream("file2.xml", FileMode.Open, FileAccess.Read);
        // var loadedObject = (Manager.language) serializer.Deserialize(loadStream);
        FindMatchingNodesFromXml(root, nounPhrases);
        FindMatchingNodesFromXml(root, verbPhrases);
        FindMatchingNodesFromXml(root, nouns);
        foreach (var variable in matchedWords)
        {
            System.Diagnostics.Debug.WriteLine(variable.Value + "\t\t" + variable.Key);
        }
        // NOTE(review): forcing a GC here is almost certainly unnecessary;
        // dropping the doc reference is enough. Kept as-is.
        doc = null;
        GC.Collect();
        GC.WaitForPendingFinalizers();
        //MATCHING WITH WORD NET
        System.Diagnostics.Debug.WriteLine(directory);
        // var wordNet = new WordNetEngine();
        //
        // wordNet.AddDataSource(new StreamReader(Path.Combine(directory, "data.adj")), PartOfSpeech.Adjective);
        // wordNet.AddDataSource(new StreamReader(Path.Combine(directory, "data.adv")), PartOfSpeech.Adverb);
        // wordNet.AddDataSource(new StreamReader(Path.Combine(directory, "data.noun")), PartOfSpeech.Noun);
        // wordNet.AddDataSource(new StreamReader(Path.Combine(directory, "data.verb")), PartOfSpeech.Verb);
        //
        // wordNet.AddIndexSource(new StreamReader(Path.Combine(directory, "index.adj")), PartOfSpeech.Adjective);
        // wordNet.AddIndexSource(new StreamReader(Path.Combine(directory, "index.adv")), PartOfSpeech.Adverb);
        // wordNet.AddIndexSource(new StreamReader(Path.Combine(directory, "index.noun")), PartOfSpeech.Noun);
        // wordNet.AddIndexSource(new StreamReader(Path.Combine(directory, "index.verb")), PartOfSpeech.Verb);
        //
        // System.Diagnostics.Debug.WriteLine("Loading database...");
        // wordNet.Load();
        // System.Diagnostics.Debug.WriteLine("Load completed.");
        // while (true)
        // {
        //     System.Diagnostics.Debug.WriteLine("\nType first word");
        //
        //     var word = System.Diagnostics.Debug.ReadLine();
        //     var synSetList = wordNet.GetSynSets(word);
        //
        //     if (synSetList.Count == 0) System.Diagnostics.Debug.WriteLine($"No SynSet found for '{word}'");
        //
        //     foreach (var synSet in synSetList)
        //     {
        //         var words = string.Join(", ", synSet.Words);
        //
        //         System.Diagnostics.Debug.WriteLine($"\nWords: {words}");
        //     }
        // }
    }
    catch (IOException e)
    {
        e.printStackTrace();
    }
    finally
    {
        // Close the NER and tokenizer model streams, ignoring close failures.
        if (modelIn1 != null)
        {
            try
            {
                modelIn1.close();
            }
            catch (IOException e)
            {
            }
        }
        if (modelIn2 != null)
        {
            try
            {
                modelIn2.close();
            }
            catch (IOException e)
            {
            }
        }
        // truncateLists(ref nounPhrases);
        // truncateLists(ref nouns);
        // truncateLists(ref dates);
        // truncateLists(ref verbPhrases);
    }
    return(matchedWords);
}
/// <summary>
/// Loads every serializable resource file found in <paramref name="resourcePath"/>,
/// keyed by file name. Files with no extension, or whose extension has no
/// registered artifact serializer, are skipped.
/// </summary>
/// <param name="resourcePath">Directory containing resource files; may be null.</param>
/// <returns>Map from resource file name to the deserialized artifact.</returns>
public static IDictionary <string, object> loadResources(Jfile resourcePath)
{
    IDictionary <string, object> resources = new Dictionary <string, object>();
    if (resourcePath != null)
    {
        IDictionary <string, ArtifactSerializer> artifactSerializers = TokenNameFinderModel.createArtifactSerializers();
        File[] resourceFiles = resourcePath.listFiles();
        // TODO: Filter files, also files with start with a dot
        foreach (File resourceFile in resourceFiles)
        {
            // TODO: Move extension extracting code to method and
            // write unit test for it

            // extract file ending
            string resourceName = resourceFile.Name;
            int lastDot = resourceName.LastIndexOf('.');
            if (lastDot == -1)
            {
                continue;
            }
            string ending = resourceName.Substring(lastDot + 1);
            // Fix: the original indexed artifactSerializers[ending] directly,
            // which throws KeyNotFoundException for unknown extensions instead
            // of skipping them as the subsequent null check intended.
            // TryGetValue restores the skip-unknown behavior.
            if (!artifactSerializers.TryGetValue(ending, out ArtifactSerializer serializer) || serializer == null)
            {
                continue;
            }
            InputStream resourceIn = CmdLineUtil.openInFile(resourceFile);
            try
            {
                resources[resourceName] = serializer.create(resourceIn);
            }
            catch (InvalidFormatException e)
            {
                // TODO: Fix exception handling
                Console.WriteLine(e.ToString());
                Console.Write(e.StackTrace);
            }
            catch (IOException e)
            {
                // TODO: Fix exception handling
                Console.WriteLine(e.ToString());
                Console.Write(e.StackTrace);
            }
            finally
            {
                // Always close the stream; a failed close is not actionable here.
                try
                {
                    resourceIn.close();
                }
                catch (IOException)
                {
                }
            }
        }
    }
    return resources;
}
/// <summary>
/// Creates a NameFinder that tags names using the token name-finder model read
/// from the supplied stream.
/// </summary>
/// <param name="modelStream">Open, readable stream containing a serialized token name-finder model.</param>
public NameFinder(FileStream modelStream)
{
    TokenNameFinderModel model = new TokenNameFinderModel(modelStream);
    this.nameFinder = new NameFinderME(model);
}
/// <summary>
/// Creates a NameFinder that tags names using an already-loaded token
/// name-finder model.
/// </summary>
/// <param name="model">The loaded token name-finder model.</param>
public NameFinder(TokenNameFinderModel model)
{
    this.nameFinder = new NameFinderME(model);
}
// Constructors and finalizers:
/// <summary>
/// Initializes the singleton: resolves all resource/model paths, then eagerly
/// loads the WordNet engine, the OpenNLP sentence / token / name / POS /
/// chunker models (and optionally the parser), the Stanford parser, and the
/// Porter stemmer. Each Java model stream is closed right after its model is read.
/// </summary>
private Repository()
{
    // Assembly name without the version/culture suffix.
    _assemblyName = Regex.Match(_assemblyFullName, "^(.*?),.*$").Result("$1");

    // Resource path roots; backslashes normalized to the platform separator.
    _rootDrive = ("/usr/project/xtmp/dp195/Poetix18/").Replace(@"\", Dsc);
    _nlpFolder = ("rhetorica/nlp/").Replace(@"\", Dsc);
    _openNlpModelsFolder = ("OpenNLP/models/").Replace(@"\", Dsc);
    _openNlpModelsPath = RootDrive + _nlpFolder + _openNlpModelsFolder;
    _wordNetFolder = ("WordNet_3/").Replace(@"\", Dsc);
    _wordNetPath = RootDrive + _nlpFolder + _wordNetFolder;
    _grammarFolder = ("StanfordParser/grammar/").Replace(@"\", Dsc);
    _grammarPath = RootDrive + _nlpFolder + _grammarFolder;
    _dataFolder = ("data/").Replace(@"\", Dsc);
    _nlpTextsPath = RootDrive + _dataFolder;
    string[] localTextDirectoryParts = {
        CurrentAssemblyDirectoryPath, "..", "..", "..", "data"
        //"..", "..", "text"
    };
    _localTextPath = Path.Combine(localTextDirectoryParts) + "/"; // For development use

    // WordNet engine:
    Console.Write("Loading WordNet engine.... ");
    _wordNetEngine = new WordNetEngine(WordNetPath, true);
    Console.WriteLine("Done.");

    // OpenNLP sentence detector:
    Console.Write("Loading OpenNLP sentence detector.... ");
    java.io.FileInputStream modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-sent.bin");
    _sentenceModel = new SentenceModel(modelInputStream);
    modelInputStream.close();
    _sentenceDetector = new SentenceDetectorME(_sentenceModel);
    Console.WriteLine("Done.");

    // OpenNLP tokenizer:
    Console.Write("Loading OpenNLP tokenizer.... ");
    modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-token.bin");
    _tokenizerModel = new opennlp.tools.tokenize.TokenizerModel(modelInputStream);
    modelInputStream.close();
    _tokenizer = new opennlp.tools.tokenize.TokenizerME(_tokenizerModel);
    Console.WriteLine("Done.");

    // OpenNLP name finder:
    Console.Write("Loading OpenNLP name finder.... ");
    modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-ner-person.bin");
    _tokenNameFinderModel = new TokenNameFinderModel(modelInputStream);
    modelInputStream.close();
    _nameFinder = new NameFinderME(_tokenNameFinderModel);
    Console.WriteLine("Done.");

    // OpenNLP POS tagger:
    Console.Write("Loading OpenNLP POS tagger.... ");
    modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-pos-maxent.bin");
    _posModel = new POSModel(modelInputStream);
    modelInputStream.close();
    _tagger = new POSTaggerME(_posModel);
    Console.WriteLine("Done.");

    // OpenNLP chunker:
    Console.Write("Loading OpenNLP chunker.... ");
    modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-chunker.bin");
    _chunkerModel = new ChunkerModel(modelInputStream);
    modelInputStream.close();
    _chunker = new ChunkerME(_chunkerModel);
    Console.WriteLine("Done.");

    // OpenNLP parser (optional — expensive to load):
    if (_loadParser)
    {
        Console.Write("Loading OpenNLP parser.... ");
        modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-parser-chunking.bin");
        _parserModel = new ParserModel(modelInputStream);
        modelInputStream.close();
        _parser = ParserFactory.create(_parserModel);
        Console.WriteLine("Done.");
    }

    // Stanford parser:
    //_stanfordParser = new LexicalizedParser(GrammarPath + "englishPCFG.ser.gz"); // Obsolete method
    _stanfordParser = LexicalizedParser.loadModel(GrammarPath + "englishPCFG.ser.gz");

    // Porter stemmer:
    _porterStemmer = new PorterStemmer();
}
/// <summary>
/// Loads the time NER model and builds a <see cref="NameFinderME"/> for
/// recognizing time expressions.
/// </summary>
/// <returns>A ready-to-use time name finder.</returns>
private static NameFinderME prepareTimeFinder()
{
    var modelInputStream = new java.io.FileInputStream(@"c:\projects\OTAutocompleteDSL\dep\en-ner-time.bin"); //load the name model into a stream
    try
    {
        var model = new TokenNameFinderModel(modelInputStream); //load the model
        return new NameFinderME(model); //create the namefinder
    }
    finally
    {
        // Fix: the stream was previously never closed, leaking the file handle.
        modelInputStream.close();
    }
}
/// <summary>
/// Loads the name-finder model from <c>_nameFinderModelPath</c> and builds a
/// <see cref="NameFinderME"/> from it.
/// </summary>
/// <returns>A ready-to-use name finder.</returns>
private NameFinderME PrepareNameFinder()
{
    var modelInputStream = new FileInputStream(_nameFinderModelPath); //load the name model into a stream
    try
    {
        var model = new TokenNameFinderModel(modelInputStream); //load the model
        return new NameFinderME(model); //create the namefinder
    }
    finally
    {
        // Fix: the stream was previously never closed, leaking a file handle
        // on every call.
        modelInputStream.close();
    }
}
/// <summary>
/// Runs the name-finder evaluation tool: loads the model named in the tool
/// parameters, evaluates it against the measured sample stream while reporting
/// throughput, then prints either the plain F-measure or the detailed
/// per-type F-measure report.
/// </summary>
/// <param name="format">Sample stream format name, forwarded to the base tool.</param>
/// <param name="args">Remaining command-line arguments, forwarded to the base tool.</param>
public override void run(string format, string[] args)
{
    base.run(format, args);
    TokenNameFinderModel model = (new TokenNameFinderModelLoader()).load(@params.Model);

    // Optional listeners: misclassification logging and detailed F-measure.
    IList <EvaluationMonitor <NameSample> > listeners = new LinkedList <EvaluationMonitor <NameSample> >();
    if (@params.Misclassified.Value)
    {
        listeners.Add(new NameEvaluationErrorListener());
    }
    TokenNameFinderDetailedFMeasureListener detailedFListener = null;
    if (@params.DetailedF.Value)
    {
        detailedFListener = new TokenNameFinderDetailedFMeasureListener();
        listeners.Add(detailedFListener);
    }
    TokenNameFinderEvaluator evaluator = new TokenNameFinderEvaluator(new NameFinderME(model), listeners.ToArray());
    PerformanceMonitor monitor = new PerformanceMonitor("sent");
    ObjectStream <NameSample> measuredSampleStream = new ObjectStreamAnonymousInnerClassHelper(this, monitor);
    monitor.startAndPrintThroughput();
    try
    {
        evaluator.evaluate(measuredSampleStream);
    }
    catch (IOException e)
    {
        Console.Error.WriteLine("failed");
        throw new TerminateToolException(-1, "IO error while reading test data: " + e.Message, e);
    }
    finally
    {
        try
        {
            measuredSampleStream.close();
        }
        catch (IOException)
        {
            // sorry that this can fail
        }
    }
    monitor.stopAndPrintFinalResult();
    Console.WriteLine();
    // Detailed listener (if enabled) supersedes the plain F-measure output.
    if (detailedFListener == null)
    {
        Console.WriteLine(evaluator.FMeasure);
    }
    else
    {
        Console.WriteLine(detailedFListener.ToString());
    }
}