/// <summary>
/// Builds the "CommonGramsQuery" factory from a single word file, from a
/// comma-separated pair of word files, and from a snowball-format file, and
/// asserts the size/content of <c>CommonWords</c> for each configuration.
/// </summary>
public virtual void TestInform()
{
    // LUCENENET: the loader has to be anchored to a type that is in the same path as the files.
    IResourceLoader loader = new ClasspathResourceLoader(typeof(TestAnalyzers), "Lucene.Net");
    assertTrue("loader is null and it shouldn't be", loader != null);

    // Scenario 1: one word file.
    var singleFile = (CommonGramsQueryFilterFactory)TokenFilterFactory(
        "CommonGramsQuery", TEST_VERSION_CURRENT, loader,
        "words", "stop-1.txt",
        "ignoreCase", "true");
    CharArraySet singleFileWords = singleFile.CommonWords;
    assertTrue("words is null and it shouldn't be", singleFileWords != null);
    assertTrue("words Size: " + singleFileWords.size() + " is not: " + 2, singleFileWords.size() == 2);
    assertTrue(singleFile.IgnoreCase + " does not equal: " + true, singleFile.IgnoreCase == true);

    // Scenario 2: two word files given as one comma-separated argument.
    var twoFiles = (CommonGramsQueryFilterFactory)TokenFilterFactory(
        "CommonGramsQuery", TEST_VERSION_CURRENT, loader,
        "words", "stop-1.txt, stop-2.txt",
        "ignoreCase", "true");
    CharArraySet twoFileWords = twoFiles.CommonWords;
    assertTrue("words is null and it shouldn't be", twoFileWords != null);
    assertTrue("words Size: " + twoFileWords.size() + " is not: " + 4, twoFileWords.size() == 4);
    assertTrue(twoFiles.IgnoreCase + " does not equal: " + true, twoFiles.IgnoreCase == true);

    // Scenario 3: snowball-format word file.
    var snowball = (CommonGramsQueryFilterFactory)TokenFilterFactory(
        "CommonGramsQuery", TEST_VERSION_CURRENT, loader,
        "words", "stop-snowball.txt",
        "format", "snowball",
        "ignoreCase", "true");
    CharArraySet snowballWords = snowball.CommonWords;
    assertEquals(8, snowballWords.size());

    string[] expectedEntries = { "he", "him", "his", "himself", "she", "her", "hers", "herself" };
    foreach (string entry in expectedEntries)
    {
        assertTrue(snowballWords.contains(entry));
    }
}
/// <summary>
/// Builds the "Stop" factory from a single word file, from a comma-separated
/// pair of word files, from a snowball-format file, and with no arguments,
/// and asserts <c>StopWords</c> / <c>IgnoreCase</c> for each configuration.
/// </summary>
public virtual void TestInform()
{
    // NOTE(review): this loader is only null-checked; it is not passed to TokenFilterFactory below.
    IResourceLoader loader = new ClasspathResourceLoader(this.GetType());
    assertTrue("loader is null and it shouldn't be", loader != null);

    // Scenario 1: one word file.
    var singleFile = (StopFilterFactory)TokenFilterFactory(
        "Stop",
        "words", "stop-1.txt",
        "ignoreCase", "true");
    CharArraySet singleFileWords = singleFile.StopWords;
    assertTrue("words is null and it shouldn't be", singleFileWords != null);
    assertTrue("words Size: " + singleFileWords.size() + " is not: " + 2, singleFileWords.size() == 2);
    assertTrue(singleFile.IgnoreCase + " does not equal: " + true, singleFile.IgnoreCase == true);

    // Scenario 2: two word files given as one comma-separated argument.
    var twoFiles = (StopFilterFactory)TokenFilterFactory(
        "Stop",
        "words", "stop-1.txt, stop-2.txt",
        "ignoreCase", "true");
    CharArraySet twoFileWords = twoFiles.StopWords;
    assertTrue("words is null and it shouldn't be", twoFileWords != null);
    assertTrue("words Size: " + twoFileWords.size() + " is not: " + 4, twoFileWords.size() == 4);
    assertTrue(twoFiles.IgnoreCase + " does not equal: " + true, twoFiles.IgnoreCase == true);

    // Scenario 3: snowball-format word file.
    var snowball = (StopFilterFactory)TokenFilterFactory(
        "Stop",
        "words", "stop-snowball.txt",
        "format", "snowball",
        "ignoreCase", "true");
    CharArraySet snowballWords = snowball.StopWords;
    assertEquals(8, snowballWords.size());

    string[] expectedEntries = { "he", "him", "his", "himself", "she", "her", "hers", "herself" };
    foreach (string entry in expectedEntries)
    {
        assertTrue(snowballWords.contains(entry));
    }

    // Scenario 4: defaults (no arguments supplied).
    var defaults = (StopFilterFactory)TokenFilterFactory("Stop");
    assertEquals(StopAnalyzer.ENGLISH_STOP_WORDS_SET, defaults.StopWords);
    assertEquals(false, defaults.IgnoreCase);
}
/// <summary>
/// Builds the "CommonGrams" factory from a single word file, from a
/// comma-separated pair of word files, and from a snowball-format file, and
/// asserts the size/content of <c>CommonWords</c> for each configuration.
/// </summary>
public virtual void TestInform()
{
    // LUCENENET: the loader has to be anchored to a type that is in the same path as the files.
    IResourceLoader loader = new ClasspathResourceLoader(typeof(TestAnalyzers), "Lucene.Net");
    assertTrue("loader is null and it shouldn't be", loader != null);

    // Scenario 1: one word file.
    var singleFile = (CommonGramsFilterFactory)TokenFilterFactory(
        "CommonGrams", TEST_VERSION_CURRENT, loader,
        "words", "stop-1.txt",
        "ignoreCase", "true");
    CharArraySet singleFileWords = singleFile.CommonWords;
    assertTrue("words is null and it shouldn't be", singleFileWords != null);
    assertTrue("words Size: " + singleFileWords.size() + " is not: " + 2, singleFileWords.size() == 2);
    assertTrue(singleFile.IgnoreCase + " does not equal: " + true, singleFile.IgnoreCase == true);

    // Scenario 2: two word files given as one comma-separated argument.
    var twoFiles = (CommonGramsFilterFactory)TokenFilterFactory(
        "CommonGrams", TEST_VERSION_CURRENT, loader,
        "words", "stop-1.txt, stop-2.txt",
        "ignoreCase", "true");
    CharArraySet twoFileWords = twoFiles.CommonWords;
    assertTrue("words is null and it shouldn't be", twoFileWords != null);
    assertTrue("words Size: " + twoFileWords.size() + " is not: " + 4, twoFileWords.size() == 4);
    assertTrue(twoFiles.IgnoreCase + " does not equal: " + true, twoFiles.IgnoreCase == true);

    // Scenario 3: snowball-format word file.
    var snowball = (CommonGramsFilterFactory)TokenFilterFactory(
        "CommonGrams", TEST_VERSION_CURRENT, loader,
        "words", "stop-snowball.txt",
        "format", "snowball",
        "ignoreCase", "true");
    CharArraySet snowballWords = snowball.CommonWords;
    assertEquals(8, snowballWords.size());

    string[] expectedEntries = { "he", "him", "his", "himself", "she", "her", "hers", "herself" };
    foreach (string entry in expectedEntries)
    {
        assertTrue(snowballWords.contains(entry));
    }
}
/// <summary>
/// Analyzes <c>SENTENCES</c> with an OpenNLP tokenizer followed by the OpenNLP
/// part-of-speech filter and asserts the result against <c>SENTENCES_punc</c>
/// and the start/end offset arrays.
/// </summary>
public void TestBasic()
{
    var resourceLoader = new ClasspathResourceLoader(GetType());

    // Stand-in for the Java CustomAnalyzer builder chain:
    //   tokenizer "opennlp" (tokenizerModel, sentenceModel) -> filter "opennlpPOS" (posTaggerModel)
    Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (field, input) =>
    {
        // Argument maps are created inside the delegate, i.e. once per invocation.
        var tokenizerArgs = new Dictionary<string, string>
        {
            ["tokenizerModel"] = tokenizerModelFile,
            ["sentenceModel"] = sentenceModelFile
        };
        var nlpTokenizerFactory = new OpenNLPTokenizerFactory(tokenizerArgs);
        nlpTokenizerFactory.Inform(resourceLoader);
        var source = nlpTokenizerFactory.Create(input);

        var posArgs = new Dictionary<string, string>
        {
            ["posTaggerModel"] = posTaggerModelFile
        };
        var posFactory = new OpenNLPPOSFilterFactory(posArgs);
        posFactory.Inform(resourceLoader);
        var tagged = posFactory.Create(source);

        return new TokenStreamComponents(source, tagged);
    });

    AssertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets);
}
/// <summary>
/// Analyzes <c>SENTENCE</c> with an OpenNLP tokenizer, the OpenNLP part-of-speech
/// filter and the OpenNLP lemmatizer filter configured with a dictionary only,
/// then asserts the result against <c>SENTENCE_dict_punc</c> and <c>SENTENCE_posTags</c>.
/// </summary>
public void Test1SentenceDictionaryOnly()
{
    // Stand-in for the Java CustomAnalyzer builder chain:
    //   tokenizer "opennlp" -> "opennlpPOS" -> "opennlplemmatizer" (dictionary)
    Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (field, input) =>
    {
        var resourceLoader = new ClasspathResourceLoader(GetType());

        var tokenizerArgs = new Dictionary<string, string>
        {
            ["tokenizerModel"] = tokenizerModelFile,
            ["sentenceModel"] = sentenceModelFile
        };
        var tokenizerFactory = new OpenNLPTokenizerFactory(tokenizerArgs);
        tokenizerFactory.Inform(resourceLoader);
        var source = tokenizerFactory.Create(input);

        var posArgs = new Dictionary<string, string>
        {
            ["posTaggerModel"] = posTaggerModelFile
        };
        var posFactory = new OpenNLPPOSFilterFactory(posArgs);
        posFactory.Inform(resourceLoader);
        var tagged = posFactory.Create(source);

        var lemmatizerArgs = new Dictionary<string, string>
        {
            ["dictionary"] = lemmatizerDictFile
        };
        var lemmatizerFactory = new OpenNLPLemmatizerFilterFactory(lemmatizerArgs);
        lemmatizerFactory.Inform(resourceLoader);
        var lemmatized = lemmatizerFactory.Create(tagged);

        return new TokenStreamComponents(source, lemmatized);
    });

    AssertAnalyzesTo(analyzer, SENTENCE, SENTENCE_dict_punc, null, null, SENTENCE_posTags, null, null, true);
}
/// <summary>
/// Asserts that configuring <c>MorfologikFilterFactory</c> with the dictionary
/// name "missing-dictionary-resource.dict" raises an <see cref="IOException"/>
/// whose message contains "Resource not found".
/// </summary>
public void TestMissingDictionary()
{
    IResourceLoader resources = new ClasspathResourceLoader(typeof(TestMorfologikFilterFactory));

    // Both construction and Inform stay inside the delegate, so an IOException from either is captured.
    IOException failure = NUnit.Framework.Assert.Throws<IOException>(() =>
    {
        IDictionary<string, string> args = new JCG.Dictionary<string, string>
        {
            [MorfologikFilterFactory.DICTIONARY_ATTRIBUTE] = "missing-dictionary-resource.dict"
        };
        new MorfologikFilterFactory(args).Inform(resources);
    });

    assertTrue(failure.Message.Contains("Resource not found"));
}
/// <summary>
/// Builds the "KeepWord" factory from a single word file and from a
/// comma-separated pair of word files, asserting the size of <c>Words</c>
/// (2 and 4 respectively).
/// </summary>
public virtual void TestInform()
{
    // NOTE(review): this loader is only null-checked; it is not passed to TokenFilterFactory below.
    IResourceLoader loader = new ClasspathResourceLoader(this.GetType());
    assertTrue("loader is null and it shouldn't be", loader != null);

    // Scenario 1: one word file.
    var singleFile = (KeepWordFilterFactory)TokenFilterFactory(
        "KeepWord",
        "words", "keep-1.txt",
        "ignoreCase", "true");
    CharArraySet singleFileWords = singleFile.Words;
    assertTrue("words is null and it shouldn't be", singleFileWords != null);
    assertTrue("words Size: " + singleFileWords.size() + " is not: " + 2, singleFileWords.size() == 2);

    // Scenario 2: two word files given as one comma-separated argument.
    var twoFiles = (KeepWordFilterFactory)TokenFilterFactory(
        "KeepWord",
        "words", "keep-1.txt, keep-2.txt",
        "ignoreCase", "true");
    CharArraySet twoFileWords = twoFiles.Words;
    assertTrue("words is null and it shouldn't be", twoFileWords != null);
    assertTrue("words Size: " + twoFileWords.size() + " is not: " + 4, twoFileWords.size() == 4);
}
/// <summary>
/// Configures <c>MorfologikFilterFactory</c> with "custom-dictionary.dict",
/// runs it over the input "inflected1 inflected2" and asserts that the
/// resulting tokens are "lemma1" and "lemma2".
/// </summary>
public void TestExplicitDictionary()
{
    IResourceLoader resources = new ClasspathResourceLoader(typeof(TestMorfologikFilterFactory));
    StringReader input = new StringReader("inflected1 inflected2");

    IDictionary<string, string> args = new JCG.Dictionary<string, string>
    {
        [MorfologikFilterFactory.DICTIONARY_ATTRIBUTE] = "custom-dictionary.dict"
    };
    MorfologikFilterFactory factory = new MorfologikFilterFactory(args);
    factory.Inform(resources);

    // The Java original used whitespaceMockTokenizer(reader) here.
    TokenStream tokenizer = new MockTokenizer(input);
    TokenStream filtered = factory.Create(tokenizer);

    AssertTokenStreamContents(filtered, new string[] { "lemma1", "lemma2" });
}
/// <summary>
/// Analyzes <c>SENTENCES_both</c> with the chain: OpenNLP tokenizer, OpenNLP
/// part-of-speech filter, keyword-repeat filter, OpenNLP lemmatizer (dictionary
/// plus lemmatizer model) and remove-duplicates filter; then asserts the result
/// against <c>SENTENCES_both_keep_orig_punc</c> and <c>SENTENCES_both_keep_orig_posTags</c>.
/// </summary>
public void TestKeywordAttributeAwarenessDictionaryAndMaxEnt()
{
    // Stand-in for the Java CustomAnalyzer builder chain:
    //   "opennlp" -> "opennlpPOS" -> KeywordRepeatFilterFactory
    //   -> "opennlplemmatizer" (dictionary, lemmatizerModel) -> RemoveDuplicatesTokenFilterFactory
    Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (field, input) =>
    {
        var resourceLoader = new ClasspathResourceLoader(GetType());

        var tokenizerArgs = new Dictionary<string, string>
        {
            ["tokenizerModel"] = tokenizerModelFile,
            ["sentenceModel"] = sentenceModelFile
        };
        var tokenizerFactory = new OpenNLPTokenizerFactory(tokenizerArgs);
        tokenizerFactory.Inform(resourceLoader);
        var source = tokenizerFactory.Create(input);

        var posArgs = new Dictionary<string, string>
        {
            ["posTaggerModel"] = posTaggerModelFile
        };
        var posFactory = new OpenNLPPOSFilterFactory(posArgs);
        posFactory.Inform(resourceLoader);
        var tagged = posFactory.Create(source);

        var repeatFactory = new KeywordRepeatFilterFactory(new Dictionary<string, string>());
        var repeated = repeatFactory.Create(tagged);

        var lemmatizerArgs = new Dictionary<string, string>
        {
            ["dictionary"] = lemmatizerDictFile,
            ["lemmatizerModel"] = lemmatizerModelFile
        };
        var lemmatizerFactory = new OpenNLPLemmatizerFilterFactory(lemmatizerArgs);
        lemmatizerFactory.Inform(resourceLoader);
        var lemmatized = lemmatizerFactory.Create(repeated);

        var dedupFactory = new RemoveDuplicatesTokenFilterFactory(new Dictionary<string, string>());
        var deduplicated = dedupFactory.Create(lemmatized);

        return new TokenStreamComponents(source, deduplicated);
    });

    AssertAnalyzesTo(analyzer, SENTENCES_both, SENTENCES_both_keep_orig_punc, null, null, SENTENCES_both_keep_orig_posTags, null, null, true);
}
/// <summary>
/// Analyzes <c>SENTENCES</c> with the chain: OpenNLP tokenizer, OpenNLP
/// part-of-speech filter, OpenNLP chunker filter and type-as-payload filter;
/// then asserts terms, offsets and the payloads produced by
/// <c>ToPayloads(SENTENCES_chunks)</c>.
/// </summary>
public void TestPayloads()
{
    // Stand-in for the Java CustomAnalyzer builder chain:
    //   "opennlp" -> "opennlpPOS" -> "opennlpChunker" (chunkerModel) -> TypeAsPayloadTokenFilterFactory
    Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (field, input) =>
    {
        var resourceLoader = new ClasspathResourceLoader(GetType());

        var tokenizerArgs = new Dictionary<string, string>
        {
            ["tokenizerModel"] = tokenizerModelFile,
            ["sentenceModel"] = sentenceModelFile
        };
        var tokenizerFactory = new OpenNLPTokenizerFactory(tokenizerArgs);
        tokenizerFactory.Inform(resourceLoader);
        // This test uses the Create overload that takes an attribute factory.
        var source = tokenizerFactory.Create(NewAttributeFactory(), input);

        var posArgs = new Dictionary<string, string>
        {
            ["posTaggerModel"] = posTaggerModelFile
        };
        var posFactory = new OpenNLPPOSFilterFactory(posArgs);
        posFactory.Inform(resourceLoader);
        var tagged = posFactory.Create(source);

        var chunkerArgs = new Dictionary<string, string>
        {
            ["chunkerModel"] = chunkerModelFile
        };
        var chunkerFactory = new OpenNLPChunkerFilterFactory(chunkerArgs);
        chunkerFactory.Inform(resourceLoader);
        var chunked = chunkerFactory.Create(tagged);

        var payloadFactory = new TypeAsPayloadTokenFilterFactory(new Dictionary<string, string>());
        var withPayloads = payloadFactory.Create(chunked);

        return new TokenStreamComponents(source, withPayloads);
    });

    AssertAnalyzesTo(
        analyzer,
        SENTENCES,
        SENTENCES_punc,
        SENTENCES_startOffsets,
        SENTENCES_endOffsets,
        null,
        null,
        null,
        true,
        ToPayloads(SENTENCES_chunks));
}
/// <summary>
/// Opens the named resource as a <see cref="Stream"/> by delegating to a
/// <c>ClasspathResourceLoader</c> constructed from this instance's runtime
/// type and the string "Lucene.Net".
/// </summary>
/// <param name="name">Name of the resource to open.</param>
/// <returns>The stream returned by the loader's <c>OpenResource</c> call.</returns>
/// <exception cref="IOException">
/// Thrown when creating the loader or opening the resource fails for any reason.
/// The original failure is available through <see cref="Exception.InnerException"/>.
/// </exception>
protected Stream GetDataFile(string name)
{
    try
    {
        var resourceLoader = new ClasspathResourceLoader(this.GetType(), "Lucene.Net");
        return resourceLoader.OpenResource(name);
    }
    catch (Exception e)
    {
        // Attach the underlying exception so the real cause is not discarded.
        throw new IOException("Cannot find resource: " + name, e);
    }
}