/// <summary>
/// Verifies OpenNLP part-of-speech tagging: first that POS tags are surfaced
/// as token types, then that <c>TypeAsPayloadTokenFilter</c> copies those
/// tags into token payloads.
/// (Mirrors the Java CustomAnalyzer.builder(...) chains from upstream Lucene.)
/// </summary>
public void TestPOS()
{
    var loader = new ClasspathResourceLoader(GetType());

    // Builds the opennlp tokenizer + POS-filter chain; when typeAsPayload is
    // true, appends a TypeAsPayload filter so the POS tags become payloads.
    Analyzer CreateAnalyzer(bool typeAsPayload) => Analyzer.NewAnonymous(createComponents: (fieldname, reader) =>
    {
        var tokenizerFactory = new OpenNLPTokenizerFactory(new Dictionary<string, string>
        {
            { "tokenizerModel", tokenizerModelFile },
            { "sentenceModel", sentenceModelFile }
        });
        tokenizerFactory.Inform(loader);
        var tokenizer = tokenizerFactory.Create(reader);

        var posFilterFactory = new OpenNLPPOSFilterFactory(new Dictionary<string, string>
        {
            { "posTaggerModel", posTaggerModelFile }
        });
        posFilterFactory.Inform(loader);
        var posFilter = posFilterFactory.Create(tokenizer);

        if (!typeAsPayload)
        {
            return new TokenStreamComponents(tokenizer, posFilter);
        }

        var payloadFilterFactory = new TypeAsPayloadTokenFilterFactory(new Dictionary<string, string>());
        var payloadFilter = payloadFilterFactory.Create(posFilter);
        return new TokenStreamComponents(tokenizer, payloadFilter);
    });

    // POS tags exposed as token types.
    AssertAnalyzesTo(CreateAnalyzer(typeAsPayload: false), SENTENCES, SENTENCES_punc,
        SENTENCES_startOffsets, SENTENCES_endOffsets, SENTENCES_posTags, null, null, true);

    // Same chain, but POS tags exposed as payloads instead of types.
    AssertAnalyzesTo(CreateAnalyzer(typeAsPayload: true), SENTENCES, SENTENCES_punc,
        SENTENCES_startOffsets, SENTENCES_endOffsets, null, null, null, true,
        ToPayloads(SENTENCES_posTags));
}
/// <summary>
/// Verifies the full tokenizer → POS tagger → chunker pipeline, asserting
/// that the chunk labels (carried in the type attribute) are copied into
/// token payloads by <c>TypeAsPayloadTokenFilter</c>.
/// </summary>
public void TestPayloads()
{
    Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
    {
        var loader = new ClasspathResourceLoader(GetType());

        // Sentence-aware OpenNLP tokenizer.
        var tokenizerFactory = new OpenNLPTokenizerFactory(new Dictionary<string, string>
        {
            { "tokenizerModel", tokenizerModelFile },
            { "sentenceModel", sentenceModelFile }
        });
        tokenizerFactory.Inform(loader);
        var tokenizer = tokenizerFactory.Create(NewAttributeFactory(), reader);

        // POS tagging stage — required input for the chunker below.
        var posFactory = new OpenNLPPOSFilterFactory(new Dictionary<string, string>
        {
            { "posTaggerModel", posTaggerModelFile }
        });
        posFactory.Inform(loader);
        var posFilter = posFactory.Create(tokenizer);

        // Chunking stage; chunk labels land in the type attribute.
        var chunkerFactory = new OpenNLPChunkerFilterFactory(new Dictionary<string, string>
        {
            { "chunkerModel", chunkerModelFile }
        });
        chunkerFactory.Inform(loader);
        var chunkerFilter = chunkerFactory.Create(posFilter);

        // Copy each token's type attribute into its payload for assertion.
        var payloadFactory = new TypeAsPayloadTokenFilterFactory(new Dictionary<string, string>());
        var payloadFilter = payloadFactory.Create(chunkerFilter);

        return new TokenStreamComponents(tokenizer, payloadFilter);
    });

    AssertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc,
        SENTENCES_startOffsets, SENTENCES_endOffsets, null, null, null, true,
        ToPayloads(SENTENCES_chunks));
}