/// <summary>
/// Trains a tokenizer model from the sample stream and writes it to the
/// model file named by the command line parameters.
/// </summary>
/// <param name="format">Forwarded unchanged to the base implementation.</param>
/// <param name="args">Forwarded unchanged to the base implementation.</param>
/// <exception cref="TerminateToolException">
/// Thrown when the training parameters file is invalid, when it requests
/// sequence training, or when an IOException occurs during training.
/// </exception>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null)
    {
        // Nothing was loaded: build the parameters from the iterations and cutoff options.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }
    else
    {
        if (!TrainUtil.isValid(mlParams.Settings))
        {
            throw new TerminateToolException(1, "Training parameters file '" + @params.Params + "' is invalid!");
        }

        if (TrainUtil.isSequenceTraining(mlParams.Settings))
        {
            throw new TerminateToolException(1, "Sequence training is not supported!");
        }
    }

    File targetFile = @params.Model;
    CmdLineUtil.checkOutputFile("tokenizer model", targetFile);

    TokenizerModel trainedModel;
    try
    {
        Dictionary abbreviations = loadDict(@params.AbbDict);
        TokenizerFactory factory = TokenizerFactory.create(
            @params.Factory, @params.Lang, abbreviations, @params.AlphaNumOpt.Value, null);
        trainedModel = opennlp.tools.tokenize.TokenizerME.train(sampleStream, factory, mlParams);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(
            -1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        // The sample stream is closed whether or not training succeeded.
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // sorry that this can fail
        }
    }

    CmdLineUtil.writeModel("tokenizer", targetFile, trainedModel);
}
/// <summary>
/// Creates the tokenizer over the char-stream-wrapped reader and wraps it
/// with every configured filter, in array order.
/// </summary>
/// <param name="fieldName">Field name; not used by this implementation.</param>
/// <param name="reader">Input that is passed through <c>charStream</c> before tokenizing.</param>
/// <returns>The tokenizer paired with the outermost stream of the filter chain.</returns>
public override TokenStreamInfo getStream(string fieldName, Reader reader)
{
    Tokenizer source = tokenizer.create(charStream(reader));

    // Each filter factory wraps the stream produced by the previous one.
    TokenStream chain = source;
    foreach (var filterFactory in filters)
    {
        chain = filterFactory.create(chain);
    }

    return new TokenStreamInfo(source, chain);
}
/// <summary>
/// Creates the tokenizer for the reader and, when a token filter factory
/// is set, wraps the tokenizer with the filter it creates.
/// </summary>
/// <param name="fieldName">Field name; not used by this implementation.</param>
/// <param name="reader">Input handed to the tokenizer factory.</param>
/// <returns>Components holding the tokenizer alone, or the tokenizer plus its filter.</returns>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer source = tokenizer.create(reader);

    if (tokenfilter == null)
    {
        return new TokenStreamComponents(source);
    }

    return new TokenStreamComponents(source, tokenfilter.create(source));
}
/// <summary>
/// Cross-validates the tokenizer on the sample stream and prints the
/// resulting F-measure to the console.
/// </summary>
/// <param name="format">Forwarded unchanged to the base implementation.</param>
/// <param name="args">Forwarded unchanged to the base implementation.</param>
/// <exception cref="TerminateToolException">
/// Thrown when an IOException occurs while evaluating.
/// </exception>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null)
    {
        // Nothing was loaded: build the parameters from the iterations and cutoff options.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    // An error listener is attached only when the misclassified option is set.
    TokenizerEvaluationMonitor errorListener =
        @params.Misclassified.Value ? new TokenEvaluationErrorListener() : null;

    TokenizerCrossValidator crossValidator;
    try
    {
        Dictionary abbreviations = TokenizerTrainerTool.loadDict(@params.AbbDict);
        TokenizerFactory factory = TokenizerFactory.create(
            @params.Factory, @params.Lang, abbreviations, @params.AlphaNumOpt.Value, null);

        crossValidator = new TokenizerCrossValidator(mlParams, factory, errorListener);
        crossValidator.evaluate(sampleStream, @params.Folds.Value);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(
            -1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        // The sample stream is closed whether or not evaluation succeeded.
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // sorry that this can fail
        }
    }

    FMeasure score = crossValidator.FMeasure;
    Console.WriteLine(score.ToString());
}
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void test() throws Exception
/// <summary>
/// Walks the classes of the "org.apache.lucene.analysis" and
/// "org.apache.lucene.collation" packages and, for every eligible
/// Tokenizer, TokenFilter and CharFilter subclass, asserts that a factory
/// is registered under the class name minus its suffix. When the factory
/// can be built from the default arguments, also asserts that it creates
/// an instance of exactly that class.
/// </summary>
public virtual void test()
{
    // Declared as the concrete list type so AddRange needs no down-cast.
    List<Type> analysisClasses = new List<Type>();
    analysisClasses.AddRange(TestRandomChains.getClassesForPackage("org.apache.lucene.analysis"));
    analysisClasses.AddRange(TestRandomChains.getClassesForPackage("org.apache.lucene.collation"));

    foreach (Class c in analysisClasses)
    {
        int modifiers = c.Modifiers;
        if (Modifier.isAbstract(modifiers) || !Modifier.isPublic(modifiers)
            || c.Synthetic || c.AnonymousClass || c.MemberClass || c.Interface
            || testComponents.Contains(c)
            || crazyComponents.Contains(c)
            || oddlyNamedComponents.Contains(c)
            || deprecatedDuplicatedComponents.Contains(c)
            || c.isAnnotationPresent(typeof(Deprecated))
            || !(c.IsSubclassOf(typeof(Tokenizer)) || c.IsSubclassOf(typeof(TokenFilter)) || c.IsSubclassOf(typeof(CharFilter))))
        {
            // deprecated ones are typically back compat hacks
            // don't waste time with abstract classes
            continue;
        }

        IDictionary<string, string> args = new Dictionary<string, string>();
        args["luceneMatchVersion"] = TEST_VERSION_CURRENT.ToString();

        if (c.IsSubclassOf(typeof(Tokenizer)))
        {
            string clazzName = c.SimpleName;
            assertTrue(clazzName.EndsWith("Tokenizer", StringComparison.Ordinal));
            // 9 == "Tokenizer".Length
            string simpleName = clazzName.Substring(0, clazzName.Length - 9);
            assertNotNull(TokenizerFactory.lookupClass(simpleName));
            TokenizerFactory instance = null;
            try
            {
                instance = TokenizerFactory.forName(simpleName, args);
                assertNotNull(instance);
                if (instance is ResourceLoaderAware)
                {
                    ((ResourceLoaderAware)instance).inform(loader);
                }
                assertSame(c, instance.create(new StringReader("")).GetType());
            }
            catch (System.ArgumentException e)
            {
                if (e.InnerException is NoSuchMethodException)
                {
                    // there is no corresponding ctor available
                    // Bare rethrow keeps the original stack trace.
                    throw;
                }
                // TODO: For now pass because some factories have not yet a default config that always works
            }
        }
        else if (c.IsSubclassOf(typeof(TokenFilter)))
        {
            string clazzName = c.SimpleName;
            assertTrue(clazzName.EndsWith("Filter", StringComparison.Ordinal));
            // 11 == "TokenFilter".Length, 6 == "Filter".Length
            string simpleName = clazzName.Substring(0, clazzName.Length - (clazzName.EndsWith("TokenFilter", StringComparison.Ordinal) ? 11 : 6));
            assertNotNull(TokenFilterFactory.lookupClass(simpleName));
            TokenFilterFactory instance = null;
            try
            {
                instance = TokenFilterFactory.forName(simpleName, args);
                assertNotNull(instance);
                if (instance is ResourceLoaderAware)
                {
                    ((ResourceLoaderAware)instance).inform(loader);
                }
                // The converter left the Java wildcard "Type <?>" here, which is not valid C#.
                Type createdClazz = instance.create(new KeywordTokenizer(new StringReader(""))).GetType();
                // only check instance if factory have wrapped at all!
                if (typeof(KeywordTokenizer) != createdClazz)
                {
                    assertSame(c, createdClazz);
                }
            }
            catch (System.ArgumentException e)
            {
                if (e.InnerException is NoSuchMethodException)
                {
                    // there is no corresponding ctor available
                    // Bare rethrow keeps the original stack trace.
                    throw;
                }
                // TODO: For now pass because some factories have not yet a default config that always works
            }
        }
        else if (c.IsSubclassOf(typeof(CharFilter)))
        {
            string clazzName = c.SimpleName;
            assertTrue(clazzName.EndsWith("CharFilter", StringComparison.Ordinal));
            // 10 == "CharFilter".Length
            string simpleName = clazzName.Substring(0, clazzName.Length - 10);
            assertNotNull(CharFilterFactory.lookupClass(simpleName));
            CharFilterFactory instance = null;
            try
            {
                instance = CharFilterFactory.forName(simpleName, args);
                assertNotNull(instance);
                if (instance is ResourceLoaderAware)
                {
                    ((ResourceLoaderAware)instance).inform(loader);
                }
                // The converter left the Java wildcard "Type <?>" here, which is not valid C#.
                Type createdClazz = instance.create(new StringReader("")).GetType();
                // only check instance if factory have wrapped at all!
                if (typeof(StringReader) != createdClazz)
                {
                    assertSame(c, createdClazz);
                }
            }
            catch (System.ArgumentException e)
            {
                if (e.InnerException is NoSuchMethodException)
                {
                    // there is no corresponding ctor available
                    // Bare rethrow keeps the original stack trace.
                    throw;
                }
                // TODO: For now pass because some factories have not yet a default config that always works
            }
        }
    }
}
/// <summary>
/// Builds the analysis components for the reader: the tokenizer comes from
/// <c>factory</c> when one is set, otherwise a whitespace tokenizer is used;
/// a lower-case filter is added when the outer instance ignores case.
/// </summary>
/// <param name="fieldName">Field name; not used by this implementation.</param>
/// <param name="reader">Input handed to the tokenizer.</param>
/// <returns>Components holding the tokenizer and the resulting stream.</returns>
protected internal override Analyzer.TokenStreamComponents CreateComponents(string fieldName, Reader reader)
{
    Tokenizer source;
    if (factory != null)
    {
        source = factory.create(reader);
    }
    else
    {
        source = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
    }

    TokenStream result = source;
    if (outerInstance.ignoreCase)
    {
        result = new LowerCaseFilter(Version.LUCENE_CURRENT, source);
    }

    return new Analyzer.TokenStreamComponents(source, result);
}
/// <summary>
/// Asks the given factory to create a token stream over the reader.
/// </summary>
/// <param name="tokFactory">Factory whose <c>create</c> method is called.</param>
/// <param name="reader">Input passed to the factory.</param>
/// <returns>The stream returned by the factory.</returns>
private static TokenStream loadTokenizer(TokenizerFactory tokFactory, Reader reader)
{
    TokenStream created = tokFactory.create(reader);
    return created;
}
/// <summary>
/// Stores the connected client and obtains a tokenizer from the factory.
/// </summary>
/// <param name="clientConnected">Client connection kept in <c>_clientSocket</c>.</param>
/// <param name="tocFactory">Factory whose <c>create</c> result is kept in <c>_tokenizer</c>.</param>
public HandleClientRequest(TcpClient clientConnected, TokenizerFactory<T> tocFactory)
{
    _clientSocket = clientConnected;
    _tokenizer = tocFactory.create();
}