Ejemplo n.º 1
0
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);

            if (mlParams != null)
            {
                if (!TrainUtil.isValid(mlParams.Settings))
                {
                    throw new TerminateToolException(1, "Training parameters file '" + @params.Params + "' is invalid!");
                }

                if (TrainUtil.isSequenceTraining(mlParams.Settings))
                {
                    throw new TerminateToolException(1, "Sequence training is not supported!");
                }
            }

            if (mlParams == null)
            {
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }

            File modelOutFile = @params.Model;

            CmdLineUtil.checkOutputFile("tokenizer model", modelOutFile);

            TokenizerModel model;

            try
            {
                Dictionary dict = loadDict(@params.AbbDict);

                TokenizerFactory tokFactory = TokenizerFactory.create(@params.Factory, @params.Lang, dict, @params.AlphaNumOpt.Value, null);
                model = opennlp.tools.tokenize.TokenizerME.train(sampleStream, tokFactory, mlParams);
            }
            catch (IOException e)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
            }
            finally
            {
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                    // sorry that this can fail
                }
            }

            CmdLineUtil.writeModel("tokenizer", modelOutFile, model);
        }
Ejemplo n.º 2
0
        public override TokenStreamInfo getStream(string fieldName, Reader reader)
        {
            Tokenizer   tk = tokenizer.create(charStream(reader));
            TokenStream ts = tk;

            for (int i = 0; i < filters.Length; i++)
            {
                ts = filters[i].create(ts);
            }
            return(new TokenStreamInfo(tk, ts));
        }
Ejemplo n.º 3
0
            protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
            {
                Tokenizer tf = tokenizer.create(reader);

                if (tokenfilter != null)
                {
                    return(new TokenStreamComponents(tf, tokenfilter.create(tf)));
                }
                else
                {
                    return(new TokenStreamComponents(tf));
                }
            }
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
            if (mlParams == null)
            {
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }

            TokenizerCrossValidator validator;

            TokenizerEvaluationMonitor listener = null;

            if (@params.Misclassified.Value)
            {
                listener = new TokenEvaluationErrorListener();
            }

            try
            {
                Dictionary dict = TokenizerTrainerTool.loadDict(@params.AbbDict);

                TokenizerFactory tokFactory = TokenizerFactory.create(@params.Factory, @params.Lang, dict, @params.AlphaNumOpt.Value, null);
                validator = new TokenizerCrossValidator(mlParams, tokFactory, listener);

                validator.evaluate(sampleStream, @params.Folds.Value);
            }
            catch (IOException e)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
            }
            finally
            {
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                    // sorry that this can fail
                }
            }

            FMeasure result = validator.FMeasure;

            Console.WriteLine(result.ToString());
        }
Ejemplo n.º 5
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void test() throws Exception
        public virtual void test()
        {
            IList <Type> analysisClasses = new List <Type>();

            ((List <Type>)analysisClasses).AddRange(TestRandomChains.getClassesForPackage("org.apache.lucene.analysis"));
            ((List <Type>)analysisClasses).AddRange(TestRandomChains.getClassesForPackage("org.apache.lucene.collation"));

            foreach (Class c in analysisClasses)
            {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int modifiers = c.getModifiers();
                int modifiers = c.Modifiers;
                if (Modifier.isAbstract(modifiers) || !Modifier.isPublic(modifiers) || c.Synthetic || c.AnonymousClass || c.MemberClass || c.Interface || testComponents.Contains(c) || crazyComponents.Contains(c) || oddlyNamedComponents.Contains(c) || deprecatedDuplicatedComponents.Contains(c) || c.isAnnotationPresent(typeof(Deprecated)) || !(c.IsSubclassOf(typeof(Tokenizer)) || c.IsSubclassOf(typeof(TokenFilter)) || c.IsSubclassOf(typeof(CharFilter))))
                {   // deprecated ones are typically back compat hacks
                    // don't waste time with abstract classes
                    continue;
                }

                IDictionary <string, string> args = new Dictionary <string, string>();
                args["luceneMatchVersion"] = TEST_VERSION_CURRENT.ToString();

                if (c.IsSubclassOf(typeof(Tokenizer)))
                {
                    string clazzName = c.SimpleName;
                    assertTrue(clazzName.EndsWith("Tokenizer", StringComparison.Ordinal));
                    string simpleName = clazzName.Substring(0, clazzName.Length - 9);
                    assertNotNull(TokenizerFactory.lookupClass(simpleName));
                    TokenizerFactory instance = null;
                    try
                    {
                        instance = TokenizerFactory.forName(simpleName, args);
                        assertNotNull(instance);
                        if (instance is ResourceLoaderAware)
                        {
                            ((ResourceLoaderAware)instance).inform(loader);
                        }
                        assertSame(c, instance.create(new StringReader("")).GetType());
                    }
                    catch (System.ArgumentException e)
                    {
                        if (e.InnerException is NoSuchMethodException)
                        {
                            // there is no corresponding ctor available
                            throw e;
                        }
                        // TODO: For now pass because some factories have not yet a default config that always works
                    }
                }
                else if (c.IsSubclassOf(typeof(TokenFilter)))
                {
                    string clazzName = c.SimpleName;
                    assertTrue(clazzName.EndsWith("Filter", StringComparison.Ordinal));
                    string simpleName = clazzName.Substring(0, clazzName.Length - (clazzName.EndsWith("TokenFilter", StringComparison.Ordinal) ? 11 : 6));
                    assertNotNull(TokenFilterFactory.lookupClass(simpleName));
                    TokenFilterFactory instance = null;
                    try
                    {
                        instance = TokenFilterFactory.forName(simpleName, args);
                        assertNotNull(instance);
                        if (instance is ResourceLoaderAware)
                        {
                            ((ResourceLoaderAware)instance).inform(loader);
                        }
//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
//ORIGINAL LINE: Class<? extends org.apache.lucene.analysis.TokenStream> createdClazz = instance.create(new KeywordTokenizer(new java.io.StringReader(""))).getClass();
                        Type <?> createdClazz = instance.create(new KeywordTokenizer(new StringReader(""))).GetType();
                        // only check instance if factory have wrapped at all!
                        if (typeof(KeywordTokenizer) != createdClazz)
                        {
                            assertSame(c, createdClazz);
                        }
                    }
                    catch (System.ArgumentException e)
                    {
                        if (e.InnerException is NoSuchMethodException)
                        {
                            // there is no corresponding ctor available
                            throw e;
                        }
                        // TODO: For now pass because some factories have not yet a default config that always works
                    }
                }
                else if (c.IsSubclassOf(typeof(CharFilter)))
                {
                    string clazzName = c.SimpleName;
                    assertTrue(clazzName.EndsWith("CharFilter", StringComparison.Ordinal));
                    string simpleName = clazzName.Substring(0, clazzName.Length - 10);
                    assertNotNull(CharFilterFactory.lookupClass(simpleName));
                    CharFilterFactory instance = null;
                    try
                    {
                        instance = CharFilterFactory.forName(simpleName, args);
                        assertNotNull(instance);
                        if (instance is ResourceLoaderAware)
                        {
                            ((ResourceLoaderAware)instance).inform(loader);
                        }
//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
//ORIGINAL LINE: Class<? extends java.io.Reader> createdClazz = instance.create(new java.io.StringReader("")).getClass();
                        Type <?> createdClazz = instance.create(new StringReader("")).GetType();
                        // only check instance if factory have wrapped at all!
                        if (typeof(StringReader) != createdClazz)
                        {
                            assertSame(c, createdClazz);
                        }
                    }
                    catch (System.ArgumentException e)
                    {
                        if (e.InnerException is NoSuchMethodException)
                        {
                            // there is no corresponding ctor available
                            throw e;
                        }
                        // TODO: For now pass because some factories have not yet a default config that always works
                    }
                }
            }
        }
Ejemplo n.º 6
0
            protected internal override Analyzer.TokenStreamComponents CreateComponents(string fieldName, Reader reader)
            {
                Tokenizer   tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader) : factory.create(reader);
                TokenStream stream    = outerInstance.ignoreCase ? new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer) : tokenizer;

                return(new Analyzer.TokenStreamComponents(tokenizer, stream));
            }
Ejemplo n.º 7
0
 private static TokenStream loadTokenizer(TokenizerFactory tokFactory, Reader reader)
 {
     return(tokFactory.create(reader));
 }
Ejemplo n.º 8
0
 public HandleClientRequest(TcpClient clientConnected, TokenizerFactory <T> tocFactory)
 {
     this._clientSocket = clientConnected;
     _tokenizer         = tocFactory.create();
 }