/// <summary>
/// Builds the analysis chain for a field: a <see cref="KeywordTokenizer"/> over
/// <paramref name="reader"/>, wrapped in a <see cref="HyphenationCompoundWordTokenFilter"/>
/// that uses the enclosing helper's <c>hyphenator</c>.
/// </summary>
/// <param name="fieldName">Field name; not consulted by this implementation.</param>
/// <param name="reader">Source of the text to tokenize.</param>
/// <returns>The tokenizer paired with the compound-word filter stacked on top of it.</returns>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
            {
                // The same tokenizer instance is both the filter's input and the chain's source.
                Tokenizer source = new KeywordTokenizer(reader);
                TokenFilter compoundFilter = new HyphenationCompoundWordTokenFilter(
                    TEST_VERSION_CURRENT,
                    source,
                    hyphenator);

                return new TokenStreamComponents(source, compoundFilter);
            }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testHyphenationCompoundWordsDA() throws Exception
        /// <summary>
        /// Runs the sentence "min veninde som er lidt af en læsehest" through a
        /// <see cref="HyphenationCompoundWordTokenFilter"/> configured with the
        /// <c>da_UTF8.xml</c> hyphenation grammar and a dictionary containing
        /// "læse" and "hest", then verifies the emitted tokens.
        /// </summary>
        public virtual void testHyphenationCompoundWordsDA()
        {
            CharArraySet dict = makeDictionary("læse", "hest");

            // Load the hyphenation grammar that sits next to this test class.
            InputSource grammar = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
            HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(grammar);

            MockTokenizer tokenizer = new MockTokenizer(
                new StringReader("min veninde som er lidt af en læsehest"),
                MockTokenizer.WHITESPACE,
                false);

            HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
                TEST_VERSION_CURRENT,
                tokenizer,
                hyphenator,
                dict,
                CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
                CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
                CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
                false);

            // The compound "læsehest" is expected to be followed by its two dictionary parts.
            string[] expectedTerms = { "min", "veninde", "som", "er", "lidt", "af", "en", "læsehest", "læse", "hest" };
            // NOTE(review): presumably position increments (0 = stacked on the previous token) - confirm
            // against the assertTokenStreamContents overload.
            int[] expectedIncrements = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 };

            assertTokenStreamContents(tf, expectedTerms, expectedIncrements);
        }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testHyphenationCompoundWordsDELongestMatch() throws Exception
        /// <summary>
        /// Decompounds "basketballkurv" with the longest-match option switched on and a
        /// dictionary of "basketball", "basket", "ball" and "kurv", then verifies that
        /// "basket" is absent from the output.
        /// </summary>
        public virtual void testHyphenationCompoundWordsDELongestMatch()
        {
            CharArraySet dict = makeDictionary("basketball", "basket", "ball", "kurv");

            // Load the hyphenation grammar that sits next to this test class.
            InputSource grammar = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
            HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(grammar);

            MockTokenizer tokenizer = new MockTokenizer(
                new StringReader("basketballkurv"),
                MockTokenizer.WHITESPACE,
                false);

            // With the trailing flag set to true the filter keeps only the longest match,
            // which is why "basket" does not show up in the expected terms below.
            HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
                TEST_VERSION_CURRENT,
                tokenizer,
                hyphenator,
                dict,
                CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
                CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
                40,
                true);

            string[] expectedTerms = { "basketballkurv", "basketball", "ball", "kurv" };
            // NOTE(review): presumably position increments - confirm against the assertion helper.
            int[] expectedIncrements = { 1, 0, 0, 0 };

            assertTokenStreamContents(tf, expectedTerms, expectedIncrements);
        }
Beispiel #4
0
 /// <summary>
 /// Produces a <see cref="HyphenationTree"/> by reading the <c>da_UTF8.xml</c> resource
 /// associated with <see cref="TestCompoundWordTokenFilter"/>.
 /// </summary>
 /// <param name="random">Not used by this implementation.</param>
 /// <returns>The hyphenation tree parsed from the resource stream.</returns>
 /// <remarks>
 /// Any exception raised while opening or parsing the resource propagates to the caller
 /// unchanged. The previous <c>catch (Exception ex) { throw ex; }</c> wrapper reset the
 /// exception's stack trace (CA2200) and was followed by an unreachable <c>return</c>;
 /// since it did nothing but rethrow, it has been removed.
 /// </remarks>
 public object Create(Random random)
 {
     // TODO: make nastier
     using (Stream @is = typeof(TestCompoundWordTokenFilter).getResourceAsStream("da_UTF8.xml"))
     {
         // The stream is disposed when this block exits, whether by return or by exception.
         HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.GetHyphenationTree(@is);
         return hyphenator;
     }
 }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testEmptyTerm() throws Exception
        /// <summary>
        /// Checks that an empty input term comes out as an empty term for two analyzers:
        /// one built from a small dictionary and one built from the <c>da_UTF8.xml</c>
        /// hyphenation grammar.
        /// </summary>
        public virtual void testEmptyTerm()
        {
            // First analyzer: constructed from a dictionary only.
            CharArraySet dict = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
            Analyzer dictionaryAnalyzer = new AnalyzerAnonymousInnerClassHelper4(this, dict);
            checkOneTerm(dictionaryAnalyzer, "", "");

            // Second analyzer: constructed from the hyphenation tree only.
            InputSource grammar = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
            HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(grammar);
            Analyzer hyphenationAnalyzer = new AnalyzerAnonymousInnerClassHelper5(this, hyphenator);
            checkOneTerm(hyphenationAnalyzer, "", "");
        }
            /// <summary>
            /// Produces a <see cref="HyphenationTree"/> by reading the <c>da_UTF8.xml</c> resource
            /// associated with <see cref="TestCompoundWordTokenFilter"/>.
            /// </summary>
            /// <param name="random">Not used by this implementation.</param>
            /// <returns>The hyphenation tree parsed from the resource stream.</returns>
            public object Create(Random random)
            {
                // TODO: make nastier
                try
                {
                    using (Stream resource = typeof(TestCompoundWordTokenFilter).getResourceAsStream("da_UTF8.xml"))
                    {
                        return HyphenationCompoundWordTokenFilter.GetHyphenationTree(resource);
                    }
                }
                catch (Exception)
                {
                    // LUCENENET: CA2200: bare rethrow keeps the original stack details
                    // (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                    throw;
                }
            }
        /// <summary>
        /// Exercises the filter with a hyphenation grammar but no dictionary. In that mode
        /// many meaningless fragments are produced; the minimum and maximum subword sizes
        /// are the knobs that rein this in, so three size windows are checked.
        /// </summary>
        // Java original: public void testHyphenationOnly() throws Exception
        // ('throws' clauses have no .NET equivalent).
        public virtual void testHyphenationOnly()
        {
            const string input = "basketballkurv";

            InputSource grammar = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
            HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(grammar);

            // min=2, max=4
            HyphenationCompoundWordTokenFilter narrow = new HyphenationCompoundWordTokenFilter(
                TEST_VERSION_CURRENT,
                new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
                hyphenator,
                CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
                2,
                4);
            string[] narrowExpected = { "basketballkurv", "ba", "sket", "bal", "ball", "kurv" };
            assertTokenStreamContents(narrow, narrowExpected);

            // min=4, max=6
            HyphenationCompoundWordTokenFilter medium = new HyphenationCompoundWordTokenFilter(
                TEST_VERSION_CURRENT,
                new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
                hyphenator,
                CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
                4,
                6);
            string[] mediumExpected = { "basketballkurv", "basket", "sket", "ball", "lkurv", "kurv" };
            assertTokenStreamContents(medium, mediumExpected);

            // min=4, max=10
            HyphenationCompoundWordTokenFilter wide = new HyphenationCompoundWordTokenFilter(
                TEST_VERSION_CURRENT,
                new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
                hyphenator,
                CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
                4,
                10);
            string[] wideExpected =
            {
                "basketballkurv", "basket", "basketbal", "basketball", "sket", "sketbal",
                "sketball", "ball", "ballkurv", "lkurv", "kurv"
            };
            assertTokenStreamContents(wide, wideExpected);
        }
 /// <summary>
 /// Assembles the token stream for a field by feeding <paramref name="reader"/> into a
 /// <see cref="KeywordTokenizer"/> and decorating it with a
 /// <see cref="HyphenationCompoundWordTokenFilter"/> driven by the helper's <c>hyphenator</c>.
 /// </summary>
 /// <param name="fieldName">Field name; not consulted by this implementation.</param>
 /// <param name="reader">Source of the text to tokenize.</param>
 /// <returns>Components holding the tokenizer and the filter layered over it.</returns>
 protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
 {
     Tokenizer keywordSource = new KeywordTokenizer(reader);

     // Dictionary-less filter: only the hyphenation tree is supplied.
     TokenFilter decompounder = new HyphenationCompoundWordTokenFilter(
         TEST_VERSION_CURRENT,
         keywordSource,
         hyphenator);

     return new TokenStreamComponents(keywordSource, decompounder);
 }
        /// <summary>
        /// Hyphenation without a dictionary yields plenty of nonsense fragments.
        /// This test pins the output for three min/max subword size settings, which are
        /// what limit those fragments.
        /// </summary>
        // Java original: public void testHyphenationOnly() throws Exception
        // ('throws' clauses have no .NET equivalent).
        public virtual void testHyphenationOnly()
        {
            // Expected output per size window, declared up front for readability.
            string[] expectedMin2Max4 = { "basketballkurv", "ba", "sket", "bal", "ball", "kurv" };
            string[] expectedMin4Max6 = { "basketballkurv", "basket", "sket", "ball", "lkurv", "kurv" };
            string[] expectedMin4Max10 =
            {
                "basketballkurv", "basket", "basketbal", "basketball", "sket", "sketbal",
                "sketball", "ball", "ballkurv", "lkurv", "kurv"
            };

            InputSource patternSource = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
            HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(patternSource);

            // min=2, max=4
            MockTokenizer firstInput = new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false);
            HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
                TEST_VERSION_CURRENT, firstInput, hyphenator,
                CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, 2, 4);
            assertTokenStreamContents(tf, expectedMin2Max4);

            // min=4, max=6
            MockTokenizer secondInput = new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false);
            tf = new HyphenationCompoundWordTokenFilter(
                TEST_VERSION_CURRENT, secondInput, hyphenator,
                CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, 4, 6);
            assertTokenStreamContents(tf, expectedMin4Max6);

            // min=4, max=10
            MockTokenizer thirdInput = new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false);
            tf = new HyphenationCompoundWordTokenFilter(
                TEST_VERSION_CURRENT, thirdInput, hyphenator,
                CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, 4, 10);
            assertTokenStreamContents(tf, expectedMin4Max10);
        }
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testHyphenationCompoundWordsDELongestMatch() throws Exception
        /// <summary>
        /// Verifies longest-match decompounding of "basketballkurv": with a dictionary of
        /// "basketball", "basket", "ball" and "kurv", the shorter entry "basket" must not
        /// be emitted.
        /// </summary>
        public virtual void testHyphenationCompoundWordsDELongestMatch()
        {
            string[] expectedTerms = { "basketballkurv", "basketball", "ball", "kurv" };
            // NOTE(review): presumably position increments - confirm against the assertion helper.
            int[] expectedIncrements = { 1, 0, 0, 0 };

            CharArraySet dict = makeDictionary("basketball", "basket", "ball", "kurv");

            InputSource patternSource = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
            HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(patternSource);

            MockTokenizer input = new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false);

            // the word basket will not be added due to the longest match option
            HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
                TEST_VERSION_CURRENT, input, hyphenator, dict,
                CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
                CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
                40, true);

            assertTokenStreamContents(tf, expectedTerms, expectedIncrements);
        }
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testHyphenationCompoundWordsDA() throws Exception
        /// <summary>
        /// Feeds "min veninde som er lidt af en læsehest" through the hyphenation-based
        /// compound filter (grammar <c>da_UTF8.xml</c>, dictionary "læse" and "hest") and
        /// verifies that "læsehest" is followed by "læse" and "hest".
        /// </summary>
        public virtual void testHyphenationCompoundWordsDA()
        {
            string[] expectedTerms = { "min", "veninde", "som", "er", "lidt", "af", "en", "læsehest", "læse", "hest" };
            // NOTE(review): presumably position increments (0 = same position as the previous
            // token) - confirm against the assertion helper.
            int[] expectedIncrements = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 };

            CharArraySet dict = makeDictionary("læse", "hest");

            InputSource patternSource = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
            HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(patternSource);

            MockTokenizer input = new MockTokenizer(
                new StringReader("min veninde som er lidt af en læsehest"), MockTokenizer.WHITESPACE, false);

            HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
                TEST_VERSION_CURRENT, input, hyphenator, dict,
                CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
                CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
                CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
                false);

            assertTokenStreamContents(tf, expectedTerms, expectedIncrements);
        }