C# (CSharp) org.apache.lucene.analysis.compound.hyphenation.HyphenationTree示例

编程语言: C# (CSharp)

类/类型: org.apache.lucene.analysis.compound.hyphenation.HyphenationTree

hotexamples.com的示例: 6

C# (CSharp) org.apache.lucene.analysis.compound.hyphenation.HyphenationTree - 共找到 6 个示例。以下是从开源项目中提取的、评分最高的 org.apache.lucene.analysis.compound.hyphenation.HyphenationTree 真实 C# (CSharp) 代码示例。您可以为这些示例评分，以帮助我们提高示例质量。

示例#1

0

显示文件

文件： TestCompoundWordTokenFilter.cs 项目： zfxsss/lucenenet

//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testHyphenationCompoundWordsDA() throws Exception
        /// <summary>
        /// Feeds a whitespace-tokenized sentence through a
        /// <c>HyphenationCompoundWordTokenFilter</c> built from the <c>da_UTF8.xml</c>
        /// hyphenation grammar plus a two-word dictionary, and asserts that the
        /// compound "læsehest" is followed by its dictionary parts "læse" and "hest".
        /// </summary>
        public virtual void testHyphenationCompoundWordsDA()
        {
            CharArraySet dictionary = makeDictionary("læse", "hest");

            // The grammar file is resolved relative to this test class.
            InputSource grammarSource = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
            HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(grammarSource);

            MockTokenizer tokenizer = new MockTokenizer(
                new StringReader("min veninde som er lidt af en læsehest"),
                MockTokenizer.WHITESPACE,
                false);

            // NOTE(review): the trailing 'false' presumably disables the
            // "only longest match" option — confirm against the filter's constructor.
            HyphenationCompoundWordTokenFilter filter = new HyphenationCompoundWordTokenFilter(
                TEST_VERSION_CURRENT,
                tokenizer,
                hyphenator,
                dictionary,
                CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
                CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
                CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
                false);

            string[] expectedTerms =
            {
                "min", "veninde", "som", "er", "lidt", "af", "en", "læsehest", "læse", "hest"
            };

            // Presumably the per-token position increments (0 = same position as the
            // preceding token) — confirm against assertTokenStreamContents.
            int[] expectedIncrements = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 };

            assertTokenStreamContents(filter, expectedTerms, expectedIncrements);
        }

示例#2

0

显示文件

文件： TestCompoundWordTokenFilter.cs 项目： zfxsss/lucenenet

//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testHyphenationCompoundWordsDELongestMatch() throws Exception
        /// <summary>
        /// Decompounds "basketballkurv" with the longest-match option switched on and
        /// asserts that "basket" is not emitted even though it is in the dictionary.
        /// </summary>
        public virtual void testHyphenationCompoundWordsDELongestMatch()
        {
            // Upper bound on subword length used for this scenario.
            const int maxSubwordSize = 40;
            // With longest-match enabled, "basket" is suppressed in favour of "basketball".
            const bool onlyLongestMatch = true;

            CharArraySet dictionary = makeDictionary("basketball", "basket", "ball", "kurv");

            // The grammar file is resolved relative to this test class.
            InputSource grammarSource = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
            HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(grammarSource);

            MockTokenizer tokenizer = new MockTokenizer(
                new StringReader("basketballkurv"),
                MockTokenizer.WHITESPACE,
                false);

            HyphenationCompoundWordTokenFilter filter = new HyphenationCompoundWordTokenFilter(
                TEST_VERSION_CURRENT,
                tokenizer,
                hyphenator,
                dictionary,
                CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
                CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
                maxSubwordSize,
                onlyLongestMatch);

            string[] expectedTerms = { "basketballkurv", "basketball", "ball", "kurv" };

            // Presumably the per-token position increments (0 = same position as the
            // preceding token) — confirm against assertTokenStreamContents.
            int[] expectedIncrements = { 1, 0, 0, 0 };

            assertTokenStreamContents(filter, expectedTerms, expectedIncrements);
        }

示例#3

0

显示文件

文件： TestCompoundWordTokenFilter.cs 项目： zfxsss/lucenenet

//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testEmptyTerm() throws Exception
        /// <summary>
        /// Checks that an empty term passes through unchanged for two analyzers:
        /// one constructed from a dictionary and one constructed from a hyphenation tree.
        /// </summary>
        public virtual void testEmptyTerm()
        {
            // Analyzer built from the dictionary: empty input must yield empty output.
            CharArraySet dictionary = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
            Analyzer dictionaryAnalyzer = new AnalyzerAnonymousInnerClassHelper4(this, dictionary);
            checkOneTerm(dictionaryAnalyzer, "", "");

            // Analyzer built from the hyphenation tree loaded from da_UTF8.xml
            // (resolved relative to this test class): same expectation.
            HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(
                new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm()));
            Analyzer hyphenationAnalyzer = new AnalyzerAnonymousInnerClassHelper5(this, hyphenator);
            checkOneTerm(hyphenationAnalyzer, "", "");
        }

示例#4

0

显示文件

文件： TestCompoundWordTokenFilter.cs 项目： zfxsss/lucenenet

        /// <summary>
        /// Runs the filter with a hyphenation grammar but without a dictionary.
        /// In that mode many meaningless fragments are produced; the min/max
        /// subword size arguments bound which fragments are emitted, which is
        /// what the three scenarios below assert.
        /// </summary>
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testHyphenationOnly() throws Exception
        public virtual void testHyphenationOnly()
        {
            const string input = "basketballkurv";

            // The grammar file is resolved relative to this test class.
            InputSource grammarSource = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
            HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(grammarSource);

            var minWordSize = CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE;

            // Scenario 1: min=2, max=4
            HyphenationCompoundWordTokenFilter shortFragments = new HyphenationCompoundWordTokenFilter(
                TEST_VERSION_CURRENT,
                new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
                hyphenator,
                minWordSize,
                2,
                4);
            string[] expectedShort = { "basketballkurv", "ba", "sket", "bal", "ball", "kurv" };
            assertTokenStreamContents(shortFragments, expectedShort);

            // Scenario 2: min=4, max=6
            HyphenationCompoundWordTokenFilter mediumFragments = new HyphenationCompoundWordTokenFilter(
                TEST_VERSION_CURRENT,
                new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
                hyphenator,
                minWordSize,
                4,
                6);
            string[] expectedMedium = { "basketballkurv", "basket", "sket", "ball", "lkurv", "kurv" };
            assertTokenStreamContents(mediumFragments, expectedMedium);

            // Scenario 3: min=4, max=10
            HyphenationCompoundWordTokenFilter longFragments = new HyphenationCompoundWordTokenFilter(
                TEST_VERSION_CURRENT,
                new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
                hyphenator,
                minWordSize,
                4,
                10);
            string[] expectedLong =
            {
                "basketballkurv", "basket", "basketbal", "basketball", "sket", "sketbal",
                "sketball", "ball", "ballkurv", "lkurv", "kurv"
            };
            assertTokenStreamContents(longFragments, expectedLong);
        }

示例#5

0

显示文件

文件： TestCompoundWordTokenFilter.cs 项目： zfxsss/lucenenet

 /// <summary>
 /// Creates the helper and stores both arguments on the new instance.
 /// </summary>
 /// <param name="outerInstance">The enclosing test instance; kept in <c>outerInstance</c>.</param>
 /// <param name="hyphenator">The hyphenation tree; kept in <c>hyphenator</c>.</param>
 public AnalyzerAnonymousInnerClassHelper5(
     TestCompoundWordTokenFilter outerInstance,
     HyphenationTree hyphenator)
 {
     this.outerInstance = outerInstance;
     this.hyphenator = hyphenator;
 }

示例#6

0

显示文件

文件： TestCompoundWordTokenFilter.cs 项目： WakeflyCBass/lucenenet

 /// <summary>
 /// Creates the helper and stores both arguments on the new instance.
 /// </summary>
 /// <param name="outerInstance">The enclosing test instance; kept in <c>outerInstance</c>.</param>
 /// <param name="hyphenator">The hyphenation tree; kept in <c>hyphenator</c>.</param>
 public AnalyzerAnonymousInnerClassHelper5(
     TestCompoundWordTokenFilter outerInstance,
     HyphenationTree hyphenator)
 {
     this.outerInstance = outerInstance;
     this.hyphenator = hyphenator;
 }