//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: //ORIGINAL LINE: public void testHyphenationCompoundWordsDA() throws Exception public virtual void testHyphenationCompoundWordsDA() { CharArraySet dict = makeDictionary("læse", "hest"); InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm()); HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is); HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("min veninde som er lidt af en læsehest"), MockTokenizer.WHITESPACE, false), hyphenator, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false); assertTokenStreamContents(tf, new string[] { "min", "veninde", "som", "er", "lidt", "af", "en", "læsehest", "læse", "hest" }, new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 }); }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: //ORIGINAL LINE: public void testHyphenationCompoundWordsDELongestMatch() throws Exception public virtual void testHyphenationCompoundWordsDELongestMatch() { CharArraySet dict = makeDictionary("basketball", "basket", "ball", "kurv"); InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm()); HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is); // the word basket will not be added due to the longest match option HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, 40, true); assertTokenStreamContents(tf, new string[] { "basketballkurv", "basketball", "ball", "kurv" }, new int[] { 1, 0, 0, 0 }); }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: //ORIGINAL LINE: public void testEmptyTerm() throws Exception public virtual void testEmptyTerm() { //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': //ORIGINAL LINE: final org.apache.lucene.analysis.util.CharArraySet dict = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def"); CharArraySet dict = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def"); Analyzer a = new AnalyzerAnonymousInnerClassHelper4(this, dict); checkOneTerm(a, "", ""); InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm()); //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': //ORIGINAL LINE: final org.apache.lucene.analysis.compound.hyphenation.HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is); HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is); Analyzer b = new AnalyzerAnonymousInnerClassHelper5(this, hyphenator); checkOneTerm(b, "", ""); }
/// <summary> /// With hyphenation-only, you can get a lot of nonsense tokens. /// This can be controlled with the min/max subword size. /// </summary> //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: //ORIGINAL LINE: public void testHyphenationOnly() throws Exception public virtual void testHyphenationOnly() { InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm()); HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is); HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, 2, 4); // min=2, max=4 assertTokenStreamContents(tf, new string[] { "basketballkurv", "ba", "sket", "bal", "ball", "kurv" }); tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, 4, 6); // min=4, max=6 assertTokenStreamContents(tf, new string[] { "basketballkurv", "basket", "sket", "ball", "lkurv", "kurv" }); tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, 4, 10); // min=4, max=10 assertTokenStreamContents(tf, new string[] { "basketballkurv", "basket", "basketbal", "basketball", "sket", "sketbal", "sketball", "ball", "ballkurv", "lkurv", "kurv" }); }
public AnalyzerAnonymousInnerClassHelper5(TestCompoundWordTokenFilter outerInstance, HyphenationTree hyphenator) { this.outerInstance = outerInstance; this.hyphenator = hyphenator; }