/// <summary>
/// Flags this analyzer as having been called (sets <c>Called</c> to true) and builds an
/// analysis chain made of a <c>WhitespaceTokenizer</c> wrapped in a <c>LowerCaseFilter</c>.
/// </summary>
/// <param name="fieldName">Name of the field being analyzed; not read by this implementation.</param>
/// <returns>Components with the tokenizer as source and the lower-case filter as result stream.</returns>
protected internal override TokenStreamComponents CreateComponents(string fieldName)
{
    Called = true;

    Tokenizer whitespaceSource = new WhitespaceTokenizer();
    LowerCaseFilter lowerCased = new LowerCaseFilter(whitespaceSource);
    return new TokenStreamComponents(whitespaceSource, lowerCased);
}
/// <summary>
/// Stacks a <c>DictionaryCompoundWordTokenFilter</c> on top of a <c>MockRetainAttributeFilter</c>
/// and asserts that <c>MockRetainAttribute.Retain</c> is true for every token the chain emits
/// for the input "abcdefg".
/// </summary>
public virtual void testRetainMockAttribute()
{
    CharArraySet dictionary = makeDictionary("abc", "d", "efg");
    Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcdefg"));
    TokenStream retaining = new MockRetainAttributeFilter(source);
    TokenStream compound = new DictionaryCompoundWordTokenFilter(
        TEST_VERSION_CURRENT,
        retaining,
        dictionary,
        CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
        CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
        CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
        false);
    MockRetainAttribute retainAttribute = compound.addAttribute(typeof(MockRetainAttribute));

    compound.reset();
    // Walk the whole stream; each emitted token must still carry the retained flag.
    for (bool hasToken = compound.incrementToken(); hasToken; hasToken = compound.incrementToken())
    {
        assertTrue("Custom attribute value was lost", retainAttribute.Retain);
    }
}
/// <summary>
/// Consumes two tokens from a <c>DictionaryCompoundWordTokenFilter</c>, ends and closes it,
/// gives the tokenizer a fresh reader over the same text, resets the filter, and asserts that
/// the first token is again the full compound word.
/// </summary>
public virtual void testReset()
{
    CharArraySet dictionary = makeDictionary("Rind", "Fleisch", "Draht", "Schere", "Gesetz", "Aufgabe", "Überwachung");
    Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Rindfleischüberwachungsgesetz"));
    DictionaryCompoundWordTokenFilter compoundFilter = new DictionaryCompoundWordTokenFilter(
        TEST_VERSION_CURRENT,
        source,
        dictionary,
        CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
        CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
        CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
        false);
    CharTermAttribute term = compoundFilter.getAttribute(typeof(CharTermAttribute));

    // First pass: the original token, then the first dictionary sub-word.
    compoundFilter.reset();
    assertTrue(compoundFilter.incrementToken());
    assertEquals("Rindfleischüberwachungsgesetz", term.ToString());
    assertTrue(compoundFilter.incrementToken());
    assertEquals("Rind", term.ToString());
    compoundFilter.end();
    compoundFilter.close();

    // Second pass: after swapping the reader and resetting, output starts over.
    source.Reader = new StringReader("Rindfleischüberwachungsgesetz");
    compoundFilter.reset();
    assertTrue(compoundFilter.incrementToken());
    assertEquals("Rindfleischüberwachungsgesetz", term.ToString());
}
/// <summary>
/// Reads two tokens from a <c>CommonGramsQueryFilter</c> chain, closes it, supplies a new reader
/// over the same input, resets, and asserts that the first token produced is "How_the" again.
/// </summary>
public virtual void testQueryReset()
{
    const string input = "How the s a brown s cow d like A B thing?";

    WhitespaceTokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    CommonGramsFilter commonGrams = new CommonGramsFilter(TEST_VERSION_CURRENT, source, commonWords);
    CommonGramsQueryFilter queryFilter = new CommonGramsQueryFilter(commonGrams);

    // The term attribute is requested from the tokenizer, exactly as in the original test.
    CharTermAttribute termAttribute = source.addAttribute(typeof(CharTermAttribute));

    queryFilter.reset();
    assertTrue(queryFilter.incrementToken());
    assertEquals("How_the", termAttribute.ToString());
    assertTrue(queryFilter.incrementToken());
    assertEquals("the_s", termAttribute.ToString());
    queryFilter.close();

    source.Reader = new StringReader(input);
    queryFilter.reset();
    assertTrue(queryFilter.incrementToken());
    assertEquals("How_the", termAttribute.ToString());
}
/// <summary>
/// Builds a random override dictionary whose keys are random Unicode strings with whitespace
/// removed, feeds a subset of those keys (space separated) through
/// <c>WhitespaceTokenizer</c> -> <c>StemmerOverrideFilter</c> -> <c>PorterStemFilter</c>, and asserts
/// that the emitted tokens are the mapped override values in order.
/// </summary>
public virtual void testRandomRealisticWhiteSpace()
{
    IDictionary<string, string> map = new Dictionary<string, string>();
    int numTerms = atLeast(50);
    for (int i = 0; i < numTerms; i++)
    {
        string randomRealisticUnicodeString = TestUtil.randomRealisticUnicodeString(random());

        // Named keyBuilder rather than builder: the method-scope local 'builder' declared
        // further down would otherwise clash with it (compiler error CS0136).
        StringBuilder keyBuilder = new StringBuilder();

        // Walk the string one code point at a time. A well-formed surrogate pair counts as
        // two UTF-16 units; anything else (including a lone surrogate) counts as one.
        for (int j = 0; j < randomRealisticUnicodeString.Length;)
        {
            int charCount = char.IsSurrogatePair(randomRealisticUnicodeString, j) ? 2 : 1;

            // NOTE(review): this uses .NET's whitespace definition, which is not identical to
            // Java's Character.isWhitespace - confirm it matches what the tokenizer splits on.
            if (!char.IsWhiteSpace(randomRealisticUnicodeString, j))
            {
                keyBuilder.Append(randomRealisticUnicodeString, j, charCount);
            }
            j += charCount;
        }

        if (keyBuilder.Length > 0)
        {
            string value = TestUtil.randomSimpleString(random());
            // An empty override value is replaced with "a".
            map[keyBuilder.ToString()] = value.Length == 0 ? "a" : value;
        }
    }

    // Guarantee at least one mapping so the token stream is never empty.
    if (map.Count == 0)
    {
        map["booked"] = "books";
    }

    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random().nextBoolean());
    ISet<KeyValuePair<string, string>> entrySet = map.SetOfKeyValuePairs();
    StringBuilder input = new StringBuilder();
    IList<string> output = new List<string>();
    foreach (KeyValuePair<string, string> entry in entrySet)
    {
        builder.add(entry.Key, entry.Value);

        // Randomly pick which keys go into the input; the first entry is always included.
        if (random().nextBoolean() || output.Count == 0)
        {
            input.Append(entry.Key).Append(" ");
            output.Add(entry.Value);
        }
    }

    Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input.ToString()));
    TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(tokenizer, builder.build()));
    assertTokenStreamContents(stream, output.ToArray());
}
/// <summary>
/// Runs a size-2 <c>ShingleFilter</c> over "please divide this sentence" twice, replacing the
/// tokenizer's reader in between, and asserts that both passes yield the same terms, start and
/// end offsets, types and position increments.
/// </summary>
public virtual void testReset()
{
    const string sentence = "please divide this sentence";

    // Expected stream contents: unigrams interleaved with two-word shingles.
    string[] expectedTerms = { "please", "please divide", "divide", "divide this", "this", "this sentence", "sentence" };
    int[] expectedStartOffsets = { 0, 0, 7, 7, 14, 14, 19 };
    int[] expectedEndOffsets = { 6, 13, 13, 18, 18, 27, 27 };
    string[] expectedTypes =
    {
        TypeAttribute.DEFAULT_TYPE, "shingle",
        TypeAttribute.DEFAULT_TYPE, "shingle",
        TypeAttribute.DEFAULT_TYPE, "shingle",
        TypeAttribute.DEFAULT_TYPE
    };
    int[] expectedPositionIncrements = { 1, 0, 1, 0, 1, 0, 1 };

    Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(sentence));
    TokenStream shingles = new ShingleFilter(source, 2);

    assertTokenStreamContents(shingles, expectedTerms, expectedStartOffsets, expectedEndOffsets, expectedTypes, expectedPositionIncrements);

    // Same chain, fresh reader: the output must repeat.
    source.Reader = new StringReader(sentence);
    assertTokenStreamContents(shingles, expectedTerms, expectedStartOffsets, expectedEndOffsets, expectedTypes, expectedPositionIncrements);
}
/// <summary>
/// Runs a 1..1 <c>NGramTokenFilter</c> over "abcde" twice, replacing the tokenizer's reader in
/// between, and asserts that both passes yield the same terms, offsets and position increments.
/// </summary>
public virtual void testReset()
{
    const string text = "abcde";

    string[] expectedTerms = { "a", "b", "c", "d", "e" };
    int[] expectedStartOffsets = { 0, 0, 0, 0, 0 };
    int[] expectedEndOffsets = { 5, 5, 5, 5, 5 };
    int[] expectedPositionIncrements = { 1, 0, 0, 0, 0 };

    WhitespaceTokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(text));
    NGramTokenFilter unigrams = new NGramTokenFilter(TEST_VERSION_CURRENT, source, 1, 1);

    assertTokenStreamContents(unigrams, expectedTerms, expectedStartOffsets, expectedEndOffsets, expectedPositionIncrements);

    // Same chain, fresh reader: the output must repeat.
    source.Reader = new StringReader(text);
    assertTokenStreamContents(unigrams, expectedTerms, expectedStartOffsets, expectedEndOffsets, expectedPositionIncrements);
}
/// <summary>
/// Stacks a <c>DictionaryCompoundWordTokenFilter</c> on top of a <c>MockRetainAttributeFilter</c>
/// and asserts that <c>MockRetainAttribute.Retain</c> is true for every token the chain emits
/// for the input "abcdefg".
/// </summary>
public virtual void testRetainMockAttribute()
{
    CharArraySet dictionary = makeDictionary("abc", "d", "efg");
    Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcdefg"));
    TokenStream retaining = new MockRetainAttributeFilter(source);
    TokenStream compound = new DictionaryCompoundWordTokenFilter(
        TEST_VERSION_CURRENT,
        retaining,
        dictionary,
        CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
        CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
        CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
        false);
    MockRetainAttribute retainAttribute = compound.addAttribute(typeof(MockRetainAttribute));

    compound.reset();
    // Walk the whole stream; each emitted token must still carry the retained flag.
    for (bool hasToken = compound.incrementToken(); hasToken; hasToken = compound.incrementToken())
    {
        assertTrue("Custom attribute value was lost", retainAttribute.Retain);
    }
}
/// <summary>
/// Consumes two tokens from a <c>DictionaryCompoundWordTokenFilter</c>, ends and closes it,
/// gives the tokenizer a fresh reader over the same text, resets the filter, and asserts that
/// the first token is again the full compound word.
/// </summary>
public virtual void testReset()
{
    CharArraySet dictionary = makeDictionary("Rind", "Fleisch", "Draht", "Schere", "Gesetz", "Aufgabe", "Überwachung");
    Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Rindfleischüberwachungsgesetz"));
    DictionaryCompoundWordTokenFilter compoundFilter = new DictionaryCompoundWordTokenFilter(
        TEST_VERSION_CURRENT,
        source,
        dictionary,
        CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
        CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
        CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
        false);
    CharTermAttribute term = compoundFilter.getAttribute(typeof(CharTermAttribute));

    // First pass: the original token, then the first dictionary sub-word.
    compoundFilter.reset();
    assertTrue(compoundFilter.incrementToken());
    assertEquals("Rindfleischüberwachungsgesetz", term.ToString());
    assertTrue(compoundFilter.incrementToken());
    assertEquals("Rind", term.ToString());
    compoundFilter.end();
    compoundFilter.close();

    // Second pass: after swapping the reader and resetting, output starts over.
    source.Reader = new StringReader("Rindfleischüberwachungsgesetz");
    compoundFilter.reset();
    assertTrue(compoundFilter.incrementToken());
    assertEquals("Rindfleischüberwachungsgesetz", term.ToString());
}
/// <summary>
/// Builds an analysis chain that splits <paramref name="reader"/> with a
/// <c>WhitespaceTokenizer</c> and passes the tokens through a <c>ChineseFilter</c>.
/// </summary>
/// <param name="fieldName">Name of the field being analyzed; not read by this implementation.</param>
/// <param name="reader">Text source handed to the tokenizer.</param>
/// <returns>Components with the tokenizer as source and the <c>ChineseFilter</c> as result stream.</returns>
public override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer whitespaceSource = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
    ChineseFilter filtered = new ChineseFilter(whitespaceSource);
    return new TokenStreamComponents(whitespaceSource, filtered);
}
/// <summary>
/// Runs a front-side 1..3 <c>EdgeNGramTokenFilter</c> over "abcde" twice, replacing the
/// tokenizer's reader in between, and asserts that both passes yield the same terms and offsets.
/// </summary>
public virtual void testReset()
{
    const string text = "abcde";

    string[] expectedTerms = { "a", "ab", "abc" };
    int[] expectedStartOffsets = { 0, 0, 0 };
    int[] expectedEndOffsets = { 5, 5, 5 };

    WhitespaceTokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(text));
    EdgeNGramTokenFilter edgeGrams = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, source, EdgeNGramTokenFilter.Side.FRONT, 1, 3);

    assertTokenStreamContents(edgeGrams, expectedTerms, expectedStartOffsets, expectedEndOffsets);

    // Same chain, fresh reader: the output must repeat.
    source.Reader = new StringReader(text);
    assertTokenStreamContents(edgeGrams, expectedTerms, expectedStartOffsets, expectedEndOffsets);
}
/// <summary>
/// Reads four tokens from a <c>CommonGramsFilter</c>, closes it, supplies a new reader over the
/// same input, resets, and asserts that the first token produced is "How" again.
/// </summary>
public virtual void testReset()
{
    const string input = "How the s a brown s cow d like A B thing?";

    WhitespaceTokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    CommonGramsFilter commonGrams = new CommonGramsFilter(TEST_VERSION_CURRENT, source, commonWords);
    CharTermAttribute termAttribute = commonGrams.addAttribute(typeof(CharTermAttribute));

    // First pass: unigrams alternate with the common-word bigrams.
    commonGrams.reset();
    assertTrue(commonGrams.incrementToken());
    assertEquals("How", termAttribute.ToString());
    assertTrue(commonGrams.incrementToken());
    assertEquals("How_the", termAttribute.ToString());
    assertTrue(commonGrams.incrementToken());
    assertEquals("the", termAttribute.ToString());
    assertTrue(commonGrams.incrementToken());
    assertEquals("the_s", termAttribute.ToString());
    commonGrams.close();

    // Second pass: after swapping the reader and resetting, output starts over.
    source.Reader = new StringReader(input);
    commonGrams.reset();
    assertTrue(commonGrams.incrementToken());
    assertEquals("How", termAttribute.ToString());
}