/// <summary>
/// Runs a fixed analysis chain (MockCharFilter and MappingCharFilter in front of a
/// MockTokenizer wrapped by a CommonGramsFilter) through CheckAnalysisConsistency
/// with the input "wmgddzunizdomqyj".
/// </summary>
public virtual void Test()
{
    // Word set handed to the CommonGramsFilter below.
    CharArraySet commonSet = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
    foreach (string word in new string[] { "jjp", "wlmwoknt", "tcgyreo" })
    {
        commonSet.add(word);
    }

    // Replacement map consumed by the MappingCharFilter in initReader.
    NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
    mapBuilder.Add("mtqlpi", "");
    mapBuilder.Add("mwoknt", "jjp");
    mapBuilder.Add("tcgyreo", "zpfpajyws");
    NormalizeCharMap charMap = mapBuilder.Build();

    Analyzer analyzer = Analyzer.NewAnonymous(
        createComponents: (fieldName, reader) =>
        {
            Tokenizer tokenizer = new MockTokenizer(new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(reader), MockTokenFilter.ENGLISH_STOPSET, false, -65);
            TokenFilter grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonSet);
            return new TokenStreamComponents(tokenizer, grams);
        },
        initReader: (fieldName, reader) =>
        {
            // Char filters are stacked: MockCharFilter first, then the mapping filter on top.
            reader = new MockCharFilter(reader, 0);
            reader = new MappingCharFilter(charMap, reader);
            return reader;
        });

    CheckAnalysisConsistency(Random, analyzer, false, "wmgddzunizdomqyj");
}
/// <summary>
/// Builds the token stream components: a MockTokenizer reading from the wrapped
/// <paramref name="reader"/>, followed by a CommonGramsFilter that uses <c>cas</c>.
/// </summary>
/// <param name="fieldName">Field name; not used by this implementation.</param>
/// <param name="reader">Source reader, wrapped in CheckThatYouDidntReadAnythingReaderWrapper before tokenizing.</param>
/// <returns>Components pairing the tokenizer with the CommonGramsFilter.</returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    var guardedReader = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(reader);
    Tokenizer tokenizer = new MockTokenizer(guardedReader, MockTokenFilter.ENGLISH_STOPSET, false, -65);
    TokenFilter grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, cas);
    return new TokenStreamComponents(tokenizer, grams);
}
/// <summary>
/// Builds the token stream components: a whitespace MockTokenizer whose output is
/// passed through a CommonGramsFilter (using <c>commonWords</c>) wrapped in a
/// CommonGramsQueryFilter.
/// </summary>
/// <param name="fieldName">Field name; not used by this implementation.</param>
/// <param name="reader">Source of the text to tokenize.</param>
/// <returns>Components pairing the tokenizer with the CommonGramsQueryFilter.</returns>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    CommonGramsQueryFilter queryFilter = new CommonGramsQueryFilter(
        new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords));
    return new TokenStreamComponents(tokenizer, queryFilter);
}
/// <summary>
/// Test that CommonGramsFilter matches common words case-sensitively: for this
/// mixed-case input the expected output has no "How_The" bigram and no bigram
/// for "A B", while the lowercase words still produce bigrams.
/// </summary>
public virtual void testCaseSensitive()
{
    const string text = "How The s a brown s cow d like A B thing?";
    string[] expected = new string[]
    {
        "How", "The", "The_s", "s", "s_a", "a", "a_brown", "brown", "brown_s", "s",
        "s_cow", "cow", "cow_d", "d", "d_like", "like", "A", "B", "thing?"
    };

    MockTokenizer tokenizer = new MockTokenizer(new StringReader(text), MockTokenizer.WHITESPACE, false);
    TokenFilter grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);

    assertTokenStreamContents(grams, expected);
}
/// <summary>
/// Test that CommonGramsFilter matches common words case-sensitively: for this
/// mixed-case input the expected output has no "How_The" bigram and no bigram
/// for "A B", while the lowercase words still produce bigrams.
/// </summary>
public virtual void testCaseSensitive()
{
    const string text = "How The s a brown s cow d like A B thing?";
    string[] expected = new string[]
    {
        "How", "The", "The_s", "s", "s_a", "a", "a_brown", "brown", "brown_s", "s",
        "s_cow", "cow", "cow_d", "d", "d_like", "like", "A", "B", "thing?"
    };

    MockTokenizer tokenizer = new MockTokenizer(new StringReader(text), MockTokenizer.WHITESPACE, false);
    TokenFilter grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);

    assertTokenStreamContents(grams, expected);
}
/// <summary>
/// Test CommonGramsQueryFilter when the first and last words are stopwords:
/// the two-word input "the of" is expected to yield the single token "the_of".
/// </summary>
public virtual void TestFirstAndLastStopWord()
{
    const string text = "the of";
    string[] expected = new string[] { "the_of" };

    MockTokenizer tokenizer = new MockTokenizer(new StringReader(text), MockTokenizer.WHITESPACE, false);
    CommonGramsFilter grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
    TokenFilter queryFilter = new CommonGramsQueryFilter(grams);

    assertTokenStreamContents(queryFilter, expected);
}
/// <summary>
/// Test CommonGramsQueryFilter with a single-word query: the input "monster"
/// is expected to come through as the single token "monster".
/// </summary>
public virtual void testOneWordQuery()
{
    const string text = "monster";
    string[] expected = new string[] { "monster" };

    MockTokenizer tokenizer = new MockTokenizer(new StringReader(text), MockTokenizer.WHITESPACE, false);
    CommonGramsFilter grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
    TokenFilter queryFilter = new CommonGramsQueryFilter(grams);

    assertTokenStreamContents(queryFilter, expected);
}
/// <summary>
/// Checks that a CommonGramsQueryFilter chain can be reused: after reading two
/// tokens, the chain is closed, the tokenizer is given a fresh reader over the
/// same input, and the first token is expected again after reset.
/// </summary>
public virtual void testQueryReset()
{
    const string text = "How the s a brown s cow d like A B thing?";
    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(text));
    CommonGramsFilter grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
    CommonGramsQueryFilter queryFilter = new CommonGramsQueryFilter(grams);

    // The term attribute is obtained from the tokenizer and read after each incrementToken on the filter.
    CharTermAttribute term = tokenizer.addAttribute(typeof(CharTermAttribute));

    // First pass: consume the first two tokens.
    queryFilter.reset();
    foreach (string expected in new string[] { "How_the", "the_s" })
    {
        assertTrue(queryFilter.incrementToken());
        assertEquals(expected, term.ToString());
    }
    queryFilter.close();

    // Second pass: new reader over the same text, then reset and read the first token again.
    tokenizer.Reader = new StringReader(text);
    queryFilter.reset();
    assertTrue(queryFilter.incrementToken());
    assertEquals("How_the", term.ToString());
}
/// <summary>
/// Builds a constructor argument array for a token filter, one entry per requested
/// parameter type.
/// </summary>
/// <param name="random">Random source passed to NewRandomArg.</param>
/// <param name="stream">Token stream used for TokenStream parameters and as the input of any CommonGramsFilter created here.</param>
/// <param name="paramTypes">Parameter types to produce arguments for, in order.</param>
/// <returns>An array with the same length as <paramref name="paramTypes"/>.</returns>
static object[] NewFilterArgs(Random random, TokenStream stream, Type[] paramTypes)
{
    var args = new object[paramTypes.Length];
    for (int index = 0; index < args.Length; index++)
    {
        Type wanted = paramTypes[index];

        if (wanted == typeof(TokenStream))
        {
            // The upstream stream itself fills TokenStream slots.
            args[index] = stream;
            continue;
        }

        if (wanted == typeof(CommonGramsFilter))
        {
            // TODO: fix this one, that's broken: CommonGramsQueryFilter takes this one explicitly
            args[index] = new CommonGramsFilter(TEST_VERSION_CURRENT, stream, NewRandomArg<CharArraySet>(random, typeof(CharArraySet)));
            continue;
        }

        // Any other type is delegated to NewRandomArg.
        args[index] = NewRandomArg<object>(random, wanted);
    }

    return args;
}
/// <summary>
/// Creates a CommonGramsFilter through the base implementation and wraps it in a
/// CommonGramsQueryFilter.
/// </summary>
/// <param name="input">Token stream passed to the base <c>create</c>.</param>
/// <returns>A CommonGramsQueryFilter around the filter returned by the base class.</returns>
public override TokenFilter create(TokenStream input)
{
    // The cast relies on the base create returning a CommonGramsFilter.
    return new CommonGramsQueryFilter((CommonGramsFilter)base.create(input));
}
/// <summary>
/// Builds the token stream components: a whitespace MockTokenizer whose output is
/// passed through a CommonGramsFilter (using <c>commonWords</c>) wrapped in a
/// CommonGramsQueryFilter.
/// </summary>
/// <param name="fieldName">Field name; not used by this implementation.</param>
/// <param name="reader">Source of the text to tokenize.</param>
/// <returns>Components pairing the tokenizer with the CommonGramsQueryFilter.</returns>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    CommonGramsQueryFilter queryFilter = new CommonGramsQueryFilter(
        new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords));
    return new TokenStreamComponents(tokenizer, queryFilter);
}
/// <summary>
/// Checks that a CommonGramsFilter chain can be reused: after reading four tokens,
/// the filter is closed, the tokenizer is given a fresh reader over the same
/// input, and the first token is expected again after reset.
/// </summary>
public virtual void testReset()
{
    const string text = "How the s a brown s cow d like A B thing?";
    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(text));
    CommonGramsFilter grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
    CharTermAttribute term = grams.addAttribute(typeof(CharTermAttribute));

    // First pass: unigrams and bigrams are expected in this order.
    grams.reset();
    foreach (string expected in new string[] { "How", "How_the", "the", "the_s" })
    {
        assertTrue(grams.incrementToken());
        assertEquals(expected, term.ToString());
    }
    grams.close();

    // Second pass: new reader over the same text, then reset and read the first token again.
    tokenizer.Reader = new StringReader(text);
    grams.reset();
    assertTrue(grams.incrementToken());
    assertEquals("How", term.ToString());
}
/// <summary>
/// Test CommonGramsQueryFilter with a query made of a single (stop)word: the input
/// "the" is expected to come through as the single token "the".
/// </summary>
public virtual void testOneWordQueryStopWord()
{
    const string text = "the";
    string[] expected = new string[] { "the" };

    MockTokenizer tokenizer = new MockTokenizer(new StringReader(text), MockTokenizer.WHITESPACE, false);
    CommonGramsFilter grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
    TokenFilter queryFilter = new CommonGramsQueryFilter(grams);

    assertTokenStreamContents(queryFilter, expected);
}