/// <summary>
/// Builds a StopFilterFactory through tokenFilterFactory with a single word file,
/// a comma-separated pair of word files, a snowball-format file and finally no
/// arguments at all, asserting the resulting stop set and ignoreCase flag each time.
/// </summary>
/// <remarks>
/// The Java original declared <c>throws Exception</c>; .NET has no 'throws' clause.
/// </remarks>
public virtual void testInform()
{
    ResourceLoader loader = new ClasspathResourceLoader(this.GetType());
    assertTrue("loader is null and it shouldn't be", loader != null);

    // One word file: the test expects two entries.
    StopFilterFactory stopFactory = (StopFilterFactory)tokenFilterFactory("Stop", "words", "stop-1.txt", "ignoreCase", "true");
    CharArraySet stopSet = stopFactory.StopWords;
    assertTrue("words is null and it shouldn't be", stopSet != null);
    assertTrue("words Size: " + stopSet.size() + " is not: " + 2, stopSet.size() == 2);
    assertTrue(stopFactory.IgnoreCase + " does not equal: " + true, stopFactory.IgnoreCase == true);

    // Two word files in one comma-separated argument: the test expects four entries.
    stopFactory = (StopFilterFactory)tokenFilterFactory("Stop", "words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
    stopSet = stopFactory.StopWords;
    assertTrue("words is null and it shouldn't be", stopSet != null);
    assertTrue("words Size: " + stopSet.size() + " is not: " + 4, stopSet.size() == 4);
    assertTrue(stopFactory.IgnoreCase + " does not equal: " + true, stopFactory.IgnoreCase == true);

    // Snowball-format file: exactly these eight words are expected.
    stopFactory = (StopFilterFactory)tokenFilterFactory("Stop", "words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
    stopSet = stopFactory.StopWords;
    assertEquals(8, stopSet.size());
    string[] expectedWords = { "he", "him", "his", "himself", "she", "her", "hers", "herself" };
    foreach (string expectedWord in expectedWords)
    {
        assertTrue(stopSet.contains(expectedWord));
    }

    // defaults
    stopFactory = (StopFilterFactory)tokenFilterFactory("Stop");
    assertEquals(StopAnalyzer.ENGLISH_STOP_WORDS_SET, stopFactory.StopWords);
    assertEquals(false, stopFactory.IgnoreCase);
}
/// <summary>
/// Builds a CommonGramsQueryFilterFactory through tokenFilterFactory with a single
/// word file, a comma-separated pair of word files and a snowball-format file,
/// asserting the resulting common-word set and ignoreCase flag each time.
/// </summary>
/// <remarks>
/// The Java original declared <c>throws Exception</c>; .NET has no 'throws' clause.
/// </remarks>
public virtual void testInform()
{
    ResourceLoader loader = new ClasspathResourceLoader(typeof(TestStopFilter));
    assertTrue("loader is null and it shouldn't be", loader != null);

    // One word file: the test expects two entries.
    CommonGramsQueryFilterFactory gramsFactory = (CommonGramsQueryFilterFactory)tokenFilterFactory("CommonGramsQuery", TEST_VERSION_CURRENT, loader, "words", "stop-1.txt", "ignoreCase", "true");
    CharArraySet commonSet = gramsFactory.CommonWords;
    assertTrue("words is null and it shouldn't be", commonSet != null);
    assertTrue("words Size: " + commonSet.size() + " is not: " + 2, commonSet.size() == 2);
    assertTrue(gramsFactory.IgnoreCase + " does not equal: " + true, gramsFactory.IgnoreCase == true);

    // Two word files in one comma-separated argument: the test expects four entries.
    gramsFactory = (CommonGramsQueryFilterFactory)tokenFilterFactory("CommonGramsQuery", TEST_VERSION_CURRENT, loader, "words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
    commonSet = gramsFactory.CommonWords;
    assertTrue("words is null and it shouldn't be", commonSet != null);
    assertTrue("words Size: " + commonSet.size() + " is not: " + 4, commonSet.size() == 4);
    assertTrue(gramsFactory.IgnoreCase + " does not equal: " + true, gramsFactory.IgnoreCase == true);

    // Snowball-format file: exactly these eight words are expected.
    gramsFactory = (CommonGramsQueryFilterFactory)tokenFilterFactory("CommonGramsQuery", TEST_VERSION_CURRENT, loader, "words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
    commonSet = gramsFactory.CommonWords;
    assertEquals(8, commonSet.size());
    string[] expectedWords = { "he", "him", "his", "himself", "she", "her", "hers", "herself" };
    foreach (string expectedWord in expectedWords)
    {
        assertTrue(commonSet.contains(expectedWord));
    }
}
/// <summary>
/// First drops a trailing -ηθηκεσ / -ηθηκα / -ηθηκε, then drops a trailing
/// -ηκεσ / -ηκα / -ηκε. When the second removal happened and the remaining stem is
/// in <c>exc13</c> or ends with one of the listed endings, two characters are restored.
/// </summary>
/// <param name="s"> Buffer holding the word; it is not modified here </param>
/// <param name="len"> Number of valid characters in <paramref name="s"/> </param>
/// <returns> The new length of the word </returns>
private int rule13(char[] s, int len)
{
    if (len > 6 && endsWith(s, len, "ηθηκεσ"))
    {
        len -= 6;
    }
    else if (len > 5 && (endsWith(s, len, "ηθηκα") || endsWith(s, len, "ηθηκε")))
    {
        len -= 5;
    }

    int cut = 0;
    if (len > 4 && endsWith(s, len, "ηκεσ"))
    {
        cut = 4;
    }
    else if (len > 3 && (endsWith(s, len, "ηκα") || endsWith(s, len, "ηκε")))
    {
        cut = 3;
    }

    if (cut == 0)
    {
        return len;
    }
    len -= cut;

    bool keepPrefix = exc13.contains(s, 0, len)
        || endsWith(s, len, "σκωλ")
        || endsWith(s, len, "σκουλ")
        || endsWith(s, len, "ναρθ")
        || endsWith(s, len, "σφ")
        || endsWith(s, len, "οθ")
        || endsWith(s, len, "πιθ");

    // The buffer still holds the removed suffix, so growing len by two adds back the -ηκ.
    return keepPrefix ? len + 2 : len;
}
/// <summary>
/// Handles the -αμε family of endings. A five-character word ending in -αγαμε only
/// loses its last character. Otherwise a longer ending (-ηθηκαμε, -ουσαμε, -αγαμε,
/// -ησαμε, -ηκαμε) is dropped first, and then a trailing -αμε is dropped, with two
/// characters restored when the remaining stem is in <c>exc7</c>.
/// </summary>
/// <param name="s"> Buffer holding the word; it is not modified here </param>
/// <param name="len"> Number of valid characters in <paramref name="s"/> </param>
/// <returns> The new length of the word </returns>
private int rule7(char[] s, int len)
{
    if (len == 5 && endsWith(s, len, "αγαμε"))
    {
        return len - 1;
    }

    if (len > 7 && endsWith(s, len, "ηθηκαμε"))
    {
        len -= 7;
    }
    else if (len > 6 && endsWith(s, len, "ουσαμε"))
    {
        len -= 6;
    }
    else if (len > 5 && (endsWith(s, len, "αγαμε") || endsWith(s, len, "ησαμε") || endsWith(s, len, "ηκαμε")))
    {
        len -= 5;
    }

    if (len <= 3 || !endsWith(s, len, "αμε"))
    {
        return len;
    }

    int stemLen = len - 3;
    // Growing the length by two adds back the -αμ that is still in the buffer.
    return exc7.contains(s, 0, stemLen) ? stemLen + 2 : stemLen;
}
/// <summary>
/// Drops a trailing -αγεσ / -αγα / -αγε. Two characters are restored when the
/// remaining stem matches the first exception group (<c>exc15a</c> or one of the
/// listed endings) and does not match the second group (<c>exc15b</c> or -κολλ).
/// </summary>
/// <param name="s"> Buffer holding the word; it is not modified here </param>
/// <param name="len"> Number of valid characters in <paramref name="s"/> </param>
/// <returns> The new length of the word </returns>
private int rule15(char[] s, int len)
{
    int cut = 0;
    if (len > 4 && endsWith(s, len, "αγεσ"))
    {
        cut = 4;
    }
    else if (len > 3 && (endsWith(s, len, "αγα") || endsWith(s, len, "αγε")))
    {
        cut = 3;
    }

    if (cut == 0)
    {
        return len;
    }
    len -= cut;

    // The Java original marked both of these locals 'final'.
    bool restoreCandidate = exc15a.contains(s, 0, len)
        || endsWith(s, len, "οφ")
        || endsWith(s, len, "πελ")
        || endsWith(s, len, "χορτ")
        || endsWith(s, len, "λλ")
        || endsWith(s, len, "σφ")
        || endsWith(s, len, "ρπ")
        || endsWith(s, len, "φρ")
        || endsWith(s, len, "πρ")
        || endsWith(s, len, "λοχ")
        || endsWith(s, len, "σμην");

    bool vetoed = exc15b.contains(s, 0, len) || endsWith(s, len, "κολλ");

    // Growing the length by two adds back the -αγ that is still in the buffer.
    return (restoreCandidate && !vetoed) ? len + 2 : len;
}
/// <summary>
/// Drops a trailing -εωσ / -εων from words longer than three characters, restoring
/// one character when the remaining stem is in <c>exc4</c>.
/// </summary>
/// <param name="s"> Buffer holding the word; it is not modified here </param>
/// <param name="len"> Number of valid characters in <paramref name="s"/> </param>
/// <returns> The new length of the word </returns>
private int rule4(char[] s, int len)
{
    bool hasSuffix = len > 3 && (endsWith(s, len, "εωσ") || endsWith(s, len, "εων"));
    if (!hasSuffix)
    {
        return len;
    }

    int stemLen = len - 3;
    // Growing the length by one adds back the -ε that is still in the buffer.
    return exc4.contains(s, 0, stemLen) ? stemLen + 1 : stemLen;
}
/// <summary>
/// If no words are provided, then a set of english default stopwords is used.
/// The factory is created with no arguments; its common-word set must contain
/// "the", and filtering "testing the factory" must yield the two expected bigrams.
/// </summary>
/// <remarks>
/// The Java original declared <c>throws Exception</c>; .NET has no 'throws' clause.
/// </remarks>
public virtual void testDefaults()
{
    CommonGramsQueryFilterFactory gramsFactory = (CommonGramsQueryFilterFactory)tokenFilterFactory("CommonGramsQuery");

    CharArraySet commonSet = gramsFactory.CommonWords;
    assertTrue("words is null and it shouldn't be", commonSet != null);
    assertTrue(commonSet.contains("the"));

    StringReader input = new StringReader("testing the factory");
    Tokenizer source = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
    TokenStream filtered = gramsFactory.create(source);

    string[] expectedTerms = { "testing_the", "the_factory" };
    assertTokenStreamContents(filtered, expectedTerms);
}
/// <summary>
/// Drops a trailing -ηστε from words longer than four characters, restoring three
/// characters when the remaining stem is in <c>exc17</c>.
/// </summary>
/// <param name="s"> Buffer holding the word; it is not modified here </param>
/// <param name="len"> Number of valid characters in <paramref name="s"/> </param>
/// <returns> The new length of the word </returns>
private int rule17(char[] s, int len)
{
    if (len <= 4 || !endsWith(s, len, "ηστε"))
    {
        return len;
    }

    int stemLen = len - 4;
    // Growing the length by three adds back the -ηστ that is still in the buffer.
    return exc17.contains(s, 0, stemLen) ? stemLen + 3 : stemLen;
}
/// <summary>
/// Handles the -ανε family of endings. A long ending (-ιουντανε, -ιοντανε, -ουντανε,
/// -ηθηκανε, -ιοτανε, -οντανε, -ουσανε, -αγανε, -ησανε, -οτανε, -ηκανε) is dropped
/// first; when the remaining stem is in <c>exc8a</c>, the characters -αγαν are
/// written after it. Then a trailing -ανε is dropped, with two characters restored
/// when the remaining stem ends in a vowel (per <c>endsWithVowelNoY</c>) or is in
/// <c>exc8b</c>.
/// </summary>
/// <param name="s"> Buffer holding the word; may be overwritten in place when -αγαν is appended </param>
/// <param name="len"> Number of valid characters in <paramref name="s"/> </param>
/// <returns> The new length of the word </returns>
private int rule8(char[] s, int len)
{
    bool removed = false;

    // The alternatives in each branch are parenthesised on purpose: '&&' binds
    // tighter than '||', so without the parentheses the length guard would cover
    // only the first suffix and a word consisting of nothing but one of the other
    // suffixes would be stripped down to length 0.
    if (len > 8 && endsWith(s, len, "ιουντανε"))
    {
        len -= 8;
        removed = true;
    }
    else if (len > 7 && (endsWith(s, len, "ιοντανε") || endsWith(s, len, "ουντανε") || endsWith(s, len, "ηθηκανε")))
    {
        len -= 7;
        removed = true;
    }
    else if (len > 6 && (endsWith(s, len, "ιοτανε") || endsWith(s, len, "οντανε") || endsWith(s, len, "ουσανε")))
    {
        len -= 6;
        removed = true;
    }
    else if (len > 5 && (endsWith(s, len, "αγανε") || endsWith(s, len, "ησανε") || endsWith(s, len, "οτανε") || endsWith(s, len, "ηκανε")))
    {
        len -= 5;
        removed = true;
    }

    if (removed && exc8a.contains(s, 0, len))
    {
        // add -αγαν (at least five characters were removed, so four fit in the buffer)
        len += 4;
        s[len - 4] = 'α';
        s[len - 3] = 'γ';
        s[len - 2] = 'α';
        s[len - 1] = 'ν';
    }

    if (len > 3 && endsWith(s, len, "ανε"))
    {
        len -= 3;
        if (endsWithVowelNoY(s, len) || exc8b.contains(s, 0, len))
        {
            len += 2; // add back -αν, which is still in the buffer
        }
    }

    return len;
}
/// <summary>
/// Drops a trailing -ησετε, then drops a trailing -ετε. After the second removal two
/// characters are restored when the remaining stem is in <c>exc9</c>, ends in a vowel
/// (per <c>endsWithVowelNoY</c>), or ends with one of the listed endings.
/// </summary>
/// <param name="s"> Buffer holding the word; it is not modified here </param>
/// <param name="len"> Number of valid characters in <paramref name="s"/> </param>
/// <returns> The new length of the word </returns>
private int rule9(char[] s, int len)
{
    if (len > 5 && endsWith(s, len, "ησετε"))
    {
        len -= 5;
    }

    if (len <= 3 || !endsWith(s, len, "ετε"))
    {
        return len;
    }
    len -= 3;

    bool keepPrefix = exc9.contains(s, 0, len)
        || endsWithVowelNoY(s, len)
        || endsWith(s, len, "οδ")
        || endsWith(s, len, "αιρ")
        || endsWith(s, len, "φορ")
        || endsWith(s, len, "ταθ")
        || endsWith(s, len, "διαθ")
        || endsWith(s, len, "σχ")
        || endsWith(s, len, "ενδ")
        || endsWith(s, len, "ευρ")
        || endsWith(s, len, "τιθ")
        || endsWith(s, len, "υπερθ")
        || endsWith(s, len, "ραθ")
        || endsWith(s, len, "ενθ")
        || endsWith(s, len, "ροθ")
        || endsWith(s, len, "σθ")
        || endsWith(s, len, "πυρ")
        || endsWith(s, len, "αιν")
        || endsWith(s, len, "συνδ")
        || endsWith(s, len, "συν")
        || endsWith(s, len, "συνθ")
        || endsWith(s, len, "χωρ")
        || endsWith(s, len, "πον")
        || endsWith(s, len, "βρ")
        || endsWith(s, len, "καθ")
        || endsWith(s, len, "ευθ")
        || endsWith(s, len, "εκθ")
        || endsWith(s, len, "νετ")
        || endsWith(s, len, "ρον")
        || endsWith(s, len, "αρκ")
        || endsWith(s, len, "βαρ")
        || endsWith(s, len, "βολ")
        || endsWith(s, len, "ωφελ");

    // Growing the length by two adds back the -ετ that is still in the buffer.
    return keepPrefix ? len + 2 : len;
}
/// <summary>
/// Drops a trailing -ουσεσ / -ουσα / -ουσε. Three characters are restored when the
/// remaining stem is in <c>exc14</c>, ends in a vowel (per <c>endsWithVowel</c>), or
/// ends with one of the listed endings.
/// </summary>
/// <param name="s"> Buffer holding the word; it is not modified here </param>
/// <param name="len"> Number of valid characters in <paramref name="s"/> </param>
/// <returns> The new length of the word </returns>
private int rule14(char[] s, int len)
{
    int cut = 0;
    if (len > 5 && endsWith(s, len, "ουσεσ"))
    {
        cut = 5;
    }
    else if (len > 4 && (endsWith(s, len, "ουσα") || endsWith(s, len, "ουσε")))
    {
        cut = 4;
    }

    if (cut == 0)
    {
        return len;
    }
    len -= cut;

    bool keepPrefix = exc14.contains(s, 0, len)
        || endsWithVowel(s, len)
        || endsWith(s, len, "ποδαρ")
        || endsWith(s, len, "βλεπ")
        || endsWith(s, len, "πανταχ")
        || endsWith(s, len, "φρυδ")
        || endsWith(s, len, "μαντιλ")
        || endsWith(s, len, "μαλλ")
        || endsWith(s, len, "κυματ")
        || endsWith(s, len, "λαχ")
        || endsWith(s, len, "ληγ")
        || endsWith(s, len, "φαγ")
        || endsWith(s, len, "ομ")
        || endsWith(s, len, "πρωτ");

    // Growing the length by three adds back the -ουσ that is still in the buffer.
    return keepPrefix ? len + 3 : len;
}
/// <summary>
/// Runs two removals in sequence: a trailing -ιεστε (four characters restored when
/// the stem is in <c>exc12a</c>), then a trailing -εστε (three characters restored
/// when the stem is in <c>exc12b</c>). The second check sees the length produced by
/// the first.
/// </summary>
/// <param name="s"> Buffer holding the word; it is not modified here </param>
/// <param name="len"> Number of valid characters in <paramref name="s"/> </param>
/// <returns> The new length of the word </returns>
private int rule12(char[] s, int len)
{
    if (len > 5 && endsWith(s, len, "ιεστε"))
    {
        int stemLen = len - 5;
        // Growing the length by four adds back the -ιεστ that is still in the buffer.
        len = exc12a.contains(s, 0, stemLen) ? stemLen + 4 : stemLen;
    }

    if (len > 4 && endsWith(s, len, "εστε"))
    {
        int stemLen = len - 4;
        // Growing the length by three adds back the -εστ that is still in the buffer.
        len = exc12b.contains(s, 0, stemLen) ? stemLen + 3 : stemLen;
    }

    return len;
}
/// <summary>
/// Drops a trailing -ησου / -ησε / -ησα, restoring two characters when the remaining
/// stem is in <c>exc16</c>.
/// </summary>
/// <param name="s"> Buffer holding the word; it is not modified here </param>
/// <param name="len"> Number of valid characters in <paramref name="s"/> </param>
/// <returns> The new length of the word </returns>
private int rule16(char[] s, int len)
{
    int cut = 0;
    if (len > 4 && endsWith(s, len, "ησου"))
    {
        cut = 4;
    }
    else if (len > 3 && (endsWith(s, len, "ησε") || endsWith(s, len, "ησα")))
    {
        cut = 3;
    }

    if (cut == 0)
    {
        return len;
    }

    int stemLen = len - cut;
    // Growing the length by two adds back the -ησ that is still in the buffer.
    return exc16.contains(s, 0, stemLen) ? stemLen + 2 : stemLen;
}
/// <summary>
/// Drops a trailing -ησουμε / -ηθουμε / -ουμε. When the remaining stem is in
/// <c>exc19</c>, the characters -ουμ are written directly after it and the length
/// grows by three.
/// </summary>
/// <param name="s"> Buffer holding the word; overwritten in place when -ουμ is appended </param>
/// <param name="len"> Number of valid characters in <paramref name="s"/> </param>
/// <returns> The new length of the word </returns>
private int rule19(char[] s, int len)
{
    int cut = 0;
    if (len > 6 && (endsWith(s, len, "ησουμε") || endsWith(s, len, "ηθουμε")))
    {
        cut = 6;
    }
    else if (len > 4 && endsWith(s, len, "ουμε"))
    {
        cut = 4;
    }

    if (cut == 0)
    {
        return len;
    }
    len -= cut;

    if (!exc19.contains(s, 0, len))
    {
        return len;
    }

    // Written explicitly because the removed suffix does not always start with -ουμ.
    s[len] = 'ο';
    s[len + 1] = 'υ';
    s[len + 2] = 'μ';
    return len + 3;
}
/// <summary>
/// Drops a trailing -ικα / -ικο / -ικου / -ικων, restoring two characters when the
/// remaining stem ends in a vowel (per <c>endsWithVowel</c>) or is in <c>exc6</c>.
/// </summary>
/// <param name="s"> Buffer holding the word; it is not modified here </param>
/// <param name="len"> Number of valid characters in <paramref name="s"/> </param>
/// <returns> The new length of the word </returns>
private int rule6(char[] s, int len)
{
    int cut = 0;
    if (len > 3 && (endsWith(s, len, "ικα") || endsWith(s, len, "ικο")))
    {
        cut = 3;
    }
    else if (len > 4 && (endsWith(s, len, "ικου") || endsWith(s, len, "ικων")))
    {
        cut = 4;
    }

    if (cut == 0)
    {
        return len;
    }

    int stemLen = len - cut;
    bool keepPrefix = endsWithVowel(s, stemLen) || exc6.contains(s, 0, stemLen);
    // Growing the length by two adds back the -ικ that is still in the buffer.
    return keepPrefix ? stemLen + 2 : stemLen;
}
/// <summary>
/// Find the unique stem(s) of the provided word
/// </summary>
/// <param name="word"> Word to find the stems for </param>
/// <param name="length"> Length value forwarded to <c>stem</c> together with <paramref name="word"/> </param>
/// <returns>
/// The list produced by <c>stem</c> unchanged when it has fewer than two entries;
/// otherwise a new list holding each stem once, in first-seen order
/// </returns>
public IList<CharsRef> uniqueStems(char[] word, int length)
{
    IList<CharsRef> candidates = stem(word, length);
    if (candidates.Count < 2)
    {
        // Zero or one stem cannot contain duplicates.
        return candidates;
    }

    // Membership set that follows the dictionary's case-sensitivity setting.
    CharArraySet seen = new CharArraySet(Version.LUCENE_CURRENT, 8, dictionary.ignoreCase);
    IList<CharsRef> unique = new List<CharsRef>();
    foreach (CharsRef candidate in candidates)
    {
        if (seen.contains(candidate))
        {
            continue;
        }
        unique.Add(candidate);
        seen.add(candidate);
    }
    return unique;
}
/// <summary>
/// Analyzes a sentence with a StopAnalyzer built from a custom three-word stop set
/// and asserts, for every token emitted, that it is not in the stop set and that its
/// position increment matches the expected sequence.
/// </summary>
/// <remarks>
/// The Java original declared <c>throws java.io.IOException</c>; .NET has no 'throws' clause.
/// </remarks>
public virtual void testStopListPositions()
{
    CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
    StopAnalyzer analyzer = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);

    string sentence = "This is a good test of the english stop analyzer with positions";
    int[] expectedIncr = { 1, 1, 1, 3, 1, 1, 1, 2, 1 };

    TokenStream stream = analyzer.tokenStream("test", sentence);
    try
    {
        assertNotNull(stream);

        CharTermAttribute termAtt = stream.getAttribute(typeof(CharTermAttribute));
        PositionIncrementAttribute posIncrAtt = stream.addAttribute(typeof(PositionIncrementAttribute));

        stream.reset();
        int tokenIndex = 0;
        while (stream.incrementToken())
        {
            string term = termAtt.ToString();
            assertFalse(stopWordsSet.contains(term));

            int expected = expectedIncr[tokenIndex];
            tokenIndex++;
            assertEquals(expected, posIncrAtt.PositionIncrement);
        }
        stream.end();
    }
    finally
    {
        IOUtils.closeWhileHandlingException(stream);
    }
}
/// <summary>
/// Analyzes a sentence with a StopAnalyzer built from a custom three-word stop set
/// and asserts that none of the emitted tokens is in that set.
/// </summary>
/// <remarks>
/// The Java original declared <c>throws java.io.IOException</c>; .NET has no 'throws' clause.
/// </remarks>
public virtual void testStopList()
{
    CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
    StopAnalyzer analyzer = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);

    TokenStream stream = analyzer.tokenStream("test", "This is a good test of the english stop analyzer");
    try
    {
        assertNotNull(stream);

        CharTermAttribute termAtt = stream.getAttribute(typeof(CharTermAttribute));

        stream.reset();
        while (stream.incrementToken())
        {
            string term = termAtt.ToString();
            assertFalse(stopWordsSet.contains(term));
        }
        stream.end();
    }
    finally
    {
        IOUtils.closeWhileHandlingException(stream);
    }
}