/// <summary>
/// LUCENE-3849: make sure that after <c>end()</c> the stream still exposes the
/// "ending" position increment when the input finishes with a stop word.
/// </summary>
public virtual void testEndStopword()
{
    // "of" is the only stop word and it is also the last token of the input.
    CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "of");
    MockTokenizer tokenizer = new MockTokenizer(new StringReader("test of"), MockTokenizer.WHITESPACE, false);
    StopFilter stpf = new StopFilter(TEST_VERSION_CURRENT, tokenizer, stopSet);

    // Only "test" is expected to survive, covering characters [0, 4) of the input.
    string[] expectedTerms = new string[] { "test" };
    int[] expectedStartOffsets = new int[] { 0 };
    int[] expectedEndOffsets = new int[] { 4 };
    int[] expectedPosIncrements = new int[] { 1 };

    // NOTE(review): the trailing 7 and 1 are presumably the final offset and the
    // final position increment reported after end() - confirm against the
    // assertTokenStreamContents overload being called.
    assertTokenStreamContents(
        stpf,
        expectedTerms,
        expectedStartOffsets,
        expectedEndOffsets,
        null,
        expectedPosIncrements,
        null,
        7,
        1,
        null,
        true);
}
/// <summary>
/// Runs a <see cref="StopFilter"/> over "Now is The Time" with the stop words
/// "is", "the" and "Time", and asserts that the tokens "Now" and "The" remain.
/// </summary>
public virtual void testStopFilt()
{
    // Note the casing: the stop list holds "the" while the input holds "The",
    // and the assertion below expects "The" to pass through the filter.
    string[] stopWords = new string[] { "is", "the", "Time" };
    CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);

    StringReader reader = new StringReader("Now is The Time");
    MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, tokenizer, stopSet);

    string[] expectedTerms = new string[] { "Now", "The" };
    assertTokenStreamContents(stream, expectedTerms);
}
/// <summary>
/// Returns a <see cref="CharArraySet"/> built from <paramref name="wordFiles"/>,
/// which can be a comma-separated list of filenames.
/// </summary>
/// <param name="loader">Resource loader handed to <c>getLines</c> for each file.</param>
/// <param name="wordFiles">File name list, split with <c>splitFileNames</c>.</param>
/// <param name="ignoreCase">Passed to the set constructor and to <c>StopFilter.makeStopSet</c>.</param>
/// <returns>
/// The combined word set, or <c>null</c> when <paramref name="wordFiles"/> yields no file names.
/// </returns>
protected internal CharArraySet GetWordSet(ResourceLoader loader, string wordFiles, bool ignoreCase)
{
    assureMatchVersion();

    IList<string> fileNames = splitFileNames(wordFiles);
    if (fileNames.Count <= 0)
    {
        // No files listed: callers receive null rather than an empty set.
        return null;
    }

    // default stopwords list has 35 or so words, but maybe don't make it that
    // big to start
    CharArraySet combined = new CharArraySet(luceneMatchVersion, fileNames.Count * 10, ignoreCase);
    foreach (string fileName in fileNames)
    {
        var lines = getLines(loader, fileName.Trim());
        combined.AddAll(StopFilter.makeStopSet(luceneMatchVersion, lines, ignoreCase));
    }

    return combined;
}
/// <summary>
/// Reads the "words", "ignoreCase" and "enablePositionIncrements" arguments and
/// populates <c>stopWords</c>, either from the listed files or from a customized
/// copy of the default English stop words.
/// </summary>
/// <param name="loader">Resource loader used to read the lines of each stop word file.</param>
/// <exception cref="System.ApplicationException">
/// Thrown (wrapping the original exception) when an <see cref="IOException"/> occurs
/// while loading the stop word files.
/// </exception>
public void inform(ResourceLoader loader)
{
    string stopWordFiles = (string)args.get("words");
    ignoreCase = getBoolean("ignoreCase", false);
    enablePositionIncrements = getBoolean("enablePositionIncrements", false);

    if (stopWordFiles == null)
    {
        // No "words" argument: fall back to the built-in English list, but
        // explicitly remove 'it' from the stop words list and
        // add in the special case of 'careerone'.
#pragma warning disable 612
        var defaultWords = StopAnalyzer.ENGLISH_STOP_WORDS;
#pragma warning restore 612
        defaultWords = defaultWords.Where(s => s != "it").Concat(new[] { "careerone" }).ToArray();
        stopWords = (CharArraySet)StopFilter.makeStopSet(defaultWords, ignoreCase);
        return;
    }

    try
    {
        List /*<String>*/ files = StrUtils.splitFileNames(stopWordFiles);
        if (stopWords == null && files.size() > 0)
        {
            //default stopwords list has 35 or so words, but maybe don't make it that big to start
            stopWords = new CharArraySet(files.size() * 10, ignoreCase);
        }

        var fileIterator = files.iterator();
        while (fileIterator.hasNext())
        {
            string file = (string)fileIterator.next();
            List /*<String>*/ wlist = loader.getLines(file.Trim());
            //TODO: once StopFilter.makeStopSet(List) method is available, switch to using that so we can avoid a toArray() call
            stopWords.addAll(StopFilter.makeStopSet((string[])wlist.toArray(new string[0]), ignoreCase));
        }
    }
    catch (IOException e)
    {
        throw new System.ApplicationException("Unexpected exception", e);
    }
}
/// <summary>
/// Test position increments applied by StopFilter with and without enabling this option.
/// </summary>
/// <remarks>
/// The input text is the English words for 0..19; every number that is not a
/// multiple of three is used as a stop word. Three setups are checked through
/// <c>doTestStopPositons</c>: a single filter with increments, a single filter
/// without increments, and two chained filters that each hold half of the stop words.
/// </remarks>
public virtual void testStopPositons()
{
    StringBuilder text = new StringBuilder();
    List<string> stopList = new List<string>();
    for (int number = 0; number < 20; number++)
    {
        string word = English.intToEnglish(number).trim();
        text.Append(word).Append(" ");
        if (number % 3 == 0)
        {
            // Multiples of three stay in the text as regular (non-stop) tokens.
            continue;
        }
        stopList.Add(word);
    }
    log(text.ToString());

    string[] stopWords = stopList.ToArray();
    foreach (string stopWord in stopWords)
    {
        log("Stop: " + stopWord);
    }
    CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);

    // with increments
    StringReader reader = new StringReader(text.ToString());
    StopFilter stpf = new StopFilter(Version.LUCENE_40, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
    doTestStopPositons(stpf, true);

    // without increments
    // NOTE(review): nothing here disables increments on the filter; presumably
    // doTestStopPositons applies the flag it is given - confirm against that helper.
    reader = new StringReader(text.ToString());
    stpf = new StopFilter(Version.LUCENE_43, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
    doTestStopPositons(stpf, false);

    // with increments, concatenating two stop filters:
    // stop words at even indices go to the first filter, odd indices to the second.
    List<string> evenStops = new List<string>();
    List<string> oddStops = new List<string>();
    for (int index = 0; index < stopList.Count; index++)
    {
        List<string> target = (index % 2 == 0) ? evenStops : oddStops;
        target.Add(stopList[index]);
    }

    string[] stopWords0 = evenStops.ToArray();
    foreach (string stopWord in stopWords0)
    {
        log("Stop0: " + stopWord);
    }
    string[] stopWords1 = oddStops.ToArray();
    foreach (string stopWord in stopWords1)
    {
        log("Stop1: " + stopWord);
    }

    CharArraySet stopSet0 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords0);
    CharArraySet stopSet1 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords1);
    reader = new StringReader(text.ToString());

    // first part of the set
    StopFilter stpf0 = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet0);
    stpf0.EnablePositionIncrements = true;

    // two stop filters concatenated!
    StopFilter stpf01 = new StopFilter(TEST_VERSION_CURRENT, stpf0, stopSet1);
    doTestStopPositons(stpf01, true);
}