Пример #1
0
        // LUCENE-3849: make sure that after .end() we see the "ending" posInc
        public virtual void testEndStopword()
        {
            // "of" is the only stop word and it is the last token of the input "test of".
            CharArraySet stopWords = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "of");
            MockTokenizer tokenizer = new MockTokenizer(new StringReader("test of"), MockTokenizer.WHITESPACE, false);
            StopFilter filter = new StopFilter(TEST_VERSION_CURRENT, tokenizer, stopWords);

            // Only "test" is expected to survive the filter.
            string[] expectedTerms = { "test" };
            int[] expectedStarts = { 0 };
            int[] expectedEnds = { 4 };
            int[] expectedIncrements = { 1 };

            // NOTE(review): the trailing 7 and 1 are presumably the final offset and the
            // final position increment reported after end() - confirm against the
            // assertTokenStreamContents signature, which is not visible here.
            assertTokenStreamContents(filter, expectedTerms, expectedStarts, expectedEnds, null, expectedIncrements, null, 7, 1, null, true);
        }
Пример #2
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testStopFilt() throws java.io.IOException
        public virtual void testStopFilt()
        {
            // Input text and the stop words to remove from it.
            StringReader input = new StringReader("Now is The Time");
            string[] words = { "is", "the", "Time" };

            CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, words);
            MockTokenizer tokenizer = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
            TokenStream filtered = new StopFilter(TEST_VERSION_CURRENT, tokenizer, stopSet);

            // "The" is expected to survive even though "the" is a stop word, while
            // "is" and "Time" (exact matches) are expected to be dropped.
            string[] expected = { "Now", "The" };
            assertTokenStreamContents(filtered, expected);
        }
Пример #3
0
        /// <summary>
        /// Builds a <see cref="CharArraySet"/> from the files named in
        /// <paramref name="wordFiles"/>, which can be a comma-separated list of filenames.
        /// </summary>
        /// <param name="loader">Loader handed to <c>getLines</c> to read each file.</param>
        /// <param name="wordFiles">File name(s) to read; split with <c>splitFileNames</c>.</param>
        /// <param name="ignoreCase">Passed through to the created sets.</param>
        /// <returns>
        /// The combined set, or <c>null</c> when <paramref name="wordFiles"/> yields no file names.
        /// </returns>
        protected internal CharArraySet GetWordSet(ResourceLoader loader, string wordFiles, bool ignoreCase)
        {
            assureMatchVersion();

            IList<string> fileNames = splitFileNames(wordFiles);
            if (fileNames.Count <= 0)
            {
                // Nothing to load: callers receive null rather than an empty set.
                return null;
            }

            // The default stopwords list has 35 or so words, but start with a
            // smaller initial size (10 entries per file).
            CharArraySet combined = new CharArraySet(luceneMatchVersion, fileNames.Count * 10, ignoreCase);
            foreach (string fileName in fileNames)
            {
                var lines = getLines(loader, fileName.Trim());
                var fromFile = StopFilter.makeStopSet(luceneMatchVersion, lines, ignoreCase);
                combined.AddAll(fromFile);
            }
            return combined;
        }
Пример #4
0
        // Initializes ignoreCase, enablePositionIncrements and stopWords from the
        // factory arguments. When a "words" argument is present, the stop words are
        // read from the listed files through the given loader; otherwise a customised
        // copy of the English stop word list is used.
        public void inform(ResourceLoader loader)
        {
            string wordFilesArg = (string)args.get("words");

            ignoreCase = getBoolean("ignoreCase", false);
            enablePositionIncrements = getBoolean("enablePositionIncrements", false);

            if (wordFilesArg == null)
            {
                // No word files configured: use the English stop words,
                // explicitly removing 'it' and adding the special case 'careerone'.
#pragma warning disable 612
                var defaults = StopAnalyzer.ENGLISH_STOP_WORDS;
#pragma warning restore 612
                defaults = defaults.Where(word => word != "it").Concat(new[] { "careerone" }).ToArray();

                stopWords = (CharArraySet)StopFilter.makeStopSet(defaults, ignoreCase);
                return;
            }

            try
            {
                List /*<String>*/ fileNames = StrUtils.splitFileNames(wordFilesArg);
                if (stopWords == null && fileNames.size() > 0)
                {
                    // The default stopwords list has 35 or so words, but start smaller.
                    stopWords = new CharArraySet(fileNames.size() * 10, ignoreCase);
                }

                var cursor = fileNames.iterator();
                while (cursor.hasNext())
                {
                    string fileName = (string)cursor.next();
                    List /*<String>*/ lines = loader.getLines(fileName.Trim());
                    // TODO: once StopFilter.makeStopSet(List) is available, use it to avoid the toArray() call
                    string[] words = (string[])lines.toArray(new string[0]);
                    stopWords.addAll(StopFilter.makeStopSet(words, ignoreCase));
                }
            }
            catch (IOException ex)
            {
                // Surface read failures as an unchecked exception, keeping the cause.
                throw new System.ApplicationException("Unexpected exception", ex);
            }
        }
Пример #5
0
        /// <summary>
        /// Test position increments applied by StopFilter with and without enabling this option.
        /// </summary>
        /// <remarks>
        /// The input text is the English spelling of the numbers 0..19 separated by
        /// spaces; every number that is not a multiple of three is used as a stop word.
        /// The filter is then checked three times through <c>doTestStopPositons</c>:
        /// with increments, without increments, and with two chained stop filters
        /// that each hold half of the stop words.
        /// </remarks>
        public virtual void testStopPositons()
        {
            StringBuilder sb = new StringBuilder();
            List<string> a = new List<string>();

            for (int i = 0; i < 20; i++)
            {
                // string.Trim() is the .NET spelling; the converter left Java's trim() here.
                string w = English.intToEnglish(i).Trim();
                sb.Append(w).Append(" ");
                if (i % 3 != 0)
                {
                    a.Add(w);
                }
            }
            log(sb.ToString());

            string[] stopWords = a.ToArray();
            foreach (string stopWord in stopWords)
            {
                log("Stop: " + stopWord);
            }
            CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);

            // with increments
            StringReader reader = new StringReader(sb.ToString());
            StopFilter stpf = new StopFilter(Version.LUCENE_40, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);

            doTestStopPositons(stpf, true);

            // without increments
            // NOTE(review): nothing here disables position increments; the two runs differ
            // only in the match version and in the flag passed to doTestStopPositons.
            // Presumably the helper applies the flag to the filter itself - confirm.
            reader = new StringReader(sb.ToString());
            stpf = new StopFilter(Version.LUCENE_43, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
            doTestStopPositons(stpf, false);

            // with increments, concatenating two stop filters:
            // stop words at even indices go to the first filter, odd indices to the second
            List<string> a0 = new List<string>();
            List<string> a1 = new List<string>();

            for (int i = 0; i < a.Count; i++)
            {
                if (i % 2 == 0)
                {
                    a0.Add(a[i]);
                }
                else
                {
                    a1.Add(a[i]);
                }
            }

            string[] stopWords0 = a0.ToArray();
            foreach (string stopWord in stopWords0)
            {
                log("Stop0: " + stopWord);
            }
            string[] stopWords1 = a1.ToArray();
            foreach (string stopWord in stopWords1)
            {
                log("Stop1: " + stopWord);
            }
            CharArraySet stopSet0 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords0);
            CharArraySet stopSet1 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords1);

            reader = new StringReader(sb.ToString());
            StopFilter stpf0 = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet0);     // first part of the set

            stpf0.EnablePositionIncrements = true;
            StopFilter stpf01 = new StopFilter(TEST_VERSION_CURRENT, stpf0, stopSet1);     // two stop filters concatenated!

            doTestStopPositons(stpf01, true);
        }