/// <summary>
/// Checks that the given resource loader can open a snowball stop-word file by
/// its full path and can create an instance of a class from its type name.
/// </summary>
/// <param name="rl">The resource loader under test.</param>
private void assertClasspathDelegation(IResourceLoader rl)
        {
            // LUCENENET specific - rather than being completely dependent on the location of the file
            // in the file system, we use the embedded resource to write the file to a known location
            // before passing it to our resource loader.
            string englishStopFile = "english_stop.txt";
            var file = CreateTempFile(System.IO.Path.GetFileNameWithoutExtension(englishStopFile), System.IO.Path.GetExtension(englishStopFile));

            using (var stream = typeof(Snowball.SnowballFilter).GetTypeInfo().Assembly.FindAndGetManifestResourceStream(typeof(Snowball.SnowballFilter), englishStopFile))
            {
                // Fail with an assertion rather than a NullReferenceException on CopyTo
                // in case the resource lookup yields no stream.
                assertTrue(stream != null);

                // FileMode.Create truncates an existing file, so no stale bytes can remain
                // after the copied content (FileMode.OpenOrCreate would keep them).
                using (var outputStream = new FileStream(file.FullName, FileMode.Create, FileAccess.Write))
                {
                    stream.CopyTo(outputStream);
                }
            }

            // try a stopwords file from classpath
            CharArraySet set;
            // Dispose the stream and reader here so the temp file handle is released
            // regardless of what the loader does with the reader.
            using (var resourceStream = rl.OpenResource(file.FullName))
            using (var reader = new System.IO.StreamReader(resourceStream, Encoding.UTF8))
            {
                set = WordlistLoader.GetSnowballWordSet(reader, TEST_VERSION_CURRENT);
            }

            assertTrue(set.contains("you"));
            // try to load a class; we use string comparison because classloader may be different...
            assertEquals("Lucene.Net.Analysis.Util.RollingCharBuffer", rl.NewInstance<object>("Lucene.Net.Analysis.Util.RollingCharBuffer").ToString());
            // theoretically classes should also be loadable:
            //IOUtils.CloseWhileHandlingException(rl.OpenResource("java/lang/String.class")); // LUCENENET TODO: Not sure what the equivalent to this is (or if there is one).
        }
Example #2
0
        /// <summary>
        /// Checks that the given resource loader can open a snowball stop-word file by
        /// its full path and can create an instance of a class from its type name.
        /// </summary>
        /// <param name="rl">The resource loader under test.</param>
        private void assertClasspathDelegation(IResourceLoader rl)
        {
            // Build the relative path with Path.Combine so the platform's directory separator
            // is used; a literal with backslashes only resolves on Windows.
            string relativePath = System.IO.Path.Combine("..", "..", "..", "Lucene.Net.Analysis.Common", "Analysis", "Snowball", "english_stop.txt");

            // try a stopwords file from classpath
            CharArraySet set;
            // Dispose the stream and reader here so the file handle is released
            // regardless of what the loader does with the reader.
            using (var resourceStream = rl.OpenResource(System.IO.Path.GetFullPath(relativePath)))
            using (var reader = new System.IO.StreamReader(resourceStream, Encoding.UTF8))
            {
                set = WordlistLoader.GetSnowballWordSet(reader, TEST_VERSION_CURRENT);
            }

            assertTrue(set.contains("you"));
            // try to load a class; we use string comparison because classloader may be different...
            assertEquals("Lucene.Net.Analysis.Util.RollingCharBuffer", rl.NewInstance<object>("Lucene.Net.Analysis.Util.RollingCharBuffer").ToString());
            // theoretically classes should also be loadable:
            //IOUtils.CloseWhileHandlingException(rl.OpenResource("java/lang/String.class")); // LUCENENET TODO: Not sure what the equivalent to this is (or if there is one).
        }
Example #3
0
        /// <summary>
        /// Feeds a small snowball-format word list (comments, blank lines, padded and
        /// multi-word lines) to <c>WordlistLoader.GetSnowballWordSet</c> and checks that
        /// exactly the seven expected words end up in the resulting set.
        /// </summary>
        public virtual void TestSnowballListLoading()
        {
            string input = string.Concat(
                "|comment\n",               // commented line
                " |comment\n",              // commented line with leading whitespace
                "\n",                       // blank line
                "  \t\n",                   // line with only whitespace
                " |comment | comment\n",    // commented line with comment
                "ONE\n",                    // stopword, in uppercase
                "   two   \n",              // stopword with leading/trailing space
                " three   four five \n",    // multiple stopwords
                "six seven | comment\n");   // multiple stopwords + comment

            CharArraySet loaded = WordlistLoader.GetSnowballWordSet(new StringReader(input), TEST_VERSION_CURRENT);

            string[] expectedWords = { "ONE", "two", "three", "four", "five", "six", "seven" };

            // Nothing besides the expected words may be present.
            assertEquals(expectedWords.Length, loaded.size());
            foreach (string expected in expectedWords)
            {
                assertTrue(loaded.contains(expected));
            }
        }
        /// <summary>
        /// Same as <see cref="GetWordSet(IResourceLoader, string, bool)"/>,
        /// except the input is in snowball format.
        /// </summary>
        /// <param name="loader">Resource loader used to open each word file.</param>
        /// <param name="wordFiles">File name list; it is broken into individual names by <c>SplitFileNames</c>.</param>
        /// <param name="ignoreCase">Passed to the <see cref="CharArraySet"/> constructor as its ignore-case flag.</param>
        /// <returns>
        /// A set holding the words read from every file, or <c>null</c> when
        /// <paramref name="wordFiles"/> yields no file names.
        /// </returns>
        protected CharArraySet GetSnowballWordSet(IResourceLoader loader, string wordFiles, bool ignoreCase)
        {
            AssureMatchVersion();

            IList<string> fileNames = SplitFileNames(wordFiles);
            if (fileNames.Count <= 0)
            {
                // No files to read: callers get null rather than an empty set.
                return null;
            }

            // default stopwords list has 35 or so words, but maybe don't make it that
            // big to start
            CharArraySet result = new CharArraySet(m_luceneMatchVersion, fileNames.Count * 10, ignoreCase);

            foreach (string fileName in fileNames)
            {
                using (Stream input = loader.OpenResource(fileName.Trim()))
                {
                    using (TextReader reader = new StreamReader(input, Encoding.UTF8))
                    {
                        // Words from every file accumulate into the same set.
                        WordlistLoader.GetSnowballWordSet(reader, result);
                    }
                }
            }

            return result;
        }