예제 #1
0
 /// <summary>
 /// Builds a <see cref="CharArraySet"/> from the stopwords supplied by a <see cref="TextReader"/>.
 /// The reader is handed to <c>IOUtils.Dispose</c> before this method exits, whether
 /// loading succeeds or throws.
 /// </summary>
 /// <param name="stopwords">
 ///          the reader the stopwords are loaded from; it is disposed by this method </param>
 /// <param name="matchVersion">
 ///          the Lucene version for cross version compatibility </param>
 /// <returns> the <see cref="CharArraySet"/> produced by <c>WordlistLoader.GetWordSet</c>
 ///         for the given reader </returns>
 /// <exception cref="IOException">
 ///           if loading the stopwords throws an <see cref="IOException"/> </exception>
 protected static CharArraySet LoadStopwordSet(TextReader stopwords, LuceneVersion matchVersion)
 {
     try
     {
         CharArraySet loadedWords = WordlistLoader.GetWordSet(stopwords, matchVersion);
         return loadedWords;
     }
     finally
     {
         // Runs on both the success and the failure path.
         IOUtils.Dispose(stopwords);
     }
 }
예제 #2
0
        /// <summary>
        /// Builds a <see cref="CharArraySet"/> from the stopwords stored in a file.
        /// The file is opened through <c>IOUtils.GetDecodingReader</c> with UTF-8 encoding,
        /// and the resulting reader is handed to <c>IOUtils.Dispose</c> before this method exits.
        /// </summary>
        /// <param name="stopwords">
        ///          the file the stopwords are loaded from </param>
        /// <param name="matchVersion">
        ///          the Lucene version for cross version compatibility </param>
        /// <returns> the <see cref="CharArraySet"/> produced by <c>WordlistLoader.GetWordSet</c>
        ///         for the given file </returns>
        /// <exception cref="IOException">
        ///           if loading the stopwords throws an <see cref="IOException"/> </exception>
        protected static CharArraySet LoadStopwordSet(FileInfo stopwords, LuceneVersion matchVersion)
        {
            // Declared outside the try so the finally block can reach it; it is still
            // null there if opening the file throws.
            TextReader fileReader = null;
            try
            {
                fileReader = IOUtils.GetDecodingReader(stopwords, Encoding.UTF8);
                CharArraySet loadedWords = WordlistLoader.GetWordSet(fileReader, matchVersion);
                return loadedWords;
            }
            finally
            {
                IOUtils.Dispose(fileReader);
            }
        }
예제 #3
0
        /// <summary>
        /// Builds a <see cref="CharArraySet"/> from an embedded resource associated with a class. (See
        /// <see cref="Assembly.GetManifestResourceStream(string)"/>).
        /// The resource stream is located with <c>FindAndGetManifestResourceStream</c>, read as UTF-8,
        /// and the reader is handed to <c>IOUtils.Dispose</c> before this method exits.
        /// </summary>
        /// <param name="ignoreCase">
        ///          <c>true</c> if the set should ignore the case of the
        ///          stopwords, otherwise <c>false</c>; passed to the <see cref="CharArraySet"/> constructor </param>
        /// <param name="aClass">
        ///          the class the resource lookup is performed on </param>
        /// <param name="resource">
        ///          name of the resource file associated with the given class </param>
        /// <param name="comment">
        ///          comment string to ignore in the stopword file </param>
        /// <returns> the <see cref="CharArraySet"/> returned by <c>WordlistLoader.GetWordSet</c>
        ///         for the given resource </returns>
        /// <exception cref="IOException">
        ///           if loading the stopwords throws an <see cref="IOException"/> </exception>
        protected static CharArraySet LoadStopwordSet(bool ignoreCase, Type aClass, string resource, string comment)
        {
            // Declared outside the try so the finally block can reach it; it is still
            // null there if the resource cannot be opened.
            TextReader resourceReader = null;
            try
            {
                resourceReader = IOUtils.GetDecodingReader(
                    aClass.FindAndGetManifestResourceStream(resource),
                    Encoding.UTF8);

                // Warnings 612/618 (use of obsolete members) are suppressed for this construction only.
#pragma warning disable 612, 618
                CharArraySet target = new CharArraySet(LuceneVersion.LUCENE_CURRENT, 16, ignoreCase);
#pragma warning restore 612, 618

                return WordlistLoader.GetWordSet(resourceReader, comment, target);
            }
            finally
            {
                IOUtils.Dispose(resourceReader);
            }
        }
예제 #4
0
        /// <summary>
        /// Same as <see cref="GetWordSet(IResourceLoader, string, bool)"/>,
        /// except the input is in snowball format.
        /// </summary>
        /// <param name="loader">
        ///          resource loader used to open each word file </param>
        /// <param name="wordFiles">
        ///          the file name list, split into individual names by <c>SplitFileNames</c> </param>
        /// <param name="ignoreCase">
        ///          passed to the <see cref="CharArraySet"/> constructor </param>
        /// <returns> a <see cref="CharArraySet"/> filled from every listed file (each read as UTF-8),
        ///         or <c>null</c> when <paramref name="wordFiles"/> yields no file names </returns>
        protected CharArraySet GetSnowballWordSet(IResourceLoader loader, string wordFiles, bool ignoreCase)
        {
            AssureMatchVersion();

            IList<string> fileNames = SplitFileNames(wordFiles);
            if (fileNames.Count <= 0)
            {
                // Nothing to load: callers receive null rather than an empty set.
                return null;
            }

            // The default stopwords list has 35 or so words, but start smaller than that:
            // ten slots per file.
            var wordSet = new CharArraySet(m_luceneMatchVersion, fileNames.Count * 10, ignoreCase);
            foreach (string fileName in fileNames)
            {
                // The reader is disposed first, then the stream, at the end of every iteration.
                using (Stream resource = loader.OpenResource(fileName.Trim()))
                using (TextReader snowballReader = new StreamReader(resource, Encoding.UTF8))
                {
                    WordlistLoader.GetSnowballWordSet(snowballReader, wordSet);
                }
            }

            return wordSet;
        }
예제 #5
0
 /// <summary>
 /// Opens <paramref name="resource"/> through <paramref name="loader"/> and returns its lines,
 /// with the content treated as UTF-8.
 /// </summary>
 /// <param name="loader">resource loader used to open the resource</param>
 /// <param name="resource">name of the resource to read</param>
 /// <returns>the lines produced by <c>WordlistLoader.GetLines</c> for the opened resource</returns>
 protected IList <string> GetLines(IResourceLoader loader, string resource)
 {
     // NOTE(review): the stream is not disposed here; presumably WordlistLoader.GetLines
     // takes ownership and closes it - confirm.
     var resourceStream = loader.OpenResource(resource);
     return WordlistLoader.GetLines(resourceStream, Encoding.UTF8);
 }