/// <summary>
/// Creates a <see cref="CharArraySet"/> from the words supplied by a <see cref="TextReader"/>.
/// The reader is handed to <c>IOUtils.Dispose</c> before this method exits,
/// whether or not loading succeeded.
/// </summary>
/// <param name="stopwords">
///          the stopwords reader to load; it is disposed by this method </param>
/// <param name="matchVersion">
///          the Lucene version for cross version compatibility </param>
/// <returns> a <see cref="CharArraySet"/> containing the distinct stopwords from the given
///         reader </returns>
/// <exception cref="IOException">
///           if loading the stopwords throws an <see cref="IOException"/> </exception>
protected static CharArraySet LoadStopwordSet(TextReader stopwords, LuceneVersion matchVersion)
{
    CharArraySet loadedWords;
    try
    {
        loadedWords = WordlistLoader.GetWordSet(stopwords, matchVersion);
    }
    finally
    {
        // Runs on both the success and the failure path.
        IOUtils.Dispose(stopwords);
    }

    return loadedWords;
}
/// <summary>
/// Creates a <see cref="CharArraySet"/> from a file, decoding its content as UTF-8.
/// </summary>
/// <param name="stopwords">
///          the stopwords file to load </param>
/// <param name="matchVersion">
///          the Lucene version for cross version compatibility </param>
/// <returns> a <see cref="CharArraySet"/> containing the distinct stopwords from the given
///         file </returns>
/// <exception cref="IOException">
///           if loading the stopwords throws an <see cref="IOException"/> </exception>
protected static CharArraySet LoadStopwordSet(FileInfo stopwords, LuceneVersion matchVersion)
{
    // Declared outside the try block so the finally clause can see it; it is
    // still null there if opening the file failed.
    TextReader fileReader = null;
    CharArraySet loadedWords;
    try
    {
        fileReader = IOUtils.GetDecodingReader(stopwords, Encoding.UTF8);
        loadedWords = WordlistLoader.GetWordSet(fileReader, matchVersion);
    }
    finally
    {
        IOUtils.Dispose(fileReader);
    }

    return loadedWords;
}
/// <summary>
/// Creates a <see cref="CharArraySet"/> from an embedded resource associated with a class. (See
/// <see cref="Assembly.GetManifestResourceStream(string)"/>). The resource content is
/// decoded as UTF-8.
/// </summary>
/// <param name="ignoreCase">
///          <c>true</c> if the set should ignore the case of the
///          stopwords, otherwise <c>false</c> </param>
/// <param name="aClass">
///          a class that is associated with the given stopwordResource </param>
/// <param name="resource">
///          name of the resource file associated with the given class </param>
/// <param name="comment">
///          comment string to ignore in the stopword file </param>
/// <returns> a <see cref="CharArraySet"/> containing the distinct stopwords from the given
///         file </returns>
/// <exception cref="IOException">
///           if loading the stopwords throws an <see cref="IOException"/> </exception>
protected static CharArraySet LoadStopwordSet(bool ignoreCase, Type aClass, string resource, string comment)
{
    // Declared outside the try block so the finally clause can see it; it is
    // still null there if the resource could not be opened.
    TextReader resourceReader = null;
    try
    {
        var manifestStream = aClass.FindAndGetManifestResourceStream(resource);
        resourceReader = IOUtils.GetDecodingReader(manifestStream, Encoding.UTF8);

        // The set that receives the words is built after the reader has been opened.
        // Warnings 612/618 (obsolete member usage) are suppressed for this construction only.
#pragma warning disable 612, 618
        CharArraySet targetSet = new CharArraySet(LuceneVersion.LUCENE_CURRENT, 16, ignoreCase);
#pragma warning restore 612, 618

        return WordlistLoader.GetWordSet(resourceReader, comment, targetSet);
    }
    finally
    {
        IOUtils.Dispose(resourceReader);
    }
}
/// <summary>
/// Same as <see cref="GetWordSet(IResourceLoader, string, bool)"/>,
/// except the input is in snowball format.
/// </summary>
/// <param name="loader"> resource loader used to open each listed word file </param>
/// <param name="wordFiles"> the file name list, split into individual names by <c>SplitFileNames</c> </param>
/// <param name="ignoreCase"> passed to the <see cref="CharArraySet"/> constructor </param>
/// <returns> the set filled from every listed file (each read as UTF-8), or <c>null</c>
///         when <paramref name="wordFiles"/> yields no file names </returns>
protected CharArraySet GetSnowballWordSet(IResourceLoader loader, string wordFiles, bool ignoreCase)
{
    AssureMatchVersion();

    IList<string> fileNames = SplitFileNames(wordFiles);
    if (fileNames.Count <= 0)
    {
        // Nothing to load: callers receive null rather than an empty set.
        return null;
    }

    // default stopwords list has 35 or so words, but maybe don't make it that
    // big to start
    CharArraySet wordSet = new CharArraySet(m_luceneMatchVersion, fileNames.Count * 10, ignoreCase);

    foreach (string fileName in fileNames)
    {
        // Both resources are released at the end of each iteration, reader first.
        using (Stream resourceStream = loader.OpenResource(fileName.Trim()))
        using (TextReader snowballReader = new StreamReader(resourceStream, Encoding.UTF8))
        {
            WordlistLoader.GetSnowballWordSet(snowballReader, wordSet);
        }
    }

    return wordSet;
}
/// <summary>
/// Returns the resource's lines (with content treated as UTF-8).
/// </summary>
/// <param name="loader"> resource loader used to open <paramref name="resource"/> </param>
/// <param name="resource"> name of the resource to read </param>
/// <returns> the lines produced by <c>WordlistLoader.GetLines</c> for the opened resource </returns>
protected IList<string> GetLines(IResourceLoader loader, string resource)
{
    // The opened stream is passed straight to WordlistLoader.GetLines and is not
    // disposed in this method.
    var resourceStream = loader.OpenResource(resource);
    return WordlistLoader.GetLines(resourceStream, Encoding.UTF8);
}