/// <summary>
/// Loads the stop word table from a resource stream (file, database, ...).
/// </summary>
/// <param name="wordfile">
/// Stream containing the word list. When null, the stop table is simply reset to an empty set.
/// </param>
/// <param name="encoding">
/// Encoding used (win-1250, iso-8859-2, ...), or null to construct the
/// <see cref="StreamReader"/> without an explicit encoding.
/// </param>
/// <remarks>
/// Deprecated: use WordlistLoader.GetWordSet(Reader, String) and the
/// CzechAnalyzer(Version, Set) constructor instead.
/// </remarks>
public void LoadStopWords(Stream wordfile, System.Text.Encoding encoding)
{
    // Force a new stop filter to be created.
    PreviousTokenStream = null;

    // Every path starts from an empty table, which also clears any previous one.
    stoptable = Support.Compatibility.SetFactory.CreateHashSet<string>();
    if (wordfile == null)
    {
        return;
    }

    try
    {
        StreamReader wordReader = encoding == null
            ? new StreamReader(wordfile)
            : new StreamReader(wordfile, encoding);
        stoptable = WordlistLoader.GetWordSet(wordReader);
    }
    catch (IOException)
    {
        // Fall back to an empty table when the list cannot be read.
        // TODO: throw IOException
        stoptable = Support.Compatibility.SetFactory.CreateHashSet<string>();
    }
}
/// <summary>
/// Static initializer: loads the default stop word set and the default stemmer table
/// from resources located via <c>typeof(PolishAnalyzer)</c>.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown (wrapping the <see cref="IOException"/>) when either resource cannot be loaded.
/// </exception>
static DefaultsHolder()
{
    var analyzerType = typeof(PolishAnalyzer);

    try
    {
        var stopwordReader = IOUtils.GetDecodingReader(analyzerType, DEFAULT_STOPWORD_FILE, Encoding.UTF8);
        DEFAULT_STOP_SET = WordlistLoader.GetWordSet(
            stopwordReader,
            "#",
#pragma warning disable 612, 618
            LuceneVersion.LUCENE_CURRENT);
#pragma warning restore 612, 618
    }
    catch (IOException ex)
    {
        // default set should always be present as it is part of the
        // distribution (embedded resource)
        throw new InvalidOperationException("Unable to load default stopword set", ex);
    }

    try
    {
        var stemmerStream = analyzerType.GetTypeInfo().Assembly
            .FindAndGetManifestResourceStream(analyzerType, DEFAULT_STEMMER_FILE);
        DEFAULT_TABLE = StempelStemmer.Load(stemmerStream);
    }
    catch (IOException ex)
    {
        // default table should always be present as it is part of the
        // distribution (embedded resource)
        throw new InvalidOperationException("Unable to load default stemming tables", ex);
    }
}
/// <summary>
/// Reads the default stop word list from the manifest resource
/// <c>"Lucene.Net.Analysis.AR." + DEFAULT_STOPWORD_FILE</c> of the assembly containing
/// <c>ArabicAnalyzer</c> and returns it as an unmodifiable set.
/// </summary>
/// <returns>An unmodifiable copy of the loaded word set.</returns>
internal static ISet<string> LoadDefaultStopWordSet()
{
    System.Reflection.Assembly owningAssembly = System.Reflection.Assembly.GetAssembly(typeof(ArabicAnalyzer));
    string resourceName = "Lucene.Net.Analysis.AR." + DEFAULT_STOPWORD_FILE;

    using (StreamReader reader = new StreamReader(owningAssembly.GetManifestResourceStream(resourceName)))
    {
        // Lines are parsed with STOPWORDS_COMMENT as the comment marker.
        var loadedWords = WordlistLoader.GetWordSet(reader, STOPWORDS_COMMENT);
        var copiedWords = CharArraySet.Copy(loadedWords);
        return CharArraySet.UnmodifiableSet(copiedWords);
    }
}
/// <summary>
/// Loads a word list using "#" as the comment marker and checks that the commented
/// entry does not end up in the resulting set.
/// </summary>
/// <remarks>
/// Converted from Java (original signature: <c>public void testComments() throws Exception</c>);
/// 'throws' clauses are not available in .NET.
/// </remarks>
public virtual void testComments()
{
    string wordList = "ONE\n two \nthree\n#comment";
    StringReader source = new StringReader(wordList);

    CharArraySet loaded = WordlistLoader.getWordSet(source, "#", TEST_VERSION_CURRENT);

    checkSet(loaded);
    // Neither the raw line nor its text after the marker may be present.
    assertFalse(loaded.contains("#comment"));
    assertFalse(loaded.contains("comment"));
}
/// <summary>
/// Loads the same word list twice through <c>WordlistLoader.getWordSet</c> and verifies
/// both resulting sets with <c>checkSet</c>.
/// </summary>
/// <remarks>
/// Converted from Java (original signature:
/// <c>public void testWordlistLoading() throws java.io.IOException</c>); 'throws' clauses
/// are not available in .NET.
/// </remarks>
public virtual void testWordlistLoading()
{
    string s = "ONE\n two \nthree";

    CharArraySet wordSet1 = WordlistLoader.getWordSet(new StringReader(s), TEST_VERSION_CURRENT);
    checkSet(wordSet1);

    // The converter emitted `new System.IO.StreamReader(new StringReader(s))` here,
    // but StreamReader has no constructor that accepts a TextReader (it wraps a Stream
    // or a path). A fresh StringReader is passed directly instead.
    CharArraySet wordSet2 = WordlistLoader.getWordSet(new StringReader(s), TEST_VERSION_CURRENT);
    checkSet(wordSet2);
}
/// <summary>
/// Loads the default stop word set from <c>DEFAULT_STOPWORD_FILE</c> (decoded as UTF-8,
/// resolved relative to <c>SmartChineseAnalyzer</c>), using
/// <c>STOPWORD_FILE_COMMENT</c> as the comment marker.
/// </summary>
/// <returns>An unmodifiable view of the loaded set.</returns>
internal static CharArraySet LoadDefaultStopWordSet()
{
    var stopwordReader = IOUtils.GetDecodingReader(
        typeof(SmartChineseAnalyzer), DEFAULT_STOPWORD_FILE, Encoding.UTF8);

    var loadedWords = WordlistLoader.GetWordSet(
        stopwordReader,
        STOPWORD_FILE_COMMENT,
#pragma warning disable 612, 618
        LuceneVersion.LUCENE_CURRENT);
#pragma warning restore 612, 618

    // make sure it is unmodifiable as we expose it in the outer class
    return CharArraySet.UnmodifiableSet(loadedWords);
}
/// <summary>
/// Builds the stem exclusion table from the words contained in the given file and
/// resets <c>PreviousTokenStream</c> so the new table takes effect.
/// </summary>
/// <param name="exclusionlist">File passed to <c>WordlistLoader.GetWordSet</c>.</param>
/// <exception cref="Exception">
/// Thrown, wrapping the original <see cref="IOException"/>, when the file cannot be read.
/// </exception>
public void SetStemExclusionTable(FileInfo exclusionlist)
{
    try
    {
        ExclusionTable = WordlistLoader.GetWordSet(exclusionlist);
        PreviousTokenStream = null;
    }
    catch (IOException ex)
    {
        // The exception type is kept for existing callers; the message used to be empty,
        // which left the failure unexplained unless the inner exception was inspected.
        throw new Exception("Unable to load stem exclusion list from '" + exclusionlist + "'", ex);
    }
}
/// <summary>
/// Checks that the given loader can open a stop word resource, instantiate a class by
/// name, and open a class file resource.
/// </summary>
/// <remarks>
/// Converted from Java (original signature:
/// <c>private void assertClasspathDelegation(ResourceLoader rl) throws Exception</c>);
/// 'throws' clauses are not available in .NET.
/// </remarks>
private void assertClasspathDelegation(ResourceLoader rl)
{
    // try a stopwords file from classpath
    System.IO.StreamReader stopwordReader = new System.IO.StreamReader(
        rl.openResource("org/apache/lucene/analysis/snowball/english_stop.txt"), Encoding.UTF8);
    CharArraySet stopwords = WordlistLoader.getSnowballWordSet(stopwordReader, TEST_VERSION_CURRENT);
    assertTrue(stopwords.contains("you"));

    // try to load a class; we use string comparison because classloader may be different...
    // Converter warning: the .NET Type.FullName property will not always yield results
    // identical to the Java Class.getName method.
    string loadedTypeName = rl.newInstance("org.apache.lucene.analysis.util.RollingCharBuffer", typeof(object)).GetType().FullName;
    assertEquals("org.apache.lucene.analysis.util.RollingCharBuffer", loadedTypeName);

    // theoretically classes should also be loadable:
    IOUtils.closeWhileHandlingException(rl.openResource("java/lang/String.class"));
}
/// <summary>
/// Static initializer: loads the default snowball-format stop word set from
/// <c>DEFAULT_STOPWORD_FILE</c> (decoded as UTF-8, resolved relative to <c>SnowballFilter</c>).
/// </summary>
/// <exception cref="Exception">
/// Thrown, wrapping the original <see cref="IOException"/>, when the set cannot be loaded.
/// </exception>
static DefaultSetHolder()
{
    try
    {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(
            IOUtils.getDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8),
            Version.LUCENE_CURRENT);
    }
    catch (IOException ex)
    {
        // default set should always be present as it is part of the
        // distribution (JAR). Keep the cause so the underlying I/O failure stays visible.
        throw new Exception("Unable to load default stopword set", ex);
    }
}
/// <summary>
/// Test stopwords in snowball format: comments, blank lines and multi-word lines.
/// </summary>
/// <remarks>
/// Converted from Java (original signature:
/// <c>public void testSnowballListLoading() throws java.io.IOException</c>); 'throws'
/// clauses are not available in .NET.
/// </remarks>
public virtual void testSnowballListLoading()
{
    string snowballList =
        "|comment\n" +              // commented line
        " |comment\n" +             // commented line with leading whitespace
        "\n" +                      // blank line
        " \t\n" +                   // line with only whitespace
        " |comment | comment\n" +   // commented line with comment
        "ONE\n" +                   // stopword, in uppercase
        " two \n" +                 // stopword with leading/trailing space
        " three four five \n" +     // multiple stopwords
        "six seven | comment\n";    // multiple stopwords + comment

    CharArraySet loaded = WordlistLoader.getSnowballWordSet(new StringReader(snowballList), TEST_VERSION_CURRENT);

    assertEquals(7, loaded.size());

    string[] expectedWords = { "ONE", "two", "three", "four", "five", "six", "seven" };
    foreach (string expectedWord in expectedWords)
    {
        assertTrue(loaded.contains(expectedWord));
    }
}
/// <summary>
/// Reads the default stop word list from the manifest resource
/// <c>"Lucene.Net.Analyzers.Fa." + DEFAULT_STOPWORD_FILE</c> of the assembly containing
/// <c>PersianAnalyzer</c> (decoded as UTF-8) and returns it as an unmodifiable set.
/// </summary>
/// <returns>An unmodifiable set built from the loaded words.</returns>
/// <exception cref="System.InvalidOperationException">
/// Thrown when the embedded resource cannot be found.
/// </exception>
static ISet<String> LoadDefaultStopWordSet()
{
    string resourceName = "Lucene.Net.Analyzers.Fa." + DEFAULT_STOPWORD_FILE;

    using (var stream = System.Reflection.Assembly.GetAssembly(typeof(PersianAnalyzer)).GetManifestResourceStream(resourceName))
    {
        // GetManifestResourceStream returns null for a missing resource. Previously that
        // surfaced as a NullReferenceException from stream.Close() in the finally block,
        // which hid the actual problem.
        if (stream == null)
        {
            throw new System.InvalidOperationException(
                "Unable to load default stopword set: embedded resource '" + resourceName + "' was not found");
        }

        using (StreamReader reader = new StreamReader(stream, System.Text.Encoding.UTF8))
        {
            // make sure it is unmodifiable as we expose it in the outer class
            return CharArraySet.UnmodifiableSet(new CharArraySet(WordlistLoader.GetWordSet(reader, STOPWORDS_COMMENT), true));
        }
    }
}
/// <summary>
/// Static initializer: loads the default snowball-format stop word set from the resource
/// named <c>typeof(SnowballFilter).Namespace + "." + DEFAULT_STOPWORD_FILE</c>, decoded as UTF-8.
/// </summary>
/// <exception cref="Exception">
/// Thrown, wrapping the original <see cref="IOException"/>, when the set cannot be loaded.
/// </exception>
static DefaultSetHolder()
{
    try
    {
        DEFAULT_STOP_SET = WordlistLoader.GetSnowballWordSet(
            IOUtils.GetDecodingReader(typeof(SnowballFilter), typeof(SnowballFilter).Namespace + "." + DEFAULT_STOPWORD_FILE, Encoding.UTF8),
            LuceneVersion.LUCENE_CURRENT);
    }
    catch (IOException ex)
    {
        // default set should always be present as it is part of the
        // distribution (JAR). Keep the cause so the underlying I/O failure stays visible.
        throw new Exception("Unable to load default stopword set", ex);
    }
}
/// <summary>
/// Static initializer: loads the default stop word set from <c>DEFAULT_STOPWORD_FILE</c>
/// (decoded as UTF-8, resolved relative to <c>SoraniAnalyzer</c>).
/// </summary>
/// <exception cref="Exception">
/// Thrown, wrapping the original <see cref="IOException"/>, when the set cannot be loaded.
/// </exception>
static DefaultSetHolder()
{
    try
    {
        DEFAULT_STOP_SET = WordlistLoader.GetWordSet(
            IOUtils.GetDecodingReader(typeof(SoraniAnalyzer), DEFAULT_STOPWORD_FILE, Encoding.UTF8),
#pragma warning disable 612, 618
            LuceneVersion.LUCENE_CURRENT);
#pragma warning restore 612, 618
    }
    catch (IOException ex)
    {
        // default set should always be present as it is part of the
        // distribution (JAR). Keep the cause so the underlying I/O failure stays visible.
        throw new Exception("Unable to load default stopword set", ex);
    }
}
/// <summary>
/// Loads the default stop word set from <c>DEFAULT_STOPWORD_FILE</c> (decoded as UTF-8,
/// resolved relative to <c>PolishAnalyzer</c>), using "#" as the comment marker.
/// </summary>
/// <returns>The loaded stop word set.</returns>
/// <remarks>
/// LUCENENET: Avoid static constructors
/// (see https://github.com/apache/lucenenet/pull/224#issuecomment-469284006).
/// Exceptions matching <c>IsIOException()</c> are rethrown via <c>RuntimeException.Create</c>.
/// </remarks>
private static CharArraySet LoadDefaultStopSet()
{
    try
    {
        var stopwordReader = IOUtils.GetDecodingReader(typeof(PolishAnalyzer), DEFAULT_STOPWORD_FILE, Encoding.UTF8);
        var loadedWords = WordlistLoader.GetWordSet(
            stopwordReader,
            "#",
#pragma warning disable 612, 618
            LuceneVersion.LUCENE_CURRENT);
#pragma warning restore 612, 618
        return loadedWords;
    }
    catch (Exception ex) when (ex.IsIOException())
    {
        // default set should always be present as it is part of the
        // distribution (embedded resource)
        throw RuntimeException.Create("Unable to load default stopword set", ex);
    }
}
/// <summary>
/// Loads the default snowball-format stop word set from <c>DEFAULT_STOPWORD_FILE</c>
/// (decoded as UTF-8, resolved relative to <c>SnowballFilter</c>).
/// </summary>
/// <returns>The loaded stop word set.</returns>
/// <exception cref="Exception">
/// Thrown, wrapping the <see cref="IOException"/>, when the set cannot be loaded.
/// </exception>
/// <remarks>
/// LUCENENET: Avoid static constructors
/// (see https://github.com/apache/lucenenet/pull/224#issuecomment-469284006).
/// </remarks>
private static CharArraySet LoadDefaultStopSet()
{
    try
    {
        var stopwordReader = IOUtils.GetDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, Encoding.UTF8);
        var loadedWords = WordlistLoader.GetSnowballWordSet(
            stopwordReader,
#pragma warning disable 612, 618
            LuceneVersion.LUCENE_CURRENT);
#pragma warning restore 612, 618
        return loadedWords;
    }
    catch (IOException ex)
    {
        // default set should always be present as it is part of the
        // distribution (JAR)
        throw new Exception("Unable to load default stopword set", ex);
    }
}
/// <summary>Default stop word set, populated once by <see cref="LoadDefaultSet"/>.</summary>
/// <remarks>
/// LUCENENET: Avoid static constructors
/// (see https://github.com/apache/lucenenet/pull/224#issuecomment-469284006).
/// </remarks>
internal static readonly CharArraySet DEFAULT_STOP_SET = LoadDefaultSet();

/// <summary>
/// Loads the default snowball-format stop word set from <c>DEFAULT_STOPWORD_FILE</c>
/// (decoded as UTF-8, resolved relative to <c>UkrainianMorfologikAnalyzer</c>).
/// </summary>
/// <returns>The loaded stop word set.</returns>
/// <exception cref="Exception">
/// Thrown, wrapping the <see cref="IOException"/>, when the set cannot be loaded.
/// </exception>
private static CharArraySet LoadDefaultSet()
{
    try
    {
        var stopwordReader = IOUtils.GetDecodingReader(
            typeof(UkrainianMorfologikAnalyzer), DEFAULT_STOPWORD_FILE, Encoding.UTF8);
        var loadedWords = WordlistLoader.GetSnowballWordSet(
            stopwordReader,
#pragma warning disable 612, 618
            LuceneVersion.LUCENE_CURRENT);
#pragma warning restore 612, 618
        return loadedWords;
    }
    catch (IOException ex)
    {
        // default set should always be present as it is part of the
        // distribution (JAR)
        throw new Exception("Unable to load default stopword set", ex);
    }
}
/// <summary>
/// Static initializer: loads the default snowball-format stop word set and fills the
/// default stem override dictionary with four fixed entries.
/// </summary>
/// <exception cref="Exception">
/// Thrown, wrapping the original <see cref="IOException"/>, when the stop word set
/// cannot be loaded.
/// </exception>
static DefaultSetHolder()
{
    try
    {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(
            IOUtils.getDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8),
            Version.LUCENE_CURRENT);
    }
    catch (IOException ex)
    {
        // default set should always be present as it is part of the
        // distribution (JAR). Keep the cause so the underlying I/O failure stays visible.
        throw new Exception("Unable to load default stopword set", ex);
    }

    // C# has no diamond operator, so the type argument must be spelled out.
    // The values stored below are strings.
    DEFAULT_STEM_DICT = new CharArrayMap<string>(Version.LUCENE_CURRENT, 4, false);
    DEFAULT_STEM_DICT.put("fiets", "fiets"); //otherwise fiet
    DEFAULT_STEM_DICT.put("bromfiets", "bromfiets"); //otherwise bromfiet
    DEFAULT_STEM_DICT.put("ei", "eier");
    DEFAULT_STEM_DICT.put("kind", "kinder");
}
// Verifies that a FilesystemResourceLoader resolves "template.txt" both relative to a
// base directory and by full path, and that a loader created without a base directory
// resolves the full path as well. The temporary directory is removed at the end.
//
// JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
// ORIGINAL LINE: public void testBaseDir() throws Exception
//
// NOTE(review): this body is raw converter output and still uses Java-style members
// (File, Writer, mkdirs, write, get) — presumably these need porting before it compiles; confirm.
public virtual void testBaseDir()
{
    // JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
    // ORIGINAL LINE: final java.io.File super = createTempDir("fsResourceLoaderBase").getAbsoluteFile();
    File @base = createTempDir("fsResourceLoaderBase").AbsoluteFile;
    try
    {
        @base.mkdirs();
        // Write a one-line template file into the base directory.
        // NOTE(review): FileStream is constructed with (@base, "template.txt", ...) — looks like a
        // converter artifact of Java's File(parent, child); verify against the intended API.
        Writer os = new System.IO.StreamWriter(new System.IO.FileStream(@base, "template.txt", System.IO.FileMode.Create, System.IO.FileAccess.Write), Encoding.UTF8);
        try
        {
            os.write("foobar\n");
        }
        finally
        {
            IOUtils.closeWhileHandlingException(os);
        }

        // Loader rooted at the base directory: relative name must resolve.
        ResourceLoader rl = new FilesystemResourceLoader(@base);
        assertEquals("foobar", WordlistLoader.getLines(rl.openResource("template.txt"), StandardCharsets.UTF_8).get(0));
        // Same with full path name:
        string fullPath = (new File(@base, "template.txt")).ToString();
        assertEquals("foobar", WordlistLoader.getLines(rl.openResource(fullPath), StandardCharsets.UTF_8).get(0));
        assertClasspathDelegation(rl);
        assertNotFound(rl);

        // now use RL without base dir:
        rl = new FilesystemResourceLoader();
        assertEquals("foobar", WordlistLoader.getLines(rl.openResource((new File(@base, "template.txt")).ToString()), StandardCharsets.UTF_8).get(0));
        assertClasspathDelegation(rl);
        assertNotFound(rl);
    }
    finally
    {
        // Always clean up the temporary directory, even when an assertion fails.
        TestUtil.rm(@base);
    }
}
/// <summary>
/// Builds an analyzer whose stop words are read from the given file.
/// </summary>
/// <param name="stopwords">File passed to <c>WordlistLoader.GetWordSet</c>.</param>
public LithuanianAnalyzer(FileInfo stopwords)
{
    var loadedStopWords = WordlistLoader.GetWordSet(stopwords);
    stoptable = loadedStopWords;
}
/// <summary>
/// Builds the exclusion list from the words contained in the given file.
/// </summary>
/// <param name="exclusionlist">File passed to <c>WordlistLoader.GetWordSet</c>.</param>
public void SetStemExclusionTable(FileInfo exclusionlist)
{
    var loadedExclusions = WordlistLoader.GetWordSet(exclusionlist);
    excltable = loadedExclusions;
}
/// <summary>
/// Builds an analyzer with the stop words from the given reader. The words are loaded
/// with <see cref="WordlistLoader.GetWordSet(System.IO.TextReader)"/> and passed to
/// another constructor overload of this class.
/// </summary>
/// <seealso cref="WordlistLoader.GetWordSet(System.IO.TextReader)"/>
/// <param name="matchVersion">Lucene version to match; see <see cref="Version"/>.</param>
/// <param name="stopwords">Reader to read stop words from.</param>
public StandardAnalyzer(Version matchVersion, System.IO.TextReader stopwords) : this(matchVersion, WordlistLoader.GetWordSet(stopwords))
{
}
/// <summary>
/// Builds an analyzer whose stop words are read from the given file.
/// </summary>
/// <param name="matchVersion">Version value stored in <c>MatchVersion</c>.</param>
/// <param name="stopwordsFile">File passed to <c>WordlistLoader.GetWordSet</c>.</param>
public DanishAnalyzer(Version matchVersion, FileInfo stopwordsFile)
{
    // Load first, then assign, keeping the original assignment order.
    var loadedStopWords = WordlistLoader.GetWordSet(stopwordsFile);
    StopTable = loadedStopWords;
    MatchVersion = matchVersion;
}
/// <summary>
/// Builds an analyzer with the stop words read from the given file. The words are
/// loaded with <c>WordlistLoader.GetWordSet</c> and passed to another constructor
/// overload of this class.
/// </summary>
/// <param name="matchVersion">Version value forwarded to the chained constructor.</param>
/// <param name="stopwords">File to read the stop words from.</param>
/// <remarks>Deprecated: use BrazilianAnalyzer(Version, Set) instead.</remarks>
public BrazilianAnalyzerCustom(Lucene.Net.Util.Version matchVersion, FileInfo stopwords) : this(matchVersion, WordlistLoader.GetWordSet(stopwords))
{
}
/// <summary>
/// Builds an analyzer with the stop words from the given reader, then runs
/// <c>Init</c> with the supplied version.
/// </summary>
/// <seealso cref="WordlistLoader.GetWordSet(System.IO.TextReader)"/>
/// <param name="matchVersion">Lucene version to match; passed to <c>Init</c>.</param>
/// <param name="stopwords">Reader to read stop words from.</param>
public StandardAnalyzer(Version matchVersion, System.IO.TextReader stopwords)
{
    var loadedStopWords = WordlistLoader.GetWordSet(stopwords);
    stopSet = loadedStopWords;
    Init(matchVersion);
}
/// <summary>
/// Builds the exclusion list from the words contained in the given file.
/// </summary>
/// <param name="exclusionlist">File passed to <c>WordlistLoader.GetWordSet</c>.</param>
/// <remarks>Deprecated: use BrazilianAnalyzer(Version, Set, Set) instead.</remarks>
public void SetStemExclusionTable(FileInfo exclusionlist)
{
    var loadedExclusions = WordlistLoader.GetWordSet(exclusionlist);
    excltable = loadedExclusions;

    // force a new stemmer to be created
    PreviousTokenStream = null;
}
/// <summary>
/// Builds an analyzer with the stop words read from the given file. The words are
/// loaded with <c>WordlistLoader.GetWordSet</c> and passed to another constructor
/// overload of this class.
/// </summary>
/// <param name="matchVersion">Version value forwarded to the chained constructor.</param>
/// <param name="stopwords">File to read the stop words from.</param>
/// <remarks>Deprecated: use BrazilianAnalyzer(Version, Set) instead.</remarks>
public BrazilianAnalyzer(Version matchVersion, FileInfo stopwords) : this(matchVersion, WordlistLoader.GetWordSet(stopwords))
{
}
/// <summary>
/// Builds an analyzer whose stop set is read from the given reader.
/// </summary>
/// <seealso cref="WordlistLoader.GetWordSet(System.IO.TextReader)"/>
/// <param name="stopwords">Reader to read stop words from.</param>
public StandardAnalyzer(System.IO.TextReader stopwords)
{
    this.stopSet = WordlistLoader.GetWordSet(stopwords);
}
/// <summary>
/// Builds an analyzer whose stop set is read from the given file.
/// </summary>
/// <seealso cref="WordlistLoader.GetWordSet(System.IO.FileInfo)"/>
/// <param name="stopwords">File to read stop words from.</param>
public StandardAnalyzer(System.IO.FileInfo stopwords)
{
    this.stopSet = WordlistLoader.GetWordSet(stopwords);
}
/// <summary>
/// Builds an analyzer whose stop table is read from the given file.
/// </summary>
/// <param name="stopwords">File passed to <c>WordlistLoader.GetWordtable</c>.</param>
public BrazilianAnalyzer(FileInfo stopwords)
{
    this.stoptable = WordlistLoader.GetWordtable(stopwords);
}
/// <summary>
/// Builds an analyzer with the stop words read from the given file. Lines can be
/// commented out using <see cref="STOPWORDS_COMMENT"/>. The words are loaded with
/// <c>WordlistLoader.GetWordSet</c> and passed to another constructor overload of this class.
/// </summary>
/// <param name="matchVersion">Version value forwarded to the chained constructor.</param>
/// <param name="stopwords">File to read the stop words from.</param>
public ArabicAnalyzer(Version matchVersion, FileInfo stopwords) : this(matchVersion, WordlistLoader.GetWordSet(stopwords, STOPWORDS_COMMENT))
{
}