/// <summary>
/// Runs two inputs through one shared <c>CzechAnalyzer</c> instance using
/// <c>AssertAnalyzesToReuse</c> and checks the tokens expected for each input.
/// </summary>
public void TestReusableTokenStream()
{
    Analyzer czech = new CzechAnalyzer(Version.LUCENE_CURRENT);

    // Only "mluvime" and "volnem" are expected back from the first sentence.
    string[] sentenceTokens = { "mluvime", "volnem" };
    AssertAnalyzesToReuse(czech, "Pokud mluvime o volnem", sentenceTokens);

    // The second input is expected to come back lower-cased.
    string[] nameTokens = { "česká", "republika" };
    AssertAnalyzesToReuse(czech, "Česká Republika", nameTokens);
}
/// <summary>
/// Feeds an <c>UnreliableInputStream</c> to <c>LoadStopWords</c> and then checks
/// that all four words of the sample sentence are still emitted as tokens.
/// </summary>
/// <remarks>
/// NOTE(review): presumably the stream fails while being read, leaving the
/// analyzer without stop words - confirm against UnreliableInputStream.
/// </remarks>
public void TestInvalidStopWordFile()
{
    CzechAnalyzer analyzer = new CzechAnalyzer(Version.LUCENE_CURRENT);
    UnreliableInputStream brokenSource = new UnreliableInputStream();
    analyzer.LoadStopWords(brokenSource, Encoding.UTF8);

    // Every word of the input is expected in the output, lower-cased.
    string[] allWords = { "pokud", "mluvime", "o", "volnem" };
    AssertAnalyzesTo(analyzer, "Pokud mluvime o volnem", allWords);
}
/// <summary>
/// Analyzes the same input before and after loading the stop words stored in
/// <c>customStopFile</c>: both tokens are expected before the load, and only
/// "česká" afterwards.
/// </summary>
public void TestStopWordFileReuse()
{
    CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
    AssertAnalyzesToReuse(cz, "Česká Republika", new String[] { "česká", "republika" });

    // Dispose the file stream deterministically so the handle on the stop word
    // file is released even when LoadStopWords throws.
    using (Stream stopwords = new FileStream(customStopFile, FileMode.Open, FileAccess.Read))
    {
        cz.LoadStopWords(stopwords, Encoding.UTF8);
    }

    AssertAnalyzesToReuse(cz, "Česká Republika", new String[] { "česká" });
}
/// <summary>
/// Picks an analyzer from the first two characters of the file name in
/// <paramref name="filePath"/>, compared case-insensitively.
/// </summary>
/// <param name="filePath">Path whose file name starts with a two-letter code.</param>
/// <returns>
/// The analyzer mapped to the two-letter prefix, or a <c>StandardAnalyzer</c>
/// when the prefix is not recognised or the file name has fewer than two characters.
/// </returns>
private Analyzer GuessAnalyzer(string filePath)
{
    // Substring(0, 2) throws on names shorter than two characters (and on a
    // null file name), so those inputs are routed to the default branch instead.
    string fileName = Path.GetFileName(filePath);
    string prefix = (fileName != null && fileName.Length >= 2)
        ? fileName.Substring(0, 2).ToLowerInvariant()
        : string.Empty;

    Analyzer ret;
    switch (prefix)
    {
        case "zh": ret = new ChineseAnalyzer(); break;
        case "cs": ret = new CzechAnalyzer(); break;
        case "da": ret = new SnowballAnalyzer("Danish"); break;
        case "nl": ret = new SnowballAnalyzer("Dutch"); break;
        case "en": ret = new SnowballAnalyzer("English"); break;
        case "fi": ret = new SnowballAnalyzer("Finnish"); break;
        case "fr": ret = new SnowballAnalyzer("French"); break;
        case "de": ret = new SnowballAnalyzer("German"); break;
        case "it": ret = new SnowballAnalyzer("Italian"); break;
        case "ja": ret = new CJKAnalyzer(); break;
        case "ko": ret = new CJKAnalyzer(); break;
        case "no": ret = new SnowballAnalyzer("Norwegian"); break;
        case "pt": ret = new SnowballAnalyzer("Portuguese"); break;
        case "ru": ret = new SnowballAnalyzer("Russian"); break;
        case "es": ret = new SnowballAnalyzer("Spanish"); break;
        // NOTE(review): "se" is the country code for Sweden; the ISO 639-1
        // language code for Swedish is "sv" - confirm which one file names use.
        case "se": ret = new SnowballAnalyzer("Swedish"); break;
        default: ret = new StandardAnalyzer(); break;
    }

    return ret;
}
/// <summary>
/// Picks an analyzer from the first two characters of the file name in
/// <paramref name="filePath"/>, compared case-insensitively, and reports
/// whether the matched code was one of the right-to-left cases.
/// </summary>
/// <param name="filePath">Path whose file name starts with a two-letter code.</param>
/// <param name="isRTL">Set to <c>true</c> for the "ar" and "he" prefixes, otherwise <c>false</c>.</param>
/// <returns>
/// The analyzer mapped to the two-letter prefix, or a <c>StandardAnalyzer</c>
/// when the prefix is not recognised or the file name has fewer than two characters.
/// </returns>
private Analyzer GuessAnalyzer(string filePath, out bool isRTL)
{
    isRTL = false;

    // Substring(0, 2) throws on names shorter than two characters (and on a
    // null file name), so those inputs are routed to the default branch instead.
    string fileName = Path.GetFileName(filePath);
    string prefix = (fileName != null && fileName.Length >= 2)
        ? fileName.Substring(0, 2).ToLowerInvariant()
        : string.Empty;

    Analyzer ret;
    switch (prefix)
    {
        case "zh": ret = new ChineseAnalyzer(); break;
        case "cs": ret = new CzechAnalyzer(); break;
        case "da": ret = new SnowballAnalyzer("Danish"); break;
        case "nl": ret = new SnowballAnalyzer("Dutch"); break;
        case "en": ret = new SnowballAnalyzer("English"); break;
        case "fi": ret = new SnowballAnalyzer("Finnish"); break;
        case "fr": ret = new SnowballAnalyzer("French"); break;
        case "de": ret = new SnowballAnalyzer("German"); break;
        case "it": ret = new SnowballAnalyzer("Italian"); break;
        case "ja": ret = new CJKAnalyzer(); break;
        case "ko": ret = new CJKAnalyzer(); break;
        case "no": ret = new SnowballAnalyzer("Norwegian"); break;
        case "pt": ret = new SnowballAnalyzer("Portuguese"); break;
        case "ru": ret = new SnowballAnalyzer("Russian"); break;
        case "es": ret = new SnowballAnalyzer("Spanish"); break;
        // NOTE(review): "se" is the country code for Sweden; the ISO 639-1
        // language code for Swedish is "sv" - confirm which one file names use.
        case "se": ret = new SnowballAnalyzer("Swedish"); break;

        case "ar":
            isRTL = true;
            // TODO: Lucene 2.9 has a light stemmer for Arabic providing good search results
            ret = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            break;

        case "he":
        {
            isRTL = true;
            string hspellPath = System.Configuration.ConfigurationManager.AppSettings["hspellPath"];
            if (!string.IsNullOrEmpty(hspellPath) && Directory.Exists(hspellPath))
            {
                try
                {
                    ret = new Lucene.Net.Analysis.Hebrew.MorphAnalyzer(hspellPath);
                    break;
                }
                catch (Exception)
                {
                    // Deliberate best effort: if the MorphAnalyzer cannot be
                    // constructed from the configured directory, fall through
                    // to the SimpleAnalyzer below.
                }
            }

            ret = new Lucene.Net.Analysis.Hebrew.SimpleAnalyzer();
            break;
        }

        default:
            ret = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            break;
    }

    return ret;
}