// Verifies English Snowball stemming: inflected forms collapse to their stems
// ("abhorred" -> "abhor", "accents" -> "accent") while "he" passes through.
public void TestEnglish()
{
    Analyzer analyzer = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English");
    AssertAnalyzesTo(analyzer, "he abhorred accents",
        new String[] { "he", "abhor", "accent" });
}
// Runs two analyses through one analyzer instance to confirm the reusable
// token stream carries no stale state between calls.
public virtual void TestReusableTokenStream()
{
    Analyzer analyzer = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English");
    AssertAnalyzesTo(analyzer, "he abhorred accents",
        new string[] { "he", "abhor", "accent" });
    AssertAnalyzesTo(analyzer, "she abhorred him",
        new string[] { "she", "abhor", "him" });
}
public void TestStopwords()
{
    // "the" is in the standard stop set and must be removed before stemming.
    Analyzer analyzer = new SnowballAnalyzer(
        Version.LUCENE_CURRENT, "English", StandardAnalyzer.STOP_WORDS_SET);
    AssertAnalyzesTo(analyzer, "the quick brown fox jumped",
        new String[] { "quick", "brown", "fox", "jump" });
}
// Turkish stemming: lower-case "ağacı" and upper-case "AĞACI" are both
// expected to stem to "ağaç" (case handling must not break the stem).
public virtual void TestTurkish()
{
    Analyzer analyzer = new SnowballAnalyzer(TEST_VERSION_CURRENT, "Turkish");
    AssertAnalyzesTo(analyzer, "ağacı", new string[] { "ağaç" });
    AssertAnalyzesTo(analyzer, "AĞACI", new string[] { "ağaç" });
}
// Turkish stemming check: both casings of the same word must yield the
// identical stem "ağaç".
public virtual void TestTurkish()
{
    Analyzer turkish = new SnowballAnalyzer(TEST_VERSION_CURRENT, "Turkish");

    AssertAnalyzesTo(turkish, "ağacı", new string[] { "ağaç" });
    AssertAnalyzesTo(turkish, "AĞACI", new string[] { "ağaç" });
}
// Stop-word filtering: the leading article "the" is dropped and the
// remaining words are stemmed ("jumped" -> "jump").
public void TestStopwords()
{
    Analyzer analyzer = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English",
        StandardAnalyzer.STOP_WORDS_SET);
    AssertAnalyzesTo(analyzer, "the quick brown fox jumped",
        new String[] { "quick", "brown", "fox", "jump" });
}
public void TestReusableTokenStream()
{
    Analyzer analyzer = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English");
    // Two consecutive reuse-path analyses: the second call must see a
    // correctly reset token stream, not leftovers from the first.
    AssertAnalyzesToReuse(analyzer, "he abhorred accents",
        new String[] { "he", "abhor", "accent" });
    AssertAnalyzesToReuse(analyzer, "she abhorred him",
        new String[] { "she", "abhor", "him" });
}
public virtual void TestTurkishBWComp()
{
    // Backward-compatibility pin against Lucene 3.0 behavior:
    // AĞACI lowercases (Turkish rules) to ağacı, but the plain lowercase
    // filter produces ağaci. The stemmer strips only -ı, not -i, so the
    // upper-case input stays as the un-stemmed "ağaci".
    Analyzer analyzer = new SnowballAnalyzer(LuceneVersion.LUCENE_30, "Turkish");

    AssertAnalyzesTo(analyzer, "ağacı", new string[] { "ağaç" });
    AssertAnalyzesTo(analyzer, "AĞACI", new string[] { "ağaci" });
}
// Exercises the analyzer's reuse path twice in a row; both analyses must
// produce the expected stems independently.
public void TestReusableTokenStream()
{
    Analyzer english = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English");
    AssertAnalyzesToReuse(english, "he abhorred accents", new String[] { "he", "abhor", "accent" });
    AssertAnalyzesToReuse(english, "she abhorred him", new String[] { "she", "abhor", "him" });
}
public virtual void TestEnglishLowerCase()
{
    // Current version: English stemming is case-insensitive.
    Analyzer current = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English");
    AssertAnalyzesTo(current, "cryogenic", new string[] { "cryogen" });
    AssertAnalyzesTo(current, "CRYOGENIC", new string[] { "cryogen" });

    // The Lucene 3.0 back-compat path must behave identically for English.
    Analyzer legacy = new SnowballAnalyzer(LuceneVersion.LUCENE_30, "English");
    AssertAnalyzesTo(legacy, "cryogenic", new string[] { "cryogen" });
    AssertAnalyzesTo(legacy, "CRYOGENIC", new string[] { "cryogen" });
}
public void Setup()
{
    // Stop-word list: "into" first, then the standard English stop words,
    // in the set's enumeration order.
    stopWords = new string[StopAnalyzer.ENGLISH_STOP_WORDS_SET.Count + 1];
    stopWords[0] = "into";
    int next = 1;
    foreach (string word in StopAnalyzer.ENGLISH_STOP_WORDS_SET)
    {
        stopWords[next++] = word;
    }

    // Index lives entirely in memory for the tests.
    var directory = new RAMDirectory();
    var analyzer = new SnowballAnalyzer("English", stopWords);
    _service = new SearchEngineService(directory, analyzer);
}
// Regression test for LUCENENET-54: the Finnish stemmer must reduce
// "terve" to "terv" on the first token of the stream.
public void TestJiraLuceneNet54()
{
    var analyzer = new SnowballAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT, "Finnish");
    var reader = new StringReader("terve");
    var tokenStream = analyzer.TokenStream("fieldName", reader);
    var termAttr = tokenStream.AddAttribute<ITermAttribute>();

    Assert.That(tokenStream.IncrementToken(), Is.True);
    Assert.That(termAttr.Term, Is.EqualTo("terv"));
}
// Pins Lucene 3.0 back-compat behavior for Turkish: with the non-Turkish
// lowercase filter, AĞACI becomes ağaci, and since the stemmer removes
// only -ı (not -i), the result stays un-stemmed as "ağaci".
public virtual void TestTurkishBWComp()
{
    Analyzer turkish30 = new SnowballAnalyzer(LuceneVersion.LUCENE_30, "Turkish");
    AssertAnalyzesTo(turkish30, "ağacı", new string[] { "ağaç" });
    AssertAnalyzesTo(turkish30, "AĞACI", new string[] { "ağaci" });
}
// English stemming must ignore case under both the current version and
// the Lucene 3.0 back-compat version.
public virtual void TestEnglishLowerCase()
{
    foreach (Analyzer analyzer in new Analyzer[]
    {
        new SnowballAnalyzer(TEST_VERSION_CURRENT, "English"),
        new SnowballAnalyzer(LuceneVersion.LUCENE_30, "English"),
    })
    {
        AssertAnalyzesTo(analyzer, "cryogenic", new string[] { "cryogen" });
        AssertAnalyzesTo(analyzer, "CRYOGENIC", new string[] { "cryogen" });
    }
}
// Smoke test: runs the Turkish analyzer over every file under
// test-files\analysis and prints each produced term to the console.
public void TestWithRealWorldData()
{
    foreach (var file in Directory.GetFiles(Path.Combine(Paths.ProjectRootDirectory, @"test-files\analysis")))
    {
        using (Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "Turkish"))
        {
            var ts = a.TokenStream("dummy", new System.IO.StringReader(File.ReadAllText(file)));
            // Fix: the attribute instance is fixed for the stream's lifetime,
            // so fetch it once instead of on every token (previously looked
            // up inside the loop).
            var att = ts.GetAttribute<ITermAttribute>();
            while (ts.IncrementToken())
            {
                Console.WriteLine(att.Term);
            }
        }
    }
}
// Rebuilds the search index at indexPath from every parseable file under
// contentPath: each document stores the file's title, path, and full text,
// then the index is optimized and the writer closed.
// NOTE(review): uses the legacy SnowballAnalyzer(string) ctor and the old
// IndexWriter(directory, analyzer, create) overload; `directory` is never
// closed here — presumably fine for this Lucene.Net version, but confirm.
public void Build() { Directory directory = FSDirectory.GetDirectory(indexPath); Analyzer analyzer = new SnowballAnalyzer("English"); IndexWriter writer = new IndexWriter(directory, analyzer, true); new DirectoryInfo(contentPath) .GetFilesRecursive() .Where(file => Parser.IsParseable(file.FullName)) .Select(file => new { Path = file.FullName, Text = Parser.Parse(file.FullName), Title = file.Name }) .ForEach((item) => { Document doc = new Document(); doc.Add(new Field("title", item.Title, Field.Store.YES, Field.Index.TOKENIZED)); doc.Add(new Field("path", item.Path, Field.Store.YES, Field.Index.NO)); doc.Add(new Field("text", item.Text, Field.Store.YES, Field.Index.TOKENIZED)); writer.AddDocument(doc); }); writer.Optimize(); writer.Close(); }
// Parses the query against the "text" field, runs it over the index at
// indexPath, and returns up to 100 hits, each with path, score, title,
// and a highlighted preview of the matching text.
public IEnumerable<SearchResult> Search(string query)
{
    Analyzer analyzer = new SnowballAnalyzer("English");
    QueryParser parser = new QueryParser("text", analyzer);
    Query luceneQuery = parser.Parse(query);

    Directory directory = FSDirectory.GetDirectory(indexPath);
    IndexSearcher searcher = new IndexSearcher(directory);
    try
    {
        QueryScorer queryScorer = new QueryScorer(luceneQuery);
        Highlighter highlighter = new Highlighter(queryScorer);

        TopDocs topDocs = searcher.Search(luceneQuery, 100);
        var searchResults = new List<SearchResult>();
        foreach (ScoreDoc scoreDoc in topDocs.scoreDocs)
        {
            Document doc = searcher.Doc(scoreDoc.doc);
            searchResults.Add(new SearchResult
            {
                Path = doc.Get("path"),
                Score = scoreDoc.score,
                Title = doc.Get("title"),
                Preview = highlighter.GetBestFragment(analyzer, "text", doc.Get("text"))
            });
        }
        return searchResults;
    }
    finally
    {
        // Fix: the searcher and directory were previously never closed,
        // leaking index file handles on every search. The result list is
        // fully materialized above, so closing here is safe.
        searcher.Close();
        directory.Close();
    }
}
/// <summary>
/// Creates a Spanish analyzer backed by the Lucene 3.0 Snowball stemmer
/// that filters out the supplied stop words; the same set is also kept
/// in <c>STOP_WORDS</c>.
/// </summary>
/// <param name="stop_words">Stop words to exclude from analysis.</param>
public SpanishAnalyzer(ISet<string> stop_words)
{
    STOP_WORDS = stop_words;
    analyzer = new SnowballAnalyzer(Lucene.Net.Util.Version.LUCENE_30, "Spanish", stop_words);
}
/// <summary>
/// Creates a Spanish analyzer backed by the Lucene 3.0 Snowball stemmer
/// with no stop-word filtering (STOP_WORDS is not assigned here).
/// </summary>
public SpanishAnalyzer() { analyzer = new SnowballAnalyzer(Lucene.Net.Util.Version.LUCENE_30, "Spanish"); }
public void TestEnglish()
{
    // Basic English stemming: "abhorred" -> "abhor", "accents" -> "accent".
    Analyzer english = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English");
    AssertAnalyzesTo(english, "he abhorred accents", new String[] { "he", "abhor", "accent" });
}
public void TestJiraLuceneNet54()
{
    // LUCENENET-54 regression: Finnish "terve" must stem to "terv".
    var finnish = new SnowballAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT, "Finnish");
    var stream = finnish.TokenStream("fieldName", new StringReader("terve"));
    var term = stream.AddAttribute<ITermAttribute>();

    Assert.That(stream.IncrementToken(), Is.True);
    Assert.That(term.Term, Is.EqualTo("terv"));
}
/// <summary>
/// Returns up to three highlighted fragments of <paramref name="IndexField"/>
/// matching <paramref name="searchQuery"/>, wrapping each hit in a
/// "umbSearchHighlight" span and appending "..." when anything was found.
/// </summary>
/// <param name="IndexField">Raw field text to highlight.</param>
/// <param name="LuceneIndex">Index used by the fragment scorer.</param>
/// <param name="searchQuery">Query whose terms are highlighted.</param>
/// <param name="highlightField">Name of the field the query targets.</param>
/// <returns>Highlighted fragments plus a trailing "...", or an empty string.</returns>
public static string GetHighlight(string IndexField, string LuceneIndex, string searchQuery, string highlightField)
{
    var formatter = new SimpleHTMLFormatter("<span class=\"umbSearchHighlight\">", "</span>");
    var highlighter = new Highlighter(formatter, FragmentScorer(searchQuery, highlightField, LuceneIndex));
    var tokenStream = new SnowballAnalyzer("English").TokenStream(highlightField, new StringReader(IndexField));

    string fragments = highlighter.GetBestFragments(tokenStream, IndexField, 3, "...");
    return fragments.Length > 0 ? fragments + "..." : string.Empty;
}
// Picks an analyzer from the language code encoded in the first two letters
// of the file name (e.g. "en-...", "de-..."), defaulting to StandardAnalyzer.
private Analyzer GuessAnalyzer(string filePath)
{
    // Fix: Substring(0, 2) previously threw ArgumentOutOfRangeException for
    // file names shorter than two characters; fall back to the default.
    string name = Path.GetFileName(filePath);
    if (name == null || name.Length < 2)
    {
        return new StandardAnalyzer();
    }

    switch (name.Substring(0, 2).ToLowerInvariant())
    {
        case "zh": return new ChineseAnalyzer();
        case "cs": return new CzechAnalyzer();
        case "da": return new SnowballAnalyzer("Danish");
        case "nl": return new SnowballAnalyzer("Dutch");
        case "en": return new SnowballAnalyzer("English");
        case "fi": return new SnowballAnalyzer("Finnish");
        case "fr": return new SnowballAnalyzer("French");
        case "de": return new SnowballAnalyzer("German");
        case "it": return new SnowballAnalyzer("Italian");
        case "ja": return new CJKAnalyzer();
        case "ko": return new CJKAnalyzer();
        case "no": return new SnowballAnalyzer("Norwegian");
        case "pt": return new SnowballAnalyzer("Portuguese");
        case "ru": return new SnowballAnalyzer("Russian");
        case "es": return new SnowballAnalyzer("Spanish");
        // NOTE(review): "se" is not the ISO 639-1 code for Swedish ("sv") —
        // kept as-is since existing file names may rely on it; confirm.
        case "se": return new SnowballAnalyzer("Swedish");
        default: return new StandardAnalyzer();
    }
}
public void StemmingEnabledQueryParsing()
{
    // Parse a sentence through a stemming (Snowball) analyzer and dump the
    // resulting query tree so the stemmed terms can be inspected by hand.
    string queryText = "The guy bought multiple bikes before he left town to race.";

    var analyzer = new SnowballAnalyzer(Version.LUCENE_30, "English");
    var parser = new QueryParser(Version.LUCENE_30, "description", analyzer);
    var parsedQuery = parser.Parse(queryText);

    Trace.WriteLine(new QueryVisualizer(true).Process(parsedQuery).ToString());
}
// English stemming sanity check against the current test version.
public virtual void TestEnglish()
{
    Analyzer analyzer = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English");
    AssertAnalyzesTo(analyzer, "he abhorred accents",
        new string[] { "he", "abhor", "accent" });
}
// English stemming via the legacy (version-less) SnowballAnalyzer ctor.
public virtual void TestEnglish()
{
    Analyzer analyzer = new SnowballAnalyzer("English");
    AssertAnalyzesTo(analyzer, "he abhorred accents",
        new System.String[] { "he", "abhor", "accent" });
}
public virtual void TestStopwords()
{
    // "the" is in the standard stop set and must be removed before stemming.
    Analyzer analyzer = new SnowballAnalyzer(
        TEST_VERSION_CURRENT, "English", StandardAnalyzer.STOP_WORDS_SET);
    AssertAnalyzesTo(analyzer, "the quick brown fox jumped",
        new string[] { "quick", "brown", "fox", "jump" });
}
public virtual void TestEnglish()
{
    // Legacy ctor path: inflected forms still collapse to their stems.
    Analyzer english = new SnowballAnalyzer("English");
    AssertAnalyzesTo(english, "he abhorred accents", new System.String[] { "he", "abhor", "accent" });
}
public virtual void TestEnglish()
{
    // "abhorred" -> "abhor", "accents" -> "accent"; "he" is unchanged.
    Analyzer english = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English");
    AssertAnalyzesTo(english, "he abhorred accents", new string[] { "he", "abhor", "accent" });
}
public virtual void TestReusableTokenStream()
{
    // Analyze two different sentences with the same analyzer instance;
    // the second result must not be polluted by the first.
    Analyzer english = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English");
    AssertAnalyzesTo(english, "he abhorred accents", new string[] { "he", "abhor", "accent" });
    AssertAnalyzesTo(english, "she abhorred him", new string[] { "she", "abhor", "him" });
}
// Picks an analyzer from the language code in the first two letters of the
// file name and reports whether the language is right-to-left (Arabic,
// Hebrew). Defaults to a Lucene 2.9 StandardAnalyzer.
private Analyzer GuessAnalyzer(string filePath, out bool isRTL)
{
    isRTL = false;

    // Fix: Substring(0, 2) previously threw ArgumentOutOfRangeException for
    // file names shorter than two characters; fall back to the default.
    string name = Path.GetFileName(filePath);
    if (name == null || name.Length < 2)
    {
        return new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
    }

    switch (name.Substring(0, 2).ToLowerInvariant())
    {
        case "zh": return new ChineseAnalyzer();
        case "cs": return new CzechAnalyzer();
        case "da": return new SnowballAnalyzer("Danish");
        case "nl": return new SnowballAnalyzer("Dutch");
        case "en": return new SnowballAnalyzer("English");
        case "fi": return new SnowballAnalyzer("Finnish");
        case "fr": return new SnowballAnalyzer("French");
        case "de": return new SnowballAnalyzer("German");
        case "it": return new SnowballAnalyzer("Italian");
        case "ja": return new CJKAnalyzer();
        case "ko": return new CJKAnalyzer();
        case "no": return new SnowballAnalyzer("Norwegian");
        case "pt": return new SnowballAnalyzer("Portuguese");
        case "ru": return new SnowballAnalyzer("Russian");
        case "es": return new SnowballAnalyzer("Spanish");
        case "se": return new SnowballAnalyzer("Swedish");
        case "ar":
            isRTL = true;
            // TODO: Lucene 2.9 has a light stemmer for Arabic providing good search results
            return new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
        case "he":
        {
            isRTL = true;
            string hspellPath = System.Configuration.ConfigurationManager.AppSettings["hspellPath"];
            if (!string.IsNullOrEmpty(hspellPath) && Directory.Exists(hspellPath))
            {
                try
                {
                    return new Lucene.Net.Analysis.Hebrew.MorphAnalyzer(hspellPath);
                }
                catch
                {
                    // Deliberate best-effort: if the morphological dictionaries
                    // fail to load, fall through to the simple analyzer.
                }
            }
            return new Lucene.Net.Analysis.Hebrew.SimpleAnalyzer();
        }
        default: return new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
    }
}
// Stop-word handling: the article "the" is filtered out and the remaining
// words are stemmed ("jumped" -> "jump").
public virtual void TestStopwords()
{
    Analyzer english = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English", StandardAnalyzer.STOP_WORDS_SET);
    AssertAnalyzesTo(english, "the quick brown fox jumped", new string[] { "quick", "brown", "fox", "jump" });
}