public SuggestSimilar ( System.String word, int num_sug ) : System.String[] | ||
word | System.String | The word you want a spell check done on. |
num_sug | int | The number of suggested words to return. |
return | System.String[] |
/// <summary>
/// Runs a suggestion query against the spell checker.
/// A term wrapped in double angle brackets (for example "&lt;&lt;word1 word2&gt;&gt;") is split on
/// whitespace and every word is looked up on its own; any other term is looked up as a whole.
/// </summary>
/// <param name="suggestionQuery">Supplies the term, the field and the maximum number of suggestions.</param>
/// <returns>
/// A result whose <c>Suggestions</c> array holds the spell checker's suggestions; for a wrapped
/// term the per-word suggestions are concatenated in word order.
/// </returns>
public SuggestionQueryResult Query(SuggestionQuery suggestionQuery)
{
    string rawTerm = suggestionQuery.Term;
    bool isWrapped = rawTerm.StartsWith("<<") && rawTerm.EndsWith(">>");

    if (!isWrapped)
    {
        return new SuggestionQueryResult
        {
            Suggestions = spellChecker.SuggestSimilar(rawTerm, suggestionQuery.MaxSuggestions, null, suggestionQuery.Field, true)
        };
    }

    // Drop the leading "<<" and the trailing ">>" before splitting into words.
    string inner = rawTerm.Substring(2, rawTerm.Length - 4);
    string[] words = inner.Split(new[] { ' ', '\t', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

    var collected = new List<string>();
    for (int i = 0; i < words.Length; i++)
    {
        collected.AddRange(spellChecker.SuggestSimilar(words[i], suggestionQuery.MaxSuggestions, null, suggestionQuery.Field, true));
    }

    return new SuggestionQueryResult { Suggestions = collected.ToArray() };
}
/// <summary>
/// Returns <paramref name="value"/> unchanged when the index already contains it in the
/// "word" field; otherwise returns the highest-ranked spelling suggestion.
/// </summary>
/// <param name="value">The word to check.</param>
/// <returns>The word itself, the best suggestion, or null when there are no suggestions.</returns>
public string Check(string value)
{
    EnsureIndexed();

    bool alreadyKnown = indexReader.DocFreq(new Term("word", value)) > 0;
    if (alreadyKnown)
    {
        return value;
    }

    var candidates = checker.SuggestSimilar(value, 10, null, "word", true);

    var jaroWinkler = new JaroWinklerDistance();
    var levenshtein = new LevenshteinDistance();
    var nGram = new NGramDistance();

    // Rank by the average of the scaled document frequency and the three distance scores.
    var ranked = candidates
        .Select(candidate => new
        {
            word = candidate,
            freq = indexReader.DocFreq(new Term("word", candidate)),
            jaro = jaroWinkler.GetDistance(value, candidate),
            leven = levenshtein.GetDistance(value, candidate),
            ngram = nGram.GetDistance(value, candidate)
        })
        .OrderByDescending(entry => ((entry.freq / 100f) + entry.jaro + entry.leven + entry.ngram) / 4f);

    return ranked.Select(entry => entry.word).FirstOrDefault();
}
/// <summary>
/// Builds a spell index from the "contents" field of <c>store</c> and verifies that the
/// single best suggestion for "Tam" is "Tom" and for "Jarry" is "Jerry".
/// </summary>
public void TestSpellchecker()
{
    SpellChecker.Net.Search.Spell.SpellChecker sc = new SpellChecker.Net.Search.Spell.SpellChecker(new RAMDirectory());
    indexReader = IndexReader.Open(store);
    try
    {
        sc.IndexDictionary(new LuceneDictionary(indexReader, "contents"));

        String[] suggestions = sc.SuggestSimilar("Tam", 1);
        AssertEquals(1, suggestions.Length);
        AssertEquals("Tom", suggestions[0]);

        suggestions = sc.SuggestSimilar("Jarry", 1);
        AssertEquals(1, suggestions.Length);
        AssertEquals("Jerry", suggestions[0]);
    }
    finally
    {
        // Close the reader even when an assertion fails, so a failing test does not leak it.
        indexReader.Close();
    }
}
} // SearchActiveDocument

/// <summary>
/// Builds a corrected version of <paramref name="query"/> by replacing every word with the
/// spell checker's best suggestion. Words without a suggestion are kept as typed.
/// </summary>
/// <param name="query">The query text; words are separated by spaces, commas or semicolons.</param>
/// <returns>The resulting words joined with single spaces.</returns>
public string getSpellingSuggestion(string query)
{
    FSDirectory indexDir = FSDirectory.GetDirectory(this.spellingIndexDir, false);
    SpellChecker.Net.Search.Spell.SpellChecker spellchecker = new SpellChecker.Net.Search.Spell.SpellChecker(indexDir);

    // The IndexReader the original code opened here was never used and never closed,
    // so it leaked an open reader on every call; it has been removed.

    string[] words = query.Split(new char[] { ' ', ',', ';' }, StringSplitOptions.RemoveEmptyEntries);
    List<string> allSuggestions = new List<string>(words.Length);

    foreach (string word in words)
    {
        string[] suggestions = spellchecker.SuggestSimilar(word, 1);

        // Fall back to the original word when the checker has nothing to offer.
        allSuggestions.Add(suggestions.Length > 0 ? suggestions[0] : word);
    }

    return String.Join(" ", allSuggestions.ToArray());
}
/// <summary>
/// Replaces each space-separated word of <paramref name="phrase"/> with the spell checker's
/// best suggestion and returns the suggestions joined by single spaces.
/// Words for which the checker returns nothing are omitted from the result.
/// </summary>
/// <param name="phrase">The phrase to correct.</param>
/// <returns>
/// The suggested phrase; an empty or partial string when no spell checker is available or an
/// error occurred (errors are logged, not rethrown).
/// </returns>
public string Suggest(string phrase)
{
    StringBuilder res = new StringBuilder();
    try
    {
        String[] words = phrase.Split(new char[] { ' ' });
        SpellChecker.Net.Search.Spell.SpellChecker spell = GetSpelling(false);
        if (spell != null)
        {
            for (int i = 0; i < words.Length; i++)
            {
                string[] similar = spell.SuggestSimilar(words[i], 1);
                if (similar != null && similar.Length > 0)
                {
                    // Put the separator before each suggestion after the first. The previous
                    // logic appended it after every non-final word, which left a trailing
                    // space whenever the last word had no suggestion.
                    if (res.Length > 0)
                    {
                        res.Append(' ');
                    }
                    res.Append(similar[0]);
                }
            }
        }
    }
    catch (Exception ex)
    {
        GXLogging.Error(log, "Suggest Error", ex);
    }
    return res.ToString();
}
/// <summary>
/// Demo entry point: indexes a small in-memory word list with the spell checker, prints every
/// term of the resulting index, then prints up to ten suggestions for "both".
/// </summary>
private static void Main(string[] args)
{
    var spellIndex = new RAMDirectory();
    var checker = new SpellChecker.Net.Search.Spell.SpellChecker(spellIndex);

    // Write the dictionary, one entry per line, into an in-memory stream.
    var buffer = new MemoryStream();
    var writer = new StreamWriter(buffer);
    string[] dictionaryEntries = { "Book", "Bath", "Bed", "Make", "Model", "Vacum", "Wending machine" };
    foreach (string entry in dictionaryEntries)
    {
        writer.WriteLine(entry);
    }
    writer.Flush();
    buffer.Position = 0; // rewind so the dictionary reads from the start

    checker.setStringDistance(new JaroWinklerDistance());
    checker.SetAccuracy(0.3f);
    checker.IndexDictionary(new PlainTextDictionary(buffer), CancellationToken.None);

    // Dump the terms stored in the spell index.
    var reader = IndexReader.Open(spellIndex, true);
    var terms = reader.Terms();
    while (terms.Next())
    {
        Console.WriteLine(terms.Term);
    }

    var matches = checker.SuggestSimilar("both", 10);
    for (int i = 0; i < matches.Length; i++)
    {
        Console.WriteLine(matches[i]);
    }
}
/// <summary>
/// Asserts that the checker returns exactly one suggestion for
/// <paramref name="misspelledWord"/> and that it equals <paramref name="expectedSuggestion"/>.
/// </summary>
private static void SuggestAndVerify(SpellChecker.Net.Search.Spell.SpellChecker checker, string misspelledWord, string expectedSuggestion)
{
    string[] suggestions = checker.SuggestSimilar(misspelledWord, 1);
    Assert.AreEqual(1, suggestions.Length);

    string best = suggestions[0];
    log.DebugFormat("If searching: '{0}'\t\t\t\tI suggest: '{1}'", misspelledWord, best);
    Assert.AreEqual(expectedSuggestion, best);
}
/// <summary>
/// Returns the spell checker's suggestions for <paramref name="prefix"/>, followed by the
/// prefix itself when the spell checker reports that it exists.
/// </summary>
/// <param name="prefix">The text to look up.</param>
/// <param name="maxItems">Maximum number of suggestions requested from the spell checker.</param>
internal List<string> SuggestSimilar(string prefix, int maxItems)
{
    var matches = new List<string>(spellChecker.SuggestSimilar(prefix, maxItems, null, null, true));

    if (!spellChecker.Exist(prefix))
    {
        return matches;
    }

    matches.Add(prefix);
    return matches;
}
/// <summary>
/// Opens the spell index at <paramref name="spellIndex"/> and returns up to
/// <paramref name="maxCount"/> suggestions for <paramref name="term"/>, using Levenshtein
/// distance with an accuracy threshold of 0.6.
/// </summary>
/// <param name="spellIndex">File-system path of the spell-checker index.</param>
/// <param name="term">The word to find suggestions for.</param>
/// <param name="maxCount">Maximum number of suggestions to return.</param>
/// <returns>The suggestions returned by the spell checker.</returns>
public string[] GetSuggestedWords(string spellIndex, string term, int maxCount)
{
    // Both the directory and the spell checker hold index resources; the original never
    // released them, so every call leaked them. Dispose them on every exit path.
    using (FSDirectory dir = FSDirectory.Open(spellIndex))
    using (var spell = new SpellChecker.Net.Search.Spell.SpellChecker(dir))
    {
        spell.SetAccuracy(0.6f);
        spell.setStringDistance(new LevenshteinDistance());
        return spell.SuggestSimilar(term, maxCount);
    }
}
/// <summary>
/// Gets up to two words similar to <paramref name="word"/>, using a spell index built from
/// the terms of <paramref name="fieldName"/>.
/// </summary>
/// <param name="reader">The reader whose directory hosts the spell index and whose terms feed it.</param>
/// <param name="fieldName">Name of the field whose terms form the dictionary.</param>
/// <param name="word">The word to find suggestions for.</param>
/// <returns>The suggestions returned by the spell checker.</returns>
private static string[] SuggestSimilar(IndexReader reader, string fieldName, string word)
{
    var spell = new SpellChecker.Net.Search.Spell.SpellChecker(reader.Directory());
    try
    {
        spell.IndexDictionary(new LuceneDictionary(reader, fieldName));
        return spell.SuggestSimilar(word, 2);
    }
    finally
    {
        // Close the spell checker even when indexing or the lookup throws; the original
        // only closed it on the success path.
        spell.Close();
    }
}
/// <summary>
/// Looks up suggestions for the query's term in the query's field and wraps them in a result.
/// </summary>
/// <param name="suggestionQuery">Supplies the term, the field and the maximum number of suggestions.</param>
/// <returns>A result whose <c>Suggestions</c> are the spell checker's suggestions.</returns>
public SuggestionQueryResult Query(SuggestionQuery suggestionQuery)
{
    var found = spellChecker.SuggestSimilar(
        suggestionQuery.Term,
        suggestionQuery.MaxSuggestions,
        null,
        suggestionQuery.Field,
        true);

    var queryResult = new SuggestionQueryResult();
    queryResult.Suggestions = found;
    return queryResult;
}
/// <summary>
/// Indexes a three-word plain-text dictionary and verifies that "treeword" yields the
/// suggestions "threeword" and "twoword", in that order.
/// </summary>
public void TestBuild()
{
    String LF = System.Environment.NewLine;
    String input = "oneword" + LF + "twoword" + LF + "threeword";
    PlainTextDictionary ptd = new PlainTextDictionary(
        new MemoryStream(System.Text.Encoding.UTF8.GetBytes(input)));

    RAMDirectory ramDir = new RAMDirectory();
    SpellChecker.Net.Search.Spell.SpellChecker spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(ramDir);
    spellChecker.IndexDictionary(ptd);

    String[] similar = spellChecker.SuggestSimilar("treeword", 2);

    Assert.AreEqual(2, similar.Length);
    // Expected value goes first so a failure message reports expected/actual correctly.
    Assert.AreEqual("threeword", similar[0]);
    Assert.AreEqual("twoword", similar[1]);
}
/// <summary>
/// Indexes a three-word plain-text dictionary and verifies that "treeword" yields the
/// suggestions "threeword" and "twoword", in that order.
/// </summary>
public void TestBuild()
{
    String LF = System.Environment.NewLine;
    String input = "oneword" + LF + "twoword" + LF + "threeword";
    PlainTextDictionary ptd = new PlainTextDictionary(new MemoryStream(System.Text.Encoding.UTF8.GetBytes(input)));

    RAMDirectory ramDir = new RAMDirectory();
    SpellChecker.Net.Search.Spell.SpellChecker spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(ramDir);
    spellChecker.IndexDictionary(ptd);

    String[] similar = spellChecker.SuggestSimilar("treeword", 2);

    Assert.AreEqual(2, similar.Length);
    // Expected value goes first so a failure message reports expected/actual correctly.
    Assert.AreEqual("threeword", similar[0]);
    Assert.AreEqual("twoword", similar[1]);
}
/// <summary>
/// Runs a suggestion query. A term wrapped in "&lt;&lt;" / "&gt;&gt;" or in parentheses is handed,
/// without its wrapper, to <c>QueryOverMultipleWords</c>; any other term is looked up directly.
/// </summary>
/// <param name="suggestionQuery">Supplies the term, the field and the maximum number of suggestions.</param>
/// <param name="indexReader">Reader passed through to the spell checker.</param>
/// <returns>The suggestions wrapped in a <c>SuggestionQueryResult</c>.</returns>
public SuggestionQueryResult Query(SuggestionQuery suggestionQuery, IndexReader indexReader)
{
    string term = suggestionQuery.Term;

    bool angleWrapped = term.StartsWith("<<") && term.EndsWith(">>");
    if (angleWrapped)
    {
        string inner = term.Substring(2, term.Length - 4);
        return QueryOverMultipleWords(suggestionQuery, indexReader, inner);
    }

    bool parenWrapped = term.StartsWith("(") && term.EndsWith(")");
    if (parenWrapped)
    {
        string inner = term.Substring(1, term.Length - 2);
        return QueryOverMultipleWords(suggestionQuery, indexReader, inner);
    }

    return new SuggestionQueryResult
    {
        Suggestions = spellChecker.SuggestSimilar(term, suggestionQuery.MaxSuggestions, indexReader, suggestionQuery.Field, true)
    };
}
/// <summary>
/// Rebuilds the spell-checker index from the Title and Description fields of the main index
/// and returns up to <paramref name="count"/> words similar to <paramref name="term"/>.
/// </summary>
/// <param name="term">The word to find suggestions for.</param>
/// <param name="count">Maximum number of suggestions to return.</param>
/// <returns>The suggestions returned by the spell checker.</returns>
public static string[] SuggestSilmilarWords(string term, int count = 10)
{
    // The original never released the reader, the spell checker or either directory, so
    // every call leaked them. Dispose all four on every exit path.
    using (var mainDirectory = FSDirectory.Open(_luceneDir))
    using (IndexReader indexReader = IndexReader.Open(mainDirectory, true))
    using (var spellDirectory = FSDirectory.Open(_luceneDir + "\\Spell"))
    using (var spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(spellDirectory))
    {
        // Recreate the spell-checker index from scratch.
        spellChecker.ClearIndex();
        spellChecker.IndexDictionary(new LuceneDictionary(indexReader, StronglyTyped.PropertyName<LuceneSearchModel>(x => x.Title)));
        spellChecker.IndexDictionary(new LuceneDictionary(indexReader, StronglyTyped.PropertyName<LuceneSearchModel>(x => x.Description)));

        return spellChecker.SuggestSimilar(term, count, null, null, true);
    }
}
/// <summary>
/// Configures the spell checker from the query's distance and accuracy, then runs the query.
/// A term wrapped in "&lt;&lt;" / "&gt;&gt;" or in parentheses is handed, without its wrapper, to
/// <c>QueryOverMultipleWords</c>; any other term is looked up directly.
/// </summary>
/// <param name="suggestionQuery">The query; <c>Accuracy</c> and <c>Distance</c> must be set.</param>
/// <param name="indexReader">Reader passed through to the spell checker.</param>
/// <returns>The suggestions wrapped in a <c>SuggestionQueryResult</c>.</returns>
/// <exception cref="InvalidOperationException">Accuracy or Distance has no value.</exception>
public SuggestionQueryResult Query(SuggestionQuery suggestionQuery, IndexReader indexReader)
{
    if (!suggestionQuery.Accuracy.HasValue)
    {
        throw new InvalidOperationException("SuggestionQuery.Accuracy must be specified.");
    }

    if (!suggestionQuery.Distance.HasValue)
    {
        throw new InvalidOperationException("SuggestionQuery.Distance must be specified.");
    }

    var distance = SuggestionQueryRunner.GetStringDistance(suggestionQuery.Distance.Value);
    spellChecker.setStringDistance(distance);
    spellChecker.SetAccuracy(suggestionQuery.Accuracy.Value);

    string term = suggestionQuery.Term;

    bool angleWrapped = term.StartsWith("<<") && term.EndsWith(">>");
    if (angleWrapped)
    {
        return QueryOverMultipleWords(suggestionQuery, indexReader, term.Substring(2, term.Length - 4));
    }

    bool parenWrapped = term.StartsWith("(") && term.EndsWith(")");
    if (parenWrapped)
    {
        return QueryOverMultipleWords(suggestionQuery, indexReader, term.Substring(1, term.Length - 2));
    }

    return new SuggestionQueryResult
    {
        Suggestions = spellChecker.SuggestSimilar(term, suggestionQuery.MaxSuggestions, indexReader, suggestionQuery.Field, true)
    };
}
/// <summary>
/// Rebuilds the spell-checker index from the Title, Body, SubTitle, Keywords and Description
/// fields of the main index and returns up to <paramref name="count"/> words similar to
/// <paramref name="term"/>.
/// </summary>
/// <param name="term">The word to find suggestions for.</param>
/// <param name="count">Maximum number of suggestions to return.</param>
/// <returns>The suggestions returned by the spell checker.</returns>
public static string[] SuggestSilmilarWords(string term, int count = 10)
{
    // The original never released the reader, the spell checker or either directory, so
    // every call leaked them. Dispose all four on every exit path.
    using (var mainDirectory = FSDirectory.Open(_luceneDir))
    using (IndexReader indexReader = IndexReader.Open(mainDirectory, true))
    using (var spellDirectory = FSDirectory.Open(_luceneDir + "\\Spell"))
    using (var spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(spellDirectory))
    {
        // Recreate the spell-checker index from scratch, one source field at a time.
        spellChecker.ClearIndex();
        string[] sourceFields = { "Title", "Body", "SubTitle", "Keywords", "Description" };
        foreach (string field in sourceFields)
        {
            spellChecker.IndexDictionary(new LuceneDictionary(indexReader, field));
        }

        return spellChecker.SuggestSimilar(term, count, null, null, true);
    }
}
/// <summary>
/// Validates and normalizes <paramref name="suggestionQuery"/>, builds a temporary in-memory
/// spell index from the query's field of the named index, and returns the suggestions.
/// </summary>
/// <param name="indexName">Name of the index to draw dictionary terms from.</param>
/// <param name="suggestionQuery">
/// The query. <c>MaxSuggestions</c> and <c>Accuracy</c> are overwritten on this instance when
/// they are out of range, and <c>MaxSuggestions</c> is capped at the configured page size.
/// </param>
/// <returns>The suggestions wrapped in a <c>SuggestionQueryResult</c>.</returns>
/// <exception cref="ArgumentNullException">
/// The query is null, or its term, its field or <paramref name="indexName"/> is null or whitespace.
/// </exception>
public SuggestionQueryResult ExecuteSuggestionQuery(string indexName, SuggestionQuery suggestionQuery)
{
    if (suggestionQuery == null)
    {
        throw new ArgumentNullException("suggestionQuery");
    }
    if (string.IsNullOrWhiteSpace(suggestionQuery.Term))
    {
        throw new ArgumentNullException("suggestionQuery.Term");
    }
    if (string.IsNullOrWhiteSpace(indexName))
    {
        throw new ArgumentNullException("indexName");
    }
    if (string.IsNullOrWhiteSpace(suggestionQuery.Field))
    {
        throw new ArgumentNullException("suggestionQuery.Field");
    }

    // Fall back to defaults for out-of-range settings.
    if (suggestionQuery.MaxSuggestions <= 0)
    {
        suggestionQuery.MaxSuggestions = 10;
    }
    if (suggestionQuery.Accuracy <= 0 || suggestionQuery.Accuracy > 1)
    {
        suggestionQuery.Accuracy = 0.5f;
    }

    suggestionQuery.MaxSuggestions = Math.Min(suggestionQuery.MaxSuggestions, _database.Configuration.MaxPageSize);

    IndexSearcher searcher;
    var searcherHolder = _database.IndexStorage.GetCurrentIndexSearcher(indexName);
    using (searcherHolder.Use(out searcher))
    {
        var reader = searcher.GetIndexReader();
        var checker = new SpellChecker.Net.Search.Spell.SpellChecker(new RAMDirectory(), GetStringDistance(suggestionQuery));
        try
        {
            checker.IndexDictionary(new LuceneDictionary(reader, suggestionQuery.Field));
            checker.SetAccuracy(suggestionQuery.Accuracy);

            var found = checker.SuggestSimilar(
                suggestionQuery.Term,
                suggestionQuery.MaxSuggestions,
                reader,
                suggestionQuery.Field,
                true);

            return new SuggestionQueryResult { Suggestions = found };
        }
        finally
        {
            checker.Close();
            // Close() does not suppress the finalizer itself, so do it explicitly.
            GC.SuppressFinalize(checker);
        }
    }
}
/// <summary>
/// Demo entry point: builds an in-memory Lucene index and spell index, then prints up to ten
/// words similar to "dammark".
/// </summary>
static void Main(string[] args)
{
    Directory luceneDir = new RAMDirectory();
    Directory spellDir = new RAMDirectory();

    CreateLuceneIndex(luceneDir);
    Net.Search.Spell.SpellChecker spell = GetSpellChecker(luceneDir, spellDir);

    var word = "dammark";
    string[] similarWords = spell.SuggestSimilar(word, 10);

    // Print every suggestion next to the word it was suggested for.
    foreach (string candidate in similarWords)
    {
        Console.WriteLine("{0} is similar to {1}", candidate, word);
    }
}
/// <summary>
/// Returns spelling suggestions for <paramref name="value"/>, best first. When the value itself
/// is present in the index it is placed at the head of the list.
/// </summary>
/// <param name="value">The word to look up.</param>
/// <param name="numberOfItems">Maximum number of suggestions requested from the spell checker.</param>
/// <returns>
/// The value (if indexed) followed by the suggestions ordered by Jaro-Winkler score, then
/// n-gram score, then the average of scaled document frequency and Levenshtein score.
/// </returns>
public List<string> GetTopSuggestions(string value, int numberOfItems)
{
    EnsureIndexed();

    var output = new List<string>();

    // A word that already exists in the index goes first.
    int occurrences = _indexReader.DocFreq(new Term(SpellCheckerConstants.SpellCheckerKey, value));
    if (occurrences > 0)
    {
        output.Add(value);
    }

    var candidates = _checker.SuggestSimilar(value, numberOfItems, null, SpellCheckerConstants.SpellCheckerKey, true);

    var jaroWinkler = new JaroWinklerDistance();
    var levenshtein = new LevenshteinDistance();
    var nGram = new NGramDistance();

    var ranked = candidates
        .Select(candidate => new
        {
            word = candidate,
            freq = _indexReader.DocFreq(new Term(SpellCheckerConstants.SpellCheckerKey, candidate)),
            jaro = jaroWinkler.GetDistance(value, candidate),
            leven = levenshtein.GetDistance(value, candidate),
            ngram = nGram.GetDistance(value, candidate)
        })
        .OrderByDescending(entry => entry.jaro)
        .ThenByDescending(entry => entry.ngram)
        .ThenByDescending(entry => (entry.freq / 100f + entry.leven) / 2f);

    output.AddRange(ranked.Select(entry => entry.word));
    return output;
}
/// <summary>
/// Checks the spelling of a single-word input and returns the suggested correction.
/// </summary>
/// <param name="word">The word to check.</param>
/// <returns>
/// A suggestion whose <c>SuggestedWord</c> is the first suggestion (or an empty string when
/// there is none) and whose <c>SuggestedType</c> is taken from the "id" of the sample
/// documents containing the suggestions; when several documents match, the last one wins.
/// </returns>
private static Suggestion SpellCheck(string word)
{
    var dir = new RAMDirectory();

    // The original disposed the reader and writer only on the success path and never
    // disposed the spell checker or the searcher; the using blocks cover every exit path.
    using (var iw = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30), IndexWriter.MaxFieldLength.UNLIMITED))
    {
        // Insertion order (district, city, county) is the same as before; it determines the
        // order in which matching documents are visited below.
        iw.AddDocument(CreateSpellCheckSampleDocument("Küçükyalı Kozyatağı", "0"));
        iw.AddDocument(CreateSpellCheckSampleDocument("İstanbul İzmir", "2"));
        iw.AddDocument(CreateSpellCheckSampleDocument("Maltepe Maslak", "1"));
        iw.Commit();

        using (var reader = iw.GetReader())
        using (var speller = new SpellChecker.Net.Search.Spell.SpellChecker(new RAMDirectory()))
        using (var searcher = new IndexSearcher(reader))
        {
            speller.IndexDictionary(new LuceneDictionary(reader, "text"));
            var suggestions = speller.SuggestSimilar(word, 5);

            var retVal = new Suggestion
            {
                SuggestedWord = suggestions.Length > 0 ? suggestions[0] : ""
            };

            // Map each document containing a suggested word back to its suggestion type.
            foreach (var suggestion in suggestions)
            {
                var docs = searcher.Search(new TermQuery(new Term("text", suggestion)), null, Int32.MaxValue);
                foreach (var hit in docs.ScoreDocs)
                {
                    switch (searcher.Doc(hit.Doc).Get("id"))
                    {
                        case "0":
                            retVal.SuggestedType = SuggestionType.District;
                            break;
                        case "1":
                            retVal.SuggestedType = SuggestionType.County;
                            break;
                        case "2":
                            retVal.SuggestedType = SuggestionType.City;
                            break;
                    }
                }
            }

            return retVal;
        }
    }
}

/// <summary>
/// Creates a sample document with an analyzed "text" field and a non-analyzed "id" field.
/// </summary>
private static Document CreateSpellCheckSampleDocument(string text, string id)
{
    var document = new Document();
    document.Add(new Field("text", text, Field.Store.YES, Field.Index.ANALYZED));
    document.Add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
    return document;
}
/// <summary>
/// Gets up to two words similar to <paramref name="word"/>, using a spell index built from
/// the terms of <paramref name="fieldName"/>.
/// </summary>
/// <param name="reader">The reader whose directory hosts the spell index and whose terms feed it.</param>
/// <param name="fieldName">Name of the field whose terms form the dictionary.</param>
/// <param name="word">The word to find suggestions for.</param>
/// <returns>The suggestions returned by the spell checker.</returns>
private string[] SuggestSimilar(IndexReader reader, string fieldName, string word)
{
    var spell = new SpellChecker(reader.Directory());
    try
    {
        spell.IndexDictionary(new LuceneDictionary(reader, fieldName));
        return spell.SuggestSimilar(word, 2);
    }
    finally
    {
        // Close the spell checker even when indexing or the lookup throws; the original
        // only closed it on the success path.
        spell.Close();
    }
}
/// <summary>
/// Rebuilds the spell-checker index from the Name, Author, Publisher, ISBN and Description
/// fields of the main index and returns up to <paramref name="count"/> words similar to
/// <paramref name="term"/>.
/// </summary>
/// <param name="term">The word to find suggestions for.</param>
/// <param name="count">Maximum number of suggestions to return.</param>
/// <returns>The suggestions returned by the spell checker.</returns>
public static string[] SuggestSilmilarWords(string term, int count = 10)
{
    // The original never released the reader, the spell checker or either directory, so
    // every call leaked them. Dispose all four on every exit path.
    using (var mainDirectory = FSDirectory.Open(_luceneDir))
    using (IndexReader indexReader = IndexReader.Open(mainDirectory, true))
    using (var spellDirectory = FSDirectory.Open(_luceneDir + "\\Spell"))
    using (var spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(spellDirectory))
    {
        // Recreate the spell-checker index from scratch, one source field at a time.
        spellChecker.ClearIndex();
        string[] sourceFields = { "Name", "Author", "Publisher", "ISBN", "Description" };
        foreach (string field in sourceFields)
        {
            spellChecker.IndexDictionary(new LuceneDictionary(indexReader, field));
        }

        return spellChecker.SuggestSimilar(term, count, null, null, true);
    }
}
/// <summary>
/// Page load handler for the search results page. Reads the "searchStr" query-string value,
/// runs the search, collects the resulting items and, when nothing was found, renders either
/// "did you mean" links built from spell-checker suggestions or a "no results" message.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    lastUpdatedText = SiteConfiguration.GetDictionaryText("Last Updated");
    cmdPrev.Text = SiteConfiguration.GetDictionaryText("Previous Button");
    cmdNext.Text = SiteConfiguration.GetDictionaryText("Next Button");

    // Decode the search string taken from the query string.
    string searchStr = Server.UrlDecode(WebUtil.GetQueryString("searchStr"));

    // If the visitor provided no criteria, don't bother searching. A null value is treated
    // the same as an empty one instead of being passed on to the search.
    if (string.IsNullOrEmpty(searchStr))
    {
        lblSearchString.Text = SiteConfiguration.GetDictionaryText("Search Criteria") + SiteConfiguration.GetDictionaryText("No Criteria");
        return;
    }

    string indexName = StringUtil.GetString(IndexName, SiteConfiguration.GetSiteSettingsItem()["Search Index"]);
    searchMgr = new SearchManager(indexName);

    // Remind the visitor what they provided as search criteria. The value comes straight
    // from the request, so it is HTML-encoded before being written into the page.
    lblSearchString.Text = SiteConfiguration.GetDictionaryText("Search Criteria") + Server.HtmlEncode(searchStr);

    // Perform the actual search and collect the items behind the hits.
    searchMgr.Search(searchStr);
    results = searchMgr.SearchResults;
    foreach (var result in results)
    {
        Item hit = result.GetObject<Item>();
        if (hit != null)
        {
            ResultsList.Add(hit);
        }
    }

    if (searchMgr.SearchResults.Count != 0)
    {
        if (!Page.IsPostBack)
        {
            DisplayResults();
        }
        return;
    }

    // No results were found, so show a message and, when available, suggestions.
    Sitecore.Search.Index index = Sitecore.Search.SearchManager.GetIndex("system");
    SpellChecker.Net.Search.Spell.SpellChecker spellchecker = new SpellChecker.Net.Search.Spell.SpellChecker(index.Directory);

    String[] suggestions;
    IndexReader dictionaryReader = IndexReader.Open(index.Directory);
    try
    {
        spellchecker.IndexDictionary(new LuceneDictionary(dictionaryReader, "_content"));
        suggestions = spellchecker.SuggestSimilar(searchStr, 5);
    }
    finally
    {
        // The original never closed this reader, leaking it on every empty search.
        dictionaryReader.Close();
    }

    if (suggestions.Length > 0)
    {
        string pageUrl = LinkManager.GetItemUrl(Sitecore.Context.Item);
        string didYouMean = "<p>" + SiteConfiguration.GetDictionaryText("Did You Mean");
        foreach (string s in suggestions)
        {
            // Suggestions are index terms; encode them for the URL and for the link text
            // so they cannot break out of the attribute or inject markup.
            didYouMean += String.Format(" <a href=\"{0}?searchStr={1}\">{2}</a> ", pageUrl, Server.UrlEncode(s), Server.HtmlEncode(s));
        }
        didYouMean += "</p>";
        lblSearchString.Text += didYouMean;
    }
    else
    {
        string noResultsMsg = SiteConfiguration.GetDictionaryText("No Results");
        LiteralControl noResults = new LiteralControl(string.Format("<p>{0}</p>", noResultsMsg));
        pnResultsPanel.Controls.Add(noResults);
    }
}
/// <summary>
/// Runs a full-text search for the requested page and optionally attaches spelling
/// suggestions for each whitespace-separated word of the query.
/// </summary>
/// <param name="searchQuery">Query text, paging, titles-only flag and maximum suggestions.</param>
/// <param name="suggestOnlyWhenNoResults">
/// When true, suggestions are produced only if the search returned no hits; when false they
/// are always produced.
/// </param>
/// <returns>The view model holding the total hit count, the page of results and any suggestions.</returns>
public static SearchResultsViewModel SearchWithSuggestions(SearchQuery searchQuery, bool suggestOnlyWhenNoResults = false)
{
    var ret = new SearchResultsViewModel
    {
        SearchResults = new List<SearchResultsViewModel.SearchResult>(PageSize),
        Query = searchQuery
    };

    // Parse query, possibly throwing a ParseException
    Query query;
    if (searchQuery.TitlesOnly)
    {
        var qp = new QueryParser(Lucene.Net.Util.Version.LUCENE_29,
                                 "Title",
                                 new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29));
        query = qp.Parse(searchQuery.Query);
    }
    else
    {
        query = MultiFieldQueryParser.Parse(Lucene.Net.Util.Version.LUCENE_29,
                                            searchQuery.Query,
                                            SearchFields,
                                            SearchFlags,
                                            new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29));
    }

    // Perform the actual search, collecting enough hits to reach the end of the requested page.
    var tsdc = TopScoreDocCollector.create(PageSize * searchQuery.CurrentPage, true);
    Searcher.Search(query, tsdc);
    ret.TotalResults = tsdc.GetTotalHits();
    var hits = tsdc.TopDocs().ScoreDocs;

    // Suggest always, or only on an empty result set when the caller asked for that.
    // (Equivalent to the original "(a && b) || !a" condition.)
    if (!suggestOnlyWhenNoResults || ret.TotalResults == 0)
    {
        ret.Suggestions = new List<string>();
        var spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(Searcher.GetIndexReader().Directory());

        // This is kind of a hack to get things working quickly; for real-world usage we
        // probably want to get the analyzed terms from the Query object.
        var individualTerms = searchQuery.Query.Split(new[] { ' ', '\t', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

        // Only specify a field name when the search produced results, to improve
        // suggestion relevancy.
        string suggestionField = ret.TotalResults == 0 ? null : "Title";
        foreach (var term in individualTerms)
        {
            ret.Suggestions.AddRange(spellChecker.SuggestSimilar(term, searchQuery.MaxSuggestions, null, suggestionField, true));
        }
    }

    // Init the highlighter instance
    var fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT,
                                        FastVectorHighlighter.DEFAULT_FIELD_MATCH,
                                        new SimpleFragListBuilder(),
                                        new ScoreOrderFragmentsBuilder(new[] { "[b]" }, new[] { "[/b]" }));

    // The field query depends only on the parsed query, so build it once rather than per hit.
    var fq = fvh.GetFieldQuery(query);

    for (int i = (searchQuery.CurrentPage - 1) * PageSize; i < hits.Length; ++i)
    {
        var d = Searcher.Doc(hits[i].doc);
        var fragment = fvh.GetBestFragment(fq, Searcher.GetIndexReader(), hits[i].doc, "Content", 400);

        ret.SearchResults.Add(new SearchResultsViewModel.SearchResult
        {
            Id = d.Get("Id"),
            Title = d.Get("Title"),
            Score = hits[i].score,
            LuceneDocId = hits[i].doc,
            Fragment = MvcHtmlString.Create(fragment.HtmlStripFragment()),
        });
    }

    return ret;
}
/// <summary>
/// Runs a full-text search for the requested page and optionally attaches spelling
/// suggestions for each whitespace-separated word of the query.
/// </summary>
/// <param name="searchQuery">Query text, paging, titles-only flag and maximum suggestions.</param>
/// <param name="suggestOnlyWhenNoResults">
/// When true, suggestions are produced only if the search returned no hits; when false they
/// are always produced.
/// </param>
/// <returns>The view model holding the total hit count, the page of results and any suggestions.</returns>
public static SearchResultsViewModel SearchWithSuggestions(SearchQuery searchQuery, bool suggestOnlyWhenNoResults = false)
{
    var ret = new SearchResultsViewModel
    {
        SearchResults = new List<SearchResultsViewModel.SearchResult>(PageSize),
        Query = searchQuery
    };

    // Parse query, possibly throwing a ParseException
    Query query;
    if (searchQuery.TitlesOnly)
    {
        var qp = new QueryParser(Lucene.Net.Util.Version.LUCENE_29,
                                 "Title",
                                 new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29));
        query = qp.Parse(searchQuery.Query);
    }
    else
    {
        query = MultiFieldQueryParser.Parse(Lucene.Net.Util.Version.LUCENE_29,
                                            searchQuery.Query,
                                            SearchFields,
                                            SearchFlags,
                                            new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29));
    }

    // Perform the actual search, collecting enough hits to reach the end of the requested page.
    var tsdc = TopScoreDocCollector.create(PageSize * searchQuery.CurrentPage, true);
    Searcher.Search(query, tsdc);
    ret.TotalResults = tsdc.GetTotalHits();
    var hits = tsdc.TopDocs().ScoreDocs;

    // Suggest always, or only on an empty result set when the caller asked for that.
    // (Equivalent to the original "(a && b) || !a" condition.)
    if (!suggestOnlyWhenNoResults || ret.TotalResults == 0)
    {
        ret.Suggestions = new List<string>();
        var spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(Searcher.GetIndexReader().Directory());

        // This is kind of a hack to get things working quickly; for real-world usage we
        // probably want to get the analyzed terms from the Query object.
        var individualTerms = searchQuery.Query.Split(new[] { ' ', '\t', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

        // Only specify a field name when the search produced results, to improve
        // suggestion relevancy.
        string suggestionField = ret.TotalResults == 0 ? null : "Title";
        foreach (var term in individualTerms)
        {
            ret.Suggestions.AddRange(spellChecker.SuggestSimilar(term, searchQuery.MaxSuggestions, null, suggestionField, true));
        }
    }

    // Init the highlighter instance
    var fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT,
                                        FastVectorHighlighter.DEFAULT_FIELD_MATCH,
                                        new SimpleFragListBuilder(),
                                        new ScoreOrderFragmentsBuilder(new[] { "[b]" }, new[] { "[/b]" }));

    // The field query depends only on the parsed query, so build it once rather than per hit.
    var fq = fvh.GetFieldQuery(query);

    for (int i = (searchQuery.CurrentPage - 1) * PageSize; i < hits.Length; ++i)
    {
        var d = Searcher.Doc(hits[i].doc);
        var fragment = fvh.GetBestFragment(fq, Searcher.GetIndexReader(), hits[i].doc, "Content", 400);

        ret.SearchResults.Add(new SearchResultsViewModel.SearchResult
        {
            Id = d.Get("Id"),
            Title = d.Get("Title"),
            Score = hits[i].score,
            LuceneDocId = hits[i].doc,
            Fragment = MvcHtmlString.Create(fragment.HtmlStripFragment()),
        });
    }

    return(ret);
}
/// <summary>
/// Searches the index under <c>_indexFilesPath</c> for <paramref name="term"/>, fills the result
/// with highlighted documents and spelling suggestions, and publishes a <c>SearchEvent</c>.
/// The search itself runs inside <c>Task.Run</c>; any exception it throws is stored on the
/// result (<c>Error</c>, <c>HasError</c>) instead of being propagated.
/// </summary>
/// <param name="term">Search text; it is trimmed and runs of spaces are collapsed before use.</param>
/// <param name="filterByCategory">When set, only documents whose "Categories" term equals this value.</param>
/// <param name="languageId">When greater than -1, only documents whose "LanguageId" term equals this value.</param>
/// <param name="postType">When set, only documents whose "PostType" term equals this value.</param>
/// <param name="searchPlace">Which of the Title/Description/Keywords/Tags fields to search.</param>
/// <param name="orderBy">Sort order; values not handled by the switch keep the score sort.</param>
/// <param name="maxResult">Maximum number of hits requested from each individual search call.</param>
/// <param name="exactSearch">When true a single query is run; otherwise four query variants are run and merged.</param>
/// <returns>The populated result; an empty result when the term is blank after normalization.</returns>
public virtual async Task <SearchResult> SearchAsync(string term, int?filterByCategory = null, int languageId = -1, PostType?postType = null, SearchPlace searchPlace = SearchPlace.Anywhere, SearchResultSortType orderBy = SearchResultSortType.Score, int maxResult = 1000, bool exactSearch = false)
{
    var result = new SearchResult();

    term = term.Trim();

    // Replace every run of two or more spaces with a single space.
    RegexOptions options = RegexOptions.None;
    Regex regex = new Regex("[ ]{2,}", options);
    term = regex.Replace(term, " ");

    // Nothing to search for: return the empty result without timing or publishing an event.
    if (string.IsNullOrWhiteSpace(term))
    {
        return(result);
    }

    var watch = new System.Diagnostics.Stopwatch();
    watch.Start();

    try
    {
        await Task.Run(() =>
        {
            using (var directory = FSDirectory.Open(new DirectoryInfo(_indexFilesPath)))
            {
                using (var searcher = new IndexSearcher(directory, readOnly: true))
                {
                    // Work out which fields the query parser should target.
                    var searchInFields = new List <string>();
                    if (searchPlace == SearchPlace.Anywhere)
                    {
                        searchInFields.AddRange(new string[] { "Title", "Description", "Keywords", "Tags" });
                    }
                    else
                    {
                        if (searchPlace.HasFlagFast(SearchPlace.Title))
                        {
                            searchInFields.Add("Title");
                        }
                        if (searchPlace.HasFlagFast(SearchPlace.Description))
                        {
                            searchInFields.Add("Description");
                        }
                        if (searchPlace.HasFlagFast(SearchPlace.Keywords))
                        {
                            searchInFields.Add("Keywords");
                        }
                        if (searchPlace.HasFlagFast(SearchPlace.Tags))
                        {
                            searchInFields.Add("Tags");
                        }
                    }

                    // Build a filter only when at least one restriction was requested.
                    BooleanFilter filter = null;
                    if (languageId > -1 || filterByCategory != null || postType != null)
                    {
                        filter = new BooleanFilter();
                        if (languageId > -1)
                        {
                            filter.Add(new FilterClause(
                                           new QueryWrapperFilter(new TermQuery(new Term("LanguageId", languageId.ToString()))),
                                           Occur.MUST));
                        }
                        if (filterByCategory != null)
                        {
                            filter.Add(new FilterClause(
                                           new QueryWrapperFilter(new TermQuery(new Term("Categories", filterByCategory.Value.ToString()))),
                                           Occur.MUST));
                        }
                        if (postType != null)
                        {
                            filter.Add(new FilterClause(
                                           new QueryWrapperFilter(new TermQuery(new Term("PostType", postType.Value.ToString()))),
                                           Occur.MUST));
                        }
                    }

                    var currentSettings = _settingService.LoadSetting <SiteSettings>();
                    if (!currentSettings.EnableBlog)
                    {
                        // Exclude blog posts when the blog is disabled, creating the filter if
                        // no other restriction did so above.
                        if (filter == null)
                        {
                            filter = new BooleanFilter();
                        }
                        filter.Add(new FilterClause(
                                       new QueryWrapperFilter(new TermQuery(new Term("PostType", PostType.BlogPost.ToString()))),
                                       Occur.MUST_NOT));
                    }

                    // Sort by score unless one of the explicit sort types below is requested.
                    Sort sort = new Sort(SortField.FIELD_SCORE);
                    switch (orderBy)
                    {
                    case SearchResultSortType.NumberOfVisits:
                        sort = new Sort(new SortField("NumberOfVisit", SortField.INT, true));
                        break;

                    case SearchResultSortType.PublishDate:
                        sort = new Sort(new SortField("PublishDate", SortField.LONG, true));
                        break;

                    case SearchResultSortType.LastUpDate:
                        sort = new Sort(new SortField("LastUpDate", SortField.LONG, true));
                        break;
                    }

                    var analyzer = new StandardAnalyzer(Version);
                    var parser = new MultiFieldQueryParser(Version, searchInFields.ToArray(), analyzer);
                    QueryScorer scorer = null;
                    var hits = new List <ScoreDoc>();
                    Query query = null;
                    if (exactSearch)
                    {
                        query = ParseQuery(term, parser);
                        hits.AddRange(searcher.Search(query, filter, maxResult, sort).ScoreDocs);
                    }
                    else
                    {
                        // Run four variants of the term and append all hits in this order:
                        // quoted phrase, spaces replaced by '*', every word prefixed with '+',
                        // then the plain term. Duplicates are removed further below.
                        query = ParseQuery($"(\"{term}\")", parser);
                        hits.AddRange(searcher.Search(query, filter, maxResult, sort).ScoreDocs);

                        query = ParseQuery($"({term.Replace(" ", "*")})", parser);
                        hits.AddRange(searcher.Search(query, filter, maxResult, sort).ScoreDocs);

                        query = ParseQuery($"(+{term.Trim().Replace(" ", " +")})", parser);
                        hits.AddRange(searcher.Search(query, filter, maxResult, sort).ScoreDocs);

                        query = ParseQuery(term, parser);
                        hits.AddRange(searcher.Search(query, filter, maxResult, sort).ScoreDocs);
                    }

                    // The highlighter scorer is built from the last query parsed above.
                    scorer = new QueryScorer(query);

                    if (hits.Count == 0)
                    {
                        // Nothing matched: retry with the term rewritten by SearchByPartialWords.
                        // Note that this reassigns the captured `term`, so the suggestions and
                        // the published event below see the rewritten value.
                        term = SearchByPartialWords(term);
                        query = ParseQuery(term, parser);
                        scorer = new QueryScorer(query);
                        hits.AddRange(searcher.Search(query, filter, maxResult, sort).ScoreDocs);
                    }

                    var formatter = new SimpleHTMLFormatter(
                        "<span class='badge badge-warning'>",
                        "</span>");
                    var fragmenter = new SimpleFragmenter(300);
                    var highlighter = new Highlighter(formatter, scorer)
                    {
                        TextFragmenter = fragmenter
                    };

                    // Convert every hit into a result document with highlighted title and body.
                    foreach (var scoreDoc in hits)
                    {
                        var doc = searcher.Doc(scoreDoc.Doc);
                        result.Documents.Add(new SearchResultDocument()
                        {
                            DocumentId = int.Parse(doc.Get("ID")),
                            LanguageId = int.Parse(doc.Get("LanguageId")),
                            LanguageIsoCode = doc.Get("LanguageCode"),
                            Score = scoreDoc.Score,
                            DocumentTitle = GetHighlight("Title", highlighter, analyzer, doc.Get("Title"), false),
                            DocumentBody = GetHighlight("Description", highlighter, analyzer, doc.Get("Description"), true),
                            DocumentKeywords = doc.Get("Keywords"),
                            DocumentTags = doc.Get("Tags"),
                        });
                    }

                    // The query variants can return the same document more than once; keep one
                    // entry per DocumentId.
                    result.Documents = result.Documents.DistinctBy(p => new { p.DocumentId })
                                       .ToList();

                    analyzer.Close();

                    // Add up to ten spelling suggestions for the (possibly rewritten) term from
                    // the separate spell index under _spellFilesPath.
                    using (var spellDirectory = FSDirectory.Open(new DirectoryInfo(_spellFilesPath)))
                    {
                        using (var spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(spellDirectory))
                        {
                            result.SuggestSimilar.AddRange(spellChecker.SuggestSimilar(term, 10, null, null, true));
                        }
                    }
                }
            }
        });
    }
    catch (Exception ex)
    {
        // Report the failure through the result instead of throwing to the caller.
        result.Error = ex;
        result.HasError = true;
    }

    watch.Stop();
    result.ElapsedMilliseconds = watch.ElapsedMilliseconds;

    // Published on success and on failure alike; `term` is the normalized (and possibly
    // rewritten) value, not the caller's original input.
    _eventPublisher.Publish(new SearchEvent(term, filterByCategory, languageId, postType, searchPlace, maxResult, result));

    return(result);
}
/// <summary>
/// Builds a spell index from the "contents" field of <c>store</c> and verifies that the
/// single best suggestion for "Tam" is "Tom" and for "Jarry" is "Jerry".
/// </summary>
public void TestSpellchecker()
{
    SpellChecker.Net.Search.Spell.SpellChecker sc = new SpellChecker.Net.Search.Spell.SpellChecker(new RAMDirectory());
    indexReader = IndexReader.Open(store);
    try
    {
        sc.IndexDictionary(new LuceneDictionary(indexReader, "contents"));

        String[] suggestions = sc.SuggestSimilar("Tam", 1);
        AssertEquals(1, suggestions.Length);
        AssertEquals("Tom", suggestions[0]);

        suggestions = sc.SuggestSimilar("Jarry", 1);
        AssertEquals(1, suggestions.Length);
        AssertEquals("Jerry", suggestions[0]);
    }
    finally
    {
        // Close the reader even when an assertion fails, so a failing test does not leak it.
        indexReader.Close();
    }
}
/// <summary>
/// Returns up to five spell-checker suggestions for <paramref name="inputQuery"/>.
/// </summary>
/// <param name="inputQuery">The text to find suggestions for.</param>
/// <returns>The suggestions returned by the spell checker.</returns>
public string[] SpellCheckerSuggestion(string inputQuery)
{
    return spellchecker.SuggestSimilar(inputQuery, 5);
}
/// <summary>
/// Gets up to two words similar to <paramref name="word"/>, using a spell index built from
/// the terms of <paramref name="fieldName"/> in the index behind <c>_IndexReader</c>.
/// </summary>
/// <param name="fieldName">Name of the field whose terms form the dictionary.</param>
/// <param name="word">The word to find suggestions for.</param>
/// <returns>The suggestions returned by the spell checker.</returns>
public string[] GetSimilarWords(string fieldName, string word)
{
    SpellChecker.Net.Search.Spell.SpellChecker spell = new SpellChecker.Net.Search.Spell.SpellChecker(_IndexReader.Directory());
    try
    {
        spell.IndexDictionary(new LuceneDictionary(_IndexReader, fieldName));
        return spell.SuggestSimilar(word, 2);
    }
    finally
    {
        // The original never closed the spell checker, leaking it on every call.
        spell.Close();
    }
}