/// <summary>
/// Parses free-form input against several fields at once and attaches the
/// resulting query to this builder as a sub-query.
/// </summary>
/// <param name="fields">Names of the index fields the input is matched against.</param>
/// <param name="stringQuery">Raw query text. When it is null, empty or whitespace the builder is returned untouched.</param>
/// <param name="defaultOp">Operator applied between terms; when null the parser keeps its own default.</param>
/// <returns>The result of <c>AddSubQuery</c> when a query was produced, otherwise this builder.</returns>
public QueryBuilder MatchParsedInput(IEnumerable<string> fields, string stringQuery, QueryParser.Operator defaultOp = null)
{
    // Nothing to parse: leave the builder as it is.
    if (string.IsNullOrWhiteSpace(stringQuery))
    {
        return this;
    }

    var multiFieldParser = new MultiFieldQueryParser(Version, fields.ToArray(), _analyzer);
    if (defaultOp != null)
    {
        multiFieldParser.SetDefaultOperator(defaultOp);
    }

    Query parsed;
    try
    {
        parsed = multiFieldParser.Parse(stringQuery);
    }
    catch (ParseException)
    {
        // The input is not valid query syntax: retry with it lower-cased and escaped.
        string escapedInput = QueryParser.Escape(stringQuery.ToLower());
        parsed = multiFieldParser.Parse(escapedInput);
    }

    if (parsed == null)
    {
        return this;
    }

    return AddSubQuery(parsed);
}
/// <summary>
/// Search in the index using the supplied filters.
/// </summary>
/// <param name="parameters">
/// Filter values. <c>FreeText</c> (when not blank) is parsed against the Categories, Silouhettes,
/// Colors, Fabrics, Tags and EventTypes fields with every term required. When <c>Seasons</c> is
/// null it is replaced with all four seasons, which mutates the caller's object.
/// </param>
/// <param name="start">Start record to retrieve (zero based)</param>
/// <param name="max">Amount of records to receive from the start index number.</param>
/// <param name="sortField">Sort Field</param>
/// <param name="isDescending">Mark if sorting in descending order</param>
/// <returns>
/// A SearchEngineResponse object containing the total of records and found items; an empty
/// response when the index directory does not exist.
/// </returns>
public SearchEngineResponse Search(SearchParameters parameters, int start, int max, string sortField, bool isDescending)
{
    try
    {
        if (parameters.Seasons == null)
        {
            parameters.Seasons = "fall winter spring summer".Split(' ');
        }

        var list = new List<SearchEngineResult>();
        Query bodyQuery = null;

        // Only build a free-text query when there is something other than whitespace to parse.
        if (!string.IsNullOrEmpty(parameters.FreeText) && parameters.FreeText.Trim().Length > 0)
        {
            QueryParser parser = new MultiFieldQueryParser(
                Lucene.Net.Util.Version.LUCENE_29,
                new string[] { Categories, Silouhettes, Colors, Fabrics, Tags, EventTypes },
                _analyzer);
            parser.SetDefaultOperator(QueryParser.Operator.AND);

            try
            {
                bodyQuery = parser.Parse(parameters.FreeText);
            }
            catch (ParseException)
            {
                // Free text is user input and may not be valid query syntax (unbalanced quotes,
                // stray ':' ...). Fall back to searching for the text literally instead of failing.
                bodyQuery = parser.Parse(QueryParser.Escape(parameters.FreeText));
            }
        }

        return PerformQuery(list, bodyQuery, start, max, -1, -1, parameters, sortField, isDescending);
    }
    catch (Lucene.Net.Store.NoSuchDirectoryException)
    {
        // No index on disk yet: report "nothing found" rather than an error.
        return new SearchEngineResponse
        {
            TotalCount = 0,
            Results = new List<SearchEngineResult>()
        };
    }
}
/// <summary>
/// Runs a keyword search over the "title", "Category" and "Desc" fields of the index stored
/// under "~/Search/" and reports how many documents matched.
/// </summary>
/// <param name="indexDir">
/// Ignored: the value is overwritten with the mapped "~/Search/" path.
/// NOTE(review): confirm whether callers expect their own path to be honoured.
/// </param>
/// <param name="q">Raw keywords; they are passed through <c>GetKeyWordsSplitBySpace</c> with a PanGuTokenizer before parsing.</param>
/// <param name="pageSize">Currently unused.</param>
/// <param name="pageIndex">Currently unused.</param>
/// <param name="recCount">Receives the total number of matching documents.</param>
public void Search(string indexDir, string q, int pageSize, int pageIndex, out int recCount)
{
    // The caller-supplied directory has always been replaced by the site's search folder;
    // that behaviour is kept so existing callers see no change.
    indexDir = HttpContext.Current.Server.MapPath("~/Search/");

    var search = new IndexSearcher(indexDir);
    try
    {
        q = GetKeyWordsSplitBySpace(q, new PanGuTokenizer());

        string[] fields = { "title", "Category", "Desc" };
        QueryParser qp = new MultiFieldQueryParser(fields, new PanGuAnalyzer(true));
        qp.SetDefaultOperator(Lucene.Net.QueryParsers.QueryParser.OR_OPERATOR);
        Query query = qp.Parse(q);

        // One search is enough: the previous second (top-100) search and the loops that loaded
        // every document only to discard the field value produced no observable result.
        Hits hits = search.Search(query);
        recCount = hits.Length();
    }
    finally
    {
        // Release the index files even when parsing or searching throws.
        search.Close();
    }
}
/// <summary>
/// Queries the index and returns one window of rows, sorted by a single string field.
/// </summary>
/// <param name="searchQuery">Query text. When blank (or when no search fields are given) all documents are matched, optionally restricted by <paramref name="approvalState"/>.</param>
/// <param name="searchFields">Fields to search. More than one field uses a multi-field parser with AND as default operator; a single field uses a plain parser with its default operator.</param>
/// <param name="sortField">Field to sort by. When blank, the created field is used and the order is forced to descending.</param>
/// <param name="sortDescending">True to sort in descending order.</param>
/// <param name="count">Maximum number of rows to return.</param>
/// <param name="skip">Number of leading hits to skip.</param>
/// <param name="approvalState">
/// Approval filter, applied only when no text query is run.
/// NOTE(review): the filter is not combined with a text query - confirm this is intended.
/// </param>
/// <returns>The total hit count, the rows of the requested window, and the effective sort settings.</returns>
private Result GetSearchResults(string searchQuery, string[] searchFields, string sortField, bool sortDescending, int count, int skip, ApprovalState approvalState = ApprovalState.Any)
{
    var reader = GetIndexReader();
    try
    {
        var searcher = GetIndexSearcher(reader);
        try
        {
            string sortFieldName;
            if (string.IsNullOrWhiteSpace(sortField))
            {
                sortField = sortFieldName = CreatedField;
                sortDescending = true;
            }
            else if (sortField == CreatedField)
            {
                sortFieldName = CreatedField;
            }
            else
            {
                sortFieldName = FieldNameForSorting(sortField);
            }

            Query query;
            if (string.IsNullOrWhiteSpace(searchQuery) == false && searchFields != null && searchFields.Length > 0)
            {
                QueryParser parser;
                if (searchFields.Length > 1)
                {
                    parser = new MultiFieldQueryParser(Version.LUCENE_29, searchFields, GetAnalyzer());
                    parser.SetDefaultOperator(QueryParser.Operator.AND);
                }
                else
                {
                    parser = new QueryParser(Version.LUCENE_29, searchFields[0], GetAnalyzer());
                }

                Log.Info("searchQuery =" + searchQuery, null);

                // Both parser flavours now share the same handling: trim the input and, when it is
                // not valid query syntax, retry with the text escaped so it is searched literally.
                string trimmedQuery = searchQuery.Trim();
                try
                {
                    query = parser.Parse(trimmedQuery);
                }
                catch (ParseException ex)
                {
                    Log.Error(ex, ex.Message, null);
                    query = parser.Parse(QueryParser.Escape(trimmedQuery));
                }
            }
            else
            {
                query = approvalState == ApprovalState.Any
                    ? new MatchAllDocsQuery()
                    : (Query) new TermQuery(new Term(ApprovalField, approvalState.ToString()));
            }

            // Ask for every document so that paging below can address any offset.
            var docs = searcher.Search(
                query,
                null,
                reader.MaxDoc(),
                new Sort(new SortField(sortFieldName, SortField.STRING, sortDescending)));

            var scoreDocs = docs.ScoreDocs;
            var rows = new List<Row>();

            for (var i = skip; i < (skip + count) && i < scoreDocs.Length; i++)
            {
                if (reader.IsDeleted(scoreDocs[i].doc))
                {
                    continue;
                }

                rows.Add(ParseRow(searcher.Doc(scoreDocs[i].doc)));
            }

            return new Result(scoreDocs.Length, rows, sortField, sortDescending);
        }
        finally
        {
            // Previously skipped when parsing or searching threw.
            searcher.Close();
        }
    }
    finally
    {
        reader.Close();
    }
}
/// <summary>
/// Searches the article index by category and/or keyword and returns one page of results,
/// with the keyword highlighted in titles and abstracts.
/// </summary>
/// <param name="query">
/// Search criteria. <c>PageIndex</c> is zero based. <c>Title</c> holds the keywords and is
/// overwritten with its scrubbed form, so the caller's object is modified.
/// </param>
/// <returns>
/// The requested page together with total hit count, paging values and search duration in
/// seconds; an empty data set when the index directory does not exist.
/// </returns>
public static SearchResultDataSet<Article> Search(ArticleQuery query)
{
    // Raise the user-search event.
    GlobalEvents.UserSearch(query.Title);

    // Without an index on disk there is nothing to search: return an empty data set.
    if (!GlobalSettings.CheckFileExist(PhysicalIndexDirectory))
    {
        return new SearchResultDataSet<Article>();
    }

    // UTC avoids bogus durations when the local clock shifts (e.g. daylight saving).
    DateTime startTime = DateTime.UtcNow;
    BooleanQuery currentQuery = new BooleanQuery();

    // CategoryID
    if (query.CategoryID.HasValue && query.CategoryID.Value != 0)
    {
        Term categoryIDTerm = new Term(NewsIndexField.CategoryID, query.CategoryID.ToString());
        currentQuery.Add(new TermQuery(categoryIDTerm), BooleanClause.Occur.MUST);
    }

    // Keyword
    if (!string.IsNullOrEmpty(query.Title))
    {
        query.Title = SearchHelper.LuceneKeywordsScrubber(query.Title);

        // Scrubbing may leave nothing behind, hence the second check.
        if (!string.IsNullOrEmpty(query.Title))
        {
            string[] searchFieldsForKeyword =
            {
                NewsIndexField.Title,
                NewsIndexField.SubTitle,
                NewsIndexField.Abstract,
                NewsIndexField.Keywords
            };

            MultiFieldQueryParser articleWordQueryParser = new MultiFieldQueryParser(searchFieldsForKeyword, SearchHelper.GetChineseAnalyzer());
            articleWordQueryParser.SetLowercaseExpandedTerms(true);
            articleWordQueryParser.SetDefaultOperator(QueryParser.OR_OPERATOR);

            string keyWordsSplit = SearchHelper.SplitKeywordsBySpace(query.Title);
            Query articleWordQuery = articleWordQueryParser.Parse(keyWordsSplit);
            currentQuery.Add(articleWordQuery, BooleanClause.Occur.MUST);
        }
    }

    // Search
    SearchResultDataSet<Article> articles;
    int totalHits;

    IndexSearcher searcher = new IndexSearcher(PhysicalIndexDirectory);
    try
    {
        Hits hits = searcher.Search(currentQuery);
        totalHits = hits.Length();
        articles = new SearchResultDataSet<Article>();

        int pageLowerBound = query.PageIndex * query.PageSize;
        int pageUpperBound = Math.Min(pageLowerBound + query.PageSize, totalHits);

        // Highlighting is only set up when there are keywords to highlight.
        PanGu.HighLight.Highlighter highlighter = null;
        if (!string.IsNullOrEmpty(query.Title))
        {
            highlighter = new PanGu.HighLight.Highlighter(
                new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"#c60a00\">", "</font>"),
                new PanGu.Segment());
            highlighter.FragmentSize = 100;
        }

        for (int i = pageLowerBound; i < pageUpperBound; i++)
        {
            Article item = ConvertDocumentToArticle(hits.Doc(i));

            if (highlighter != null)
            {
                // NOTE(review): MaxNumFragmentsRequired is used here as a minimum text length
                // before highlighting is attempted - confirm that this is the intended constant.
                string bestBody = null;
                if (!string.IsNullOrEmpty(item.Abstract) && item.Abstract.Length > MaxNumFragmentsRequired)
                {
                    bestBody = highlighter.GetBestFragment(query.Title, item.Abstract);
                }

                // No highlighted fragment: fall back to a trimmed abstract.
                item.Abstract = !string.IsNullOrEmpty(bestBody)
                    ? bestBody
                    : HtmlHelper.TrimHtml(item.Abstract, 100);

                string bestSubject = null;
                if (!string.IsNullOrEmpty(item.Title) && item.Title.Length > MaxNumFragmentsRequired)
                {
                    bestSubject = highlighter.GetBestFragment(query.Title, item.Title);
                }

                if (!string.IsNullOrEmpty(bestSubject))
                {
                    item.Title = bestSubject;
                }
            }

            articles.Records.Add(item);
        }
    }
    finally
    {
        // Previously skipped when searching, converting or highlighting threw.
        searcher.Close();
    }

    articles.TotalRecords = totalHits;
    DateTime endTime = DateTime.UtcNow;
    articles.SearchDuration = (endTime.Ticks - startTime.Ticks) / 1E7f; // ticks -> seconds
    articles.PageIndex = query.PageIndex;
    articles.PageSize = query.PageSize;
    return articles;
}
/// <summary>
/// Searches the specified phrase in the specified search fields.
/// </summary>
/// <remarks>
/// At most 100 results are returned. Each result's relevance is the hit score multiplied by 100.
/// </remarks>
/// <param name="wiki">The wiki.</param>
/// <param name="searchFields">The search fields.</param>
/// <param name="phrase">The phrase to search.</param>
/// <param name="searchOption">The search options: all words (AND) or at least one word (OR) between terms.</param>
/// <returns>A list of <see cref="SearchResult"/> items; an empty list when the phrase cannot be parsed.</returns>
public static List<SearchResult> Search(string wiki, SearchField[] searchFields, string phrase, SearchOptions searchOption)
{
    IIndexDirectoryProviderV40 indexDirectoryProvider = Collectors.CollectorsBox.GetIndexDirectoryProvider(wiki);
    Analyzer analyzer = new SimpleAnalyzer();

    IndexSearcher searcher = new IndexSearcher(indexDirectoryProvider.GetDirectory(), false);
    try
    {
        string[] searchFieldsAsString = (from f in searchFields select f.AsString()).ToArray();
        MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_29, searchFieldsAsString, analyzer);

        if (searchOption == SearchOptions.AllWords)
        {
            queryParser.SetDefaultOperator(QueryParser.Operator.AND);
        }
        if (searchOption == SearchOptions.AtLeastOneWord)
        {
            queryParser.SetDefaultOperator(QueryParser.Operator.OR);
        }

        Query query = queryParser.Parse(phrase);
        TopDocs topDocs = searcher.Search(query, 100);

        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<b class=\"searchkeyword\">", "</b>"), new QueryScorer(query));

        // Size the list for what is actually returned, not for every hit in the index.
        int resultCount = Math.Min(100, topDocs.totalHits);
        List<SearchResult> searchResults = new List<SearchResult>(resultCount);

        for (int i = 0; i < resultCount; i++)
        {
            Document doc = searcher.Doc(topDocs.scoreDocs[i].doc);

            SearchResult result = new SearchResult();
            result.DocumentType = DocumentTypeFromString(doc.GetField(SearchField.DocumentType.AsString()).StringValue());
            result.Relevance = topDocs.scoreDocs[i].score * 100;

            switch (result.DocumentType)
            {
                case DocumentType.Page:
                    PageDocument page = new PageDocument();
                    page.Wiki = doc.GetField(SearchField.Wiki.AsString()).StringValue();
                    page.PageFullName = doc.GetField(SearchField.PageFullName.AsString()).StringValue();
                    page.Title = doc.GetField(SearchField.Title.AsString()).StringValue();

                    TokenStream tokenStream1 = analyzer.TokenStream(SearchField.Title.AsString(), new StringReader(page.Title));
                    page.HighlightedTitle = highlighter.GetBestFragments(tokenStream1, page.Title, 3, " [...] ");

                    page.Content = doc.GetField(SearchField.Content.AsString()).StringValue();
                    tokenStream1 = analyzer.TokenStream(SearchField.Content.AsString(), new StringReader(page.Content));
                    page.HighlightedContent = highlighter.GetBestFragments(tokenStream1, page.Content, 3, " [...] ");

                    result.Document = page;
                    break;

                case DocumentType.Message:
                    MessageDocument message = new MessageDocument();
                    message.Wiki = doc.GetField(SearchField.Wiki.AsString()).StringValue();
                    message.PageFullName = doc.GetField(SearchField.PageFullName.AsString()).StringValue();
                    message.DateTime = DateTime.Parse(doc.GetField(SearchField.MessageDateTime.AsString()).StringValue());
                    message.Subject = doc.GetField(SearchField.Title.AsString()).StringValue();
                    message.Body = doc.GetField(SearchField.Content.AsString()).StringValue();

                    TokenStream tokenStream2 = analyzer.TokenStream(SearchField.Content.AsString(), new StringReader(message.Body));
                    message.HighlightedBody = highlighter.GetBestFragments(tokenStream2, message.Body, 3, " [...] ");

                    result.Document = message;
                    break;

                case DocumentType.Attachment:
                    PageAttachmentDocument attachment = new PageAttachmentDocument();
                    attachment.Wiki = doc.GetField(SearchField.Wiki.AsString()).StringValue();
                    attachment.PageFullName = doc.GetField(SearchField.PageFullName.AsString()).StringValue();
                    attachment.FileName = doc.GetField(SearchField.FileName.AsString()).StringValue();
                    attachment.FileContent = doc.GetField(SearchField.FileContent.AsString()).StringValue();

                    TokenStream tokenStream3 = analyzer.TokenStream(SearchField.Content.AsString(), new StringReader(attachment.FileContent));
                    attachment.HighlightedFileContent = highlighter.GetBestFragments(tokenStream3, attachment.FileContent, 3, " [...] ");

                    result.Document = attachment;
                    break;

                case DocumentType.File:
                    FileDocument file = new FileDocument();
                    file.Wiki = doc.GetField(SearchField.Wiki.AsString()).StringValue();
                    file.FileName = doc.GetField(SearchField.FileName.AsString()).StringValue();
                    file.FileContent = doc.GetField(SearchField.FileContent.AsString()).StringValue();

                    TokenStream tokenStream4 = analyzer.TokenStream(SearchField.Content.AsString(), new StringReader(file.FileContent));
                    file.HighlightedFileContent = highlighter.GetBestFragments(tokenStream4, file.FileContent, 3, " [...]");

                    result.Document = file;
                    break;
            }

            searchResults.Add(result);
        }

        return searchResults;
    }
    catch (ParseException)
    {
        // An unparsable phrase is reported as "no results".
        return new List<SearchResult>(0);
    }
    finally
    {
        // Close on every path; previously the searcher stayed open whenever an exception
        // (including the handled ParseException) occurred.
        searcher.Close();
    }
}
} // constructor

/// <summary>
/// Searches the keyword index using the keywordQuery.
///
/// See http://www.dotlucene.net/documentation/QuerySyntax.html for the format of the keywordQuery.
///
/// This function will return a fully-filled array of IndexableFileInfo objects.
/// </summary>
/// <param name="keywordQuery">Query matched against the "title" and "contents" fields, with all terms required. Null or empty yields an empty array.</param>
/// <param name="queryForHighlighter">Query whose terms are highlighted in the returned fragments. Null or empty disables highlighting in effect (a random GUID is used as the query).</param>
/// <returns>One entry per hit, sorted by relevance; an empty array when the index cannot be opened.</returns>
public IndexableFileInfo[] doSearch(string keywordQuery, string queryForHighlighter)
{
    // Nothing to search for - do not even open the index.
    if (string.IsNullOrEmpty(keywordQuery))
    {
        return new IndexableFileInfo[0];
    }

    IndexReader indexReader = null;
    IndexSearcher searcher;
    try
    {
        FSDirectory indexDir = FSDirectory.GetDirectory(this.luceneIndexDir, false);
        indexReader = IndexReader.Open(indexDir);
        searcher = new IndexSearcher(indexReader);
    }
    catch
    {
        // If the luceneIndexDir does not contain index files (yet), opening it throws;
        // deliberately treated as "no results". Release the reader if it was already opened.
        if (indexReader != null)
        {
            indexReader.Close();
        }
        return new IndexableFileInfo[0];
    }

    List<IndexableFileInfo> results = new List<IndexableFileInfo>();
    try
    {
        // Parsing an empty query throws, so an unguessable token that matches nothing is
        // substituted when no highlighter query is supplied.
        string highlighterQueryText = string.IsNullOrEmpty(queryForHighlighter)
            ? Guid.NewGuid().ToString()
            : queryForHighlighter;

        // NOTE(review): an older comment here said to use the StandardAnalyzer because the
        // SimpleAnalyzer mishandles integer fields, and an earlier revision used
        // hatWebPortalAnalyzer - yet the code parses with SimpleAnalyzer. Confirm which is intended.
        MultiFieldQueryParser queryParser = new MultiFieldQueryParser(new string[] { "title", "contents" }, new SimpleAnalyzer());
        queryParser.SetDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = queryParser.Parse(keywordQuery);

        // One analyzer instance serves the highlighter query and every hit's token stream.
        hatWebPortalAnalyzer highlightAnalyzer = new hatWebPortalAnalyzer();
        QueryParser highlightQueryParser = new QueryParser("contents", highlightAnalyzer);
        Query highlighterQuery = highlightQueryParser.Parse(highlighterQueryText);

        query = searcher.Rewrite(query); // is this needed?? " Expert: called to re-write queries into primitive queries."

        // search
        Hits hits = searcher.Search(query, Sort.RELEVANCE);

        // create highlighter
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<strong>", "</strong>"), new QueryScorer(highlighterQuery));

        // -- go through hits and return results
        for (int i = 0; i < hits.Length(); i++)
        {
            Document d = hits.Doc(i);
            string filename = d.Get("filename");
            string plainText = d.Get("contents");
            string title = d.Get("title");
            string sectionName = d.Get("SectionName");
            string filenameParams = d.Get("filenameParams");
            bool contentIsPageSummary = Convert.ToBoolean(d.Get("contentIsPageSummary"));
            double score = Convert.ToDouble(hits.Score(i));
            DateTime lastModified = DateTools.StringToDate(d.Get("LastModified"));

            // Page summaries are returned verbatim; other content is reduced to its best fragments.
            string fragment = plainText;
            if (!contentIsPageSummary)
            {
                TokenStream tokenStream = highlightAnalyzer.TokenStream("contents", new StringReader(plainText));
                fragment = highlighter.GetBestFragments(tokenStream, plainText, 2, "...");
            }

            results.Add(new IndexableFileInfo(filename, filenameParams, title, fragment, sectionName, lastModified, contentIsPageSummary, score));
        } // for
    }
    finally
    {
        searcher.Close();
        indexReader.Close();
    }

    return results.ToArray();
} // doSearch
/// <summary>
/// Combines the optional text query with the filters found in <paramref name="parameters"/>
/// (every clause is required), runs the search and copies one window of hits into
/// <paramref name="list"/>.
/// </summary>
/// <param name="list">Collection that receives the created results; it is also returned inside the response.</param>
/// <param name="queryOrig">Already-parsed text query, or null for none.</param>
/// <param name="start">Zero-based index of the first hit to return.</param>
/// <param name="max">Maximum number of hits to return.</param>
/// <param name="blogId">Not used by this method.</param>
/// <param name="entryId">Not used by this method.</param>
/// <param name="parameters">Filter values; null means no filters.</param>
/// <param name="sortField">Integer field to sort by, or null to use the searcher's default ordering.</param>
/// <param name="isDescending">True to reverse the sort order.</param>
/// <returns>The total hit count and the collected results.</returns>
private SearchEngineResponse PerformQuery(ICollection<SearchEngineResult> list, Query queryOrig, int start, int max, int blogId, int entryId, SearchParameters parameters, string sortField, bool isDescending)
{
    var query = new BooleanQuery();

    // Every clause is mandatory, so funnel them all through one helper.
    Action<Query> require = clause => query.Add(clause, BooleanClause.Occur.MUST);

    if (queryOrig != null)
    {
        require(queryOrig);
    }

    if (parameters != null)
    {
        if (parameters.CreatedByMe.HasValue)
        {
            require(new TermQuery(new Term(CreatedByMe, Convert.ToInt32(parameters.CreatedByMe).ToString())));
        }

        if (parameters.FlavorId.HasValue)
        {
            require(GetIntRangeQuery(FlavorId, parameters.FlavorId.Value));
        }

        if (parameters.IsRatedByMe.HasValue && parameters.IsRatedByMe.Value)
        {
            require(GetIntRangeQuery(MyRating, 1, 5));
        }

        if (parameters.IsRatedByFriends.HasValue && parameters.IsRatedByFriends.Value)
        {
            require(GetIntRangeQuery(FriendRating, 1, 5));
        }

        if (parameters.MyRating.HasValue)
        {
            require(GetIntRangeQuery(MyRating, parameters.MyRating.Value));
        }

        if (parameters.FriendRating.HasValue)
        {
            require(GetIntRangeQuery(FriendRating, parameters.FriendRating.Value));
        }

        if (parameters.IsUpToDate)
        {
            require(new TermQuery(new Term(IsUpToDate, Convert.ToInt32(parameters.IsUpToDate).ToString())));
        }

        if (parameters.TrendKeywords != null && parameters.TrendKeywords.Length > 0)
        {
            // Any one of the keywords may match, in any of the listed fields.
            QueryParser trendParser = new MultiFieldQueryParser(
                Lucene.Net.Util.Version.LUCENE_29,
                new string[] { Categories, Silouhettes, Colors, Fabrics, Seasons, EventTypes },
                _analyzer);
            trendParser.SetDefaultOperator(QueryParser.Operator.OR);

            string trendText = string.Join(" ", parameters.TrendKeywords);
            require(trendParser.Parse(trendText));
        }

        if (parameters.EventTypes != null && parameters.EventTypes.Length > 0)
        {
            // Each event type is quoted as a phrase and all of them must match.
            QueryParser eventParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, EventTypes, _analyzer);
            eventParser.SetDefaultOperator(QueryParser.Operator.AND);

            string[] quotedEventTypes = parameters.EventTypes.Select(x => "\"" + x + "\"").ToArray();
            require(eventParser.Parse(string.Join(" ", quotedEventTypes)));
        }

        if (parameters.Seasons != null && parameters.Seasons.Length > 0)
        {
            // Any one of the seasons may match.
            QueryParser seasonParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, Seasons, _analyzer);
            seasonParser.SetDefaultOperator(QueryParser.Operator.OR);

            require(seasonParser.Parse(string.Join(" ", parameters.Seasons)));
        }

        // Exact-term filter on the colors field only.
        if (!String.IsNullOrEmpty(parameters.Color))
        {
            require(new TermQuery(new Term(Colors, parameters.Color)));
        }
    }

    Hits hits = sortField == null
        ? Searcher.Search(query)
        : Searcher.Search(query, null, new Sort(new SortField(sortField, SortField.INT, isDescending)));

    int total = hits.Length();

    // Copy hits from 'start' until either the hits or the 'max' budget run out.
    int taken = 0;
    int index = start;
    while (index < total && taken < max)
    {
        list.Add(CreateSearchResult(hits.Doc(index), 0));
        index++;
        taken++;
    }

    return new SearchEngineResponse
    {
        TotalCount = total,
        Results = list
    };
}