/// <summary>
/// Builds the Lucene query for the search term held by <paramref name="searchFilter"/>.
/// </summary>
/// <param name="searchFilter">Filter whose <c>SearchTerm</c> is parsed into clauses.</param>
/// <returns>
/// A <see cref="MatchAllDocsQuery"/> when neither field-specific nor general queries survive the
/// degenerate-query filter; otherwise the general query, which is additionally AND-ed with every
/// field-specific query when at least one exists.
/// </returns>
private static Query ParseQuery(SearchFilter searchFilter)
{
    // Split the search text into clauses. Clauses that name a Field are treated as filters;
    // clauses without a Field are the general terms. Either group may be empty.
    var parsedClauses = new NuGetQueryParser()
        .Parse(searchFilter.SearchTerm)
        .Select(StandardizeSearchTerms)
        .ToList();
    var targetedTerms = parsedClauses.Where(clause => clause.Field != null);
    var untargetedTerms = parsedClauses.Where(clause => clause.Field == null);

    // Translate every clause into a Lucene query and drop the degenerate ones.
    var analyzer = new PerFieldAnalyzer();
    var targetedQueries = targetedTerms
        .Select(clause => AnalysisHelper.GetFieldQuery(analyzer, clause.Field, clause.TermOrPhrase))
        .Where(candidate => !IsDegenerateQuery(candidate))
        .ToList();
    var untargetedQueries = untargetedTerms
        .Select(clause => AnalysisHelper.GetMultiFieldQuery(analyzer, Fields, clause.TermOrPhrase))
        .Where(candidate => !IsDegenerateQuery(candidate))
        .ToList();

    bool nothingTargeted = targetedQueries.Count == 0;
    if (nothingTargeted && untargetedQueries.Count == 0)
    {
        return new MatchAllDocsQuery();
    }

    // Guessing at user intent:
    //   a) general search              [foo bar]
    //   b) Id-targeted search          [id:Foo bar]
    //   c) other field-targeted search [author:Foo bar]
    // The exact-Id flag handed to BuildGeneralQuery is set only when no field-specific query exists.
    Query generalQuery = BuildGeneralQuery(
        nothingTargeted,
        searchFilter.SearchTerm,
        analyzer,
        untargetedTerms,
        untargetedQueries);

    if (nothingTargeted)
    {
        return generalQuery;
    }

    // Field targeting is in play: require the general query (if usable) AND every field-specific query.
    var conjunction = new BooleanQuery();
    if (!IsDegenerateQuery(generalQuery))
    {
        conjunction.Add(generalQuery, Occur.MUST);
    }

    foreach (var targetedQuery in targetedQueries)
    {
        if (IsDegenerateQuery(targetedQuery))
        {
            continue;
        }

        conjunction.Add(targetedQuery, Occur.MUST);
    }

    return conjunction;
}
/// <summary>
/// Creates the <see cref="IndexWriter"/> for <c>_directory</c>, stores it in <c>_indexWriter</c>
/// and registers it in <c>WriterCache</c>.
/// </summary>
/// <param name="creatingIndex">Passed through as the writer's <c>create</c> argument.</param>
private void EnsureIndexWriterCore(bool creatingIndex)
{
    var perFieldAnalyzer = new PerFieldAnalyzer();

    _indexWriter = new IndexWriter(
        _directory,
        perFieldAnalyzer,
        create: creatingIndex,
        mfl: IndexWriter.MaxFieldLength.UNLIMITED);

    // Because of locking, GetOrAdd is expected to always take the "add" path,
    // i.e. hand back the writer that was just created.
    var cachedWriter = WriterCache.GetOrAdd(_directory, _indexWriter);
    Debug.Assert(cachedWriter == _indexWriter);
}
/// <summary>
/// Creates the <see cref="IndexWriter"/> for <c>_directory</c>, stores it in <c>_indexWriter</c>
/// and registers it in <c>WriterCache</c>. If the write lock cannot be obtained, the lock on the
/// index directory is force-released and the writer is created a second time.
/// </summary>
/// <param name="creatingIndex">Passed through as the writer's <c>create</c> argument.</param>
/// <remarks>
/// A <see cref="LockObtainFailedException"/> from the first attempt is reported through
/// <c>ErrorSignal</c> and then swallowed; an exception thrown by the retry propagates to the caller.
/// </remarks>
private void EnsureIndexWriterCore(bool creatingIndex)
{
    var analyzer = new PerFieldAnalyzer();
    try
    {
        _indexWriter = new IndexWriter(_directory, analyzer, create: creatingIndex, mfl: IndexWriter.MaxFieldLength.UNLIMITED);
    }
    catch (LockObtainFailedException ex)
    {
        // The write lock is held (presumably left behind by an earlier writer - confirm).
        // Open a throw-away handle on the index directory just to release that lock, and dispose
        // the handle immediately so it does not leak.
        var luceneIndexDirectory = new DirectoryInfo(LuceneCommon.IndexDirectory);
        using (FSDirectory luceneFSDirectory = FSDirectory.Open(luceneIndexDirectory, new Lucene.Net.Store.SimpleFSLockFactory(luceneIndexDirectory)))
        {
            IndexWriter.Unlock(luceneFSDirectory);
        }

        // Second and final attempt; a failure here is not caught.
        _indexWriter = new IndexWriter(_directory, analyzer, create: creatingIndex, mfl: IndexWriter.MaxFieldLength.UNLIMITED);

        // Log but swallow the original exception.
        ErrorSignal.FromCurrentContext().Raise(ex);
    }

    // Because of locking, GetOrAdd is expected to always take the "add" path.
    var got = WriterCache.GetOrAdd(_directory, _indexWriter);
    Debug.Assert(got == _indexWriter);
}
/// <summary>
/// Converts <c>searchFilter.SearchTerm</c> into a Lucene <see cref="Query"/>.
/// </summary>
/// <param name="searchFilter">Carries the raw search term to parse.</param>
/// <returns>
/// <see cref="MatchAllDocsQuery"/> if no non-degenerate query could be produced from the term.
/// Otherwise the query from <c>BuildGeneralQuery</c>, wrapped in a <see cref="BooleanQuery"/>
/// together with all field-specific queries (each as <c>Occur.MUST</c>) when any field-specific
/// query exists.
/// </returns>
private static Query ParseQuery(SearchFilter searchFilter)
{
    // Step 1: parse the term into clauses. A clause either targets a field or is a general term;
    // field clauses are mostly meant to filter the hits of the general terms. Both sets may be empty.
    var parser = new NuGetQueryParser();
    var clauses = parser.Parse(searchFilter.SearchTerm).Select(StandardizeSearchTerms).ToList();
    var generalTerms = from clause in clauses
                       where clause.Field == null
                       select clause;

    // Step 2: build the Lucene query objects, skipping degenerate ones.
    var analyzer = new PerFieldAnalyzer();

    var fieldSpecificQueries =
        (from clause in clauses
         where clause.Field != null
         let fieldQuery = AnalysisHelper.GetFieldQuery(analyzer, clause.Field, clause.TermOrPhrase)
         where !IsDegenerateQuery(fieldQuery)
         select fieldQuery).ToList();

    var generalQueries =
        (from clause in generalTerms
         let multiFieldQuery = AnalysisHelper.GetMultiFieldQuery(analyzer, Fields, clause.TermOrPhrase)
         where !IsDegenerateQuery(multiFieldQuery)
         select multiFieldQuery).ToList();

    bool hasFieldFilters = fieldSpecificQueries.Count > 0;
    if (!hasFieldFilters && generalQueries.Count == 0)
    {
        return new MatchAllDocsQuery();
    }

    // Step 3: detect user intent - general search [foo bar], Id-targeted search [id:Foo bar],
    // or some other field-targeted search [author:Foo bar]. The exact-Id flag is passed as true
    // only when there are no field-specific queries.
    Query result = BuildGeneralQuery(!hasFieldFilters, searchFilter.SearchTerm, analyzer, generalTerms, generalQueries);

    if (hasFieldFilters)
    {
        // With field targeting, everything is AND-ed: the general query plus each field query.
        var allRequired = new BooleanQuery();

        if (!IsDegenerateQuery(result))
        {
            allRequired.Add(result, Occur.MUST);
        }

        foreach (var required in fieldSpecificQueries.Where(q => !IsDegenerateQuery(q)))
        {
            allRequired.Add(required, Occur.MUST);
        }

        result = allRequired;
    }

    return result;
}
/// <summary>
/// Builds the Lucene query for <c>searchFilter.SearchTerm</c> out of a set of boosted sub-queries
/// (exact/prefix/substring matches on <c>Id-Exact</c> and <c>Title-Exact</c>, a parsed multi-field
/// query per term, and per-field prefix wildcards).
/// </summary>
/// <param name="searchFilter">
/// Supplies the search term plus the <c>ByIdOnly</c> and <c>ExactIdOnly</c> switches.
/// </param>
/// <returns>
/// A <see cref="MatchAllDocsQuery"/> when the search term is null, empty or whitespace. Otherwise the
/// result of <c>Combine</c> over the sub-queries selected by the restriction in effect:
/// exact Id only, Id only, author/tag only, or no restriction (all sub-queries).
/// </returns>
private static Query ParseQuery(SearchFilter searchFilter)
{
    if (String.IsNullOrWhiteSpace(searchFilter.SearchTerm))
    {
        return new MatchAllDocsQuery();
    }

    var fields = new[] { "Id", "Title", "Tags", "Description", "Author" };
    var analyzer = new PerFieldAnalyzer();
    var queryParser = new MultiFieldQueryParser(LuceneCommon.LuceneVersion, fields, analyzer);

    // All terms in the multi-term query appear in at least one of the fields.
    var conjunctionQuery = new BooleanQuery { Boost = 2.0f };

    // Escape the entire term we use for exact title searches, then drop the (escaped) field prefixes.
    var escapedSearchTerm = Escape(searchFilter.SearchTerm)
        .Replace("id\\:", string.Empty)
        .Replace("author\\:", string.Empty)
        .Replace("tag\\:", string.Empty);

    // The term matched against Id-Exact is deliberately NOT escaped (escaping it returns wrong results).
    var idExactSearchTerm = searchFilter.SearchTerm
        .Replace("id:", string.Empty)
        .Replace("author:", string.Empty)
        .Replace("tag:", string.Empty);

    // Id matches, from most to least specific.
    var exactIdQuery = new TermQuery(new Term("Id-Exact", idExactSearchTerm)) { Boost = 7.0f };
    var relatedIdQuery = new WildcardQuery(new Term("Id-Exact", idExactSearchTerm + ".*")) { Boost = 6.5f };
    var startsIdQuery = new WildcardQuery(new Term("Id-Exact", idExactSearchTerm + "*")) { Boost = 6.0f };
    var wildCardIdQuery = new WildcardQuery(new Term("Id-Exact", "*" + idExactSearchTerm + "*")) { Boost = 3.0f };

    // Title matches, from most to least specific.
    var exactTitleQuery = new TermQuery(new Term("Title-Exact", escapedSearchTerm)) { Boost = 6.5f };
    var startsTitleQuery = new WildcardQuery(new Term("Title-Exact", escapedSearchTerm + "*")) { Boost = 5.5f };
    var wildCardTitleQuery = new WildcardQuery(new Term("Title-Exact", "*" + escapedSearchTerm + "*")) { Boost = 2.5f };

    // Suffix wildcard search e.g. jquer*
    var wildCardQuery = new BooleanQuery { Boost = 0.5f };

    // GetSearchTerms() escapes the search terms, so do not escape again.
    var terms = GetSearchTerms(searchFilter.SearchTerm).ToList();

    // Prefix checks are ordinal: the prefixes are fixed ASCII markers, not linguistic text.
    bool onlySearchByExactId = searchFilter.ExactIdOnly;
    bool onlySearchById = searchFilter.ByIdOnly
        || onlySearchByExactId
        || terms.AnySafe(t => t.StartsWith("id\\:", StringComparison.Ordinal));
    bool onlySearchByAuthor = terms.AnySafe(t => t.StartsWith("author\\:", StringComparison.Ordinal));
    bool onlySearchByTag = terms.AnySafe(t => t.StartsWith("tag\\:", StringComparison.Ordinal));
    bool searchLimiter = onlySearchById || onlySearchByAuthor || onlySearchByTag;

    // The restrictions and the boost do not depend on the individual term, so resolve them once.
    // Each active restriction narrows the wildcard fields to its own field; if several restrictions
    // are active at once, no field satisfies all of them and no wildcard clause is added.
    var wildCardFields = fields
        .Where(field => (!onlySearchById || field == "Id")
                     && (!onlySearchByAuthor || field == "Author")
                     && (!onlySearchByTag || field == "Tags"))
        .ToArray();
    float wildCardTermBoost = searchLimiter ? 7.0f : 0.7f;

    foreach (var term in terms)
    {
        var localTerm = term
            .Replace("id\\:", string.Empty)
            .Replace("author\\:", string.Empty)
            .Replace("tag\\:", string.Empty);

        conjunctionQuery.Add(queryParser.Parse(localTerm), Occur.MUST);

        foreach (var field in wildCardFields)
        {
            var wildCardTermQuery = new WildcardQuery(new Term(field, localTerm + "*")) { Boost = wildCardTermBoost };

            // Every per-field wildcard clause is required (MUST), as in the original implementation.
            wildCardQuery.Add(wildCardTermQuery, Occur.MUST);
        }
    }

    // Pick the sub-queries for the restriction in effect; the most specific restriction wins.
    Query[] selectedQueries;
    if (onlySearchByExactId)
    {
        selectedQueries = new Query[] { exactIdQuery };
    }
    else if (onlySearchById)
    {
        selectedQueries = new Query[] { exactIdQuery, relatedIdQuery, startsIdQuery, wildCardIdQuery, wildCardQuery };
    }
    else if (onlySearchByAuthor || onlySearchByTag)
    {
        selectedQueries = new Query[] { wildCardQuery };
    }
    else
    {
        selectedQueries = new Query[]
        {
            exactIdQuery, relatedIdQuery, exactTitleQuery, startsIdQuery, startsTitleQuery,
            wildCardIdQuery, wildCardTitleQuery, conjunctionQuery, wildCardQuery
        };
    }

    // Create an OR of the selected queries.
    return conjunctionQuery.Combine(selectedQueries);
}