/// <summary>
/// Adds a filter clause for <paramref name="fieldName"/> to <paramref name="query"/>.
/// A single filter value is delegated to the scalar overload; multiple values are
/// combined into a nested OR (SHOULD) of wildcard queries that is ANDed (MUST) in.
/// </summary>
/// <param name="fieldName">Name of the field (lower-cased before use).</param>
/// <param name="query">The boolean query to append to.</param>
/// <param name="filter">The filter values; empty and null entries are skipped.</param>
protected void AddQuery(string fieldName, BooleanQuery query, StringCollection filter)
{
    fieldName = fieldName.ToLower();

    if (filter.Count == 0)
    {
        return;
    }

    if (filter.Count == 1)
    {
        // Single value: reuse the scalar overload.
        if (!String.IsNullOrEmpty(filter[0]))
        {
            this.AddQuery(fieldName, query, filter[0].ToLower());
        }
        return;
    }

    // Multiple values: any one may match (OR), but the group as a whole is required.
    var anyValueQuery = new BooleanQuery();
    var hasAnyValue = false;
    foreach (var filterValue in filter)
    {
        if (String.IsNullOrEmpty(filterValue))
        {
            continue;
        }

        anyValueQuery.Add(new WildcardQuery(new Term(fieldName, filterValue)), Occur.SHOULD);
        hasAnyValue = true;
    }

    if (hasAnyValue)
    {
        query.Add(anyValueQuery, Occur.MUST);
    }
}
/// <summary> Simple similarity query generators.
/// Takes every unique word and forms a boolean query where all words are optional.
/// After you get this you'll use it to query your <see cref="IndexSearcher"/> for similar docs.
/// The only caveat is the first hit returned <b>should be</b> your source document - you'll
/// need to then ignore that.
///
/// <p/>
///
/// So, if you have a code fragment like this:
/// <br/>
/// <code>
/// Query q = FormSimilarQuery( "I use Lucene to search fast. Fast searchers are good", new StandardAnalyzer(), "contents", null);
/// </code>
///
/// <p/>
///
/// The query returned, in string form, will be <c>'(i use lucene to search fast searchers are good')</c>.
///
/// <p/>
/// The philosophy behind this method is "two documents are similar if they share lots of words".
/// Note that behind the scenes, Lucene's scoring algorithm will tend to give two documents a higher similarity score if they share more uncommon words.
///
/// <p/>
/// This method is fail-safe in that if a long 'body' is passed in and
/// <see cref="BooleanQuery.Add"/> (used internally)
/// throws
/// <see cref="BooleanQuery.TooManyClauses"/>, the
/// query as it is will be returned.
/// </summary>
/// <param name="body">the body of the document you want to find similar documents to
/// </param>
/// <param name="a">the analyzer to use to parse the body
/// </param>
/// <param name="field">the field you want to search on, probably something like "contents" or "body"
/// </param>
/// <param name="stop">optional set of stop words to ignore
/// </param>
/// <returns> a query with all unique words in 'body'
/// </returns>
/// <throws> IOException this can't happen...
/// </throws>
public static Query FormSimilarQuery(System.String body, Analyzer a, System.String field, ISet<string> stop)
{
    TokenStream ts = a.TokenStream(field, new System.IO.StringReader(body));
    ITermAttribute termAtt = ts.AddAttribute<ITermAttribute>();
    BooleanQuery similarityQuery = new BooleanQuery();
    // Tracks words already added so duplicates contribute only one clause.
    ISet<string> seen = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<string>();

    while (ts.IncrementToken())
    {
        String word = termAtt.Term;

        // Skip optional stop words and anything already added.
        if (stop != null && stop.Contains(word))
            continue;
        if (seen.Contains(word))
            continue;
        seen.Add(word);

        try
        {
            similarityQuery.Add(new TermQuery(new Term(field, word)), Occur.SHOULD);
        }
        catch (BooleanQuery.TooManyClauses)
        {
            // Fail-safe: return whatever has been accumulated so far.
            break;
        }
    }
    return similarityQuery;
}
/// <summary>
/// Builds a Lucene <see cref="BooleanQuery"/> from the criteria's current filters.
/// Price range filters are only applied when their currency matches the criteria's
/// currency; every produced sub-query is added as a required (MUST) clause.
/// </summary>
/// <param name="criteria">The search criteria.</param>
/// <returns>The composed <see cref="BooleanQuery"/>.</returns>
public virtual object BuildQuery(ISearchCriteria criteria)
{
    var query = new BooleanQuery();
    if (criteria.CurrentFilterValues != null)
    {
        for (var index = 0; index < criteria.CurrentFilterFields.Length; index++)
        {
            var filter = criteria.CurrentFilters.ElementAt(index);
            var value = criteria.CurrentFilterValues.ElementAt(index);
            var field = criteria.CurrentFilterFields.ElementAt(index);

            // Evaluate the special price-range case once instead of re-checking the
            // type (and re-casting) three times as before.
            var priceRangeFilter = filter.GetType() == typeof(PriceRangeFilter)
                ? (PriceRangeFilter)filter
                : null;

            // Skip currencies that are not part of the filter.
            if (priceRangeFilter != null
                && !priceRangeFilter.Currency.Equals(criteria.Currency, StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            var filterQuery = priceRangeFilter != null
                ? LuceneQueryHelper.CreateQuery(criteria, field, value as RangeFilterValue)
                : LuceneQueryHelper.CreateQuery(field, value);

            if (filterQuery != null)
            {
                query.Add(filterQuery, Occur.MUST);
            }
        }
    }
    return query;
}
/// <summary>
/// Verifies that CreateBooleanQuery with MUST produces a conjunction of term queries.
/// </summary>
public virtual void TestBooleanMust()
{
    QueryBuilder builder = new QueryBuilder(new MockAnalyzer(Random()));

    BooleanQuery expected = new BooleanQuery();
    expected.Add(new TermQuery(new Term("field", "foo")), BooleanClause.Occur.MUST);
    expected.Add(new TermQuery(new Term("field", "bar")), BooleanClause.Occur.MUST);

    Assert.AreEqual(expected, builder.CreateBooleanQuery("field", "foo bar", BooleanClause.Occur.MUST));
}
/// <summary>
/// A SpanNearQuery with unlimited slop and no ordering must match the same
/// document set as the equivalent boolean AND of its two terms.
/// </summary>
public virtual void TestSpanNearVersusBooleanAnd()
{
    Term t1 = RandomTerm();
    Term t2 = RandomTerm();

    SpanNearQuery spanQuery = new SpanNearQuery(
        new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) },
        int.MaxValue,
        false);

    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.Add(new TermQuery(t1), Occur.MUST);
    booleanQuery.Add(new TermQuery(t2), Occur.MUST);

    AssertSameSet(spanQuery, booleanQuery);
}
/// <summary>
/// Used by <see cref="DrillSideways"/>: clones <paramref name="other"/> while
/// wrapping its base query in <paramref name="filter"/>.
/// </summary>
internal DrillDownQuery(FacetsConfig config, Filter filter, DrillDownQuery other)
{
    query = new BooleanQuery(true); // disable coord

    BooleanClause[] clauses = other.query.Clauses;
    // When every clause is a drill-down dimension there is no base query to filter.
    if (clauses.Length == other.drillDownDims.Count)
    {
        throw new System.ArgumentException("cannot apply filter unless baseQuery isn't null; pass ConstantScoreQuery instead");
    }
    // Otherwise clause 0 is the base query, followed by one clause per dimension.
    Debug.Assert(clauses.Length == 1 + other.drillDownDims.Count, clauses.Length + " vs " + (1 + other.drillDownDims.Count));
    drillDownDims.AddAll(other.drillDownDims);
    // Wrap the base query with the filter; copy the drill-down clauses unchanged.
    query.Add(new FilteredQuery(clauses[0].Query, filter), Occur.MUST);
    for (int i = 1; i < clauses.Length; i++)
    {
        query.Add(clauses[i].Query, Occur.MUST);
    }
    this.config = config;
}
/// <summary>
/// Builds a faceted query: each current filter contributes a SHOULD-combined
/// filter query that is added as a required clause of the boolean filter.
/// Price range filters are skipped when their currency differs from the criteria's.
/// </summary>
/// <param name="criteria">The search criteria.</param>
/// <returns>A <see cref="QueryBuilder"/> holding the query and its filter.</returns>
public virtual object BuildQuery(ISearchCriteria criteria)
{
    var queryBuilder = new QueryBuilder();
    var queryFilter = new BooleanFilter();
    var query = new BooleanQuery();
    queryBuilder.Query = query;
    queryBuilder.Filter = queryFilter;

    if (criteria.CurrentFilters != null)
    {
        foreach (var filter in criteria.CurrentFilters)
        {
            // Evaluate the special price-range case once instead of re-checking the
            // type (and re-casting) as before.
            var priceRangeFilter = filter.GetType() == typeof(PriceRangeFilter)
                ? (PriceRangeFilter)filter
                : null;

            // Skip currencies that are not part of the filter.
            if (priceRangeFilter != null
                && !priceRangeFilter.Currency.Equals(criteria.Currency, StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            var filterQuery = LuceneQueryHelper.CreateQuery(criteria, filter, Occur.SHOULD);

            // now add other values that should also be counted?
            if (filterQuery != null)
            {
                queryFilter.Add(new FilterClause(filterQuery, Occur.MUST));
            }
        }
    }
    return queryBuilder;
}
/// <summary>
/// Recursively translates a Lucene <see cref="BooleanQuery"/> into an equivalent
/// Hibernate <see cref="Criterion"/>. MUST clauses are ANDed, SHOULD clauses are
/// ORed, and MUST_NOT clauses are negated; unsupported sub-queries are logged
/// and skipped. (Java original declared <c>throws Exception</c>.)
/// </summary>
/// <param name="bq">The boolean query to translate.</param>
/// <returns>The combined criterion, or null when no clause could be translated.</returns>
private Criterion createBooleanCriterion(BooleanQuery bq)
{
    Criterion prevCriterion = null;
    Criterion result = null;

    foreach (BooleanClause clause in bq.Clauses)
    {
        Query q = clause.Query;

        // Declaration patterns replace the original is-check followed by a cast.
        Criterion criterion;
        if (q is TermQuery termQuery)
        {
            criterion = createTermCriterion(termQuery);
        }
        else if (q is PrefixQuery prefixQuery)
        {
            criterion = createPrefixCriterion(prefixQuery);
        }
        else if (q is BooleanQuery booleanQuery)
        {
            criterion = createBooleanCriterion(booleanQuery);
        }
        else if (q is PhraseQuery phraseQuery)
        {
            criterion = createPhraseCriterion(phraseQuery);
        }
        else
        {
            Console.WriteLine("Missed In-Inner Query : " + q.GetType().FullName);
            continue;
        }

        if (clause.Occur.Equals(BooleanClause.Occur.MUST_NOT))
        {
            // NOT
            criterion = Restrictions.not(criterion);
        }
        else if (prevCriterion != null)
        {
            if (clause.Occur.Equals(BooleanClause.Occur.MUST))
            {
                // AND
                result = Restrictions.and(prevCriterion, criterion);
                criterion = result;
            }
            else if (clause.Occur.Equals(BooleanClause.Occur.SHOULD))
            {
                // OR
                result = Restrictions.or(prevCriterion, criterion);
                criterion = result;
            }
        }

        prevCriterion = criterion;
    }

    // A single translated clause never goes through the combine step above.
    if (result == null)
    {
        result = prevCriterion;
    }
    return (result);
}
/// <summary>
/// Searches the given index for <paramref name="searchString"/> and returns the first
/// page of matches; when the page size is exceeded, the remainder is produced lazily
/// via <paramref name="ongoingSearchTask"/>.
/// TODO refactor
/// </summary>
public IEnumerable<SearchResultViewModel> Search(IndexViewModel indexDirectory, string searchString, bool matchCase, bool matchWholeWord, bool useWildcards, out Task<IEnumerable<SearchResultViewModel>> ongoingSearchTask, CancellationToken cancelToken, IEnumerable<string> fileFilters = null)
{
    ongoingSearchTask = null;
    // Bail out early on an unusable index or an empty search string.
    if (!LuceneHelper.IsValidIndexDirectory(indexDirectory.IndexDirectory) || string.IsNullOrWhiteSpace(searchString))
    {
        return (Enumerable.Empty<SearchResultViewModel>());
    }

    List<SearchResultViewModel> results = new List<SearchResultViewModel>();
    IsSearching = true;
    try
    {
        using (var reader = IndexReader.Open(FSDirectory.Open(indexDirectory.IndexDirectory), true))
        using (var searcher = new IndexSearcher(reader))
        {
            BooleanQuery resultQuery = new BooleanQuery();
            if (matchWholeWord)
            {
                resultQuery.Add(new BooleanClause(BuildMatchWholeWordContentQuery(searchString, matchCase, useWildcards), Occur.MUST));
            }
            else
            {
                resultQuery.Add(new BooleanClause(BuildMatchAnywhereQuery(reader, searchString, matchCase, useWildcards), Occur.MUST));
            }

            //TODO use this instead of manual filter.
            //Doesn't work now because recursive booleanQuery doesn't work and can't say to match one of following filters.
            //Add fileFilter query
            //if (fileFilters != null && fileFilters.Any())
            //{
            //    foreach (var query in BuildFileFilterQueries(fileFilters))
            //        resultQuery.Add(new BooleanClause(query, Occur.MUST));
            //}

            //Add blacklist query
            var blacklist = Settings.CodeIDXSettings.Blacklist.BlacklistDirectories;
            if (ApplicationView.UserSettings.IsBlacklistEnabled)
            {
                foreach (var curClause in BuildBlacklistQueryClauses(blacklist))
                {
                    resultQuery.Add(curClause);
                }
            }

            // Stable result ordering: directory, then file name, then extension.
            Sort sort = new Sort(new SortField[]
            {
                new SortField(Constants.IndexFields.Directory, SortField.STRING),
                new SortField(Constants.IndexFields.Filename, SortField.STRING),
                new SortField(Constants.IndexFields.Extension, SortField.STRING)
            });
            TopFieldDocs resultCollector = searcher.Search(resultQuery, null, Int32.MaxValue, sort);

            string adjustedSearchString = matchCase ? searchString : searchString.ToLower();
            IEnumerable<string> patternParts = null;
            if (useWildcards)
            {
                patternParts = GetWildcardPatternParts(adjustedSearchString);
            }

            // Do not use Parallel.ForEach here!
            // The large number of threads created and discarded makes performance very poor.
            int lastMatchIndex = 0;
            foreach (var match in resultCollector.ScoreDocs)
            {
                if (cancelToken.IsCancellationRequested)
                {
                    return (results);
                }

                var curDoc = reader.Document(match.Doc);
                string docDirectory = curDoc.Get(Constants.IndexFields.Directory);
                string docFilename = curDoc.Get(Constants.IndexFields.Filename);
                string docExtension = curDoc.Get(Constants.IndexFields.Extension);
                string documentFilename = Path.Combine(docDirectory, docFilename) + docExtension;
                // Manual file filter (see TODO above): skip documents not in the filter list.
                if (fileFilters != null && !fileFilters.Contains(documentFilename))
                {
                    continue;
                }

                IEnumerable<LineMatch> matchingLines = GetMatchingLines(new GetMatchingLinesArgs(documentFilename, adjustedSearchString, patternParts, matchCase, matchWholeWord));
                bool isFirst = true;
                foreach (var lineMatch in matchingLines)
                {
                    results.Add(new SearchResultViewModel
                    (
                        isFirst,
                        docDirectory,
                        docFilename,
                        docExtension,
                        lineMatch.LineNumber,
                        lineMatch.Line,
                        lineMatch.Highlights
                    ));
                    isFirst = false;
                }

                // Page is full: hand the remaining documents off to a background task.
                if (results.Count >= CodeIDXSettings.Search.PageSize)
                {
                    var docNumbers = resultCollector.ScoreDocs.Select(cur => cur.Doc).ToList();
                    ongoingSearchTask = Task.Run<IEnumerable<SearchResultViewModel>>(() => GetRemainingLazyDocuments(indexDirectory, docNumbers, lastMatchIndex, adjustedSearchString, patternParts, matchCase, matchWholeWord, cancelToken, fileFilters));
                    return (results);
                }

                lastMatchIndex++;
            }
        }
    }
    catch
    {
        // NOTE(review): swallows all search/IO errors and returns partial results — presumably deliberate best-effort; confirm.
    }
    finally
    {
        // The flag stays true while the lazy background task is still running.
        if (ongoingSearchTask == null)
        {
            IsSearching = false;
        }
    }
    return (results);
}
/// <summary>
/// Two required terms that never co-occur should yield no hits.
/// </summary>
public virtual void TestBoolean2()
{
    BooleanQuery conjunction = new BooleanQuery();
    conjunction.Add(new TermQuery(new Term("field", "sevento")), BooleanClause.Occur.MUST);
    conjunction.Add(new TermQuery(new Term("field", "sevenly")), BooleanClause.Occur.MUST);

    CheckHits(conjunction, new int[] { });
}
/// <summary>
/// Creates a new <see cref="DrillDownQuery"/> over the given base query. Can be
/// <c>null</c>, in which case the result <see cref="Query"/> from
/// <see cref="Rewrite(IndexReader)"/> will be a pure browsing query, filtering on
/// the added categories only.
/// </summary>
public DrillDownQuery(FacetsConfig config, Query baseQuery)
{
    this.config = config;
    // Coord is disabled so drill-down clauses do not skew scoring.
    query = new BooleanQuery(true);
    if (baseQuery == null)
    {
        return;
    }
    query.Add(baseQuery, Occur.MUST);
}
/// <summary>
/// Rewrites the multi-term query into a disjunction of scoring term queries:
/// one SHOULD clause per matching term, each boosted by the enumerator's
/// difference relative to the original query's boost.
/// </summary>
public override Query Rewrite(IndexReader reader, MultiTermQuery query)
{
    FilteredTermEnum enumerator = query.GetEnum(reader);
    BooleanQuery rewritten = new BooleanQuery(true);
    int termCount = 0;
    try
    {
        while (true)
        {
            Term currentTerm = enumerator.Term;
            if (currentTerm != null)
            {
                // Found a match: boost it by how closely the term matched.
                TermQuery termQuery = new TermQuery(currentTerm);
                termQuery.Boost = query.Boost * enumerator.Difference();
                rewritten.Add(termQuery, Occur.SHOULD);
                termCount++;
            }
            if (!enumerator.Next())
            {
                break;
            }
        }
    }
    finally
    {
        // Always release the term enumerator, even on TooManyClauses.
        enumerator.Close();
    }
    query.IncTotalNumberOfTerms(termCount);
    return rewritten;
}
/// <summary>
/// Builds the general-purpose search query: a weighted combination of an exact-id
/// match, a wildcard id match, a near-exact id match, an AND of all terms, an OR
/// of all terms, and per-field suffix-wildcard matches. Degenerate sub-queries
/// are dropped before combining.
/// </summary>
private static Query BuildGeneralQuery(
    bool doExactId,
    string originalSearchText,
    Analyzer analyzer,
    IEnumerable<NuGetSearchTerm> generalTerms,
    IEnumerable<Query> generalQueries)
{
    // All terms in the multi-term query appear in at least one of the target fields.
    var conjunctionQuery = new BooleanQuery();
    conjunctionQuery.Boost = 2.0f;

    // Some terms in the multi-term query appear in at least one of the target fields.
    var disjunctionQuery = new BooleanQuery();
    disjunctionQuery.Boost = 0.1f;

    // Suffix wildcard search, e.g. jquer*
    var wildCardQuery = new BooleanQuery();
    wildCardQuery.Boost = 0.5f;

    string escapedExactId = originalSearchText.ToLowerInvariant();

    Query exactIdQuery = null;
    Query wildCardIdQuery = null;
    if (doExactId)
    {
        exactIdQuery = new TermQuery(new Term("Id-Exact", escapedExactId));
        exactIdQuery.Boost = 7.5f;
        wildCardIdQuery = new WildcardQuery(new Term("Id-Exact", "*" + escapedExactId + "*"));
    }

    Query nearlyExactIdQuery = null;
    if (generalTerms.Any())
    {
        string escapedApproximateId = string.Join(" ", generalTerms.Select(c => c.TermOrPhrase));
        nearlyExactIdQuery = AnalysisHelper.GetFieldQuery(analyzer, "Id", escapedApproximateId);
        nearlyExactIdQuery.Boost = 2.0f;
    }

    foreach (var termQuery in generalQueries)
    {
        conjunctionQuery.Add(termQuery, Occur.MUST);
        disjunctionQuery.Add(termQuery, Occur.SHOULD);
    }

    foreach (var sanitizedTerm in generalTerms.Select(c => c.TermOrPhrase.ToLowerInvariant()))
    {
        foreach (var field in Fields)
        {
            var wildCardTermQuery = new WildcardQuery(new Term(field, sanitizedTerm + "*"));
            wildCardTermQuery.Boost = 0.7f;
            wildCardQuery.Add(wildCardTermQuery, Occur.SHOULD);
        }
    }

    // OR of all the applicable (non-degenerate) queries.
    var candidateQueries = new Query[] { exactIdQuery, wildCardIdQuery, nearlyExactIdQuery, conjunctionQuery, disjunctionQuery, wildCardQuery };
    return conjunctionQuery.Combine(candidateQueries.Where(q => !IsDegenerateQuery(q)).ToArray());
}
/// <summary>
/// Builds a conjunction (all clauses MUST match) from every populated field of
/// <paramref name="searchDocument"/> and deletes all matching index documents,
/// optionally committing afterwards.
/// </summary>
/// <param name="searchDocument">Field values identifying the documents to delete.</param>
/// <param name="autoCommit">When true, commits the index after the delete.</param>
private void DeleteSearchDocumentInternal(SearchDocument searchDocument, bool autoCommit)
{
    var deleteQuery = new BooleanQuery();

    // Ids that may legitimately be zero are compared against -1; the rest must be positive.
    if (searchDocument.SearchTypeId > -1)
    {
        deleteQuery.Add(NumericValueQuery(Constants.SearchTypeTag, searchDocument.SearchTypeId), Occur.MUST);
    }

    if (searchDocument.PortalId > -1)
    {
        deleteQuery.Add(NumericValueQuery(Constants.PortalIdTag, searchDocument.PortalId), Occur.MUST);
    }

    if (searchDocument.RoleId > -1)
    {
        deleteQuery.Add(NumericValueQuery(Constants.RoleIdTag, searchDocument.RoleId), Occur.MUST);
    }

    if (searchDocument.ModuleDefId > 0)
    {
        deleteQuery.Add(NumericValueQuery(Constants.ModuleDefIdTag, searchDocument.ModuleDefId), Occur.MUST);
    }

    if (searchDocument.ModuleId > 0)
    {
        deleteQuery.Add(NumericValueQuery(Constants.ModuleIdTag, searchDocument.ModuleId), Occur.MUST);
    }

    if (searchDocument.TabId > 0)
    {
        deleteQuery.Add(NumericValueQuery(Constants.TabIdTag, searchDocument.TabId), Occur.MUST);
    }

    if (searchDocument.AuthorUserId > 0)
    {
        deleteQuery.Add(NumericValueQuery(Constants.AuthorIdTag, searchDocument.AuthorUserId), Occur.MUST);
    }

    if (!string.IsNullOrEmpty(searchDocument.UniqueKey))
    {
        deleteQuery.Add(new TermQuery(new Term(Constants.UniqueKeyTag, searchDocument.UniqueKey)), Occur.MUST);
    }

    if (!string.IsNullOrEmpty(searchDocument.QueryString))
    {
        deleteQuery.Add(new TermQuery(new Term(Constants.QueryStringTag, searchDocument.QueryString)), Occur.MUST);
    }

    if (!string.IsNullOrEmpty(searchDocument.CultureCode))
    {
        // The culture is indexed as its numeric language id, not the code itself.
        deleteQuery.Add(NumericValueQuery(Constants.LocaleTag, Localization.Localization.GetCultureLanguageID(searchDocument.CultureCode)), Occur.MUST);
    }

    LuceneController.Instance.Delete(deleteQuery);

    if (autoCommit)
    {
        Commit();
    }
}
/// <summary>
/// Recursively flattens <paramref name="sourceQuery"/> into its primitive term,
/// phrase and prefix queries, collecting each unique flat query into
/// <paramref name="flatQueries"/>. Prohibited (MUST_NOT) boolean clauses are
/// ignored; unknown query types fall back to term extraction.
/// </summary>
/// <param name="sourceQuery">The query to flatten.</param>
/// <param name="flatQueries">Receives the flattened queries, keyed by themselves.</param>
public void flatten(Query sourceQuery, Dictionary<Query, Query> flatQueries)
{
    // Declaration patterns replace the original is-check + cast pairs.
    if (sourceQuery is BooleanQuery bq)
    {
        foreach (BooleanClause clause in bq.GetClauses())
        {
            if (!clause.IsProhibited)
            {
                flatten(clause.Query, flatQueries);
            }
        }
    }
    else if (sourceQuery is PrefixQuery)
    {
        if (!flatQueries.ContainsKey(sourceQuery))
        {
            flatQueries.Add(sourceQuery, sourceQuery);
        }
    }
    else if (sourceQuery is DisjunctionMaxQuery dmq)
    {
        foreach (Query query in dmq)
        {
            flatten(query, flatQueries);
        }
    }
    else if (sourceQuery is TermQuery)
    {
        if (!flatQueries.ContainsKey(sourceQuery))
        {
            flatQueries.Add(sourceQuery, sourceQuery);
        }
    }
    else if (sourceQuery is PhraseQuery pq)
    {
        if (!flatQueries.ContainsKey(sourceQuery))
        {
            // Hoisted: GetTerms() was previously called three times.
            var phraseTerms = pq.GetTerms();
            if (phraseTerms.Length > 1)
            {
                flatQueries.Add(pq, pq);
            }
            else if (phraseTerms.Length == 1)
            {
                // A single-term phrase degenerates to a plain term query.
                Query q = new TermQuery(phraseTerms[0]);
                flatQueries.Add(q, q);
            }
        }
    }
    else
    {
        // Fallback to using extracted terms.
        ISet<Term> terms = SetFactory.CreateHashSet<Term>();
        try
        {
            sourceQuery.ExtractTerms(terms);
        }
        catch (NotSupportedException)
        {
            // Thrown by the default implementation: ignore and discard the query.
            return;
        }
        foreach (var term in terms)
        {
            flatten(new TermQuery(term), flatQueries);
        }
    }
}
/// <summary>
/// Builds a query for <paramref name="query"/> against <paramref name="fieldName"/>.
/// Very short input (under 4 characters) is matched only as an exact phrase;
/// otherwise each analyzed term is required either exactly (short terms) or as a
/// boosted exact-or-prefix alternative (terms of 3+ characters), with a strongly
/// boosted phrase match of the whole input as an optional bonus clause.
/// </summary>
/// <param name="fieldName">The index field to query.</param>
/// <param name="query">The raw search text; may be null.</param>
/// <param name="analyzer">The analyzer used to tokenize the search text.</param>
/// <returns>The generated query, or null when nothing could be generated.</returns>
public static Query GenerateQuery(string fieldName, string query, Analyzer analyzer)
{
    if (query == null)
    {
        return null;
    }

    var result = new BooleanQuery();
    var phraseQuery = new PhraseQuery { Slop = 0 };

    // Not much to search: only do an exact phrase match.
    if (query.Length < 4)
    {
        phraseQuery.Add(new Term(fieldName, query));
        result.Add(phraseQuery, Occur.MUST);
        return result;
    }

    // Add the phrase match with a strong boost; its terms are filled in below.
    phraseQuery.Boost = 20;
    result.Add(phraseQuery, Occur.SHOULD);

    var tokenStream = analyzer.TokenStream("SearchText", new StringReader(query));
    var termAttribute = tokenStream.AddAttribute<ITermAttribute>();
    while (tokenStream.IncrementToken())
    {
        var term = termAttribute.Term;
        phraseQuery.Add(new Term(fieldName, term));

        var exactMatch = new TermQuery(new Term(fieldName, term));
        if (term.Length < 3)
        {
            // Too short for a useful prefix: require the exact term.
            result.Add(exactMatch, Occur.MUST);
            continue;
        }

        // Longer terms: require either a boosted exact match or a prefix match.
        exactMatch.Boost = 10;
        var prefixMatch = new PrefixQuery(new Term(fieldName, term))
        {
            // Needed so that wildcard searches will return a score.
            RewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE
        };

        var termAlternatives = new BooleanQuery();
        termAlternatives.Add(exactMatch, Occur.SHOULD);
        termAlternatives.Add(prefixMatch, Occur.SHOULD);
        result.Add(termAlternatives, Occur.MUST);
    }

    return result.Clauses.Count > 0 ? result : null;
}
/// <summary>
/// Searches the blogTag/title/content fields, optionally restricted to a category
/// flag, and returns one page of results.
/// </summary>
/// <param name="_flag">Category; pass an empty string to search all categories.</param>
/// <param name="keyword">The search keyword(s).</param>
/// <param name="PageIndex">1-based page index; values below 1 are clamped to 1.</param>
/// <param name="PageSize">Number of results per page.</param>
/// <returns>The matching results for the requested page, or null when there are no hits.</returns>
public List<SearchResult> Search(string _flag, string keyword, int PageIndex, int PageSize)
{
    if (PageIndex < 1)
    {
        PageIndex = 1;
    }
    Stopwatch st = Stopwatch.StartNew();
    st.Start();
    BooleanQuery bq = new BooleanQuery();
    if (_flag != "")
    {
        QueryParser qpflag = new QueryParser(version, "flag", analyzer);
        Query qflag = qpflag.Parse(_flag);
        bq.Add(qflag, Occur.MUST); // AND
    }
    if (keyword != "")
    {
        string[] fileds = { "blogTag", "title", "content" }; // fields to query
        QueryParser parser = null; // new QueryParser(version, field, analyzer); // single-field query
        parser = new MultiFieldQueryParser(version, fileds, analyzer); // multi-field query
        Query queryKeyword = parser.Parse(keyword);
        bq.Add(queryKeyword, Occur.MUST); // AND
    }
    // Collect enough hits to cover every page up to the requested one.
    TopScoreDocCollector collector = TopScoreDocCollector.Create(PageIndex * PageSize, false);
    IndexSearcher searcher = new IndexSearcher(directory_luce, true); // true = read-only
    searcher.Search(bq, collector);
    if (collector == null || collector.TotalHits == 0)
    {
        //TotalCount = 0;
        return (null);
    }
    else
    {
        // Start offset of the requested page.
        int start = PageSize * (PageIndex - 1);
        // Number of hits to take.
        int limit = PageSize;
        ScoreDoc[] hits = collector.TopDocs(start, limit).ScoreDocs;
        List<SearchResult> list = new List<SearchResult>();
        int counter = 1;
        //TotalCount = collector.TotalHits;
        st.Stop();
        //st.ElapsedMilliseconds; // milliseconds
        foreach (ScoreDoc sd in hits) // iterate over the search hits
        {
            try
            {
                Document doc = searcher.Doc(sd.Doc);
                int id = int.Parse(doc.Get("id"));
                string title = doc.Get("title");
                string content = doc.Get("content");
                string blogTag = doc.Get("blogTag");
                string url = doc.Get("url");
                int flag = int.Parse(doc.Get("flag"));
                int clickQuantity = int.Parse(doc.Get("clickQuantity"));
                // Highlight the keyword inside the content snippet.
                content = Highlight(keyword, content);
                //string titlehighlight = Highlight(keyword, title);
                //if (titlehighlight != "") title = titlehighlight;
                list.Add(new SearchResult(title, content, url, blogTag, id, clickQuantity, flag));
            }
            catch (Exception ex)
            {
                // A malformed document is skipped rather than failing the whole page.
                Console.WriteLine(ex.Message);
            }
            counter++;
        }
        return (list);
    }
}
/// <summary>
/// Verifies the QueryPhraseMap trie built for two overlapping 2-gram phrase
/// queries ("abc" -> "ab bc", "bcd" -> "bc cd"), with phrase highlighting
/// enabled and then disabled.
/// </summary>
public void TestQueryPhraseMapOverlap2gram()
{
    // LUCENENET specific - altered some of the tests because
    // dictionaries throw KeyNotFoundException rather than returning null.
    BooleanQuery query = new BooleanQuery();
    query.Add(toPhraseQuery(analyze("abc", F, analyzerB), F), Occur.MUST);
    query.Add(toPhraseQuery(analyze("bcd", F, analyzerB), F), Occur.MUST);

    // phraseHighlight = true, fieldMatch = true
    FieldQuery fq = new FieldQuery(query, true, true);
    IDictionary<String, QueryPhraseMap> map = fq.rootMaps;
    assertEquals(1, map.size());
    assertFalse(map.TryGetValue(null, out _)); // assertNull(map[null]);
    assertNotNull(map[F]);
    QueryPhraseMap qpm = map[F];
    assertEquals(2, qpm.subMap.size());

    // "ab bc"
    assertNotNull(qpm.subMap["ab"]);
    QueryPhraseMap qpm2 = qpm.subMap["ab"];
    assertFalse(qpm2.terminal);
    assertEquals(1, qpm2.subMap.size());
    assertNotNull(qpm2.subMap["bc"]);
    QueryPhraseMap qpm3 = qpm2.subMap["bc"];
    assertTrue(qpm3.terminal);
    assertEquals(1F, qpm3.boost, 0);

    // "ab bc cd"
    assertEquals(1, qpm3.subMap.size());
    assertNotNull(qpm3.subMap["cd"]);
    QueryPhraseMap qpm4 = qpm3.subMap["cd"];
    assertTrue(qpm4.terminal);
    assertEquals(1F, qpm4.boost, 0);

    // "bc cd"
    assertNotNull(qpm.subMap["bc"]);
    qpm2 = qpm.subMap["bc"];
    assertFalse(qpm2.terminal);
    assertEquals(1, qpm2.subMap.size());
    assertNotNull(qpm2.subMap["cd"]);
    qpm3 = qpm2.subMap["cd"];
    assertTrue(qpm3.terminal);
    assertEquals(1F, qpm3.boost, 0);

    // phraseHighlight = false, fieldMatch = true
    fq = new FieldQuery(query, false, true);
    map = fq.rootMaps;
    assertEquals(1, map.size());
    assertFalse(map.TryGetValue(null, out _)); // assertNull(map[null]);
    assertNotNull(map[F]);
    qpm = map[F];
    assertEquals(3, qpm.subMap.size());

    // "ab bc"
    assertNotNull(qpm.subMap["ab"]);
    qpm2 = qpm.subMap["ab"];
    assertTrue(qpm2.terminal);
    assertEquals(1F, qpm2.boost, 0);
    assertEquals(1, qpm2.subMap.size());
    assertNotNull(qpm2.subMap["bc"]);
    qpm3 = qpm2.subMap["bc"];
    assertTrue(qpm3.terminal);
    assertEquals(1F, qpm3.boost, 0);

    // "ab bc cd"
    assertEquals(1, qpm3.subMap.size());
    assertNotNull(qpm3.subMap["cd"]);
    qpm4 = qpm3.subMap["cd"];
    assertTrue(qpm4.terminal);
    assertEquals(1F, qpm4.boost, 0);

    // "bc cd"
    assertNotNull(qpm.subMap["bc"]);
    qpm2 = qpm.subMap["bc"];
    assertTrue(qpm2.terminal);
    assertEquals(1F, qpm2.boost, 0);
    assertEquals(1, qpm2.subMap.size());
    assertNotNull(qpm2.subMap["cd"]);
    qpm3 = qpm2.subMap["cd"];
    assertTrue(qpm3.terminal);
    assertEquals(1F, qpm3.boost, 0);

    // "cd"
    assertNotNull(qpm.subMap["cd"]);
    qpm2 = qpm.subMap["cd"];
    assertTrue(qpm2.terminal);
    assertEquals(1F, qpm2.boost, 0);
    assertEquals(0, qpm2.subMap.size());
}
/// <summary>
/// Verifies the QueryPhraseMap trie built for three overlapping phrase queries:
/// "a b c", "b c d"^2 and "b d"^3, with phrase highlighting enabled.
/// </summary>
public void TestQueryPhraseMapOverlapPhrases()
{
    // LUCENENET specific - altered some of the tests because
    // dictionaries throw KeyNotFoundException rather than returning null.
    BooleanQuery query = new BooleanQuery();
    query.Add(pqF("a", "b", "c"), Occur.SHOULD);
    query.Add(pqF(2, "b", "c", "d"), Occur.SHOULD);
    query.Add(pqF(3, "b", "d"), Occur.SHOULD);

    // phraseHighlight = true, fieldMatch = true
    FieldQuery fq = new FieldQuery(query, true, true);
    IDictionary<String, QueryPhraseMap> map = fq.rootMaps;
    assertEquals(1, map.size());
    assertFalse(map.TryGetValue(null, out _)); // assertNull(map[null]);
    assertNotNull(map[F]);
    QueryPhraseMap qpm = map[F];
    assertEquals(2, qpm.subMap.size());

    // "a b c"
    assertNotNull(qpm.subMap["a"]);
    QueryPhraseMap qpm2 = qpm.subMap["a"];
    assertFalse(qpm2.terminal);
    assertEquals(1, qpm2.subMap.size());
    assertNotNull(qpm2.subMap["b"]);
    QueryPhraseMap qpm3 = qpm2.subMap["b"];
    assertFalse(qpm3.terminal);
    assertEquals(1, qpm3.subMap.size());
    assertNotNull(qpm3.subMap["c"]);
    QueryPhraseMap qpm4 = qpm3.subMap["c"];
    assertTrue(qpm4.terminal);
    assertEquals(1F, qpm4.boost, 0);
    assertNotNull(qpm4.subMap["d"]);
    QueryPhraseMap qpm5 = qpm4.subMap["d"];
    assertTrue(qpm5.terminal);
    assertEquals(1F, qpm5.boost, 0);

    // "b c d"^2, "b d"^3
    assertNotNull(qpm.subMap["b"]);
    qpm2 = qpm.subMap["b"];
    assertFalse(qpm2.terminal);
    assertEquals(2, qpm2.subMap.size());
    assertNotNull(qpm2.subMap["c"]);
    qpm3 = qpm2.subMap["c"];
    assertFalse(qpm3.terminal);
    assertEquals(1, qpm3.subMap.size());
    assertNotNull(qpm3.subMap["d"]);
    qpm4 = qpm3.subMap["d"];
    assertTrue(qpm4.terminal);
    assertEquals(2F, qpm4.boost, 0);
    assertNotNull(qpm2.subMap["d"]);
    qpm3 = qpm2.subMap["d"];
    assertTrue(qpm3.terminal);
    assertEquals(3F, qpm3.boost, 0);
}
/// <summary>
/// Looks up the thumbnail document whose signature index path equals
/// <paramref name="indexPath"/>; returns null when no such document exists.
/// </summary>
public Document GetThumbDocFromPath(string indexPath)
{
    var pathQuery = new BooleanQuery();
    pathQuery.add(new TermQuery(new Term(FieldName.SignatureIndexPath, indexPath)), BooleanClause.Occur.MUST);

    using (var wrapper = thumbSearchManager.Wrapper())
    {
        // Only a single hit is needed: the path is expected to be unique.
        var hits = wrapper.IndexSearcher.search(pathQuery, 1);
        if (hits.totalHits <= 0)
        {
            return null;
        }
        return wrapper.IndexSearcher.doc(hits.scoreDocs[0].doc);
    }
}
/// <summary> Parses a query, searching on the fields specified. Use this if you need
/// to specify certain fields as required, and others as prohibited.
/// <p/>
/// Usage:
/// <code>
/// String[] query = {"query1", "query2", "query3"};
/// String[] fields = {"filename", "contents", "description"};
/// BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
///                                BooleanClause.Occur.MUST,
///                                BooleanClause.Occur.MUST_NOT};
/// MultiFieldQueryParser.parse(query, fields, flags, analyzer);
/// </code>
/// <p/>
/// The code above would construct a query:
/// <code>
/// (filename:query1) +(contents:query2) -(description:query3)
/// </code>
/// </summary>
/// <param name="matchVersion">Lucene version to match; this is passed through to QueryParser.</param>
/// <param name="queries">Queries string to parse</param>
/// <param name="fields">Fields to search on</param>
/// <param name="flags">Flags describing the fields</param>
/// <param name="analyzer">Analyzer to use</param>
/// <throws> ParseException if query parsing fails </throws>
/// <throws> IllegalArgumentException if the length of the queries, fields, and flags arrays differ </throws>
public static Query Parse(Version matchVersion, System.String[] queries, System.String[] fields, Occur[] flags, Analyzer analyzer)
{
    // The three arrays are parallel: one query and one occur flag per field.
    if (!(queries.Length == fields.Length && queries.Length == flags.Length))
        throw new System.ArgumentException("queries, fields, and flags array have different length"); // fixed duplicated "have" in message
    BooleanQuery bQuery = new BooleanQuery();
    for (int i = 0; i < fields.Length; i++)
    {
        QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer);
        Query q = qp.Parse(queries[i]);
        // Skip empty parses: null, or a boolean query with no clauses.
        if (q != null && (!(q is BooleanQuery) || ((BooleanQuery)q).GetClauses().Length > 0))
        {
            bQuery.Add(q, flags[i]);
        }
    }
    return bQuery;
}
/// <summary>
/// Builds the combined Lucene query for an outdoor-media search from the
/// structured filters in <paramref name="queryTerm"/> plus the free-text
/// keywords in <paramref name="searchFilter"/>. Every enabled sub-query is
/// AND-ed into the result via <see cref="Occur.MUST"/>.
/// </summary>
/// <param name="queryTerm">Structured filter values (IDs, codes, ranges); a value of 0 / empty disables that filter.</param>
/// <param name="searchFilter">Free-text search term container.</param>
/// <returns>The composed <see cref="BooleanQuery"/>.</returns>
private static Query ParseQuery(QueryTerm queryTerm, SearchFilter searchFilter)
{
    var combineQuery = new BooleanQuery();

    #region 关键字查询构建
    // Keyword query: match the free text across all descriptive fields using
    // three strategies (all terms / any term / per-field prefix wildcards),
    // each with a different boost.
    if (!String.IsNullOrWhiteSpace(searchFilter.SearchTerm))
    {
        var fields = new[]
        {
            OutDoorIndexFields.Title, OutDoorIndexFields.Description, OutDoorIndexFields.AreaCate,
            OutDoorIndexFields.IndustryCate, OutDoorIndexFields.CrowdCate, OutDoorIndexFields.PurposeCate,
            OutDoorIndexFields.MediaCateName, OutDoorIndexFields.CityCateName, OutDoorIndexFields.FormatName,
            OutDoorIndexFields.PeriodName, OutDoorIndexFields.OwnerName
        };
        var analyzer = new PanGuAnalyzer();
        //var analyzer = new StandardAnalyzer(LuceneCommon.LuceneVersion);
        var queryParser = new MultiFieldQueryParser(LuceneCommon.LuceneVersion, fields, analyzer);
        // NOTE(review): this parsed query is never used below — presumably dead code; confirm before removing.
        var query = queryParser.Parse(searchFilter.SearchTerm);

        // conjunction: every term must match (highest weight)
        var conjuctionQuery = new BooleanQuery();
        conjuctionQuery.Boost = 2.0f;
        // disjunction: any term may match (lowest weight)
        var disjunctionQuery = new BooleanQuery();
        disjunctionQuery.Boost = 0.1f;
        // wildcard: per-field prefix matches (middle weight)
        var wildCardQuery = new BooleanQuery();
        wildCardQuery.Boost = 0.5f;

        var escapedSearchTerm = Escape(searchFilter.SearchTerm);
        // Exact title match gets the strongest boost of all.
        var exactIdQuery = new TermQuery(new Term(OutDoorIndexFields.Title, escapedSearchTerm));
        exactIdQuery.Boost = 2.5f;
        var wildCardIdQuery = new WildcardQuery(new Term(OutDoorIndexFields.Title, "*" + escapedSearchTerm + "*"));

        foreach (var term in GetSearchTerms(searchFilter.SearchTerm))
        {
            var termQuery = queryParser.Parse(term);
            conjuctionQuery.Add(termQuery, Occur.MUST);
            disjunctionQuery.Add(termQuery, Occur.SHOULD);
            foreach (var field in fields)
            {
                var wildCardTermQuery = new WildcardQuery(new Term(field, term + "*"));
                wildCardTermQuery.Boost = 0.7f;
                wildCardQuery.Add(wildCardTermQuery, Occur.SHOULD);
            }
        }

        // Combine all keyword strategies into a single MUST clause.
        var keywordsQuery = conjuctionQuery.Combine(new Query[] { exactIdQuery, wildCardIdQuery, conjuctionQuery, disjunctionQuery, wildCardQuery });
        combineQuery.Add(keywordsQuery, Occur.MUST);
    }
    #endregion

    #region 指定媒体ID查询
    // Restrict to a single media item when an explicit ID is supplied.
    if (queryTerm.MediaID != 0)
    {
        var mediaIdQuery = new TermQuery(new Term(OutDoorIndexFields.ID, queryTerm.MediaID.ToString()));
        combineQuery.Add(mediaIdQuery, Occur.MUST);
    }
    #endregion

    #region 用户状态
    // Always require member status >= CompanyAuth (upper bound 99 acts as "any above").
    var memberStatusQuery = NumericRangeQuery.NewIntRange(OutDoorIndexFields.MemberStatus, (int)MemberStatus.CompanyAuth, 99, true, true);
    combineQuery.Add(memberStatusQuery, Occur.MUST);
    #endregion

    #region 审核状态查询构建
    // Always require media approved for online display.
    var verifyStatus = NumericRangeQuery.NewIntRange(OutDoorIndexFields.Status, (int)OutDoorStatus.ShowOnline, 99, true, true);
    combineQuery.Add(verifyStatus, Occur.MUST);
    #endregion

    #region 指定用户ID查询
    if (queryTerm.MemberID != 0)
    {
        var memberIdQuery = NumericRangeQuery.NewIntRange(OutDoorIndexFields.MemberID, queryTerm.MemberID, queryTerm.MemberID, true, true);
        combineQuery.Add(memberIdQuery, Occur.MUST);
    }
    #endregion

    #region 城市查询
    // City filter: any city-category code within [CityCateCode, CityMaxCode].
    if (queryTerm.City != 0)
    {
        var cityQuery = NumericRangeQuery.NewLongRange(OutDoorIndexFields.CityCateCode, queryTerm.CityCateCode, queryTerm.CityMaxCode, true, true);
        combineQuery.Add(cityQuery, Occur.MUST);
    }
    #endregion

    #region 认证状态
    if (queryTerm.AuthStatus != 0)
    {
        var authStatusQuery = NumericRangeQuery.NewIntRange(OutDoorIndexFields.AuthStatus, queryTerm.AuthStatus, queryTerm.AuthStatus, true, true);
        combineQuery.Add(authStatusQuery, Occur.MUST);
    }
    #endregion

    #region 经纬度搜索
    // Bounding-box search; MinX/MaxX bound Lat and MinY/MaxY bound Lng — TODO confirm axis naming with the indexer.
    if (queryTerm.MinX != 0)
    {
        var latQuery = NumericRangeQuery.NewDoubleRange(OutDoorIndexFields.Lat, queryTerm.MinX, queryTerm.MaxX, true, true);
        var lngQuery = NumericRangeQuery.NewDoubleRange(OutDoorIndexFields.Lng, queryTerm.MinY, queryTerm.MaxY, true, true);
        combineQuery.Add(latQuery, Occur.MUST);
        combineQuery.Add(lngQuery, Occur.MUST);
    }
    #endregion

    #region 媒体类别查询
    if (queryTerm.MediaCode != 0)
    {
        var mediaCodeQuery = NumericRangeQuery.NewLongRange(OutDoorIndexFields.MediaCateCode, queryTerm.MediaCateCode, queryTerm.MediaMaxCode, true, true);
        combineQuery.Add(mediaCodeQuery, Occur.MUST);
    }
    #endregion

    #region 媒体表现形式查询
    if (queryTerm.FormatCode != 0)
    {
        var FormatCodeCodeQuery = NumericRangeQuery.NewIntRange(OutDoorIndexFields.FormatCode, queryTerm.FormatCode, queryTerm.FormatCode, true, true);
        combineQuery.Add(FormatCodeCodeQuery, Occur.MUST);
    }
    #endregion

    #region 媒体所有权查询
    //if (queryTerm.OwnerCode != 0)
    //{
    //    var OwnerCodeCodeQuery = NumericRangeQuery.NewIntRange(OutDoorIndexFields.OwnerCode,
    //        queryTerm.OwnerCode, queryTerm.OwnerCode, true, true);
    //    combineQuery.Add(OwnerCodeCodeQuery, Occur.MUST);
    //}
    #endregion

    #region 媒体购买周期查询
    if (queryTerm.PeriodCode != 0)
    {
        var PeriodCodeCodeQuery = NumericRangeQuery.NewIntRange(OutDoorIndexFields.PeriodCode, queryTerm.PeriodCode, queryTerm.PeriodCode, true, true);
        combineQuery.Add(PeriodCodeCodeQuery, Occur.MUST);
    }
    #endregion

    #region 媒体价格区间查询
    // Price bucket: translate the enum bucket into a [Min, Max] double range.
    if (queryTerm.Price != 0)
    {
        var rangeValue = EnumHelper.GetPriceValue(queryTerm.Price);
        var PriceQuery = NumericRangeQuery.NewDoubleRange(OutDoorIndexFields.Price, Convert.ToDouble(rangeValue.Min), Convert.ToDouble(rangeValue.Max), true, true);
        combineQuery.Add(PriceQuery, Occur.MUST);
    }
    #endregion

    #region 媒体档期查询
    // Availability month: deadline ticks must fall between ten years ago and
    // the first day of the requested month (DeadLine is a month number).
    if (queryTerm.DeadLine != 0)
    {
        var minValue = (DateTime.Now.AddYears(-10)).Ticks;
        var maxValue = (new DateTime(DateTime.Now.Year, queryTerm.DeadLine, 1)).Ticks;
        var DeadLineQuery = NumericRangeQuery.NewLongRange(OutDoorIndexFields.DeadLine, Convert.ToInt64(minValue), Convert.ToInt64(maxValue), true, true);
        combineQuery.Add(DeadLineQuery, Occur.MUST);
    }
    #endregion

    #region 媒体档期时间查询
    // Availability date: same range idea but with an explicit date string.
    // NOTE(review): Convert.ToDateTime throws on a malformed Dq — presumably validated upstream; confirm.
    if (!string.IsNullOrEmpty(queryTerm.Dq))
    {
        var minValue = (DateTime.Now.AddYears(-10)).Ticks;
        var maxValue = Convert.ToDateTime(queryTerm.Dq).Ticks;
        var DqQuery = NumericRangeQuery.NewLongRange(OutDoorIndexFields.DeadLine, Convert.ToInt64(minValue), Convert.ToInt64(maxValue), true, true);
        combineQuery.Add(DqQuery, Occur.MUST);
    }
    #endregion

    return (combineQuery);
}
/// <summary>
/// Loads the stored folder preferences document from the index: restores
/// <c>nextFolderId</c> and re-populates <c>registeredFolders</c> from the
/// JSON payload in the <c>RegisteredFoldersField</c>.
/// </summary>
/// <param name="searcher">Searcher over the preferences index.</param>
/// <returns><c>true</c> if a preferences document with folder data was found; otherwise <c>false</c>.</returns>
private bool Load(IndexSearcher searcher)
{
    // NOTE(review): lower-case add/search/doc mirrors the Java Lucene API —
    // presumably a Java-style binding or port; confirm the API surface.
    var query = new BooleanQuery();
    query.add(new TermQuery(new Term(PreferencesDocumentField, PreferencesName)), BooleanClause.Occur.MUST);
    // Default when nothing has been persisted yet.
    nextFolderId = 1;
    // Only one preferences document is expected, so fetch at most one hit.
    var results = searcher.search(query, 1);
    if (results.totalHits > 0)
    {
        var doc = searcher.doc(results.scoreDocs[0].doc);
        if (doc.get(RegisteredFoldersField) != null)
        {
            var prefs = JsonConvert.DeserializeObject<FolderPreferences>(doc.get(RegisteredFoldersField));
            nextFolderId = prefs.NextFolderId;
            foreach (var fm in prefs.FolderMapping)
                registeredFolders.Add(fm.Id, fm.Path);
            return true;
        }
    }
    return false;
}
/// <summary>
/// Searches the outdoor-media index for items matching the scheme-generation
/// model (status, media/city categories, availability date, category keywords,
/// price cap) and returns up to 30 results sorted by the generation type.
/// </summary>
/// <param name="model">Search criteria; empty/zero members disable the corresponding filter.</param>
/// <param name="totalHits">Total number of matching documents (0 when the index directory is missing).</param>
/// <returns>The top matching items, or an empty list when the index does not exist.</returns>
public List<LinkItem> Search(GenerateSchemeViewModel model, out int totalHits)
{
    // Bail out gracefully when the index has not been built yet.
    if (!Directory.Exists(LuceneCommon.IndexOutDoorDirectory))
    {
        totalHits = 0;
        return (new List<LinkItem>());
    }

    var combineQuery = new BooleanQuery();
    SortField sortField = GetSortField(model.generateType);

    #region 用户状态
    // Always require member status >= CompanyAuth.
    var memberStatusQuery = NumericRangeQuery.NewIntRange(OutDoorIndexFields.MemberStatus, (int)MemberStatus.CompanyAuth, 99, true, true);
    combineQuery.Add(memberStatusQuery, Occur.MUST);
    #endregion

    #region 审核状态查询构建
    // Always require media approved for online display.
    var verifyStatus = NumericRangeQuery.NewIntRange(OutDoorIndexFields.Status, (int)OutDoorStatus.ShowOnline, 99, true, true);
    combineQuery.Add(verifyStatus, Occur.MUST);
    #endregion

    #region 媒体类别查询
    // Media category: comma-separated codes, OR-ed together; each code expands
    // to the range [code, GetMaxCode(code)] to cover sub-categories.
    if (!string.IsNullOrEmpty(model.mediaCode))
    {
        var mediaCodes = model.mediaCode.Split(',').Select(x => Convert.ToInt32(x));
        var mediaCodeCombineQuery = new BooleanQuery();
        foreach (var code in mediaCodes)
        {
            var maxCode = Utilities.GetMaxCode(code);
            var mediaCodeQuery = NumericRangeQuery.NewLongRange(OutDoorIndexFields.MediaCateCode, code, maxCode, true, true);
            mediaCodeCombineQuery.Add(mediaCodeQuery, Occur.SHOULD);
        }
        combineQuery.Add(mediaCodeCombineQuery, Occur.MUST);
    }
    #endregion

    #region 媒体档期查询
    // Availability: deadline ticks between ten years ago and the requested date.
    if (!string.IsNullOrEmpty(model.dq))
    {
        var minValue = (DateTime.Now.AddYears(-10)).Ticks;
        var maxValue = Convert.ToDateTime(model.dq).Ticks;
        var DeadLineQuery = NumericRangeQuery.NewLongRange(OutDoorIndexFields.DeadLine, Convert.ToInt64(minValue), Convert.ToInt64(maxValue), true, true);
        combineQuery.Add(DeadLineQuery, Occur.MUST);
    }
    #endregion

    #region 地区查询
    // Region: comma-separated city codes, OR-ed, each expanded to its sub-range.
    if (!string.IsNullOrEmpty(model.cityCode))
    {
        var cityCodes = model.cityCode.Split(',').Select(x => Convert.ToInt32(x));
        var cityCodeCombineQuery = new BooleanQuery();
        foreach (var code in cityCodes)
        {
            var maxCode = Utilities.GetMaxCode(code);
            var cityCodeQuery = NumericRangeQuery.NewLongRange(OutDoorIndexFields.CityCateCode, code, maxCode, true, true);
            cityCodeCombineQuery.Add(cityCodeQuery, Occur.SHOULD);
        }
        combineQuery.Add(cityCodeCombineQuery, Occur.MUST);
    }
    #endregion

    #region 关键字查询
    // Category keywords: join the selected category names into one keyword
    // string and search it over the category fields with three weighted
    // strategies (all terms / any term / per-field prefix wildcards).
    if (!string.IsNullOrEmpty(model.formatCate) || !string.IsNullOrEmpty(model.crowdCate) || !string.IsNullOrEmpty(model.industryCate) || !string.IsNullOrEmpty(model.purposeCate))
    {
        var fields = new[] { OutDoorIndexFields.IndustryCate, OutDoorIndexFields.CrowdCate, OutDoorIndexFields.PurposeCate, OutDoorIndexFields.FormatName };
        var keywords = (string.IsNullOrEmpty(model.formatCate) ? string.Empty : model.formatCate + ",")
                       + (string.IsNullOrEmpty(model.crowdCate) ? string.Empty : model.crowdCate + ",")
                       + (string.IsNullOrEmpty(model.industryCate) ? string.Empty : model.industryCate + ",")
                       + (string.IsNullOrEmpty(model.purposeCate) ? string.Empty : model.purposeCate);
        var analyzer = new PanGuAnalyzer();
        //var analyzer = new StandardAnalyzer(LuceneCommon.LuceneVersion);
        var queryParser = new MultiFieldQueryParser(LuceneCommon.LuceneVersion, fields, analyzer);

        // conjunction: every term must match (highest weight)
        var conjuctionQuery = new BooleanQuery();
        conjuctionQuery.Boost = 2.0f;
        // disjunction: any term may match (lowest weight)
        var disjunctionQuery = new BooleanQuery();
        disjunctionQuery.Boost = 0.1f;
        // wildcard: per-field prefix matches (middle weight)
        var wildCardQuery = new BooleanQuery();
        wildCardQuery.Boost = 0.5f;

        // NOTE(review): escapedSearchTerm is never used below — presumably dead code; confirm.
        var escapedSearchTerm = Escape(keywords);

        foreach (var term in GetSearchTerms(keywords))
        {
            var termQuery = queryParser.Parse(term);
            conjuctionQuery.Add(termQuery, Occur.MUST);
            disjunctionQuery.Add(termQuery, Occur.SHOULD);
            foreach (var field in fields)
            {
                var wildCardTermQuery = new WildcardQuery(new Term(field, term + "*"));
                wildCardTermQuery.Boost = 0.7f;
                wildCardQuery.Add(wildCardTermQuery, Occur.SHOULD);
            }
        }

        // Combine the keyword strategies into a single MUST clause.
        var keywordsQuery = conjuctionQuery.Combine(new Query[] { conjuctionQuery, disjunctionQuery, wildCardQuery });
        combineQuery.Add(keywordsQuery, Occur.MUST);
    }
    #endregion

    #region 媒体价格区间查询
    // Price cap: search [0, bucket max]; an "unbounded" bucket (> 99999) is clamped to 1000.
    if (model.priceCate != 0)
    {
        var rangeValue = EnumHelper.GetPriceValue(model.priceCate);
        if (rangeValue.Max > 99999)
        {
            rangeValue.Max = 1000;
        }
        var PriceQuery = NumericRangeQuery.NewDoubleRange(OutDoorIndexFields.Price, 0, Convert.ToDouble(rangeValue.Max), true, true);
        combineQuery.Add(PriceQuery, Occur.MUST);
    }
    #endregion

    using (var directory = new SimpleFSDirectory(new DirectoryInfo(LuceneCommon.IndexOutDoorDirectory)))
    {
        var searcher = new IndexSearcher(directory, readOnly: true);
        // Top 30 hits, sorted by the scheme-generation sort field.
        var results = searcher.Search(combineQuery, filter: null, n: 30, sort: new Sort(sortField));
        var keys = results.ScoreDocs.Skip(0)
                   .Select(c => GetMediaItem(searcher.Doc(c.Doc)))
                   .ToList();
        totalHits = results.TotalHits;
        searcher.Dispose();
        return (keys);
    }
}
/// <summary>
/// Rewrites the drill-down query: clauses whose queries can be converted to
/// filters (via <c>GetFilter</c>) are applied as <see cref="FilteredQuery"/>
/// wrappers around the base query; the remaining clauses stay as MUST clauses
/// of a coord-disabled <see cref="BooleanQuery"/>. Returns the original query
/// unchanged when no clause is filterable.
/// </summary>
public override Query Rewrite(IndexReader r)
{
    // No clauses at all: match everything.
    if (!query.Clauses.Any())
    {
        return new MatchAllDocsQuery();
    }

    IList<Filter> filters = new List<Filter>();
    IList<Query> queries = new List<Query>();
    IList<BooleanClause> clauses = query.Clauses;
    Query baseQuery;
    int startIndex;
    // When every clause is a drill-down dimension there is no user base query,
    // so the base matches all documents; otherwise clause 0 is the base query.
    if (drillDownDims.Count == query.Clauses.Count())
    {
        baseQuery = new MatchAllDocsQuery();
        startIndex = 0;
    }
    else
    {
        baseQuery = clauses[0].Query;
        startIndex = 1;
    }

    // Partition the drill-down clauses into filterable and non-filterable ones.
    for (int i = startIndex; i < clauses.Count; i++)
    {
        BooleanClause clause = clauses[i];
        Query queryClause = clause.Query;
        Filter filter = GetFilter(queryClause);
        if (filter != null)
        {
            filters.Add(filter);
        }
        else
        {
            queries.Add(queryClause);
        }
    }

    if (filters.Count == 0)
    {
        // Nothing to optimize; keep the query as-is.
        return query;
    }
    else
    {
        // Wrap all filters using FilteredQuery
        // TODO: this is hackish; we need to do it because
        // BooleanQuery can't be trusted to handle the
        // "expensive filter" case. Really, each Filter should
        // know its cost and we should take that more
        // carefully into account when picking the right
        // strategy/optimization:
        Query wrapped;
        if (queries.Count == 0)
        {
            wrapped = baseQuery;
        }
        else
        {
            // disable coord
            BooleanQuery wrappedBQ = new BooleanQuery(true);
            if ((baseQuery is MatchAllDocsQuery) == false)
            {
                wrappedBQ.Add(baseQuery, Occur.MUST);
            }
            foreach (Query q in queries)
            {
                wrappedBQ.Add(q, Occur.MUST);
            }
            wrapped = wrappedBQ;
        }
        // Apply each filter as a layer around the accumulated query.
        foreach (Filter filter in filters)
        {
            wrapped = new FilteredQuery(wrapped, filter, FilteredQuery.QUERY_FIRST_FILTER_STRATEGY);
        }
        return wrapped;
    }
}
/// <summary>
/// Builds a Lucene <see cref="BooleanQuery"/> from a JSON "bool" query object.
/// Recognized properties: <c>must</c>, <c>must_not</c>/<c>mustnot</c>,
/// <c>should</c> (each an object or array of clause objects), plus the scalar
/// options <c>boost</c> and <c>minimum_should_match</c>.
/// </summary>
/// <param name="builder">Service used to build nested clause fragments.</param>
/// <param name="context">Query construction context.</param>
/// <param name="type">Query type name; anything other than "bool" is ignored.</param>
/// <param name="query">The JSON object describing the boolean query.</param>
/// <returns>The built query, or <c>null</c> when <paramref name="type"/> is not "bool".</returns>
/// <exception cref="ArgumentException">On an unknown property name or an invalid clause value.</exception>
public Query CreateQuery(ILuceneQueryService builder, LuceneQueryContext context, string type, JObject query)
{
    if (type != "bool")
    {
        return null;
    }

    var boolQuery = new BooleanQuery();

    foreach (var property in query.Properties())
    {
        var occur = Occur.MUST;

        switch (property.Name.ToLowerInvariant())
        {
            case "must":
                occur = Occur.MUST;
                break;
            case "mustnot":
            case "must_not":
                occur = Occur.MUST_NOT;
                break;
            case "should":
                occur = Occur.SHOULD;
                break;
            case "boost":
                // BUG FIX: read the scalar from the property's value, not the
                // enclosing object (query.Value<float>() tried to convert the
                // whole JObject), and skip the clause parsing below — a numeric
                // token would otherwise hit the default case and throw.
                boolQuery.Boost = property.Value.Value<float>();
                continue;
            case "minimum_should_match":
                // Same fix as "boost": scalar option, no clause list to parse.
                boolQuery.MinimumNumberShouldMatch = property.Value.Value<int>();
                continue;
            default:
                throw new ArgumentException($"Invalid property '{property.Name}' in boolean query");
        }

        switch (property.Value.Type)
        {
            case JTokenType.Object:
                break;
            case JTokenType.Array:
                foreach (var item in ((JArray)property.Value))
                {
                    if (item.Type != JTokenType.Object)
                    {
                        throw new ArgumentException($"Invalid value in boolean query");
                    }

                    boolQuery.Add(builder.CreateQueryFragment(context, (JObject)item), occur);
                }
                break;
            default:
                throw new ArgumentException($"Invalid value in boolean query");
        }
    }

    return boolQuery;
}
/// <summary>
/// Verifies that a MUST/MUST_NOT combination of payload term queries over a
/// field indexed without payloads still matches exactly one document.
/// </summary>
public virtual void TestNoPayload()
{
    var zeroQuery = new PayloadTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "zero"), new MaxPayloadFunction());
    var fooQuery = new PayloadTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "foo"), new MaxPayloadFunction());

    var query = new BooleanQuery();
    query.Add(new BooleanClause(zeroQuery, BooleanClause.Occur.MUST));
    query.Add(new BooleanClause(fooQuery, BooleanClause.Occur.MUST_NOT));

    TopDocs hits = Searcher.Search(query, null, 100);
    Assert.IsTrue(hits != null, "hits is null and it shouldn't be");
    Assert.IsTrue(hits.TotalHits == 1, "hits Size: " + hits.TotalHits + " is not: " + 1);

    int[] expectedDocs = { 0 }; //hits.ScoreDocs[0].Doc;
    CheckHits.CheckHitCollector(Random(), query, PayloadHelper.NO_PAYLOAD_FIELD, Searcher, expectedDocs);
}
/// <summary>
/// Indexes a small parent/child corpus (products with "id", children linking
/// back via "productId"), then verifies that a join query can be combined with
/// other clauses inside a <see cref="BooleanQuery"/> without error.
/// </summary>
public void TestInsideBooleanQuery()
{
    const string idField = "id";
    const string toField = "productId";

    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random, dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
        .SetMergePolicy(NewLogMergePolicy()));

    // 0: parent product "7"
    Document doc = new Document();
    doc.Add(new TextField("description", "random text", Field.Store.NO));
    doc.Add(new TextField("name", "name1", Field.Store.NO));
    doc.Add(new TextField(idField, "7", Field.Store.NO));
    w.AddDocument(doc);

    // 1: child of product 7
    doc = new Document();
    doc.Add(new TextField("price", "10.0", Field.Store.NO));
    doc.Add(new TextField(idField, "2", Field.Store.NO));
    doc.Add(new TextField(toField, "7", Field.Store.NO));
    w.AddDocument(doc);

    // 2: child of product 7
    doc = new Document();
    doc.Add(new TextField("price", "20.0", Field.Store.NO));
    doc.Add(new TextField(idField, "3", Field.Store.NO));
    doc.Add(new TextField(toField, "7", Field.Store.NO));
    w.AddDocument(doc);

    // 3: parent product "0"
    doc = new Document();
    doc.Add(new TextField("description", "more random text", Field.Store.NO));
    doc.Add(new TextField("name", "name2", Field.Store.NO));
    doc.Add(new TextField(idField, "0", Field.Store.NO));
    w.AddDocument(doc);
    w.Commit();

    // 4: child of product 0
    doc = new Document();
    doc.Add(new TextField("price", "10.0", Field.Store.NO));
    doc.Add(new TextField(idField, "5", Field.Store.NO));
    doc.Add(new TextField(toField, "0", Field.Store.NO));
    w.AddDocument(doc);

    // 5: child of product 0
    doc = new Document();
    doc.Add(new TextField("price", "20.0", Field.Store.NO));
    doc.Add(new TextField(idField, "6", Field.Store.NO));
    doc.Add(new TextField(toField, "0", Field.Store.NO));
    w.AddDocument(doc);

    w.ForceMerge(1);
    IndexSearcher indexSearcher = new IndexSearcher(w.GetReader());
    w.Dispose();

    // Search for product
    Query joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField,
        new TermQuery(new Term("description", "random")), indexSearcher, ScoreMode.Avg);

    // Wrap the join query together with an ordinary term clause.
    BooleanQuery bq = new BooleanQuery();
    bq.Add(joinQuery, Occur.SHOULD);
    bq.Add(new TermQuery(new Term("id", "3")), Occur.SHOULD);

    indexSearcher.Search(bq, new CollectorAnonymousClass());
    indexSearcher.IndexReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Verifies that every configured similarity still finds the single indexed
/// document when the field omits both term frequencies and norms.
/// </summary>
public virtual void TestOmitTFAndNorms()
{
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);

    // Index one field that stores neither term frequencies nor norms.
    FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
    fieldType.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
    fieldType.OmitNorms = true;
    fieldType.Freeze();

    Document document = new Document();
    document.Add(NewField("foo", "bar", fieldType));
    writer.AddDocument(document);

    IndexReader reader = writer.Reader;
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    foreach (Similarity sim in Sims)
    {
        searcher.Similarity = sim;
        // Coord-disabled boolean query with one optional term clause.
        BooleanQuery query = new BooleanQuery(true);
        query.Add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD);
        Assert.AreEqual(1, searcher.Search(query, 10).TotalHits);
    }

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Builds the similarity query for a parametrized search. For each index level
/// enabled by <c>query.SearchIn</c> and the corresponding m_use* switches
/// (document, section, paragraph, sentence), the level's tokens and topics are
/// OR-ed into a weighted sub-query with a minimum-should-match threshold, and
/// each non-empty sub-query is AND-ed (MUST) into the main query.
/// <paramref name="searchType"/> is widened to the deepest level searched.
/// </summary>
/// <param name="query">Token/topic lists per index level plus the levels to search in.</param>
/// <param name="searchType">In/out: raised to the deepest index level this query targets.</param>
/// <returns>The composed main <see cref="BooleanQuery"/>.</returns>
public override LuceneNet.Search.Query CalculateLuceneQuery(ParametrizedSearchQuery query, ref IndexObjectType searchType)
{
    // Raise the clause cap: the OR-lists below can exceed Lucene's default of 1024.
    BooleanQuery.MaxClauseCount = 50000;
    BooleanQuery mainQuery = new BooleanQuery();

    // ---- Document level ----
    if ((query.SearchIn & IndexObjectType.DocumentIndex) == IndexObjectType.DocumentIndex && m_useDocumentSimilarity)
    {
        if (m_useDocumentTokens)
        {
            BooleanQuery documentTokens = new BooleanQuery();
            foreach (var term in query.DocumentTokens)
            {
                if (!string.IsNullOrWhiteSpace(term))
                {
                    var docQuery = GetWeightedTermQuery("DocumentToken", term, 1);
                    //Add topic to compare
                    if (docQuery != null)
                    {
                        documentTokens.Add(docQuery, LuceneNet.Search.Occur.SHOULD);
                    }
                }
            }
            // Require a fraction of the tokens to match (at least one).
            documentTokens.MinimumNumberShouldMatch = (int)(Math.Max(documentTokens.Clauses.Count * m_documentTokenLimit, 1));
            documentTokens.Boost = (float)m_documentSingleTermWeight;
            if (documentTokens.Clauses.Count > 0)
            {
                mainQuery.Add(documentTokens, Occur.MUST);
            }
        }
        if (m_useDocumentTopics)
        {
            BooleanQuery documentTopics = new BooleanQuery();
            foreach (var term in query.DocumentTopics)
            {
                if (!string.IsNullOrWhiteSpace(term))
                {
                    var docQuery = GetWeightedTermQuery("DocumentTopic", term, 1);
                    //Add topic to compare
                    if (docQuery != null)
                    {
                        documentTopics.Add(docQuery, LuceneNet.Search.Occur.SHOULD);
                    }
                }
            }
            documentTopics.MinimumNumberShouldMatch = (int)(Math.Max(documentTopics.Clauses.Count * m_documentTopicLimit, 1));
            documentTopics.Boost = (float)m_documentTopicWeight;
            if (documentTopics.Clauses.Count > 0)
            {
                mainQuery.Add(documentTopics, Occur.MUST);
            }
        }
    }

    // ---- Section level ----
    if ((query.SearchIn & IndexObjectType.SectionIndex) == IndexObjectType.SectionIndex && m_useSectionSimilarity)
    {
        if (searchType < IndexObjectType.SectionIndex)
        {
            searchType = IndexObjectType.SectionIndex;
        }
        if (m_useSectionTokens)
        {
            BooleanQuery pSectionTokens = new BooleanQuery();
            foreach (var term in query.SectionTokens)
            {
                if (!string.IsNullOrWhiteSpace(term))
                {
                    var docQuery = GetWeightedTermQuery("SectionToken", term, 1);
                    //Add topic to compare
                    if (docQuery != null)
                    {
                        pSectionTokens.Add(docQuery, LuceneNet.Search.Occur.SHOULD);
                    }
                }
            }
            pSectionTokens.MinimumNumberShouldMatch = (int)(Math.Max(pSectionTokens.Clauses.Count * m_sectionTokenLimit, 1));
            pSectionTokens.Boost = (float)m_sectionSingleTermWeight;
            if (pSectionTokens.Clauses.Count > 0)
            {
                mainQuery.Add(pSectionTokens, Occur.MUST);
            }
        }
        if (m_useSectionTopics)
        {
            BooleanQuery pSectionTopic = new BooleanQuery();
            foreach (var term in query.SectionTopics)
            {
                if (!string.IsNullOrWhiteSpace(term))
                {
                    var docQuery = GetWeightedTermQuery("SectionTopic", term, 1);
                    //Add topic to compare
                    if (docQuery != null)
                    {
                        pSectionTopic.Add(docQuery, LuceneNet.Search.Occur.SHOULD);
                    }
                }
            }
            pSectionTopic.MinimumNumberShouldMatch = (int)(Math.Max(pSectionTopic.Clauses.Count * m_sectionTopicLimit, 1));
            pSectionTopic.Boost = (float)m_sectionTopicWeight;
            if (pSectionTopic.Clauses.Count > 0)
            {
                mainQuery.Add(pSectionTopic, Occur.MUST);
            }
        }
    }

    // ---- Paragraph level ----
    if ((query.SearchIn & IndexObjectType.ParagraphIndex) == IndexObjectType.ParagraphIndex)
    {
        if (searchType < IndexObjectType.ParagraphIndex)
        {
            searchType = IndexObjectType.ParagraphIndex;
        }
        if (m_useParagraphTokens)
        {
            BooleanQuery pTokenQuery = new BooleanQuery();
            foreach (var term in query.ParagraphTokens)
            {
                if (!string.IsNullOrWhiteSpace(term))
                {
                    pTokenQuery.Add(GetWeightedTermQuery("ParagraphToken", term, 1), Occur.SHOULD);
                }
            }
            pTokenQuery.MinimumNumberShouldMatch = (int)(Math.Max(pTokenQuery.Clauses.Count * m_paragraphTokenLimit, 1));
            pTokenQuery.Boost = (float)m_paragraphSingleTermWeight;
            // NOTE(review): unlike every other branch, this sub-query is added
            // without a Clauses.Count > 0 guard — confirm whether an empty
            // MUST clause here is intentional.
            mainQuery.Add(pTokenQuery, Occur.MUST);
        }
        if (m_useParagraphTopics)
        {
            BooleanQuery pParagraphTopic = new BooleanQuery();
            foreach (var term in query.ParagraphTopics)
            {
                if (!string.IsNullOrWhiteSpace(term))
                {
                    var docQuery = GetWeightedTermQuery("ParagraphTopic", term, 1);
                    //Add topic to compare
                    if (docQuery != null)
                    {
                        pParagraphTopic.Add(docQuery, LuceneNet.Search.Occur.SHOULD);
                    }
                }
            }
            pParagraphTopic.MinimumNumberShouldMatch = (int)(Math.Max(pParagraphTopic.Clauses.Count * m_paragraphTopicLimit, 1));
            pParagraphTopic.Boost = (float)m_paragraphTopicWeight;
            mainQuery.Add(pParagraphTopic, Occur.MUST);
        }
    }

    // ---- Sentence level ----
    if ((query.SearchIn & IndexObjectType.SentenceIndex) == IndexObjectType.SentenceIndex)
    {
        if (searchType < IndexObjectType.SentenceIndex)
        {
            searchType = IndexObjectType.SentenceIndex;
        }
        if (m_useSentenceTokens)
        {
            //If we are searching within the sentence index, use the uncosolidated IATE and EuroVoc tokens & consolidated Sentence Topics
            BooleanQuery pIateTokenQuery = new BooleanQuery();
            BooleanQuery pEVTokenQuery = new BooleanQuery();
            // Split tokens by vocabulary: "IATE"-prefixed terms vs. EuroVoc terms.
            foreach (var term in query.SentenceTokens)
            {
                if (!string.IsNullOrWhiteSpace(term))
                {
                    if (term.StartsWith("IATE"))
                    {
                        pIateTokenQuery.Add(GetWeightedTermQuery("ContainedTokenIATE", term, 1), Occur.SHOULD);
                    }
                    else
                    {
                        pEVTokenQuery.Add(GetWeightedTermQuery("ContainedTokenEV", term, 1), Occur.SHOULD);
                    }
                }
            }
            BooleanQuery pSentenceTokenQuery = new BooleanQuery();
            if (pIateTokenQuery.Clauses.Count > 0)
            {
                pIateTokenQuery.MinimumNumberShouldMatch = (int)(Math.Max(pIateTokenQuery.Clauses.Count * m_sentenceIATELimit, 0));
                pIateTokenQuery.Boost = (float)m_sentenceIATEWeight;
                pSentenceTokenQuery.Add(pIateTokenQuery, Occur.SHOULD);
            }
            if (pEVTokenQuery.Clauses.Count > 0)
            {
                pEVTokenQuery.MinimumNumberShouldMatch = (int)(Math.Max(pEVTokenQuery.Clauses.Count * m_sentenceEVLimit, 0));
                pEVTokenQuery.Boost = (float)m_sentenceEuroVocWeight;
                pSentenceTokenQuery.Add(pEVTokenQuery, Occur.SHOULD);
            }
            if (pSentenceTokenQuery.Clauses.Count > 0)
            {
                mainQuery.Add(pSentenceTokenQuery, Occur.MUST);
            }
        }
        if (m_useSentenceTopics)
        {
            BooleanQuery pSentenceTopics = new BooleanQuery();
            foreach (var term in query.SentenceTopics)
            {
                var senTopic = GetWeightedTermQuery("SentenceTopic", term, 1);
                if (senTopic != null)
                {
                    pSentenceTopics.Add(senTopic, LuceneNet.Search.Occur.SHOULD);
                }
            }
            if (pSentenceTopics.Clauses.Count > 0)
            {
                pSentenceTopics.MinimumNumberShouldMatch = (int)(Math.Max(pSentenceTopics.Clauses.Count * m_sentenceTopicLimit, 1));
                pSentenceTopics.Boost = (float)m_sentenceTopicWeight;
                mainQuery.Add(pSentenceTopics, Occur.MUST);
            }
        }
    }
    return (mainQuery);
}
/// <summary>
/// Copy constructor used by <see cref="Clone"/>: deep-clones the boolean query
/// and copies the drill-down dimension map and facets configuration.
/// </summary>
internal DrillDownQuery(FacetsConfig config, BooleanQuery query, IDictionary<string, int?> drillDownDims)
{
    this.config = config;
    this.query = (BooleanQuery)query.Clone();
    this.drillDownDims.PutAll(drillDownDims);
}
/// <summary>
/// Suggest similar words (optionally restricted to a field of an index).
/// <para>
/// As the Lucene similarity that is used to fetch the most relevant n-grammed terms
/// is not the same as the edit distance strategy used to calculate the best
/// matching spell-checked word from the hits that Lucene found, one usually has
/// to retrieve a couple of numSug's in order to get the true best match.
/// </para>
/// <para>
/// I.e. if numSug == 1, don't count on that suggestion being the best one.
/// Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
/// </para>
/// </summary>
/// <param name="word"> the word you want a spell check done on </param>
/// <param name="numSug"> the number of suggested words </param>
/// <param name="ir"> the indexReader of the user index (can be null see field param) </param>
/// <param name="field"> the field of the user index: if field is not null, the suggested
/// words are restricted to the words present in this field. </param>
/// <param name="suggestMode">
/// (NOTE: if indexReader==null and/or field==null, then this is overridden with SuggestMode.SUGGEST_ALWAYS) </param>
/// <param name="accuracy"> The minimum score a suggestion must have in order to qualify for inclusion in the results </param>
/// <exception cref="System.IO.IOException"> if the underlying index throws an <see cref="System.IO.IOException"/> </exception>
/// <exception cref="ObjectDisposedException"> if the <see cref="SpellChecker"/> is already disposed </exception>
/// <returns> string[] the sorted list of the suggest words with these 2 criteria:
/// first criteria: the edit distance, second criteria (only if restricted mode): the popularity
/// of the suggest words in the field of the user index
/// </returns>
public virtual string[] SuggestSimilar(string word, int numSug, IndexReader ir, string field, SuggestMode suggestMode, float accuracy)
{
    // obtainSearcher calls ensureOpen
    IndexSearcher indexSearcher = ObtainSearcher();
    try
    {
        // Without a user index/field there is nothing to restrict against,
        // so always suggest.
        if (ir == null || field == null)
        {
            suggestMode = SuggestMode.SUGGEST_ALWAYS;
        }
        if (suggestMode == SuggestMode.SUGGEST_ALWAYS)
        {
            ir = null;
            field = null;
        }

        int lengthWord = word.Length;

        int freq = (ir != null && field != null) ? ir.DocFreq(new Term(field, word)) : 0;
        int goalFreq = suggestMode == SuggestMode.SUGGEST_MORE_POPULAR ? freq : 0;
        // if the word exists in the real index and we don't care for word frequency, return the word itself
        if (suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX && freq > 0)
        {
            return (new string[] { word });
        }

        // Build an OR query over the word's n-grams, with extra boosted
        // clauses for leading and trailing grams.
        BooleanQuery query = new BooleanQuery();
        string[] grams;
        string key;

        for (int ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++)
        {
            key = "gram" + ng; // form key

            grams = FormGrams(word, ng); // form word into ngrams (allow dups too)

            if (grams.Length == 0)
            {
                continue; // hmm
            }

            if (bStart > 0) // should we boost prefixes?
            {
                Add(query, "start" + ng, grams[0], bStart); // matches start of word
            }
            if (bEnd > 0) // should we boost suffixes
            {
                Add(query, "end" + ng, grams[grams.Length - 1], bEnd); // matches end of word
            }
            for (int i = 0; i < grams.Length; i++)
            {
                Add(query, key, grams[i]);
            }
        }

        int maxHits = 10 * numSug;

        //    System.out.println("Q: " + query);
        ScoreDoc[] hits = indexSearcher.Search(query, null, maxHits).ScoreDocs;
        //    System.out.println("HITS: " + hits.length());
        SuggestWordQueue sugQueue = new SuggestWordQueue(numSug, comparer);

        // go thru more than 'maxr' matches in case the distance filter triggers
        int stop = Math.Min(hits.Length, maxHits);
        SuggestWord sugWord = new SuggestWord();
        for (int i = 0; i < stop; i++)
        {
            sugWord.String = indexSearcher.Doc(hits[i].Doc).Get(F_WORD); // get orig word

            // don't suggest a word for itself, that would be silly
            if (sugWord.String.Equals(word, StringComparison.Ordinal))
            {
                continue;
            }

            // edit distance
            sugWord.Score = sd.GetDistance(word, sugWord.String);
            if (sugWord.Score < accuracy)
            {
                continue;
            }

            if (ir != null && field != null) // use the user index
            {
                sugWord.Freq = ir.DocFreq(new Term(field, sugWord.String)); // freq in the index
                // don't suggest a word that is not present in the field
                if ((suggestMode == SuggestMode.SUGGEST_MORE_POPULAR && goalFreq > sugWord.Freq) || sugWord.Freq < 1)
                {
                    continue;
                }
            }
            sugQueue.InsertWithOverflow(sugWord);
            if (sugQueue.Count == numSug)
            {
                // if queue full, maintain the minScore score
                accuracy = sugQueue.Top.Score;
            }
            sugWord = new SuggestWord();
        }

        // convert to array string
        // The queue pops in ascending order, so fill the array back-to-front
        // to return best suggestions first.
        string[] list = new string[sugQueue.Count];
        for (int i = sugQueue.Count - 1; i >= 0; i--)
        {
            list[i] = sugQueue.Pop().String;
        }
        return (list);
    }
    finally
    {
        ReleaseSearcher(indexSearcher);
    }
}
/// <summary>
/// Creates a new <see cref="QueryBuilder"/> backed by an empty boolean query.
/// </summary>
/// <param name="disableCoord">if set to <c>true</c>, coordinate scoring is disabled on the underlying query.</param>
public QueryBuilder(bool disableCoord)
{
    this.luceneQuery = new BooleanQuery(disableCoord);
    this.disableCoord = disableCoord;
}
/// <summary>
/// Merges the clauses of another <see cref="QueryBuilder"/> into this instance.
/// </summary>
/// <param name="builder">The builder whose clauses are merged in; must not be null.</param>
public void Merge(QueryBuilder builder)
{
    if (builder == null)
    {
        throw new ArgumentNullException("builder", "builder cannot be null");
    }

    // Nothing to merge from an empty builder.
    if (builder.TotalClauses == 0)
    {
        return;
    }

    IncrementTotalClauses(builder.totalClauses);

    var combined = this.luceneQuery.Combine(new Query[] { this.luceneQuery, builder.luceneQuery });
    this.luceneQuery = (BooleanQuery)combined;
}
/// <summary>
/// Adds an optional (SHOULD) term clause for <paramref name="name"/>:<paramref name="value"/> to the boolean query.
/// </summary>
private static void Add(BooleanQuery q, string name, string value)
{
    var termQuery = new TermQuery(new Term(name, value));
    q.Add(new BooleanClause(termQuery, Occur.SHOULD));
}
/// <summary>
/// Search button handler: runs four passes over the "name" index (parsed
/// query per word, exact phrase, per-word wildcards, and year + partial word),
/// de-duplicates hits by the "id" field, and shows them in the results grid.
/// </summary>
private void lucene_serach_Click(object sender, EventArgs e)
{
    results.Rows.Clear();
    var query = search_field.Text.ToLower();
    var array = query.Split(' ').ToList();
    var searcher = new IndexSearcher(writer1.GetReader(applyAllDeletes: true));
    var totalResults = new List<Document>();

    // single word
    QueryParser parser = new QueryParser(AppLuceneVersion, "name", analyzer);
    var phrase = new MultiPhraseQuery();
    foreach (var word in array)
    {
        // NOTE(review): parses the whole query string each iteration rather
        // than the current word — looks unintended; confirm.
        var q = parser.Parse(query);
        if (!String.IsNullOrEmpty(word))
        {
            var res = searcher.Search(q, 10).ScoreDocs;
            foreach (var hit in res)
            {
                var foundDoc = searcher.Doc(hit.Doc);
                // De-duplicate by stored integer "id".
                if (!totalResults.Any(f => f.GetField("id").GetInt32Value() == foundDoc.GetField("id").GetInt32Value()))
                {
                    totalResults.Add(foundDoc);
                }
            }
        }
    }

    // full title
    phrase.Add(new Term("name", query));
    var hits = searcher.Search(phrase, 10).ScoreDocs;
    foreach (var hit in hits)
    {
        var foundDoc = searcher.Doc(hit.Doc);
        if (!totalResults.Any(f => f.GetField("id").GetInt32Value() == foundDoc.GetField("id").GetInt32Value()))
        {
            totalResults.Add(foundDoc);
        }
    }

    // partial words
    foreach (var word in array)
    {
        if (!string.IsNullOrEmpty(word))
        {
            var wild = new WildcardQuery(new Term("name", "*" + word + "*"));
            var res = searcher.Search(wild, 10).ScoreDocs;
            foreach (var hit in res)
            {
                var foundDoc = searcher.Doc(hit.Doc);
                if (!totalResults.Any(f => f.GetField("id").GetInt32Value() == foundDoc.GetField("id").GetInt32Value()))
                {
                    totalResults.Add(foundDoc);
                }
            }
        }
    }

    // year plus partial word: pull the first 4-digit year-looking token out of the query
    var year_to_find = "";
    var number = 0;
    foreach (var word in array)
    {
        var result = TryParse(word, out number);
        if (result && number > 1800 && number <= 9999)
        {
            year_to_find = word;
            array.RemoveAt(array.IndexOf(word));
            break;
        }
    }
    // NOTE(review): leftover debug output — consider removing.
    Console.WriteLine(number != 0);
    if (number != 0)
    {
        phrase = new MultiPhraseQuery();
        foreach (var word in array)
        {
            if (!string.IsNullOrEmpty(word))
            {
                var booleanQuery = new BooleanQuery();
                var wild = new WildcardQuery(new Term("name", "*" + word + "*"));
                // NOTE(review): precisionStep of 1 is unusual — confirm it matches how "year" was indexed.
                var num = NumericRangeQuery.NewInt32Range("year", 1, number, number, true, true);
                booleanQuery.Add(wild, Occur.SHOULD);
                booleanQuery.Add(num, Occur.SHOULD);
                var res = searcher.Search(booleanQuery, 10).ScoreDocs;
                foreach (var hit in res)
                {
                    var foundDoc = searcher.Doc(hit.Doc);
                    if (!totalResults.Any(f => f.GetField("id").GetInt32Value() == foundDoc.GetField("id").GetInt32Value()))
                    {
                        totalResults.Add(foundDoc);
                    }
                }
            }
        }
    }

    // Render the de-duplicated hits into the grid.
    foreach (var doc in totalResults)
    {
        results.Rows.Add(doc.GetField("id").GetInt32Value().ToString(), doc.GetValues("name")[0], doc.GetField("year").GetInt32Value().ToString());
    }
}
/// <summary>
/// Resets this instance to an empty query with zero clauses.
/// </summary>
public void Clear()
{
    this.totalClauses = 0;
    this.luceneQuery = new BooleanQuery();
}
/// <summary>
/// Builds the rewritten common-terms query: terms are split into a low-frequency
/// and a high-frequency boolean sub-query based on <c>m_maxTermFrequency</c>;
/// if one side is empty the other is returned directly (high-frequency-only
/// queries are tightened to a conjunction to avoid slow matches), otherwise
/// both are combined with high-freq optional and low-freq required.
/// </summary>
/// <param name="maxDoc">Number of documents in the segment, used for the relative frequency cutoff.</param>
/// <param name="contextArray">Per-term contexts; null entries mean the term was not found.</param>
/// <param name="queryTerms">The query's terms, parallel to <paramref name="contextArray"/>.</param>
/// <returns>The composed query with this query's boost applied.</returns>
protected virtual Query BuildQuery(int maxDoc, TermContext[] contextArray, Term[] queryTerms)
{
    var lowFreq = new BooleanQuery(m_disableCoord);
    var highFreq = new BooleanQuery(m_disableCoord) { Boost = m_highFreqBoost };
    lowFreq.Boost = m_lowFreqBoost;
    // disable coord on the outer combination
    var query = new BooleanQuery(true);
    for (int i = 0; i < queryTerms.Length; i++)
    {
        TermContext termContext = contextArray[i];
        if (termContext == null)
        {
            // Unknown term: treat as low frequency.
            lowFreq.Add(NewTermQuery(queryTerms[i], null), m_lowFreqOccur);
        }
        else
        {
            // m_maxTermFrequency >= 1 is an absolute docFreq cutoff; < 1 is a fraction of maxDoc.
            if ((m_maxTermFrequency >= 1f && termContext.DocFreq > m_maxTermFrequency) || (termContext.DocFreq > (int)Math.Ceiling(m_maxTermFrequency * (float)maxDoc)))
            {
                highFreq.Add(NewTermQuery(queryTerms[i], termContext), m_highFreqOccur);
            }
            else
            {
                lowFreq.Add(NewTermQuery(queryTerms[i], termContext), m_lowFreqOccur);
            }
        }
    }
    int numLowFreqClauses = lowFreq.GetClauses().Length;
    int numHighFreqClauses = highFreq.GetClauses().Length;
    // Apply minimum-should-match thresholds only to optional clause groups.
    if (m_lowFreqOccur == Occur.SHOULD && numLowFreqClauses > 0)
    {
        int minMustMatch = CalcLowFreqMinimumNumberShouldMatch(numLowFreqClauses);
        lowFreq.MinimumNumberShouldMatch = minMustMatch;
    }
    if (m_highFreqOccur == Occur.SHOULD && numHighFreqClauses > 0)
    {
        int minMustMatch = CalcHighFreqMinimumNumberShouldMatch(numHighFreqClauses);
        highFreq.MinimumNumberShouldMatch = minMustMatch;
    }
    if (lowFreq.GetClauses().Length == 0)
    {
        /*
         * if lowFreq is empty we rewrite the high freq terms in a conjunction to
         * prevent slow queries.
         */
        if (highFreq.MinimumNumberShouldMatch == 0 && m_highFreqOccur != Occur.MUST)
        {
            foreach (BooleanClause booleanClause in highFreq)
            {
                booleanClause.Occur = Occur.MUST;
            }
        }
        highFreq.Boost = Boost;
        return (highFreq);
    }
    else if (highFreq.GetClauses().Length == 0)
    {
        // only do low freq terms - we don't have high freq terms
        lowFreq.Boost = Boost;
        return (lowFreq);
    }
    else
    {
        // Both sides present: high-frequency terms only influence scoring,
        // low-frequency terms are required.
        query.Add(highFreq, Occur.SHOULD);
        query.Add(lowFreq, Occur.MUST);
        query.Boost = Boost;
        return (query);
    }
}
/// <summary>
/// Takes the manually generated query and runs it through a Lucene analyzer,
/// replacing this builder's query with the parsed result wrapped in a fresh
/// <see cref="BooleanQuery"/> under the given occurrence.
/// </summary>
/// <param name="analyzer">Analyzer to use when parsing this query</param>
/// <param name="occurrence">Occurrence type of this query</param>
/// <exception cref="ArgumentNullException">when <paramref name="analyzer"/> is null</exception>
/// <exception cref="FormatException">wrapping any failure during parsing/analysis</exception>
internal void Analyze(Lucene29.Net.Analysis.Analyzer analyzer, ClauseOccurrence occurrence)
{
    if (analyzer == null)
        throw new ArgumentNullException("analyzer", "Analyzer cannot be null");
    try
    {
        // Double-checked caching: only rebuild the parser when the analyzer type changes.
        AnalyzerType requestedType = TypeConverter.GetAnalyzerType(analyzer);
        if (cachedAnalyzer != requestedType)
        {
            lock (syncRoot)
            {
                if (cachedAnalyzer != requestedType)
                {
                    cachedParser = new Lucene29.Net.QueryParsers.QueryParser(StaticValues.LibraryVersion, "Analyzer", analyzer);
                    cachedAnalyzer = requestedType;
                    cachedParser.SetAllowLeadingWildcard(this.allowLeadingWildcard);
                }
            }
        }
        // Re-parse the current query's string form through the analyzer and
        // swap it in as the sole clause of a new boolean query.
        Query query = cachedParser.Parse(this.luceneQuery.ToString());
        this.luceneQuery = null;
        this.luceneQuery = new BooleanQuery(this.disableCoord);
        this.luceneQuery.Add(query, TypeConverter.ConvertToLuceneClauseOccurrence(occurrence));
    }
    catch (Exception ex)
    {
        throw new FormatException("There was an unexpected exception thrown during the analyzing process of the instance.", ex);
    }
}
/// <summary>
/// Builds this rule into a fresh, empty <see cref="BooleanQuery"/>.
/// </summary>
/// <returns>The query produced by the <see cref="BooleanQuery"/>-accepting overload.</returns>
public Query GetRule()
{
    var emptyQuery = new BooleanQuery();
    return GetRule(emptyQuery);
}
/// <summary>
/// Adds a <see cref="IndexLibrary.SearchInfo"/> to this summary, updating each
/// statistic whose corresponding <c>SearchInfoSummaryFeature</c> flag is enabled.
/// </summary>
/// <param name="info">The info to add.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="info"/> is null or empty.</exception>
public void AddSearchInfo(SearchInfo info)
{
    if (info == null)
        throw new ArgumentNullException("info", "info cannot be null");
    if (info.IsEmpty)
        throw new ArgumentNullException("info", "info cannot be empty");
    this.totalSearches += 1L;
    // Searches-with-results counter is gated on the TotalSearches feature flag.
    if ((info.TotalResultsFound > 0) && (this.features & SearchInfoSummaryFeature.TotalSearches) == SearchInfoSummaryFeature.TotalSearches)
        this.totalSearchesWithResults += 1L;
    if (info.Canceled && (this.features & SearchInfoSummaryFeature.CanceledSearches) == SearchInfoSummaryFeature.CanceledSearches)
        this.totalCanceledSearches += 1L;
    if ((this.features & SearchInfoSummaryFeature.TotalSearchesByMethod) == SearchInfoSummaryFeature.TotalSearchesByMethod)
    {
        // NOTE(review): the indexer-read below assumes totalSearchesFromEachMethod is
        // pre-seeded with every SearchMethodType key; an unseeded key would throw —
        // confirm against the constructor.
        Dictionary<SearchMethodType, int> dictionary;
        SearchMethodType type;
        (dictionary = this.totalSearchesFromEachMethod)[type = info.SearchMethodType] = dictionary[type] + 1;
    }
    if ((this.features & SearchInfoSummaryFeature.TotalSearchesByIndex) == SearchInfoSummaryFeature.TotalSearchesByIndex)
    {
        if (!this.totalSearchesFromEachIndex.ContainsKey(info.IndexName))
            this.totalSearchesFromEachIndex.Add(info.IndexName, 0);
        this.totalSearchesFromEachIndex[info.IndexName]++;
    }
    if ((this.features & SearchInfoSummaryFeature.SearchTimeSpread) == SearchInfoSummaryFeature.SearchTimeSpread)
    {
        // Bucket searches by minute (seconds zeroed out) for the time-spread histogram.
        DateTime key = new DateTime(info.CreatedTime.Year, info.CreatedTime.Month, info.CreatedTime.Day, info.CreatedTime.Hour, info.CreatedTime.Minute, 0);
        if (!this.searchSpread.ContainsKey(key))
            this.searchSpread.Add(key, 0);
        this.searchSpread[key]++;
    }
    bool flag = (this.features & SearchInfoSummaryFeature.UniqueQueries) == SearchInfoSummaryFeature.UniqueQueries;
    bool flag2 = (this.features & SearchInfoSummaryFeature.UniqueClauses) == SearchInfoSummaryFeature.UniqueClauses;
    if (!string.IsNullOrEmpty(info.Query) && (flag || flag2))
    {
        try
        {
            // Parse the query text so unique queries/clauses are counted in a
            // normalized (parsed-and-printed) form rather than raw input.
            BooleanQuery query = new BooleanQuery();
            query.Add(this.parser.Parse(info.Query), BooleanClause.Occur.SHOULD);
            if (flag)
            {
                string str = query.ToString();
                if (!this.totalUniqueSearchesAndTotalTimesUsed.ContainsKey(str))
                    this.totalUniqueSearchesAndTotalTimesUsed.Add(str, 0);
                this.totalUniqueSearchesAndTotalTimesUsed[str]++;
                str = null;
            }
            if (flag2)
            {
                System.Collections.Hashtable terms = new System.Collections.Hashtable();
                query.ExtractTerms(terms);
                // its okay to fail, if its a term with something like Field:Value~0.5 we don't want the primitives, the list would be too large
                string value = null;
                foreach (var term in terms)
                {
                    value = ((System.Collections.DictionaryEntry)term).Key.ToString();
                    if (!this.totalUniqueClausesAndTotalTimesUsed.ContainsKey(value))
                        this.totalUniqueClausesAndTotalTimesUsed.Add(value, 0);
                    this.totalUniqueClausesAndTotalTimesUsed[value]++;
                }
            }
        }
        // Parse failures are intentionally swallowed: unparseable queries simply
        // don't contribute to the unique-query/clause statistics.
        catch
        {
        }
    }
}
/// <summary>
/// Rewrites this fuzzy-like-this query into a <see cref="BooleanQuery"/> of term
/// variants. The rewritten query is cached and returned directly on later calls.
/// Draining <c>q</c> and clearing <c>fieldVals</c> makes this method effectively
/// single-shot per instance.
/// </summary>
public override Query Rewrite(IndexReader reader)
{
    if (rewrittenQuery != null)
    {
        return(rewrittenQuery);
    }
    //load up the list of possible terms
    foreach (var f in fieldVals)
    {
        AddTerms(reader, f);
    }
    //clear the list of fields
    fieldVals.Clear();
    BooleanQuery bq = new BooleanQuery();
    //create BooleanQueries to hold the variants for each token/field pair and ensure it
    // has no coord factor
    //Step 1: sort the termqueries by term/field
    IDictionary <Term, IList <ScoreTerm> > variantQueries = new Dictionary <Term, IList <ScoreTerm> >();
    int size = q.Count;
    for (int i = 0; i < size; i++)
    {
        // q is a priority queue; Pop() drains it while grouping variants by their source term.
        ScoreTerm st = q.Pop();
        if (!variantQueries.TryGetValue(st.FuzziedSourceTerm, out IList <ScoreTerm> l) || l is null)
        {
            l = new JCG.List <ScoreTerm>();
            variantQueries[st.FuzziedSourceTerm] = l;
        }
        l.Add(st);
    }
    //Step 2: Organize the sorted termqueries into zero-coord scoring boolean queries
    foreach (IList <ScoreTerm> variants in variantQueries.Values)
    {
        if (variants.Count == 1)
        {
            //optimize where only one selected variant
            ScoreTerm st = variants[0];
            Query tq = ignoreTF ? (Query) new ConstantScoreQuery(new TermQuery(st.Term)) : new TermQuery(st.Term, 1);
            tq.Boost = st.Score; // set the boost to a mix of IDF and score
            bq.Add(tq, Occur.SHOULD);
        }
        else
        {
            BooleanQuery termVariants = new BooleanQuery(true); //disable coord and IDF for these term variants
            foreach (ScoreTerm st in variants)
            {
                // found a match
                Query tq = ignoreTF ? (Query) new ConstantScoreQuery(new TermQuery(st.Term)) : new TermQuery(st.Term, 1);
                tq.Boost = st.Score; // set the boost using the ScoreTerm's score
                termVariants.Add(tq, Occur.SHOULD); // add to query
            }
            bq.Add(termVariants, Occur.SHOULD); // add to query
        }
    }
    //TODO possible alternative step 3 - organize above booleans into a new layer of field-based
    // booleans with a minimum-should-match of NumFields-1?
    bq.Boost = Boost;
    this.rewrittenQuery = bq;
    return(bq);
}
/*
 * How to construct a boolean query with shingles. A query like this will
 * implicitly score those documents higher that contain the words in the query
 * in the right order and adjacent to each other.
 */
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testShingleAnalyzerWrapperBooleanQuery() throws Exception
public virtual void testShingleAnalyzerWrapperBooleanQuery()
{
    BooleanQuery q = new BooleanQuery();
    // Tokenize the query text with the shingle analyzer so multi-word shingles
    // become individual SHOULD clauses.
    TokenStream ts = analyzer.tokenStream("content", "test sentence");
    try
    {
        CharTermAttribute termAtt = ts.addAttribute(typeof(CharTermAttribute));
        ts.reset();
        while (ts.incrementToken())
        {
            string termText = termAtt.ToString();
            q.add(new TermQuery(new Term("content", termText)), BooleanClause.Occur.SHOULD);
        }
        ts.end();
    }
    finally
    {
        // Always release the token stream, even if tokenization throws.
        IOUtils.closeWhileHandlingException(ts);
    }
    ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
    // Expected ranking of the fixture's three documents for this query.
    int[] ranks = new int[] {1, 2, 0};
    compareRanks(hits, ranks);
}
/// <summary>
/// Verifies that the fluent <c>Boost</c> call yields a <see cref="BooleanQuery"/>.
/// </summary>
public void Boost_ReturnsBooleanQuery()
{
    var boosted = new BooleanQuery().Boost(2.2);

    Assert.IsType <BooleanQuery>(boosted);
}
/// <summary>
/// Rewrites a <see cref="MultiTermQuery"/> either as a constant-score filter (when
/// the number of terms or visited docs exceeds the configured cutoffs) or as a
/// score-stripped <see cref="BooleanQuery"/> of all matched terms.
/// </summary>
public override Query Rewrite(IndexReader reader, MultiTermQuery query)
{
    // Get the enum and start visiting terms. If we
    // exhaust the enum before hitting either of the
    // cutoffs, we use ConstantBooleanQueryRewrite; else,
    // ConstantFilterRewrite:
    ICollection<Term> pendingTerms = new List<Term>();
    // docCountPercent is a percentage of the reader's MaxDoc.
    int docCountCutoff = (int) ((docCountPercent / 100.0) * reader.MaxDoc);
    // Never exceed BooleanQuery's global clause limit.
    int termCountLimit = System.Math.Min(BooleanQuery.MaxClauseCount, termCountCutoff);
    int docVisitCount = 0;
    FilteredTermEnum enumerator = query.GetEnum(reader);
    try
    {
        while (true)
        {
            Term t = enumerator.Term;
            if (t != null)
            {
                pendingTerms.Add(t);
                // Loading the TermInfo from the terms dict here
                // should not be costly, because 1) the
                // query/filter will load the TermInfo when it
                // runs, and 2) the terms dict has a cache:
                docVisitCount += reader.DocFreq(t);
            }
            if (pendingTerms.Count >= termCountLimit || docVisitCount >= docCountCutoff)
            {
                // Too many terms -- make a filter.
                Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter<MultiTermQuery>(query));
                result.Boost = query.Boost;
                return result;
            }
            else if (!enumerator.Next())
            {
                // Enumeration is done, and we hit a small
                // enough number of terms & docs -- just make a
                // BooleanQuery, now
                BooleanQuery bq = new BooleanQuery(true);
                foreach(Term term in pendingTerms)
                {
                    TermQuery tq = new TermQuery(term);
                    bq.Add(tq, Occur.SHOULD);
                }
                // Strip scores
                Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq));
                result.Boost = query.Boost;
                query.IncTotalNumberOfTerms(pendingTerms.Count);
                return result;
            }
        }
    }
    finally
    {
        enumerator.Close();
    }
}
/// <summary>
/// A negative boost value must be rejected with <see cref="ArgumentOutOfRangeException"/>.
/// </summary>
public void Boost_WhenBoostIsLessThanZero_ThrowsArgumentOutOfRangeException()
{
    var target = new BooleanQuery();

    Assert.Throws <ArgumentOutOfRangeException>(() => target.Boost(-.1));
}
/// <summary>
/// Persists the current folder preferences as a single JSON document in the index,
/// first deleting any previously stored preferences document.
/// </summary>
/// <param name="writer">The index writer used to delete, add, and commit.</param>
private void Save(IndexWriter writer)
{
    // Delete the old preferences document (matched by the well-known name field)
    // so only one preferences document exists at a time.
    var query = new BooleanQuery();
    query.add(new TermQuery(new Term(PreferencesDocumentField, PreferencesName)), BooleanClause.Occur.MUST);
    writer.deleteDocuments(query);
    // Snapshot the in-memory folder registry into a serializable DTO.
    var prefs = new FolderPreferences();
    prefs.NextFolderId = nextFolderId;
    prefs.FolderMapping = new List<FolderMap>();
    foreach (var key in registeredFolders.Keys)
        prefs.FolderMapping.Add(new FolderMap { Id = key, Path = registeredFolders[key] });
    var json = JsonConvert.SerializeObject(prefs);
    // Create and store the document
    var doc = new Document();
    doc.add(new StringField(PreferencesDocumentField, PreferencesName, Field.Store.YES));
    doc.add(new StringField(RegisteredFoldersField, json, Field.Store.YES));
    writer.addDocument(doc);
    writer.commit();
}
/// <summary>
/// Exporting a <see cref="BooleanQuery"/> without any sub-queries must fail
/// with <see cref="InvalidOperationException"/>.
/// </summary>
public void Throws_InvalidOperationException_When_No_Sub_Queries()
{
    var emptyQuery = new BooleanQuery();

    Assert.Throws <InvalidOperationException>(() => emptyQuery.Export());
}
/// <summary>
/// Adds one dimension of drill downs; if you pass the same
/// dimension more than once it is OR'd with the previous
/// constraints on that dimension, and all dimensions are
/// AND'd against each other and the base query.
/// </summary>
public void Add(string dim, params string[] path)
{
    // A repeated dimension is merged (OR'd) into its existing clause instead.
    if (drillDownDims.ContainsKey(dim))
    {
        Merge(dim, path);
        return;
    }

    string dimField = config.GetDimConfig(dim).IndexFieldName;

    // Coord is disabled: drill-down clauses should not influence scoring proportionally.
    BooleanQuery dimQuery = new BooleanQuery(true);
    dimQuery.Add(new TermQuery(Term(dimField, dim, path)), Occur.SHOULD);
    Add(dim, dimQuery);
}
//TODO this is basically old code that hasn't been verified well and should probably be removed
/// <summary>
/// Builds a query that constrains matches to the shape's bounding box and ranks
/// them by distance (via a <see cref="FunctionQuery"/> over a distance value source).
/// Only <see cref="Rectangle"/> and <see cref="Circle"/> shapes are supported.
/// </summary>
/// <exception cref="InvalidOperationException">Unsupported shape, or a bbox crossing the dateline.</exception>
/// <exception cref="UnsupportedSpatialOperation">The operation has no spatial query mapping.</exception>
public Query MakeQueryDistanceScore(SpatialArgs args)
{
    // For starters, just limit the bbox
    var shape = args.Shape;
    if (!(shape is Rectangle || shape is Circle))
    {
        throw new InvalidOperationException("Only Rectangles and Circles are currently supported, found [" + shape.GetType().Name + "]");//TODO
    }
    Rectangle bbox = shape.GetBoundingBox();
    if (bbox.GetCrossesDateLine())
    {
        throw new InvalidOperationException("Crossing dateline not yet supported");
    }
    ValueSource valueSource = null;
    Query spatial = null;
    SpatialOperation op = args.Operation;
    if (SpatialOperation.Is(op, SpatialOperation.BBoxWithin, SpatialOperation.BBoxIntersects))
    {
        spatial = MakeWithin(bbox);
    }
    else if (SpatialOperation.Is(op, SpatialOperation.Intersects, SpatialOperation.IsWithin))
    {
        spatial = MakeWithin(bbox);
        var circle = args.Shape as Circle;
        if (circle != null)
        {
            // For circles, further restrict the bbox matches to the circle's radius
            // using a distance-based value-source filter.
            // Make the ValueSource
            valueSource = MakeDistanceValueSource(shape.GetCenter());
            var vsf = new ValueSourceFilter(new QueryWrapperFilter(spatial), valueSource, 0, circle.GetRadius());
            spatial = new FilteredQuery(new MatchAllDocsQuery(), vsf);
        }
    }
    else if (op == SpatialOperation.IsDisjointTo)
    {
        spatial = MakeDisjoint(bbox);
    }
    if (spatial == null)
    {
        throw new UnsupportedSpatialOperation(args.Operation);
    }
    // Reuse (and cache) the circle's value source if one was built; otherwise
    // build a fresh distance source from the shape's center.
    if (valueSource != null)
    {
        valueSource = new CachingDoubleValueSource(valueSource);
    }
    else
    {
        valueSource = MakeDistanceValueSource(shape.GetCenter());
    }
    // Combine the spatial constraint with the distance-ranking function query.
    Query spatialRankingQuery = new FunctionQuery(valueSource);
    var bq = new BooleanQuery();
    bq.Add(spatial, Occur.MUST);
    bq.Add(spatialRankingQuery, Occur.MUST);
    return(bq);
}
/// <summary>
/// Used by <see cref="Clone"/>
/// </summary>
internal DrillDownQuery(FacetsConfig config, BooleanQuery query, IDictionary<string, int?> drillDownDims)
{
    this.config = config;
    // Clone the source query so the copy can be mutated independently.
    this.query = (BooleanQuery)query.Clone();
    this.drillDownDims.AddAll(drillDownDims);
}
/// <summary>
/// Rewrites the parsed phrase contents into span queries: each boolean clause of
/// the phrase becomes a span clause, combined into a <see cref="SpanNearQuery"/>.
/// When the phrase contains MUST_NOT clauses, the result is a
/// <see cref="SpanNotQuery"/> of the positive clauses minus the full sequence.
/// </summary>
/// <exception cref="ArgumentException">A clause type that cannot be converted to a span query.</exception>
public override Query Rewrite(IndexReader reader)
{
    // ArrayList spanClauses = new ArrayList();
    if (contents is TermQuery)
    {
        // Single-term phrase needs no span wrapping.
        return(contents);
    }
    // Build a sequence of Span clauses arranged in a SpanNear - child
    // clauses can be complex
    // Booleans e.g. nots and ors etc
    int numNegatives = 0;
    if (!(contents is BooleanQuery))
    {
        throw new ArgumentException("Unknown query type \"" + contents.GetType().Name + "\" found in phrase query string \"" + phrasedQueryStringContents + "\"");
    }
    BooleanQuery bq = (BooleanQuery)contents;
    BooleanClause[] bclauses = bq.Clauses;
    SpanQuery[] allSpanClauses = new SpanQuery[bclauses.Length];
    // For all clauses e.g. one* two~
    for (int i = 0; i < bclauses.Length; i++)
    {
        // HashSet bclauseterms=new HashSet();
        Query qc = bclauses[i].Query;
        // Rewrite this clause e.g one* becomes (one OR onerous)
        qc = qc.Rewrite(reader);
        if (bclauses[i].Occur_.Equals(BooleanClause.Occur.MUST_NOT))
        {
            numNegatives++;
        }
        if (qc is BooleanQuery)
        {
            List <SpanQuery> sc = new List <SpanQuery>();
            AddComplexPhraseClause(sc, (BooleanQuery)qc);
            if (sc.Count > 0)
            {
                allSpanClauses[i] = sc.ElementAt(0);
            }
            else
            {
                // Insert fake term e.g. phrase query was for "Fred Smithe*" and
                // there were no "Smithe*" terms - need to
                // prevent match on just "Fred".
                allSpanClauses[i] = new SpanTermQuery(new Term(field, "Dummy clause because no terms found - must match nothing"));
            }
        }
        else
        {
            if (qc is TermQuery)
            {
                TermQuery tq = (TermQuery)qc;
                allSpanClauses[i] = new SpanTermQuery(tq.Term);
            }
            else
            {
                throw new ArgumentException("Unknown query type \"" + qc.GetType().Name + "\" found in phrase query string \"" + phrasedQueryStringContents + "\"");
            }
        }
    }
    if (numNegatives == 0)
    {
        // The simple case - no negative elements in phrase
        return(new SpanNearQuery(allSpanClauses, slopFactor, inOrder));
    }
    // Complex case - we have mixed positives and negatives in the
    // sequence.
    // Need to return a SpanNotQuery
    List <SpanQuery> positiveClauses = new List <SpanQuery>();
    for (int j = 0; j < allSpanClauses.Length; j++)
    {
        if (!bclauses[j].Occur_.Equals(BooleanClause.Occur.MUST_NOT))
        {
            positiveClauses.Add(allSpanClauses[j]);
        }
    }
    SpanQuery[] includeClauses = positiveClauses .ToArray();
    SpanQuery include = null;
    if (includeClauses.Length == 1)
    {
        include = includeClauses[0]; // only one positive clause
    }
    else
    {
        // need to increase slop factor based on gaps introduced by
        // negatives
        include = new SpanNearQuery(includeClauses, slopFactor + numNegatives, inOrder);
    }
    // Use sequence of positive and negative values as the exclude.
    SpanNearQuery exclude = new SpanNearQuery(allSpanClauses, slopFactor, inOrder);
    SpanNotQuery snot = new SpanNotQuery(include, exclude);
    return(snot);
}
/// <summary>
/// Used by <see cref="DrillSideways"/>
/// </summary>
internal DrillDownQuery(FacetsConfig config, Query baseQuery, IList<Query> clauses, IDictionary<string, int?> drillDownDims)
{
    this.config = config;
    this.drillDownDims.AddAll(drillDownDims);

    // Coord is disabled: the drill-down clauses act as filters, not scoring signals.
    query = new BooleanQuery(true);
    if (baseQuery != null)
    {
        query.Add(baseQuery, Occur.MUST);
    }
    foreach (Query drillDownClause in clauses)
    {
        query.Add(drillDownClause, Occur.MUST);
    }
}
/// <summary>
/// Converts an <see cref="ExpressionEx"/> node into the equivalent Lucene
/// <see cref="Query"/>. Range operators (Ge/Gt/Le/Lt/Between) dispatch on the
/// <c>RangeType</c> carried in the expression's auxiliary value; everything else
/// falls back to term/wildcard/fuzzy/prefix queries over the prepared string value.
/// </summary>
/// <exception cref="ArgumentOutOfRangeException">Unknown <c>ExpressionOperator</c>.</exception>
private static Query Convert(ExpressionEx exp)
{
    #region TODO parameter settings
    // Open-ended bounds used for one-sided range queries.
    int maxSupportedIntValue = int.MaxValue;
    int minSupportedIntValue = int.MinValue;
    long minSupportedLongValue = long.MinValue;
    long maxSupportedLongValue = long.MaxValue;
    float minSupportedFloatValue = float.MinValue;
    float maxSupportedFloatValue = float.MaxValue;
    // NOTE(review): dates are stored as numeric strings (DateTools, MINUTE
    // resolution); 0 serves as the open lower bound for date ranges.
    long minSupportedDateNum = 000000000000;
    #endregion
    switch (exp.ExpressionOperator)
    {
        #region eq
        case ExpressionOperators.Eq:
            object value = exp.Value;
            #region
            //if (value is bool)
            //{
            //    if((bool)value)
            //    { value = SysDefinition.DATA_TYPE_BOOL_TRUE; }
            //    else
            //    { value = SysDefinition.DATA_TYPE_BOOL_FALSE; }
            //}
            #endregion
            if (value is DateTime)
            {
                // Dates are compared on their minute-resolution string form.
                string startTime = DateTools.DateToString((DateTime)exp.Value, DateTools.Resolution.MINUTE);
                return(new TermQuery(new Term(exp.PropertyName, startTime)));
            }
            return(new TermQuery(new Term(exp.PropertyName, PrepareValueString(value.ToString()))));
        #endregion
        #region ge
        case ExpressionOperators.Ge:
            // Inclusive lower-bounded range; the type tag in Value1 picks the numeric kind.
            if (exp.Value1 is RangeType && (RangeType)exp.Value1 == RangeType.INT)
            {
                return(NumericRangeQuery.NewIntRange(exp.PropertyName, (int)exp.Value, maxSupportedIntValue, true, true));
            }
            if (exp.Value1 is RangeType && (RangeType)exp.Value1 == RangeType.LONG)
            {
                return(NumericRangeQuery.NewLongRange(exp.PropertyName, (long)exp.Value, maxSupportedLongValue, true, true));
            }
            if (exp.Value1 is RangeType && (RangeType)exp.Value1 == RangeType.FLOAT)
            {
                return(NumericRangeQuery.NewFloatRange(exp.PropertyName, (float)exp.Value, maxSupportedFloatValue, true, true));
            }
            #region
            if (exp.Value1 is RangeType && (RangeType)exp.Value1 == RangeType.DOUBLE)
            {
                return(NumericRangeQuery.NewDoubleRange(exp.PropertyName, (double)exp.Value, double.MaxValue, true, true));
            }
            #endregion
            if (exp.Value1 is RangeType && (RangeType)exp.Value1 == RangeType.DATETIME)
            {
                string startTime = DateTools.DateToString((DateTime)exp.Value, DateTools.Resolution.MINUTE);
                long end = long.Parse(startTime);
                return(NumericRangeQuery.NewLongRange(exp.PropertyName, end, maxSupportedLongValue, true, true));
            }
            // No numeric type tag: fall back to a lexicographic term range.
            return(new TermRangeQuery(exp.PropertyName, PrepareValueString(exp.Value.ToString()), null, true, true));
        #endregion
        #region gt
        case ExpressionOperators.Gt:
            // Exclusive lower-bounded range (same dispatch as Ge, exclusive endpoints).
            if (exp.Value1 is RangeType && (RangeType)exp.Value1 == RangeType.INT)
            {
                return(NumericRangeQuery.NewIntRange(exp.PropertyName, (int)exp.Value, maxSupportedIntValue, false, false));
            }
            if (exp.Value1 is RangeType && (RangeType)exp.Value1 == RangeType.LONG)
            {
                return(NumericRangeQuery.NewLongRange(exp.PropertyName, (long)exp.Value, maxSupportedLongValue, false, false));
            }
            if (exp.Value1 is RangeType && (RangeType)exp.Value1 == RangeType.FLOAT)
            {
                return(NumericRangeQuery.NewFloatRange(exp.PropertyName, (float)exp.Value, maxSupportedFloatValue, false, false));
            }
            #region
            if (exp.Value1 is RangeType && (RangeType)exp.Value1 == RangeType.DOUBLE)
            {
                return(NumericRangeQuery.NewDoubleRange(exp.PropertyName, (double)exp.Value, double.MaxValue, false, false));
            }
            #endregion
            if (exp.Value1 is RangeType && (RangeType)exp.Value1 == RangeType.DATETIME)
            {
                string startTime = DateTools.DateToString((DateTime)exp.Value, DateTools.Resolution.MINUTE);
                long end = long.Parse(startTime);
                return(NumericRangeQuery.NewLongRange(exp.PropertyName, end, maxSupportedLongValue, false, false));
            }
            return(new TermRangeQuery(exp.PropertyName, PrepareValueString(exp.Value.ToString()), null, false, false));
        #endregion
        #region le
        case ExpressionOperators.Le:
            // Inclusive upper-bounded range.
            if (exp.Value1 is RangeType && (RangeType)exp.Value1 == RangeType.INT)
            {
                return(NumericRangeQuery.NewIntRange(exp.PropertyName, minSupportedIntValue, (int)exp.Value, true, true));
            }
            if (exp.Value1 is RangeType && (RangeType)exp.Value1 == RangeType.LONG)
            {
                return(NumericRangeQuery.NewLongRange(exp.PropertyName, minSupportedLongValue, (long)exp.Value, true, true));
            }
            if (exp.Value1 is RangeType && (RangeType)exp.Value1 == RangeType.FLOAT)
            {
                return(NumericRangeQuery.NewFloatRange(exp.PropertyName, minSupportedFloatValue, (float)exp.Value, true, true));
            }
            #region
            if (exp.Value1 is RangeType && (RangeType)exp.Value1 == RangeType.DOUBLE)
            {
                return(NumericRangeQuery.NewDoubleRange(exp.PropertyName, double.MinValue, (double)exp.Value, true, true));
            }
            #endregion
            if (exp.Value1 is RangeType && (RangeType)exp.Value1 == RangeType.DATETIME)
            {
                string endTime = DateTools.DateToString((DateTime)exp.Value, DateTools.Resolution.MINUTE);
                long end = long.Parse(endTime);
                return(NumericRangeQuery.NewLongRange(exp.PropertyName, minSupportedDateNum, end, true, true));
            }
            return(new TermRangeQuery(exp.PropertyName, null, PrepareValueString(exp.Value.ToString()), true, true));
        #endregion
        #region lt
        case ExpressionOperators.Lt:
            // Exclusive upper-bounded range.
            if (exp.Value1 is RangeType && (RangeType)exp.Value1 == RangeType.INT)
            {
                return(NumericRangeQuery.NewIntRange(exp.PropertyName, minSupportedIntValue, (int)exp.Value, false, false));
            }
            if (exp.Value1 is RangeType && (RangeType)exp.Value1 == RangeType.LONG)
            {
                return(NumericRangeQuery.NewLongRange(exp.PropertyName, minSupportedLongValue, (long)exp.Value, false, false));
            }
            if (exp.Value1 is RangeType && (RangeType)exp.Value1 == RangeType.FLOAT)
            {
                return(NumericRangeQuery.NewFloatRange(exp.PropertyName, minSupportedFloatValue, (float)exp.Value, false, false));
            }
            #region
            if (exp.Value1 is RangeType && (RangeType)exp.Value1 == RangeType.DOUBLE)
            {
                return(NumericRangeQuery.NewDoubleRange(exp.PropertyName, double.MinValue, (double)exp.Value, false, false));
            }
            #endregion
            if (exp.Value1 is RangeType && (RangeType)exp.Value1 == RangeType.DATETIME)
            {
                string startTime = DateTools.DateToString((DateTime)exp.Value, DateTools.Resolution.MINUTE);
                long end = long.Parse(startTime);
                return(NumericRangeQuery.NewLongRange(exp.PropertyName, minSupportedDateNum, end, false, false));
            }
            return(new TermRangeQuery(exp.PropertyName, null, PrepareValueString(exp.Value.ToString()), false, false));
        #endregion
        #region noteq
        case ExpressionOperators.NotEq:
            object value1 = exp.Value;
            // Booleans and dates are normalized to their stored string forms
            // before being excluded with a MUST_NOT clause.
            if (value1 is bool)
            {
                if ((bool)value1)
                {
                    value1 = SysDefinition.DATA_TYPE_BOOL_TRUE;
                }
                else
                {
                    value1 = SysDefinition.DATA_TYPE_BOOL_FALSE;
                }
            }
            if (value1 is DateTime)
            {
                value1 = DateTools.DateToString((DateTime)value1, DateTools.Resolution.MINUTE);
            }
            var bq = new BooleanQuery();
            bq.Add(new BooleanClause(new TermQuery(new Term(exp.PropertyName, PrepareValueString(value1.ToString()))), BooleanClause.Occur.MUST_NOT));
            return(bq);
        #endregion
        #region between
        case ExpressionOperators.Between:
            // Value/Value1 = bounds, Value2 = inclusiveness (applied to both ends),
            // Value3 = range type, Value4 = optional precision step (default 4).
            int precisionStep = 4;
            if (exp.Value4 is int)
            {
                precisionStep = (int)exp.Value4;
            }
            switch (exp.Value3 is RangeType ? (RangeType)exp.Value3 : (RangeType)0)
            {
                case RangeType.INT:
                    return(NumericRangeQuery.NewIntRange(exp.PropertyName, precisionStep, (int)exp.Value, (int)exp.Value1, (bool)exp.Value2, (bool)exp.Value2));
                case RangeType.LONG:
                    return(NumericRangeQuery.NewLongRange(exp.PropertyName, precisionStep, (long)exp.Value, (long)exp.Value1, (bool)exp.Value2, (bool)exp.Value2));
                case RangeType.FLOAT:
                    return(NumericRangeQuery.NewFloatRange(exp.PropertyName, precisionStep, (float)exp.Value, (float)exp.Value1, (bool)exp.Value2, (bool)exp.Value2));
                case RangeType.DOUBLE:
                    return(NumericRangeQuery.NewDoubleRange(exp.PropertyName, precisionStep, (double)exp.Value, (double)exp.Value1, (bool)exp.Value2, (bool)exp.Value2));
                case RangeType.DATETIME:
                    string startTime = DateTools.DateToString((DateTime)exp.Value, DateTools.Resolution.MINUTE);
                    string endTime = DateTools.DateToString((DateTime)exp.Value1, DateTools.Resolution.MINUTE);
                    long start = long.Parse(startTime);
                    long end = long.Parse(endTime);
                    return(NumericRangeQuery.NewLongRange(exp.PropertyName, precisionStep, start, end, (bool)exp.Value2, (bool)exp.Value2));
                default:
                {
                    return(new TermRangeQuery(exp.PropertyName, PrepareValueString(exp.Value.ToString()), PrepareValueString(exp.Value1.ToString()), (bool)exp.Value2, (bool)exp.Value2));
                }
            }
        #endregion
        case ExpressionOperators.Like:
            // Like = wildcard "*value*" OR fuzzy match; Value4 may override similarity.
            int prefixLength = 0;
            float similarity = 0.5f;
            if (exp.Value4 is float)
            {
                similarity = (float)exp.Value4;
            }
            var boolQuery = new BooleanQuery();
            var wildQuery = new WildcardQuery(new Term(exp.PropertyName, PrepareValueString("*" + exp.Value + "*")));
            boolQuery.Add(wildQuery, BooleanClause.Occur.SHOULD);
            boolQuery.Add(new FuzzyQuery(new Term(exp.PropertyName, PrepareValueString(exp.Value.ToString())), similarity), BooleanClause.Occur.SHOULD);
            return(boolQuery);
        case ExpressionOperators.Fuzzy:
            // Fuzzy defaults: similarity 0.9 (Value4 overrides), prefix length 0 (Value3 overrides).
            similarity = 0.9f;
            if (exp.Value4 is float)
            {
                similarity = (float)exp.Value4;
            }
            prefixLength = 0;
            if (exp.Value3 is int)
            {
                prefixLength = (int)exp.Value3;
            }
            return(new FuzzyQuery(new Term(exp.PropertyName, PrepareValueString(exp.Value.ToString())), similarity, prefixLength));
        // case ExpressionOperators.IsEmpty:
        //     return new TermQuery(new Term(exp.PropertyName, SysDefinition.DATA_TYPE_NULL_OR_EMPTY));
        //
        // case ExpressionOperators.IsNotEmpty:
        //     var bq2 = new BooleanQuery();
        //     bq2.Add(new BooleanClause(new TermQuery(new Term(exp.PropertyName, SysDefinition.DATA_TYPE_NULL_OR_EMPTY)),
        //         BooleanClause.Occur.MUST_NOT));
        //     return bq2;
        case ExpressionOperators.StartsWith:
            return(new PrefixQuery(new Term(exp.PropertyName, PrepareValueString(exp.Value.ToString()))));
        case ExpressionOperators.EndWith:
            return(new WildcardQuery(new Term(exp.PropertyName, PrepareValueString("*" + exp.Value))));
        case ExpressionOperators.Contains:
            return(new WildcardQuery(new Term(exp.PropertyName, PrepareValueString("*" + exp.Value + "*"))));
        default:
            throw new ArgumentOutOfRangeException();
    }
}
/// <summary>
/// A conjunction of "seventy" and "seven" must hit exactly the documents whose
/// spelled-out number contains both words.
/// </summary>
public virtual void TestBoolean()
{
    var conjunction = new BooleanQuery();
    conjunction.Add(new TermQuery(new Term("field", "seventy")), BooleanClause.Occur.MUST);
    conjunction.Add(new TermQuery(new Term("field", "seven")), BooleanClause.Occur.MUST);

    // Expected matching doc ids.
    int[] expectedDocs =
    {
        77, 177, 277, 377, 477, 577, 677,
        770, 771, 772, 773, 774, 775, 776, 777, 778, 779,
        877, 977, 1077, 1177, 1277, 1377, 1477, 1577, 1677,
        1770, 1771, 1772, 1773, 1774, 1775, 1776, 1777, 1778, 1779,
        1877, 1977
    };
    CheckHits(conjunction, expectedDocs);
}
/// <summary>
/// Resets the accumulated query by replacing it with a new, empty <see cref="BooleanQuery"/>.
/// </summary>
public void clearBooleanQuery()
{
    booleanQuery = new BooleanQuery();
}
/// <summary>
/// Verifies that, under every similarity implementation, a disjunction containing
/// one matching term ("bar") and one term absent from the index ("baz") still
/// returns the single indexed document.
/// </summary>
public virtual void TestEmptyTerm()
{
    Directory dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    Document doc = new Document();
    doc.Add(NewTextField("foo", "bar", Field.Store.NO));
    iw.AddDocument(doc);
    IndexReader ir = iw.Reader;
    iw.Dispose();
    IndexSearcher @is = NewSearcher(ir);
    // Exercise each similarity in the fixture's Sims list.
    foreach (Similarity sim in Sims)
    {
        @is.Similarity = sim;
        BooleanQuery query = new BooleanQuery(true);
        query.Add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD);
        // "baz" does not exist in the index; the query must still match via "bar".
        query.Add(new TermQuery(new Term("foo", "baz")), BooleanClause.Occur.SHOULD);
        Assert.AreEqual(1, @is.Search(query, 10).TotalHits);
    }
    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Streams documents similar to the document identified by
/// <paramref name="query"/> (by id and/or map-group fields), excluding the source
/// document itself and de-duplicating results by document id.
/// </summary>
/// <param name="query">The more-like-this query: source doc id, field list, paging, and optional WHERE filter.</param>
/// <param name="stopWords">Optional stop words to exclude from similarity terms.</param>
/// <param name="createRetriever">Factory for the result retriever, given the fields to fetch.</param>
/// <param name="documentsContext">JSON context used when building the additional WHERE query.</param>
/// <param name="getSpatialField">Resolver for spatial fields referenced by the WHERE clause.</param>
/// <param name="token">NOTE(review): currently unused — cancellation is not observed inside this iterator.</param>
/// <exception cref="InvalidOperationException">The source document could not be found.</exception>
public IEnumerable <Document> MoreLikeThis(MoreLikeThisQueryServerSide query, HashSet <string> stopWords, Func <SelectField[], IQueryResultRetriever> createRetriever, JsonOperationContext documentsContext, Func <string, SpatialField> getSpatialField, CancellationToken token)
{
    // Locate the source document by id and/or map-group field values.
    var documentQuery = new BooleanQuery();
    if (string.IsNullOrWhiteSpace(query.DocumentId) == false)
    {
        documentQuery.Add(new TermQuery(new Term(Constants.Documents.Indexing.Fields.DocumentIdFieldName, query.DocumentId.ToLowerInvariant())), Occur.MUST);
    }
    foreach (var key in query.MapGroupFields.Keys)
    {
        documentQuery.Add(new TermQuery(new Term(key, query.MapGroupFields[key])), Occur.MUST);
    }
    var td = _searcher.Search(documentQuery, 1, _state); // get the current Lucene docid for the given RavenDB doc ID
    if (td.ScoreDocs.Length == 0)
    {
        throw new InvalidOperationException("Document " + query.DocumentId + " could not be found");
    }
    var ir = _searcher.IndexReader;
    var mlt = new RavenMoreLikeThis(ir, query, _state);
    if (stopWords != null)
    {
        mlt.SetStopWords(stopWords);
    }
    // Use the caller-specified fields, or every indexed field except the internal id/hash fields.
    string[] fieldNames;
    if (query.Fields != null && query.Fields.Length > 0)
    {
        fieldNames = query.Fields;
    }
    else
    {
        fieldNames = ir.GetFieldNames(IndexReader.FieldOption.INDEXED)
            .Where(x => x != Constants.Documents.Indexing.Fields.DocumentIdFieldName && x != Constants.Documents.Indexing.Fields.ReduceKeyHashFieldName)
            .ToArray();
    }
    mlt.SetFieldNames(fieldNames);
    mlt.Analyzer = _analyzer;
    var pageSize = GetPageSize(_searcher, query.PageSize);
    var mltQuery = mlt.Like(td.ScoreDocs[0].Doc);
    var tsdc = TopScoreDocCollector.Create(pageSize, true);
    // If the query carries a WHERE clause, AND it with the similarity query.
    if (query.Metadata.WhereFields.Count > 0)
    {
        var additionalQuery = QueryBuilder.BuildQuery(documentsContext, query.Metadata, query.Metadata.Query.Where, null, _analyzer, getSpatialField);
        mltQuery = new BooleanQuery
        {
            { mltQuery, Occur.MUST },
            { additionalQuery, Occur.MUST }
        };
    }
    _searcher.Search(mltQuery, tsdc, _state);
    var hits = tsdc.TopDocs().ScoreDocs;
    var baseDocId = td.ScoreDocs[0].Doc;
    // Track returned ids so each document is yielded at most once.
    var ids = new HashSet <string>(StringComparer.OrdinalIgnoreCase);
    // Without an explicit doc id, fetch every distinct stored field of the base document.
    var fieldsToFetch = string.IsNullOrWhiteSpace(query.DocumentId)
        ? _searcher.Doc(baseDocId, _state).GetFields().Cast <AbstractField>().Select(x => x.Name).Distinct().Select(x => SelectField.Create(x)).ToArray()
        : null;
    var retriever = createRetriever(fieldsToFetch);
    foreach (var hit in hits)
    {
        // Skip the source document itself.
        if (hit.Doc == baseDocId)
        {
            continue;
        }
        var doc = _searcher.Doc(hit.Doc, _state);
        var id = doc.Get(Constants.Documents.Indexing.Fields.DocumentIdFieldName, _state) ?? doc.Get(Constants.Documents.Indexing.Fields.ReduceKeyHashFieldName, _state);
        if (id == null)
        {
            continue;
        }
        if (ids.Add(id) == false)
        {
            continue;
        }
        yield return(retriever.Get(doc, hit.Score, _state));
    }
}
/// <summary>
/// Unwraps a <see cref="BooleanQuery"/> and collects weighted terms from each of its
/// child clauses. <c>ExtractTerms</c> alone loses per-term boost information, so
/// "container" queries like this one are walked explicitly; MUST_NOT children are
/// skipped unless <paramref name="prohibited"/> is set.
/// </summary>
private static void GetTermsFromBooleanQuery(BooleanQuery query, HashSet<WeightedTerm> terms, bool prohibited, string fieldName)
{
    foreach (BooleanClause clause in query.GetClauses())
    {
        // Only descend into MUST_NOT clauses when prohibited terms are requested.
        if (prohibited || clause.Occur != Occur.MUST_NOT)
        {
            GetTerms(clause.Query, terms, prohibited, fieldName);
        }
    }
}
/// <summary>
/// Rewrites the drill-down query: clauses whose queries can be converted to
/// <see cref="Filter"/>s are pulled out and applied via <see cref="FilteredQuery"/>
/// (query-first strategy), while the remaining query clauses stay in a
/// coord-disabled conjunction with the base query.
/// </summary>
public override Query Rewrite(IndexReader r)
{
    if (query.Clauses.Count == 0)
    {
        // No base query and no drill-downs: match everything.
        return(new MatchAllDocsQuery());
    }
    IList <Filter> filters = new List <Filter>();
    IList <Query> queries = new List <Query>();
    IList <BooleanClause> clauses = query.Clauses;
    Query baseQuery;
    int startIndex;
    // When every clause is a drill-down dimension there is no explicit base
    // query; otherwise the first clause is the base query.
    if (drillDownDims.Count == query.Clauses.Count)
    {
        baseQuery = new MatchAllDocsQuery();
        startIndex = 0;
    }
    else
    {
        baseQuery = clauses[0].Query;
        startIndex = 1;
    }
    // Partition drill-down clauses into filterable and non-filterable ones.
    for (int i = startIndex; i < clauses.Count; i++)
    {
        BooleanClause clause = clauses[i];
        Query queryClause = clause.Query;
        Filter filter = GetFilter(queryClause);
        if (filter != null)
        {
            filters.Add(filter);
        }
        else
        {
            queries.Add(queryClause);
        }
    }
    if (filters.Count == 0)
    {
        // Nothing to convert: keep the original boolean query as-is.
        return(query);
    }
    else
    {
        // Wrap all filters using FilteredQuery
        // TODO: this is hackish; we need to do it because
        // BooleanQuery can't be trusted to handle the
        // "expensive filter" case. Really, each Filter should
        // know its cost and we should take that more
        // carefully into account when picking the right
        // strategy/optimization:
        Query wrapped;
        if (queries.Count == 0)
        {
            wrapped = baseQuery;
        }
        else
        {
            // disable coord
            BooleanQuery wrappedBQ = new BooleanQuery(true);
            if ((baseQuery is MatchAllDocsQuery) == false)
            {
                wrappedBQ.Add(baseQuery, Occur.MUST);
            }
            foreach (Query q in queries)
            {
                wrappedBQ.Add(q, Occur.MUST);
            }
            wrapped = wrappedBQ;
        }
        foreach (Filter filter in filters)
        {
            wrapped = new FilteredQuery(wrapped, filter, FilteredQuery.QUERY_FIRST_FILTER_STRATEGY);
        }
        return(wrapped);
    }
}