/// <summary>
/// Builds the combined Lucene query for the supplied search text.
/// </summary>
/// <remarks>
/// Four sub-queries are combined: an exact match on the "Id-Exact" field, a conjunction
/// (every whitespace-separated term must match), a disjunction (any term may match) and a
/// per-field trailing-wildcard query.
/// </remarks>
/// <param name="searchTerm">Raw search text; it is lower-cased and split on spaces.</param>
/// <returns>The query produced by combining the four sub-queries.</returns>
private static Query ParseQuery(string searchTerm)
{
    // Per-field boosts: handed to the multi-field parser and reused to scale wildcard matches.
    var fields = new Dictionary<string, float> { { "Id", 1.2f }, { "Title", 1.0f }, { "Tags", 1.0f}, { "Description", 0.8f }, { "Author", 0.6f } };
    var analyzer = new StandardAnalyzer(LuceneCommon.LuceneVersion);

    // TermQuery compares term text verbatim, so the exact-id lookup must use the text
    // WITHOUT query-parser escaping; an escaped "a\-b" can never equal an indexed "a-b".
    var exactIdTerm = searchTerm.ToLowerInvariant();

    // Escaping is only required for text that is fed through the query parser.
    searchTerm = QueryParser.Escape(searchTerm).ToLowerInvariant();
    var queryParser = new MultiFieldQueryParser(LuceneCommon.LuceneVersion, fields.Keys.ToArray(), analyzer, fields);

    var conjuctionQuery = new BooleanQuery();
    conjuctionQuery.SetBoost(1.5f);
    var disjunctionQuery = new BooleanQuery();
    var wildCardQuery = new BooleanQuery();
    wildCardQuery.SetBoost(0.7f);

    var exactIdQuery = new TermQuery(new Term("Id-Exact", exactIdTerm));
    exactIdQuery.SetBoost(2.5f);

    foreach (var term in searchTerm.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
    {
        // Parse each term once and share the result between both boolean queries.
        var termQuery = queryParser.Parse(term);
        conjuctionQuery.Add(termQuery, BooleanClause.Occur.MUST);
        disjunctionQuery.Add(termQuery, BooleanClause.Occur.SHOULD);

        // NOTE(review): the wildcard terms still carry the parser escaping; confirm whether
        // WildcardQuery in the Lucene version in use honours backslash escapes.
        foreach (var field in fields)
        {
            var wildCardTermQuery = new WildcardQuery(new Term(field.Key, term + "*"));
            wildCardTermQuery.SetBoost(0.7f * field.Value);
            wildCardQuery.Add(wildCardTermQuery, BooleanClause.Occur.SHOULD);
        }
    }

    return conjuctionQuery.Combine(new Query[] { exactIdQuery, conjuctionQuery, disjunctionQuery, wildCardQuery });
}
/// <summary>
/// Wraps <paramref name="luceneQuery"/> so that it only matches documents whose class field
/// equals the Lucene type name of one of the supplied types.
/// </summary>
/// <param name="classesAndSubclasses">Types to accept; when null the query is returned untouched.</param>
/// <param name="luceneQuery">The query to restrict.</param>
/// <returns>
/// <paramref name="luceneQuery"/> itself when no type set is given, otherwise a boolean query
/// requiring both the original query and the class restriction.
/// </returns>
public static Lucene.Net.Search.Query FilterQueryByClasses(IESI.ISet<System.Type> classesAndSubclasses, Lucene.Net.Search.Query luceneQuery)
{
    // Restricting inside the query is more practical than filtering classes by hand after
    // the search (especially on scrollable result sets) and probably reduces memory use.
    if (classesAndSubclasses != null)
    {
        BooleanQuery typeRestriction = new BooleanQuery();

        // A zero boost keeps DocumentBuilder.CLASS_FIELDNAME from influencing the score.
        typeRestriction.SetBoost(0);

        foreach (System.Type acceptedType in classesAndSubclasses)
        {
            typeRestriction.Add(
                new TermQuery(new Term(DocumentBuilder.CLASS_FIELDNAME, TypeHelper.LuceneTypeName(acceptedType))),
                BooleanClause.Occur.SHOULD);
        }

        BooleanQuery restricted = new BooleanQuery();
        restricted.Add(luceneQuery, BooleanClause.Occur.MUST);
        restricted.Add(typeRestriction, BooleanClause.Occur.MUST);
        return restricted;
    }

    return luceneQuery;
}
/// <summary>
/// Builds a zero-boost boolean query from consecutive (field, value) pairs and records a
/// descending priority for each value in <c>priority</c>.
/// </summary>
/// <param name="vals">Flat array laid out as field, value, field, value, ...; a trailing unpaired entry is ignored.</param>
/// <returns>A boolean query with one optional term clause per pair.</returns>
private Query GetElevatedQuery(System.String[] vals)
{
    BooleanQuery elevated = new BooleanQuery(false);
    elevated.SetBoost(0);

    // Priorities start at (pair count + 5) and drop by one per pair.
    int nextPriority = (vals.Length / 2) + 5;

    for (int fieldAt = 0, valueAt = 1; valueAt < vals.Length; fieldAt += 2, valueAt += 2)
    {
        System.String fieldName = vals[fieldAt];
        System.String fieldValue = vals[valueAt];

        elevated.Add(new TermQuery(new Term(fieldName, fieldValue)), BooleanClause.Occur.SHOULD);
        priority[fieldValue] = (System.Int32)nextPriority;
        nextPriority--;
    }

    return elevated;
}
/// <summary>
/// Creates the weight for this query.
/// </summary>
/// <param name="searcher">Searcher the weight is created for.</param>
/// <returns>
/// The weight of an equivalent boolean OR query when there is exactly one term array,
/// otherwise a <c>PhrasePrefixWeight</c> for this query.
/// </returns>
protected internal override Weight CreateWeight(Searcher searcher)
{
    if (termArrays.Count != 1)
    {
        return new PhrasePrefixWeight(this, searcher);
    }

    // Single-position shortcut: the alternatives collapse into a plain OR of term queries.
    Term[] alternatives = (Term[])termArrays[0];
    BooleanQuery anyOf = new BooleanQuery(true);
    foreach (Term alternative in alternatives)
    {
        anyOf.Add(new TermQuery(alternative), BooleanClause.Occur.SHOULD);
    }

    // Carry this query's boost over to the replacement.
    anyOf.SetBoost(GetBoost());
    return anyOf.CreateWeight(searcher);
}
/// <summary>
/// Assembles a nested boolean query (phrase, span-near, filtered, constant-score,
/// disjunction-max and minimum-should-match clauses) and hands it to <c>Qtest</c>
/// together with the array { 0, 1, 2 }.
/// </summary>
public virtual void Test2()
{
    BooleanQuery root = new BooleanQuery();

    // One required sloppy phrase followed by two optional span-near clauses.
    root.Add(qp.Parse("\"w1 w2\"~1"), Occur.MUST);
    root.Add(Snear(St("w2"), Sor("w5", "zz"), 4, true), Occur.SHOULD);
    root.Add(Snear(Sf("w3", 2), St("w2"), St("w3"), 5, true), Occur.SHOULD);

    // Heavily boosted filtered query.
    Query filtered = new FilteredQuery(qp.Parse("xx"), new ItemizedFilter(new int[] { 1, 3 }));
    filtered.SetBoost(1000);
    root.Add(filtered, Occur.SHOULD);

    // Negatively boosted constant-score query.
    Query constantScore = new ConstantScoreQuery(new ItemizedFilter(new int[] { 0, 2 }));
    constantScore.SetBoost(-20.0f);
    root.Add(constantScore, Occur.SHOULD);

    // Disjunction-max query that itself contains a nested disjunction-max query.
    DisjunctionMaxQuery outerMax = new DisjunctionMaxQuery(0.2f);
    outerMax.Add(Snear(St("w2"), Sor("w5", "zz"), 4, true));
    outerMax.Add(qp.Parse("QQ"));
    outerMax.Add(qp.Parse("xx yy -zz"));
    outerMax.Add(qp.Parse("-xx -w1"));

    DisjunctionMaxQuery innerMax = new DisjunctionMaxQuery(0.5f);
    innerMax.Add(qp.Parse("w1"));
    innerMax.Add(qp.Parse("w2"));
    innerMax.Add(qp.Parse("w3"));
    outerMax.Add(innerMax);

    root.Add(outerMax, Occur.SHOULD);

    // Zero-boost boolean query that needs at least two of its three optional clauses.
    BooleanQuery twoOfThree = new BooleanQuery();
    twoOfThree.SetMinimumNumberShouldMatch(2);
    twoOfThree.Add(Snear("w1", "w2", 1, true), Occur.SHOULD);
    twoOfThree.Add(Snear("w2", "w3", 1, true), Occur.SHOULD);
    twoOfThree.Add(Snear("w1", "w3", 3, true), Occur.SHOULD);
    twoOfThree.SetBoost(0.0f);
    root.Add(twoOfThree, Occur.SHOULD);

    Qtest(root, new int[] { 0, 1, 2 });
}
/// <summary>
/// Rewrites this query into a simpler form when possible.
/// </summary>
/// <param name="reader">Index reader; not consulted by this implementation.</param>
/// <returns>
/// A boolean OR of term queries carrying this query's boost when there is exactly one term
/// array; otherwise this instance unchanged.
/// </returns>
public override Query Rewrite(IndexReader reader)
{
    if (termArrays.Count != 1)
    {
        return this;
    }

    // Single-position shortcut: the alternatives collapse into a plain OR of term queries.
    Term[] alternatives = (Term[])termArrays[0];
    BooleanQuery anyOf = new BooleanQuery(true);
    foreach (Term alternative in alternatives)
    {
        anyOf.Add(new TermQuery(alternative), BooleanClause.Occur.SHOULD);
    }

    anyOf.SetBoost(GetBoost());
    return anyOf;
}
/// <summary>
/// Produces the scoring weight for this query.
/// </summary>
/// <param name="searcher">Searcher the weight is built against.</param>
/// <returns>
/// With exactly one term array, the weight of a boolean OR over its terms (same boost as
/// this query); in every other case a new <c>PhrasePrefixWeight</c>.
/// </returns>
protected internal override Weight CreateWeight(Searcher searcher)
{
    bool singlePosition = termArrays.Count == 1;
    if (!singlePosition)
    {
        return new PhrasePrefixWeight(this, searcher);
    }

    // One position only: score it as an OR over the candidate terms.
    Term[] candidates = (Term[]) termArrays[0];
    BooleanQuery disjunction = new BooleanQuery(true);
    int index = 0;
    while (index < candidates.Length)
    {
        disjunction.Add(new TermQuery(candidates[index]), BooleanClause.Occur.SHOULD);
        index++;
    }

    disjunction.SetBoost(GetBoost());
    return disjunction.CreateWeight(searcher);
}
/// <summary>
/// Turns a flat list of (field, value) pairs into a zero-boost boolean query and stores a
/// descending priority per value in <c>priority</c>.
/// </summary>
/// <param name="vals">Alternating field names and values; an unpaired last element is skipped.</param>
/// <returns>The boolean query holding one optional term clause per pair.</returns>
private Query GetElevatedQuery(System.String[] vals)
{
    BooleanQuery result = new BooleanQuery(false);
    result.SetBoost(0);

    // The first pair receives (number of pairs + 5); each later pair receives one less.
    int rank = (vals.Length / 2) + 5;
    int cursor = 0;
    while (cursor < vals.Length - 1)
    {
        Term pairTerm = new Term(vals[cursor], vals[cursor + 1]);
        result.Add(new TermQuery(pairTerm), BooleanClause.Occur.SHOULD);

        priority[vals[cursor + 1]] = (System.Int32) rank;
        rank -= 1;
        cursor += 2;
    }

    return result;
}
/// <summary>
/// Expands the pending field values into a boolean query of fuzzy term variants and caches
/// the result.
/// </summary>
/// <param name="reader">Index reader passed to <c>AddTerms</c> for every pending field value.</param>
/// <returns>
/// The cached rewritten query when one exists; otherwise a newly built boolean query, which
/// is also stored in <c>rewrittenQuery</c>.
/// </returns>
public override Query Rewrite(IndexReader reader)
{
    // A previous call already produced the expansion: reuse it.
    if (rewrittenQuery != null)
    {
        return rewrittenQuery;
    }

    // Load up the list of possible terms, then clear the list of fields.
    foreach (FieldVals pending in fieldVals)
    {
        AddTerms(reader, pending);
    }
    fieldVals.Clear();

    BooleanQuery expanded = new BooleanQuery();

    // Step 1: drain the queue and bucket the scored terms by the source term they derive from.
    Hashtable bySourceTerm = new Hashtable();
    for (int remaining = q.Size(); remaining > 0; remaining--)
    {
        ScoreTerm popped = (ScoreTerm)q.Pop();
        ArrayList bucket = (ArrayList)bySourceTerm[popped.fuzziedSourceTerm];
        if (bucket == null)
        {
            bucket = new ArrayList();
            bySourceTerm.Add(popped.fuzziedSourceTerm, bucket);
        }
        bucket.Add(popped);
    }

    // Step 2: turn every bucket into a clause of the outer boolean query.
    foreach (ArrayList bucket in bySourceTerm.Values)
    {
        if (bucket.Count == 1)
        {
            // Only one variant was selected: add it directly, without a wrapper.
            ScoreTerm sole = (ScoreTerm)bucket[0];
            TermQuery soleQuery = new FuzzyTermQuery(sole.term, ignoreTF);
            soleQuery.SetBoost(sole.score);
            expanded.Add(soleQuery, BooleanClause.Occur.SHOULD);
            continue;
        }

        // Several variants: group them in a nested boolean query built with coord disabled.
        BooleanQuery variantGroup = new BooleanQuery(true);
        foreach (ScoreTerm variant in bucket)
        {
            TermQuery variantQuery = new FuzzyTermQuery(variant.term, ignoreTF);
            variantQuery.SetBoost(variant.score);
            variantGroup.Add(variantQuery, BooleanClause.Occur.SHOULD);
        }
        expanded.Add(variantGroup, BooleanClause.Occur.SHOULD);
    }

    // TODO possible alternative step 3 - organize the above booleans into a new layer of
    // field-based booleans with a minimum-should-match of NumFields-1?
    expanded.SetBoost(GetBoost());
    this.rewrittenQuery = expanded;
    return expanded;
}
/// <summary>
/// Builds the Lucene query for <paramref name="searchTerm"/> and wraps it in a custom score
/// query driven by the "DownloadCount" field.
/// </summary>
/// <param name="searchTerm">Raw search text.</param>
/// <returns>
/// A <c>CustomScoreQuery</c> over the combination of the exact-id, wildcard-id, conjunction,
/// disjunction and per-field wildcard queries.
/// </returns>
private static Query ParseQuery(string searchTerm)
{
    // Field name -> boost; used by the parser and to scale the per-field wildcard clauses.
    var fieldBoosts = new Dictionary<string, float>
    {
        { "Id", 1.2f },
        { "Title", 1.0f },
        { "Tags", 0.8f },
        { "Description", 0.1f },
        { "Author", 1.0f }
    };
    var standardAnalyzer = new StandardAnalyzer(LuceneCommon.LuceneVersion);
    var parser = new MultiFieldQueryParser(LuceneCommon.LuceneVersion, fieldBoosts.Keys.ToArray(), standardAnalyzer, fieldBoosts);

    var allTermsQuery = new BooleanQuery();
    allTermsQuery.SetBoost(2.0f);

    var anyTermQuery = new BooleanQuery();
    anyTermQuery.SetBoost(0.1f);

    var prefixQuery = new BooleanQuery();
    prefixQuery.SetBoost(0.5f);

    // The whole search text, escaped, drives the two "Id-Exact" lookups.
    var escapedWholeTerm = Escape(searchTerm);
    var idEqualsQuery = new TermQuery(new Term("Id-Exact", escapedWholeTerm));
    idEqualsQuery.SetBoost(2.5f);
    var idContainsQuery = new WildcardQuery(new Term("Id-Exact", "*" + escapedWholeTerm + "*"));

    foreach (var piece in GetSearchTerms(searchTerm))
    {
        var parsedPiece = parser.Parse(piece);
        allTermsQuery.Add(parsedPiece, BooleanClause.Occur.MUST);
        anyTermQuery.Add(parsedPiece, BooleanClause.Occur.SHOULD);

        foreach (var fieldBoost in fieldBoosts)
        {
            var prefixClause = new WildcardQuery(new Term(fieldBoost.Key, piece + "*"));
            prefixClause.SetBoost(0.7f * fieldBoost.Value);
            prefixQuery.Add(prefixClause, BooleanClause.Occur.SHOULD);
        }
    }

    var parts = new Query[] { idEqualsQuery, idContainsQuery, allTermsQuery, anyTermQuery, prefixQuery };
    var combined = allTermsQuery.Combine(parts);

    var downloadCountBooster = new FieldScoreQuery("DownloadCount", FieldScoreQuery.Type.INT);
    return new CustomScoreQuery(combined, downloadCountBooster);
}
/// <summary>
/// Simplifies this query when it covers a single position.
/// </summary>
/// <param name="reader">Index reader; unused here.</param>
/// <returns>
/// For exactly one term array, a boolean OR over its terms with this query's boost;
/// otherwise the query itself.
/// </returns>
public override Query Rewrite(IndexReader reader)
{
    bool singlePosition = termArrays.Count == 1;
    if (!singlePosition)
    {
        return this;
    }

    // One position only: an OR over the candidate terms is equivalent.
    Term[] candidates = (Term[]) termArrays[0];
    BooleanQuery disjunction = new BooleanQuery(true);
    int index = 0;
    while (index < candidates.Length)
    {
        disjunction.Add(new TermQuery(candidates[index]), BooleanClause.Occur.SHOULD);
        index++;
    }

    disjunction.SetBoost(GetBoost());
    return disjunction;
}
/// <summary>
/// Builds a deeply nested boolean query out of phrase, span-near, filtered, constant-score,
/// disjunction-max and minimum-should-match parts, then passes it to <c>Qtest</c> with the
/// array { 0, 1, 2 }.
/// </summary>
public virtual void Test2()
{
    BooleanQuery top = new BooleanQuery();

    // Required sloppy phrase.
    Query phrase = qp.Parse("\"w1 w2\"~1");
    top.Add(phrase, Occur.MUST);

    // Two optional span-near clauses.
    top.Add(Snear(St("w2"), Sor("w5", "zz"), 4, true), Occur.SHOULD);
    top.Add(Snear(Sf("w3", 2), St("w2"), St("w3"), 5, true), Occur.SHOULD);

    // Filtered query with a very large boost.
    Query boostedFiltered = new FilteredQuery(qp.Parse("xx"), new ItemizedFilter(new int[] { 1, 3 }));
    boostedFiltered.SetBoost(1000);
    top.Add(boostedFiltered, Occur.SHOULD);

    // Constant-score query with a negative boost.
    Query negativeConstant = new ConstantScoreQuery(new ItemizedFilter(new int[] { 0, 2 }));
    negativeConstant.SetBoost(-20.0f);
    top.Add(negativeConstant, Occur.SHOULD);

    // Disjunction-max (tie breaker 0.2) wrapping a second one (tie breaker 0.5).
    DisjunctionMaxQuery maxOuter = new DisjunctionMaxQuery(0.2f);
    maxOuter.Add(Snear(St("w2"), Sor("w5", "zz"), 4, true));
    maxOuter.Add(qp.Parse("QQ"));
    maxOuter.Add(qp.Parse("xx yy -zz"));
    maxOuter.Add(qp.Parse("-xx -w1"));

    DisjunctionMaxQuery maxInner = new DisjunctionMaxQuery(0.5f);
    maxInner.Add(qp.Parse("w1"));
    maxInner.Add(qp.Parse("w2"));
    maxInner.Add(qp.Parse("w3"));
    maxOuter.Add(maxInner);
    top.Add(maxOuter, Occur.SHOULD);

    // Boolean query requiring two of its three optional span clauses, boosted to zero.
    BooleanQuery atLeastTwo = new BooleanQuery();
    atLeastTwo.SetMinimumNumberShouldMatch(2);
    atLeastTwo.Add(Snear("w1", "w2", 1, true), Occur.SHOULD);
    atLeastTwo.Add(Snear("w2", "w3", 1, true), Occur.SHOULD);
    atLeastTwo.Add(Snear("w1", "w3", 3, true), Occur.SHOULD);
    atLeastTwo.SetBoost(0.0f);
    top.Add(atLeastTwo, Occur.SHOULD);

    int[] expected = new int[] { 0, 1, 2 };
    Qtest(top, expected);
}
/// <summary>
/// Builds the Lucene query for the search text carried by <paramref name="searchFilter"/>.
/// </summary>
/// <param name="searchFilter">Supplies the search text and the requested sort property.</param>
/// <returns>
/// The combined query; when sorting by relevance it is wrapped in a <c>CustomScoreQuery</c>
/// driven by the "DownloadCount" field.
/// </returns>
private static Query ParseQuery(SearchFilter searchFilter)
{
    var searchedFields = new[] { "Id", "Title", "Tags", "Description", "Author" };
    var standardAnalyzer = new StandardAnalyzer(LuceneCommon.LuceneVersion);
    var parser = new MultiFieldQueryParser(LuceneCommon.LuceneVersion, searchedFields, standardAnalyzer);

    // Matches when every term of the search text is found.
    var allTermsQuery = new BooleanQuery();
    allTermsQuery.SetBoost(2.0f);

    // Matches when at least some terms of the search text are found.
    var anyTermQuery = new BooleanQuery();
    anyTermQuery.SetBoost(0.1f);

    // Trailing-wildcard matches, e.g. jquer*
    var prefixQuery = new BooleanQuery();
    prefixQuery.SetBoost(0.5f);

    // The whole search text, escaped, drives the two "Id-Exact" lookups.
    var escapedWholeTerm = Escape(searchFilter.SearchTerm);
    var idEqualsQuery = new TermQuery(new Term("Id-Exact", escapedWholeTerm));
    idEqualsQuery.SetBoost(2.5f);
    var idContainsQuery = new WildcardQuery(new Term("Id-Exact", "*" + escapedWholeTerm + "*"));

    foreach (var piece in GetSearchTerms(searchFilter.SearchTerm))
    {
        var parsedPiece = parser.Parse(piece);
        allTermsQuery.Add(parsedPiece, BooleanClause.Occur.MUST);
        anyTermQuery.Add(parsedPiece, BooleanClause.Occur.SHOULD);

        foreach (var fieldName in searchedFields)
        {
            var prefixClause = new WildcardQuery(new Term(fieldName, piece + "*"));
            prefixClause.SetBoost(0.7f);
            prefixQuery.Add(prefixClause, BooleanClause.Occur.SHOULD);
        }
    }

    // OR together everything built above.
    var parts = new Query[] { idEqualsQuery, idContainsQuery, allTermsQuery, anyTermQuery, prefixQuery };
    var combined = allTermsQuery.Combine(parts);

    if (searchFilter.SortProperty != SortProperty.Relevance)
    {
        return combined;
    }

    // Relevance ordering additionally scores by download count.
    var downloadCountBooster = new FieldScoreQuery("DownloadCount", FieldScoreQuery.Type.INT);
    return new CustomScoreQuery(combined, downloadCountBooster);
}
/// <summary>
/// Builds the query for the general (non field-specific) part of a search.
/// </summary>
/// <param name="doExactId">When true, exact and wildcard lookups on the "Id-Exact" field are included.</param>
/// <param name="originalSearchText">Search text used for the "Id-Exact" lookups; only read when <paramref name="doExactId"/> is true.</param>
/// <param name="analyzer">Analyzer handed to <c>AnalysisHelper.GetFieldQuery</c> for the "Id" field query.</param>
/// <param name="generalTerms">Terms used for the "Id" field query and the per-field wildcard clauses.</param>
/// <param name="generalQueries">Queries added to both the conjunction and the disjunction.</param>
/// <returns>The combination of every sub-query that <c>IsDegenerateQuery</c> does not reject.</returns>
private static Query BuildGeneralQuery(
    bool doExactId,
    string originalSearchText,
    Analyzer analyzer,
    IEnumerable<NuGetSearchTerm> generalTerms,
    IEnumerable<Query> generalQueries)
{
    // Snapshot the terms once: they are consulted several times below, and re-enumerating a
    // lazy sequence would repeat its work each time.
    var terms = generalTerms.ToList();

    // All terms in the multi-term query appear in at least one of the target fields.
    var conjuctionQuery = new BooleanQuery();
    conjuctionQuery.SetBoost(2.0f);

    // Some terms in the multi-term query appear in at least one of the target fields.
    var disjunctionQuery = new BooleanQuery();
    disjunctionQuery.SetBoost(0.1f);

    // Suffix wildcard search e.g. jquer*
    var wildCardQuery = new BooleanQuery();
    wildCardQuery.SetBoost(0.5f);

    Query exactIdQuery = null;
    Query wildCardIdQuery = null;
    if (doExactId)
    {
        // Lower-case the text only when it is actually needed, so a null search text is
        // harmless when no exact-id lookup was requested.
        string exactId = originalSearchText.ToLowerInvariant();

        exactIdQuery = new TermQuery(new Term("Id-Exact", exactId));
        exactIdQuery.SetBoost(7.5f);

        wildCardIdQuery = new WildcardQuery(new Term("Id-Exact", "*" + exactId + "*"));
    }

    Query nearlyExactIdQuery = null;
    if (terms.Count > 0)
    {
        string approximateId = string.Join(" ", terms.Select(c => c.TermOrPhrase));
        nearlyExactIdQuery = AnalysisHelper.GetFieldQuery(analyzer, "Id", approximateId);
        nearlyExactIdQuery.SetBoost(2.0f);
    }

    foreach (var termQuery in generalQueries)
    {
        conjuctionQuery.Add(termQuery, BooleanClause.Occur.MUST);
        disjunctionQuery.Add(termQuery, BooleanClause.Occur.SHOULD);
    }

    foreach (var term in terms)
    {
        var sanitizedTerm = term.TermOrPhrase.ToLowerInvariant();
        foreach (var field in Fields)
        {
            var wildCardTermQuery = new WildcardQuery(new Term(field, sanitizedTerm + "*"));
            wildCardTermQuery.SetBoost(0.7f);
            wildCardQuery.Add(wildCardTermQuery, BooleanClause.Occur.SHOULD);
        }
    }

    // OR of all the applicable queries; entries rejected by IsDegenerateQuery are dropped.
    var queries = new Query[]
    {
        exactIdQuery, wildCardIdQuery, nearlyExactIdQuery, conjuctionQuery, disjunctionQuery, wildCardQuery
    };
    var queriesToCombine = queries.Where(q => !IsDegenerateQuery(q)).ToArray();

    return conjuctionQuery.Combine(queriesToCombine);
}
/// <summary>
/// Searches the index for <paramref name="query"/> and returns one page of results.
/// </summary>
/// <remarks>
/// The text is split on spaces. A term prefixed with '-' must not occur; every other term
/// must occur. The terms are matched against the "title", "summary" and "content" fields,
/// and a document qualifies when any one of the three per-field queries matches.
/// </remarks>
/// <param name="query">Space-separated search terms.</param>
/// <param name="offset">Number of leading hits to skip.</param>
/// <param name="count">Maximum number of results to return.</param>
/// <returns>The total hit count plus the requested slice of results.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="offset"/> is larger than the number of hits retrieved.
/// </exception>
public SearchResultSet Search(string query, int offset = 0, int count = 20)
{
    using (TraceSources.ContentSearcherSource.TraceActivity("Search [{1}-{2}]: {0}", query, offset, offset + count))
    {
        SearchResultSet ret = new SearchResultSet();

        string[] rawTerms = query.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

        BooleanQuery titleQuery = new BooleanQuery();
        BooleanQuery summaryQuery = new BooleanQuery();
        BooleanQuery contentQuery = new BooleanQuery();

        foreach (string term in rawTerms)
        {
            string rawTerm = term;
            BooleanClause.Occur occur;

            // Ordinal comparison: the '-' marker is syntax, not linguistic text.
            if (rawTerm.StartsWith("-", StringComparison.Ordinal))
            {
                rawTerm = rawTerm.Substring(1);
                occur = BooleanClause.Occur.MUST_NOT;
            }
            else
            {
                occur = BooleanClause.Occur.MUST;
            }

            titleQuery.Add(new TermQuery(new Term("title", rawTerm)), occur);
            summaryQuery.Add(new TermQuery(new Term("summary", rawTerm)), occur);
            contentQuery.Add(new TermQuery(new Term("content", rawTerm)), occur);
        }

        // Title matches weigh most, summary matches keep the default boost, content least.
        titleQuery.SetBoost(8f);
        contentQuery.SetBoost(0.7f);

        BooleanQuery q = new BooleanQuery();
        q.Add(titleQuery, BooleanClause.Occur.SHOULD);
        q.Add(summaryQuery, BooleanClause.Occur.SHOULD);
        q.Add(contentQuery, BooleanClause.Occur.SHOULD);

        // Run the combined query; searching titleQuery alone would ignore summary and
        // content matches and make the boosts above pointless.
        TopDocs docs = this._indexSearcher.Search(q, offset + count);

        if (docs.ScoreDocs.Length < offset)
        {
            throw new ArgumentOutOfRangeException("offset", "Offset is larger than result count!");
        }

        ret.HitCount = docs.TotalHits;
        ret.Results = new SearchResult[Math.Min(docs.ScoreDocs.Length - offset, count)];

        for (int i = 0; i < ret.Results.Length; i++)
        {
            var scoreDoc = docs.ScoreDocs[offset + i];
            Document doc = this._indexSearcher.Doc(scoreDoc.doc);
            ret.Results[i] = new SearchResult
            {
                AssetId = AssetIdentifier.Parse(doc.GetField("aid").StringValue()),
                Title = doc.GetField("title").StringValue(),
                Url = new Uri(doc.GetField("uri").StringValue(), UriKind.RelativeOrAbsolute),
                Blurb = doc.GetField("summary").StringValue(),
            };
        }

        return ret;
    }
}