/// <summary>
        /// Builds the Lucene query used for a free-text search.
        /// </summary>
        /// <remarks>
        /// Four sub-queries are combined: an exact match on the "Id-Exact" field, a conjunction in which
        /// every whitespace-separated term must match, a disjunction in which any term may match, and a
        /// prefix wildcard query per term and field.
        /// </remarks>
        /// <param name="searchTerm">Raw search text.</param>
        /// <returns>The combined query.</returns>
        private static Query ParseQuery(string searchTerm)
        {
            var fields = new Dictionary<string, float> { { "Id", 1.2f }, { "Title", 1.0f }, { "Tags", 1.0f}, { "Description", 0.8f }, { "Author", 0.6f } };
            var analyzer = new StandardAnalyzer(LuceneCommon.LuceneVersion);

            // A TermQuery compares the term text verbatim and never interprets query-parser escape
            // sequences, so the exact-id lookup must use the unescaped text. Using the escaped text would
            // turn e.g. "foo-bar" into "foo\-bar", which cannot equal an indexed id.
            // NOTE(review): assumes "Id-Exact" is indexed lower-cased and unanalyzed - confirm against the indexer.
            var exactIdText = searchTerm.ToLowerInvariant();

            // Only text handed to the query parser needs escaping.
            var escapedSearchTerm = QueryParser.Escape(searchTerm).ToLowerInvariant();

            var queryParser = new MultiFieldQueryParser(LuceneCommon.LuceneVersion, fields.Keys.ToArray(), analyzer, fields);

            var conjuctionQuery = new BooleanQuery();
            conjuctionQuery.SetBoost(1.5f);
            var disjunctionQuery = new BooleanQuery();
            var wildCardQuery = new BooleanQuery();
            wildCardQuery.SetBoost(0.7f);
            var exactIdQuery = new TermQuery(new Term("Id-Exact", exactIdText));
            exactIdQuery.SetBoost(2.5f);

            foreach (var term in escapedSearchTerm.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
            {
                // Parse once and share the result between the conjunction and the disjunction.
                var termQuery = queryParser.Parse(term);
                conjuctionQuery.Add(termQuery, BooleanClause.Occur.MUST);
                disjunctionQuery.Add(termQuery, BooleanClause.Occur.SHOULD);

                foreach (var field in fields)
                {
                    // Prefix match on every field, weighted by that field's boost.
                    var wildCardTermQuery = new WildcardQuery(new Term(field.Key, term + "*"));
                    wildCardTermQuery.SetBoost(0.7f * field.Value);
                    wildCardQuery.Add(wildCardTermQuery, BooleanClause.Occur.SHOULD);
                }
            }

            return conjuctionQuery.Combine(new Query[] { exactIdQuery, conjuctionQuery, disjunctionQuery, wildCardQuery });
        }
        /// <summary>
        /// Restricts <paramref name="luceneQuery"/> to documents whose class field matches one of the given types.
        /// </summary>
        /// <param name="classesAndSubclasses">Types to accept; when null the query is returned unchanged.</param>
        /// <param name="luceneQuery">The query to restrict.</param>
        /// <returns>
        /// <paramref name="luceneQuery"/> itself when no type set is supplied; otherwise a new boolean query
        /// that requires both the original query and the class filter.
        /// </returns>
        public static Lucene.Net.Search.Query FilterQueryByClasses(IESI.ISet<System.Type> classesAndSubclasses, Lucene.Net.Search.Query luceneQuery)
        {
            if (classesAndSubclasses == null)
            {
                return luceneQuery;
            }

            // Filtering inside the query is more practical than filtering results by class afterwards
            // (especially on scrollable result sets) and probably keeps the memory footprint down.
            BooleanQuery anyRequestedClass = new BooleanQuery();

            // Zero boost: the class clause should not influence scoring.
            anyRequestedClass.SetBoost(0);

            foreach (System.Type type in classesAndSubclasses)
            {
                Term classTerm = new Term(DocumentBuilder.CLASS_FIELDNAME, TypeHelper.LuceneTypeName(type));
                anyRequestedClass.Add(new TermQuery(classTerm), BooleanClause.Occur.SHOULD);
            }

            BooleanQuery restricted = new BooleanQuery();
            restricted.Add(luceneQuery, BooleanClause.Occur.MUST);
            restricted.Add(anyRequestedClass, BooleanClause.Occur.MUST);
            return restricted;
        }
        /// <summary>
        /// Builds a zero-boost boolean query from field/value pairs and records a priority for each value.
        /// </summary>
        /// <param name="vals">Flat array laid out as field, value, field, value, ...; a trailing unpaired entry is ignored.</param>
        /// <returns>A boolean query holding one optional term clause per pair.</returns>
        private Query GetElevatedQuery(System.String[] vals)
        {
            BooleanQuery elevated = new BooleanQuery(false);
            elevated.SetBoost(0);

            // Priorities start at (number of pairs + 5) and decrease by one for each pair.
            int nextPriority = (vals.Length / 2) + 5;

            int pos = 0;
            while (pos < vals.Length - 1)
            {
                System.String fieldName = vals[pos];
                System.String text = vals[pos + 1];

                elevated.Add(new TermQuery(new Term(fieldName, text)), BooleanClause.Occur.SHOULD);
                priority[text] = nextPriority;

                nextPriority--;
                pos += 2;
            }

            return elevated;
        }
Пример #4
0
 /// <summary>
 /// Creates the weight for this query. When only one term array is present, the work is delegated
 /// to an equivalent boolean query over that array's terms.
 /// </summary>
 /// <param name="searcher">Searcher the weight is created for.</param>
 /// <returns>The weight of the equivalent boolean query, or a <c>PhrasePrefixWeight</c> otherwise.</returns>
 protected internal override Weight CreateWeight(Searcher searcher)
 {
     if (termArrays.Count != 1)
     {
         return new PhrasePrefixWeight(this, searcher);
     }

     // Single term array: any one of its terms may match.
     BooleanQuery anyTerm = new BooleanQuery(true);
     foreach (Term term in (Term[])termArrays[0])
     {
         anyTerm.Add(new TermQuery(term), BooleanClause.Occur.SHOULD);
     }

     // Carry this query's boost over to the replacement.
     anyTerm.SetBoost(GetBoost());
     return anyTerm.CreateWeight(searcher);
 }
Пример #5
0
        /// <summary>
        /// Builds a nested query mixing parsed, span, filtered, constant-score, disjunction-max and
        /// minimum-should-match boolean clauses, then checks it with <c>Qtest</c> against documents 0, 1 and 2.
        /// </summary>
        public virtual void  Test2()
        {
            BooleanQuery outer = new BooleanQuery();

            // One required phrase clause followed by two optional span-near clauses.
            outer.Add(qp.Parse("\"w1 w2\"~1"), Occur.MUST);
            outer.Add(Snear(St("w2"), Sor("w5", "zz"), 4, true), Occur.SHOULD);
            outer.Add(Snear(Sf("w3", 2), St("w2"), St("w3"), 5, true), Occur.SHOULD);

            // Heavily boosted filtered clause.
            Query filtered = new FilteredQuery(qp.Parse("xx"), new ItemizedFilter(new int[] { 1, 3 }));
            filtered.SetBoost(1000);
            outer.Add(filtered, Occur.SHOULD);

            // Negatively boosted constant-score clause.
            Query constantScore = new ConstantScoreQuery(new ItemizedFilter(new int[] { 0, 2 }));
            constantScore.SetBoost(-20.0f);
            outer.Add(constantScore, Occur.SHOULD);

            // Disjunction-max clause that itself contains a nested disjunction-max.
            DisjunctionMaxQuery outerDisMax = new DisjunctionMaxQuery(0.2f);
            outerDisMax.Add(Snear(St("w2"), Sor("w5", "zz"), 4, true));
            outerDisMax.Add(qp.Parse("QQ"));
            outerDisMax.Add(qp.Parse("xx yy -zz"));
            outerDisMax.Add(qp.Parse("-xx -w1"));

            DisjunctionMaxQuery innerDisMax = new DisjunctionMaxQuery(0.5f);
            innerDisMax.Add(qp.Parse("w1"));
            innerDisMax.Add(qp.Parse("w2"));
            innerDisMax.Add(qp.Parse("w3"));
            outerDisMax.Add(innerDisMax);

            outer.Add(outerDisMax, Occur.SHOULD);

            // Zero-boost boolean clause requiring at least two of its three span-near clauses.
            BooleanQuery twoOfThree = new BooleanQuery();
            twoOfThree.SetMinimumNumberShouldMatch(2);
            twoOfThree.SetBoost(0.0f);
            twoOfThree.Add(Snear("w1", "w2", 1, true), Occur.SHOULD);
            twoOfThree.Add(Snear("w2", "w3", 1, true), Occur.SHOULD);
            twoOfThree.Add(Snear("w1", "w3", 3, true), Occur.SHOULD);

            outer.Add(twoOfThree, Occur.SHOULD);

            Qtest(outer, new int[] { 0, 1, 2 });
        }
Пример #6
0
 /// <summary>
 /// Rewrites this query into a boolean query when it holds exactly one term array.
 /// </summary>
 /// <param name="reader">Index reader supplied by the caller; not used by this implementation.</param>
 /// <returns>An equivalent boolean query carrying this query's boost, or this instance unchanged.</returns>
 public override Query Rewrite(IndexReader reader)
 {
     if (termArrays.Count != 1)
     {
         return this;
     }

     // Single term array: replace it with an OR over its terms.
     BooleanQuery anyTerm = new BooleanQuery(true);
     foreach (Term term in (Term[])termArrays[0])
     {
         anyTerm.Add(new TermQuery(term), BooleanClause.Occur.SHOULD);
     }

     anyTerm.SetBoost(GetBoost());
     return anyTerm;
 }
Пример #7
0
		/// <summary>
		/// Creates the weight for this query, short-cutting to a plain boolean query when there is
		/// exactly one term array.
		/// </summary>
		/// <param name="searcher">Searcher the weight is created for.</param>
		/// <returns>The boolean query's weight in the single-array case; a <c>PhrasePrefixWeight</c> otherwise.</returns>
		protected internal override Weight CreateWeight(Searcher searcher)
		{
			bool hasSingleTermArray = termArrays.Count == 1;
			if (hasSingleTermArray)
			{
				Term[] alternatives = (Term[]) termArrays[0];
				BooleanQuery alternativesQuery = new BooleanQuery(true);
				foreach (Term alternative in alternatives)
				{
					TermQuery clause = new TermQuery(alternative);
					alternativesQuery.Add(clause, BooleanClause.Occur.SHOULD);
				}

				// The replacement inherits this query's boost.
				alternativesQuery.SetBoost(GetBoost());
				return alternativesQuery.CreateWeight(searcher);
			}

			return new PhrasePrefixWeight(this, searcher);
		}
		/// <summary>
		/// Turns field/value pairs into a zero-boost boolean query and assigns each value a descending priority.
		/// </summary>
		/// <param name="vals">Alternating field names and values; an unpaired last element is skipped.</param>
		/// <returns>The boolean query with one optional term clause per pair.</returns>
		private Query GetElevatedQuery(System.String[] vals)
		{
			BooleanQuery result = new BooleanQuery(false);
			result.SetBoost(0);

			int pairCount = vals.Length / 2;
			int topPriority = pairCount + 5;

			for (int pair = 0; pair < pairCount; pair++)
			{
				System.String fieldName = vals[2 * pair];
				System.String text = vals[(2 * pair) + 1];

				result.Add(new TermQuery(new Term(fieldName, text)), BooleanClause.Occur.SHOULD);

				// First pair gets the highest priority; each later pair gets one less.
				priority[text] = topPriority - pair;
			}

			return result;
		}
Пример #9
0
        /// <summary>
        /// Rewrites this query into a boolean query of fuzzy term variants and caches the result.
        /// </summary>
        /// <param name="reader">Index reader passed to <c>AddTerms</c> for every pending field value.</param>
        /// <returns>The cached rewritten query when one exists; otherwise the newly built boolean query.</returns>
        public override Query Rewrite(IndexReader reader)
        {
            if (rewrittenQuery != null)
            {
                return rewrittenQuery;
            }

            // Load the candidate terms for every pending field value, then clear the pending list.
            foreach (FieldVals pending in fieldVals)
            {
                AddTerms(reader, pending);
            }
            fieldVals.Clear();

            // Step 1: drain the queue and group the scored terms by the source term they were derived from.
            Hashtable variantsBySourceTerm = new Hashtable();
            for (int remaining = q.Size(); remaining > 0; remaining--)
            {
                ScoreTerm scored = (ScoreTerm)q.Pop();
                ArrayList group = (ArrayList)variantsBySourceTerm[scored.fuzziedSourceTerm];
                if (group == null)
                {
                    group = new ArrayList();
                    variantsBySourceTerm.Add(scored.fuzziedSourceTerm, group);
                }
                group.Add(scored);
            }

            // Step 2: turn every group into a clause of the resulting boolean query.
            BooleanQuery result = new BooleanQuery();
            foreach (ArrayList group in variantsBySourceTerm.Values)
            {
                if (group.Count != 1)
                {
                    // Several variants: wrap them in their own boolean query, built with coord disabled.
                    BooleanQuery variantsQuery = new BooleanQuery(true);
                    foreach (ScoreTerm variant in group)
                    {
                        TermQuery variantQuery = new FuzzyTermQuery(variant.term, ignoreTF);
                        variantQuery.SetBoost(variant.score); // boost comes from the ScoreTerm's score
                        variantsQuery.Add(variantQuery, BooleanClause.Occur.SHOULD);
                    }
                    result.Add(variantsQuery, BooleanClause.Occur.SHOULD);
                }
                else
                {
                    // Only one variant was selected: add it directly without a wrapper query.
                    ScoreTerm only = (ScoreTerm)group[0];
                    TermQuery onlyQuery = new FuzzyTermQuery(only.term, ignoreTF);
                    onlyQuery.SetBoost(only.score); // boost comes from the ScoreTerm's score
                    result.Add(onlyQuery, BooleanClause.Occur.SHOULD);
                }
            }

            // TODO possible alternative step 3 - organize above booleans into a new layer of field-based
            // booleans with a minimum-should-match of NumFields-1?
            result.SetBoost(GetBoost());
            this.rewrittenQuery = result;
            return result;
        }
        /// <summary>
        /// Builds the search query for <paramref name="searchTerm"/> and wraps it so that scores are
        /// customized by the "DownloadCount" field.
        /// </summary>
        /// <param name="searchTerm">Raw search text.</param>
        /// <returns>A <c>CustomScoreQuery</c> over the combined exact-id, wildcard-id, conjunction, disjunction and wildcard queries.</returns>
        private static Query ParseQuery(string searchTerm)
        {
            // Searchable fields and the boost applied to each.
            var fieldBoosts = new Dictionary<string, float>
            {
                { "Id", 1.2f },
                { "Title", 1.0f },
                { "Tags", 0.8f },
                { "Description", 0.1f },
                { "Author", 1.0f }
            };
            var standardAnalyzer = new StandardAnalyzer(LuceneCommon.LuceneVersion);
            var parser = new MultiFieldQueryParser(LuceneCommon.LuceneVersion, fieldBoosts.Keys.ToArray(), standardAnalyzer, fieldBoosts);

            // Every term must match.
            var allTerms = new BooleanQuery();
            allTerms.SetBoost(2.0f);

            // Any term may match.
            var anyTerm = new BooleanQuery();
            anyTerm.SetBoost(0.1f);

            // Prefix matches per term and field.
            var prefixMatches = new BooleanQuery();
            prefixMatches.SetBoost(0.5f);

            // The whole search text, escaped, drives the exact-id and id-substring queries.
            var escapedText = Escape(searchTerm);
            var idEquals = new TermQuery(new Term("Id-Exact", escapedText));
            idEquals.SetBoost(2.5f);
            var idContains = new WildcardQuery(new Term("Id-Exact", "*" + escapedText + "*"));

            foreach (var term in GetSearchTerms(searchTerm))
            {
                var parsedTerm = parser.Parse(term);
                allTerms.Add(parsedTerm, BooleanClause.Occur.MUST);
                anyTerm.Add(parsedTerm, BooleanClause.Occur.SHOULD);

                var prefixPattern = term + "*";
                foreach (var fieldBoost in fieldBoosts)
                {
                    var prefixQuery = new WildcardQuery(new Term(fieldBoost.Key, prefixPattern));
                    prefixQuery.SetBoost(0.7f * fieldBoost.Value);
                    prefixMatches.Add(prefixQuery, BooleanClause.Occur.SHOULD);
                }
            }

            var parts = new Query[] { idEquals, idContains, allTerms, anyTerm, prefixMatches };
            var combined = allTerms.Combine(parts);
            var downloadCounts = new FieldScoreQuery("DownloadCount", FieldScoreQuery.Type.INT);
            return new CustomScoreQuery(combined, downloadCounts);
        }
Пример #11
0
 /// <summary>
 /// Replaces this query with a boolean query over its terms when there is exactly one term array.
 /// </summary>
 /// <param name="reader">Index reader supplied by the caller; not used by this implementation.</param>
 /// <returns>The replacement boolean query with this query's boost, or this instance when no rewrite applies.</returns>
 public override Query Rewrite(IndexReader reader)
 {
     bool hasSingleTermArray = termArrays.Count == 1;
     if (!hasSingleTermArray)
     {
         return this;
     }

     Term[] alternatives = (Term[]) termArrays[0];
     BooleanQuery alternativesQuery = new BooleanQuery(true);
     foreach (Term alternative in alternatives)
     {
         TermQuery clause = new TermQuery(alternative);
         alternativesQuery.Add(clause, BooleanClause.Occur.SHOULD);
     }

     // Keep the boost of the query being replaced.
     alternativesQuery.SetBoost(GetBoost());
     return alternativesQuery;
 }
		/// <summary>
		/// Assembles a composite query (phrase, span-near, filtered, constant-score, nested
		/// disjunction-max and a minimum-should-match boolean) and verifies it via <c>Qtest</c>
		/// with the document ids 0, 1 and 2.
		/// </summary>
		public virtual void  Test2()
		{
			BooleanQuery root = new BooleanQuery();

			// Required phrase plus two optional span-near clauses.
			root.Add(qp.Parse("\"w1 w2\"~1"), Occur.MUST);
			root.Add(Snear(St("w2"), Sor("w5", "zz"), 4, true), Occur.SHOULD);
			root.Add(Snear(Sf("w3", 2), St("w2"), St("w3"), 5, true), Occur.SHOULD);

			// Filtered clause with a very large boost.
			Query boostedFiltered = new FilteredQuery(qp.Parse("xx"), new ItemizedFilter(new int[] { 1, 3 }));
			boostedFiltered.SetBoost(1000);
			root.Add(boostedFiltered, Occur.SHOULD);

			// Constant-score clause with a negative boost.
			Query negativeConstant = new ConstantScoreQuery(new ItemizedFilter(new int[] { 0, 2 }));
			negativeConstant.SetBoost(-20.0f);
			root.Add(negativeConstant, Occur.SHOULD);

			// Disjunction-max with a second disjunction-max nested inside it.
			DisjunctionMaxQuery disMax = new DisjunctionMaxQuery(0.2f);
			disMax.Add(Snear(St("w2"), Sor("w5", "zz"), 4, true));
			disMax.Add(qp.Parse("QQ"));
			disMax.Add(qp.Parse("xx yy -zz"));
			disMax.Add(qp.Parse("-xx -w1"));

			DisjunctionMaxQuery nestedDisMax = new DisjunctionMaxQuery(0.5f);
			nestedDisMax.Add(qp.Parse("w1"));
			nestedDisMax.Add(qp.Parse("w2"));
			nestedDisMax.Add(qp.Parse("w3"));
			disMax.Add(nestedDisMax);

			root.Add(disMax, Occur.SHOULD);

			// Boolean clause with zero boost that needs two of its three optional clauses.
			BooleanQuery atLeastTwo = new BooleanQuery();
			atLeastTwo.SetBoost(0.0f);
			atLeastTwo.SetMinimumNumberShouldMatch(2);
			atLeastTwo.Add(Snear("w1", "w2", 1, true), Occur.SHOULD);
			atLeastTwo.Add(Snear("w2", "w3", 1, true), Occur.SHOULD);
			atLeastTwo.Add(Snear("w1", "w3", 3, true), Occur.SHOULD);

			root.Add(atLeastTwo, Occur.SHOULD);

			int[] expectedDocs = new int[] { 0, 1, 2 };
			Qtest(root, expectedDocs);
		}
Пример #13
0
        /// <summary>
        /// Builds the search query for the given filter, customizing scores by the "DownloadCount"
        /// field when results are sorted by relevance.
        /// </summary>
        /// <param name="searchFilter">Supplies the search term and the sort property.</param>
        /// <returns>
        /// The combined query, wrapped in a <c>CustomScoreQuery</c> when
        /// <c>searchFilter.SortProperty</c> is <c>SortProperty.Relevance</c>.
        /// </returns>
        private static Query ParseQuery(SearchFilter searchFilter)
        {
            var searchableFields = new[] { "Id", "Title", "Tags", "Description", "Author" };
            var standardAnalyzer = new StandardAnalyzer(LuceneCommon.LuceneVersion);
            var parser = new MultiFieldQueryParser(LuceneCommon.LuceneVersion, searchableFields, standardAnalyzer);

            // Matches when every term is found in at least one field.
            var allTerms = new BooleanQuery();
            allTerms.SetBoost(2.0f);

            // Matches when some of the terms are found in at least one field.
            var someTerms = new BooleanQuery();
            someTerms.SetBoost(0.1f);

            // Suffix wildcard matches, e.g. jquer*
            var prefixMatches = new BooleanQuery();
            prefixMatches.SetBoost(0.5f);

            // The full search term is escaped once and reused for both id queries.
            var escapedText = Escape(searchFilter.SearchTerm);
            var idEquals = new TermQuery(new Term("Id-Exact", escapedText));
            idEquals.SetBoost(2.5f);
            var idContains = new WildcardQuery(new Term("Id-Exact", "*" + escapedText + "*"));

            foreach (var term in GetSearchTerms(searchFilter.SearchTerm))
            {
                var parsedTerm = parser.Parse(term);
                allTerms.Add(parsedTerm, BooleanClause.Occur.MUST);
                someTerms.Add(parsedTerm, BooleanClause.Occur.SHOULD);

                var prefixPattern = term + "*";
                foreach (var fieldName in searchableFields)
                {
                    var prefixQuery = new WildcardQuery(new Term(fieldName, prefixPattern));
                    prefixQuery.SetBoost(0.7f);
                    prefixMatches.Add(prefixQuery, BooleanClause.Occur.SHOULD);
                }
            }

            // OR together everything built above.
            var parts = new Query[] { idEquals, idContains, allTerms, someTerms, prefixMatches };
            var combined = allTerms.Combine(parts);

            if (searchFilter.SortProperty != SortProperty.Relevance)
            {
                return combined;
            }

            // Relevance ordering: let the download count feed into the score.
            var downloadCounts = new FieldScoreQuery("DownloadCount", FieldScoreQuery.Type.INT);
            return new CustomScoreQuery(combined, downloadCounts);
        }
Пример #14
0
        /// <summary>
        /// Combines the general (non field-specific) part of a search into a single query.
        /// </summary>
        /// <param name="doExactId">When true, exact and substring matches on "Id-Exact" are included.</param>
        /// <param name="originalSearchText">The search text as entered; lower-cased for the "Id-Exact" queries.</param>
        /// <param name="analyzer">Analyzer passed to <c>AnalysisHelper.GetFieldQuery</c> for the "Id" field query.</param>
        /// <param name="generalTerms">Terms or phrases that are not bound to a specific field.</param>
        /// <param name="generalQueries">Already-built queries, one per general term.</param>
        /// <returns>The combination of all sub-queries that <c>IsDegenerateQuery</c> does not reject.</returns>
        private static Query BuildGeneralQuery(
            bool doExactId,
            string originalSearchText,
            Analyzer analyzer,
            IEnumerable<NuGetSearchTerm> generalTerms, 
            IEnumerable<Query> generalQueries)
        {
            // The terms are inspected several times below; materialize them once so that a lazy
            // sequence is not re-evaluated on every pass.
            var terms = generalTerms.ToList();

            // All terms in the multi-term query appear in at least one of the target fields.
            var conjuctionQuery = new BooleanQuery();
            conjuctionQuery.SetBoost(2.0f);

            // Some terms in the multi-term query appear in at least one of the target fields.
            var disjunctionQuery = new BooleanQuery();
            disjunctionQuery.SetBoost(0.1f);

            // Suffix wildcard search e.g. jquer*
            var wildCardQuery = new BooleanQuery();
            wildCardQuery.SetBoost(0.5f);

            // Only lower-cased, not escaped: both "Id-Exact" queries below are built from a Term directly.
            string loweredExactId = originalSearchText.ToLowerInvariant();

            Query exactIdQuery = null;
            Query wildCardIdQuery = null;
            if (doExactId)
            {
                exactIdQuery = new TermQuery(new Term("Id-Exact", loweredExactId));
                exactIdQuery.SetBoost(7.5f);

                wildCardIdQuery = new WildcardQuery(new Term("Id-Exact", "*" + loweredExactId + "*"));
            }

            Query nearlyExactIdQuery = null;
            if (terms.Count > 0)
            {
                string approximateId = string.Join(" ", terms.Select(c => c.TermOrPhrase));
                nearlyExactIdQuery = AnalysisHelper.GetFieldQuery(analyzer, "Id", approximateId);

                // NOTE(review): GetFieldQuery may yield no query (e.g. when analysis produces no tokens) - confirm.
                // A null entry is tolerated further down, just like the optional id queries above.
                if (nearlyExactIdQuery != null)
                {
                    nearlyExactIdQuery.SetBoost(2.0f);
                }
            }

            foreach (var termQuery in generalQueries)
            {
                conjuctionQuery.Add(termQuery, BooleanClause.Occur.MUST);
                disjunctionQuery.Add(termQuery, BooleanClause.Occur.SHOULD);
            }

            foreach (var term in terms)
            {
                var wildCardPattern = term.TermOrPhrase.ToLowerInvariant() + "*";
                foreach (var field in Fields)
                {
                    var wildCardTermQuery = new WildcardQuery(new Term(field, wildCardPattern));
                    wildCardTermQuery.SetBoost(0.7f);
                    wildCardQuery.Add(wildCardTermQuery, BooleanClause.Occur.SHOULD);
                }
            }

            // OR of all the applicable queries
            var queries = new Query[]
            {
                exactIdQuery, wildCardIdQuery, nearlyExactIdQuery, conjuctionQuery, disjunctionQuery, wildCardQuery
            };

            var queriesToCombine = queries.Where(q => !IsDegenerateQuery(q));
            return conjuctionQuery.Combine(queriesToCombine.ToArray());
        }
Пример #15
0
        /// <summary>
        /// Runs a term search over the "title", "summary" and "content" fields and returns one page of hits.
        /// </summary>
        /// <param name="query">
        /// Space-separated terms. A term prefixed with '-' must not occur in a field; every other term must occur.
        /// Terms are used as-is, without analysis.
        /// </param>
        /// <param name="offset">Number of leading hits to skip.</param>
        /// <param name="count">Maximum number of hits to return.</param>
        /// <returns>The total hit count together with the requested page of results.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when <paramref name="offset"/> exceeds the number of hits retrieved.
        /// </exception>
        public SearchResultSet Search(string query, int offset = 0, int count = 20)
        {
            using (TraceSources.ContentSearcherSource.TraceActivity("Search [{1}-{2}]: {0}", query, offset, offset + count))
            {
                SearchResultSet ret = new SearchResultSet();

                string[] rawTerms = query.Split(new[] {' '}, StringSplitOptions.RemoveEmptyEntries);

                // One boolean query per field; each receives the same terms with the same occurrence rules.
                BooleanQuery titleQuery = new BooleanQuery();
                BooleanQuery summaryQuery = new BooleanQuery();
                BooleanQuery contentQuery = new BooleanQuery();

                foreach (string term in rawTerms)
                {
                    string rawTerm = term;
                    BooleanClause.Occur occur;
                    if (term.StartsWith("-", StringComparison.Ordinal))
                    {
                        // Leading '-' marks an excluded term.
                        rawTerm = term.Substring(1);
                        occur = BooleanClause.Occur.MUST_NOT;
                    }
                    else
                    {
                        occur = BooleanClause.Occur.MUST;
                    }

                    titleQuery.Add(new TermQuery(new Term("title", rawTerm)), occur);
                    summaryQuery.Add(new TermQuery(new Term("summary", rawTerm)), occur);
                    contentQuery.Add(new TermQuery(new Term("content", rawTerm)), occur);
                }

                // A document qualifies when any of the three fields matches; title hits rank highest.
                BooleanQuery q = new BooleanQuery();

                titleQuery.SetBoost(8f);
                contentQuery.SetBoost(0.7f);

                q.Add(titleQuery, BooleanClause.Occur.SHOULD);
                q.Add(summaryQuery, BooleanClause.Occur.SHOULD);
                q.Add(contentQuery, BooleanClause.Occur.SHOULD);

                // Execute the combined query. Searching titleQuery alone would silently drop every
                // summary and content match and leave q unused.
                TopDocs docs = this._indexSearcher.Search(q, offset + count);

                if (docs.ScoreDocs.Length < offset)
                    throw new ArgumentOutOfRangeException("offset", "Offset is larger than result count!");

                ret.HitCount = docs.TotalHits;

                // The page holds whatever remains after the offset, capped at count.
                ret.Results = new SearchResult[Math.Min(docs.ScoreDocs.Length - offset, count)];

                for (int i = 0; i < ret.Results.Length; i++)
                {
                    var scoreDoc = docs.ScoreDocs[offset + i];

                    Document doc = this._indexSearcher.Doc(scoreDoc.doc);

                    ret.Results[i] = new SearchResult
                                         {
                                             AssetId = AssetIdentifier.Parse(doc.GetField("aid").StringValue()),
                                             Title = doc.GetField("title").StringValue(),
                                             Url = new Uri(doc.GetField("uri").StringValue(), UriKind.RelativeOrAbsolute),
                                             Blurb = doc.GetField("summary").StringValue(),
                                         };
                }

                return ret;
            }
        }