Beispiel #1
0
        /// <summary>
        /// Searches the index for <paramref name="term"/> and records the result.
        /// </summary>
        /// <remarks>
        /// The lower-cased term is matched against the "hottext" field with a fuzzy query, an
        /// exact query (boost 10) and a prefix query (boost 10), and against the "text" and
        /// "examples" fields with exact queries (boost 3 each). The alternatives are combined
        /// with DisjunctionMaxQuery instances created with a tie-breaker of 0.
        /// </remarks>
        /// <param name="term">Text to search for; it is lower-cased before use.</param>
        /// <param name="count">Passed unchanged to SearchInternal.</param>
        /// <param name="start">Passed unchanged to SearchInternal.</param>
        /// <returns>
        /// The new Result, which is also appended to Results, or null when an IOException is
        /// caught (a message is written to the console in that case).
        /// </returns>
        public Result Search(string term, int count, int start)
        {
            try
            {
                term = term.ToLower();
                Term hotTextTerm = new Term("hottext", term);

                // Alternatives on the "hottext" field: fuzzy, exact and prefix matches.
                DisjunctionMaxQuery hotTextQuery = new DisjunctionMaxQuery(0f);
                hotTextQuery.Add(new FuzzyQuery(hotTextTerm));

                Query exactHotText = new TermQuery(hotTextTerm);
                exactHotText.SetBoost(10f);
                hotTextQuery.Add(exactHotText);

                Query prefixHotText = new PrefixQuery(hotTextTerm);
                prefixHotText.SetBoost(10f);
                hotTextQuery.Add(prefixHotText);

                // Exact matches on the remaining fields.
                Query textQuery = new TermQuery(new Term("text", term));
                textQuery.SetBoost(3f);

                Query examplesQuery = new TermQuery(new Term("examples", term));
                examplesQuery.SetBoost(3f);

                DisjunctionMaxQuery combined = new DisjunctionMaxQuery(0f);
                combined.Add(hotTextQuery);
                combined.Add(textQuery);
                combined.Add(examplesQuery);

                TopDocs hits   = SearchInternal(combined, count, start);
                Result  result = new Result(term, searcher, hits.ScoreDocs);

                Results.Add(result);
                return result;
            }
            catch (IOException)
            {
                Console.WriteLine("No index in {0}", dir);
                return null;
            }
        }
        /// <summary>
        /// Creates a TermQuery for the given field and text and applies the given boost to it.
        /// </summary>
        /// <param name="boost">Boost set on the created query.</param>
        /// <param name="field">Name of the field the term belongs to.</param>
        /// <param name="text">Text of the term.</param>
        /// <returns>The boosted term query.</returns>
        protected Query Tq(float boost, String field, String text)
        {
            Term      term    = new Term(field, text);
            TermQuery boosted = new TermQuery(term);

            boosted.SetBoost(boost);
            return boosted;
        }
Beispiel #3
0
        /// <summary>
        /// Adds a term clause or a phrase clause for the given search term to this instance.
        /// </summary>
        /// <param name="term">Term to add to this query; must not be null.</param>
        /// <param name="occurrence">Defines how the term is added to this query.</param>
        /// <param name="slop">The amount of allowed slop; only applied when the term is a phrase.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="term"/> is null.</exception>
        /// <remarks>
        /// Slop is the amount of movement each word is allowed in a non-exact phrase query.
        /// For instance, if you search for "Adobe Systems Incorporated" with a slop of 0, only
        /// results containing exactly that phrase are allowed. With a slop of 2, each word may be
        /// moved at most two positions, so results would be returned for
        /// "Adobe Systems Incorporated", "Adobe Incorporated Systems", "Systems Adobe Incorporated",
        /// and "Systems Incorporated Adobe".
        /// </remarks>
        public void AddBooleanClause(SearchTerm term, ClauseOccurrence occurrence, int slop)
        {
            if (term == null)
            {
                throw new ArgumentNullException("term", "term cannot be null");
            }

            // The clause counter is bumped before the clause itself is built and added.
            IncrementTotalClauses(1);

            if (!term.IsPhrase)
            {
                // Plain term: a boosted TermQuery; slop is not used.
                TermQuery single = new TermQuery(term.GetLuceneTerm());
                single.SetBoost(term.Boost);
                this.luceneQuery.Add(single, TypeConverter.ConvertToLuceneClauseOccurrence(occurrence));
            }
            else
            {
                // Phrase: a boosted PhraseQuery carrying the requested slop.
                PhraseQuery phrase = new PhraseQuery();
                phrase.Add(term.GetLuceneTerm());
                phrase.SetSlop(slop);
                phrase.SetBoost(term.Boost);
                this.luceneQuery.Add(phrase, TypeConverter.ConvertToLuceneClauseOccurrence(occurrence));
            }
        }
Beispiel #4
0
        /// <summary>
        /// Translates a search filter into the Lucene query to execute.
        /// </summary>
        /// <remarks>
        /// A null, empty or whitespace-only search term yields a MatchAllDocsQuery. Otherwise the
        /// result combines an exact and a wildcard query on "Id-Exact" with a conjunction, a
        /// disjunction and a suffix-wildcard query built from the individual search terms over
        /// the Id, Title, Tags, Description and Author fields. When the filter sorts by
        /// relevance, the combined query is wrapped so that the "DownloadCount" field
        /// contributes to the score.
        /// </remarks>
        /// <param name="searchFilter">Filter supplying the search term and the sort property.</param>
        /// <returns>The query described above.</returns>
        private static Query ParseQuery(SearchFilter searchFilter)
        {
            string rawSearchTerm = searchFilter.SearchTerm;

            // Nothing to search for: match every document.
            if (String.IsNullOrWhiteSpace(rawSearchTerm))
            {
                return new MatchAllDocsQuery();
            }

            var searchFields = new[] { "Id", "Title", "Tags", "Description", "Author" };
            var parser       = new MultiFieldQueryParser(
                LuceneCommon.LuceneVersion,
                searchFields,
                new StandardAnalyzer(LuceneCommon.LuceneVersion));

            // Every term of the multi-term query appears in at least one of the fields.
            var allTermsQuery = new BooleanQuery();
            allTermsQuery.SetBoost(2.0f);

            // Some terms of the multi-term query appear in at least one of the fields.
            var anyTermQuery = new BooleanQuery();
            anyTermQuery.SetBoost(0.1f);

            // Suffix wildcard search, e.g. jquer*
            var suffixWildcardQuery = new BooleanQuery();
            suffixWildcardQuery.SetBoost(0.5f);

            // The id queries work on the whole search term, escaped as one unit.
            var escapedTerm  = Escape(rawSearchTerm);
            var exactIdQuery = new TermQuery(new Term("Id-Exact", escapedTerm));
            exactIdQuery.SetBoost(2.5f);
            var containsIdQuery = new WildcardQuery(new Term("Id-Exact", "*" + escapedTerm + "*"));

            foreach (var searchTerm in GetSearchTerms(rawSearchTerm))
            {
                // The same parsed query instance feeds both the MUST and the SHOULD variant.
                var parsedTerm = parser.Parse(searchTerm);
                allTermsQuery.Add(parsedTerm, BooleanClause.Occur.MUST);
                anyTermQuery.Add(parsedTerm, BooleanClause.Occur.SHOULD);

                foreach (var fieldName in searchFields)
                {
                    var fieldWildcard = new WildcardQuery(new Term(fieldName, searchTerm + "*"));
                    fieldWildcard.SetBoost(0.7f);
                    suffixWildcardQuery.Add(fieldWildcard, BooleanClause.Occur.SHOULD);
                }
            }

            // Create an OR of all the queries built above.
            var combined = allTermsQuery.Combine(
                new Query[] { exactIdQuery, containsIdQuery, allTermsQuery, anyTermQuery, suffixWildcardQuery });

            if (searchFilter.SortProperty != SortProperty.Relevance)
            {
                return combined;
            }

            // When searching by relevance, boost scores by download count.
            return new CustomScoreQuery(
                combined,
                new FieldScoreQuery("DownloadCount", FieldScoreQuery.Type.INT));
        }
Beispiel #5
0
        /// <summary>
        /// Performs synonym expansion on a query.
        /// </summary>
        /// <param name="query">Text to expand; it is split into words with <paramref name="a"/>.</param>
        /// <param name="syns">
        /// Searcher that is queried for documents whose Syns2Index.F_WORD field equals a word;
        /// the Syns2Index.F_SYN values of the hits are used as that word's synonyms.
        /// </param>
        /// <param name="a">Analyzer used to tokenize the query text.</param>
        /// <param name="field">Field targeted by the generated term queries; also passed to the analyzer.</param>
        /// <param name="boost">
        /// Boost applied to synonym clauses when greater than 0; otherwise synonyms keep the
        /// default boost. The original words are never boosted.
        /// </param>
        /// <returns>
        /// A BooleanQuery with one SHOULD clause per distinct word of the query and one per
        /// distinct synonym that was not already present.
        /// </returns>
        public static Query Expand(System.String query, Searcher syns, Analyzer a, System.String field, float boost)
        {
            System.Collections.Hashtable already = new System.Collections.Hashtable();     // avoid dups
            System.Collections.IList     top     = new System.Collections.ArrayList();     // keeps the words in query order

            // [1] Parse query into separate words so that when we expand we can avoid dups
            TokenStream ts = a.TokenStream(field, new System.IO.StringReader(query));
            try
            {
                Lucene.Net.Analysis.Token t;
                while ((t = ts.Next()) != null)
                {
                    System.String word = t.TermText();
                    if (already.Contains(word) == false)
                    {
                        already.Add(word, word);
                        top.Add(word);
                    }
                }
            }
            finally
            {
                // Release the stream (and its underlying reader) even if tokenizing fails.
                ts.Close();
            }

            BooleanQuery tmp = new BooleanQuery();

            // [2] form query
            foreach (System.String word in top)
            {
                // [2a] add the top-level word itself
                tmp.Add(new TermQuery(new Term(field, word)), BooleanClause.Occur.SHOULD);

                // [2b] add in unique synonyms
                Hits hits = syns.Search(new TermQuery(new Term(Syns2Index.F_WORD, word)));
                for (int i = 0; i < hits.Length(); i++)
                {
                    Document doc = hits.Doc(i);
                    foreach (System.String syn in doc.GetValues(Syns2Index.F_SYN))
                    {
                        if (already.Contains(syn))
                        {
                            continue;
                        }

                        already.Add(syn, syn);
                        TermQuery tq = new TermQuery(new Term(field, syn));
                        if (boost > 0)
                        {
                            // else keep normal 1.0
                            tq.SetBoost(boost);
                        }
                        tmp.Add(tq, BooleanClause.Occur.SHOULD);
                    }
                }
            }

            return tmp;
        }
Beispiel #6
0
        /// <summary>
        /// Verifies that a TermQuery on "fieldname"/"fieldvalue" with a boost of 2.0 serializes
        /// to the expected JSON document.
        /// </summary>
        public void SerializerTermQuery()
        {
            const string expectedJson = "{\"term\":{\"fieldname\":{\"term\":\"fieldvalue\",\"boost\":2.0}}}";

            JsonSerializer jsonSerializer = new JsonSerializer();
            TermQuery      termQuery      = new TermQuery("fieldname", "fieldvalue");

            termQuery.SetBoost(2.0);
            string jsonString = jsonSerializer.Serialize(termQuery);

            Console.WriteLine(jsonString);

            // Expected value first, actual value second, so a failure message labels them correctly.
            Assert.AreEqual(expectedJson, jsonString);
        }
        /// <summary>
        /// Creates the "more like this" query from a PriorityQueue of term entries.
        /// </summary>
        /// <remarks>
        /// Every popped entry is read as an Object[] holding the term text at index 0, the field
        /// name at index 1 and a Single score at index 2. Each entry becomes a SHOULD clause.
        /// When the boost field is set, a clause is boosted by its score divided by the score of
        /// the first popped entry. Building stops when the queue is empty, when the BooleanQuery
        /// reports too many clauses, or when maxQueryTerms (if positive) clauses have been added.
        /// </remarks>
        /// <param name="q">Queue of term entries; it is consumed by this method.</param>
        /// <returns>The assembled BooleanQuery.</returns>
        private Query CreateQuery(PriorityQueue q)
        {
            BooleanQuery result     = new BooleanQuery();
            int          termsAdded = 0;
            float        topScore   = 0;

            for (Object entry = q.Pop(); entry != null; entry = q.Pop())
            {
                Object[]  parts     = (Object[])entry;
                TermQuery termQuery = new TermQuery(new Term((String)parts[1], (String)parts[0]));

                if (boost)
                {
                    float score = (float)((Single)parts[2]);

                    // The first popped entry provides the reference score.
                    if (termsAdded == 0)
                    {
                        topScore = score;
                    }

                    termQuery.SetBoost(score / topScore);
                }

                try
                {
                    result.Add(termQuery, BooleanClause.Occur.SHOULD);
                }
                catch (BooleanQuery.TooManyClauses)
                {
                    // The query cannot take more clauses; return what has been collected so far.
                    break;
                }

                termsAdded++;
                if (maxQueryTerms > 0 && termsAdded >= maxQueryTerms)
                {
                    break;
                }
            }

            return result;
        }
        /// <summary>
        /// Builds the Lucene query for a free-text search term.
        /// </summary>
        /// <remarks>
        /// The term is escaped and lower-cased, then split on spaces. The result combines an
        /// exact query on "Id-Exact" (boost 2.5), a conjunction of all parsed terms (boost 1.5),
        /// a disjunction of all parsed terms, and suffix-wildcard queries per term and field
        /// (boost 0.7 times the field weight, inside a query boosted 0.7).
        /// </remarks>
        /// <param name="searchTerm">Raw search text.</param>
        /// <returns>The combined query.</returns>
        private static Query ParseQuery(string searchTerm)
        {
            // Field name -> weight; passed to the parser as boosts and reused for the wildcard clauses.
            var fields = new Dictionary<string, float> {
                { "Id", 1.2f }, { "Title", 1.0f }, { "Tags", 1.0f }, { "Description", 0.8f }, { "Author", 0.6f }
            };
            var analyzer = new StandardAnalyzer(LuceneCommon.LuceneVersion);

            searchTerm = QueryParser.Escape(searchTerm).ToLowerInvariant();

            var queryParser = new MultiFieldQueryParser(LuceneCommon.LuceneVersion, fields.Keys.ToArray(), analyzer, fields);

            // All terms must match.
            var conjuctionQuery = new BooleanQuery();
            conjuctionQuery.SetBoost(1.5f);

            // Any term may match.
            var disjunctionQuery = new BooleanQuery();

            // Suffix wildcard search, e.g. jquer*
            var wildCardQuery = new BooleanQuery();
            wildCardQuery.SetBoost(0.7f);

            var exactIdQuery = new TermQuery(new Term("Id-Exact", searchTerm));
            exactIdQuery.SetBoost(2.5f);

            foreach (var term in searchTerm.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
            {
                // Parse each term once and share the resulting query between both clauses
                // instead of running the parser twice on the same text.
                var termQuery = queryParser.Parse(term);
                conjuctionQuery.Add(termQuery, BooleanClause.Occur.MUST);
                disjunctionQuery.Add(termQuery, BooleanClause.Occur.SHOULD);

                foreach (var field in fields)
                {
                    var wildCardTermQuery = new WildcardQuery(new Term(field.Key, term + "*"));
                    wildCardTermQuery.SetBoost(0.7f * field.Value);
                    wildCardQuery.Add(wildCardTermQuery, BooleanClause.Occur.SHOULD);
                }
            }

            return conjuctionQuery.Combine(new Query[] { exactIdQuery, conjuctionQuery, disjunctionQuery, wildCardQuery });
        }
        /// <summary>
        /// Adds a term clause or a phrase clause for the given search term to this instance.
        /// </summary>
        /// <param name="term">Term to add to this query; must not be null.</param>
        /// <param name="occurrence">Defines how the term is added to this query.</param>
        /// <param name="slop">The amount of allowed slop; only applied when the term is a phrase.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="term"/> is null.</exception>
        /// <remarks>
        /// Slop is the amount of movement each word is allowed in a non-exact phrase query.
        /// For instance, if you search for "Adobe Systems Incorporated" with a slop of 0, only
        /// results containing exactly that phrase are allowed. With a slop of 2, each word may be
        /// moved at most two positions, so results would be returned for
        /// "Adobe Systems Incorporated", "Adobe Incorporated Systems", "Systems Adobe Incorporated",
        /// and "Systems Incorporated Adobe".
        /// </remarks>
        public void AddBooleanClause(SearchTerm term, ClauseOccurrence occurrence, int slop)
        {
            if (term == null) {
                throw new ArgumentNullException("term", "term cannot be null");
            }

            // The clause counter is bumped before the clause itself is built and added.
            IncrementTotalClauses(1);

            if (!term.IsPhrase) {
                // Plain term: a boosted TermQuery; slop is not used.
                TermQuery single = new TermQuery(term.GetLuceneTerm());
                single.SetBoost(term.Boost);
                this.luceneQuery.Add(single, TypeConverter.ConvertToLuceneClauseOccurrence(occurrence));
                return;
            }

            // Phrase: a boosted PhraseQuery carrying the requested slop.
            PhraseQuery phrase = new PhraseQuery();
            phrase.Add(term.GetLuceneTerm());
            phrase.SetSlop(slop);
            phrase.SetBoost(term.Boost);
            this.luceneQuery.Add(phrase, TypeConverter.ConvertToLuceneClauseOccurrence(occurrence));
        }
Beispiel #10
0
        /// <summary>
        /// Searches the title and body fields of the index for <paramref name="key"/>, optionally
        /// filtered to a set of folders, and returns one page of highlighted results.
        /// </summary>
        /// <param name="key">
        /// Search text. It is trimmed, lower-cased and escaped before parsing. When it is null or
        /// empty and folders are supplied, the query "*:*" is used instead.
        /// </param>
        /// <param name="pageIndex">Page number; the first result is taken at offset (pageIndex - 1) * pageSize.</param>
        /// <param name="pageSize">Maximum number of results on the page.</param>
        /// <param name="folders">Optional "FolderName" values; when given, hits are filtered to these folders.</param>
        /// <returns>
        /// The requested page together with the total hit count. An empty page is returned when
        /// no index exists, or when neither a key nor folders are supplied.
        /// </returns>
        public virtual PagedList <Models.ResultObject> Search(string key, int pageIndex, int pageSize, params string[] folders)
        {
            var indexDirectory = FSDirectory.Open(new DirectoryInfo(indexDir));

            try
            {
                bool hasFolders = folders != null && folders.Length > 0;

                if (!IndexReader.IndexExists(indexDirectory) || (string.IsNullOrEmpty(key) && !hasFolders))
                {
                    return(new PagedList <ResultObject>(new ResultObject[0], pageIndex, pageSize, 0));
                }

                // key can still be null here when only folders were supplied, so guard the
                // Trim() call instead of letting it throw a NullReferenceException.
                key = QueryParser.Escape((key ?? string.Empty).Trim().ToLower());

                if (string.IsNullOrEmpty(key))
                {
                    // Assigned after escaping so the match-all syntax stays intact.
                    key = "*:*";
                }

                var query = new BooleanQuery();

                // Title matches are boosted above body matches.
                QueryParser titleParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, Converter.TitleFieldName, this.Analyzer);
                var         titleQuery  = titleParser.Parse(key);

                titleQuery.SetBoost(2);
                query.Add(titleQuery, BooleanClause.Occur.SHOULD);

                QueryParser bodyParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, Converter.BodyFieldName, this.Analyzer);
                var         bodyQuery  = bodyParser.Parse(key);

                bodyQuery.SetBoost(1);
                query.Add(bodyQuery, BooleanClause.Occur.SHOULD);

                QueryWrapperFilter filter = null;

                if (hasFolders)
                {
                    // A hit must belong to at least one of the requested folders.
                    var folderQuery = new BooleanQuery();
                    foreach (var folder in folders)
                    {
                        var termQuery = new TermQuery(new Term("FolderName", folder));
                        termQuery.SetBoost(3);
                        folderQuery.Add(termQuery, BooleanClause.Occur.SHOULD);
                    }

                    filter = new QueryWrapperFilter(folderQuery);
                }

                var searcher = new IndexSearcher(indexDirectory, true);

                try
                {
                    TopDocsCollector collector = TopScoreDocCollector.create(searcher.MaxDoc(), false);

                    if (filter == null)
                    {
                        searcher.Search(query, collector);
                    }
                    else
                    {
                        searcher.Search(query, filter, collector);
                    }

                    Lucene.Net.Highlight.Highlighter lighter =
                        new Highlighter(new SimpleHTMLFormatter("<strong class='highlight'>", "</strong>"), new Lucene.Net.Highlight.QueryScorer((Query)query));

                    var startIndex = (pageIndex - 1) * pageSize;

                    List <ResultObject> results = new List <ResultObject>();

                    // Documents are converted while the searcher is still open.
                    foreach (var doc in collector.TopDocs(startIndex, pageSize).ScoreDocs)
                    {
                        var          document = searcher.Doc(doc.doc);
                        ResultObject result   = Converter.ToResultObject(lighter, document);
                        if (result != null)
                        {
                            results.Add(result);
                        }
                    }

                    return(new PagedList <ResultObject>(results, pageIndex, pageSize, collector.GetTotalHits()));
                }
                finally
                {
                    // Release the searcher (and the reader it opened) on every path.
                    searcher.Close();
                }
            }
            finally
            {
                // The directory is opened per call, so it is released per call as well.
                indexDirectory.Close();
            }
        }
        /// <summary>
        /// Builds the general (non field-specific) part of a search query.
        /// </summary>
        /// <remarks>
        /// The following queries are built and OR-ed together with Combine, after dropping those
        /// for which IsDegenerateQuery returns true:
        /// an exact and a wildcard query on "Id-Exact" (only when <paramref name="doExactId"/> is
        /// set), an analyzed query on "Id" over all general terms, a conjunction and a
        /// disjunction of <paramref name="generalQueries"/>, and suffix-wildcard queries for
        /// every general term on every entry of Fields.
        /// </remarks>
        /// <param name="doExactId">Whether the "Id-Exact" queries are generated.</param>
        /// <param name="originalSearchText">Search text used, lower-cased, for the "Id-Exact" queries.</param>
        /// <param name="analyzer">Analyzer handed to AnalysisHelper.GetFieldQuery.</param>
        /// <param name="generalTerms">Terms or phrases of the search; enumerated once.</param>
        /// <param name="generalQueries">Queries added to both the conjunction and the disjunction.</param>
        /// <returns>The combined query.</returns>
        private static Query BuildGeneralQuery(
            bool doExactId,
            string originalSearchText,
            Analyzer analyzer,
            IEnumerable <NuGetSearchTerm> generalTerms,
            IEnumerable <Query> generalQueries)
        {
            // Materialize once: the terms are inspected several times below, and the caller
            // may pass a lazily evaluated sequence.
            var terms = generalTerms.ToList();

            // All terms in the multi-term query appear in at least one of the target fields.
            var conjuctionQuery = new BooleanQuery();
            conjuctionQuery.SetBoost(2.0f);

            // Some terms in the multi-term query appear in at least one of the target fields.
            var disjunctionQuery = new BooleanQuery();
            disjunctionQuery.SetBoost(0.1f);

            // Suffix wildcard search e.g. jquer*
            var wildCardQuery = new BooleanQuery();
            wildCardQuery.SetBoost(0.5f);

            Query exactIdQuery    = null;
            Query wildCardIdQuery = null;

            if (doExactId)
            {
                // Only touch the search text when it is actually needed, so a null text does
                // not fail the call when no exact-id query was requested.
                string lowerCasedId = originalSearchText.ToLowerInvariant();

                exactIdQuery = new TermQuery(new Term("Id-Exact", lowerCasedId));
                exactIdQuery.SetBoost(7.5f);

                wildCardIdQuery = new WildcardQuery(new Term("Id-Exact", "*" + lowerCasedId + "*"));
            }

            Query nearlyExactIdQuery = null;

            if (terms.Count > 0)
            {
                string approximateId = string.Join(" ", terms.Select(c => c.TermOrPhrase));
                nearlyExactIdQuery = AnalysisHelper.GetFieldQuery(analyzer, "Id", approximateId);

                // GetFieldQuery is not visible here; guard in case it yields null for text
                // that analyzes to no tokens. Null entries are left to IsDegenerateQuery below,
                // as is already the case for the id queries when doExactId is false.
                if (nearlyExactIdQuery != null)
                {
                    nearlyExactIdQuery.SetBoost(2.0f);
                }
            }

            foreach (var termQuery in generalQueries)
            {
                conjuctionQuery.Add(termQuery, BooleanClause.Occur.MUST);
                disjunctionQuery.Add(termQuery, BooleanClause.Occur.SHOULD);
            }

            foreach (var sanitizedTerm in terms.Select(c => c.TermOrPhrase.ToLowerInvariant()))
            {
                foreach (var field in Fields)
                {
                    var wildCardTermQuery = new WildcardQuery(new Term(field, sanitizedTerm + "*"));
                    wildCardTermQuery.SetBoost(0.7f);
                    wildCardQuery.Add(wildCardTermQuery, BooleanClause.Occur.SHOULD);
                }
            }

            // OR of all the applicable queries
            var queries = new Query[]
            {
                exactIdQuery, wildCardIdQuery, nearlyExactIdQuery, conjuctionQuery, disjunctionQuery, wildCardQuery
            };

            var queriesToCombine = queries.Where(q => !IsDegenerateQuery(q));

            return conjuctionQuery.Combine(queriesToCombine.ToArray());
        }