public Result Search(string term, int count, int start) { try { term = term.ToLower(); Term htTerm = new Term("hottext", term); Query qq1 = new FuzzyQuery(htTerm); Query qq2 = new TermQuery(htTerm); qq2.SetBoost(10f); Query qq3 = new PrefixQuery(htTerm); qq3.SetBoost(10f); DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0f); q1.Add(qq1); q1.Add(qq2); q1.Add(qq3); Query q2 = new TermQuery(new Term("text", term)); q2.SetBoost(3f); Query q3 = new TermQuery(new Term("examples", term)); q3.SetBoost(3f); DisjunctionMaxQuery q = new DisjunctionMaxQuery(0f); q.Add(q1); q.Add(q2); q.Add(q3); TopDocs top = SearchInternal(q, count, start); Result r = new Result(term, searcher, top.ScoreDocs); Results.Add(r); return(r); } catch (IOException) { Console.WriteLine("No index in {0}", dir); return(null); } }
protected Query Tq(float boost, String field, String text) { Query query = new TermQuery(new Term(field, text)); query.SetBoost(boost); return(query); }
/// <summary> /// Adds a standard type clause to this instance /// </summary> /// <param name="term">Term to add to this query.</param> /// <param name="occurrence">Defines how the term is added to this query.</param> /// <param name="slop">The amount of allowed slop in a phrase query.</param> /// <remarks> /// Slop is the amount of movement each word is allowed in a non-exact phrase query. /// For instance if you search for "Adobe Systems Incorporated" and the slop is set to 0 then /// only results with that term is allowed. If you set the slop to 2 then two movements can be /// made, max, for each word. In the same example with slop set to 2 results would be returned /// for "Adobe Systems Incorporated", "Adobe Incorporated Systems", "Systems Adobe Incorporated", /// and "Systems Incorporated Adobe". /// </remarks> public void AddBooleanClause(SearchTerm term, ClauseOccurrence occurrence, int slop) { if (term == null) { throw new ArgumentNullException("term", "term cannot be null"); } IncrementTotalClauses(1); if (term.IsPhrase) { PhraseQuery phraseQuery = new PhraseQuery(); phraseQuery.Add(term.GetLuceneTerm()); phraseQuery.SetSlop(slop); phraseQuery.SetBoost(term.Boost); this.luceneQuery.Add(phraseQuery, TypeConverter.ConvertToLuceneClauseOccurrence(occurrence)); phraseQuery = null; } else { TermQuery termQuery = new TermQuery(term.GetLuceneTerm()); termQuery.SetBoost(term.Boost); this.luceneQuery.Add(termQuery, TypeConverter.ConvertToLuceneClauseOccurrence(occurrence)); termQuery = null; } }
private static Query ParseQuery(SearchFilter searchFilter) { if (String.IsNullOrWhiteSpace(searchFilter.SearchTerm)) { return(new MatchAllDocsQuery()); } var fields = new[] { "Id", "Title", "Tags", "Description", "Author" }; var analyzer = new StandardAnalyzer(LuceneCommon.LuceneVersion); var queryParser = new MultiFieldQueryParser(LuceneCommon.LuceneVersion, fields, analyzer); // All terms in the multi-term query appear in at least one of the fields. var conjuctionQuery = new BooleanQuery(); conjuctionQuery.SetBoost(2.0f); // Some terms in the multi-term query appear in at least one of the fields. var disjunctionQuery = new BooleanQuery(); disjunctionQuery.SetBoost(0.1f); // Suffix wildcard search e.g. jquer* var wildCardQuery = new BooleanQuery(); wildCardQuery.SetBoost(0.5f); // Escape the entire term we use for exact searches. var escapedSearchTerm = Escape(searchFilter.SearchTerm); var exactIdQuery = new TermQuery(new Term("Id-Exact", escapedSearchTerm)); exactIdQuery.SetBoost(2.5f); var wildCardIdQuery = new WildcardQuery(new Term("Id-Exact", "*" + escapedSearchTerm + "*")); foreach (var term in GetSearchTerms(searchFilter.SearchTerm)) { var termQuery = queryParser.Parse(term); conjuctionQuery.Add(termQuery, BooleanClause.Occur.MUST); disjunctionQuery.Add(termQuery, BooleanClause.Occur.SHOULD); foreach (var field in fields) { var wildCardTermQuery = new WildcardQuery(new Term(field, term + "*")); wildCardTermQuery.SetBoost(0.7f); wildCardQuery.Add(wildCardTermQuery, BooleanClause.Occur.SHOULD); } } // Create an OR of all the queries that we have var combinedQuery = conjuctionQuery.Combine(new Query[] { exactIdQuery, wildCardIdQuery, conjuctionQuery, disjunctionQuery, wildCardQuery }); if (searchFilter.SortProperty == SortProperty.Relevance) { // If searching by relevance, boost scores by download count. var downloadCountBooster = new FieldScoreQuery("DownloadCount", FieldScoreQuery.Type.INT); return(new CustomScoreQuery(combinedQuery, downloadCountBooster)); } return(combinedQuery); }
/// <summary> Perform synonym expansion on a query. /// /// </summary> /// <param name="">query /// </param> /// <param name="">syns /// </param> /// <param name="">a /// </param> /// <param name="">field /// </param> /// <param name="">boost /// </param> public static Query Expand(System.String query, Searcher syns, Analyzer a, System.String field, float boost) { System.Collections.Hashtable already = new System.Collections.Hashtable(); // avoid dups System.Collections.IList top = new System.Collections.ArrayList(); // needs to be separately listed.. // [1] Parse query into separate words so that when we expand we can avoid dups TokenStream ts = a.TokenStream(field, new System.IO.StringReader(query)); Lucene.Net.Analysis.Token t; while ((t = ts.Next()) != null) { System.String word = t.TermText(); if (already.Contains(word) == false) { already.Add(word, word); top.Add(word); } } BooleanQuery tmp = new BooleanQuery(); // [2] form query System.Collections.IEnumerator it = top.GetEnumerator(); while (it.MoveNext()) { // [2a] add to level words in System.String word = (System.String)it.Current; TermQuery tq = new TermQuery(new Term(field, word)); tmp.Add(tq, BooleanClause.Occur.SHOULD); // [2b] add in unique synonums Hits hits = syns.Search(new TermQuery(new Term(Syns2Index.F_WORD, word))); for (int i = 0; i < hits.Length(); i++) { Document doc = hits.Doc(i); System.String[] values = doc.GetValues(Syns2Index.F_SYN); for (int j = 0; j < values.Length; j++) { System.String syn = values[j]; if (already.Contains(syn) == false) { already.Add(syn, syn); tq = new TermQuery(new Term(field, syn)); if (boost > 0) { // else keep normal 1.0 tq.SetBoost(boost); } tmp.Add(tq, BooleanClause.Occur.SHOULD); } } } } return(tmp); }
public void SerializerTermQuery() { JsonSerializer jsonSerializer = new JsonSerializer(); TermQuery termQuery = new TermQuery("fieldname", "fieldvalue"); termQuery.SetBoost(2.0); string jsonString = jsonSerializer.Serialize(termQuery); Console.WriteLine(jsonString); Assert.AreEqual(jsonString, "{\"term\":{\"fieldname\":{\"term\":\"fieldvalue\",\"boost\":2.0}}}"); }
/// <summary> Create the More like query from a PriorityQueue</summary> private Query CreateQuery(PriorityQueue q) { BooleanQuery query = new BooleanQuery(); Object cur; int qterms = 0; float bestScore = 0; while (((cur = q.Pop()) != null)) { Object[] ar = (Object[])cur; TermQuery tq = new TermQuery(new Term((String)ar[1], (String)ar[0])); if (boost) { if (qterms == 0) { bestScore = (float)((Single)ar[2]); } float myScore = (float)((Single)ar[2]); tq.SetBoost(myScore / bestScore); } try { query.Add(tq, BooleanClause.Occur.SHOULD); } catch (BooleanQuery.TooManyClauses) { break; } qterms++; if (maxQueryTerms > 0 && qterms >= maxQueryTerms) { break; } } return(query); }
private static Query ParseQuery(string searchTerm) { var fields = new Dictionary <string, float> { { "Id", 1.2f }, { "Title", 1.0f }, { "Tags", 1.0f }, { "Description", 0.8f }, { "Author", 0.6f } }; var analyzer = new StandardAnalyzer(LuceneCommon.LuceneVersion); searchTerm = QueryParser.Escape(searchTerm).ToLowerInvariant(); var queryParser = new MultiFieldQueryParser(LuceneCommon.LuceneVersion, fields.Keys.ToArray(), analyzer, fields); var conjuctionQuery = new BooleanQuery(); conjuctionQuery.SetBoost(1.5f); var disjunctionQuery = new BooleanQuery(); var wildCardQuery = new BooleanQuery(); wildCardQuery.SetBoost(0.7f); var exactIdQuery = new TermQuery(new Term("Id-Exact", searchTerm)); exactIdQuery.SetBoost(2.5f); foreach (var term in searchTerm.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)) { conjuctionQuery.Add(queryParser.Parse(term), BooleanClause.Occur.MUST); disjunctionQuery.Add(queryParser.Parse(term), BooleanClause.Occur.SHOULD); foreach (var field in fields) { var wildCardTermQuery = new WildcardQuery(new Term(field.Key, term + "*")); wildCardTermQuery.SetBoost(0.7f * field.Value); wildCardQuery.Add(wildCardTermQuery, BooleanClause.Occur.SHOULD); } } return(conjuctionQuery.Combine(new Query[] { exactIdQuery, conjuctionQuery, disjunctionQuery, wildCardQuery })); }
/// <summary> /// Adds a standard type clause to this instance /// </summary> /// <param name="term">Term to add to this query.</param> /// <param name="occurrence">Defines how the term is added to this query.</param> /// <param name="slop">The amount of allowed slop in a phrase query.</param> /// <remarks> /// Slop is the amount of movement each word is allowed in a non-exact phrase query. /// For instance if you search for "Adobe Systems Incorporated" and the slop is set to 0 then /// only results with that term is allowed. If you set the slop to 2 then two movements can be /// made, max, for each word. In the same example with slop set to 2 results would be returned /// for "Adobe Systems Incorporated", "Adobe Incorporated Systems", "Systems Adobe Incorporated", /// and "Systems Incorporated Adobe". /// </remarks> public void AddBooleanClause(SearchTerm term, ClauseOccurrence occurrence, int slop) { if (term == null) throw new ArgumentNullException("term", "term cannot be null"); IncrementTotalClauses(1); if (term.IsPhrase) { PhraseQuery phraseQuery = new PhraseQuery(); phraseQuery.Add(term.GetLuceneTerm()); phraseQuery.SetSlop(slop); phraseQuery.SetBoost(term.Boost); this.luceneQuery.Add(phraseQuery, TypeConverter.ConvertToLuceneClauseOccurrence(occurrence)); phraseQuery = null; } else { TermQuery termQuery = new TermQuery(term.GetLuceneTerm()); termQuery.SetBoost(term.Boost); this.luceneQuery.Add(termQuery, TypeConverter.ConvertToLuceneClauseOccurrence(occurrence)); termQuery = null; } }
public virtual PagedList <Models.ResultObject> Search(string key, int pageIndex, int pageSize, params string[] folders) { var indexDirectory = FSDirectory.Open(new DirectoryInfo(indexDir)); if (!IndexReader.IndexExists(indexDirectory) || string.IsNullOrEmpty(key) && (folders == null || folders.Length == 0)) { return(new PagedList <ResultObject>(new ResultObject[0], pageIndex, pageSize, 0)); } var query = new BooleanQuery(); key = QueryParser.Escape(key.Trim().ToLower()); if (string.IsNullOrEmpty(key)) { key = "*:*"; } QueryParser titleParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, Converter.TitleFieldName, this.Analyzer); var titleQuery = titleParser.Parse(key); titleQuery.SetBoost(2); query.Add(titleQuery, BooleanClause.Occur.SHOULD); QueryParser bodyParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, Converter.BodyFieldName, this.Analyzer); var bodyQuery = bodyParser.Parse(key); bodyQuery.SetBoost(1); query.Add(bodyQuery, BooleanClause.Occur.SHOULD); QueryWrapperFilter filter = null; if (folders != null && folders.Length > 0) { var folderQuery = new BooleanQuery(); //QueryParser folderParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "FolderName", this.Analyzer); foreach (var folder in folders) { var termQuery = new TermQuery(new Term("FolderName", folder)); termQuery.SetBoost(3); folderQuery.Add(termQuery, BooleanClause.Occur.SHOULD); } filter = new QueryWrapperFilter(folderQuery); } var searcher = new IndexSearcher(indexDirectory, true); TopDocsCollector collecltor = TopScoreDocCollector.create(searcher.MaxDoc(), false); if (filter == null) { searcher.Search(query, collecltor); } else { searcher.Search(query, filter, collecltor); } Lucene.Net.Highlight.Highlighter lighter = new Highlighter(new SimpleHTMLFormatter("<strong class='highlight'>", "</strong>"), new Lucene.Net.Highlight.QueryScorer((Query)query)); var startIndex = (pageIndex - 1) * pageSize; List <ResultObject> results = new List <ResultObject>(); foreach (var doc in collecltor.TopDocs(startIndex, pageSize).ScoreDocs) { var document = searcher.Doc(doc.doc); ResultObject result = Converter.ToResultObject(lighter, document); if (result != null) { results.Add(result); } } return(new PagedList <ResultObject>(results, pageIndex, pageSize, collecltor.GetTotalHits())); }
private static Query BuildGeneralQuery( bool doExactId, string originalSearchText, Analyzer analyzer, IEnumerable <NuGetSearchTerm> generalTerms, IEnumerable <Query> generalQueries) { // All terms in the multi-term query appear in at least one of the target fields. var conjuctionQuery = new BooleanQuery(); conjuctionQuery.SetBoost(2.0f); // Some terms in the multi-term query appear in at least one of the target fields. var disjunctionQuery = new BooleanQuery(); disjunctionQuery.SetBoost(0.1f); // Suffix wildcard search e.g. jquer* var wildCardQuery = new BooleanQuery(); wildCardQuery.SetBoost(0.5f); string escapedExactId = originalSearchText.ToLowerInvariant(); Query exactIdQuery = null; Query wildCardIdQuery = null; if (doExactId) { exactIdQuery = new TermQuery(new Term("Id-Exact", escapedExactId)); exactIdQuery.SetBoost(7.5f); wildCardIdQuery = new WildcardQuery(new Term("Id-Exact", "*" + escapedExactId + "*")); } Query nearlyExactIdQuery = null; if (generalTerms.Any()) { string escapedApproximateId = string.Join(" ", generalTerms.Select(c => c.TermOrPhrase)); nearlyExactIdQuery = AnalysisHelper.GetFieldQuery(analyzer, "Id", escapedApproximateId); nearlyExactIdQuery.SetBoost(2.0f); } foreach (var termQuery in generalQueries) { conjuctionQuery.Add(termQuery, BooleanClause.Occur.MUST); disjunctionQuery.Add(termQuery, BooleanClause.Occur.SHOULD); } var sanitizedTerms = generalTerms.Select(c => c.TermOrPhrase.ToLowerInvariant()); foreach (var sanitizedTerm in sanitizedTerms) { foreach (var field in Fields) { var wildCardTermQuery = new WildcardQuery(new Term(field, sanitizedTerm + "*")); wildCardTermQuery.SetBoost(0.7f); wildCardQuery.Add(wildCardTermQuery, BooleanClause.Occur.SHOULD); } } // OR of all the applicable queries var queries = new Query[] { exactIdQuery, wildCardIdQuery, nearlyExactIdQuery, conjuctionQuery, disjunctionQuery, wildCardQuery }; var queriesToCombine = queries.Where(q => !IsDegenerateQuery(q)); var query = conjuctionQuery.Combine(queriesToCombine.ToArray()); return(query); }