/// <summary> Create weight in multiple index scenario.
///
/// Distributed query processing is done in the following steps:
/// 1. rewrite query
/// 2. extract necessary terms
/// 3. collect dfs for these terms from the Searchables
/// 4. create query weight using aggregate dfs.
/// 5. distribute that weight to Searchables
/// 6. merge results
///
/// Steps 1-4 are done here, 5+6 in the search() methods
///
/// </summary>
/// <returns> rewritten queries
/// </returns>
public /*protected internal*/ override Weight CreateWeight(Query original, IState state)
{
    // step 1
    Query rewrittenQuery = Rewrite(original, state);

    // step 2
    ISet<Term> terms = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<Term>();
    rewrittenQuery.ExtractTerms(terms);

    // step 3
    Term[] allTermsArray = terms.ToArray();
    int[] aggregatedDfs = new int[terms.Count];
    for (int i = 0; i < searchables.Length; i++)
    {
        int[] dfs = searchables[i].DocFreqs(allTermsArray, state);
        for (int j = 0; j < aggregatedDfs.Length; j++)
        {
            aggregatedDfs[j] += dfs[j];
        }
    }

    var dfMap = new Dictionary<Term, int>();
    for (int i = 0; i < allTermsArray.Length; i++)
    {
        dfMap[allTermsArray[i]] = aggregatedDfs[i];
    }

    // step 4
    int numDocs = MaxDoc;
    CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs, Similarity);

    return rewrittenQuery.Weight(cacheSim, state);
}
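For context, here is a minimal, self-contained sketch of the multi-index scenario the method above serves. It assumes the stock Lucene.Net 3.0.3 API (RAMDirectory, IndexWriter, IndexSearcher, MultiSearcher, without the IState parameter used in the fork above); the field names and documents are illustrative only. Searching through the MultiSearcher routes scoring through CreateWeight, so document frequencies are aggregated across both indexes before the query is weighted.

using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Version = Lucene.Net.Util.Version;

static class MultiIndexSearchSketch
{
    static void AddDoc(IndexWriter writer, string id, string name)
    {
        var doc = new Document();
        doc.Add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("name", name, Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }

    static void Main()
    {
        var analyzer = new StandardAnalyzer(Version.LUCENE_30);

        // Two separate indexes, each with its own local document frequencies.
        var dirA = new RAMDirectory();
        var dirB = new RAMDirectory();
        using (var writer = new IndexWriter(dirA, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            AddDoc(writer, "1", "fernando smith");
        using (var writer = new IndexWriter(dirB, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            AddDoc(writer, "2", "jonathan smith");

        // MultiSearcher.CreateWeight (above) extracts the query terms and sums the
        // per-index dfs, so scores are comparable across both sub-searchers.
        var multi = new MultiSearcher(
            new IndexSearcher(dirA, true),
            new IndexSearcher(dirB, true));

        TopDocs hits = multi.Search(new TermQuery(new Term("name", "smith")), 10);
        System.Console.WriteLine("hits: " + hits.TotalHits); // expect 2
    }
}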
public override void ExtractTerms(ISet<Term> terms)
{
    // TODO: OK to not add any terms when wrapped a filter
    // and used with MultiSearcher, but may not be OK for
    // highlighting.
    // If a query was wrapped, we delegate to query.
    if (m_query != null)
    {
        m_query.ExtractTerms(terms);
    }
}
public void TestNoMatchFirstWordBug()
{
    FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
    flt.AddTerms("fernando smith", "name", 0.3f, 1);
    Query q = flt.Rewrite(searcher.GetIndexReader());
    Hashtable queryTerms = new Hashtable();
    q.ExtractTerms(queryTerms);
    Assert.IsTrue(queryTerms.Contains(new Term("name", "smith")), "Should have variant smith");

    TopDocs topDocs = searcher.Search(flt, 1);
    ScoreDoc[] sd = topDocs.scoreDocs;
    Assert.IsTrue((sd != null) && (sd.Length > 0), "score docs must match 1 doc");
    Document doc = searcher.Doc(sd[0].doc);
    Assert.AreEqual("2", doc.Get("id"), "Should match most similar when using 2 words");
}
public void TestMultiWord()
{
    FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
    flt.AddTerms("jonathin smoth", "name", 0.3f, 1);
    Query q = flt.Rewrite(searcher.IndexReader);
    ISet<Term> queryTerms = Support.Compatibility.SetFactory.CreateHashSet<Term>();
    q.ExtractTerms(queryTerms);
    Assert.IsTrue(queryTerms.Contains(new Term("name", "jonathan")), "Should have variant jonathan");
    Assert.IsTrue(queryTerms.Contains(new Term("name", "smith")), "Should have variant smith");

    TopDocs topDocs = searcher.Search(flt, 1);
    ScoreDoc[] sd = topDocs.ScoreDocs;
    Assert.IsTrue((sd != null) && (sd.Length > 0), "score docs must match 1 doc");
    Document doc = searcher.Doc(sd[0].Doc);
    Assert.AreEqual("2", doc.Get("id"), "Should match most similar when using 2 words");
}
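The two tests above rely on an analyzer and a searcher that are set up elsewhere in the fixture. The sketch below is a hypothetical reconstruction of such a setup, not the actual test class: it assumes NUnit and the Lucene.Net 3.0.3 API, and the document values are illustrative, chosen so that "jonathan" and "smith" exist as index terms for the fuzzy variants to match.

using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Store;
using NUnit.Framework;

[TestFixture]
public class FuzzyLikeThisFixtureSketch
{
    private Analyzer analyzer;
    private RAMDirectory directory;
    private IndexSearcher searcher;

    [SetUp]
    public void SetUp()
    {
        analyzer = new WhitespaceAnalyzer();
        directory = new RAMDirectory();
        using (var writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
        {
            AddDoc(writer, "1", "jonathon smythe");
            AddDoc(writer, "2", "jonathan smith");
            AddDoc(writer, "3", "johnny smith");
        }
        searcher = new IndexSearcher(directory, true);
    }

    private static void AddDoc(IndexWriter writer, string id, string name)
    {
        var doc = new Document();
        doc.Add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("name", name, Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
}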
/// <summary> Create weight in multiple index scenario.
///
/// Distributed query processing is done in the following steps:
/// 1. rewrite query
/// 2. extract necessary terms
/// 3. collect dfs for these terms from the Searchables
/// 4. create query weight using aggregate dfs.
/// 5. distribute that weight to Searchables
/// 6. merge results
///
/// Steps 1-4 are done here, 5+6 in the search() methods
///
/// </summary>
/// <returns> rewritten queries
/// </returns>
public /*protected internal*/ override Weight CreateWeight(Query original)
{
    // step 1
    Query rewrittenQuery = Rewrite(original);

    // step 2
    Support.Set<Lucene.Net.Index.Term> terms = new Support.Set<Term>();
    rewrittenQuery.ExtractTerms(terms);

    // step 3
    Term[] allTermsArray = new Term[terms.Count];
    int index = 0;
    foreach (Term t in terms)
    {
        allTermsArray[index++] = t;
    }

    int[] aggregatedDfs = new int[terms.Count];
    for (int i = 0; i < searchables.Length; i++)
    {
        int[] dfs = searchables[i].DocFreqs(allTermsArray);
        for (int j = 0; j < aggregatedDfs.Length; j++)
        {
            aggregatedDfs[j] += dfs[j];
        }
    }

    IDictionary<Term, int> dfMap = new Support.Dictionary<Term, int>();
    for (int i = 0; i < allTermsArray.Length; i++)
    {
        dfMap[allTermsArray[i]] = aggregatedDfs[i];
    }

    // step 4
    int numDocs = MaxDoc();
    CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs, GetSimilarity());

    return rewrittenQuery.Weight(cacheSim);
}
/// <summary> Create weight in multiple index scenario.
///
/// Distributed query processing is done in the following steps:
/// 1. rewrite query
/// 2. extract necessary terms
/// 3. collect dfs for these terms from the Searchables
/// 4. create query weight using aggregate dfs.
/// 5. distribute that weight to Searchables
/// 6. merge results
///
/// Steps 1-4 are done here, 5+6 in the search() methods
///
/// </summary>
/// <returns> rewritten queries
/// </returns>
protected internal override Weight CreateWeight(Query original)
{
    // step 1
    Query rewrittenQuery = Rewrite(original);

    // step 2
    System.Collections.Hashtable terms = new System.Collections.Hashtable();
    rewrittenQuery.ExtractTerms(terms);

    // step 3
    Term[] allTermsArray = new Term[terms.Count];
    int index = 0;
    System.Collections.IEnumerator e = terms.Keys.GetEnumerator();
    while (e.MoveNext())
    {
        allTermsArray[index++] = e.Current as Term;
    }

    int[] aggregatedDfs = new int[terms.Count];
    for (int i = 0; i < searchables.Length; i++)
    {
        int[] dfs = searchables[i].DocFreqs(allTermsArray);
        for (int j = 0; j < aggregatedDfs.Length; j++)
        {
            aggregatedDfs[j] += dfs[j];
        }
    }

    System.Collections.Hashtable dfMap = new System.Collections.Hashtable();
    for (int i = 0; i < allTermsArray.Length; i++)
    {
        dfMap[allTermsArray[i]] = (System.Int32)aggregatedDfs[i];
    }

    // step 4
    int numDocs = MaxDoc();
    CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs);

    return rewrittenQuery.Weight(cacheSim);
}
public override Query Rewrite(Query original)
{
    Query rewritten = base.Rewrite(original);
    ISet<Term> terms = new JCG.HashSet<Term>();
    rewritten.ExtractTerms(terms);

    // Make a single request to remote nodes for term
    // stats:
    for (int nodeID = 0; nodeID < nodeVersions.Length; nodeID++)
    {
        if (nodeID == MyNodeID)
        {
            continue;
        }

        ISet<Term> missing = new JCG.HashSet<Term>();
        foreach (Term term in terms)
        {
            TermAndShardVersion key = new TermAndShardVersion(nodeID, nodeVersions[nodeID], term);
            if (!outerInstance.termStatsCache.ContainsKey(key))
            {
                missing.Add(term);
            }
        }

        if (missing.Count != 0)
        {
            foreach (KeyValuePair<Term, TermStatistics> ent in outerInstance.outerInstance.GetNodeTermStats(missing, nodeID, nodeVersions[nodeID]))
            {
                TermAndShardVersion key = new TermAndShardVersion(nodeID, nodeVersions[nodeID], ent.Key);
                outerInstance.termStatsCache[key] = ent.Value;
            }
        }
    }

    return rewritten;
}
public void flatten(Query sourceQuery, Dictionary<Query, Query> flatQueries)
{
    if (sourceQuery is BooleanQuery)
    {
        BooleanQuery bq = (BooleanQuery)sourceQuery;
        foreach (BooleanClause clause in bq.GetClauses())
        {
            if (!clause.IsProhibited)
                flatten(clause.Query, flatQueries);
        }
    }
    else if (sourceQuery is PrefixQuery)
    {
        if (!flatQueries.ContainsKey(sourceQuery))
            flatQueries.Add(sourceQuery, sourceQuery);
    }
    else if (sourceQuery is DisjunctionMaxQuery)
    {
        DisjunctionMaxQuery dmq = (DisjunctionMaxQuery)sourceQuery;
        foreach (Query query in dmq)
        {
            flatten(query, flatQueries);
        }
    }
    else if (sourceQuery is TermQuery)
    {
        if (!flatQueries.ContainsKey(sourceQuery))
            flatQueries.Add(sourceQuery, sourceQuery);
    }
    else if (sourceQuery is PhraseQuery)
    {
        if (!flatQueries.ContainsKey(sourceQuery))
        {
            PhraseQuery pq = (PhraseQuery)sourceQuery;
            if (pq.GetTerms().Length > 1)
                flatQueries.Add(pq, pq);
            else if (pq.GetTerms().Length == 1)
            {
                Query q = new TermQuery(pq.GetTerms()[0]);
                flatQueries.Add(q, q);
            }
        }
    }
    else
    {
        // Fallback to using extracted terms
        ISet<Term> terms = SetFactory.CreateHashSet<Term>();
        try
        {
            sourceQuery.ExtractTerms(terms);
        }
        catch (NotSupportedException)
        {
            // thrown by default impl
            return; // ignore error and discard query
        }

        foreach (var term in terms)
        {
            flatten(new TermQuery(term), flatQueries);
        }
    }
}
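A hypothetical usage sketch of the flatten method above, assuming it is called from inside the same class that defines it and a Lucene.Net 3.0.x-style Occur enum (the Occur type's location varies between versions); the field names and values are illustrative. It shows the behaviors the branches implement: prohibited clauses are skipped, a single-term phrase is rewritten as a term query, and results are de-duplicated by using the query as its own dictionary key.

// Requires System.Collections.Generic, Lucene.Net.Index and Lucene.Net.Search.
var bq = new BooleanQuery();
bq.Add(new TermQuery(new Term("name", "smith")), Occur.MUST);
bq.Add(new TermQuery(new Term("name", "jones")), Occur.MUST_NOT); // prohibited: skipped

var phrase = new PhraseQuery();
phrase.Add(new Term("name", "jonathan"));                         // single-term phrase
bq.Add(phrase, Occur.SHOULD);

var flatQueries = new Dictionary<Query, Query>();
flatten(bq, flatQueries);

// flatQueries now contains TermQuery(name:smith) and TermQuery(name:jonathan):
// the MUST_NOT clause was dropped and the one-term PhraseQuery was converted
// to a TermQuery, matching the branches above.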
public override void ExtractTerms(ISet<Term> terms)
{
    @in.ExtractTerms(terms);
}
// inherit javadoc
public override void ExtractTerms(System.Collections.Generic.ISet<Term> terms)
{
    Query.ExtractTerms(terms);
}
private void DoSearch(Query query, IEnumerable<SortField> sortField, int maxResults)
{
    // This try/catch exists because analyzers strip out stop words and sometimes leave the query
    // with null values. We simply try to extract terms: if that fails with a null
    // reference, the query is an invalid null query; a NotSupportedException occurs when the query is
    // valid but the type of query can't extract terms.
    // This IS a work-around; theoretically Lucene itself should check for null query parameters
    // before throwing exceptions.
    try
    {
        var set = new Hashtable();
        query.ExtractTerms(set);
    }
    catch (NullReferenceException)
    {
        // this means that an analyzer has stripped out the stop words and now there are
        // no words left to search on
        TotalItemCount = 0;
        return;
    }
    catch (NotSupportedException)
    {
        // swallow this exception; we should continue if this occurs.
    }

    maxResults = maxResults > 1 ? maxResults : LuceneSearcher.MaxDoc();

    if (sortField.Count() == 0)
    {
        var topDocs = LuceneSearcher.Search(query, null, maxResults, new Sort());
        _collector = new AllHitsCollector(topDocs.scoreDocs);
        topDocs = null;
    }
    else
    {
        var topDocs = LuceneSearcher.Search(query, null, maxResults, new Sort(sortField.ToArray()));
        _collector = new AllHitsCollector(topDocs.scoreDocs);
        topDocs = null;
    }

    TotalItemCount = _collector.Count;
}
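To illustrate why the NotSupportedException catch above is needed, the fragment below (a hypothetical sketch assuming the same Lucene.Net 2.9-era API as the snippet, where ExtractTerms takes a Hashtable) probes an unrewritten wildcard query: multi-term queries do not support term extraction until they are rewritten, so the probe has to swallow the exception and carry on with the search.

// Hypothetical fragment; requires System, System.Collections,
// Lucene.Net.Index and Lucene.Net.Search.
var wildcard = new WildcardQuery(new Term("name", "smi*"));
var probe = new Hashtable();
try
{
    // Throws NotSupportedException: the wildcard query has not been
    // rewritten into concrete terms yet.
    wildcard.ExtractTerms(probe);
}
catch (NotSupportedException)
{
    // The query is still valid to execute; we just cannot enumerate
    // its terms at this point, so continue with the search.
}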
// inherit javadoc
public override void ExtractTerms(ISet<Term> terms)
{
    Query.ExtractTerms(terms);
}