/// <summary>
/// Looks up suggestions for <paramref name="key"/>, letting the caller choose
/// whether every analyzed term is required (<paramref name="allTermsRequired"/>)
/// and whether the returned hits are highlighted (<paramref name="doHighlight"/>).
/// </summary>
/// <param name="key">Raw user input; it is tokenized with the query analyzer.</param>
/// <param name="contexts">
/// Optional context filter. When non-null, a required sub-query is added whose
/// clauses (one per context, each decoded with <c>Utf8ToString()</c>) are OR-ed
/// together. NOTE(review): a non-null but empty sequence still adds the (empty)
/// required sub-query -- presumably that matches nothing; confirm with callers.
/// </param>
/// <param name="num">Number of hits requested from the collector.</param>
/// <param name="allTermsRequired">
/// <c>true</c> to add every term clause as <c>MUST</c>; <c>false</c> to add them as <c>SHOULD</c>.
/// </param>
/// <param name="doHighlight">Forwarded to <c>CreateResults</c>.</param>
/// <returns>The list produced by <c>CreateResults</c> for the collected hits.</returns>
/// <exception cref="InvalidOperationException">The searcher manager is <c>null</c> (suggester was not built).</exception>
public virtual IList<LookupResult> DoLookup(string key, IEnumerable<BytesRef> contexts, int num, bool allTermsRequired, bool doHighlight)
{
    if (m_searcherMgr == null)
    {
        throw new InvalidOperationException("suggester was not built");
    }

    Occur occur = allTermsRequired ? Occur.MUST : Occur.SHOULD;

    var query = new BooleanQuery();
    var matchedTokens = new HashSet<string>();
    string prefixToken = null;
    TokenStream tokenStream = null;
    try
    {
        tokenStream = m_queryAnalyzer.GetTokenStream("", new StringReader(key));
        tokenStream.Reset();
        var termAtt = tokenStream.AddAttribute<ICharTermAttribute>();
        var offsetAtt = tokenStream.AddAttribute<IOffsetAttribute>();

        // Each token is held back for one iteration so that the final token
        // can be treated specially (prefix vs. exact match) after the loop.
        string pendingToken = null;
        int maxEndOffset = -1;
        while (tokenStream.IncrementToken())
        {
            if (pendingToken != null)
            {
                matchedTokens.Add(pendingToken);
                query.Add(new TermQuery(new Term(TEXT_FIELD_NAME, pendingToken)), occur);
            }

            pendingToken = termAtt.ToString();
            if (pendingToken != null)
            {
                maxEndOffset = Math.Max(maxEndOffset, offsetAtt.EndOffset);
            }
        }

        // End() must run before the final offset is read below.
        tokenStream.End();

        if (pendingToken != null)
        {
            Query lastQuery;
            bool trailingCharsDiscarded = maxEndOffset != offsetAtt.EndOffset;
            if (trailingCharsDiscarded)
            {
                // The input continued past the last token (e.g. trailing
                // whitespace): the user finished typing that word, so only
                // exact matches for it are wanted.
                matchedTokens.Add(pendingToken);
                lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, pendingToken));
            }
            else
            {
                // The input ends exactly where the last token ends: the word
                // may still be incomplete, so use the prefix query (or its
                // ngram equivalent) and remember the token as the prefix.
                lastQuery = GetLastTokenQuery(pendingToken);
                prefixToken = pendingToken;
            }

            if (lastQuery != null)
            {
                query.Add(lastQuery, occur);
            }
        }

        if (contexts != null)
        {
            var contextFilter = new BooleanQuery();
            query.Add(contextFilter, Occur.MUST);
            foreach (BytesRef context in contexts)
            {
                // NOTE: strictly this "should" be wrapped in a
                // ConstantScoreQuery (or passed to the search as a Filter),
                // but because the sub-query is MUST'd the score change does
                // not affect the overall ranking, and with DOCS_ONLY indexing
                // the cost should be the same either way (no freq int[]
                // blocks to decode).
                // TODO: with a BinaryTermField the "must be valid UTF-8"
                // limitation could be lifted.
                contextFilter.Add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, context.Utf8ToString())), Occur.SHOULD);
            }
        }
    }
    finally
    {
        IOUtils.CloseWhileHandlingException(tokenStream);
    }

    // TODO: a blended sort combining weight with score could be allowed here.
    // For now the score is ignored and hits are ordered by weight only.
    Query finalQuery = FinishQuery(query, allTermsRequired);

    // Sort by weight, descending.
    TopFieldCollector topCollector = TopFieldCollector.Create(SORT, num, true, false, false, false);

    // Postings were sorted by weight at index time, so collection can stop
    // after the first num hits.
    ICollector earlyTerminating = new EarlyTerminatingSortingCollector(topCollector, SORT, num);

    IndexSearcher searcher = m_searcherMgr.Acquire();
    try
    {
        searcher.Search(finalQuery, earlyTerminating);
        var hits = (TopFieldDocs)topCollector.GetTopDocs();
        return CreateResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
    }
    finally
    {
        m_searcherMgr.Release(searcher);
    }
}
/// <summary>
/// Looks up suggestions for <paramref name="key"/>, letting the caller choose
/// whether every analyzed term is required (<paramref name="allTermsRequired"/>)
/// and whether the returned hits are highlighted (<paramref name="doHighlight"/>).
/// </summary>
/// <param name="key">Raw user input; it is tokenized with the query analyzer.</param>
/// <param name="contexts">
/// Optional context filter. When non-null, a required sub-query is added whose
/// clauses (one per context, each decoded with <c>Utf8ToString()</c>) are OR-ed
/// together. NOTE(review): a non-null but empty set still adds the (empty)
/// required sub-query -- presumably that matches nothing; confirm with callers.
/// </param>
/// <param name="num">Number of hits requested from the collector.</param>
/// <param name="allTermsRequired">
/// <c>true</c> to add every term clause as <c>MUST</c>; <c>false</c> to add them as <c>SHOULD</c>.
/// </param>
/// <param name="doHighlight">Forwarded to <c>createResults</c>.</param>
/// <returns>The list produced by <c>createResults</c> for the collected hits.</returns>
/// <exception cref="InvalidOperationException">The searcher manager is <c>null</c> (suggester was not built).</exception>
public virtual IList<LookupResult> Lookup(string key, HashSet<BytesRef> contexts, int num, bool allTermsRequired, bool doHighlight)
{
    if (searcherMgr == null)
    {
        throw new InvalidOperationException("suggester was not built");
    }

    BooleanClause.Occur occur = allTermsRequired ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;

    var query = new BooleanQuery();
    var matchedTokens = new HashSet<string>();
    string prefixToken = null;
    TokenStream tokenStream = null;
    try
    {
        tokenStream = queryAnalyzer.TokenStream("", new StringReader(key));
        tokenStream.Reset();
        var termAtt = tokenStream.AddAttribute<CharTermAttribute>();
        var offsetAtt = tokenStream.AddAttribute<OffsetAttribute>();

        // Each token is held back for one iteration so that the final token
        // can be treated specially (prefix vs. exact match) after the loop.
        string pendingToken = null;
        int maxEndOffset = -1;
        while (tokenStream.IncrementToken())
        {
            if (pendingToken != null)
            {
                matchedTokens.Add(pendingToken);
                query.Add(new TermQuery(new Term(TEXT_FIELD_NAME, pendingToken)), occur);
            }

            pendingToken = termAtt.ToString();
            if (pendingToken != null)
            {
                maxEndOffset = Math.Max(maxEndOffset, offsetAtt.EndOffset());
            }
        }

        // End() must run before the final offset is read below.
        tokenStream.End();

        if (pendingToken != null)
        {
            Query lastQuery;
            bool trailingCharsDiscarded = maxEndOffset != offsetAtt.EndOffset();
            if (trailingCharsDiscarded)
            {
                // The input continued past the last token (e.g. trailing
                // whitespace): the user finished typing that word, so only
                // exact matches for it are wanted.
                matchedTokens.Add(pendingToken);
                lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, pendingToken));
            }
            else
            {
                // The input ends exactly where the last token ends: the word
                // may still be incomplete, so use the prefix query (or its
                // ngram equivalent) and remember the token as the prefix.
                lastQuery = GetLastTokenQuery(pendingToken);
                prefixToken = pendingToken;
            }

            if (lastQuery != null)
            {
                query.Add(lastQuery, occur);
            }
        }

        if (contexts != null)
        {
            var contextFilter = new BooleanQuery();
            query.Add(contextFilter, BooleanClause.Occur.MUST);
            foreach (BytesRef context in contexts)
            {
                // NOTE: strictly this "should" be wrapped in a
                // ConstantScoreQuery (or passed to the search as a Filter),
                // but because the sub-query is MUST'd the score change does
                // not affect the overall ranking, and with DOCS_ONLY indexing
                // the cost should be the same either way (no freq int[]
                // blocks to decode).
                // TODO: with a BinaryTermField the "must be valid UTF-8"
                // limitation could be lifted.
                contextFilter.Add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, context.Utf8ToString())), BooleanClause.Occur.SHOULD);
            }
        }
    }
    finally
    {
        IOUtils.CloseWhileHandlingException(tokenStream);
    }

    // TODO: a blended sort combining weight with score could be allowed here.
    // For now the score is ignored and hits are ordered by weight only.
    Query finalQuery = FinishQuery(query, allTermsRequired);

    // Sort by weight, descending.
    TopFieldCollector topCollector = TopFieldCollector.Create(SORT, num, true, false, false, false);

    // Postings were sorted by weight at index time, so collection can stop
    // after the first num hits.
    Collector earlyTerminating = new EarlyTerminatingSortingCollector(topCollector, SORT, num);

    IndexSearcher searcher = searcherMgr.Acquire();
    try
    {
        searcher.Search(finalQuery, earlyTerminating);
        var hits = (TopFieldDocs)topCollector.TopDocs();
        return createResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
    }
    finally
    {
        searcherMgr.Release(searcher);
    }
}