/// <summary>
/// Simplified query builder that works for a single term or a single phrase.
/// The query text is run through <paramref name="analyzer"/> and the shape of
/// the returned query depends on how many tokens the analysis produces.
/// </summary>
/// <param name="analyzer">Analyzer used to tokenize <paramref name="queryText"/>.</param>
/// <param name="field">Name of the field the generated terms belong to.</param>
/// <param name="queryText">Raw text to analyze.</param>
/// <returns>
/// An empty <c>BooleanQuery</c> when analysis yields no tokens, a
/// <c>TermQuery</c> when it yields exactly one token, and a <c>PhraseQuery</c>
/// containing every token otherwise. This method never returns <c>null</c>.
/// </returns>
public static Lucene.Net.Search.Query GetFieldQuery(Analyzer analyzer, string field, string queryText)
{
    TokenStream stream = analyzer.TokenStream(field, new StringReader(queryText));

    // Dispose the filter on every exit path so the underlying stream (and
    // its reader) is released; previously it was never closed.
    using (TokenFilter filter = new CachingTokenFilter(stream))
    {
        filter.Reset();

        // This attribute way of getting token properties isn't very good,
        // but it's the non-obsolete one.
        var termAttr = filter.GetAttribute<ITermAttribute>();

        // The position increment attribute is optional; without it every
        // token is treated as advancing by exactly one position.
        IPositionIncrementAttribute posIncrAttr = null;
        if (filter.HasAttribute<IPositionIncrementAttribute>())
        {
            posIncrAttr = filter.GetAttribute<IPositionIncrementAttribute>();
        }

        // 0 tokens
        if (!filter.IncrementToken())
        {
            return new BooleanQuery();
        }

        // 1 token? The text must be captured before advancing, because the
        // attribute instance is overwritten by the next IncrementToken().
        string token1 = termAttr != null ? termAttr.Term : null;
        if (!filter.IncrementToken())
        {
            return new TermQuery(new Term(field, token1));
        }

        // many tokens - the first token is added without an explicit position
        PhraseQuery ret = new PhraseQuery();
        ret.Add(new Term(field, token1));

        int position = 0;
        do
        {
            // handle rest of tokens; positions accumulate from the first token
            string tokenNext = termAttr != null ? termAttr.Term : null;
            position += posIncrAttr != null ? posIncrAttr.PositionIncrement : 1;
            ret.Add(new Term(field, tokenNext), position);
        }
        while (filter.IncrementToken());

        return ret;
    }
}
/// <summary>
/// Runs the text of an eligible field node through <c>this.analyzer</c> and
/// returns a node chosen from the number of tokens and positions produced.
/// </summary>
/// <remarks>
/// Only nodes that are <c>ITextableQueryNode</c> and are not wildcard, fuzzy
/// or regexp nodes, and whose parent is not an <c>IRangeQueryNode</c>, are
/// analyzed; any other node is returned unchanged. For analyzed nodes the
/// result is one of:
/// <list type="bullet">
/// <item><description><c>NoTokenFoundQueryNode</c> - no tokens, or the buffer has no <c>ICharTermAttribute</c>.</description></item>
/// <item><description>the original field node with its text replaced - exactly one token.</description></item>
/// <item><description><c>GroupQueryNode</c> wrapping a <c>StandardBooleanQueryNode</c> - several tokens, no phrase query.</description></item>
/// <item><description><c>MultiPhraseQueryNode</c> - quoted node with several tokens sharing a position.</description></item>
/// <item><description><c>TokenizedPhraseQueryNode</c> - quoted node with every token at its own position.</description></item>
/// </list>
/// </remarks>
/// <param name="node">The query node to post-process.</param>
/// <returns>The replacement node, or <paramref name="node"/> itself when it is not eligible.</returns>
/// <exception cref="Exception">Wraps an <c>IOException</c> raised while obtaining or resetting the token stream.</exception>
protected override IQueryNode PostProcessNode(IQueryNode node)
{
    if (node is ITextableQueryNode
        && !(node is WildcardQueryNode)
        && !(node is FuzzyQueryNode)
        && !(node is RegexpQueryNode)
        && !(node.Parent is IRangeQueryNode))
    {
        // NOTE(review): the guard checks ITextableQueryNode but the cast is to
        // FieldQueryNode - presumably every remaining textable node is a
        // FieldQueryNode; confirm against the node hierarchy.
        FieldQueryNode fieldNode = ((FieldQueryNode)node);
        string text = fieldNode.GetTextAsString();
        string field = fieldNode.GetFieldAsString();

        CachingTokenFilter buffer = null;
        IPositionIncrementAttribute posIncrAtt = null;
        int numTokens = 0;
        int positionCount = 0;
        bool severalTokensAtSamePosition = false;

        TokenStream source = null;
        try
        {
            source = this.analyzer.TokenStream(field, text);
            source.Reset();
            buffer = new CachingTokenFilter(source);

            if (buffer.HasAttribute<IPositionIncrementAttribute>())
            {
                posIncrAtt = buffer.GetAttribute<IPositionIncrementAttribute>();
            }

            // First pass: count the tokens, sum the non-zero position
            // increments, and note whether any token reports increment 0
            // (i.e. shares a position with the previous token).
            try
            {
                while (buffer.IncrementToken())
                {
                    numTokens++;
                    int positionIncrement = (posIncrAtt != null) ? posIncrAtt.PositionIncrement : 1;
                    if (positionIncrement != 0)
                    {
                        positionCount += positionIncrement;
                    }
                    else
                    {
                        severalTokensAtSamePosition = true;
                    }
                }
            }
#pragma warning disable 168
            catch (IOException e)
#pragma warning restore 168
            {
                // ignore: counting simply stops at the token that failed
            }
        }
        catch (IOException e)
        {
            // Failure to obtain or reset the stream is rethrown as a general
            // Exception, keeping the IOException as the inner exception.
            throw new Exception(e.Message, e);
        }
        finally
        {
            // The source is closed here, before the second pass; the loops
            // below read only from the buffer.
            IOUtils.CloseWhileHandlingException(source);
        }

        // rewind the buffer stream so the branches below can read the tokens again
        buffer.Reset();

        if (!buffer.HasAttribute<ICharTermAttribute>())
        {
            return new NoTokenFoundQueryNode();
        }

        ICharTermAttribute termAtt = buffer.GetAttribute<ICharTermAttribute>();

        if (numTokens == 0)
        {
            return new NoTokenFoundQueryNode();
        }
        else if (numTokens == 1)
        {
            // Exactly one token: reuse the incoming node, replacing its text
            // with the analyzed term.
            string term = null;
            try
            {
                bool hasNext;
                hasNext = buffer.IncrementToken();
                Debug.Assert(hasNext == true);
                term = termAtt.ToString();
            }
#pragma warning disable 168
            catch (IOException e)
#pragma warning restore 168
            {
                // safe to ignore, because we know the number of tokens
            }

            // NOTE(review): if the catch above ever runs, term is still null
            // here - confirm ToCharSequence() tolerates a null receiver.
            fieldNode.Text = term.ToCharSequence();
            return fieldNode;
        }
        else if (severalTokensAtSamePosition || !(node is QuotedFieldQueryNode))
        {
            if (positionCount == 1 || !(node is QuotedFieldQueryNode))
            {
                // no phrase query:
                if (positionCount == 1)
                {
                    // simple case: only one position, with synonyms
                    List<IQueryNode> children = new List<IQueryNode>();

                    for (int i = 0; i < numTokens; i++)
                    {
                        string term = null;
                        try
                        {
                            bool hasNext = buffer.IncrementToken();
                            Debug.Assert(hasNext == true);
                            term = termAtt.ToString();
                        }
#pragma warning disable 168
                        catch (IOException e)
#pragma warning restore 168
                        {
                            // safe to ignore, because we know the number of tokens
                        }

                        children.Add(new FieldQueryNode(field, term, -1, -1));
                    }

                    // positionCount == 1 is always true inside this branch, so
                    // the second constructor argument is always true here.
                    return new GroupQueryNode(
                        new StandardBooleanQueryNode(children, positionCount == 1));
                }
                else
                {
                    // multiple positions: q collects one clause per position;
                    // currentQuery is the clause for the position being built.
                    IQueryNode q = new StandardBooleanQueryNode(new List<IQueryNode>(), false);
                    IQueryNode currentQuery = null;

                    for (int i = 0; i < numTokens; i++)
                    {
                        string term = null;
                        try
                        {
                            bool hasNext = buffer.IncrementToken();
                            Debug.Assert(hasNext == true);
                            term = termAtt.ToString();
                        }
#pragma warning disable 168
                        catch (IOException e)
#pragma warning restore 168
                        {
                            // safe to ignore, because we know the number of tokens
                        }

                        if (posIncrAtt != null && posIncrAtt.PositionIncrement == 0)
                        {
                            // Token shares the current position: make sure the
                            // current clause is a boolean node, then add the
                            // new term to it.
                            if (!(currentQuery is BooleanQueryNode))
                            {
                                // NOTE(review): t is null if the very first token
                                // reports increment 0 - confirm whether that can
                                // occur and whether Add accepts null.
                                IQueryNode t = currentQuery;
                                currentQuery = new StandardBooleanQueryNode(new List<IQueryNode>(), true);
                                ((BooleanQueryNode)currentQuery).Add(t);
                            }
                            ((BooleanQueryNode)currentQuery).Add(new FieldQueryNode(field, term, -1, -1));
                        }
                        else
                        {
                            // New position: flush the finished clause into q
                            // (wrapped in a MOD_REQ modifier unless the default
                            // operator is OR), then start a new clause.
                            if (currentQuery != null)
                            {
                                if (this.defaultOperator == Operator.OR)
                                {
                                    q.Add(currentQuery);
                                }
                                else
                                {
                                    q.Add(new ModifierQueryNode(currentQuery, Modifier.MOD_REQ));
                                }
                            }
                            currentQuery = new FieldQueryNode(field, term, -1, -1);
                        }
                    }

                    // Flush the clause for the last position.
                    if (this.defaultOperator == Operator.OR)
                    {
                        q.Add(currentQuery);
                    }
                    else
                    {
                        q.Add(new ModifierQueryNode(currentQuery, Modifier.MOD_REQ));
                    }

                    if (q is BooleanQueryNode)
                    {
                        q = new GroupQueryNode(q);
                    }
                    return q;
                }
            }
            else
            {
                // phrase query: reached only for a QuotedFieldQueryNode with
                // several tokens at the same position and positionCount != 1.
                MultiPhraseQueryNode mpq = new MultiPhraseQueryNode();

                // Terms collected for the position currently being read.
                List<FieldQueryNode> multiTerms = new List<FieldQueryNode>();
                int position = -1;
                int i = 0;
                int termGroupCount = 0;

                for (; i < numTokens; i++)
                {
                    string term = null;
                    int positionIncrement = 1;
                    try
                    {
                        bool hasNext = buffer.IncrementToken();
                        Debug.Assert(hasNext == true);
                        term = termAtt.ToString();
                        if (posIncrAtt != null)
                        {
                            positionIncrement = posIncrAtt.PositionIncrement;
                        }
                    }
#pragma warning disable 168
                    catch (IOException e)
#pragma warning restore 168
                    {
                        // safe to ignore, because we know the number of tokens
                    }

                    // A positive increment starts a new position: emit the
                    // terms gathered for the previous one first.
                    if (positionIncrement > 0 && multiTerms.Count > 0)
                    {
                        foreach (FieldQueryNode termNode in multiTerms)
                        {
                            if (this.positionIncrementsEnabled)
                            {
                                termNode.PositionIncrement = position;
                            }
                            else
                            {
                                termNode.PositionIncrement = termGroupCount;
                            }

                            mpq.Add(termNode);
                        }

                        // Only increment once for each "group" of
                        // terms that were in the same position:
                        termGroupCount++;

                        multiTerms.Clear();
                    }

                    position += positionIncrement;
                    multiTerms.Add(new FieldQueryNode(field, term, -1, -1));
                }

                // Emit the terms of the final position.
                foreach (FieldQueryNode termNode in multiTerms)
                {
                    if (this.positionIncrementsEnabled)
                    {
                        termNode.PositionIncrement = position;
                    }
                    else
                    {
                        termNode.PositionIncrement = termGroupCount;
                    }

                    mpq.Add(termNode);
                }

                return mpq;
            }
        }
        else
        {
            // Quoted node with no tokens sharing a position: one child per
            // token, in stream order.
            TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();
            int position = -1;

            for (int i = 0; i < numTokens; i++)
            {
                string term = null;
                int positionIncrement = 1;
                try
                {
                    bool hasNext = buffer.IncrementToken();
                    Debug.Assert(hasNext == true);
                    term = termAtt.ToString();
                    if (posIncrAtt != null)
                    {
                        positionIncrement = posIncrAtt.PositionIncrement;
                    }
                }
#pragma warning disable 168
                catch (IOException e)
#pragma warning restore 168
                {
                    // safe to ignore, because we know the number of tokens
                }

                FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);

                // With position increments enabled the accumulated position is
                // stored; otherwise the token's ordinal index is used.
                if (this.positionIncrementsEnabled)
                {
                    position += positionIncrement;
                    newFieldNode.PositionIncrement = position;
                }
                else
                {
                    newFieldNode.PositionIncrement = i;
                }

                pq.Add(newFieldNode);
            }

            return pq;
        }
    }

    return node;
}
/// <summary>
/// Analyzes <paramref name="queryText"/> for <paramref name="field"/> and
/// builds a query from the resulting tokens: nothing for zero tokens, a term
/// query for one token, and a boolean, multi-phrase or phrase query for
/// several tokens depending on how they are positioned.
/// </summary>
/// <param name="field">Field the generated terms belong to.</param>
/// <param name="queryText">Text to run through the analyzer.</param>
/// <returns>The generated query, or <c>null</c> when no tokens were produced.</returns>
/// <exception cref="ParseException">throw in overridden method to disallow
/// </exception>
protected internal virtual Query GetFieldQuery(String field, String queryText)
{
    // Try the analyzer's reusable stream first; on an I/O failure fall back
    // to a freshly created stream.
    TokenStream tokenSource;
    try
    {
        tokenSource = analyzer.ReusableTokenStream(field, new StringReader(queryText));
        tokenSource.Reset();
    }
    catch (IOException)
    {
        tokenSource = analyzer.TokenStream(field, new StringReader(queryText));
    }

    CachingTokenFilter cached = new CachingTokenFilter(tokenSource);

    bool resetSucceeded;
    try
    {
        cached.Reset();
        resetSucceeded = true;
    }
    catch (IOException)
    {
        resetSucceeded = false;
    }

    // Attributes are only looked up when the reset went through.
    ITermAttribute termAttribute = null;
    IPositionIncrementAttribute incrementAttribute = null;
    if (resetSucceeded)
    {
        termAttribute = cached.HasAttribute<ITermAttribute>()
            ? cached.GetAttribute<ITermAttribute>()
            : null;
        incrementAttribute = cached.HasAttribute<IPositionIncrementAttribute>()
            ? cached.GetAttribute<IPositionIncrementAttribute>()
            : null;
    }

    // Counting pass: number of tokens, sum of the non-zero increments, and
    // whether any token reported a zero increment.
    int tokenCount = 0;
    int positionSpan = 0;
    bool stackedTokens = false;
    if (termAttribute != null)
    {
        try
        {
            while (cached.IncrementToken())
            {
                tokenCount++;
                int step = incrementAttribute == null ? 1 : incrementAttribute.PositionIncrement;
                if (step == 0)
                {
                    stackedTokens = true;
                }
                else
                {
                    positionSpan += step;
                }
            }
        }
        catch (IOException)
        {
            // ignore
        }
    }

    try
    {
        // rewind the cached tokens, then close the original stream -
        // every token has been buffered by now
        cached.Reset();
        tokenSource.Close();
    }
    catch (IOException)
    {
        // ignore
    }

    if (tokenCount == 0)
    {
        return null;
    }

    if (tokenCount == 1)
    {
        String onlyTerm = null;
        try
        {
            bool advanced = cached.IncrementToken();
            Debug.Assert(advanced);
            onlyTerm = termAttribute.Term;
        }
        catch (IOException)
        {
            // safe to ignore, because we know the number of tokens
        }
        return NewTermQuery(new Term(field, onlyTerm));
    }

    if (!stackedTokens)
    {
        // Every token sits at its own position: plain phrase query.
        PhraseQuery phrase = NewPhraseQuery();
        phrase.Slop = phraseSlop;
        int phrasePosition = -1;
        for (int n = 0; n < tokenCount; n++)
        {
            String phraseText = null;
            int phraseStep = 1;
            try
            {
                bool advanced = cached.IncrementToken();
                Debug.Assert(advanced);
                phraseText = termAttribute.Term;
                if (incrementAttribute != null)
                {
                    phraseStep = incrementAttribute.PositionIncrement;
                }
            }
            catch (IOException)
            {
                // safe to ignore, because we know the number of tokens
            }

            Term phraseTerm = new Term(field, phraseText);
            if (enablePositionIncrements)
            {
                phrasePosition += phraseStep;
                phrase.Add(phraseTerm, phrasePosition);
            }
            else
            {
                phrase.Add(phraseTerm);
            }
        }
        return phrase;
    }

    if (positionSpan == 1)
    {
        // no phrase query: all tokens share one position, so OR them together
        BooleanQuery alternatives = NewBooleanQuery(true);
        for (int n = 0; n < tokenCount; n++)
        {
            String alternativeText = null;
            try
            {
                bool advanced = cached.IncrementToken();
                Debug.Assert(advanced);
                alternativeText = termAttribute.Term;
            }
            catch (IOException)
            {
                // safe to ignore, because we know the number of tokens
            }

            alternatives.Add(NewTermQuery(new Term(field, alternativeText)), Occur.SHOULD);
        }
        return alternatives;
    }

    // phrase query with several terms at some positions
    MultiPhraseQuery multiPhrase = NewMultiPhraseQuery();
    multiPhrase.Slop = phraseSlop;
    List<Term> pendingTerms = new List<Term>();
    int groupPosition = -1;
    for (int idx = 0; idx < tokenCount; idx++)
    {
        String groupText = null;
        int groupStep = 1;
        try
        {
            bool advanced = cached.IncrementToken();
            Debug.Assert(advanced);
            groupText = termAttribute.Term;
            if (incrementAttribute != null)
            {
                groupStep = incrementAttribute.PositionIncrement;
            }
        }
        catch (IOException)
        {
            // safe to ignore, because we know the number of tokens
        }

        // A positive increment opens a new position; flush what was
        // gathered for the previous one before moving on.
        if (groupStep > 0 && pendingTerms.Count > 0)
        {
            if (enablePositionIncrements)
            {
                multiPhrase.Add(pendingTerms.ToArray(), groupPosition);
            }
            else
            {
                multiPhrase.Add(pendingTerms.ToArray());
            }
            pendingTerms.Clear();
        }

        groupPosition += groupStep;
        pendingTerms.Add(new Term(field, groupText));
    }

    // Flush the terms of the last position.
    if (enablePositionIncrements)
    {
        multiPhrase.Add(pendingTerms.ToArray(), groupPosition);
    }
    else
    {
        multiPhrase.Add(pendingTerms.ToArray());
    }
    return multiPhrase;
}