/// <summary>
/// Runs the textual pre-processing passes over <paramref name="query"/>, parses the result with a
/// <see cref="RangeQueryParser"/> and post-processes the parsed query through <c>HandleMethods</c>.
/// </summary>
/// <param name="query">The raw query text.</param>
/// <param name="indexQuery">Supplies the default field and the default boolean operator.</param>
/// <param name="analyzer">Analyzer handed to the parser and to <c>HandleMethods</c>.</param>
/// <returns>The query produced by <c>HandleMethods</c>.</returns>
/// <exception cref="ParseException">
/// Thrown when a <see cref="ParseException"/> is raised inside the method; the message includes the
/// rewritten query text and, when it differs, the original text.
/// </exception>
public static Query BuildQuery(string query, IndexQuery indexQuery, RavenPerFieldAnalyzerWrapper analyzer)
{
    // Kept so the error message can show what the caller actually sent.
    var untouchedQuery = query;
    try
    {
        var parser = new RangeQueryParser(Version.LUCENE_29, indexQuery.DefaultField ?? string.Empty, analyzer);
        parser.DefaultOperator = indexQuery.DefaultOperator == QueryOperator.Or
            ? QueryParser.Operator.OR
            : QueryParser.Operator.AND;
        parser.AllowLeadingWildcard = true;

        // Each pass assigns back to 'query' so that a failure reports the text as rewritten so far.
        query = PreProcessComments(query);
        query = PreProcessMixedInclusiveExclusiveRangeQueries(query);
        query = PreProcessUntokenizedTerms(query, parser);
        query = PreProcessSearchTerms(query);
        query = PreProcessDateTerms(query, parser);

        return HandleMethods(parser.Parse(query), analyzer);
    }
    catch (ParseException pe)
    {
        if (untouchedQuery != query)
        {
            throw new ParseException("Could not parse modified query: '" + query + "' original was: '" + untouchedQuery + "'", pe);
        }

        throw new ParseException("Could not parse: '" + query + "'", pe);
    }
}
/// <summary>
/// Builds a Lucene query from <paramref name="query"/> under the invariant-culture scope returned by
/// <c>CultureHelper.EnsureInvariantCulture()</c>. When <c>UseLuceneASTParser</c> is set the AST parser
/// is used; otherwise the text is pre-processed and parsed with a <see cref="RangeQueryParser"/>.
/// </summary>
/// <param name="query">The raw query text.</param>
/// <param name="indexQuery">Supplies the default field and the default boolean operator.</param>
/// <param name="analyzer">Analyzer used by either parser path.</param>
/// <returns>
/// The parsed query. On the AST path an empty <see cref="BooleanQuery"/> is returned when the AST
/// yields <c>null</c>.
/// </returns>
/// <exception cref="ParseException">Wraps any <see cref="ParseException"/> raised while parsing.</exception>
public static Query BuildQuery(string query, IndexQuery indexQuery, RavenPerFieldAnalyzerWrapper analyzer)
{
    using (CultureHelper.EnsureInvariantCulture())
    {
        if (UseLuceneASTParser)
        {
            try
            {
                var astParser = new LuceneQueryParser
                {
                    IsDefaultOperatorAnd = indexQuery.DefaultOperator == QueryOperator.And
                };
                astParser.Parse(query);

                var ast = astParser.LuceneAST;
                var configuration = new LuceneASTQueryConfiguration
                {
                    Analayzer = analyzer,
                    DefaultOperator = indexQuery.DefaultOperator,
                    FieldName = indexQuery.DefaultField ?? string.Empty
                };
                var astQuery = ast.ToQuery(configuration);

                // The parser already throws a parse exception on a syntax error. A null result
                // now means a term query that has been fully analyzed, so a valid (empty) query
                // is returned instead.
                if (astQuery != null)
                {
                    return astQuery;
                }

                return new BooleanQuery();
            }
            catch (ParseException pe)
            {
                throw new ParseException("Could not parse: '" + query + "'", pe);
            }
        }

        // Legacy path: textual rewrites followed by the range-aware query parser.
        var untouchedQuery = query;
        try
        {
            var legacyParser = new RangeQueryParser(Version.LUCENE_29, indexQuery.DefaultField ?? string.Empty, analyzer);
            legacyParser.DefaultOperator = indexQuery.DefaultOperator == QueryOperator.Or
                ? QueryParser.Operator.OR
                : QueryParser.Operator.AND;
            legacyParser.AllowLeadingWildcard = true;

            // Each pass assigns back to 'query' so that a failure reports the text as rewritten so far.
            query = PreProcessComments(query);
            query = PreProcessMixedInclusiveExclusiveRangeQueries(query);
            query = PreProcessUntokenizedTerms(query, legacyParser);
            query = PreProcessSearchTerms(query);
            query = PreProcessDateTerms(query, legacyParser);

            return HandleMethods(legacyParser.Parse(query), analyzer);
        }
        catch (ParseException pe)
        {
            if (untouchedQuery != query)
            {
                throw new ParseException("Could not parse modified query: '" + query + "' original was: '" + untouchedQuery + "'", pe);
            }

            throw new ParseException("Could not parse: '" + query + "'", pe);
        }
    }
}
/// <summary>
/// Replaces every match of <c>dateQuery</c> in <paramref name="query"/> with
/// <c>field:token</c>, where the token comes from <c>queryParser.ReplaceToken</c>.
/// </summary>
/// <param name="query">The query text to rewrite.</param>
/// <param name="queryParser">Parser that produces the replacement token for each field/term pair.</param>
/// <returns>The rewritten text, or <paramref name="query"/> itself when nothing matched.</returns>
private static string PreProcessDateTerms(string query, RangeQueryParser queryParser)
{
    var dateMatches = dateQuery.Matches(query);
    if (dateMatches.Count == 0)
    {
        return query;
    }

    var rewritten = new StringBuilder(query);

    // Walk the matches back to front so that earlier match positions stay valid.
    var index = dateMatches.Count;
    while (index-- > 0)
    {
        var current = dateMatches[index];
        var fieldName = current.Groups[1].Value;
        var token = queryParser.ReplaceToken(fieldName, current.Groups[2].Value);

        rewritten.Remove(current.Index, current.Length);
        rewritten.Insert(current.Index, fieldName + ":" + token);
    }

    return rewritten.ToString();
}
/// <summary>
/// Pre-processes <paramref name="query"/>, parses it with a <see cref="RangeQueryParser"/> and passes
/// the parsed query through <c>HandleMethods</c>.
/// </summary>
/// <param name="query">The raw query text.</param>
/// <param name="indexQuery">Supplies the default field and the default boolean operator.</param>
/// <param name="analyzer">Analyzer handed to the parser.</param>
/// <returns>The query produced by <c>HandleMethods</c>.</returns>
/// <exception cref="ParseException">
/// Wraps any <see cref="ParseException"/> raised inside the method; the message includes the rewritten
/// query text and, when it differs, the original text.
/// </exception>
/// <remarks>
/// The previous implementation created a <c>KeywordAnalyzer</c> on every call and closed it in a
/// <c>finally</c> block without ever using it; that dead allocation has been removed.
/// </remarks>
public static Query BuildQuery(string query, IndexQuery indexQuery, PerFieldAnalyzerWrapper analyzer)
{
    // Kept so the error message can show what the caller actually sent.
    var originalQuery = query;
    try
    {
        var queryParser = new RangeQueryParser(Version.LUCENE_29, indexQuery.DefaultField ?? string.Empty, analyzer)
        {
            DefaultOperator = indexQuery.DefaultOperator == QueryOperator.Or
                ? QueryParser.Operator.OR
                : QueryParser.Operator.AND,
            AllowLeadingWildcard = true
        };

        // Each pass assigns back to 'query' so that a failure reports the text as rewritten so far.
        query = PreProcessUntokenizedTerms(query, queryParser);
        query = PreProcessSearchTerms(query);
        query = PreProcessDateTerms(query, queryParser);

        var generatedQuery = queryParser.Parse(query);
        generatedQuery = HandleMethods(generatedQuery);
        return generatedQuery;
    }
    catch (ParseException pe)
    {
        if (originalQuery == query)
        {
            throw new ParseException("Could not parse: '" + query + "'", pe);
        }

        throw new ParseException("Could not parse modified query: '" + query + "' original was: '" + originalQuery + "'", pe);
    }
}
/// <summary>
/// Pre-processes <paramref name="query"/> and parses it with a <see cref="RangeQueryParser"/>.
/// </summary>
/// <param name="query">The raw query text.</param>
/// <param name="indexQuery">Supplies the default field and the default boolean operator.</param>
/// <param name="analyzer">Analyzer handed to the parser.</param>
/// <returns>The parsed query.</returns>
/// <exception cref="ParseException">
/// Wraps any <see cref="ParseException"/> raised inside the method; the message includes the rewritten
/// query text and, when it differs, the original text.
/// </exception>
/// <remarks>
/// The previous implementation created a <c>KeywordAnalyzer</c> on every call and closed it in a
/// <c>finally</c> block without ever using it; that dead allocation has been removed.
/// </remarks>
public static Query BuildQuery(string query, IndexQuery indexQuery, PerFieldAnalyzerWrapper analyzer)
{
    // Kept so the error message can show what the caller actually sent.
    var originalQuery = query;
    try
    {
        var queryParser = new RangeQueryParser(Version.LUCENE_29, indexQuery.DefaultField ?? string.Empty, analyzer)
        {
            DefaultOperator = indexQuery.DefaultOperator == QueryOperator.Or
                ? QueryParser.Operator.OR
                : QueryParser.Operator.AND,
            AllowLeadingWildcard = true
        };

        // Each pass assigns back to 'query' so that a failure reports the text as rewritten so far.
        query = PreProcessUntokenizedTerms(query, queryParser);
        query = PreProcessSearchTerms(query);
        query = PreProcessDateTerms(query, queryParser);

        return queryParser.Parse(query);
    }
    catch (ParseException pe)
    {
        if (originalQuery == query)
        {
            throw new ParseException("Could not parse: '" + query + "'", pe);
        }

        throw new ParseException("Could not parse modified query: '" + query + "' original was: '" + originalQuery + "'", pe);
    }
}
/// <summary>
/// Finds every match of <c>untokenizedQuery</c>, registers the matched term with
/// <c>queryParser.SetUntokenized</c>, and rewrites the text so the term is enclosed in double quotes
/// instead of the "[[" "]]" wrapper.
/// </summary>
/// <param name="query">The query text to rewrite.</param>
/// <param name="queryParser">Parser that is told which field/term pairs are untokenized.</param>
/// <returns>The rewritten text, or <paramref name="query"/> itself when nothing matched.</returns>
private static string PreProcessUntokenizedTerms(string query, RangeQueryParser queryParser)
{
    var matches = untokenizedQuery.Matches(query);
    if (matches.Count == 0)
    {
        return query;
    }

    var rewritten = new StringBuilder(query);

    // Computed lazily: only needed when a match carries its term in group 3.
    MatchCollection fieldMatches = null;

    // Go back to front so that the match indexes stay valid while the text is edited.
    for (var index = matches.Count - 1; index >= 0; index--)
    {
        var current = matches[index];
        var termGroup = current.Groups[2];
        var wrapped = termGroup.Value;
        string fieldName = current.Groups[1].Value;

        if (string.IsNullOrEmpty(wrapped))
        {
            // Group 2 is empty, so the term is taken from group 3 and the field name from the
            // last fieldQuery match that starts at or before the term.
            termGroup = current.Groups[3];
            wrapped = termGroup.Value;

            if (fieldMatches == null)
            {
                fieldMatches = fieldQuery.Matches(query);
            }

            var termPosition = termGroup.Index;
            var precedingField = fieldMatches.Cast<Match>().LastOrDefault(candidate => candidate.Index <= termPosition);
            if (precedingField != null)
            {
                fieldName = precedingField.Groups[1].Value;
            }
        }

        // Strip two characters from each end of the matched value.
        var unwrapped = wrapped.Substring(2, wrapped.Length - 4);
        queryParser.SetUntokenized(fieldName, Unescape(unwrapped));

        // Make sure the term is surrounded by double quotes.
        var start = termGroup.Index;
        var closingOffset = termGroup.Length - 2;

        if (rewritten[start + closingOffset - 1] != '"')
        {
            rewritten.Insert(start + closingOffset, '"');
            closingOffset++;
        }

        if (rewritten[start + 2] != '"')
        {
            rewritten.Insert(start + 2, '"');
            closingOffset++;
        }

        // Drop the closing wrapper first, then the opening one, so 'start' is still correct.
        rewritten.Remove(start + closingOffset, 2);
        rewritten.Remove(start, 2);
    }

    return rewritten.ToString();
}
/// <summary>
/// Indexes a single document whose "Name" field is "MRS. SHABA" using a
/// <c>LowerCaseKeywordAnalyzer</c>, prints the indexed terms and the parsed query, and asserts that
/// the query <c>Name:"MRS. S*"</c> parsed by <see cref="RangeQueryParser"/> returns at least one hit.
/// </summary>
/// <remarks>
/// The searcher and the term enumerator are now disposed; previously both were left open.
/// </remarks>
public void MrsJones()
{
    using (var dir = new RAMDirectory())
    using (var analyzer = new LowerCaseKeywordAnalyzer())
    {
        using (var writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
        {
            var document = new Lucene.Net.Documents.Document();
            document.Add(new Field("Name", "MRS. SHABA", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS));
            writer.AddDocument(document);
        }

        using (var searcher = new IndexSearcher(dir, true))
        {
            // Dump every indexed term to the console for diagnostics.
            using (var termEnum = searcher.IndexReader.Terms())
            {
                while (termEnum.Next())
                {
                    var buffer = termEnum.Term.Text;
                    Console.WriteLine(buffer);
                }
            }

            var queryParser = new RangeQueryParser(Version.LUCENE_29, "", analyzer);
            var query = queryParser.Parse("Name:\"MRS. S*\"");
            Console.WriteLine(query);

            var result = searcher.Search(query, 10);
            Assert.NotEqual(0, result.TotalHits);
        }
    }
}
/// <summary>
/// Builds a Lucene query from <paramref name="query"/> under the invariant-culture scope returned by
/// <c>CultureHelper.EnsureInvariantCulture()</c>. When <c>UseLuceneASTParser</c> is set the AST parser
/// is used; otherwise the text is pre-processed and parsed with a <see cref="RangeQueryParser"/>.
/// </summary>
/// <param name="query">The raw query text.</param>
/// <param name="indexQuery">Supplies the default field and the default boolean operator.</param>
/// <param name="analyzer">Analyzer used by either parser path.</param>
/// <returns>The parsed query.</returns>
/// <exception cref="ParseException">
/// Thrown when parsing fails, including the case where the AST parser yields no query.
/// </exception>
/// <remarks>
/// The null-result case used to throw <c>GeoAPI.IO.ParseException</c>, a different type that bypassed
/// the <c>catch (ParseException)</c> below and so reached callers unwrapped. It now throws the same
/// <see cref="ParseException"/> type as every other failure path of this method.
/// </remarks>
public static Query BuildQuery(string query, IndexQuery indexQuery, RavenPerFieldAnalyzerWrapper analyzer)
{
    using (CultureHelper.EnsureInvariantCulture())
    {
        if (UseLuceneASTParser)
        {
            try
            {
                var parser = new LuceneQueryParser();
                parser.Parse(query);
                var res = parser.LuceneAST.ToQuery(
                    new LuceneASTQueryConfiguration
                    {
                        Analayzer = analyzer,
                        DefaultOperator = indexQuery.DefaultOperator,
                        FieldName = indexQuery.DefaultField ?? string.Empty
                    });

                // The parser should already have thrown in this case; treat a null result as a
                // parse failure. The catch below wraps it with the query text.
                if (res == null)
                {
                    throw new ParseException("Could not parse query");
                }

                return res;
            }
            catch (ParseException pe)
            {
                throw new ParseException("Could not parse: '" + query + "'", pe);
            }
        }

        // Legacy path: textual rewrites followed by the range-aware query parser.
        var originalQuery = query;
        try
        {
            var queryParser = new RangeQueryParser(Version.LUCENE_29, indexQuery.DefaultField ?? string.Empty, analyzer)
            {
                DefaultOperator = indexQuery.DefaultOperator == QueryOperator.Or
                    ? QueryParser.Operator.OR
                    : QueryParser.Operator.AND,
                AllowLeadingWildcard = true
            };

            // Each pass assigns back to 'query' so that a failure reports the text as rewritten so far.
            query = PreProcessComments(query);
            query = PreProcessMixedInclusiveExclusiveRangeQueries(query);
            query = PreProcessUntokenizedTerms(query, queryParser);
            query = PreProcessSearchTerms(query);
            query = PreProcessDateTerms(query, queryParser);

            var generatedQuery = queryParser.Parse(query);
            generatedQuery = HandleMethods(generatedQuery, analyzer);
            return generatedQuery;
        }
        catch (ParseException pe)
        {
            if (originalQuery == query)
            {
                throw new ParseException("Could not parse: '" + query + "'", pe);
            }

            throw new ParseException("Could not parse modified query: '" + query + "' original was: '" + originalQuery + "'", pe);
        }
    }
}
/// <summary>
/// Applies two replacement passes to <paramref name="query"/>: matches of <c>dateQuery</c> are rewritten
/// through <c>queryParser.ReplaceToken</c>, then matches of <c>inDatesQuery</c> (searched in the result of
/// the first pass) are rewritten through <c>queryParser.ReplaceDateTimeTokensInMethod</c>.
/// </summary>
/// <param name="query">The query text to rewrite.</param>
/// <param name="queryParser">Parser supplying both replacement callbacks.</param>
/// <returns>The rewritten text; unchanged when neither pattern matches.</returns>
private static string PreProcessDateTerms(string query, RangeQueryParser queryParser)
{
    var dateMatches = dateQuery.Matches(query);
    if (dateMatches.Count != 0)
    {
        query = TokenReplace(query, dateMatches, queryParser.ReplaceToken);
    }

    // The second pattern is evaluated against the output of the first pass.
    var inMethodMatches = inDatesQuery.Matches(query);
    if (inMethodMatches.Count != 0)
    {
        return TokenReplace(query, inMethodMatches, queryParser.ReplaceDateTimeTokensInMethod);
    }

    return query;
}
/// <summary>
/// Pre-processes untokenized terms in <paramref name="query"/> and parses the result with a
/// <see cref="RangeQueryParser"/> that has leading wildcards enabled and an empty default field.
/// </summary>
/// <param name="query">The raw query text.</param>
/// <param name="analyzer">Analyzer handed to the pre-processing step and to the parser.</param>
/// <returns>The parsed query.</returns>
/// <remarks>A stray empty statement after the <c>return</c> has been removed.</remarks>
public static Query BuildQuery(string query, PerFieldAnalyzerWrapper analyzer)
{
    // Passed to the pre-processing step and closed once parsing has finished.
    var keywordAnalyzer = new KeywordAnalyzer();
    try
    {
        query = PreProcessUntokenizedTerms(analyzer, query, keywordAnalyzer);
        var queryParser = new RangeQueryParser(Version.LUCENE_29, "", analyzer);
        queryParser.SetAllowLeadingWildcard(true);
        return queryParser.Parse(query);
    }
    finally
    {
        keywordAnalyzer.Close();
    }
}
/// <summary>
/// Pre-processes untokenized terms in <paramref name="query"/> and parses the result with a
/// <see cref="RangeQueryParser"/> that has leading wildcards enabled and an empty default field.
/// </summary>
/// <param name="query">The raw query text.</param>
/// <param name="analyzer">Analyzer handed to the pre-processing step and to the parser.</param>
/// <returns>The parsed query.</returns>
public static Query BuildQuery(string query, PerFieldAnalyzerWrapper analyzer)
{
    // Handed to the pre-processing step by reference; presumably assigned there only when needed
    // (verify against PreProcessUntokenizedTerms). Closed below if it was set.
    Analyzer lazyKeywordAnalyzer = null;
    try
    {
        query = PreProcessUntokenizedTerms(analyzer, query, ref lazyKeywordAnalyzer);

        var parser = new RangeQueryParser(Version.LUCENE_29, string.Empty, analyzer);

        // Not the recommended approach; ReverseFilter should rather be used.
        parser.SetAllowLeadingWildcard(true);

        return parser.Parse(query);
    }
    finally
    {
        if (lazyKeywordAnalyzer != null)
        {
            lazyKeywordAnalyzer.Close();
        }
    }
}
/// <summary>
/// Pre-processes untokenized terms in <paramref name="query"/> and parses the result with a
/// <see cref="RangeQueryParser"/> that has leading wildcards enabled and an empty default field.
/// </summary>
/// <param name="query">The raw query text.</param>
/// <param name="analyzer">Analyzer handed to the pre-processing step and to the parser.</param>
/// <returns>The parsed query.</returns>
/// <remarks>A stray empty statement after the <c>return</c> has been removed.</remarks>
public static Query BuildQuery(string query, PerFieldAnalyzerWrapper analyzer)
{
    // Passed to the pre-processing step and closed once parsing has finished.
    var keywordAnalyzer = new KeywordAnalyzer();
    try
    {
        query = PreProcessUntokenizedTerms(analyzer, query, keywordAnalyzer);
        var queryParser = new RangeQueryParser(Version.LUCENE_29, "", analyzer);
        queryParser.SetAllowLeadingWildcard(true);
        return queryParser.Parse(query);
    }
    finally
    {
        keywordAnalyzer.Close();
    }
}
/// <summary>
/// Pre-processes untokenized, search and date terms in <paramref name="query"/> and parses the result
/// with a <see cref="RangeQueryParser"/> that has leading wildcards enabled.
/// </summary>
/// <param name="query">The raw query text.</param>
/// <param name="defaultField">Default field for the parser; <c>null</c> is treated as an empty string.</param>
/// <param name="analyzer">Analyzer handed to the parser.</param>
/// <returns>The parsed query.</returns>
/// <remarks>
/// The previous implementation created a <c>KeywordAnalyzer</c> on every call and closed it in a
/// <c>finally</c> block without ever using it; that dead allocation has been removed.
/// </remarks>
public static Query BuildQuery(string query, string defaultField, PerFieldAnalyzerWrapper analyzer)
{
    var queryParser = new RangeQueryParser(Version.LUCENE_29, defaultField ?? string.Empty, analyzer);

    query = PreProcessUntokenizedTerms(query, queryParser);
    query = PreProcessSearchTerms(query);
    query = PreProcessDateTerms(query, queryParser);

    // Not the recommended approach; ReverseFilter should rather be used.
    queryParser.SetAllowLeadingWildcard(true);

    return queryParser.Parse(query);
}
/// <summary>
/// Pre-processes untokenized and search terms in <paramref name="query"/> and parses the result with a
/// <see cref="RangeQueryParser"/> that has leading wildcards enabled and an empty default field.
/// </summary>
/// <param name="query">The raw query text.</param>
/// <param name="analyzer">Analyzer handed to the parser.</param>
/// <returns>The parsed query.</returns>
/// <remarks>
/// The previous implementation created a <c>KeywordAnalyzer</c> on every call and closed it in a
/// <c>finally</c> block without ever using it; that dead allocation has been removed.
/// </remarks>
public static Query BuildQuery(string query, PerFieldAnalyzerWrapper analyzer)
{
    var queryParser = new RangeQueryParser(Version.LUCENE_29, string.Empty, analyzer);

    query = PreProcessUntokenizedTerms(query, queryParser);
    query = PreProcessSearchTerms(query);

    // Not the recommended approach; ReverseFilter should rather be used.
    queryParser.SetAllowLeadingWildcard(true);

    return queryParser.Parse(query);
}
/// <summary>
/// Finds every match of <c>untokenizedQuery</c>, registers the matched field/term pair with
/// <c>queryParser.SetUntokenized</c>, and rewrites the text so the term is enclosed in double quotes
/// instead of the "[[" "]]" wrapper.
/// </summary>
/// <param name="query">The query text to rewrite.</param>
/// <param name="queryParser">Parser that is told which field/term pairs are untokenized.</param>
/// <returns>The rewritten text, or <paramref name="query"/> itself when nothing matched.</returns>
private static string PreProcessUntokenizedTerms(string query, RangeQueryParser queryParser)
{
    var matches = untokenizedQuery.Matches(query);
    if (matches.Count == 0)
    {
        return query;
    }

    var rewritten = new StringBuilder(query);

    // Go back to front so that the match indexes stay valid while the text is edited.
    for (var index = matches.Count - 1; index >= 0; index--)
    {
        var current = matches[index];
        var termGroup = current.Groups[2];
        var wrapped = termGroup.Value;

        // Strip two characters from each end of the matched value.
        var unwrapped = wrapped.Substring(2, wrapped.Length - 4);
        queryParser.SetUntokenized(current.Groups[1].Value, Unescape(unwrapped));

        // Make sure the term is surrounded by double quotes.
        var start = termGroup.Index;
        var closingOffset = termGroup.Length - 2;

        if (rewritten[start + closingOffset - 1] != '"')
        {
            rewritten.Insert(start + closingOffset, '"');
            closingOffset++;
        }

        if (rewritten[start + 2] != '"')
        {
            rewritten.Insert(start + 2, '"');
            closingOffset++;
        }

        // Drop the closing wrapper first, then the opening one, so 'start' is still correct.
        rewritten.Remove(start + closingOffset, 2);
        rewritten.Remove(start, 2);
    }

    return rewritten.ToString();
}
/// <summary>
/// Applies two replacement passes to <paramref name="query"/>: matches of <c>dateQuery</c> are rewritten
/// through <c>queryParser.ReplaceToken</c>, then matches of <c>inDatesQuery</c> (searched in the result of
/// the first pass) are rewritten through <c>queryParser.ReplaceDateTimeTokensInMethod</c>.
/// </summary>
/// <param name="query">The query text to rewrite.</param>
/// <param name="queryParser">Parser supplying both replacement callbacks.</param>
/// <returns>The rewritten text; unchanged when the pre-check fails or neither pattern matches.</returns>
internal static string PreProcessDateTerms(string query, RangeQueryParser queryParser)
{
    // Regular expressions are expensive, so bail out early when the query cannot match at all.
    if (MightMatchDateTerms(query) == false)
    {
        return query;
    }

    var dateMatches = dateQuery.Matches(query);
    if (dateMatches.Count != 0)
    {
        query = TokenReplace(query, dateMatches, queryParser.ReplaceToken);
    }

    // The second pattern is evaluated against the output of the first pass.
    var inMethodMatches = inDatesQuery.Matches(query);
    if (inMethodMatches.Count != 0)
    {
        return TokenReplace(query, inMethodMatches, queryParser.ReplaceDateTimeTokensInMethod);
    }

    return query;
}
/// <summary>
/// Finds every match of <c>untokenizedQuery</c>, registers the matched field/term pair with
/// <c>queryParser.SetUntokenized</c>, and rewrites the text so the term is enclosed in double quotes
/// instead of the "[[" "]]" wrapper.
/// </summary>
/// <param name="query">The query text to rewrite.</param>
/// <param name="queryParser">Parser that is told which field/term pairs are untokenized.</param>
/// <returns>The rewritten text, or <paramref name="query"/> itself when nothing matched.</returns>
private static string PreProcessUntokenizedTerms(string query, RangeQueryParser queryParser)
{
    var found = untokenizedQuery.Matches(query);
    if (found.Count == 0)
    {
        return query;
    }

    var buffer = new StringBuilder(query);

    // Iterate from the last match to the first so earlier indexes are unaffected by the edits.
    var position = found.Count;
    while (position-- > 0)
    {
        var hit = found[position];
        var termGroup = hit.Groups[2];
        var wrappedTerm = termGroup.Value;

        // Strip two characters from each end of the matched value.
        var bareTerm = wrappedTerm.Substring(2, wrappedTerm.Length - 4);
        queryParser.SetUntokenized(hit.Groups[1].Value, Unescape(bareTerm));

        // Make sure the term is surrounded by double quotes.
        var begin = termGroup.Index;
        var tailOffset = termGroup.Length - 2;

        if (buffer[begin + tailOffset - 1] != '"')
        {
            buffer.Insert(begin + tailOffset, '"');
            tailOffset++;
        }

        if (buffer[begin + 2] != '"')
        {
            buffer.Insert(begin + 2, '"');
            tailOffset++;
        }

        // Remove the trailing wrapper before the leading one so 'begin' stays correct.
        buffer.Remove(begin + tailOffset, 2);
        buffer.Remove(begin, 2);
    }

    return buffer.ToString();
}
/// <summary>
/// Replaces every match of <c>dateQuery</c> in <paramref name="query"/> with
/// <c>field:token</c>, where the token comes from <c>queryParser.ReplaceToken</c>.
/// </summary>
/// <param name="query">The query text to rewrite.</param>
/// <param name="queryParser">Parser that produces the replacement token for each field/term pair.</param>
/// <returns>The rewritten text, or <paramref name="query"/> itself when nothing matched.</returns>
private static string PreProcessDateTerms(string query, RangeQueryParser queryParser)
{
    var found = dateQuery.Matches(query);
    if (found.Count == 0)
    {
        return query;
    }

    var buffer = new StringBuilder(query);

    // Scan in reverse so replacements do not shift the positions of matches still to be handled.
    for (var position = found.Count; position-- > 0;)
    {
        var hit = found[position];
        var fieldName = hit.Groups[1].Value;
        var token = queryParser.ReplaceToken(fieldName, hit.Groups[2].Value);

        buffer.Remove(hit.Index, hit.Length);
        buffer.Insert(hit.Index, fieldName + ":" + token);
    }

    return buffer.ToString();
}
/// <summary>
/// Applies two replacement passes to <paramref name="query"/>: matches of <c>dateQuery</c> are rewritten
/// through <c>queryParser.ReplaceToken</c>, then matches of <c>inDatesQuery</c> (searched in the result of
/// the first pass) are rewritten through <c>queryParser.ReplaceDateTimeTokensInMethod</c>.
/// </summary>
/// <param name="query">The query text to rewrite.</param>
/// <param name="queryParser">Parser supplying both replacement callbacks.</param>
/// <returns>The rewritten text; unchanged when the pre-check fails or neither pattern matches.</returns>
internal static string PreProcessDateTerms(string query, RangeQueryParser queryParser)
{
    // Cheap pre-check first: running the regular expressions is expensive.
    var worthScanning = MightMatchDateTerms(query);
    if (!worthScanning)
    {
        return query;
    }

    var found = dateQuery.Matches(query);
    if (found.Count >= 1)
    {
        query = TokenReplace(query, found, queryParser.ReplaceToken);
    }

    // The second pattern is evaluated against the output of the first pass.
    found = inDatesQuery.Matches(query);
    if (found.Count >= 1)
    {
        return TokenReplace(query, found, queryParser.ReplaceDateTimeTokensInMethod);
    }

    return query;
}
/// <summary>
/// Finds every match of <c>untokenizedQuery</c>, registers the matched term with
/// <c>queryParser.SetUntokenized</c>, and rewrites the text so the term is enclosed in double quotes
/// instead of the "[[" "]]" wrapper.
/// </summary>
/// <param name="query">The query text to rewrite.</param>
/// <param name="queryParser">Parser that is told which field/term pairs are untokenized.</param>
/// <returns>The rewritten text, or <paramref name="query"/> itself when nothing matched.</returns>
private static string PreProcessUntokenizedTerms(string query, RangeQueryParser queryParser)
{
    var found = untokenizedQuery.Matches(query);
    if (found.Count == 0)
    {
        return query;
    }

    var buffer = new StringBuilder(query);

    // Filled on first use: only needed when a match carries its term in group 3.
    MatchCollection knownFields = null;

    // Iterate from the last match to the first so earlier indexes are unaffected by the edits.
    var position = found.Count;
    while (position-- > 0)
    {
        var hit = found[position];
        var termGroup = hit.Groups[2];
        var wrappedTerm = termGroup.Value;
        string fieldName = hit.Groups[1].Value;

        if (string.IsNullOrEmpty(wrappedTerm))
        {
            // Group 2 is empty, so the term is taken from group 3 and the field name from the
            // last fieldQuery match that starts at or before the term.
            termGroup = hit.Groups[3];
            wrappedTerm = termGroup.Value;

            if (knownFields == null)
            {
                knownFields = fieldQuery.Matches(query);
            }

            var termPosition = termGroup.Index;
            var owningField = knownFields.Cast<Match>().LastOrDefault(candidate => candidate.Index <= termPosition);
            if (owningField != null)
            {
                fieldName = owningField.Groups[1].Value;
            }
        }

        // Strip two characters from each end of the matched value.
        var bareTerm = wrappedTerm.Substring(2, wrappedTerm.Length - 4);
        queryParser.SetUntokenized(fieldName, Unescape(bareTerm));

        // Make sure the term is surrounded by double quotes.
        var begin = termGroup.Index;
        var tailOffset = termGroup.Length - 2;

        if (buffer[begin + tailOffset - 1] != '"')
        {
            buffer.Insert(begin + tailOffset, '"');
            tailOffset++;
        }

        if (buffer[begin + 2] != '"')
        {
            buffer.Insert(begin + 2, '"');
            tailOffset++;
        }

        // Remove the trailing wrapper before the leading one so 'begin' stays correct.
        buffer.Remove(begin + tailOffset, 2);
        buffer.Remove(begin, 2);
    }

    return buffer.ToString();
}
/// <summary>
/// Applies two replacement passes to <paramref name="query"/>: matches of <c>dateQuery</c> are rewritten
/// through <c>queryParser.ReplaceToken</c>, then matches of <c>inDatesQuery</c> (searched in the result of
/// the first pass) are rewritten through <c>queryParser.ReplaceDateTimeTokensInMethod</c>.
/// </summary>
/// <param name="query">The query text to rewrite.</param>
/// <param name="queryParser">Parser supplying both replacement callbacks.</param>
/// <returns>The rewritten text; unchanged when neither pattern matches.</returns>
private static string PreProcessDateTerms(string query, RangeQueryParser queryParser)
{
    var found = dateQuery.Matches(query);
    if (found.Count >= 1)
    {
        query = TokenReplace(query, found, queryParser.ReplaceToken);
    }

    // The second pattern is evaluated against the output of the first pass.
    found = inDatesQuery.Matches(query);
    if (found.Count >= 1)
    {
        return TokenReplace(query, found, queryParser.ReplaceDateTimeTokensInMethod);
    }

    return query;
}