/// <summary>
/// Verifies that an <see cref="AutomatonQuery"/> built from the automaton
/// "do" followed by any string yields a <see cref="PrefixTermsEnum"/> from
/// <c>GetTermsEnum</c>, and that the query produces 3 hits.
/// </summary>
public virtual void TestRewritePrefix()
{
    // Build the literal "do" and expand its singleton representation so the
    // test exercises the expanded form.
    Automaton literalDo = BasicAutomata.MakeString("do");
    literalDo.ExpandSingleton();

    // "do" + <any string> is a prefix automaton.
    Automaton anySuffix = BasicAutomata.MakeAnyString();
    Automaton prefixAutomaton = BasicOperations.Concatenate(literalDo, anySuffix);

    AutomatonQuery query = new AutomatonQuery(NewTerm("bogus"), prefixAutomaton);
    Terms fieldTerms = MultiFields.GetTerms(Searcher.IndexReader, FN);

    var termsEnum = query.GetTermsEnum(fieldTerms);
    Assert.IsTrue(termsEnum is PrefixTermsEnum);

    Assert.AreEqual(3, AutomatonQueryNrHits(query));
}
/// <summary>
/// Checks the expected hit count for each of the primitive automata produced by
/// <see cref="BasicAutomata"/>, and for union / intersection / minus combinations
/// produced by <see cref="BasicOperations"/>.
/// </summary>
public virtual void TestBasicAutomata()
{
    // Primitive automata.
    AssertAutomatonHits(0, BasicAutomata.MakeEmpty());
    AssertAutomatonHits(0, BasicAutomata.MakeEmptyString());
    AssertAutomatonHits(2, BasicAutomata.MakeAnyChar());
    AssertAutomatonHits(3, BasicAutomata.MakeAnyString());
    AssertAutomatonHits(2, BasicAutomata.MakeString("doc"));
    AssertAutomatonHits(1, BasicAutomata.MakeChar('a'));
    AssertAutomatonHits(2, BasicAutomata.MakeCharRange('a', 'b'));
    AssertAutomatonHits(2, BasicAutomata.MakeInterval(1233, 2346, 0));
    AssertAutomatonHits(1, BasicAutomata.MakeInterval(0, 2000, 0));

    // Composite automata. Operands are freshly built for every check, exactly
    // as the assertions require, so no automaton instance is shared between checks.
    Automaton unionLeft = BasicAutomata.MakeChar('a');
    Automaton unionRight = BasicAutomata.MakeChar('b');
    AssertAutomatonHits(2, BasicOperations.Union(unionLeft, unionRight));

    Automaton intersectLeft = BasicAutomata.MakeChar('a');
    Automaton intersectRight = BasicAutomata.MakeChar('b');
    AssertAutomatonHits(0, BasicOperations.Intersection(intersectLeft, intersectRight));

    Automaton minuend = BasicAutomata.MakeCharRange('a', 'b');
    Automaton subtrahend = BasicAutomata.MakeChar('a');
    AssertAutomatonHits(1, BasicOperations.Minus(minuend, subtrahend));
}
/// <summary>
/// Convert Lucene wildcard syntax into an automaton.
/// <para/>
/// The term text is scanned one Unicode code point at a time; each code point
/// contributes one automaton, and the pieces are concatenated in order.
/// <para/>
/// @lucene.internal
/// </summary>
/// <param name="wildcardquery">The term whose text holds the wildcard pattern.</param>
/// <returns>The concatenation of the automata produced for each pattern element.</returns>
public static Automaton ToAutomaton(Term wildcardquery)
{
    IList<Automaton> pieces = new JCG.List<Automaton>();
    string pattern = wildcardquery.Text;

    int position = 0;
    while (position < pattern.Length)
    {
        int codePoint = Character.CodePointAt(pattern, position);
        int consumed = Character.CharCount(codePoint);

        if (codePoint == WILDCARD_STRING)
        {
            pieces.Add(BasicAutomata.MakeAnyString());
        }
        else if (codePoint == WILDCARD_CHAR)
        {
            pieces.Add(BasicAutomata.MakeAnyChar());
        }
        else if (codePoint == WILDCARD_ESCAPE && position + consumed < pattern.Length)
        {
            // An escape followed by another code point: emit that code point
            // literally and consume both.
            int escaped = Character.CodePointAt(pattern, position + consumed);
            consumed += Character.CharCount(escaped);
            pieces.Add(BasicAutomata.MakeChar(escaped));
        }
        else
        {
            // Ordinary code point, or a trailing escape with nothing after it
            // (lenient parsing: the escape character is matched literally).
            pieces.Add(BasicAutomata.MakeChar(codePoint));
        }

        position += consumed;
    }

    return BasicOperations.Concatenate(pieces);
}
/// <summary>
/// Extracts all <see cref="MultiTermQuery"/>s for <paramref name="field"/>, and returns equivalent
/// automata that will match terms.
/// <para/>
/// Composite queries (boolean, disjunction-max, and the span variants) are walked
/// recursively; prohibited boolean clauses are skipped. Leaf queries contribute an
/// automaton only when their field equals <paramref name="field"/> (ordinal comparison).
/// </summary>
/// <param name="query">The query to inspect.</param>
/// <param name="field">The field whose multi-term queries should be collected.</param>
/// <returns>The collected automata; empty when nothing matches.</returns>
internal static CharacterRunAutomaton[] ExtractAutomata(Query query, string field)
{
    List<CharacterRunAutomaton> collected = new List<CharacterRunAutomaton>();

    if (query is BooleanQuery booleanQuery)
    {
        foreach (BooleanClause booleanClause in booleanQuery.GetClauses())
        {
            if (booleanClause.IsProhibited)
            {
                continue;
            }
            collected.AddRange(ExtractAutomata(booleanClause.Query, field));
        }
    }
    else if (query is DisjunctionMaxQuery disjunctionMaxQuery)
    {
        foreach (Query disjunct in disjunctionMaxQuery.Disjuncts)
        {
            collected.AddRange(ExtractAutomata(disjunct, field));
        }
    }
    else if (query is SpanOrQuery spanOrQuery)
    {
        foreach (Query spanOrClause in spanOrQuery.GetClauses())
        {
            collected.AddRange(ExtractAutomata(spanOrClause, field));
        }
    }
    else if (query is SpanNearQuery spanNearQuery)
    {
        foreach (Query spanNearClause in spanNearQuery.GetClauses())
        {
            collected.AddRange(ExtractAutomata(spanNearClause, field));
        }
    }
    else if (query is SpanNotQuery spanNotQuery)
    {
        // Only the include side is inspected.
        collected.AddRange(ExtractAutomata(spanNotQuery.Include, field));
    }
    else if (query is SpanPositionCheckQuery positionCheckQuery)
    {
        collected.AddRange(ExtractAutomata(positionCheckQuery.Match, field));
    }
    else if (query is ISpanMultiTermQueryWrapper multiTermWrapper)
    {
        collected.AddRange(ExtractAutomata(multiTermWrapper.WrappedQuery, field));
    }
    else if (query is AutomatonQuery automatonQuery)
    {
        if (automatonQuery.Field.Equals(field, StringComparison.Ordinal))
        {
            collected.Add(new CharacterRunAutomatonToStringAnonymousHelper(
                automatonQuery.Automaton,
                () => automatonQuery.ToString()));
        }
    }
    else if (query is PrefixQuery prefixQuery)
    {
        Term prefixTerm = prefixQuery.Prefix;
        if (prefixTerm.Field.Equals(field, StringComparison.Ordinal))
        {
            // prefix text followed by any string
            Automaton prefixThenAnything = BasicOperations.Concatenate(
                BasicAutomata.MakeString(prefixTerm.Text()),
                BasicAutomata.MakeAnyString());
            collected.Add(new CharacterRunAutomatonToStringAnonymousHelper(
                prefixThenAnything,
                () => prefixQuery.ToString()));
        }
    }
    else if (query is FuzzyQuery fuzzyQuery)
    {
        if (fuzzyQuery.Field.Equals(field, StringComparison.Ordinal))
        {
            // Decode the UTF-16 term text into an array of code points.
            string termUtf16 = fuzzyQuery.Term.Text();
            int[] codePoints = new int[termUtf16.CodePointCount(0, termUtf16.Length)];
            int charIndex = 0;
            int slot = 0;
            while (charIndex < termUtf16.Length)
            {
                int codePoint = termUtf16.CodePointAt(charIndex);
                codePoints[slot++] = codePoint;
                charIndex += Character.CharCount(codePoint);
            }

            // The exact-match prefix can never be longer than the term itself.
            int codePointCount = codePoints.Length;
            int exactPrefixLength = Math.Min(fuzzyQuery.PrefixLength, codePointCount);

            // Levenshtein automaton over everything after the exact prefix.
            string fuzzySuffix = UnicodeUtil.NewString(codePoints, exactPrefixLength, codePoints.Length - exactPrefixLength);
            LevenshteinAutomata levenshtein = new LevenshteinAutomata(fuzzySuffix, fuzzyQuery.Transpositions);
            Automaton fuzzyAutomaton = levenshtein.ToAutomaton(fuzzyQuery.MaxEdits);

            if (exactPrefixLength > 0)
            {
                Automaton exactPrefix = BasicAutomata.MakeString(UnicodeUtil.NewString(codePoints, 0, exactPrefixLength));
                fuzzyAutomaton = BasicOperations.Concatenate(exactPrefix, fuzzyAutomaton);
            }

            collected.Add(new CharacterRunAutomatonToStringAnonymousHelper(
                fuzzyAutomaton,
                () => fuzzyQuery.ToString()));
        }
    }
    else if (query is TermRangeQuery termRangeQuery)
    {
        if (termRangeQuery.Field.Equals(field, StringComparison.Ordinal))
        {
            // this is *not* an automaton, but its very simple
            collected.Add(new SimpleCharacterRunAutomatonAnonymousHelper(BasicAutomata.MakeEmpty(), termRangeQuery));
        }
    }

    return collected.ToArray();
}