/// <summary>
/// Verifies the equals/hashCode contract of <see cref="AutomatonQuery"/>:
/// two queries are equal iff they have the same term AND accept the same language.
/// </summary>
public virtual void TestEquals()
{
    AutomatonQuery original = new AutomatonQuery(NewTerm("foobar"), BasicAutomata.MakeString("foobar"));

    // reference to original
    AutomatonQuery sameReference = original;

    // same as original (accepts the same language "foobar", same term) but built differently
    AutomatonQuery equivalent = new AutomatonQuery(
        NewTerm("foobar"),
        BasicOperations.Concatenate(BasicAutomata.MakeString("foo"), BasicAutomata.MakeString("bar")));

    // different than original (same term, but different language)
    AutomatonQuery differentLanguage = new AutomatonQuery(NewTerm("foobar"), BasicAutomata.MakeString("different"));

    // different than original (different term, same language)
    AutomatonQuery differentTerm = new AutomatonQuery(NewTerm("blah"), BasicAutomata.MakeString("foobar"));

    Assert.AreEqual(original.GetHashCode(), sameReference.GetHashCode());
    Assert.AreEqual(original, sameReference);

    Assert.AreEqual(original.GetHashCode(), equivalent.GetHashCode());
    Assert.AreEqual(original, equivalent);

    // queries of a different class are never equal, even over the same term
    AutomatonQuery wildcard = new WildcardQuery(NewTerm("foobar"));
    AutomatonQuery regexp = new RegexpQuery(NewTerm("foobar"));
    Assert.IsFalse(original.Equals(wildcard));
    Assert.IsFalse(original.Equals(regexp));
    Assert.IsFalse(wildcard.Equals(regexp));

    Assert.IsFalse(original.Equals(differentLanguage));
    Assert.IsFalse(original.Equals(differentTerm));
    Assert.IsFalse(original.Equals(null));
}
/// <summary>
/// A synonym token ("X", posInc 0, length 2) overlapping a hole in the stream:
/// the resulting automaton must accept either ("a" + sep + hole) or "X",
/// followed by (sep + "b").
/// </summary>
public virtual void TestSynOverHole()
{
    TokenStream ts = new CannedTokenStream(new Token[]
    {
        Token("a", 1, 1),
        Token("X", 0, 2),
        Token("b", 2, 1)
    });
    Automaton actual = new TokenStreamToAutomaton().ToAutomaton(ts);

    Automaton firstAlternatives = BasicOperations.Union(Join(S2a("a"), SEP_A, HOLE_A), BasicAutomata.MakeString("X"));
    Automaton expected = BasicOperations.Concatenate(firstAlternatives, Join(SEP_A, S2a("b")));

    //toDot(actual);
    Assert.IsTrue(BasicOperations.SameLanguage(expected, actual));
}
/// <summary>
/// Expands each finite string accepted by <paramref name="automaton"/> into a
/// Levenshtein automaton over its fuzzy suffix (the part past <c>nonFuzzyPrefix</c>),
/// and returns the union of all per-path automata. Paths shorter than
/// <c>minFuzzyLength</c> (or not longer than the prefix) are matched exactly.
/// </summary>
internal Automaton ToLevenshteinAutomata(Automaton automaton)
{
    var @ref = SpecialOperations.GetFiniteStrings(automaton, -1);
    Automaton[] subs = new Automaton[@ref.Count];
    int upto = 0;
    foreach (IntsRef path in @ref)
    {
        if (path.Length <= nonFuzzyPrefix || path.Length < minFuzzyLength)
        {
            // Path is too short to fuzz: match it exactly.
            subs[upto] = BasicAutomata.MakeString(path.Ints, path.Offset, path.Length);
            upto++;
        }
        else
        {
            // Exact (non-fuzzy) prefix, followed by a Levenshtein automaton over the suffix.
            Automaton prefix = BasicAutomata.MakeString(path.Ints, path.Offset, nonFuzzyPrefix);
            int[] ints = new int[path.Length - nonFuzzyPrefix];
            Array.Copy(path.Ints, path.Offset + nonFuzzyPrefix, ints, 0, ints.Length);
            // TODO: maybe add alphaMin to LevenshteinAutomata,
            // and pass 1 instead of 0? We probably don't want
            // to allow the trailing dedup bytes to be
            // edited... but then 0 byte is "in general" allowed
            // on input (but not in UTF8).
            // BUGFIX: char.MAX_CODE_POINT is not a valid C# member; the J2N port
            // of java.lang.Character exposes this constant as Character.MaxCodePoint.
            LevenshteinAutomata lev = new LevenshteinAutomata(ints, unicodeAware ? Character.MaxCodePoint : 255, transpositions);
            Automaton levAutomaton = lev.ToAutomaton(maxEdits);
            Automaton combined = BasicOperations.Concatenate(Arrays.AsList(prefix, levAutomaton));
            combined.Deterministic = true; // its like the special case in concatenate itself, except we cloneExpanded already
            subs[upto] = combined;
            upto++;
        }
    }

    if (subs.Length == 0)
    {
        // automaton is empty, there are no accepted paths through it
        return BasicAutomata.MakeEmpty(); // matches nothing
    }
    else if (subs.Length == 1)
    {
        // no synonyms or anything: just a single path through the tokenstream
        return subs[0];
    }
    else
    {
        // multiple paths: this is really scary! is it slow?
        // maybe we should not do this and throw UOE?
        Automaton a = BasicOperations.Union(Arrays.AsList(subs));
        // TODO: we could call toLevenshteinAutomata() before det?
        // this only happens if you have multiple paths anyway (e.g. synonyms)
        BasicOperations.Determinize(a);
        return a;
    }
}
/// <summary>
/// Builds an automaton that accepts the given strings in order, with the
/// token-separator automaton (<c>SEP_A</c>) between each adjacent pair.
/// </summary>
private Automaton Join(params string[] strings)
{
    IList<Automaton> parts = new List<Automaton>();
    foreach (string s in strings)
    {
        parts.Add(BasicAutomata.MakeString(s));
        parts.Add(SEP_A);
    }
    // Drop the separator appended after the last string.
    parts.RemoveAt(parts.Count - 1);
    return BasicOperations.Concatenate(parts);
}
/// <summary>
/// An automaton of the shape "do" + anyString must be recognized as a prefix
/// query and rewritten to the more efficient <see cref="PrefixTermsEnum"/>.
/// </summary>
public virtual void TestRewritePrefix()
{
    Automaton doPrefix = BasicAutomata.MakeString("do");
    doPrefix.ExpandSingleton(); // expand singleton representation for testing
    Automaton prefixAutomaton = BasicOperations.Concatenate(doPrefix, BasicAutomata.MakeAnyString());

    AutomatonQuery query = new AutomatonQuery(NewTerm("bogus"), prefixAutomaton);
    Terms terms = MultiFields.GetTerms(Searcher.IndexReader, FN);
    Assert.IsTrue(query.GetTermsEnum(terms) is PrefixTermsEnum);
    Assert.AreEqual(3, AutomatonQueryNrHits(query));
}
/// <summary>
/// Convert Lucene wildcard syntax into an automaton.
/// <para/>
/// @lucene.internal
/// </summary>
public static Automaton ToAutomaton(Term wildcardquery)
{
    IList<Automaton> automata = new JCG.List<Automaton>();
    string text = wildcardquery.Text;
    int i = 0;
    while (i < text.Length)
    {
        int cp = Character.CodePointAt(text, i);
        int length = Character.CharCount(cp);
        if (cp == WILDCARD_STRING)
        {
            automata.Add(BasicAutomata.MakeAnyString());
        }
        else if (cp == WILDCARD_CHAR)
        {
            automata.Add(BasicAutomata.MakeAnyChar());
        }
        else if (cp == WILDCARD_ESCAPE && i + length < text.Length)
        {
            // Escape: emit the following code point as a literal instead.
            int escaped = Character.CodePointAt(text, i + length);
            length += Character.CharCount(escaped);
            automata.Add(BasicAutomata.MakeChar(escaped));
        }
        else
        {
            // Ordinary code point — including a trailing '\', which is
            // parsed leniently as a literal backslash.
            automata.Add(BasicAutomata.MakeChar(cp));
        }
        i += length;
    }
    return BasicOperations.Concatenate(automata);
}
/// <summary>
/// Initialize levenshtein DFAs up to maxDistance, if possible. Already-built
/// automata are cached on the attribute; only the missing distances are added.
/// </summary>
private IList<CompiledAutomaton> InitAutomata(int maxDistance)
{
    IList<CompiledAutomaton> cached = dfaAtt.Automata;
    //System.out.println("cached automata size: " + cached.size());
    if (cached.Count <= maxDistance && maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE)
    {
        // Build over the fuzzy suffix only (the part past the constant prefix).
        string suffix = UnicodeUtil.NewString(m_termText, m_realPrefixLength, m_termText.Length - m_realPrefixLength);
        LevenshteinAutomata builder = new LevenshteinAutomata(suffix, transpositions);
        for (int distance = cached.Count; distance <= maxDistance; distance++)
        {
            Automaton a = builder.ToAutomaton(distance);
            //System.out.println("compute automaton n=" + distance);
            // constant prefix
            if (m_realPrefixLength > 0)
            {
                Automaton prefix = BasicAutomata.MakeString(UnicodeUtil.NewString(m_termText, 0, m_realPrefixLength));
                a = BasicOperations.Concatenate(prefix, a);
            }
            cached.Add(new CompiledAutomaton(a, true, false));
        }
    }
    return cached;
}
/// <summary>
/// Concatenates the given automata in order.
/// </summary>
private Automaton Join(params Automaton[] @as) => BasicOperations.Concatenate(@as);
/// <summary>
/// Extracts all <see cref="MultiTermQuery"/>s for <paramref name="field"/>, and returns equivalent
/// automata that will match terms.
/// </summary>
/// <remarks>
/// Recurses through composite queries (boolean, dismax, span) and converts each
/// leaf multi-term query over <paramref name="field"/> into a
/// <see cref="CharacterRunAutomaton"/>. Queries over other fields are skipped.
/// NOTE(review): this method calls <c>Term.Text()</c> as a method, while the
/// wildcard converter in this file uses the <c>Term.Text</c> property — confirm
/// which form the referenced Lucene.NET version actually exposes.
/// </remarks>
internal static CharacterRunAutomaton[] ExtractAutomata(Query query, string field)
{
    List<CharacterRunAutomaton> list = new List<CharacterRunAutomaton>();
    if (query is BooleanQuery)
    {
        // Recurse into all non-prohibited (non-MUST_NOT) clauses.
        BooleanClause[] clauses = ((BooleanQuery)query).GetClauses();
        foreach (BooleanClause clause in clauses)
        {
            if (!clause.IsProhibited)
            {
                list.AddAll(Arrays.AsList(ExtractAutomata(clause.Query, field)));
            }
        }
    }
    else if (query is DisjunctionMaxQuery)
    {
        foreach (Query sub in ((DisjunctionMaxQuery)query).Disjuncts)
        {
            list.AddAll(Arrays.AsList(ExtractAutomata(sub, field)));
        }
    }
    else if (query is SpanOrQuery)
    {
        foreach (Query sub in ((SpanOrQuery)query).GetClauses())
        {
            list.AddAll(Arrays.AsList(ExtractAutomata(sub, field)));
        }
    }
    else if (query is SpanNearQuery)
    {
        foreach (Query sub in ((SpanNearQuery)query).GetClauses())
        {
            list.AddAll(Arrays.AsList(ExtractAutomata(sub, field)));
        }
    }
    else if (query is SpanNotQuery)
    {
        // Only the included (positive) side can produce matching terms.
        list.AddAll(Arrays.AsList(ExtractAutomata(((SpanNotQuery)query).Include, field)));
    }
    else if (query is SpanPositionCheckQuery)
    {
        list.AddAll(Arrays.AsList(ExtractAutomata(((SpanPositionCheckQuery)query).Match, field)));
    }
    else if (query is ISpanMultiTermQueryWrapper)
    {
        list.AddAll(Arrays.AsList(ExtractAutomata(((ISpanMultiTermQueryWrapper)query).WrappedQuery, field)));
    }
    else if (query is AutomatonQuery)
    {
        // Leaf: use the query's automaton directly.
        AutomatonQuery aq = (AutomatonQuery)query;
        if (aq.Field.Equals(field, StringComparison.Ordinal))
        {
            list.Add(new CharacterRunAutomatonToStringAnonymousHelper(aq.Automaton, () =>
                aq.ToString()));
        }
    }
    else if (query is PrefixQuery)
    {
        // Leaf: prefix text followed by any string.
        PrefixQuery pq = (PrefixQuery)query;
        Term prefix = pq.Prefix;
        if (prefix.Field.Equals(field, StringComparison.Ordinal))
        {
            list.Add(new CharacterRunAutomatonToStringAnonymousHelper(
                BasicOperations.Concatenate(BasicAutomata.MakeString(prefix.Text()), BasicAutomata.MakeAnyString()), () =>
                pq.ToString()));
        }
    }
    else if (query is FuzzyQuery)
    {
        // Leaf: build a Levenshtein automaton over the term's fuzzy suffix,
        // prefixed by the exact (non-fuzzy) prefix characters.
        FuzzyQuery fq = (FuzzyQuery)query;
        if (fq.Field.Equals(field, StringComparison.Ordinal))
        {
            string utf16 = fq.Term.Text();
            // Decode the UTF-16 term into code points, since LevenshteinAutomata
            // edits operate on code points, not chars.
            int[] termText = new int[utf16.CodePointCount(0, utf16.Length)];
            for (int cp, i = 0, j = 0; i < utf16.Length; i += Character.CharCount(cp))
            {
                termText[j++] = cp = utf16.CodePointAt(i);
            }
            int termLength = termText.Length;
            // Clamp so a prefix length longer than the term doesn't overrun.
            int prefixLength = Math.Min(fq.PrefixLength, termLength);
            string suffix = UnicodeUtil.NewString(termText, prefixLength, termText.Length - prefixLength);
            LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.Transpositions);
            Automaton automaton = builder.ToAutomaton(fq.MaxEdits);
            if (prefixLength > 0)
            {
                Automaton prefix = BasicAutomata.MakeString(UnicodeUtil.NewString(termText, 0, prefixLength));
                automaton = BasicOperations.Concatenate(prefix, automaton);
            }
            list.Add(new CharacterRunAutomatonToStringAnonymousHelper(automaton, () => fq.ToString()));
        }
    }
    else if (query is TermRangeQuery)
    {
        TermRangeQuery tq = (TermRangeQuery)query;
        if (tq.Field.Equals(field, StringComparison.Ordinal))
        {
            // this is *not* an automaton, but its very simple
            list.Add(new SimpleCharacterRunAutomatonAnonymousHelper(BasicAutomata.MakeEmpty(), tq));
        }
    }
    return(list.ToArray(/*new CharacterRunAutomaton[list.size()]*/));
}