Exemplo n.º 1
0
        public virtual void TestRewritePrefix()
        {
            Automaton pfx = BasicAutomata.MakeString("do");

            pfx.ExpandSingleton(); // expand singleton representation for testing
            Automaton      prefixAutomaton = BasicOperations.Concatenate(pfx, BasicAutomata.MakeAnyString());
            AutomatonQuery aq    = new AutomatonQuery(NewTerm("bogus"), prefixAutomaton);
            Terms          terms = MultiFields.GetTerms(searcher.IndexReader, FN);

            var en = aq.GetTermsEnum(terms);

            Assert.IsTrue(en is PrefixTermsEnum, "Expected type PrefixTermEnum but was {0}", en.GetType().Name);
            Assert.AreEqual(3, AutomatonQueryNrHits(aq));
        }
Exemplo n.º 2
0
 public virtual void TestBasicAutomata()
 {
     AssertAutomatonHits(0, BasicAutomata.MakeEmpty());
     AssertAutomatonHits(0, BasicAutomata.MakeEmptyString());
     AssertAutomatonHits(2, BasicAutomata.MakeAnyChar());
     AssertAutomatonHits(3, BasicAutomata.MakeAnyString());
     AssertAutomatonHits(2, BasicAutomata.MakeString("doc"));
     AssertAutomatonHits(1, BasicAutomata.MakeChar('a'));
     AssertAutomatonHits(2, BasicAutomata.MakeCharRange('a', 'b'));
     AssertAutomatonHits(2, BasicAutomata.MakeInterval(1233, 2346, 0));
     AssertAutomatonHits(1, BasicAutomata.MakeInterval(0, 2000, 0));
     AssertAutomatonHits(2, BasicOperations.Union(BasicAutomata.MakeChar('a'), BasicAutomata.MakeChar('b')));
     AssertAutomatonHits(0, BasicOperations.Intersection(BasicAutomata.MakeChar('a'), BasicAutomata.MakeChar('b')));
     AssertAutomatonHits(1, BasicOperations.Minus(BasicAutomata.MakeCharRange('a', 'b'), BasicAutomata.MakeChar('a')));
 }
        public static void BeforeClass()
        {
            Random random = Random();

            Directory = NewDirectory();
            Stopword  = "" + RandomChar();
            CharacterRunAutomaton stopset = new CharacterRunAutomaton(BasicAutomata.MakeString(Stopword));

            Analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false, stopset);
            RandomIndexWriter iw    = new RandomIndexWriter(random, Directory, Analyzer);
            Document          doc   = new Document();
            Field             id    = new StringField("id", "", Field.Store.NO);
            Field             field = new TextField("field", "", Field.Store.NO);

            doc.Add(id);
            doc.Add(field);

            // index some docs
            int numDocs = AtLeast(1000);

            for (int i = 0; i < numDocs; i++)
            {
                id.StringValue    = Convert.ToString(i);
                field.StringValue = RandomFieldContents();
                iw.AddDocument(doc);
            }

            // delete some docs
            int numDeletes = numDocs / 20;

            for (int i = 0; i < numDeletes; i++)
            {
                Term toDelete = new Term("id", Convert.ToString(random.Next(numDocs)));
                if (random.NextBoolean())
                {
                    iw.DeleteDocuments(toDelete);
                }
                else
                {
                    iw.DeleteDocuments(new TermQuery(toDelete));
                }
            }

            Reader = iw.Reader;
            S1     = NewSearcher(Reader);
            S2     = NewSearcher(Reader);
            iw.Dispose();
        }
Exemplo n.º 4
0
        //Automaton<T> Restrict<T>(Automaton<IMonadicPredicate<BDD, T>> autom)
        //{
        //    List<Move<T>> moves = new List<Move<T>>();
        //    foreach (var move in autom.GetMoves())
        //        moves.Add(new Move<T>(move.SourceState, move.TargetState, move.Label.ProjectSecond()));
        //    var res = Automaton<T>.Create(autom.InitialState, autom.GetFinalStates(), moves);
        //    return res;
        //}

        public void TestWS1S_UseOfCharRangePreds <T>(IBooleanAlgebra <T> solver, T isDigit, T isWordLetter, IRegexConverter <T> regexConverter)
        {
            var ca = new CartesianAlgebraBDD <T>(solver);
            var x  = new Variable("x", false);
            var y  = new Variable("y", false);
            var z  = new Variable("z", false);
            var X  = new Variable("X", false);
            //there are at least two distinct positions x and y
            var xy = new MSOAnd <T>(new MSOAnd <T>(new MSONot <T>(new MSOEq <T>(x, y)), new MSOIsSingleton <T>(x)), new MSOIsSingleton <T>(y));
            //there is a set X containing x and y and all positions z in X have characters that satisfy isWordLetter
            var phi = new MSOExists <T>(X, new MSOAnd <T>(
                                            new MSOAnd <T>(new MSOSubset <T>(x, X), new MSOSubset <T>(y, X)),
                                            new MSONot <T>(new MSOExists <T>(z, new MSONot <T>(
                                                                                 new MSOOr <T>(new MSONot <T>(new MSOAnd <T>(new MSOIsSingleton <T>(z), new MSOSubset <T>(z, X))),
                                                                                               new MSOPredicate <T>(isWordLetter, z)))))));

            var psi2        = new MSOAnd <T>(xy, phi);
            var atLeast2wEE = new MSOExists <T>(x, new MSOExists <T>(y, psi2));
            var psi1        = new MSOAnd <T>(new MSOIsSingleton <T>(x), new MSOPredicate <T>(isDigit, x));
            var aut_psi1    = psi1.GetAutomaton(ca);
            //aut_psi1.ShowGraph("SFA(psi1)");
            var atLeast1d = new MSOExists <T>(x, psi1);
            var psi       = new MSOAnd <T>(atLeast2wEE, atLeast1d);
            var aut1      = psi.GetAutomaton(ca);
            //var aut_atLeast1d = atLeast1d.GetAutomaton(solver);

            var aut2 = regexConverter.Convert(@"\w.*\w", System.Text.RegularExpressions.RegexOptions.Singleline).Intersect(regexConverter.Convert(@"\d"));

            //aut1.ShowGraph("aut1");
            //aut2.ShowGraph("aut2");

            bool equiv = aut2.IsEquivalentWith(BasicAutomata.Restrict(aut1));

            Assert.IsTrue(equiv);

            //solver.ShowGraph(aut_atLeast1d, "aut_atLeast1d");

            var aut_psi2 = psi2.GetAutomaton(ca);
            // var aut_atLeast2wEE = atLeast2wEE.GetAutomaton(ca, "x", "y");
            // var aut_atLeast2wEE2 = atLeast2wEE.GetAutomaton(solver);
            //aut_psi2.ShowGraph("SFA(psi2)");
            //aut_atLeast2wEE.ShowGraph("aut_atLeast2wEE");
            //aut_atLeast2wEE2.ShowGraph("aut_atLeast2wEE2");
            //solver.ShowGraph(aut_atLeast2wEE2, "aut_atLeast2wEE2");
        }
Exemplo n.º 5
0
        public virtual void TestIntersect()
        {
            for (int i = 0; i < numIterations; i++)
            {
                string                   reg       = AutomatonTestUtil.RandomRegexp(Random);
                Automaton                automaton = (new RegExp(reg, RegExpSyntax.NONE)).ToAutomaton();
                CompiledAutomaton        ca        = new CompiledAutomaton(automaton, SpecialOperations.IsFinite(automaton), false);
                TermsEnum                te        = MultiFields.GetTerms(reader, "field").Intersect(ca, null);
                Automaton                expected  = BasicOperations.Intersection(termsAutomaton, automaton);
                JCG.SortedSet <BytesRef> found     = new JCG.SortedSet <BytesRef>();
                while (te.Next() != null)
                {
                    found.Add(BytesRef.DeepCopyOf(te.Term));
                }

                Automaton actual = BasicAutomata.MakeStringUnion(found);
                Assert.IsTrue(BasicOperations.SameLanguage(expected, actual));
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Convert Lucene wildcard syntax into an automaton.
        /// <para/>
        /// @lucene.internal
        /// </summary>
        public static Automaton ToAutomaton(Term wildcardquery)
        {
            IList <Automaton> automata = new JCG.List <Automaton>();

            string wildcardText = wildcardquery.Text;

            for (int i = 0; i < wildcardText.Length;)
            {
                int c      = Character.CodePointAt(wildcardText, i);
                int length = Character.CharCount(c);
                switch (c)
                {
                case WILDCARD_STRING:
                    automata.Add(BasicAutomata.MakeAnyString());
                    break;

                case WILDCARD_CHAR:
                    automata.Add(BasicAutomata.MakeAnyChar());
                    break;

                case WILDCARD_ESCAPE:
                    // add the next codepoint instead, if it exists
                    if (i + length < wildcardText.Length)
                    {
                        int nextChar = Character.CodePointAt(wildcardText, i + length);
                        length += Character.CharCount(nextChar);
                        automata.Add(BasicAutomata.MakeChar(nextChar));
                        break;
                    }     // else fallthru, lenient parsing with a trailing \
                    goto default;

                default:
                    automata.Add(BasicAutomata.MakeChar(c));
                    break;
                }
                i += length;
            }

            return(BasicOperations.Concatenate(automata));
        }
Exemplo n.º 7
0
        public void TestBoost()
        {
            CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.MakeString("on"));
            Analyzer oneStopAnalyzer      = new MockAnalyzer(Random, MockTokenizer.SIMPLE, true, stopSet);

            PrecedenceQueryParser qp = new PrecedenceQueryParser();

            qp.Analyzer = (oneStopAnalyzer);
            Query q = qp.Parse("on^1.0", "field");

            assertNotNull(q);
            q = qp.Parse("\"hello\"^2.0", "field");
            assertNotNull(q);
            assertEquals(q.Boost, (float)2.0, (float)0.5);
            q = qp.Parse("hello^2.0", "field");
            assertNotNull(q);
            assertEquals(q.Boost, (float)2.0, (float)0.5);
            q = qp.Parse("\"on\"^1.0", "field");
            assertNotNull(q);

            q = GetParser(new MockAnalyzer(Random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)).Parse("the^3",
                                                                                                                       "field");
            assertNotNull(q);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Initialize levenshtein DFAs up to maxDistance, if possible </summary>
        private IList <CompiledAutomaton> InitAutomata(int maxDistance)
        {
            IList <CompiledAutomaton> runAutomata = dfaAtt.Automata;

            //System.out.println("cached automata size: " + runAutomata.size());
            if (runAutomata.Count <= maxDistance && maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE)
            {
                LevenshteinAutomata builder = new LevenshteinAutomata(UnicodeUtil.NewString(m_termText, m_realPrefixLength, m_termText.Length - m_realPrefixLength), transpositions);

                for (int i = runAutomata.Count; i <= maxDistance; i++)
                {
                    Automaton a = builder.ToAutomaton(i);
                    //System.out.println("compute automaton n=" + i);
                    // constant prefix
                    if (m_realPrefixLength > 0)
                    {
                        Automaton prefix = BasicAutomata.MakeString(UnicodeUtil.NewString(m_termText, 0, m_realPrefixLength));
                        a = BasicOperations.Concatenate(prefix, a);
                    }
                    runAutomata.Add(new CompiledAutomaton(a, true, false));
                }
            }
            return(runAutomata);
        }
Exemplo n.º 9
0
        public virtual void TestFiniteVersusInfinite()
        {
            for (int i = 0; i < numIterations; i++)
            {
                string           reg          = AutomatonTestUtil.RandomRegexp(Random);
                Automaton        automaton    = (new RegExp(reg, RegExpSyntax.NONE)).ToAutomaton();
                IList <BytesRef> matchedTerms = new List <BytesRef>();
                foreach (BytesRef t in terms)
                {
                    if (BasicOperations.Run(automaton, t.Utf8ToString()))
                    {
                        matchedTerms.Add(t);
                    }
                }

                Automaton alternate = BasicAutomata.MakeStringUnion(matchedTerms);
                //System.out.println("match " + matchedTerms.Size() + " " + alternate.getNumberOfStates() + " states, sigma=" + alternate.getStartPoints().length);
                //AutomatonTestUtil.minimizeSimple(alternate);
                //System.out.println("minmize done");
                AutomatonQuery a1 = new AutomatonQuery(new Term("field", ""), automaton);
                AutomatonQuery a2 = new AutomatonQuery(new Term("field", ""), alternate);
                CheckHits.CheckEqual(a1, searcher.Search(a1, 25).ScoreDocs, searcher.Search(a2, 25).ScoreDocs);
            }
        }
Exemplo n.º 10
0
 public AutomatonProviderAnonymousInnerClassHelper(TestRegexpQuery outerInstance)
 {
     this.OuterInstance  = outerInstance;
     quickBrownAutomaton = BasicOperations.Union(new Automaton[] { BasicAutomata.MakeString("quick"), BasicAutomata.MakeString("brown"), BasicAutomata.MakeString("bob") });
 }
Exemplo n.º 11
0
        public virtual void TestSynHangingOverEnd()
        {
            TokenStream ts       = new CannedTokenStream(new Token[] { Token("a", 1, 1), Token("X", 0, 10) });
            Automaton   actual   = (new TokenStreamToAutomaton()).ToAutomaton(ts);
            Automaton   expected = BasicOperations.Union(BasicAutomata.MakeString("a"), BasicAutomata.MakeString("X"));

            Assert.IsTrue(BasicOperations.SameLanguage(expected, actual));
        }
Exemplo n.º 12
0
        public virtual void TestSynOverHole2()
        {
            TokenStream ts       = new CannedTokenStream(new Token[] { Token("xyz", 1, 1), Token("abc", 0, 3), Token("def", 2, 1) });
            Automaton   actual   = (new TokenStreamToAutomaton()).ToAutomaton(ts);
            Automaton   expected = BasicOperations.Union(Join(S2a("xyz"), SEP_A, HOLE_A, SEP_A, S2a("def")), BasicAutomata.MakeString("abc"));

            Assert.IsTrue(BasicOperations.SameLanguage(expected, actual));
        }
Exemplo n.º 13
0
        public virtual void TestSynOverHole()
        {
            TokenStream ts       = new CannedTokenStream(new Token[] { Token("a", 1, 1), Token("X", 0, 2), Token("b", 2, 1) });
            Automaton   actual   = (new TokenStreamToAutomaton()).ToAutomaton(ts);
            Automaton   a1       = BasicOperations.Union(Join(S2a("a"), SEP_A, HOLE_A), BasicAutomata.MakeString("X"));
            Automaton   expected = BasicOperations.Concatenate(a1, Join(SEP_A, S2a("b")));

            //toDot(actual);
            Assert.IsTrue(BasicOperations.SameLanguage(expected, actual));
        }
Exemplo n.º 14
0
 private Automaton S2a(string s)
 {
     return(BasicAutomata.MakeString(s));
 }
Exemplo n.º 15
0
        public virtual void TestIntersectRandom()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            int numTerms = AtLeast(300);
            //final int numTerms = 50;

            HashSet <string>             terms        = new HashSet <string>();
            ICollection <string>         pendingTerms = new List <string>();
            IDictionary <BytesRef, int?> termToID     = new Dictionary <BytesRef, int?>();
            int id = 0;

            while (terms.Count != numTerms)
            {
                string s = RandomString;
                if (!terms.Contains(s))
                {
                    terms.Add(s);
                    pendingTerms.Add(s);
                    if (Random.Next(20) == 7)
                    {
                        AddDoc(w, pendingTerms, termToID, id++);
                    }
                }
            }
            AddDoc(w, pendingTerms, termToID, id++);

            BytesRef[]         termsArray = new BytesRef[terms.Count];
            HashSet <BytesRef> termsSet   = new HashSet <BytesRef>();

            {
                int upto = 0;
                foreach (string s in terms)
                {
                    BytesRef b = new BytesRef(s);
                    termsArray[upto++] = b;
                    termsSet.Add(b);
                }
                Array.Sort(termsArray);
            }

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: indexed terms (unicode order):");
                foreach (BytesRef t in termsArray)
                {
                    Console.WriteLine("  " + t.Utf8ToString() + " -> id:" + termToID[t]);
                }
            }

            IndexReader r = w.GetReader();

            w.Dispose();

            // NOTE: intentional insanity!!
            FieldCache.Int32s docIDToID = FieldCache.DEFAULT.GetInt32s(SlowCompositeReaderWrapper.Wrap(r), "id", false);

            for (int iter = 0; iter < 10 * RANDOM_MULTIPLIER; iter++)
            {
                // TODO: can we also test infinite As here...?

                // From the random terms, pick some ratio and compile an
                // automaton:
                HashSet <string>     acceptTerms       = new HashSet <string>();
                SortedSet <BytesRef> sortedAcceptTerms = new SortedSet <BytesRef>();
                double    keepPct = Random.NextDouble();
                Automaton a;
                if (iter == 0)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: empty automaton");
                    }
                    a = BasicAutomata.MakeEmpty();
                }
                else
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: keepPct=" + keepPct);
                    }
                    foreach (string s in terms)
                    {
                        string s2;
                        if (Random.NextDouble() <= keepPct)
                        {
                            s2 = s;
                        }
                        else
                        {
                            s2 = RandomString;
                        }
                        acceptTerms.Add(s2);
                        sortedAcceptTerms.Add(new BytesRef(s2));
                    }
                    a = BasicAutomata.MakeStringUnion(sortedAcceptTerms);
                }

                if (Random.NextBoolean())
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: reduce the automaton");
                    }
                    a.Reduce();
                }

                CompiledAutomaton c = new CompiledAutomaton(a, true, false);

                BytesRef[]         acceptTermsArray = new BytesRef[acceptTerms.Count];
                HashSet <BytesRef> acceptTermsSet   = new HashSet <BytesRef>();
                int upto = 0;
                foreach (string s in acceptTerms)
                {
                    BytesRef b = new BytesRef(s);
                    acceptTermsArray[upto++] = b;
                    acceptTermsSet.Add(b);
                    Assert.IsTrue(Accepts(c, b));
                }
                Array.Sort(acceptTermsArray);

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: accept terms (unicode order):");
                    foreach (BytesRef t in acceptTermsArray)
                    {
                        Console.WriteLine("  " + t.Utf8ToString() + (termsSet.Contains(t) ? " (exists)" : ""));
                    }
                    Console.WriteLine(a.ToDot());
                }

                for (int iter2 = 0; iter2 < 100; iter2++)
                {
                    BytesRef startTerm = acceptTermsArray.Length == 0 || Random.NextBoolean() ? null : acceptTermsArray[Random.Next(acceptTermsArray.Length)];

                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: iter2=" + iter2 + " startTerm=" + (startTerm == null ? "<null>" : startTerm.Utf8ToString()));

                        if (startTerm != null)
                        {
                            int state = c.RunAutomaton.InitialState;
                            for (int idx = 0; idx < startTerm.Length; idx++)
                            {
                                int label = startTerm.Bytes[startTerm.Offset + idx] & 0xff;
                                Console.WriteLine("  state=" + state + " label=" + label);
                                state = c.RunAutomaton.Step(state, label);
                                Assert.IsTrue(state != -1);
                            }
                            Console.WriteLine("  state=" + state);
                        }
                    }

                    TermsEnum te = MultiFields.GetTerms(r, "f").Intersect(c, startTerm);

                    int loc;
                    if (startTerm == null)
                    {
                        loc = 0;
                    }
                    else
                    {
                        loc = Array.BinarySearch(termsArray, BytesRef.DeepCopyOf(startTerm));
                        if (loc < 0)
                        {
                            loc = -(loc + 1);
                        }
                        else
                        {
                            // startTerm exists in index
                            loc++;
                        }
                    }
                    while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc]))
                    {
                        loc++;
                    }

                    DocsEnum docsEnum = null;
                    while (loc < termsArray.Length)
                    {
                        BytesRef expected = termsArray[loc];
                        BytesRef actual   = te.Next();
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST:   next() expected=" + expected.Utf8ToString() + " actual=" + (actual == null ? "null" : actual.Utf8ToString()));
                        }
                        Assert.AreEqual(expected, actual);
                        Assert.AreEqual(1, te.DocFreq);
                        docsEnum = TestUtil.Docs(Random, te, null, docsEnum, DocsFlags.NONE);
                        int docID = docsEnum.NextDoc();
                        Assert.IsTrue(docID != DocIdSetIterator.NO_MORE_DOCS);
                        Assert.AreEqual(docIDToID.Get(docID), (int)termToID[expected]);
                        do
                        {
                            loc++;
                        } while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc]));
                    }
                    Assert.IsNull(te.Next());
                }
            }

            r.Dispose();
            dir.Dispose();
        }
Exemplo n.º 16
0
        /// <summary>
        /// Extracts all <see cref="MultiTermQuery"/>s for <paramref name="field"/>, and returns equivalent
        /// automata that will match terms.
        /// </summary>
        internal static CharacterRunAutomaton[] ExtractAutomata(Query query, string field)
        {
            List <CharacterRunAutomaton> list = new List <CharacterRunAutomaton>();

            if (query is BooleanQuery)
            {
                BooleanClause[] clauses = ((BooleanQuery)query).GetClauses();
                foreach (BooleanClause clause in clauses)
                {
                    if (!clause.IsProhibited)
                    {
                        list.AddAll(Arrays.AsList(ExtractAutomata(clause.Query, field)));
                    }
                }
            }
            else if (query is DisjunctionMaxQuery)
            {
                foreach (Query sub in ((DisjunctionMaxQuery)query).Disjuncts)
                {
                    list.AddAll(Arrays.AsList(ExtractAutomata(sub, field)));
                }
            }
            else if (query is SpanOrQuery)
            {
                foreach (Query sub in ((SpanOrQuery)query).GetClauses())
                {
                    list.AddAll(Arrays.AsList(ExtractAutomata(sub, field)));
                }
            }
            else if (query is SpanNearQuery)
            {
                foreach (Query sub in ((SpanNearQuery)query).GetClauses())
                {
                    list.AddAll(Arrays.AsList(ExtractAutomata(sub, field)));
                }
            }
            else if (query is SpanNotQuery)
            {
                list.AddAll(Arrays.AsList(ExtractAutomata(((SpanNotQuery)query).Include, field)));
            }
            else if (query is SpanPositionCheckQuery)
            {
                list.AddAll(Arrays.AsList(ExtractAutomata(((SpanPositionCheckQuery)query).Match, field)));
            }
            else if (query is ISpanMultiTermQueryWrapper)
            {
                list.AddAll(Arrays.AsList(ExtractAutomata(((ISpanMultiTermQueryWrapper)query).WrappedQuery, field)));
            }
            else if (query is AutomatonQuery)
            {
                AutomatonQuery aq = (AutomatonQuery)query;
                if (aq.Field.Equals(field, StringComparison.Ordinal))
                {
                    list.Add(new CharacterRunAutomatonToStringAnonymousHelper(aq.Automaton, () => aq.ToString()));
                }
            }
            else if (query is PrefixQuery)
            {
                PrefixQuery pq     = (PrefixQuery)query;
                Term        prefix = pq.Prefix;
                if (prefix.Field.Equals(field, StringComparison.Ordinal))
                {
                    list.Add(new CharacterRunAutomatonToStringAnonymousHelper(
                                 BasicOperations.Concatenate(BasicAutomata.MakeString(prefix.Text()), BasicAutomata.MakeAnyString()),
                                 () => pq.ToString()));
                }
            }
            else if (query is FuzzyQuery)
            {
                FuzzyQuery fq = (FuzzyQuery)query;
                if (fq.Field.Equals(field, StringComparison.Ordinal))
                {
                    string utf16    = fq.Term.Text();
                    int[]  termText = new int[utf16.CodePointCount(0, utf16.Length)];
                    for (int cp, i = 0, j = 0; i < utf16.Length; i += Character.CharCount(cp))
                    {
                        termText[j++] = cp = utf16.CodePointAt(i);
                    }
                    int    termLength             = termText.Length;
                    int    prefixLength           = Math.Min(fq.PrefixLength, termLength);
                    string suffix                 = UnicodeUtil.NewString(termText, prefixLength, termText.Length - prefixLength);
                    LevenshteinAutomata builder   = new LevenshteinAutomata(suffix, fq.Transpositions);
                    Automaton           automaton = builder.ToAutomaton(fq.MaxEdits);
                    if (prefixLength > 0)
                    {
                        Automaton prefix = BasicAutomata.MakeString(UnicodeUtil.NewString(termText, 0, prefixLength));
                        automaton = BasicOperations.Concatenate(prefix, automaton);
                    }
                    list.Add(new CharacterRunAutomatonToStringAnonymousHelper(automaton, () => fq.ToString()));
                }
            }
            else if (query is TermRangeQuery)
            {
                TermRangeQuery tq = (TermRangeQuery)query;
                if (tq.Field.Equals(field, StringComparison.Ordinal))
                {
                    // this is *not* an automaton, but its very simple
                    list.Add(new SimpleCharacterRunAutomatonAnonymousHelper(BasicAutomata.MakeEmpty(), tq));
                }
            }
            return(list.ToArray(/*new CharacterRunAutomaton[list.size()]*/));
        }
Exemplo n.º 17
0
        public virtual void TestEquals()
        {
            AutomatonQuery a1 = new AutomatonQuery(NewTerm("foobar"), BasicAutomata.MakeString("foobar"));
            // reference to a1
            AutomatonQuery a2 = a1;
            // same as a1 (accepts the same language, same term)
            AutomatonQuery a3 = new AutomatonQuery(NewTerm("foobar"), BasicOperations.Concatenate(BasicAutomata.MakeString("foo"), BasicAutomata.MakeString("bar")));
            // different than a1 (same term, but different language)
            AutomatonQuery a4 = new AutomatonQuery(NewTerm("foobar"), BasicAutomata.MakeString("different"));
            // different than a1 (different term, same language)
            AutomatonQuery a5 = new AutomatonQuery(NewTerm("blah"), BasicAutomata.MakeString("foobar"));

            Assert.AreEqual(a1.GetHashCode(), a2.GetHashCode());
            Assert.AreEqual(a1, a2);

            Assert.AreEqual(a1.GetHashCode(), a3.GetHashCode());
            Assert.AreEqual(a1, a3);

            // different class
            AutomatonQuery w1 = new WildcardQuery(NewTerm("foobar"));
            // different class
            AutomatonQuery w2 = new RegexpQuery(NewTerm("foobar"));

            Assert.IsFalse(a1.Equals(w1));
            Assert.IsFalse(a1.Equals(w2));
            Assert.IsFalse(w1.Equals(w2));
            Assert.IsFalse(a1.Equals(a4));
            Assert.IsFalse(a1.Equals(a5));
            Assert.IsFalse(a1.Equals(null));
        }
Exemplo n.º 18
0
        public virtual void TestKeep()
        {
            CharacterRunAutomaton keepWords = new CharacterRunAutomaton(BasicOperations.Complement(Automaton.Union(new Automaton[] { BasicAutomata.MakeString("foo"), BasicAutomata.MakeString("bar") })));
            Analyzer a = new MockAnalyzer(Random, MockTokenizer.SIMPLE, true, keepWords);

            AssertAnalyzesTo(a, "quick foo brown bar bar fox foo", new string[] { "foo", "bar", "bar", "foo" }, new int[] { 2, 2, 1, 2 });
        }