A fast regular expression query based on the Lucene.Net.Util.Automaton package.
  • Comparisons are fast
  • The term dictionary is enumerated in an intelligent way, to avoid comparisons. See AutomatonQuery for more details.

The supported syntax is documented in the RegExp class. Note that this syntax may differ from that of other regular expression implementations. For alternatives with a different syntax, look under the sandbox.

Note that this query can be slow, because it may need to iterate over many terms. To prevent extremely slow RegexpQueries, a Regexp term should not start with the expression .*

Inheritance: AutomatonQuery
        /// <summary>
        /// Exercises <c>Equals</c> and <c>GetHashCode</c> on <c>AutomatonQuery</c>: an aliased
        /// reference and a query built from a concatenated automaton must compare equal, while a
        /// different automaton, a different term, a different query class and <c>null</c> must not.
        /// </summary>
        public virtual void TestEquals()
        {
            AutomatonQuery baseline = new AutomatonQuery(NewTerm("foobar"), BasicAutomata.MakeString("foobar"));

            // Second reference to the very same instance.
            AutomatonQuery alias = baseline;

            // Same term; the automaton is assembled from "foo" + "bar" instead of "foobar".
            AutomatonQuery concatenated = new AutomatonQuery(
                NewTerm("foobar"),
                BasicOperations.Concatenate(BasicAutomata.MakeString("foo"), BasicAutomata.MakeString("bar")));

            // Same term, different automaton.
            AutomatonQuery otherLanguage = new AutomatonQuery(NewTerm("foobar"), BasicAutomata.MakeString("different"));

            // Different term, same automaton string.
            AutomatonQuery otherTerm = new AutomatonQuery(NewTerm("blah"), BasicAutomata.MakeString("foobar"));

            // --- expected to be equal ---
            Assert.AreEqual(baseline.GetHashCode(), alias.GetHashCode());
            Assert.AreEqual(baseline, alias);

            Assert.AreEqual(baseline.GetHashCode(), concatenated.GetHashCode());
            Assert.AreEqual(baseline, concatenated);

            // Queries of other classes, held through the AutomatonQuery base type.
            AutomatonQuery wildcard = new WildcardQuery(NewTerm("foobar"));
            AutomatonQuery regexp = new RegexpQuery(NewTerm("foobar"));

            // --- expected to be unequal ---
            Assert.IsFalse(baseline.Equals(wildcard));
            Assert.IsFalse(baseline.Equals(regexp));
            Assert.IsFalse(wildcard.Equals(regexp));
            Assert.IsFalse(baseline.Equals(otherLanguage));
            Assert.IsFalse(baseline.Equals(otherTerm));
            Assert.IsFalse(baseline.Equals(null));
        }
Beispiel #2
0
        /// <summary>
        /// Searches with a <c>RegexpQuery</c> whose pattern is <c>&lt;quickBrown&gt;</c>, built with a
        /// custom <c>IAutomatonProvider</c>, and expects exactly one hit.
        /// </summary>
        public virtual void TestCustomProvider()
        {
            IAutomatonProvider provider = new AutomatonProviderAnonymousInnerClassHelper(this);
            RegexpQuery namedAutomatonQuery = new RegexpQuery(NewTerm("<quickBrown>"), RegExpSyntax.ALL, provider);

            var hitCount = Searcher.Search(namedAutomatonQuery, 5).TotalHits;

            Assert.AreEqual(1, hitCount);
        }
Beispiel #3
0
        /// <summary>
        /// Runs a <c>RegexpQuery</c> on "field" for the pattern returned by <c>FillPattern</c>
        /// and asserts the total hit count.
        /// </summary>
        /// <param name="pattern">Pattern passed through <c>FillPattern</c> before querying.</param>
        /// <param name="numHits">Expected total number of hits.</param>
        private void AssertPatternHits(string pattern, int numHits)
        {
            Term filledTerm = new Term("field", FillPattern(pattern));
            Query regexpQuery = new RegexpQuery(filledTerm);

            var actualHits = Searcher.Search(regexpQuery, 25).TotalHits;

            Assert.AreEqual(numHits, actualHits, "Incorrect hits for pattern: " + pattern);
        }
Beispiel #4
0
        /// <summary>
        /// Runs <paramref name="regexp"/> both as a <c>RegexpQuery</c> and as the very simple
        /// <c>DumbRegexpQuery</c> implementation, then compares the two result lists
        /// with <c>CheckHits.CheckEqual</c>.
        /// </summary>
        /// <param name="regexp">Regular expression to query <c>FieldName</c> with.</param>
        protected internal virtual void AssertSame(string regexp)
        {
            RegexpQuery automatonBased = new RegexpQuery(new Term(FieldName, regexp), RegExp.NONE);
            DumbRegexpQuery naive = new DumbRegexpQuery(this, new Term(FieldName, regexp), RegExp.NONE);

            // Arguments are evaluated left to right, so Searcher1 still runs before Searcher2.
            CheckHits.CheckEqual(
                automatonBased,
                Searcher1.Search(automatonBased, 25).ScoreDocs,
                Searcher2.Search(naive, 25).ScoreDocs);
        }
Beispiel #5
0
        /// <summary>
        /// Runs <paramref name="regexp"/> once with a <c>DocTermOrdsRewriteMethod</c> and once with
        /// the query's default rewrite (the inverted index, per the original comment), then
        /// compares the two result lists with <c>CheckHits.CheckEqual</c>.
        /// </summary>
        /// <param name="regexp">Regular expression to query <c>fieldName</c> with.</param>
        protected internal virtual void AssertSame(string regexp)
        {
            RegexpQuery viaDocValues = new RegexpQuery(new Term(fieldName, regexp), RegExpSyntax.NONE)
            {
                MultiTermRewriteMethod = new DocTermOrdsRewriteMethod()
            };
            RegexpQuery viaInvertedIndex = new RegexpQuery(new Term(fieldName, regexp), RegExpSyntax.NONE);

            // Arguments are evaluated left to right: searcher1 first, then searcher2.
            CheckHits.CheckEqual(
                viaInvertedIndex,
                searcher1.Search(viaInvertedIndex, 25).ScoreDocs,
                searcher2.Search(viaDocValues, 25).ScoreDocs);
        }
        /// <summary>
        /// Runs <paramref name="regexp"/> once with a <c>FieldCacheRewriteMethod</c> and once with
        /// <c>MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE</c>, then compares the two result lists
        /// with <c>CheckHits.CheckEqual</c>.
        /// </summary>
        /// <param name="regexp">Regular expression to query <c>FieldName</c> with.</param>
        protected internal override void AssertSame(string regexp)
        {
            RegexpQuery viaFieldCache = new RegexpQuery(new Term(FieldName, regexp), RegExp.NONE);
            viaFieldCache.SetRewriteMethod(new FieldCacheRewriteMethod());

            RegexpQuery viaFilter = new RegexpQuery(new Term(FieldName, regexp), RegExp.NONE);
            viaFilter.SetRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);

            // Arguments are evaluated left to right: Searcher1 first, then Searcher2.
            CheckHits.CheckEqual(
                viaFieldCache,
                Searcher1.Search(viaFieldCache, 25).ScoreDocs,
                Searcher2.Search(viaFilter, 25).ScoreDocs);
        }
Beispiel #7
0
        /// <summary>
        /// Runs <paramref name="regexp"/> once with a <c>FieldCacheRewriteMethod</c> and once with
        /// <c>MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE</c>, then compares the two result lists
        /// with <c>CheckHits.CheckEqual</c>.
        /// </summary>
        /// <param name="regexp">Regular expression to query <c>fieldName</c> with.</param>
        protected internal override void AssertSame(string regexp)
        {
            RegexpQuery viaFieldCache = new RegexpQuery(new Term(fieldName, regexp), RegExpSyntax.NONE)
            {
                MultiTermRewriteMethod = new FieldCacheRewriteMethod()
            };
            RegexpQuery viaFilter = new RegexpQuery(new Term(fieldName, regexp), RegExpSyntax.NONE)
            {
                MultiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE
            };

            // Arguments are evaluated left to right: searcher1 first, then searcher2.
            CheckHits.CheckEqual(
                viaFieldCache,
                searcher1.Search(viaFieldCache, 25).ScoreDocs,
                searcher2.Search(viaFilter, 25).ScoreDocs);
        }
        /// <summary>
        /// Checks <c>RegexpQuery</c> equality for identical and differing patterns, first with the
        /// default rewrite and then after each query gets its own <c>FieldCacheRewriteMethod</c>.
        /// </summary>
        public virtual void TestEquals()
        {
            RegexpQuery first = new RegexpQuery(new Term(FieldName, "[aA]"), RegExp.NONE);
            RegexpQuery twin = new RegexpQuery(new Term(FieldName, "[aA]"), RegExp.NONE);
            RegexpQuery other = new RegexpQuery(new Term(FieldName, "[bB]"), RegExp.NONE);

            // Before any rewrite method is set.
            Assert.AreEqual(first, twin);
            Assert.IsFalse(first.Equals(other));

            // Each query receives a separate FieldCacheRewriteMethod instance, in the original order.
            foreach (RegexpQuery query in new[] { first, twin, other })
            {
                query.SetRewriteMethod(new FieldCacheRewriteMethod());
            }

            Assert.AreEqual(first, twin);
            Assert.IsFalse(first.Equals(other));
            QueryUtils.Check(first);
        }
Beispiel #9
0
        /// <summary>
        /// Checks <c>RegexpQuery</c> equality for identical and differing patterns, first with the
        /// default rewrite and then after each query gets its own <c>FieldCacheRewriteMethod</c>.
        /// </summary>
        public virtual void TestEquals()
        {
            RegexpQuery matchesA = new RegexpQuery(new Term(FieldName, "[aA]"), RegExp.NONE);
            RegexpQuery alsoMatchesA = new RegexpQuery(new Term(FieldName, "[aA]"), RegExp.NONE);
            RegexpQuery matchesB = new RegexpQuery(new Term(FieldName, "[bB]"), RegExp.NONE);

            // Before any rewrite method is set.
            Assert.AreEqual(matchesA, alsoMatchesA);
            Assert.IsFalse(matchesA.Equals(matchesB));

            // Each query receives a separate FieldCacheRewriteMethod instance, in the original order.
            foreach (RegexpQuery query in new[] { matchesA, alsoMatchesA, matchesB })
            {
                query.SetRewriteMethod(new FieldCacheRewriteMethod());
            }

            Assert.AreEqual(matchesA, alsoMatchesA);
            Assert.IsFalse(matchesA.Equals(matchesB));
            QueryUtils.Check(matchesA);
        }
        /// <summary>
        /// Builds a <c>RegexpQuery</c> from a <c>RegexpQueryNode</c>, applying the rewrite method
        /// stored under <c>MultiTermRewriteMethodProcessor.TAG_ID</c> when the node carries one.
        /// </summary>
        /// <param name="queryNode">Node to convert; it is cast to <c>RegexpQueryNode</c>.</param>
        /// <returns>The newly created query.</returns>
        public virtual Query Build(IQueryNode queryNode)
        {
            RegexpQueryNode node = (RegexpQueryNode)queryNode;

            Term regexpTerm = new Term(node.GetFieldAsString(), node.TextToBytesRef());
            RegexpQuery query = new RegexpQuery(regexpTerm);

            var rewrite = (MultiTermQuery.RewriteMethod)queryNode.GetTag(MultiTermRewriteMethodProcessor.TAG_ID);
            if (rewrite == null)
            {
                // No tag on the node: keep the query's own rewrite method.
                return query;
            }

            query.SetRewriteMethod(rewrite);
            return query;
        }
Beispiel #11
0
        /// <summary>
        /// Checks <c>RegexpQuery</c> equality for identical and differing patterns, first with the
        /// default rewrite and then after each query gets its own <c>DocTermOrdsRewriteMethod</c>.
        /// </summary>
        public virtual void TestEquals()
        {
            RegexpQuery first = new RegexpQuery(new Term(fieldName, "[aA]"), RegExpSyntax.NONE);
            RegexpQuery twin = new RegexpQuery(new Term(fieldName, "[aA]"), RegExpSyntax.NONE);
            RegexpQuery other = new RegexpQuery(new Term(fieldName, "[bB]"), RegExpSyntax.NONE);

            // Before any rewrite method is set.
            Assert.AreEqual(first, twin);
            Assert.IsFalse(first.Equals(other));

            // Each query receives a separate DocTermOrdsRewriteMethod instance, in the original order.
            foreach (RegexpQuery query in new[] { first, twin, other })
            {
                query.MultiTermRewriteMethod = new DocTermOrdsRewriteMethod();
            }

            Assert.AreEqual(first, twin);
            Assert.IsFalse(first.Equals(other));
            QueryUtils.Check(first);
        }
Beispiel #12
0
        /// <summary>
        /// Runs a <c>RegexpQuery</c> for <paramref name="regex"/> and returns the total hit count.
        /// </summary>
        /// <param name="regex">Regular expression handed to <c>NewTerm</c>.</param>
        /// <returns><c>TotalHits</c> of a search limited to 5 results.</returns>
        private int RegexQueryNrHits(string regex)
        {
            var topHits = Searcher.Search(new RegexpQuery(NewTerm(regex)), 5);
            return topHits.TotalHits;
        }
        /// <summary>
        /// Runs <paramref name="regexp"/> once with a <c>DocTermOrdsRewriteMethod</c> and once with
        /// the query's default rewrite (the inverted index, per the original comment), then
        /// compares the two result lists with <c>CheckHits.CheckEqual</c>.
        /// </summary>
        /// <param name="regexp">Regular expression to query <c>FieldName</c> with.</param>
        protected internal virtual void AssertSame(string regexp)
        {
            RegexpQuery viaDocValues = new RegexpQuery(new Term(FieldName, regexp), RegExp.NONE);
            viaDocValues.SetRewriteMethod(new DocTermOrdsRewriteMethod());

            RegexpQuery viaInvertedIndex = new RegexpQuery(new Term(FieldName, regexp), RegExp.NONE);

            // Arguments are evaluated left to right: Searcher1 first, then Searcher2.
            CheckHits.CheckEqual(
                viaInvertedIndex,
                Searcher1.Search(viaInvertedIndex, 25).ScoreDocs,
                Searcher2.Search(viaDocValues, 25).ScoreDocs);
        }
 /// <summary>
 /// Searches with a <c>RegexpQuery</c> whose pattern is <c>&lt;quickBrown&gt;</c>, built with a
 /// custom <c>AutomatonProvider</c>, and expects exactly one hit.
 /// </summary>
 public virtual void TestCustomProvider()
 {
     AutomatonProvider provider = new AutomatonProviderAnonymousInnerClassHelper(this);
     RegexpQuery namedAutomatonQuery = new RegexpQuery(NewTerm("<quickBrown>"), RegExp.ALL, provider);

     var hitCount = Searcher.Search(namedAutomatonQuery, 5).TotalHits;

     Assert.AreEqual(1, hitCount);
 }
        /// <summary>
        /// Exercises <c>Equals</c> and <c>GetHashCode</c> on <c>AutomatonQuery</c>: an aliased
        /// reference and a query built from a concatenated automaton must compare equal, while a
        /// different automaton, a different term, a different query class and <c>null</c> must not.
        /// </summary>
        public virtual void TestEquals()
        {
            AutomatonQuery reference = new AutomatonQuery(NewTerm("foobar"), BasicAutomata.MakeString("foobar"));

            // Points at the same object as reference.
            AutomatonQuery sameInstance = reference;

            // Same term; automaton produced by concatenating "foo" and "bar".
            AutomatonQuery sameLanguage = new AutomatonQuery(
                NewTerm("foobar"),
                BasicOperations.Concatenate(BasicAutomata.MakeString("foo"), BasicAutomata.MakeString("bar")));

            // Same term, automaton for a different string.
            AutomatonQuery differentLanguage = new AutomatonQuery(NewTerm("foobar"), BasicAutomata.MakeString("different"));

            // Different term, automaton for the same string.
            AutomatonQuery differentTerm = new AutomatonQuery(NewTerm("blah"), BasicAutomata.MakeString("foobar"));

            // Equality and matching hash codes.
            Assert.AreEqual(reference.GetHashCode(), sameInstance.GetHashCode());
            Assert.AreEqual(reference, sameInstance);

            Assert.AreEqual(reference.GetHashCode(), sameLanguage.GetHashCode());
            Assert.AreEqual(reference, sameLanguage);

            // Other query classes, declared through the AutomatonQuery base type.
            AutomatonQuery wildcardQuery = new WildcardQuery(NewTerm("foobar"));
            AutomatonQuery regexpQuery = new RegexpQuery(NewTerm("foobar"));

            // Inequality.
            Assert.IsFalse(reference.Equals(wildcardQuery));
            Assert.IsFalse(reference.Equals(regexpQuery));
            Assert.IsFalse(wildcardQuery.Equals(regexpQuery));
            Assert.IsFalse(reference.Equals(differentLanguage));
            Assert.IsFalse(reference.Equals(differentTerm));
            Assert.IsFalse(reference.Equals(null));
        }
 /// <summary>
 /// Runs a <c>RegexpQuery</c> for <paramref name="regex"/> and returns the total hit count.
 /// </summary>
 /// <param name="regex">Regular expression handed to <c>NewTerm</c>.</param>
 /// <returns><c>TotalHits</c> of a search limited to 5 results.</returns>
 private int RegexQueryNrHits(string regex)
 {
     var topHits = Searcher.Search(new RegexpQuery(NewTerm(regex)), 5);
     return topHits.TotalHits;
 }
Beispiel #17
0
        /// <summary>
        /// Parses a series of <c>/regexp/</c> query strings with <c>StandardQueryParser</c> and
        /// compares each result against a hand-built query. The parser and the expected query
        /// are mutated between assertions, so statement order matters.
        /// </summary>
        public void TestRegexps()
        {
            StandardQueryParser qp = new StandardQueryParser();
            String df = "field";
            RegexpQuery q = new RegexpQuery(new Term("field", "[a-z][123]"));
            assertEquals(q, qp.Parse("/[a-z][123]/", df));
            // With LowercaseExpandedTerms on, the upper-case pattern must parse to the lower-case query.
            qp.LowercaseExpandedTerms = (true);
            assertEquals(q, qp.Parse("/[A-Z][123]/", df));
            // A ^0.5 suffix must yield the same boost as set on the expected query.
            q.Boost = (0.5f);
            assertEquals(q, qp.Parse("/[A-Z][123]/^0.5", df));
            // Switch both the parser and the expected query to the scoring boolean rewrite.
            qp.MultiTermRewriteMethod = (MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
            q.SetRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); // LUCENENET TODO: Inconsistent API between RegexpQuery and StandardQueryParser
            assertTrue(qp.Parse("/[A-Z][123]/^0.5", df) is RegexpQuery);
            assertEquals(q, qp.Parse("/[A-Z][123]/^0.5", df));
            assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE, ((RegexpQuery)qp.Parse("/[A-Z][123]/^0.5", df)).GetRewriteMethod());
            // Set the parser's rewrite method to CONSTANT_SCORE_AUTO_REWRITE_DEFAULT for the remaining assertions.
            qp.MultiTermRewriteMethod = (MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);

            // Backslash-escaped '/' and '*' inside the regexp are expected to stay in the term text.
            Query escaped = new RegexpQuery(new Term("field", "[a-z]\\/[123]"));
            assertEquals(escaped, qp.Parse("/[a-z]\\/[123]/", df));
            Query escaped2 = new RegexpQuery(new Term("field", "[a-z]\\*[123]"));
            assertEquals(escaped2, qp.Parse("/[a-z]\\*[123]/", df));

            // A regexp combined with quoted terms that themselves contain slashes.
            BooleanQuery complex = new BooleanQuery();
            complex.Add(new RegexpQuery(new Term("field", "[a-z]\\/[123]")), BooleanClause.Occur.MUST);
            complex.Add(new TermQuery(new Term("path", "/etc/init.d/")), BooleanClause.Occur.MUST);
            complex.Add(new TermQuery(new Term("field", "/etc/init[.]d/lucene/")), BooleanClause.Occur.SHOULD);
            assertEquals(complex, qp.Parse("/[a-z]\\/[123]/ AND path:\"/etc/init.d/\" OR \"/etc\\/init\\[.\\]d/lucene/\" ", df));

            // The regexp must parse the same with an explicit field prefix and with the default field.
            Query re = new RegexpQuery(new Term("field", "http.*"));
            assertEquals(re, qp.Parse("field:/http.*/", df));
            assertEquals(re, qp.Parse("/http.*/", df));

            // "~0.5" inside the slashes is expected to remain part of the regexp text.
            re = new RegexpQuery(new Term("field", "http~0.5"));
            assertEquals(re, qp.Parse("field:/http~0.5/", df));
            assertEquals(re, qp.Parse("/http~0.5/", df));

            re = new RegexpQuery(new Term("field", "boo"));
            assertEquals(re, qp.Parse("field:/boo/", df));
            assertEquals(re, qp.Parse("/boo/", df));


            // Quoted or backslash-escaped slashes are expected to give a plain TermQuery, not a regexp.
            assertEquals(new TermQuery(new Term("field", "/boo/")), qp.Parse("\"/boo/\"", df));
            assertEquals(new TermQuery(new Term("field", "/boo/")), qp.Parse("\\/boo\\/", df));

            // Two regexps in one query string are expected to give a BooleanQuery with two SHOULD clauses.
            BooleanQuery two = new BooleanQuery();
            two.Add(new RegexpQuery(new Term("field", "foo")), BooleanClause.Occur.SHOULD);
            two.Add(new RegexpQuery(new Term("field", "bar")), BooleanClause.Occur.SHOULD);
            assertEquals(two, qp.Parse("field:/foo/ field:/bar/", df));
            assertEquals(two, qp.Parse("/foo/ /bar/", df));
        }
Beispiel #18
0
        /// <summary>
        /// Parses a series of <c>/regexp/</c> query strings through <c>GetQuery</c> and compares
        /// each result against a hand-built query. The parser configuration and the expected
        /// query are mutated between assertions, so statement order matters.
        /// </summary>
        public virtual void TestRegexps()
        {
            ICommonQueryParserConfiguration qp = GetParserConfig(new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false));
            RegexpQuery q = new RegexpQuery(new Term("field", "[a-z][123]"));
            assertEquals(q, GetQuery("/[a-z][123]/", qp));
            // With LowercaseExpandedTerms on, the upper-case pattern must parse to the lower-case query.
            qp.LowercaseExpandedTerms = (true);
            assertEquals(q, GetQuery("/[A-Z][123]/", qp));
            // A ^0.5 suffix must yield the same boost as set on the expected query.
            q.Boost = (0.5f);
            assertEquals(q, GetQuery("/[A-Z][123]/^0.5", qp));
            // Switch both the parser and the expected query to the scoring boolean rewrite.
            qp.MultiTermRewriteMethod=(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
            q.SetRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
            assertTrue(GetQuery("/[A-Z][123]/^0.5", qp) is RegexpQuery);
            assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE, ((RegexpQuery)GetQuery("/[A-Z][123]/^0.5", qp)).GetRewriteMethod());
            assertEquals(q, GetQuery("/[A-Z][123]/^0.5", qp));
            // Set the parser's rewrite method to CONSTANT_SCORE_AUTO_REWRITE_DEFAULT for the remaining assertions.
            qp.MultiTermRewriteMethod=(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);

            // Backslash-escaped '/' and '*' inside the regexp are expected to stay in the term text.
            Query escaped = new RegexpQuery(new Term("field", "[a-z]\\/[123]"));
            assertEquals(escaped, GetQuery("/[a-z]\\/[123]/", qp));
            Query escaped2 = new RegexpQuery(new Term("field", "[a-z]\\*[123]"));
            assertEquals(escaped2, GetQuery("/[a-z]\\*[123]/", qp));

            // A regexp combined with quoted terms that themselves contain slashes.
            BooleanQuery complex = new BooleanQuery();
            complex.Add(new RegexpQuery(new Term("field", "[a-z]\\/[123]")), BooleanClause.Occur.MUST);
            complex.Add(new TermQuery(new Term("path", "/etc/init.d/")), BooleanClause.Occur.MUST);
            complex.Add(new TermQuery(new Term("field", "/etc/init[.]d/lucene/")), BooleanClause.Occur.SHOULD);
            assertEquals(complex, GetQuery("/[a-z]\\/[123]/ AND path:\"/etc/init.d/\" OR \"/etc\\/init\\[.\\]d/lucene/\" ", qp));

            // The regexp must parse the same with and without an explicit field prefix.
            Query re = new RegexpQuery(new Term("field", "http.*"));
            assertEquals(re, GetQuery("field:/http.*/", qp));
            assertEquals(re, GetQuery("/http.*/", qp));

            // "~0.5" inside the slashes is expected to remain part of the regexp text.
            re = new RegexpQuery(new Term("field", "http~0.5"));
            assertEquals(re, GetQuery("field:/http~0.5/", qp));
            assertEquals(re, GetQuery("/http~0.5/", qp));

            re = new RegexpQuery(new Term("field", "boo"));
            assertEquals(re, GetQuery("field:/boo/", qp));
            assertEquals(re, GetQuery("/boo/", qp));

            // Quoted or backslash-escaped slashes are expected to give a plain TermQuery, not a regexp.
            assertEquals(new TermQuery(new Term("field", "/boo/")), GetQuery("\"/boo/\"", qp));
            assertEquals(new TermQuery(new Term("field", "/boo/")), GetQuery("\\/boo\\/", qp));

            // Two regexps in one query string are expected to give a BooleanQuery with two SHOULD clauses.
            BooleanQuery two = new BooleanQuery();
            two.Add(new RegexpQuery(new Term("field", "foo")), BooleanClause.Occur.SHOULD);
            two.Add(new RegexpQuery(new Term("field", "bar")), BooleanClause.Occur.SHOULD);
            assertEquals(two, GetQuery("field:/foo/ field:/bar/", qp));
            assertEquals(two, GetQuery("/foo/ /bar/", qp));
        }
        /// <summary>
        /// Runs <paramref name="regexp"/> both as a <c>RegexpQuery</c> and as the very simple
        /// <c>DumbRegexpQuery</c> implementation, then compares the two result lists
        /// with <c>CheckHits.CheckEqual</c>.
        /// </summary>
        /// <param name="regexp">Regular expression to query <c>FieldName</c> with.</param>
        protected internal virtual void AssertSame(string regexp)
        {
            RegexpQuery automatonBased = new RegexpQuery(new Term(FieldName, regexp), RegExp.NONE);
            DumbRegexpQuery naive = new DumbRegexpQuery(new Term(FieldName, regexp), RegExp.NONE);

            // Arguments are evaluated left to right, so Searcher1 still runs before Searcher2.
            CheckHits.CheckEqual(
                automatonBased,
                Searcher1.Search(automatonBased, 25).ScoreDocs,
                Searcher2.Search(naive, 25).ScoreDocs);
        }
Beispiel #20
0
        /// <summary>
        /// Parses regexp queries with a multi-field <c>StandardQueryParser</c> (fields "b" and "t",
        /// default operator AND) and compares the results against hand-built queries.
        /// </summary>
        public void TestRegexQueryParsing()
        {
            String[] multiFields = { "b", "t" };

            StandardQueryParser parser = new StandardQueryParser();
            parser.SetMultiFields(multiFields);
            parser.DefaultOperator = StandardQueryConfigHandler.Operator.AND;
            parser.Analyzer = new MockAnalyzer(Random());

            // Without a field prefix, one SHOULD clause per configured field is expected.
            // TODO: specification? These clauses were "MUST".
            BooleanQuery expectedMultiField = new BooleanQuery();
            expectedMultiField.Add(new BooleanClause(new RegexpQuery(new Term("b", "ab.+")), BooleanClause.Occur.SHOULD));
            expectedMultiField.Add(new BooleanClause(new RegexpQuery(new Term("t", "ab.+")), BooleanClause.Occur.SHOULD));

            assertEquals(expectedMultiField, parser.Parse("/ab.+/", null));

            // With an explicit field prefix, a single RegexpQuery on that field is expected.
            RegexpQuery expectedSingleField = new RegexpQuery(new Term("test", "[abc]?[0-9]"));

            assertEquals(expectedSingleField, parser.Parse("test:/[abc]?[0-9]/", null));
        }
 /// <summary>
 /// Runs a <c>RegexpQuery</c> on "field" for the pattern returned by <c>FillPattern</c>
 /// and asserts the total hit count.
 /// </summary>
 /// <param name="pattern">Pattern passed through <c>FillPattern</c> before querying.</param>
 /// <param name="numHits">Expected total number of hits.</param>
 private void AssertPatternHits(string pattern, int numHits)
 {
     Term filledTerm = new Term("field", FillPattern(pattern));
     Query regexpQuery = new RegexpQuery(filledTerm);

     var actualHits = Searcher.Search(regexpQuery, 25).TotalHits;

     Assert.AreEqual(numHits, actualHits, "Incorrect hits for pattern: " + pattern);
 }
 /// <summary>
 /// Creates a <c>RegexpQuery</c> for the given term and assigns it the value of
 /// <c>MultiTermRewriteMethod</c> as its rewrite method.
 /// </summary>
 /// <param name="regexp">Term holding the regular expression.</param>
 /// <returns>The newly created <c>RegexpQuery</c>.</returns>
 protected internal virtual Query NewRegexpQuery(Term regexp)
 {
     RegexpQuery regexpQuery = new RegexpQuery(regexp);

     // Apply the rewrite method before handing the query back.
     regexpQuery.SetRewriteMethod(MultiTermRewriteMethod);

     return regexpQuery;
 }