A fast regular expression query based on the Lucene.Net.Util.Automaton package.
  • Comparisons are fast
  • The term dictionary is enumerated in an intelligent way, to avoid comparisons. See AutomatonQuery for more details.

The supported syntax is documented in the RegExp class. Note this might be different than other regular expression implementations. For some alternatives with different syntax, look under the sandbox.

Note this query can be slow, as it needs to iterate over many terms. In order to prevent extremely slow RegexpQueries, a Regexp term should not start with the expression .*

Inheritance: AutomatonQuery
コード例 #1
0
        public virtual void TestEquals()
        {
            AutomatonQuery a1 = new AutomatonQuery(NewTerm("foobar"), BasicAutomata.MakeString("foobar"));
            // reference to a1
            AutomatonQuery a2 = a1;
            // same as a1 (accepts the same language, same term)
            AutomatonQuery a3 = new AutomatonQuery(NewTerm("foobar"), BasicOperations.Concatenate(BasicAutomata.MakeString("foo"), BasicAutomata.MakeString("bar")));
            // different than a1 (same term, but different language)
            AutomatonQuery a4 = new AutomatonQuery(NewTerm("foobar"), BasicAutomata.MakeString("different"));
            // different than a1 (different term, same language)
            AutomatonQuery a5 = new AutomatonQuery(NewTerm("blah"), BasicAutomata.MakeString("foobar"));

            Assert.AreEqual(a1.GetHashCode(), a2.GetHashCode());
            Assert.AreEqual(a1, a2);

            Assert.AreEqual(a1.GetHashCode(), a3.GetHashCode());
            Assert.AreEqual(a1, a3);

            // different class
            AutomatonQuery w1 = new WildcardQuery(NewTerm("foobar"));
            // different class
            AutomatonQuery w2 = new RegexpQuery(NewTerm("foobar"));

            Assert.IsFalse(a1.Equals(w1));
            Assert.IsFalse(a1.Equals(w2));
            Assert.IsFalse(w1.Equals(w2));
            Assert.IsFalse(a1.Equals(a4));
            Assert.IsFalse(a1.Equals(a5));
            Assert.IsFalse(a1.Equals(null));
        }
コード例 #2
0
ファイル: TestRegexpQuery.cs プロジェクト: clieben/lucenenet
        public virtual void TestCustomProvider()
        {
            IAutomatonProvider myProvider = new AutomatonProviderAnonymousInnerClassHelper(this);
            RegexpQuery        query      = new RegexpQuery(NewTerm("<quickBrown>"), RegExpSyntax.ALL, myProvider);

            Assert.AreEqual(1, Searcher.Search(query, 5).TotalHits);
        }
コード例 #3
0
        private void AssertPatternHits(string pattern, int numHits)
        {
            Query   wq   = new RegexpQuery(new Term("field", FillPattern(pattern)));
            TopDocs docs = Searcher.Search(wq, 25);

            Assert.AreEqual(numHits, docs.TotalHits, "Incorrect hits for pattern: " + pattern);
        }
コード例 #4
0
        /// <summary>
        /// check that the # of hits is the same as from a very
        /// simple regexpquery implementation.
        /// </summary>
        protected internal virtual void AssertSame(string regexp)
        {
            RegexpQuery     smart = new RegexpQuery(new Term(FieldName, regexp), RegExp.NONE);
            DumbRegexpQuery dumb  = new DumbRegexpQuery(this, new Term(FieldName, regexp), RegExp.NONE);

            TopDocs smartDocs = Searcher1.Search(smart, 25);
            TopDocs dumbDocs  = Searcher2.Search(dumb, 25);

            CheckHits.CheckEqual(smart, smartDocs.ScoreDocs, dumbDocs.ScoreDocs);
        }
コード例 #5
0
        /// <summary>
        /// check that the # of hits is the same as if the query
        /// is run against the inverted index
        /// </summary>
        protected internal virtual void AssertSame(string regexp)
        {
            RegexpQuery docValues = new RegexpQuery(new Term(fieldName, regexp), RegExpSyntax.NONE);

            docValues.MultiTermRewriteMethod = (new DocTermOrdsRewriteMethod());
            RegexpQuery inverted = new RegexpQuery(new Term(fieldName, regexp), RegExpSyntax.NONE);

            TopDocs invertedDocs  = searcher1.Search(inverted, 25);
            TopDocs docValuesDocs = searcher2.Search(docValues, 25);

            CheckHits.CheckEqual(inverted, invertedDocs.ScoreDocs, docValuesDocs.ScoreDocs);
        }
コード例 #6
0
        /// <summary>
        /// Test fieldcache rewrite against filter rewrite </summary>
        protected internal override void AssertSame(string regexp)
        {
            RegexpQuery fieldCache = new RegexpQuery(new Term(FieldName, regexp), RegExp.NONE);
            fieldCache.SetRewriteMethod(new FieldCacheRewriteMethod());

            RegexpQuery filter = new RegexpQuery(new Term(FieldName, regexp), RegExp.NONE);
            filter.SetRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);

            TopDocs fieldCacheDocs = Searcher1.Search(fieldCache, 25);
            TopDocs filterDocs = Searcher2.Search(filter, 25);

            CheckHits.CheckEqual(fieldCache, fieldCacheDocs.ScoreDocs, filterDocs.ScoreDocs);
        }
コード例 #7
0
        /// <summary>
        /// Test fieldcache rewrite against filter rewrite </summary>
        protected internal override void AssertSame(string regexp)
        {
            RegexpQuery fieldCache = new RegexpQuery(new Term(fieldName, regexp), RegExpSyntax.NONE);

            fieldCache.MultiTermRewriteMethod = (new FieldCacheRewriteMethod());

            RegexpQuery filter = new RegexpQuery(new Term(fieldName, regexp), RegExpSyntax.NONE);

            filter.MultiTermRewriteMethod = (MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);

            TopDocs fieldCacheDocs = searcher1.Search(fieldCache, 25);
            TopDocs filterDocs     = searcher2.Search(filter, 25);

            CheckHits.CheckEqual(fieldCache, fieldCacheDocs.ScoreDocs, filterDocs.ScoreDocs);
        }
コード例 #8
0
        public virtual void TestEquals()
        {
            RegexpQuery a1 = new RegexpQuery(new Term(FieldName, "[aA]"), RegExp.NONE);
            RegexpQuery a2 = new RegexpQuery(new Term(FieldName, "[aA]"), RegExp.NONE);
            RegexpQuery b = new RegexpQuery(new Term(FieldName, "[bB]"), RegExp.NONE);
            Assert.AreEqual(a1, a2);
            Assert.IsFalse(a1.Equals(b));

            a1.SetRewriteMethod(new FieldCacheRewriteMethod());
            a2.SetRewriteMethod(new FieldCacheRewriteMethod());
            b.SetRewriteMethod(new FieldCacheRewriteMethod());
            Assert.AreEqual(a1, a2);
            Assert.IsFalse(a1.Equals(b));
            QueryUtils.Check(a1);
        }
コード例 #9
0
        public virtual void TestEquals()
        {
            RegexpQuery a1 = new RegexpQuery(new Term(FieldName, "[aA]"), RegExp.NONE);
            RegexpQuery a2 = new RegexpQuery(new Term(FieldName, "[aA]"), RegExp.NONE);
            RegexpQuery b  = new RegexpQuery(new Term(FieldName, "[bB]"), RegExp.NONE);

            Assert.AreEqual(a1, a2);
            Assert.IsFalse(a1.Equals(b));

            a1.SetRewriteMethod(new FieldCacheRewriteMethod());
            a2.SetRewriteMethod(new FieldCacheRewriteMethod());
            b.SetRewriteMethod(new FieldCacheRewriteMethod());
            Assert.AreEqual(a1, a2);
            Assert.IsFalse(a1.Equals(b));
            QueryUtils.Check(a1);
        }
コード例 #10
0
        public virtual Query Build(IQueryNode queryNode)
        {
            RegexpQueryNode regexpNode = (RegexpQueryNode)queryNode;

            RegexpQuery q = new RegexpQuery(new Term(regexpNode.GetFieldAsString(),
                regexpNode.TextToBytesRef()));

            MultiTermQuery.RewriteMethod method = (MultiTermQuery.RewriteMethod)queryNode
                .GetTag(MultiTermRewriteMethodProcessor.TAG_ID);
            if (method != null)
            {
                q.SetRewriteMethod(method);
            }

            return q;
        }
コード例 #11
0
        public virtual void TestEquals()
        {
            RegexpQuery a1 = new RegexpQuery(new Term(fieldName, "[aA]"), RegExpSyntax.NONE);
            RegexpQuery a2 = new RegexpQuery(new Term(fieldName, "[aA]"), RegExpSyntax.NONE);
            RegexpQuery b  = new RegexpQuery(new Term(fieldName, "[bB]"), RegExpSyntax.NONE);

            Assert.AreEqual(a1, a2);
            Assert.IsFalse(a1.Equals(b));

            a1.MultiTermRewriteMethod = (new DocTermOrdsRewriteMethod());
            a2.MultiTermRewriteMethod = (new DocTermOrdsRewriteMethod());
            b.MultiTermRewriteMethod  = (new DocTermOrdsRewriteMethod());
            Assert.AreEqual(a1, a2);
            Assert.IsFalse(a1.Equals(b));
            QueryUtils.Check(a1);
        }
コード例 #12
0
ファイル: TestRegexpQuery.cs プロジェクト: clieben/lucenenet
        private int RegexQueryNrHits(string regex)
        {
            RegexpQuery query = new RegexpQuery(NewTerm(regex));

            return(Searcher.Search(query, 5).TotalHits);
        }
コード例 #13
0
        /// <summary>
        /// check that the # of hits is the same as if the query
        /// is run against the inverted index
        /// </summary>
        protected internal virtual void AssertSame(string regexp)
        {
            RegexpQuery docValues = new RegexpQuery(new Term(FieldName, regexp), RegExp.NONE);
            docValues.SetRewriteMethod(new DocTermOrdsRewriteMethod());
            RegexpQuery inverted = new RegexpQuery(new Term(FieldName, regexp), RegExp.NONE);

            TopDocs invertedDocs = Searcher1.Search(inverted, 25);
            TopDocs docValuesDocs = Searcher2.Search(docValues, 25);

            CheckHits.CheckEqual(inverted, invertedDocs.ScoreDocs, docValuesDocs.ScoreDocs);
        }
コード例 #14
0
 public virtual void TestCustomProvider()
 {
     AutomatonProvider myProvider = new AutomatonProviderAnonymousInnerClassHelper(this);
     RegexpQuery query = new RegexpQuery(NewTerm("<quickBrown>"), RegExp.ALL, myProvider);
     Assert.AreEqual(1, Searcher.Search(query, 5).TotalHits);
 }
コード例 #15
0
        public virtual void TestEquals()
        {
            AutomatonQuery a1 = new AutomatonQuery(NewTerm("foobar"), BasicAutomata.MakeString("foobar"));
            // reference to a1
            AutomatonQuery a2 = a1;
            // same as a1 (accepts the same language, same term)
            AutomatonQuery a3 = new AutomatonQuery(NewTerm("foobar"), BasicOperations.Concatenate(BasicAutomata.MakeString("foo"), BasicAutomata.MakeString("bar")));
            // different than a1 (same term, but different language)
            AutomatonQuery a4 = new AutomatonQuery(NewTerm("foobar"), BasicAutomata.MakeString("different"));
            // different than a1 (different term, same language)
            AutomatonQuery a5 = new AutomatonQuery(NewTerm("blah"), BasicAutomata.MakeString("foobar"));

            Assert.AreEqual(a1.GetHashCode(), a2.GetHashCode());
            Assert.AreEqual(a1, a2);

            Assert.AreEqual(a1.GetHashCode(), a3.GetHashCode());
            Assert.AreEqual(a1, a3);

            // different class
            AutomatonQuery w1 = new WildcardQuery(NewTerm("foobar"));
            // different class
            AutomatonQuery w2 = new RegexpQuery(NewTerm("foobar"));

            Assert.IsFalse(a1.Equals(w1));
            Assert.IsFalse(a1.Equals(w2));
            Assert.IsFalse(w1.Equals(w2));
            Assert.IsFalse(a1.Equals(a4));
            Assert.IsFalse(a1.Equals(a5));
            Assert.IsFalse(a1.Equals(null));
        }
コード例 #16
0
 private int RegexQueryNrHits(string regex)
 {
     RegexpQuery query = new RegexpQuery(NewTerm(regex));
     return Searcher.Search(query, 5).TotalHits;
 }
コード例 #17
0
ファイル: TestQPHelper.cs プロジェクト: apache/lucenenet
        public void TestRegexps()
        {
            StandardQueryParser qp = new StandardQueryParser();
            String df = "field";
            RegexpQuery q = new RegexpQuery(new Term("field", "[a-z][123]"));
            assertEquals(q, qp.Parse("/[a-z][123]/", df));
            qp.LowercaseExpandedTerms = (true);
            assertEquals(q, qp.Parse("/[A-Z][123]/", df));
            q.Boost = (0.5f);
            assertEquals(q, qp.Parse("/[A-Z][123]/^0.5", df));
            qp.MultiTermRewriteMethod = (MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
            q.SetRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); // LUCENENET TODO: Inconsistent API betwen RegexpQuery and StandardQueryParser
            assertTrue(qp.Parse("/[A-Z][123]/^0.5", df) is RegexpQuery);
            assertEquals(q, qp.Parse("/[A-Z][123]/^0.5", df));
            assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE, ((RegexpQuery)qp.Parse("/[A-Z][123]/^0.5", df)).GetRewriteMethod());
            qp.MultiTermRewriteMethod = (MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);

            Query escaped = new RegexpQuery(new Term("field", "[a-z]\\/[123]"));
            assertEquals(escaped, qp.Parse("/[a-z]\\/[123]/", df));
            Query escaped2 = new RegexpQuery(new Term("field", "[a-z]\\*[123]"));
            assertEquals(escaped2, qp.Parse("/[a-z]\\*[123]/", df));

            BooleanQuery complex = new BooleanQuery();
            complex.Add(new RegexpQuery(new Term("field", "[a-z]\\/[123]")), BooleanClause.Occur.MUST);
            complex.Add(new TermQuery(new Term("path", "/etc/init.d/")), BooleanClause.Occur.MUST);
            complex.Add(new TermQuery(new Term("field", "/etc/init[.]d/lucene/")), BooleanClause.Occur.SHOULD);
            assertEquals(complex, qp.Parse("/[a-z]\\/[123]/ AND path:\"/etc/init.d/\" OR \"/etc\\/init\\[.\\]d/lucene/\" ", df));

            Query re = new RegexpQuery(new Term("field", "http.*"));
            assertEquals(re, qp.Parse("field:/http.*/", df));
            assertEquals(re, qp.Parse("/http.*/", df));

            re = new RegexpQuery(new Term("field", "http~0.5"));
            assertEquals(re, qp.Parse("field:/http~0.5/", df));
            assertEquals(re, qp.Parse("/http~0.5/", df));

            re = new RegexpQuery(new Term("field", "boo"));
            assertEquals(re, qp.Parse("field:/boo/", df));
            assertEquals(re, qp.Parse("/boo/", df));


            assertEquals(new TermQuery(new Term("field", "/boo/")), qp.Parse("\"/boo/\"", df));
            assertEquals(new TermQuery(new Term("field", "/boo/")), qp.Parse("\\/boo\\/", df));

            BooleanQuery two = new BooleanQuery();
            two.Add(new RegexpQuery(new Term("field", "foo")), BooleanClause.Occur.SHOULD);
            two.Add(new RegexpQuery(new Term("field", "bar")), BooleanClause.Occur.SHOULD);
            assertEquals(two, qp.Parse("field:/foo/ field:/bar/", df));
            assertEquals(two, qp.Parse("/foo/ /bar/", df));
        }
コード例 #18
0
        public virtual void TestRegexps()
        {
            ICommonQueryParserConfiguration qp = GetParserConfig(new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false));
            RegexpQuery q = new RegexpQuery(new Term("field", "[a-z][123]"));
            assertEquals(q, GetQuery("/[a-z][123]/", qp));
            qp.LowercaseExpandedTerms = (true);
            assertEquals(q, GetQuery("/[A-Z][123]/", qp));
            q.Boost = (0.5f);
            assertEquals(q, GetQuery("/[A-Z][123]/^0.5", qp));
            qp.MultiTermRewriteMethod=(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
            q.SetRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
            assertTrue(GetQuery("/[A-Z][123]/^0.5", qp) is RegexpQuery);
            assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE, ((RegexpQuery)GetQuery("/[A-Z][123]/^0.5", qp)).GetRewriteMethod());
            assertEquals(q, GetQuery("/[A-Z][123]/^0.5", qp));
            qp.MultiTermRewriteMethod=(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);

            Query escaped = new RegexpQuery(new Term("field", "[a-z]\\/[123]"));
            assertEquals(escaped, GetQuery("/[a-z]\\/[123]/", qp));
            Query escaped2 = new RegexpQuery(new Term("field", "[a-z]\\*[123]"));
            assertEquals(escaped2, GetQuery("/[a-z]\\*[123]/", qp));

            BooleanQuery complex = new BooleanQuery();
            complex.Add(new RegexpQuery(new Term("field", "[a-z]\\/[123]")), BooleanClause.Occur.MUST);
            complex.Add(new TermQuery(new Term("path", "/etc/init.d/")), BooleanClause.Occur.MUST);
            complex.Add(new TermQuery(new Term("field", "/etc/init[.]d/lucene/")), BooleanClause.Occur.SHOULD);
            assertEquals(complex, GetQuery("/[a-z]\\/[123]/ AND path:\"/etc/init.d/\" OR \"/etc\\/init\\[.\\]d/lucene/\" ", qp));

            Query re = new RegexpQuery(new Term("field", "http.*"));
            assertEquals(re, GetQuery("field:/http.*/", qp));
            assertEquals(re, GetQuery("/http.*/", qp));

            re = new RegexpQuery(new Term("field", "http~0.5"));
            assertEquals(re, GetQuery("field:/http~0.5/", qp));
            assertEquals(re, GetQuery("/http~0.5/", qp));

            re = new RegexpQuery(new Term("field", "boo"));
            assertEquals(re, GetQuery("field:/boo/", qp));
            assertEquals(re, GetQuery("/boo/", qp));

            assertEquals(new TermQuery(new Term("field", "/boo/")), GetQuery("\"/boo/\"", qp));
            assertEquals(new TermQuery(new Term("field", "/boo/")), GetQuery("\\/boo\\/", qp));

            BooleanQuery two = new BooleanQuery();
            two.Add(new RegexpQuery(new Term("field", "foo")), BooleanClause.Occur.SHOULD);
            two.Add(new RegexpQuery(new Term("field", "bar")), BooleanClause.Occur.SHOULD);
            assertEquals(two, GetQuery("field:/foo/ field:/bar/", qp));
            assertEquals(two, GetQuery("/foo/ /bar/", qp));
        }
コード例 #19
0
        /// <summary>
        /// check that the # of hits is the same as from a very
        /// simple regexpquery implementation.
        /// </summary>
        protected internal virtual void AssertSame(string regexp)
        {
            RegexpQuery smart = new RegexpQuery(new Term(FieldName, regexp), RegExp.NONE);
            DumbRegexpQuery dumb = new DumbRegexpQuery(new Term(FieldName, regexp), RegExp.NONE);

            TopDocs smartDocs = Searcher1.Search(smart, 25);
            TopDocs dumbDocs = Searcher2.Search(dumb, 25);

            CheckHits.CheckEqual(smart, smartDocs.ScoreDocs, dumbDocs.ScoreDocs);
        }
コード例 #20
0
ファイル: TestQPHelper.cs プロジェクト: apache/lucenenet
        public void TestRegexQueryParsing()
        {
            String[]
            fields = { "b", "t" };

            StandardQueryParser parser = new StandardQueryParser();
            parser.SetMultiFields(fields);
            parser.DefaultOperator = (StandardQueryConfigHandler.Operator.AND);
            parser.Analyzer = (new MockAnalyzer(Random()));

            BooleanQuery exp = new BooleanQuery();
            exp.Add(new BooleanClause(new RegexpQuery(new Term("b", "ab.+")), BooleanClause.Occur.SHOULD));//TODO spezification? was "MUST"
            exp.Add(new BooleanClause(new RegexpQuery(new Term("t", "ab.+")), BooleanClause.Occur.SHOULD));//TODO spezification? was "MUST"

            assertEquals(exp, parser.Parse("/ab.+/", null));

            RegexpQuery regexpQueryexp = new RegexpQuery(new Term("test", "[abc]?[0-9]"));

            assertEquals(regexpQueryexp, parser.Parse("test:/[abc]?[0-9]/", null));

        }
コード例 #21
0
 private void AssertPatternHits(string pattern, int numHits)
 {
     Query wq = new RegexpQuery(new Term("field", FillPattern(pattern)));
     TopDocs docs = Searcher.Search(wq, 25);
     Assert.AreEqual(numHits, docs.TotalHits, "Incorrect hits for pattern: " + pattern);
 }
コード例 #22
0
 /// <summary>
 /// Builds a new RegexpQuery instance
 /// </summary>
 /// <param name="regexp">Regexp term</param>
 /// <returns>new RegexpQuery instance</returns>
 protected internal virtual Query NewRegexpQuery(Term regexp)
 {
     RegexpQuery query = new RegexpQuery(regexp);
     query.SetRewriteMethod(MultiTermRewriteMethod);
     return query;
 }