A CharFilter that uses a regular expression to select the text to be replaced. Pattern matching is performed on each "block" of the char stream.

ex1) source="aa  bb aa bb", pattern="(aa)\\s+(bb)" replacement="$1#$2"
output="aa#bb aa#bb"

NOTE: If the replacement produces a phrase whose length differs from that of the source string, and the field is used for highlighting a term of that phrase, you will run into trouble.

ex2) source="aa123bb", pattern="(aa)\\d+(bb)" replacement="$1 $2"
output="aa bb"
and you then want to search for bb and highlight it, you will get
highlight snippet="aa1<em>23bb</em>"

@since Solr 1.5
Inheritance: Lucene.Net.Analysis.CharFilters.BaseCharFilter
Exemple #1
0
        /// <summary>
        /// One match whose replacement ("$1#$2") is shorter than the matched text.
        /// Verifies the produced tokens together with their start/end offsets.
        /// </summary>
        public virtual void Test1block1matchShorter()
        {
            const string Input = "aa  bb   cc dd";

            string[] expectedTerms  = { "aa#bb", "dd" };
            int[]    expectedStarts = { 0, 12 };
            int[]    expectedEnds   = { 11, 14 };

            CharFilter filtered = new PatternReplaceCharFilter(
                pattern("(aa)\\s+(bb)\\s+(cc)"),
                "$1#$2",
                new StringReader(Input));
            TokenStream tokens = new MockTokenizer(filtered, MockTokenizer.WHITESPACE, false);

            AssertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, Input.Length);
        }
Exemple #2
0
        /// <summary>
        /// Applies the three-group pattern with replacement "$1  $2  $3" to an input
        /// containing several "aa bb cc" style sequences, then verifies the
        /// whitespace-separated tokens and their offsets.
        /// </summary>
        public virtual void Test1blockMultiMatches()
        {
            const string Input = "  aa bb cc --- aa bb aa   bb   cc";

            string[] expectedTerms  = { "aa", "bb", "cc", "---", "aa", "bb", "aa", "bb", "cc" };
            int[]    expectedStarts = { 2, 6, 9, 11, 15, 18, 21, 25, 29 };
            int[]    expectedEnds   = { 4, 8, 10, 14, 17, 20, 23, 27, 33 };

            CharFilter filtered = new PatternReplaceCharFilter(
                pattern("(aa)\\s+(bb)\\s+(cc)"),
                "$1  $2  $3",
                new StringReader(Input));
            TokenStream tokens = new MockTokenizer(filtered, MockTokenizer.WHITESPACE, false);

            AssertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, Input.Length);
        }
Exemple #3
0
        /// <summary>
        /// Replaces the whole matched input with the empty string and expects the
        /// tokenizer to yield no tokens.
        /// </summary>
        public virtual void TestReplaceByEmpty()
        {
            const string Input = "aa bb cc";

            string[] noTerms = new string[0];

            CharFilter filtered = new PatternReplaceCharFilter(
                pattern("(aa)\\s+(bb)\\s+(cc)"),
                "",
                new StringReader(Input));
            TokenStream tokens = new MockTokenizer(filtered, MockTokenizer.WHITESPACE, false);

            AssertTokenStreamContents(tokens, noTerms);
        }
Exemple #4
0
        /// <summary>
        /// Two matches of "a", each replaced by the longer "aa".
        /// Verifies the tokens and the offsets reported for them.
        /// </summary>
        public virtual void Test1block2matchLonger()
        {
            const string Input = " a  a";

            string[] expectedTerms  = { "aa", "aa" };
            int[]    expectedStarts = { 1, 4 };
            int[]    expectedEnds   = { 2, 5 };

            CharFilter filtered = new PatternReplaceCharFilter(pattern("a"), "aa", new StringReader(Input));
            TokenStream tokens = new MockTokenizer(filtered, MockTokenizer.WHITESPACE, false);

            AssertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, Input.Length);
        }
Exemple #5
0
        /// <summary>
        /// Input that contains no "aa bb cc" sequence: the expected tokens are the
        /// input's own whitespace-separated words at their original offsets.
        /// </summary>
        public virtual void TestNothingChange()
        {
            const string Input = "this is test.";

            string[] expectedTerms  = { "this", "is", "test." };
            int[]    expectedStarts = { 0, 5, 8 };
            int[]    expectedEnds   = { 4, 7, 13 };

            CharFilter filtered = new PatternReplaceCharFilter(
                pattern("(aa)\\s+(bb)\\s+(cc)"),
                "$1$2$3",
                new StringReader(Input));
            TokenStream tokens = new MockTokenizer(filtered, MockTokenizer.WHITESPACE, false);

            AssertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, Input.Length);
        }
Exemple #6
0
        /// <summary>
        /// Applies pattern "(aa)\s+(bb)" with replacement "$1##$2" to an input that
        /// contains several "aa bb" pairs, and verifies tokens plus offsets.
        /// </summary>
        public virtual void Test2blocksMultiMatches()
        {
            const string Input = "  aa bb cc --- aa bb aa. bb aa   bb cc";

            string[] expectedTerms  = { "aa##bb", "cc", "---", "aa##bb", "aa.", "bb", "aa##bb", "cc" };
            int[]    expectedStarts = { 2, 8, 11, 15, 21, 25, 28, 36 };
            int[]    expectedEnds   = { 7, 10, 14, 20, 24, 27, 35, 38 };

            CharFilter filtered = new PatternReplaceCharFilter(
                pattern("(aa)\\s+(bb)"),
                "$1##$2",
                new StringReader(Input));
            TokenStream tokens = new MockTokenizer(filtered, MockTokenizer.WHITESPACE, false);

            AssertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, Input.Length);
        }
Exemple #7
0
        /// <summary>
        /// Stacks three filters on top of each other ("a" to "aa", then "bb" to "b",
        /// then "ccc" to "c") and verifies the tokens and offsets seen through the
        /// outermost filter.
        /// </summary>
        public virtual void TestChain()
        {
            const string Input = " a bb - ccc . --- bb a . ccc ccc bb";

            // Each stage wraps the previous one, so replacements are applied in this order.
            CharFilter expandA   = new PatternReplaceCharFilter(pattern("a"), "aa", new StringReader(Input));
            CharFilter shrinkB   = new PatternReplaceCharFilter(pattern("bb"), "b", expandA);
            CharFilter shrinkC   = new PatternReplaceCharFilter(pattern("ccc"), "c", shrinkB);
            TokenStream tokens   = new MockTokenizer(shrinkC, MockTokenizer.WHITESPACE, false);

            string[] expectedTerms  = { "aa", "b", "-", "c", ".", "---", "b", "aa", ".", "c", "c", "b" };
            int[]    expectedStarts = { 1, 3, 6, 8, 12, 14, 18, 21, 23, 25, 29, 33 };
            int[]    expectedEnds   = { 2, 5, 7, 11, 13, 17, 20, 22, 24, 28, 32, 35 };

            AssertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, Input.Length);
        }
Exemple #8
0
        /// <summary>
        /// Filters <paramref name="input"/> through a <c>PatternReplaceCharFilter</c> and asserts
        /// both the filtered text and the "index-matched" text, i.e. the characters of
        /// <paramref name="input"/> found at the corrected offset of every output position.
        /// </summary>
        /// <param name="input">Text fed to the filter.</param>
        /// <param name="pattern">Regular expression used to build the filter.</param>
        /// <param name="replacement">Replacement string passed to the filter.</param>
        /// <param name="expectedOutput">Expected filtered text.</param>
        /// <param name="expectedIndexMatchedOutput">
        /// Expected index-matched text; a negative corrected offset contributes '-',
        /// and an offset at or past the end of <paramref name="input"/> contributes nothing.
        /// </param>
        private void checkOutput(string input, string pattern, string replacement, string expectedOutput, string expectedIndexMatchedOutput)
        {
            CharFilter cs = new PatternReplaceCharFilter(new Regex(pattern, RegexOptions.Compiled), replacement, new StringReader(input));

            StringBuilder output = new StringBuilder();

            // Read() reports end of stream as -1. Comparing with "> 0" would also stop
            // on a legitimate U+0000 character and silently truncate the output.
            for (int chr = cs.Read(); chr >= 0; chr = cs.Read())
            {
                output.Append((char)chr);
            }

            StringBuilder indexMatched = new StringBuilder();

            for (int i = 0; i < output.Length; i++)
            {
                // Look the corrected offset up once per position instead of three times.
                int corrected = cs.CorrectOffset(i);

                if (corrected < input.Length)
                {
                    indexMatched.Append(corrected < 0 ? '-' : input[corrected]);
                }
            }

            bool outputGood       = expectedOutput.Equals(output.ToString(), StringComparison.Ordinal);
            bool indexMatchedGood = expectedIndexMatchedOutput.Equals(indexMatched.ToString(), StringComparison.Ordinal);

            // Dump the full context on failure so the mismatch can be diagnosed from the log.
            if (!outputGood || !indexMatchedGood)
            {
                Console.WriteLine("Pattern : " + pattern);
                Console.WriteLine("Replac. : " + replacement);
                Console.WriteLine("Input   : " + input);
                Console.WriteLine("Output  : " + output);
                Console.WriteLine("Expected: " + expectedOutput);
                Console.WriteLine("Output/i: " + indexMatched);
                Console.WriteLine("Expected: " + expectedIndexMatchedOutput);
                Console.WriteLine();
            }

            assertTrue("Output doesn't match.", outputGood);
            assertTrue("Index-matched output doesn't match.", indexMatchedGood);
        }
        /// <summary>
        /// Runs <paramref name="input"/> through a <c>PatternReplaceCharFilter</c> built from
        /// <paramref name="pattern"/> and <paramref name="replacement"/>, then asserts two things:
        /// the filtered text, and the text obtained by mapping every output position back
        /// into <paramref name="input"/> via <c>CorrectOffset</c>.
        /// </summary>
        /// <param name="input">Text fed to the filter.</param>
        /// <param name="pattern">Regular expression used to build the filter.</param>
        /// <param name="replacement">Replacement string passed to the filter.</param>
        /// <param name="expectedOutput">Expected filtered text.</param>
        /// <param name="expectedIndexMatchedOutput">
        /// Expected back-mapped text; negative corrected offsets appear as '-',
        /// offsets at or beyond the input length are skipped.
        /// </param>
        private void checkOutput(string input, string pattern, string replacement, string expectedOutput, string expectedIndexMatchedOutput)
        {
            CharFilter cs = new PatternReplaceCharFilter(new Regex(pattern, RegexOptions.Compiled), replacement, new StringReader(input));

            // End of stream is -1; "chr > 0" would wrongly stop at an embedded U+0000.
            StringBuilder output = new StringBuilder();
            int chr;
            while ((chr = cs.Read()) >= 0)
            {
                output.Append((char)chr);
            }

            StringBuilder indexMatched = new StringBuilder();
            for (int i = 0; i < output.Length; i++)
            {
                // Single lookup per output position.
                int sourceIndex = cs.CorrectOffset(i);
                if (sourceIndex >= input.Length)
                {
                    continue;
                }

                indexMatched.Append(sourceIndex < 0 ? '-' : input[sourceIndex]);
            }

            // Explicit ordinal comparison: this is exact text matching, not linguistic.
            bool outputGood = expectedOutput.Equals(output.ToString(), StringComparison.Ordinal);
            bool indexMatchedGood = expectedIndexMatchedOutput.Equals(indexMatched.ToString(), StringComparison.Ordinal);

            // Print everything needed to diagnose a failure before the assertions fire.
            if (!outputGood || !indexMatchedGood)
            {
                Console.WriteLine("Pattern : " + pattern);
                Console.WriteLine("Replac. : " + replacement);
                Console.WriteLine("Input   : " + input);
                Console.WriteLine("Output  : " + output);
                Console.WriteLine("Expected: " + expectedOutput);
                Console.WriteLine("Output/i: " + indexMatched);
                Console.WriteLine("Expected: " + expectedIndexMatchedOutput);
                Console.WriteLine();
            }

            assertTrue("Output doesn't match.", outputGood);
            assertTrue("Index-matched output doesn't match.", indexMatchedGood);
        }
        /// <summary>
        /// Joins every "aa bb" pair with "##" (replacement "$1##$2") and checks the
        /// tokens and offsets produced by a whitespace tokenizer over the result.
        /// </summary>
        public virtual void Test2blocksMultiMatches()
        {
            const string Source = "  aa bb cc --- aa bb aa. bb aa   bb cc";

            StringReader reader = new StringReader(Source);
            CharFilter charStream = new PatternReplaceCharFilter(pattern("(aa)\\s+(bb)"), "$1##$2", reader);
            TokenStream stream = new MockTokenizer(charStream, MockTokenizer.WHITESPACE, false);

            AssertTokenStreamContents(
                stream,
                new string[] { "aa##bb", "cc", "---", "aa##bb", "aa.", "bb", "aa##bb", "cc" },
                new int[] { 2, 8, 11, 15, 21, 25, 28, 36 },
                new int[] { 7, 10, 14, 20, 24, 27, 35, 38 },
                Source.Length);
        }
 /// <summary>
 /// Chains three replace filters ("a" to "aa", "bb" to "b", "ccc" to "c"), each
 /// reading from the previous one, and checks the final tokens and offsets.
 /// </summary>
 public virtual void TestChain()
 {
     const string Source = " a bb - ccc . --- bb a . ccc ccc bb";

     // Build the chain innermost-first; every new filter wraps the current one.
     CharFilter chain = new PatternReplaceCharFilter(pattern("a"), "aa", new StringReader(Source));
     chain = new PatternReplaceCharFilter(pattern("bb"), "b", chain);
     chain = new PatternReplaceCharFilter(pattern("ccc"), "c", chain);

     TokenStream stream = new MockTokenizer(chain, MockTokenizer.WHITESPACE, false);

     AssertTokenStreamContents(
         stream,
         new string[] { "aa", "b", "-", "c", ".", "---", "b", "aa", ".", "c", "c", "b" },
         new int[] { 1, 3, 6, 8, 12, 14, 18, 21, 23, 25, 29, 33 },
         new int[] { 2, 5, 7, 11, 13, 17, 20, 22, 24, 28, 32, 35 },
         Source.Length);
 }
 /// <summary>
 /// Rewrites "aa bb cc" style sequences with two spaces between the groups
 /// (replacement "$1  $2  $3") and checks the resulting tokens and offsets.
 /// </summary>
 public virtual void Test1blockMultiMatches()
 {
     const string Source = "  aa bb cc --- aa bb aa   bb   cc";

     StringReader reader = new StringReader(Source);
     CharFilter charStream = new PatternReplaceCharFilter(pattern("(aa)\\s+(bb)\\s+(cc)"), "$1  $2  $3", reader);
     TokenStream stream = new MockTokenizer(charStream, MockTokenizer.WHITESPACE, false);

     AssertTokenStreamContents(
         stream,
         new string[] { "aa", "bb", "cc", "---", "aa", "bb", "aa", "bb", "cc" },
         new int[] { 2, 6, 9, 11, 15, 18, 21, 25, 29 },
         new int[] { 4, 8, 10, 14, 17, 20, 23, 27, 33 },
         Source.Length);
 }
 /// <summary>
 /// Collapses "aa  bb   cc" into the shorter "aa#bb" (replacement "$1#$2") and
 /// checks the two resulting tokens and their offsets.
 /// </summary>
 public virtual void Test1block1matchShorter()
 {
     const string Source = "aa  bb   cc dd";

     StringReader reader = new StringReader(Source);
     CharFilter charStream = new PatternReplaceCharFilter(pattern("(aa)\\s+(bb)\\s+(cc)"), "$1#$2", reader);
     TokenStream stream = new MockTokenizer(charStream, MockTokenizer.WHITESPACE, false);

     AssertTokenStreamContents(
         stream,
         new string[] { "aa#bb", "dd" },
         new int[] { 0, 12 },
         new int[] { 11, 14 },
         Source.Length);
 }
 /// <summary>
 /// Doubles each "a" into "aa" and checks the two resulting tokens and the
 /// offsets reported for them.
 /// </summary>
 public virtual void Test1block2matchLonger()
 {
     const string Source = " a  a";

     StringReader reader = new StringReader(Source);
     CharFilter charStream = new PatternReplaceCharFilter(pattern("a"), "aa", reader);
     TokenStream stream = new MockTokenizer(charStream, MockTokenizer.WHITESPACE, false);

     AssertTokenStreamContents(
         stream,
         new string[] { "aa", "aa" },
         new int[] { 1, 4 },
         new int[] { 2, 5 },
         Source.Length);
 }
 /// <summary>
 /// The pattern matches the entire input and is replaced by the empty string,
 /// so no tokens are expected.
 /// </summary>
 public virtual void TestReplaceByEmpty()
 {
     const string Source = "aa bb cc";

     StringReader reader = new StringReader(Source);
     CharFilter charStream = new PatternReplaceCharFilter(pattern("(aa)\\s+(bb)\\s+(cc)"), "", reader);
     TokenStream stream = new MockTokenizer(charStream, MockTokenizer.WHITESPACE, false);

     string[] expectedNothing = new string[0];
     AssertTokenStreamContents(stream, expectedNothing);
 }
 /// <summary>
 /// The input holds no "aa bb cc" sequence; the expected tokens are the
 /// input's own words at their original offsets.
 /// </summary>
 public virtual void TestNothingChange()
 {
     const string Source = "this is test.";

     StringReader reader = new StringReader(Source);
     CharFilter charStream = new PatternReplaceCharFilter(pattern("(aa)\\s+(bb)\\s+(cc)"), "$1$2$3", reader);
     TokenStream stream = new MockTokenizer(charStream, MockTokenizer.WHITESPACE, false);

     AssertTokenStreamContents(
         stream,
         new string[] { "this", "is", "test." },
         new int[] { 0, 5, 8 },
         new int[] { 4, 7, 13 },
         Source.Length);
 }