Parser for the Solr synonyms format.
  1. Blank lines are ignored, and lines starting with '#' are treated as comments.
  2. Explicit mappings match any token sequence on the LHS of "=>" and replace with all alternatives on the RHS. These types of mappings ignore the expand parameter in the constructor. Example:
    i-pod, i pod => ipod
  3. Equivalent synonyms may be separated with commas and give no explicit mapping. In this case the mapping behavior will be taken from the expand parameter in the constructor. This allows the same synonym file to be used in different synonym handling strategies. Example:
    ipod, i-pod, i pod
  4. Multiple synonym mapping entries are merged. Example:
    foo => foo bar
    foo => baz

    is equivalent to

    foo => foo bar, baz
@lucene.experimental
Inheritance: SynonymMap.Parser
Beispiel #1
0
        // Expects Parse to throw for the rule "1 => a".
        // NOTE(review): per the test name, the left-hand side presumably analyzes to no tokens
        // under the SIMPLE mock tokenizer - confirm against MockTokenizer.
        public virtual void TestInvalidAnalyzesToNothingInput()
        {
            const string rules = "1 => a";
            var analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, false);
            var parser = new SolrSynonymParser(true, true, analyzer);

            Assert.Throws<Exception>(() =>
            {
                parser.Parse(new StringReader(rules));
            });
        }
Beispiel #2
0
        // Expects Parse to throw for the rule "the test => testola" when analyzed with EnglishAnalyzer.
        // NOTE(review): presumably the analyzer drops "the" and leaves a position gap that the
        // parser rejects - confirm against EnglishAnalyzer's stop-word handling.
        public virtual void TestInvalidPositionsOutput()
        {
            const string rules = "the test => testola";
            var english = new EnglishAnalyzer(TEST_VERSION_CURRENT);
            var parser = new SolrSynonymParser(true, true, english);
            var input = new StringReader(rules);

            Assert.Throws<Exception>(() => parser.Parse(input));
        }
Beispiel #3
0
        // Expects Parse to throw for a rule line that contains two "=>" separators ("a => b => c").
        public virtual void TestInvalidDoubleMap()
        {
            const string rules = "a => b => c";
            var parser = new SolrSynonymParser(true, true, new MockAnalyzer(Random()));

            Assert.Throws<Exception>(() =>
            {
                var input = new StringReader(rules);
                parser.Parse(input);
            });
        }
        // Verifies that the rule "a => 1" is rejected by the parser.
        // The failure is asserted explicitly so the test cannot pass when Parse silently
        // accepts the rule.
        // NOTE(review): per the test name, the right-hand side presumably analyzes to no tokens
        // under the SIMPLE mock tokenizer - confirm against MockTokenizer.
        public virtual void TestInvalidAnalyzesToNothingOutput()
        {
            string            testFile = "a => 1";
            SolrSynonymParser parser   = new SolrSynonymParser(true, true, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, false));

            Assert.Throws <Exception>(() => parser.Parse(new StringReader(testFile)));
        }
        // Verifies that the rule "testola => the test" is rejected when analyzed with EnglishAnalyzer.
        // The failure is asserted explicitly so the test cannot pass when Parse silently
        // accepts the rule.
        // NOTE(review): presumably the analyzer drops "the" and leaves a position gap that the
        // parser rejects - confirm against EnglishAnalyzer's stop-word handling.
        public virtual void TestInvalidPositionsInput()
        {
            string            testFile = "testola => the test";
            SolrSynonymParser parser   = new SolrSynonymParser(true, true, new EnglishAnalyzer(TEST_VERSION_CURRENT));

            Assert.Throws <Exception>(() => parser.Parse(new StringReader(testFile)));
        }
Beispiel #6
0
        // Parses two rules whose terms contain backslash-escaped "=>" and "," and checks that
        // the resulting synonym map rewrites "a=>a" to "b=>b" and "a,a" to "b,b", while an
        // unrelated token ("ball") passes through unchanged.
        public virtual void TestEscapedStuff()
        {
            const string rules =
                "a\\=>a => b\\=>b\n" +
                "a\\,a => b\\,b";

            var keywordAnalyzer = new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false);
            var parser = new SolrSynonymParser(true, true, keywordAnalyzer);
            parser.Parse(new StringReader(rules));

            var synonyms = parser.Build();
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, synonyms);

            // Every case yields a single token with a position increment of 1.
            var singleIncrement = new[] { 1 };

            // No rule applies to this input.
            AssertAnalyzesTo(analyzer, "ball", new[] { "ball" }, singleIncrement);

            // Escaped "=>" inside a term.
            AssertAnalyzesTo(analyzer, "a=>a", new[] { "b=>b" }, singleIncrement);

            // Escaped "," inside a term.
            AssertAnalyzesTo(analyzer, "a,a", new[] { "b,b" }, singleIncrement);
        }
Beispiel #7
0
        // Expects Parse to fail with a parse exception for the rule "1 => a"; reaching fail()
        // means the rule was accepted.
        // NOTE(review): per the test name, the left-hand side presumably analyzes to no tokens
        // under the SIMPLE mock tokenizer - confirm against MockTokenizer.
        public virtual void TestInvalidAnalyzesToNothingInput()
        {
            const string rules = "1 => a";
            var analyzer = new MockAnalyzer(Random, MockTokenizer.SIMPLE, false);
            var parser = new SolrSynonymParser(true, true, analyzer);
            var input = new StringReader(rules);

            try
            {
                parser.Parse(input);
                fail();
            }
            catch (Exception rejected) when (rejected.IsParseException())
            {
                // The parser rejected the rule, which is the outcome under test.
            }
        }
Beispiel #8
0
        // Expects Parse to fail with a parse exception for a rule line that contains two "=>"
        // separators; reaching fail() means the rule was accepted.
        public virtual void TestInvalidDoubleMap()
        {
            const string rules = "a => b => c";
            var parser = new SolrSynonymParser(true, true, new MockAnalyzer(Random));

            try
            {
                var input = new StringReader(rules);
                parser.Parse(input);
                fail();
            }
            catch (Exception rejected) when (rejected.IsParseException())
            {
                // A parse exception is the expected result for the malformed rule.
            }
        }
Beispiel #9
0
        // Expects Parse to fail with a parse exception for "the test => testola" when analyzed
        // with EnglishAnalyzer; reaching fail() means the rule was accepted.
        // NOTE(review): presumably the analyzer drops "the" and leaves a position gap that the
        // parser rejects - confirm against EnglishAnalyzer's stop-word handling.
        public virtual void TestInvalidPositionsOutput()
        {
            const string rules = "the test => testola";
            var english = new EnglishAnalyzer(TEST_VERSION_CURRENT);
            var parser = new SolrSynonymParser(true, true, english);
            var input = new StringReader(rules);

            try
            {
                parser.Parse(input);
                fail();
            }
            catch (Exception rejected) when (rejected.IsParseException())
            {
                // A parse exception is the expected result here.
            }
        }
        // Checks that input tokens following a matched term are not lost when the term maps to
        // a longer, multi-token replacement.
        public virtual void TestVanishingTerms()
        {
            const string rules =
                "aaa => aaaa1 aaaa2 aaaa3\n" +
                "bbb => bbbb1 bbbb2\n";

            var parser = new SolrSynonymParser(true, true, new MockAnalyzer(Random()));
            parser.Parse(new StringReader(rules));
            var synonyms = parser.Build();

            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper5(this, synonyms);

            // Two-token replacement: "pot" must still appear, interleaved between "bbbb1" and "bbbb2".
            var expectedForBbb = new[] { "xyzzy", "bbbb1", "pot", "bbbb2", "of", "gold" };
            AssertAnalyzesTo(analyzer, "xyzzy bbb pot of gold", expectedForBbb);

            // Three-token replacement: both "pot" and "of" must still appear, interleaved with
            // "aaaa1", "aaaa2" and "aaaa3".
            var expectedForAaa = new[] { "xyzzy", "aaaa1", "pot", "aaaa2", "of", "aaaa3", "gold" };
            AssertAnalyzesTo(analyzer, "xyzzy aaa pot of gold", expectedForAaa);
        }
Beispiel #11
0
        // Parses a small rule set mixing equivalent synonyms and explicit mappings, then checks
        // the tokens and position increments produced for representative inputs.
        public virtual void TestSimple()
        {
            const string rules =
                "i-pod, ipod, ipoooood\n" +
                "foo => foo bar\n" +
                "foo => baz\n" +
                "this test, that testing";

            var parser = new SolrSynonymParser(true, true, new MockAnalyzer(Random()));
            parser.Parse(new StringReader(rules));
            var synonyms = parser.Build();

            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, synonyms);

            // No rule mentions "ball", so it passes through untouched.
            AssertAnalyzesTo(analyzer, "ball", new[] { "ball" }, new[] { 1 });

            // Equivalent synonyms are stacked at the same position as the original token.
            AssertAnalyzesTo(analyzer, "i-pod",
                new[] { "i-pod", "ipod", "ipoooood" },
                new[] { 1, 0, 0 });

            // The two "foo =>" rules are merged: "foo" and "baz" share a position, "bar" follows.
            AssertAnalyzesTo(analyzer, "foo",
                new[] { "foo", "baz", "bar" },
                new[] { 1, 0, 1 });

            // Multi-word equivalents are aligned token by token.
            AssertAnalyzesTo(analyzer, "this test",
                new[] { "this", "that", "test", "testing" },
                new[] { 1, 0, 1, 0 });
        }
        // Parses two rules whose terms contain backslash-escaped "=>" and "," and checks that
        // a keyword-tokenized SynonymFilter rewrites "a=>a" to "b=>b" and "a,a" to "b,b", while
        // an unrelated token ("ball") passes through unchanged.
        public virtual void TestEscapedStuff()
        {
            const string rules =
                "a\\=>a => b\\=>b\n" +
                "a\\,a => b\\,b";

            var keywordAnalyzer = new MockAnalyzer(Random, MockTokenizer.KEYWORD, false);
            var parser = new SolrSynonymParser(true, true, keywordAnalyzer);
            parser.Parse(new StringReader(rules));
            SynonymMap synonyms = parser.Build();

            // Analyzer under test: keyword tokenizer feeding a synonym filter built from the parsed map.
            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (field, input) =>
            {
                Tokenizer source = new MockTokenizer(input, MockTokenizer.KEYWORD, false);
                var filtered = new SynonymFilter(source, synonyms, false);
                return new TokenStreamComponents(source, filtered);
            });

            // Every case yields a single token with a position increment of 1.
            var singleIncrement = new[] { 1 };

            AssertAnalyzesTo(analyzer, "ball", new[] { "ball" }, singleIncrement);
            AssertAnalyzesTo(analyzer, "a=>a", new[] { "b=>b" }, singleIncrement);
            AssertAnalyzesTo(analyzer, "a,a", new[] { "b,b" }, singleIncrement);
        }
Beispiel #13
0
        // Checks that input tokens following a matched term are not lost when the term maps to
        // a longer, multi-token replacement.
        public virtual void TestVanishingTerms()
        {
            const string rules =
                "aaa => aaaa1 aaaa2 aaaa3\n" +
                "bbb => bbbb1 bbbb2\n";

            var parser = new SolrSynonymParser(true, true, new MockAnalyzer(Random));
            parser.Parse(new StringReader(rules));
            SynonymMap synonyms = parser.Build();

            // Analyzer under test: whitespace tokenizer feeding a synonym filter built from the parsed map.
            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (field, input) =>
            {
                Tokenizer source = new MockTokenizer(input, MockTokenizer.WHITESPACE, true);
                var filtered = new SynonymFilter(source, synonyms, true);
                return new TokenStreamComponents(source, filtered);
            });

            // Two-token replacement: "pot" must still appear, interleaved between "bbbb1" and "bbbb2".
            AssertAnalyzesTo(
                analyzer,
                "xyzzy bbb pot of gold",
                new[] { "xyzzy", "bbbb1", "pot", "bbbb2", "of", "gold" });

            // Three-token replacement: both "pot" and "of" must still appear, interleaved with
            // "aaaa1", "aaaa2" and "aaaa3".
            AssertAnalyzesTo(
                analyzer,
                "xyzzy aaa pot of gold",
                new[] { "xyzzy", "aaaa1", "pot", "aaaa2", "of", "aaaa3", "gold" });
        }
        // Parses a small rule set mixing equivalent synonyms and explicit mappings, then checks
        // the tokens and position increments produced for representative inputs.
        public virtual void TestSimple()
        {
            const string rules =
                "i-pod, ipod, ipoooood\n" +
                "foo => foo bar\n" +
                "foo => baz\n" +
                "this test, that testing";

            var parser = new SolrSynonymParser(true, true, new MockAnalyzer(Random));
            parser.Parse(new StringReader(rules));
            SynonymMap synonyms = parser.Build();

            // Analyzer under test: whitespace tokenizer feeding a synonym filter built from the parsed map.
            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (field, input) =>
            {
                Tokenizer source = new MockTokenizer(input, MockTokenizer.WHITESPACE, true);
                var filtered = new SynonymFilter(source, synonyms, true);
                return new TokenStreamComponents(source, filtered);
            });

            // No rule mentions "ball", so it passes through untouched.
            AssertAnalyzesTo(analyzer, "ball", new[] { "ball" }, new[] { 1 });

            // Equivalent synonyms are stacked at the same position as the original token.
            AssertAnalyzesTo(analyzer, "i-pod",
                new[] { "i-pod", "ipod", "ipoooood" },
                new[] { 1, 0, 0 });

            // The two "foo =>" rules are merged: "foo" and "baz" share a position, "bar" follows.
            AssertAnalyzesTo(analyzer, "foo",
                new[] { "foo", "baz", "bar" },
                new[] { 1, 0, 1 });

            // Multi-word equivalents are aligned token by token.
            AssertAnalyzesTo(analyzer, "this test",
                new[] { "this", "that", "test", "testing" },
                new[] { 1, 0, 1, 0 });
        }
        // Guards against tokens disappearing after a term that expands to several replacement tokens.
        public virtual void TestVanishingTerms()
        {
            const string rules = "aaa => aaaa1 aaaa2 aaaa3\n" + "bbb => bbbb1 bbbb2\n";

            var mockAnalyzer = new MockAnalyzer(Random());
            var parser = new SolrSynonymParser(true, true, mockAnalyzer);
            var input = new StringReader(rules);
            parser.Parse(input);

            var synonyms = parser.Build();
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper5(this, synonyms);

            // "bbb" expands to two tokens; the following "pot" must survive between them.
            string[] afterBbb = { "xyzzy", "bbbb1", "pot", "bbbb2", "of", "gold" };
            AssertAnalyzesTo(analyzer, "xyzzy bbb pot of gold", afterBbb);

            // "aaa" expands to three tokens; the following "pot" and "of" must both survive,
            // interleaved with the replacement tokens.
            string[] afterAaa = { "xyzzy", "aaaa1", "pot", "aaaa2", "of", "aaaa3", "gold" };
            AssertAnalyzesTo(analyzer, "xyzzy aaa pot of gold", afterAaa);
        }