Builds an FSTSynonymMap.

Call add() until you have added all the mappings, then call build() to get an FSTSynonymMap. @lucene.experimental

コード例 #1
0
        /// <summary>
        /// Stacks a <c>LimitTokenPositionFilter</c> with a limit of 3 on top of a
        /// <c>SynonymFilter</c> and asserts the emitted terms and position increments,
        /// once for each value of <c>consumeAll</c>.
        /// </summary>
        public virtual void TestMaxPosition3WithSynomyms()
        {
            bool[] consumeAllChoices = { true, false };
            foreach (bool consumeAll in consumeAllChoices)
            {
                MockTokenizer source = new MockTokenizer(new StringReader("one two three four five"), MockTokenizer.WHITESPACE, false);
                // The tokenizer's checks can only stay enabled when every token is consumed.
                source.EnableChecks = consumeAll;

                SynonymMap.Builder mapBuilder = new SynonymMap.Builder(true);

                // Single-word synonyms for "one", registered in this exact order.
                foreach (string single in new string[] { "first", "alpha", "beguine" })
                {
                    mapBuilder.Add(new CharsRef("one"), new CharsRef(single), true);
                }

                // Multi-word synonyms are joined into one shared scratch CharsRef before each Add.
                CharsRef joined = new CharsRef();
                SynonymMap.Builder.Join(new string[] { "and", "indubitably", "single", "only" }, joined);
                mapBuilder.Add(new CharsRef("one"), joined, true);
                SynonymMap.Builder.Join(new string[] { "dopple", "ganger" }, joined);
                mapBuilder.Add(new CharsRef("two"), joined, true);

                SynonymMap synonyms = mapBuilder.Build();
                TokenStream withSynonyms = new SynonymFilter(source, synonyms, true);
                TokenStream limited = new LimitTokenPositionFilter(withSynonyms, 3, consumeAll);

                // "only", the 4th word of multi-word synonym "and indubitably single only" is not emitted, since its position is greater than 3.
                string[] expectedTerms = { "one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple", "three", "single", "ganger" };
                int[] expectedPosIncrs = { 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0 };
                AssertTokenStreamContents(limited, expectedTerms, expectedPosIncrs);
            }
        }
コード例 #2
0
        /// <summary>
        /// Repeatedly builds a synonym map from random non-empty string pairs and pushes
        /// random data through an analyzer constructed from that map.
        /// </summary>
        public virtual void TestRandomStrings()
        {
            int iterations = AtLeast(10);
            for (int iter = 0; iter < iterations; iter++)
            {
                // Random draws happen in the same order as the rules are built: builder flag first.
                bool builderFlag = Random().nextBoolean();
                SynonymMap.Builder builder = new SynonymMap.Builder(builderFlag);

                int ruleCount = AtLeast(10);
                int added = 0;
                while (added < ruleCount)
                {
                    var input = RandomNonEmptyString();
                    var output = RandomNonEmptyString();
                    bool ruleFlag = Random().nextBoolean();
                    Add(builder, input, output, ruleFlag);
                    added++;
                }

                SynonymMap synonyms = builder.Build();
                bool ignoreCase = Random().nextBoolean();
                Analyzer randomAnalyzer = new AnalyzerAnonymousInnerClassHelper(this, synonyms, ignoreCase);

                CheckRandomData(Random(), randomAnalyzer, 200);
            }
        }
コード例 #3
0
        /// <summary>
        /// Registers a rule that maps "zoo" onto itself (original not kept) and asserts
        /// that analysis of "zoo zoo $ zoo" yields the same four terms, each with a
        /// position increment of 1.
        /// </summary>
        public virtual void TestRecursion()
        {
            b = new SynonymMap.Builder(true);
            // The rule's output is identical to its input; keepOrig is false.
            Add("zoo", "zoo", false);
            SynonymMap synonyms = b.Build();
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper9(this, synonyms);

            string[] expectedTerms = { "zoo", "zoo", "$", "zoo" };
            int[] expectedPosIncrs = { 1, 1, 1, 1 };
            AssertAnalyzesTo(analyzer, "zoo zoo $ zoo", expectedTerms, expectedPosIncrs);
        }
コード例 #4
0
        /// <summary>
        /// Adds the same "a b" -> "ab" rule three times to a builder constructed with
        /// <c>false</c> and asserts that "ab" is emitted three times for the input "a b".
        /// </summary>
        public virtual void TestRepeatsOn()
        {
            b = new SynonymMap.Builder(false);

            // Register the identical rule three times (keepOrig is false each time).
            for (int n = 0; n < 3; n++)
            {
                Add("a b", "ab", false);
            }

            SynonymMap synonyms = b.Build();
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper8(this, synonyms);

            string[] expectedTerms = { "ab", "ab", "ab" };
            int[] expectedPosIncrs = { 1, 0, 0 };
            AssertAnalyzesTo(analyzer, "a b", expectedTerms, expectedPosIncrs);
        }
コード例 #5
0
        /// <summary>
        /// Registers a set of overlapping single- and multi-word rules (originals not kept)
        /// and asserts the analyzer output for several short inputs.
        /// </summary>
        public virtual void TestMatching()
        {
            b = new SynonymMap.Builder(true);

            // Each entry is { input, output }; every rule is added with keepOrig = false.
            string[][] rules =
            {
                new string[] { "a b", "ab" },
                new string[] { "a c", "ac" },
                new string[] { "a", "aa" },
                new string[] { "b", "bb" },
                new string[] { "z x c v", "zxcv" },
                new string[] { "x c", "xc" },
            };
            foreach (string[] rule in rules)
            {
                Add(rule[0], rule[1], false);
            }

            SynonymMap synonyms = b.Build();
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper6(this, synonyms);

            // Single-term inputs.
            CheckOneTerm(analyzer, "$", "$");
            CheckOneTerm(analyzer, "a", "aa");
            CheckOneTerm(analyzer, "b", "bb");

            // Two-term inputs mixing matching and non-matching tokens.
            AssertAnalyzesTo(analyzer, "a $", new string[] { "aa", "$" }, new int[] { 1, 1 });
            AssertAnalyzesTo(analyzer, "$ a", new string[] { "$", "aa" }, new int[] { 1, 1 });
            AssertAnalyzesTo(analyzer, "a a", new string[] { "aa", "aa" }, new int[] { 1, 1 });

            // The full four-word rule matches as a unit.
            string[] fullMatchTerms = { "zxcv" };
            int[] fullMatchPosIncrs = { 1 };
            AssertAnalyzesTo(analyzer, "z x c v", fullMatchTerms, fullMatchPosIncrs);

            // When the four-word rule cannot complete, "x c" is still rewritten to "xc".
            string[] partialTerms = { "z", "xc", "$" };
            int[] partialPosIncrs = { 1, 1, 1 };
            AssertAnalyzesTo(analyzer, "z x c $", partialTerms, partialPosIncrs);
        }
コード例 #6
0
        /// <summary>
        /// Registers two single-word-to-multi-word rules and verifies the filter output for
        /// inputs containing "bbb" and "aaa". The expected strings differ depending on
        /// <c>keepOrig</c>, which is a compile-time constant here.
        /// </summary>
        public virtual void TestBasic2()
        {
            b = new SynonymMap.Builder(true);
            const bool keepOrig = false;
            Add("aaa", "aaaa1 aaaa2 aaaa3", keepOrig);
            Add("bbb", "bbbb1 bbbb2", keepOrig);

            // Run the tokenizer through one full Reset/IncrementToken/End/Dispose cycle on the
            // placeholder input "a", which yields exactly one token.
            // NOTE(review): presumably this leaves it ready for Verify to feed it new input - confirm against Verify.
            tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
            tokensIn.Reset();
            assertTrue(tokensIn.IncrementToken());
            assertFalse(tokensIn.IncrementToken());
            tokensIn.End();
            tokensIn.Dispose();

            tokensOut = new SynonymFilter(tokensIn, b.Build(), true);
            termAtt = tokensOut.AddAttribute<ICharTermAttribute>();
            posIncrAtt = tokensOut.AddAttribute<IPositionIncrementAttribute>();
            posLenAtt = tokensOut.AddAttribute<IPositionLengthAttribute>();
            offsetAtt = tokensOut.AddAttribute<IOffsetAttribute>();

            // keepOrig is a constant, so one branch below is unreachable (CS0162); the warning is
            // suppressed only around this statement and restored with the same warning number.
#pragma warning disable 162
            if (keepOrig)
            {
                Verify("xyzzy bbb pot of gold", "xyzzy bbb/bbbb1 pot/bbbb2 of gold");
                Verify("xyzzy aaa pot of gold", "xyzzy aaa/aaaa1 pot/aaaa2 of/aaaa3 gold");
            }
            else
            {
                Verify("xyzzy bbb pot of gold", "xyzzy bbbb1 pot/bbbb2 of gold");
                Verify("xyzzy aaa pot of gold", "xyzzy aaaa1 pot/aaaa2 of/aaaa3 gold");
            }
#pragma warning restore 162
        }
コード例 #7
0
        /// <summary>
        /// Repeatedly builds a synonym map from random non-empty string pairs and runs
        /// <c>CheckRandomData</c> with arguments 100 and 1024 against an analyzer built from it.
        /// </summary>
        public virtual void TestRandomHuge()
        {
            Random random = Random();
            int iterations = AtLeast(3);
            for (int iter = 0; iter < iterations; iter++)
            {
                bool builderFlag = random.nextBoolean();
                b = new SynonymMap.Builder(builderFlag);

                int ruleCount = AtLeast(10);
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + iter + " numEntries=" + ruleCount);
                }

                int added = 0;
                while (added < ruleCount)
                {
                    // Draw the arguments in the same left-to-right order as a direct call would.
                    var input = RandomNonEmptyString();
                    var output = RandomNonEmptyString();
                    bool ruleFlag = random.nextBoolean();
                    Add(input, output, ruleFlag);
                    added++;
                }

                SynonymMap synonyms = b.Build();
                bool ignoreCase = random.nextBoolean();
                Analyzer randomAnalyzer = new AnalyzerAnonymousInnerClassHelper4(this, synonyms, ignoreCase);

                CheckRandomData(random, randomAnalyzer, 100, 1024);
            }
        }
コード例 #8
0
        /// <summary>
        /// Repeatedly builds a synonym map from random non-empty string pairs and runs
        /// <c>CheckAnalysisConsistency</c> on the empty string with an analyzer built from it.
        /// </summary>
        public virtual void TestEmptyTerm()
        {
            Random random = Random();
            int iterations = AtLeast(10);
            for (int iter = 0; iter < iterations; iter++)
            {
                bool builderFlag = random.nextBoolean();
                b = new SynonymMap.Builder(builderFlag);

                int ruleCount = AtLeast(10);
                int added = 0;
                while (added < ruleCount)
                {
                    var input = RandomNonEmptyString();
                    var output = RandomNonEmptyString();
                    bool ruleFlag = random.nextBoolean();
                    Add(input, output, ruleFlag);
                    added++;
                }

                SynonymMap synonyms = b.Build();
                bool ignoreCase = random.nextBoolean();
                Analyzer emptyInputAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, synonyms, ignoreCase);

                // The third argument is a fresh random boolean; the analyzed text is empty.
                bool randomFlag = random.nextBoolean();
                CheckAnalysisConsistency(random, emptyInputAnalyzer, randomFlag, "");
            }
        }
コード例 #9
0
        /// <summary>
        /// Repeatedly builds a synonym map from random non-empty string pairs and runs
        /// <c>CheckRandomData</c> against an <c>AnalyzerAnonymousInnerClassHelper101</c>
        /// built from it.
        /// </summary>
        public virtual void TestRandom2GraphAfter()
        {
            int iterations = AtLeast(3);
            Random random = Random();
            for (int iter = 0; iter < iterations; iter++)
            {
                bool builderFlag = random.nextBoolean();
                b = new SynonymMap.Builder(builderFlag);

                int ruleCount = AtLeast(10);
                int added = 0;
                while (added < ruleCount)
                {
                    var input = RandomNonEmptyString();
                    var output = RandomNonEmptyString();
                    bool ruleFlag = random.nextBoolean();
                    Add(input, output, ruleFlag);
                    added++;
                }

                SynonymMap synonyms = b.Build();
                bool ignoreCase = random.nextBoolean();
                Analyzer graphAnalyzer = new AnalyzerAnonymousInnerClassHelper101(this, synonyms, ignoreCase);

                CheckRandomData(random, graphAnalyzer, 100);
            }
        }
コード例 #10
0
        /// <summary>
        /// Builds a random document and a random set of synonym rules, computes the expected
        /// output with <c>SlowSynMatcher</c>, and verifies the filter output against it.
        /// </summary>
        public virtual void TestRandom()
        {

            int alphabetSize = TestUtil.NextInt(Random(), 2, 7);

            // A small fixed length (e.g. 50) can be substituted here when debugging.
            int docLen = AtLeast(3000);

            string document = GetRandomString('a', alphabetSize, docLen);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: doc=" + document);
            }

            // A small fixed count (e.g. 2) can be substituted here when debugging.
            int numSyn = AtLeast(5);

            // synMap indexes rules by input string; syns keeps them in creation order.
            IDictionary<string, OneSyn> synMap = new Dictionary<string, OneSyn>();
            IList<OneSyn> syns = new List<OneSyn>();
            bool dedup = Random().nextBoolean();
            if (VERBOSE)
            {
                Console.WriteLine("  dedup=" + dedup);
            }
            b = new SynonymMap.Builder(dedup);
            for (int synIDX = 0; synIDX < numSyn; synIDX++)
            {
                string synIn = GetRandomString('a', alphabetSize, TestUtil.NextInt(Random(), 1, 5)).Trim();

                // Single lookup: reuse the existing rule for this input, or create one below.
                OneSyn s;
                if (!synMap.TryGetValue(synIn, out s))
                {
                    s = null;
                }
                if (s == null)
                {
                    s = new OneSyn();
                    // "in" and "out" are C# keywords, hence the @-escaped member names.
                    s.@in = synIn;
                    syns.Add(s);
                    s.@out = new List<string>();
                    synMap[synIn] = s;
                    // keepOrig is drawn once per distinct input and reused for all its outputs.
                    s.keepOrig = Random().nextBoolean();
                }
                string synOut = GetRandomString('0', 10, TestUtil.NextInt(Random(), 1, 5)).Trim();
                s.@out.Add(synOut);
                Add(synIn, synOut, s.keepOrig);
                if (VERBOSE)
                {
                    Console.WriteLine("  syns[" + synIDX + "] = " + s.@in + " -> " + s.@out + " keepOrig=" + s.keepOrig);
                }
            }

            // Run the tokenizer through one full Reset/IncrementToken/End/Dispose cycle on the
            // placeholder input "a", which yields exactly one token.
            // NOTE(review): presumably this leaves it ready for Verify to feed it new input - confirm against Verify.
            tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
            tokensIn.Reset();
            assertTrue(tokensIn.IncrementToken());
            assertFalse(tokensIn.IncrementToken());
            tokensIn.End();
            tokensIn.Dispose();

            tokensOut = new SynonymFilter(tokensIn, b.Build(), true);
            termAtt = tokensOut.AddAttribute<ICharTermAttribute>();
            posIncrAtt = tokensOut.AddAttribute<IPositionIncrementAttribute>();
            posLenAtt = tokensOut.AddAttribute<IPositionLengthAttribute>();
            offsetAtt = tokensOut.AddAttribute<IOffsetAttribute>();

            // When the builder was created with dedup, prune duplicates from the reference rules too.
            if (dedup)
            {
                PruneDups(syns);
            }

            string expected = SlowSynMatcher(document, syns, 5);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: expected=" + expected);
            }

            Verify(document, expected);
        }
コード例 #11
0
        /// <summary>
        /// Registers a mix of single- and multi-word rules (some keeping the original token,
        /// some not), then verifies the filter output for a series of inputs and checks
        /// <c>tokensOut.CaptureCount</c> after selected inputs.
        /// </summary>
        public virtual void TestBasic()
        {
            b = new SynonymMap.Builder(true);

            // Rules that keep the original token alongside the synonym.
            Add("a", "foo", true);
            Add("a b", "bar fee", true);
            Add("b c", "dog collar", true);
            Add("c d", "dog harness holder extras", true);
            // Rule that drops the original tokens.
            Add("m c e", "dog barks loudly", false);
            Add("i j k", "feep", true);

            // Two different outputs registered for the same input.
            Add("e f", "foo bar", false);
            Add("e f", "baz bee", false);

            // Single-token rules, one dropping and one keeping the original.
            Add("z", "boo", false);
            Add("y", "bee", true);

            // Run the tokenizer through one full cycle on the placeholder input "a",
            // which must yield exactly one token.
            tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
            tokensIn.Reset();
            bool sawFirstToken = tokensIn.IncrementToken();
            assertTrue(sawFirstToken);
            bool sawSecondToken = tokensIn.IncrementToken();
            assertFalse(sawSecondToken);
            tokensIn.End();
            tokensIn.Dispose();

            SynonymMap synonyms = b.Build();
            tokensOut = new SynonymFilter(tokensIn, synonyms, true);
            termAtt = tokensOut.AddAttribute<ICharTermAttribute>();
            posIncrAtt = tokensOut.AddAttribute<IPositionIncrementAttribute>();
            posLenAtt = tokensOut.AddAttribute<IPositionLengthAttribute>();
            offsetAtt = tokensOut.AddAttribute<IOffsetAttribute>();

            Verify("a b c", "a/bar b/fee c");

            // The synonym output runs past the last input token.
            Verify("x a b c d", "x a/bar b/fee c/dog d/harness holder extras");

            Verify("a b a", "a/bar b/fee a/foo");

            // Outputs of successive matches stack on top of one another.
            Verify("c d c d", "c/dog d/harness c/holder/dog d/extras/harness holder extras");

            // One input with two registered outputs.
            Verify("e f", "foo/baz bar/bee");

            // Offsets for a multi-word input mapped to a single output token.
            Verify("g i j k g", "g i/feep:7_3 j k g");

            // Rules with keepOrig true and false in the same input.
            Verify("a m c e x", "a/foo dog barks loudly x");
            Verify("c d m c e x", "c/dog d/harness holder/dog extras/barks loudly x");
            assertTrue(tokensOut.CaptureCount > 0);

            // Nothing matches, so no state captures are expected.
            Verify("p q r s t", "p q r s t");
            assertEquals(0, tokensOut.CaptureCount);

            // Only single-input rules match and no lookahead is needed,
            // so again no state captures are expected.
            Verify("p q z y t", "p q boo y/bee t");
            assertEquals(0, tokensOut.CaptureCount);
        }
コード例 #12
0
        /// <summary>
        /// Registers "a b" -> "foo" while keeping the original tokens, asserts the full
        /// analysis result for "a b c", and then runs <c>CheckAnalysisConsistency</c> on
        /// the same text.
        /// </summary>
        public virtual void TestDoKeepOrig()
        {
            b = new SynonymMap.Builder(true);
            Add("a b", "foo", true);

            SynonymMap synonyms = b.Build();
            Analyzer keepOrigAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, synonyms);

            // Expected values, listed in the order they are passed to AssertAnalyzesTo.
            string[] expectedTerms = { "a", "foo", "b", "c" };
            int[] expectedInts1 = { 0, 0, 2, 4 };
            int[] expectedInts2 = { 1, 3, 3, 5 };
            int[] expectedInts3 = { 1, 0, 1, 1 };
            int[] expectedInts4 = { 1, 2, 1, 1 };

            AssertAnalyzesTo(keepOrigAnalyzer, "a b c", expectedTerms, expectedInts1, expectedInts2, null, expectedInts3, expectedInts4, true);
            CheckAnalysisConsistency(Random(), keepOrigAnalyzer, false, "a b c");
        }
コード例 #13
0
        /// <summary>
        /// Registers "national hockey league" -> "nhl" while keeping the original tokens and
        /// asserts the terms and the three integer arrays expected for that input.
        /// </summary>
        public virtual void TestMultiwordOffsets()
        {
            b = new SynonymMap.Builder(true);
            // The original three tokens are kept next to the "nhl" synonym.
            Add("national hockey league", "nhl", true);
            SynonymMap synonyms = b.Build();
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper14(this, synonyms);

            // Expected values, listed in the order they are passed to AssertAnalyzesTo.
            string[] expectedTerms = { "national", "nhl", "hockey", "league" };
            int[] expectedInts1 = { 0, 0, 9, 16 };
            int[] expectedInts2 = { 8, 22, 15, 22 };
            int[] expectedInts3 = { 1, 0, 1, 1 };
            AssertAnalyzesTo(analyzer, "national hockey league", expectedTerms, expectedInts1, expectedInts2, expectedInts3);
        }
コード例 #14
0
        /// <summary>
        /// Registers a set of overlapping single- and multi-word rules that all keep the
        /// original tokens and asserts the analyzer output for several short inputs.
        /// </summary>
        public virtual void TestIncludeOrig()
        {
            b = new SynonymMap.Builder(true);

            // Each entry is { input, output }; every rule is added with keepOrig = true.
            string[][] rules =
            {
                new string[] { "a b", "ab" },
                new string[] { "a c", "ac" },
                new string[] { "a", "aa" },
                new string[] { "b", "bb" },
                new string[] { "z x c v", "zxcv" },
                new string[] { "x c", "xc" },
            };
            foreach (string[] rule in rules)
            {
                Add(rule[0], rule[1], true);
            }

            SynonymMap synonyms = b.Build();
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper11(this, synonyms);

            // No rule matches "$".
            AssertAnalyzesTo(analyzer, "$", new string[] { "$" }, new int[] { 1 });

            // The single-token input "a" is asserted twice in a row with the same expectation.
            AssertAnalyzesTo(analyzer, "a", new string[] { "a", "aa" }, new int[] { 1, 0 });
            AssertAnalyzesTo(analyzer, "a", new string[] { "a", "aa" }, new int[] { 1, 0 });

            // "a" surrounded by non-matching tokens.
            AssertAnalyzesTo(analyzer, "$ a", new string[] { "$", "a", "aa" }, new int[] { 1, 1, 0 });
            AssertAnalyzesTo(analyzer, "a $", new string[] { "a", "aa", "$" }, new int[] { 1, 0, 1 });
            AssertAnalyzesTo(analyzer, "$ a !", new string[] { "$", "a", "aa", "!" }, new int[] { 1, 1, 0, 1 });

            AssertAnalyzesTo(analyzer, "a a", new string[] { "a", "aa", "a", "aa" }, new int[] { 1, 0, 1, 0 });
            AssertAnalyzesTo(analyzer, "b", new string[] { "b", "bb" }, new int[] { 1, 0 });

            // The four-word rule matches in full.
            string[] fullMatchTerms = { "z", "zxcv", "x", "c", "v" };
            int[] fullMatchPosIncrs = { 1, 0, 1, 1, 1 };
            AssertAnalyzesTo(analyzer, "z x c v", fullMatchTerms, fullMatchPosIncrs);

            // The four-word rule cannot complete; the "x c" rule still applies.
            string[] partialTerms = { "z", "x", "xc", "c", "$" };
            int[] partialPosIncrs = { 1, 1, 0, 1, 1 };
            AssertAnalyzesTo(analyzer, "z x c $", partialTerms, partialPosIncrs);
        }
コード例 #15
0
        /// <summary>
        /// Registers "a" -> "a b" without keeping the original and verifies the output for
        /// the single-token input "a", where the second output token has no input token
        /// beneath it.
        /// </summary>
        public virtual void TestOutputHangsOffEnd()
        {
            b = new SynonymMap.Builder(true);
            // The "b" of the output extends past the end of the input (nothing underneath it).
            Add("a", "a b", false);

            // Run the tokenizer through one full cycle on the placeholder input "a",
            // which must yield exactly one token.
            tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
            tokensIn.Reset();
            bool sawFirstToken = tokensIn.IncrementToken();
            assertTrue(sawFirstToken);
            bool sawSecondToken = tokensIn.IncrementToken();
            assertFalse(sawSecondToken);
            tokensIn.End();
            tokensIn.Dispose();

            SynonymMap synonyms = b.Build();
            tokensOut = new SynonymFilter(tokensIn, synonyms, true);
            termAtt = tokensOut.AddAttribute<ICharTermAttribute>();
            posIncrAtt = tokensOut.AddAttribute<IPositionIncrementAttribute>();
            offsetAtt = tokensOut.AddAttribute<IOffsetAttribute>();
            posLenAtt = tokensOut.AddAttribute<IPositionLengthAttribute>();

            // The end offset of the trailing token should be inherited from the previous input token.
            Verify("a", "a b:1");
        }