Builds an FSTSynonymMap.

Call add() until you have added all the mappings, then call build() to get an FSTSynonymMap. (@lucene.experimental)

예제 #1
0
        /// <summary>
        /// Registers the mutually recursive rules "zoo zoo" -> "zoo" and "zoo" -> "zoo zoo"
        /// (both keeping the original token) and asserts the analysis of "zoo zoo $ zoo".
        /// </summary>
        public virtual void testRecursion4()
        {
            b = new SynonymMap.Builder(true);

            // Both rules are added with keepOrig = true.
            add("zoo zoo", "zoo", true);
            add("zoo", "zoo zoo", true);

            Analyzer a = new AnalyzerAnonymousInnerClassHelper13(this, b.build());

            string[] expectedTerms = {"zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo"};
            // NOTE(review): presumably the expected position increments -- confirm against assertAnalyzesTo.
            int[] expectedIncrements = {1, 0, 1, 1, 1, 0, 1};

            assertAnalyzesTo(a, "zoo zoo $ zoo", expectedTerms, expectedIncrements);
        }
예제 #2
0
        /// <summary>
        /// Adds the same rule "a b" -> "ab" three times to a builder created with
        /// <c>false</c> and asserts that analyzing "a b" yields "ab" three times.
        /// </summary>
        public virtual void testRepeatsOn()
        {
            b = new SynonymMap.Builder(false);

            // The identical rule is registered three times, never keeping the original.
            for (int repeat = 0; repeat < 3; repeat++)
            {
                add("a b", "ab", false);
            }

            Analyzer a = new AnalyzerAnonymousInnerClassHelper8(this, b.build());

            string[] expectedTerms = {"ab", "ab", "ab"};
            // NOTE(review): presumably the expected position increments -- confirm against assertAnalyzesTo.
            int[] expectedIncrements = {1, 0, 0};

            assertAnalyzesTo(a, "a b", expectedTerms, expectedIncrements);
        }
예제 #3
0
        /// <summary>
        /// Randomized test: generates a random document and a random set of synonym rules,
        /// computes the expected output with <c>slowSynMatcher</c> and checks it via <c>verify</c>.
        /// </summary>
        public virtual void testRandom()
        {
            int alphabetSize = TestUtil.Next(random(), 2, 7);

            int docLen = atLeast(3000);
            //int docLen = 50;

            string document = getRandomString('a', alphabetSize, docLen);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: doc=" + document);
            }

            int numSyn = atLeast(5);
            //int numSyn = 2;

            // synMap finds the rule set for an input; syns keeps the rule sets in creation order.
            IDictionary<string, OneSyn> synMap = new Dictionary<string, OneSyn>();
            IList<OneSyn> syns = new List<OneSyn>();
            bool dedup = random().nextBoolean();
            if (VERBOSE)
            {
                Console.WriteLine("  dedup=" + dedup);
            }
            b = new SynonymMap.Builder(dedup);
            for (int synIDX = 0; synIDX < numSyn; synIDX++)
            {
                string synIn = getRandomString('a', alphabetSize, TestUtil.Next(random(), 1, 5)).Trim();

                // The dictionary indexer throws KeyNotFoundException for a missing key
                // (Java's Map.get returned null), so probe with TryGetValue instead.
                OneSyn s;
                if (!synMap.TryGetValue(synIn, out s))
                {
                    s = new OneSyn();
                    s.@in = synIn;
                    syns.Add(s);
                    s.@out = new List<string>();
                    synMap[synIn] = s;
                    s.keepOrig = random().nextBoolean();
                }

                string synOut = getRandomString('0', 10, TestUtil.Next(random(), 1, 5)).Trim();
                s.@out.Add(synOut);
                add(synIn, synOut, s.keepOrig);
                if (VERBOSE)
                {
                    // Concatenating the list itself would print its type name in .NET, so join the items.
                    Console.WriteLine("  syns[" + synIDX + "] = " + s.@in + " -> [" + string.Join(", ", s.@out) + "] keepOrig=" + s.keepOrig);
                }
            }

            // Run a tokenizer over "a" through its full lifecycle; exactly one token is expected.
            tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
            tokensIn.reset();
            assertTrue(tokensIn.incrementToken());
            assertFalse(tokensIn.incrementToken());
            tokensIn.end();
            tokensIn.close();

            tokensOut = new SynonymFilter(tokensIn, b.build(), true);
            termAtt = tokensOut.addAttribute(typeof(CharTermAttribute));
            posIncrAtt = tokensOut.addAttribute(typeof(PositionIncrementAttribute));
            posLenAtt = tokensOut.addAttribute(typeof(PositionLengthAttribute));
            offsetAtt = tokensOut.addAttribute(typeof(OffsetAttribute));

            // When the builder was created with dedup, prune duplicates from the
            // reference rule sets before computing the expected output.
            if (dedup)
            {
                pruneDups(syns);
            }

            string expected = slowSynMatcher(document, syns, 5);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: expected=" + expected);
            }

            verify(document, expected);
        }
예제 #4
0
        /// <summary>
        /// Simple random test like testRandom2 (not visible in this excerpt), but for larger docs:
        /// each iteration builds a synonym map from random non-empty strings and passes an
        /// analyzer over it to <c>checkRandomData</c>.
        /// </summary>
        public virtual void testRandomHuge()
        {
            // The local must not be called "random": in C# a local's scope covers its own
            // initializer, so "Random random = random();" would bind the call to the
            // (not yet assigned, non-invocable) local instead of the random() method.
            Random rnd = random();
            int numIters = atLeast(3);
            for (int i = 0; i < numIters; i++)
            {
                b = new SynonymMap.Builder(rnd.nextBoolean());
                int numEntries = atLeast(10);
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + i + " numEntries=" + numEntries);
                }
                for (int j = 0; j < numEntries; j++)
                {
                    add(randomNonEmptyString(), randomNonEmptyString(), rnd.nextBoolean());
                }
                SynonymMap map = b.build();
                bool ignoreCase = rnd.nextBoolean();

                Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper4(this, map, ignoreCase);

                checkRandomData(rnd, analyzer, 100, 1024);
            }
        }
예제 #5
0
        /// <summary>
        /// Maps the three-word phrase "national hockey league" to "nhl" (keeping the original)
        /// and asserts the terms and integer arrays reported when analyzing that phrase.
        /// </summary>
        public virtual void testMultiwordOffsets()
        {
            b = new SynonymMap.Builder(true);
            add("national hockey league", "nhl", true);

            Analyzer a = new AnalyzerAnonymousInnerClassHelper14(this, b.build());

            string[] expectedTerms = {"national", "nhl", "hockey", "league"};
            // NOTE(review): the three int arrays look like start offsets, end offsets and
            // position increments -- confirm against the assertAnalyzesTo overload.
            int[] expectedStarts = {0, 0, 9, 16};
            int[] expectedEnds = {8, 22, 15, 22};
            int[] expectedIncrements = {1, 0, 1, 1};

            assertAnalyzesTo(a, "national hockey league", expectedTerms, expectedStarts, expectedEnds, expectedIncrements);
        }
예제 #6
0
        /// <summary>
        /// Adds the rule "a" -> "a b" without keeping the original and verifies the input "a"
        /// against the expected string "a b:1", i.e. a synonym output that is longer than
        /// the input it matched.
        /// </summary>
        public virtual void testOutputHangsOffEnd()
        {
            b = new SynonymMap.Builder(true);
            const bool keepOrig = false;
            // b hangs off the end (no input token under it):
            add("a", "a b", keepOrig);
            // Run a tokenizer over "a" through its full lifecycle; exactly one token is expected.
            tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
            tokensIn.reset();
            assertTrue(tokensIn.incrementToken());
            assertFalse(tokensIn.incrementToken());
            tokensIn.end();
            tokensIn.close();

            // Wrap the tokenizer in the filter under test and register the attribute
            // fields on the filter's output stream.
            tokensOut = new SynonymFilter(tokensIn, b.build(), true);
            termAtt = tokensOut.addAttribute(typeof(CharTermAttribute));
            posIncrAtt = tokensOut.addAttribute(typeof(PositionIncrementAttribute));
            offsetAtt = tokensOut.addAttribute(typeof(OffsetAttribute));
            posLenAtt = tokensOut.addAttribute(typeof(PositionLengthAttribute));

            // Make sure endOffset inherits from previous input token:
            verify("a", "a b:1");
        }
예제 #7
0
        /// <summary>
        /// Registers a set of overlapping single- and multi-word rules (none keeping the
        /// original) and asserts which rule wins for several short inputs.
        /// </summary>
        public virtual void testMatching()
        {
            b = new SynonymMap.Builder(true);

            // Each row is { input, output }; rows are added in this order with keepOrig = false.
            string[][] rules =
            {
                new string[] {"a b", "ab"},
                new string[] {"a c", "ac"},
                new string[] {"a", "aa"},
                new string[] {"b", "bb"},
                new string[] {"z x c v", "zxcv"},
                new string[] {"x c", "xc"}
            };
            foreach (string[] rule in rules)
            {
                add(rule[0], rule[1], false);
            }

            Analyzer a = new AnalyzerAnonymousInnerClassHelper6(this, b.build());

            // Single-term inputs.
            checkOneTerm(a, "$", "$");
            checkOneTerm(a, "a", "aa");
            checkOneTerm(a, "b", "bb");

            // Two-token inputs.
            assertAnalyzesTo(a, "a $", new string[] {"aa", "$"}, new int[] {1, 1});
            assertAnalyzesTo(a, "$ a", new string[] {"$", "aa"}, new int[] {1, 1});
            assertAnalyzesTo(a, "a a", new string[] {"aa", "aa"}, new int[] {1, 1});

            // The full four-word rule matches and collapses to a single token.
            assertAnalyzesTo(a, "z x c v", new string[] {"zxcv"}, new int[] {1});

            // The four-word rule does not match here; only the inner "x c" rule applies.
            assertAnalyzesTo(a, "z x c $", new string[] {"z", "xc", "$"}, new int[] {1, 1, 1});
        }
예제 #8
0
        /// <summary>
        /// For several random synonym maps, runs <c>checkAnalysisConsistency</c> on the
        /// empty string with an analyzer built over the map.
        /// </summary>
        public virtual void testEmptyTerm()
        {
            // The local must not be called "random": in C# a local's scope covers its own
            // initializer, so "Random random = random();" would bind the call to the
            // (not yet assigned, non-invocable) local instead of the random() method.
            Random rnd = random();
            int numIters = atLeast(10);
            for (int i = 0; i < numIters; i++)
            {
                b = new SynonymMap.Builder(rnd.nextBoolean());
                int numEntries = atLeast(10);
                for (int j = 0; j < numEntries; j++)
                {
                    add(randomNonEmptyString(), randomNonEmptyString(), rnd.nextBoolean());
                }
                SynonymMap map = b.build();
                bool ignoreCase = rnd.nextBoolean();

                Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this, map, ignoreCase);

                checkAnalysisConsistency(rnd, analyzer, rnd.nextBoolean(), "");
            }
        }
예제 #9
0
        /// <summary>
        /// Adds "a b" -> "foo" without keeping the original, asserts the analysis of "a b c"
        /// and then runs <c>checkAnalysisConsistency</c> on the same input.
        /// </summary>
        public virtual void testDontKeepOrig()
        {
            b = new SynonymMap.Builder(true);
            add("a b", "foo", false);

            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, b.build());
            const string input = "a b c";

            assertAnalyzesTo(
                analyzer,
                input,
                new string[] {"foo", "c"},
                new int[] {0, 4},
                new int[] {3, 5},
                null,
                new int[] {1, 1},
                new int[] {1, 1},
                true);
            checkAnalysisConsistency(random(), analyzer, false, input);
        }
예제 #10
0
        /// <summary>
        /// Adds two single-word rules with multi-word outputs and verifies two inputs
        /// against the expectation that matches the <c>keepOrig</c> constant.
        /// </summary>
        public virtual void testBasic2()
        {
            b = new SynonymMap.Builder(true);
            const bool keepOrig = false;
            add("aaa", "aaaa1 aaaa2 aaaa3", keepOrig);
            add("bbb", "bbbb1 bbbb2", keepOrig);

            // Run a tokenizer over "a" through its full lifecycle; exactly one token is expected.
            tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
            tokensIn.reset();
            assertTrue(tokensIn.incrementToken());
            assertFalse(tokensIn.incrementToken());
            tokensIn.end();
            tokensIn.close();

            tokensOut = new SynonymFilter(tokensIn, b.build(), true);
            termAtt = tokensOut.addAttribute(typeof(CharTermAttribute));
            posIncrAtt = tokensOut.addAttribute(typeof(PositionIncrementAttribute));
            posLenAtt = tokensOut.addAttribute(typeof(PositionLengthAttribute));
            offsetAtt = tokensOut.addAttribute(typeof(OffsetAttribute));

            // With keepOrig the matched input token ("bbb"/"aaa") stays in the expected
            // string next to the first synonym token; without it only the synonym remains.
            string expectedForBbb = keepOrig
                ? "xyzzy bbb/bbbb1 pot/bbbb2 of gold"
                : "xyzzy bbbb1 pot/bbbb2 of gold";
            string expectedForAaa = keepOrig
                ? "xyzzy aaa/aaaa1 pot/aaaa2 of/aaaa3 gold"
                : "xyzzy aaaa1 pot/aaaa2 of/aaaa3 gold";

            verify("xyzzy bbb pot of gold", expectedForBbb);
            verify("xyzzy aaa pot of gold", expectedForAaa);
        }
예제 #11
0
        /// <summary>
        /// Registers a mix of single- and multi-word rules (some keeping the original token,
        /// some not), verifies a series of inputs against their expected output strings and
        /// checks <c>tokensOut.CaptureCount</c> after selected inputs.
        /// </summary>
        public virtual void testBasic()
        {
            b = new SynonymMap.Builder(true);
            add("a", "foo", true);
            add("a b", "bar fee", true);
            add("b c", "dog collar", true);
            add("c d", "dog harness holder extras", true);
            add("m c e", "dog barks loudly", false);
            add("i j k", "feep", true);

            // Two different outputs registered for the same input:
            add("e f", "foo bar", false);
            add("e f", "baz bee", false);

            // Single-word inputs, one replacing the original and one keeping it:
            add("z", "boo", false);
            add("y", "bee", true);

            // Run a tokenizer over "a" through its full lifecycle; exactly one token is expected.
            tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
            tokensIn.reset();
            assertTrue(tokensIn.incrementToken());
            assertFalse(tokensIn.incrementToken());
            tokensIn.end();
            tokensIn.close();

            // Wrap the tokenizer in the filter under test and register the attribute
            // fields on the filter's output stream.
            tokensOut = new SynonymFilter(tokensIn, b.build(), true);
            termAtt = tokensOut.addAttribute(typeof(CharTermAttribute));
            posIncrAtt = tokensOut.addAttribute(typeof(PositionIncrementAttribute));
            posLenAtt = tokensOut.addAttribute(typeof(PositionLengthAttribute));
            offsetAtt = tokensOut.addAttribute(typeof(OffsetAttribute));

            verify("a b c", "a/bar b/fee c");

            // syn output extends beyond input tokens
            verify("x a b c d", "x a/bar b/fee c/dog d/harness holder extras");

            verify("a b a", "a/bar b/fee a/foo");

            // outputs that add to one another:
            verify("c d c d", "c/dog d/harness c/holder/dog d/extras/harness holder extras");

            // two outputs for same input
            verify("e f", "foo/baz bar/bee");

            // verify multi-word / single-output offsets:
            verify("g i j k g", "g i/feep:7_3 j k g");

            // mixed keepOrig true/false:
            verify("a m c e x", "a/foo dog barks loudly x");
            verify("c d m c e x", "c/dog d/harness holder/dog extras/barks loudly x");
            // The preceding input is expected to have captured at least one state.
            assertTrue(tokensOut.CaptureCount > 0);

            // no captureStates when no syns matched
            verify("p q r s t", "p q r s t");
            assertEquals(0, tokensOut.CaptureCount);

            // no captureStates when only single-input syns, w/ no
            // lookahead needed, matched
            verify("p q z y t", "p q boo y/bee t");
            assertEquals(0, tokensOut.CaptureCount);
        }