Ejemplo n.º 1
0
        /// <summary>
        /// Tokenizes random Unicode strings with <c>AnalyzerAnonymousInnerClassHelper2</c> and asserts
        /// that every code point of the input covered by a token's offsets is a letter. Afterwards
        /// the same analyzer is run through <c>CheckRandomData</c>.
        /// </summary>
        public virtual void TestCrossPlaneNormalization2()
        {
            var analyzer = new AnalyzerAnonymousInnerClassHelper2();
            var iterations = 1000 * RANDOM_MULTIPLIER;

            for (var round = 0; round < iterations; round++)
            {
                var text = TestUtil.RandomUnicodeString(Random);
                var stream = analyzer.GetTokenStream("foo", text);
                try
                {
                    stream.Reset();
                    var offsets = stream.AddAttribute<IOffsetAttribute>();
                    while (stream.IncrementToken())
                    {
                        // Slice the original input with the offsets reported for the current token.
                        int start = offsets.StartOffset;
                        string covered = text.Substring(start, offsets.EndOffset - start);

                        // Advance by whole code points so a surrogate pair is checked as one unit.
                        int pos = 0;
                        while (pos < covered.Length)
                        {
                            int codePoint = char.ConvertToUtf32(covered, pos);
                            assertTrue("non-letter:" + codePoint.ToString("x"), Character.IsLetter(codePoint));
                            pos += Character.CharCount(codePoint);
                        }
                    }
                    stream.End();
                }
                finally
                {
                    IOUtils.DisposeWhileHandlingException(stream);
                }
            }

            // Extra coverage: generic random-data check on the same analyzer.
            CheckRandomData(Random, analyzer, iterations);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Indexes a single document whose field "f1" holds "a 5 a a" using
        /// <c>AnalyzerAnonymousInnerClassHelper2</c>, then opens the newest segment and verifies
        /// the frequency, positions and payloads recorded for the term "a".
        /// </summary>
        /// <remarks>
        /// The writer and the reader are held in <c>using</c> blocks so that both are released
        /// even when indexing throws or one of the assertions fails.
        /// </remarks>
        public virtual void TestTokenReuse()
        {
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);

            SegmentCommitInfo info;
            using (IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)))
            {
                Document doc = new Document();
                doc.Add(NewTextField("f1", "a 5 a a", Field.Store.YES));

                writer.AddDocument(doc);
                writer.Commit();

                // Capture the segment info while the writer is still open; it is read after disposal.
                info = writer.NewestSegment();
            }

            using (SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random)))
            {
                DocsAndPositionsEnum termPositions = MultiFields.GetTermPositionsEnum(reader, reader.LiveDocs, "f1", new BytesRef("a"));

                Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                int freq = termPositions.Freq;

                Assert.AreEqual(3, freq);

                // Only the first occurrence is expected to carry a payload.
                Assert.AreEqual(0, termPositions.NextPosition());
                Assert.IsNotNull(termPositions.GetPayload());
                Assert.AreEqual(6, termPositions.NextPosition());
                Assert.IsNull(termPositions.GetPayload());
                Assert.AreEqual(7, termPositions.NextPosition());
                Assert.IsNull(termPositions.GetPayload());
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Runs <c>CheckRandomData</c> against <c>AnalyzerAnonymousInnerClassHelper2</c> with
        /// 100 * RANDOM_MULTIPLIER iterations and 1027 as the final argument.
        /// </summary>
        public virtual void TestRandomHugeStrings()
        {
            // Grab the random source first, before the analyzer is constructed.
            Random rnd = Random;
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);

            CheckRandomData(rnd, analyzer, 100 * RANDOM_MULTIPLIER, 1027);
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Tokenizes random Unicode strings with <c>AnalyzerAnonymousInnerClassHelper2</c> and asserts
 /// that every code point of the input covered by a token's offsets is a letter. Afterwards
 /// the same analyzer is run through <c>CheckRandomData</c>.
 /// </summary>
 public virtual void TestCrossPlaneNormalization2()
 {
     var analyzer = new AnalyzerAnonymousInnerClassHelper2();
     var iterations = 1000 * RANDOM_MULTIPLIER;

     for (var round = 0; round < iterations; round++)
     {
         var text = TestUtil.RandomUnicodeString(Random());
         var stream = analyzer.TokenStream("foo", text);
         try
         {
             stream.Reset();
             var offsets = stream.AddAttribute<IOffsetAttribute>();
             while (stream.IncrementToken())
             {
                 // Slice the original input with the offsets reported for the current token.
                 int start = offsets.StartOffset();
                 string covered = text.Substring(start, offsets.EndOffset() - start);

                 // Advance by whole code points so a surrogate pair is checked as one unit.
                 int pos = 0;
                 while (pos < covered.Length)
                 {
                     int codePoint = char.ConvertToUtf32(covered, pos);
                     assertTrue("non-letter:" + codePoint.ToString("x"), Character.IsLetter(codePoint));
                     pos += Character.CharCount(codePoint);
                 }
             }
             stream.End();
         }
         finally
         {
             IOUtils.CloseWhileHandlingException(stream);
         }
     }

     // Extra coverage: generic random-data check on the same analyzer.
     CheckRandomData(Random(), analyzer, iterations);
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Runs <c>CheckRandomData</c> against <c>AnalyzerAnonymousInnerClassHelper2</c> with
        /// 100 * RandomMultiplier iterations and 8192 as the final argument.
        /// </summary>
        public virtual void TestRandomHugeStrings()
        {
            // Grab the random source first, before the analyzer is constructed.
            Random rnd = Random;
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);

            CheckRandomData(rnd, analyzer, 100 * RandomMultiplier, 8192);
        }
        /// <summary>
        /// Puts "sängerinnen" into the exclusion set handed to
        /// <c>AnalyzerAnonymousInnerClassHelper2</c> and asserts through <c>CheckOneTerm</c>
        /// that the input "sängerinnen" yields the term "sängerinnen".
        /// </summary>
        public virtual void TestKeyword()
        {
            CharArraySet keywords = new CharArraySet(TEST_VERSION_CURRENT, AsSet("sängerinnen"), false);
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, keywords);

            CheckOneTerm(analyzer, "sängerinnen", "sängerinnen");
        }
 /// <summary>
 /// Puts "quilométricas" into the exclusion set handed to
 /// <c>AnalyzerAnonymousInnerClassHelper2</c> and asserts through <c>checkOneTerm</c>
 /// that the input "quilométricas" yields the term "quilométricas".
 /// </summary>
 /// <remarks>
 /// Converted from Java, where the method declared <c>throws java.io.IOException</c> and the
 /// exclusion set was a <c>final</c> local.
 /// </remarks>
 public virtual void testKeyword()
 {
     CharArraySet keywords = new CharArraySet(TEST_VERSION_CURRENT, asSet("quilométricas"), false);
     Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, keywords);

     checkOneTerm(analyzer, "quilométricas", "quilométricas");
 }
        /// <summary>
        /// Pushes random strings through two analyzers in turn:
        /// <c>AnalyzerAnonymousInnerClassHelper</c> and <c>AnalyzerAnonymousInnerClassHelper2</c>,
        /// each for 1000 * RANDOM_MULTIPLIER iterations.
        /// </summary>
        /// <remarks>Converted from Java, where the method declared <c>throws Exception</c>.</remarks>
        public virtual void testRandomStrings()
        {
            Analyzer first = new AnalyzerAnonymousInnerClassHelper(this);
            checkRandomData(random(), first, 1000 * RANDOM_MULTIPLIER);

            // The second analyzer is only created once the first run has finished.
            Analyzer second = new AnalyzerAnonymousInnerClassHelper2(this);
            checkRandomData(random(), second, 1000 * RANDOM_MULTIPLIER);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Regression-style check: builds <c>AnalyzerAnonymousInnerClassHelper2</c> from a fixed
        /// protected-word set and a fixed <c>sbyte</c> table, then runs
        /// <c>CheckAnalysisConsistency</c> on one specific input string.
        /// </summary>
        public virtual void TestCuriousWikipediaString()
        {
            HashSet<string> words = new HashSet<string>(Arrays.AsList("rrdpafa", "pupmmlu", "xlq", "dyy", "zqrxrrck", "o", "hsrlfvcha"));
            CharArraySet protectedWords = new CharArraySet(TEST_VERSION_CURRENT, words, false);

            // Fixed table values; kept exactly as given.
            sbyte[] fixedTable = new sbyte[] { -57, 26, 1, 48, 63, -23, 55, -84, 18, 120, -97, 103, 58, 13, 84, 89, 57, -13, -63, 5, 28, 97, -54, -94, 102, -108, -5, 5, 46, 40, 43, 78, 43, -72, 36, 29, 124, -106, -22, -51, 65, 5, 31, -42, 6, -99, 97, 14, 81, -128, 74, 100, 54, -55, -25, 53, -71, -98, 44, 33, 86, 106, -42, 47, 115, -89, -18, -26, 22, -95, -43, 83, -125, 105, -104, -24, 106, -16, 126, 115, -105, 97, 65, -33, 57, 44, -1, 123, -68, 100, 13, -41, -64, -119, 0, 92, 94, -36, 53, -9, -102, -18, 90, 94, -26, 31, 71, -20 };

            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, protectedWords, fixedTable);

            CheckAnalysisConsistency(Random(), analyzer, false, "B\u28c3\ue0f8[ \ud800\udfc2 </p> jb");
        }
Ejemplo n.º 10
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testKeyword() throws java.io.IOException
        /// <summary>
        /// Puts "jaktkarlens" into the exclusion set handed to
        /// <c>AnalyzerAnonymousInnerClassHelper2</c> and asserts through <c>checkOneTerm</c>
        /// that the input "jaktkarlens" yields the term "jaktkarlens".
        /// </summary>
        public virtual void testKeyword()
        {
            // The Java original declared this local as final.
            CharArraySet keywords = new CharArraySet(TEST_VERSION_CURRENT, asSet("jaktkarlens"), false);
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, keywords);

            checkOneTerm(analyzer, "jaktkarlens", "jaktkarlens");
        }
Ejemplo n.º 11
0
 /// <summary>
 /// Creates the filter on top of <paramref name="tokenizer"/>, records the enclosing analyzer,
 /// sets the <c>first</c> flag and registers the term, payload and position-increment attributes.
 /// </summary>
 /// <param name="outerInstance">Enclosing analyzer instance; stored in <c>OuterInstance</c>.</param>
 /// <param name="tokenizer">Input tokenizer; forwarded to the base constructor.</param>
 public TokenFilterAnonymousInnerClassHelper(AnalyzerAnonymousInnerClassHelper2 outerInstance, Tokenizer tokenizer)
     : base(tokenizer)
 {
     OuterInstance = outerInstance;
     this.first = true;

     // Attributes are registered in the same order as before: term, payload, position increment.
     this.termAtt = AddAttribute<ICharTermAttribute>();
     this.payloadAtt = AddAttribute<IPayloadAttribute>();
     this.posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
 }
Ejemplo n.º 12
0
        /// <summary>
        /// Pushes random strings through <c>AnalyzerAnonymousInnerClassHelper</c> and then
        /// <c>AnalyzerAnonymousInnerClassHelper2</c>, each for 1000 * RandomMultiplier iterations.
        /// </summary>
        public virtual void TestRandomStrings()
        {
            Analyzer first = new AnalyzerAnonymousInnerClassHelper(this);
            CheckRandomData(Random, first, 1000 * RandomMultiplier);

            // The second analyzer is only created once the first run has finished.
            Analyzer second = new AnalyzerAnonymousInnerClassHelper2(this);
            CheckRandomData(Random, second, 1000 * RandomMultiplier);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Pushes random strings through <c>AnalyzerAnonymousInnerClassHelper</c> and then
        /// <c>AnalyzerAnonymousInnerClassHelper2</c>, each for 1000 * RANDOM_MULTIPLIER iterations.
        /// </summary>
        public virtual void TestRandomStrings()
        {
            Analyzer first = new AnalyzerAnonymousInnerClassHelper(this);
            CheckRandomData(Random, first, 1000 * RANDOM_MULTIPLIER);

            // The second analyzer is only created once the first run has finished.
            Analyzer second = new AnalyzerAnonymousInnerClassHelper2(this);
            CheckRandomData(Random, second, 1000 * RANDOM_MULTIPLIER);
        }
Ejemplo n.º 14
0
 /// <summary>
 /// Ten rounds of random-data checking. Each round draws a lower bound in [2, 10] and an
 /// upper bound in [lower, 20], builds <c>AnalyzerAnonymousInnerClassHelper2</c> from them and
 /// runs <c>CheckRandomData</c> for 200 * RANDOM_MULTIPLIER iterations with 20 as the final argument.
 /// </summary>
 public virtual void TestRandomStrings()
 {
     int round = 0;
     while (round < 10)
     {
         // The upper bound is drawn after, and depends on, the lower bound.
         int lower = TestUtil.NextInt32(Random, 2, 10);
         int upper = TestUtil.NextInt32(Random, lower, 20);

         Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, lower, upper);
         CheckRandomData(Random, analyzer, 200 * RANDOM_MULTIPLIER, 20);

         round++;
     }
 }
Ejemplo n.º 15
0
        /// <summary>
        /// Runs at least 30 rounds; each round builds <c>AnalyzerAnonymousInnerClassHelper2</c> from a
        /// random automaton, a random lowercase flag and a random limit in [0, 500], and feeds it
        /// through <c>CheckRandomData</c> for 100 iterations.
        /// </summary>
        /// <remarks>
        /// The analyzer is disposed in a <c>finally</c> block so it is released even when
        /// <c>CheckRandomData</c> throws.
        /// </remarks>
        public virtual void TestRandomRegexps()
        {
            int iters = AtLeast(30);

            for (int i = 0; i < iters; i++)
            {
                CharacterRunAutomaton dfa = new CharacterRunAutomaton(AutomatonTestUtil.RandomAutomaton(Random()));
                bool lowercase = Random().NextBoolean();
                int limit = TestUtil.NextInt(Random(), 0, 500);
                Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this, dfa, lowercase, limit);
                try
                {
                    CheckRandomData(Random(), a, 100);
                }
                finally
                {
                    // Release the analyzer regardless of the outcome of this round.
                    a.Dispose();
                }
            }
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Ten rounds of random-data checking. Each round draws a lower bound in [2, 10] and an
        /// upper bound in [lower, 20], builds <c>AnalyzerAnonymousInnerClassHelper2</c> from them and
        /// runs <c>checkRandomData</c> for 200 * RANDOM_MULTIPLIER iterations with 20 as the final argument.
        /// </summary>
        /// <remarks>
        /// Converted from Java, where the method declared <c>throws Exception</c> and both bounds
        /// were <c>final</c> locals.
        /// </remarks>
        public virtual void testRandomStrings()
        {
            int round = 0;
            while (round < 10)
            {
                // The upper bound is drawn after, and depends on, the lower bound.
                int lower = TestUtil.Next(random(), 2, 10);
                int upper = TestUtil.Next(random(), lower, 20);

                Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, lower, upper);
                checkRandomData(random(), analyzer, 200 * RANDOM_MULTIPLIER, 20);

                round++;
            }
        }
        /// <summary>
        /// Random-data check for two analyzers: first <c>AnalyzerAnonymousInnerClassHelper2</c> built
        /// from a small fixed dictionary, then <c>AnalyzerAnonymousInnerClassHelper3</c> built from a
        /// hyphenation tree loaded from the "da_UTF8.xml" resource.
        /// </summary>
        public virtual void TestRandomStrings()
        {
            CharArraySet dictionary = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
            Analyzer dictionaryAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, dictionary);

            CheckRandomData(Random, dictionaryAnalyzer, 1000 * RANDOM_MULTIPLIER);

            // The resource stream stays open for the whole second run and is disposed afterwards.
            using (var grammarStream = this.GetType().getResourceAsStream("da_UTF8.xml"))
            {
                HyphenationTree tree = HyphenationCompoundWordTokenFilter.GetHyphenationTree(grammarStream);
                Analyzer hyphenationAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, tree);

                CheckRandomData(Random, hyphenationAnalyzer, 1000 * RANDOM_MULTIPLIER);
            }
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Builds a <c>NormalizeCharMap</c> with the rules "t" -> "" and "tmakdbl" -> "c", wraps it in
        /// <c>AnalyzerAnonymousInnerClassHelper2</c> and runs <c>CheckAnalysisConsistency</c> on the
        /// text "gzw f quaxot".
        /// </summary>
        public virtual void TestFinalOffsetSpecialCase()
        {
            NormalizeCharMap.Builder rules = new NormalizeCharMap.Builder();
            rules.Add("t", "");
            // Per the original author's note: this second rule has no effect on the text below,
            // yet the test passes if the rule is removed.
            rules.Add("tmakdbl", "c");

            NormalizeCharMap charMap = rules.Build();
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, charMap);

            string input = "gzw f quaxot";
            CheckAnalysisConsistency(Random(), analyzer, false, input);
        }
        /// <summary>
        /// Ten rounds of random-data checking with <c>AnalyzerAnonymousInnerClassHelper2</c> built
        /// from a random lower bound in [2, 10] and upper bound in [lower, 20], followed by one run
        /// with <c>AnalyzerAnonymousInnerClassHelper3</c>.
        /// </summary>
        public virtual void TestRandomStrings()
        {
            int round = 0;
            while (round < 10)
            {
                // The upper bound is drawn after, and depends on, the lower bound.
                int lower = TestUtil.NextInt32(Random, 2, 10);
                int upper = TestUtil.NextInt32(Random, lower, 20);

                Analyzer bounded = new AnalyzerAnonymousInnerClassHelper2(this, lower, upper);
                CheckRandomData(Random, bounded, 100 * RandomMultiplier);

                round++;
            }

            // Final run: different analyzer, extra arguments 20, false, false.
            Analyzer last = new AnalyzerAnonymousInnerClassHelper3(this);
            CheckRandomData(Random, last, 1000 * RandomMultiplier, 20, false, false);
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Ten rounds of random-data checking with <c>AnalyzerAnonymousInnerClassHelper2</c> built
        /// from a random lower bound in [2, 10] and upper bound in [lower, 20], followed by one run
        /// with <c>AnalyzerAnonymousInnerClassHelper3</c>.
        /// </summary>
        public virtual void TestRandomStrings()
        {
            int round = 0;
            while (round < 10)
            {
                // The upper bound is drawn after, and depends on, the lower bound.
                int lower = TestUtil.NextInt(Random(), 2, 10);
                int upper = TestUtil.NextInt(Random(), lower, 20);

                Analyzer bounded = new AnalyzerAnonymousInnerClassHelper2(this, lower, upper);
                CheckRandomData(Random(), bounded, 100 * RANDOM_MULTIPLIER);

                round++;
            }

            // Final run: different analyzer, extra arguments 20, false, false.
            Analyzer last = new AnalyzerAnonymousInnerClassHelper3(this);
            CheckRandomData(Random(), last, 1000 * RANDOM_MULTIPLIER, 20, false, false);
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Parses a two-line synonym file containing backslash-escaped "=&gt;" and "," sequences with
        /// <c>SolrSynonymParser</c>, then checks how the resulting analyzer handles "ball", "a=&gt;a"
        /// and "a,a".
        /// </summary>
        public virtual void TestEscapedStuff()
        {
            // Two rules, one per line; the escaped syntax characters are part of the terms.
            string rules =
                "a\\=>a => b\\=>b\n" +
                "a\\,a => b\\,b";
            MockAnalyzer keywordAnalyzer = new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false);
            SolrSynonymParser synonymParser = new SolrSynonymParser(true, true, keywordAnalyzer);

            synonymParser.Parse(new StringReader(rules));
            SynonymMap synonyms = synonymParser.Build();
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, synonyms);

            // No rule applies to "ball".
            AssertAnalyzesTo(analyzer, "ball", new[] { "ball" }, new[] { 1 });

            AssertAnalyzesTo(analyzer, "a=>a", new[] { "b=>b" }, new[] { 1 });

            AssertAnalyzesTo(analyzer, "a,a", new[] { "b,b" }, new[] { 1 });
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Parses a two-line synonym file containing backslash-escaped "=&gt;" and "," sequences with
        /// <c>SolrSynonymParser</c>, then checks how the resulting analyzer handles "ball", "a=&gt;a"
        /// and "a,a".
        /// </summary>
        /// <remarks>
        /// Converted from Java, where the method declared <c>throws Exception</c> and the synonym
        /// map was a <c>final</c> local.
        /// </remarks>
        public virtual void testEscapedStuff()
        {
            // Two rules, one per line; the escaped syntax characters are part of the terms.
            string rules =
                "a\\=>a => b\\=>b\n" +
                "a\\,a => b\\,b";
            MockAnalyzer keywordAnalyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
            SolrSynonymParser synonymParser = new SolrSynonymParser(true, true, keywordAnalyzer);

            synonymParser.parse(new StringReader(rules));
            SynonymMap synonyms = synonymParser.build();
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, synonyms);

            // No rule applies to "ball".
            assertAnalyzesTo(analyzer, "ball", new[] { "ball" }, new[] { 1 });

            assertAnalyzesTo(analyzer, "a=>a", new[] { "b=>b" }, new[] { 1 });

            assertAnalyzesTo(analyzer, "a,a", new[] { "b,b" }, new[] { 1 });
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Repeats <c>CheckAnalysisConsistency</c> on a fixed graph-style input
        /// 100 * RANDOM_MULTIPLIER times, building a fresh
        /// <c>AnalyzerAnonymousInnerClassHelper2</c> for every iteration.
        /// </summary>
        public virtual void TestMockGraphTokenFilterOnGraphInput()
        {
            int iter = 0;
            while (iter < 100 * RANDOM_MULTIPLIER)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter=" + iter);
                }

                // A new analyzer is needed per iteration because MGTF has a fixed seed.
                Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);

                CheckAnalysisConsistency(Random, analyzer, false, "a/x:3 c/y:2 d e f/z:4 g h i j k");

                iter++;
            }
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Compares the token streams produced by a <c>MockAnalyzer</c> (configured with
        /// <c>jvmLetter</c> and a max token length of 255) and by
        /// <c>AnalyzerAnonymousInnerClassHelper2</c> on random simple strings of up to 8192 characters.
        /// </summary>
        public virtual void TestLetterAsciiHuge()
        {
            Random rnd = Random;
            int lengthLimit = 8192; // CharTokenizer.IO_BUFFER_SIZE*2

            MockAnalyzer expectedSide = new MockAnalyzer(rnd, jvmLetter, false);
            expectedSide.MaxTokenLength = 255; // match CharTokenizer's max token length

            Analyzer actualSide = new AnalyzerAnonymousInnerClassHelper2(this);
            int rounds = AtLeast(50);

            for (int round = 0; round < rounds; round++)
            {
                string input = TestUtil.RandomSimpleString(rnd, lengthLimit);

                // Each analyzer gets its own reader over the same input.
                assertEquals(input, expectedSide.GetTokenStream("foo", newStringReader(input)), actualSide.GetTokenStream("foo", newStringReader(input)));
            }
        }
Ejemplo n.º 25
0
        /// <summary>
        /// Parses a two-line synonym file containing backslash-escaped "=&gt;" and "," sequences with
        /// <c>SolrSynonymParser</c>, then checks how the resulting analyzer handles "ball", "a=&gt;a"
        /// and "a,a".
        /// </summary>
        /// <remarks>
        /// Converted from Java, where the method declared <c>throws Exception</c> and the synonym
        /// map was a <c>final</c> local.
        /// </remarks>
        public virtual void testEscapedStuff()
        {
            // Two rules, one per line; the escaped syntax characters are part of the terms.
            string rules =
                "a\\=>a => b\\=>b\n" +
                "a\\,a => b\\,b";
            MockAnalyzer keywordAnalyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
            SolrSynonymParser synonymParser = new SolrSynonymParser(true, true, keywordAnalyzer);

            synonymParser.parse(new StringReader(rules));
            SynonymMap synonyms = synonymParser.build();
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, synonyms);

            // No rule applies to "ball".
            assertAnalyzesTo(analyzer, "ball", new[] { "ball" }, new[] { 1 });

            assertAnalyzesTo(analyzer, "a=>a", new[] { "b=>b" }, new[] { 1 });

            assertAnalyzesTo(analyzer, "a,a", new[] { "b,b" }, new[] { 1 });
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Random-data check for two analyzers: first <c>AnalyzerAnonymousInnerClassHelper2</c> built
        /// from a small fixed dictionary, then <c>AnalyzerAnonymousInnerClassHelper3</c> built from a
        /// hyphenation tree loaded from the "da_UTF8.xml" resource.
        /// </summary>
        /// <remarks>
        /// Converted from Java, where the method declared <c>throws Exception</c> and the dictionary
        /// and hyphenation tree were <c>final</c> locals.
        /// </remarks>
        public virtual void testRandomStrings()
        {
            CharArraySet dictionary = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
            Analyzer dictionaryAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, dictionary);

            checkRandomData(random(), dictionaryAnalyzer, 1000 * RANDOM_MULTIPLIER);

            // The grammar is located through the type's resource lookup and passed on as an InputSource.
            InputSource grammar = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
            HyphenationTree tree = HyphenationCompoundWordTokenFilter.getHyphenationTree(grammar);
            Analyzer hyphenationAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, tree);

            checkRandomData(random(), hyphenationAnalyzer, 1000 * RANDOM_MULTIPLIER);
        }
Ejemplo n.º 27
0
        /// <summary>
        /// Checks tokens, offsets and position increments produced by three analyzers that share the
        /// same word-delimiter flags and the protected word "NUTCH":
        /// <c>AnalyzerAnonymousInnerClassHelper</c>, <c>...Helper2</c> and <c>...Helper3</c>.
        /// </summary>
        public virtual void TestPositionIncrements()
        {
            WordDelimiterFlags flags =
                WordDelimiterFlags.GENERATE_WORD_PARTS |
                WordDelimiterFlags.GENERATE_NUMBER_PARTS |
                WordDelimiterFlags.CATENATE_ALL |
                WordDelimiterFlags.SPLIT_ON_CASE_CHANGE |
                WordDelimiterFlags.SPLIT_ON_NUMERICS |
                WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE;

            CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new[] { "NUTCH" }, false);

            // --- analyzer that uses whitespace + wdf ---
            Analyzer whitespaceWdf = new AnalyzerAnonymousInnerClassHelper(this, flags, protWords);

            // In this case, works as expected.
            AssertAnalyzesTo(whitespaceWdf, "LUCENE / SOLR",
                new[] { "LUCENE", "SOLR" },
                new[] { 0, 9 },
                new[] { 6, 13 },
                new[] { 1, 1 });

            // Only in this case, posInc of 2 ?!
            AssertAnalyzesTo(whitespaceWdf, "LUCENE / solR",
                new[] { "LUCENE", "sol", "solR", "R" },
                new[] { 0, 9, 9, 12 },
                new[] { 6, 12, 13, 13 },
                new[] { 1, 1, 0, 1 });

            AssertAnalyzesTo(whitespaceWdf, "LUCENE / NUTCH SOLR",
                new[] { "LUCENE", "NUTCH", "SOLR" },
                new[] { 0, 9, 15 },
                new[] { 6, 14, 19 },
                new[] { 1, 1, 1 });

            // --- analyzer that will consume tokens with large position increments ---
            Analyzer largeGapWdf = new AnalyzerAnonymousInnerClassHelper2(this, flags, protWords);

            // Increment of "largegap" is preserved.
            AssertAnalyzesTo(largeGapWdf, "LUCENE largegap SOLR",
                new[] { "LUCENE", "largegap", "SOLR" },
                new[] { 0, 7, 16 },
                new[] { 6, 15, 20 },
                new[] { 1, 10, 1 });

            // The "/" had a position increment of 10, where did it go?!?!!
            AssertAnalyzesTo(largeGapWdf, "LUCENE / SOLR",
                new[] { "LUCENE", "SOLR" },
                new[] { 0, 9 },
                new[] { 6, 13 },
                new[] { 1, 11 });

            // In this case, the increment of 10 from the "/" is carried over.
            AssertAnalyzesTo(largeGapWdf, "LUCENE / solR",
                new[] { "LUCENE", "sol", "solR", "R" },
                new[] { 0, 9, 9, 12 },
                new[] { 6, 12, 13, 13 },
                new[] { 1, 11, 0, 1 });

            AssertAnalyzesTo(largeGapWdf, "LUCENE / NUTCH SOLR",
                new[] { "LUCENE", "NUTCH", "SOLR" },
                new[] { 0, 9, 15 },
                new[] { 6, 14, 19 },
                new[] { 1, 11, 1 });

            // --- third analyzer ---
            Analyzer stopwordWdf = new AnalyzerAnonymousInnerClassHelper3(this, flags, protWords);

            AssertAnalyzesTo(stopwordWdf, "lucene.solr",
                new[] { "lucene", "lucenesolr", "solr" },
                new[] { 0, 0, 7 },
                new[] { 6, 11, 11 },
                new[] { 1, 0, 1 });

            // The stopword should add a gap here.
            AssertAnalyzesTo(stopwordWdf, "the lucene.solr",
                new[] { "lucene", "lucenesolr", "solr" },
                new[] { 4, 4, 11 },
                new[] { 10, 15, 15 },
                new[] { 2, 0, 1 });
        }
Ejemplo n.º 28
0
        /// <summary>
        /// Registers the rule "a b" -> "foo" (with the final <c>true</c> argument to <c>Add</c>) on a
        /// fresh <c>SynonymMap.Builder</c>, then verifies the analysis of "a b c" and runs
        /// <c>CheckAnalysisConsistency</c> on the same text.
        /// </summary>
        public virtual void TestDoKeepOrig()
        {
            // The builder lives in the field 'b'; Add(...) is called right after it is replaced.
            b = new SynonymMap.Builder(true);
            Add("a b", "foo", true);

            SynonymMap synonyms = b.Build();
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, synonyms);

            AssertAnalyzesTo(
                analyzer,
                "a b c",
                new[] { "a", "foo", "b", "c" },
                new[] { 0, 0, 2, 4 },
                new[] { 1, 3, 3, 5 },
                null,
                new[] { 1, 0, 1, 1 },
                new[] { 1, 2, 1, 1 },
                true);

            CheckAnalysisConsistency(Random(), analyzer, false, "a b c");
        }
Ejemplo n.º 29
0
        /// <summary>
        /// Builds a <c>NormalizeCharMap</c> with the rules "t" -> "" and "tmakdbl" -> "c", wraps it in
        /// <c>AnalyzerAnonymousInnerClassHelper2</c> and runs <c>checkAnalysisConsistency</c> on the
        /// text "gzw f quaxot".
        /// </summary>
        /// <remarks>
        /// Converted from Java, where the method declared <c>throws Exception</c> and carried a
        /// commented-out annotation:
        /// @Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971").
        /// </remarks>
        public virtual void testFinalOffsetSpecialCase()
        {
            NormalizeCharMap.Builder rules = new NormalizeCharMap.Builder();
            rules.add("t", "");
            // Per the original author's note: this second rule has no effect on the text below,
            // yet the test passes if the rule is removed.
            rules.add("tmakdbl", "c");

            NormalizeCharMap charMap = rules.build();
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, charMap);

            string input = "gzw f quaxot";
            checkAnalysisConsistency(random(), analyzer, false, input);
        }
Ejemplo n.º 30
0
//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
//ORIGINAL LINE: @Test public void testPositionIncrements() throws Exception
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        /// <summary>
        /// Checks tokens, offsets and position increments produced by three analyzers that share the
        /// same word-delimiter flags and the protected word "NUTCH":
        /// <c>AnalyzerAnonymousInnerClassHelper</c>, <c>...Helper2</c> and <c>...Helper3</c>.
        /// </summary>
        /// <remarks>
        /// The protected-word set is built with an explicit <c>HashSet&lt;string&gt;</c>; the Java
        /// diamond form left behind by the converter is not valid C#.
        /// </remarks>
        public virtual void testPositionIncrements()
        {
            int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;

            // Java original: new HashSet<>(Arrays.asList("NUTCH"))
            CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<string> { "NUTCH" }, false);

            /* analyzer that uses whitespace + wdf */
            Analyzer a = new AnalyzerAnonymousInnerClassHelper(this, flags, protWords);

            /* in this case, works as expected. */
            assertAnalyzesTo(a, "LUCENE / SOLR", new string[] { "LUCENE", "SOLR" }, new int[] { 0, 9 }, new int[] { 6, 13 }, new int[] { 1, 1 });

            /* only in this case, posInc of 2 ?! */
            assertAnalyzesTo(a, "LUCENE / solR", new string[] { "LUCENE", "sol", "solR", "R" }, new int[] { 0, 9, 9, 12 }, new int[] { 6, 12, 13, 13 }, new int[] { 1, 1, 0, 1 });

            assertAnalyzesTo(a, "LUCENE / NUTCH SOLR", new string[] { "LUCENE", "NUTCH", "SOLR" }, new int[] { 0, 9, 15 }, new int[] { 6, 14, 19 }, new int[] { 1, 1, 1 });

            /* analyzer that will consume tokens with large position increments */
            Analyzer a2 = new AnalyzerAnonymousInnerClassHelper2(this, flags, protWords);

            /* increment of "largegap" is preserved */
            assertAnalyzesTo(a2, "LUCENE largegap SOLR", new string[] { "LUCENE", "largegap", "SOLR" }, new int[] { 0, 7, 16 }, new int[] { 6, 15, 20 }, new int[] { 1, 10, 1 });

            /* the "/" had a position increment of 10, where did it go?!?!! */
            assertAnalyzesTo(a2, "LUCENE / SOLR", new string[] { "LUCENE", "SOLR" }, new int[] { 0, 9 }, new int[] { 6, 13 }, new int[] { 1, 11 });

            /* in this case, the increment of 10 from the "/" is carried over */
            assertAnalyzesTo(a2, "LUCENE / solR", new string[] { "LUCENE", "sol", "solR", "R" }, new int[] { 0, 9, 9, 12 }, new int[] { 6, 12, 13, 13 }, new int[] { 1, 11, 0, 1 });

            assertAnalyzesTo(a2, "LUCENE / NUTCH SOLR", new string[] { "LUCENE", "NUTCH", "SOLR" }, new int[] { 0, 9, 15 }, new int[] { 6, 14, 19 }, new int[] { 1, 11, 1 });

            Analyzer a3 = new AnalyzerAnonymousInnerClassHelper3(this, flags, protWords);

            assertAnalyzesTo(a3, "lucene.solr", new string[] { "lucene", "lucenesolr", "solr" }, new int[] { 0, 0, 7 }, new int[] { 6, 11, 11 }, new int[] { 1, 0, 1 });

            /* the stopword should add a gap here */
            assertAnalyzesTo(a3, "the lucene.solr", new string[] { "lucene", "lucenesolr", "solr" }, new int[] { 4, 4, 11 }, new int[] { 10, 15, 15 }, new int[] { 2, 0, 1 });
        }
Ejemplo n.º 31
0
        /// <summary>
        /// Verifies the terms and position increments produced by
        /// <c>AnalyzerAnonymousInnerClassHelper2</c> for one-, two- and three-word inputs.
        /// The stop words involved are "of", "the" and "s".
        /// </summary>
        /// <remarks>Converted from Java, where the method declared <c>throws Exception</c>.</remarks>
        public virtual void testCommonGramsFilter()
        {
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);

            // One-word inputs.
            assertAnalyzesTo(analyzer, "the", new[] { "the" });
            assertAnalyzesTo(analyzer, "foo", new[] { "foo" });

            // Two-word inputs.
            assertAnalyzesTo(analyzer, "brown fox", new[] { "brown", "fox" }, new[] { 1, 1 });
            assertAnalyzesTo(analyzer, "the fox", new[] { "the", "the_fox", "fox" }, new[] { 1, 0, 1 });
            assertAnalyzesTo(analyzer, "fox of", new[] { "fox", "fox_of", "of" }, new[] { 1, 0, 1 });
            assertAnalyzesTo(analyzer, "of the", new[] { "of", "of_the", "the" }, new[] { 1, 0, 1 });

            // Three-word combinations; in the pattern names s = stop/common word, n = not a stop word.
            assertAnalyzesTo(analyzer, "n n n", new[] { "n", "n", "n" }, new[] { 1, 1, 1 });
            assertAnalyzesTo(analyzer, "quick brown fox", new[] { "quick", "brown", "fox" }, new[] { 1, 1, 1 });

            assertAnalyzesTo(analyzer, "n n s", new[] { "n", "n", "n_s", "s" }, new[] { 1, 1, 0, 1 });
            assertAnalyzesTo(analyzer, "quick brown the", new[] { "quick", "brown", "brown_the", "the" }, new[] { 1, 1, 0, 1 });

            assertAnalyzesTo(analyzer, "n s n", new[] { "n", "n_s", "s", "s_n", "n" }, new[] { 1, 0, 1, 0, 1 });
            assertAnalyzesTo(analyzer, "quick the fox", new[] { "quick", "quick_the", "the", "the_fox", "fox" }, new[] { 1, 0, 1, 0, 1 });

            assertAnalyzesTo(analyzer, "n s s", new[] { "n", "n_s", "s", "s_s", "s" }, new[] { 1, 0, 1, 0, 1 });
            assertAnalyzesTo(analyzer, "fox of the", new[] { "fox", "fox_of", "of", "of_the", "the" }, new[] { 1, 0, 1, 0, 1 });

            assertAnalyzesTo(analyzer, "s n n", new[] { "s", "s_n", "n", "n" }, new[] { 1, 0, 1, 1 });
            assertAnalyzesTo(analyzer, "the quick brown", new[] { "the", "the_quick", "quick", "brown" }, new[] { 1, 0, 1, 1 });

            assertAnalyzesTo(analyzer, "s n s", new[] { "s", "s_n", "n", "n_s", "s" }, new[] { 1, 0, 1, 0, 1 });
            assertAnalyzesTo(analyzer, "the fox of", new[] { "the", "the_fox", "fox", "fox_of", "of" }, new[] { 1, 0, 1, 0, 1 });

            assertAnalyzesTo(analyzer, "s s n", new[] { "s", "s_s", "s", "s_n", "n" }, new[] { 1, 0, 1, 0, 1 });
            assertAnalyzesTo(analyzer, "of the fox", new[] { "of", "of_the", "the", "the_fox", "fox" }, new[] { 1, 0, 1, 0, 1 });

            assertAnalyzesTo(analyzer, "s s s", new[] { "s", "s_s", "s", "s_s", "s" }, new[] { 1, 0, 1, 0, 1 });
            assertAnalyzesTo(analyzer, "of the of", new[] { "of", "of_the", "the", "the_of", "of" }, new[] { 1, 0, 1, 0, 1 });
        }
        /// <summary>
        /// Verifies the terms and position increments produced by
        /// <c>AnalyzerAnonymousInnerClassHelper2</c> for one-, two- and three-word inputs.
        /// The stop words involved are "of", "the" and "s".
        /// </summary>
        public virtual void TestCommonGramsFilter()
        {
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);

            // One-word inputs.
            AssertAnalyzesTo(analyzer, "the", new[] { "the" });
            AssertAnalyzesTo(analyzer, "foo", new[] { "foo" });

            // Two-word inputs.
            AssertAnalyzesTo(analyzer, "brown fox", new[] { "brown", "fox" }, new[] { 1, 1 });
            AssertAnalyzesTo(analyzer, "the fox", new[] { "the", "the_fox", "fox" }, new[] { 1, 0, 1 });
            AssertAnalyzesTo(analyzer, "fox of", new[] { "fox", "fox_of", "of" }, new[] { 1, 0, 1 });
            AssertAnalyzesTo(analyzer, "of the", new[] { "of", "of_the", "the" }, new[] { 1, 0, 1 });

            // Three-word combinations; in the pattern names s = stop/common word, n = not a stop word.
            AssertAnalyzesTo(analyzer, "n n n", new[] { "n", "n", "n" }, new[] { 1, 1, 1 });
            AssertAnalyzesTo(analyzer, "quick brown fox", new[] { "quick", "brown", "fox" }, new[] { 1, 1, 1 });

            AssertAnalyzesTo(analyzer, "n n s", new[] { "n", "n", "n_s", "s" }, new[] { 1, 1, 0, 1 });
            AssertAnalyzesTo(analyzer, "quick brown the", new[] { "quick", "brown", "brown_the", "the" }, new[] { 1, 1, 0, 1 });

            AssertAnalyzesTo(analyzer, "n s n", new[] { "n", "n_s", "s", "s_n", "n" }, new[] { 1, 0, 1, 0, 1 });
            AssertAnalyzesTo(analyzer, "quick the fox", new[] { "quick", "quick_the", "the", "the_fox", "fox" }, new[] { 1, 0, 1, 0, 1 });

            AssertAnalyzesTo(analyzer, "n s s", new[] { "n", "n_s", "s", "s_s", "s" }, new[] { 1, 0, 1, 0, 1 });
            AssertAnalyzesTo(analyzer, "fox of the", new[] { "fox", "fox_of", "of", "of_the", "the" }, new[] { 1, 0, 1, 0, 1 });

            AssertAnalyzesTo(analyzer, "s n n", new[] { "s", "s_n", "n", "n" }, new[] { 1, 0, 1, 1 });
            AssertAnalyzesTo(analyzer, "the quick brown", new[] { "the", "the_quick", "quick", "brown" }, new[] { 1, 0, 1, 1 });

            AssertAnalyzesTo(analyzer, "s n s", new[] { "s", "s_n", "n", "n_s", "s" }, new[] { 1, 0, 1, 0, 1 });
            AssertAnalyzesTo(analyzer, "the fox of", new[] { "the", "the_fox", "fox", "fox_of", "of" }, new[] { 1, 0, 1, 0, 1 });

            AssertAnalyzesTo(analyzer, "s s n", new[] { "s", "s_s", "s", "s_n", "n" }, new[] { 1, 0, 1, 0, 1 });
            AssertAnalyzesTo(analyzer, "of the fox", new[] { "of", "of_the", "the", "the_fox", "fox" }, new[] { 1, 0, 1, 0, 1 });

            AssertAnalyzesTo(analyzer, "s s s", new[] { "s", "s_s", "s", "s_s", "s" }, new[] { 1, 0, 1, 0, 1 });
            AssertAnalyzesTo(analyzer, "of the of", new[] { "of", "of_the", "the", "the_of", "of" }, new[] { 1, 0, 1, 0, 1 });
        }
Ejemplo n.º 33
0
	  /// <summary>
	  /// LUCENE-3642: normalize BMP->SMP and check that offsets are correct.
	  /// For each random Unicode string, the text covered by every token's offsets is walked
	  /// code point by code point and each code point is asserted to be a letter.
	  /// </summary>
// Converter note: the Java original declared "throws java.io.IOException"; .NET has no 'throws' clause.
// NOTE(review): this body is unadjusted converter output. System.Char has no charCount member and no
// IsLetter(int) overload, and the camelCase calls (tokenStream, reset, addAttribute, ...) presumably
// need their .NET counterparts - confirm against the project's support library.
	  public virtual void testCrossPlaneNormalization2()
	  {
		Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);
		int num = 1000 * RANDOM_MULTIPLIER;
		for (int i = 0; i < num; i++)
		{
		  string s = TestUtil.randomUnicodeString(random());
		  TokenStream ts = analyzer.tokenStream("foo", s);
		  try
		  {
			ts.reset();
			OffsetAttribute offsetAtt = ts.addAttribute(typeof(OffsetAttribute));
			while (ts.incrementToken())
			{
			  // Text of the original input covered by the current token's start/end offsets.
			  string highlightedText = StringHelperClass.SubstringSpecial(s, offsetAtt.startOffset(), offsetAtt.endOffset());
			  // j advances by the char count of the code point read in the previous pass.
			  for (int j = 0, cp = 0; j < highlightedText.Length; j += char.charCount(cp))
			  {
				cp = char.ConvertToUtf32(highlightedText, j);
				assertTrue("non-letter:" + cp.ToString("x"), char.IsLetter(cp));
			  }
			}
			ts.end();
		  }
		  finally
		  {
			// Always release the token stream, even when an assertion above fails.
			IOUtils.closeWhileHandlingException(ts);
		  }
		}
		// just for fun
		checkRandomData(random(), analyzer, num);
	  }
        /// <summary>
        /// Registers the synonym rule "a b" -> "foo" (third Add argument true) and checks
        /// the token stream produced for "a b c", then runs a consistency check on the
        /// same input.
        /// </summary>
        public virtual void TestDoKeepOrig()
        {
            const string input = "a b c";

            b = new SynonymMap.Builder(true);
            Add("a b", "foo", true);
            SynonymMap synonyms = b.Build();

            Analyzer synonymAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, synonyms);

            // NOTE(review): argument roles below follow Lucene's AssertAnalyzesTo
            // (terms, start offsets, end offsets, types, position increments,
            // position lengths, offsetsAreCorrect) - confirm against the base class.
            string[] terms = { "a", "foo", "b", "c" };
            int[] startOffsets = { 0, 0, 2, 4 };
            int[] endOffsets = { 1, 3, 3, 5 };
            int[] positionIncrements = { 1, 0, 1, 1 };
            int[] positionLengths = { 1, 2, 1, 1 };

            AssertAnalyzesTo(synonymAnalyzer, input, terms, startOffsets, endOffsets, null, positionIncrements, positionLengths, true);
            CheckAnalysisConsistency(Random(), synonymAnalyzer, false, input);
        }
Ejemplo n.º 35
0
			  // Forwards the version constant and the reader to the base tokenizer
			  // constructor and keeps a reference to the enclosing analyzer helper.
			  // NOTE(review): the parameter type "UnknownType" is a converter placeholder
			  // and its name shadows the TEST_VERSION_CURRENT constant - replace it with
			  // the real version type before compiling.
			  public LetterTokenizerAnonymousInnerClassHelper2(AnalyzerAnonymousInnerClassHelper2 outerInstance, UnknownType TEST_VERSION_CURRENT, Reader reader) : base(TEST_VERSION_CURRENT, reader)
			  {
				  this.outerInstance = outerInstance;
			  }
 /// <summary>
 /// Puts "quilométricas" into an exclusion set, builds the analyzer with that set and
 /// checks that analyzing the word yields the very same term (input and expected
 /// output passed to CheckOneTerm are identical).
 /// </summary>
 public virtual void TestKeyword()
 {
     const string protectedTerm = "quilométricas";

     var keywords = new CharArraySet(TEST_VERSION_CURRENT, AsSet(protectedTerm), false);
     Analyzer keywordAwareAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, keywords);

     CheckOneTerm(keywordAwareAnalyzer, protectedTerm, protectedTerm);
 }
 /// <summary>
 /// Feeds random data to the analyzer via CheckRandomData, using
 /// 100 * RANDOM_MULTIPLIER iterations and 8192 as the final argument.
 /// </summary>
 public virtual void TestRandomHugeStrings()
 {
     // Obtain the random source before the analyzer is created, as the original did.
     Random rnd = Random();
     Analyzer analyzerUnderTest = new AnalyzerAnonymousInnerClassHelper2(this);

     CheckRandomData(
         rnd,
         analyzerUnderTest,
         100 * RANDOM_MULTIPLIER,
         8192);
 }
Ejemplo n.º 38
0
        /// <summary>
        /// Checks that the analyzer maps an empty input to an empty term.
        /// </summary>
        public virtual void TestEmptyTerm()
        {
            const string empty = "";
            Analyzer analyzerUnderTest = new AnalyzerAnonymousInnerClassHelper2();
            CheckOneTerm(analyzerUnderTest, empty, empty);
        }
        /// <summary>
        /// Runs CheckRandomData against ten analyzers built from randomly chosen
        /// (lower, upper) bounds, then against a second analyzer kind with the two
        /// trailing boolean arguments set to false.
        /// </summary>
        public virtual void TestRandomStrings()
        {
            int round = 0;
            while (round < 10)
            {
                // lower is drawn from [2, 10]; upper from [lower, 20].
                int lower = TestUtil.NextInt(Random(), 2, 10);
                int upper = TestUtil.NextInt(Random(), lower, 20);

                Analyzer bounded = new AnalyzerAnonymousInnerClassHelper(this, lower, upper);
                CheckRandomData(Random(), bounded, 100 * RANDOM_MULTIPLIER, 20);
                CheckRandomData(Random(), bounded, 10 * RANDOM_MULTIPLIER, 8192);

                round++;
            }

            Analyzer second = new AnalyzerAnonymousInnerClassHelper2(this);
            CheckRandomData(Random(), second, 1000 * RANDOM_MULTIPLIER, 20, false, false);
            CheckRandomData(Random(), second, 100 * RANDOM_MULTIPLIER, 8192, false, false);
        }
Ejemplo n.º 40
0
        /// <summary>
        /// Repeats an analysis-consistency check on a fixed graph-style input
        /// for 100 * RANDOM_MULTIPLIER iterations, building a new analyzer each time.
        /// </summary>
        public virtual void TestMockGraphTokenFilterOnGraphInput()
        {
            const string graphInput = "a/x:3 c/y:2 d e f/z:4 g h i j k";

            int iter = 0;
            while (iter < 100 * RANDOM_MULTIPLIER)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter=" + iter);
                }

                // Make new analyzer each time, because MGTF has fixed
                // seed:
                Analyzer freshAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this);
                CheckAnalysisConsistency(Random(), freshAnalyzer, false, graphInput);

                iter++;
            }
        }
Ejemplo n.º 41
0
        // NOTE: this is an invalid test... SynFilter today can't
        // properly consume a graph... we can re-enable this once
        // we fix that...
        /*
          // Adds MockGraphTokenFilter before SynFilter:
          public void testRandom2GraphBefore() throws Exception {
        final int numIters = atLeast(10);
        Random random = random();
        for (int i = 0; i < numIters; i++) {
          b = new SynonymMap.Builder(random.nextBoolean());
          final int numEntries = atLeast(10);
          for (int j = 0; j < numEntries; j++) {
            add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
          }
          final SynonymMap map = b.build();
          final boolean ignoreCase = random.nextBoolean();

          final Analyzer analyzer = new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
              Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
              TokenStream graph = new MockGraphTokenFilter(random(), tokenizer);
              return new TokenStreamComponents(tokenizer, new SynonymFilter(graph, map, ignoreCase));
            }
          };

          checkRandomData(random, analyzer, 1000*RANDOM_MULTIPLIER);
        }
          }
          */
        /// <summary>
        /// Adds MockGraphTokenFilter after SynFilter (per the original comment; the
        /// analyzer helper itself is defined elsewhere). For each iteration a synonym
        /// map with random entries is built and random data is run through an
        /// analyzer created from that map.
        /// </summary>
        public virtual void testRandom2GraphAfter()
        {
            int numIters = atLeast(3);

            // The local must not be named "random": in C# a local is in scope for its
            // whole block, so "Random random = random();" would bind the call to the
            // local being declared and fail to compile.
            Random rnd = random();

            for (int i = 0; i < numIters; i++)
            {
                b = new SynonymMap.Builder(rnd.nextBoolean());

                int numEntries = atLeast(10);
                for (int j = 0; j < numEntries; j++)
                {
                    add(randomNonEmptyString(), randomNonEmptyString(), rnd.nextBoolean());
                }

                SynonymMap map = b.build();
                bool ignoreCase = rnd.nextBoolean();

                Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, map, ignoreCase);

                checkRandomData(rnd, analyzer, 100);
            }
        }
        /// <summary>
        /// Runs random data through two analyzers: one built from a small in-memory
        /// dictionary and one built from the hyphenation tree loaded from the
        /// "da_UTF8.xml" resource.
        /// </summary>
        public virtual void TestRandomStrings()
        {
            CharArraySet dictionary = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
            Analyzer dictionaryAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, dictionary);
            CheckRandomData(Random(), dictionaryAnalyzer, 1000 * RANDOM_MULTIPLIER);

            // The resource stream is only needed while the hyphenation analyzer is exercised.
            using (var grammarStream = GetType().getResourceAsStream("da_UTF8.xml"))
            {
                HyphenationTree tree = HyphenationCompoundWordTokenFilter.GetHyphenationTree(grammarStream);
                Analyzer hyphenationAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, tree);
                CheckRandomData(Random(), hyphenationAnalyzer, 1000 * RANDOM_MULTIPLIER);
            }
        }
Ejemplo n.º 43
0
        /// <summary>
        /// Blast some random strings through the analyzer: ten rounds with randomly
        /// chosen (lower, upper) bounds, followed by two runs against a second
        /// analyzer kind with the trailing boolean arguments set to false.
        /// </summary>
        public virtual void testRandomStrings()
        {
            int round = 0;
            while (round < 10)
            {
                // lower is requested from the range 2..10, upper from lower..20.
                int lower = TestUtil.Next(random(), 2, 10);
                int upper = TestUtil.Next(random(), lower, 20);

                Analyzer bounded = new AnalyzerAnonymousInnerClassHelper(this, lower, upper);
                checkRandomData(random(), bounded, 100 * RANDOM_MULTIPLIER, 20);
                checkRandomData(random(), bounded, 10 * RANDOM_MULTIPLIER, 8192);

                round++;
            }

            Analyzer second = new AnalyzerAnonymousInnerClassHelper2(this);
            checkRandomData(random(), second, 1000 * RANDOM_MULTIPLIER, 20, false, false);
            checkRandomData(random(), second, 100 * RANDOM_MULTIPLIER, 8192, false, false);
        }
Ejemplo n.º 44
0
 // Wraps the given tokenizer (passed to the base constructor), remembers the
 // enclosing analyzer helper, sets the "first" flag to true and obtains the term,
 // payload and position-increment attributes via AddAttribute.
 public TokenFilterAnonymousInnerClassHelper(AnalyzerAnonymousInnerClassHelper2 outerInstance, Tokenizer tokenizer)
     : base(tokenizer)
 {
     this.OuterInstance = outerInstance;
     first = true;
     // NOTE(review): attribute registration order is kept as-is; confirm whether
     // anything downstream depends on it before reordering.
     termAtt = AddAttribute<ICharTermAttribute>();
     payloadAtt = AddAttribute<IPayloadAttribute>();
     posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
 }
        /// <summary>
        /// Blast some random strings through the analyzer: first one backed by a small
        /// in-memory dictionary, then one backed by the hyphenation tree loaded from
        /// the "da_UTF8.xml" resource.
        /// </summary>
        public virtual void testRandomStrings()
        {
            CharArraySet dictionary = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
            Analyzer dictionaryAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, dictionary);
            checkRandomData(random(), dictionaryAnalyzer, 1000 * RANDOM_MULTIPLIER);

            // Resolve the grammar resource to its external form and hand it to the loader.
            var grammarLocation = this.GetType().getResource("da_UTF8.xml").toExternalForm();
            InputSource grammarSource = new InputSource(grammarLocation);
            HyphenationTree tree = HyphenationCompoundWordTokenFilter.getHyphenationTree(grammarSource);

            Analyzer hyphenationAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, tree);
            checkRandomData(random(), hyphenationAnalyzer, 1000 * RANDOM_MULTIPLIER);
        }
 /// <summary>
 /// Test against a Nynorsk vocabulary file ("nn_light.txt").
 /// </summary>
 public virtual void TestNynorskVocabulary()
 {
     Analyzer nynorskAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this);
     var vocabulary = GetDataFile("nn_light.txt");

     VocabularyAssert.AssertVocabulary(nynorskAnalyzer, vocabulary);
 }
Ejemplo n.º 47
0
        //@Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
        /// <summary>
        /// Builds a char-normalization map with two rules ("t" -> "" and
        /// "tmakdbl" -> "c") and runs an analysis-consistency check on the text
        /// "gzw f quaxot".
        /// </summary>
        public virtual void testFinalOffsetSpecialCase()
        {
            NormalizeCharMap.Builder rules = new NormalizeCharMap.Builder();
            rules.add("t", "");
            // even though this below rule has no effect, the test passes if you remove it!!
            rules.add("tmakdbl", "c");
            NormalizeCharMap charMap = rules.build();

            Analyzer mappingAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, charMap);

            checkAnalysisConsistency(random(), mappingAnalyzer, false, "gzw f quaxot");
        }
Ejemplo n.º 48
0
        /// <summary>
        /// Registers the synonym rule "a b" -> "foo" (third add argument true) and checks
        /// the token stream produced for "a b c", then runs a consistency check on the
        /// same input.
        /// </summary>
        public virtual void testDoKeepOrig()
        {
            const string input = "a b c";

            b = new SynonymMap.Builder(true);
            add("a b", "foo", true);
            SynonymMap synonyms = b.build();

            Analyzer synonymAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, synonyms);

            // NOTE(review): argument roles below follow Lucene's assertAnalyzesTo
            // (terms, start offsets, end offsets, types, position increments,
            // position lengths, offsetsAreCorrect) - confirm against the base class.
            string[] terms = { "a", "foo", "b", "c" };
            int[] startOffsets = { 0, 0, 2, 4 };
            int[] endOffsets = { 1, 3, 3, 5 };
            int[] positionIncrements = { 1, 0, 1, 1 };
            int[] positionLengths = { 1, 2, 1, 1 };

            assertAnalyzesTo(synonymAnalyzer, input, terms, startOffsets, endOffsets, null, positionIncrements, positionLengths, true);
            checkAnalysisConsistency(random(), synonymAnalyzer, false, input);
        }
        /// <summary>
        /// Checks the tokens, offsets and position increments produced by three
        /// analyzers built around WordDelimiterFilter with the same flags and the
        /// same protected-word set ("NUTCH").
        /// </summary>
        public virtual void TestPositionIncrements()
        {
            int flags = WordDelimiterFilter.GENERATE_WORD_PARTS
                        | WordDelimiterFilter.GENERATE_NUMBER_PARTS
                        | WordDelimiterFilter.CATENATE_ALL
                        | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE
                        | WordDelimiterFilter.SPLIT_ON_NUMERICS
                        | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE;

            CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new string[] { "NUTCH" }, false);

            /* analyzer that uses whitespace + wdf */
            Analyzer plain = new AnalyzerAnonymousInnerClassHelper(this, flags, protWords);

            /* in this case, works as expected. */
            AssertAnalyzesTo(plain, "LUCENE / SOLR",
                new[] { "LUCENE", "SOLR" },
                new[] { 0, 9 },
                new[] { 6, 13 },
                new[] { 1, 1 });

            /* only in this case, posInc of 2 ?! */
            AssertAnalyzesTo(plain, "LUCENE / solR",
                new[] { "LUCENE", "sol", "solR", "R" },
                new[] { 0, 9, 9, 12 },
                new[] { 6, 12, 13, 13 },
                new[] { 1, 1, 0, 1 });

            AssertAnalyzesTo(plain, "LUCENE / NUTCH SOLR",
                new[] { "LUCENE", "NUTCH", "SOLR" },
                new[] { 0, 9, 15 },
                new[] { 6, 14, 19 },
                new[] { 1, 1, 1 });

            /* analyzer that will consume tokens with large position increments */
            Analyzer largeGaps = new AnalyzerAnonymousInnerClassHelper2(this, flags, protWords);

            /* increment of "largegap" is preserved */
            AssertAnalyzesTo(largeGaps, "LUCENE largegap SOLR",
                new[] { "LUCENE", "largegap", "SOLR" },
                new[] { 0, 7, 16 },
                new[] { 6, 15, 20 },
                new[] { 1, 10, 1 });

            /* the "/" had a position increment of 10, where did it go?!?!! */
            AssertAnalyzesTo(largeGaps, "LUCENE / SOLR",
                new[] { "LUCENE", "SOLR" },
                new[] { 0, 9 },
                new[] { 6, 13 },
                new[] { 1, 11 });

            /* in this case, the increment of 10 from the "/" is carried over */
            AssertAnalyzesTo(largeGaps, "LUCENE / solR",
                new[] { "LUCENE", "sol", "solR", "R" },
                new[] { 0, 9, 9, 12 },
                new[] { 6, 12, 13, 13 },
                new[] { 1, 11, 0, 1 });

            AssertAnalyzesTo(largeGaps, "LUCENE / NUTCH SOLR",
                new[] { "LUCENE", "NUTCH", "SOLR" },
                new[] { 0, 9, 15 },
                new[] { 6, 14, 19 },
                new[] { 1, 11, 1 });

            Analyzer withStopwords = new AnalyzerAnonymousInnerClassHelper3(this, flags, protWords);

            AssertAnalyzesTo(withStopwords, "lucene.solr",
                new[] { "lucene", "lucenesolr", "solr" },
                new[] { 0, 0, 7 },
                new[] { 6, 11, 11 },
                new[] { 1, 0, 1 });

            /* the stopword should add a gap here */
            AssertAnalyzesTo(withStopwords, "the lucene.solr",
                new[] { "lucene", "lucenesolr", "solr" },
                new[] { 4, 4, 11 },
                new[] { 10, 15, 15 },
                new[] { 2, 0, 1 });
        }
 /// <summary>
 /// Checks that the analyzer maps an empty input to an empty term.
 /// </summary>
 public virtual void TestEmptyTerm()
 {
     const string empty = "";
     Analyzer analyzerUnderTest = new AnalyzerAnonymousInnerClassHelper2(this);
     CheckOneTerm(analyzerUnderTest, empty, empty);
 }
 /// <summary>
 /// Test against a Nynorsk vocabulary file ("nn_light.txt").
 /// The file stream is opened read-only and is disposed when the assertion
 /// finishes, whether it passes or throws.
 /// </summary>
 public virtual void testNynorskVocabulary()
 {
     Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);

     // Previously the FileStream was created inline and never disposed, leaking
     // the file handle; "using" guarantees it is released.
     using (var vocabulary = new System.IO.FileStream(getDataFile("nn_light.txt"), System.IO.FileMode.Open, System.IO.FileAccess.Read))
     {
         assertVocabulary(analyzer, vocabulary);
     }
 }
Ejemplo n.º 52
0
 /// <summary>
 /// Runs 1000 * RANDOM_MULTIPLIER rounds of random data through an analyzer
 /// created for the given snowball language name.
 /// </summary>
 /// <param name="snowballLanguage">Language name forwarded to the analyzer helper.</param>
 public virtual void checkRandomStrings(string snowballLanguage)
 {
     Analyzer languageAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, snowballLanguage);

     checkRandomData(
         random(),
         languageAnalyzer,
         1000 * RANDOM_MULTIPLIER);
 }
 /// <summary>
 /// Analyzes a sentence containing U+08E6 (a combining mark new in Unicode 6.1)
 /// followed by an e-mail address, and checks the resulting terms.
 /// </summary>
 public virtual void TestVersion36()
 {
     Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this);

     // The address had been replaced by the scraping artefact "[email protected]";
     // the expected terms "lucene" and "apache.org" show the intended input.
     AssertAnalyzesTo(a, "this is just a t\u08E6st lucene@apache.org", new string[] { "this", "is", "just", "a", "t", "st", "lucene", "apache.org" }); // new combining mark in 6.1
 }
Ejemplo n.º 54
0
        /// <summary>
        /// Checks the tokens, offsets and position increments produced by three
        /// analyzers built with the same word-delimiter flags and the same
        /// protected-word set ("NUTCH").
        /// </summary>
        // NOTE(review): the Java original carried @Test; add the test attribute used
        // by this project's test framework if discovery relies on it.
        public virtual void testPositionIncrements()
        {
            int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;

            // Java's diamond form "new HashSet<>(...)" is not valid C#; spell out the
            // element type and use a collection initializer instead.
            CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<string> { "NUTCH" }, false);

            /* analyzer that uses whitespace + wdf */
            Analyzer a = new AnalyzerAnonymousInnerClassHelper(this, flags, protWords);

            /* in this case, works as expected. */
            assertAnalyzesTo(a, "LUCENE / SOLR", new string[] {"LUCENE", "SOLR"}, new int[] {0, 9}, new int[] {6, 13}, new int[] {1, 1});

            /* only in this case, posInc of 2 ?! */
            assertAnalyzesTo(a, "LUCENE / solR", new string[] {"LUCENE", "sol", "solR", "R"}, new int[] {0, 9, 9, 12}, new int[] {6, 12, 13, 13}, new int[] {1, 1, 0, 1});

            assertAnalyzesTo(a, "LUCENE / NUTCH SOLR", new string[] {"LUCENE", "NUTCH", "SOLR"}, new int[] {0, 9, 15}, new int[] {6, 14, 19}, new int[] {1, 1, 1});

            /* analyzer that will consume tokens with large position increments */
            Analyzer a2 = new AnalyzerAnonymousInnerClassHelper2(this, flags, protWords);

            /* increment of "largegap" is preserved */
            assertAnalyzesTo(a2, "LUCENE largegap SOLR", new string[] {"LUCENE", "largegap", "SOLR"}, new int[] {0, 7, 16}, new int[] {6, 15, 20}, new int[] {1, 10, 1});

            /* the "/" had a position increment of 10, where did it go?!?!! */
            assertAnalyzesTo(a2, "LUCENE / SOLR", new string[] {"LUCENE", "SOLR"}, new int[] {0, 9}, new int[] {6, 13}, new int[] {1, 11});

            /* in this case, the increment of 10 from the "/" is carried over */
            assertAnalyzesTo(a2, "LUCENE / solR", new string[] {"LUCENE", "sol", "solR", "R"}, new int[] {0, 9, 9, 12}, new int[] {6, 12, 13, 13}, new int[] {1, 11, 0, 1});

            assertAnalyzesTo(a2, "LUCENE / NUTCH SOLR", new string[] {"LUCENE", "NUTCH", "SOLR"}, new int[] {0, 9, 15}, new int[] {6, 14, 19}, new int[] {1, 11, 1});

            Analyzer a3 = new AnalyzerAnonymousInnerClassHelper3(this, flags, protWords);

            assertAnalyzesTo(a3, "lucene.solr", new string[] {"lucene", "lucenesolr", "solr"}, new int[] {0, 0, 7}, new int[] {6, 11, 11}, new int[] {1, 0, 1});

            /* the stopword should add a gap here */
            assertAnalyzesTo(a3, "the lucene.solr", new string[] {"lucene", "lucenesolr", "solr"}, new int[] {4, 4, 11}, new int[] {10, 15, 15}, new int[] {2, 0, 1});
        }
Ejemplo n.º 55
0
 /// <summary>
 /// For at least 30 rounds, builds an analyzer from a random automaton, a random
 /// lowercase flag and a random limit in [0, 500], runs 100 iterations of random
 /// data through it and disposes it.
 /// </summary>
 public virtual void TestRandomRegexps()
 {
     int rounds = AtLeast(30);
     int round = 0;
     while (round < rounds)
     {
         // The random draws below happen in the same order as before:
         // automaton, lowercase flag, limit.
         var automaton = AutomatonTestUtil.RandomAutomaton(Random());
         CharacterRunAutomaton dfa = new CharacterRunAutomaton(automaton);
         bool lowercase = Random().NextBoolean();
         int limit = TestUtil.NextInt(Random(), 0, 500);

         Analyzer regexAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, dfa, lowercase, limit);
         CheckRandomData(Random(), regexAnalyzer, 100);
         regexAnalyzer.Dispose();

         round++;
     }
 }
 /// <summary>
 /// Compares the token streams of a MockAnalyzer (configured with jvmLetter and a
 /// max token length of 255) and of the analyzer helper on random simple strings
 /// of up to 8192 characters, for at least 50 rounds.
 /// </summary>
 public virtual void TestLetterAsciiHuge()
 {
     Random rnd = Random();
     int maxLength = 8192; // CharTokenizer.IO_BUFFER_SIZE*2

     // match CharTokenizer's max token length
     MockAnalyzer left = new MockAnalyzer(rnd, jvmLetter, false) { MaxTokenLength = 255 };
     Analyzer right = new AnalyzerAnonymousInnerClassHelper2(this);

     int rounds = AtLeast(50);
     int round = 0;
     while (round < rounds)
     {
         string text = TestUtil.RandomSimpleString(rnd, maxLength);

         // Streams are created left first, then right, exactly as before.
         var leftStream = left.TokenStream("foo", newStringReader(text));
         var rightStream = right.TokenStream("foo", newStringReader(text));
         assertEquals(text, leftStream, rightStream);

         round++;
     }
 }
Ejemplo n.º 57
0
        /// <summary>
        /// Analyzes a sentence containing U+08E6 (a combining mark new in Unicode 6.1)
        /// followed by an e-mail address, and checks the resulting terms.
        /// </summary>
        public virtual void TestVersion36()
        {
            Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this);

            // The address had been replaced by the scraping artefact "[email protected]";
            // the expected terms "lucene" and "apache.org" show the intended input.
            AssertAnalyzesTo(a, "this is just a t\u08E6st lucene@apache.org", new string[] { "this", "is", "just", "a", "t", "st", "lucene", "apache.org" }); // new combining mark in 6.1
        }
Ejemplo n.º 58
0
        /// <summary>
        /// Indexes 30 documents, reopens the index in append mode and adds 1470 more,
        /// force-merges to a single segment, and asserts that the maximum disk usage
        /// reported by the mock directory, both while adding and after closing, stays
        /// below 150 times the usage recorded before the second writer was opened.
        /// The test is skipped (via AssumeFalse) when either the "id" or "content"
        /// postings format is "Memory".
        /// </summary>
        public virtual void TestCommitOnCloseDiskUsage()
        {
            // MemoryCodec, since it uses FST, is not necessarily
            // "additive", ie if you add up N small FSTs, then merge
            // them, the merged result can easily be larger than the
            // sum because the merged FST may use array encoding for
            // some arcs (which uses more space):

            string idFormat = TestUtil.GetPostingsFormat("id");
            string contentFormat = TestUtil.GetPostingsFormat("content");
            AssumeFalse("this test cannot run with Memory codec", idFormat.Equals("Memory") || contentFormat.Equals("Memory"));
            MockDirectoryWrapper dir = NewMockDirectory();
            Analyzer analyzer;
            // Randomly pick one of the two analyzer variants.
            if (Random().NextBoolean())
            {
                // no payloads
                analyzer = new AnalyzerAnonymousInnerClassHelper(this);
            }
            else
            {
                // fixed length payloads
                int length = Random().Next(200);
                analyzer = new AnalyzerAnonymousInnerClassHelper2(this, length);
            }

            // Phase 1: build the starting index with 30 documents.
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(10).SetReaderPooling(false).SetMergePolicy(NewLogMergePolicy(10)));
            for (int j = 0; j < 30; j++)
            {
                TestIndexWriter.AddDocWithIndex(writer, j);
            }
            writer.Dispose();
            dir.ResetMaxUsedSizeInBytes();

            // The baseline is read right after the reset above and after tracking is enabled.
            dir.TrackDiskUsage = true;
            long startDiskUsage = dir.MaxUsedSizeInBytes;
            // Phase 2: append 1470 documents using a serial merge scheduler.
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode_e.APPEND).SetMaxBufferedDocs(10).SetMergeScheduler(new SerialMergeScheduler()).SetReaderPooling(false).SetMergePolicy(NewLogMergePolicy(10)));
            for (int j = 0; j < 1470; j++)
            {
                TestIndexWriter.AddDocWithIndex(writer, j);
            }
            long midDiskUsage = dir.MaxUsedSizeInBytes;
            // Reset again so endDiskUsage only reflects the force-merge, close and reopen below.
            dir.ResetMaxUsedSizeInBytes();
            writer.ForceMerge(1);
            writer.Dispose();

            DirectoryReader.Open(dir).Dispose();

            long endDiskUsage = dir.MaxUsedSizeInBytes;

            // Ending index is 50X as large as starting index; due
            // to 3X disk usage normally we allow 150X max
            // transient usage.  If something is wrong w/ deleter
            // and it doesn't delete intermediate segments then it
            // will exceed this 150X:
            // System.out.println("start " + startDiskUsage + "; mid " + midDiskUsage + ";end " + endDiskUsage);
            Assert.IsTrue(midDiskUsage < 150 * startDiskUsage, "writer used too much space while adding documents: mid=" + midDiskUsage + " start=" + startDiskUsage + " end=" + endDiskUsage + " max=" + (startDiskUsage * 150));
            Assert.IsTrue(endDiskUsage < 150 * startDiskUsage, "writer used too much space after close: endDiskUsage=" + endDiskUsage + " startDiskUsage=" + startDiskUsage + " max=" + (startDiskUsage * 150));
            dir.Dispose();
        }
Ejemplo n.º 59
0
        /// <summary>
        /// Runs 1000 * RANDOM_MULTIPLIER rounds of random data through an analyzer
        /// created for the given snowball language name.
        /// </summary>
        /// <param name="snowballLanguage">Language name forwarded to the analyzer helper.</param>
        public virtual void CheckRandomStrings(string snowballLanguage)
        {
            Analyzer languageAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, snowballLanguage);
            CheckRandomData(
                Random(),
                languageAnalyzer,
                1000 * RANDOM_MULTIPLIER);
        }
Ejemplo n.º 60
0
        /// <summary>
        /// Indexes one document whose field "f1" contains "a 5 a a", then reads the
        /// positions of term "a" from the newest segment and checks that it occurs
        /// three times at positions 0, 6 and 7, with a payload only on the first
        /// occurrence.
        /// </summary>
        public virtual void TestTokenReuse()
        {
            const string field = "f1";

            Analyzer reuseAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this);

            IndexWriter indexWriter = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, reuseAnalyzer));

            Document document = new Document();
            document.Add(NewTextField(field, "a 5 a a", Field.Store.YES));
            indexWriter.AddDocument(document);
            indexWriter.Commit();

            // Grab the segment info before the writer is disposed.
            SegmentCommitInfo newest = indexWriter.NewestSegment();
            indexWriter.Dispose();

            SegmentReader segmentReader = new SegmentReader(newest, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random()));

            DocsAndPositionsEnum positions = MultiFields.GetTermPositionsEnum(segmentReader, segmentReader.LiveDocs, field, new BytesRef("a"));
            Assert.IsTrue(positions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

            int occurrences = positions.Freq();
            Assert.AreEqual(3, occurrences);

            // First occurrence: position 0, payload present.
            Assert.AreEqual(0, positions.NextPosition());
            Assert.IsNotNull(positions.Payload);

            // Second occurrence: position 6, no payload.
            Assert.AreEqual(6, positions.NextPosition());
            Assert.IsNull(positions.Payload);

            // Third occurrence: position 7, no payload.
            Assert.AreEqual(7, positions.NextPosition());
            Assert.IsNull(positions.Payload);

            segmentReader.Dispose();
        }