/// <summary>
        /// Builds the shared test index: one document carrying the "field", "repeated"
        /// (added twice) and "palindrome" text fields, followed by two identical documents
        /// carrying the "nonexist" field. The resulting reader and searcher are stored in
        /// <c>Reader</c> and <c>Searcher</c>.
        /// </summary>
        public static void BeforeClass()
        {
            Directory = NewDirectory();
            Analyzer indexAnalyzer = new AnalyzerAnonymousInnerClassHelper();
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), Directory, indexAnalyzer);

            // First document: the "repeated" field is added twice on purpose.
            Documents.Document first = new Documents.Document();
            first.Add(NewTextField("field", "one two three four five", Field.Store.YES));
            first.Add(NewTextField("repeated", "this is a repeated field - first part", Field.Store.YES));
            IndexableField secondRepeatedPart = NewTextField("repeated", "second part of a repeated field", Field.Store.YES);
            first.Add(secondRepeatedPart);
            first.Add(NewTextField("palindrome", "one two three two one", Field.Store.YES));
            indexWriter.AddDocument(first);

            // Two further documents with exactly the same "nonexist" content.
            for (int copy = 0; copy < 2; copy++)
            {
                Documents.Document other = new Documents.Document();
                other.Add(NewTextField("nonexist", "phrase exist notexist exist found", Field.Store.YES));
                indexWriter.AddDocument(other);
            }

            // Grab the reader before disposing the writer, then search over it.
            Reader = indexWriter.Reader;
            indexWriter.Dispose();

            Searcher = NewSearcher(Reader);
        }
        /// <summary>
        /// Loads the synonym map through <paramref name="loader"/> and stores it in <c>map</c>.
        /// The configured <c>format</c> selects the parser type: null or "solr" maps to
        /// <c>SolrSynonymParser</c>, "wordnet" maps to <c>WordnetSynonymParser</c>, and any
        /// other value is passed through unchanged as the parser type name.
        /// </summary>
        /// <param name="loader">Resource loader handed to the tokenizer-factory and synonym loading helpers.</param>
        /// <exception cref="IOException">Wraps any exception raised while resolving the parser or loading the synonyms.</exception>
        public void Inform(IResourceLoader loader)
        {
            // A tokenizer factory is only loaded when one was configured.
            TokenizerFactory factory = null;
            if (tokenizerFactory != null)
            {
                factory = LoadTokenizerFactory(loader, tokenizerFactory);
            }

            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, factory);

            try
            {
                string parserTypeName;
                if (format == null || format.Equals("solr"))
                {
                    parserTypeName = typeof(SolrSynonymParser).AssemblyQualifiedName;
                }
                else if (format.Equals("wordnet"))
                {
                    parserTypeName = typeof(WordnetSynonymParser).AssemblyQualifiedName;
                }
                else
                {
                    parserTypeName = format;
                }

                // TODO: expose dedup as a parameter?
                map = LoadSynonyms(loader, parserTypeName, true, analyzer);
            }
            catch (Exception e)
            {
                throw new IOException("Error parsing synonyms file:", e);
            }
        }
 /// <summary>
 /// Feeds random Unicode strings through the helper analyzer and asserts that every
 /// code point covered by a token's start/end offsets in the input is a letter.
 /// Finishes with a CheckRandomData pass over the same analyzer.
 /// </summary>
 public virtual void TestCrossPlaneNormalization()
 {
     var analyzer = new AnalyzerAnonymousInnerClassHelper();
     var rounds = 1000 * RANDOM_MULTIPLIER;
     for (var round = 0; round < rounds; round++)
     {
         var input = TestUtil.RandomUnicodeString(Random());
         var stream = analyzer.TokenStream("foo", input);
         try
         {
             stream.Reset();
             var offsets = stream.AddAttribute<IOffsetAttribute>();
             while (stream.IncrementToken())
             {
                 // Slice the original input using the offsets reported for the token.
                 var start = offsets.StartOffset();
                 var length = offsets.EndOffset() - start;
                 var highlightedText = input.Substring(start, length);

                 // Walk the slice one code point at a time (a code point may span two chars).
                 var index = 0;
                 while (index < highlightedText.Length)
                 {
                     var codePoint = char.ConvertToUtf32(highlightedText, index);
                     assertTrue("non-letter:" + codePoint.ToString("x"), Character.IsLetter(codePoint));
                     index += Character.CharCount(codePoint);
                 }
             }
             stream.End();
         }
         finally
         {
             IOUtils.CloseWhileHandlingException(stream);
         }
     }
     // just for fun
     CheckRandomData(Random(), analyzer, rounds);
 }
        /// <summary>
        /// Blasts random strings through two analyzers in turn: first the one built by
        /// <c>AnalyzerAnonymousInnerClassHelper</c>, then the one built by
        /// <c>AnalyzerAnonymousInnerClassHelper2</c>.
        /// </summary>
        /// <remarks>Ported from the Java method <c>testRandomStrings() throws Exception</c>.</remarks>
        public virtual void testRandomStrings()
        {
            Analyzer firstAnalyzer = new AnalyzerAnonymousInnerClassHelper(this);
            checkRandomData(random(), firstAnalyzer, 1000 * RANDOM_MULTIPLIER);

            Analyzer secondAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this);
            checkRandomData(random(), secondAnalyzer, 1000 * RANDOM_MULTIPLIER);
        }
Exemple #5
0
 /// <summary>
 /// For each entry of <c>SNOWBALL_LANGS</c>, builds the helper analyzer and checks that
 /// the empty term maps to the empty term.
 /// </summary>
 /// <remarks>
 /// Ported from the Java method <c>testEmptyTerm() throws java.io.IOException</c>.
 /// NOTE(review): the loop variable is never passed to the analyzer helper, so every
 /// iteration appears to exercise the same analyzer - confirm whether the helper should
 /// receive the language.
 /// </remarks>
 public virtual void testEmptyTerm()
 {
     foreach (string language in SNOWBALL_LANGS)
     {
         Analyzer emptyTermAnalyzer = new AnalyzerAnonymousInnerClassHelper(this);
         checkOneTerm(emptyTermAnalyzer, "", "");
     }
 }
        /// <summary>
        /// Asserts that "ab" analyzes to the single token "aab" with offsets 0-2, both
        /// with the helper analyzer directly and when that analyzer is wrapped in a
        /// <c>PerFieldAnalyzerWrapper</c> with an empty per-field map.
        /// </summary>
        public virtual void TestCharFilters()
        {
            Analyzer direct = new AnalyzerAnonymousInnerClassHelper(this);
            AssertAnalyzesTo(direct, "ab", new[] { "aab" }, new[] { 0 }, new[] { 2 });

            // now wrap in PFAW
            var noFieldOverrides = new Dictionary<string, Analyzer>();
            PerFieldAnalyzerWrapper wrapped = new PerFieldAnalyzerWrapper(direct, noFieldOverrides);

            AssertAnalyzesTo(wrapped, "ab", new[] { "aab" }, new[] { 0 }, new[] { 2 });
        }
        /// <summary>
        /// Asserts that each of the inputs "aab", "aabaa" and "aabcdefgaa" comes back from
        /// the helper analyzer as a single unchanged token spanning the whole input
        /// (start offset 0, end offset equal to the input length).
        /// </summary>
        public virtual void Test()
        {
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);

            string[] inputs = { "aab", "aabaa", "aabcdefgaa" };
            foreach (string input in inputs)
            {
                // Expected end offsets are 3, 5 and 10 - i.e. the input lengths.
                AssertAnalyzesTo(analyzer, input, new[] { input }, new[] { 0 }, new[] { input.Length });
            }
        }
        /// <summary>
        /// Asserts that "ab" analyzes to the single token "aab" with offsets 0-2, both
        /// with the helper analyzer directly and when that analyzer is wrapped in a
        /// <c>PerFieldAnalyzerWrapper</c> with an empty per-field map.
        /// </summary>
        /// <remarks>Ported from the Java method <c>testCharFilters() throws Exception</c>.</remarks>
        public virtual void testCharFilters()
        {
            Analyzer a = new AnalyzerAnonymousInnerClassHelper(this);
            assertAnalyzesTo(a, "ab", new string[] {"aab"}, new int[] {0}, new int[] {2});

            // now wrap in PFAW
            // Enumerable.Empty takes a single type argument and cannot produce a map, so an
            // empty dictionary is used as the "no per-field overrides" argument instead.
            PerFieldAnalyzerWrapper p = new PerFieldAnalyzerWrapper(a, new System.Collections.Generic.Dictionary<string, Analyzer>());

            assertAnalyzesTo(p, "ab", new string[] {"aab"}, new int[] {0}, new int[] {2});
        }
        /// <summary>
        /// Runs CheckRandomData against the helper analyzer configured with the word set
        /// { "a", "b" }.
        /// </summary>
        public virtual void TestRandomStrings()
        {
            ISet<string> wordSet = new HashSet<string> { "a", "b" };

            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, wordSet);

            CheckRandomData(Random(), analyzer, 1000 * RANDOM_MULTIPLIER);
        }
 /// <summary>
 /// Compares, for 1000 random simple strings, the token stream of a
 /// <c>MockAnalyzer</c> built from <c>jvmLetter</c> with the token stream of the
 /// helper analyzer, using assertEquals with the input string as its first argument.
 /// </summary>
 public virtual void TestLetterAscii()
 {
     Random rnd = Random();
     Analyzer expectedAnalyzer = new MockAnalyzer(rnd, jvmLetter, false);
     Analyzer actualAnalyzer = new AnalyzerAnonymousInnerClassHelper(this);
     for (int round = 0; round < 1000; round++)
     {
         string text = TestUtil.RandomSimpleString(rnd);
         // Each analyzer gets its own reader over the same text.
         var expectedStream = expectedAnalyzer.TokenStream("foo", newStringReader(text));
         var actualStream = actualAnalyzer.TokenStream("foo", newStringReader(text));
         assertEquals(text, expectedStream, actualStream);
     }
 }
        /// <summary>
        /// For the supplied language, runs the stemmer against all strings in voc.txt;
        /// the output should be the same as the strings in output.txt. Both files are
        /// looked up under <paramref name="dataDirectory"/> inside TestSnowballVocabData.zip.
        /// </summary>
        /// <param name="snowballLanguage">Language name handed to the analyzer helper.</param>
        /// <param name="dataDirectory">Directory prefix of voc.txt and output.txt.</param>
        /// <remarks>Ported from the Java method
        /// <c>assertCorrectOutput(final String snowballLanguage, String dataDirectory) throws java.io.IOException</c>.</remarks>
        private void assertCorrectOutput(string snowballLanguage, string dataDirectory)
        {
            if (VERBOSE)
            {
                Console.WriteLine("checking snowball language: " + snowballLanguage);
            }

            Analyzer stemmingAnalyzer = new AnalyzerAnonymousInnerClassHelper(this, snowballLanguage);

            string vocabularyPath = dataDirectory + "/voc.txt";
            string expectedOutputPath = dataDirectory + "/output.txt";
            assertVocabulary(stemmingAnalyzer, getDataFile("TestSnowballVocabData.zip"), vocabularyPath, expectedOutputPath);
        }
        /// <summary>
        /// Blasts random strings through the helper analyzer configured with the word
        /// set { "a", "b" }.
        /// </summary>
        /// <remarks>Ported from the Java method <c>testRandomStrings() throws Exception</c>.</remarks>
        public virtual void testRandomStrings()
        {
            ISet<string> wordSet = new HashSet<string> { "a", "b" };

            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, wordSet);

            checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
        }
        /// <summary>
        /// Runs CheckAnalysisConsistency on the fixed input "wmgddzunizdomqyj" against the
        /// helper analyzer built from a three-word <c>CharArraySet</c> and a
        /// <c>NormalizeCharMap</c> with three mappings.
        /// </summary>
        public virtual void Test()
        {
            CharArraySet cas = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
            foreach (string word in new[] { "jjp", "wlmwoknt", "tcgyreo" })
            {
                cas.add(word);
            }

            NormalizeCharMap.Builder mappings = new NormalizeCharMap.Builder();
            mappings.Add("mtqlpi", "");
            mappings.Add("mwoknt", "jjp");
            mappings.Add("tcgyreo", "zpfpajyws");
            NormalizeCharMap charMap = mappings.Build();

            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, cas, charMap);
            CheckAnalysisConsistency(Random(), analyzer, false, "wmgddzunizdomqyj");
        }
        /// <summary>
        /// For both values of <c>consumeAll</c>, checks the tokens and offsets produced by
        /// the helper analyzer for inputs longer than, shorter than and exactly at the
        /// two-token expectation. A final offset is supplied to the assertion only when
        /// <c>consumeAll</c> is true; otherwise null is passed.
        /// </summary>
        public virtual void TestMaxPosition2()
        {
            bool[] consumeAllModes = { true, false };
            foreach (bool consumeAll in consumeAllModes)
            {
                Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(consumeAll);

                // don't use assertAnalyzesTo here, as the end offset is not the end of the
                // string (unless consumeAll is true, in which case it is correct)!
                int? finalOffset = consumeAll ? 16 : (int?)null;
                AssertTokenStreamContents(analyzer.TokenStream("dummy", "1  2     3  4  5"), new[] { "1", "2" }, new[] { 0, 3 }, new[] { 1, 4 }, finalOffset);

                finalOffset = consumeAll ? 9 : (int?)null;
                AssertTokenStreamContents(analyzer.TokenStream("dummy", new StringReader("1 2 3 4 5")), new[] { "1", "2" }, new[] { 0, 2 }, new[] { 1, 3 }, finalOffset);

                // less than the limit, ensure we behave correctly
                finalOffset = consumeAll ? 3 : (int?)null;
                AssertTokenStreamContents(analyzer.TokenStream("dummy", "1  "), new[] { "1" }, new[] { 0 }, new[] { 1 }, finalOffset);

                // equal to limit
                finalOffset = consumeAll ? 6 : (int?)null;
                AssertTokenStreamContents(analyzer.TokenStream("dummy", "1  2  "), new[] { "1", "2" }, new[] { 0, 3 }, new[] { 1, 4 }, finalOffset);
            }
        }
        /// <summary>
        /// Parses <c>synonymsFile</c> with a <c>WordnetSynonymParser</c>, builds the
        /// synonym map and checks three inputs against the helper analyzer that uses it.
        /// </summary>
        public virtual void TestSynonyms()
        {
            WordnetSynonymParser wordnetParser = new WordnetSynonymParser(true, true, new MockAnalyzer(Random()));
            wordnetParser.Parse(new StringReader(synonymsFile));
            SynonymMap synonyms = wordnetParser.Build();

            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, synonyms);

            /* all expansions */
            AssertAnalyzesTo(
                analyzer,
                "Lost in the woods",
                new[] { "Lost", "in", "the", "woods", "wood", "forest" },
                new[] { 0, 5, 8, 12, 12, 12 },
                new[] { 4, 7, 11, 17, 17, 17 },
                new[] { 1, 1, 1, 1, 0, 0 });

            /* single quote */
            AssertAnalyzesTo(analyzer, "king", new[] { "king", "baron" });

            /* multi words */
            AssertAnalyzesTo(analyzer, "king's evil", new[] { "king's", "king's", "evil", "meany" });
        }
        /// <summary>
        /// Tests that offsets are correct when a mapping char filter has previously been
        /// applied: "a" is mapped to "一二" and "b" to "二三" before analysis of "ab".
        /// </summary>
        /// <remarks>Ported from the Java method <c>testChangedOffsets() throws java.io.IOException</c>.</remarks>
        public virtual void testChangedOffsets()
        {
            NormalizeCharMap.Builder mappings = new NormalizeCharMap.Builder();
            mappings.add("a", "一二");
            mappings.add("b", "二三");
            NormalizeCharMap charMap = mappings.build();
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, charMap);

            assertAnalyzesTo(
                analyzer,
                "ab",
                new[] {"一二", "二二", "二三"},
                new[] {0, 0, 1},
                new[] {1, 1, 2});

            // note: offsets are strange since this is how the charfilter maps them...
            // before bigramming, the 4 tokens look like:
            //   { 0, 0, 1, 1 },
            //   { 0, 1, 1, 2 }
        }
        /// <summary>
        /// Parses <c>synonymsFile</c> with a <c>WordnetSynonymParser</c>, builds the
        /// synonym map and checks three inputs against the helper analyzer that uses it.
        /// </summary>
        /// <remarks>Ported from the Java method <c>testSynonyms() throws Exception</c>.</remarks>
        public virtual void testSynonyms()
        {
            WordnetSynonymParser wordnetParser = new WordnetSynonymParser(true, true, new MockAnalyzer(random()));
            wordnetParser.parse(new StringReader(synonymsFile));
            SynonymMap synonyms = wordnetParser.build();

            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, synonyms);

            /* all expansions */
            assertAnalyzesTo(
                analyzer,
                "Lost in the woods",
                new[] {"Lost", "in", "the", "woods", "wood", "forest"},
                new[] {0, 5, 8, 12, 12, 12},
                new[] {4, 7, 11, 17, 17, 17},
                new[] {1, 1, 1, 1, 0, 0});

            /* single quote */
            assertAnalyzesTo(analyzer, "king", new[] {"king", "baron"});

            /* multi words */
            assertAnalyzesTo(analyzer, "king's evil", new[] {"king's", "king's", "evil", "meany"});
        }
        /// <summary>
        /// Runs checkAnalysisConsistency on the fixed input "wmgddzunizdomqyj" against the
        /// helper analyzer built from a three-word <c>CharArraySet</c> and a
        /// <c>NormalizeCharMap</c> with three mappings.
        /// </summary>
        /// <remarks>Ported from the Java method <c>test() throws Exception</c>.</remarks>
        public virtual void test()
        {
            CharArraySet cas = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
            foreach (string word in new[] { "jjp", "wlmwoknt", "tcgyreo" })
            {
                cas.add(word);
            }

            NormalizeCharMap.Builder mappings = new NormalizeCharMap.Builder();
            mappings.add("mtqlpi", "");
            mappings.add("mwoknt", "jjp");
            mappings.add("tcgyreo", "zpfpajyws");
            NormalizeCharMap charMap = mappings.build();

            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, cas, charMap);
            checkAnalysisConsistency(random(), analyzer, false, "wmgddzunizdomqyj");
        }
        /// <summary>
        /// Checks tokens, offsets and position increments produced by three helper
        /// analyzers that share the same flag set and the protected word "NUTCH".
        /// </summary>
        /// <remarks>
        /// Ported from the Java method <c>@Test testPositionIncrements() throws Exception</c>.
        /// The Java original built the protected-word set with
        /// <c>new HashSet&lt;&gt;(Arrays.asList("NUTCH"))</c>; C# has no diamond operator,
        /// so the set is created with an explicit element type and a collection initializer.
        /// </remarks>
        public virtual void testPositionIncrements()
        {
            int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;
            CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<string> { "NUTCH" }, false);

            /* analyzer that uses whitespace + wdf */
            Analyzer a = new AnalyzerAnonymousInnerClassHelper(this, flags, protWords);

            /* in this case, works as expected. */
            assertAnalyzesTo(a, "LUCENE / SOLR", new string[] {"LUCENE", "SOLR"}, new int[] {0, 9}, new int[] {6, 13}, new int[] {1, 1});

            /* only in this case, posInc of 2 ?! */
            assertAnalyzesTo(a, "LUCENE / solR", new string[] {"LUCENE", "sol", "solR", "R"}, new int[] {0, 9, 9, 12}, new int[] {6, 12, 13, 13}, new int[] {1, 1, 0, 1});

            assertAnalyzesTo(a, "LUCENE / NUTCH SOLR", new string[] {"LUCENE", "NUTCH", "SOLR"}, new int[] {0, 9, 15}, new int[] {6, 14, 19}, new int[] {1, 1, 1});

            /* analyzer that will consume tokens with large position increments */
            Analyzer a2 = new AnalyzerAnonymousInnerClassHelper2(this, flags, protWords);

            /* increment of "largegap" is preserved */
            assertAnalyzesTo(a2, "LUCENE largegap SOLR", new string[] {"LUCENE", "largegap", "SOLR"}, new int[] {0, 7, 16}, new int[] {6, 15, 20}, new int[] {1, 10, 1});

            /* the "/" had a position increment of 10, where did it go?!?!! */
            assertAnalyzesTo(a2, "LUCENE / SOLR", new string[] {"LUCENE", "SOLR"}, new int[] {0, 9}, new int[] {6, 13}, new int[] {1, 11});

            /* in this case, the increment of 10 from the "/" is carried over */
            assertAnalyzesTo(a2, "LUCENE / solR", new string[] {"LUCENE", "sol", "solR", "R"}, new int[] {0, 9, 9, 12}, new int[] {6, 12, 13, 13}, new int[] {1, 11, 0, 1});

            assertAnalyzesTo(a2, "LUCENE / NUTCH SOLR", new string[] {"LUCENE", "NUTCH", "SOLR"}, new int[] {0, 9, 15}, new int[] {6, 14, 19}, new int[] {1, 11, 1});

            Analyzer a3 = new AnalyzerAnonymousInnerClassHelper3(this, flags, protWords);

            assertAnalyzesTo(a3, "lucene.solr", new string[] {"lucene", "lucenesolr", "solr"}, new int[] {0, 0, 7}, new int[] {6, 11, 11}, new int[] {1, 0, 1});

            /* the stopword should add a gap here */
            assertAnalyzesTo(a3, "the lucene.solr", new string[] {"lucene", "lucenesolr", "solr"}, new int[] {4, 4, 11}, new int[] {10, 15, 15}, new int[] {2, 0, 1});
        }
        /// <summary>
        /// Blasts random strings through the analyzer: for each of at least 10 rounds,
        /// builds a synonym map from at least 10 random non-empty string pairs and runs
        /// checkRandomData against the helper analyzer that uses it.
        /// </summary>
        /// <remarks>Ported from the Java method <c>testRandomStrings() throws Exception</c>.</remarks>
        public virtual void testRandomStrings()
        {
            int rounds = atLeast(10);
            for (int round = 0; round < rounds; round++)
            {
                SynonymMap.Builder builder = new SynonymMap.Builder(random().nextBoolean());

                int entryCount = atLeast(10);
                for (int entry = 0; entry < entryCount; entry++)
                {
                    add(builder, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
                }

                SynonymMap synonyms = builder.build();
                bool ignoreCase = random().nextBoolean();

                Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, synonyms, ignoreCase);

                checkRandomData(random(), analyzer, 200);
            }
        }
        /// <summary>
        /// For each of at least 10 rounds, builds a synonym map from at least 10 random
        /// non-empty string pairs and runs CheckRandomData against the helper analyzer
        /// that uses it together with a random ignore-case flag.
        /// </summary>
        public virtual void TestRandomStrings()
        {
            int rounds = AtLeast(10);
            for (int round = 0; round < rounds; round++)
            {
                SynonymMap.Builder builder = new SynonymMap.Builder(Random().nextBoolean());

                int entryCount = AtLeast(10);
                for (int entry = 0; entry < entryCount; entry++)
                {
                    Add(builder, RandomNonEmptyString(), RandomNonEmptyString(), Random().nextBoolean());
                }

                SynonymMap synonyms = builder.Build();
                bool ignoreCase = Random().nextBoolean();

                Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, synonyms, ignoreCase);

                CheckRandomData(Random(), analyzer, 200);
            }
        }
Exemple #22
0
        /// <summary>
        /// Asserts that "the quick brown fox" analyzes to "hte", "quick", "brown", "fox"
        /// with the int array { 1, 1, 1, 1 } passed as the third assertion argument
        /// (presumably the position increments, given the test name - confirm against
        /// the AssertAnalyzesTo overload).
        /// </summary>
        public virtual void TestFirstPosInc()
        {
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);

            string[] expectedTerms = { "hte", "quick", "brown", "fox" };
            int[] expectedValues = { 1, 1, 1, 1 };
            AssertAnalyzesTo(analyzer, "the quick brown fox", expectedTerms, expectedValues);
        }
 /// <summary>
 /// Checks that the helper analyzer maps the empty term to the empty term.
 /// </summary>
 /// <remarks>Ported from the Java method <c>testEmptyTerm() throws java.io.IOException</c>.</remarks>
 public virtual void testEmptyTerm()
 {
     Analyzer emptyTermAnalyzer = new AnalyzerAnonymousInnerClassHelper(this);
     checkOneTerm(emptyTermAnalyzer, "", "");
 }
        /// <summary>
        /// Runs checkRandomData against the helper analyzer, passing
        /// <c>RANDOM_MULTIPLIER * 10000</c> as the round count.
        /// </summary>
        /// <remarks>Ported from the Java method <c>testRandom() throws Exception</c>.</remarks>
        public virtual void testRandom()
        {
            Analyzer randomDataAnalyzer = new AnalyzerAnonymousInnerClassHelper(this);

            int rounds = RANDOM_MULTIPLIER * 10000;
            checkRandomData(random(), randomDataAnalyzer, rounds);
        }
Exemple #25
0
        /// <summary>
        /// Runs CheckRandomData against the helper analyzer, passing
        /// <c>1000 * RANDOM_MULTIPLIER</c> as the third argument.
        /// </summary>
        public virtual void TestRandomStrings()
        {
            Analyzer randomDataAnalyzer = new AnalyzerAnonymousInnerClassHelper(this);
            int rounds = 1000 * RANDOM_MULTIPLIER;

            CheckRandomData(Random(), randomDataAnalyzer, rounds);
        }
Exemple #26
0
        /// <summary>
        /// Indexes <c>MAXDOC</c> documents, each carrying the same text in eight fields
        /// that use four different field types (docs only, docs and freqs, positions,
        /// positions with offsets), with the codec pinned to <c>Lucene41PostingsFormat</c>.
        /// The index is verified once after writing and once more after a ForceMerge(1).
        /// </summary>
        public virtual void Test()
        {
            Directory dir = NewDirectory();
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, Analyzer.PER_FIELD_REUSE_STRATEGY);
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            iwc.SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat()));
            // TODO we could actually add more fields implemented with different PFs
            // or, just put this test into the usual rotation?
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, (IndexWriterConfig)iwc.Clone());
            Document doc = new Document();

            // term vectors are turned on for a cross-check
            FieldType docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED)
            {
                StoreTermVectors = true,
                IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY
            };

            // term vectors are turned on for a cross-check
            FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED)
            {
                StoreTermVectors = true,
                IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS
            };

            // all term vector options are turned on for a cross-check
            FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED)
            {
                StoreTermVectors = true,
                StoreTermVectorPositions = true,
                StoreTermVectorOffsets = true,
                StoreTermVectorPayloads = true
            };

            // same as positionsType, but additionally indexing offsets
            FieldType offsetsType = new FieldType(positionsType)
            {
                IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
            };

            Field[] fields =
            {
                new Field("field1docs", "", docsOnlyType),
                new Field("field2freqs", "", docsAndFreqsType),
                new Field("field3positions", "", positionsType),
                new Field("field4offsets", "", offsetsType),
                new Field("field5payloadsFixed", "", positionsType),
                new Field("field6payloadsVariable", "", positionsType),
                new Field("field7payloadsFixedOffsets", "", offsetsType),
                new Field("field8payloadsVariableOffsets", "", offsetsType)
            };
            foreach (Field field in fields)
            {
                doc.Add(field);
            }

            // The same Document/Field instances are reused; only the values change per doc.
            for (int docNumber = 0; docNumber < MAXDOC; docNumber++)
            {
                string text = Convert.ToString(docNumber) + " verycommon " + English.IntToEnglish(docNumber).Replace('-', ' ') + " " + TestUtil.RandomSimpleString(Random());
                foreach (Field field in fields)
                {
                    field.StringValue = text;
                }
                iw.AddDocument(doc);
            }
            iw.Dispose();

            Verify(dir);
            TestUtil.CheckIndex(dir); // for some extra coverage, checkIndex before we forceMerge

            // Reopen in append mode and merge down to one segment, then verify again.
            iwc.SetOpenMode(OpenMode_e.APPEND);
            IndexWriter mergingWriter = new IndexWriter(dir, (IndexWriterConfig)iwc.Clone());
            mergingWriter.ForceMerge(1);
            mergingWriter.Dispose();

            Verify(dir);
            dir.Dispose();
        }
        /// <summary>
        /// Indexes a single document through the helper analyzer, then checks the
        /// positions reported for the terms "1" and "2" and the hit counts of a series of
        /// phrase queries (with implicit and explicit term positions) and one
        /// multi-phrase query over the terms "1" to "5" and "9".
        /// </summary>
        public virtual void TestSetPosition()
        {
            const string FieldName = "field";
            const int MaxHits = 1000;

            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);
            Directory store = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, store, analyzer);

            Document document = new Document();
            document.Add(NewTextField(FieldName, "bogus", Field.Store.YES));
            writer.AddDocument(document);

            IndexReader reader = writer.GetReader();
            writer.Dispose();

            IndexSearcher searcher = NewSearcher(reader);

            // first token should be at position 0
            DocsAndPositionsEnum positions = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), FieldName, new BytesRef("1"));
            positions.NextDoc();
            Assert.AreEqual(0, positions.NextPosition());

            // second token should be at position 2
            positions = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), FieldName, new BytesRef("2"));
            positions.NextDoc();
            Assert.AreEqual(2, positions.NextPosition());

            // "1" directly followed by "2": no hit expected
            PhraseQuery phrase = new PhraseQuery();
            phrase.Add(new Term(FieldName, "1"));
            phrase.Add(new Term(FieldName, "2"));
            ScoreDoc[] matches = searcher.Search(phrase, null, MaxHits).ScoreDocs;
            Assert.AreEqual(0, matches.Length);

            // same as previous, just specify positions explicitly.
            phrase = new PhraseQuery();
            phrase.Add(new Term(FieldName, "1"), 0);
            phrase.Add(new Term(FieldName, "2"), 1);
            matches = searcher.Search(phrase, null, MaxHits).ScoreDocs;
            Assert.AreEqual(0, matches.Length);

            // specifying correct positions should find the phrase.
            phrase = new PhraseQuery();
            phrase.Add(new Term(FieldName, "1"), 0);
            phrase.Add(new Term(FieldName, "2"), 2);
            matches = searcher.Search(phrase, null, MaxHits).ScoreDocs;
            Assert.AreEqual(1, matches.Length);

            // "2" directly followed by "3": one hit expected
            phrase = new PhraseQuery();
            phrase.Add(new Term(FieldName, "2"));
            phrase.Add(new Term(FieldName, "3"));
            matches = searcher.Search(phrase, null, MaxHits).ScoreDocs;
            Assert.AreEqual(1, matches.Length);

            // "3" directly followed by "4": no hit expected
            phrase = new PhraseQuery();
            phrase.Add(new Term(FieldName, "3"));
            phrase.Add(new Term(FieldName, "4"));
            matches = searcher.Search(phrase, null, MaxHits).ScoreDocs;
            Assert.AreEqual(0, matches.Length);

            // phrase query would find it when correct positions are specified.
            phrase = new PhraseQuery();
            phrase.Add(new Term(FieldName, "3"), 0);
            phrase.Add(new Term(FieldName, "4"), 0);
            matches = searcher.Search(phrase, null, MaxHits).ScoreDocs;
            Assert.AreEqual(1, matches.Length);

            // phrase query should fail for a non-existing searched term
            // even if other searched terms exist in the same searched position.
            phrase = new PhraseQuery();
            phrase.Add(new Term(FieldName, "3"), 0);
            phrase.Add(new Term(FieldName, "9"), 0);
            matches = searcher.Search(phrase, null, MaxHits).ScoreDocs;
            Assert.AreEqual(0, matches.Length);

            // multi-phrase query should succeed for a non-existing searched term
            // because other searched terms exist in the same searched position.
            MultiPhraseQuery multiPhrase = new MultiPhraseQuery();
            multiPhrase.Add(new Term[] { new Term(FieldName, "3"), new Term(FieldName, "9") }, 0);
            matches = searcher.Search(multiPhrase, null, MaxHits).ScoreDocs;
            Assert.AreEqual(1, matches.Length);

            // "2" directly followed by "4": one hit expected
            phrase = new PhraseQuery();
            phrase.Add(new Term(FieldName, "2"));
            phrase.Add(new Term(FieldName, "4"));
            matches = searcher.Search(phrase, null, MaxHits).ScoreDocs;
            Assert.AreEqual(1, matches.Length);

            // "3" directly followed by "5": one hit expected
            phrase = new PhraseQuery();
            phrase.Add(new Term(FieldName, "3"));
            phrase.Add(new Term(FieldName, "5"));
            matches = searcher.Search(phrase, null, MaxHits).ScoreDocs;
            Assert.AreEqual(1, matches.Length);

            // "4" directly followed by "5": one hit expected
            phrase = new PhraseQuery();
            phrase.Add(new Term(FieldName, "4"));
            phrase.Add(new Term(FieldName, "5"));
            matches = searcher.Search(phrase, null, MaxHits).ScoreDocs;
            Assert.AreEqual(1, matches.Length);

            // "2" directly followed by "5": no hit expected
            phrase = new PhraseQuery();
            phrase.Add(new Term(FieldName, "2"));
            phrase.Add(new Term(FieldName, "5"));
            matches = searcher.Search(phrase, null, MaxHits).ScoreDocs;
            Assert.AreEqual(0, matches.Length);

            reader.Dispose();
            store.Dispose();
        }
        /// <summary>
        /// Checks that the helper analyzer maps the empty term to the empty term.
        /// </summary>
        public virtual void TestEmptyTerm()
        {
            Analyzer emptyTermAnalyzer = new AnalyzerAnonymousInnerClassHelper(this);
            const string empty = "";

            CheckOneTerm(emptyTermAnalyzer, empty, empty);
        }
        /// <summary>
        /// Runs CheckRandomData against the helper analyzer, passing
        /// <c>200 * RANDOM_MULTIPLIER</c> and 8192 as the third and fourth arguments.
        /// </summary>
        public virtual void TestRandomStrings()
        {
            Analyzer randomDataAnalyzer = new AnalyzerAnonymousInnerClassHelper(this);
            int rounds = 200 * RANDOM_MULTIPLIER;

            CheckRandomData(Random, randomDataAnalyzer, rounds, 8192);
        }
        /// <summary>
        /// Creates a fresh index in <paramref name="dir"/> containing
        /// <paramref name="ndocs"/> copies of a one-field document, then merges it down
        /// with ForceMerge(1). The analyzer helper receives <paramref name="random"/>,
        /// <paramref name="val"/>, <paramref name="maxTF"/> and
        /// <paramref name="percentDocs"/>.
        /// </summary>
        /// <param name="random">Random source handed to the analyzer helper.</param>
        /// <param name="dir">Directory the index is written to (opened in CREATE mode).</param>
        /// <param name="ndocs">Number of times the document is added.</param>
        /// <param name="field">Name of the single string field.</param>
        /// <param name="val">Value of the field; also handed to the analyzer helper.</param>
        /// <param name="maxTF">Handed to the analyzer helper.</param>
        /// <param name="percentDocs">Handed to the analyzer helper.</param>
        internal virtual void AddDocs(Random random, Directory dir, int ndocs, string field, string val, int maxTF, float percentDocs)
        {
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(random, val, maxTF, percentDocs);

            // One document instance is reused for every AddDocument call.
            Document template = new Document();
            template.Add(NewStringField(field, val, Field.Store.NO));

            var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
                .SetOpenMode(OpenMode_e.CREATE)
                .SetMaxBufferedDocs(100)
                .SetMergePolicy(NewLogMergePolicy(100));
            IndexWriter writer = new IndexWriter(dir, config);

            for (int added = 0; added < ndocs; added++)
            {
                writer.AddDocument(template);
            }

            writer.ForceMerge(1);
            writer.Dispose();
        }
Exemple #31
0
// Converter note: the Java original was declared "public void testSingleChar2() throws Exception";
// the throws clause was dropped because .NET has no equivalent.
        /// <summary>
        /// Analyzes the single character "一" and asserts the resulting token's text,
        /// offsets, type and position increment via <c>assertAnalyzesTo</c>.
        /// </summary>
        public virtual void testSingleChar2()
        {
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);

            string[] expectedTerms = { "一" };
            int[] expectedStartOffsets = { 0 };
            int[] expectedEndOffsets = { 1 };
            string[] expectedTypes = { "<SINGLE>" };
            int[] expectedPositionIncrements = { 1 };

            assertAnalyzesTo(analyzer, "一", expectedTerms, expectedStartOffsets, expectedEndOffsets, expectedTypes, expectedPositionIncrements);
        }
        /// <summary>
        /// Indexes MAXDOC documents, each carrying the same generated text in eight fields
        /// spread over four field-type configurations, then calls <c>Verify</c> on the
        /// directory both before and after a <c>ForceMerge(1)</c>.
        /// </summary>
        public virtual void Test()
        {
            Directory dir = NewDirectory();
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, Analyzer.PER_FIELD_REUSE_STRATEGY);
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            iwc.SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat()));
            // TODO we could actually add more fields implemented with different PFs
            // or, just put this test into the usual rotation?
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, (IndexWriterConfig)iwc.Clone());
            Document doc = new Document();

            // Term vectors are switched on for every type as a cross-check.
            FieldType docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED)
            {
                StoreTermVectors = true,
                IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY
            };

            FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED)
            {
                StoreTermVectors = true,
                IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS
            };

            FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED)
            {
                StoreTermVectors = true,
                StoreTermVectorPositions = true,
                StoreTermVectorOffsets = true,
                StoreTermVectorPayloads = true
            };

            // Copy of positionsType with offsets added to the index options.
            FieldType offsetsType = new FieldType(positionsType)
            {
                IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
            };

            Field[] fields =
            {
                new Field("field1docs", "", docsOnlyType),
                new Field("field2freqs", "", docsAndFreqsType),
                new Field("field3positions", "", positionsType),
                new Field("field4offsets", "", offsetsType),
                new Field("field5payloadsFixed", "", positionsType),
                new Field("field6payloadsVariable", "", positionsType),
                new Field("field7payloadsFixedOffsets", "", offsetsType),
                new Field("field8payloadsVariableOffsets", "", offsetsType)
            };
            foreach (Field f in fields)
            {
                doc.Add(f);
            }

            // The same Document and Field instances are reused; only the text changes per iteration.
            for (int docNum = 0; docNum < MAXDOC; docNum++)
            {
                string text = Convert.ToString(docNum) + " verycommon " + English.IntToEnglish(docNum).Replace('-', ' ') + " " + TestUtil.RandomSimpleString(Random());
                foreach (Field f in fields)
                {
                    f.StringValue = text;
                }
                iw.AddDocument(doc);
            }
            iw.Dispose();

            Verify(dir);
            TestUtil.CheckIndex(dir); // for some extra coverage, checkIndex before we forceMerge

            // Reopen in APPEND mode and merge to one segment, then verify again.
            iwc.SetOpenMode(OpenMode_e.APPEND);
            IndexWriter mergeWriter = new IndexWriter(dir, (IndexWriterConfig)iwc.Clone());
            mergeWriter.ForceMerge(1);
            mergeWriter.Dispose();

            Verify(dir);
            dir.Dispose();
        }
Exemple #33
0
 // Constructor of the helper class that stands in for a Java anonymous inner class.
 // It forwards the version argument and the reader to the base constructor and keeps a
 // reference to the enclosing analyzer helper instance.
 // NOTE(review): "UnknownType" looks like a converter placeholder for the real version type,
 // and the parameter name TEST_VERSION_CURRENT shadows any same-named constant — confirm
 // against the enclosing class before relying on this signature.
 public LetterTokenizerAnonymousInnerClassHelper(AnalyzerAnonymousInnerClassHelper outerInstance, UnknownType TEST_VERSION_CURRENT, Reader reader) : base(TEST_VERSION_CURRENT, reader)
 {
     // Retained so the helper can reach members of the enclosing instance.
     this.outerInstance = outerInstance;
 }
 /// <summary>
 /// Blasts random strings through the analyzer: ten rounds, each with a freshly drawn
 /// (lower, upper) pair handed to the analyzer helper. </summary>
 // Converter note: the Java original was declared "public void testRandomStrings() throws Exception",
 // and both drawn bounds were 'final' locals there.
 public virtual void testRandomStrings()
 {
     for (int round = 0; round < 10; round++)
     {
         // Drawn from [2, 10] in the Java original (TestUtil.nextInt(random(), 2, 10)).
         int lower = TestUtil.Next(random(), 2, 10);
         // Drawn so that it is never below the first bound (TestUtil.nextInt(random(), min, 20)).
         int upper = TestUtil.Next(random(), lower, 20);

         Analyzer roundAnalyzer = new AnalyzerAnonymousInnerClassHelper(this, lower, upper);
         checkRandomData(random(), roundAnalyzer, 200 * RANDOM_MULTIPLIER, 20);
         checkRandomData(random(), roundAnalyzer, 10 * RANDOM_MULTIPLIER, 1027);
     }
 }
Exemple #35
0
// Converter note: the Java original was declared "public void testRandomString() throws Exception";
// the throws clause was dropped because .NET has no equivalent.
        /// <summary>
        /// Pushes random data through the analyzer via <c>checkRandomData</c>, passing
        /// 1000 * RANDOM_MULTIPLIER as its numeric argument.
        /// </summary>
        public virtual void testRandomString()
        {
            int scaledCount = 1000 * RANDOM_MULTIPLIER;
            Analyzer randomDataAnalyzer = new AnalyzerAnonymousInnerClassHelper(this);
            checkRandomData(random(), randomDataAnalyzer, scaledCount);
        }
 // Test tagged with issue LUCENE-5269.
 // Converter note: the Java original was declared
 // "public void testUnicodeShinglesAndNgrams() throws Exception".
 /// <summary>
 /// Pushes random data through the analyzer via <c>checkRandomData</c>, passing 2000
 /// as its numeric argument.
 /// </summary>
 public virtual void testUnicodeShinglesAndNgrams()
 {
     const int fixedCount = 2000;
     Analyzer shingleNgramAnalyzer = new AnalyzerAnonymousInnerClassHelper(this);
     checkRandomData(random(), shingleNgramAnalyzer, fixedCount);
 }
Exemple #37
0
        // Test tagged with issue LUCENE-5269.
        // Converter note: the Java original was declared
        // "public void testUnicodeShinglesAndNgrams() throws Exception".
        /// <summary>
        /// Pushes random data through the analyzer via <c>checkRandomData</c>, passing 2000
        /// as its numeric argument.
        /// </summary>
        public virtual void testUnicodeShinglesAndNgrams()
        {
            const int fixedCount = 2000;
            Analyzer shingleNgramAnalyzer = new AnalyzerAnonymousInnerClassHelper(this);
            checkRandomData(random(), shingleNgramAnalyzer, fixedCount);
        }
        /// <summary>
        /// Wraps one delegate analyzer in three <c>ShingleAnalyzerWrapper</c> configurations that
        /// differ in their last constructor arguments ("--", null and "") and asserts the tokens,
        /// offsets and position increments produced for the same input sentence.
        /// </summary>
        public virtual void TestAltFillerToken()
        {
            Analyzer @delegate = new AnalyzerAnonymousInnerClassHelper(this);
            string input = "please divide into shingles";

            // Case 1: last argument "--", fifth argument true.
            ShingleAnalyzerWrapper dashFiller = new ShingleAnalyzerWrapper(@delegate, ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE, ShingleFilter.DEFAULT_TOKEN_SEPARATOR, true, false, "--");
            string[] dashTerms = { "please", "please divide", "divide", "divide --", "-- shingles", "shingles" };
            int[] dashStarts = { 0, 0, 7, 7, 19, 19 };
            int[] dashEnds = { 6, 13, 13, 19, 27, 27 };
            int[] dashIncrements = { 1, 0, 1, 0, 1, 1 };
            AssertAnalyzesTo(dashFiller, input, dashTerms, dashStarts, dashEnds, dashIncrements);

            // Case 2: last argument null, fifth argument false.
            ShingleAnalyzerWrapper nullFiller = new ShingleAnalyzerWrapper(@delegate, ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE, ShingleFilter.DEFAULT_TOKEN_SEPARATOR, false, false, null);
            string[] nullTerms = { "please divide", "divide ", " shingles" };
            int[] nullStarts = { 0, 7, 19 };
            int[] nullEnds = { 13, 19, 27 };
            int[] nullIncrements = { 1, 1, 1 };
            AssertAnalyzesTo(nullFiller, input, nullTerms, nullStarts, nullEnds, nullIncrements);

            // Case 3: last argument "", same expectations as the null case.
            ShingleAnalyzerWrapper emptyFiller = new ShingleAnalyzerWrapper(@delegate, ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE, ShingleFilter.DEFAULT_TOKEN_SEPARATOR, false, false, "");
            string[] emptyTerms = { "please divide", "divide ", " shingles" };
            int[] emptyStarts = { 0, 7, 19 };
            int[] emptyEnds = { 13, 19, 27 };
            int[] emptyIncrements = { 1, 1, 1 };
            AssertAnalyzesTo(emptyFiller, input, emptyTerms, emptyStarts, emptyEnds, emptyIncrements);
        }
        /// <summary>
        /// Repeatedly either adds documents or issues delete-by-term calls until the writer's
        /// flush count changes, and asserts that more than 2500 operations were needed each time.
        /// Skipped (via AssumeFalse) when the postings format of "field" is Memory, SimpleText or Direct.
        /// </summary>
        public virtual void TestIndexingThenDeleting()
        {
            // TODO: move this test to its own class and just @SuppressCodecs?
            // TODO: is it enough to just use newFSDirectory?
            string fieldFormat = TestUtil.GetPostingsFormat("field");
            AssumeFalse("this test cannot run with Memory codec", fieldFormat.Equals("Memory"));
            AssumeFalse("this test cannot run with SimpleText codec", fieldFormat.Equals("SimpleText"));
            AssumeFalse("this test cannot run with Direct codec", fieldFormat.Equals("Direct"));

            Random rnd = Random();
            Directory dir = NewDirectory();
            // note this test explicitly disables payloads
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);
            // Doc-count and delete-term flush triggers are disabled; only the 1.0 MB RAM buffer remains set.
            IndexWriter w = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetRAMBufferSizeMB(1.0).SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH));
            Document doc = new Document();
            doc.Add(NewTextField("field", "go 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20", Field.Store.NO));

            int rounds = AtLeast(3);
            for (int round = 0; round < rounds; round++)
            {
                int operations = 0;

                bool doIndexing = rnd.NextBoolean();
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter doIndexing=" + doIndexing);
                }

                // Keep adding (or deleting) until the flush count moves away from its starting value.
                int flushCountAtStart = w.FlushCount;
                while (w.FlushCount == flushCountAtStart)
                {
                    if (doIndexing)
                    {
                        w.AddDocument(doc);
                    }
                    else
                    {
                        w.DeleteDocuments(new Term("foo", "" + operations));
                    }
                    operations++;
                }

                Assert.IsTrue(operations > 2500, "flush happened too quickly during " + (doIndexing ? "indexing" : "deleting") + " count=" + operations);
            }

            w.Dispose();
            dir.Dispose();
        }
        /// <summary>
        /// Analyzes "mosfellsbær" and asserts eleven two-character terms that all report
        /// start offset 0 and end offset 11, with only the first term advancing the position.
        /// </summary>
        public virtual void TestInvalidOffsets()
        {
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);

            string[] expectedTerms = { "mo", "os", "sf", "fe", "el", "ll", "ls", "sb", "ba", "ae", "er" };
            int[] expectedStartOffsets = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
            int[] expectedEndOffsets = { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 };
            int[] expectedPositionIncrements = { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

            AssertAnalyzesTo(analyzer, "mosfellsbær", expectedTerms, expectedStartOffsets, expectedEndOffsets, expectedPositionIncrements);
        }
Exemple #41
0
        /// <summary>
        /// Pushes random data through the analyzer via <c>CheckRandomData</c>, passing
        /// 1000 * RandomMultiplier as its numeric argument.
        /// </summary>
        public virtual void TestRandomStrings()
        {
            int scaledCount = 1000 * RandomMultiplier;
            Analyzer randomDataAnalyzer = new AnalyzerAnonymousInnerClassHelper(this);
            CheckRandomData(Random, randomDataAnalyzer, scaledCount);
        }
        /// <summary>
        /// Pushes random data through the analyzer via <c>CheckRandomData</c>, passing
        /// 1000 * RANDOM_MULTIPLIER as its numeric argument.
        /// </summary>
        public virtual void TestRandomString()
        {
            int scaledCount = 1000 * RANDOM_MULTIPLIER;
            Analyzer randomDataAnalyzer = new AnalyzerAnonymousInnerClassHelper(this);
            CheckRandomData(Random(), randomDataAnalyzer, scaledCount);
        }
        /// <summary>
        /// Builds a 30-document index, appends 1470 more documents, force-merges to one segment,
        /// and asserts that the peak directory size recorded while adding and after closing stays
        /// below 150 times the size recorded for the starting index.
        /// </summary>
        public virtual void TestCommitOnCloseDiskUsage()
        {
            // MemoryCodec is FST based and therefore not necessarily "additive":
            // merging N small FSTs can yield a result larger than their sum,
            // because the merged FST may switch some arcs to array encoding
            // (which takes more space). Skip the test in that case.
            string idFormat = TestUtil.GetPostingsFormat("id");
            string contentFormat = TestUtil.GetPostingsFormat("content");
            bool usesMemoryCodec = idFormat.Equals("Memory", StringComparison.Ordinal) || contentFormat.Equals("Memory", StringComparison.Ordinal);
            AssumeFalse("this test cannot run with Memory codec", usesMemoryCodec);

            MockDirectoryWrapper dir = NewMockDirectory();

            Analyzer analyzer;
            if (Random.NextBoolean())
            {
                // no payloads
                analyzer = new AnalyzerAnonymousInnerClassHelper(this);
            }
            else
            {
                // fixed length payloads
                int payloadLength = Random.Next(200);
                analyzer = new AnalyzerAnonymousInnerClassHelper2(this, payloadLength);
            }

            // Phase 1: create the starting index of 30 documents.
            IndexWriter seedWriter = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(10).SetReaderPooling(false).SetMergePolicy(NewLogMergePolicy(10)));
            for (int docIndex = 0; docIndex < 30; docIndex++)
            {
                AddDocWithIndex(seedWriter, docIndex);
            }
            seedWriter.Dispose();
            dir.ResetMaxUsedSizeInBytes();

            dir.TrackDiskUsage = true;
            long startDiskUsage = dir.MaxUsedSizeInBytes;

            // Phase 2: append 1470 more documents with a serial merge scheduler.
            IndexWriter appendWriter = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.APPEND).SetMaxBufferedDocs(10).SetMergeScheduler(new SerialMergeScheduler()).SetReaderPooling(false).SetMergePolicy(NewLogMergePolicy(10)));
            for (int docIndex = 0; docIndex < 1470; docIndex++)
            {
                AddDocWithIndex(appendWriter, docIndex);
            }
            long midDiskUsage = dir.MaxUsedSizeInBytes;

            // Phase 3: restart peak tracking, merge down to one segment and close.
            dir.ResetMaxUsedSizeInBytes();
            appendWriter.ForceMerge(1);
            appendWriter.Dispose();

            DirectoryReader.Open(dir).Dispose();

            long endDiskUsage = dir.MaxUsedSizeInBytes;

            // The final index holds 50X the documents of the starting one; with the
            // usual 3X transient disk usage that allows up to 150X. If the deleter
            // fails to remove intermediate segments, usage will exceed this 150X.
            long maxAllowedUsage = 150 * startDiskUsage;
            Assert.IsTrue(midDiskUsage < maxAllowedUsage, "writer used too much space while adding documents: mid=" + midDiskUsage + " start=" + startDiskUsage + " end=" + endDiskUsage + " max=" + maxAllowedUsage);
            Assert.IsTrue(endDiskUsage < maxAllowedUsage, "writer used too much space after close: endDiskUsage=" + endDiskUsage + " startDiskUsage=" + startDiskUsage + " max=" + maxAllowedUsage);
            dir.Dispose();
        }