Code example #1
 public virtual void TestDefaultArticles()
 {
     TextReader reader = new StringReader("l'avion");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("Elision").Create(stream);
     AssertTokenStreamContents(stream, new string[] { "avion" });
 }
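For reference, the same elision step can be built without the factory indirection. A minimal sketch, assuming Lucene.NET 4.8-style namespaces (Lucene.Net.Analysis.Core, Lucene.Net.Analysis.Util); the article set below is illustrative, not the factory's full French default:

 using System.IO;
 using Lucene.Net.Analysis;
 using Lucene.Net.Analysis.Core;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Util;

 TokenStream BuildElisionPipeline(TextReader reader)
 {
     // Articles stripped when they precede an apostrophe: "l'avion" -> "avion".
     var articles = new CharArraySet(LuceneVersion.LUCENE_48, new[] { "l", "d" }, true);
     Tokenizer tokenizer = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, reader);
     return new ElisionFilter(tokenizer, articles);
 }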
Code example #2
 /// <summary>
 /// If the synonym list is completely empty, test that we still analyze correctly.
 /// </summary>
 public virtual void TestEmptySynonyms()
 {
     TextReader reader = new StringReader("GB");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("Synonym", TEST_VERSION_CURRENT, new StringMockResourceLoader(""), "synonyms", "synonyms.txt").Create(stream); // empty file!
     AssertTokenStreamContents(stream, new string[] { "GB" });
 }
 public virtual void TestStemming()
 {
     TextReader reader = new StringReader("räksmörgås");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("ScandinavianNormalization").Create(stream);
     AssertTokenStreamContents(stream, new string[] { "ræksmørgås" });
 }
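The ScandinavianNormalization test above can likewise be written against the filter directly. A minimal sketch under the same Lucene.NET 4.8 assumptions (the filter lives in Lucene.Net.Analysis.Miscellaneous):

 using System.IO;
 using Lucene.Net.Analysis;
 using Lucene.Net.Analysis.Core;
 using Lucene.Net.Analysis.Miscellaneous;
 using Lucene.Net.Util;

 TokenStream BuildScandinavianNormalization(TextReader reader)
 {
     Tokenizer tokenizer = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, reader);
     // Normalizes interchangeable Scandinavian spellings to a single form
     // (ä -> æ, ö -> ø), which is why "räksmörgås" becomes "ræksmørgås".
     return new ScandinavianNormalizationFilter(tokenizer);
 }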
Code example #4
 public virtual void TestCaseInsensitive()
 {
     TextReader reader = new StringReader("L'avion");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("Elision", "articles", "frenchArticles.txt", "ignoreCase", "true").Create(stream);
     AssertTokenStreamContents(stream, new string[] { "avion" });
 }
 public virtual void TestStemming()
 {
     TextReader reader = new StringReader("questões");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("PortugueseMinimalStem").Create(stream);
     AssertTokenStreamContents(stream, new string[] { "questão" });
 }
Code example #6
 /// <summary>
 /// Ensure the filter actually lowercases text.
 /// </summary>
 public virtual void TestCasing()
 {
     TextReader reader = new StringReader("AĞACI");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("TurkishLowerCase").Create(stream);
     AssertTokenStreamContents(stream, new string[] { "ağacı" });
 }
Code example #7
 public virtual void TestStemming()
 {
     TextReader reader = new StringReader("cariñosa");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("GalicianStem").Create(stream);
     AssertTokenStreamContents(stream, new string[] { "cariñ" });
 }
Code example #8
 public virtual void TestPositionIncrements()
 {
     TextReader reader = new StringReader("foo foobar super-duper-trooper");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("Length", LengthFilterFactory.MIN_KEY, "4", LengthFilterFactory.MAX_KEY, "10").Create(stream);
     // "foo" (3 chars) and "super-duper-trooper" (19 chars) are dropped; the
     // surviving "foobar" carries a position increment of 2 to account for the
     // removed token before it.
     AssertTokenStreamContents(stream, new string[] { "foobar" }, new int[] { 2 });
 }
Code example #9
 public virtual void TestStemming()
 {
     TextReader reader = new StringReader("chevaux");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("FrenchMinimalStem").Create(stream);
     AssertTokenStreamContents(stream, new string[] { "cheval" });
 }
Code example #10
 public virtual void TestOffsets()
 {
     string input = "abc- def geh 1234- 5678-";
     TokenStream ts = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
     ts = new HyphenatedWordsFilter(ts);
     // "abc-" + "def" are rejoined as "abcdef" (offsets 0..8) and "1234-" + "5678-"
     // as "12345678-"; the trailing hyphen survives because no token follows it.
     AssertTokenStreamContents(ts, new string[] { "abcdef", "geh", "12345678-" }, new int[] { 0, 9, 13 }, new int[] { 8, 12, 24 });
 }
Code example #11
 public virtual void TestTrimming()
 {
     TextReader reader = new StringReader("trim me    ");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
     stream = TokenFilterFactory("Trim").Create(stream);
     AssertTokenStreamContents(stream, new string[] { "trim me" });
 }
Code example #12
 public virtual void TestStemming()
 {
     TextReader reader = new StringReader("räksmörgås");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("ScandinavianFolding").Create(stream);
     AssertTokenStreamContents(stream, new string[] { "raksmorgas" });
 }
Code example #13
 public virtual void TestConsumeAllTokens()
 {
     TextReader reader = new StringReader("A1 B2 C3 D4 E5 F6");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("LimitTokenPosition", "maxTokenPosition", "3", "consumeAllTokens", "true").Create(stream);
     AssertTokenStreamContents(stream, new string[] { "A1", "B2", "C3" });
 }
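For comparison, a sketch of the equivalent direct construction, assuming the LimitTokenPositionFilter constructor from Lucene.NET 4.8's Lucene.Net.Analysis.Miscellaneous namespace:

 using Lucene.Net.Analysis;
 using Lucene.Net.Analysis.Miscellaneous;

 TokenStream LimitToThreePositions(TokenStream input)
 {
     // maxTokenPosition = 3 keeps "A1 B2 C3"; consumeAllTokens = true makes the
     // filter exhaust the rest of the input instead of stopping early, which
     // matters for consumers that require full consumption of the stream.
     return new LimitTokenPositionFilter(input, 3, true);
 }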
Code example #14
 /// <summary>
 /// Ensure the filter actually stems and normalizes text.
 /// </summary>
 public virtual void TestStemming()
 {
     TextReader reader = new StringReader("Brasília");
     Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     TokenStream stream = TokenFilterFactory("BrazilianStem").Create(tokenizer);
     AssertTokenStreamContents(stream, new string[] { "brasil" });
 }
Code example #15
 /// <summary>
 /// Ensure the filter actually lowercases (and a bit more) Greek text.
 /// </summary>
 public virtual void TestNormalization()
 {
     TextReader reader = new StringReader("Μάϊος ΜΆΪΟΣ");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("GreekLowerCase").Create(stream);
     AssertTokenStreamContents(stream, new string[] { "μαιοσ", "μαιοσ" });
 }
 public virtual void TestNormalization()
 {
     TextReader reader = new StringReader("های");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("PersianNormalization").Create(stream);
     AssertTokenStreamContents(stream, new string[] { "هاي" });
 }
Code example #17
 public virtual void TestStemming()
 {
     TextReader reader = new StringReader("abc");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("HunspellStem", "dictionary", "simple.dic", "affix", "simple.aff").Create(stream);
     AssertTokenStreamContents(stream, new string[] { "ab" });
 }
Code example #18
 public virtual void TestLongestOnly()
 {
     MockTokenizer tokenizer = new MockTokenizer(new StringReader("lucene is awesome"));
     tokenizer.EnableChecks = true;
     HunspellStemFilter filter = new HunspellStemFilter(tokenizer, dictionary, true, true);
     AssertTokenStreamContents(filter, new string[] { "lucene", "is", "awesome" }, new int[] { 1, 1, 1 });
 }
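The dictionary field above is set up elsewhere in the test fixture. A hedged sketch of loading one, assuming Lucene.NET 4.8's Hunspell API where Dictionary takes the affix and dictionary files as streams (the paths are illustrative):

 using System.IO;
 using Lucene.Net.Analysis.Hunspell;

 Dictionary LoadHunspellDictionary(string affixPath, string dicPath)
 {
     using (Stream affix = File.OpenRead(affixPath))
     using (Stream dic = File.OpenRead(dicPath))
     {
         // Parses the .aff rules and .dic word list up front.
         return new Dictionary(affix, dic);
     }
 }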
Code example #19
 public virtual void TestStemming()
 {
     TextReader reader = new StringReader("weißbier");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("GermanNormalization").Create(stream);
     AssertTokenStreamContents(stream, new string[] { "weissbier" });
 }
Code example #20
 public virtual void TestApostropheFilter()
 {
     TokenStream stream = new MockTokenizer(new StringReader("Türkiye'de 2003'te Van Gölü'nü gördüm"), MockTokenizer.WHITESPACE, false);
     stream = new TurkishLowerCaseFilter(stream);
     // ApostropheFilter removes the apostrophe and everything after it
     // (Turkish suffixes such as 'de, 'te, 'nü).
     stream = new ApostropheFilter(stream);
     AssertTokenStreamContents(stream, new string[] { "türkiye", "2003", "van", "gölü", "gördüm" });
 }
Code example #21
 public virtual void TestStemming()
 {
     TextReader reader = new StringReader("журналы");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("RussianLightStem").Create(stream);
     AssertTokenStreamContents(stream, new string[] { "журнал" });
 }
Code example #22
 public virtual void TestStemmingInflectional()
 {
     TextReader reader = new StringReader("dibukukannya");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("IndonesianStem", "stemDerivational", "false").Create(stream);
     AssertTokenStreamContents(stream, new string[] { "dibukukan" });
 }
Code example #23
 /// <summary>
 /// Test with numbers.
 /// </summary>
 public virtual void TestCapitalization12()
 {
     TextReader reader = new StringReader("1st 2nd third");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("Capitalization", "keep", "and the it BIG", "onlyFirstWord", "false", "minWordLength", "3", "okPrefix", "McK", "forceFirstLetter", "false").Create(stream);
     // Capitalizing a leading digit is a no-op, so only "third" changes.
     AssertTokenStreamContents(stream, new string[] { "1st", "2nd", "Third" });
 }
Code example #24
 /// <summary>
 /// Ensure the ASCIIFoldingFilterFactory works.
 /// </summary>
 public virtual void TestASCIIFolding()
 {
     TextReader reader = new StringReader("Česká");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("ASCIIFolding").Create(stream);
     AssertTokenStreamContents(stream, new string[] { "Ceska" });
 }
Code example #25
 public virtual void TestCapitalization()
 {
     TextReader reader = new StringReader("kiTTEN");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("Capitalization", "keep", "and the it BIG", "onlyFirstWord", "true").Create(stream);
     AssertTokenStreamContents(stream, new string[] { "Kitten" });
 }
Code example #26
 public virtual void TestStemming()
 {
     TextReader reader = new StringReader("äpplen äpple");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("SwedishLightStem").Create(stream);
     AssertTokenStreamContents(stream, new string[] { "äppl", "äppl" });
 }
Code example #27
 public virtual void TestStemming()
 {
     TextReader reader = new StringReader("پیاوەکان");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("SoraniStem").Create(stream);
     AssertTokenStreamContents(stream, new string[] { "پیاو" });
 }
Code example #28
 /// <summary>
 /// Ensure the filter actually normalizes text (numerics, stopwords).
 /// </summary>
 public virtual void TestFiltering()
 {
     TextReader reader = new StringReader("this 1234 Is such a silly filter");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("Chinese").Create(stream);
     // The stop list is lower-case and matching is case-sensitive, so "Is"
     // survives while "this", "such", "a" and the numerals are removed.
     AssertTokenStreamContents(stream, new string[] { "Is", "silly", "filter" });
 }
Code example #29
 /// <summary>
 /// Ensure the filter actually stems text.
 /// </summary>
 public virtual void TestStemming()
 {
     TextReader reader = new StringReader("dibukukannya");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("IndonesianStem").Create(stream);
     AssertTokenStreamContents(stream, new string[] { "buku" });
 }
Code example #30
 public virtual void TestPositionIncrements()
 {
     TextReader reader = new StringReader("foo foobar super-duper-trooper");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = TokenFilterFactory("CodepointCount", "min", "4", "max", "10").Create(stream);
     AssertTokenStreamContents(stream, new string[] { "foobar" }, new int[] { 2 });
 }
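Unlike the Length filter in example #8, which counts UTF-16 code units (what string.Length returns in .NET), CodepointCount counts Unicode code points, so the two disagree on supplementary characters. A small self-contained illustration in plain .NET:

 using System;
 using System.Globalization;

 string s = "\U00020021abc"; // one supplementary character, then "abc"
 Console.WriteLine(s.Length);                               // 5 UTF-16 code units
 Console.WriteLine(new StringInfo(s).LengthInTextElements); // 4 (here, one per code point)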
Code example #31
            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

                return new TokenStreamComponents(source, new EnglishMinimalStemFilter(source));
            }
Code example #32
            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

                return new TokenStreamComponents(tokenizer, new ShingleFilter(tokenizer));
            }
Code example #33
            protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

                return new TokenStreamComponents(source, new FinnishLightStemFilter(source));
            }
Code example #34
            protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader);

                return new TokenStreamComponents(tokenizer, new CrazyTokenFilter(tokenizer));
            }
Code example #35
            protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

                return new TokenStreamComponents(tokenizer, new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, min, max));
            }
Code example #36
            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

                return new TokenStreamComponents(tokenizer, new Lucene47WordDelimiterFilter(tokenizer, flags, protectedWords));
            }
Code example #37
            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

                return new TokenStreamComponents(tokenizer, new Lucene47WordDelimiterFilter(new LargePosIncTokenFilter(outerInstance, tokenizer), flags, protWords));
            }
Code example #38
            protected override TokenStreamComponents CreateComponents(string field, TextReader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

                return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(TEST_VERSION_CURRENT, new LargePosIncTokenFilter(outerInstance, tokenizer), flags, protWords));
            }
Code example #39
            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

                // LuceneVersion.LUCENE_43 is passed because Side.BACK is only
                // supported for pre-4.4 compatibility.
                return new TokenStreamComponents(tokenizer, new EdgeNGramTokenFilter(LuceneVersion.LUCENE_43, tokenizer, EdgeNGramTokenFilter.Side.BACK, 2, 4));
            }
Code example #40
            protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

                return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(TEST_VERSION_CURRENT, tokenizer, flags, protectedWords));
            }
Code example #41
            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

                return new TokenStreamComponents(tokenizer, new ReverseStringFilter(TEST_VERSION_CURRENT, tokenizer));
            }
Code example #42
            /// <summary>Filters MockTokenizer with QPTestFilter.</summary>
            protected internal sealed override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);

                return new TokenStreamComponents(tokenizer, new QPTestFilter(tokenizer));
            }
Code example #43
            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);

                return new TokenStreamComponents(tokenizer, new MockFixedLengthPayloadFilter(Random(), tokenizer, Length));
            }
Code example #44
            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);

                return new TokenStreamComponents(tokenizer, new PayloadFilter(PayloadCount, tokenizer));
            }
Code example #45
            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);

                return new TokenStreamComponents(result, new PayloadFilter(result, fieldName));
            }
Code example #46
            public override TokenStreamComponents CreateComponents(string field, TextReader @in)
            {
                Tokenizer tokenizer = new MockTokenizer(@in, MockTokenizer.WHITESPACE, false);

                return new TokenStreamComponents(tokenizer, new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords));
            }
Code example #47
            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);

                return new TokenStreamComponents(tokenizer, new TrimFilter(TEST_VERSION_CURRENT, tokenizer, false));
            }
Code example #48
            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);

                // No filter: the tokenizer is both the source and the sink.
                return new TokenStreamComponents(tokenizer, tokenizer);
            }
Code example #49
            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

                return new TokenStreamComponents(tokenizer, new TokenFilterAnonymousInnerClassHelper(this, tokenizer));
            }
Code example #50
        public virtual void TestCommitOnCloseDiskUsage()
        {
            // MemoryCodec, since it uses an FST, is not necessarily
            // "additive", i.e. if you add up N small FSTs and then merge
            // them, the merged result can easily be larger than the
            // sum, because the merged FST may use array encoding for
            // some arcs (which uses more space):

            string idFormat      = TestUtil.GetPostingsFormat("id");
            string contentFormat = TestUtil.GetPostingsFormat("content");

            AssumeFalse("this test cannot run with Memory codec", idFormat.Equals("Memory", StringComparison.Ordinal) || contentFormat.Equals("Memory", StringComparison.Ordinal));
            MockDirectoryWrapper dir = NewMockDirectory();
            Analyzer             analyzer;

            if (Random.NextBoolean())
            {
                // no payloads
                analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
                {
                    return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
                });
            }
            else
            {
                // fixed length payloads
                int length = Random.Next(200);
                analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
                {
                    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
                    return new TokenStreamComponents(tokenizer, new MockFixedLengthPayloadFilter(Random, tokenizer, length));
                });
            }

            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(10).SetReaderPooling(false).SetMergePolicy(NewLogMergePolicy(10)));

            for (int j = 0; j < 30; j++)
            {
                AddDocWithIndex(writer, j);
            }
            writer.Dispose();
            dir.ResetMaxUsedSizeInBytes();

            dir.TrackDiskUsage = true;
            long startDiskUsage = dir.MaxUsedSizeInBytes;

            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.APPEND).SetMaxBufferedDocs(10).SetMergeScheduler(new SerialMergeScheduler()).SetReaderPooling(false).SetMergePolicy(NewLogMergePolicy(10)));
            for (int j = 0; j < 1470; j++)
            {
                AddDocWithIndex(writer, j);
            }
            long midDiskUsage = dir.MaxUsedSizeInBytes;

            dir.ResetMaxUsedSizeInBytes();
            writer.ForceMerge(1);
            writer.Dispose();

            DirectoryReader.Open(dir).Dispose();

            long endDiskUsage = dir.MaxUsedSizeInBytes;

            // The ending index is 50X as large as the starting index; since
            // normal disk usage is about 3X, we allow at most 150X transient
            // usage. If something is wrong with the deleter and it fails to
            // delete intermediate segments, usage will exceed this 150X:
            // Console.WriteLine("start " + startDiskUsage + "; mid " + midDiskUsage + "; end " + endDiskUsage);
            Assert.IsTrue(midDiskUsage < 150 * startDiskUsage, "writer used too much space while adding documents: mid=" + midDiskUsage + " start=" + startDiskUsage + " end=" + endDiskUsage + " max=" + (startDiskUsage * 150));
            Assert.IsTrue(endDiskUsage < 150 * startDiskUsage, "writer used too much space after close: endDiskUsage=" + endDiskUsage + " startDiskUsage=" + startDiskUsage + " max=" + (startDiskUsage * 150));
            dir.Dispose();
        }
Code example #51
            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);

                return new TokenStreamComponents(result, new TestPosIncrementFilter(result));
            }
Code example #52
            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);

                // updateOffsets = true is only permitted with a pre-4.4 version
                // constant, hence LuceneVersion.LUCENE_43.
                return new TokenStreamComponents(tokenizer, new TrimFilter(LuceneVersion.LUCENE_43, tokenizer, true));
            }
Code example #53
            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);

                return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
            }
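The map above is built elsewhere in the test. A hedged sketch of constructing one, assuming Lucene.NET 4.8's SynonymMap.Builder API; this covers single-token synonyms only (multi-token entries go through SynonymMap.Builder.Join):

 using Lucene.Net.Analysis.Synonym;
 using Lucene.Net.Util;

 SynonymMap BuildSynonymMap()
 {
     var builder = new SynonymMap.Builder(true); // true = deduplicate equivalent entries
     // Map "fast" to "quick", keeping the original token as well.
     builder.Add(new CharsRef("fast"), new CharsRef("quick"), true);
     return builder.Build();
 }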
Code example #54
            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

                return new TokenStreamComponents(source, new CJKWidthFilter(source));
            }
Code example #55
 public virtual void TestFilterWithPosIncr()
 {
     TokenStream stream = new MockTokenizer(new StringReader("short toolong evenmuchlongertext a ab toolong foo"), MockTokenizer.WHITESPACE, false);
     CodepointCountFilter filter = new CodepointCountFilter(TEST_VERSION_CURRENT, stream, 2, 6);
     // Dropped tokens show up as position-increment gaps: "ab" follows three
     // removed tokens (increment 4) and "foo" follows one (increment 2).
     AssertTokenStreamContents(filter, new string[] { "short", "ab", "foo" }, new int[] { 1, 4, 2 });
 }
Code example #56
            protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);

                return new TokenStreamComponents(tokenizer, new KStemFilter(tokenizer));
            }
Code example #57
        public virtual void Test()
        {
            Directory dir      = NewDirectory();
            Analyzer  analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
            {
                Tokenizer tokenizer = new MockTokenizer(reader);
                if (fieldName.Contains("payloadsFixed"))
                {
                    TokenFilter filter = new MockFixedLengthPayloadFilter(new Random(0), tokenizer, 1);
                    return new TokenStreamComponents(tokenizer, filter);
                }
                else if (fieldName.Contains("payloadsVariable"))
                {
                    TokenFilter filter = new MockVariableLengthPayloadFilter(new Random(0), tokenizer);
                    return new TokenStreamComponents(tokenizer, filter);
                }
                else
                {
                    return new TokenStreamComponents(tokenizer);
                }
            }, reuseStrategy: Analyzer.PER_FIELD_REUSE_STRATEGY);
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            iwc.SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat()));
            // TODO we could actually add more fields implemented with different PFs
            // or, just put this test into the usual rotation?
            RandomIndexWriter iw           = new RandomIndexWriter(Random, dir, (IndexWriterConfig)iwc.Clone());
            Document          doc          = new Document();
            FieldType         docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED);

            // turn this on for a cross-check
            docsOnlyType.StoreTermVectors = true;
            docsOnlyType.IndexOptions     = IndexOptions.DOCS_ONLY;

            FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED);

            // turn this on for a cross-check
            docsAndFreqsType.StoreTermVectors = true;
            docsAndFreqsType.IndexOptions     = IndexOptions.DOCS_AND_FREQS;

            FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED);

            // turn these on for a cross-check
            positionsType.StoreTermVectors         = true;
            positionsType.StoreTermVectorPositions = true;
            positionsType.StoreTermVectorOffsets   = true;
            positionsType.StoreTermVectorPayloads  = true;
            FieldType offsetsType = new FieldType(positionsType);

            offsetsType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            Field field1 = new Field("field1docs", "", docsOnlyType);
            Field field2 = new Field("field2freqs", "", docsAndFreqsType);
            Field field3 = new Field("field3positions", "", positionsType);
            Field field4 = new Field("field4offsets", "", offsetsType);
            Field field5 = new Field("field5payloadsFixed", "", positionsType);
            Field field6 = new Field("field6payloadsVariable", "", positionsType);
            Field field7 = new Field("field7payloadsFixedOffsets", "", offsetsType);
            Field field8 = new Field("field8payloadsVariableOffsets", "", offsetsType);

            doc.Add(field1);
            doc.Add(field2);
            doc.Add(field3);
            doc.Add(field4);
            doc.Add(field5);
            doc.Add(field6);
            doc.Add(field7);
            doc.Add(field8);
            for (int i = 0; i < MAXDOC; i++)
            {
                string stringValue = Convert.ToString(i) + " verycommon " + English.Int32ToEnglish(i).Replace('-', ' ') + " " + TestUtil.RandomSimpleString(Random);
                field1.SetStringValue(stringValue);
                field2.SetStringValue(stringValue);
                field3.SetStringValue(stringValue);
                field4.SetStringValue(stringValue);
                field5.SetStringValue(stringValue);
                field6.SetStringValue(stringValue);
                field7.SetStringValue(stringValue);
                field8.SetStringValue(stringValue);
                iw.AddDocument(doc);
            }
            iw.Dispose();
            Verify(dir);
            TestUtil.CheckIndex(dir); // for some extra coverage, checkIndex before we forceMerge
            iwc.SetOpenMode(OpenMode.APPEND);
            IndexWriter iw2 = new IndexWriter(dir, (IndexWriterConfig)iwc.Clone());

            iw2.ForceMerge(1);
            iw2.Dispose();
            Verify(dir);
            dir.Dispose();
        }
Code example #58
            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

                return new TokenStreamComponents(tokenizer, new CapitalizationFilter(tokenizer));
            }
Code example #59
            protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);

                return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
            }
Code example #60
            public override TokenStreamComponents CreateComponents(string fieldName, System.IO.TextReader reader)
            {
                Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);

                return new TokenStreamComponents(result, new FoldingFilter(result));
            }