public virtual void TestDefaultArticles()
{
    TextReader reader = new StringReader("l'avion");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("Elision").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "avion" });
}
/// <summary>
/// If the synonyms are completely empty, test that we still analyze correctly.
/// </summary>
public virtual void TestEmptySynonyms()
{
    TextReader reader = new StringReader("GB");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("Synonym", TEST_VERSION_CURRENT, new StringMockResourceLoader(""), "synonyms", "synonyms.txt").Create(stream); // empty file!
    AssertTokenStreamContents(stream, new string[] { "GB" });
}

public virtual void TestStemming()
{
    TextReader reader = new StringReader("räksmörgås");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("ScandinavianNormalization").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "ræksmørgås" });
}
public virtual void TestCaseInsensitive()
{
    TextReader reader = new StringReader("L'avion");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("Elision", "articles", "frenchArticles.txt", "ignoreCase", "true").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "avion" });
}

public virtual void TestStemming()
{
    TextReader reader = new StringReader("questões");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("PortugueseMinimalStem").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "questão" });
}
/// <summary>
/// Ensure the filter actually lowercases text.
/// </summary>
public virtual void TestCasing()
{
    TextReader reader = new StringReader("AĞACI");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("TurkishLowerCase").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "ağacı" });
}

public virtual void TestStemming()
{
    TextReader reader = new StringReader("cariñosa");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("GalicianStem").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "cariñ" });
}

public virtual void TestPositionIncrements()
{
    TextReader reader = new StringReader("foo foobar super-duper-trooper");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("Length", LengthFilterFactory.MIN_KEY, "4", LengthFilterFactory.MAX_KEY, "10").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "foobar" }, new int[] { 2 });
}

public virtual void TestStemming()
{
    TextReader reader = new StringReader("chevaux");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("FrenchMinimalStem").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "cheval" });
}

public virtual void TestOffsets()
{
    string input = "abc- def geh 1234- 5678-";
    TokenStream ts = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
    ts = new HyphenatedWordsFilter(ts);
    AssertTokenStreamContents(ts, new string[] { "abcdef", "geh", "12345678-" }, new int[] { 0, 9, 13 }, new int[] { 8, 12, 24 });
}

public virtual void TestTrimming()
{
    TextReader reader = new StringReader("trim me ");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
    stream = TokenFilterFactory("Trim").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "trim me" });
}
public virtual void TestStemming()
{
    TextReader reader = new StringReader("räksmörgås");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("ScandinavianFolding").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "raksmorgas" });
}
public virtual void TestConsumeAllTokens()
{
    TextReader reader = new StringReader("A1 B2 C3 D4 E5 F6");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("LimitTokenPosition", "maxTokenPosition", "3", "consumeAllTokens", "true").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "A1", "B2", "C3" });
}

/// <summary>
/// Ensure the filter actually stems and normalizes text.
/// </summary>
public virtual void TestStemming()
{
    TextReader reader = new StringReader("Brasília");
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    TokenStream stream = TokenFilterFactory("BrazilianStem").Create(tokenizer);
    AssertTokenStreamContents(stream, new string[] { "brasil" });
}

/// <summary>
/// Ensure the filter actually lowercases (and a bit more) Greek text.
/// </summary>
public virtual void TestNormalization()
{
    TextReader reader = new StringReader("Μάϊος ΜΆΪΟΣ");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("GreekLowerCase").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "μαιοσ", "μαιοσ" });
}
public virtual void TestNormalization()
{
    TextReader reader = new StringReader("های");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("PersianNormalization").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "هاي" });
}
public virtual void TestStemming()
{
    TextReader reader = new StringReader("abc");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("HunspellStem", "dictionary", "simple.dic", "affix", "simple.aff").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "ab" });
}
public virtual void TestLongestOnly()
{
    MockTokenizer tokenizer = new MockTokenizer(new StringReader("lucene is awesome"));
    tokenizer.EnableChecks = true;
    HunspellStemFilter filter = new HunspellStemFilter(tokenizer, dictionary, true, true);
    AssertTokenStreamContents(filter, new string[] { "lucene", "is", "awesome" }, new int[] { 1, 1, 1 });
}
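// Hedged sketch (not from the original source): the "dictionary" field used above is
// initialized elsewhere in the test class, typically by loading the simple.aff/simple.dic
// test resources into a Hunspell Dictionary. The file paths below are placeholders.
private static Lucene.Net.Analysis.Hunspell.Dictionary LoadTestDictionary()
{
    using (Stream affixStream = File.OpenRead("simple.aff"))
    using (Stream dictStream = File.OpenRead("simple.dic"))
    {
        return new Lucene.Net.Analysis.Hunspell.Dictionary(affixStream, dictStream);
    }
}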
public virtual void TestStemming()
{
    TextReader reader = new StringReader("weißbier");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("GermanNormalization").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "weissbier" });
}
public virtual void TestApostropheFilter()
{
    TokenStream stream = new MockTokenizer(new StringReader("Türkiye'de 2003'te Van Gölü'nü gördüm"), MockTokenizer.WHITESPACE, false);
    stream = new TurkishLowerCaseFilter(stream);
    stream = new ApostropheFilter(stream);
    AssertTokenStreamContents(stream, new string[] { "türkiye", "2003", "van", "gölü", "gördüm" });
}

public virtual void TestStemming()
{
    TextReader reader = new StringReader("журналы");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("RussianLightStem").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "журнал" });
}
public virtual void TestStemmingInflectional()
{
    TextReader reader = new StringReader("dibukukannya");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("IndonesianStem", "stemDerivational", "false").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "dibukukan" });
}
/// <summary>
/// Test with numbers.
/// </summary>
public virtual void TestCapitalization12()
{
    TextReader reader = new StringReader("1st 2nd third");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("Capitalization", "keep", "and the it BIG", "onlyFirstWord", "false", "minWordLength", "3", "okPrefix", "McK", "forceFirstLetter", "false").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "1st", "2nd", "Third" });
}

/// <summary>
/// Ensure the ASCIIFoldingFilterFactory works.
/// </summary>
public virtual void TestASCIIFolding()
{
    TextReader reader = new StringReader("Česká");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("ASCIIFolding").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "Ceska" });
}

public virtual void TestCapitalization()
{
    TextReader reader = new StringReader("kiTTEN");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("Capitalization", "keep", "and the it BIG", "onlyFirstWord", "true").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "Kitten" });
}

public virtual void TestStemming()
{
    TextReader reader = new StringReader("äpplen äpple");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("SwedishLightStem").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "äppl", "äppl" });
}

public virtual void TestStemming()
{
    TextReader reader = new StringReader("پیاوەکان");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("SoraniStem").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "پیاو" });
}

/// <summary>
/// Ensure the filter actually normalizes text (numerics, stopwords).
/// </summary>
public virtual void TestFiltering()
{
    TextReader reader = new StringReader("this 1234 Is such a silly filter");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("Chinese").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "Is", "silly", "filter" });
}

/// <summary>
/// Ensure the filter actually stems text.
/// </summary>
public virtual void TestStemming()
{
    TextReader reader = new StringReader("dibukukannya");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("IndonesianStem").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "buku" });
}

public virtual void TestPositionIncrements()
{
    TextReader reader = new StringReader("foo foobar super-duper-trooper");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("CodepointCount", "min", "4", "max", "10").Create(stream);
    AssertTokenStreamContents(stream, new string[] { "foobar" }, new int[] { 2 });
}
protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    return new TokenStreamComponents(source, new EnglishMinimalStemFilter(source));
}

protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    return new TokenStreamComponents(tokenizer, new ShingleFilter(tokenizer));
}

protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    return new TokenStreamComponents(source, new FinnishLightStemFilter(source));
}

protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader);
    return new TokenStreamComponents(tokenizer, new CrazyTokenFilter(tokenizer));
}

protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    return new TokenStreamComponents(tokenizer, new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, min, max));
}

public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    return new TokenStreamComponents(tokenizer, new Lucene47WordDelimiterFilter(tokenizer, flags, protectedWords));
}

public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    return new TokenStreamComponents(tokenizer, new Lucene47WordDelimiterFilter(new LargePosIncTokenFilter(outerInstance, tokenizer), flags, protWords));
}

protected override TokenStreamComponents CreateComponents(string field, TextReader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(TEST_VERSION_CURRENT, new LargePosIncTokenFilter(outerInstance, tokenizer), flags, protWords));
}

public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    return new TokenStreamComponents(tokenizer, new EdgeNGramTokenFilter(LuceneVersion.LUCENE_43, tokenizer, EdgeNGramTokenFilter.Side.BACK, 2, 4));
}

protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(TEST_VERSION_CURRENT, tokenizer, flags, protectedWords));
}
protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    return new TokenStreamComponents(tokenizer, new ReverseStringFilter(TEST_VERSION_CURRENT, tokenizer));
}
/// <summary>
/// Filters MockTokenizer with StopFilter.
/// </summary>
protected internal sealed override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
    return new TokenStreamComponents(tokenizer, new QPTestFilter(tokenizer));
}
public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
    return new TokenStreamComponents(tokenizer, new MockFixedLengthPayloadFilter(Random(), tokenizer, Length));
}

protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
    return new TokenStreamComponents(tokenizer, new PayloadFilter(PayloadCount, tokenizer));
}

public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
    return new TokenStreamComponents(result, new PayloadFilter(result, fieldName));
}
public override TokenStreamComponents CreateComponents(string field, TextReader @in)
{
    Tokenizer tokenizer = new MockTokenizer(@in, MockTokenizer.WHITESPACE, false);
    return new TokenStreamComponents(tokenizer, new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords));
}
protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
    return new TokenStreamComponents(tokenizer, new TrimFilter(TEST_VERSION_CURRENT, tokenizer, false));
}

public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
    return new TokenStreamComponents(tokenizer, tokenizer);
}

protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    return new TokenStreamComponents(tokenizer, new TokenFilterAnonymousInnerClassHelper(this, tokenizer));
}
public virtual void TestCommitOnCloseDiskUsage()
{
    // MemoryCodec, since it uses FST, is not necessarily
    // "additive", ie if you add up N small FSTs, then merge
    // them, the merged result can easily be larger than the
    // sum because the merged FST may use array encoding for
    // some arcs (which uses more space):
    string idFormat = TestUtil.GetPostingsFormat("id");
    string contentFormat = TestUtil.GetPostingsFormat("content");
    AssumeFalse("this test cannot run with Memory codec", idFormat.Equals("Memory", StringComparison.Ordinal) || contentFormat.Equals("Memory", StringComparison.Ordinal));

    MockDirectoryWrapper dir = NewMockDirectory();
    Analyzer analyzer;
    if (Random.NextBoolean())
    {
        // no payloads
        analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
        {
            return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
        });
    }
    else
    {
        // fixed length payloads
        int length = Random.Next(200);
        analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
        {
            Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
            return new TokenStreamComponents(tokenizer, new MockFixedLengthPayloadFilter(Random, tokenizer, length));
        });
    }

    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(10).SetReaderPooling(false).SetMergePolicy(NewLogMergePolicy(10)));
    for (int j = 0; j < 30; j++)
    {
        AddDocWithIndex(writer, j);
    }
    writer.Dispose();
    dir.ResetMaxUsedSizeInBytes();

    dir.TrackDiskUsage = true;
    long startDiskUsage = dir.MaxUsedSizeInBytes;
    writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.APPEND).SetMaxBufferedDocs(10).SetMergeScheduler(new SerialMergeScheduler()).SetReaderPooling(false).SetMergePolicy(NewLogMergePolicy(10)));
    for (int j = 0; j < 1470; j++)
    {
        AddDocWithIndex(writer, j);
    }
    long midDiskUsage = dir.MaxUsedSizeInBytes;
    dir.ResetMaxUsedSizeInBytes();
    writer.ForceMerge(1);
    writer.Dispose();

    DirectoryReader.Open(dir).Dispose();

    long endDiskUsage = dir.MaxUsedSizeInBytes;

    // Ending index is 50X as large as starting index; due
    // to 3X disk usage normally we allow 150X max
    // transient usage. If something is wrong w/ deleter
    // and it doesn't delete intermediate segments then it
    // will exceed this 150X:
    // System.out.println("start " + startDiskUsage + "; mid " + midDiskUsage + ";end " + endDiskUsage);
    Assert.IsTrue(midDiskUsage < 150 * startDiskUsage, "writer used too much space while adding documents: mid=" + midDiskUsage + " start=" + startDiskUsage + " end=" + endDiskUsage + " max=" + (startDiskUsage * 150));
    Assert.IsTrue(endDiskUsage < 150 * startDiskUsage, "writer used too much space after close: endDiskUsage=" + endDiskUsage + " startDiskUsage=" + startDiskUsage + " max=" + (startDiskUsage * 150));
    dir.Dispose();
}
public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
    return new TokenStreamComponents(result, new TestPosIncrementFilter(result));
}

protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
    return new TokenStreamComponents(tokenizer, new TrimFilter(LuceneVersion.LUCENE_43, tokenizer, true));
}
public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
    return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
}
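// Hedged sketch (not from the original source): the SynonymMap "map" used above is built
// elsewhere in the test; a minimal example of how such a map is typically constructed.
// The terms "dogs" -> "dog"/"canine" are illustrative only.
private static SynonymMap BuildExampleSynonymMap()
{
    SynonymMap.Builder builder = new SynonymMap.Builder(true); // true = dedup duplicate rules
    builder.Add(new CharsRef("dogs"), new CharsRef("dog"), true);    // keepOrig = true
    builder.Add(new CharsRef("dogs"), new CharsRef("canine"), true); // stack a second synonym at the same position
    return builder.Build();
}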
protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    return new TokenStreamComponents(source, new CJKWidthFilter(source));
}

public virtual void TestFilterWithPosIncr()
{
    TokenStream stream = new MockTokenizer(new StringReader("short toolong evenmuchlongertext a ab toolong foo"), MockTokenizer.WHITESPACE, false);
    CodepointCountFilter filter = new CodepointCountFilter(TEST_VERSION_CURRENT, stream, 2, 6);
    AssertTokenStreamContents(filter, new string[] { "short", "ab", "foo" }, new int[] { 1, 4, 2 });
}
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
    return new TokenStreamComponents(tokenizer, new KStemFilter(tokenizer));
}
public virtual void Test()
{
    Directory dir = NewDirectory();
    Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
    {
        Tokenizer tokenizer = new MockTokenizer(reader);
        if (fieldName.Contains("payloadsFixed"))
        {
            TokenFilter filter = new MockFixedLengthPayloadFilter(new Random(0), tokenizer, 1);
            return new TokenStreamComponents(tokenizer, filter);
        }
        else if (fieldName.Contains("payloadsVariable"))
        {
            TokenFilter filter = new MockVariableLengthPayloadFilter(new Random(0), tokenizer);
            return new TokenStreamComponents(tokenizer, filter);
        }
        else
        {
            return new TokenStreamComponents(tokenizer);
        }
    }, reuseStrategy: Analyzer.PER_FIELD_REUSE_STRATEGY);

    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwc.SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat()));
    // TODO we could actually add more fields implemented with different PFs
    // or, just put this test into the usual rotation?
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir, (IndexWriterConfig)iwc.Clone());
    Document doc = new Document();

    FieldType docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED);
    // turn this on for a cross-check
    docsOnlyType.StoreTermVectors = true;
    docsOnlyType.IndexOptions = IndexOptions.DOCS_ONLY;

    FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED);
    // turn this on for a cross-check
    docsAndFreqsType.StoreTermVectors = true;
    docsAndFreqsType.IndexOptions = IndexOptions.DOCS_AND_FREQS;

    FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED);
    // turn these on for a cross-check
    positionsType.StoreTermVectors = true;
    positionsType.StoreTermVectorPositions = true;
    positionsType.StoreTermVectorOffsets = true;
    positionsType.StoreTermVectorPayloads = true;

    FieldType offsetsType = new FieldType(positionsType);
    offsetsType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;

    Field field1 = new Field("field1docs", "", docsOnlyType);
    Field field2 = new Field("field2freqs", "", docsAndFreqsType);
    Field field3 = new Field("field3positions", "", positionsType);
    Field field4 = new Field("field4offsets", "", offsetsType);
    Field field5 = new Field("field5payloadsFixed", "", positionsType);
    Field field6 = new Field("field6payloadsVariable", "", positionsType);
    Field field7 = new Field("field7payloadsFixedOffsets", "", offsetsType);
    Field field8 = new Field("field8payloadsVariableOffsets", "", offsetsType);
    doc.Add(field1);
    doc.Add(field2);
    doc.Add(field3);
    doc.Add(field4);
    doc.Add(field5);
    doc.Add(field6);
    doc.Add(field7);
    doc.Add(field8);

    for (int i = 0; i < MAXDOC; i++)
    {
        string stringValue = Convert.ToString(i) + " verycommon " + English.Int32ToEnglish(i).Replace('-', ' ') + " " + TestUtil.RandomSimpleString(Random);
        field1.SetStringValue(stringValue);
        field2.SetStringValue(stringValue);
        field3.SetStringValue(stringValue);
        field4.SetStringValue(stringValue);
        field5.SetStringValue(stringValue);
        field6.SetStringValue(stringValue);
        field7.SetStringValue(stringValue);
        field8.SetStringValue(stringValue);
        iw.AddDocument(doc);
    }

    iw.Dispose();
    Verify(dir);
    TestUtil.CheckIndex(dir); // for some extra coverage, checkIndex before we forceMerge
    iwc.SetOpenMode(OpenMode.APPEND);
    IndexWriter iw2 = new IndexWriter(dir, (IndexWriterConfig)iwc.Clone());
    iw2.ForceMerge(1);
    iw2.Dispose();
    Verify(dir);
    dir.Dispose();
}
public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    return new TokenStreamComponents(tokenizer, new CapitalizationFilter(tokenizer));
}
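// Hedged usage sketch (not from the original source): component factories like the one above
// are typically wrapped in an Analyzer and run through the BaseTokenStreamTestCase random-data
// check. The iteration count is illustrative.
public virtual void TestRandomDataSketch()
{
    Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
    {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new CapitalizationFilter(tokenizer));
    });
    CheckRandomData(Random, a, 1000);
}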
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
    return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
}

public override TokenStreamComponents CreateComponents(string fieldName, System.IO.TextReader reader)
{
    Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
    return new TokenStreamComponents(result, new FoldingFilter(result));
}