/// <summary>
/// For many random unicode strings, slices the input by each token's
/// start/end offsets and asserts that every code point in the slice is a
/// letter. Finishes by running the analyzer through the random-data check.
/// </summary>
public virtual void TestCrossPlaneNormalization2()
{
    var analyzer = new AnalyzerAnonymousInnerClassHelper2();
    var iterations = 1000 * RANDOM_MULTIPLIER;

    for (var round = 0; round < iterations; round++)
    {
        var text = TestUtil.RandomUnicodeString(Random);
        var stream = analyzer.GetTokenStream("foo", text);
        try
        {
            stream.Reset();
            var offsets = stream.AddAttribute<IOffsetAttribute>();
            while (stream.IncrementToken())
            {
                int start = offsets.StartOffset;
                string highlighted = text.Substring(start, offsets.EndOffset - start);

                // Walk the slice one code point (not one UTF-16 unit) at a time.
                int index = 0;
                while (index < highlighted.Length)
                {
                    int codePoint = char.ConvertToUtf32(highlighted, index);
                    assertTrue("non-letter:" + codePoint.ToString("x"), Character.IsLetter(codePoint));
                    index += Character.CharCount(codePoint);
                }
            }
            stream.End();
        }
        finally
        {
            IOUtils.DisposeWhileHandlingException(stream);
        }
    }

    // just for fun
    CheckRandomData(Random, analyzer, iterations);
}
/// <summary>
/// Indexes one document whose field "f1" holds "a 5 a a", then opens the
/// newest segment and checks the term "a": frequency 3, positions 0, 6 and 7,
/// and a payload present only on the first position.
/// </summary>
public virtual void TestTokenReuse()
{
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);

    IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    Document doc = new Document();
    doc.Add(NewTextField("f1", "a 5 a a", Field.Store.YES));
    writer.AddDocument(doc);
    writer.Commit();
    SegmentCommitInfo info = writer.NewestSegment();
    writer.Dispose();

    SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random));
    try
    {
        DocsAndPositionsEnum termPositions = MultiFields.GetTermPositionsEnum(reader, reader.LiveDocs, "f1", new BytesRef("a"));
        Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

        int freq = termPositions.Freq;
        Assert.AreEqual(3, freq);

        Assert.AreEqual(0, termPositions.NextPosition());
        Assert.IsNotNull(termPositions.GetPayload());

        Assert.AreEqual(6, termPositions.NextPosition());
        Assert.IsNull(termPositions.GetPayload());

        Assert.AreEqual(7, termPositions.NextPosition());
        Assert.IsNull(termPositions.GetPayload());
    }
    finally
    {
        // Dispose even when an assertion above throws, so a failing test
        // does not leave the segment reader open.
        reader.Dispose();
    }
}
/// <summary>
/// Runs the random-data check against a fresh analyzer for
/// 100 * RANDOM_MULTIPLIER iterations, passing 1027 as the size argument.
/// </summary>
public virtual void TestRandomHugeStrings()
{
    var rnd = Random;
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);
    int iterations = 100 * RANDOM_MULTIPLIER;

    CheckRandomData(rnd, analyzer, iterations, 1027);
}
/// <summary>
/// For many random unicode strings, slices the input by each token's
/// start/end offsets and asserts that every code point in the slice is a
/// letter. Finishes by running the analyzer through the random-data check.
/// </summary>
public virtual void TestCrossPlaneNormalization2()
{
    var analyzer = new AnalyzerAnonymousInnerClassHelper2();
    var iterations = 1000 * RANDOM_MULTIPLIER;

    for (var round = 0; round < iterations; round++)
    {
        var text = TestUtil.RandomUnicodeString(Random());
        var stream = analyzer.TokenStream("foo", text);
        try
        {
            stream.Reset();
            var offsets = stream.AddAttribute<IOffsetAttribute>();
            while (stream.IncrementToken())
            {
                int start = offsets.StartOffset();
                string highlighted = text.Substring(start, offsets.EndOffset() - offsets.StartOffset());

                // Walk the slice one code point (not one UTF-16 unit) at a time.
                int index = 0;
                while (index < highlighted.Length)
                {
                    int codePoint = char.ConvertToUtf32(highlighted, index);
                    assertTrue("non-letter:" + codePoint.ToString("x"), Character.IsLetter(codePoint));
                    index += Character.CharCount(codePoint);
                }
            }
            stream.End();
        }
        finally
        {
            IOUtils.CloseWhileHandlingException(stream);
        }
    }

    // just for fun
    CheckRandomData(Random(), analyzer, iterations);
}
/// <summary>
/// Runs the random-data check against a fresh analyzer for
/// 100 * RandomMultiplier iterations, passing 8192 as the size argument.
/// </summary>
public virtual void TestRandomHugeStrings()
{
    var rnd = Random;
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);
    int iterations = 100 * RandomMultiplier;

    CheckRandomData(rnd, analyzer, iterations, 8192);
}
/// <summary>
/// Builds an analyzer with "sängerinnen" in its exclusion set and checks
/// that analysing that word yields the same word as the single term.
/// </summary>
public virtual void TestKeyword()
{
    var excluded = new CharArraySet(TEST_VERSION_CURRENT, AsSet("sängerinnen"), false);
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, excluded);

    CheckOneTerm(analyzer, "sängerinnen", "sängerinnen");
}
/// <summary>
/// Builds an analyzer with "quilométricas" in its exclusion set and checks
/// that analysing that word yields the same word as the single term.
/// </summary>
/// <remarks>
/// Converter note: the Java original declared <c>throws java.io.IOException</c>
/// and marked the exclusion set <c>final</c>.
/// </remarks>
public virtual void testKeyword()
{
    var excluded = new CharArraySet(TEST_VERSION_CURRENT, asSet("quilométricas"), false);
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, excluded);

    checkOneTerm(analyzer, "quilométricas", "quilométricas");
}
/// <summary>
/// Blasts random strings through two analyzers in turn, each for
/// 1000 * RANDOM_MULTIPLIER iterations.
/// </summary>
/// <remarks>
/// Converter note: the Java original declared <c>throws Exception</c>.
/// </remarks>
public virtual void testRandomStrings()
{
    int iterations = 1000 * RANDOM_MULTIPLIER;

    Analyzer first = new AnalyzerAnonymousInnerClassHelper(this);
    checkRandomData(random(), first, iterations);

    Analyzer second = new AnalyzerAnonymousInnerClassHelper2(this);
    checkRandomData(random(), second, iterations);
}
/// <summary>
/// Checks analysis consistency for one fixed input string, using an analyzer
/// built from a fixed protected-word set and a fixed signed-byte table.
/// </summary>
public virtual void TestCuriousWikipediaString()
{
    var protectedTerms = new HashSet<string>(
        Arrays.AsList("rrdpafa", "pupmmlu", "xlq", "dyy", "zqrxrrck", "o", "hsrlfvcha"));
    CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, protectedTerms, false);

    sbyte[] table =
    {
        -57, 26, 1, 48, 63, -23, 55, -84, 18, 120, -97, 103,
        58, 13, 84, 89, 57, -13, -63, 5, 28, 97, -54, -94,
        102, -108, -5, 5, 46, 40, 43, 78, 43, -72, 36, 29,
        124, -106, -22, -51, 65, 5, 31, -42, 6, -99, 97, 14,
        81, -128, 74, 100, 54, -55, -25, 53, -71, -98, 44, 33,
        86, 106, -42, 47, 115, -89, -18, -26, 22, -95, -43, 83,
        -125, 105, -104, -24, 106, -16, 126, 115, -105, 97, 65, -33,
        57, 44, -1, 123, -68, 100, 13, -41, -64, -119, 0, 92,
        94, -36, 53, -9, -102, -18, 90, 94, -26, 31, 71, -20
    };

    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, protWords, table);
    CheckAnalysisConsistency(Random(), analyzer, false, "B\u28c3\ue0f8[ \ud800\udfc2 </p> jb");
}
/// <summary>
/// Builds an analyzer with "jaktkarlens" in its exclusion set and checks
/// that analysing that word yields the same word as the single term.
/// </summary>
/// <remarks>
/// Converter note: the Java original declared <c>throws java.io.IOException</c>
/// and marked the exclusion set <c>final</c>.
/// </remarks>
public virtual void testKeyword()
{
    var excluded = new CharArraySet(TEST_VERSION_CURRENT, asSet("jaktkarlens"), false);
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, excluded);

    checkOneTerm(analyzer, "jaktkarlens", "jaktkarlens");
}
/// <summary>
/// Creates the filter over <paramref name="tokenizer"/>, stores the owning
/// analyzer, sets <c>first</c> to true and registers the term, payload and
/// position-increment attributes (in that order).
/// </summary>
/// <param name="outerInstance">The enclosing analyzer helper instance.</param>
/// <param name="tokenizer">The tokenizer passed to the base constructor.</param>
public TokenFilterAnonymousInnerClassHelper(AnalyzerAnonymousInnerClassHelper2 outerInstance, Tokenizer tokenizer)
    : base(tokenizer)
{
    this.OuterInstance = outerInstance;
    first = true;

    // Attribute registration order is kept exactly as in the original.
    termAtt = AddAttribute<ICharTermAttribute>();
    payloadAtt = AddAttribute<IPayloadAttribute>();
    posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
}
/// <summary>
/// Blasts random strings through two analyzers in turn, each for
/// 1000 * RandomMultiplier iterations.
/// </summary>
public virtual void TestRandomStrings()
{
    int iterations = 1000 * RandomMultiplier;

    Analyzer first = new AnalyzerAnonymousInnerClassHelper(this);
    CheckRandomData(Random, first, iterations);

    Analyzer second = new AnalyzerAnonymousInnerClassHelper2(this);
    CheckRandomData(Random, second, iterations);
}
/// <summary>
/// Blasts random strings through two analyzers in turn, each for
/// 1000 * RANDOM_MULTIPLIER iterations.
/// </summary>
public virtual void TestRandomStrings()
{
    int iterations = 1000 * RANDOM_MULTIPLIER;

    Analyzer first = new AnalyzerAnonymousInnerClassHelper(this);
    CheckRandomData(Random, first, iterations);

    Analyzer second = new AnalyzerAnonymousInnerClassHelper2(this);
    CheckRandomData(Random, second, iterations);
}
/// <summary>
/// Ten times over: picks a random min in [2, 10] and a random max in
/// [min, 20], builds an analyzer from that pair and runs the random-data
/// check for 200 * RANDOM_MULTIPLIER iterations with a size argument of 20.
/// </summary>
public virtual void TestRandomStrings()
{
    int round = 0;
    while (round < 10)
    {
        int lower = TestUtil.NextInt32(Random, 2, 10);
        int upper = TestUtil.NextInt32(Random, lower, 20);

        Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, lower, upper);
        CheckRandomData(Random, analyzer, 200 * RANDOM_MULTIPLIER, 20);

        round++;
    }
}
/// <summary>
/// For at least 30 iterations, builds an analyzer from a random automaton,
/// a random lowercase flag and a random limit in [0, 500], then runs the
/// random-data check against it for 100 iterations.
/// </summary>
public virtual void TestRandomRegexps()
{
    int iters = AtLeast(30);
    for (int i = 0; i < iters; i++)
    {
        CharacterRunAutomaton dfa = new CharacterRunAutomaton(AutomatonTestUtil.RandomAutomaton(Random()));
        bool lowercase = Random().NextBoolean();
        int limit = TestUtil.NextInt(Random(), 0, 500);

        Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this, dfa, lowercase, limit);
        try
        {
            CheckRandomData(Random(), a, 100);
        }
        finally
        {
            // Dispose even when the check throws, so a failing iteration
            // does not leak the analyzer.
            a.Dispose();
        }
    }
}
/// <summary>
/// Blasts random strings through the analyzer. Ten times over: picks a
/// random min in [2, 10] and a random max in [min, 20], builds an analyzer
/// from that pair and runs the random-data check for
/// 200 * RANDOM_MULTIPLIER iterations with a size argument of 20.
/// </summary>
/// <remarks>
/// Converter note: the Java original declared <c>throws Exception</c> and
/// marked <c>min</c> and <c>max</c> as <c>final</c>.
/// </remarks>
public virtual void testRandomStrings()
{
    int round = 0;
    while (round < 10)
    {
        int lower = TestUtil.Next(random(), 2, 10);
        int upper = TestUtil.Next(random(), lower, 20);

        Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, lower, upper);
        checkRandomData(random(), analyzer, 200 * RANDOM_MULTIPLIER, 20);

        round++;
    }
}
/// <summary>
/// Runs the random-data check twice: first against an analyzer built from a
/// small fixed dictionary, then against an analyzer built from the
/// hyphenation tree loaded from the "da_UTF8.xml" resource.
/// </summary>
public virtual void TestRandomStrings()
{
    CharArraySet dictionary = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
    Analyzer dictionaryAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, dictionary);
    CheckRandomData(Random, dictionaryAnalyzer, 1000 * RANDOM_MULTIPLIER);

    // The resource stream is only needed while the hyphenation tree is in use.
    using (var xmlStream = this.GetType().getResourceAsStream("da_UTF8.xml"))
    {
        HyphenationTree tree = HyphenationCompoundWordTokenFilter.GetHyphenationTree(xmlStream);
        Analyzer hyphenationAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, tree);
        CheckRandomData(Random, hyphenationAnalyzer, 1000 * RANDOM_MULTIPLIER);
    }
}
/// <summary>
/// Checks analysis consistency for the text "gzw f quaxot" with a
/// normalization map containing the rules "t" -> "" and "tmakdbl" -> "c".
/// </summary>
public virtual void TestFinalOffsetSpecialCase()
{
    var rules = new NormalizeCharMap.Builder();
    rules.Add("t", "");
    // even though this below rule has no effect, the test passes if you remove it!!
    rules.Add("tmakdbl", "c");

    NormalizeCharMap map = rules.Build();
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, map);

    CheckAnalysisConsistency(Random(), analyzer, false, "gzw f quaxot");
}
/// <summary>
/// Ten times over: picks a random min in [2, 10] and a random max in
/// [min, 20], builds an analyzer from that pair and runs the random-data
/// check for 100 * RandomMultiplier iterations. Afterwards runs the check
/// once more against a second analyzer for 1000 * RandomMultiplier
/// iterations with arguments (20, false, false).
/// </summary>
public virtual void TestRandomStrings()
{
    int round = 0;
    while (round < 10)
    {
        int lower = TestUtil.NextInt32(Random, 2, 10);
        int upper = TestUtil.NextInt32(Random, lower, 20);

        Analyzer ranged = new AnalyzerAnonymousInnerClassHelper2(this, lower, upper);
        CheckRandomData(Random, ranged, 100 * RandomMultiplier);

        round++;
    }

    Analyzer other = new AnalyzerAnonymousInnerClassHelper3(this);
    CheckRandomData(Random, other, 1000 * RandomMultiplier, 20, false, false);
}
/// <summary>
/// Ten times over: picks a random min in [2, 10] and a random max in
/// [min, 20], builds an analyzer from that pair and runs the random-data
/// check for 100 * RANDOM_MULTIPLIER iterations. Afterwards runs the check
/// once more against a second analyzer for 1000 * RANDOM_MULTIPLIER
/// iterations with arguments (20, false, false).
/// </summary>
public virtual void TestRandomStrings()
{
    int round = 0;
    while (round < 10)
    {
        int lower = TestUtil.NextInt(Random(), 2, 10);
        int upper = TestUtil.NextInt(Random(), lower, 20);

        Analyzer ranged = new AnalyzerAnonymousInnerClassHelper2(this, lower, upper);
        CheckRandomData(Random(), ranged, 100 * RANDOM_MULTIPLIER);

        round++;
    }

    Analyzer other = new AnalyzerAnonymousInnerClassHelper3(this);
    CheckRandomData(Random(), other, 1000 * RANDOM_MULTIPLIER, 20, false, false);
}
/// <summary>
/// Parses a two-rule synonym file whose rules contain escaped "=>" and ","
/// characters, then checks that an unrelated word passes through unchanged
/// and that each escaped left-hand side is rewritten to its right-hand side.
/// </summary>
public virtual void TestEscapedStuff()
{
    string rules = "a\\=>a => b\\=>b\n" + "a\\,a => b\\,b";

    var keywordAnalyzer = new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false);
    SolrSynonymParser parser = new SolrSynonymParser(true, true, keywordAnalyzer);
    parser.Parse(new StringReader(rules));

    SynonymMap map = parser.Build();
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, map);

    AssertAnalyzesTo(analyzer, "ball", new string[] { "ball" }, new int[] { 1 });
    AssertAnalyzesTo(analyzer, "a=>a", new string[] { "b=>b" }, new int[] { 1 });
    AssertAnalyzesTo(analyzer, "a,a", new string[] { "b,b" }, new int[] { 1 });
}
/// <summary>
/// Parses a syn file with some escaped syntax chars, then checks that an
/// unrelated word passes through unchanged and that each escaped left-hand
/// side is rewritten to its right-hand side.
/// </summary>
/// <remarks>
/// Converter note: the Java original declared <c>throws Exception</c> and
/// marked the synonym map <c>final</c>.
/// </remarks>
public virtual void testEscapedStuff()
{
    string rules = "a\\=>a => b\\=>b\n" + "a\\,a => b\\,b";

    var keywordAnalyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
    SolrSynonymParser parser = new SolrSynonymParser(true, true, keywordAnalyzer);
    parser.parse(new StringReader(rules));

    SynonymMap map = parser.build();
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, map);

    assertAnalyzesTo(analyzer, "ball", new string[] { "ball" }, new int[] { 1 });
    assertAnalyzesTo(analyzer, "a=>a", new string[] { "b=>b" }, new int[] { 1 });
    assertAnalyzesTo(analyzer, "a,a", new string[] { "b,b" }, new int[] { 1 });
}
/// <summary>
/// Repeats the analysis-consistency check on one fixed graph-style input
/// for 100 * RANDOM_MULTIPLIER iterations, building a new analyzer each time.
/// </summary>
public virtual void TestMockGraphTokenFilterOnGraphInput()
{
    for (int iter = 0; iter < 100 * RANDOM_MULTIPLIER; iter++)
    {
        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: iter=" + iter);
        }

        // Make new analyzer each time, because MGTF has fixed seed:
        Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);
        CheckAnalysisConsistency(Random, analyzer, false, "a/x:3 c/y:2 d e f/z:4 g h i j k");
    }
}
/// <summary>
/// Duels a MockAnalyzer (max token length 255) against a second analyzer on
/// at least 50 random simple strings of up to 8192 characters, asserting
/// that both produce equal token streams for each string.
/// </summary>
public virtual void TestLetterAsciiHuge()
{
    var rnd = Random;
    int maxLength = 8192; // CharTokenizer.IO_BUFFER_SIZE*2

    MockAnalyzer left = new MockAnalyzer(rnd, jvmLetter, false);
    left.MaxTokenLength = 255; // match CharTokenizer's max token length
    Analyzer right = new AnalyzerAnonymousInnerClassHelper2(this);

    int rounds = AtLeast(50);
    for (int round = 0; round < rounds; round++)
    {
        string text = TestUtil.RandomSimpleString(rnd, maxLength);

        // Streams are created left first, then right, as in the original call.
        var leftStream = left.GetTokenStream("foo", newStringReader(text));
        var rightStream = right.GetTokenStream("foo", newStringReader(text));
        assertEquals(text, leftStream, rightStream);
    }
}
/// <summary>
/// Parses a syn file with some escaped syntax chars, then checks that an
/// unrelated word passes through unchanged and that each escaped left-hand
/// side is rewritten to its right-hand side.
/// </summary>
/// <remarks>
/// Converter note: the Java original declared <c>throws Exception</c> and
/// marked the synonym map <c>final</c>.
/// </remarks>
public virtual void testEscapedStuff()
{
    string rules = "a\\=>a => b\\=>b\n" + "a\\,a => b\\,b";

    var keywordAnalyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
    SolrSynonymParser parser = new SolrSynonymParser(true, true, keywordAnalyzer);
    parser.parse(new StringReader(rules));

    SynonymMap map = parser.build();
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, map);

    assertAnalyzesTo(analyzer, "ball", new string[] { "ball" }, new int[] { 1 });
    assertAnalyzesTo(analyzer, "a=>a", new string[] { "b=>b" }, new int[] { 1 });
    assertAnalyzesTo(analyzer, "a,a", new string[] { "b,b" }, new int[] { 1 });
}
/// <summary>
/// Blasts random strings through two analyzers: one built from a small
/// fixed dictionary, and one built from the hyphenation tree loaded from
/// the "da_UTF8.xml" resource.
/// </summary>
/// <remarks>
/// Converter note: the Java original declared <c>throws Exception</c> and
/// marked the dictionary and the hyphenator <c>final</c>.
/// </remarks>
public virtual void testRandomStrings()
{
    CharArraySet dictionary = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
    Analyzer dictionaryAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, dictionary);
    checkRandomData(random(), dictionaryAnalyzer, 1000 * RANDOM_MULTIPLIER);

    InputSource source = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
    HyphenationTree tree = HyphenationCompoundWordTokenFilter.getHyphenationTree(source);
    Analyzer hyphenationAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, tree);
    checkRandomData(random(), hyphenationAnalyzer, 1000 * RANDOM_MULTIPLIER);
}
/// <summary>
/// Checks tokens, start offsets, end offsets and position increments that
/// three analyzers (all built from the same flags and protected-word set)
/// produce for a handful of fixed inputs.
/// </summary>
public virtual void TestPositionIncrements()
{
    WordDelimiterFlags flags =
        WordDelimiterFlags.GENERATE_WORD_PARTS
        | WordDelimiterFlags.GENERATE_NUMBER_PARTS
        | WordDelimiterFlags.CATENATE_ALL
        | WordDelimiterFlags.SPLIT_ON_CASE_CHANGE
        | WordDelimiterFlags.SPLIT_ON_NUMERICS
        | WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE;
    CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new string[] { "NUTCH" }, false);

    /* analyzer that uses whitespace + wdf */
    Analyzer wdfAnalyzer = new AnalyzerAnonymousInnerClassHelper(this, flags, protWords);

    /* in this case, works as expected. */
    AssertAnalyzesTo(wdfAnalyzer, "LUCENE / SOLR",
        new string[] { "LUCENE", "SOLR" },
        new int[] { 0, 9 },
        new int[] { 6, 13 },
        new int[] { 1, 1 });

    /* only in this case, posInc of 2 ?! */
    AssertAnalyzesTo(wdfAnalyzer, "LUCENE / solR",
        new string[] { "LUCENE", "sol", "solR", "R" },
        new int[] { 0, 9, 9, 12 },
        new int[] { 6, 12, 13, 13 },
        new int[] { 1, 1, 0, 1 });

    AssertAnalyzesTo(wdfAnalyzer, "LUCENE / NUTCH SOLR",
        new string[] { "LUCENE", "NUTCH", "SOLR" },
        new int[] { 0, 9, 15 },
        new int[] { 6, 14, 19 },
        new int[] { 1, 1, 1 });

    /* analyzer that will consume tokens with large position increments */
    Analyzer largeGapAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, flags, protWords);

    /* increment of "largegap" is preserved */
    AssertAnalyzesTo(largeGapAnalyzer, "LUCENE largegap SOLR",
        new string[] { "LUCENE", "largegap", "SOLR" },
        new int[] { 0, 7, 16 },
        new int[] { 6, 15, 20 },
        new int[] { 1, 10, 1 });

    /* the "/" had a position increment of 10, where did it go?!?!! */
    AssertAnalyzesTo(largeGapAnalyzer, "LUCENE / SOLR",
        new string[] { "LUCENE", "SOLR" },
        new int[] { 0, 9 },
        new int[] { 6, 13 },
        new int[] { 1, 11 });

    /* in this case, the increment of 10 from the "/" is carried over */
    AssertAnalyzesTo(largeGapAnalyzer, "LUCENE / solR",
        new string[] { "LUCENE", "sol", "solR", "R" },
        new int[] { 0, 9, 9, 12 },
        new int[] { 6, 12, 13, 13 },
        new int[] { 1, 11, 0, 1 });

    AssertAnalyzesTo(largeGapAnalyzer, "LUCENE / NUTCH SOLR",
        new string[] { "LUCENE", "NUTCH", "SOLR" },
        new int[] { 0, 9, 15 },
        new int[] { 6, 14, 19 },
        new int[] { 1, 11, 1 });

    Analyzer thirdAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, flags, protWords);

    AssertAnalyzesTo(thirdAnalyzer, "lucene.solr",
        new string[] { "lucene", "lucenesolr", "solr" },
        new int[] { 0, 0, 7 },
        new int[] { 6, 11, 11 },
        new int[] { 1, 0, 1 });

    /* the stopword should add a gap here */
    AssertAnalyzesTo(thirdAnalyzer, "the lucene.solr",
        new string[] { "lucene", "lucenesolr", "solr" },
        new int[] { 4, 4, 11 },
        new int[] { 10, 15, 15 },
        new int[] { 2, 0, 1 });
}
/// <summary>
/// Registers the synonym "a b" -> "foo" with the final argument set to true,
/// asserts the exact tokens, offsets, position increments and position
/// lengths produced for "a b c", then checks analysis consistency on the
/// same input.
/// </summary>
public virtual void TestDoKeepOrig()
{
    b = new SynonymMap.Builder(true);
    Add("a b", "foo", true);

    SynonymMap synonyms = b.Build();
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, synonyms);

    AssertAnalyzesTo(analyzer, "a b c",
        new string[] { "a", "foo", "b", "c" },
        new int[] { 0, 0, 2, 4 },
        new int[] { 1, 3, 3, 5 },
        null,
        new int[] { 1, 0, 1, 1 },
        new int[] { 1, 2, 1, 1 },
        true);

    CheckAnalysisConsistency(Random(), analyzer, false, "a b c");
}
//@Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
/// <summary>
/// Checks analysis consistency for the text "gzw f quaxot" with a
/// normalization map containing the rules "t" -> "" and "tmakdbl" -> "c".
/// </summary>
/// <remarks>
/// Converter note: the Java original declared <c>throws Exception</c> and
/// marked the builder and the map <c>final</c>.
/// </remarks>
public virtual void testFinalOffsetSpecialCase()
{
    var rules = new NormalizeCharMap.Builder();
    rules.add("t", "");
    // even though this below rule has no effect, the test passes if you remove it!!
    rules.add("tmakdbl", "c");

    NormalizeCharMap map = rules.build();
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, map);

    checkAnalysisConsistency(random(), analyzer, false, "gzw f quaxot");
}
/// <summary>
/// Checks tokens, start offsets, end offsets and position increments that
/// three analyzers (all built from the same flags and protected-word set)
/// produce for a handful of fixed inputs.
/// </summary>
/// <remarks>
/// Converter notes: the Java original was annotated <c>@Test</c>, declared
/// <c>throws Exception</c>, and marked <c>flags</c> and <c>protWords</c> as
/// <c>final</c>. The Java source built the protected-word set with
/// <c>new HashSet&lt;&gt;(Arrays.asList("NUTCH"))</c>.
/// </remarks>
public virtual void testPositionIncrements()
{
    int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;

    // The converter emitted the Java diamond form `new HashSet <>("NUTCH")`,
    // which is not valid C#; a one-element string set is what the Java
    // original constructed.
    CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<string> { "NUTCH" }, false);

    /* analyzer that uses whitespace + wdf */
    Analyzer a = new AnalyzerAnonymousInnerClassHelper(this, flags, protWords);

    /* in this case, works as expected. */
    assertAnalyzesTo(a, "LUCENE / SOLR",
        new string[] { "LUCENE", "SOLR" },
        new int[] { 0, 9 },
        new int[] { 6, 13 },
        new int[] { 1, 1 });

    /* only in this case, posInc of 2 ?! */
    assertAnalyzesTo(a, "LUCENE / solR",
        new string[] { "LUCENE", "sol", "solR", "R" },
        new int[] { 0, 9, 9, 12 },
        new int[] { 6, 12, 13, 13 },
        new int[] { 1, 1, 0, 1 });

    assertAnalyzesTo(a, "LUCENE / NUTCH SOLR",
        new string[] { "LUCENE", "NUTCH", "SOLR" },
        new int[] { 0, 9, 15 },
        new int[] { 6, 14, 19 },
        new int[] { 1, 1, 1 });

    /* analyzer that will consume tokens with large position increments */
    Analyzer a2 = new AnalyzerAnonymousInnerClassHelper2(this, flags, protWords);

    /* increment of "largegap" is preserved */
    assertAnalyzesTo(a2, "LUCENE largegap SOLR",
        new string[] { "LUCENE", "largegap", "SOLR" },
        new int[] { 0, 7, 16 },
        new int[] { 6, 15, 20 },
        new int[] { 1, 10, 1 });

    /* the "/" had a position increment of 10, where did it go?!?!! */
    assertAnalyzesTo(a2, "LUCENE / SOLR",
        new string[] { "LUCENE", "SOLR" },
        new int[] { 0, 9 },
        new int[] { 6, 13 },
        new int[] { 1, 11 });

    /* in this case, the increment of 10 from the "/" is carried over */
    assertAnalyzesTo(a2, "LUCENE / solR",
        new string[] { "LUCENE", "sol", "solR", "R" },
        new int[] { 0, 9, 9, 12 },
        new int[] { 6, 12, 13, 13 },
        new int[] { 1, 11, 0, 1 });

    assertAnalyzesTo(a2, "LUCENE / NUTCH SOLR",
        new string[] { "LUCENE", "NUTCH", "SOLR" },
        new int[] { 0, 9, 15 },
        new int[] { 6, 14, 19 },
        new int[] { 1, 11, 1 });

    Analyzer a3 = new AnalyzerAnonymousInnerClassHelper3(this, flags, protWords);

    assertAnalyzesTo(a3, "lucene.solr",
        new string[] { "lucene", "lucenesolr", "solr" },
        new int[] { 0, 0, 7 },
        new int[] { 6, 11, 11 },
        new int[] { 1, 0, 1 });

    /* the stopword should add a gap here */
    assertAnalyzesTo(a3, "the lucene.solr",
        new string[] { "lucene", "lucenesolr", "solr" },
        new int[] { 4, 4, 11 },
        new int[] { 10, 15, 15 },
        new int[] { 2, 0, 1 });
}
/// <summary>
/// Checks the tokens and position increments produced for one-, two- and
/// three-word inputs built from common and non-common words.
/// </summary>
/// <remarks>
/// Converter note: the Java original declared <c>throws Exception</c>.
/// </remarks>
public virtual void testCommonGramsFilter()
{
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);

    // Stop words used below are "of" "the" and "s"

    // one word queries
    assertAnalyzesTo(analyzer, "the", new string[] { "the" });
    assertAnalyzesTo(analyzer, "foo", new string[] { "foo" });

    // two word queries
    assertAnalyzesTo(analyzer, "brown fox",
        new string[] { "brown", "fox" },
        new int[] { 1, 1 });
    assertAnalyzesTo(analyzer, "the fox",
        new string[] { "the", "the_fox", "fox" },
        new int[] { 1, 0, 1 });
    assertAnalyzesTo(analyzer, "fox of",
        new string[] { "fox", "fox_of", "of" },
        new int[] { 1, 0, 1 });
    assertAnalyzesTo(analyzer, "of the",
        new string[] { "of", "of_the", "the" },
        new int[] { 1, 0, 1 });

    // 3 word combinations s=stopword/common word n=not a stop word
    assertAnalyzesTo(analyzer, "n n n",
        new string[] { "n", "n", "n" },
        new int[] { 1, 1, 1 });
    assertAnalyzesTo(analyzer, "quick brown fox",
        new string[] { "quick", "brown", "fox" },
        new int[] { 1, 1, 1 });

    assertAnalyzesTo(analyzer, "n n s",
        new string[] { "n", "n", "n_s", "s" },
        new int[] { 1, 1, 0, 1 });
    assertAnalyzesTo(analyzer, "quick brown the",
        new string[] { "quick", "brown", "brown_the", "the" },
        new int[] { 1, 1, 0, 1 });

    assertAnalyzesTo(analyzer, "n s n",
        new string[] { "n", "n_s", "s", "s_n", "n" },
        new int[] { 1, 0, 1, 0, 1 });
    assertAnalyzesTo(analyzer, "quick the fox",
        new string[] { "quick", "quick_the", "the", "the_fox", "fox" },
        new int[] { 1, 0, 1, 0, 1 });

    assertAnalyzesTo(analyzer, "n s s",
        new string[] { "n", "n_s", "s", "s_s", "s" },
        new int[] { 1, 0, 1, 0, 1 });
    assertAnalyzesTo(analyzer, "fox of the",
        new string[] { "fox", "fox_of", "of", "of_the", "the" },
        new int[] { 1, 0, 1, 0, 1 });

    assertAnalyzesTo(analyzer, "s n n",
        new string[] { "s", "s_n", "n", "n" },
        new int[] { 1, 0, 1, 1 });
    assertAnalyzesTo(analyzer, "the quick brown",
        new string[] { "the", "the_quick", "quick", "brown" },
        new int[] { 1, 0, 1, 1 });

    assertAnalyzesTo(analyzer, "s n s",
        new string[] { "s", "s_n", "n", "n_s", "s" },
        new int[] { 1, 0, 1, 0, 1 });
    assertAnalyzesTo(analyzer, "the fox of",
        new string[] { "the", "the_fox", "fox", "fox_of", "of" },
        new int[] { 1, 0, 1, 0, 1 });

    assertAnalyzesTo(analyzer, "s s n",
        new string[] { "s", "s_s", "s", "s_n", "n" },
        new int[] { 1, 0, 1, 0, 1 });
    assertAnalyzesTo(analyzer, "of the fox",
        new string[] { "of", "of_the", "the", "the_fox", "fox" },
        new int[] { 1, 0, 1, 0, 1 });

    assertAnalyzesTo(analyzer, "s s s",
        new string[] { "s", "s_s", "s", "s_s", "s" },
        new int[] { 1, 0, 1, 0, 1 });
    assertAnalyzesTo(analyzer, "of the of",
        new string[] { "of", "of_the", "the", "the_of", "of" },
        new int[] { 1, 0, 1, 0, 1 });
}
/// <summary>
/// Checks the tokens and position increments produced for one-, two- and
/// three-word inputs built from common and non-common words.
/// </summary>
public virtual void TestCommonGramsFilter()
{
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);

    // Stop words used below are "of" "the" and "s"

    // one word queries
    AssertAnalyzesTo(analyzer, "the", new string[] { "the" });
    AssertAnalyzesTo(analyzer, "foo", new string[] { "foo" });

    // two word queries
    AssertAnalyzesTo(analyzer, "brown fox",
        new string[] { "brown", "fox" },
        new int[] { 1, 1 });
    AssertAnalyzesTo(analyzer, "the fox",
        new string[] { "the", "the_fox", "fox" },
        new int[] { 1, 0, 1 });
    AssertAnalyzesTo(analyzer, "fox of",
        new string[] { "fox", "fox_of", "of" },
        new int[] { 1, 0, 1 });
    AssertAnalyzesTo(analyzer, "of the",
        new string[] { "of", "of_the", "the" },
        new int[] { 1, 0, 1 });

    // 3 word combinations s=stopword/common word n=not a stop word
    AssertAnalyzesTo(analyzer, "n n n",
        new string[] { "n", "n", "n" },
        new int[] { 1, 1, 1 });
    AssertAnalyzesTo(analyzer, "quick brown fox",
        new string[] { "quick", "brown", "fox" },
        new int[] { 1, 1, 1 });

    AssertAnalyzesTo(analyzer, "n n s",
        new string[] { "n", "n", "n_s", "s" },
        new int[] { 1, 1, 0, 1 });
    AssertAnalyzesTo(analyzer, "quick brown the",
        new string[] { "quick", "brown", "brown_the", "the" },
        new int[] { 1, 1, 0, 1 });

    AssertAnalyzesTo(analyzer, "n s n",
        new string[] { "n", "n_s", "s", "s_n", "n" },
        new int[] { 1, 0, 1, 0, 1 });
    AssertAnalyzesTo(analyzer, "quick the fox",
        new string[] { "quick", "quick_the", "the", "the_fox", "fox" },
        new int[] { 1, 0, 1, 0, 1 });

    AssertAnalyzesTo(analyzer, "n s s",
        new string[] { "n", "n_s", "s", "s_s", "s" },
        new int[] { 1, 0, 1, 0, 1 });
    AssertAnalyzesTo(analyzer, "fox of the",
        new string[] { "fox", "fox_of", "of", "of_the", "the" },
        new int[] { 1, 0, 1, 0, 1 });

    AssertAnalyzesTo(analyzer, "s n n",
        new string[] { "s", "s_n", "n", "n" },
        new int[] { 1, 0, 1, 1 });
    AssertAnalyzesTo(analyzer, "the quick brown",
        new string[] { "the", "the_quick", "quick", "brown" },
        new int[] { 1, 0, 1, 1 });

    AssertAnalyzesTo(analyzer, "s n s",
        new string[] { "s", "s_n", "n", "n_s", "s" },
        new int[] { 1, 0, 1, 0, 1 });
    AssertAnalyzesTo(analyzer, "the fox of",
        new string[] { "the", "the_fox", "fox", "fox_of", "of" },
        new int[] { 1, 0, 1, 0, 1 });

    AssertAnalyzesTo(analyzer, "s s n",
        new string[] { "s", "s_s", "s", "s_n", "n" },
        new int[] { 1, 0, 1, 0, 1 });
    AssertAnalyzesTo(analyzer, "of the fox",
        new string[] { "of", "of_the", "the", "the_fox", "fox" },
        new int[] { 1, 0, 1, 0, 1 });

    AssertAnalyzesTo(analyzer, "s s s",
        new string[] { "s", "s_s", "s", "s_s", "s" },
        new int[] { 1, 0, 1, 0, 1 });
    AssertAnalyzesTo(analyzer, "of the of",
        new string[] { "of", "of_the", "the", "the_of", "of" },
        new int[] { 1, 0, 1, 0, 1 });
}
/// <summary>
/// LUCENE-3642: normalize BMP->SMP and check that offsets are correct.
/// For many random unicode strings, slices the input by each token's
/// start/end offsets and asserts that every code point in the slice is a
/// letter. Finishes by running the analyzer through the random-data check.
/// </summary>
/// <remarks>
/// Converter note: the Java original declared <c>throws java.io.IOException</c>.
/// </remarks>
public virtual void testCrossPlaneNormalization2()
{
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);
    int num = 1000 * RANDOM_MULTIPLIER;
    for (int i = 0; i < num; i++)
    {
        string s = TestUtil.randomUnicodeString(random());
        TokenStream ts = analyzer.tokenStream("foo", s);
        try
        {
            ts.reset();
            OffsetAttribute offsetAtt = ts.addAttribute(typeof(OffsetAttribute));
            while (ts.incrementToken())
            {
                string highlightedText = StringHelperClass.SubstringSpecial(s, offsetAtt.startOffset(), offsetAtt.endOffset());

                // The converter left `char.charCount(cp)` and `char.IsLetter(cp)`
                // with an int argument; neither exists on System.Char. A code
                // point at or above 0x10000 occupies two UTF-16 units, and the
                // (string, index) overload of IsLetter classifies the character
                // starting at that index.
                for (int j = 0, cp = 0; j < highlightedText.Length; j += cp >= 0x10000 ? 2 : 1)
                {
                    cp = char.ConvertToUtf32(highlightedText, j);
                    assertTrue("non-letter:" + cp.ToString("x"), char.IsLetter(highlightedText, j));
                }
            }
            ts.end();
        }
        finally
        {
            IOUtils.closeWhileHandlingException(ts);
        }
    }

    // just for fun
    checkRandomData(random(), analyzer, num);
}
/// <summary>
/// Forwards the version and reader to the base constructor and stores the
/// owning analyzer helper.
/// </summary>
/// <param name="outerInstance">The enclosing analyzer helper instance.</param>
/// <param name="TEST_VERSION_CURRENT">Version value passed to the base constructor.</param>
/// <param name="reader">Input reader passed to the base constructor.</param>
public LetterTokenizerAnonymousInnerClassHelper2(
    AnalyzerAnonymousInnerClassHelper2 outerInstance,
    UnknownType TEST_VERSION_CURRENT,
    Reader reader)
    : base(TEST_VERSION_CURRENT, reader)
{
    this.outerInstance = outerInstance;
}
/// <summary>
/// Builds an analyzer with "quilométricas" in its exclusion set and checks
/// that analysing that word yields the same word as the single term.
/// </summary>
public virtual void TestKeyword()
{
    var excluded = new CharArraySet(TEST_VERSION_CURRENT, AsSet("quilométricas"), false);
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, excluded);

    CheckOneTerm(analyzer, "quilométricas", "quilométricas");
}
/// <summary>
/// Runs the random-data check against a fresh analyzer for
/// 100 * RANDOM_MULTIPLIER iterations, passing 8192 as the size argument.
/// </summary>
public virtual void TestRandomHugeStrings()
{
    var rnd = Random();
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);
    int iterations = 100 * RANDOM_MULTIPLIER;

    CheckRandomData(rnd, analyzer, iterations, 8192);
}
/// <summary>
/// Checks that analysing the empty string yields the empty string as the
/// single term.
/// </summary>
public virtual void TestEmptyTerm()
{
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2();

    CheckOneTerm(analyzer, "", "");
}
/// <summary>
/// Ten times over: picks a random min in [2, 10] and a random max in
/// [min, 20], builds an analyzer from that pair and runs the random-data
/// check twice (size arguments 20 and 8192). Afterwards runs the check twice
/// more against a second analyzer with the trailing arguments (false, false).
/// </summary>
public virtual void TestRandomStrings()
{
    int round = 0;
    while (round < 10)
    {
        int lower = TestUtil.NextInt(Random(), 2, 10);
        int upper = TestUtil.NextInt(Random(), lower, 20);

        Analyzer ranged = new AnalyzerAnonymousInnerClassHelper(this, lower, upper);
        CheckRandomData(Random(), ranged, 100 * RANDOM_MULTIPLIER, 20);
        CheckRandomData(Random(), ranged, 10 * RANDOM_MULTIPLIER, 8192);

        round++;
    }

    Analyzer other = new AnalyzerAnonymousInnerClassHelper2(this);
    CheckRandomData(Random(), other, 1000 * RANDOM_MULTIPLIER, 20, false, false);
    CheckRandomData(Random(), other, 100 * RANDOM_MULTIPLIER, 8192, false, false);
}
/// <summary>
/// Repeats the analysis-consistency check on one fixed graph-style input
/// for 100 * RANDOM_MULTIPLIER iterations, building a new analyzer each time.
/// </summary>
public virtual void TestMockGraphTokenFilterOnGraphInput()
{
    for (int iter = 0; iter < 100 * RANDOM_MULTIPLIER; iter++)
    {
        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: iter=" + iter);
        }

        // Make new analyzer each time, because MGTF has fixed seed:
        Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);
        CheckAnalysisConsistency(Random(), analyzer, false, "a/x:3 c/y:2 d e f/z:4 g h i j k");
    }
}
// NOTE: this is an invalid test... SynFilter today can't
// properly consume a graph... we can re-enable this once
// we fix that...
/*
// Adds MockGraphTokenFilter before SynFilter:
public void testRandom2GraphBefore() throws Exception {
  final int numIters = atLeast(10);
  Random random = random();
  for (int i = 0; i < numIters; i++) {
    b = new SynonymMap.Builder(random.nextBoolean());
    final int numEntries = atLeast(10);
    for (int j = 0; j < numEntries; j++) {
      add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random.nextBoolean();

    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        TokenStream graph = new MockGraphTokenFilter(random(), tokenizer);
        return new TokenStreamComponents(tokenizer, new SynonymFilter(graph, map, ignoreCase));
      }
    };

    checkRandomData(random, analyzer, 1000*RANDOM_MULTIPLIER);
  }
}
*/

/// <summary>
/// Adds MockGraphTokenFilter after SynFilter (per the original comment; the helper
/// analyzer that does this is not visible here).
/// <para/>
/// Each iteration builds a synonym map from at least ten random non-empty string
/// pairs, picks a random <c>ignoreCase</c> flag, and runs <c>checkRandomData</c>
/// on the resulting analyzer for 100 iterations.
/// </summary>
public virtual void testRandom2GraphAfter()
{
    int numIters = atLeast(3);
    // The local must not be named "random": in C# a local is in scope inside its own
    // initializer, so "Random random = random();" would bind to the local and not compile.
    Random rnd = random();
    for (int i = 0; i < numIters; i++)
    {
        b = new SynonymMap.Builder(rnd.nextBoolean());
        int numEntries = atLeast(10);
        for (int j = 0; j < numEntries; j++)
        {
            add(randomNonEmptyString(), randomNonEmptyString(), rnd.nextBoolean());
        }
        SynonymMap map = b.build();
        bool ignoreCase = rnd.nextBoolean();

        Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, map, ignoreCase);

        checkRandomData(rnd, analyzer, 100);
    }
}
/// <summary>
/// Blasts random data through two analyzers: one built from a small in-memory
/// dictionary, and one built from the hyphenation tree loaded from the
/// <c>da_UTF8.xml</c> resource.
/// </summary>
public virtual void TestRandomStrings()
{
    CharArraySet dictionary = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
    Analyzer dictionaryAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, dictionary);
    CheckRandomData(Random(), dictionaryAnalyzer, 1000 * RANDOM_MULTIPLIER);

    //InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
    using (var grammarStream = this.GetType().getResourceAsStream("da_UTF8.xml"))
    {
        HyphenationTree hyphenationTree = HyphenationCompoundWordTokenFilter.GetHyphenationTree(grammarStream);
        Analyzer hyphenationAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, hyphenationTree);
        CheckRandomData(Random(), hyphenationAnalyzer, 1000 * RANDOM_MULTIPLIER);
    }
}
/// <summary>
/// Blast some random strings through the analyzer.
/// <para/>
/// Ten rounds use an analyzer built from a random (min, max) pair, where min is drawn
/// from [2, 10] and max from [min, 20]; afterwards a second analyzer is checked with
/// the two trailing <c>false</c> flags passed to <c>checkRandomData</c>.
/// </summary>
public virtual void testRandomStrings()
{
    for (int i = 0; i < 10; i++)
    {
        // The Java original calls TestUtil.nextInt; the converter shortened it to
        // "Next". Use NextInt, matching the ported TestRandomStrings in this file.
        int min = TestUtil.NextInt(random(), 2, 10);
        int max = TestUtil.NextInt(random(), min, 20);

        Analyzer a = new AnalyzerAnonymousInnerClassHelper(this, min, max);
        checkRandomData(random(), a, 100 * RANDOM_MULTIPLIER, 20);
        checkRandomData(random(), a, 10 * RANDOM_MULTIPLIER, 8192);
    }

    Analyzer b = new AnalyzerAnonymousInnerClassHelper2(this);
    checkRandomData(random(), b, 1000 * RANDOM_MULTIPLIER, 20, false, false);
    checkRandomData(random(), b, 100 * RANDOM_MULTIPLIER, 8192, false, false);
}
/// <summary>
/// Wraps <paramref name="tokenizer"/>, remembers <paramref name="outerInstance"/>,
/// marks the filter as being on its first token, and registers the term, payload and
/// position-increment attributes.
/// </summary>
public TokenFilterAnonymousInnerClassHelper(
    AnalyzerAnonymousInnerClassHelper2 outerInstance,
    Tokenizer tokenizer)
    : base(tokenizer)
{
    OuterInstance = outerInstance;
    first = true;

    // Attributes are registered in the same order as before.
    termAtt = AddAttribute<ICharTermAttribute>();
    payloadAtt = AddAttribute<IPayloadAttribute>();
    posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
}
/// <summary>
/// Runs <c>VocabularyAssert.AssertVocabulary</c> with the helper analyzer against
/// the <c>nn_light.txt</c> data file.
/// </summary>
public virtual void TestNynorskVocabulary()
{
    Analyzer nynorskAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this);

    VocabularyAssert.AssertVocabulary(nynorskAnalyzer, GetDataFile("nn_light.txt"));
}
/// <summary>
/// Registers the rule "a b" -> "foo" with the trailing <c>true</c> flag passed to
/// <c>add</c>, then checks the tokens, offsets, position increments and position
/// lengths produced for "a b c", followed by an analysis-consistency check.
/// </summary>
public virtual void testDoKeepOrig()
{
    b = new SynonymMap.Builder(true);
    add("a b", "foo", true);
    SynonymMap synonyms = b.build();

    Analyzer synonymAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, synonyms);

    string[] expectedTerms = new string[] {"a", "foo", "b", "c"};
    int[] expectedStartOffsets = new int[] {0, 0, 2, 4};
    int[] expectedEndOffsets = new int[] {1, 3, 3, 5};
    int[] expectedPosIncrements = new int[] {1, 0, 1, 1};
    int[] expectedPosLengths = new int[] {1, 2, 1, 1};

    assertAnalyzesTo(
        synonymAnalyzer,
        "a b c",
        expectedTerms,
        expectedStartOffsets,
        expectedEndOffsets,
        null,
        expectedPosIncrements,
        expectedPosLengths,
        true);
    checkAnalysisConsistency(random(), synonymAnalyzer, false, "a b c");
}
/// <summary>
/// Checks terms, start/end offsets and position increments produced by three
/// analyzers that share the same WordDelimiterFilter flags and the protected word
/// "NUTCH".
/// </summary>
public virtual void TestPositionIncrements()
{
    int flags = WordDelimiterFilter.GENERATE_WORD_PARTS
        | WordDelimiterFilter.GENERATE_NUMBER_PARTS
        | WordDelimiterFilter.CATENATE_ALL
        | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE
        | WordDelimiterFilter.SPLIT_ON_NUMERICS
        | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE;
    CharArraySet protectedWords = new CharArraySet(TEST_VERSION_CURRENT, new[] { "NUTCH" }, false);

    /* analyzer that uses whitespace + wdf */
    Analyzer plain = new AnalyzerAnonymousInnerClassHelper(this, flags, protectedWords);

    /* in this case, works as expected. */
    AssertAnalyzesTo(plain, "LUCENE / SOLR",
        new[] { "LUCENE", "SOLR" },
        new[] { 0, 9 },
        new[] { 6, 13 },
        new[] { 1, 1 });

    /* only in this case, posInc of 2 ?! */
    AssertAnalyzesTo(plain, "LUCENE / solR",
        new[] { "LUCENE", "sol", "solR", "R" },
        new[] { 0, 9, 9, 12 },
        new[] { 6, 12, 13, 13 },
        new[] { 1, 1, 0, 1 });

    AssertAnalyzesTo(plain, "LUCENE / NUTCH SOLR",
        new[] { "LUCENE", "NUTCH", "SOLR" },
        new[] { 0, 9, 15 },
        new[] { 6, 14, 19 },
        new[] { 1, 1, 1 });

    /* analyzer that will consume tokens with large position increments */
    Analyzer largeGap = new AnalyzerAnonymousInnerClassHelper2(this, flags, protectedWords);

    /* increment of "largegap" is preserved */
    AssertAnalyzesTo(largeGap, "LUCENE largegap SOLR",
        new[] { "LUCENE", "largegap", "SOLR" },
        new[] { 0, 7, 16 },
        new[] { 6, 15, 20 },
        new[] { 1, 10, 1 });

    /* the "/" had a position increment of 10, where did it go?!?!! */
    AssertAnalyzesTo(largeGap, "LUCENE / SOLR",
        new[] { "LUCENE", "SOLR" },
        new[] { 0, 9 },
        new[] { 6, 13 },
        new[] { 1, 11 });

    /* in this case, the increment of 10 from the "/" is carried over */
    AssertAnalyzesTo(largeGap, "LUCENE / solR",
        new[] { "LUCENE", "sol", "solR", "R" },
        new[] { 0, 9, 9, 12 },
        new[] { 6, 12, 13, 13 },
        new[] { 1, 11, 0, 1 });

    AssertAnalyzesTo(largeGap, "LUCENE / NUTCH SOLR",
        new[] { "LUCENE", "NUTCH", "SOLR" },
        new[] { 0, 9, 15 },
        new[] { 6, 14, 19 },
        new[] { 1, 11, 1 });

    Analyzer withStopwords = new AnalyzerAnonymousInnerClassHelper3(this, flags, protectedWords);

    AssertAnalyzesTo(withStopwords, "lucene.solr",
        new[] { "lucene", "lucenesolr", "solr" },
        new[] { 0, 0, 7 },
        new[] { 6, 11, 11 },
        new[] { 1, 0, 1 });

    /* the stopword should add a gap here */
    AssertAnalyzesTo(withStopwords, "the lucene.solr",
        new[] { "lucene", "lucenesolr", "solr" },
        new[] { 4, 4, 11 },
        new[] { 10, 15, 15 },
        new[] { 2, 0, 1 });
}
/// <summary>
/// Checks, via <c>CheckOneTerm</c>, that an empty input term yields an empty output term.
/// </summary>
public virtual void TestEmptyTerm()
{
    Analyzer emptyTermAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this);

    CheckOneTerm(emptyTermAnalyzer, "", "");
}
/// <summary>
/// Test against a Nynorsk vocabulary file.
/// <para/>
/// Opens <c>nn_light.txt</c> read-only and hands the stream to <c>assertVocabulary</c>.
/// The stream is disposed when the assertion returns or throws.
/// </summary>
public virtual void testNynorskVocabulary()
{
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);

    // Previously the FileStream was created inline and never closed, leaking the
    // file handle (and keeping the file locked) until finalization.
    using (var vocabulary = new System.IO.FileStream(getDataFile("nn_light.txt"), System.IO.FileMode.Open, System.IO.FileAccess.Read))
    {
        assertVocabulary(analyzer, vocabulary);
    }
}
/// <summary>
/// Runs <c>checkRandomData</c> for <c>1000 * RANDOM_MULTIPLIER</c> iterations on an
/// analyzer built for the given language.
/// </summary>
/// <param name="snowballLanguage">Language name handed to the helper analyzer.</param>
public virtual void checkRandomStrings(string snowballLanguage)
{
    Analyzer languageAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, snowballLanguage);
    int iterations = 1000 * RANDOM_MULTIPLIER;

    checkRandomData(random(), languageAnalyzer, iterations);
}
/// <summary>
/// Checks the tokens produced for a sentence containing U+08E6 and an e-mail address.
/// <para/>
/// The expected output splits "t\u08E6st" into "t" and "st" and the address into
/// "lucene" and "apache.org".
/// </summary>
public virtual void TestVersion36()
{
    Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this);

    // new combining mark in 6.1
    // The input must contain the real address: the previous literal held the
    // "[email protected]" placeholder left by an e-mail obfuscator, which cannot
    // produce the expected "lucene" / "apache.org" tokens.
    AssertAnalyzesTo(a, "this is just a t\u08E6st lucene@apache.org",
        new string[] { "this", "is", "just", "a", "t", "st", "lucene", "apache.org" });
}
/// <summary>
/// Checks terms, start/end offsets and position increments produced by three
/// analyzers that share the same word-delimiter flags and the protected word "NUTCH".
/// </summary>
public virtual void testPositionIncrements()
{
    int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;

    // C# has no diamond operator, and HashSet<string>("NUTCH") would treat the string
    // as a sequence of chars; a collection initializer builds the one-element set the
    // Java original (new HashSet<>(Arrays.asList("NUTCH"))) intended.
    CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<string> { "NUTCH" }, false);

    /* analyzer that uses whitespace + wdf */
    Analyzer a = new AnalyzerAnonymousInnerClassHelper(this, flags, protWords);

    /* in this case, works as expected. */
    assertAnalyzesTo(a, "LUCENE / SOLR", new string[] {"LUCENE", "SOLR"}, new int[] {0, 9}, new int[] {6, 13}, new int[] {1, 1});

    /* only in this case, posInc of 2 ?! */
    assertAnalyzesTo(a, "LUCENE / solR", new string[] {"LUCENE", "sol", "solR", "R"}, new int[] {0, 9, 9, 12}, new int[] {6, 12, 13, 13}, new int[] {1, 1, 0, 1});

    assertAnalyzesTo(a, "LUCENE / NUTCH SOLR", new string[] {"LUCENE", "NUTCH", "SOLR"}, new int[] {0, 9, 15}, new int[] {6, 14, 19}, new int[] {1, 1, 1});

    /* analyzer that will consume tokens with large position increments */
    Analyzer a2 = new AnalyzerAnonymousInnerClassHelper2(this, flags, protWords);

    /* increment of "largegap" is preserved */
    assertAnalyzesTo(a2, "LUCENE largegap SOLR", new string[] {"LUCENE", "largegap", "SOLR"}, new int[] {0, 7, 16}, new int[] {6, 15, 20}, new int[] {1, 10, 1});

    /* the "/" had a position increment of 10, where did it go?!?!! */
    assertAnalyzesTo(a2, "LUCENE / SOLR", new string[] {"LUCENE", "SOLR"}, new int[] {0, 9}, new int[] {6, 13}, new int[] {1, 11});

    /* in this case, the increment of 10 from the "/" is carried over */
    assertAnalyzesTo(a2, "LUCENE / solR", new string[] {"LUCENE", "sol", "solR", "R"}, new int[] {0, 9, 9, 12}, new int[] {6, 12, 13, 13}, new int[] {1, 11, 0, 1});

    assertAnalyzesTo(a2, "LUCENE / NUTCH SOLR", new string[] {"LUCENE", "NUTCH", "SOLR"}, new int[] {0, 9, 15}, new int[] {6, 14, 19}, new int[] {1, 11, 1});

    Analyzer a3 = new AnalyzerAnonymousInnerClassHelper3(this, flags, protWords);

    assertAnalyzesTo(a3, "lucene.solr", new string[] {"lucene", "lucenesolr", "solr"}, new int[] {0, 0, 7}, new int[] {6, 11, 11}, new int[] {1, 0, 1});

    /* the stopword should add a gap here */
    assertAnalyzesTo(a3, "the lucene.solr", new string[] {"lucene", "lucenesolr", "solr"}, new int[] {4, 4, 11}, new int[] {10, 15, 15}, new int[] {2, 0, 1});
}
/// <summary>
/// Compares the token streams of a <c>MockAnalyzer</c> (max token length 255) and the
/// helper analyzer on random simple strings of up to 8192 characters.
/// </summary>
public virtual void TestLetterAsciiHuge()
{
    Random rnd = Random();
    int maxLength = 8192; // CharTokenizer.IO_BUFFER_SIZE*2

    MockAnalyzer left = new MockAnalyzer(rnd, jvmLetter, false)
    {
        MaxTokenLength = 255 // match CharTokenizer's max token length
    };
    Analyzer right = new AnalyzerAnonymousInnerClassHelper2(this);

    int remaining = AtLeast(50);
    while (remaining-- > 0)
    {
        string s = TestUtil.RandomSimpleString(rnd, maxLength);
        var leftStream = left.TokenStream("foo", newStringReader(s));
        var rightStream = right.TokenStream("foo", newStringReader(s));
        assertEquals(s, leftStream, rightStream);
    }
}
/// <summary>
/// Verifies that peak disk usage while appending documents, and after force-merging
/// and closing the writer, stays below 150 times the size of the starting index.
/// <para/>
/// The test is skipped when either the "id" or the "content" field uses the Memory
/// postings format.
/// </summary>
public virtual void TestCommitOnCloseDiskUsage()
{
    // MemoryCodec, since it uses FST, is not necessarily
    // "additive", ie if you add up N small FSTs, then merge
    // them, the merged result can easily be larger than the
    // sum because the merged FST may use array encoding for
    // some arcs (which uses more space):
    string idFormat = TestUtil.GetPostingsFormat("id");
    string contentFormat = TestUtil.GetPostingsFormat("content");
    bool usesMemoryCodec = idFormat.Equals("Memory") || contentFormat.Equals("Memory");
    AssumeFalse("this test cannot run with Memory codec", usesMemoryCodec);

    MockDirectoryWrapper dir = NewMockDirectory();

    Analyzer analyzer;
    if (Random().NextBoolean())
    {
        // no payloads
        analyzer = new AnalyzerAnonymousInnerClassHelper(this);
    }
    else
    {
        // fixed length payloads
        int payloadLength = Random().Next(200);
        analyzer = new AnalyzerAnonymousInnerClassHelper2(this, payloadLength);
    }

    // Seed index: 30 documents.
    IndexWriter seedWriter = new IndexWriter(
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
            .SetMaxBufferedDocs(10)
            .SetReaderPooling(false)
            .SetMergePolicy(NewLogMergePolicy(10)));
    for (int docId = 0; docId < 30; docId++)
    {
        TestIndexWriter.AddDocWithIndex(seedWriter, docId);
    }
    seedWriter.Dispose();

    dir.ResetMaxUsedSizeInBytes();
    dir.TrackDiskUsage = true;
    long startDiskUsage = dir.MaxUsedSizeInBytes;

    // Append 1470 more documents with a serial merge scheduler.
    IndexWriter appendWriter = new IndexWriter(
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
            .SetOpenMode(OpenMode_e.APPEND)
            .SetMaxBufferedDocs(10)
            .SetMergeScheduler(new SerialMergeScheduler())
            .SetReaderPooling(false)
            .SetMergePolicy(NewLogMergePolicy(10)));
    for (int docId = 0; docId < 1470; docId++)
    {
        TestIndexWriter.AddDocWithIndex(appendWriter, docId);
    }
    long midDiskUsage = dir.MaxUsedSizeInBytes;

    dir.ResetMaxUsedSizeInBytes();
    appendWriter.ForceMerge(1);
    appendWriter.Dispose();

    DirectoryReader.Open(dir).Dispose();

    long endDiskUsage = dir.MaxUsedSizeInBytes;

    // Ending index is 50X as large as starting index; due
    // to 3X disk usage normally we allow 150X max
    // transient usage. If something is wrong w/ deleter
    // and it doesn't delete intermediate segments then it
    // will exceed this 150X:
    // System.out.println("start " + startDiskUsage + "; mid " + midDiskUsage + ";end " + endDiskUsage);
    Assert.IsTrue(
        midDiskUsage < 150 * startDiskUsage,
        "writer used too much space while adding documents: mid=" + midDiskUsage + " start=" + startDiskUsage + " end=" + endDiskUsage + " max=" + (startDiskUsage * 150));
    Assert.IsTrue(
        endDiskUsage < 150 * startDiskUsage,
        "writer used too much space after close: endDiskUsage=" + endDiskUsage + " startDiskUsage=" + startDiskUsage + " max=" + (startDiskUsage * 150));

    dir.Dispose();
}
/// <summary>
/// Runs <c>CheckRandomData</c> for <c>1000 * RANDOM_MULTIPLIER</c> iterations on an
/// analyzer built for the given language.
/// </summary>
/// <param name="snowballLanguage">Language name handed to the helper analyzer.</param>
public virtual void CheckRandomStrings(string snowballLanguage)
{
    Analyzer languageAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, snowballLanguage);
    int iterations = 1000 * RANDOM_MULTIPLIER;

    CheckRandomData(Random(), languageAnalyzer, iterations);
}
/// <summary>
/// Indexes a single document whose field "f1" contains "a 5 a a" and inspects the
/// postings of term "a".
/// <para/>
/// Expects frequency 3 at positions 0, 6 and 7, with a payload present only on the
/// first position.
/// </summary>
public virtual void TestTokenReuse()
{
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);

    IndexWriter indexWriter = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    Document document = new Document();
    document.Add(NewTextField("f1", "a 5 a a", Field.Store.YES));
    indexWriter.AddDocument(document);
    indexWriter.Commit();
    SegmentCommitInfo newestSegment = indexWriter.NewestSegment();
    indexWriter.Dispose();

    SegmentReader segmentReader = new SegmentReader(newestSegment, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random()));

    DocsAndPositionsEnum positions = MultiFields.GetTermPositionsEnum(segmentReader, segmentReader.LiveDocs, "f1", new BytesRef("a"));
    Assert.IsTrue(positions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(3, positions.Freq());

    // First occurrence carries a payload.
    Assert.AreEqual(0, positions.NextPosition());
    Assert.IsNotNull(positions.Payload);

    // The remaining two occurrences do not.
    Assert.AreEqual(6, positions.NextPosition());
    Assert.IsNull(positions.Payload);
    Assert.AreEqual(7, positions.NextPosition());
    Assert.IsNull(positions.Payload);

    segmentReader.Dispose();
}