/// <summary>
/// Wraps a <see cref="MockAnalyzer"/> in a <see cref="LimitTokenCountAnalyzer"/>
/// with a limit of 2 and checks the emitted terms, their start/end offsets and
/// (only when <c>consumeAll</c> is true) the final offset, for inputs that are
/// longer than, shorter than, and exactly at the limit.
/// </summary>
public virtual void TestLimitTokenCountAnalyzer()
{
    foreach (bool consumeAll in new bool[] { true, false })
    {
        MockAnalyzer mock = new MockAnalyzer(Random);

        // if we are consuming all tokens, we can use the checks,
        // otherwise we can't
        mock.EnableChecks = consumeAll;
        Analyzer a = new LimitTokenCountAnalyzer(mock, 2, consumeAll);

        // Don't use AssertAnalyzesTo here, as the end offset is not the end of
        // the string (unless consumeAll is true, in which case it's correct)!
        //
        // The whitespace runs inside the literals are significant: the asserted
        // offsets place "2" at [3,4) (two separators after "1") and the final
        // offset equals the full input length (16, 3 and 6 respectively).
        AssertTokenStreamContents(
            a.GetTokenStream("dummy", "1  2     3  4  5"),
            new string[] { "1", "2" },
            new int[] { 0, 3 },
            new int[] { 1, 4 },
            consumeAll ? (int?)16 : null);

        // Single-space variant: "2" sits at [2,3) and the input is 9 chars long.
        AssertTokenStreamContents(
            a.GetTokenStream("dummy", "1 2 3 4 5"),
            new string[] { "1", "2" },
            new int[] { 0, 2 },
            new int[] { 1, 3 },
            consumeAll ? (int?)9 : null);

        // less than the limit, ensure we behave correctly
        AssertTokenStreamContents(
            a.GetTokenStream("dummy", "1  "),
            new string[] { "1" },
            new int[] { 0 },
            new int[] { 1 },
            consumeAll ? (int?)3 : null);

        // equal to limit
        AssertTokenStreamContents(
            a.GetTokenStream("dummy", "1  2  "),
            new string[] { "1", "2" },
            new int[] { 0, 3 },
            new int[] { 1, 4 },
            consumeAll ? (int?)6 : null);
    }
}
/// <summary>
/// Wraps a <see cref="MockAnalyzer"/> in a <see cref="LimitTokenCountAnalyzer"/>
/// with a limit of 2 and checks the emitted terms, their start/end offsets and
/// (only when <c>consumeAll</c> is true) the final offset, for inputs that are
/// longer than, shorter than, and exactly at the limit.
/// </summary>
public virtual void TestLimitTokenCountAnalyzer()
{
    foreach (bool consumeAll in new bool[] { true, false })
    {
        MockAnalyzer mock = new MockAnalyzer(Random());

        // if we are consuming all tokens, we can use the checks,
        // otherwise we can't
        mock.EnableChecks = consumeAll;
        Analyzer a = new LimitTokenCountAnalyzer(mock, 2, consumeAll);

        // Don't use AssertAnalyzesTo here, as the end offset is not the end of
        // the string (unless consumeAll is true, in which case it's correct)!
        //
        // The whitespace runs inside the literals are significant: the asserted
        // offsets place "2" at [3,4) (two separators after "1") and the final
        // offset equals the full input length (16, 3 and 6 respectively).
        AssertTokenStreamContents(
            a.TokenStream("dummy", "1  2     3  4  5"),
            new string[] { "1", "2" },
            new int[] { 0, 3 },
            new int[] { 1, 4 },
            consumeAll ? (int?)16 : null);

        // Single-space variant: "2" sits at [2,3) and the input is 9 chars long.
        AssertTokenStreamContents(
            a.TokenStream("dummy", "1 2 3 4 5"),
            new string[] { "1", "2" },
            new int[] { 0, 2 },
            new int[] { 1, 3 },
            consumeAll ? (int?)9 : null);

        // less than the limit, ensure we behave correctly
        AssertTokenStreamContents(
            a.TokenStream("dummy", "1  "),
            new string[] { "1" },
            new int[] { 0 },
            new int[] { 1 },
            consumeAll ? (int?)3 : null);

        // equal to limit
        AssertTokenStreamContents(
            a.TokenStream("dummy", "1  2  "),
            new string[] { "1", "2" },
            new int[] { 0, 3 },
            new int[] { 1, 4 },
            consumeAll ? (int?)6 : null);
    }
}
/// <summary>
/// Indexes one document through a <see cref="LimitTokenCountAnalyzer"/> with a
/// randomly chosen limit and checks that the term placed exactly at the limit
/// ("x") is indexed while the term just past it ("z") is not. Runs once with
/// <c>consumeAll</c> set and once without.
/// </summary>
public virtual void TestLimitTokenCountIndexWriter()
{
    foreach (bool consumeAll in new bool[] { true, false })
    {
        // using-statements make sure the directory, writer and reader are
        // released even when an assertion or the indexing call throws.
        using (Store.Directory dir = NewDirectory())
        {
            int limit = TestUtil.NextInt(Random(), 50, 101000);
            MockAnalyzer mock = new MockAnalyzer(Random());

            // if we are consuming all tokens, we can use the checks,
            // otherwise we can't
            mock.EnableChecks = consumeAll;
            Analyzer a = new LimitTokenCountAnalyzer(mock, limit, consumeAll);

            using (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, a)))
            {
                Document doc = new Document();

                // (limit - 1) copies of "a", then "x" as token number `limit`
                // and "z" as token number `limit + 1`.
                StringBuilder b = new StringBuilder();
                for (int i = 1; i < limit; i++)
                {
                    b.Append(" a");
                }
                b.Append(" x");
                b.Append(" z");

                doc.Add(NewTextField("field", b.ToString(), Field.Store.NO));
                writer.AddDocument(doc);
            } // writer is disposed here, before the reader is opened

            using (IndexReader reader = DirectoryReader.Open(dir))
            {
                Term t = new Term("field", "x");
                assertEquals(1, reader.DocFreq(t));
                t = new Term("field", "z");
                assertEquals(0, reader.DocFreq(t));
            }
        }
    }
}
/// <summary>
/// Indexes one document through a <see cref="LimitTokenCountAnalyzer"/> with a
/// randomly chosen limit and checks that the term placed exactly at the limit
/// ("x") is indexed while the term just past it ("z") is not. Runs once with
/// <c>consumeAll</c> set and once without.
/// </summary>
public virtual void TestLimitTokenCountIndexWriter()
{
    foreach (bool consumeAll in new bool[] { true, false })
    {
        // using-statements make sure the directory, writer and reader are
        // released even when an assertion or the indexing call throws.
        using (Store.Directory dir = NewDirectory())
        {
            int limit = TestUtil.NextInt32(Random, 50, 101000);
            MockAnalyzer mock = new MockAnalyzer(Random);

            // if we are consuming all tokens, we can use the checks,
            // otherwise we can't
            mock.EnableChecks = consumeAll;
            Analyzer a = new LimitTokenCountAnalyzer(mock, limit, consumeAll);

            using (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, a)))
            {
                Document doc = new Document();

                // (limit - 1) copies of "a", then "x" as token number `limit`
                // and "z" as token number `limit + 1`.
                StringBuilder b = new StringBuilder();
                for (int i = 1; i < limit; i++)
                {
                    b.Append(" a");
                }
                b.Append(" x");
                b.Append(" z");

                doc.Add(NewTextField("field", b.ToString(), Field.Store.NO));
                writer.AddDocument(doc);
            } // writer is disposed here, before the reader is opened

            using (IndexReader reader = DirectoryReader.Open(dir))
            {
                Term t = new Term("field", "x");
                assertEquals(1, reader.DocFreq(t));
                t = new Term("field", "z");
                assertEquals(0, reader.DocFreq(t));
            }
        }
    }
}