// NOTE(review): the method name looks like a typo for "TestDocsAndPositionsEnumStart";
// kept as-is because renaming would change the public test identifier.
public void RestDocsAndPositionsEnumStart()
{
    Analyzer analyzer = new MockAnalyzer(Random);
    int iterations = AtLeast(3);
    MemoryIndex memory = new MemoryIndex(true, Random.nextInt(50) * 1024 * 1024);

    for (int iter = 0; iter < iterations; iter++)
    {
        // Re-populate the index each pass to verify enum reuse across resets.
        memory.AddField("foo", "bar", analyzer);
        AtomicReader reader = (AtomicReader)memory.CreateSearcher().IndexReader;
        assertEquals(1, reader.GetTerms("foo").SumTotalTermFreq);

        // A freshly obtained enum must be positioned before the first document.
        DocsAndPositionsEnum postings = reader.GetTermPositionsEnum(new Term("foo", "bar"));
        int docId = postings.DocID;
        assertEquals(-1, docId);
        assertTrue(postings.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        assertEquals(0, postings.NextPosition());
        assertEquals(0, postings.StartOffset);
        assertEquals(3, postings.EndOffset);

        // Now pass the same instance back for reuse and verify it is reset.
        TermsEnum termsEnum = reader.GetTerms("foo").GetEnumerator();
        assertTrue(termsEnum.SeekExact(new BytesRef("bar")));
        postings = termsEnum.DocsAndPositions(null, postings);
        docId = postings.DocID;
        assertEquals(-1, docId);
        assertTrue(postings.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

        reader.Dispose();
        memory.Reset();
    }
}
/// <summary>
/// Builds a randomish document (up to 250 random terms in each of the
/// "foo" and "term" fields), indexes it into both a <c>RAMDirectory</c>
/// and the given <see cref="MemoryIndex"/>, then runs all queries against
/// both and asserts they agree.
/// </summary>
/// <param name="memory">The MemoryIndex under test; it is reset before use.</param>
public void AssertAgainstRAMDirectory(MemoryIndex memory)
{
    memory.Reset();
    StringBuilder fooField = new StringBuilder();
    StringBuilder termField = new StringBuilder();

    // add up to 250 terms to field "foo"
    int numFooTerms = Random.nextInt(250 * RandomMultiplier);
    for (int i = 0; i < numFooTerms; i++)
    {
        fooField.append(" ");
        fooField.append(RandomTerm());
    }

    // add up to 250 terms to field "term"
    int numTermTerms = Random.nextInt(250 * RandomMultiplier);
    for (int i = 0; i < numTermTerms; i++)
    {
        termField.append(" ");
        termField.append(RandomTerm());
    }

    // Index the same content into a RAMDirectory, pinning the postings format
    // so both sides use an identical codec.
    Store.Directory ramdir = new RAMDirectory();
    Analyzer analyzer = RandomAnalyzer();
    IndexWriter writer = new IndexWriter(ramdir,
        new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetCodec(
            TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat())));
    Document doc = new Document();
    Field field1 = NewTextField("foo", fooField.toString(), Field.Store.NO);
    Field field2 = NewTextField("term", termField.toString(), Field.Store.NO);
    doc.Add(field1);
    doc.Add(field2);
    writer.AddDocument(doc);
    writer.Dispose();

    // ...and the identical content into the MemoryIndex under test.
    memory.AddField("foo", fooField.toString(), analyzer);
    memory.AddField("term", termField.toString(), analyzer);

    if (Verbose)
    {
        Console.WriteLine("Random MemoryIndex:\n" + memory.toString());
        Console.WriteLine("Same index as RAMDirectory: " +
            RamUsageEstimator.HumanReadableUnits(RamUsageEstimator.SizeOf(ramdir)));
        Console.WriteLine();
    }
    else
    {
        // Sanity check: adding fields must have consumed some memory.
        assertTrue(memory.GetMemorySize() > 0L);
    }

    // Compare the two readers term-by-term, then run the query battery.
    AtomicReader reader = (AtomicReader)memory.CreateSearcher().IndexReader;
    DirectoryReader competitor = DirectoryReader.Open(ramdir);
    DuellReaders(competitor, reader);
    IOUtils.Dispose(reader, competitor);
    AssertAllQueries(memory, ramdir, analyzer);
    ramdir.Dispose();
}
// A zero-length token must still be indexed and findable via a TermQuery.
public void TestEmptyString()
{
    MemoryIndex memory = new MemoryIndex();
    memory.AddField("foo", new CannedTokenStream(new Analysis.Token("", 0, 5)));

    IndexSearcher searcher = memory.CreateSearcher();
    TopDocs hits = searcher.Search(new TermQuery(new Term("foo", "")), 10);
    assertEquals(1, hits.TotalHits);
}
// NOTE(review): "NonExistings" looks like a typo for "NonExisting"; the name is
// kept to preserve the public test identifier.
public void TestNonExistingsField()
{
    MemoryIndex memIndex = new MemoryIndex(Random.nextBoolean(), Random.nextInt(50) * 1024 * 1024);
    MockAnalyzer analyzer = new MockAnalyzer(Random);
    memIndex.AddField("field", "the quick brown fox", analyzer);
    AtomicReader reader = (AtomicReader)memIndex.CreateSearcher().IndexReader;

    // Every accessor must report "not present" (null) for an unknown field.
    assertNull(reader.GetNumericDocValues("not-in-index"));
    assertNull(reader.GetNormValues("not-in-index"));
    assertNull(reader.GetTermDocsEnum(new Term("not-in-index", "foo")));
    assertNull(reader.GetTermPositionsEnum(new Term("not-in-index", "foo")));
    assertNull(reader.GetTerms("not-in-index"));
}
/// <summary>
/// Runs all queries against both the RAMDirectory and MemoryIndex, ensuring
/// they return the same hit counts.
/// </summary>
/// <param name="memory">MemoryIndex side of the comparison.</param>
/// <param name="ramdir">Directory-backed side of the comparison.</param>
/// <param name="analyzer">Analyzer used to parse the queries.</param>
// NOTE(review): another AssertAllQueries with the identical signature exists in
// this source; confirm these two copies belong to different classes/files.
public void AssertAllQueries(MemoryIndex memory, Store.Directory ramdir, Analyzer analyzer)
{
    IndexReader reader = DirectoryReader.Open(ramdir);
    IndexSearcher ram = NewSearcher(reader);
    IndexSearcher mem = memory.CreateSearcher();
    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "foo", analyzer)
    {
        // LUCENENET specific - to avoid random culture-dependent failures, set
        // the culture of the QueryParser to invariant (consistent with the
        // other AssertAllQueries overload in this test suite).
        Locale = CultureInfo.InvariantCulture
    };
    foreach (string query in queries)
    {
        TopDocs ramDocs = ram.Search(qp.Parse(query), 1);
        TopDocs memDocs = mem.Search(qp.Parse(query), 1);
        assertEquals(query, ramDocs.TotalHits, memDocs.TotalHits);
    }
    reader.Dispose();
}
// Indexes the same multi-valued field into a random Directory and a MemoryIndex,
// then asserts that both produce matching term vectors.
public void TestDuelMemoryIndexCoreDirectoryWithArrayField()
{
    string fieldName = "text";
    MockAnalyzer mockAnalyzer = new MockAnalyzer(Random);
    if (Random.nextBoolean())
    {
        mockAnalyzer.SetOffsetGap(Random.nextInt(100));
    }

    // Full term-vector storage (offsets + positions, no payloads).
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.StoreTermVectorOffsets = true;
    type.StoreTermVectorPayloads = false;
    type.StoreTermVectorPositions = true;
    type.StoreTermVectors = true;
    type.Freeze();

    // Index two values of the same field into a random directory.
    Document doc = new Document();
    doc.Add(new Field(fieldName, "la la", type));
    doc.Add(new Field(fieldName, "foo bar foo bar foo", type));

    Store.Directory dir = NewDirectory();
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, mockAnalyzer));
    writer.UpdateDocument(new Term("id", "1"), doc);
    writer.Commit();
    writer.Dispose();
    DirectoryReader reader = DirectoryReader.Open(dir);

    // Index the same two values into a MemoryIndex.
    MemoryIndex memIndex = new MemoryIndex(true);
    memIndex.AddField(fieldName, "la la", mockAnalyzer);
    memIndex.AddField(fieldName, "foo bar foo bar foo", mockAnalyzer);

    // The term vectors produced by both sides must match.
    Terms dirTermVector = reader.GetTermVector(0, fieldName);
    IndexReader memIndexReader = memIndex.CreateSearcher().IndexReader;
    Terms memTermVector = memIndexReader.GetTermVector(0, fieldName);
    CompareTermVectors(dirTermVector, memTermVector, fieldName);

    memIndexReader.Dispose();
    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// For several random line-file documents, indexes the indexed fields into
/// both a fresh Directory and a reused <see cref="MemoryIndex"/>, then duels
/// the two readers against each other.
/// </summary>
public void TestDuellMemIndex()
{
    LineFileDocs lineFileDocs = new LineFileDocs(Random);
    int numDocs = AtLeast(10);
    // A single MemoryIndex instance is reused (via Reset) across iterations.
    MemoryIndex memory = new MemoryIndex(Random.nextBoolean(), Random.nextInt(50) * 1024 * 1024);
    for (int i = 0; i < numDocs; i++)
    {
        Store.Directory dir = NewDirectory();
        MockAnalyzer mockAnalyzer = new MockAnalyzer(Random);
        mockAnalyzer.MaxTokenLength = (TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH));
        IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, mockAnalyzer));
        Document nextDoc = lineFileDocs.NextDoc();
        Document doc = new Document();
        // Copy only the indexed fields from the sample document.
        foreach (IIndexableField field in nextDoc.Fields)
        {
            if (field.IndexableFieldType.IsIndexed)
            {
                doc.Add(field);
                if (Random.nextInt(3) == 0)
                {
                    doc.Add(field); // randomly add the same field twice
                }
            }
        }

        writer.AddDocument(doc);
        writer.Dispose();
        // Mirror the same field values into the MemoryIndex.
        foreach (IIndexableField field in doc.Fields)
        {
            memory.AddField(field.Name, ((Field)field).GetStringValue(), mockAnalyzer);
        }
        DirectoryReader competitor = DirectoryReader.Open(dir);
        AtomicReader memIndexReader = (AtomicReader)memory.CreateSearcher().IndexReader;
        // Compare the directory-backed reader and the memory reader term-by-term.
        DuellReaders(competitor, memIndexReader);
        IOUtils.Dispose(competitor, memIndexReader);
        memory.Reset();
        dir.Dispose();
    }
    lineFileDocs.Dispose();
}
/// <summary>
/// Runs every query in <c>queries</c> against both the RAMDirectory-backed
/// searcher and the MemoryIndex searcher, asserting equal hit counts.
/// </summary>
public void AssertAllQueries(MemoryIndex memory, Store.Directory ramdir, Analyzer analyzer)
{
    IndexReader directoryReader = DirectoryReader.Open(ramdir);
    IndexSearcher directorySearcher = NewSearcher(directoryReader);
    IndexSearcher memorySearcher = memory.CreateSearcher();

    QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "foo", analyzer)
    {
        // LUCENENET specific - to avoid random failures, set the culture
        // of the QueryParser to invariant
        Locale = CultureInfo.InvariantCulture
    };

    foreach (string query in queries)
    {
        TopDocs fromDirectory = directorySearcher.Search(parser.Parse(query), 1);
        TopDocs fromMemory = memorySearcher.Search(parser.Parse(query), 1);
        assertEquals(query, fromDirectory.TotalHits, fromMemory.TotalHits);
    }

    directoryReader.Dispose();
}
public void TestSameFieldAddedMultipleTimes()
{
    MemoryIndex memIndex = new MemoryIndex(Random.nextBoolean(), Random.nextInt(50) * 1024 * 1024);
    MockAnalyzer mockAnalyzer = new MockAnalyzer(Random);

    // Two additions to the same field are concatenated: 4 + 3 = 7 tokens total.
    memIndex.AddField("field", "the quick brown fox", mockAnalyzer);
    memIndex.AddField("field", "jumps over the", mockAnalyzer);
    AtomicReader reader = (AtomicReader)memIndex.CreateSearcher().IndexReader;
    assertEquals(7, reader.GetTerms("field").SumTotalTermFreq);

    // With no extra position gap, "fox jumps" matches as an adjacent phrase.
    PhraseQuery query = new PhraseQuery();
    query.Add(new Term("field", "fox"));
    query.Add(new Term("field", "jumps"));
    assertTrue(memIndex.Search(query) > 0.1);

    // After introducing a position-increment gap between the two values,
    // the exact phrase no longer matches...
    memIndex.Reset();
    mockAnalyzer.SetPositionIncrementGap(1 + Random.nextInt(10));
    memIndex.AddField("field", "the quick brown fox", mockAnalyzer);
    memIndex.AddField("field", "jumps over the", mockAnalyzer);
    assertEquals(0, memIndex.Search(query), 0.00001f);

    // ...but a sloppy phrase query (slop >= max gap of 10) still does.
    query.Slop = 10;
    assertTrue("posGap" + mockAnalyzer.GetPositionIncrementGap("field"), memIndex.Search(query) > 0.0001);
}
public void TestDocsEnumStart()
{
    Analyzer analyzer = new MockAnalyzer(Random);
    MemoryIndex memory = new MemoryIndex(Random.nextBoolean(), Random.nextInt(50) * 1024 * 1024);
    memory.AddField("foo", "bar", analyzer);
    AtomicReader reader = (AtomicReader)memory.CreateSearcher().IndexReader;

    // A freshly obtained DocsEnum must be positioned before the first document.
    DocsEnum docs = TestUtil.Docs(Random, reader, "foo", new BytesRef("bar"), null, null, DocsFlags.NONE);
    int currentDoc = docs.DocID;
    assertEquals(-1, currentDoc);
    assertTrue(docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

    // Passing the instance back through the TermsEnum for reuse must reset it.
    TermsEnum termsEnum = reader.GetTerms("foo").GetEnumerator();
    assertTrue(termsEnum.SeekExact(new BytesRef("bar")));
    docs = termsEnum.Docs(null, docs, DocsFlags.NONE);
    currentDoc = docs.DocID;
    assertEquals(-1, currentDoc);
    assertTrue(docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

    reader.Dispose();
}