/// <summary>
/// Verifies when a <see cref="DocsEnum"/> passed back to <c>TermsEnum.Docs</c> is recycled:
/// the same live-docs instance (or <c>null</c>) on every call must yield a single enum
/// instance per leaf, while a fresh live-docs instance on every call must yield one
/// enum instance per term.
/// </summary>
public virtual void TestReuseDocsEnumSameBitsOrNull()
{
    Directory dir = NewDirectory();
    Codec cp = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
    var writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(cp);
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, writerConfig);
    int numdocs = AtLeast(20);
    CreateRandomIndex(numdocs, writer, Random());
    writer.Commit();

    DirectoryReader reader = DirectoryReader.Open(dir);
    foreach (AtomicReaderContext leaf in reader.Leaves)
    {
        Terms bodyTerms = ((AtomicReader)leaf.Reader).GetTerms("body");
        TermsEnum termsEnum = bodyTerms.GetIterator(null);
        // Keyed by reference identity, so Count is the number of distinct enum instances handed out.
        var distinctEnums = new IdentityHashMap<DocsEnum, bool?>();
        MatchNoBits sharedBits = new MatchNoBits(reader.MaxDoc);

        // Pass 1: the very same bits instance on every call -> exactly one enum instance.
        DocsEnum recycled = null;
        while (termsEnum.Next() != null)
        {
            recycled = termsEnum.Docs(sharedBits, recycled, Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
            distinctEnums[recycled] = true;
        }
        Assert.AreEqual(1, distinctEnums.Count);

        // Pass 2: a brand-new bits instance on every call -> one enum instance per term.
        distinctEnums.Clear();
        termsEnum = bodyTerms.GetIterator(null);
        recycled = null;
        while (termsEnum.Next() != null)
        {
            recycled = termsEnum.Docs(new MatchNoBits(reader.MaxDoc), recycled, Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
            distinctEnums[recycled] = true;
        }
        Assert.AreEqual(bodyTerms.Count, distinctEnums.Count);

        // Pass 3: null live docs on every call -> exactly one enum instance again.
        distinctEnums.Clear();
        termsEnum = bodyTerms.GetIterator(null);
        recycled = null;
        while (termsEnum.Next() != null)
        {
            recycled = termsEnum.Docs(null, recycled, Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
            distinctEnums[recycled] = true;
        }
        Assert.AreEqual(1, distinctEnums.Count);
    }

    IOUtils.Close(writer, reader, dir);
}
/// <summary>
/// Verifies that a <see cref="DocsEnum"/> supplied as the reuse candidate is never recycled
/// when it does not belong to the reader being iterated: every <c>Docs</c> call must hand
/// back a distinct instance, so the number of distinct enums equals the number of terms.
/// The check is run once with <c>null</c> live docs and once with a <c>MatchNoBits</c> instance.
/// </summary>
/// <remarks>
/// The reuse candidate comes from <c>RandomDocsEnum</c> over the leaves of a second reader
/// opened on the same directory (NOTE(review): presumably it may also return <c>null</c>;
/// confirm against that helper).
/// </remarks>
public virtual void TestReuseDocsEnumDifferentReader()
{
    Directory dir = NewDirectory();
    Codec cp = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetCodec(cp));
    int numdocs = AtLeast(20);
    CreateRandomIndex(numdocs, writer, Random());
    writer.Commit();

    // Two independent readers over the same commit: one is iterated, the other only
    // supplies "foreign" enums as reuse candidates.
    DirectoryReader firstReader = DirectoryReader.Open(dir);
    DirectoryReader secondReader = DirectoryReader.Open(dir);
    IList<AtomicReaderContext> leaves = firstReader.Leaves;
    IList<AtomicReaderContext> leaves2 = secondReader.Leaves;

    foreach (AtomicReaderContext ctx in leaves)
    {
        Terms terms = ((AtomicReader)ctx.Reader).GetTerms("body");
        // Keyed by reference identity, so Count is the number of distinct enum instances.
        IdentityHashMap<DocsEnum, bool?> enums = new IdentityHashMap<DocsEnum, bool?>();
        MatchNoBits bits = new MatchNoBits(firstReader.MaxDoc);

        // Pass 1: null live docs, foreign reuse candidate.
        TermsEnum iterator = terms.GetIterator(null);
        BytesRef term;
        while ((term = iterator.Next()) != null)
        {
            DocsEnum docs = iterator.Docs(null, RandomDocsEnum("body", term, leaves2, bits), Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
            enums[docs] = true;
        }
        Assert.AreEqual(terms.Count, enums.Count);

        // Pass 2: same check with a live-docs instance supplied.
        iterator = terms.GetIterator(null);
        enums.Clear();
        while ((term = iterator.Next()) != null)
        {
            DocsEnum docs = iterator.Docs(bits, RandomDocsEnum("body", term, leaves2, bits), Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
            enums[docs] = true;
        }
        Assert.AreEqual(terms.Count, enums.Count);
    }

    IOUtils.Close(writer, firstReader, secondReader, dir);
}
/// <summary>
/// Tests enum reuse with Pulsing1(Pulsing2(Standard)).
/// </summary>
public virtual void testNestedPulsing()
{
    // This test always runs against the nested pulsing postings format.
    Codec cp = TestUtil.alwaysPostingsFormat(new NestedPulsingPostingsFormat());
    BaseDirectoryWrapper dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp));

    Document doc = new Document();
    doc.add(new TextField("foo", "a b b c c c d e f g g g h i i j j k l l m m m", Field.Store.NO));
    // Note: reuse is imperfect. We end up with 4 enums here (reuse is lost when an enum is
    // requested for 'm') because only the 'last' reused enum is tracked, not all of them.
    // That is considered 'good enough' for now.
    iw.addDocument(doc);
    DirectoryReader ir = iw.Reader;
    iw.close();

    AtomicReader segment = getOnlySegmentReader(ir);
    // Identity-keyed, so Count is the number of distinct enum instances seen.
    IDictionary<DocsEnum, bool?> allEnums = new IdentityHashMap<DocsEnum, bool?>();

    // Docs-only enums.
    DocsEnum docsCandidate = null;
    TermsEnum te = segment.terms("foo").iterator(null);
    while (te.next() != null)
    {
        docsCandidate = te.docs(null, docsCandidate, DocsEnum.FLAG_NONE);
        allEnums[docsCandidate] = true;
    }
    assertEquals(4, allEnums.Count);

    // Docs-and-positions enums.
    allEnums.Clear();
    DocsAndPositionsEnum positionsCandidate = null;
    te = segment.terms("foo").iterator(null);
    while (te.next() != null)
    {
        positionsCandidate = te.docsAndPositions(null, positionsCandidate);
        allEnums[positionsCandidate] = true;
    }
    assertEquals(4, allEnums.Count);

    ir.close();
    dir.close();
}
// TODO: this is a basic test. This thing is complicated, add more.
/// <summary>
/// Indexes one small document with the pulsing postings format and checks that walking
/// all terms of field "foo" while recycling the previous enum yields exactly two distinct
/// enum instances, both for docs enums and for docs-and-positions enums.
/// </summary>
public virtual void testSophisticatedReuse()
{
    // This test always runs against the pulsing codec.
    Codec cp = TestUtil.alwaysPostingsFormat(new Pulsing41PostingsFormat(1));
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp));

    Document doc = new Document();
    doc.add(new TextField("foo", "a b b c c c d e f g g h i i j j k", Field.Store.NO));
    iw.addDocument(doc);
    DirectoryReader ir = iw.Reader;
    iw.close();

    AtomicReader segment = getOnlySegmentReader(ir);
    // Identity-keyed, so Count is the number of distinct enum instances seen.
    IDictionary<DocsEnum, bool?> allEnums = new IdentityHashMap<DocsEnum, bool?>();

    // Docs-only enums.
    DocsEnum docsCandidate = null;
    TermsEnum te = segment.terms("foo").iterator(null);
    while (te.next() != null)
    {
        docsCandidate = te.docs(null, docsCandidate, DocsEnum.FLAG_NONE);
        allEnums[docsCandidate] = true;
    }
    assertEquals(2, allEnums.Count);

    // Docs-and-positions enums.
    allEnums.Clear();
    DocsAndPositionsEnum positionsCandidate = null;
    te = segment.terms("foo").iterator(null);
    while (te.next() != null)
    {
        positionsCandidate = te.docsAndPositions(null, positionsCandidate);
        allEnums[positionsCandidate] = true;
    }
    assertEquals(2, allEnums.Count);

    ir.close();
    dir.close();
}
/// <summary>
/// Basic enum-reuse check for the pulsing postings format: after indexing a single document,
/// iterating every term of field "foo" while passing the previous enum back in must produce
/// exactly two distinct enum instances, for both <see cref="DocsEnum"/> and
/// <see cref="DocsAndPositionsEnum"/>.
/// </summary>
public virtual void TestSophisticatedReuse()
{
    // The pulsing codec is forced regardless of the randomly selected default.
    Codec cp = TestUtil.AlwaysPostingsFormat(new Pulsing41PostingsFormat(1));
    Directory dir = NewDirectory();
    var writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(cp);
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, writerConfig);

    Document doc = new Document();
    doc.Add(new TextField("foo", "a b b c c c d e f g g h i i j j k", Field.Store.NO));
    iw.AddDocument(doc);
    DirectoryReader ir = iw.Reader;
    iw.Dispose();

    AtomicReader segment = GetOnlySegmentReader(ir);
    // Identity-keyed: Count tells how many different enum objects were returned.
    IDictionary<DocsEnum, bool?> seenEnums = new IdentityHashMap<DocsEnum, bool?>();

    // Round 1: plain docs enums.
    TermsEnum termsEnum = segment.Terms("foo").Iterator(null);
    DocsEnum previousDocs = null;
    while (termsEnum.Next() != null)
    {
        previousDocs = termsEnum.Docs(null, previousDocs, DocsEnum.FLAG_NONE);
        seenEnums[previousDocs] = true;
    }
    assertEquals(2, seenEnums.Count);

    // Round 2: docs-and-positions enums.
    seenEnums.Clear();
    DocsAndPositionsEnum previousPositions = null;
    termsEnum = segment.Terms("foo").Iterator(null);
    while (termsEnum.Next() != null)
    {
        previousPositions = termsEnum.DocsAndPositions(null, previousPositions);
        seenEnums[previousPositions] = true;
    }
    assertEquals(2, seenEnums.Count);

    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Enum-reuse check for the nested pulsing postings format: iterating every term of field
/// "foo" while passing the previous enum back in is expected to produce four distinct enum
/// instances, for both <see cref="DocsEnum"/> and <see cref="DocsAndPositionsEnum"/>.
/// </summary>
public virtual void TestNestedPulsing()
{
    // The nested pulsing codec is forced regardless of the randomly selected default.
    Codec cp = TestUtil.AlwaysPostingsFormat(new NestedPulsingPostingsFormat());
    BaseDirectoryWrapper dir = NewDirectory();
    var writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(cp);
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, writerConfig);

    Document doc = new Document();
    doc.Add(new TextField("foo", "a b b c c c d e f g g g h i i j j k l l m m m", Field.Store.NO));
    // Reuse is known to be imperfect: 4 enums result here (reuse is lost when the enum for
    // 'm' is requested) because only the 'last' reused enum is tracked rather than all of
    // them. This is accepted as 'good enough' for now.
    iw.AddDocument(doc);
    DirectoryReader ir = iw.Reader;
    iw.Dispose();

    AtomicReader segment = GetOnlySegmentReader(ir);
    // Identity-keyed: Count tells how many different enum objects were returned.
    IDictionary<DocsEnum, bool?> seenEnums = new IdentityHashMap<DocsEnum, bool?>();

    // Round 1: plain docs enums.
    TermsEnum termsEnum = segment.Terms("foo").Iterator(null);
    DocsEnum previousDocs = null;
    while (termsEnum.Next() != null)
    {
        previousDocs = termsEnum.Docs(null, previousDocs, DocsEnum.FLAG_NONE);
        seenEnums[previousDocs] = true;
    }
    assertEquals(4, seenEnums.Count);

    // Round 2: docs-and-positions enums.
    seenEnums.Clear();
    DocsAndPositionsEnum previousPositions = null;
    termsEnum = segment.Terms("foo").Iterator(null);
    while (termsEnum.Next() != null)
    {
        previousPositions = termsEnum.DocsAndPositions(null, previousPositions);
        seenEnums[previousPositions] = true;
    }
    assertEquals(4, seenEnums.Count);

    ir.Dispose();
    dir.Dispose();
}
/*
 * Non-recursive version of object descend. This consumes more memory than recursive in-depth
 * traversal but prevents stack overflows on long chains of objects
 * or complex graphs (a max. recursion depth on my machine was ~5000 objects linked in a chain
 * so not too much).
 *
 * Walks every object reachable from `root` exactly once (tracked by reference identity) and
 * returns the accumulated size estimate. A null root yields 0.
 */
private static long MeasureObjectSize(object root)
{
    // Objects seen so far.
    IdentityHashSet<object> seen = new IdentityHashSet<object>();
    // Class cache with reference Field and precalculated shallow size.
    HashMap<Type, ClassCache> classCache = new IdentityHashMap<Type, ClassCache>();
    // Stack of objects pending traversal. Recursion caused stack overflows.
    Stack<object> stack = new Stack<object>();
    stack.Push(root);

    long totalSize = 0;
    while (stack.Count > 0)
    {
        object ob = stack.Pop();

        if (ob == null || seen.Contains(ob))
        {
            continue;
        }
        seen.Add(ob);

        Type obClazz = ob.GetType();
        if (obClazz.Equals(typeof(string)))
        {
            // LUCENENET specific - we can get a closer estimate of a string
            // by using simple math. Reference: http://stackoverflow.com/a/8171099.
            // This fixes the TestSanity test.
            // Note: this is added on top of the shallow size accounted for below;
            // a string is not an array, so it also takes the plain-object branch.
            totalSize += (2 * (((string)ob).Length + 1));
        }

        if (obClazz.IsArray)
        {
            /*
             * Consider an array, possibly of primitive types. Push any of its references to
             * the processing stack and accumulate this array's shallow size.
             */
            long size = NUM_BYTES_ARRAY_HEADER;
            Array array = (Array)ob;
            int len = array.Length;
            if (len > 0)
            {
                Type componentClazz = obClazz.GetElementType();
                if (componentClazz.GetTypeInfo().IsPrimitive)
                {
                    size += (long)len * primitiveSizes[componentClazz];
                }
                else
                {
                    size += (long)NUM_BYTES_OBJECT_REF * len;

                    // Push refs for traversal later. Enumerate the array instead of calling
                    // GetValue(int): GetValue(int) throws for multi-dimensional arrays and is
                    // out of range for arrays with a non-zero lower bound, whereas enumeration
                    // visits every element regardless of rank. The visiting order does not
                    // affect the total, since each reachable object is counted once.
                    foreach (object o in array)
                    {
                        if (o != null && !seen.Contains(o))
                        {
                            stack.Push(o);
                        }
                    }
                }
            }
            totalSize += AlignObjectSize(size);
        }
        else
        {
            /*
             * Consider an object. Push any references it has to the processing stack
             * and accumulate this object's shallow size.
             */
            try
            {
                // Build the per-type cache entry lazily, the first time a type is encountered.
                if (!classCache.TryGetValue(obClazz, out ClassCache cachedInfo) || cachedInfo == null)
                {
                    classCache[obClazz] = cachedInfo = CreateCacheEntry(obClazz);
                }

                foreach (FieldInfo f in cachedInfo.ReferenceFields)
                {
                    // Fast path to eliminate redundancies.
                    object o = f.GetValue(ob);
                    if (o != null && !seen.Contains(o))
                    {
                        stack.Push(o);
                    }
                }

                totalSize += cachedInfo.AlignedShallowInstanceSize;
            }
            catch (Exception e)
            {
                // This should never happen; any failure while reflecting over the object
                // is wrapped so the original cause is preserved as the inner exception.
                throw new Exception("Reflective field access failed?", e);
            }
        }
    }

    // Help the GC (?).
    seen.Clear();
    stack.Clear();
    classCache.Clear();

    return totalSize;
}
/// <summary>
/// Verifies when a <see cref="DocsEnum"/> passed back to <c>TermsEnum.Docs</c> is recycled:
/// the same live-docs instance (or <c>null</c>) on every call must yield a single enum
/// instance per leaf, while a fresh live-docs instance on every call must yield one
/// enum instance per term.
/// </summary>
public virtual void TestReuseDocsEnumSameBitsOrNull()
{
    Directory dir = NewDirectory();
    Codec cp = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
    var writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(cp);
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, writerConfig);
    int numdocs = AtLeast(20);
    CreateRandomIndex(numdocs, writer, Random());
    writer.Commit();

    DirectoryReader reader = DirectoryReader.Open(dir);
    foreach (AtomicReaderContext leaf in reader.Leaves)
    {
        Terms bodyTerms = ((AtomicReader)leaf.Reader).Terms("body");
        TermsEnum termsEnum = bodyTerms.Iterator(null);
        // Keyed by reference identity, so Count is the number of distinct enum instances handed out.
        var distinctEnums = new IdentityHashMap<DocsEnum, bool?>();
        MatchNoBits sharedBits = new MatchNoBits(reader.MaxDoc);

        // Pass 1: the very same bits instance on every call -> exactly one enum instance.
        DocsEnum recycled = null;
        while (termsEnum.Next() != null)
        {
            recycled = termsEnum.Docs(sharedBits, recycled, Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
            distinctEnums[recycled] = true;
        }
        Assert.AreEqual(1, distinctEnums.Count);

        // Pass 2: a brand-new bits instance on every call -> one enum instance per term.
        distinctEnums.Clear();
        termsEnum = bodyTerms.Iterator(null);
        recycled = null;
        while (termsEnum.Next() != null)
        {
            recycled = termsEnum.Docs(new MatchNoBits(reader.MaxDoc), recycled, Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
            distinctEnums[recycled] = true;
        }
        Assert.AreEqual(bodyTerms.Size(), distinctEnums.Count);

        // Pass 3: null live docs on every call -> exactly one enum instance again.
        distinctEnums.Clear();
        termsEnum = bodyTerms.Iterator(null);
        recycled = null;
        while (termsEnum.Next() != null)
        {
            recycled = termsEnum.Docs(null, recycled, Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
            distinctEnums[recycled] = true;
        }
        Assert.AreEqual(1, distinctEnums.Count);
    }

    IOUtils.Close(writer, reader, dir);
}
/// <summary>
/// Verifies that a <see cref="DocsEnum"/> supplied as the reuse candidate is never recycled
/// when it does not belong to the reader being iterated: every <c>Docs</c> call must hand
/// back a distinct instance, so the number of distinct enums equals the number of terms.
/// The check is run once with <c>null</c> live docs and once with a <c>MatchNoBits</c> instance.
/// </summary>
/// <remarks>
/// The reuse candidate comes from <c>RandomDocsEnum</c> over the leaves of a second reader
/// opened on the same directory (NOTE(review): presumably it may also return <c>null</c>;
/// confirm against that helper).
/// </remarks>
public virtual void TestReuseDocsEnumDifferentReader()
{
    Directory dir = NewDirectory();
    Codec cp = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetCodec(cp));
    int numdocs = AtLeast(20);
    CreateRandomIndex(numdocs, writer, Random());
    writer.Commit();

    // Two independent readers over the same commit: one is iterated, the other only
    // supplies "foreign" enums as reuse candidates.
    DirectoryReader firstReader = DirectoryReader.Open(dir);
    DirectoryReader secondReader = DirectoryReader.Open(dir);
    IList<AtomicReaderContext> leaves = firstReader.Leaves;
    IList<AtomicReaderContext> leaves2 = secondReader.Leaves;

    foreach (AtomicReaderContext ctx in leaves)
    {
        Terms terms = ((AtomicReader)ctx.Reader).Terms("body");
        // Keyed by reference identity, so Count is the number of distinct enum instances.
        IdentityHashMap<DocsEnum, bool?> enums = new IdentityHashMap<DocsEnum, bool?>();
        MatchNoBits bits = new MatchNoBits(firstReader.MaxDoc);

        // Pass 1: null live docs, foreign reuse candidate.
        TermsEnum iterator = terms.Iterator(null);
        BytesRef term;
        while ((term = iterator.Next()) != null)
        {
            DocsEnum docs = iterator.Docs(null, RandomDocsEnum("body", term, leaves2, bits), Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
            enums[docs] = true;
        }
        Assert.AreEqual(terms.Size(), enums.Count);

        // Pass 2: same check with a live-docs instance supplied.
        iterator = terms.Iterator(null);
        enums.Clear();
        while ((term = iterator.Next()) != null)
        {
            DocsEnum docs = iterator.Docs(bits, RandomDocsEnum("body", term, leaves2, bits), Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
            enums[docs] = true;
        }
        Assert.AreEqual(terms.Size(), enums.Count);
    }

    IOUtils.Close(writer, firstReader, secondReader, dir);
}
/// <summary>
/// Empties this set by clearing the backing <c>map</c>.
/// </summary>
public override void Clear() => map.Clear();