Esempio n. 1
0
        public virtual void TestReuseDocsEnumSameBitsOrNull()
        {
            Directory         dir    = NewDirectory();
            Codec             cp     = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(cp));
            int numdocs = AtLeast(20);

            CreateRandomIndex(numdocs, writer, Random());
            writer.Commit();

            DirectoryReader open = DirectoryReader.Open(dir);

            foreach (AtomicReaderContext ctx in open.Leaves)
            {
                Terms     terms    = ((AtomicReader)ctx.Reader).GetTerms("body");
                TermsEnum iterator = terms.GetIterator(null);
                IdentityHashMap <DocsEnum, bool?> enums = new IdentityHashMap <DocsEnum, bool?>();
                MatchNoBits bits = new MatchNoBits(open.MaxDoc);
                DocsEnum    docs = null;
                while ((iterator.Next()) != null)
                {
                    docs        = iterator.Docs(bits, docs, Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
                    enums[docs] = true;
                }

                Assert.AreEqual(1, enums.Count);
                enums.Clear();
                iterator = terms.GetIterator(null);
                docs     = null;
                while ((iterator.Next()) != null)
                {
                    docs        = iterator.Docs(new MatchNoBits(open.MaxDoc), docs, Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
                    enums[docs] = true;
                }
                Assert.AreEqual(terms.Count, enums.Count);

                enums.Clear();
                iterator = terms.GetIterator(null);
                docs     = null;
                while ((iterator.Next()) != null)
                {
                    docs        = iterator.Docs(null, docs, Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
                    enums[docs] = true;
                }
                Assert.AreEqual(1, enums.Count);
            }
            IOUtils.Close(writer, open, dir);
        }
Esempio n. 2
0
        public virtual void TestReuseDocsEnumDifferentReader()
        {
            Directory    dir      = NewDirectory();
            Codec        cp       = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
            MockAnalyzer analyzer = new MockAnalyzer(Random());

            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetCodec(cp));
            int numdocs = AtLeast(20);

            CreateRandomIndex(numdocs, writer, Random());
            writer.Commit();

            DirectoryReader             firstReader  = DirectoryReader.Open(dir);
            DirectoryReader             secondReader = DirectoryReader.Open(dir);
            IList <AtomicReaderContext> leaves       = firstReader.Leaves;
            IList <AtomicReaderContext> leaves2      = secondReader.Leaves;

            foreach (AtomicReaderContext ctx in leaves)
            {
                Terms     terms    = ((AtomicReader)ctx.Reader).GetTerms("body");
                TermsEnum iterator = terms.GetIterator(null);
                IdentityHashMap <DocsEnum, bool?> enums = new IdentityHashMap <DocsEnum, bool?>();
                MatchNoBits bits = new MatchNoBits(firstReader.MaxDoc);
                iterator = terms.GetIterator(null);
                DocsEnum docs = null;
                BytesRef term = null;
                while ((term = iterator.Next()) != null)
                {
                    docs        = iterator.Docs(null, RandomDocsEnum("body", term, leaves2, bits), Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
                    enums[docs] = true;
                }
                Assert.AreEqual(terms.Count, enums.Count);

                iterator = terms.GetIterator(null);
                enums.Clear();
                docs = null;
                while ((term = iterator.Next()) != null)
                {
                    docs        = iterator.Docs(bits, RandomDocsEnum("body", term, leaves2, bits), Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
                    enums[docs] = true;
                }
                Assert.AreEqual(terms.Count, enums.Count);
            }
            IOUtils.Close(writer, firstReader, secondReader, dir);
        }
Esempio n. 3
0
        /// <summary>
        /// tests reuse with Pulsing1(Pulsing2(Standard)) </summary>
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testNestedPulsing() throws Exception
        public virtual void testNestedPulsing()
        {
            // we always run this test with pulsing codec.
            Codec cp = TestUtil.alwaysPostingsFormat(new NestedPulsingPostingsFormat());
            BaseDirectoryWrapper dir = newDirectory();
            RandomIndexWriter    iw  = new RandomIndexWriter(random(), dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp));
            Document             doc = new Document();

            doc.add(new TextField("foo", "a b b c c c d e f g g g h i i j j k l l m m m", Field.Store.NO));
            // note: the reuse is imperfect, here we would have 4 enums (lost reuse when we get an enum for 'm')
            // this is because we only track the 'last' enum we reused (not all).
            // but this seems 'good enough' for now.
            iw.addDocument(doc);
            DirectoryReader ir = iw.Reader;

            iw.close();

            AtomicReader segment = getOnlySegmentReader(ir);
            DocsEnum     reuse   = null;
            IDictionary <DocsEnum, bool?> allEnums = new IdentityHashMap <DocsEnum, bool?>();
            TermsEnum te = segment.terms("foo").iterator(null);

            while (te.next() != null)
            {
                reuse           = te.docs(null, reuse, DocsEnum.FLAG_NONE);
                allEnums[reuse] = true;
            }

            assertEquals(4, allEnums.Count);

            allEnums.Clear();
            DocsAndPositionsEnum posReuse = null;

            te = segment.terms("foo").iterator(null);
            while (te.next() != null)
            {
                posReuse           = te.docsAndPositions(null, posReuse);
                allEnums[posReuse] = true;
            }

            assertEquals(4, allEnums.Count);

            ir.close();
            dir.close();
        }
Esempio n. 4
0
        // TODO: this is a basic test. this thing is complicated, add more
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testSophisticatedReuse() throws Exception
        public virtual void testSophisticatedReuse()
        {
            // we always run this test with pulsing codec.
            Codec             cp  = TestUtil.alwaysPostingsFormat(new Pulsing41PostingsFormat(1));
            Directory         dir = newDirectory();
            RandomIndexWriter iw  = new RandomIndexWriter(random(), dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp));
            Document          doc = new Document();

            doc.add(new TextField("foo", "a b b c c c d e f g g h i i j j k", Field.Store.NO));
            iw.addDocument(doc);
            DirectoryReader ir = iw.Reader;

            iw.close();

            AtomicReader segment = getOnlySegmentReader(ir);
            DocsEnum     reuse   = null;
            IDictionary <DocsEnum, bool?> allEnums = new IdentityHashMap <DocsEnum, bool?>();
            TermsEnum te = segment.terms("foo").iterator(null);

            while (te.next() != null)
            {
                reuse           = te.docs(null, reuse, DocsEnum.FLAG_NONE);
                allEnums[reuse] = true;
            }

            assertEquals(2, allEnums.Count);

            allEnums.Clear();
            DocsAndPositionsEnum posReuse = null;

            te = segment.terms("foo").iterator(null);
            while (te.next() != null)
            {
                posReuse           = te.docsAndPositions(null, posReuse);
                allEnums[posReuse] = true;
            }

            assertEquals(2, allEnums.Count);

            ir.close();
            dir.close();
        }
        public virtual void TestSophisticatedReuse()
        {
            // we always run this test with pulsing codec.
            Codec cp = TestUtil.AlwaysPostingsFormat(new Pulsing41PostingsFormat(1));
            Directory dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(cp));
            Document doc = new Document();
            doc.Add(new TextField("foo", "a b b c c c d e f g g h i i j j k", Field.Store.NO));
            iw.AddDocument(doc);
            DirectoryReader ir = iw.Reader;
            iw.Dispose();

            AtomicReader segment = GetOnlySegmentReader(ir);
            DocsEnum reuse = null;
            IDictionary<DocsEnum, bool?> allEnums = new IdentityHashMap<DocsEnum, bool?>();
            TermsEnum te = segment.Terms("foo").Iterator(null);
            while (te.Next() != null)
            {
                reuse = te.Docs(null, reuse, DocsEnum.FLAG_NONE);
                allEnums[reuse] = true;
            }

            assertEquals(2, allEnums.Count);

            allEnums.Clear();
            DocsAndPositionsEnum posReuse = null;
            te = segment.Terms("foo").Iterator(null);
            while (te.Next() != null)
            {
                posReuse = te.DocsAndPositions(null, posReuse);
                allEnums[posReuse] = true;
            }

            assertEquals(2, allEnums.Count);

            ir.Dispose();
            dir.Dispose();
        }
        public virtual void TestNestedPulsing()
        {
            // we always run this test with pulsing codec.
            Codec cp = TestUtil.AlwaysPostingsFormat(new NestedPulsingPostingsFormat());
            BaseDirectoryWrapper dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(cp));
            Document doc = new Document();
            doc.Add(new TextField("foo", "a b b c c c d e f g g g h i i j j k l l m m m", Field.Store.NO));
            // note: the reuse is imperfect, here we would have 4 enums (lost reuse when we get an enum for 'm')
            // this is because we only track the 'last' enum we reused (not all).
            // but this seems 'good enough' for now.
            iw.AddDocument(doc);
            DirectoryReader ir = iw.Reader;
            iw.Dispose();

            AtomicReader segment = GetOnlySegmentReader(ir);
            DocsEnum reuse = null;
            IDictionary<DocsEnum, bool?> allEnums = new IdentityHashMap<DocsEnum, bool?>();
            TermsEnum te = segment.Terms("foo").Iterator(null);
            while (te.Next() != null)
            {
                reuse = te.Docs(null, reuse, DocsEnum.FLAG_NONE);
                allEnums[reuse] = true;
            }

            assertEquals(4, allEnums.Count);

            allEnums.Clear();
            DocsAndPositionsEnum posReuse = null;
            te = segment.Terms("foo").Iterator(null);
            while (te.Next() != null)
            {
                posReuse = te.DocsAndPositions(null, posReuse);
                allEnums[posReuse] = true;
            }

            assertEquals(4, allEnums.Count);

            ir.Dispose();
            dir.Dispose();
        }
Esempio n. 7
0
        /*
         * Non-recursive version of object descend. this consumes more memory than recursive in-depth
         * traversal but prevents stack overflows on long chains of objects
         * or complex graphs (a max. recursion depth on my machine was ~5000 objects linked in a chain
         * so not too much).
         */

        private static long MeasureObjectSize(object root)
        {
            // Objects seen so far.
            IdentityHashSet <object> seen = new IdentityHashSet <object>();
            // Class cache with reference Field and precalculated shallow size.
            HashMap <Type, ClassCache> classCache = new IdentityHashMap <Type, ClassCache>();
            // Stack of objects pending traversal. Recursion caused stack overflows.
            Stack <object> stack = new Stack <object>();

            stack.Push(root);

            long totalSize = 0;

            while (stack.Count > 0)
            {
                object ob = stack.Pop();

                if (ob == null || seen.Contains(ob))
                {
                    continue;
                }
                seen.Add(ob);

                Type obClazz = ob.GetType();

                if (obClazz.Equals(typeof(string)))
                {
                    // LUCENENET specific - we can get a closer estimate of a string
                    // by using simple math. Reference: http://stackoverflow.com/a/8171099.
                    // This fixes the TestSanity test.
                    totalSize += (2 * (((string)ob).Length + 1));
                }
                if (obClazz.IsArray)
                {
                    /*
                     * Consider an array, possibly of primitive types. Push any of its references to
                     * the processing stack and accumulate this array's shallow size.
                     */
                    long  size  = NUM_BYTES_ARRAY_HEADER;
                    Array array = (Array)ob;
                    int   len   = array.Length;
                    if (len > 0)
                    {
                        Type componentClazz = obClazz.GetElementType();
                        if (componentClazz.GetTypeInfo().IsPrimitive)
                        {
                            size += (long)len * primitiveSizes[componentClazz];
                        }
                        else
                        {
                            size += (long)NUM_BYTES_OBJECT_REF * len;

                            // Push refs for traversal later.
                            for (int i = len; --i >= 0;)
                            {
                                object o = array.GetValue(i);
                                if (o != null && !seen.Contains(o))
                                {
                                    stack.Push(o);
                                }
                            }
                        }
                    }
                    totalSize += AlignObjectSize(size);
                }
                else
                {
                    /*
                     * Consider an object. Push any references it has to the processing stack
                     * and accumulate this object's shallow size.
                     */
                    try
                    {
                        if (!classCache.TryGetValue(obClazz, out ClassCache cachedInfo) || cachedInfo == null)
                        {
                            classCache[obClazz] = cachedInfo = CreateCacheEntry(obClazz);
                        }

                        foreach (FieldInfo f in cachedInfo.ReferenceFields)
                        {
                            // Fast path to eliminate redundancies.
                            object o = f.GetValue(ob);
                            if (o != null && !seen.Contains(o))
                            {
                                stack.Push(o);
                            }
                        }

                        totalSize += cachedInfo.AlignedShallowInstanceSize;
                    }
                    catch (Exception e)
                    {
                        // this should never happen as we enabled setAccessible().
                        throw new Exception("Reflective field access failed?", e);
                    }
                }
            }

            // Help the GC (?).
            seen.Clear();
            stack.Clear();
            classCache.Clear();

            return(totalSize);
        }
        public virtual void TestReuseDocsEnumSameBitsOrNull()
        {
            Directory dir = NewDirectory();
            Codec cp = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(cp));
            int numdocs = AtLeast(20);
            CreateRandomIndex(numdocs, writer, Random());
            writer.Commit();

            DirectoryReader open = DirectoryReader.Open(dir);
            foreach (AtomicReaderContext ctx in open.Leaves)
            {
                Terms terms = ((AtomicReader)ctx.Reader).Terms("body");
                TermsEnum iterator = terms.Iterator(null);
                IdentityHashMap<DocsEnum, bool?> enums = new IdentityHashMap<DocsEnum, bool?>();
                MatchNoBits bits = new MatchNoBits(open.MaxDoc);
                DocsEnum docs = null;
                while ((iterator.Next()) != null)
                {
                    docs = iterator.Docs(bits, docs, Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
                    enums[docs] = true;
                }

                Assert.AreEqual(1, enums.Count);
                enums.Clear();
                iterator = terms.Iterator(null);
                docs = null;
                while ((iterator.Next()) != null)
                {
                    docs = iterator.Docs(new MatchNoBits(open.MaxDoc), docs, Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
                    enums[docs] = true;
                }
                Assert.AreEqual(terms.Size(), enums.Count);

                enums.Clear();
                iterator = terms.Iterator(null);
                docs = null;
                while ((iterator.Next()) != null)
                {
                    docs = iterator.Docs(null, docs, Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
                    enums[docs] = true;
                }
                Assert.AreEqual(1, enums.Count);
            }
            IOUtils.Close(writer, open, dir);
        }
        public virtual void TestReuseDocsEnumDifferentReader()
        {
            Directory dir = NewDirectory();
            Codec cp = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetCodec(cp));
            int numdocs = AtLeast(20);
            CreateRandomIndex(numdocs, writer, Random());
            writer.Commit();

            DirectoryReader firstReader = DirectoryReader.Open(dir);
            DirectoryReader secondReader = DirectoryReader.Open(dir);
            IList<AtomicReaderContext> leaves = firstReader.Leaves;
            IList<AtomicReaderContext> leaves2 = secondReader.Leaves;

            foreach (AtomicReaderContext ctx in leaves)
            {
                Terms terms = ((AtomicReader)ctx.Reader).Terms("body");
                TermsEnum iterator = terms.Iterator(null);
                IdentityHashMap<DocsEnum, bool?> enums = new IdentityHashMap<DocsEnum, bool?>();
                MatchNoBits bits = new MatchNoBits(firstReader.MaxDoc);
                iterator = terms.Iterator(null);
                DocsEnum docs = null;
                BytesRef term = null;
                while ((term = iterator.Next()) != null)
                {
                    docs = iterator.Docs(null, RandomDocsEnum("body", term, leaves2, bits), Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
                    enums[docs] = true;
                }
                Assert.AreEqual(terms.Size(), enums.Count);

                iterator = terms.Iterator(null);
                enums.Clear();
                docs = null;
                while ((term = iterator.Next()) != null)
                {
                    docs = iterator.Docs(bits, RandomDocsEnum("body", term, leaves2, bits), Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
                    enums[docs] = true;
                }
                Assert.AreEqual(terms.Size(), enums.Count);
            }
            IOUtils.Close(writer, firstReader, secondReader, dir);
        }
 /// <summary>Removes all of the elements from this set.</summary>
 public override void Clear()
 {
     map.Clear();
 }