コード例 #1
0
ファイル: TermsFilterTest.cs プロジェクト: mundher/lucene.net
        /// <summary>
        /// Checks that a <c>TermsFilter</c> only matches documents for terms that are
        /// actually present in the index: terms are added to one filter step by step and
        /// the cardinality of the resulting bit set is asserted after each addition.
        /// </summary>
        public void MissingTerms_Test()
        {
            string       fieldName = "field1";
            RAMDirectory directory = new RAMDirectory();
            IndexWriter  writer    = new IndexWriter(directory, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);

            // Index 100 documents whose single field holds a multiple of 10 ("0", "10", ..., "990").
            int docNumber = 0;
            while (docNumber < 100)
            {
                int      term     = docNumber * 10;
                Document document = new Document();
                document.Add(new Field(fieldName, "" + term, Field.Store.YES, Field.Index.NOT_ANALYZED));
                writer.AddDocument(document);
                docNumber++;
            }
            writer.Close();
            IndexReader reader = IndexReader.Open(directory, true);

            TermsFilter filter = new TermsFilter();

            // Terms are accumulated in the same filter; "19" and "00" were never indexed,
            // so they must not change the number of matching documents.
            string[] addedTerms     = { "19", "20", "10", "00" };
            int[]    expectedCounts = { 0, 1, 2, 2 };
            string[] messages       = { "Must match nothing", "Must match 1", "Must match 2", "Must match 2" };

            for (int step = 0; step < addedTerms.Length; step++)
            {
                filter.AddTerm(new Term(fieldName, addedTerms[step]));
                OpenBitSet matches = (OpenBitSet)filter.GetDocIdSet(reader);
                Assert.AreEqual(expectedCounts[step], matches.Cardinality(), messages[step]);
            }
        }
コード例 #2
0
        /// <summary>
        /// Builds the doc id set for the configured values on a multi-value facet:
        /// the converted value indexes are marked in a bit set (optionally complemented),
        /// and an empty doc id set is returned when no bit ends up set.
        /// </summary>
        /// <param name="reader">Segment reader whose facet data is looked up.</param>
        /// <returns>
        /// An <c>EmptyRandomAccessDocIdSet</c> when the bit set is empty, otherwise a
        /// <c>MultiRandomAccessDocIdSet</c> over the facet data and the bit set.
        /// </returns>
        public override RandomAccessDocIdSet GetRandomAccessDocIdSet(BoboSegmentReader reader)
        {
            MultiValueFacetDataCache cache = m_facetHandler.GetFacetData<MultiValueFacetDataCache>(reader);

            int[]      selectedIndexes = m_valueConverter.Convert(cache, m_vals);
            int        valueCount      = cache.ValArray.Count;
            OpenBitSet selectedBits    = new OpenBitSet(valueCount);

            for (int k = 0; k < selectedIndexes.Length; k++)
            {
                selectedBits.FastSet(selectedIndexes[k]);
            }

            if (m_takeCompliment)
            {
                // Invert every position in [0, valueCount) to select the complement.
                int position = 0;
                while (position < valueCount)
                {
                    selectedBits.FastFlip(position);
                    position++;
                }
            }

            if (selectedBits.Cardinality() != 0)
            {
                return new MultiRandomAccessDocIdSet(cache, selectedBits);
            }

            return new EmptyRandomAccessDocIdSet();
        }
コード例 #3
0
        /// <summary>
        /// Stores <paramref name="floats"/> either as-is or in a compacted form, depending on
        /// the fraction of non-zero entries compared with <c>ON_RATIO_CUTOFF</c>.
        /// When compacting, only the non-zero values are kept in <c>m_floats</c>, their
        /// positions are recorded in <c>m_bits</c>, and <c>m_referencePoints</c> holds, for every
        /// <c>REFERENCE_POINT_EVERY</c> positions, how many non-zero values precede that point.
        /// </summary>
        /// <param name="floats">Input values; its length must equal <c>m_capacity</c>.</param>
        /// <exception cref="ArgumentException">The array length differs from <c>m_capacity</c>.</exception>
        protected virtual void Condense(float[] floats)
        {
            int length = floats.Length;
            if (length != m_capacity)
            {
                throw new ArgumentException("bad input float array of length " + length + " for capacity: " + m_capacity);
            }

            // Mark the non-zero positions and count them.
            OpenBitSet nonZero      = new OpenBitSet(length);
            int        nonZeroCount = 0;
            for (int idx = 0; idx < length; idx++)
            {
                if (floats[idx] == 0f)
                {
                    continue;
                }
                nonZero.Set(idx);
                nonZeroCount++;
            }

            bool worthCompressing = ((float)nonZeroCount) / ((float)length) < ON_RATIO_CUTOFF;
            if (!worthCompressing)
            {
                // Dense enough: keep the caller's array directly.
                m_floats = floats;
                m_bits   = null;
                return;
            }

            if (nonZeroCount == 0)
            {
                // Nothing but zeros: drop all storage, the capacity alone describes the array.
                m_floats          = null;
                m_bits            = null;
                m_referencePoints = null;
                return;
            }

            m_bits            = nonZero;
            m_floats          = new float[m_bits.Cardinality()];
            m_referencePoints = new int[length / REFERENCE_POINT_EVERY];

            int packedCount = 0; // number of values copied into m_floats so far
            int refSlot     = 0; // next reference point to fill
            int position    = 0; // scan position in the original array
            while (position < length)
            {
                position = m_bits.NextSetBit(position);
                if (position < 0)
                {
                    break;
                }

                m_floats[packedCount] = floats[position];

                // Every reference point lying before this position sees the values packed so far.
                int slotsBefore = position / REFERENCE_POINT_EVERY;
                while (refSlot < slotsBefore)
                {
                    m_referencePoints[refSlot++] = packedCount;
                }

                packedCount++;
                position++;
            }

            // Remaining reference points come after the last non-zero value.
            while (refSlot < m_referencePoints.Length)
            {
                m_referencePoints[refSlot++] = packedCount;
            }
        }
コード例 #4
0
        /// <summary>
        /// Calculates the similarity score between a pair of fingerprints as
        /// <c>common / (card1 + card2 - common)</c>, where <c>common</c> is the number of bits
        /// set in both fingerprints and <c>card1</c>/<c>card2</c> are their individual bit counts.
        /// </summary>
        /// <param name="fp1">First fingerprint.</param>
        /// <param name="fp2">Second fingerprint.</param>
        /// <returns>
        /// The similarity score. When neither fingerprint has any bit set the division is
        /// 0 / 0 and the result is <see cref="float.NaN"/>.
        /// </returns>
        public static float CalculateFingerprintPairSimilarityScore(
            FingerprintMx fp1,
            FingerprintMx fp2)
        {
            long[]     fp1Array  = fp1.ToLongArray();
            OpenBitSet fp1BitSet = new OpenBitSet(fp1Array, fp1Array.Length);
            int        fp1Card   = (int)fp1BitSet.Cardinality();

            long[]     fp2Array  = fp2.ToLongArray();
            OpenBitSet fp2BitSet = new OpenBitSet(fp2Array, fp2Array.Length);
            int        fp2Card   = (int)fp2BitSet.Cardinality();

            // Count the shared bits without modifying either set. OpenBitSet wraps the
            // supplied long[] directly, so an in-place Intersect would overwrite the array
            // returned by fp2.ToLongArray().
            // NOTE(review): whether that array is the fingerprint's own backing store is not
            // visible here; the non-mutating count is safe either way.
            int   commonCnt = (int)OpenBitSet.IntersectionCount(fp1BitSet, fp2BitSet);
            float simScore  = commonCnt / (float)(fp1Card + fp2Card - commonCnt);

            return simScore;
        }
コード例 #5
0
        /// <summary>
        /// Builds the facet data for <paramref name="reader"/>, derives the matching bit set
        /// from it and wraps that bit set in a random-access doc id set.
        /// </summary>
        /// <param name="reader">Segment reader the facet data is built for.</param>
        /// <returns>
        /// <c>EmptyDocIdSet.Instance</c> when no bit is set; otherwise a
        /// <c>BitSetRandomAccessDocIdSet</c> that also receives the multi-value cache when the
        /// facet data is a <c>MultiValueFacetDataCache</c>.
        /// </returns>
        public override RandomAccessDocIdSet GetRandomAccessDocIdSet(BoboSegmentReader reader)
        {
            FacetDataCache facetData = m_facetDataCacheBuilder.Build(reader);
            OpenBitSet     matching  = GetBitSet(facetData);

            if (matching.Cardinality() == 0)
            {
                return EmptyDocIdSet.Instance;
            }

            // A successful 'as' cast doubles as the multi-value flag.
            MultiValueFacetDataCache multiValueData = facetData as MultiValueFacetDataCache;
            bool                     isMultiValue   = multiValueData != null;

            return new BitSetRandomAccessDocIdSet(isMultiValue, multiValueData, matching, facetData);
        }
コード例 #6
0
            /// <summary>
            /// Sorts the terms of this field, writes each of them to the consumer obtained from
            /// <paramref name="consumer"/>, and finishes the field with the accumulated totals:
            /// the summed term counts (or -1 when <c>omitTF</c> is set), the summed document
            /// frequencies and the number of distinct documents seen.
            /// </summary>
            /// <param name="consumer">Consumer that receives this field.</param>
            public virtual void Write(FieldsConsumer consumer)
            {
                Array.Sort(terms);

                TermsConsumer fieldTerms    = consumer.AddField(fieldInfo);
                OpenBitSet    seenDocs      = new OpenBitSet();
                long          totalTermFreq = 0;
                long          totalDocFreq  = 0;

                foreach (TermData current in terms)
                {
                    // Track distinct documents across all terms of the field.
                    foreach (var docId in current.docs)
                    {
                        seenDocs.Set(docId);
                    }

                    totalDocFreq  += current.docs.Length;
                    totalTermFreq += current.Write(fieldTerms);
                }

                long reportedTermFreq = omitTF ? -1 : totalTermFreq;
                int  distinctDocs     = (int)seenDocs.Cardinality();
                fieldTerms.Finish(reportedTermFreq, totalDocFreq, distinctDocs);
            }
コード例 #7
0
 /// <summary>
 /// Validates the field-level statistics with debug assertions, marks this consumer as
 /// finished and forwards the call unchanged to the wrapped consumer <c>@in</c>.
 /// </summary>
 /// <param name="sumTotalTermFreq">Total term frequency reported for the field; asserted to be -1 when the field is indexed as DOCS_ONLY.</param>
 /// <param name="sumDocFreq">Sum of document frequencies reported for the field.</param>
 /// <param name="docCount">Number of documents reported for the field.</param>
 public override void Finish(long sumTotalTermFreq, long sumDocFreq, int docCount)
 {
     // '&&' binds tighter than '||': valid states are INITIAL, or START with no docs
     // recorded by the last postings consumer.
     Debug.Assert(state == TermsConsumerState.INITIAL || state == TermsConsumerState.START && lastPostingsConsumer.docFreq == 0);
     state = TermsConsumerState.FINISHED;
     Debug.Assert(docCount >= 0);
     // The reported count must agree with the documents tracked in visitedDocs.
     Debug.Assert(docCount == visitedDocs.Cardinality());
     Debug.Assert(sumDocFreq >= docCount);
     // The reported sum must agree with the value tracked in this instance.
     Debug.Assert(sumDocFreq == this.sumDocFreq);
     if (fieldInfo.IndexOptions == IndexOptions.DOCS_ONLY)
     {
         // No frequencies for DOCS_ONLY fields: the total is expected to be the -1 sentinel.
         Debug.Assert(sumTotalTermFreq == -1);
     }
     else
     {
         Debug.Assert(sumTotalTermFreq >= sumDocFreq);
         Debug.Assert(sumTotalTermFreq == this.sumTotalTermFreq);
     }
     @in.Finish(sumTotalTermFreq, sumDocFreq, docCount);
 }
コード例 #8
0
        /// <summary>
        /// Compares a <see cref="BitArray"/> with an <c>OpenBitSet</c> and reports whether
        /// both have exactly the same bits set.
        /// </summary>
        /// <param name="a">The bit array.</param>
        /// <param name="b">The open bit set.</param>
        /// <returns><c>true</c> when the same positions are set in both; otherwise <c>false</c>.</returns>
        public static bool Equal(this BitArray a, OpenBitSet b)
        {
            // Different populations can never be equal; this also rules out bits that
            // b has set beyond the length of a once the loop below has passed.
            if (a.Cardinality() != b.Cardinality())
            {
                return(false);
            }

            // Compare every position of the bit array. Iterating only up to the
            // cardinality would miss any set bit at an index >= the number of set bits.
            for (int i = 0; i < a.Count; i++)
            {
                if (a.SafeGet(i) != b.Get(i))
                {
                    return(false);
                }
            }

            return(true);
        }
コード例 #9
0
ファイル: HitsPerFacet.cs プロジェクト: mundher/lucene.net
            /// <summary>
            /// Computes the hits for this facet: the query bit set is copied and ANDed with the
            /// group bit set (or replaced by an empty set when the query matched nothing),
            /// then the iterator and hit count are captured and the working sets are released.
            /// </summary>
            internal void Calculate()
            {
                bool queryMatchedNothing = _QueryDocidSet == DocIdBitSet.EMPTY_DOCIDSET;

                if (!queryMatchedNothing)
                {
                    // Work on a copy so the shared query bit set is left untouched.
                    OpenBitSet queryBits = (OpenBitSet)_QueryDocidSet;
                    _ResultBitSet = (OpenBitSet)queryBits.Clone();
                    _ResultBitSet.And(_GroupBitSet);
                }
                else
                {
                    _ResultBitSet = new OpenBitSet(0);
                }

                _ResultIterator = _ResultBitSet.Iterator();
                _HitCount       = _ResultBitSet.Cardinality();

                // Drop the references that are no longer needed after the calculation.
                _ResultBitSet  = null;
                _QueryDocidSet = null;
                _GroupBitSet   = null;
            }
コード例 #10
0
        /// <summary>
        /// Creates <c>N_THREADS</c> worker threads sharing one result bit set, starts them all,
        /// waits for all of them, and asserts that the bit set ends up with
        /// <c>N_THREADS</c> bits set.
        /// </summary>
        /// <param name="withTimeout">Flag handed to each worker thread.</param>
        private void DoTestMultiThreads(bool withTimeout)
        {
            OpenBitSet    success = new OpenBitSet(N_THREADS);
            ThreadClass[] workers = new ThreadClass[N_THREADS];

            for (int slot = 0; slot < workers.Length; slot++)
            {
                workers[slot] = new ThreadClassAnonymousHelper(this, success, withTimeout, slot);
            }

            // Start everything first, then wait, so the workers actually run concurrently.
            foreach (ThreadClass worker in workers)
            {
                worker.Start();
            }
            foreach (ThreadClass worker in workers)
            {
                worker.Join();
            }

            assertEquals("some threads failed!", N_THREADS, success.Cardinality());
        }
コード例 #11
0
        /// <summary>
        /// Loads multi-value facet data for one field. Every term of the field is added to the
        /// value list, its documents are fed to a buffered loader (prepared from the work area)
        /// and per-value frequency and min/max document ids are collected. Slot 0 of the
        /// resulting arrays describes documents without any value.
        /// </summary>
        /// <param name="fieldName">Name of the field whose terms are loaded.</param>
        /// <param name="reader">Index reader supplying terms and term documents.</param>
        /// <param name="listFactory">Factory for the term value list; a <c>TermStringList</c> is used when <c>null</c>.</param>
        /// <param name="workArea">Work area handed to <c>GetBufferedLoader</c>.</param>
        public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory, BoboIndexReader.WorkArea workArea)
        {
            int maxdoc = reader.MaxDoc;

            BigNestedIntArray.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea);

            TermEnum       tenum              = null;
            TermDocs       tdoc               = null;
            ITermValueList list               = (listFactory == null ? (ITermValueList) new TermStringList() : listFactory.CreateTermList());
            List <int>     minIDList          = new List <int>();
            List <int>     maxIDList          = new List <int>();
            List <int>     freqList           = new List <int>();
            OpenBitSet     bitset             = new OpenBitSet();
            int            negativeValueCount = GetNegativeValueCount(reader, string.Intern(fieldName));
            int            t = 0; // current term number

            // Slot 0 is reserved for "no value"; real terms start at 1.
            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            t++;

            _overflow = false;
            try
            {
                tdoc  = reader.TermDocs();
                tenum = reader.Terms(new Term(fieldName, ""));
                if (tenum != null)
                {
                    do
                    {
                        Term term = tenum.Term;
                        // Stop as soon as the enumeration leaves the requested field.
                        if (term == null || !fieldName.Equals(term.Field))
                        {
                            break;
                        }

                        string val = term.Text;

                        if (val != null)
                        {
                            list.Add(val);

                            tdoc.Seek(tenum);
                            // The frequency is counted from the postings rather than taken from
                            // tenum's docFreq, because the latter does not account for deleted docs.
                            int df    = 0;
                            int minID = -1;
                            int maxID = -1;
                            // The first negativeValueCount terms receive value ids in reverse order.
                            int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                            if (tdoc.Next())
                            {
                                df++;
                                int docid = tdoc.Doc;

                                if (!loader.Add(docid, valId))
                                {
                                    LogOverflow(fieldName);
                                }
                                minID = docid;
                                bitset.Set(docid);
                                while (tdoc.Next())
                                {
                                    df++;
                                    docid = tdoc.Doc;

                                    if (!loader.Add(docid, valId))
                                    {
                                        LogOverflow(fieldName);
                                    }
                                    bitset.Set(docid);
                                }
                                maxID = docid;
                            }
                            freqList.Add(df);
                            minIDList.Add(minID);
                            maxIDList.Add(maxID);
                        }

                        t++;
                    }while (tenum.Next());
                }
            }
            finally
            {
                // Dispose both enumerators even if disposing the first one throws.
                try
                {
                    if (tdoc != null)
                    {
                        tdoc.Dispose();
                    }
                }
                finally
                {
                    if (tenum != null)
                    {
                        tenum.Dispose();
                    }
                }
            }

            list.Seal();

            try
            {
                _nestedArray.Load(maxdoc + 1, loader);
            }
            catch (System.IO.IOException)
            {
                // Let I/O failures propagate untouched; 'throw;' keeps the original stack trace.
                throw;
            }
            catch (Exception e)
            {
                throw new RuntimeException("failed to load due to " + e.ToString(), e);
            }

            this.valArray = list;
            this.freqs    = freqList.ToArray();
            this.minIDs   = minIDList.ToArray();
            this.maxIDs   = maxIDList.ToArray();

            // Find the first and last document that have no value and record them in slot 0.
            int doc = 0;

            while (doc <= maxdoc && !_nestedArray.Contains(doc, 0, true))
            {
                ++doc;
            }
            if (doc <= maxdoc)
            {
                this.minIDs[0] = doc;
                doc            = maxdoc;
                while (doc > 0 && !_nestedArray.Contains(doc, 0, true))
                {
                    --doc;
                }
                // NOTE(review): a match at doc 0 is not recorded as the max id here - confirm this is intended.
                if (doc > 0)
                {
                    this.maxIDs[0] = doc;
                }
            }
            // Slot 0 frequency: positions 0..maxdoc minus the documents that received a value.
            this.freqs[0] = maxdoc + 1 - (int)bitset.Cardinality();
        }
コード例 #12
0
ファイル: SearchBits.cs プロジェクト: EhsanGhanbari/Seldino
 /// <summary>
 /// Returns the number of bits that are set in the underlying bit set.
 /// </summary>
 /// <returns>The cardinality of <c>_openBitSet</c>.</returns>
 public long Count()
 {
     long setBits = _openBitSet.Cardinality();
     return setBits;
 }
コード例 #13
0
ファイル: OBSDocIdSet.cs プロジェクト: yao-yi/BoboBrowse.Net
 /// <summary>
 /// Returns the number of bits set in the wrapped bit set, narrowed to <c>int</c>.
 /// </summary>
 /// <returns>The cardinality of <c>bitSet</c> cast to <c>int</c>.</returns>
 public override int Size()
 {
     long setBits = bitSet.Cardinality();
     return (int)setBits;
 }
コード例 #14
0
        /// <summary>
        /// Loads multi-value facet data for one field. Every term of the field is added to the
        /// value list, its documents are fed to a buffered loader (prepared from the work area)
        /// and per-value frequency and min/max document ids are collected. Slot 0 of the
        /// resulting arrays describes documents without any value.
        /// </summary>
        /// <param name="fieldName">Name of the field whose terms are loaded.</param>
        /// <param name="reader">Segment-level reader supplying terms and postings.</param>
        /// <param name="listFactory">Factory for the term value list; a <c>TermStringList</c> is used when <c>null</c>.</param>
        /// <param name="workArea">Work area handed to <c>GetBufferedLoader</c>.</param>
        public virtual void Load(string fieldName, AtomicReader reader, TermListFactory listFactory, BoboSegmentReader.WorkArea workArea)
        {
#if FEATURE_STRING_INTERN
            string field = string.Intern(fieldName);
#else
            string field = fieldName;
#endif
            int maxdoc = reader.MaxDoc;
            BigNestedInt32Array.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea);

            ITermValueList list               = (listFactory == null ? (ITermValueList) new TermStringList() : listFactory.CreateTermList());
            List <int>     minIDList          = new List <int>();
            List <int>     maxIDList          = new List <int>();
            List <int>     freqList           = new List <int>();
            OpenBitSet     bitset             = new OpenBitSet(maxdoc + 1);
            int            negativeValueCount = GetNegativeValueCount(reader, field);
            int            t = 1; // valid term id starts from 1

            // Slot 0 is reserved for "no value".
            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);

            m_overflow = false;
            Terms terms = reader.GetTerms(field);
            if (terms != null)
            {
                TermsEnum termsEnum = terms.GetIterator(null);
                BytesRef  text;
                while ((text = termsEnum.Next()) != null)
                {
                    string strText = text.Utf8ToString();
                    list.Add(strText);

                    Term     term     = new Term(field, strText);
                    DocsEnum docsEnum = reader.GetTermDocsEnum(term);
                    int      df       = 0;
                    int      minID    = -1;
                    int      maxID    = -1;
                    // The first negativeValueCount terms receive value ids in reverse order.
                    int      valId    = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;

                    // Guard against a missing postings enumerator, as the weighted loader does.
                    if (docsEnum != null)
                    {
                        // Single pass over the postings: NextDoc() is never called again once it
                        // has returned NO_MORE_DOCS, which Lucene documents as undefined behaviour.
                        int docID;
                        while ((docID = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS)
                        {
                            df++;
                            if (!loader.Add(docID, valId))
                            {
                                LogOverflow(fieldName);
                            }
                            if (minID < 0)
                            {
                                // First document of this term.
                                minID = docID;
                            }
                            bitset.FastSet(docID);
                            // Postings are visited in order, so the latest document is the max.
                            maxID = docID;
                        }
                    }
                    freqList.Add(df);
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);
                    t++;
                }
            }

            list.Seal();

            try
            {
                m_nestedArray.Load(maxdoc + 1, loader);
            }
            catch (Exception e)
            {
                throw new RuntimeException("failed to load due to " + e.ToString(), e);
            }

            this.m_valArray = list;
            this.m_freqs    = freqList.ToArray();
            this.m_minIDs   = minIDList.ToArray();
            this.m_maxIDs   = maxIDList.ToArray();

            // Find the first and last document that have no value and record them in slot 0.
            int doc = 0;
            while (doc < maxdoc && !m_nestedArray.Contains(doc, 0, true))
            {
                ++doc;
            }
            if (doc < maxdoc)
            {
                this.m_minIDs[0] = doc;
                doc = maxdoc - 1;
                while (doc >= 0 && !m_nestedArray.Contains(doc, 0, true))
                {
                    --doc;
                }
                this.m_maxIDs[0] = doc;
            }
            // Slot 0 frequency: all documents minus those that received at least one value.
            this.m_freqs[0] = maxdoc - (int)bitset.Cardinality();
        }
コード例 #15
0
        /// <summary>
        /// Loads weighted multi-value facet data for one field. Each term text is split on
        /// '\0'; when it has more than one part, the first part is the value and the last part
        /// is parsed as its integer weight. Consecutive terms with the same value share one
        /// value id, and their documents are accumulated into the same frequency and
        /// min/max document ids. Values go to the nested array, weights to the weight array.
        /// </summary>
        /// <param name="fieldName">Name of the field whose terms are loaded.</param>
        /// <param name="reader">Index reader supplying terms and term documents.</param>
        /// <param name="listFactory">Factory for the term value list; a <c>TermStringList</c> is used when <c>null</c>.</param>
        /// <param name="workArea">Work area handed to <c>GetBufferedLoader</c> for the value loader.</param>
        public override void Load(string fieldName, IndexReader reader, TermListFactory listFactory, BoboIndexReader.WorkArea workArea)
        {
            int maxdoc = reader.MaxDoc;

            BigNestedIntArray.BufferedLoader loader       = GetBufferedLoader(maxdoc, workArea);
            BigNestedIntArray.BufferedLoader weightLoader = GetBufferedLoader(maxdoc, null);

            TermEnum   tenum              = null;
            TermDocs   tdoc               = null;
            var        list               = (listFactory == null ? new TermStringList() : listFactory.CreateTermList());
            List <int> minIDList          = new List <int>();
            List <int> maxIDList          = new List <int>();
            List <int> freqList           = new List <int>();
            OpenBitSet bitset             = new OpenBitSet(maxdoc + 1);
            int        negativeValueCount = GetNegativeValueCount(reader, string.Intern(fieldName));
            int        t = 0; // current term number

            // Slot 0 is reserved for "no value"; real terms start at 1.
            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            t++;

            _overflow = false;

            string pre = null; // value of the previously processed term

            // Statistics of the value currently being accumulated (may span several terms).
            int df    = 0;
            int minID = -1;
            int maxID = -1;
            int valId = 0;

            try
            {
                tdoc  = reader.TermDocs();
                tenum = reader.Terms(new Term(fieldName, ""));
                if (tenum != null)
                {
                    do
                    {
                        Term term = tenum.Term;
                        // Stop as soon as the enumeration leaves the requested field.
                        if (term == null || !fieldName.Equals(term.Field))
                        {
                            break;
                        }

                        string val = term.Text;

                        if (val != null)
                        {
                            int      weight = 0;
                            string[] split  = val.Split(new char[] { '\0' }, StringSplitOptions.RemoveEmptyEntries);
                            if (split.Length > 1)
                            {
                                val    = split[0];
                                weight = int.Parse(split[split.Length - 1]);
                            }
                            if (pre == null || !val.Equals(pre))
                            {
                                // A new value starts: flush the statistics of the previous one.
                                if (pre != null)
                                {
                                    freqList.Add(df);
                                    minIDList.Add(minID);
                                    maxIDList.Add(maxID);
                                }

                                list.Add(val);

                                df    = 0;
                                minID = -1;
                                maxID = -1;
                                // The first negativeValueCount values receive ids in reverse order.
                                valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                                t++;
                            }

                            tdoc.Seek(tenum);
                            while (tdoc.Next())
                            {
                                df++;
                                int docid = tdoc.Doc;

                                if (!loader.Add(docid, valId))
                                {
                                    LogOverflow(fieldName);
                                }
                                else
                                {
                                    weightLoader.Add(docid, weight);
                                }

                                // minID starts at the -1 sentinel, so a plain 'docid < minID'
                                // test could never succeed; treat the sentinel as "not set yet".
                                if (minID < 0 || docid < minID)
                                {
                                    minID = docid;
                                }
                                if (docid > maxID)
                                {
                                    maxID = docid;
                                }
                                bitset.FastSet(docid);
                            }
                            pre = val;
                        }
                    }while (tenum.Next());
                    // Flush the statistics of the last value.
                    if (pre != null)
                    {
                        freqList.Add(df);
                        minIDList.Add(minID);
                        maxIDList.Add(maxID);
                    }
                }
            }
            finally
            {
                // Dispose both enumerators even if disposing the first one throws.
                try
                {
                    if (tdoc != null)
                    {
                        tdoc.Dispose();
                    }
                }
                finally
                {
                    if (tenum != null)
                    {
                        tenum.Dispose();
                    }
                }
            }

            list.Seal();

            try
            {
                _nestedArray.Load(maxdoc + 1, loader);
                _weightArray.Load(maxdoc + 1, weightLoader);
            }
            catch (System.IO.IOException)
            {
                // Let I/O failures propagate untouched; 'throw;' keeps the original stack trace.
                throw;
            }
            catch (Exception e)
            {
                throw new RuntimeException("failed to load due to " + e.ToString(), e);
            }

            this.valArray = list;
            this.freqs    = freqList.ToArray();
            this.minIDs   = minIDList.ToArray();
            this.maxIDs   = maxIDList.ToArray();

            // Find the first and last document that have no value and record them in slot 0.
            int doc = 0;

            while (doc <= maxdoc && !_nestedArray.Contains(doc, 0, true))
            {
                ++doc;
            }
            if (doc <= maxdoc)
            {
                this.minIDs[0] = doc;
                doc            = maxdoc;
                while (doc > 0 && !_nestedArray.Contains(doc, 0, true))
                {
                    --doc;
                }
                // NOTE(review): a match at doc 0 is not recorded as the max id here - confirm this is intended.
                if (doc > 0)
                {
                    this.maxIDs[0] = doc;
                }
            }
            // Slot 0 frequency: positions 0..maxdoc minus the documents that received a value.
            this.freqs[0] = maxdoc + 1 - (int)bitset.Cardinality();
        }
コード例 #16
0
        /// <summary>
        /// Searches a single fingerprint file: every record read from the file's stream is
        /// scored against the query fingerprint as <c>common / (recordCard + queryCard - common)</c>,
        /// and records scoring at least <c>MinimumSimilarity</c> are appended to the match
        /// list of that file.
        /// </summary>
        /// <param name="fi">Index of the file; selects the match list and the file stream.</param>
        /// <exception cref="Exception">
        /// Any failure while reading or scoring is logged and rethrown wrapped, with the file
        /// index, file name and last match added to the message.
        /// </exception>
        void SearchSingleFile(int fi)
        {
            StructSearchMatch sm = null;

            AssertMx.IsNotNull(FpDao, "FpDao");

            List <StructSearchMatch> matchList = FileMatchLists[fi];

            AssertMx.IsNotNull(matchList, "matchList");

            OpenBitSet queryObs = new OpenBitSet(QueryFpLongArray, QueryFpLongArray.Length);

            AssertMx.IsNotNull(queryObs, "queryObs");

            // Initially wraps the query array only to get the right word count;
            // its bits are replaced by each database fingerprint in the loop below.
            OpenBitSet dbObs = new OpenBitSet(QueryFpLongArray, QueryFpLongArray.Length);

            AssertMx.IsNotNull(dbObs, "dbObs");

            FileStream fs = FileStreamReaders[fi];

            AssertMx.IsNotNull(fs, "fs");

            ReadFingerprintRecArgs a = new ReadFingerprintRecArgs();

            a.Initialize(fs, QueryFpLongArray.Length);

            try
            {
                while (true)
                {
                    bool readOk = FpDao.ReadRawFingerprintRec(a);
                    if (!readOk)
                    {
                        break;
                    }

                    dbObs.Bits = a.fingerprint;

                    // Count the common bits without modifying either set: an in-place
                    // Intersect would overwrite a.fingerprint before the record is
                    // converted into a match below.
                    int   commonCnt = (int)OpenBitSet.IntersectionCount(dbObs, queryObs);
                    float simScore  = commonCnt / (float)(a.cardinality + QueryFpCardinality - commonCnt);

                    if (simScore >= MinimumSimilarity)
                    {
                        sm            = ReadFingerprintRec_To_StructSearchMatch(a);
                        sm.SearchType = StructureSearchType.MolSim;
                        sm.MatchScore = simScore;

                        matchList.Add(sm);
                    }
                }
            }

            catch (Exception ex)
            {
                // Add search context to the message, log it, and rethrow with the original
                // exception preserved as the inner exception.
                string msg = ex.Message;
                msg += string.Format("\r\nfi: {0}, fs.Name: {1}, sm: {2}", fi, fs.Name, sm != null ? sm.Serialize() : "");
                DebugLog.Message(DebugLog.FormatExceptionMessage(ex, msg));
                throw new Exception(msg, ex);
            }
        }
コード例 #17
0
 /// <summary>
 /// Returns the number of bits set in <c>bits</c>, narrowed to <c>int</c>.
 /// </summary>
 /// <returns>The cardinality of <c>bits</c> cast to <c>int</c>.</returns>
 public int HitCount()
 {
     long setBits = bits.Cardinality();
     return (int)setBits;
 }
コード例 #18
0
        /// <summary>
        /// Loads weighted multi-value facet data for one field. Each term text is split on
        /// '\0'; the first part is the value and the last part is parsed as its integer
        /// weight, while terms that do not split into more than one part are skipped.
        /// Consecutive terms with the same value share one value id, and their documents are
        /// accumulated into the same frequency and min/max document ids. Values go to the
        /// nested array, weights to the weight array.
        /// </summary>
        /// <param name="fieldName">Name of the field whose terms are loaded.</param>
        /// <param name="reader">Segment-level reader supplying terms and postings.</param>
        /// <param name="listFactory">Factory for the term value list; a <c>TermStringList</c> is used when <c>null</c>.</param>
        /// <param name="workArea">Work area handed to <c>GetBufferedLoader</c> for the value loader.</param>
        public override void Load(string fieldName, AtomicReader reader, TermListFactory listFactory, BoboSegmentReader.WorkArea workArea)
        {
#if FEATURE_STRING_INTERN
            string field = string.Intern(fieldName);
#else
            string field = fieldName;
#endif
            int maxdoc = reader.MaxDoc;
            BigNestedInt32Array.BufferedLoader loader       = GetBufferedLoader(maxdoc, workArea);
            BigNestedInt32Array.BufferedLoader weightLoader = GetBufferedLoader(maxdoc, null);

            var        list               = (listFactory == null ? new TermStringList() : listFactory.CreateTermList());
            List <int> minIDList          = new List <int>();
            List <int> maxIDList          = new List <int>();
            List <int> freqList           = new List <int>();
            OpenBitSet bitset             = new OpenBitSet(maxdoc + 1);
            int        negativeValueCount = GetNegativeValueCount(reader, field);
            int        t = 1; // valid term id starts from 1

            // Slot 0 is reserved for "no value".
            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);

            m_overflow = false;

            string pre = null; // value of the previously processed term

            // Statistics of the value currently being accumulated (may span several terms).
            int df    = 0;
            int minID = -1;
            int maxID = -1;
            int valId = 0;

            Terms terms = reader.GetTerms(field);
            if (terms != null)
            {
                TermsEnum termsEnum = terms.GetIterator(null);
                BytesRef  text;
                while ((text = termsEnum.Next()) != null)
                {
                    string   strText = text.Utf8ToString();
                    string[] split   = strText.Split(new char[] { '\0' }, StringSplitOptions.RemoveEmptyEntries);
                    if (split.Length <= 1)
                    {
                        // No separate weight part: the term is skipped.
                        continue;
                    }

                    string val    = split[0];
                    int    weight = int.Parse(split[split.Length - 1]);

                    if (pre == null || !val.Equals(pre))
                    {
                        // A new value starts: flush the statistics of the previous one.
                        if (pre != null)
                        {
                            freqList.Add(df);
                            minIDList.Add(minID);
                            maxIDList.Add(maxID);
                        }
                        list.Add(val);
                        df    = 0;
                        minID = -1;
                        maxID = -1;
                        // The first negativeValueCount values receive ids in reverse order.
                        valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                        t++;
                    }

                    Term     term     = new Term(field, strText);
                    DocsEnum docsEnum = reader.GetTermDocsEnum(term);
                    if (docsEnum != null)
                    {
                        // Single pass over the postings: NextDoc() is never called again once it
                        // has returned NO_MORE_DOCS, which Lucene documents as undefined behaviour.
                        int docID;
                        while ((docID = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS)
                        {
                            df++;

                            if (!loader.Add(docID, valId))
                            {
                                LogOverflow(fieldName);
                            }
                            else
                            {
                                weightLoader.Add(docID, weight);
                            }

                            // minID starts at the -1 sentinel, so a plain 'docID < minID'
                            // test could never succeed; treat the sentinel as "not set yet".
                            if (minID < 0 || docID < minID)
                            {
                                minID = docID;
                            }
                            if (docID > maxID)
                            {
                                maxID = docID;
                            }
                            bitset.FastSet(docID);
                        }
                    }
                    pre = val;
                }
                // Flush the statistics of the last value.
                if (pre != null)
                {
                    freqList.Add(df);
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);
                }
            }

            list.Seal();

            try
            {
                m_nestedArray.Load(maxdoc + 1, loader);
                m_weightArray.Load(maxdoc + 1, weightLoader);
            }
            catch (Exception e)
            {
                throw new RuntimeException("failed to load due to " + e.ToString(), e);
            }

            this.m_valArray = list;
            this.m_freqs    = freqList.ToArray();
            this.m_minIDs   = minIDList.ToArray();
            this.m_maxIDs   = maxIDList.ToArray();

            // Find the first and last document that have no value and record them in slot 0.
            int doc = 0;
            while (doc < maxdoc && !m_nestedArray.Contains(doc, 0, true))
            {
                ++doc;
            }
            if (doc < maxdoc)
            {
                this.m_minIDs[0] = doc;
                doc = maxdoc - 1;
                while (doc >= 0 && !m_nestedArray.Contains(doc, 0, true))
                {
                    --doc;
                }
                this.m_maxIDs[0] = doc;
            }
            // Slot 0 frequency: all documents minus those that received at least one value.
            this.m_freqs[0] = maxdoc - (int)bitset.Cardinality();
        }