Example #1
0
        public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory)
        {
            string field  = string.Intern(fieldName);
            int    maxDoc = reader.MaxDoc;

            BigSegmentedArray order = this.orderArray;

            if (order == null) // we want to reuse the memory
            {
                int dictValueCount = GetDictValueCount(reader, fieldName);
                order = NewInstance(dictValueCount, maxDoc);
            }
            else
            {
                order.EnsureCapacity(maxDoc); // no need to fill to 0, we are reseting the
                                              // data anyway
            }
            this.orderArray = order;

            List <int> minIDList = new List <int>();
            List <int> maxIDList = new List <int>();
            List <int> freqList  = new List <int>();

            int            length             = maxDoc + 1;
            ITermValueList list               = listFactory == null ? (ITermValueList) new TermStringList() : listFactory.CreateTermList();
            int            negativeValueCount = GetNegativeValueCount(reader, field);

            TermDocs termDocs = reader.TermDocs();
            TermEnum termEnum = reader.Terms(new Term(field, ""));
            int      t        = 0; // current term number

            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            int totalFreq = 0;

            //int df = 0;
            t++;
            try
            {
                do
                {
                    Term term = termEnum.Term;
                    if (term == null || string.CompareOrdinal(term.Field, field) != 0)
                    {
                        break;
                    }

                    // store term text
                    // we expect that there is at most one term per document

                    // Alexey: well, we could get now more than one term per document. Effectively, we could build facet against tokenized field
                    //if (t >= length)
                    //{
                    //    throw new RuntimeException("there are more terms than " + "documents in field \"" + field
                    //        + "\", but it's impossible to sort on " + "tokenized fields");
                    //}
                    list.Add(term.Text);
                    termDocs.Seek(termEnum);
                    // freqList.add(termEnum.docFreq()); // doesn't take into account deldocs
                    int minID = -1;
                    int maxID = -1;
                    int df    = 0;
                    int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                    if (termDocs.Next())
                    {
                        df++;
                        int docid = termDocs.Doc;
                        order.Add(docid, valId);
                        minID = docid;
                        while (termDocs.Next())
                        {
                            df++;
                            docid = termDocs.Doc;
                            order.Add(docid, valId);
                        }
                        maxID = docid;
                    }
                    freqList.Add(df);
                    totalFreq += df;
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);

                    t++;
                } while (termEnum.Next());
            }
            finally
            {
                termDocs.Dispose();
                termEnum.Dispose();
            }
            list.Seal();
            this.valArray = list;
            this.freqs    = freqList.ToArray();
            this.minIDs   = minIDList.ToArray();
            this.maxIDs   = maxIDList.ToArray();

            int doc = 0;

            while (doc <= maxDoc && order.Get(doc) != 0)
            {
                ++doc;
            }
            if (doc <= maxDoc)
            {
                this.minIDs[0] = doc;
                // Try to get the max
                doc = maxDoc;
                while (doc > 0 && order.Get(doc) != 0)
                {
                    --doc;
                }
                if (doc > 0)
                {
                    this.maxIDs[0] = doc;
                }
            }
            this.freqs[0] = maxDoc + 1 - totalFreq;
        }