/// <summary>
/// Loads the facet data for a single field: enumerates every term of the field,
/// records each term's text, per-term document count and min/max document id,
/// and writes the term's value id into the order array for every document that
/// contains the term. Slot 0 of all result arrays describes documents that have
/// no value for the field.
/// </summary>
/// <param name="fieldName">Name of the indexed field to load.</param>
/// <param name="reader">Index reader providing MaxDoc, the term enumeration and the postings.</param>
/// <param name="listFactory">
/// Factory used to create the term value list; when <c>null</c> a
/// <see cref="TermStringList"/> is used.
/// </param>
public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory)
{
    string field = string.Intern(fieldName);
    int maxDoc = reader.MaxDoc;

    // Reuse the previously allocated order array when there is one.
    BigSegmentedArray order = this.orderArray;
    if (order == null)
    {
        int dictValueCount = GetDictValueCount(reader, fieldName);
        order = NewInstance(dictValueCount, maxDoc);
    }
    else
    {
        // Original author's note: no need to fill with 0, the data is reset anyway.
        order.EnsureCapacity(maxDoc);
    }
    this.orderArray = order;

    List<int> minIDList = new List<int>();
    List<int> maxIDList = new List<int>();
    List<int> freqList = new List<int>();

    ITermValueList list = listFactory == null
        ? (ITermValueList)new TermStringList()
        : listFactory.CreateTermList();

    int negativeValueCount = GetNegativeValueCount(reader, field);

    // Slot 0 is the placeholder for "no value"; its real statistics are
    // filled in after all terms have been processed.
    list.Add(null);
    minIDList.Add(-1);
    maxIDList.Add(-1);
    freqList.Add(0);

    int totalFreq = 0;
    int t = 1; // current term number (0 is taken by the placeholder above)

    // Each resource gets its own try/finally so that a failure while opening
    // the term enumeration, or while disposing one resource, cannot leak the other.
    TermDocs termDocs = reader.TermDocs();
    try
    {
        TermEnum termEnum = reader.Terms(new Term(field, ""));
        try
        {
            do
            {
                Term term = termEnum.Term;
                if (term == null || string.CompareOrdinal(term.Field, field) != 0)
                {
                    // Ran off the end of the index or into another field's terms.
                    break;
                }

                // Store the term text. There is deliberately no check that the
                // number of terms stays below the number of documents: a facet
                // may be built against a tokenized field, which yields more
                // than one term per document.
                list.Add(term.Text);
                termDocs.Seek(termEnum);

                // The frequency is counted from the postings instead of taking
                // the enumeration's document frequency, which (per the original
                // note) does not take deleted documents into account.
                int minID = -1;
                int maxID = -1;
                int df = 0;

                // The first negativeValueCount terms get their ids in reverse
                // order; every other term uses its term number directly.
                int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;

                if (termDocs.Next())
                {
                    df++;
                    int docid = termDocs.Doc;
                    order.Add(docid, valId);
                    minID = docid;
                    while (termDocs.Next())
                    {
                        df++;
                        docid = termDocs.Doc;
                        order.Add(docid, valId);
                    }
                    maxID = docid;
                }

                freqList.Add(df);
                totalFreq += df;
                minIDList.Add(minID);
                maxIDList.Add(maxID);

                t++;
            }
            while (termEnum.Next());
        }
        finally
        {
            termEnum.Dispose();
        }
    }
    finally
    {
        termDocs.Dispose();
    }

    list.Seal();

    this.valArray = list;
    this.freqs = freqList.ToArray();
    this.minIDs = minIDList.ToArray();
    this.maxIDs = maxIDList.ToArray();

    // Determine the id range of the documents without a value (ordinal 0).
    // NOTE(review): both scans include index maxDoc itself and freqs[0] is
    // computed from maxDoc + 1; kept as in the original - confirm against the
    // capacity of the order array and the intended document id range.
    int doc = 0;
    while (doc <= maxDoc && order.Get(doc) != 0)
    {
        ++doc;
    }
    if (doc <= maxDoc)
    {
        this.minIDs[0] = doc;

        // Scan downwards for the highest document without a value. The forward
        // scan has already proven that at least one such document exists, so
        // if this scan reaches 0 then document 0 is that document and is a
        // valid maximum (previously maxIDs[0] was left at -1 in that case).
        doc = maxDoc;
        while (doc > 0 && order.Get(doc) != 0)
        {
            --doc;
        }
        this.maxIDs[0] = doc;
    }
    this.freqs[0] = maxDoc + 1 - totalFreq;
}