/// <summary>
/// Loads multi-value facet data. This method uses the count payload to allocate storage before loading data.
/// </summary>
/// <param name="fieldName">Name of the indexed field whose terms and postings are loaded.</param>
/// <param name="reader">Reader that supplies the terms and postings of the field.</param>
/// <param name="listFactory">Factory for the term value list; when <c>null</c> a <c>TermStringList</c> is used.</param>
/// <param name="sizeTerm">Term handed to the <c>AllocOnlyLoader</c> (presumably the term carrying the per-document count payload - confirm against <c>AllocOnlyLoader</c>).</param>
/// <exception cref="RuntimeException">Thrown when loading the nested array fails for any reason.</exception>
public virtual void Load(string fieldName, AtomicReader reader, TermListFactory listFactory, Term sizeTerm)
{
#if FEATURE_STRING_INTERN
    string field = string.Intern(fieldName);
#else
    string field = fieldName;
#endif
    int maxdoc = reader.MaxDoc;
    BigNestedInt32Array.Loader loader = new AllocOnlyLoader(m_maxItems, sizeTerm, reader);
    int negativeValueCount = GetNegativeValueCount(reader, field);
    try
    {
        m_nestedArray.Load(maxdoc + 1, loader);
    }
    catch (Exception e)
    {
        throw new RuntimeException("failed to load due to " + e.ToString(), e);
    }

    ITermValueList list = (listFactory == null
        ? (ITermValueList)new TermStringList()
        : listFactory.CreateTermList());
    List<int> minIDList = new List<int>();
    List<int> maxIDList = new List<int>();
    List<int> freqList = new List<int>();
    // Records every document that carries at least one term of this field.
    OpenBitSet bitset = new OpenBitSet(maxdoc + 1);

    // Slot 0 is a placeholder entry, so valid term ids start from 1.
    int t = 1;
    list.Add(null);
    minIDList.Add(-1);
    maxIDList.Add(-1);
    freqList.Add(0);

    m_overflow = false;
    Terms terms = reader.GetTerms(field);
    if (terms != null)
    {
        TermsEnum termsEnum = terms.GetIterator(null);
        BytesRef text;
        while ((text = termsEnum.Next()) != null)
        {
            string strText = text.Utf8ToString();
            list.Add(strText);

            Term term = new Term(field, strText);
            DocsEnum docsEnum = reader.GetTermDocsEnum(term);

            // The first negativeValueCount terms are stored under a mirrored id
            // (negativeValueCount - t + 1); all other terms keep their own number.
            // Every posting of the term must use this id, including the first one.
            int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;

            int df = 0;
            int minID = -1;
            int maxID = -1;
            int docID;
            // Single pass over the postings: NextDoc() is never called again once
            // it has reported NO_MORE_DOCS.
            while ((docID = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS)
            {
                df++;
                if (!m_nestedArray.AddData(docID, valId))
                {
                    LogOverflow(fieldName);
                }
                if (df == 1)
                {
                    // First posting of the term.
                    minID = docID;
                }
                maxID = docID;
                bitset.FastSet(docID);
            }

            freqList.Add(df);
            minIDList.Add(minID);
            maxIDList.Add(maxID);
            t++;
        }
    }

    list.Seal();

    this.m_valArray = list;
    this.m_freqs = freqList.ToArray();
    this.m_minIDs = minIDList.ToArray();
    this.m_maxIDs = maxIDList.ToArray();

    // Fill in the statistics of slot 0: first and last document for which
    // Contains(doc, 0, true) holds (presumably documents without any value for
    // this field - confirm against BigNestedInt32Array.Contains).
    int doc = 0;
    while (doc < maxdoc && !m_nestedArray.Contains(doc, 0, true))
    {
        ++doc;
    }
    if (doc < maxdoc)
    {
        this.m_minIDs[0] = doc;
        doc = maxdoc - 1;
        while (doc >= 0 && !m_nestedArray.Contains(doc, 0, true))
        {
            --doc;
        }
        this.m_maxIDs[0] = doc;
    }
    // Documents that were never seen in any posting list of this field.
    this.m_freqs[0] = maxdoc - (int)bitset.Cardinality();
}
/// <summary>
/// Loads multi-value facet data. This method uses the count payload to allocate storage before loading data.
/// </summary>
/// <param name="fieldName">Name of the indexed field whose terms and postings are loaded.</param>
/// <param name="reader">Reader that supplies the terms and postings of the field.</param>
/// <param name="listFactory">Factory for the term value list; when <c>null</c> a <c>TermStringList</c> is used.</param>
/// <param name="sizeTerm">Term handed to the <c>AllocOnlyLoader</c> (presumably the term carrying the per-document count payload - confirm against <c>AllocOnlyLoader</c>).</param>
/// <exception cref="System.IO.IOException">Propagated unchanged when loading the nested array fails with an I/O error.</exception>
/// <exception cref="RuntimeException">Thrown when loading the nested array fails for any other reason.</exception>
public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory, Term sizeTerm)
{
    int maxdoc = reader.MaxDoc;
    BigNestedIntArray.Loader loader = new AllocOnlyLoader(_maxItems, sizeTerm, reader);
    int negativeValueCount = GetNegativeValueCount(reader, string.Intern(fieldName));
    try
    {
        _nestedArray.Load(maxdoc + 1, loader);
    }
    catch (System.IO.IOException)
    {
        // Bare rethrow keeps the original stack trace.
        throw;
    }
    catch (Exception e)
    {
        throw new RuntimeException("failed to load due to " + e.ToString(), e);
    }

    TermEnum tenum = null;
    TermDocs tdoc = null;
    ITermValueList list = (listFactory == null
        ? (ITermValueList)new TermStringList()
        : listFactory.CreateTermList());
    List<int> minIDList = new List<int>();
    List<int> maxIDList = new List<int>();
    List<int> freqList = new List<int>();
    // Sized up front: FastSet does not grow the bit set, so it must be able to
    // hold every document id that can show up in a posting list.
    OpenBitSet bitset = new OpenBitSet(maxdoc + 1);

    // Slot 0 is a placeholder entry, so the first real term gets number 1.
    list.Add(null);
    minIDList.Add(-1);
    maxIDList.Add(-1);
    freqList.Add(0);
    int t = 1; // current term number

    _overflow = false;
    try
    {
        tdoc = reader.TermDocs();
        tenum = reader.Terms(new Term(fieldName, ""));
        if (tenum != null)
        {
            do
            {
                Term term = tenum.Term;
                // Stop as soon as the enumeration leaves the requested field.
                if (term == null || !fieldName.Equals(term.Field))
                {
                    break;
                }

                string val = term.Text;
                if (val != null)
                {
                    list.Add(val);
                    tdoc.Seek(tenum);

                    // The first negativeValueCount terms are stored under a mirrored id
                    // (negativeValueCount - t + 1); all other terms keep their own number.
                    // Every posting of the term must use this id, including the first one.
                    int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;

                    // df is counted from the postings rather than taken from the term
                    // enumerator, because the enumerator's document frequency does not
                    // take deleted documents into account.
                    int df = 0;
                    int minID = -1;
                    int maxID = -1;
                    while (tdoc.Next())
                    {
                        df++;
                        int docid = tdoc.Doc;
                        if (!_nestedArray.AddData(docid, valId))
                        {
                            LogOverflow(fieldName);
                        }
                        if (df == 1)
                        {
                            // First posting of the term.
                            minID = docid;
                        }
                        maxID = docid;
                        bitset.FastSet(docid);
                    }

                    freqList.Add(df);
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);
                }

                t++;
            }
            while (tenum.Next());
        }
    }
    finally
    {
        // Release both enumerators even if disposing the first one throws.
        try
        {
            if (tdoc != null)
            {
                tdoc.Dispose();
            }
        }
        finally
        {
            if (tenum != null)
            {
                tenum.Dispose();
            }
        }
    }

    list.Seal();

    this.valArray = list;
    this.freqs = freqList.ToArray();
    this.minIDs = minIDList.ToArray();
    this.maxIDs = maxIDList.ToArray();

    // Fill in the statistics of slot 0: first and last document for which
    // Contains(doc, 0, true) holds (presumably documents without any value for
    // this field - confirm against BigNestedIntArray.Contains).
    int doc = 0;
    while (doc <= maxdoc && !_nestedArray.Contains(doc, 0, true))
    {
        ++doc;
    }
    if (doc <= maxdoc)
    {
        this.minIDs[0] = doc;
        doc = maxdoc;
        while (doc > 0 && !_nestedArray.Contains(doc, 0, true))
        {
            --doc;
        }
        if (doc > 0)
        {
            this.maxIDs[0] = doc;
        }
    }
    // Slots (maxdoc + 1 of them) that were never seen in any posting list of this field.
    this.freqs[0] = maxdoc + 1 - (int)bitset.Cardinality();
}
/// <summary>
/// Loads multi-value facet data. This method uses the count payload to allocate storage before loading data.
/// </summary>
/// <param name="fieldName">Name of the indexed field whose terms and postings are loaded.</param>
/// <param name="reader">Reader that supplies the terms and postings of the field.</param>
/// <param name="listFactory">Factory for the term value list; when <c>null</c> a <c>TermStringList</c> is used.</param>
/// <param name="sizeTerm">Term handed to the <c>AllocOnlyLoader</c> (presumably the term carrying the per-document count payload - confirm against <c>AllocOnlyLoader</c>).</param>
/// <exception cref="System.IO.IOException">Propagated unchanged when loading the nested array fails with an I/O error.</exception>
/// <exception cref="RuntimeException">Thrown when loading the nested array fails for any other reason.</exception>
public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory, Term sizeTerm)
{
    int maxdoc = reader.MaxDoc;
    BigNestedIntArray.Loader loader = new AllocOnlyLoader(_maxItems, sizeTerm, reader);
    int negativeValueCount = GetNegativeValueCount(reader, string.Intern(fieldName));
    try
    {
        _nestedArray.Load(maxdoc + 1, loader);
    }
    catch (System.IO.IOException)
    {
        // Bare rethrow keeps the original stack trace.
        throw;
    }
    catch (Exception e)
    {
        throw new RuntimeException("failed to load due to " + e.ToString(), e);
    }

    TermEnum tenum = null;
    TermDocs tdoc = null;
    ITermValueList list = (listFactory == null
        ? (ITermValueList)new TermStringList()
        : listFactory.CreateTermList());
    List<int> minIDList = new List<int>();
    List<int> maxIDList = new List<int>();
    List<int> freqList = new List<int>();
    // Sized up front: FastSet does not grow the bit set, so it must be able to
    // hold every document id that can show up in a posting list.
    OpenBitSet bitset = new OpenBitSet(maxdoc + 1);

    // Slot 0 is a placeholder entry, so the first real term gets number 1.
    list.Add(null);
    minIDList.Add(-1);
    maxIDList.Add(-1);
    freqList.Add(0);
    int t = 1; // current term number

    _overflow = false;
    try
    {
        tdoc = reader.TermDocs();
        tenum = reader.Terms(new Term(fieldName, ""));
        if (tenum != null)
        {
            do
            {
                Term term = tenum.Term;
                // Stop as soon as the enumeration leaves the requested field.
                if (term == null || !fieldName.Equals(term.Field))
                {
                    break;
                }

                string val = term.Text;
                if (val != null)
                {
                    list.Add(val);
                    tdoc.Seek(tenum);

                    // The first negativeValueCount terms are stored under a mirrored id
                    // (negativeValueCount - t + 1); all other terms keep their own number.
                    // Every posting of the term must use this id, including the first one.
                    int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;

                    // df is counted from the postings rather than taken from the term
                    // enumerator, because the enumerator's document frequency does not
                    // take deleted documents into account.
                    int df = 0;
                    int minID = -1;
                    int maxID = -1;
                    while (tdoc.Next())
                    {
                        df++;
                        int docid = tdoc.Doc;
                        if (!_nestedArray.AddData(docid, valId))
                        {
                            LogOverflow(fieldName);
                        }
                        if (df == 1)
                        {
                            // First posting of the term.
                            minID = docid;
                        }
                        maxID = docid;
                        bitset.FastSet(docid);
                    }

                    freqList.Add(df);
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);
                }

                t++;
            }
            while (tenum.Next());
        }
    }
    finally
    {
        // Release both enumerators even if disposing the first one throws.
        try
        {
            if (tdoc != null)
            {
                tdoc.Dispose();
            }
        }
        finally
        {
            if (tenum != null)
            {
                tenum.Dispose();
            }
        }
    }

    list.Seal();

    this.valArray = list;
    this.freqs = freqList.ToArray();
    this.minIDs = minIDList.ToArray();
    this.maxIDs = maxIDList.ToArray();

    // Fill in the statistics of slot 0: first and last document for which
    // Contains(doc, 0, true) holds (presumably documents without any value for
    // this field - confirm against BigNestedIntArray.Contains).
    int doc = 0;
    while (doc <= maxdoc && !_nestedArray.Contains(doc, 0, true))
    {
        ++doc;
    }
    if (doc <= maxdoc)
    {
        this.minIDs[0] = doc;
        doc = maxdoc;
        while (doc > 0 && !_nestedArray.Contains(doc, 0, true))
        {
            --doc;
        }
        if (doc > 0)
        {
            this.maxIDs[0] = doc;
        }
    }
    // Slots (maxdoc + 1 of them) that were never seen in any posting list of this field.
    this.freqs[0] = maxdoc + 1 - (int)bitset.Cardinality();
}