public PathFacetHandler(string name)
    : base(name)
{
    dataCache = null;
    termListFactory = TermListFactory.StringListFactory;
    separator = DEFAULT_SEP;
}
public SimpleFacetHandler(string name, string indexFieldName, TermListFactory termListFactory)
    : base(name)
{
    _indexFieldName = indexFieldName;
    _dataCache = null;
    _termListFactory = termListFactory;
}
public RangeFacetHandler(string name, string indexFieldName, TermListFactory termListFactory, IEnumerable<string> predefinedRanges)
    : base(name)
{
    this.indexFieldName = indexFieldName;
    this.dataCache = null;
    this.termListFactory = termListFactory;
    this.predefinedRanges = predefinedRanges;
    this.autoRange = false;
}
public MultiValueFacetHandler(string name, string indexFieldName, TermListFactory termListFactory, Term sizePayloadTerm, IEnumerable<string> depends)
    : base(name, depends)
{
    _depends = depends;
    _indexFieldName = (indexFieldName != null ? indexFieldName : name);
    _termListFactory = termListFactory;
    _sizePayloadTerm = sizePayloadTerm;
    _dataCache = null;
}
public RangeFacetHandler(string name, string indexFieldName, TermListFactory termListFactory, bool autoRange)
    : base(name)
{
    this.dataCache = null;
    this.indexFieldName = indexFieldName;
    this.termListFactory = termListFactory;
    this.predefinedRanges = null;
    this.autoRange = autoRange;
}
public AttributesFacetHandler(string name, string indexFieldName, TermListFactory termListFactory, Term sizePayloadTerm, IDictionary<string, string> facetProps)
    : base(name, indexFieldName, sizePayloadTerm, termListFactory, new string[0])
{
    if (facetProps.ContainsKey(SEPARATOR_PROP_NAME))
    {
        this.separator = Narrow(facetProps.Get(SEPARATOR_PROP_NAME))[0];
    }
    else
    {
        this.separator = DEFAULT_SEPARATOR;
    }
    if (facetProps.ContainsKey(MAX_FACETS_PER_KEY_PROP_NAME))
    {
        this.numFacetsPerKey = int.Parse(Narrow(facetProps.Get(MAX_FACETS_PER_KEY_PROP_NAME)));
    }
}
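// Usage sketch (assumption, not from the original source): shows how the facetProps dictionary
// consumed by the constructor above might be built. The property-name constants are the ones the
// constructor reads; their public visibility, the chosen values, and the other constructor
// arguments here are illustrative assumptions.
private static AttributesFacetHandler CreateAttributesHandlerExample(TermListFactory termListFactory)
{
    IDictionary<string, string> facetProps = new Dictionary<string, string>
    {
        { AttributesFacetHandler.SEPARATOR_PROP_NAME, "=" },          // only the first character is used
        { AttributesFacetHandler.MAX_FACETS_PER_KEY_PROP_NAME, "5" }  // parsed with int.Parse
    };
    return new AttributesFacetHandler("attributes", "attributes", termListFactory, null, facetProps);
}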
public override void Load(string fieldName, AtomicReader reader, TermListFactory listFactory) { this.Load(fieldName, reader, listFactory, new BoboSegmentReader.WorkArea()); }
public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory)
{
    string field = string.Intern(fieldName);
    int maxDoc = reader.MaxDoc;
    BigSegmentedArray order = this.orderArray;
    if (order == null) // we want to reuse the memory
    {
        int dictValueCount = GetDictValueCount(reader, fieldName);
        order = NewInstance(dictValueCount, maxDoc);
    }
    else
    {
        order.EnsureCapacity(maxDoc); // no need to fill to 0, we are resetting the data anyway
    }
    this.orderArray = order;
    List<int> minIDList = new List<int>();
    List<int> maxIDList = new List<int>();
    List<int> freqList = new List<int>();
    int length = maxDoc + 1;
    ITermValueList list = listFactory == null ? (ITermValueList)new TermStringList() : listFactory.CreateTermList();
    int negativeValueCount = GetNegativeValueCount(reader, field);
    TermDocs termDocs = reader.TermDocs();
    TermEnum termEnum = reader.Terms(new Term(field, ""));
    int t = 0; // current term number
    list.Add(null);
    minIDList.Add(-1);
    maxIDList.Add(-1);
    freqList.Add(0);
    int totalFreq = 0;
    //int df = 0;
    t++;
    try
    {
        do
        {
            Term term = termEnum.Term;
            if (term == null || string.CompareOrdinal(term.Field, field) != 0) { break; }
            // store term text
            // we expect that there is at most one term per document
            // Alexey: well, we could now get more than one term per document. Effectively, we could build a facet against a tokenized field
            //if (t >= length)
            //{
            //    throw new RuntimeException("there are more terms than " + "documents in field \"" + field
            //        + "\", but it's impossible to sort on " + "tokenized fields");
            //}
            list.Add(term.Text);
            termDocs.Seek(termEnum);
            // freqList.add(termEnum.docFreq()); // doesn't take into account deldocs
            int minID = -1;
            int maxID = -1;
            int df = 0;
            int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
            if (termDocs.Next())
            {
                df++;
                int docid = termDocs.Doc;
                order.Add(docid, valId);
                minID = docid;
                while (termDocs.Next())
                {
                    df++;
                    docid = termDocs.Doc;
                    order.Add(docid, valId);
                }
                maxID = docid;
            }
            freqList.Add(df);
            totalFreq += df;
            minIDList.Add(minID);
            maxIDList.Add(maxID);
            t++;
        } while (termEnum.Next());
    }
    finally
    {
        termDocs.Dispose();
        termEnum.Dispose();
    }
    list.Seal();
    this.valArray = list;
    this.freqs = freqList.ToArray();
    this.minIDs = minIDList.ToArray();
    this.maxIDs = maxIDList.ToArray();
    int doc = 0;
    while (doc <= maxDoc && order.Get(doc) != 0) { ++doc; }
    if (doc <= maxDoc)
    {
        this.minIDs[0] = doc;
        // Try to get the max
        doc = maxDoc;
        while (doc > 0 && order.Get(doc) != 0) { --doc; }
        if (doc > 0) { this.maxIDs[0] = doc; }
    }
    this.freqs[0] = maxDoc + 1 - totalFreq;
}
public override void Load(string fieldName, IndexReader reader, TermListFactory listFactory, BoboIndexReader.WorkArea workArea)
{
    long t0 = System.Environment.TickCount;
    int maxdoc = reader.MaxDoc;
    BigNestedIntArray.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea);
    BigNestedIntArray.BufferedLoader weightLoader = GetBufferedLoader(maxdoc, null);
    TermEnum tenum = null;
    TermDocs tdoc = null;
    var list = (listFactory == null ? new TermStringList() : listFactory.CreateTermList());
    List<int> minIDList = new List<int>();
    List<int> maxIDList = new List<int>();
    List<int> freqList = new List<int>();
    OpenBitSet bitset = new OpenBitSet(maxdoc + 1);
    int negativeValueCount = GetNegativeValueCount(reader, string.Intern(fieldName));
    int t = 0; // current term number
    list.Add(null);
    minIDList.Add(-1);
    maxIDList.Add(-1);
    freqList.Add(0);
    t++;
    _overflow = false;
    string pre = null;
    int df = 0;
    int minID = -1;
    int maxID = -1;
    int valId = 0;
    try
    {
        tdoc = reader.TermDocs();
        tenum = reader.Terms(new Term(fieldName, ""));
        if (tenum != null)
        {
            do
            {
                Term term = tenum.Term;
                if (term == null || !fieldName.Equals(term.Field)) { break; }
                string val = term.Text;
                if (val != null)
                {
                    int weight = 0;
                    string[] split = val.Split(new char[] { '\0' }, StringSplitOptions.RemoveEmptyEntries);
                    if (split.Length > 1)
                    {
                        val = split[0];
                        weight = int.Parse(split[split.Length - 1]);
                    }
                    if (pre == null || !val.Equals(pre))
                    {
                        if (pre != null)
                        {
                            freqList.Add(df);
                            minIDList.Add(minID);
                            maxIDList.Add(maxID);
                        }
                        list.Add(val);
                        df = 0;
                        minID = -1;
                        maxID = -1;
                        valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                        t++;
                    }
                    tdoc.Seek(tenum);
                    if (tdoc.Next())
                    {
                        df++;
                        int docid = tdoc.Doc;
                        if (!loader.Add(docid, valId)) { LogOverflow(fieldName); }
                        else { weightLoader.Add(docid, weight); }
                        if (docid < minID) { minID = docid; }
                        bitset.FastSet(docid);
                        while (tdoc.Next())
                        {
                            df++;
                            docid = tdoc.Doc;
                            if (!loader.Add(docid, valId)) { LogOverflow(fieldName); }
                            else { weightLoader.Add(docid, weight); }
                            bitset.FastSet(docid);
                        }
                        if (docid > maxID) { maxID = docid; }
                    }
                    pre = val;
                }
            } while (tenum.Next());
            if (pre != null)
            {
                freqList.Add(df);
                minIDList.Add(minID);
                maxIDList.Add(maxID);
            }
        }
    }
    finally
    {
        try
        {
            if (tdoc != null) { tdoc.Dispose(); }
        }
        finally
        {
            if (tenum != null) { tenum.Dispose(); }
        }
    }
    list.Seal();
    try
    {
        _nestedArray.Load(maxdoc + 1, loader);
        _weightArray.Load(maxdoc + 1, weightLoader);
    }
    catch (System.IO.IOException e)
    {
        throw e;
    }
    catch (Exception e)
    {
        throw new RuntimeException("failed to load due to " + e.ToString(), e);
    }
    this.valArray = list;
    this.freqs = freqList.ToArray();
    this.minIDs = minIDList.ToArray();
    this.maxIDs = maxIDList.ToArray();
    int doc = 0;
    while (doc <= maxdoc && !_nestedArray.Contains(doc, 0, true)) { ++doc; }
    if (doc <= maxdoc)
    {
        this.minIDs[0] = doc;
        doc = maxdoc;
        while (doc > 0 && !_nestedArray.Contains(doc, 0, true)) { --doc; }
        if (doc > 0) { this.maxIDs[0] = doc; }
    }
    this.freqs[0] = maxdoc + 1 - (int)bitset.Cardinality();
}
public RangeFacetHandler(string name, TermListFactory termListFactory, bool autoRange) : this(name, name, termListFactory, autoRange) { }
public MultiValueWithWeightFacetHandler(string name, string indexFieldName, TermListFactory termListFactory) : base(name, indexFieldName, termListFactory, null, null) { }
public MultiValueFacetHandler(string name, string indexFieldName, TermListFactory termListFactory) : this(name, indexFieldName, termListFactory, null, null) { }
public override void Load(string fieldName, IndexReader reader, TermListFactory listFactory) { this.Load(fieldName, reader, listFactory, new BoboIndexReader.WorkArea()); }
public override void Load(string fieldName, IndexReader reader, TermListFactory listFactory, BoboIndexReader.WorkArea workArea)
{
    long t0 = System.Environment.TickCount;
    int maxdoc = reader.MaxDoc;
    BigNestedIntArray.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea);
    BigNestedIntArray.BufferedLoader weightLoader = GetBufferedLoader(maxdoc, null);
    TermEnum tenum = null;
    TermDocs tdoc = null;
    var list = (listFactory == null ? new TermStringList() : listFactory.CreateTermList());
    List<int> minIDList = new List<int>();
    List<int> maxIDList = new List<int>();
    List<int> freqList = new List<int>();
    OpenBitSet bitset = new OpenBitSet(maxdoc + 1);
    int negativeValueCount = GetNegativeValueCount(reader, string.Intern(fieldName));
    int t = 0; // current term number
    list.Add(null);
    minIDList.Add(-1);
    maxIDList.Add(-1);
    freqList.Add(0);
    t++;
    _overflow = false;
    string pre = null;
    int df = 0;
    int minID = -1;
    int maxID = -1;
    int valId = 0;
    try
    {
        tdoc = reader.TermDocs();
        tenum = reader.Terms(new Term(fieldName, ""));
        if (tenum != null)
        {
            do
            {
                Term term = tenum.Term;
                if (term == null || !fieldName.Equals(term.Field))
                    break;
                string val = term.Text;
                if (val != null)
                {
                    int weight = 0;
                    string[] split = val.Split(new char[] { '\0' }, StringSplitOptions.RemoveEmptyEntries);
                    if (split.Length > 1)
                    {
                        val = split[0];
                        weight = int.Parse(split[split.Length - 1]);
                    }
                    if (pre == null || !val.Equals(pre))
                    {
                        if (pre != null)
                        {
                            freqList.Add(df);
                            minIDList.Add(minID);
                            maxIDList.Add(maxID);
                        }
                        list.Add(val);
                        df = 0;
                        minID = -1;
                        maxID = -1;
                        valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                        t++;
                    }
                    tdoc.Seek(tenum);
                    if (tdoc.Next())
                    {
                        df++;
                        int docid = tdoc.Doc;
                        if (!loader.Add(docid, valId)) LogOverflow(fieldName);
                        else weightLoader.Add(docid, weight);
                        if (docid < minID) minID = docid;
                        bitset.FastSet(docid);
                        while (tdoc.Next())
                        {
                            df++;
                            docid = tdoc.Doc;
                            if (!loader.Add(docid, valId)) LogOverflow(fieldName);
                            else weightLoader.Add(docid, weight);
                            bitset.FastSet(docid);
                        }
                        if (docid > maxID) maxID = docid;
                    }
                    pre = val;
                }
            } while (tenum.Next());
            if (pre != null)
            {
                freqList.Add(df);
                minIDList.Add(minID);
                maxIDList.Add(maxID);
            }
        }
    }
    finally
    {
        try
        {
            if (tdoc != null) { tdoc.Dispose(); }
        }
        finally
        {
            if (tenum != null) { tenum.Dispose(); }
        }
    }
    list.Seal();
    try
    {
        _nestedArray.Load(maxdoc + 1, loader);
        _weightArray.Load(maxdoc + 1, weightLoader);
    }
    catch (System.IO.IOException e)
    {
        throw e;
    }
    catch (Exception e)
    {
        throw new RuntimeException("failed to load due to " + e.ToString(), e);
    }
    this.valArray = list;
    this.freqs = freqList.ToArray();
    this.minIDs = minIDList.ToArray();
    this.maxIDs = maxIDList.ToArray();
    int doc = 0;
    while (doc <= maxdoc && !_nestedArray.Contains(doc, 0, true)) { ++doc; }
    if (doc <= maxdoc)
    {
        this.minIDs[0] = doc;
        doc = maxdoc;
        while (doc > 0 && !_nestedArray.Contains(doc, 0, true)) { --doc; }
        if (doc > 0) { this.maxIDs[0] = doc; }
    }
    this.freqs[0] = maxdoc + 1 - (int)bitset.Cardinality();
}
/// <summary>
/// loads multi-value facet data. This method uses a workarea to prepare loading.
/// </summary>
/// <param name="fieldName"></param>
/// <param name="reader"></param>
/// <param name="listFactory"></param>
/// <param name="workArea"></param>
public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory, BoboIndexReader.WorkArea workArea)
{
    long t0 = Environment.TickCount;
    int maxdoc = reader.MaxDoc;
    BigNestedIntArray.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea);
    TermEnum tenum = null;
    TermDocs tdoc = null;
    ITermValueList list = (listFactory == null ? (ITermValueList)new TermStringList() : listFactory.CreateTermList());
    List<int> minIDList = new List<int>();
    List<int> maxIDList = new List<int>();
    List<int> freqList = new List<int>();
    OpenBitSet bitset = new OpenBitSet();
    int negativeValueCount = GetNegativeValueCount(reader, string.Intern(fieldName));
    int t = 0; // current term number
    list.Add(null);
    minIDList.Add(-1);
    maxIDList.Add(-1);
    freqList.Add(0);
    t++;
    _overflow = false;
    try
    {
        tdoc = reader.TermDocs();
        tenum = reader.Terms(new Term(fieldName, ""));
        if (tenum != null)
        {
            do
            {
                Term term = tenum.Term;
                if (term == null || !fieldName.Equals(term.Field))
                    break;
                string val = term.Text;
                if (val != null)
                {
                    list.Add(val);
                    tdoc.Seek(tenum);
                    //freqList.add(tenum.docFreq()); // removed because the df doesn't take into account the num of deletedDocs
                    int df = 0;
                    int minID = -1;
                    int maxID = -1;
                    int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                    if (tdoc.Next())
                    {
                        df++;
                        int docid = tdoc.Doc;
                        if (!loader.Add(docid, valId)) LogOverflow(fieldName);
                        minID = docid;
                        bitset.Set(docid);
                        while (tdoc.Next())
                        {
                            df++;
                            docid = tdoc.Doc;
                            if (!loader.Add(docid, valId)) LogOverflow(fieldName);
                            bitset.Set(docid);
                        }
                        maxID = docid;
                    }
                    freqList.Add(df);
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);
                }
                t++;
            } while (tenum.Next());
        }
    }
    finally
    {
        try
        {
            if (tdoc != null) { tdoc.Dispose(); }
        }
        finally
        {
            if (tenum != null) { tenum.Dispose(); }
        }
    }
    list.Seal();
    try
    {
        _nestedArray.Load(maxdoc + 1, loader);
    }
    catch (System.IO.IOException e)
    {
        throw e;
    }
    catch (Exception e)
    {
        throw new RuntimeException("failed to load due to " + e.ToString(), e);
    }
    this.valArray = list;
    this.freqs = freqList.ToArray();
    this.minIDs = minIDList.ToArray();
    this.maxIDs = maxIDList.ToArray();
    int doc = 0;
    while (doc <= maxdoc && !_nestedArray.Contains(doc, 0, true)) { ++doc; }
    if (doc <= maxdoc)
    {
        this.minIDs[0] = doc;
        doc = maxdoc;
        while (doc > 0 && !_nestedArray.Contains(doc, 0, true)) { --doc; }
        if (doc > 0) { this.maxIDs[0] = doc; }
    }
    this.freqs[0] = maxdoc + 1 - (int)bitset.Cardinality();
}
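// Usage sketch (assumption, not from the original source): drives the work-area based Load
// overload above against a classic IndexReader. The concrete cache type MultiValueFacetDataCache
// and the field name "tags" are assumptions; BoboIndexReader.WorkArea and
// TermListFactory.StringListFactory appear elsewhere in these snippets.
private static MultiValueFacetDataCache LoadMultiValueFacetExample(IndexReader reader)
{
    var workArea = new BoboIndexReader.WorkArea();   // reusable scratch buffers shared across loads
    var dataCache = new MultiValueFacetDataCache();  // assumed cache class exposing this Load overload
    dataCache.Load("tags", reader, TermListFactory.StringListFactory, workArea);
    return dataCache;
}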
public virtual void Load(string fieldName, AtomicReader reader, TermListFactory listFactory)
{
#if FEATURE_STRING_INTERN
    string field = string.Intern(fieldName);
#else
    string field = fieldName;
#endif
    int maxDoc = reader.MaxDoc;
    int dictValueCount = GetDictValueCount(reader, fieldName);
    BigSegmentedArray order = NewInstance(dictValueCount, maxDoc);
    this.m_orderArray = order;
    List<int> minIDList = new List<int>();
    List<int> maxIDList = new List<int>();
    List<int> freqList = new List<int>();
    int length = maxDoc + 1;
    ITermValueList list = listFactory == null ? (ITermValueList)new TermStringList() : listFactory.CreateTermList();
    int negativeValueCount = GetNegativeValueCount(reader, field);
    int t = 1; // valid term id starts from 1
    list.Add(null);
    minIDList.Add(-1);
    maxIDList.Add(-1);
    freqList.Add(0);
    int totalFreq = 0;
    Terms terms = reader.GetTerms(field);
    if (terms != null)
    {
        TermsEnum termsEnum = terms.GetIterator(null);
        BytesRef text;
        while ((text = termsEnum.Next()) != null)
        {
            // store term text
            // we expect that there is at most one term per document
            if (t >= length)
            {
                throw new RuntimeException("there are more terms than " + "documents in field \"" + field
                    + "\", but it's impossible to sort on " + "tokenized fields");
            }
            string strText = text.Utf8ToString();
            list.Add(strText);
            Term term = new Term(field, strText);
            DocsEnum docsEnum = reader.GetTermDocsEnum(term);
            // freqList.add(termEnum.docFreq()); // doesn't take into account deldocs
            int minID = -1;
            int maxID = -1;
            int docID = -1;
            int df = 0;
            int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
            while ((docID = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS)
            {
                df++;
                order.Add(docID, valId);
                minID = docID;
                while (docsEnum.NextDoc() != DocsEnum.NO_MORE_DOCS)
                {
                    docID = docsEnum.DocID;
                    df++;
                    order.Add(docID, valId);
                }
                maxID = docID;
            }
            freqList.Add(df);
            totalFreq += df;
            minIDList.Add(minID);
            maxIDList.Add(maxID);
            t++;
        }
    }
    list.Seal();
    this.m_valArray = list;
    this.m_freqs = freqList.ToArray();
    this.m_minIDs = minIDList.ToArray();
    this.m_maxIDs = maxIDList.ToArray();
    int doc = 0;
    while (doc < maxDoc && order.Get(doc) != 0) { ++doc; }
    if (doc < maxDoc)
    {
        this.m_minIDs[0] = doc;
        // Try to get the max
        doc = maxDoc - 1;
        while (doc >= 0 && order.Get(doc) != 0) { --doc; }
        this.m_maxIDs[0] = doc;
    }
    this.m_freqs[0] = reader.NumDocs - totalFreq;
}
public CompactMultiValueFacetHandler(string name, TermListFactory termListFactory) : this(name, name, termListFactory) { }
public CompactMultiValueFacetHandler(string name, string indexFieldName, TermListFactory termListFactory)
    : base(name)
{
    _indexFieldName = indexFieldName;
    _termListFactory = termListFactory;
}
/// <summary>
/// loads multi-value facet data. This method uses a workarea to prepare loading.
/// </summary>
/// <param name="fieldName"></param>
/// <param name="reader"></param>
/// <param name="listFactory"></param>
/// <param name="workArea"></param>
public virtual void Load(string fieldName, AtomicReader reader, TermListFactory listFactory, BoboSegmentReader.WorkArea workArea)
{
#if FEATURE_STRING_INTERN
    string field = string.Intern(fieldName);
#else
    string field = fieldName;
#endif
    int maxdoc = reader.MaxDoc;
    BigNestedInt32Array.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea);
    ITermValueList list = (listFactory == null ? (ITermValueList)new TermStringList() : listFactory.CreateTermList());
    List<int> minIDList = new List<int>();
    List<int> maxIDList = new List<int>();
    List<int> freqList = new List<int>();
    OpenBitSet bitset = new OpenBitSet(maxdoc + 1);
    int negativeValueCount = GetNegativeValueCount(reader, field);
    int t = 1; // valid term id starts from 1
    list.Add(null);
    minIDList.Add(-1);
    maxIDList.Add(-1);
    freqList.Add(0);
    m_overflow = false;
    Terms terms = reader.GetTerms(field);
    if (terms != null)
    {
        TermsEnum termsEnum = terms.GetIterator(null);
        BytesRef text;
        while ((text = termsEnum.Next()) != null)
        {
            string strText = text.Utf8ToString();
            list.Add(strText);
            Term term = new Term(field, strText);
            DocsEnum docsEnum = reader.GetTermDocsEnum(term);
            int df = 0;
            int minID = -1;
            int maxID = -1;
            int docID = -1;
            int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
            while ((docID = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS)
            {
                df++;
                if (!loader.Add(docID, valId)) { LogOverflow(fieldName); }
                minID = docID;
                bitset.FastSet(docID);
                while (docsEnum.NextDoc() != DocsEnum.NO_MORE_DOCS)
                {
                    docID = docsEnum.DocID;
                    df++;
                    if (!loader.Add(docID, valId)) { LogOverflow(fieldName); }
                    bitset.FastSet(docID);
                }
                maxID = docID;
            }
            freqList.Add(df);
            minIDList.Add(minID);
            maxIDList.Add(maxID);
            t++;
        }
    }
    list.Seal();
    try
    {
        m_nestedArray.Load(maxdoc + 1, loader);
    }
    catch (Exception e)
    {
        throw new RuntimeException("failed to load due to " + e.ToString(), e);
    }
    this.m_valArray = list;
    this.m_freqs = freqList.ToArray();
    this.m_minIDs = minIDList.ToArray();
    this.m_maxIDs = maxIDList.ToArray();
    int doc = 0;
    while (doc < maxdoc && !m_nestedArray.Contains(doc, 0, true)) { ++doc; }
    if (doc < maxdoc)
    {
        this.m_minIDs[0] = doc;
        doc = maxdoc - 1;
        while (doc >= 0 && !m_nestedArray.Contains(doc, 0, true)) { --doc; }
        this.m_maxIDs[0] = doc;
    }
    this.m_freqs[0] = maxdoc - (int)bitset.Cardinality();
}
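// Usage sketch (assumption, not from the original source): the AtomicReader overload above loads
// one segment at a time, so a composite reader is typically walked segment by segment. The
// Leaves/AtomicReader traversal is standard Lucene.Net 4.x API; the cache type and the field
// name "tags" are assumptions for illustration.
private static void LoadPerSegmentExample(DirectoryReader directoryReader, TermListFactory listFactory)
{
    foreach (AtomicReaderContext leaf in directoryReader.Leaves)
    {
        var dataCache = new MultiValueFacetDataCache();  // assumed cache class; one cache per segment
        dataCache.Load("tags", leaf.AtomicReader, listFactory, new BoboSegmentReader.WorkArea());
    }
}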
public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory)
{
    string field = string.Intern(fieldName);
    int maxDoc = reader.MaxDoc;
    if (orderArray == null) // we want to reuse the memory
    {
        orderArray = NewInstance(termCountSize, maxDoc);
    }
    else
    {
        orderArray.EnsureCapacity(maxDoc); // no need to fill to 0, we are resetting the data anyway
    }
    List<int> minIDList = new List<int>();
    List<int> maxIDList = new List<int>();
    List<int> freqList = new List<int>();
    int length = maxDoc + 1;
    ITermValueList list = listFactory == null ? new TermStringList() : listFactory.CreateTermList();
    TermDocs termDocs = reader.TermDocs();
    TermEnum termEnum = reader.Terms(new Term(field));
    int t = 0; // current term number
    list.Add(null);
    minIDList.Add(-1);
    maxIDList.Add(-1);
    freqList.Add(0);
    //int df = 0;
    t++;
    try
    {
        do
        {
            Term term = termEnum.Term;
            if (term == null || string.CompareOrdinal(term.Field, field) != 0)
                break;
            if (t >= orderArray.MaxValue())
            {
                throw new System.IO.IOException("maximum number of value cannot exceed: " + orderArray.MaxValue());
            }
            // Alexey: well, we could now get more than one term per document. Effectively, we could build a facet against a tokenized field
            /*// we expect that there is at most one term per document
            if (t >= length)
            {
                throw new RuntimeException("there are more terms than " + "documents in field \"" + field
                    + "\", but it's impossible to sort on " + "tokenized fields");
            }*/
            // store term text
            list.Add(term.Text);
            termDocs.Seek(termEnum);
            // freqList.add(termEnum.docFreq()); // doesn't take into account deldocs
            int minID = -1;
            int maxID = -1;
            int df = 0;
            if (termDocs.Next())
            {
                df++;
                int docid = termDocs.Doc;
                orderArray.Add(docid, t);
                minID = docid;
                while (termDocs.Next())
                {
                    df++;
                    docid = termDocs.Doc;
                    orderArray.Add(docid, t);
                }
                maxID = docid;
            }
            freqList.Add(df);
            minIDList.Add(minID);
            maxIDList.Add(maxID);
            t++;
        } while (termEnum.Next());
    }
    finally
    {
        termDocs.Dispose();
        termEnum.Dispose();
    }
    list.Seal();
    this.valArray = list;
    this.freqs = freqList.ToArray();
    this.minIDs = minIDList.ToArray();
    this.maxIDs = maxIDList.ToArray();
}
public MultiValueFacetHandler(string name, TermListFactory termListFactory, Term sizePayloadTerm) : this(name, name, termListFactory, sizePayloadTerm, null) { }
public SimpleFacetHandler(string name, TermListFactory termListFactory) : this(name, name, termListFactory) { }
public override void Load(string fieldName, AtomicReader reader, TermListFactory listFactory, BoboSegmentReader.WorkArea workArea)
{
#if FEATURE_STRING_INTERN
    string field = string.Intern(fieldName);
#else
    string field = fieldName;
#endif
    int maxdoc = reader.MaxDoc;
    BigNestedInt32Array.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea);
    BigNestedInt32Array.BufferedLoader weightLoader = GetBufferedLoader(maxdoc, null);
    var list = (listFactory == null ? new TermStringList() : listFactory.CreateTermList());
    List<int> minIDList = new List<int>();
    List<int> maxIDList = new List<int>();
    List<int> freqList = new List<int>();
    OpenBitSet bitset = new OpenBitSet(maxdoc + 1);
    int negativeValueCount = GetNegativeValueCount(reader, field);
    int t = 1; // valid term id starts from 1
    list.Add(null);
    minIDList.Add(-1);
    maxIDList.Add(-1);
    freqList.Add(0);
    m_overflow = false;
    string pre = null;
    int df = 0;
    int minID = -1;
    int maxID = -1;
    int docID = -1;
    int valId = 0;
    Terms terms = reader.GetTerms(field);
    if (terms != null)
    {
        TermsEnum termsEnum = terms.GetIterator(null);
        BytesRef text;
        while ((text = termsEnum.Next()) != null)
        {
            string strText = text.Utf8ToString();
            string val = null;
            int weight = 0;
            string[] split = strText.Split(new char[] { '\0' }, StringSplitOptions.RemoveEmptyEntries);
            if (split.Length > 1)
            {
                val = split[0];
                weight = int.Parse(split[split.Length - 1]);
            }
            else
            {
                continue;
            }
            if (pre == null || !val.Equals(pre))
            {
                if (pre != null)
                {
                    freqList.Add(df);
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);
                }
                list.Add(val);
                df = 0;
                minID = -1;
                maxID = -1;
                valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                t++;
            }
            Term term = new Term(field, strText);
            DocsEnum docsEnum = reader.GetTermDocsEnum(term);
            if (docsEnum != null)
            {
                while ((docID = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS)
                {
                    df++;
                    if (!loader.Add(docID, valId)) { LogOverflow(fieldName); }
                    else { weightLoader.Add(docID, weight); }
                    if (docID < minID) { minID = docID; }
                    bitset.FastSet(docID);
                    while (docsEnum.NextDoc() != DocsEnum.NO_MORE_DOCS)
                    {
                        docID = docsEnum.DocID;
                        df++;
                        if (!loader.Add(docID, valId)) { LogOverflow(fieldName); }
                        else { weightLoader.Add(docID, weight); }
                        bitset.FastSet(docID);
                    }
                    if (docID > maxID) { maxID = docID; }
                }
            }
            pre = val;
        }
        if (pre != null)
        {
            freqList.Add(df);
            minIDList.Add(minID);
            maxIDList.Add(maxID);
        }
    }
    list.Seal();
    try
    {
        m_nestedArray.Load(maxdoc + 1, loader);
        m_weightArray.Load(maxdoc + 1, weightLoader);
    }
    catch (Exception e)
    {
        throw new RuntimeException("failed to load due to " + e.ToString(), e);
    }
    this.m_valArray = list;
    this.m_freqs = freqList.ToArray();
    this.m_minIDs = minIDList.ToArray();
    this.m_maxIDs = maxIDList.ToArray();
    int doc = 0;
    while (doc < maxdoc && !m_nestedArray.Contains(doc, 0, true)) { ++doc; }
    if (doc < maxdoc)
    {
        this.m_minIDs[0] = doc;
        doc = maxdoc - 1;
        while (doc >= 0 && !m_nestedArray.Contains(doc, 0, true)) { --doc; }
        this.m_maxIDs[0] = doc;
    }
    this.m_freqs[0] = maxdoc - (int)bitset.Cardinality();
}
/// <summary>
/// loads multi-value facet data. This method uses a workarea to prepare loading.
/// </summary>
/// <param name="fieldName"></param>
/// <param name="reader"></param>
/// <param name="listFactory"></param>
/// <param name="workArea"></param>
public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory, BoboIndexReader.WorkArea workArea)
{
    long t0 = Environment.TickCount;
    int maxdoc = reader.MaxDoc;
    BigNestedIntArray.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea);
    TermEnum tenum = null;
    TermDocs tdoc = null;
    ITermValueList list = (listFactory == null ? (ITermValueList)new TermStringList() : listFactory.CreateTermList());
    List<int> minIDList = new List<int>();
    List<int> maxIDList = new List<int>();
    List<int> freqList = new List<int>();
    OpenBitSet bitset = new OpenBitSet();
    int negativeValueCount = GetNegativeValueCount(reader, string.Intern(fieldName));
    int t = 0; // current term number
    list.Add(null);
    minIDList.Add(-1);
    maxIDList.Add(-1);
    freqList.Add(0);
    t++;
    _overflow = false;
    try
    {
        tdoc = reader.TermDocs();
        tenum = reader.Terms(new Term(fieldName, ""));
        if (tenum != null)
        {
            do
            {
                Term term = tenum.Term;
                if (term == null || !fieldName.Equals(term.Field)) { break; }
                string val = term.Text;
                if (val != null)
                {
                    list.Add(val);
                    tdoc.Seek(tenum);
                    //freqList.add(tenum.docFreq()); // removed because the df doesn't take into account the num of deletedDocs
                    int df = 0;
                    int minID = -1;
                    int maxID = -1;
                    int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                    if (tdoc.Next())
                    {
                        df++;
                        int docid = tdoc.Doc;
                        if (!loader.Add(docid, valId)) { LogOverflow(fieldName); }
                        minID = docid;
                        bitset.Set(docid);
                        while (tdoc.Next())
                        {
                            df++;
                            docid = tdoc.Doc;
                            if (!loader.Add(docid, valId)) { LogOverflow(fieldName); }
                            bitset.Set(docid);
                        }
                        maxID = docid;
                    }
                    freqList.Add(df);
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);
                }
                t++;
            } while (tenum.Next());
        }
    }
    finally
    {
        try
        {
            if (tdoc != null) { tdoc.Dispose(); }
        }
        finally
        {
            if (tenum != null) { tenum.Dispose(); }
        }
    }
    list.Seal();
    try
    {
        _nestedArray.Load(maxdoc + 1, loader);
    }
    catch (System.IO.IOException e)
    {
        throw e;
    }
    catch (Exception e)
    {
        throw new RuntimeException("failed to load due to " + e.ToString(), e);
    }
    this.valArray = list;
    this.freqs = freqList.ToArray();
    this.minIDs = minIDList.ToArray();
    this.maxIDs = maxIDList.ToArray();
    int doc = 0;
    while (doc <= maxdoc && !_nestedArray.Contains(doc, 0, true)) { ++doc; }
    if (doc <= maxdoc)
    {
        this.minIDs[0] = doc;
        doc = maxdoc;
        while (doc > 0 && !_nestedArray.Contains(doc, 0, true)) { --doc; }
        if (doc > 0) { this.maxIDs[0] = doc; }
    }
    this.freqs[0] = maxdoc + 1 - (int)bitset.Cardinality();
}
public RangeFacetHandler(string name, TermListFactory termListFactory, IEnumerable<string> predefinedRanges) : this(name, name, termListFactory, predefinedRanges) { }