/// <summary>
/// Builds the multi-value facet data cache for this handler. When a size-payload
/// term is configured, loading is driven by that term's payloads; otherwise the
/// shared work area is used to buffer the load.
/// </summary>
/// <param name="reader">reader to load facet data from</param>
/// <param name="workArea">reusable buffers shared across facet loads</param>
/// <returns>the populated <see cref="MultiValueFacetDataCache"/></returns>
public override FacetDataCache Load(BoboIndexReader reader, BoboIndexReader.WorkArea workArea)
{
    var dataCache = new MultiValueFacetDataCache();
    dataCache.MaxItems = maxItems;

    if (sizePayloadTerm != null)
    {
        dataCache.Load(_indexFieldName, reader, _termListFactory, sizePayloadTerm);
    }
    else
    {
        dataCache.Load(_indexFieldName, reader, _termListFactory, workArea);
    }

    return dataCache;
}
/// <summary>
/// Loads multi-value facet data. This method uses a work area to prepare loading.
/// Walks every term of <paramref name="fieldName"/> in order, assigning each term
/// an id and recording, per term: its document frequency, its minimum and maximum
/// doc id, and each (docid, termId) pair in the nested int array.
/// Slot 0 of the parallel arrays is a sentinel for documents with no value.
/// </summary>
/// <param name="fieldName">name of the indexed field to load facet values from</param>
/// <param name="reader">index reader supplying term and posting enumeration</param>
/// <param name="listFactory">factory for the term value list; a plain <see cref="TermStringList"/> is used when null</param>
/// <param name="workArea">reusable load buffers; may be null (a fresh loader is then allocated)</param>
public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory, BoboIndexReader.WorkArea workArea)
{
    int maxdoc = reader.MaxDoc;
    BigNestedIntArray.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea);
    TermEnum tenum = null;
    TermDocs tdoc = null;
    ITermValueList list = (listFactory == null ? (ITermValueList)new TermStringList() : listFactory.CreateTermList());
    List<int> minIDList = new List<int>();
    List<int> maxIDList = new List<int>();
    List<int> freqList = new List<int>();
    OpenBitSet bitset = new OpenBitSet(); // marks every doc that has at least one value
    // NOTE(review): presumably the count of terms representing negative numeric
    // values, whose ids must be reversed so ordering stays numeric — confirm
    // against GetNegativeValueCount.
    int negativeValueCount = GetNegativeValueCount(reader, string.Intern(fieldName));
    int t = 0; // current term number

    // Slot 0 is the sentinel entry for "no value".
    list.Add(null);
    minIDList.Add(-1);
    maxIDList.Add(-1);
    freqList.Add(0);
    t++;
    _overflow = false;
    try
    {
        tdoc = reader.TermDocs();
        tenum = reader.Terms(new Term(fieldName, "")); // seek to first term of the field
        if (tenum != null)
        {
            do
            {
                Term term = tenum.Term;
                if (term == null || !fieldName.Equals(term.Field))
                {
                    break; // ran off the end of this field's terms
                }
                string val = term.Text;
                if (val != null)
                {
                    list.Add(val);
                    tdoc.Seek(tenum);
                    //freqList.add(tenum.docFreq()); // removed because the df doesn't take into account the num of deletedDocs
                    int df = 0;
                    int minID = -1;
                    int maxID = -1;
                    // Reverse ids for terms in the negative range so they sort correctly.
                    int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                    if (tdoc.Next())
                    {
                        df++;
                        int docid = tdoc.Doc;
                        if (!loader.Add(docid, valId))
                        {
                            LogOverflow(fieldName); // per-doc value count exceeded MaxItems
                        }
                        minID = docid; // postings are in ascending doc order, so first doc is the min
                        bitset.Set(docid);
                        while (tdoc.Next())
                        {
                            df++;
                            docid = tdoc.Doc;
                            if (!loader.Add(docid, valId))
                            {
                                LogOverflow(fieldName);
                            }
                            bitset.Set(docid);
                        }
                        maxID = docid; // last doc seen is the max
                    }
                    freqList.Add(df);
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);
                }
                t++;
            }
            while (tenum.Next());
        }
    }
    finally
    {
        // Dispose both enumerators even if one Dispose throws.
        try
        {
            if (tdoc != null)
            {
                tdoc.Dispose();
            }
        }
        finally
        {
            if (tenum != null)
            {
                tenum.Dispose();
            }
        }
    }
    list.Seal();
    try
    {
        _nestedArray.Load(maxdoc + 1, loader);
    }
    catch (System.IO.IOException)
    {
        // BUGFIX: was 'throw e;', which resets the stack trace; bare rethrow preserves it.
        throw;
    }
    catch (Exception e)
    {
        throw new RuntimeException("failed to load due to " + e.ToString(), e);
    }

    this.valArray = list;
    this.freqs = freqList.ToArray();
    this.minIDs = minIDList.ToArray();
    this.maxIDs = maxIDList.ToArray();

    // Compute min/max doc ids for the sentinel slot (docs with no value).
    int doc = 0;
    while (doc <= maxdoc && !_nestedArray.Contains(doc, 0, true))
    {
        ++doc;
    }
    if (doc <= maxdoc)
    {
        this.minIDs[0] = doc;
        doc = maxdoc;
        while (doc > 0 && !_nestedArray.Contains(doc, 0, true))
        {
            --doc;
        }
        if (doc > 0)
        {
            this.maxIDs[0] = doc;
        }
    }
    // Frequency of "no value" = total docs minus docs that had at least one value.
    this.freqs[0] = maxdoc + 1 - (int)bitset.Cardinality();
}
/// <summary>
/// Obtains a buffered loader sized for <paramref name="maxdoc"/> documents.
/// With no work area a fresh loader and buffer are allocated; otherwise the
/// buffer and loader cached in the work area are reused (and reset) when
/// present and large enough, or created and stashed for later reuse.
/// </summary>
/// <param name="maxdoc">number of documents the loader must cover</param>
/// <param name="workArea">cache of reusable buffers; may be null</param>
/// <returns>a loader ready to accept (docid, value) pairs</returns>
protected virtual BigNestedIntArray.BufferedLoader GetBufferedLoader(int maxdoc, BoboIndexReader.WorkArea workArea)
{
    // No shared state available: allocate everything fresh.
    if (workArea == null)
    {
        return new BigNestedIntArray.BufferedLoader(maxdoc, _maxItems, new BigIntBuffer());
    }

    // Reuse (or create and cache) the shared int buffer.
    BigIntBuffer sharedBuffer = workArea.Get<BigIntBuffer>();
    if (sharedBuffer == null)
    {
        sharedBuffer = new BigIntBuffer();
        workArea.Put(sharedBuffer);
    }
    else
    {
        sharedBuffer.Reset();
    }

    // Reuse the cached loader only when it can hold maxdoc documents.
    BigNestedIntArray.BufferedLoader cachedLoader = workArea.Get<BigNestedIntArray.BufferedLoader>();
    if (cachedLoader == null || cachedLoader.Capacity < maxdoc)
    {
        cachedLoader = new BigNestedIntArray.BufferedLoader(maxdoc, _maxItems, sharedBuffer);
        workArea.Put(cachedLoader);
    }
    else
    {
        cachedLoader.Reset(maxdoc, _maxItems, sharedBuffer);
    }
    return cachedLoader;
}
/// <summary>
/// Loads multi-value facet data where each raw term carries a weight suffix.
/// Raw terms have the form "value\0weight"; consecutive raw terms sharing the
/// same value are merged into a single logical term whose postings are combined.
/// Records per logical term: document frequency, min/max doc id, the
/// (docid, termId) pairs in <c>_nestedArray</c> and the per-doc weights in
/// <c>_weightArray</c>. Slot 0 of the parallel arrays is a sentinel for
/// documents with no value.
/// </summary>
/// <param name="fieldName">name of the indexed field to load facet values from</param>
/// <param name="reader">index reader supplying term and posting enumeration</param>
/// <param name="listFactory">factory for the term value list; a plain <see cref="TermStringList"/> is used when null</param>
/// <param name="workArea">reusable load buffers; may be null</param>
public override void Load(string fieldName, IndexReader reader, TermListFactory listFactory, BoboIndexReader.WorkArea workArea)
{
    int maxdoc = reader.MaxDoc;
    BigNestedIntArray.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea);
    // Weight loader gets its own buffers (null work area) so it cannot alias 'loader'.
    BigNestedIntArray.BufferedLoader weightLoader = GetBufferedLoader(maxdoc, null);
    TermEnum tenum = null;
    TermDocs tdoc = null;
    var list = (listFactory == null ? new TermStringList() : listFactory.CreateTermList());
    List<int> minIDList = new List<int>();
    List<int> maxIDList = new List<int>();
    List<int> freqList = new List<int>();
    OpenBitSet bitset = new OpenBitSet(maxdoc + 1); // marks every doc that has at least one value
    // NOTE(review): presumably the count of terms representing negative numeric
    // values, whose ids must be reversed so ordering stays numeric — confirm
    // against GetNegativeValueCount.
    int negativeValueCount = GetNegativeValueCount(reader, string.Intern(fieldName));
    int t = 0; // current term number

    // Slot 0 is the sentinel entry for "no value".
    list.Add(null);
    minIDList.Add(-1);
    maxIDList.Add(-1);
    freqList.Add(0);
    t++;
    _overflow = false;

    // Running statistics for the current logical term (same 'val', varying weights);
    // flushed whenever the value changes and once more after the loop.
    string pre = null;
    int df = 0;
    int minID = -1;
    int maxID = -1;
    int valId = 0;

    try
    {
        tdoc = reader.TermDocs();
        tenum = reader.Terms(new Term(fieldName, "")); // seek to first term of the field
        if (tenum != null)
        {
            do
            {
                Term term = tenum.Term;
                if (term == null || !fieldName.Equals(term.Field))
                {
                    break; // ran off the end of this field's terms
                }
                string val = term.Text;
                if (val != null)
                {
                    // Split "value\0weight"; a raw term without a separator has weight 0.
                    int weight = 0;
                    string[] split = val.Split(new char[] { '\0' }, StringSplitOptions.RemoveEmptyEntries);
                    if (split.Length > 1)
                    {
                        val = split[0];
                        weight = int.Parse(split[split.Length - 1]);
                    }
                    if (pre == null || !val.Equals(pre))
                    {
                        // Value changed: flush stats for the previous logical term.
                        if (pre != null)
                        {
                            freqList.Add(df);
                            minIDList.Add(minID);
                            maxIDList.Add(maxID);
                        }
                        list.Add(val);
                        df = 0;
                        minID = -1;
                        maxID = -1;
                        // Reverse ids for terms in the negative range so they sort correctly.
                        valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                        t++;
                    }
                    tdoc.Seek(tenum);
                    if (tdoc.Next())
                    {
                        df++;
                        int docid = tdoc.Doc;
                        if (!loader.Add(docid, valId))
                        {
                            LogOverflow(fieldName); // per-doc value count exceeded MaxItems
                        }
                        else
                        {
                            weightLoader.Add(docid, weight);
                        }
                        // BUGFIX: minID is reset to -1, so 'docid < minID' alone was never
                        // true (docid >= 0) and minID stayed -1 for every term. Take the
                        // first docid when unset; postings within one raw term ascend, so
                        // each raw term's first doc is its min.
                        if (minID == -1 || docid < minID)
                        {
                            minID = docid;
                        }
                        bitset.FastSet(docid);
                        while (tdoc.Next())
                        {
                            df++;
                            docid = tdoc.Doc;
                            if (!loader.Add(docid, valId))
                            {
                                LogOverflow(fieldName);
                            }
                            else
                            {
                                weightLoader.Add(docid, weight);
                            }
                            bitset.FastSet(docid);
                        }
                        if (docid > maxID)
                        {
                            maxID = docid;
                        }
                    }
                    pre = val;
                }
            }
            while (tenum.Next());
            // Flush the final logical term.
            if (pre != null)
            {
                freqList.Add(df);
                minIDList.Add(minID);
                maxIDList.Add(maxID);
            }
        }
    }
    finally
    {
        // Dispose both enumerators even if one Dispose throws.
        try
        {
            if (tdoc != null)
            {
                tdoc.Dispose();
            }
        }
        finally
        {
            if (tenum != null)
            {
                tenum.Dispose();
            }
        }
    }
    list.Seal();
    try
    {
        _nestedArray.Load(maxdoc + 1, loader);
        _weightArray.Load(maxdoc + 1, weightLoader);
    }
    catch (System.IO.IOException)
    {
        // BUGFIX: was 'throw e;', which resets the stack trace; bare rethrow preserves it.
        throw;
    }
    catch (Exception e)
    {
        throw new RuntimeException("failed to load due to " + e.ToString(), e);
    }

    this.valArray = list;
    this.freqs = freqList.ToArray();
    this.minIDs = minIDList.ToArray();
    this.maxIDs = maxIDList.ToArray();

    // Compute min/max doc ids for the sentinel slot (docs with no value).
    int doc = 0;
    while (doc <= maxdoc && !_nestedArray.Contains(doc, 0, true))
    {
        ++doc;
    }
    if (doc <= maxdoc)
    {
        this.minIDs[0] = doc;
        doc = maxdoc;
        while (doc > 0 && !_nestedArray.Contains(doc, 0, true))
        {
            --doc;
        }
        if (doc > 0)
        {
            this.maxIDs[0] = doc;
        }
    }
    // Frequency of "no value" = total docs minus docs that had at least one value.
    this.freqs[0] = maxdoc + 1 - (int)bitset.Cardinality();
}
/// <summary>
/// Loads this handler's facet data and registers it on the reader under the
/// handler's name so later lookups can retrieve it.
/// </summary>
/// <param name="reader">reader to load from and attach the data to</param>
/// <param name="workArea">reusable buffers passed through to the load</param>
public virtual void LoadFacetData(BoboIndexReader reader, BoboIndexReader.WorkArea workArea)
{
    var facetData = Load(reader, workArea);
    reader.PutFacetData(_name, facetData);
}
/// <summary>
/// Work-area-aware load overload. The base implementation does not use the
/// work area and simply delegates to <see cref="Load(BoboIndexReader)"/>;
/// subclasses override this when they can reuse the shared buffers.
/// </summary>
/// <param name="reader">reader to load facet data from</param>
/// <param name="workArea">reusable buffers; ignored by this implementation</param>
/// <returns>the loaded facet data</returns>
public virtual D Load(BoboIndexReader reader, BoboIndexReader.WorkArea workArea)
{
    return Load(reader);
}
/// <summary>
/// Verifies that facet counts stay correct as the index grows and is reopened:
/// each pass re-opens a fresh reader, checks the expected hit counts for three
/// selections, then appends the sample documents (j+1) more times.
/// </summary>
public void TestIndexReload()
{
    // BUGFIX: the original wrapped the whole body in
    // 'catch (Exception e) { Assert.Fail(e.Message); }', which discarded the
    // stack trace; letting exceptions propagate gives the runner a full report.
    // Also disposes the per-iteration reader, which previously leaked 50 times.
    RAMDirectory idxDir = new RAMDirectory();
    Document[] docs = BoboTestCase.BuildData();
    BoboIndexReader.WorkArea workArea = new BoboIndexReader.WorkArea();

    // Create an empty index so the first pass expects zero hits (dup == 0).
    IndexWriter writer = new IndexWriter(idxDir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
    writer.Close();

    int dup = 0; // how many times the sample doc set has been appended so far
    for (int j = 0; j < 50; j++)
    {
        IndexReader idxReader = IndexReader.Open(idxDir, true);
        BoboIndexReader reader = BoboIndexReader.GetInstance(idxReader, _fconf, workArea);
        try
        {
            // The sample set contains 3 red, 2 dog-tagged and 3 funny-tagged docs.
            AssertHitCount(reader, "color", "red", 3 * dup);
            AssertHitCount(reader, "tag", "dog", 2 * dup);
            AssertHitCount(reader, "tag", "funny", 3 * dup);
        }
        finally
        {
            reader.Dispose(); // also closes the wrapped idxReader
        }

        // Append the sample documents (j + 1) more times before the next reopen.
        writer = new IndexWriter(idxDir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.UNLIMITED);
        for (int k = 0; k <= j; k++)
        {
            for (int i = 0; i < docs.Length; i++)
            {
                writer.AddDocument(docs[i]);
            }
            dup++;
        }
        writer.Close();
    }
    idxDir.Close();
}

/// <summary>Browses a single-value selection on <paramref name="field"/> and asserts the hit count.</summary>
private static void AssertHitCount(BoboIndexReader reader, string field, string value, int expectedHits)
{
    BrowseRequest req = new BrowseRequest();
    req.Offset = 0;
    req.Count = 10;
    BrowseSelection sel = new BrowseSelection(field);
    sel.AddValue(value);
    req.AddSelection(sel);
    BoboBrowser browser = new BoboBrowser(reader);
    BrowseResult result = browser.Browse(req);
    Assert.AreEqual(expectedHits, result.NumHits);
}
/// <summary>
/// Loads facet handlers declared in a Spring.NET configuration file. Returns
/// every <see cref="IFacetHandler"/> object defined in the context, or an
/// empty list when the file does not exist.
/// </summary>
/// <param name="springConfigFile">path to the Spring XML configuration</param>
/// <param name="workArea">reusable buffers; not used by this implementation</param>
/// <returns>the configured facet handlers, possibly empty</returns>
public IEnumerable<IFacetHandler> LoadFacetHandlers(string springConfigFile, BoboIndexReader.WorkArea workArea)
{
    if (!File.Exists(springConfigFile))
    {
        return new List<IFacetHandler>();
    }

    XmlApplicationContext appCtx = new XmlApplicationContext(springConfigFile);
    var handlers = appCtx.GetObjectsOfType(typeof(IFacetHandler)).Values
        .OfType<IFacetHandler>()
        .ToList();
    return handlers;
}