Example #1
        public override FacetDataCache Load(BoboIndexReader reader, BoboIndexReader.WorkArea workArea)
        {
            MultiValueFacetDataCache dataCache = new MultiValueFacetDataCache();

            dataCache.MaxItems = maxItems;
            if (sizePayloadTerm == null)
            {
                dataCache.Load(_indexFieldName, reader, _termListFactory, workArea);
            }
            else
            {
                dataCache.Load(_indexFieldName, reader, _termListFactory, sizePayloadTerm);
            }
            return dataCache;
        }
        /// <summary>
        /// Loads multi-value facet data. This overload uses a work area to prepare loading.
        /// </summary>
        /// <param name="fieldName">name of the facet field to load terms from</param>
        /// <param name="reader">index reader to load from</param>
        /// <param name="listFactory">factory for the term value list; null falls back to a TermStringList</param>
        /// <param name="workArea">reusable work area that supplies buffered loaders</param>
        public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory, BoboIndexReader.WorkArea workArea)
        {
            int maxdoc = reader.MaxDoc;

            BigNestedIntArray.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea);

            TermEnum       tenum              = null;
            TermDocs       tdoc               = null;
            ITermValueList list               = (listFactory == null ? (ITermValueList) new TermStringList() : listFactory.CreateTermList());
            List <int>     minIDList          = new List <int>();
            List <int>     maxIDList          = new List <int>();
            List <int>     freqList           = new List <int>();
            OpenBitSet     bitset             = new OpenBitSet();
            int            negativeValueCount = GetNegativeValueCount(reader, string.Intern(fieldName));
            int            t = 0; // current term number

            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            t++;

            _overflow = false;
            try
            {
                tdoc  = reader.TermDocs();
                tenum = reader.Terms(new Term(fieldName, ""));
                if (tenum != null)
                {
                    do
                    {
                        Term term = tenum.Term;
                        if (term == null || !fieldName.Equals(term.Field))
                        {
                            break;
                        }

                        string val = term.Text;

                        if (val != null)
                        {
                            list.Add(val);

                            tdoc.Seek(tenum);
                            // don't use tenum.DocFreq() here: it does not account for deleted docs
                            int df    = 0;
                            int minID = -1;
                            int maxID = -1;
                            // negative numeric terms sort in reverse, so their ids are flipped to keep ascending value order
                            int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                            if (tdoc.Next())
                            {
                                df++;
                                int docid = tdoc.Doc;

                                if (!loader.Add(docid, valId))
                                {
                                    LogOverflow(fieldName);
                                }
                                minID = docid;
                                bitset.Set(docid);
                                while (tdoc.Next())
                                {
                                    df++;
                                    docid = tdoc.Doc;

                                    if (!loader.Add(docid, valId))
                                    {
                                        LogOverflow(fieldName);
                                    }
                                    bitset.Set(docid);
                                }
                                maxID = docid;
                            }
                            freqList.Add(df);
                            minIDList.Add(minID);
                            maxIDList.Add(maxID);
                        }

                        t++;
                    } while (tenum.Next());
                }
            }
            finally
            {
                try
                {
                    if (tdoc != null)
                    {
                        tdoc.Dispose();
                    }
                }
                finally
                {
                    if (tenum != null)
                    {
                        tenum.Dispose();
                    }
                }
            }

            list.Seal();

            try
            {
                _nestedArray.Load(maxdoc + 1, loader);
            }
            catch (System.IO.IOException)
            {
                throw; // rethrow, preserving the original stack trace
            }
            catch (Exception e)
            {
                throw new RuntimeException("failed to load due to " + e.ToString(), e);
            }

            this.valArray = list;
            this.freqs    = freqList.ToArray();
            this.minIDs   = minIDList.ToArray();
            this.maxIDs   = maxIDList.ToArray();

            int doc = 0;

            while (doc <= maxdoc && !_nestedArray.Contains(doc, 0, true))
            {
                ++doc;
            }
            if (doc <= maxdoc)
            {
                this.minIDs[0] = doc;
                doc            = maxdoc;
                while (doc > 0 && !_nestedArray.Contains(doc, 0, true))
                {
                    --doc;
                }
                if (doc > 0)
                {
                    this.maxIDs[0] = doc;
                }
            }
            this.freqs[0] = maxdoc + 1 - (int)bitset.Cardinality();
        }
        protected virtual BigNestedIntArray.BufferedLoader GetBufferedLoader(int maxdoc, BoboIndexReader.WorkArea workArea)
        {
            if (workArea == null)
            {
                return new BigNestedIntArray.BufferedLoader(maxdoc, _maxItems, new BigIntBuffer());
            }
            else
            {
                BigIntBuffer buffer = workArea.Get <BigIntBuffer>();
                if (buffer == null)
                {
                    buffer = new BigIntBuffer();
                    workArea.Put(buffer);
                }
                else
                {
                    buffer.Reset();
                }

                BigNestedIntArray.BufferedLoader loader = workArea.Get <BigNestedIntArray.BufferedLoader>();
                if (loader == null || loader.Capacity < maxdoc)
                {
                    loader = new BigNestedIntArray.BufferedLoader(maxdoc, _maxItems, buffer);
                    workArea.Put(loader);
                }
                else
                {
                    loader.Reset(maxdoc, _maxItems, buffer);
                }
                return loader;
            }
        }
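
A minimal usage sketch for the cache loader above, not taken from the library itself: the field name, the MaxItems value, and the namespace imports are assumptions, but the Load call and the parallel arrays it fills (valArray, freqs, minIDs, maxIDs, with slot 0 reserved for documents that carry no value) follow the code shown.

        // Sketch only: build a MultiValueFacetDataCache for a single field.
        using BoboBrowse.Net;                // assumed namespaces
        using BoboBrowse.Net.Facets.Data;
        using Lucene.Net.Index;

        public static class MultiValueCacheSketch
        {
            public static MultiValueFacetDataCache BuildCache(IndexReader reader, BoboIndexReader.WorkArea workArea)
            {
                var cache = new MultiValueFacetDataCache();
                cache.MaxItems = 128;                        // hypothetical cap on values per document
                cache.Load("color", reader, null, workArea); // null factory => default TermStringList
                return cache;
            }
        }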
Example #4
        public override void Load(string fieldName, IndexReader reader, TermListFactory listFactory, BoboIndexReader.WorkArea workArea)
        {
            int maxdoc = reader.MaxDoc;

            BigNestedIntArray.BufferedLoader loader       = GetBufferedLoader(maxdoc, workArea);
            BigNestedIntArray.BufferedLoader weightLoader = GetBufferedLoader(maxdoc, null);

            TermEnum   tenum              = null;
            TermDocs   tdoc               = null;
            var        list               = (listFactory == null ? new TermStringList() : listFactory.CreateTermList());
            List <int> minIDList          = new List <int>();
            List <int> maxIDList          = new List <int>();
            List <int> freqList           = new List <int>();
            OpenBitSet bitset             = new OpenBitSet(maxdoc + 1);
            int        negativeValueCount = GetNegativeValueCount(reader, string.Intern(fieldName));
            int        t = 0; // current term number

            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            t++;

            _overflow = false;

            string pre = null;

            int df    = 0;
            int minID = -1;
            int maxID = -1;
            int valId = 0;

            try
            {
                tdoc  = reader.TermDocs();
                tenum = reader.Terms(new Term(fieldName, ""));
                if (tenum != null)
                {
                    do
                    {
                        Term term = tenum.Term;
                        if (term == null || !fieldName.Equals(term.Field))
                        {
                            break;
                        }

                        string val = term.Text;

                        if (val != null)
                        {
                            int      weight = 0;
                            // the term text is encoded as "value\0weight"; split off the weight suffix
                            string[] split  = val.Split(new char[] { '\0' }, StringSplitOptions.RemoveEmptyEntries);
                            if (split.Length > 1)
                            {
                                val    = split[0];
                                weight = int.Parse(split[split.Length - 1]);
                            }
                            if (pre == null || !val.Equals(pre))
                            {
                                if (pre != null)
                                {
                                    freqList.Add(df);
                                    minIDList.Add(minID);
                                    maxIDList.Add(maxID);
                                }

                                list.Add(val);

                                df    = 0;
                                minID = -1;
                                maxID = -1;
                                valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                                t++;
                            }

                            tdoc.Seek(tenum);
                            if (tdoc.Next())
                            {
                                df++;
                                int docid = tdoc.Doc;

                                if (!loader.Add(docid, valId))
                                {
                                    LogOverflow(fieldName);
                                }
                                else
                                {
                                    weightLoader.Add(docid, weight);
                                }

                                // minID starts at -1 for each new value, so claim the first docid seen;
                                // "docid < minID" alone can never be true for a non-negative docid
                                if (minID < 0 || docid < minID)
                                {
                                    minID = docid;
                                }
                                bitset.FastSet(docid);
                                while (tdoc.Next())
                                {
                                    df++;
                                    docid = tdoc.Doc;

                                    if (!loader.Add(docid, valId))
                                    {
                                        LogOverflow(fieldName);
                                    }
                                    else
                                    {
                                        weightLoader.Add(docid, weight);
                                    }

                                    bitset.FastSet(docid);
                                }
                                if (docid > maxID)
                                {
                                    maxID = docid;
                                }
                            }
                            pre = val;
                        }
                    } while (tenum.Next());
                    if (pre != null)
                    {
                        freqList.Add(df);
                        minIDList.Add(minID);
                        maxIDList.Add(maxID);
                    }
                }
            }
            finally
            {
                try
                {
                    if (tdoc != null)
                    {
                        tdoc.Dispose();
                    }
                }
                finally
                {
                    if (tenum != null)
                    {
                        tenum.Dispose();
                    }
                }
            }

            list.Seal();

            try
            {
                _nestedArray.Load(maxdoc + 1, loader);
                _weightArray.Load(maxdoc + 1, weightLoader);
            }
            catch (System.IO.IOException)
            {
                throw; // rethrow, preserving the original stack trace
            }
            catch (Exception e)
            {
                throw new RuntimeException("failed to load due to " + e.ToString(), e);
            }

            this.valArray = list;
            this.freqs    = freqList.ToArray();
            this.minIDs   = minIDList.ToArray();
            this.maxIDs   = maxIDList.ToArray();

            int doc = 0;

            while (doc <= maxdoc && !_nestedArray.Contains(doc, 0, true))
            {
                ++doc;
            }
            if (doc <= maxdoc)
            {
                this.minIDs[0] = doc;
                doc            = maxdoc;
                while (doc > 0 && !_nestedArray.Contains(doc, 0, true))
                {
                    --doc;
                }
                if (doc > 0)
                {
                    this.maxIDs[0] = doc;
                }
            }
            this.freqs[0] = maxdoc + 1 - (int)bitset.Cardinality();
        }
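
Example #4 expects each term's text to carry an appended weight, separated from the value by a NUL character. Here is a small sketch of that encoding, mirroring the Split/Parse logic above; the literal values are illustrative:

        // Sketch only: the "value\0weight" term encoding the loader splits apart.
        string raw = "red" + '\0' + "42";   // term text as it would appear in the index
        string[] parts = raw.Split(new[] { '\0' }, StringSplitOptions.RemoveEmptyEntries);
        string value = parts[0];                          // "red"
        int weight = int.Parse(parts[parts.Length - 1]);  // 42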
Example #5
 public virtual void LoadFacetData(BoboIndexReader reader, BoboIndexReader.WorkArea workArea)
 {
     reader.PutFacetData(_name, Load(reader, workArea));
 }
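
A short sketch of the round trip Example #5 enables; the handler variable, its Name property, and the GetFacetData accessor are assumptions inferred from the PutFacetData call above:

     // Sketch only: a facet handler loads its data into the reader, keyed by name.
     handler.LoadFacetData(boboReader, workArea);
     object facetData = boboReader.GetFacetData(handler.Name);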
Example #6
 public virtual D Load(BoboIndexReader reader, BoboIndexReader.WorkArea workArea)
 {
     return Load(reader);
 }
Example #7
        public void TestIndexReload()
        {
            try
            {
                RAMDirectory idxDir = new RAMDirectory();
                Document[] docs = BoboTestCase.BuildData();
                BoboIndexReader.WorkArea workArea = new BoboIndexReader.WorkArea();
                BrowseRequest req;
                BrowseSelection sel;
                BoboBrowser browser;
                BrowseResult result;

                IndexWriter writer = new IndexWriter(idxDir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
                writer.Close();

                int dup = 0;
                for (int j = 0; j < 50; j++)
                {
                    IndexReader idxReader = IndexReader.Open(idxDir, true);
                    BoboIndexReader reader = BoboIndexReader.GetInstance(idxReader, _fconf, workArea);

                    req = new BrowseRequest();
                    req.Offset = 0;
                    req.Count = 10;
                    sel = new BrowseSelection("color");
                    sel.AddValue("red");
                    req.AddSelection(sel);
                    browser = new BoboBrowser(reader);
                    result = browser.Browse(req);

                    Assert.AreEqual(3 * dup, result.NumHits);

                    req = new BrowseRequest();
                    req.Offset = 0;
                    req.Count = 10;
                    sel = new BrowseSelection("tag");
                    sel.AddValue("dog");
                    req.AddSelection(sel);
                    browser = new BoboBrowser(reader);
                    result = browser.Browse(req);

                    Assert.AreEqual(2 * dup, result.NumHits);

                    req = new BrowseRequest();
                    req.Offset = 0;
                    req.Count = 10;
                    sel = new BrowseSelection("tag");
                    sel.AddValue("funny");
                    req.AddSelection(sel);
                    browser = new BoboBrowser(reader);
                    result = browser.Browse(req);

                    Assert.AreEqual(3 * dup, result.NumHits);

                    writer = new IndexWriter(idxDir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.UNLIMITED);
                    for (int k = 0; k <= j; k++)
                    {
                        for (int i = 0; i < docs.Length; i++)
                        {
                            writer.AddDocument(docs[i]);
                        }
                        dup++;
                    }
                    writer.Close();
                }
                idxDir.Close();
            }
            catch (Exception e)
            {
                Assert.Fail(e.Message);
            }
        }
Example #8
 public IEnumerable <IFacetHandler> LoadFacetHandlers(string springConfigFile, BoboIndexReader.WorkArea workArea)
 {
     if (File.Exists(springConfigFile))
     {
         XmlApplicationContext appCtx = new XmlApplicationContext(springConfigFile);
         return appCtx.GetObjectsOfType(typeof(IFacetHandler)).Values.OfType <IFacetHandler>().ToList();
     }
     else
     {
         return new List <IFacetHandler>();
     }
 }
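
A usage sketch for the Spring.NET loader above; the config path is hypothetical, and the XML file is expected to declare objects implementing IFacetHandler:

     // Sketch only: collect facet handlers declared in a Spring.NET config file.
     var workArea = new BoboIndexReader.WorkArea();
     IEnumerable<IFacetHandler> handlers = LoadFacetHandlers(@"conf\facet-handlers.xml", workArea);
     foreach (IFacetHandler handler in handlers)
     {
         handler.LoadFacetData(boboReader, workArea); // boboReader: an existing BoboIndexReader
     }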