Exemple #1
0
            /// <summary>
            /// Worker loop: while decrementing <c>numDocs</c> yields a positive value, builds a
            /// document carrying one to three categories obtained from <c>NewCategory()</c>, records
            /// every prefix of each category path in <c>values</c>, and adds the document to <c>iw</c>.
            /// </summary>
            /// <exception cref="Exception">Wraps any <see cref="IOException"/> raised while indexing.</exception>
            public override void Run()
            {
                Random rnd = Random;

                while (numDocs.DecrementAndGet() > 0)
                {
                    try
                    {
                        Document document = new Document();

                        // 1-3 categories per document
                        for (int remaining = rnd.Next(3) + 1; remaining > 0; remaining--)
                        {
                            FacetField field = NewCategory();
                            document.Add(field);

                            // Register every prefix of the category path, longest prefix first.
                            FacetLabel path = new FacetLabel(field.Dim, field.Path);
                            for (int depth = path.Length; depth > 0; depth--)
                            {
                                string key = FacetsConfig.PathToString(path.Components, depth);
                                values[key] = key;
                            }
                        }

                        iw.AddDocument(config.Build(tw, document));
                    }
                    catch (IOException ioe)
                    {
                        throw new Exception(ioe.ToString(), ioe);
                    }
                }
            }
            /// <summary>
            /// Worker loop: while decrementing <c>numCats</c> yields a positive value, picks a random
            /// value below <c>range</c>, adds a four-component category derived from it to <c>tw</c>,
            /// asserts that the new ordinal has a parent, and records the four path prefixes in
            /// <c>values</c>.
            /// </summary>
            /// <exception cref="Exception">Wraps any <see cref="IOException"/> raised while adding the category.</exception>
            public override void Run()
            {
                Random random = Random();

                while (numCats.DecrementAndGet() > 0)
                {
                    try
                    {
                        int value = random.Next(range);

                        // Category components are machine-readable labels, so format them with the
                        // invariant culture rather than whatever culture the current thread has.
                        FacetLabel cp = new FacetLabel(
                            Convert.ToString(value / 1000, System.Globalization.CultureInfo.InvariantCulture),
                            Convert.ToString(value / 10000, System.Globalization.CultureInfo.InvariantCulture),
                            Convert.ToString(value / 100000, System.Globalization.CultureInfo.InvariantCulture),
                            Convert.ToString(value, System.Globalization.CultureInfo.InvariantCulture));
                        int ord = tw.AddCategory(cp);
                        Assert.True(tw.GetParent(ord) != -1, "invalid parent for ordinal " + ord + ", category " + cp);

                        // Record the path prefixes of length 1 through 4.
                        string l1 = FacetsConfig.PathToString(cp.Components, 1);
                        string l2 = FacetsConfig.PathToString(cp.Components, 2);
                        string l3 = FacetsConfig.PathToString(cp.Components, 3);
                        string l4 = FacetsConfig.PathToString(cp.Components, 4);
                        values[l1] = l1;
                        values[l2] = l2;
                        values[l3] = l3;
                        values[l4] = l4;
                    }
                    catch (IOException e)
                    {
                        throw new Exception(e.Message, e);
                    }
                }
            }
            /// <summary>
            /// Worker loop: while decrementing <c>numCats</c> yields a positive value, picks a random
            /// value below <c>range</c>, adds a four-component category derived from it to <c>tw</c>,
            /// asserts that the new ordinal has a parent, and records the four path prefixes in
            /// <c>values</c>.
            /// </summary>
            public override void Run()
            {
                Random rnd = Random;

                while (numCats.DecrementAndGet() > 0)
                {
                    try
                    {
                        int n = rnd.Next(range);

                        // Components are formatted with the invariant culture.
                        string byThousand = Convert.ToString(n / 1000, CultureInfo.InvariantCulture);
                        string byTenThousand = Convert.ToString(n / 10000, CultureInfo.InvariantCulture);
                        string byHundredThousand = Convert.ToString(n / 100000, CultureInfo.InvariantCulture);
                        string exact = Convert.ToString(n, CultureInfo.InvariantCulture);
                        FacetLabel label = new FacetLabel(byThousand, byTenThousand, byHundredThousand, exact);

                        int ordinal = tw.AddCategory(label);
                        Assert.IsTrue(tw.GetParent(ordinal) != -1, "invalid parent for ordinal " + ordinal + ", category " + label);

                        // Compute the prefixes of length 1..4 first, then record them all.
                        string[] prefixes = new string[4];
                        for (int depth = 1; depth <= prefixes.Length; depth++)
                        {
                            prefixes[depth - 1] = FacetsConfig.PathToString(label.Components, depth);
                        }
                        foreach (string prefix in prefixes)
                        {
                            values[prefix] = prefix;
                        }
                    }
                    catch (Exception ex) when (ex.IsIOException())
                    {
                        throw RuntimeException.Create(ex);
                    }
                }
            }
Exemple #4
0
        /// <summary>
        /// Looks up the term built from <paramref name="dim"/> and the single element of
        /// <paramref name="path"/> in <c>dv</c> and returns the matching entry of <c>counts</c>.
        /// </summary>
        /// <param name="dim">Dimension passed to <c>FacetsConfig.PathToString</c>.</param>
        /// <param name="path">Path below the dimension; must contain exactly one element.</param>
        /// <returns>
        /// <c>counts[ord]</c> for the ordinal returned by the term lookup, or -1 when that
        /// ordinal is negative.
        /// </returns>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="path"/> does not have exactly one element.
        /// </exception>
        public override float GetSpecificValue(string dim, params string[] path)
        {
            if (path.Length != 1)
            {
                throw new System.ArgumentException("path must be length=1");
            }

            BytesRef term = new BytesRef(FacetsConfig.PathToString(dim, path));
            int termOrd = (int)dv.LookupTerm(term);

            if (termOrd >= 0)
            {
                return counts[termOrd];
            }

            // Negative ordinal from the lookup: report -1.
            return -1;
        }
Exemple #5
0
        /// <summary>
        /// Writes a document for <paramref name="categoryPath"/> to <c>indexWriter</c>, takes the
        /// next value of <c>nextID</c> as its id, records the parent in the taxonomy arrays and
        /// finally adds the category to the cache.
        /// <para/>
        /// Note that the methods calling <see cref="AddCategoryDocument"/> are synchronized, so
        /// this method is effectively synchronized as well.
        /// </summary>
        /// <param name="categoryPath">The category whose full path is stored in the document.</param>
        /// <param name="parent">The parent value recorded for the new category.</param>
        /// <returns>The id taken from <c>nextID</c> for the new category.</returns>
        private int AddCategoryDocument(FacetLabel categoryPath, int parent)
        {
            // Before Lucene 2.9, position increments >=0 were supported, so we
            // added 1 to parent to allow the parent -1 (the parent of the root).
            // Unfortunately, starting with Lucene 2.9, after LUCENE-1542, this is
            // no longer enough, since 0 is not encoded consistently either (see
            // comment in SinglePositionTokenStream). But because we must be
            // backward-compatible with existing indexes, we can't just fix what
            // we write here (e.g., to write parent+2), and need to do a workaround
            // in the reader (which knows that anyway only category 0 has a parent
            // -1).
            parentStream.Set(Math.Max(parent + 1, 1));
            Document d = new Document();

            d.Add(parentStreamField);

            // The full-path field is reused: set its value for this category, then add it.
            fullPathField.SetStringValue(FacetsConfig.PathToString(categoryPath.Components, categoryPath.Length));
            d.Add(fullPathField);

            // Note that we do not pass an Analyzer here because the fields that are
            // added to the Document are untokenized or contain their own TokenStream.
            // Therefore the IndexWriter's Analyzer has no effect.
            indexWriter.AddDocument(d);
            int id = nextID++;

            // added a category document, mark that ReaderManager is not up-to-date
            shouldRefreshReaderManager = true;

            // also add to the parent array
            taxoArrays = GetTaxoArrays().Add(id, parent);

            // NOTE: this line must be executed last, or else the cache gets updated
            // before the parents array (LUCENE-4596)
            AddToCache(categoryPath, id);

            return(id);
        }
        /// <summary>
        /// Resolves the ordinal of <paramref name="cp"/>, consulting <c>ordinalCache</c> first and
        /// falling back to a term lookup on <c>indexReader</c>.
        /// </summary>
        /// <param name="cp">The category path to resolve.</param>
        /// <returns>
        /// <c>ROOT_ORDINAL</c> for an empty path, the category's ordinal when it is found, or
        /// <c>TaxonomyReader.INVALID_ORDINAL</c> otherwise.
        /// </returns>
        public override int GetOrdinal(FacetLabel cp)
        {
            EnsureOpen();
            if (cp.Length == 0)
            {
                return ROOT_ORDINAL;
            }

            // Fast path: the LRU cache.
            lock (ordinalCache)
            {
                IntClass cached = ordinalCache.Get(cp);
                if (cached != null && cached.IntItem != null)
                {
                    int cachedOrdinal = (int)cached.IntItem.Value;
                    if (cachedOrdinal >= indexReader.MaxDoc)
                    {
                        // The category is cached but this TR instance does not recognize it,
                        // so searching for the path on disk would not find it either.
                        return TaxonomyReader.INVALID_ORDINAL;
                    }

                    // The cache is shared with DTR instances allocated from doOpenIfChanged,
                    // so only ordinals below this reader's MaxDoc are returned.
                    return cachedOrdinal;
                }
            }

            // Cache miss: fetch the value from disk.
            BytesRef term = new BytesRef(FacetsConfig.PathToString(cp.Components, cp.Length));
            DocsEnum docs = MultiFields.GetTermDocsEnum(indexReader, null, Consts.FULL, term, 0);
            if (docs == null || docs.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
            {
                // Only existing categories are cached, never their absence: the caches are
                // shared with new DTR instances allocated from doOpenIfChanged, and a cached
                // "missing" entry could wrongly tell a newer generation that a category
                // does not exist.
                return TaxonomyReader.INVALID_ORDINAL;
            }

            int ordinal = docs.DocID();
            lock (ordinalCache)
            {
                ordinalCache.Put(cp, new IntClass { IntItem = ordinal });
            }

            return ordinal;
        }
Exemple #7
0
        /// <summary>
        /// Resolves the ordinal of <paramref name="cp"/>, consulting <c>ordinalCache</c> under the
        /// read lock first and falling back to a term lookup on <c>indexReader</c>, whose result
        /// is stored in the cache under the write lock.
        /// </summary>
        /// <param name="cp">The category path to resolve.</param>
        /// <returns>
        /// <c>ROOT_ORDINAL</c> for an empty path, the category's ordinal when it is found, or
        /// <c>TaxonomyReader.INVALID_ORDINAL</c> otherwise.
        /// </returns>
        public override int GetOrdinal(FacetLabel cp)
        {
            EnsureOpen();
            if (cp.Length == 0)
            {
                return ROOT_ORDINAL;
            }

            // Fast path: the LRU cache.

            // LUCENENET: Despite LRUHashMap being thread-safe, we get much better performance
            // if reads are separated from writes.
            ordinalCacheLock.EnterReadLock();
            try
            {
                if (ordinalCache.TryGetValue(cp, out Int32Class cached))
                {
                    // The cache is shared with DTR instances allocated from doOpenIfChanged,
                    // so the ordinal must be one that this DTR instance recognizes.
                    bool knownToThisReader = cached < indexReader.MaxDoc;
                    if (!knownToThisReader)
                    {
                        // Cached, but not recognized by this TR instance: searching for the
                        // path on disk would not find it either.
                        return TaxonomyReader.INVALID_ORDINAL;
                    }

                    return cached;
                }
            }
            finally
            {
                ordinalCacheLock.ExitReadLock();
            }

            // Cache miss: fetch the value from disk.
            BytesRef term = new BytesRef(FacetsConfig.PathToString(cp.Components, cp.Length));
            DocsEnum docs = MultiFields.GetTermDocsEnum(indexReader, null, Consts.FULL, term, 0);
            if (docs == null || docs.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
            {
                // Only existing categories are cached, never their absence: the caches are
                // shared with new DTR instances allocated from doOpenIfChanged, and a cached
                // "missing" entry could wrongly tell a newer generation that a category
                // does not exist.
                return TaxonomyReader.INVALID_ORDINAL;
            }

            int ordinal = docs.DocID;

            ordinalCacheLock.EnterWriteLock();
            try
            {
                ordinalCache[cp] = ordinal;
            }
            finally
            {
                ordinalCacheLock.ExitWriteLock();
            }

            return ordinal;
        }
Exemple #8
0
        /// <summary>
        /// Resolves the ordinal of <paramref name="categoryPath"/> by checking the cache, then
        /// (after possibly filling the cache) checking it again, and finally searching the
        /// on-disk index.
        /// </summary>
        /// <param name="categoryPath">The category to look up.</param>
        /// <returns>
        /// The category's ordinal, or a negative number in case the category does not yet
        /// exist in the taxonomy.
        /// </returns>
        protected virtual int FindCategory(FacetLabel categoryPath)
        {
            lock (this)
            {
                // A cache hit, or a miss on a cache known to be complete, is a final answer.
                int ordinal = cache.Get(categoryPath);
                if (ordinal >= 0 || cacheIsComplete)
                {
                    return ordinal;
                }

                cacheMisses.IncrementAndGet();

                // After a few cache misses it makes sense to read all the categories from
                // disk into the cache. Doing so on the very first miss (or when opening the
                // writer) would significantly slow down the case where a taxonomy is opened
                // just to add one category. Spending a long time on reading only after enough
                // time was spent on cache misses is known as an "online algorithm".
                PerhapsFillCache();
                ordinal = cache.Get(categoryPath);
                if (ordinal >= 0 || cacheIsComplete)
                {
                    // Either the category is in the cache now, or the cache is complete:
                    // return whatever the cache reported.
                    return ordinal;
                }

                // Not in the cache, and the cache is incomplete: the answer has to come
                // from the on-disk index.
                InitReaderManager();

                int docId = -1;
                DirectoryReader reader = readerManager.Acquire();
                try
                {
                    BytesRef term = new BytesRef(FacetsConfig.PathToString(categoryPath.Components, categoryPath.Length));
                    TermsEnum termsEnum = null; // reused across leaves
                    DocsEnum postings = null;   // reused across leaves
                    foreach (AtomicReaderContext leaf in reader.Leaves)
                    {
                        Terms terms = leaf.AtomicReader.GetTerms(Consts.FULL);
                        if (terms == null)
                        {
                            continue;
                        }

                        termsEnum = terms.GetIterator(termsEnum);
                        if (!termsEnum.SeekExact(term))
                        {
                            continue;
                        }

                        // liveDocs=null because the taxonomy has no deletes; freqs not required
                        postings = termsEnum.Docs(null, postings, 0);
                        // if the term was found, we know it has exactly one document.
                        docId = postings.NextDoc() + leaf.DocBase;
                        break;
                    }
                }
                finally
                {
                    readerManager.Release(reader);
                }

                // Only strictly positive doc ids are added to the cache.
                if (docId > 0)
                {
                    AddToCache(categoryPath, docId);
                }

                return docId;
            }
        }