public override void Run()
{
    Random random = Random;
    while (numDocs.DecrementAndGet() > 0)
    {
        try
        {
            Document doc = new Document();
            int numCats = random.Next(3) + 1; // 1-3
            while (numCats-- > 0)
            {
                FacetField ff = NewCategory();
                doc.Add(ff);

                FacetLabel label = new FacetLabel(ff.Dim, ff.Path);
                // add all prefixes to values
                int level = label.Length;
                while (level > 0)
                {
                    string s = FacetsConfig.PathToString(label.Components, level);
                    values[s] = s;
                    --level;
                }
            }
            iw.AddDocument(config.Build(tw, doc));
        }
        catch (IOException e)
        {
            throw new Exception(e.ToString(), e);
        }
    }
}
public override void Run()
{
    Random random = Random();
    while (numCats.DecrementAndGet() > 0)
    {
        try
        {
            int value = random.Next(range);
            FacetLabel cp = new FacetLabel(
                Convert.ToString(value / 1000),
                Convert.ToString(value / 10000),
                Convert.ToString(value / 100000),
                Convert.ToString(value));
            int ord = tw.AddCategory(cp);
            Assert.True(tw.GetParent(ord) != -1, "invalid parent for ordinal " + ord + ", category " + cp);
            string l1 = FacetsConfig.PathToString(cp.Components, 1);
            string l2 = FacetsConfig.PathToString(cp.Components, 2);
            string l3 = FacetsConfig.PathToString(cp.Components, 3);
            string l4 = FacetsConfig.PathToString(cp.Components, 4);
            values[l1] = l1;
            values[l2] = l2;
            values[l3] = l3;
            values[l4] = l4;
        }
        catch (IOException e)
        {
            throw new Exception(e.Message, e);
        }
    }
}
public override void Run()
{
    Random random = Random;
    while (numCats.DecrementAndGet() > 0)
    {
        try
        {
            int value = random.Next(range);
            FacetLabel cp = new FacetLabel(
                Convert.ToString(value / 1000, CultureInfo.InvariantCulture),
                Convert.ToString(value / 10000, CultureInfo.InvariantCulture),
                Convert.ToString(value / 100000, CultureInfo.InvariantCulture),
                Convert.ToString(value, CultureInfo.InvariantCulture));
            int ord = tw.AddCategory(cp);
            Assert.IsTrue(tw.GetParent(ord) != -1, "invalid parent for ordinal " + ord + ", category " + cp);
            string l1 = FacetsConfig.PathToString(cp.Components, 1);
            string l2 = FacetsConfig.PathToString(cp.Components, 2);
            string l3 = FacetsConfig.PathToString(cp.Components, 3);
            string l4 = FacetsConfig.PathToString(cp.Components, 4);
            values[l1] = l1;
            values[l2] = l2;
            values[l3] = l3;
            values[l4] = l4;
        }
        catch (Exception e) when (e.IsIOException())
        {
            throw RuntimeException.Create(e);
        }
    }
}
public override float GetSpecificValue(string dim, params string[] path)
{
    if (path.Length != 1)
    {
        throw new System.ArgumentException("path must be length=1");
    }
    int ord = (int)dv.LookupTerm(new BytesRef(FacetsConfig.PathToString(dim, path)));
    if (ord < 0)
    {
        return -1;
    }
    return counts[ord];
}
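The lookup above works because FacetsConfig.PathToString encodes a dimension and its path components into the single string under which the facet was indexed. A minimal sketch of that round trip, assuming an illustrative "Author"/"Lisa" category that is not taken from the examples on this page:

using Lucene.Net.Facet;

internal static class PathToStringDemo
{
    internal static void Main()
    {
        // Encode a dimension plus one path component into the single term string
        // that the doc values lookup in GetSpecificValue is keyed by.
        string term = FacetsConfig.PathToString("Author", new[] { "Lisa" });

        // StringToPath reverses the encoding, yielding the dimension followed
        // by the path components, e.g. { "Author", "Lisa" }.
        string[] parts = FacetsConfig.StringToPath(term);

        System.Console.WriteLine(term + " -> " + string.Join("/", parts));
    }
}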
/// <summary>
/// Note that the methods calling <see cref="AddCategoryDocument"/> are synchronized, so
/// this method is effectively synchronized as well.
/// </summary>
private int AddCategoryDocument(FacetLabel categoryPath, int parent)
{
    // Before Lucene 2.9, position increments >=0 were supported, so we
    // added 1 to parent to allow the parent -1 (the parent of the root).
    // Unfortunately, starting with Lucene 2.9, after LUCENE-1542, this is
    // no longer enough, since 0 is not encoded consistently either (see
    // comment in SinglePositionTokenStream). But because we must be
    // backward-compatible with existing indexes, we can't just fix what
    // we write here (e.g., to write parent+2), and need to do a workaround
    // in the reader (which knows that anyway only category 0 has a parent
    // -1).
    parentStream.Set(Math.Max(parent + 1, 1));
    Document d = new Document();
    d.Add(parentStreamField);

    fullPathField.SetStringValue(FacetsConfig.PathToString(categoryPath.Components, categoryPath.Length));
    d.Add(fullPathField);

    // Note that we do not pass an Analyzer here because the fields that are
    // added to the Document are untokenized or contain their own TokenStream.
    // Therefore the IndexWriter's Analyzer has no effect.
    indexWriter.AddDocument(d);
    int id = nextID++;

    // added a category document, mark that ReaderManager is not up-to-date
    shouldRefreshReaderManager = true;

    // also add to the parent array
    taxoArrays = GetTaxoArrays().Add(id, parent);

    // NOTE: this line must be executed last, or else the cache gets updated
    // before the parents array (LUCENE-4596)
    AddToCache(categoryPath, id);

    return id;
}
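The parent+1 workaround described in the comments is easier to see as a pair of small helpers. This is only an illustrative sketch of the mapping the comments describe, not part of the taxonomy writer's API; the method names are invented:

// Illustrative helpers mirroring the encoding described above.
internal static class ParentPositionSketch
{
    // What the writer stores as the position for a category's parent ordinal.
    internal static int Encode(int parent) => System.Math.Max(parent + 1, 1);

    // What a reader can recover: only ordinal 0 (the root) may legitimately
    // have parent -1, so for every other ordinal the stored value is parent + 1.
    internal static int Decode(int ordinal, int storedPosition)
        => ordinal == 0 ? -1 : storedPosition - 1;
}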
public override int GetOrdinal(FacetLabel cp)
{
    EnsureOpen();
    if (cp.Length == 0)
    {
        return ROOT_ORDINAL;
    }

    // First try to find the answer in the LRU cache:
    lock (ordinalCache)
    {
        IntClass res = ordinalCache.Get(cp);
        if (res != null && res.IntItem != null)
        {
            if ((int)res.IntItem.Value < indexReader.MaxDoc)
            {
                // Since the cache is shared with DTR instances allocated from
                // doOpenIfChanged, we need to ensure that the ordinal is one that
                // this DTR instance recognizes.
                return (int)res.IntItem.Value;
            }
            else
            {
                // if we get here, it means that the category was found in the cache,
                // but is not recognized by this TR instance. Therefore there's no
                // need to continue searching for the path on disk, because we won't
                // find it there either.
                return TaxonomyReader.INVALID_ORDINAL;
            }
        }
    }

    // If we're still here, we have a cache miss. We need to fetch the
    // value from disk, and then also put it in the cache:
    int ret = TaxonomyReader.INVALID_ORDINAL;
    DocsEnum docs = MultiFields.GetTermDocsEnum(indexReader, null, Consts.FULL,
        new BytesRef(FacetsConfig.PathToString(cp.Components, cp.Length)), 0);
    if (docs != null && docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
    {
        ret = docs.DocID();
        // we only store the fact that a category exists, not its inexistence.
        // This is required because the caches are shared with new DTR instances
        // that are allocated from doOpenIfChanged. Therefore, if we only store
        // information about found categories, we cannot accidentally tell a new
        // generation of DTR that a category does not exist.
        lock (ordinalCache)
        {
            ordinalCache.Put(cp, new IntClass { IntItem = Convert.ToInt32(ret) });
        }
    }

    return ret;
}
public override int GetOrdinal(FacetLabel cp)
{
    EnsureOpen();
    if (cp.Length == 0)
    {
        return ROOT_ORDINAL;
    }

    // First try to find the answer in the LRU cache:
    // LUCENENET: Despite LRUHashMap being thread-safe, we get much better performance
    // if reads are separated from writes.
    ordinalCacheLock.EnterReadLock();
    try
    {
        if (ordinalCache.TryGetValue(cp, out Int32Class res))
        {
            if (res < indexReader.MaxDoc)
            {
                // Since the cache is shared with DTR instances allocated from
                // doOpenIfChanged, we need to ensure that the ordinal is one that
                // this DTR instance recognizes.
                return res;
            }
            else
            {
                // if we get here, it means that the category was found in the cache,
                // but is not recognized by this TR instance. Therefore there's no
                // need to continue searching for the path on disk, because we won't
                // find it there either.
                return TaxonomyReader.INVALID_ORDINAL;
            }
        }
    }
    finally
    {
        ordinalCacheLock.ExitReadLock();
    }

    // If we're still here, we have a cache miss. We need to fetch the
    // value from disk, and then also put it in the cache:
    int ret = TaxonomyReader.INVALID_ORDINAL;
    DocsEnum docs = MultiFields.GetTermDocsEnum(indexReader, null, Consts.FULL,
        new BytesRef(FacetsConfig.PathToString(cp.Components, cp.Length)), 0);
    if (docs != null && docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
    {
        ret = docs.DocID;
        // we only store the fact that a category exists, not its inexistence.
        // This is required because the caches are shared with new DTR instances
        // that are allocated from doOpenIfChanged. Therefore, if we only store
        // information about found categories, we cannot accidentally tell a new
        // generation of DTR that a category does not exist.
        ordinalCacheLock.EnterWriteLock();
        try
        {
            ordinalCache[cp] = ret;
        }
        finally
        {
            ordinalCacheLock.ExitWriteLock();
        }
    }

    return ret;
}
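The read/write split around ordinalCacheLock is a general read-mostly cache pattern. A minimal, self-contained sketch of just that pattern, using an illustrative dictionary cache rather than anything from the taxonomy classes:

using System;
using System.Collections.Generic;
using System.Threading;

// Illustrative cache only: concurrent readers share the read lock, and only a
// confirmed miss pays for the exclusive write lock.
internal sealed class ReadMostlyCache
{
    private readonly ReaderWriterLockSlim cacheLock = new ReaderWriterLockSlim();
    private readonly Dictionary<string, int> cache = new Dictionary<string, int>();

    public int GetOrAdd(string key, Func<string, int> load)
    {
        cacheLock.EnterReadLock();
        try
        {
            if (cache.TryGetValue(key, out int cached))
                return cached;
        }
        finally
        {
            cacheLock.ExitReadLock();
        }

        int value = load(key); // expensive lookup (e.g., a disk read) outside any lock

        cacheLock.EnterWriteLock();
        try
        {
            cache[key] = value; // last writer wins; fine for idempotent lookups
        }
        finally
        {
            cacheLock.ExitWriteLock();
        }
        return value;
    }
}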
/// <summary>
/// Look up the given category in the cache and/or the on-disk storage,
/// returning the category's ordinal, or a negative number in case the
/// category does not yet exist in the taxonomy.
/// </summary>
protected virtual int FindCategory(FacetLabel categoryPath)
{
    lock (this)
    {
        // If we can find the category in the cache, or we know the cache is
        // complete, we can return the response directly from it
        int res = cache.Get(categoryPath);
        if (res >= 0 || cacheIsComplete)
        {
            return res;
        }

        cacheMisses.IncrementAndGet();
        // After a few cache misses, it makes sense to read all the categories
        // from disk and into the cache. The reason not to do this on the first
        // cache miss (or even when opening the writer) is that it will
        // significantly slow down the case when a taxonomy is opened just to
        // add one category. The idea of only spending a long time on reading
        // after enough time was spent on cache misses is known as an "online
        // algorithm".
        PerhapsFillCache();
        res = cache.Get(categoryPath);
        if (res >= 0 || cacheIsComplete)
        {
            // if after filling the cache from the info on disk, the category is in it
            // or the cache is complete, return whatever cache.get returned.
            return res;
        }

        // if we get here, it means the category is not in the cache, and the cache
        // is not complete, and therefore we must look for the category on disk.

        // We need to get an answer from the on-disk index.
        InitReaderManager();

        int doc = -1;
        DirectoryReader reader = readerManager.Acquire();
        try
        {
            BytesRef catTerm = new BytesRef(FacetsConfig.PathToString(categoryPath.Components, categoryPath.Length));
            TermsEnum termsEnum = null; // reuse
            DocsEnum docs = null; // reuse
            foreach (AtomicReaderContext ctx in reader.Leaves)
            {
                Terms terms = ctx.AtomicReader.GetTerms(Consts.FULL);
                if (terms != null)
                {
                    termsEnum = terms.GetIterator(termsEnum);
                    if (termsEnum.SeekExact(catTerm))
                    {
                        // liveDocs=null because the taxonomy has no deletes
                        docs = termsEnum.Docs(null, docs, 0); // freqs not required
                        // if the term was found, we know it has exactly one document.
                        doc = docs.NextDoc() + ctx.DocBase;
                        break;
                    }
                }
            }
        }
        finally
        {
            readerManager.Release(reader);
        }
        if (doc > 0)
        {
            AddToCache(categoryPath, doc);
        }
        return doc;
    }
}
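The "online algorithm" the comment describes (tolerate a few single-item misses, then pay once to load everything) can be sketched on its own. The class and member names below are illustrative, not the taxonomy writer's:

using System;
using System.Collections.Generic;

// Illustrative sketch of deferred cache filling: single lookups stay cheap for the
// "open, add one category, close" case, and a bulk load only happens once the miss
// count shows the cache is actually being exercised.
internal sealed class DeferredFillCache
{
    private readonly Dictionary<string, int> map = new Dictionary<string, int>();
    private readonly int missesUntilFill;
    private int misses;
    private bool isComplete;

    public DeferredFillCache(int missesUntilFill) => this.missesUntilFill = missesUntilFill;

    public int Find(string key, Func<string, int> lookupOne,
        Func<IEnumerable<KeyValuePair<string, int>>> loadAll)
    {
        if (map.TryGetValue(key, out int value))
            return value;
        if (isComplete)
            return -1; // a complete cache can answer "not found" without a disk lookup

        if (++misses >= missesUntilFill)
        {
            foreach (var entry in loadAll()) // one bulk read replaces many single lookups
                map[entry.Key] = entry.Value;
            isComplete = true;
            return map.TryGetValue(key, out value) ? value : -1;
        }

        value = lookupOne(key); // single on-disk lookup
        if (value >= 0)
            map[key] = value; // only found categories are cached
        return value;
    }
}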