/// <summary>
/// Verifies that a taxonomy writer was supplied before it is used to resolve facet ordinals.
/// </summary>
/// <param name="taxoWriter">The taxonomy writer passed in by the caller; may be <c>null</c>.</param>
/// <exception cref="ThreadStateException">Thrown when <paramref name="taxoWriter"/> is <c>null</c>.</exception>
private static void CheckTaxoWriter(ITaxonomyWriter taxoWriter) // LUCENENET: CA1822: Mark members as static
{
    if (taxoWriter != null)
    {
        return;
    }

    // NOTE(review): ThreadStateException is an unusual type for this guard; it is kept because
    // callers may already catch it.
    throw new ThreadStateException("a non-null ITaxonomyWriter must be provided when indexing FacetField or AssociationFacetField");
}
/// <summary>
/// Ensures the caller provided a taxonomy writer; fails fast otherwise.
/// </summary>
/// <param name="taxoWriter">The taxonomy writer to validate; may be <c>null</c>.</param>
/// <exception cref="ThreadStateException">Thrown when <paramref name="taxoWriter"/> is <c>null</c>.</exception>
private void CheckTaxoWriter(ITaxonomyWriter taxoWriter)
{
    bool writerMissing = taxoWriter == null;
    if (writerMissing)
    {
        const string message = "a non-null ITaxonomyWriter must be provided when indexing FacetField or AssociationFacetField";
        throw new ThreadStateException(message);
    }
}
/// <summary>
/// Creates a document holding a single facet in dimension "A" whose value is the
/// upper-case hexadecimal form of <paramref name="id"/>, then runs it through the
/// instance's facets configuration.
/// </summary>
/// <param name="taxoWriter">Taxonomy writer handed to <c>config.Build</c>.</param>
/// <param name="id">Number rendered with the "X" format and used as the facet path.</param>
/// <returns>The document returned by <c>config.Build</c>.</returns>
private Document NewDocument(ITaxonomyWriter taxoWriter, int id)
{
    Document source = new Document();
    string hexId = id.ToString("X");
    FacetField facet = new FacetField("A", hexId);
    source.Add(facet);

    Document built = config.Build(taxoWriter, source);
    return built;
}
/// <summary>
/// Creates a document with the single facet "A"/"1" and builds it with a freshly
/// constructed <see cref="FacetsConfig"/>.
/// </summary>
/// <param name="taxoWriter">Taxonomy writer handed to <see cref="FacetsConfig.Build"/>.</param>
/// <returns>The document returned by the configuration's <c>Build</c> call.</returns>
private Document NewDocument(ITaxonomyWriter taxoWriter)
{
    FacetsConfig facetsConfig = new FacetsConfig();

    Document source = new Document();
    FacetField facet = new FacetField("A", "1");
    source.Add(facet);

    return facetsConfig.Build(taxoWriter, source);
}
/// <summary>
/// Initializes the indexer thread by storing every collaborator in the matching instance field.
/// </summary>
/// <param name="w">Index writer stored in <c>this.w</c>.</param>
/// <param name="config">Facets configuration stored in <c>this.config</c>.</param>
/// <param name="tw">Taxonomy writer stored in <c>this.tw</c>.</param>
/// <param name="mgr">Searcher/taxonomy reference manager stored in <c>this.mgr</c>.</param>
/// <param name="ordLimit">Value stored in <c>this.ordLimit</c>.</param>
/// <param name="stop">Flag stored in <c>this.stop</c>.</param>
public IndexerThread(
    IndexWriter w,
    FacetsConfig config,
    ITaxonomyWriter tw,
    ReferenceManager<SearcherAndTaxonomy> mgr,
    int ordLimit,
    AtomicBoolean stop)
{
    // Plain field capture only; no work is started here.
    this.stop = stop;
    this.ordLimit = ordLimit;
    this.mgr = mgr;
    this.tw = tw;
    this.config = config;
    this.w = w;
}
/// <summary>
/// Adds one document per entry of <c>CATEGORIES</c> to the index. Each document carries
/// that facet plus a stored text field "content" with the value "alpha".
/// </summary>
/// <param name="tw">Taxonomy writer used when building each document.</param>
/// <param name="iw">Index writer receiving the built documents.</param>
/// <param name="config">Facets configuration that builds each document.</param>
private static void seedIndex(ITaxonomyWriter tw, RandomIndexWriter iw, FacetsConfig config)
{
    foreach (FacetField category in CATEGORIES)
    {
        Document seed = new Document();
        seed.Add(category);

        TextField content = new TextField("content", "alpha", Field.Store.YES);
        seed.Add(content);

        Document built = config.Build(tw, seed);
        iw.AddDocument(built);
    }
}
/// <summary>
/// Repeatedly re-creates a taxonomy in the same directory (<see cref="OpenMode.CREATE"/>)
/// and checks that a reader - either freshly opened or refreshed through
/// <c>TaxonomyReader.OpenIfChanged</c> - reports the expected number of categories.
/// </summary>
/// <param name="random">Source of the per-round category count.</param>
/// <param name="closeReader">
/// When <c>true</c> the reader is disposed and re-opened each round; otherwise it is
/// refreshed via <c>OpenIfChanged</c>, which is asserted to return a non-null reader.
/// </param>
private void doTestReadRecreatedTaxonomy(Random random, bool closeReader)
{
    const int categoryCount = 10;

    Directory taxoDir = null;
    ITaxonomyWriter writer = null;
    TaxonomyReader reader = null;

    // prepare a few categories
    FacetLabel[] labels = new FacetLabel[categoryCount];
    for (int idx = 0; idx < categoryCount; idx++)
    {
        labels[idx] = new FacetLabel("a", Convert.ToString(idx, CultureInfo.InvariantCulture));
    }

    try
    {
        taxoDir = NewDirectory();

        // Initial taxonomy holding only "a".
        writer = new DirectoryTaxonomyWriter(taxoDir);
        writer.AddCategory(new FacetLabel("a"));
        writer.Dispose();

        reader = new DirectoryTaxonomyReader(taxoDir);
        int baseNumCategories = reader.Count;

        for (int round = 0; round < categoryCount; round++)
        {
            int k = random.Next(categoryCount);

            // Re-create the taxonomy from scratch with labels[0..k].
            writer = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
            for (int j = 0; j <= k; j++)
            {
                writer.AddCategory(labels[j]);
            }
            writer.Dispose();

            if (!closeReader)
            {
                var refreshed = TaxonomyReader.OpenIfChanged(reader);
                Assert.IsNotNull(refreshed);
                reader.Dispose();
                reader = refreshed;
            }
            else
            {
                reader.Dispose();
                reader = new DirectoryTaxonomyReader(taxoDir);
            }

            int expectedCount = baseNumCategories + 1 + k;
            Assert.AreEqual(expectedCount, reader.Count, "Wrong #categories in taxonomy (i=" + round + ", k=" + k + ")");
        }
    }
    finally
    {
        IOUtils.Dispose(reader, writer, taxoDir);
    }
}
/// <summary>
/// Indexes a random number of documents (as returned by <c>AtLeast(random, 2)</c>) that
/// carry facets only, then commits so that a segment is flushed.
/// </summary>
/// <param name="indexWriter">Writer receiving the documents.</param>
/// <param name="taxoWriter">Taxonomy writer used when building each document.</param>
/// <param name="expectedCounts">Not read by this method.</param>
private static void IndexDocsWithFacetsNoTerms(IndexWriter indexWriter, ITaxonomyWriter taxoWriter, IDictionary<string, int?> expectedCounts)
{
    Random random = Random;
    int numDocs = AtLeast(random, 2);
    FacetsConfig config = Config;

    for (int remaining = numDocs; remaining > 0; remaining--)
    {
        Document facetOnly = new Document();
        AddFacets(facetOnly, config, false);

        Document built = config.Build(taxoWriter, facetOnly);
        indexWriter.AddDocument(built);
    }

    // flush a segment
    indexWriter.Commit();
}
/// <summary>
/// Commits the taxonomy writer held by <c>RunData</c>.
/// </summary>
/// <returns>Always <c>1</c>.</returns>
/// <remarks>
/// Throws the exception produced by <c>IllegalStateException.Create</c> when
/// <c>RunData.TaxonomyWriter</c> is <c>null</c>.
/// </remarks>
public override int DoLogic()
{
    ITaxonomyWriter writer = RunData.TaxonomyWriter;
    if (writer == null)
    {
        throw IllegalStateException.Create("TaxonomyWriter is not currently open");
    }

    writer.Commit();
    return 1;
}
/// <summary>
/// Commits the taxonomy writer held by <c>RunData</c>.
/// </summary>
/// <returns>Always <c>1</c>.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when <c>RunData.TaxonomyWriter</c> is <c>null</c>.
/// </exception>
public override int DoLogic()
{
    ITaxonomyWriter openWriter = RunData.TaxonomyWriter;
    bool isOpen = openWriter != null;
    if (!isOpen)
    {
        throw new InvalidOperationException("TaxonomyWriter is not currently open");
    }

    openWriter.Commit();
    return 1;
}
/// <summary>
/// Indexes a random number of documents (as returned by <c>AtLeast(random, 2)</c>), all with
/// facets and a random subset also with a content field, then commits to flush a segment.
/// </summary>
/// <param name="indexWriter">Writer receiving the documents.</param>
/// <param name="taxoWriter">Taxonomy writer used when building each document.</param>
/// <param name="expectedCounts">Not read by this method.</param>
private static void IndexDocsWithFacetsAndSomeTerms(IndexWriter indexWriter, ITaxonomyWriter taxoWriter, IDictionary<string, int?> expectedCounts)
{
    Random random = Random;
    int numDocs = AtLeast(random, 2);
    FacetsConfig config = Config;

    int indexed = 0;
    while (indexed < numDocs)
    {
        Document candidate = new Document();

        // Decide per document whether it also gets the content field.
        bool hasContent = random.NextBoolean();
        if (hasContent)
        {
            AddField(candidate);
        }
        AddFacets(candidate, config, hasContent);

        indexWriter.AddDocument(config.Build(taxoWriter, candidate));
        indexed++;
    }

    // flush a segment
    indexWriter.Commit();
}
/// <summary>
/// Adds two documents to the index and commits.
/// </summary>
/// <param name="taxoWriter">Taxonomy writer used when <paramref name="config"/> builds a document.</param>
/// <param name="indexWriter">Writer receiving the documents.</param>
/// <param name="config">
/// When non-null, each document gets a facet "A" whose value is the document number and is
/// built through this configuration; when <c>null</c> the raw document is added unchanged.
/// </param>
/// <param name="withContent">When <c>true</c>, each document also gets the string field "f" = "a".</param>
private void indexTwoDocs(ITaxonomyWriter taxoWriter, IndexWriter indexWriter, FacetsConfig config, bool withContent)
{
    const int docCount = 2;

    for (int docNumber = 0; docNumber < docCount; docNumber++)
    {
        Document doc = new Document();
        if (withContent)
        {
            doc.Add(new StringField("f", "a", Field.Store.NO));
        }

        if (config == null)
        {
            // No facets requested: index the document as-is.
            indexWriter.AddDocument(doc);
            continue;
        }

        doc.Add(new FacetField("A", Convert.ToString(docNumber)));
        indexWriter.AddDocument(config.Build(taxoWriter, doc));
    }

    indexWriter.Commit();
}
/// <summary>
/// For every index field, encodes its association facet fields into a single binary doc-values
/// payload (4-byte big-endian ordinal followed by the association bytes, per facet) and adds
/// the drill-down string terms for each facet path prefix to <paramref name="doc"/>.
/// </summary>
/// <param name="taxoWriter">Taxonomy writer used to obtain each category ordinal; validated per facet.</param>
/// <param name="byField">Association facet fields grouped by index field name.</param>
/// <param name="doc">Output document receiving the generated fields.</param>
private void ProcessAssocFacetFields(ITaxonomyWriter taxoWriter, IDictionary<string, IList<AssociationFacetField>> byField, Document doc)
{
    foreach (KeyValuePair<string, IList<AssociationFacetField>> entry in byField)
    {
        string indexFieldName = entry.Key;
        byte[] buffer = new byte[16];
        int used = 0;

        foreach (AssociationFacetField assocField in entry.Value)
        {
            // NOTE: we don't add parents for associations
            CheckTaxoWriter(taxoWriter);
            FacetLabel label = new FacetLabel(assocField.Dim, assocField.Path);
            int ordinal = taxoWriter.AddCategory(label);

            // Make room for the ordinal, then write it big-endian.
            int afterOrdinal = used + 4;
            if (afterOrdinal > buffer.Length)
            {
                buffer = ArrayUtil.Grow(buffer, afterOrdinal);
            }
            buffer[used] = (byte)(ordinal >> 24);
            buffer[used + 1] = (byte)(ordinal >> 16);
            buffer[used + 2] = (byte)(ordinal >> 8);
            buffer[used + 3] = (byte)ordinal;
            used = afterOrdinal;

            // Append the association payload right after the ordinal.
            BytesRef payload = assocField.Assoc;
            int afterPayload = used + payload.Length;
            if (afterPayload > buffer.Length)
            {
                buffer = ArrayUtil.Grow(buffer, afterPayload);
            }
            Array.Copy(payload.Bytes, payload.Offset, buffer, used, payload.Length);
            used = afterPayload;

            // Drill down: one term per path prefix of the label.
            int depth = 1;
            while (depth <= label.Length)
            {
                doc.Add(new StringField(indexFieldName, PathToString(label.Components, depth), Field.Store.NO));
                depth++;
            }
        }

        doc.Add(new BinaryDocValuesField(indexFieldName, new BytesRef(buffer, 0, used)));
    }
}
/// <summary>
/// Disposes the current index and taxonomy resources, re-creates both directories,
/// resets the inputs and restarts the clock.
/// </summary>
/// <param name="eraseIndex">Passed through to <c>CreateDirectory</c> for both directories.</param>
public virtual void Reinit(bool eraseIndex)
{
    // Tear down the index side first ...
    IOUtils.Dispose(indexWriter, indexReader, directory);
    indexReader = null;
    indexWriter = null;

    // ... then the taxonomy side.
    IOUtils.Dispose(taxonomyWriter, taxonomyReader, taxonomyDir);
    taxonomyReader = null;
    taxonomyWriter = null;

    // directory (default is ram-dir).
    directory = CreateDirectory(eraseIndex, "index", "directory");
    taxonomyDir = CreateDirectory(eraseIndex, "taxo", "taxonomy.directory");

    // inputs
    ResetInputs();

    // release unused stuff
    GC.Collect();

    // Re-init clock
    SetStartTimeMillis();
}
/// <summary>
/// Adds every entry of <c>categories</c> to the taxonomy and fails the test unless each
/// addition returns the ordinal stored as the last element of the matching
/// <c>ExpectedPaths</c> row.
/// <para>
/// The expected ordinals assume an empty taxonomy; calling this after other categories
/// were already added will make it fail.
/// </para>
/// </summary>
/// <param name="tw">Taxonomy writer receiving the categories.</param>
public static void FillTaxonomy(ITaxonomyWriter tw)
{
    for (int i = 0; i < categories.Length; i++)
    {
        var category = categories[i];
        var expectedPath = ExpectedPaths[i];

        int actual = tw.AddCategory(new FacetLabel(category));
        int expected = expectedPath[expectedPath.Length - 1];
        if (actual == expected)
        {
            continue;
        }

        Fail("For category " + Showcat(category) + " expected ordinal " + expected + ", but got " + actual);
    }
}
/// <summary>
/// For every index field, resolves the ordinals of its facet fields through the taxonomy
/// writer, optionally adds the ancestor ordinals, emits the drill-down terms and finally adds
/// one binary doc-values field with the de-duplicated, encoded ordinals.
/// </summary>
/// <param name="taxoWriter">Taxonomy writer used to add categories and look up parents; validated per facet.</param>
/// <param name="byField">Facet fields grouped by index field name.</param>
/// <param name="doc">Output document receiving the generated fields.</param>
/// <exception cref="ArgumentException">
/// Thrown when a facet has more than one path component but its dimension is not hierarchical.
/// </exception>
private void ProcessFacetFields(ITaxonomyWriter taxoWriter, IDictionary<string, IList<FacetField>> byField, Document doc)
{
    foreach (KeyValuePair<string, IList<FacetField>> entry in byField)
    {
        string indexFieldName = entry.Key;
        Int32sRef ordinals = new Int32sRef(32);

        foreach (FacetField facetField in entry.Value)
        {
            FacetsConfig.DimConfig dimConfig = GetDimConfig(facetField.Dim);
            if (facetField.Path.Length > 1 && !dimConfig.IsHierarchical)
            {
                throw new ArgumentException("dimension \"" + facetField.Dim + "\" is not hierarchical yet has " + facetField.Path.Length + " components");
            }

            FacetLabel label = new FacetLabel(facetField.Dim, facetField.Path);

            CheckTaxoWriter(taxoWriter);
            int ordinal = taxoWriter.AddCategory(label);
            if (ordinals.Length == ordinals.Int32s.Length)
            {
                ordinals.Grow(ordinals.Length + 1);
            }
            ordinals.Int32s[ordinals.Length++] = ordinal;

            bool addParents = dimConfig.IsMultiValued && (dimConfig.IsHierarchical || dimConfig.RequireDimCount);
            if (addParents)
            {
                // Add all parents too: walk up until the parent is no longer a positive ordinal.
                for (int parent = taxoWriter.GetParent(ordinal); parent > 0; parent = taxoWriter.GetParent(parent))
                {
                    if (ordinals.Int32s.Length == ordinals.Length)
                    {
                        ordinals.Grow(ordinals.Length + 1);
                    }
                    ordinals.Int32s[ordinals.Length++] = parent;
                }

                if (!dimConfig.RequireDimCount)
                {
                    // Remove last (dimension) ord:
                    ordinals.Length--;
                }
            }

            // Drill down: one term per path prefix of the label.
            int depth = 1;
            while (depth <= label.Length)
            {
                doc.Add(new StringField(indexFieldName, PathToString(label.Components, depth), Field.Store.NO));
                depth++;
            }
        }

        // Facet counts:
        // DocValues are considered stored fields:
        doc.Add(new BinaryDocValuesField(indexFieldName, DedupAndEncode(ordinals)));
    }
}
/// <summary>
/// Converts the facet fields of <paramref name="doc"/> (<see cref="FacetField"/>,
/// <see cref="SortedSetDocValuesFacetField"/> and <see cref="AssociationFacetField"/>) into
/// regular indexable fields and copies every other field over unchanged.
///
/// <para>
/// <b>NOTE:</b> you should add the returned document to <see cref="Index.IndexWriter"/>, not the
/// input one!
/// </para>
/// </summary>
/// <param name="taxoWriter">Taxonomy writer passed to the taxonomy-based and association processing steps.</param>
/// <param name="doc">Input document whose fields are inspected; it is not modified here.</param>
/// <returns>A new document holding the translated facet fields plus all non-facet fields.</returns>
/// <exception cref="ArgumentException">
/// Thrown when an association facet's dimension is hierarchical or requires a dimension
/// count, or when association value types are mixed within one index field.
/// </exception>
public virtual Document Build(ITaxonomyWriter taxoWriter, Document doc)
{
    // Find all FacetFields, collated by the actual field:
    IDictionary<string, IList<FacetField>> byField = new Dictionary<string, IList<FacetField>>();

    // ... and also all SortedSetDocValuesFacetFields:
    IDictionary<string, IList<SortedSetDocValuesFacetField>> dvByField = new Dictionary<string, IList<SortedSetDocValuesFacetField>>();

    // ... and also all AssociationFacetFields
    IDictionary<string, IList<AssociationFacetField>> assocByField = new Dictionary<string, IList<AssociationFacetField>>();

    var seenDims = new JCG.HashSet<string>();

    foreach (IIndexableField field in doc.Fields)
    {
        if (field.IndexableFieldType == FacetField.TYPE)
        {
            var taxoField = (FacetField)field;
            FacetsConfig.DimConfig dimConfig = GetDimConfig(taxoField.Dim);
            if (!dimConfig.IsMultiValued)
            {
                CheckSeen(seenDims, taxoField.Dim);
            }

            string indexFieldName = dimConfig.IndexFieldName;
            if (!byField.TryGetValue(indexFieldName, out IList<FacetField> bucket))
            {
                bucket = new List<FacetField>();
                byField[indexFieldName] = bucket;
            }
            bucket.Add(taxoField);
        }

        if (field.IndexableFieldType == SortedSetDocValuesFacetField.TYPE)
        {
            var sortedSetField = (SortedSetDocValuesFacetField)field;
            FacetsConfig.DimConfig dimConfig = GetDimConfig(sortedSetField.Dim);
            if (!dimConfig.IsMultiValued)
            {
                CheckSeen(seenDims, sortedSetField.Dim);
            }

            string indexFieldName = dimConfig.IndexFieldName;
            if (!dvByField.TryGetValue(indexFieldName, out IList<SortedSetDocValuesFacetField> bucket))
            {
                bucket = new List<SortedSetDocValuesFacetField>();
                dvByField[indexFieldName] = bucket;
            }
            bucket.Add(sortedSetField);
        }

        if (field.IndexableFieldType == AssociationFacetField.TYPE)
        {
            var assocField = (AssociationFacetField)field;
            FacetsConfig.DimConfig dimConfig = GetDimConfig(assocField.Dim);
            if (!dimConfig.IsMultiValued)
            {
                CheckSeen(seenDims, assocField.Dim);
            }
            if (dimConfig.IsHierarchical)
            {
                throw new ArgumentException("AssociationFacetField cannot be hierarchical (dim=\"" + assocField.Dim + "\")");
            }
            if (dimConfig.RequireDimCount)
            {
                throw new ArgumentException("AssociationFacetField cannot requireDimCount (dim=\"" + assocField.Dim + "\")");
            }

            string indexFieldName = dimConfig.IndexFieldName;
            if (!assocByField.TryGetValue(indexFieldName, out IList<AssociationFacetField> bucket))
            {
                bucket = new List<AssociationFacetField>();
                assocByField[indexFieldName] = bucket;
            }
            bucket.Add(assocField);

            // Best effort: detect mis-matched types in same
            // indexed field:
            string type = assocField is Int32AssociationFacetField
                ? "int"
                : assocField is SingleAssociationFacetField
                    ? "float"
                    : "bytes";

            // NOTE: not thread safe, but this is just best effort:
            if (assocDimTypes.TryGetValue(indexFieldName, out string curType))
            {
                if (!curType.Equals(type, StringComparison.Ordinal))
                {
                    throw new ArgumentException("mixing incompatible types of AssocationFacetField (" + curType + " and " + type + ") in indexed field \"" + indexFieldName + "\"; use FacetsConfig to change the indexFieldName for each dimension");
                }
            }
            else
            {
                assocDimTypes[indexFieldName] = type;
            }
        }
    }

    Document result = new Document();

    ProcessFacetFields(taxoWriter, byField, result);
    ProcessSSDVFacetFields(dvByField, result);
    ProcessAssocFacetFields(taxoWriter, assocByField, result);

    // Copy over every field that is not one of the three facet field kinds.
    foreach (IIndexableField field in doc.Fields)
    {
        IIndexableFieldType fieldType = field.IndexableFieldType;
        bool isFacetField = fieldType == FacetField.TYPE
            || fieldType == SortedSetDocValuesFacetField.TYPE
            || fieldType == AssociationFacetField.TYPE;
        if (!isFacetField)
        {
            result.Add(field);
        }
    }

    return result;
}
/// <summary>
/// Adds every entry of <c>categories</c> to the taxonomy and fails the test unless each
/// addition returns the ordinal stored as the last element of the matching
/// <c>ExpectedPaths</c> row. This is the same as <c>FillTaxonomy()</c>, but it additionally
/// walks up through <c>GetParent()</c> and compares every ancestor with the rest of the row.
/// <para>
/// The expected ordinals assume an empty taxonomy; calling this after other categories
/// were already added will make it fail.
/// </para>
/// </summary>
/// <param name="tw">Taxonomy writer receiving the categories and answering the parent lookups.</param>
public static void FillTaxonomyCheckPaths(ITaxonomyWriter tw)
{
    for (int i = 0; i < categories.Length; i++)
    {
        var category = categories[i];
        var expectedPath = ExpectedPaths[i];
        int leafIndex = expectedPath.Length - 1;

        int ordinal = tw.AddCategory(new FacetLabel(category));
        int expectedOrdinal = expectedPath[leafIndex];
        if (ordinal != expectedOrdinal)
        {
            Fail("For category " + Showcat(category) + " expected ordinal " + expectedOrdinal + ", but got " + ordinal);
        }

        // Climb from the leaf towards the root, comparing each ancestor.
        int level = leafIndex - 1;
        while (level >= 0)
        {
            ordinal = tw.GetParent(ordinal);
            expectedOrdinal = expectedPath[level];
            if (ordinal != expectedOrdinal)
            {
                Fail("For category " + Showcat(category) + " expected ancestor level " + (expectedPath.Length - 1 - level) + " was " + expectedOrdinal + ", but got " + ordinal);
            }
            level--;
        }
    }
}
/// <summary>
/// To be run after the taxonomy has been filled: checks that <c>GetParent()</c> returns the
/// ancestors listed in <c>ExpectedPaths</c> for every category, that the first element of
/// each row has the root ordinal as parent, and that the root's parent is the invalid ordinal.
/// </summary>
/// <param name="tw">Taxonomy writer answering the parent lookups.</param>
public static void CheckPaths(ITaxonomyWriter tw)
{
    for (int i = 0; i < categories.Length; i++)
    {
        var expectedPath = ExpectedPaths[i];
        int leafIndex = expectedPath.Length - 1;
        int ordinal = expectedPath[leafIndex];

        // Climb from the leaf towards the root, comparing each ancestor.
        int level = leafIndex - 1;
        while (level >= 0)
        {
            ordinal = tw.GetParent(ordinal);
            int expectedOrdinal = expectedPath[level];
            if (ordinal != expectedOrdinal)
            {
                Fail("For category " + Showcat(categories[i]) + " expected ancestor level " + (expectedPath.Length - 1 - level) + " was " + expectedOrdinal + ", but got " + ordinal);
            }
            level--;
        }

        Assert.AreEqual(TaxonomyReader.ROOT_ORDINAL, tw.GetParent(expectedPath[0]));
    }

    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tw.GetParent(TaxonomyReader.ROOT_ORDINAL));
}
/// <summary>
/// Cross-checks the writer's <c>GetParent()</c> against the paths reported by the reader:
/// the root's parent must be the invalid ordinal, every other ordinal's parent must be the
/// category one component shorter, and out-of-range ordinals must throw
/// <see cref="System.IndexOutOfRangeException"/>.
/// </summary>
/// <param name="tr">Reader supplying the category count and the path for each ordinal.</param>
/// <param name="tw">Writer whose <c>GetParent()</c> is being verified.</param>
private void CheckWriterParent(TaxonomyReader tr, ITaxonomyWriter tw)
{
    // check that the parent of the root ordinal is the invalid ordinal:
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tw.GetParent(0));

    // check parent of non-root ordinals:
    int ordinal = 1;
    while (ordinal < tr.Count)
    {
        FacetLabel me = tr.GetPath(ordinal);
        int parentOrdinal = tw.GetParent(ordinal);
        FacetLabel parent = tr.GetPath(parentOrdinal);
        if (parent == null)
        {
            Fail("Parent of " + ordinal + " is " + parentOrdinal + ", but this is not a valid category.");
        }

        // verify that the parent is indeed my parent, according to the strings
        FacetLabel expectedParent = me.Subpath(me.Length - 1);
        bool parentMatches = expectedParent.Equals(parent);
        if (!parentMatches)
        {
            Fail("Got parent " + parentOrdinal + " for ordinal " + ordinal + " but categories are " + Showcat(parent) + " and " + Showcat(me) + " respectively.");
        }

        ordinal++;
    }

    // check parent of invalid ordinals:
    try
    {
        tw.GetParent(-1);
        Fail("getParent for -1 should throw exception");
    }
    catch (System.IndexOutOfRangeException)
    {
        // expected
    }

    try
    {
        tw.GetParent(TaxonomyReader.INVALID_ORDINAL);
        Fail("getParent for INVALID_ORDINAL should throw exception");
    }
    catch (System.IndexOutOfRangeException)
    {
        // expected
    }

    try
    {
        int beyondLast = tw.GetParent(tr.Count);
        Fail("getParent for getSize() should throw exception, but returned " + beyondLast);
    }
    catch (System.IndexOutOfRangeException)
    {
        // expected
    }
}
/// <summary>
/// Saves this instance by passing it to <paramref name="writer"/>'s <c>Write</c> method.
/// </summary>
/// <param name="writer">Writer that receives this instance.</param>
public void Save(ITaxonomyWriter writer) => writer.Write(this);
/// <summary>
/// Indexes a random number of documents (as returned by <c>AtLeast(random, 2)</c>) that
/// carry facets only, then commits so that a segment is flushed.
/// </summary>
/// <param name="indexWriter">Writer receiving the documents.</param>
/// <param name="taxoWriter">Taxonomy writer used when building each document.</param>
/// <param name="expectedCounts">Not read by this method.</param>
private static void IndexDocsWithFacetsNoTerms(IndexWriter indexWriter, ITaxonomyWriter taxoWriter, IDictionary<string, int?> expectedCounts)
{
    Random random = Random();
    int numDocs = AtLeast(random, 2);
    FacetsConfig config = Config;

    for (int remaining = numDocs; remaining > 0; remaining--)
    {
        Document facetOnly = new Document();
        AddFacets(facetOnly, config, false);

        Document built = config.Build(taxoWriter, facetOnly);
        indexWriter.AddDocument(built);
    }

    // flush a segment
    indexWriter.Commit();
}
/// <summary>
/// Indexes a random number of documents (as returned by <c>AtLeast(random, 2)</c>), all with
/// facets and a random subset also with a content field, then commits to flush a segment.
/// </summary>
/// <param name="indexWriter">Writer receiving the documents.</param>
/// <param name="taxoWriter">Taxonomy writer used when building each document.</param>
/// <param name="expectedCounts">Not read by this method.</param>
private static void IndexDocsWithFacetsAndSomeTerms(IndexWriter indexWriter, ITaxonomyWriter taxoWriter, IDictionary<string, int?> expectedCounts)
{
    Random random = Random();
    int numDocs = AtLeast(random, 2);
    FacetsConfig config = Config;

    int indexed = 0;
    while (indexed < numDocs)
    {
        Document candidate = new Document();

        // Decide per document whether it also gets the content field.
        bool hasContent = random.NextBoolean();
        if (hasContent)
        {
            AddField(candidate);
        }
        AddFacets(candidate, config, hasContent);

        indexWriter.AddDocument(config.Build(taxoWriter, candidate));
        indexed++;
    }

    // flush a segment
    indexWriter.Commit();
}