/// <summary>
/// Indexes the taxonomy-based <see cref="FacetField"/>s into <paramref name="doc"/>.
/// For every index field name this adds one drill-down <c>StringField</c> per path
/// prefix of each facet label, plus a single <c>BinaryDocValuesField</c> holding the
/// result of <c>DedupAndEncode</c> over all collected ordinals.
/// </summary>
/// <param name="taxoWriter">Taxonomy writer used to resolve labels to ordinals; validated by <c>checkTaxoWriter</c> before each use.</param>
/// <param name="byField">Facet fields grouped by the index field name they are written to.</param>
/// <param name="doc">Document that receives the generated fields.</param>
/// <exception cref="System.ArgumentException">
/// A facet field has more than one path component but its dimension is not configured as hierarchical.
/// </exception>
private void ProcessFacetFields(TaxonomyWriter taxoWriter, IDictionary<string, IList<FacetField>> byField, Document doc)
{
    foreach (KeyValuePair<string, IList<FacetField>> group in byField)
    {
        string indexFieldName = group.Key;
        IList<FacetField> facetFields = group.Value;

        // Accumulates every ordinal that must be counted for this index field.
        IntsRef ords = new IntsRef(32);

        foreach (FacetField facetField in facetFields)
        {
            FacetsConfig.DimConfig dimConfig = GetDimConfig(facetField.dim);
            if (facetField.path.Length > 1 && !dimConfig.Hierarchical)
            {
                throw new System.ArgumentException("dimension \"" + facetField.dim + "\" is not hierarchical yet has " + facetField.path.Length + " components");
            }

            FacetLabel label = new FacetLabel(facetField.dim, facetField.path);

            checkTaxoWriter(taxoWriter);
            int ordinal = taxoWriter.AddCategory(label);

            // Make room for one more entry before appending.
            if (ords.Length == ords.Ints.Length)
            {
                ords.Grow(ords.Length + 1);
            }
            ords.Ints[ords.Length++] = ordinal;

            bool needsParents = dimConfig.MultiValued && (dimConfig.Hierarchical || dimConfig.RequireDimCount);
            if (needsParents)
            {
                // Walk up the taxonomy, recording each ancestor until GetParent
                // returns a non-positive ordinal.
                for (int parent = taxoWriter.GetParent(ordinal); parent > 0; parent = taxoWriter.GetParent(parent))
                {
                    if (ords.Ints.Length == ords.Length)
                    {
                        ords.Grow(ords.Length + 1);
                    }
                    ords.Ints[ords.Length++] = parent;
                }

                if (!dimConfig.RequireDimCount)
                {
                    // The last ancestor appended is the dimension's own ordinal;
                    // drop it when a dimension-level count was not requested.
                    ords.Length--;
                }
            }

            // Drill-down terms: one per prefix length 1..label.Length
            // (presumably PathToString renders the first `prefix` components — confirm against its definition).
            for (int prefix = 1; prefix <= label.Length; prefix++)
            {
                string term = PathToString(label.Components, prefix);
                doc.Add(new StringField(indexFieldName, term, Field.Store.NO));
            }
        }

        // Facet counts are carried by a doc-values field holding the encoded ordinals.
        doc.Add(new BinaryDocValuesField(indexFieldName, DedupAndEncode(ords)));
    }
}
/// <summary>
/// Verifies that a taxonomy writer was supplied before it is used to index
/// <c>FacetField</c> or <c>AssociationFacetField</c> instances.
/// </summary>
/// <param name="taxoWriter">The writer to validate.</param>
/// <exception cref="ThreadStateException">Thrown when <paramref name="taxoWriter"/> is <c>null</c>.</exception>
private void checkTaxoWriter(TaxonomyWriter taxoWriter)
{
    if (taxoWriter != null)
    {
        return;
    }

    // NOTE(review): ThreadStateException is an unusual type for a missing-writer
    // condition (InvalidOperationException would be the conventional choice); it is
    // kept here because callers may already catch it — confirm before changing.
    throw new ThreadStateException("a non-null TaxonomyWriter must be provided when indexing FacetField or AssociationFacetField");
}
/// <summary>
/// Indexes the <see cref="AssociationFacetField"/>s into <paramref name="doc"/>.
/// For every index field name this adds one drill-down <c>StringField</c> per path
/// prefix of each label, plus a single <c>BinaryDocValuesField</c> whose payload is,
/// for each field in order, the 4-byte big-endian category ordinal followed by the
/// field's association bytes.
/// </summary>
/// <param name="taxoWriter">Taxonomy writer used to resolve labels to ordinals; validated by <c>checkTaxoWriter</c> before each use.</param>
/// <param name="byField">Association facet fields grouped by the index field name they are written to.</param>
/// <param name="doc">Document that receives the generated fields.</param>
private void ProcessAssocFacetFields(TaxonomyWriter taxoWriter, IDictionary<string, IList<AssociationFacetField>> byField, Document doc)
{
    foreach (KeyValuePair<string, IList<AssociationFacetField>> group in byField)
    {
        string indexFieldName = group.Key;

        // Growable output buffer; `used` is the number of valid bytes written so far.
        byte[] buffer = new byte[16];
        int used = 0;

        foreach (AssociationFacetField assocField in group.Value)
        {
            // Parents are intentionally not added for associations.
            checkTaxoWriter(taxoWriter);
            FacetLabel label = new FacetLabel(assocField.dim, assocField.path);
            int ordinal = taxoWriter.AddCategory(label);

            if (used + 4 > buffer.Length)
            {
                buffer = ArrayUtil.Grow(buffer, used + 4);
            }

            // Ordinal, most significant byte first (big-endian).
            for (int shift = 24; shift >= 0; shift -= 8)
            {
                buffer[used++] = (byte)(ordinal >> shift);
            }

            // Association payload, copied verbatim right after the ordinal.
            var payload = assocField.assoc;
            if (used + payload.Length > buffer.Length)
            {
                buffer = ArrayUtil.Grow(buffer, used + payload.Length);
            }
            Array.Copy(payload.Bytes, payload.Offset, buffer, used, payload.Length);
            used += payload.Length;

            // Drill-down terms: one per prefix length 1..label.Length.
            for (int prefix = 1; prefix <= label.Length; prefix++)
            {
                string term = PathToString(label.Components, prefix);
                doc.Add(new StringField(indexFieldName, term, Field.Store.NO));
            }
        }

        doc.Add(new BinaryDocValuesField(indexFieldName, new BytesRef(buffer, 0, used)));
    }
}
/// <summary>
/// Translates any added <see cref="FacetField"/>s, <see cref="SortedSetDocValuesFacetField"/>s
/// and <see cref="AssociationFacetField"/>s into normal fields for indexing.
/// All other fields of <paramref name="doc"/> are copied to the result unchanged.
///
/// <para>
/// <b>NOTE:</b> you should add the returned document to IndexWriter, not the
/// input one!
/// </para>
/// </summary>
/// <param name="taxoWriter">
/// Taxonomy writer used for <see cref="FacetField"/> and <see cref="AssociationFacetField"/>
/// instances; <c>checkTaxoWriter</c> rejects <c>null</c> when such fields are present.
/// </param>
/// <param name="doc">The input document; it is read but not modified by this method.</param>
/// <returns>A new document containing the generated facet fields followed by the non-facet fields of <paramref name="doc"/>.</returns>
/// <exception cref="System.ArgumentException">
/// An <see cref="AssociationFacetField"/> belongs to a dimension configured as hierarchical or
/// as requiring a dimension count, or association fields of different kinds (int / float / bytes)
/// are routed to the same index field.
/// </exception>
public virtual Document Build(TaxonomyWriter taxoWriter, Document doc)
{
    // Facet fields collated by the index field they end up in, one map per facet flavour.
    IDictionary<string, IList<FacetField>> taxoByField = new Dictionary<string, IList<FacetField>>();
    IDictionary<string, IList<SortedSetDocValuesFacetField>> ssdvByField = new Dictionary<string, IList<SortedSetDocValuesFacetField>>();
    IDictionary<string, IList<AssociationFacetField>> assocByField = new Dictionary<string, IList<AssociationFacetField>>();

    // Dimensions already encountered; handed to CheckSeen for single-valued dimensions
    // (presumably it rejects a repeated dimension — confirm against its definition).
    HashSet<string> seenDims = new HashSet<string>();

    foreach (IndexableField field in doc.Fields)
    {
        IndexableFieldType fieldType = field.FieldType();

        if (fieldType == FacetField.TYPE)
        {
            FacetField taxoField = (FacetField)field;
            FacetsConfig.DimConfig config = GetDimConfig(taxoField.dim);
            if (!config.MultiValued)
            {
                CheckSeen(seenDims, taxoField.dim);
            }

            string targetField = config.IndexFieldName;
            IList<FacetField> bucket;
            if (!taxoByField.TryGetValue(targetField, out bucket))
            {
                bucket = new List<FacetField>();
                taxoByField[targetField] = bucket;
            }
            bucket.Add(taxoField);
        }

        if (fieldType == SortedSetDocValuesFacetField.TYPE)
        {
            SortedSetDocValuesFacetField ssdvField = (SortedSetDocValuesFacetField)field;
            FacetsConfig.DimConfig config = GetDimConfig(ssdvField.Dim);
            if (!config.MultiValued)
            {
                CheckSeen(seenDims, ssdvField.Dim);
            }

            string targetField = config.IndexFieldName;
            IList<SortedSetDocValuesFacetField> bucket;
            if (!dvByFieldTryGet(ssdvByField, targetField, out bucket))
            {
                bucket = new List<SortedSetDocValuesFacetField>();
                ssdvByField[targetField] = bucket;
            }
            bucket.Add(ssdvField);
        }

        if (fieldType == AssociationFacetField.TYPE)
        {
            AssociationFacetField assocField = (AssociationFacetField)field;
            FacetsConfig.DimConfig config = GetDimConfig(assocField.dim);
            if (!config.MultiValued)
            {
                CheckSeen(seenDims, assocField.dim);
            }
            if (config.Hierarchical)
            {
                throw new System.ArgumentException("AssociationFacetField cannot be hierarchical (dim=\"" + assocField.dim + "\")");
            }
            if (config.RequireDimCount)
            {
                throw new System.ArgumentException("AssociationFacetField cannot requireDimCount (dim=\"" + assocField.dim + "\")");
            }

            string targetField = config.IndexFieldName;
            IList<AssociationFacetField> bucket;
            if (!assocByField.TryGetValue(targetField, out bucket))
            {
                bucket = new List<AssociationFacetField>();
                assocByField[targetField] = bucket;
            }
            bucket.Add(assocField);

            // Best effort: detect mismatched association kinds sharing one index field.
            string kind = (assocField is IntAssociationFacetField)
                ? "int"
                : (assocField is FloatAssociationFacetField) ? "float" : "bytes";

            // NOTE: assocDimTypes is accessed without synchronization; this check is
            // only best effort and is not thread safe.
            string knownKind;
            if (assocDimTypes.TryGetValue(targetField, out knownKind))
            {
                if (!knownKind.Equals(kind))
                {
                    throw new System.ArgumentException("mixing incompatible types of AssocationFacetField (" + knownKind + " and " + kind + ") in indexed field \"" + targetField + "\"; use FacetsConfig to change the indexFieldName for each dimension");
                }
            }
            else
            {
                assocDimTypes[targetField] = kind;
            }
        }
    }

    // Generated facet fields go first, in this fixed order.
    Document result = new Document();
    ProcessFacetFields(taxoWriter, taxoByField, result);
    processSSDVFacetFields(ssdvByField, result);
    ProcessAssocFacetFields(taxoWriter, assocByField, result);

    // Then carry over every field that is not one of the facet field flavours.
    foreach (IndexableField field in doc.Fields)
    {
        IndexableFieldType fieldType = field.FieldType();
        bool isFacetField = fieldType == FacetField.TYPE
            || fieldType == SortedSetDocValuesFacetField.TYPE
            || fieldType == AssociationFacetField.TYPE;
        if (!isFacetField)
        {
            result.Add(field);
        }
    }

    return result;
}