Ejemplo n.º 1
0
        /// <summary>
        /// Emits the index-time fields for every <see cref="FacetField"/> in
        /// <paramref name="byField"/> into <paramref name="doc"/>.
        /// <para>
        /// For each index field name, one non-stored <see cref="StringField"/> is added per
        /// path prefix of each facet label (drill-down terms), followed by a single
        /// <see cref="BinaryDocValuesField"/> carrying the result of <c>DedupAndEncode</c>
        /// over the ordinals collected for that index field.
        /// </para>
        /// </summary>
        /// <param name="taxoWriter">Writer used to map labels to ordinals and to look up parents;
        /// passed through <c>checkTaxoWriter</c> before each facet field is processed.</param>
        /// <param name="byField">Facet fields grouped by the index field name they are written to.</param>
        /// <param name="doc">Document that receives the generated fields.</param>
        /// <exception cref="System.ArgumentException">A facet field has more than one path
        /// component although its dimension is not configured as hierarchical.</exception>
        private void ProcessFacetFields(TaxonomyWriter taxoWriter, IDictionary<string, IList<FacetField>> byField, Document doc)
        {
            foreach (KeyValuePair<string, IList<FacetField>> entry in byField)
            {
                string indexFieldName = entry.Key;

                // Ordinals accumulated across all facet fields that share this index field.
                IntsRef ords = new IntsRef(32);

                foreach (FacetField facetField in entry.Value)
                {
                    FacetsConfig.DimConfig dimConfig = GetDimConfig(facetField.dim);
                    if (facetField.path.Length > 1 && !dimConfig.Hierarchical)
                    {
                        throw new System.ArgumentException("dimension \"" + facetField.dim + "\" is not hierarchical yet has " + facetField.path.Length + " components");
                    }

                    FacetLabel label = new FacetLabel(facetField.dim, facetField.path);

                    checkTaxoWriter(taxoWriter);
                    int ordinal = taxoWriter.AddCategory(label);

                    // Make room for one more entry when the backing array is full.
                    if (ords.Ints.Length == ords.Length)
                    {
                        ords.Grow(ords.Length + 1);
                    }
                    ords.Ints[ords.Length++] = ordinal;

                    if (dimConfig.MultiValued && (dimConfig.Hierarchical || dimConfig.RequireDimCount))
                    {
                        // Walk up the taxonomy and record every ancestor whose ordinal is positive.
                        for (int ancestor = taxoWriter.GetParent(ordinal); ancestor > 0; ancestor = taxoWriter.GetParent(ancestor))
                        {
                            if (ords.Length == ords.Ints.Length)
                            {
                                ords.Grow(ords.Length + 1);
                            }
                            ords.Ints[ords.Length++] = ancestor;
                        }

                        if (!dimConfig.RequireDimCount)
                        {
                            // Drop the last ordinal that was appended (the dimension itself).
                            ords.Length--;
                        }
                    }

                    // Drill-down terms: one per path prefix, from length 1 up to the full label.
                    int prefixLength = 1;
                    while (prefixLength <= label.Length)
                    {
                        doc.Add(new StringField(indexFieldName, PathToString(label.Components, prefixLength), Field.Store.NO));
                        prefixLength++;
                    }
                }

                // Facet counts are carried in a doc-values field under the same index field name.
                doc.Add(new BinaryDocValuesField(indexFieldName, DedupAndEncode(ords)));
            }
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Verifies that a taxonomy writer was supplied.
 /// </summary>
 /// <param name="taxoWriter">The writer to validate.</param>
 /// <exception cref="ThreadStateException"><paramref name="taxoWriter"/> is <c>null</c>.</exception>
 private void checkTaxoWriter(TaxonomyWriter taxoWriter)
 {
     if (taxoWriter != null)
     {
         return;
     }

     // NOTE(review): ThreadStateException is unrelated to threading here; it looks like a
     // port of an "illegal state" error. Kept as-is so existing catch blocks keep working.
     throw new ThreadStateException("a non-null TaxonomyWriter must be provided when indexing FacetField or AssociationFacetField");
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Emits the index-time fields for every <see cref="AssociationFacetField"/> in
        /// <paramref name="byField"/> into <paramref name="doc"/>.
        /// <para>
        /// For each index field name a byte buffer is built that holds, per association field,
        /// the 4-byte big-endian category ordinal followed by the field's association bytes.
        /// The buffer is added as a single <see cref="BinaryDocValuesField"/>; non-stored
        /// <see cref="StringField"/> drill-down terms are added for every path prefix of each label.
        /// Parent categories are not added for associations.
        /// </para>
        /// </summary>
        /// <param name="taxoWriter">Writer used to map labels to ordinals; passed through
        /// <c>checkTaxoWriter</c> before each association field is processed.</param>
        /// <param name="byField">Association facet fields grouped by index field name.</param>
        /// <param name="doc">Document that receives the generated fields.</param>
        private void ProcessAssocFacetFields(TaxonomyWriter taxoWriter, IDictionary<string, IList<AssociationFacetField>> byField, Document doc)
        {
            foreach (KeyValuePair<string, IList<AssociationFacetField>> entry in byField)
            {
                string indexFieldName = entry.Key;
                byte[] buffer = new byte[16];
                int used = 0;

                foreach (AssociationFacetField assocField in entry.Value)
                {
                    checkTaxoWriter(taxoWriter);
                    FacetLabel label = new FacetLabel(assocField.dim, assocField.path);
                    int ordinal = taxoWriter.AddCategory(label);

                    // Ordinal, most significant byte first.
                    int afterOrdinal = used + 4;
                    if (afterOrdinal > buffer.Length)
                    {
                        buffer = ArrayUtil.Grow(buffer, afterOrdinal);
                    }
                    buffer[used] = (byte)(ordinal >> 24);
                    buffer[used + 1] = (byte)(ordinal >> 16);
                    buffer[used + 2] = (byte)(ordinal >> 8);
                    buffer[used + 3] = (byte)ordinal;
                    used = afterOrdinal;

                    // Association payload, copied verbatim right after the ordinal.
                    var payload = assocField.assoc;
                    int afterPayload = used + payload.Length;
                    if (afterPayload > buffer.Length)
                    {
                        buffer = ArrayUtil.Grow(buffer, afterPayload);
                    }
                    Array.Copy(payload.Bytes, payload.Offset, buffer, used, payload.Length);
                    used = afterPayload;

                    // Drill-down terms: one per path prefix, from length 1 up to the full label.
                    int prefixLength = 1;
                    while (prefixLength <= label.Length)
                    {
                        doc.Add(new StringField(indexFieldName, PathToString(label.Components, prefixLength), Field.Store.NO));
                        prefixLength++;
                    }
                }

                // Only the first 'used' bytes of the buffer are meaningful.
                doc.Add(new BinaryDocValuesField(indexFieldName, new BytesRef(buffer, 0, used)));
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Translates any added <see cref="FacetField"/>s,
        /// <see cref="SortedSetDocValuesFacetField"/>s and <see cref="AssociationFacetField"/>s
        /// into normal fields for indexing.
        /// <para>
        /// <b>NOTE:</b> add the returned document to the IndexWriter, not the input one.
        /// The returned document contains the generated facet fields followed by every
        /// non-facet field of <paramref name="doc"/>.
        /// </para>
        /// </summary>
        /// <param name="taxoWriter">Taxonomy writer handed to the taxonomy-based and
        /// association facet processing steps.</param>
        /// <param name="doc">Source document whose fields are inspected; it is only read here.</param>
        /// <returns>A new <see cref="Document"/> ready for indexing.</returns>
        /// <exception cref="System.ArgumentException">An association facet field belongs to a
        /// dimension configured as hierarchical or as requiring a dimension count, or two
        /// different association value types are used with the same index field name.</exception>
        public virtual Document Build(TaxonomyWriter taxoWriter, Document doc)
        {
            // Facet fields of each kind, grouped by the index field they are written to.
            IDictionary<string, IList<FacetField>> taxoFieldsByIndexField = new Dictionary<string, IList<FacetField>>();
            IDictionary<string, IList<SortedSetDocValuesFacetField>> ssdvFieldsByIndexField = new Dictionary<string, IList<SortedSetDocValuesFacetField>>();
            IDictionary<string, IList<AssociationFacetField>> assocFieldsByIndexField = new Dictionary<string, IList<AssociationFacetField>>();

            // Dimensions already encountered; handed to CheckSeen for non-multi-valued dimensions.
            var seenDims = new HashSet<string>();

            foreach (IndexableField field in doc.Fields)
            {
                IndexableFieldType fieldType = field.FieldType();

                if (fieldType == FacetField.TYPE)
                {
                    FacetField taxoField = (FacetField)field;
                    FacetsConfig.DimConfig dimConfig = GetDimConfig(taxoField.dim);
                    if (!dimConfig.MultiValued)
                    {
                        CheckSeen(seenDims, taxoField.dim);
                    }
                    GetOrCreateFieldList(taxoFieldsByIndexField, dimConfig.IndexFieldName).Add(taxoField);
                }

                if (fieldType == SortedSetDocValuesFacetField.TYPE)
                {
                    SortedSetDocValuesFacetField ssdvField = (SortedSetDocValuesFacetField)field;
                    FacetsConfig.DimConfig dimConfig = GetDimConfig(ssdvField.Dim);
                    if (!dimConfig.MultiValued)
                    {
                        CheckSeen(seenDims, ssdvField.Dim);
                    }
                    GetOrCreateFieldList(ssdvFieldsByIndexField, dimConfig.IndexFieldName).Add(ssdvField);
                }

                if (fieldType == AssociationFacetField.TYPE)
                {
                    AssociationFacetField assocField = (AssociationFacetField)field;
                    FacetsConfig.DimConfig dimConfig = GetDimConfig(assocField.dim);
                    if (!dimConfig.MultiValued)
                    {
                        CheckSeen(seenDims, assocField.dim);
                    }
                    if (dimConfig.Hierarchical)
                    {
                        throw new System.ArgumentException("AssociationFacetField cannot be hierarchical (dim=\"" + assocField.dim + "\")");
                    }
                    if (dimConfig.RequireDimCount)
                    {
                        throw new System.ArgumentException("AssociationFacetField cannot requireDimCount (dim=\"" + assocField.dim + "\")");
                    }

                    string indexFieldName = dimConfig.IndexFieldName;
                    GetOrCreateFieldList(assocFieldsByIndexField, indexFieldName).Add(assocField);

                    // Best effort: detect mismatched association types within one indexed field.
                    string type = assocField is IntAssociationFacetField
                        ? "int"
                        : (assocField is FloatAssociationFacetField ? "float" : "bytes");

                    // NOTE: not thread safe, but this is just best effort.
                    string knownType;
                    if (assocDimTypes.TryGetValue(indexFieldName, out knownType))
                    {
                        if (!knownType.Equals(type))
                        {
                            throw new System.ArgumentException("mixing incompatible types of AssocationFacetField (" + knownType + " and " + type + ") in indexed field \"" + indexFieldName + "\"; use FacetsConfig to change the indexFieldName for each dimension");
                        }
                    }
                    else
                    {
                        assocDimTypes[indexFieldName] = type;
                    }
                }
            }

            Document result = new Document();

            ProcessFacetFields(taxoWriter, taxoFieldsByIndexField, result);
            processSSDVFacetFields(ssdvFieldsByIndexField, result);
            ProcessAssocFacetFields(taxoWriter, assocFieldsByIndexField, result);

            // Carry over everything that is not one of the three facet field types.
            foreach (IndexableField field in doc.Fields)
            {
                IndexableFieldType fieldType = field.FieldType();
                bool isFacetField = fieldType == FacetField.TYPE
                    || fieldType == SortedSetDocValuesFacetField.TYPE
                    || fieldType == AssociationFacetField.TYPE;
                if (!isFacetField)
                {
                    result.Add(field);
                }
            }

            return result;
        }

        /// <summary>
        /// Returns the list stored under <paramref name="indexFieldName"/>, creating and
        /// registering an empty one when the key is not present yet.
        /// </summary>
        /// <typeparam name="T">Element type of the grouped lists.</typeparam>
        /// <param name="groups">Map from index field name to the fields collected for it.</param>
        /// <param name="indexFieldName">Key to look up.</param>
        /// <returns>The existing or newly created list for the key.</returns>
        private static IList<T> GetOrCreateFieldList<T>(IDictionary<string, IList<T>> groups, string indexFieldName)
        {
            IList<T> list;
            if (!groups.TryGetValue(indexFieldName, out list))
            {
                list = new List<T>();
                groups[indexFieldName] = list;
            }
            return list;
        }