/// <summary>
/// Loads the encoded ordinals of <paramref name="docID"/> from the underlying
/// binary values and decodes them into <paramref name="ordinals"/>.
/// </summary>
/// <param name="docID">Document whose ordinals are requested.</param>
/// <param name="ordinals">Destination that the outer instance's decoder fills in.</param>
public override void Get(int docID, IntsRef ordinals)
{
    // A fresh scratch buffer is filled by the doc-values source on every call ...
    var encoded = new BytesRef();
    values.Get(docID, encoded);

    // ... and then handed to the (overridable) decoder of the enclosing instance.
    outerInstance.Decode(encoded, ordinals);
}
/// <summary>
/// Decodes the delta-coded, variable-length ordinals stored in <paramref name="buf"/>
/// into <paramref name="ordinals"/>. Subclass and override if you change the encoding.
/// </summary>
/// <remarks>
/// Each value is stored as groups of 7 bits, most significant group first; every byte
/// except the last one of a value has its high bit set. A decoded value is added to the
/// previously decoded ordinal to obtain the next ordinal.
/// </remarks>
/// <param name="buf">Encoded bytes; only the range Offset .. Offset + Length is read.</param>
/// <param name="ordinals">Receives the decoded ordinals; its Offset is reset to 0.</param>
protected virtual void Decode(BytesRef buf, IntsRef ordinals)
{
    // Every ordinal consumes at least one encoded byte, so buf.Length is an upper
    // bound on the number of results. Growing once up front keeps capacity checks
    // out of the decoding loop.
    if (buf.Length > ordinals.Ints.Length)
    {
        ordinals.Ints = ArrayUtil.Grow(ordinals.Ints, buf.Length);
    }

    ordinals.Offset = 0;
    ordinals.Length = 0;

    // The decoding is deliberately kept inline rather than moved into a utility method.
    int end = buf.Offset + buf.Length;
    int accumulator = 0;
    int previous = 0;
    for (int pos = buf.Offset; pos < end; pos++)
    {
        byte current = buf.Bytes[pos];
        if ((current & 0x80) != 0)
        {
            // Continuation byte: collect its 7 payload bits and keep reading.
            accumulator = (accumulator << 7) | (current & 0x7F);
            continue;
        }

        // Final byte of a value: complete the delta and add it to the running ordinal.
        previous += (accumulator << 7) | current;
        ordinals.Ints[ordinals.Length] = previous;
        ordinals.Length++;
        accumulator = 0;
    }
}
/// <summary>
/// Get the ordinals for this document. The <paramref name="ordinals"/>.<see cref="IntsRef.Offset"/>
/// must always be 0!
/// </summary>
/// <param name="doc">Document whose ordinals are requested.</param>
/// <param name="ordinals">Receives the ordinals of the document.</param>
public abstract void Get(int doc, IntsRef ordinals);
/// <summary>
/// Turns the encoded bytes in <paramref name="buf"/> back into ordinals.
/// Subclass and override if you change the encoding.
/// </summary>
/// <remarks>
/// The input is read as a sequence of variable-length integers (7 payload bits per
/// byte, high bit set on every byte but the last of a value, most significant group
/// first). Each integer is a delta that is added to the previously produced ordinal.
/// </remarks>
/// <param name="buf">Encoded bytes; only the range Offset .. Offset + Length is read.</param>
/// <param name="ordinals">Receives the decoded ordinals; its Offset is reset to 0.</param>
protected virtual void Decode(BytesRef buf, IntsRef ordinals)
{
    // One encoded byte yields at most one ordinal, so sizing the target to
    // buf.Length once means the loop below never has to check capacity.
    if (buf.Length > ordinals.Ints.Length)
    {
        ordinals.Ints = ArrayUtil.Grow(ordinals.Ints, buf.Length);
    }

    ordinals.Offset = 0;
    ordinals.Length = 0;

    // Decoding stays inline on purpose instead of going through a utility method.
    int limit = buf.Offset + buf.Length;
    int pending = 0;
    int lastOrdinal = 0;
    for (int index = buf.Offset; index < limit; index++)
    {
        byte encodedByte = buf.Bytes[index];
        if ((encodedByte & 0x80) != 0)
        {
            // High bit set: more bytes of the same value follow.
            pending = (pending << 7) | (encodedByte & 0x7F);
            continue;
        }

        // High bit clear: the value is complete; undo the delta coding.
        lastOrdinal += (pending << 7) | encodedByte;
        ordinals.Ints[ordinals.Length] = lastOrdinal;
        ordinals.Length++;
        pending = 0;
    }
}
/// <summary>
/// Stores the FST entry, the per-document ordinal source, the FST and the scratch
/// objects supplied by the caller. Nothing is computed here; the arguments are only
/// captured in the corresponding members.
/// </summary>
public SortedSetDocValuesAnonymousInnerClassHelper(
    FSTEntry entry,
    BinaryDocValues docToOrds,
    FST<long?> fst,
    FST.BytesReader @in,
    FST.Arc<long?> firstArc,
    FST.Arc<long?> scratchArc,
    IntsRef scratchInts,
    BytesRefFSTEnum<long?> fstEnum,
    BytesRef @ref,
    ByteArrayDataInput input)
{
    // Field metadata and data sources.
    Entry = entry;
    DocToOrds = docToOrds;
    Fst = fst;

    // Reader, arcs and enumerator used when walking the FST.
    // (`this.` is required below because member and parameter share the same name.)
    this.@in = @in;
    FirstArc = firstArc;
    ScratchArc = scratchArc;
    ScratchInts = scratchInts;
    FstEnum = fstEnum;

    // Scratch objects handed in alongside the binary ordinal source.
    this.@ref = @ref;
    Input = input;
}
/// <summary>
/// Stores the FST entry, the per-document ordinal source, the FST and the scratch
/// objects supplied by the caller. Nothing is computed here; the arguments are only
/// captured in the corresponding members.
/// </summary>
public SortedDocValuesAnonymousInnerClassHelper(
    FSTEntry entry,
    NumericDocValues docToOrd,
    FST<long?> fst,
    FST.BytesReader @in,
    FST.Arc<long?> firstArc,
    FST.Arc<long?> scratchArc,
    IntsRef scratchInts,
    BytesRefFSTEnum<long?> fstEnum)
{
    // Field metadata and data sources.
    Entry = entry;
    DocToOrd = docToOrd;
    Fst = fst;

    // Reader, arcs and enumerator used when walking the FST.
    // (`this.` is required below because member and parameter share the same name.)
    this.@in = @in;
    FirstArc = firstArc;
    ScratchArc = scratchArc;
    ScratchInts = scratchInts;
    FstEnum = fstEnum;
}
/// <summary>
/// Returns the sorted-set doc values of <paramref name="field"/>.
/// </summary>
/// <remarks>
/// When the field's entry reports zero ordinals, <c>DocValues.EMPTY_SORTED_SET</c> is
/// returned. Otherwise the field's FST is looked up in the instance cache and, if it
/// is missing, read from the data input at the entry's offset, accounted for in the
/// RAM usage counter and cached. Lookup and load happen while holding the lock on
/// this instance.
/// </remarks>
/// <param name="field">Field whose values are requested; its number selects the entry.</param>
/// <returns>A view combining the field's binary ordinal values with its FST.</returns>
public override SortedSetDocValues GetSortedSet(FieldInfo field)
{
    FSTEntry entry = Fsts[field.Number];
    if (entry.NumOrds == 0)
    {
        return DocValues.EMPTY_SORTED_SET; // empty FST!
    }

    FST<long?> fst;
    lock (this)
    {
        if (!FstInstances.TryGetValue(field.Number, out fst))
        {
            // First request for this field: load the FST and remember it.
            Data.Seek(entry.Offset);
            fst = new FST<long?>((DataInput)Data, Lucene.Net.Util.Fst.PositiveIntOutputs.Singleton);
            RamBytesUsed_Renamed.AddAndGet(fst.SizeInBytes());
            FstInstances[field.Number] = fst;
        }
    }

    BinaryDocValues docToOrds = GetBinary(field);

    // per-thread resources
    FST.BytesReader reader = fst.BytesReader;
    FST.Arc<long?> firstArc = new FST.Arc<long?>();
    FST.Arc<long?> scratchArc = new FST.Arc<long?>();
    IntsRef scratchInts = new IntsRef();
    BytesRefFSTEnum<long?> fstEnum = new BytesRefFSTEnum<long?>(fst);
    BytesRef scratchBytes = new BytesRef();
    ByteArrayDataInput input = new ByteArrayDataInput();

    return new SortedSetDocValuesAnonymousInnerClassHelper(
        entry, docToOrds, fst, reader, firstArc, scratchArc, scratchInts, fstEnum, scratchBytes, input);
}
/// <summary>
/// Returns the sorted doc values of <paramref name="field"/>.
/// </summary>
/// <remarks>
/// The field's FST is looked up in the instance cache and, if it is missing, read
/// from the data input at the entry's offset, accounted for in the RAM usage counter
/// and cached. Lookup and load happen while holding the lock on this instance.
/// </remarks>
/// <param name="field">Field whose values are requested; its number selects the entry.</param>
/// <returns>A view combining the field's numeric ordinal values with its FST.</returns>
public override SortedDocValues GetSorted(FieldInfo field)
{
    FSTEntry entry = Fsts[field.Number];

    FST<long?> fst;
    lock (this)
    {
        if (!FstInstances.TryGetValue(field.Number, out fst))
        {
            // First request for this field: load the FST and remember it.
            Data.Seek(entry.Offset);
            fst = new FST<long?>(Data, PositiveIntOutputs.Singleton);
            RamBytesUsed_Renamed.AddAndGet(fst.SizeInBytes());
            FstInstances[field.Number] = fst;
        }
    }

    var docToOrd = GetNumeric(field);

    // per-thread resources
    FST.BytesReader reader = fst.BytesReader;
    FST.Arc<long?> firstArc = new FST.Arc<long?>();
    FST.Arc<long?> scratchArc = new FST.Arc<long?>();
    IntsRef scratchInts = new IntsRef();
    BytesRefFSTEnum<long?> fstEnum = new BytesRefFSTEnum<long?>(fst);

    return new SortedDocValuesAnonymousInnerClassHelper(
        entry, docToOrd, fst, reader, firstArc, scratchArc, scratchInts, fstEnum);
}
/// <summary>
/// Adds the facet fields of one document to <paramref name="doc"/>: for every index
/// field name, one drill-down <c>StringField</c> per path prefix of every facet label,
/// plus a single <c>BinaryDocValuesField</c> holding the encoded category ordinals.
/// </summary>
/// <param name="taxoWriter">Taxonomy writer used to add categories and look up parents.</param>
/// <param name="byField">Facet fields grouped by the index field name they are written to.</param>
/// <param name="doc">Document that receives the generated fields.</param>
/// <exception cref="System.ArgumentException">
/// A facet field has more than one path component although its dimension is not hierarchical.
/// </exception>
private void ProcessFacetFields(TaxonomyWriter taxoWriter, IDictionary<string, IList<FacetField>> byField, Document doc)
{
    foreach (KeyValuePair<string, IList<FacetField>> group in byField)
    {
        string indexFieldName = group.Key;
        IntsRef ordinals = new IntsRef(32);

        foreach (FacetField facetField in group.Value)
        {
            FacetsConfig.DimConfig dimConfig = GetDimConfig(facetField.dim);
            if (facetField.path.Length > 1 && !dimConfig.Hierarchical)
            {
                throw new System.ArgumentException("dimension \"" + facetField.dim + "\" is not hierarchical yet has " + facetField.path.Length + " components");
            }

            FacetLabel label = new FacetLabel(facetField.dim, facetField.path);

            checkTaxoWriter(taxoWriter);
            int ordinal = taxoWriter.AddCategory(label);

            // Record the category's own ordinal, enlarging the buffer when it is full.
            if (ordinals.Ints.Length == ordinals.Length)
            {
                ordinals.Grow(ordinals.Length + 1);
            }
            ordinals.Ints[ordinals.Length] = ordinal;
            ordinals.Length++;

            if (dimConfig.MultiValued && (dimConfig.Hierarchical || dimConfig.RequireDimCount))
            {
                // Add all parents too:
                for (int parent = taxoWriter.GetParent(ordinal); parent > 0; parent = taxoWriter.GetParent(parent))
                {
                    if (ordinals.Ints.Length == ordinals.Length)
                    {
                        ordinals.Grow(ordinals.Length + 1);
                    }
                    ordinals.Ints[ordinals.Length] = parent;
                    ordinals.Length++;
                }

                if (!dimConfig.RequireDimCount)
                {
                    // Remove last (dimension) ord:
                    ordinals.Length--;
                }
            }

            // Drill down: one term per prefix of the label's components.
            for (int prefixLength = 1; prefixLength <= label.Length; prefixLength++)
            {
                doc.Add(new StringField(indexFieldName, PathToString(label.Components, prefixLength), Field.Store.NO));
            }
        }

        // Facet counts:
        // DocValues are considered stored fields:
        doc.Add(new BinaryDocValuesField(indexFieldName, DedupAndEncode(ordinals)));
    }
}
/// <summary>
/// Encodes ordinals into a BytesRef; expert: subclass can
/// override this to change encoding.
/// </summary>
/// <remarks>
/// The ordinals are sorted in place, duplicates are skipped, and each remaining
/// ordinal is written as the difference to the previously written one (the first
/// ordinal is written as is). Every difference is stored in one to five bytes of
/// 7 payload bits each, most significant group first, with the high bit set on all
/// bytes except the last.
/// </remarks>
/// <param name="ordinals">Ordinals to encode; the Offset .. Offset + Length range of its array is sorted as a side effect.</param>
/// <returns>A BytesRef over a newly allocated array, covering exactly the bytes written.</returns>
protected virtual BytesRef DedupAndEncode(IntsRef ordinals)
{
    // Sorting brings equal ordinals next to each other and keeps every delta positive.
    Array.Sort(ordinals.Ints, ordinals.Offset, ordinals.Length);

    // Worst case is five bytes per ordinal.
    byte[] encoded = new byte[5 * ordinals.Length];
    int written = 0;
    int previous = -1;

    for (int i = 0; i < ordinals.Length; i++)
    {
        int current = ordinals.Ints[ordinals.Offset + i];

        // current could be == previous, so we must dedup:
        if (current <= previous)
        {
            continue;
        }

        int delta = previous == -1 ? current : current - previous;
        previous = current;

        if ((delta & ~0x7F) == 0)
        {
            // Fits in 7 bits: a single byte without continuation flag.
            encoded[written++] = (byte)delta;
        }
        else if ((delta & ~0x3FFF) == 0)
        {
            // Fits in 14 bits.
            encoded[written++] = unchecked((byte)(0x80 | ((delta >> 7) & 0x7F)));
            encoded[written++] = (byte)(delta & 0x7F);
        }
        else if ((delta & ~0x1FFFFF) == 0)
        {
            // Fits in 21 bits.
            encoded[written++] = unchecked((byte)(0x80 | ((delta >> 14) & 0x7F)));
            encoded[written++] = unchecked((byte)(0x80 | ((delta >> 7) & 0x7F)));
            encoded[written++] = (byte)(delta & 0x7F);
        }
        else if ((delta & ~0xFFFFFFF) == 0)
        {
            // Fits in 28 bits.
            encoded[written++] = unchecked((byte)(0x80 | ((delta >> 21) & 0x7F)));
            encoded[written++] = unchecked((byte)(0x80 | ((delta >> 14) & 0x7F)));
            encoded[written++] = unchecked((byte)(0x80 | ((delta >> 7) & 0x7F)));
            encoded[written++] = (byte)(delta & 0x7F);
        }
        else
        {
            // Needs all five bytes; the leading byte carries only bits 28..31.
            encoded[written++] = unchecked((byte)(0x80 | ((delta >> 28) & 0x0F)));
            encoded[written++] = unchecked((byte)(0x80 | ((delta >> 21) & 0x7F)));
            encoded[written++] = unchecked((byte)(0x80 | ((delta >> 14) & 0x7F)));
            encoded[written++] = unchecked((byte)(0x80 | ((delta >> 7) & 0x7F)));
            encoded[written++] = (byte)(delta & 0x7F);
        }
    }

    return new BytesRef(encoded, 0, written);
}