Beispiel #1
0
        public override BinaryDocValues GetBinary(FieldInfo field)
        {
            lock (this)
            {
                if (!binaryInstances.TryGetValue(field.Number, out BinaryDocValues instance))
                {
                    var type = field.GetAttribute(legacyKey).ToLegacyDocValuesType();

                    if (type == LegacyDocValuesType.BYTES_FIXED_STRAIGHT)
                    {
                        instance = LoadBytesFixedStraight(field);
                    }
                    else if (type == LegacyDocValuesType.BYTES_VAR_STRAIGHT)
                    {
                        instance = LoadBytesVarStraight(field);
                    }
                    else if (type == LegacyDocValuesType.BYTES_FIXED_DEREF)
                    {
                        instance = LoadBytesFixedDeref(field);
                    }
                    else if (type == LegacyDocValuesType.BYTES_VAR_DEREF)
                    {
                        instance = LoadBytesVarDeref(field);
                    }
                    else
                    {
                        throw AssertionError.Create();
                    }
                    binaryInstances[field.Number] = instance;
                }
                return(instance);
            }
        }
        internal virtual Int64Values GetNumeric(NumericEntry entry)
        {
            IndexInput data = (IndexInput)this.data.Clone();

            data.Seek(entry.Offset);

            switch (entry.format)
            {
            case Lucene45DocValuesConsumer.DELTA_COMPRESSED:
                BlockPackedReader reader = new BlockPackedReader(data, entry.PackedInt32sVersion, entry.BlockSize, entry.Count, true);
                return(reader);

            case Lucene45DocValuesConsumer.GCD_COMPRESSED:
                long min  = entry.minValue;
                long mult = entry.gcd;
                BlockPackedReader quotientReader = new BlockPackedReader(data, entry.PackedInt32sVersion, entry.BlockSize, entry.Count, true);
                return(new Int64ValuesAnonymousClass(min, mult, quotientReader));

            case Lucene45DocValuesConsumer.TABLE_COMPRESSED:
                long[] table             = entry.table;
                int    bitsRequired      = PackedInt32s.BitsRequired(table.Length - 1);
                PackedInt32s.Reader ords = PackedInt32s.GetDirectReaderNoHeader(data, PackedInt32s.Format.PACKED, entry.PackedInt32sVersion, (int)entry.Count, bitsRequired);
                return(new Int64ValuesAnonymousClass2(table, ords));

            default:
                throw AssertionError.Create();
            }
        }
 private static sbyte DocValuesByte(DocValuesType type)
 {
     if (type == DocValuesType.NONE)
     {
         return(0);
     }
     else if (type == DocValuesType.NUMERIC)
     {
         return(1);
     }
     else if (type == DocValuesType.BINARY)
     {
         return(2);
     }
     else if (type == DocValuesType.SORTED)
     {
         return(3);
     }
     else if (type == DocValuesType.SORTED_SET)
     {
         return(4);
     }
     else
     {
         throw AssertionError.Create();
     }
 }
        private void ReadFields(IndexInput meta /*, FieldInfos infos // LUCENENET: Not read */)
        {
            int fieldNumber = meta.ReadVInt32();

            while (fieldNumber != -1)
            {
                // check should be: infos.fieldInfo(fieldNumber) != null, which incorporates negative check
                // but docvalues updates are currently buggy here (loading extra stuff, etc): LUCENE-5616
                if (fieldNumber < 0)
                {
                    // trickier to validate more: because we re-use for norms, because we use multiple entries
                    // for "composite" types like sortedset, etc.
                    throw new CorruptIndexException("Invalid field number: " + fieldNumber + " (resource=" + meta + ")");
                }
                byte type = meta.ReadByte();
                if (type == Lucene45DocValuesFormat.NUMERIC)
                {
                    numerics[fieldNumber] = ReadNumericEntry(meta);
                }
                else if (type == Lucene45DocValuesFormat.BINARY)
                {
                    BinaryEntry b = ReadBinaryEntry(meta);
                    binaries[fieldNumber] = b;
                }
                else if (type == Lucene45DocValuesFormat.SORTED)
                {
                    ReadSortedField(fieldNumber, meta /*, infos // LUCENENET: Never read */);
                }
                else if (type == Lucene45DocValuesFormat.SORTED_SET)
                {
                    SortedSetEntry ss = ReadSortedSetEntry(meta);
                    sortedSets[fieldNumber] = ss;
                    if (ss.Format == Lucene45DocValuesConsumer.SORTED_SET_WITH_ADDRESSES)
                    {
                        ReadSortedSetFieldWithAddresses(fieldNumber, meta /*, infos // LUCENENET: Never read */);
                    }
                    else if (ss.Format == Lucene45DocValuesConsumer.SORTED_SET_SINGLE_VALUED_SORTED)
                    {
                        if (meta.ReadVInt32() != fieldNumber)
                        {
                            throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
                        }
                        if (meta.ReadByte() != Lucene45DocValuesFormat.SORTED)
                        {
                            throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
                        }
                        ReadSortedField(fieldNumber, meta /*, infos // LUCENENET: Never read */);
                    }
                    else
                    {
                        throw AssertionError.Create();
                    }
                }
                else
                {
                    throw new CorruptIndexException("invalid type: " + type + ", resource=" + meta);
                }
                fieldNumber = meta.ReadVInt32();
            }
        }
Beispiel #5
0
 protected virtual void Recompose(IList <SrndQuery> queries)
 {
     if (queries.Count < 2)
     {
         throw AssertionError.Create("Too few subqueries");
     }
     this.m_queries = new JCG.List <SrndQuery>(queries);
 }
Beispiel #6
0
        public override SortedDocValues GetSorted(FieldInfo field)
        {
            UninterruptableMonitor.Enter(this);
            try
            {
                if (!sortedInstances.TryGetValue(field.Number, out SortedDocValues instance))
                {
                    string     dataName  = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "dat");
                    string     indexName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "idx");
                    IndexInput data      = null;
                    IndexInput index     = null;
                    bool       success   = false;
                    try
                    {
                        data  = dir.OpenInput(dataName, state.Context);
                        index = dir.OpenInput(indexName, state.Context);

                        var type = field.GetAttribute(legacyKey).ToLegacyDocValuesType();

                        if (type == LegacyDocValuesType.BYTES_FIXED_SORTED)
                        {
                            instance = LoadBytesFixedSorted(/* field, // LUCENENET: Never read */ data, index);
                        }
                        else if (type == LegacyDocValuesType.BYTES_VAR_SORTED)
                        {
                            instance = LoadBytesVarSorted(/* field, // LUCENENET: Never read */ data, index);
                        }
                        else
                        {
                            throw AssertionError.Create();
                        }

                        CodecUtil.CheckEOF(data);
                        CodecUtil.CheckEOF(index);
                        success = true;
                    }
                    finally
                    {
                        if (success)
                        {
                            IOUtils.Dispose(data, index);
                        }
                        else
                        {
                            IOUtils.DisposeWhileHandlingException(data, index);
                        }
                    }
                    sortedInstances[field.Number] = instance;
                }
                return(instance);
            }
            finally
            {
                UninterruptableMonitor.Exit(this);
            }
        }
 public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound)
 {
     lock (this)
     {
         if (!mayMerge.Value && writer.NextMerge() != null)
         {
             throw AssertionError.Create();
         }
         base.Merge(writer, trigger, newMergesFound);
     }
 }
Beispiel #8
0
        private NumericDocValues LoadNumeric(FieldInfo field)
        {
            NumericEntry entry = numerics[field.Number];

            data.Seek(entry.offset + entry.missingBytes);
            switch (entry.format)
            {
            case TABLE_COMPRESSED:
                int size = data.ReadVInt32();
                if (size > 256)
                {
                    throw new CorruptIndexException(
                              "TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + data);
                }
                var decode = new long[size];
                for (int i = 0; i < decode.Length; i++)
                {
                    decode[i] = data.ReadInt64();
                }
                int formatID     = data.ReadVInt32();
                int bitsPerValue = data.ReadVInt32();
                var ordsReader   = PackedInt32s.GetReaderNoHeader(data, PackedInt32s.Format.ById(formatID),
                                                                  entry.packedIntsVersion, maxDoc, bitsPerValue);
                ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(decode) + ordsReader.RamBytesUsed());
                return(new NumericDocValuesAnonymousClass(decode, ordsReader));

            case DELTA_COMPRESSED:
                int blockSize = data.ReadVInt32();
                var reader    = new BlockPackedReader(data, entry.packedIntsVersion, blockSize, maxDoc,
                                                      false);
                ramBytesUsed.AddAndGet(reader.RamBytesUsed());
                return(reader);

            case UNCOMPRESSED:
                var bytes = new byte[maxDoc];
                data.ReadBytes(bytes, 0, bytes.Length);
                ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(bytes));
                // LUCENENET: IMPORTANT - some bytes are negative here, so we need to pass as sbyte
                return(new NumericDocValuesAnonymousClass2((sbyte[])(Array)bytes));

            case GCD_COMPRESSED:
                long min  = data.ReadInt64();
                long mult = data.ReadInt64();
                int  quotientBlockSize = data.ReadVInt32();
                var  quotientReader    = new BlockPackedReader(data, entry.packedIntsVersion,
                                                               quotientBlockSize, maxDoc, false);
                ramBytesUsed.AddAndGet(quotientReader.RamBytesUsed());
                return(new NumericDocValuesAnonymousClass3(min, mult, quotientReader));

            default:
                throw AssertionError.Create();
            }
        }
Beispiel #9
0
 public virtual void AddSpanQuery(Search.Query q)
 {
     if (q == SrndQuery.TheEmptyLcnQuery)
     {
         return;
     }
     if (!(q is SpanQuery))
     {
         throw AssertionError.Create("Expected SpanQuery: " + q.ToString(FieldName));
     }
     AddSpanQueryWeighted((SpanQuery)q, q.Boost);
 }
Beispiel #10
0
        public static Search.Query MakeBooleanQuery(
            IList <Search.Query> queries,
            Occur occur)
        {
            if (queries.Count <= 1)
            {
                throw AssertionError.Create("Too few subqueries: " + queries.Count);
            }
            BooleanQuery bq = new BooleanQuery();

            AddQueriesToBoolean(bq, queries, occur);
            return(bq);
        }
        private NumericDocValues LoadNumeric(NumericEntry entry)
        {
            data.Seek(entry.offset + entry.missingBytes);
            switch (entry.byteWidth)
            {
            case 1:
            {
                var values = new byte[entry.count];
                data.ReadBytes(values, 0, entry.count);
                ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(values));
                // LUCENENET: IMPORTANT - some bytes are negative here, so we need to pass as sbyte
                return(new NumericDocValuesAnonymousClass((sbyte[])(Array)values));
            }

            case 2:
            {
                var values = new short[entry.count];
                for (int i = 0; i < entry.count; i++)
                {
                    values[i] = data.ReadInt16();
                }
                ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(values));
                return(new NumericDocValuesAnonymousClass2(values));
            }

            case 4:
            {
                var values = new int[entry.count];
                for (var i = 0; i < entry.count; i++)
                {
                    values[i] = data.ReadInt32();
                }
                ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(values));
                return(new NumericDocValuesAnonymousClass3(values));
            }

            case 8:
            {
                var values = new long[entry.count];
                for (int i = 0; i < entry.count; i++)
                {
                    values[i] = data.ReadInt64();
                }
                ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(values));
                return(new NumericDocValuesAnonymousClass4(values));
            }

            default:
                throw AssertionError.Create();
            }
        }
Beispiel #12
0
 public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound)
 {
     UninterruptableMonitor.Enter(this);
     try
     {
         if (!mayMerge.Value && writer.NextMerge() != null)
         {
             throw AssertionError.Create();
         }
         base.Merge(writer, trigger, newMergesFound);
     }
     finally
     {
         UninterruptableMonitor.Exit(this);
     }
 }
Beispiel #13
0
 internal virtual void WriteSequence()
 {
     if (Debugging.AssertsEnabled)
     {
         Debugging.Assert(SequenceIsConsistent());
     }
     try
     {
         WriteHeader(reverse, clean, dirtyWords.Length);
     }
     catch (Exception cannotHappen) when(cannotHappen.IsIOException())
     {
         throw AssertionError.Create(cannotHappen.Message, cannotHappen);
     }
     @out.WriteBytes(dirtyWords.Bytes, 0, dirtyWords.Length);
     dirtyWords.Length = 0;
     ++numSequences;
 }
Beispiel #14
0
            internal Field GetNumericField(string name, NumericType type)
            {
                Field f;

                if (reuseFields)
                {
                    numericFields.TryGetValue(name, out f);
                }
                else
                {
                    f = null;
                }

                if (f == null)
                {
                    switch (type)
                    {
                    case NumericType.INT32:
                        f = new Int32Field(name, 0, Field.Store.NO);
                        break;

                    case NumericType.INT64:
                        f = new Int64Field(name, 0L, Field.Store.NO);
                        break;

                    case NumericType.SINGLE:
                        f = new SingleField(name, 0.0F, Field.Store.NO);
                        break;

                    case NumericType.DOUBLE:
                        f = new DoubleField(name, 0.0, Field.Store.NO);
                        break;

                    default:
                        throw AssertionError.Create("Cannot get here");
                    }
                    if (reuseFields)
                    {
                        numericFields[name] = f;
                    }
                }
                return(f);
            }
        public override BinaryDocValues GetBinary(FieldInfo field)
        {
            BinaryEntry bytes = binaries[field.Number];

            switch (bytes.format)
            {
            case Lucene45DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED:
                return(GetFixedBinary(/*field, LUCENENET: Never read */ bytes));

            case Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED:
                return(GetVariableBinary(field, bytes));

            case Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED:
                return(GetCompressedBinary(field, bytes));

            default:
                throw AssertionError.Create();
            }
        }
Beispiel #16
0
            protected override SortedDocValues GetSortedDocValues(AtomicReaderContext context, string field)
            {
                SortedSetDocValues sortedSet = FieldCache.DEFAULT.GetDocTermOrds(context.AtomicReader, field);

                if (sortedSet.ValueCount >= int.MaxValue)
                {
                    throw UnsupportedOperationException.Create("fields containing more than " + (int.MaxValue - 1) + " unique terms are unsupported");
                }

                SortedDocValues singleton = DocValues.UnwrapSingleton(sortedSet);

                if (singleton != null)
                {
                    // it's actually single-valued in practice, but indexed as multi-valued,
                    // so just sort on the underlying single-valued dv directly.
                    // regardless of selector type, this optimization is safe!
                    return(singleton);
                }
                else if (outerInstance.selector == Selector.MIN)
                {
                    return(new MinValue(sortedSet));
                }
                else
                {
                    if (sortedSet is RandomAccessOrds == false)
                    {
                        throw UnsupportedOperationException.Create("codec does not support random access ordinals, cannot use selector: " + outerInstance.selector);
                    }
                    RandomAccessOrds randomOrds = (RandomAccessOrds)sortedSet;
                    switch (outerInstance.selector)
                    {
                    case Selector.MAX: return(new MaxValue(randomOrds));

                    case Selector.MIDDLE_MIN: return(new MiddleMinValue(randomOrds));

                    case Selector.MIDDLE_MAX: return(new MiddleMaxValue(randomOrds));

                    case Selector.MIN:
                    default:
                        throw AssertionError.Create();
                    }
                }
            }
Beispiel #17
0
        /// <summary>
        /// Create a random instance.
        /// </summary>
        public static CompressingCodec RandomInstance(Random random, int chunkSize, bool withSegmentSuffix)
        {
            switch (random.Next(4))
            {
            case 0:
                return(new FastCompressingCodec(chunkSize, withSegmentSuffix));

            case 1:
                return(new FastDecompressionCompressingCodec(chunkSize, withSegmentSuffix));

            case 2:
                return(new HighCompressionCompressingCodec(chunkSize, withSegmentSuffix));

            case 3:
                return(new DummyCompressingCodec(chunkSize, withSegmentSuffix));

            default:
                throw AssertionError.Create();
            }
        }
        public override IBits GetDocsWithField(FieldInfo field)
        {
            switch (field.DocValuesType)
            {
            case DocValuesType.SORTED_SET:
                return(DocValues.DocsWithValue(GetSortedSet(field), maxDoc));

            case DocValuesType.SORTED:
                return(DocValues.DocsWithValue(GetSorted(field), maxDoc));

            case DocValuesType.BINARY:
                return(GetBinaryDocsWithField(field));

            case DocValuesType.NUMERIC:
                return(GetNumericDocsWithField(field));

            default:
                throw AssertionError.Create();
            }
        }
Beispiel #19
0
        private static void ReadField(DataInput @in, StoredFieldVisitor visitor, FieldInfo info, int bits)
        {
            switch (bits & CompressingStoredFieldsWriter.TYPE_MASK)
            {
            case CompressingStoredFieldsWriter.BYTE_ARR:
                int length = @in.ReadVInt32();
                var data   = new byte[length];
                @in.ReadBytes(data, 0, length);
                visitor.BinaryField(info, data);
                break;

            case CompressingStoredFieldsWriter.STRING:
                length = @in.ReadVInt32();
                data   = new byte[length];
                @in.ReadBytes(data, 0, length);
#pragma warning disable 612, 618
                visitor.StringField(info, IOUtils.CHARSET_UTF_8.GetString(data));
#pragma warning restore 612, 618
                break;

            case CompressingStoredFieldsWriter.NUMERIC_INT32:
                visitor.Int32Field(info, @in.ReadInt32());
                break;

            case CompressingStoredFieldsWriter.NUMERIC_SINGLE:
                visitor.SingleField(info, J2N.BitConversion.Int32BitsToSingle(@in.ReadInt32()));
                break;

            case CompressingStoredFieldsWriter.NUMERIC_INT64:
                visitor.Int64Field(info, @in.ReadInt64());
                break;

            case CompressingStoredFieldsWriter.NUMERIC_DOUBLE:
                visitor.DoubleField(info, J2N.BitConversion.Int64BitsToDouble(@in.ReadInt64()));
                break;

            default:
                throw AssertionError.Create("Unknown type flag: " + bits.ToString("x"));
            }
        }
        public override IBits GetDocsWithField(FieldInfo field)
        {
            switch (field.DocValuesType)
            {
            case DocValuesType.SORTED_SET:
                return(DocValues.DocsWithValue(GetSortedSet(field), maxDoc));

            case DocValuesType.SORTED:
                return(DocValues.DocsWithValue(GetSorted(field), maxDoc));

            case DocValuesType.BINARY:
                BinaryEntry be = binaries[field.Number];
                return(GetMissingBits(field.Number, be.missingOffset, be.missingBytes));

            case DocValuesType.NUMERIC:
                NumericEntry ne = numerics[field.Number];
                return(GetMissingBits(field.Number, ne.missingOffset, ne.missingBytes));

            default:
                throw AssertionError.Create();
            }
        }
Beispiel #21
0
 public static BulkOperation Of(PackedInt32s.Format format, int bitsPerValue)
 {
     if (format == PackedInt32s.Format.PACKED)
     {
         if (Debugging.AssertsEnabled)
         {
             Debugging.Assert(packedBulkOps[bitsPerValue - 1] != null);
         }
         return(packedBulkOps[bitsPerValue - 1]);
     }
     else if (format == PackedInt32s.Format.PACKED_SINGLE_BLOCK)
     {
         if (Debugging.AssertsEnabled)
         {
             Debugging.Assert(packedSingleBlockBulkOps[bitsPerValue - 1] != null);
         }
         return(packedSingleBlockBulkOps[bitsPerValue - 1]);
     }
     else
     {
         throw AssertionError.Create();
     }
 }
        public override SortedSetDocValues GetSortedSet(FieldInfo field)
        {
            SortedSetEntry ss = sortedSets[field.Number];

            if (ss.Format == Lucene45DocValuesConsumer.SORTED_SET_SINGLE_VALUED_SORTED)
            {
                SortedDocValues values = GetSorted(field);
                return(DocValues.Singleton(values));
            }
            else if (ss.Format != Lucene45DocValuesConsumer.SORTED_SET_WITH_ADDRESSES)
            {
                throw AssertionError.Create();
            }

            IndexInput data       = (IndexInput)this.data.Clone();
            long       valueCount = binaries[field.Number].Count;
            // we keep the byte[]s and list of ords on disk, these could be large
            Int64BinaryDocValues binary   = (Int64BinaryDocValues)GetBinary(field);
            Int64Values          ordinals = GetNumeric(ords[field.Number]);
            // but the addresses to the ord stream are in RAM
            MonotonicBlockPackedReader ordIndex = GetOrdIndexInstance(data, field, ordIndexes[field.Number]);

            return(new RandomAccessOrdsAnonymousClass(valueCount, binary, ordinals, ordIndex));
        }
Beispiel #23
0
        private static void SkipField(DataInput @in, int bits)
        {
            switch (bits & CompressingStoredFieldsWriter.TYPE_MASK)
            {
            case CompressingStoredFieldsWriter.BYTE_ARR:
            case CompressingStoredFieldsWriter.STRING:
                int length = @in.ReadVInt32();
                @in.SkipBytes(length);
                break;

            case CompressingStoredFieldsWriter.NUMERIC_INT32:
            case CompressingStoredFieldsWriter.NUMERIC_SINGLE:
                @in.ReadInt32();
                break;

            case CompressingStoredFieldsWriter.NUMERIC_INT64:
            case CompressingStoredFieldsWriter.NUMERIC_DOUBLE:
                @in.ReadInt64();
                break;

            default:
                throw AssertionError.Create("Unknown type flag: " + bits.ToString("x"));
            }
        }
Beispiel #24
0
        public override void WriteField(FieldInfo info, IIndexableField field)
        {
            int      bits /* = 0*/; // LUCENENET: IDE0059: Remove unnecessary value assignment
            BytesRef bytes;
            string   @string;

            // LUCENENET specific - To avoid boxing/unboxing, we don't
            // call GetNumericValue(). Instead, we check the field.NumericType and then
            // call the appropriate conversion method.
            if (field.NumericType != NumericFieldType.NONE)
            {
                switch (field.NumericType)
                {
                case NumericFieldType.BYTE:
                case NumericFieldType.INT16:
                case NumericFieldType.INT32:
                    bits = NUMERIC_INT32;
                    break;

                case NumericFieldType.INT64:
                    bits = NUMERIC_INT64;
                    break;

                case NumericFieldType.SINGLE:
                    bits = NUMERIC_SINGLE;
                    break;

                case NumericFieldType.DOUBLE:
                    bits = NUMERIC_DOUBLE;
                    break;

                default:
                    throw new ArgumentException("cannot store numeric type " + field.NumericType);
                }

                @string = null;
                bytes   = null;
            }
            else
            {
                bytes = field.GetBinaryValue();
                if (bytes != null)
                {
                    bits    = BYTE_ARR;
                    @string = null;
                }
                else
                {
                    bits    = STRING;
                    @string = field.GetStringValue();
                    if (@string == null)
                    {
                        throw new ArgumentException("field " + field.Name + " is stored but does not have BinaryValue, StringValue nor NumericValue");
                    }
                }
            }

            long infoAndBits = (((long)info.Number) << TYPE_BITS) | (uint)bits;

            bufferedDocs.WriteVInt64(infoAndBits);

            if (bytes != null)
            {
                bufferedDocs.WriteVInt32(bytes.Length);
                bufferedDocs.WriteBytes(bytes.Bytes, bytes.Offset, bytes.Length);
            }
            else if (@string != null)
            {
                bufferedDocs.WriteString(field.GetStringValue());
            }
            else
            {
                switch (field.NumericType)
                {
                case NumericFieldType.BYTE:
                case NumericFieldType.INT16:
                case NumericFieldType.INT32:
                    bufferedDocs.WriteInt32(field.GetInt32Value().Value);
                    break;

                case NumericFieldType.INT64:
                    bufferedDocs.WriteInt64(field.GetInt64Value().Value);
                    break;

                case NumericFieldType.SINGLE:
                    bufferedDocs.WriteInt32(BitConversion.SingleToInt32Bits(field.GetSingleValue().Value));
                    break;

                case NumericFieldType.DOUBLE:
                    bufferedDocs.WriteInt64(BitConversion.DoubleToInt64Bits(field.GetDoubleValue().Value));
                    break;

                default:
                    throw AssertionError.Create("Cannot get here");
                }
            }
        }
Beispiel #25
0
        private void MergeDocValues(SegmentWriteState segmentWriteState)
        {
            DocValuesConsumer consumer = codec.DocValuesFormat.FieldsConsumer(segmentWriteState);
            bool success = false;

            try
            {
                foreach (FieldInfo field in mergeState.FieldInfos)
                {
                    DocValuesType type = field.DocValuesType;
                    if (type != DocValuesType.NONE)
                    {
                        if (type == DocValuesType.NUMERIC)
                        {
                            IList <NumericDocValues> toMerge       = new JCG.List <NumericDocValues>();
                            IList <IBits>            docsWithField = new JCG.List <IBits>();
                            foreach (AtomicReader reader in mergeState.Readers)
                            {
                                NumericDocValues values = reader.GetNumericDocValues(field.Name);
                                IBits            bits   = reader.GetDocsWithField(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_NUMERIC;
                                    bits   = new Lucene.Net.Util.Bits.MatchNoBits(reader.MaxDoc);
                                }
                                toMerge.Add(values);
                                docsWithField.Add(bits);
                            }
                            consumer.MergeNumericField(field, mergeState, toMerge, docsWithField);
                        }
                        else if (type == DocValuesType.BINARY)
                        {
                            IList <BinaryDocValues> toMerge       = new JCG.List <BinaryDocValues>();
                            IList <IBits>           docsWithField = new JCG.List <IBits>();
                            foreach (AtomicReader reader in mergeState.Readers)
                            {
                                BinaryDocValues values = reader.GetBinaryDocValues(field.Name);
                                IBits           bits   = reader.GetDocsWithField(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_BINARY;
                                    bits   = new Lucene.Net.Util.Bits.MatchNoBits(reader.MaxDoc);
                                }
                                toMerge.Add(values);
                                docsWithField.Add(bits);
                            }
                            consumer.MergeBinaryField(field, mergeState, toMerge, docsWithField);
                        }
                        else if (type == DocValuesType.SORTED)
                        {
                            IList <SortedDocValues> toMerge = new JCG.List <SortedDocValues>();
                            foreach (AtomicReader reader in mergeState.Readers)
                            {
                                SortedDocValues values = reader.GetSortedDocValues(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_SORTED;
                                }
                                toMerge.Add(values);
                            }
                            consumer.MergeSortedField(field, mergeState, toMerge);
                        }
                        else if (type == DocValuesType.SORTED_SET)
                        {
                            IList <SortedSetDocValues> toMerge = new JCG.List <SortedSetDocValues>();
                            foreach (AtomicReader reader in mergeState.Readers)
                            {
                                SortedSetDocValues values = reader.GetSortedSetDocValues(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_SORTED_SET;
                                }
                                toMerge.Add(values);
                            }
                            consumer.MergeSortedSetField(field, mergeState, toMerge);
                        }
                        else
                        {
                            throw AssertionError.Create("type=" + type);
                        }
                    }
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Dispose(consumer);
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(consumer);
                }
            }
        }
        internal virtual void AddNumericField(FieldInfo field, IEnumerable <long?> values, bool optimizeStorage)
        {
            long count    = 0;
            long minValue = long.MaxValue;
            long maxValue = long.MinValue;
            long gcd      = 0;
            bool missing  = false;

            // TODO: more efficient?
            JCG.HashSet <long> uniqueValues = null;

            if (optimizeStorage)
            {
                uniqueValues = new JCG.HashSet <long>();

                foreach (long?nv in values)
                {
                    long v;
                    if (nv == null)
                    {
                        v       = 0;
                        missing = true;
                    }
                    else
                    {
                        v = nv.Value;
                    }

                    if (gcd != 1)
                    {
                        if (v < long.MinValue / 2 || v > long.MaxValue / 2)
                        {
                            // in that case v - minValue might overflow and make the GCD computation return
                            // wrong results. Since these extreme values are unlikely, we just discard
                            // GCD computation for them
                            gcd = 1;
                        } // minValue needs to be set first
                        else if (count != 0)
                        {
                            gcd = MathUtil.Gcd(gcd, v - minValue);
                        }
                    }

                    minValue = Math.Min(minValue, v);
                    maxValue = Math.Max(maxValue, v);

                    if (uniqueValues != null)
                    {
                        if (uniqueValues.Add(v))
                        {
                            if (uniqueValues.Count > 256)
                            {
                                uniqueValues = null;
                            }
                        }
                    }

                    ++count;
                }
            }
            else
            {
                foreach (var nv in values)
                {
                    ++count;
                }
            }

            long delta = maxValue - minValue;

            int format;

            if (uniqueValues != null && (delta < 0L || PackedInt32s.BitsRequired(uniqueValues.Count - 1) < PackedInt32s.BitsRequired(delta)) && count <= int.MaxValue)
            {
                format = TABLE_COMPRESSED;
            }
            else if (gcd != 0 && gcd != 1)
            {
                format = GCD_COMPRESSED;
            }
            else
            {
                format = DELTA_COMPRESSED;
            }
            meta.WriteVInt32(field.Number);
            meta.WriteByte((byte)Lucene45DocValuesFormat.NUMERIC);
            meta.WriteVInt32(format);
            if (missing)
            {
                meta.WriteInt64(data.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                WriteMissingBitset(values);
            }
            else
            {
                meta.WriteInt64(-1L);
            }
            meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
            meta.WriteInt64(data.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
            meta.WriteVInt64(count);
            meta.WriteVInt32(BLOCK_SIZE);

            switch (format)
            {
            case GCD_COMPRESSED:
                meta.WriteInt64(minValue);
                meta.WriteInt64(gcd);
                BlockPackedWriter quotientWriter = new BlockPackedWriter(data, BLOCK_SIZE);
                foreach (long?nv in values)
                {
                    quotientWriter.Add((nv.GetValueOrDefault() - minValue) / gcd);
                }
                quotientWriter.Finish();
                break;

            case DELTA_COMPRESSED:
                BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
                foreach (long?nv in values)
                {
                    writer.Add(nv.GetValueOrDefault());
                }
                writer.Finish();
                break;

            case TABLE_COMPRESSED:
                // LUCENENET NOTE: diming an array and then using .CopyTo() for better efficiency than LINQ .ToArray()
                long[] decode = new long[uniqueValues.Count];
                uniqueValues.CopyTo(decode, 0);
                Dictionary <long, int> encode = new Dictionary <long, int>();
                meta.WriteVInt32(decode.Length);
                for (int i = 0; i < decode.Length; i++)
                {
                    meta.WriteInt64(decode[i]);
                    encode[decode[i]] = i;
                }
                int bitsRequired = PackedInt32s.BitsRequired(uniqueValues.Count - 1);
                PackedInt32s.Writer ordsWriter = PackedInt32s.GetWriterNoHeader(data, PackedInt32s.Format.PACKED, (int)count, bitsRequired, PackedInt32s.DEFAULT_BUFFER_SIZE);
                foreach (long?nv in values)
                {
                    ordsWriter.Add(encode[nv.GetValueOrDefault()]);
                }
                ordsWriter.Finish();
                break;

            default:
                throw AssertionError.Create();
            }
        }
Beispiel #27
0
        private void DoTest(DocValuesType type)
        {
            Directory         d        = NewDirectory();
            IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
            int   nDocs = AtLeast(50);
            Field id    = new NumericDocValuesField("id", 0);
            Field f;

            switch (type)
            {
            case DocValuesType.BINARY:
                f = new BinaryDocValuesField("dv", new BytesRef());
                break;

            case DocValuesType.SORTED:
                f = new SortedDocValuesField("dv", new BytesRef());
                break;

            case DocValuesType.NUMERIC:
                f = new NumericDocValuesField("dv", 0);
                break;

            default:
                throw AssertionError.Create();
            }
            Document document = new Document();

            document.Add(id);
            document.Add(f);

            object[] vals = new object[nDocs];

            RandomIndexWriter iw = new RandomIndexWriter(Random, d, iwConfig);

            for (int i = 0; i < nDocs; ++i)
            {
                id.SetInt64Value(i);
                switch (type)
                {
                case DocValuesType.SORTED:
                case DocValuesType.BINARY:
                    do
                    {
                        vals[i] = TestUtil.RandomSimpleString(Random, 20);
                    } while (((string)vals[i]).Length == 0);
                    f.SetBytesValue(new BytesRef((string)vals[i]));
                    break;

                case DocValuesType.NUMERIC:
                    int bitsPerValue = RandomInts.RandomInt32Between(Random, 1, 31);     // keep it an int
                    vals[i] = (long)Random.Next((int)PackedInt32s.MaxValue(bitsPerValue));
                    f.SetInt64Value((long)vals[i]);
                    break;
                }
                iw.AddDocument(document);
                if (Random.NextBoolean() && i % 10 == 9)
                {
                    iw.Commit();
                }
            }
            iw.Dispose();

            DirectoryReader rd = DirectoryReader.Open(d);

            foreach (AtomicReaderContext leave in rd.Leaves)
            {
                FunctionValues ids = (new Int64FieldSource("id")).GetValues(null, leave);
                ValueSource    vs;
                switch (type)
                {
                case DocValuesType.BINARY:
                case DocValuesType.SORTED:
                    vs = new BytesRefFieldSource("dv");
                    break;

                case DocValuesType.NUMERIC:
                    vs = new Int64FieldSource("dv");
                    break;

                default:
                    throw AssertionError.Create();
                }
                FunctionValues values = vs.GetValues(null, leave);
                BytesRef       bytes  = new BytesRef();
                for (int i = 0; i < leave.AtomicReader.MaxDoc; ++i)
                {
                    assertTrue(values.Exists(i));
                    if (vs is BytesRefFieldSource)
                    {
                        assertTrue(values.ObjectVal(i) is string);
                    }
                    else if (vs is Int64FieldSource)
                    {
                        assertTrue(values.ObjectVal(i) is J2N.Numerics.Int64);
                        assertTrue(values.BytesVal(i, bytes));
                    }
                    else
                    {
                        throw AssertionError.Create();
                    }

                    object expected = vals[ids.Int32Val(i)];
                    switch (type)
                    {
                    case DocValuesType.SORTED:
                        values.OrdVal(i);     // no exception
                        assertTrue(values.NumOrd >= 1);
                        goto case DocValuesType.BINARY;

                    case DocValuesType.BINARY:
                        assertEquals(expected, values.ObjectVal(i));
                        assertEquals(expected, values.StrVal(i));
                        assertEquals(expected, values.ObjectVal(i));
                        assertEquals(expected, values.StrVal(i));
                        assertTrue(values.BytesVal(i, bytes));
                        assertEquals(new BytesRef((string)expected), bytes);
                        break;

                    case DocValuesType.NUMERIC:
                        assertEquals(Convert.ToInt64(expected, CultureInfo.InvariantCulture), values.Int64Val(i));
                        break;
                    }
                }
            }
            rd.Dispose();
            d.Dispose();
        }
Beispiel #28
0
        public override Fields Get(int doc)
        {
            EnsureOpen();

            // seek to the right place
            {
                long startPointer = indexReader.GetStartPointer(doc);
                vectorsStream.Seek(startPointer);
            }

            // decode
            // - docBase: first doc ID of the chunk
            // - chunkDocs: number of docs of the chunk
            int docBase   = vectorsStream.ReadVInt32();
            int chunkDocs = vectorsStream.ReadVInt32();

            if (doc < docBase || doc >= docBase + chunkDocs || docBase + chunkDocs > numDocs)
            {
                throw new CorruptIndexException("docBase=" + docBase + ",chunkDocs=" + chunkDocs + ",doc=" + doc + " (resource=" + vectorsStream + ")");
            }

            int skip;        // number of fields to skip
            int numFields;   // number of fields of the document we're looking for
            int totalFields; // total number of fields of the chunk (sum for all docs)

            if (chunkDocs == 1)
            {
                skip      = 0;
                numFields = totalFields = vectorsStream.ReadVInt32();
            }
            else
            {
                reader.Reset(vectorsStream, chunkDocs);
                int sum = 0;
                for (int i = docBase; i < doc; ++i)
                {
                    sum += (int)reader.Next();
                }
                skip      = sum;
                numFields = (int)reader.Next();
                sum      += numFields;
                for (int i = doc + 1; i < docBase + chunkDocs; ++i)
                {
                    sum += (int)reader.Next();
                }
                totalFields = sum;
            }

            if (numFields == 0)
            {
                // no vectors
                return(null);
            }

            // read field numbers that have term vectors
            int[] fieldNums;
            {
                int token = vectorsStream.ReadByte() & 0xFF;
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(token != 0);                           // means no term vectors, cannot happen since we checked for numFields == 0
                }
                int bitsPerFieldNum     = token & 0x1F;
                int totalDistinctFields = token.TripleShift(5);
                if (totalDistinctFields == 0x07)
                {
                    totalDistinctFields += vectorsStream.ReadVInt32();
                }
                ++totalDistinctFields;
                PackedInt32s.IReaderIterator it = PackedInt32s.GetReaderIteratorNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalDistinctFields, bitsPerFieldNum, 1);
                fieldNums = new int[totalDistinctFields];
                for (int i = 0; i < totalDistinctFields; ++i)
                {
                    fieldNums[i] = (int)it.Next();
                }
            }

            // read field numbers and flags
            int[] fieldNumOffs = new int[numFields];
            PackedInt32s.Reader flags;
            {
                int bitsPerOff = PackedInt32s.BitsRequired(fieldNums.Length - 1);
                PackedInt32s.Reader allFieldNumOffs = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff);
                switch (vectorsStream.ReadVInt32())
                {
                case 0:
                    PackedInt32s.Reader  fieldFlags = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, fieldNums.Length, CompressingTermVectorsWriter.FLAGS_BITS);
                    PackedInt32s.Mutable f          = PackedInt32s.GetMutable(totalFields, CompressingTermVectorsWriter.FLAGS_BITS, PackedInt32s.COMPACT);
                    for (int i = 0; i < totalFields; ++i)
                    {
                        int fieldNumOff = (int)allFieldNumOffs.Get(i);
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(fieldNumOff >= 0 && fieldNumOff < fieldNums.Length);
                        }
                        int fgs = (int)fieldFlags.Get(fieldNumOff);
                        f.Set(i, fgs);
                    }
                    flags = f;
                    break;

                case 1:
                    flags = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, CompressingTermVectorsWriter.FLAGS_BITS);
                    break;

                default:
                    throw AssertionError.Create();
                }
                for (int i = 0; i < numFields; ++i)
                {
                    fieldNumOffs[i] = (int)allFieldNumOffs.Get(skip + i);
                }
            }

            // number of terms per field for all fields
            PackedInt32s.Reader numTerms;
            int totalTerms;
            {
                int bitsRequired = vectorsStream.ReadVInt32();
                numTerms = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, bitsRequired);
                int sum = 0;
                for (int i = 0; i < totalFields; ++i)
                {
                    sum += (int)numTerms.Get(i);
                }
                totalTerms = sum;
            }

            // term lengths
            int docOff = 0, docLen = 0, totalLen;

            int[]   fieldLengths  = new int[numFields];
            int[][] prefixLengths = new int[numFields][];
            int[][] suffixLengths = new int[numFields][];
            {
                reader.Reset(vectorsStream, totalTerms);
                // skip
                int toSkip = 0;
                for (int i = 0; i < skip; ++i)
                {
                    toSkip += (int)numTerms.Get(i);
                }
                reader.Skip(toSkip);
                // read prefix lengths
                for (int i = 0; i < numFields; ++i)
                {
                    int   termCount          = (int)numTerms.Get(skip + i);
                    int[] fieldPrefixLengths = new int[termCount];
                    prefixLengths[i] = fieldPrefixLengths;
                    for (int j = 0; j < termCount;)
                    {
                        Int64sRef next = reader.Next(termCount - j);
                        for (int k = 0; k < next.Length; ++k)
                        {
                            fieldPrefixLengths[j++] = (int)next.Int64s[next.Offset + k];
                        }
                    }
                }
                reader.Skip(totalTerms - reader.Ord);

                reader.Reset(vectorsStream, totalTerms);
                // skip
                //toSkip = 0; // LUCENENET: IDE0059: Remove unnecessary value assignment
                for (int i = 0; i < skip; ++i)
                {
                    for (int j = 0; j < numTerms.Get(i); ++j)
                    {
                        docOff += (int)reader.Next();
                    }
                }
                for (int i = 0; i < numFields; ++i)
                {
                    int   termCount          = (int)numTerms.Get(skip + i);
                    int[] fieldSuffixLengths = new int[termCount];
                    suffixLengths[i] = fieldSuffixLengths;
                    for (int j = 0; j < termCount;)
                    {
                        Int64sRef next = reader.Next(termCount - j);
                        for (int k = 0; k < next.Length; ++k)
                        {
                            fieldSuffixLengths[j++] = (int)next.Int64s[next.Offset + k];
                        }
                    }
                    fieldLengths[i] = Sum(suffixLengths[i]);
                    docLen         += fieldLengths[i];
                }
                totalLen = docOff + docLen;
                for (int i = skip + numFields; i < totalFields; ++i)
                {
                    for (int j = 0; j < numTerms.Get(i); ++j)
                    {
                        totalLen += (int)reader.Next();
                    }
                }
            }

            // term freqs
            int[] termFreqs = new int[totalTerms];
            {
                reader.Reset(vectorsStream, totalTerms);
                for (int i = 0; i < totalTerms;)
                {
                    Int64sRef next = reader.Next(totalTerms - i);
                    for (int k = 0; k < next.Length; ++k)
                    {
                        termFreqs[i++] = 1 + (int)next.Int64s[next.Offset + k];
                    }
                }
            }

            // total number of positions, offsets and payloads
            int totalPositions = 0, totalOffsets = 0, totalPayloads = 0;

            for (int i = 0, termIndex = 0; i < totalFields; ++i)
            {
                int f         = (int)flags.Get(i);
                int termCount = (int)numTerms.Get(i);
                for (int j = 0; j < termCount; ++j)
                {
                    int freq = termFreqs[termIndex++];
                    if ((f & CompressingTermVectorsWriter.POSITIONS) != 0)
                    {
                        totalPositions += freq;
                    }
                    if ((f & CompressingTermVectorsWriter.OFFSETS) != 0)
                    {
                        totalOffsets += freq;
                    }
                    if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
                    {
                        totalPayloads += freq;
                    }
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(i != totalFields - 1 || termIndex == totalTerms, "{0} {1}", termIndex, totalTerms);
                }
            }

            int[][] positionIndex = PositionIndex(skip, numFields, numTerms, termFreqs);
            int[][] positions, startOffsets, lengths;
            if (totalPositions > 0)
            {
                positions = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.POSITIONS, totalPositions, positionIndex);
            }
            else
            {
                positions = new int[numFields][];
            }

            if (totalOffsets > 0)
            {
                // average number of chars per term
                float[] charsPerTerm = new float[fieldNums.Length];
                for (int i = 0; i < charsPerTerm.Length; ++i)
                {
                    charsPerTerm[i] = J2N.BitConversion.Int32BitsToSingle(vectorsStream.ReadInt32());
                }
                startOffsets = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.OFFSETS, totalOffsets, positionIndex);
                lengths      = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.OFFSETS, totalOffsets, positionIndex);

                for (int i = 0; i < numFields; ++i)
                {
                    int[] fStartOffsets = startOffsets[i];
                    int[] fPositions    = positions[i];
                    // patch offsets from positions
                    if (fStartOffsets != null && fPositions != null)
                    {
                        float fieldCharsPerTerm = charsPerTerm[fieldNumOffs[i]];
                        for (int j = 0; j < startOffsets[i].Length; ++j)
                        {
                            fStartOffsets[j] += (int)(fieldCharsPerTerm * fPositions[j]);
                        }
                    }
                    if (fStartOffsets != null)
                    {
                        int[] fPrefixLengths = prefixLengths[i];
                        int[] fSuffixLengths = suffixLengths[i];
                        int[] fLengths       = lengths[i];
                        for (int j = 0, end = (int)numTerms.Get(skip + i); j < end; ++j)
                        {
                            // delta-decode start offsets and  patch lengths using term lengths
                            int termLength = fPrefixLengths[j] + fSuffixLengths[j];
                            lengths[i][positionIndex[i][j]] += termLength;
                            for (int k = positionIndex[i][j] + 1; k < positionIndex[i][j + 1]; ++k)
                            {
                                fStartOffsets[k] += fStartOffsets[k - 1];
                                fLengths[k]      += termLength;
                            }
                        }
                    }
                }
            }
            else
            {
                startOffsets = lengths = new int[numFields][];
            }
            if (totalPositions > 0)
            {
                // delta-decode positions
                for (int i = 0; i < numFields; ++i)
                {
                    int[] fPositions     = positions[i];
                    int[] fpositionIndex = positionIndex[i];
                    if (fPositions != null)
                    {
                        for (int j = 0, end = (int)numTerms.Get(skip + i); j < end; ++j)
                        {
                            // delta-decode start offsets
                            for (int k = fpositionIndex[j] + 1; k < fpositionIndex[j + 1]; ++k)
                            {
                                fPositions[k] += fPositions[k - 1];
                            }
                        }
                    }
                }
            }

            // payload lengths
            int[][] payloadIndex       = new int[numFields][];
            int     totalPayloadLength = 0;
            int     payloadOff         = 0;
            int     payloadLen         = 0;

            if (totalPayloads > 0)
            {
                reader.Reset(vectorsStream, totalPayloads);
                // skip
                int termIndex = 0;
                for (int i = 0; i < skip; ++i)
                {
                    int f         = (int)flags.Get(i);
                    int termCount = (int)numTerms.Get(i);
                    if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
                    {
                        for (int j = 0; j < termCount; ++j)
                        {
                            int freq = termFreqs[termIndex + j];
                            for (int k = 0; k < freq; ++k)
                            {
                                int l = (int)reader.Next();
                                payloadOff += l;
                            }
                        }
                    }
                    termIndex += termCount;
                }
                totalPayloadLength = payloadOff;
                // read doc payload lengths
                for (int i = 0; i < numFields; ++i)
                {
                    int f         = (int)flags.Get(skip + i);
                    int termCount = (int)numTerms.Get(skip + i);
                    if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
                    {
                        int totalFreq = positionIndex[i][termCount];
                        payloadIndex[i] = new int[totalFreq + 1];
                        int posIdx = 0;
                        payloadIndex[i][posIdx] = payloadLen;
                        for (int j = 0; j < termCount; ++j)
                        {
                            int freq = termFreqs[termIndex + j];
                            for (int k = 0; k < freq; ++k)
                            {
                                int payloadLength = (int)reader.Next();
                                payloadLen += payloadLength;
                                payloadIndex[i][posIdx + 1] = payloadLen;
                                ++posIdx;
                            }
                        }
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(posIdx == totalFreq);
                        }
                    }
                    termIndex += termCount;
                }
                totalPayloadLength += payloadLen;
                for (int i = skip + numFields; i < totalFields; ++i)
                {
                    int f         = (int)flags.Get(i);
                    int termCount = (int)numTerms.Get(i);
                    if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
                    {
                        for (int j = 0; j < termCount; ++j)
                        {
                            int freq = termFreqs[termIndex + j];
                            for (int k = 0; k < freq; ++k)
                            {
                                totalPayloadLength += (int)reader.Next();
                            }
                        }
                    }
                    termIndex += termCount;
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(termIndex == totalTerms, "{0} {1}", termIndex, totalTerms);
                }
            }

            // decompress data
            BytesRef suffixBytes = new BytesRef();

            decompressor.Decompress(vectorsStream, totalLen + totalPayloadLength, docOff + payloadOff, docLen + payloadLen, suffixBytes);
            suffixBytes.Length = docLen;
            BytesRef payloadBytes = new BytesRef(suffixBytes.Bytes, suffixBytes.Offset + docLen, payloadLen);

            int[] FieldFlags = new int[numFields];
            for (int i = 0; i < numFields; ++i)
            {
                FieldFlags[i] = (int)flags.Get(skip + i);
            }

            int[] fieldNumTerms = new int[numFields];
            for (int i = 0; i < numFields; ++i)
            {
                fieldNumTerms[i] = (int)numTerms.Get(skip + i);
            }

            int[][] fieldTermFreqs = new int[numFields][];
            {
                int termIdx = 0;
                for (int i = 0; i < skip; ++i)
                {
                    termIdx += (int)numTerms.Get(i);
                }
                for (int i = 0; i < numFields; ++i)
                {
                    int termCount = (int)numTerms.Get(skip + i);
                    fieldTermFreqs[i] = new int[termCount];
                    for (int j = 0; j < termCount; ++j)
                    {
                        fieldTermFreqs[i][j] = termFreqs[termIdx++];
                    }
                }
            }

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(Sum(fieldLengths) == docLen, "{0} != {1}", Sum(fieldLengths), docLen);
            }

            return(new TVFields(this, fieldNums, FieldFlags, fieldNumOffs, fieldNumTerms, fieldLengths, prefixLengths, suffixLengths, fieldTermFreqs, positionIndex, positions, startOffsets, lengths, payloadBytes, payloadIndex, suffixBytes));
        }
Beispiel #29
0
        public override NumericDocValues GetNumeric(FieldInfo field)
        {
            UninterruptableMonitor.Enter(this);
            try
            {
                if (!numericInstances.TryGetValue(field.Number, out NumericDocValues instance))
                {
                    string     fileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "dat");
                    IndexInput input    = dir.OpenInput(fileName, state.Context);
                    bool       success  = false;
                    try
                    {
                        var type = field.GetAttribute(legacyKey).ToLegacyDocValuesType();

                        //switch (Enum.Parse(typeof(LegacyDocValuesType), field.GetAttribute(LegacyKey)))
                        //{
                        if (type == LegacyDocValuesType.VAR_INTS)
                        {
                            instance = LoadVarInt32sField(/* field, // LUCENENET: Never read */ input);
                        }
                        else if (type == LegacyDocValuesType.FIXED_INTS_8)
                        {
                            instance = LoadByteField(/* field, // LUCENENET: Never read */ input);
                        }
                        else if (type == LegacyDocValuesType.FIXED_INTS_16)
                        {
                            instance = LoadInt16Field(/* field, // LUCENENET: Never read */ input);
                        }
                        else if (type == LegacyDocValuesType.FIXED_INTS_32)
                        {
                            instance = LoadInt32Field(/* field, // LUCENENET: Never read */ input);
                        }
                        else if (type == LegacyDocValuesType.FIXED_INTS_64)
                        {
                            instance = LoadInt64Field(/* field, // LUCENENET: Never read */ input);
                        }
                        else if (type == LegacyDocValuesType.FLOAT_32)
                        {
                            instance = LoadSingleField(/* field, // LUCENENET: Never read */ input);
                        }
                        else if (type == LegacyDocValuesType.FLOAT_64)
                        {
                            instance = LoadDoubleField(/* field, // LUCENENET: Never read */ input);
                        }
                        else
                        {
                            throw AssertionError.Create();
                        }

                        CodecUtil.CheckEOF(input);
                        success = true;
                    }
                    finally
                    {
                        if (success)
                        {
                            IOUtils.Dispose(input);
                        }
                        else
                        {
                            IOUtils.DisposeWhileHandlingException(input);
                        }
                    }
                    numericInstances[field.Number] = instance;
                }
                return(instance);
            }
            finally
            {
                UninterruptableMonitor.Exit(this);
            }
        }
Beispiel #30
0
        public override void WriteField(FieldInfo info, IIndexableField field)
        {
            fieldsStream.WriteVInt32(info.Number);
            int      bits = 0;
            BytesRef bytes;
            string   @string;

            // TODO: maybe a field should serialize itself?
            // this way we don't bake into indexer all these
            // specific encodings for different fields?  and apps
            // can customize...

            // LUCENENET specific - To avoid boxing/unboxing, we don't
            // call GetNumericValue(). Instead, we check the field.NumericType and then
            // call the appropriate conversion method.
            if (field.NumericType != NumericFieldType.NONE)
            {
                switch (field.NumericType)
                {
                case NumericFieldType.BYTE:
                case NumericFieldType.INT16:
                case NumericFieldType.INT32:
                    bits |= FIELD_IS_NUMERIC_INT;
                    break;

                case NumericFieldType.INT64:
                    bits |= FIELD_IS_NUMERIC_LONG;
                    break;

                case NumericFieldType.SINGLE:
                    bits |= FIELD_IS_NUMERIC_FLOAT;
                    break;

                case NumericFieldType.DOUBLE:
                    bits |= FIELD_IS_NUMERIC_DOUBLE;
                    break;

                default:
                    throw new ArgumentException("cannot store numeric type " + field.NumericType);
                }

                @string = null;
                bytes   = null;
            }
            else
            {
                bytes = field.GetBinaryValue();
                if (bytes != null)
                {
                    bits   |= FIELD_IS_BINARY;
                    @string = null;
                }
                else
                {
                    @string = field.GetStringValue();
                    if (@string is null)
                    {
                        throw new ArgumentException("field " + field.Name + " is stored but does not have binaryValue, stringValue nor numericValue");
                    }
                }
            }

            fieldsStream.WriteByte((byte)bits);

            if (bytes != null)
            {
                fieldsStream.WriteVInt32(bytes.Length);
                fieldsStream.WriteBytes(bytes.Bytes, bytes.Offset, bytes.Length);
            }
            else if (@string != null)
            {
                fieldsStream.WriteString(field.GetStringValue());
            }
            else
            {
                switch (field.NumericType)
                {
                case NumericFieldType.BYTE:
                case NumericFieldType.INT16:
                case NumericFieldType.INT32:
                    fieldsStream.WriteInt32(field.GetInt32Value().Value);
                    break;

                case NumericFieldType.INT64:
                    fieldsStream.WriteInt64(field.GetInt64Value().Value);
                    break;

                case NumericFieldType.SINGLE:
                    fieldsStream.WriteInt32(BitConversion.SingleToInt32Bits(field.GetSingleValue().Value));
                    break;

                case NumericFieldType.DOUBLE:
                    fieldsStream.WriteInt64(BitConversion.DoubleToInt64Bits(field.GetDoubleValue().Value));
                    break;

                default:
                    throw AssertionError.Create("Cannot get here");
                }
            }
        }