コード例 #1
0
        public virtual void TestDataInputOutput()
        {
            Random random = Random;

            for (int iter = 0; iter < 5 * RandomMultiplier; iter++)
            {
                BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("testOverflow"));
                if (dir is MockDirectoryWrapper)
                {
                    ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER;
                }
                int         blockBits = TestUtil.NextInt32(random, 1, 20);
                int         blockSize = 1 << blockBits;
                PagedBytes  p         = new PagedBytes(blockBits);
                IndexOutput @out      = dir.CreateOutput("foo", IOContext.DEFAULT);
                int         numBytes  = TestUtil.NextInt32(LuceneTestCase.Random, 2, 10000000);

                byte[] answer = new byte[numBytes];
                LuceneTestCase.Random.NextBytes(answer);
                int written = 0;
                while (written < numBytes)
                {
                    if (LuceneTestCase.Random.Next(10) == 7)
                    {
                        @out.WriteByte(answer[written++]);
                    }
                    else
                    {
                        int chunk = Math.Min(LuceneTestCase.Random.Next(1000), numBytes - written);
                        @out.WriteBytes(answer, written, chunk);
                        written += chunk;
                    }
                }

                @out.Dispose();
                IndexInput input = dir.OpenInput("foo", IOContext.DEFAULT);
                DataInput  @in   = (DataInput)input.Clone();

                p.Copy(input, input.Length);
                PagedBytes.Reader reader = p.Freeze(random.NextBoolean());

                byte[] verify = new byte[numBytes];
                int    read   = 0;
                while (read < numBytes)
                {
                    if (LuceneTestCase.Random.Next(10) == 7)
                    {
                        verify[read++] = @in.ReadByte();
                    }
                    else
                    {
                        int chunk = Math.Min(LuceneTestCase.Random.Next(1000), numBytes - read);
                        @in.ReadBytes(verify, read, chunk);
                        read += chunk;
                    }
                }
                Assert.IsTrue(Arrays.Equals(answer, verify));

                BytesRef slice = new BytesRef();
                for (int iter2 = 0; iter2 < 100; iter2++)
                {
                    int pos = random.Next(numBytes - 1);
                    int len = random.Next(Math.Min(blockSize + 1, numBytes - pos));
                    reader.FillSlice(slice, pos, len);
                    for (int byteUpto = 0; byteUpto < len; byteUpto++)
                    {
                        Assert.AreEqual(answer[pos + byteUpto], (byte)slice.Bytes[slice.Offset + byteUpto]);
                    }
                }
                input.Dispose();
                dir.Dispose();
            }
        }
コード例 #2
0
        public override void AddNumericField(FieldInfo field, IEnumerable <long?> values)
        {
            meta.WriteVInt32(field.Number);
            meta.WriteByte((byte)NUMBER);
            meta.WriteInt64(data.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
            long minValue = long.MaxValue;
            long maxValue = long.MinValue;
            long gcd      = 0;

            // TODO: more efficient?
            JCG.HashSet <long> uniqueValues /* = null*/; // LUCENENET: IDE0059: Remove unnecessary value assignment
            if (true)
            {
                uniqueValues = new JCG.HashSet <long>();

                long count = 0;
                foreach (long?nv in values)
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(nv != null);
                    }
                    long v = nv.Value;

                    if (gcd != 1)
                    {
                        if (v < long.MinValue / 2 || v > long.MaxValue / 2)
                        {
                            // in that case v - minValue might overflow and make the GCD computation return
                            // wrong results. Since these extreme values are unlikely, we just discard
                            // GCD computation for them
                            gcd = 1;
                        } // minValue needs to be set first
                        else if (count != 0)
                        {
                            gcd = MathUtil.Gcd(gcd, v - minValue);
                        }
                    }

                    minValue = Math.Min(minValue, v);
                    maxValue = Math.Max(maxValue, v);

                    if (uniqueValues != null)
                    {
                        if (uniqueValues.Add(v))
                        {
                            if (uniqueValues.Count > 256)
                            {
                                uniqueValues = null;
                            }
                        }
                    }

                    ++count;
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(count == maxDoc);
                }
            }

            if (uniqueValues != null)
            {
                // small number of unique values
                int           bitsPerValue  = PackedInt32s.BitsRequired(uniqueValues.Count - 1);
                FormatAndBits formatAndBits = PackedInt32s.FastestFormatAndBits(maxDoc, bitsPerValue, acceptableOverheadRatio);
                if (formatAndBits.BitsPerValue == 8 && minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue)
                {
                    meta.WriteByte((byte)UNCOMPRESSED); // uncompressed
                    foreach (long?nv in values)
                    {
                        data.WriteByte((byte)nv.GetValueOrDefault());
                    }
                }
                else
                {
                    meta.WriteByte((byte)TABLE_COMPRESSED); // table-compressed
                    var decode = new long[uniqueValues.Count];
                    uniqueValues.CopyTo(decode);
                    var encode = new Dictionary <long, int>();
                    data.WriteVInt32(decode.Length);
                    for (int i = 0; i < decode.Length; i++)
                    {
                        data.WriteInt64(decode[i]);
                        encode[decode[i]] = i;
                    }

                    meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
                    data.WriteVInt32(formatAndBits.Format.Id);
                    data.WriteVInt32(formatAndBits.BitsPerValue);

                    PackedInt32s.Writer writer = PackedInt32s.GetWriterNoHeader(data, formatAndBits.Format, maxDoc, formatAndBits.BitsPerValue, PackedInt32s.DEFAULT_BUFFER_SIZE);
                    foreach (long?nv in values)
                    {
                        writer.Add(encode[nv.GetValueOrDefault()]);
                    }
                    writer.Finish();
                }
            }
            else if (gcd != 0 && gcd != 1)
            {
                meta.WriteByte((byte)GCD_COMPRESSED);
                meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
                data.WriteInt64(minValue);
                data.WriteInt64(gcd);
                data.WriteVInt32(BLOCK_SIZE);

                var writer = new BlockPackedWriter(data, BLOCK_SIZE);
                foreach (long?nv in values)
                {
                    writer.Add((nv.GetValueOrDefault() - minValue) / gcd);
                }
                writer.Finish();
            }
            else
            {
                meta.WriteByte((byte)DELTA_COMPRESSED); // delta-compressed

                meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
                data.WriteVInt32(BLOCK_SIZE);

                var writer = new BlockPackedWriter(data, BLOCK_SIZE);
                foreach (long?nv in values)
                {
                    writer.Add(nv.GetValueOrDefault());
                }
                writer.Finish();
            }
        }
コード例 #3
0
 public override void WriteByte(byte b)
 {
     _indexOutput.WriteByte(b);
 }
コード例 #4
0
        public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
        {
            string      fileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
            IndexOutput output   = directory.CreateOutput(fileName, context);
            bool        success  = false;

            try
            {
                CodecUtil.WriteHeader(output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT);
                output.WriteVInt32(infos.Count);
                foreach (FieldInfo fi in infos)
                {
                    IndexOptions indexOptions = fi.IndexOptions;
                    sbyte        bits         = 0x0;
                    if (fi.HasVectors)
                    {
                        bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR;
                    }
                    if (fi.OmitsNorms)
                    {
                        bits |= Lucene46FieldInfosFormat.OMIT_NORMS;
                    }
                    if (fi.HasPayloads)
                    {
                        bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS;
                    }
                    if (fi.IsIndexed)
                    {
                        bits |= Lucene46FieldInfosFormat.IS_INDEXED;
                        Debug.Assert(indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.HasPayloads);
                        if (indexOptions == IndexOptions.DOCS_ONLY)
                        {
                            bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
                        }
                        else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
                        {
                            bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
                        }
                        else if (indexOptions == IndexOptions.DOCS_AND_FREQS)
                        {
                            bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS;
                        }
                    }
                    output.WriteString(fi.Name);
                    output.WriteVInt32(fi.Number);
                    output.WriteByte((byte)bits);

                    // pack the DV types in one byte
                    var dv  = DocValuesByte(fi.DocValuesType);
                    var nrm = DocValuesByte(fi.NormType);
                    Debug.Assert((dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0);
                    var val = (byte)(0xff & ((nrm << 4) | (byte)dv));
                    output.WriteByte(val);
                    output.WriteInt64(fi.DocValuesGen);
                    output.WriteStringStringMap(fi.Attributes);
                }
                CodecUtil.WriteFooter(output);
                success = true;
            }
            finally
            {
                if (success)
                {
                    output.Dispose();
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(output);
                }
            }
        }
コード例 #5
0
        public override void WriteField(FieldInfo info, IIndexableField field)
        {
            fieldsStream.WriteVInt32(info.Number);
            int      bits = 0;
            BytesRef bytes;
            string   @string;

            // TODO: maybe a field should serialize itself?
            // this way we don't bake into indexer all these
            // specific encodings for different fields?  and apps
            // can customize...

            // LUCENENET specific - To avoid boxing/unboxing, we don't
            // call GetNumericValue(). Instead, we check the field.NumericType and then
            // call the appropriate conversion method.
            if (field.NumericType != NumericFieldType.NONE)
            {
                switch (field.NumericType)
                {
                case NumericFieldType.BYTE:
                case NumericFieldType.INT16:
                case NumericFieldType.INT32:
                    bits |= FIELD_IS_NUMERIC_INT;
                    break;

                case NumericFieldType.INT64:
                    bits |= FIELD_IS_NUMERIC_LONG;
                    break;

                case NumericFieldType.SINGLE:
                    bits |= FIELD_IS_NUMERIC_FLOAT;
                    break;

                case NumericFieldType.DOUBLE:
                    bits |= FIELD_IS_NUMERIC_DOUBLE;
                    break;

                default:
                    throw new ArgumentException("cannot store numeric type " + field.NumericType);
                }

                @string = null;
                bytes   = null;
            }
            else
            {
                bytes = field.GetBinaryValue();
                if (bytes != null)
                {
                    bits   |= FIELD_IS_BINARY;
                    @string = null;
                }
                else
                {
                    @string = field.GetStringValue();
                    if (@string is null)
                    {
                        throw new ArgumentException("field " + field.Name + " is stored but does not have binaryValue, stringValue nor numericValue");
                    }
                }
            }

            fieldsStream.WriteByte((byte)bits);

            if (bytes != null)
            {
                fieldsStream.WriteVInt32(bytes.Length);
                fieldsStream.WriteBytes(bytes.Bytes, bytes.Offset, bytes.Length);
            }
            else if (@string != null)
            {
                fieldsStream.WriteString(field.GetStringValue());
            }
            else
            {
                switch (field.NumericType)
                {
                case NumericFieldType.BYTE:
                case NumericFieldType.INT16:
                case NumericFieldType.INT32:
                    fieldsStream.WriteInt32(field.GetInt32Value().Value);
                    break;

                case NumericFieldType.INT64:
                    fieldsStream.WriteInt64(field.GetInt64Value().Value);
                    break;

                case NumericFieldType.SINGLE:
                    fieldsStream.WriteInt32(BitConversion.SingleToInt32Bits(field.GetSingleValue().Value));
                    break;

                case NumericFieldType.DOUBLE:
                    fieldsStream.WriteInt64(BitConversion.DoubleToInt64Bits(field.GetDoubleValue().Value));
                    break;

                default:
                    throw AssertionError.Create("Cannot get here");
                }
            }
        }
コード例 #6
0
 public override void WriteByte(byte b)
 {
     _cacheDirIndexOutput.WriteByte(b);
 }
コード例 #7
0
        public override void AddBinaryField(FieldInfo field, IEnumerable <BytesRef> values)
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(FieldSeen(field.Name));
                Debugging.Assert(field.DocValuesType == DocValuesType.BINARY);
            }

            var maxLength = 0;

            foreach (var value in values)
            {
                var length = value == null ? 0 : value.Length;
                maxLength = Math.Max(maxLength, length);
            }
            WriteFieldEntry(field, DocValuesType.BINARY);

            // write maxLength
            SimpleTextUtil.Write(data, MAXLENGTH);
            SimpleTextUtil.Write(data, maxLength.ToString(CultureInfo.InvariantCulture), scratch);
            SimpleTextUtil.WriteNewline(data);

            var maxBytesLength = maxLength.ToString(CultureInfo.InvariantCulture).Length;
            var sb             = new StringBuilder();

            for (var i = 0; i < maxBytesLength; i++)
            {
                sb.Append('0');
            }
            // write our pattern for encoding lengths
            var patternString = sb.ToString();

            SimpleTextUtil.Write(data, PATTERN);
            SimpleTextUtil.Write(data, patternString, scratch);
            SimpleTextUtil.WriteNewline(data);


            int numDocsWritten = 0;

            foreach (BytesRef value in values)
            {
                int length = value == null ? 0 : value.Length;
                SimpleTextUtil.Write(data, LENGTH);
                SimpleTextUtil.Write(data, length.ToString(patternString, CultureInfo.InvariantCulture), scratch);
                SimpleTextUtil.WriteNewline(data);

                // write bytes -- don't use SimpleText.Write
                // because it escapes:
                if (value != null)
                {
                    data.WriteBytes(value.Bytes, value.Offset, value.Length);
                }

                // pad to fit
                for (int i = length; i < maxLength; i++)
                {
                    data.WriteByte((byte)(sbyte)' ');
                }
                SimpleTextUtil.WriteNewline(data);
                SimpleTextUtil.Write(data, value == null ? "F" : "T", scratch);
                SimpleTextUtil.WriteNewline(data);
                numDocsWritten++;
            }

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(numDocs == numDocsWritten);
            }
        }
コード例 #8
0
        public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
        {
            string      fileName = IndexFileNames.SegmentFileName(segmentName, "", FIELD_INFOS_EXTENSION);
            IndexOutput output   = directory.CreateOutput(fileName, context);
            bool        success  = false;

            try
            {
                output.WriteVInt32(FORMAT_PREFLEX_RW);
                output.WriteVInt32(infos.Count);
                foreach (FieldInfo fi in infos)
                {
                    sbyte bits = 0x0;
                    if (fi.HasVectors)
                    {
                        bits |= STORE_TERMVECTOR;
                    }
                    if (fi.OmitsNorms)
                    {
                        bits |= OMIT_NORMS;
                    }
                    if (fi.HasPayloads)
                    {
                        bits |= STORE_PAYLOADS;
                    }
                    if (fi.IsIndexed)
                    {
                        bits |= IS_INDEXED;
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(fi.IndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.HasPayloads);
                        }
                        if (fi.IndexOptions == IndexOptions.DOCS_ONLY)
                        {
                            bits |= OMIT_TERM_FREQ_AND_POSITIONS;
                        }
                        else if (fi.IndexOptions == IndexOptions.DOCS_AND_FREQS)
                        {
                            bits |= OMIT_POSITIONS;
                        }
                    }
                    output.WriteString(fi.Name);

                    /*
                     * we need to write the field number since IW tries
                     * to stabelize the field numbers across segments so the
                     * FI ordinal is not necessarily equivalent to the field number
                     */
                    output.WriteInt32(fi.Number);
                    output.WriteByte((byte)bits);
                    if (fi.IsIndexed && !fi.OmitsNorms)
                    {
                        // to allow null norm types we need to indicate if norms are written
                        // only in RW case
                        output.WriteByte((byte)(sbyte)(fi.NormType == Index.DocValuesType.NONE ? 0 : 1));
                    }
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(fi.Attributes == null);                           // not used or supported
                    }
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    output.Dispose();
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(output);
                }
            }
        }
コード例 #9
0
        public override void WriteField(FieldInfo info, IIndexableField field)
        {
            fieldsStream.WriteVInt32(info.Number);
            int      bits = 0;
            BytesRef bytes;
            string   @string;
            // TODO: maybe a field should serialize itself?
            // this way we don't bake into indexer all these
            // specific encodings for different fields?  and apps
            // can customize...

            object number = (object)field.GetNumericValue();

            if (number != null)
            {
                if (number is sbyte || number is short || number is int)
                {
                    bits |= FIELD_IS_NUMERIC_INT;
                }
                else if (number is long)
                {
                    bits |= FIELD_IS_NUMERIC_LONG;
                }
                else if (number is float)
                {
                    bits |= FIELD_IS_NUMERIC_FLOAT;
                }
                else if (number is double)
                {
                    bits |= FIELD_IS_NUMERIC_DOUBLE;
                }
                else
                {
                    throw new System.ArgumentException("cannot store numeric type " + number.GetType());
                }
                @string = null;
                bytes   = null;
            }
            else
            {
                bytes = field.GetBinaryValue();
                if (bytes != null)
                {
                    bits   |= FIELD_IS_BINARY;
                    @string = null;
                }
                else
                {
                    @string = field.GetStringValue();
                    if (@string == null)
                    {
                        throw new System.ArgumentException("field " + field.Name + " is stored but does not have binaryValue, stringValue nor numericValue");
                    }
                }
            }

            fieldsStream.WriteByte((byte)(sbyte)bits);

            if (bytes != null)
            {
                fieldsStream.WriteVInt32(bytes.Length);
                fieldsStream.WriteBytes(bytes.Bytes, bytes.Offset, bytes.Length);
            }
            else if (@string != null)
            {
                fieldsStream.WriteString(field.GetStringValue());
            }
            else
            {
                if (number is sbyte || number is short || number is int)
                {
                    fieldsStream.WriteInt32((int)number);
                }
                else if (number is long)
                {
                    fieldsStream.WriteInt64((long)number);
                }
                else if (number is float)
                {
                    fieldsStream.WriteInt32(Number.SingleToInt32Bits((float)number));
                }
                else if (number is double)
                {
                    fieldsStream.WriteInt64(BitConverter.DoubleToInt64Bits((double)number));
                }
                else
                {
                    throw new InvalidOperationException("Cannot get here");
                }
            }
        }
コード例 #10
0
        /// <summary>Called once per field per document if term vectors
        /// are enabled, to write the vectors to
        /// RAMOutputStream, which is then quickly flushed to
        /// the real term vectors files in the Directory.
        /// </summary>
        internal override void  Finish()
        {
            System.Diagnostics.Debug.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.finish start"));

            int numPostings = termsHashPerField.numPostings;

            System.Diagnostics.Debug.Assert(numPostings >= 0);

            if (!doVectors || numPostings == 0)
            {
                return;
            }

            if (numPostings > maxNumPostings)
            {
                maxNumPostings = numPostings;
            }

            IndexOutput tvf = perThread.doc.perDocTvf;

            // This is called once, after inverting all occurences
            // of a given field in the doc.  At this point we flush
            // our hash into the DocWriter.

            System.Diagnostics.Debug.Assert(fieldInfo.storeTermVector);
            System.Diagnostics.Debug.Assert(perThread.VectorFieldsInOrder(fieldInfo));

            perThread.doc.AddField(termsHashPerField.fieldInfo.number);

            RawPostingList[] postings = termsHashPerField.SortPostings();

            tvf.WriteVInt(numPostings);
            byte bits = (byte)(0x0);

            if (doVectorPositions)
            {
                bits |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
            }
            if (doVectorOffsets)
            {
                bits |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
            }
            tvf.WriteByte(bits);

            int encoderUpto        = 0;
            int lastTermBytesCount = 0;

            ByteSliceReader reader = perThread.vectorSliceReader;

            char[][] charBuffers = perThread.termsHashPerThread.charPool.buffers;
            for (int j = 0; j < numPostings; j++)
            {
                TermVectorsTermsWriter.PostingList posting = (TermVectorsTermsWriter.PostingList)postings[j];
                int freq = posting.freq;

                char[] text2  = charBuffers[posting.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
                int    start2 = posting.textStart & DocumentsWriter.CHAR_BLOCK_MASK;

                // We swap between two encoders to save copying
                // last Term's byte array
                UnicodeUtil.UTF8Result utf8Result = perThread.utf8Results[encoderUpto];

                // TODO: we could do this incrementally
                UnicodeUtil.UTF16toUTF8(text2, start2, utf8Result);
                int termBytesCount = utf8Result.length;

                // TODO: UTF16toUTF8 could tell us this prefix
                // Compute common prefix between last term and
                // this term
                int prefix = 0;
                if (j > 0)
                {
                    byte[] lastTermBytes = perThread.utf8Results[1 - encoderUpto].result;
                    byte[] termBytes     = perThread.utf8Results[encoderUpto].result;
                    while (prefix < lastTermBytesCount && prefix < termBytesCount)
                    {
                        if (lastTermBytes[prefix] != termBytes[prefix])
                        {
                            break;
                        }
                        prefix++;
                    }
                }
                encoderUpto        = 1 - encoderUpto;
                lastTermBytesCount = termBytesCount;

                int suffix = termBytesCount - prefix;
                tvf.WriteVInt(prefix);
                tvf.WriteVInt(suffix);
                tvf.WriteBytes(utf8Result.result, prefix, suffix);
                tvf.WriteVInt(freq);

                if (doVectorPositions)
                {
                    termsHashPerField.InitReader(reader, posting, 0);
                    reader.WriteTo(tvf);
                }

                if (doVectorOffsets)
                {
                    termsHashPerField.InitReader(reader, posting, 1);
                    reader.WriteTo(tvf);
                }
            }

            termsHashPerField.Reset();

            // NOTE: we clear, per-field, at the thread level,
            // because term vectors fully write themselves on each
            // field; this saves RAM (eg if large doc has two large
            // fields w/ term vectors on) because we recycle/reuse
            // all RAM after each field:
            perThread.termsHashPerThread.Reset(false);
        }
コード例 #11
0
        public override void WriteByte(byte b)
        {
            cacheOutput.WriteByte(b);

            isWritten = true;
        }
コード例 #12
0
 public override void WriteByte(byte b)
 {
     _digest.Update(b);
     _indexOutput.WriteByte(b);
 }