Example #1
        /// <summary> Save this segment's info.</summary>
        internal void  Write(IndexOutput output)
        {
            output.WriteString(name);
            output.WriteInt(docCount);
            output.WriteLong(delGen);
            output.WriteInt(docStoreOffset);
            if (docStoreOffset != -1)
            {
                output.WriteString(docStoreSegment);
                output.WriteByte((byte)(docStoreIsCompoundFile?1:0));
            }

            output.WriteByte((byte)(hasSingleNormFile?1:0));
            if (normGen == null)
            {
                output.WriteInt(NO);
            }
            else
            {
                output.WriteInt(normGen.Length);
                for (int j = 0; j < normGen.Length; j++)
                {
                    output.WriteLong(normGen[j]);
                }
            }
            output.WriteByte((byte)isCompoundFile);
            output.WriteInt(delCount);
            output.WriteByte((byte)(hasProx?1:0));
            output.WriteStringStringMap(diagnostics);
        }
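For orientation, a sketch of the symmetric read side of this record. It assumes an IndexInput exposing the matching Read* methods (ReadString, ReadInt, ReadLong, ReadByte, ReadStringStringMap), as in Lucene.NET 2.9-era code; the Read method itself is illustrative, not the library's actual deserializer.

    // Hedged sketch: mirrors Write above field for field. NO is SegmentInfo's
    // constant -1; isCompoundFile round-trips through sbyte so that a stored
    // (byte)(-1) comes back as -1.
    internal void Read(IndexInput input)
    {
        name = input.ReadString();
        docCount = input.ReadInt();
        delGen = input.ReadLong();
        docStoreOffset = input.ReadInt();
        if (docStoreOffset != -1)
        {
            docStoreSegment = input.ReadString();
            docStoreIsCompoundFile = input.ReadByte() == 1;
        }
        hasSingleNormFile = input.ReadByte() == 1;
        int numNormGen = input.ReadInt();
        if (numNormGen == NO)
        {
            normGen = null;
        }
        else
        {
            normGen = new long[numNormGen];
            for (int j = 0; j < numNormGen; j++)
            {
                normGen[j] = input.ReadLong();
            }
        }
        isCompoundFile = (sbyte)input.ReadByte();
        delCount = input.ReadInt();
        hasProx = input.ReadByte() == 1;
        diagnostics = input.ReadStringStringMap();
    }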
Example #2
 private void  MergeNorms()
 {
     for (int i = 0; i < fieldInfos.Size(); i++)
     {
         FieldInfo fi = fieldInfos.FieldInfo(i);
         if (fi.isIndexed && !fi.omitNorms)
         {
             IndexOutput output = directory.CreateOutput(segment + ".f" + i);
             try
             {
                 for (int j = 0; j < readers.Count; j++)
                 {
                     IndexReader reader = (IndexReader)readers[j];
                     int         maxDoc = reader.MaxDoc();
                     byte[]      input  = new byte[maxDoc];
                     reader.Norms(fi.name, input, 0);
                     for (int k = 0; k < maxDoc; k++)
                     {
                         if (!reader.IsDeleted(k))
                         {
                             output.WriteByte(input[k]);
                         }
                     }
                 }
             }
             finally
             {
                 output.Close();
             }
         }
     }
 }
Example #3
        internal void  WriteField(FieldInfo fi, IFieldable field)
        {
            fieldsStream.WriteVInt(fi.number);
            byte bits = 0;

            if (field.IsTokenized)
            {
                bits |= FieldsWriter.FIELD_IS_TOKENIZED;
            }
            if (field.IsBinary)
            {
                bits |= FieldsWriter.FIELD_IS_BINARY;
            }

            fieldsStream.WriteByte(bits);

            // compression is disabled for the current field
            if (field.IsBinary)
            {
                byte[] data   = field.GetBinaryValue();
                int    len    = field.BinaryLength;
                int    offset = field.BinaryOffset;

                fieldsStream.WriteVInt(len);
                fieldsStream.WriteBytes(data, offset, len);
            }
            else
            {
                fieldsStream.WriteString(field.StringValue);
            }
        }
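The bits byte packs the per-field flags that the reader later tests individually. A minimal standalone sketch of that packing and unpacking; the constant values 0x1 and 0x2 match FieldsWriter in this era of Lucene.NET, but treat them as assumptions here.

    using System;

    class FieldBitsDemo
    {
        // Assumed values of FieldsWriter.FIELD_IS_TOKENIZED / FIELD_IS_BINARY.
        const byte FIELD_IS_TOKENIZED = 0x1;
        const byte FIELD_IS_BINARY = 0x2;

        static void Main()
        {
            byte bits = 0;
            bits |= FIELD_IS_TOKENIZED;                           // writer packs flags

            bool isTokenized = (bits & FIELD_IS_TOKENIZED) != 0;  // reader unpacks: true
            bool isBinary = (bits & FIELD_IS_BINARY) != 0;        // false
            Console.WriteLine($"tokenized={isTokenized}, binary={isBinary}");
        }
    }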
Example #4
 public void  Write(IndexOutput output)
 {
     output.WriteVInt(Size());
     for (int i = 0; i < Size(); i++)
     {
         FieldInfo fi   = FieldInfo(i);
         byte      bits = (byte)(0x0);
         if (fi.isIndexed)
         {
             bits |= IS_INDEXED;
         }
         if (fi.storeTermVector)
         {
             bits |= STORE_TERMVECTOR;
         }
         if (fi.storePositionWithTermVector)
         {
             bits |= STORE_POSITIONS_WITH_TERMVECTOR;
         }
         if (fi.storeOffsetWithTermVector)
         {
             bits |= STORE_OFFSET_WITH_TERMVECTOR;
         }
         if (fi.omitNorms)
         {
             bits |= OMIT_NORMS;
         }
         output.WriteString(fi.name);
         output.WriteByte(bits);
     }
 }
Example #5
        private void  Demo_FSIndexInputBug(Directory fsdir, System.String file)
        {
            // Set up the test file - we need more than 1024 bytes
            IndexOutput os = fsdir.CreateOutput(file, null);

            for (int i = 0; i < 2000; i++)
            {
                os.WriteByte((byte)i);
            }
            os.Close();

            IndexInput in_Renamed = fsdir.OpenInput(file, null);

            // This read primes the buffer in IndexInput
            byte b = in_Renamed.ReadByte(null);

            // Close the file
            in_Renamed.Close();

            // ERROR: this call should fail, but succeeds because the buffer
            // is still filled
            b = in_Renamed.ReadByte(null);

            // ERROR: this call should fail, but succeeds for some reason as well
            in_Renamed.Seek(1099, null);

            // OK: this call correctly fails. We are now past the 1024 internal
            // buffer, so an actual IO is attempted, which fails
            Assert.Throws <NullReferenceException>(() => in_Renamed.ReadByte(null), "expected readByte() to throw exception");
        }
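The bug hinges on buffering: the first ReadByte fills an internal buffer (1024 bytes in BufferedIndexInput), so subsequent reads and seeks inside that range never touch the closed file. A toy analogue of the mechanism, not Lucene code:

    using System;
    using System.IO;

    // Toy buffered input: once the buffer is primed, reads within it are
    // served from memory even if the underlying stream has been closed.
    class ToyBufferedInput
    {
        private readonly Stream inner;
        private readonly byte[] buffer = new byte[1024];
        private int bufferLen = -1;   // -1 => not primed yet
        private int pos;

        public ToyBufferedInput(Stream inner) { this.inner = inner; }

        public byte ReadByte()
        {
            if (bufferLen < 0)
            {
                bufferLen = inner.Read(buffer, 0, buffer.Length); // the only real IO
            }
            if (pos >= bufferLen)
            {
                throw new IOException("past the buffer; real IO would be needed");
            }
            return buffer[pos++];
        }

        public void Close() { inner.Close(); }  // note: the buffer stays filled
    }

    class BufferDemo
    {
        static void Main()
        {
            var input = new ToyBufferedInput(new MemoryStream(new byte[2000]));
            input.ReadByte();   // primes the 1024-byte buffer
            input.Close();
            input.ReadByte();   // still "succeeds" -- served from the stale buffer
        }
    }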
Example #6
        private void  MergeNorms()
        {
            byte[]      normBuffer = null;
            IndexOutput output     = null;

            try
            {
                for (int i = 0; i < fieldInfos.Size(); i++)
                {
                    FieldInfo fi = fieldInfos.FieldInfo(i);
                    if (fi.isIndexed && !fi.omitNorms)
                    {
                        if (output == null)
                        {
                            output = directory.CreateOutput(segment + "." + IndexFileNames.NORMS_EXTENSION);
                            output.WriteBytes(NORMS_HEADER, NORMS_HEADER.Length);
                        }
                        for (int j = 0; j < readers.Count; j++)
                        {
                            IndexReader reader = (IndexReader)readers[j];
                            int         maxDoc = reader.MaxDoc();
                            if (normBuffer == null || normBuffer.Length < maxDoc)
                            {
                                // the buffer is too small for the current segment
                                normBuffer = new byte[maxDoc];
                            }
                            reader.Norms(fi.name, normBuffer, 0);
                            if (!reader.HasDeletions())
                            {
                                //optimized case for segments without deleted docs
                                output.WriteBytes(normBuffer, maxDoc);
                            }
                            else
                            {
                                // this segment has deleted docs, so we have to
                                // check for every doc if it is deleted or not
                                for (int k = 0; k < maxDoc; k++)
                                {
                                    if (!reader.IsDeleted(k))
                                    {
                                        output.WriteByte(normBuffer[k]);
                                    }
                                }
                            }
                            if (checkAbort != null)
                            {
                                checkAbort.Work(maxDoc);
                            }
                        }
                    }
                }
            }
            finally
            {
                if (output != null)
                {
                    output.Close();
                }
            }
        }
Example #7
 /// <summary> Save this segment's info.</summary>
 internal void  Write(IndexOutput output)
 {
     output.WriteString(name);
     output.WriteInt(docCount);
     output.WriteLong(delGen);
     output.WriteByte((byte)(hasSingleNormFile ? 1 : 0));
     if (normGen == null)
     {
         output.WriteInt(-1);
     }
     else
     {
         output.WriteInt(normGen.Length);
         for (int j = 0; j < normGen.Length; j++)
         {
             output.WriteLong(normGen[j]);
         }
     }
     output.WriteByte((byte)isCompoundFile);
 }
Example #8
        /// <summary>Creates a file of the specified size with random data. </summary>
        private void  CreateRandomFile(Directory dir, System.String name, int size)
        {
            IndexOutput os = dir.CreateOutput(name);

            // Reuse one Random instance: constructing a new time-seeded
            // System.Random on every iteration yields long runs of identical bytes.
            System.Random random = new System.Random();
            for (int i = 0; i < size; i++)
            {
                byte b = (byte)(random.NextDouble() * 256);
                os.WriteByte(b);
            }
            os.Close();
        }
Example #9
        /// <summary>Creates a file of the specified size with sequential data. The first
        /// byte is written as the start byte provided. All subsequent bytes are
        /// computed as start + offset where offset is the number of the byte.
        /// </summary>
        private void  CreateSequenceFile(Directory dir, System.String name, byte start, int size)
        {
            IndexOutput os = dir.CreateOutput(name);

            for (int i = 0; i < size; i++)
            {
                os.WriteByte(start);
                start++;
            }
            os.Close();
        }
Example #10
 private void  WriteNorms(System.String segment)
 {
     for (int n = 0; n < fieldInfos.Size(); n++)
     {
         FieldInfo fi = fieldInfos.FieldInfo(n);
         if (fi.isIndexed && !fi.omitNorms)
         {
             float       norm  = fieldBoosts[n] * similarity.LengthNorm(fi.name, fieldLengths[n]);
             IndexOutput norms = directory.CreateOutput(segment + ".f" + n);
             try
             {
                 norms.WriteByte(Similarity.EncodeNorm(norm));
             }
             finally
             {
                 norms.Close();
             }
         }
     }
 }
Example #11
        /// <summary>Write as a d-gaps list </summary>
        private void  WriteDgaps(IndexOutput output)
        {
            output.WriteInt(-1);              // mark using d-gaps
            output.WriteInt(Size());          // write size
            output.WriteInt(Count());         // write count
            int last = 0;
            int n    = Count();
            int m    = bits.Length;

            for (int i = 0; i < m && n > 0; i++)
            {
                if (bits[i] != 0)
                {
                    output.WriteVInt(i - last);
                    output.WriteByte(bits[i]);
                    last = i;
                    n   -= BYTE_COUNTS[bits[i] & 0xFF];
                }
            }
        }
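The d-gaps format stores, for each non-zero byte of the bit vector, the gap from the previous non-zero byte plus the byte itself. A round-trip sketch with a minimal VInt codec standing in for IndexOutput/IndexInput (7 bits per byte, high bit meaning "more follows", as Lucene encodes VInts):

    using System;
    using System.IO;

    class DgapsDemo
    {
        // Minimal Lucene-style VInt: 7 bits per byte, high bit = "more bytes follow".
        static void WriteVInt(Stream s, int v)
        {
            while ((v & ~0x7F) != 0)
            {
                s.WriteByte((byte)((v & 0x7F) | 0x80));
                v = (int)((uint)v >> 7);
            }
            s.WriteByte((byte)v);
        }

        static int ReadVInt(Stream s)
        {
            int b = s.ReadByte(), v = b & 0x7F;
            for (int shift = 7; (b & 0x80) != 0; shift += 7)
            {
                b = s.ReadByte();
                v |= (b & 0x7F) << shift;
            }
            return v;
        }

        static void Main()
        {
            byte[] bits = new byte[100];
            bits[3] = 0x10; bits[90] = 0x81;        // only two non-zero bytes

            var ms = new MemoryStream();
            int last = 0;
            for (int i = 0; i < bits.Length; i++)   // encode: (gap, byte) pairs
            {
                if (bits[i] != 0)
                {
                    WriteVInt(ms, i - last);
                    ms.WriteByte(bits[i]);
                    last = i;
                }
            }

            ms.Position = 0;
            byte[] decoded = new byte[bits.Length]; // decode back
            for (int i = 0; ms.Position < ms.Length; )
            {
                i += ReadVInt(ms);
                decoded[i] = (byte)ms.ReadByte();
            }
            Console.WriteLine(decoded[3] == bits[3] && decoded[90] == bits[90]); // True
        }
    }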
Example #12
        public void  Write(IndexOutput output)
        {
            output.WriteVInt(CURRENT_FORMAT);
            output.WriteVInt(Size());
            for (int i = 0; i < Size(); i++)
            {
                FieldInfo fi   = FieldInfo(i);
                var       bits = (byte)(0x0);
                if (fi.isIndexed)
                {
                    bits |= IS_INDEXED;
                }
                if (fi.storeTermVector)
                {
                    bits |= STORE_TERMVECTOR;
                }
                if (fi.storePositionWithTermVector)
                {
                    bits |= STORE_POSITIONS_WITH_TERMVECTOR;
                }
                if (fi.storeOffsetWithTermVector)
                {
                    bits |= STORE_OFFSET_WITH_TERMVECTOR;
                }
                if (fi.omitNorms)
                {
                    bits |= OMIT_NORMS;
                }
                if (fi.storePayloads)
                {
                    bits |= STORE_PAYLOADS;
                }
                if (fi.omitTermFreqAndPositions)
                {
                    bits |= OMIT_TERM_FREQ_AND_POSITIONS;
                }

                output.WriteString(fi.name);
                output.WriteByte(bits);
            }
        }
Example #13
        /// <summary>
        /// Save a single segment's info. </summary>
        public override void Write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext)
        {
            string fileName = IndexFileNames.SegmentFileName(si.Name, "", Lucene46SegmentInfoFormat.SI_EXTENSION);

            si.AddFile(fileName);

            IndexOutput output = dir.CreateOutput(fileName, ioContext);

            bool success = false;

            try
            {
                CodecUtil.WriteHeader(output, Lucene46SegmentInfoFormat.CODEC_NAME, Lucene46SegmentInfoFormat.VERSION_CURRENT);
                // Write the Lucene version that created this segment, since 3.1
                output.WriteString(si.Version);
                output.WriteInt32(si.DocCount);

                output.WriteByte((byte)(si.UseCompoundFile ? SegmentInfo.YES : SegmentInfo.NO));
                output.WriteStringStringMap(si.Diagnostics);
                output.WriteStringSet(si.GetFiles());
                CodecUtil.WriteFooter(output);
                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.DisposeWhileHandlingException(output);
                    si.Dir.DeleteFile(fileName);
                }
                else
                {
                    output.Dispose();
                }
            }
        }
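The success flag plus finally is the standard "commit or clean up" idiom here: on any exception the half-written output is disposed and the partial file deleted. A generic sketch of the pattern outside Lucene:

    using System;
    using System.IO;

    static class SafeWrite
    {
        // Run the write action; if it throws, dispose and remove the partial file.
        public static void WriteOrCleanUp(string path, Action<Stream> write)
        {
            Stream output = File.Create(path);
            bool success = false;
            try
            {
                write(output);
                success = true;
            }
            finally
            {
                if (!success)
                {
                    try { output.Dispose(); } catch { /* keep the original exception */ }
                    File.Delete(path);
                }
                else
                {
                    output.Dispose();
                }
            }
        }
    }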
Example #14
 /// <summary>Write as a d-gaps list </summary>
 private void WriteDgaps(IndexOutput output)
 {
     output.WriteInt(- 1); // mark using d-gaps
     output.WriteInt(Size()); // write size
     output.WriteInt(Count()); // write count
     int last = 0;
     int n = Count();
     int m = bits.Length;
     for (int i = 0; i < m && n > 0; i++)
     {
         if (bits[i] != 0)
         {
             output.WriteVInt(i - last);
             output.WriteByte(bits[i]);
             last = i;
             n -= BYTE_COUNTS[bits[i] & 0xFF];
         }
     }
 }
Example #15
        internal void  AddDocument(Document doc)
        {
            indexStream.WriteLong(fieldsStream.GetFilePointer());

            int storedCount = 0;

            foreach (Field field  in doc.Fields())
            {
                if (field.IsStored())
                {
                    storedCount++;
                }
            }
            fieldsStream.WriteVInt(storedCount);

            foreach (Field field in doc.Fields())
            {
                if (field.IsStored())
                {
                    fieldsStream.WriteVInt(fieldInfos.FieldNumber(field.Name()));

                    byte bits = 0;
                    if (field.IsTokenized())
                    {
                        bits |= FieldsWriter.FIELD_IS_TOKENIZED;
                    }
                    if (field.IsBinary())
                    {
                        bits |= FieldsWriter.FIELD_IS_BINARY;
                    }
                    if (field.IsCompressed())
                    {
                        bits |= FieldsWriter.FIELD_IS_COMPRESSED;
                    }

                    fieldsStream.WriteByte(bits);

                    if (field.IsCompressed())
                    {
                        // compression is enabled for the current field
                        byte[] data = null;
                        // check if it is a binary field
                        if (field.IsBinary())
                        {
                            data = Compress(field.BinaryValue());
                        }
                        else
                        {
                            data = Compress(System.Text.Encoding.GetEncoding("UTF-8").GetBytes(field.StringValue()));
                        }
                        int len = data.Length;
                        fieldsStream.WriteVInt(len);
                        fieldsStream.WriteBytes(data, len);
                    }
                    else
                    {
                        // compression is disabled for the current field
                        if (field.IsBinary())
                        {
                            byte[] data = field.BinaryValue();
                            int    len  = data.Length;
                            fieldsStream.WriteVInt(len);
                            fieldsStream.WriteBytes(data, len);
                        }
                        else
                        {
                            fieldsStream.WriteString(field.StringValue());
                        }
                    }
                }
            }
        }
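Compress is not shown above; in this era of Lucene it deflates the field bytes. A hedged stand-in using System.IO.Compression to show the shape of the call; the real implementation used a zlib-style deflater at best compression, so this sketch is not byte-compatible with indexes it wrote.

    using System.IO;
    using System.IO.Compression;

    static class CompressionSketch
    {
        // Assumed stand-in for FieldsWriter.Compress: deflate the input bytes.
        internal static byte[] Compress(byte[] input)
        {
            using (var ms = new MemoryStream())
            {
                using (var deflate = new DeflateStream(ms, CompressionMode.Compress))
                {
                    deflate.Write(input, 0, input.Length);
                }
                return ms.ToArray();
            }
        }
    }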
Example #16
        internal void  WriteField(FieldInfo fi, Fieldable field)
        {
            // if the field is an instance of FieldsReader.FieldForMerge, we're in merge mode
            // and field.binaryValue() already returns the compressed value for a field
            // with isCompressed()==true, so we disable compression in that case
            bool disableCompression = (field is FieldsReader.FieldForMerge);

            fieldsStream.WriteVInt(fi.number);
            byte bits = 0;

            if (field.IsTokenized())
            {
                bits |= FieldsWriter.FIELD_IS_TOKENIZED;
            }
            if (field.IsBinary())
            {
                bits |= FieldsWriter.FIELD_IS_BINARY;
            }
            if (field.IsCompressed())
            {
                bits |= FieldsWriter.FIELD_IS_COMPRESSED;
            }

            fieldsStream.WriteByte(bits);

            if (field.IsCompressed())
            {
                // compression is enabled for the current field
                byte[] data;
                int    len;
                int    offset;

                if (disableCompression)
                {
                    // optimized case for merging, the data
                    // is already compressed
                    data = field.GetBinaryValue();
                    System.Diagnostics.Debug.Assert(data != null);
                    len    = field.GetBinaryLength();
                    offset = field.GetBinaryOffset();
                }
                else
                {
                    // check if it is a binary field
                    if (field.IsBinary())
                    {
                        data = Compress(field.GetBinaryValue(), field.GetBinaryOffset(), field.GetBinaryLength());
                    }
                    else
                    {
                        byte[] x = System.Text.Encoding.UTF8.GetBytes(field.StringValue());
                        data = Compress(x, 0, x.Length);
                    }
                    len    = data.Length;
                    offset = 0;
                }

                fieldsStream.WriteVInt(len);
                fieldsStream.WriteBytes(data, offset, len);
            }
            else
            {
                // compression is disabled for the current field
                if (field.IsBinary())
                {
                    int length = field.GetBinaryLength();
                    fieldsStream.WriteVInt(length);
                    fieldsStream.WriteBytes(field.BinaryValue(), field.GetBinaryOffset(), length);
                }
                else
                {
                    fieldsStream.WriteString(field.StringValue());
                }
            }
        }
Example #17
        internal void  AddDocument(Document doc)
        {
            indexStream.WriteLong(fieldsStream.GetFilePointer());

            int storedCount = 0;

            System.Collections.IEnumerator fieldIterator = doc.GetFields().GetEnumerator();
            while (fieldIterator.MoveNext())
            {
                Fieldable field = (Fieldable)fieldIterator.Current;
                if (field.IsStored())
                {
                    storedCount++;
                }
            }
            fieldsStream.WriteVInt(storedCount);

            fieldIterator = doc.GetFields().GetEnumerator();
            while (fieldIterator.MoveNext())
            {
                Fieldable field = (Fieldable)fieldIterator.Current;
                // if the field is an instance of FieldsReader.FieldForMerge, we're in merge mode
                // and field.binaryValue() already returns the compressed value for a field
                // with isCompressed()==true, so we disable compression in that case
                bool disableCompression = (field is FieldsReader.FieldForMerge);
                if (field.IsStored())
                {
                    fieldsStream.WriteVInt(fieldInfos.FieldNumber(field.Name()));

                    byte bits = 0;
                    if (field.IsTokenized())
                    {
                        bits |= FieldsWriter.FIELD_IS_TOKENIZED;
                    }
                    if (field.IsBinary())
                    {
                        bits |= FieldsWriter.FIELD_IS_BINARY;
                    }
                    if (field.IsCompressed())
                    {
                        bits |= FieldsWriter.FIELD_IS_COMPRESSED;
                    }

                    fieldsStream.WriteByte(bits);

                    if (field.IsCompressed())
                    {
                        // compression is enabled for the current field
                        byte[] data = null;

                        if (disableCompression)
                        {
                            // optimized case for merging, the data
                            // is already compressed
                            data = field.BinaryValue();
                        }
                        else
                        {
                            // check if it is a binary field
                            if (field.IsBinary())
                            {
                                data = Compress(field.BinaryValue());
                            }
                            else
                            {
                                data = Compress(System.Text.Encoding.GetEncoding("UTF-8").GetBytes(field.StringValue()));
                            }
                        }
                        int len = data.Length;
                        fieldsStream.WriteVInt(len);
                        fieldsStream.WriteBytes(data, len);
                    }
                    else
                    {
                        // compression is disabled for the current field
                        if (field.IsBinary())
                        {
                            byte[] data = field.BinaryValue();
                            int    len  = data.Length;
                            fieldsStream.WriteVInt(len);
                            fieldsStream.WriteBytes(data, len);
                        }
                        else
                        {
                            fieldsStream.WriteString(field.StringValue());
                        }
                    }
                }
            }
        }
Example #18
        internal void  WriteField(FieldInfo fi, Fieldable field)
        {
            // if the field is an instance of FieldsReader.FieldForMerge, we're in merge mode
            // and field.binaryValue() already returns the compressed value for a field
            // with isCompressed()==true, so we disable compression in that case
            bool disableCompression = (field is FieldsReader.FieldForMerge);

            fieldsStream.WriteVInt(fi.number);
            byte bits = 0;

            if (field.IsTokenized())
            {
                bits |= FieldsWriter.FIELD_IS_TOKENIZED;
            }
            if (field.IsBinary())
            {
                bits |= FieldsWriter.FIELD_IS_BINARY;
            }
            if (field.IsCompressed())
            {
                bits |= FieldsWriter.FIELD_IS_COMPRESSED;
            }

            fieldsStream.WriteByte(bits);

            if (field.IsCompressed())
            {
                // compression is enabled for the current field
                byte[] data = null;

                if (disableCompression)
                {
                    // optimized case for merging, the data
                    // is already compressed
                    data = field.BinaryValue();
                }
                else
                {
                    // check if it is a binary field
                    if (field.IsBinary())
                    {
                        data = Compress(field.BinaryValue());
                    }
                    else
                    {
                        data = Compress(System.Text.Encoding.GetEncoding("UTF-8").GetBytes(field.StringValue()));
                    }
                }
                int len = data.Length;
                fieldsStream.WriteVInt(len);
                fieldsStream.WriteBytes(data, len);
            }
            else
            {
                // compression is disabled for the current field
                if (field.IsBinary())
                {
                    byte[] data = field.BinaryValue();
                    int    len  = data.Length;
                    fieldsStream.WriteVInt(len);
                    fieldsStream.WriteBytes(data, len);
                }
                else
                {
                    fieldsStream.WriteString(field.StringValue());
                }
            }
        }
Example #19
 /// <summary>
 /// Write as a d-gaps list </summary>
 private void WriteClearedDgaps(IndexOutput output)
 {
     output.WriteInt(-1); // mark using d-gaps
     output.WriteInt(Size()); // write size
     output.WriteInt(Count()); // write count
     int last = 0;
     int numCleared = Size() - Count();
     for (int i = 0; i < Bits.Length && numCleared > 0; i++)
     {
         if (Bits[i] != unchecked((byte)0xff))
         {
             output.WriteVInt(i - last);
             output.WriteByte(Bits[i]);
             last = i;
             numCleared -= (8 - BitUtil.BitCount(Bits[i]));
             Debug.Assert(numCleared >= 0 || (i == (Bits.Length - 1) && numCleared == -(8 - (Size_Renamed & 7))));
         }
     }
 }
Example #20
 private void AddBytesField(FieldInfo field, IndexOutput output, IEnumerable<long?> values)
 {
     field.PutAttribute(LegacyKey, LegacyDocValuesType.FIXED_INTS_8.Name);
     CodecUtil.WriteHeader(output, Lucene40DocValuesFormat.INTS_CODEC_NAME, Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
     output.WriteInt(1); // size
     foreach (long? n in values)
     {
         output.WriteByte(n == null ? (byte)0 : (byte)n);
     }
 }
Example #21
        private void AddVarIntsField(FieldInfo field, IndexOutput output, IEnumerable<long?> values, long minValue, long maxValue)
        {
            field.PutAttribute(LegacyKey, LegacyDocValuesType.VAR_INTS.Name);

            CodecUtil.WriteHeader(output, Lucene40DocValuesFormat.VAR_INTS_CODEC_NAME, Lucene40DocValuesFormat.VAR_INTS_VERSION_CURRENT);

            long delta = maxValue - minValue;

            if (delta < 0)
            {
                // writes longs
                output.WriteByte((byte)Lucene40DocValuesFormat.VAR_INTS_FIXED_64);
                foreach (long? n in values)
                {
                    output.WriteLong(n == null ? 0 : n.Value);
                }
            }
            else
            {
                // writes packed ints
                output.WriteByte((byte)Lucene40DocValuesFormat.VAR_INTS_PACKED);
                output.WriteLong(minValue);
                output.WriteLong(0 - minValue); // default value (representation of 0)
                PackedInts.Writer writer = PackedInts.GetWriter(output, State.SegmentInfo.DocCount, PackedInts.BitsRequired(delta), PackedInts.DEFAULT);
                foreach (long? n in values)
                {
                    long v = n == null ? 0 : (long)n;
                    writer.Add(v - minValue);
                }
                writer.Finish();
            }
        }
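The delta < 0 test is an overflow guard: when maxValue - minValue does not fit in a signed long the subtraction wraps negative, and the writer falls back to plain 64-bit values instead of packed ints. A small demonstration of the wraparound:

    using System;

    class DeltaOverflowDemo
    {
        static void Main()
        {
            long minValue = long.MinValue;
            long maxValue = long.MaxValue;

            // Mathematically 2^64 - 1, but long arithmetic wraps around:
            long delta = unchecked(maxValue - minValue);
            Console.WriteLine(delta);       // -1: range too wide to pack
            Console.WriteLine(delta < 0);   // True => take the VAR_INTS_FIXED_64 branch
        }
    }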
Example #22
		/// <summary> Save this segment's info.</summary>
		internal void  Write(IndexOutput output)
		{
			output.WriteString(name);
			output.WriteInt(docCount);
			output.WriteLong(delGen);
			output.WriteByte((byte) (hasSingleNormFile ? 1 : 0));
			if (normGen == null)
			{
				output.WriteInt(- 1);
			}
			else
			{
				output.WriteInt(normGen.Length);
				for (int j = 0; j < normGen.Length; j++)
				{
					output.WriteLong(normGen[j]);
				}
			}
			output.WriteByte((byte) isCompoundFile);
		}
Example #23
        /// <summary>Called once per field per document if term vectors
        /// are enabled, to write the vectors to
        /// RAMOutputStream, which is then quickly flushed to
        /// * the real term vectors files in the Directory.
        /// </summary>
        internal override void  Finish()
        {
            System.Diagnostics.Debug.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.finish start"));

            int numPostings = termsHashPerField.numPostings;

            System.Diagnostics.Debug.Assert(numPostings >= 0);

            if (!doVectors || numPostings == 0)
            {
                return;
            }

            if (numPostings > maxNumPostings)
            {
                maxNumPostings = numPostings;
            }

            IndexOutput tvf = perThread.doc.tvf;

            // This is called once, after inverting all occurrences
            // of a given field in the doc.  At this point we flush
            // our hash into the DocWriter.

            System.Diagnostics.Debug.Assert(fieldInfo.storeTermVector);
            System.Diagnostics.Debug.Assert(perThread.VectorFieldsInOrder(fieldInfo));

            perThread.doc.AddField(termsHashPerField.fieldInfo.number);

            RawPostingList[] postings = termsHashPerField.SortPostings();

            tvf.WriteVInt(numPostings);
            byte bits = (byte)(0x0);

            if (doVectorPositions)
            {
                bits |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
            }
            if (doVectorOffsets)
            {
                bits |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
            }
            tvf.WriteByte(bits);

            int encoderUpto        = 0;
            int lastTermBytesCount = 0;

            ByteSliceReader reader = perThread.vectorSliceReader;

            char[][] charBuffers = perThread.termsHashPerThread.charPool.buffers;
            for (int j = 0; j < numPostings; j++)
            {
                TermVectorsTermsWriter.PostingList posting = (TermVectorsTermsWriter.PostingList)postings[j];
                int freq = posting.freq;

                char[] text2  = charBuffers[posting.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
                int    start2 = posting.textStart & DocumentsWriter.CHAR_BLOCK_MASK;

                // We swap between two encoders to save copying
                // last Term's byte array
                UnicodeUtil.UTF8Result utf8Result = perThread.utf8Results[encoderUpto];

                // TODO: we could do this incrementally
                UnicodeUtil.UTF16toUTF8(text2, start2, utf8Result);
                int termBytesCount = utf8Result.length;

                // TODO: UTF16toUTF8 could tell us this prefix
                // Compute common prefix between last term and
                // this term
                int prefix = 0;
                if (j > 0)
                {
                    byte[] lastTermBytes = perThread.utf8Results[1 - encoderUpto].result;
                    byte[] termBytes     = perThread.utf8Results[encoderUpto].result;
                    while (prefix < lastTermBytesCount && prefix < termBytesCount)
                    {
                        if (lastTermBytes[prefix] != termBytes[prefix])
                        {
                            break;
                        }
                        prefix++;
                    }
                }
                encoderUpto        = 1 - encoderUpto;
                lastTermBytesCount = termBytesCount;

                int suffix = termBytesCount - prefix;
                tvf.WriteVInt(prefix);
                tvf.WriteVInt(suffix);
                tvf.WriteBytes(utf8Result.result, prefix, suffix);
                tvf.WriteVInt(freq);

                if (doVectorPositions)
                {
                    termsHashPerField.InitReader(reader, posting, 0);
                    reader.WriteTo(tvf);
                }

                if (doVectorOffsets)
                {
                    termsHashPerField.InitReader(reader, posting, 1);
                    reader.WriteTo(tvf);
                }
            }

            termsHashPerField.Reset();
            perThread.termsHashPerThread.Reset(false);
        }
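The prefix loop is plain front coding: each term stores only the byte count shared with the previous term plus the differing suffix. A standalone sketch of the computation and what gets written:

    using System;
    using System.Text;

    class FrontCodingDemo
    {
        static int CommonPrefix(byte[] a, byte[] b)
        {
            int prefix = 0;
            while (prefix < a.Length && prefix < b.Length && a[prefix] == b[prefix])
            {
                prefix++;
            }
            return prefix;
        }

        static void Main()
        {
            byte[] last = Encoding.UTF8.GetBytes("apple");
            byte[] term = Encoding.UTF8.GetBytes("applet");

            int prefix = CommonPrefix(last, term);
            int suffix = term.Length - prefix;
            // The writer emits prefix=5, suffix=1 and the single byte "t"
            // instead of repeating all of "applet".
            Console.WriteLine($"prefix={prefix}, suffix={suffix}");
        }
    }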
Example #24
 // the little vint encoding used for var-deref
 private static void WriteVShort(IndexOutput o, int i)
 {
     Debug.Assert(i >= 0 && i <= short.MaxValue);
     if (i < 128)
     {
         o.WriteByte((byte)(sbyte)i);
     }
     else
     {
         o.WriteByte((byte)unchecked((sbyte)(0x80 | (i >> 8))));
         o.WriteByte((byte)unchecked((sbyte)(i & 0xff)));
     }
 }
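A sketch of the matching decoder plus a round trip; ReadVShort is an assumed counterpart reconstructed from the encoding above, not an actual Lucene method.

    using System;
    using System.IO;

    class VShortDemo
    {
        static void WriteVShort(Stream o, int i)
        {
            if (i < 128)
            {
                o.WriteByte((byte)i);                  // one byte, high bit clear
            }
            else
            {
                o.WriteByte((byte)(0x80 | (i >> 8)));  // high bit set + top 7 bits
                o.WriteByte((byte)(i & 0xff));         // low 8 bits
            }
        }

        static int ReadVShort(Stream input)
        {
            int first = input.ReadByte();
            if ((first & 0x80) == 0)
            {
                return first;                          // single-byte case
            }
            return ((first & 0x7f) << 8) | input.ReadByte();
        }

        static void Main()
        {
            var ms = new MemoryStream();
            int[] values = { 0, 127, 128, 300, short.MaxValue };
            foreach (int v in values)
            {
                WriteVShort(ms, v);
            }
            ms.Position = 0;
            foreach (int v in values)
            {
                Console.WriteLine(ReadVShort(ms) == v);   // True for each value
            }
        }
    }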
Example #25
        /// <summary>Produce _X.nrm if any document had a field with norms
        /// not disabled
        /// </summary>
        internal override void flush(IDictionary <object, ICollection <object> > threadsAndFields, DocumentsWriter.FlushState state)
        {
            IDictionary <object, object> byField = new Dictionary <object, object>();

            // Typically, each thread will have encountered the same
            // field.  So first we collate by field, i.e., all
            // per-thread field instances that correspond to the
            // same FieldInfo
            IEnumerator <KeyValuePair <object, ICollection <object> > > it = threadsAndFields.GetEnumerator();

            while (it.MoveNext())
            {
                KeyValuePair <object, ICollection <object> > entry = it.Current;

                ICollection <object> fields         = entry.Value;
                IEnumerator <object> fieldsIt       = fields.GetEnumerator();
                List <object>        fieldsToRemove = new List <object>(fields.Count);

                while (fieldsIt.MoveNext())
                {
                    NormsWriterPerField perField = (NormsWriterPerField)fieldsIt.Current;

                    if (perField.upto > 0)
                    {
                        // It has some norms
                        IList <object> l;
                        if (byField.ContainsKey(perField.fieldInfo))
                        {
                            l = (IList <object>)byField[perField.fieldInfo];
                        }
                        else
                        {
                            l = new List <object>();
                            byField[perField.fieldInfo] = l;
                        }
                        //IList<object> l = (IList<object>)byField[perField.fieldInfo];
                        //if (l == null)
                        //{
                        //    l = new List<object>();
                        //    byField[perField.fieldInfo] = l;
                        //}
                        l.Add(perField);
                    }
                    else
                    {
                        // Remove this field since we haven't seen it
                        // since the previous flush
                        fieldsToRemove.Add(perField);
                        //fields.Remove(perField);
                    }
                }
                for (int i = 0; i < fieldsToRemove.Count; i++)
                {
                    fields.Remove(fieldsToRemove[i]);
                }
            }

            string normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;

            state.flushedFiles[normsFileName] = normsFileName;
            IndexOutput normsOut = state.directory.CreateOutput(normsFileName);

            try
            {
                normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);

                int numField = fieldInfos.Size();

                int normCount = 0;

                for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++)
                {
                    FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

                    List <object> toMerge;
                    int           upto = 0;
                    if (byField.ContainsKey(fieldInfo))
                    {
                        toMerge = (List <object>)byField[fieldInfo];

                        int numFields = toMerge.Count;

                        normCount++;

                        NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
                        int[] uptos = new int[numFields];

                        for (int j = 0; j < numFields; j++)
                        {
                            fields[j] = (NormsWriterPerField)toMerge[j];
                        }

                        int numLeft = numFields;

                        while (numLeft > 0)
                        {
                            System.Diagnostics.Debug.Assert(uptos[0] < fields[0].docIDs.Length, " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.Length));

                            int minLoc   = 0;
                            int minDocID = fields[0].docIDs[uptos[0]];

                            for (int j = 1; j < numLeft; j++)
                            {
                                int docID = fields[j].docIDs[uptos[j]];
                                if (docID < minDocID)
                                {
                                    minDocID = docID;
                                    minLoc   = j;
                                }
                            }

                            System.Diagnostics.Debug.Assert(minDocID < state.numDocsInRAM);

                            // Fill hole
                            for (; upto < minDocID; upto++)
                            {
                                normsOut.WriteByte(defaultNorm);
                            }

                            normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]);
                            (uptos[minLoc])++;
                            upto++;

                            if (uptos[minLoc] == fields[minLoc].upto)
                            {
                                fields[minLoc].reset();
                                if (minLoc != numLeft - 1)
                                {
                                    fields[minLoc] = fields[numLeft - 1];
                                    uptos[minLoc]  = uptos[numLeft - 1];
                                }
                                numLeft--;
                            }
                        }

                        // Fill final hole with defaultNorm
                        for (; upto < state.numDocsInRAM; upto++)
                        {
                            normsOut.WriteByte(defaultNorm);
                        }
                    }
                    else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
                    {
                        normCount++;
                        // Fill entire field with default norm:
                        for (; upto < state.numDocsInRAM; upto++)
                        {
                            normsOut.WriteByte(defaultNorm);
                        }
                    }

                    System.Diagnostics.Debug.Assert(4 + normCount * state.numDocsInRAM == normsOut.GetFilePointer(), ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocsInRAM) + " actual=" + normsOut.GetFilePointer());
                }
            }
            finally
            {
                normsOut.Close();
            }
        }
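The inner while loop is a k-way merge keyed on docID: take the field instance with the smallest next docID, fill any docID holes with the default norm, and compact exhausted instances out of the array. The same idea in miniature, merging two sorted (docID, norm) buffers:

    using System;
    using System.Collections.Generic;

    class NormsMergeSketch
    {
        const byte DefaultNorm = 0x7C;   // placeholder default value

        static void Main()
        {
            // Two per-thread buffers of (docID, norm), each sorted by docID.
            var a = new Queue<(int doc, byte norm)>(new[] { (1, (byte)10), (4, (byte)11) });
            var b = new Queue<(int doc, byte norm)>(new[] { (2, (byte)20), (3, (byte)21) });

            var output = new List<byte>();
            int upto = 0, numDocs = 6;

            while (a.Count > 0 || b.Count > 0)
            {
                // Pick the buffer whose next docID is smallest.
                var src = (b.Count == 0 || (a.Count > 0 && a.Peek().doc < b.Peek().doc)) ? a : b;
                var (doc, norm) = src.Dequeue();

                while (upto < doc) { output.Add(DefaultNorm); upto++; }   // fill hole
                output.Add(norm); upto++;
            }
            while (upto < numDocs) { output.Add(DefaultNorm); upto++; }   // final hole

            Console.WriteLine(string.Join(",", output));  // 124,10,20,21,11,124
        }
    }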
Example #26
		public void  Write(IndexOutput output)
		{
			output.WriteVInt(CURRENT_FORMAT);
			output.WriteVInt(Size());
			for (int i = 0; i < Size(); i++)
			{
				FieldInfo fi = FieldInfo(i);
				byte bits = (byte) (0x0);
				if (fi.isIndexed)
					bits |= IS_INDEXED;
				if (fi.storeTermVector)
					bits |= STORE_TERMVECTOR;
				if (fi.storePositionWithTermVector)
					bits |= STORE_POSITIONS_WITH_TERMVECTOR;
				if (fi.storeOffsetWithTermVector)
					bits |= STORE_OFFSET_WITH_TERMVECTOR;
				if (fi.omitNorms)
					bits |= OMIT_NORMS;
				if (fi.storePayloads)
					bits |= STORE_PAYLOADS;
				if (fi.omitTermFreqAndPositions)
					bits |= OMIT_TERM_FREQ_AND_POSITIONS;
				
				output.WriteString(fi.name);
				output.WriteByte(bits);
			}
		}
Example #27
		/// <summary> Save this segment's info.</summary>
		internal void  Write(IndexOutput output)
		{
			output.WriteString(name);
			output.WriteInt(docCount);
			output.WriteLong(delGen);
			output.WriteInt(docStoreOffset);
			if (docStoreOffset != - 1)
			{
				output.WriteString(docStoreSegment);
				output.WriteByte((byte) (docStoreIsCompoundFile?1:0));
			}
			
			output.WriteByte((byte) (hasSingleNormFile?1:0));
			if (normGen == null)
			{
				output.WriteInt(NO);
			}
			else
			{
				output.WriteInt(normGen.Length);
				for (int j = 0; j < normGen.Length; j++)
				{
					output.WriteLong(normGen[j]);
				}
			}
			output.WriteByte((byte) isCompoundFile);
			output.WriteInt(delCount);
			output.WriteByte((byte) (hasProx?1:0));
			output.WriteStringStringMap(diagnostics);
		}
Example #28
        /// <summary>Produce _X.nrm if any document had a field with norms
        /// not disabled
        /// </summary>
        public override void  Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
        {
            System.Collections.IDictionary byField = new System.Collections.Hashtable();

            // Typically, each thread will have encountered the same
            // field.  So first we collate by field, i.e., all
            // per-thread field instances that correspond to the
            // same FieldInfo
            System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
            while (it.MoveNext())
            {
                System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current;

                System.Collections.ICollection fields         = (System.Collections.ICollection)entry.Value;
                System.Collections.IEnumerator fieldsIt       = fields.GetEnumerator();
                System.Collections.ArrayList   fieldsToRemove = new System.Collections.ArrayList();

                while (fieldsIt.MoveNext())
                {
                    NormsWriterPerField perField = (NormsWriterPerField)((System.Collections.DictionaryEntry)fieldsIt.Current).Key;

                    if (perField.upto > 0)
                    {
                        // It has some norms
                        System.Collections.IList l = (System.Collections.IList)byField[perField.fieldInfo];
                        if (l == null)
                        {
                            l = new System.Collections.ArrayList();
                            byField[perField.fieldInfo] = l;
                        }
                        l.Add(perField);
                    }
                    // Remove this field since we haven't seen it
                    // since the previous flush
                    else
                    {
                        fieldsToRemove.Add(perField);
                    }
                }

                System.Collections.Hashtable fieldsHT = (System.Collections.Hashtable)fields;
                for (int i = 0; i < fieldsToRemove.Count; i++)
                {
                    fieldsHT.Remove(fieldsToRemove[i]);
                }
            }

            System.String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;
            state.flushedFiles[normsFileName] = normsFileName;
            IndexOutput normsOut = state.directory.CreateOutput(normsFileName);

            try
            {
                normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);

                int numField = fieldInfos.Size();

                int normCount = 0;

                for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++)
                {
                    FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

                    System.Collections.IList toMerge = (System.Collections.IList)byField[fieldInfo];
                    int upto = 0;
                    if (toMerge != null)
                    {
                        int numFields = toMerge.Count;

                        normCount++;

                        NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
                        int[] uptos = new int[numFields];

                        for (int j = 0; j < numFields; j++)
                        {
                            fields[j] = (NormsWriterPerField)toMerge[j];
                        }

                        int numLeft = numFields;

                        while (numLeft > 0)
                        {
                            System.Diagnostics.Debug.Assert(uptos [0] < fields [0].docIDs.Length, " uptos[0]=" + uptos [0] + " len=" + (fields [0].docIDs.Length));

                            int minLoc   = 0;
                            int minDocID = fields[0].docIDs[uptos[0]];

                            for (int j = 1; j < numLeft; j++)
                            {
                                int docID = fields[j].docIDs[uptos[j]];
                                if (docID < minDocID)
                                {
                                    minDocID = docID;
                                    minLoc   = j;
                                }
                            }

                            System.Diagnostics.Debug.Assert(minDocID < state.numDocs);

                            // Fill hole
                            for (; upto < minDocID; upto++)
                            {
                                normsOut.WriteByte(defaultNorm);
                            }

                            normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]);
                            (uptos[minLoc])++;
                            upto++;

                            if (uptos[minLoc] == fields[minLoc].upto)
                            {
                                fields[minLoc].Reset();
                                if (minLoc != numLeft - 1)
                                {
                                    fields[minLoc] = fields[numLeft - 1];
                                    uptos[minLoc]  = uptos[numLeft - 1];
                                }
                                numLeft--;
                            }
                        }

                        // Fill final hole with defaultNorm
                        for (; upto < state.numDocs; upto++)
                        {
                            normsOut.WriteByte(defaultNorm);
                        }
                    }
                    else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
                    {
                        normCount++;
                        // Fill entire field with default norm:
                        for (; upto < state.numDocs; upto++)
                        {
                            normsOut.WriteByte(defaultNorm);
                        }
                    }

                    System.Diagnostics.Debug.Assert(4 + normCount * state.numDocs == normsOut.GetFilePointer(), ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocs) + " actual=" + normsOut.GetFilePointer());
                }
            }
            finally
            {
                normsOut.Close();
            }
        }
Example #29
		/* Used only when writing norms to fill in default norm
		* value into the holes in docID stream for those docs
		* that didn't have this field. */
		internal static void  FillBytes(IndexOutput out_Renamed, byte b, int numBytes)
		{
			for (int i = 0; i < numBytes; i++)
				out_Renamed.WriteByte(b);
		}