/// <summary>
/// Serializes this segment's info to <paramref name="output"/>.
/// </summary>
/// <param name="output">Stream that receives the fields, in the fixed order used below.</param>
internal void Write(IndexOutput output)
{
    output.WriteString(name);
    output.WriteInt(docCount);
    output.WriteLong(delGen);

    // The doc-store segment name and compound flag follow only when the offset is not -1.
    output.WriteInt(docStoreOffset);
    bool hasDocStoreDetails = docStoreOffset != -1;
    if (hasDocStoreDetails)
    {
        output.WriteString(docStoreSegment);
        output.WriteByte((byte)(docStoreIsCompoundFile ? 1 : 0));
    }

    output.WriteByte((byte)(hasSingleNormFile ? 1 : 0));

    // Norm generations: either the NO marker, or a length-prefixed list of longs.
    if (normGen != null)
    {
        output.WriteInt(normGen.Length);
        for (int genIndex = 0; genIndex < normGen.Length; genIndex++)
        {
            output.WriteLong(normGen[genIndex]);
        }
    }
    else
    {
        output.WriteInt(NO);
    }

    output.WriteByte((byte)isCompoundFile);
    output.WriteInt(delCount);
    output.WriteByte((byte)(hasProx ? 1 : 0));
    output.WriteStringStringMap(diagnostics);
}
/// <summary>
/// Writes one norms file (<c>segment + ".f" + fieldNumber</c>) for every indexed field
/// that does not omit norms. Each file holds the norms of all readers back to back,
/// with the entries of deleted documents left out.
/// </summary>
private void MergeNorms()
{
    for (int fieldNumber = 0; fieldNumber < fieldInfos.Size(); fieldNumber++)
    {
        FieldInfo info = fieldInfos.FieldInfo(fieldNumber);
        if (!info.isIndexed || info.omitNorms)
        {
            continue; // this field carries no norms
        }

        IndexOutput normsOut = directory.CreateOutput(segment + ".f" + fieldNumber);
        try
        {
            for (int readerIndex = 0; readerIndex < readers.Count; readerIndex++)
            {
                IndexReader source = (IndexReader)readers[readerIndex];
                int docLimit = source.MaxDoc();

                // Load the norms of this reader for the field, one byte per document.
                byte[] norms = new byte[docLimit];
                source.Norms(info.name, norms, 0);

                for (int doc = 0; doc < docLimit; doc++)
                {
                    if (source.IsDeleted(doc))
                    {
                        continue;
                    }
                    normsOut.WriteByte(norms[doc]);
                }
            }
        }
        finally
        {
            // Always release the output, even when a reader fails.
            normsOut.Close();
        }
    }
}
/// <summary>
/// Writes one stored field to the fields stream: field number, a flag byte, then the value.
/// </summary>
/// <param name="fi">Supplies the field number that is written first.</param>
/// <param name="field">Supplies the tokenized/binary flags and the value.</param>
internal void WriteField(FieldInfo fi, IFieldable field)
{
    fieldsStream.WriteVInt(fi.number);

    // Flag byte describing how the value that follows must be interpreted.
    byte flags = 0;
    if (field.IsTokenized)
    {
        flags |= FieldsWriter.FIELD_IS_TOKENIZED;
    }
    if (field.IsBinary)
    {
        flags |= FieldsWriter.FIELD_IS_BINARY;
    }
    fieldsStream.WriteByte(flags);

    // compression is disabled for the current field
    if (!field.IsBinary)
    {
        fieldsStream.WriteString(field.StringValue);
        return;
    }

    // Binary value: length prefix followed by the referenced slice of the array.
    byte[] bytes = field.GetBinaryValue();
    int count = field.BinaryLength;
    int start = field.BinaryOffset;
    fieldsStream.WriteVInt(count);
    fieldsStream.WriteBytes(bytes, start, count);
}
/// <summary>
/// Writes the number of fields, then for each field its name followed by one flag byte.
/// </summary>
/// <param name="output">Stream that receives the field table.</param>
public void Write(IndexOutput output)
{
    output.WriteVInt(Size());

    int fieldNumber = 0;
    while (fieldNumber < Size())
    {
        FieldInfo info = FieldInfo(fieldNumber);

        // Fold the per-field booleans into a single flag byte.
        byte flags = (byte)(0x0);
        if (info.isIndexed)
        {
            flags |= IS_INDEXED;
        }
        if (info.storeTermVector)
        {
            flags |= STORE_TERMVECTOR;
        }
        if (info.storePositionWithTermVector)
        {
            flags |= STORE_POSITIONS_WITH_TERMVECTOR;
        }
        if (info.storeOffsetWithTermVector)
        {
            flags |= STORE_OFFSET_WITH_TERMVECTOR;
        }
        if (info.omitNorms)
        {
            flags |= OMIT_NORMS;
        }

        output.WriteString(info.name);
        output.WriteByte(flags);
        fieldNumber++;
    }
}
/// <summary>
/// Demonstrates that a closed index input keeps answering reads from its buffer and
/// only fails once a read needs real I/O.
/// </summary>
/// <param name="fsdir">Directory in which the scratch file is created and reopened.</param>
/// <param name="file">Name of the scratch file.</param>
private void Demo_FSIndexInputBug(Directory fsdir, System.String file)
{
    // Setup the test file - we need more than 1024 bytes
    IndexOutput writer = fsdir.CreateOutput(file, null);
    int written = 0;
    while (written < 2000)
    {
        writer.WriteByte((byte)written);
        written++;
    }
    writer.Close();

    IndexInput input = fsdir.OpenInput(file, null);

    // This read primes the buffer in IndexInput
    byte value = input.ReadByte(null);

    // Close the file
    input.Close();

    // ERROR: this call should fail, but succeeds because the buffer
    // is still filled
    value = input.ReadByte(null);

    // ERROR: this call should fail, but succeeds for some reason as well
    input.Seek(1099, null);

    // OK: this call correctly fails. We are now past the 1024 internal
    // buffer, so an actual IO is attempted, which fails
    Assert.Throws<NullReferenceException>(
        () => input.ReadByte(null),
        "expected readByte() to throw exception");
}
/// <summary>
/// Merges the norms of all readers into a single norms file. The file is created
/// lazily, when the first indexed field that keeps norms is met, and starts with
/// <c>NORMS_HEADER</c>. Norms of deleted documents are skipped.
/// </summary>
private void MergeNorms()
{
    byte[] scratch = null;      // reused across readers, grown on demand
    IndexOutput normsOut = null;
    try
    {
        for (int fieldNumber = 0; fieldNumber < fieldInfos.Size(); fieldNumber++)
        {
            FieldInfo info = fieldInfos.FieldInfo(fieldNumber);
            if (!info.isIndexed || info.omitNorms)
            {
                continue;
            }

            if (normsOut == null)
            {
                normsOut = directory.CreateOutput(segment + "." + IndexFileNames.NORMS_EXTENSION);
                normsOut.WriteBytes(NORMS_HEADER, NORMS_HEADER.Length);
            }

            for (int readerIndex = 0; readerIndex < readers.Count; readerIndex++)
            {
                IndexReader source = (IndexReader)readers[readerIndex];
                int docLimit = source.MaxDoc();

                bool bufferTooSmall = scratch == null || scratch.Length < docLimit;
                if (bufferTooSmall)
                {
                    // the buffer is too small for the current segment
                    scratch = new byte[docLimit];
                }
                source.Norms(info.name, scratch, 0);

                if (source.HasDeletions())
                {
                    // this segment has deleted docs, so we have to
                    // check for every doc if it is deleted or not
                    for (int doc = 0; doc < docLimit; doc++)
                    {
                        if (source.IsDeleted(doc))
                        {
                            continue;
                        }
                        normsOut.WriteByte(scratch[doc]);
                    }
                }
                else
                {
                    // optimized case for segments without deleted docs
                    normsOut.WriteBytes(scratch, docLimit);
                }

                if (checkAbort != null)
                {
                    checkAbort.Work(docLimit);
                }
            }
        }
    }
    finally
    {
        if (normsOut != null)
        {
            normsOut.Close();
        }
    }
}
/// <summary>
/// Saves this segment's info: name, document count, deletion generation,
/// single-norm-file flag, norm generations and the compound-file marker.
/// </summary>
/// <param name="output">Stream that receives the values in that order.</param>
internal void Write(IndexOutput output)
{
    output.WriteString(name);
    output.WriteInt(docCount);
    output.WriteLong(delGen);

    byte singleNormFileFlag = (byte)(hasSingleNormFile ? 1 : 0);
    output.WriteByte(singleNormFileFlag);

    // -1 stands in for the array length when there are no norm generations.
    if (normGen != null)
    {
        output.WriteInt(normGen.Length);
        for (int genIndex = 0; genIndex < normGen.Length; genIndex++)
        {
            output.WriteLong(normGen[genIndex]);
        }
    }
    else
    {
        output.WriteInt(-1);
    }

    output.WriteByte((byte)isCompoundFile);
}
/// <summary>Creates a file of the specified size with random data. </summary>
/// <param name="dir">Directory in which the file is created.</param>
/// <param name="name">Name of the file to create.</param>
/// <param name="size">Number of random bytes to write.</param>
private void CreateRandomFile(Directory dir, System.String name, int size)
{
    // One generator for the whole file. Creating a new System.Random per byte can
    // hand back the same value over and over on runtimes that seed it from the clock.
    System.Random random = new System.Random();

    IndexOutput os = dir.CreateOutput(name);
    try
    {
        for (int i = 0; i < size; i++)
        {
            // Next(256) yields a value in [0, 255], i.e. a full byte range.
            os.WriteByte((byte)random.Next(256));
        }
    }
    finally
    {
        // Release the output even if a write fails.
        os.Close();
    }
}
/// <summary>Creates a file of the specified size with sequential data. The first
/// byte is written as the start byte provided. All subsequent bytes are
/// computed as start + offset where offset is the number of the byte.
/// </summary>
/// <param name="dir">Directory in which the file is created.</param>
/// <param name="name">Name of the file to create.</param>
/// <param name="start">Value of the first byte.</param>
/// <param name="size">Number of bytes to write.</param>
private void CreateSequenceFile(Directory dir, System.String name, byte start, int size)
{
    IndexOutput os = dir.CreateOutput(name);
    try
    {
        for (int i = 0; i < size; i++)
        {
            // The sequence wraps around after 255; 'unchecked' keeps that wrap-around
            // working even when the project is compiled with overflow checking on.
            os.WriteByte(unchecked((byte)(start + i)));
        }
    }
    finally
    {
        // Release the output even if a write fails.
        os.Close();
    }
}
/// <summary>
/// Writes a one-byte norms file (<c>segment + ".f" + fieldNumber</c>) for each indexed
/// field that keeps norms. The byte is the encoded product of the field boost and the
/// similarity's length norm.
/// </summary>
/// <param name="segment">Segment name used as the file-name prefix.</param>
private void WriteNorms(System.String segment)
{
    for (int fieldNumber = 0; fieldNumber < fieldInfos.Size(); fieldNumber++)
    {
        FieldInfo info = fieldInfos.FieldInfo(fieldNumber);
        if (!info.isIndexed || info.omitNorms)
        {
            continue;
        }

        float norm = fieldBoosts[fieldNumber] * similarity.LengthNorm(info.name, fieldLengths[fieldNumber]);

        IndexOutput normsOut = directory.CreateOutput(segment + ".f" + fieldNumber);
        try
        {
            normsOut.WriteByte(Similarity.EncodeNorm(norm));
        }
        finally
        {
            normsOut.Close();
        }
    }
}
/// <summary>
/// Writes the vector as a d-gaps list: a -1 marker, size and count, then for every
/// non-zero byte the distance to the previous written byte followed by the byte itself.
/// </summary>
/// <param name="output">Stream that receives the encoded vector.</param>
private void WriteDgaps(IndexOutput output)
{
    output.WriteInt(-1);      // mark using d-gaps
    output.WriteInt(Size());  // write size
    output.WriteInt(Count()); // write count

    int previousIndex = 0;
    int remaining = Count();  // decremented by BYTE_COUNTS[...] of each written byte
    int byteCount = bits.Length;

    int index = 0;
    while (index < byteCount && remaining > 0)
    {
        byte current = bits[index];
        if (current != 0)
        {
            output.WriteVInt(index - previousIndex);
            output.WriteByte(current);
            previousIndex = index;
            remaining -= BYTE_COUNTS[current & 0xFF];
        }
        index++;
    }
}
/// <summary>
/// Writes the format marker, the number of fields, and then each field's name
/// followed by one flag byte.
/// </summary>
/// <param name="output">Stream that receives the field table.</param>
public void Write(IndexOutput output)
{
    output.WriteVInt(CURRENT_FORMAT);
    output.WriteVInt(Size());

    int fieldNumber = 0;
    while (fieldNumber < Size())
    {
        FieldInfo info = FieldInfo(fieldNumber);

        // Fold the per-field booleans into a single flag byte.
        byte flags = (byte)(0x0);
        if (info.isIndexed)
        {
            flags |= IS_INDEXED;
        }
        if (info.storeTermVector)
        {
            flags |= STORE_TERMVECTOR;
        }
        if (info.storePositionWithTermVector)
        {
            flags |= STORE_POSITIONS_WITH_TERMVECTOR;
        }
        if (info.storeOffsetWithTermVector)
        {
            flags |= STORE_OFFSET_WITH_TERMVECTOR;
        }
        if (info.omitNorms)
        {
            flags |= OMIT_NORMS;
        }
        if (info.storePayloads)
        {
            flags |= STORE_PAYLOADS;
        }
        if (info.omitTermFreqAndPositions)
        {
            flags |= OMIT_TERM_FREQ_AND_POSITIONS;
        }

        output.WriteString(info.name);
        output.WriteByte(flags);
        fieldNumber++;
    }
}
/// <summary>
/// Saves a single segment's info into its own file, framed by a codec header and footer.
/// </summary>
/// <param name="dir">Directory in which the segment-info file is created.</param>
/// <param name="si">Segment whose version, doc count, compound flag, diagnostics and files are written.</param>
/// <param name="fis">Not used by this method.</param>
/// <param name="ioContext">Context passed through to <c>CreateOutput</c>.</param>
public override void Write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext)
{
    string siFileName = IndexFileNames.SegmentFileName(si.Name, "", Lucene46SegmentInfoFormat.SI_EXTENSION);
    si.AddFile(siFileName);

    IndexOutput siOut = dir.CreateOutput(siFileName, ioContext);

    bool completed = false;
    try
    {
        CodecUtil.WriteHeader(siOut, Lucene46SegmentInfoFormat.CODEC_NAME, Lucene46SegmentInfoFormat.VERSION_CURRENT);

        // Write the Lucene version that created this segment, since 3.1
        output_WriteBody:
        siOut.WriteString(si.Version);
        siOut.WriteInt32(si.DocCount);
        byte compoundFlag = (byte)(si.UseCompoundFile ? SegmentInfo.YES : SegmentInfo.NO);
        siOut.WriteByte(compoundFlag);
        siOut.WriteStringStringMap(si.Diagnostics);
        siOut.WriteStringSet(si.GetFiles());

        CodecUtil.WriteFooter(siOut);
        completed = true;
    }
    finally
    {
        if (completed)
        {
            siOut.Dispose();
        }
        else
        {
            // On failure: dispose via the exception-handling helper, then delete the partial file.
            IOUtils.DisposeWhileHandlingException(siOut);
            si.Dir.DeleteFile(siFileName);
        }
    }
}
/// <summary>
/// Writes the bit vector in d-gaps form. After a -1 marker, the size and the count,
/// each non-zero byte is emitted as (gap since the previous emitted byte, byte value).
/// </summary>
/// <param name="output">Stream that receives the encoded vector.</param>
private void WriteDgaps(IndexOutput output)
{
    output.WriteInt(-1);      // mark using d-gaps
    output.WriteInt(Size());  // write size
    output.WriteInt(Count()); // write count

    int lastWritten = 0;
    int unwritten = Count();  // the loop stops once this reaches zero
    int length = bits.Length;

    for (int pos = 0; pos < length && unwritten > 0; pos++)
    {
        if (bits[pos] == 0)
        {
            continue; // all-zero bytes are implied by the gaps
        }

        output.WriteVInt(pos - lastWritten);
        output.WriteByte(bits[pos]);
        lastWritten = pos;
        unwritten -= BYTE_COUNTS[bits[pos] & 0xFF];
    }
}
/// <summary>
/// Appends the stored fields of <paramref name="doc"/> to the fields stream and records
/// the start position of the document in the index stream.
/// </summary>
/// <param name="doc">Document whose stored fields are written.</param>
internal void AddDocument(Document doc)
{
    // The index stream holds the pointer at which this document starts in the fields stream.
    indexStream.WriteLong(fieldsStream.GetFilePointer());

    // First pass: the number of stored fields is written ahead of the fields.
    int numStored = 0;
    foreach (Field candidate in doc.Fields())
    {
        if (candidate.IsStored())
        {
            numStored++;
        }
    }
    fieldsStream.WriteVInt(numStored);

    // Second pass: field number, flag byte, value.
    foreach (Field current in doc.Fields())
    {
        if (!current.IsStored())
        {
            continue;
        }

        fieldsStream.WriteVInt(fieldInfos.FieldNumber(current.Name()));

        byte flags = 0;
        if (current.IsTokenized())
        {
            flags |= FieldsWriter.FIELD_IS_TOKENIZED;
        }
        if (current.IsBinary())
        {
            flags |= FieldsWriter.FIELD_IS_BINARY;
        }
        if (current.IsCompressed())
        {
            flags |= FieldsWriter.FIELD_IS_COMPRESSED;
        }
        fieldsStream.WriteByte(flags);

        if (current.IsCompressed())
        {
            // compression is enabled for the current field
            byte[] compressed;
            if (current.IsBinary())
            {
                compressed = Compress(current.BinaryValue());
            }
            else
            {
                compressed = Compress(System.Text.Encoding.GetEncoding("UTF-8").GetBytes(current.StringValue()));
            }
            int compressedLength = compressed.Length;
            fieldsStream.WriteVInt(compressedLength);
            fieldsStream.WriteBytes(compressed, compressedLength);
        }
        else if (current.IsBinary())
        {
            // compression is disabled for the current field
            byte[] raw = current.BinaryValue();
            int rawLength = raw.Length;
            fieldsStream.WriteVInt(rawLength);
            fieldsStream.WriteBytes(raw, rawLength);
        }
        else
        {
            fieldsStream.WriteString(current.StringValue());
        }
    }
}
/// <summary>
/// Writes one stored field to the fields stream: field number, a flag byte
/// (tokenized / binary / compressed), then the value.
/// </summary>
/// <param name="fi">Supplies the field number that is written first.</param>
/// <param name="field">Field whose flags and value are written.</param>
internal void WriteField(FieldInfo fi, Fieldable field)
{
    // If the field is a FieldsReader.FieldForMerge we are in merge mode and the binary
    // value is already the compressed value for a field with IsCompressed() == true,
    // so compression is disabled in that case.
    bool disableCompression = (field is FieldsReader.FieldForMerge);

    fieldsStream.WriteVInt(fi.number);

    byte bits = 0;
    if (field.IsTokenized())
    {
        bits |= FieldsWriter.FIELD_IS_TOKENIZED;
    }
    if (field.IsBinary())
    {
        bits |= FieldsWriter.FIELD_IS_BINARY;
    }
    if (field.IsCompressed())
    {
        bits |= FieldsWriter.FIELD_IS_COMPRESSED;
    }
    fieldsStream.WriteByte(bits);

    if (field.IsCompressed())
    {
        // compression is enabled for the current field
        byte[] data;
        int len;
        int offset;
        if (disableCompression)
        {
            // optimized case for merging, the data is already compressed
            data = field.GetBinaryValue();
            System.Diagnostics.Debug.Assert(data != null);
            len = field.GetBinaryLength();
            offset = field.GetBinaryOffset();
        }
        else
        {
            // check if it is a binary field
            if (field.IsBinary())
            {
                data = Compress(field.GetBinaryValue(), field.GetBinaryOffset(), field.GetBinaryLength());
            }
            else
            {
                byte[] x = System.Text.Encoding.UTF8.GetBytes(field.StringValue());
                data = Compress(x, 0, x.Length);
            }
            // Freshly compressed data always starts at index 0 of its own array.
            len = data.Length;
            offset = 0;
        }
        fieldsStream.WriteVInt(len);
        fieldsStream.WriteBytes(data, offset, len);
    }
    else if (field.IsBinary())
    {
        // compression is disabled for the current field.
        // The offset and length below are taken from GetBinaryOffset()/GetBinaryLength(),
        // so the array must come from GetBinaryValue() as in the branches above.
        // NOTE(review): BinaryValue() presumably may return a trimmed copy, in which
        // case applying the offset to it would read the wrong bytes - confirm.
        int length = field.GetBinaryLength();
        fieldsStream.WriteVInt(length);
        fieldsStream.WriteBytes(field.GetBinaryValue(), field.GetBinaryOffset(), length);
    }
    else
    {
        fieldsStream.WriteString(field.StringValue());
    }
}
/// <summary>
/// Appends the stored fields of <paramref name="doc"/> to the fields stream and records
/// the start position of the document in the index stream.
/// </summary>
/// <param name="doc">Document whose stored fields are written.</param>
internal void AddDocument(Document doc)
{
    // The index stream holds the pointer at which this document starts in the fields stream.
    indexStream.WriteLong(fieldsStream.GetFilePointer());

    // First pass: the number of stored fields is written ahead of the fields.
    int numStored = 0;
    foreach (Fieldable candidate in doc.GetFields())
    {
        if (candidate.IsStored())
        {
            numStored++;
        }
    }
    fieldsStream.WriteVInt(numStored);

    // Second pass: field number, flag byte, value.
    foreach (Fieldable current in doc.GetFields())
    {
        if (!current.IsStored())
        {
            continue;
        }

        // if the field is a FieldsReader.FieldForMerge, we're in merge mode
        // and BinaryValue() already returns the compressed value for a field
        // with IsCompressed()==true, so we disable compression in that case
        bool alreadyCompressed = (current is FieldsReader.FieldForMerge);

        fieldsStream.WriteVInt(fieldInfos.FieldNumber(current.Name()));

        byte flags = 0;
        if (current.IsTokenized())
        {
            flags |= FieldsWriter.FIELD_IS_TOKENIZED;
        }
        if (current.IsBinary())
        {
            flags |= FieldsWriter.FIELD_IS_BINARY;
        }
        if (current.IsCompressed())
        {
            flags |= FieldsWriter.FIELD_IS_COMPRESSED;
        }
        fieldsStream.WriteByte(flags);

        if (current.IsCompressed())
        {
            // compression is enabled for the current field
            byte[] payload;
            if (alreadyCompressed)
            {
                // optimized case for merging, the data is already compressed
                payload = current.BinaryValue();
            }
            else if (current.IsBinary())
            {
                payload = Compress(current.BinaryValue());
            }
            else
            {
                payload = Compress(System.Text.Encoding.GetEncoding("UTF-8").GetBytes(current.StringValue()));
            }
            int payloadLength = payload.Length;
            fieldsStream.WriteVInt(payloadLength);
            fieldsStream.WriteBytes(payload, payloadLength);
        }
        else if (current.IsBinary())
        {
            // compression is disabled for the current field
            byte[] raw = current.BinaryValue();
            int rawLength = raw.Length;
            fieldsStream.WriteVInt(rawLength);
            fieldsStream.WriteBytes(raw, rawLength);
        }
        else
        {
            fieldsStream.WriteString(current.StringValue());
        }
    }
}
/// <summary>
/// Writes one stored field to the fields stream: field number, a flag byte
/// (tokenized / binary / compressed), then the value.
/// </summary>
/// <param name="fi">Supplies the field number that is written first.</param>
/// <param name="field">Field whose flags and value are written.</param>
internal void WriteField(FieldInfo fi, Fieldable field)
{
    // if the field is a FieldsReader.FieldForMerge, we're in merge mode
    // and BinaryValue() already returns the compressed value for a field
    // with IsCompressed()==true, so we disable compression in that case
    bool alreadyCompressed = (field is FieldsReader.FieldForMerge);

    fieldsStream.WriteVInt(fi.number);

    byte flags = 0;
    if (field.IsTokenized())
    {
        flags |= FieldsWriter.FIELD_IS_TOKENIZED;
    }
    if (field.IsBinary())
    {
        flags |= FieldsWriter.FIELD_IS_BINARY;
    }
    if (field.IsCompressed())
    {
        flags |= FieldsWriter.FIELD_IS_COMPRESSED;
    }
    fieldsStream.WriteByte(flags);

    if (field.IsCompressed())
    {
        // compression is enabled for the current field
        byte[] payload;
        if (alreadyCompressed)
        {
            // optimized case for merging, the data is already compressed
            payload = field.BinaryValue();
        }
        else if (field.IsBinary())
        {
            payload = Compress(field.BinaryValue());
        }
        else
        {
            payload = Compress(System.Text.Encoding.GetEncoding("UTF-8").GetBytes(field.StringValue()));
        }
        int payloadLength = payload.Length;
        fieldsStream.WriteVInt(payloadLength);
        fieldsStream.WriteBytes(payload, payloadLength);
    }
    else if (field.IsBinary())
    {
        // compression is disabled for the current field
        byte[] raw = field.BinaryValue();
        int rawLength = raw.Length;
        fieldsStream.WriteVInt(rawLength);
        fieldsStream.WriteBytes(raw, rawLength);
    }
    else
    {
        fieldsStream.WriteString(field.StringValue());
    }
}
/// <summary>
/// Writes the vector as a d-gaps list of its cleared bits: after a -1 marker, the size
/// and the count, every byte that is not 0xff is emitted as
/// (gap since the previous emitted byte, byte value).
/// </summary>
/// <param name="output">Stream that receives the encoded vector.</param>
private void WriteClearedDgaps(IndexOutput output)
{
    output.WriteInt(-1);      // mark using d-gaps
    output.WriteInt(Size());  // write size
    output.WriteInt(Count()); // write count

    int previousIndex = 0;
    int clearedLeft = Size() - Count();

    int index = 0;
    while (index < Bits.Length && clearedLeft > 0)
    {
        bool hasClearedBit = Bits[index] != unchecked((byte)0xff);
        if (hasClearedBit)
        {
            output.WriteVInt(index - previousIndex);
            output.WriteByte(Bits[index]);
            previousIndex = index;
            clearedLeft -= (8 - BitUtil.BitCount(Bits[index]));

            // The counter may only go negative on the last byte, by the amount given below.
            Debug.Assert(clearedLeft >= 0 || (index == (Bits.Length - 1) && clearedLeft == -(8 - (Size_Renamed & 7))));
        }
        index++;
    }
}
/// <summary>
/// Writes <paramref name="values"/> as one byte per entry, tagging the field with the
/// legacy FIXED_INTS_8 type. Null entries are written as 0.
/// </summary>
/// <param name="field">Field that receives the legacy doc-values attribute.</param>
/// <param name="output">Stream that receives the header, the value size and the bytes.</param>
/// <param name="values">Values to write, in order.</param>
private void AddBytesField(FieldInfo field, IndexOutput output, IEnumerable<long?> values)
{
    field.PutAttribute(LegacyKey, LegacyDocValuesType.FIXED_INTS_8.Name);
    CodecUtil.WriteHeader(
        output,
        Lucene40DocValuesFormat.INTS_CODEC_NAME,
        Lucene40DocValuesFormat.INTS_VERSION_CURRENT);

    output.WriteInt(1); // size

    foreach (long? value in values)
    {
        if (value.HasValue)
        {
            output.WriteByte((byte)value.Value);
        }
        else
        {
            output.WriteByte((byte)0);
        }
    }
}
/// <summary>
/// Writes <paramref name="values"/> in the legacy VAR_INTS layout: either as plain
/// longs, or as packed deltas from <paramref name="minValue"/>. Null entries count as 0.
/// </summary>
/// <param name="field">Field that receives the legacy doc-values attribute.</param>
/// <param name="output">Stream that receives the header and the values.</param>
/// <param name="values">Values to write, in order.</param>
/// <param name="minValue">Smallest value; base of the packed deltas.</param>
/// <param name="maxValue">Largest value; with <paramref name="minValue"/> decides the layout.</param>
private void AddVarIntsField(FieldInfo field, IndexOutput output, IEnumerable<long?> values, long minValue, long maxValue)
{
    field.PutAttribute(LegacyKey, LegacyDocValuesType.VAR_INTS.Name);
    CodecUtil.WriteHeader(
        output,
        Lucene40DocValuesFormat.VAR_INTS_CODEC_NAME,
        Lucene40DocValuesFormat.VAR_INTS_VERSION_CURRENT);

    long range = maxValue - minValue;
    bool usePacked = range >= 0; // a negative range selects the plain 64-bit layout

    if (usePacked)
    {
        // writes packed ints
        output.WriteByte((byte)Lucene40DocValuesFormat.VAR_INTS_PACKED);
        output.WriteLong(minValue);
        output.WriteLong(0 - minValue); // default value (representation of 0)

        PackedInts.Writer packed = PackedInts.GetWriter(
            output,
            State.SegmentInfo.DocCount,
            PackedInts.BitsRequired(range),
            PackedInts.DEFAULT);
        foreach (long? value in values)
        {
            long actual = value.HasValue ? value.Value : 0;
            packed.Add(actual - minValue);
        }
        packed.Finish();
    }
    else
    {
        // writes longs
        output.WriteByte((byte)Lucene40DocValuesFormat.VAR_INTS_FIXED_64);
        foreach (long? value in values)
        {
            output.WriteLong(value.HasValue ? value.Value : 0);
        }
    }
}
/// <summary>
/// Saves this segment's info to <paramref name="output"/>.
/// </summary>
/// <param name="output">
/// Stream that receives, in order: name, document count, deletion generation,
/// single-norm-file flag, norm generations and the compound-file marker.
/// </param>
internal void Write(IndexOutput output)
{
    output.WriteString(name);
    output.WriteInt(docCount);
    output.WriteLong(delGen);
    output.WriteByte((byte)(hasSingleNormFile ? 1 : 0));

    bool hasNormGens = normGen != null;
    if (!hasNormGens)
    {
        // -1 is written in place of the array length.
        output.WriteInt(-1);
    }
    else
    {
        output.WriteInt(normGen.Length);
        int written = 0;
        while (written < normGen.Length)
        {
            output.WriteLong(normGen[written]);
            written++;
        }
    }

    output.WriteByte((byte)isCompoundFile);
}
/// <summary>Called once per field per document if term vectors
/// are enabled, to write the vectors to
/// RAMOutputStream, which is then quickly flushed to
/// the real term vectors files in the Directory.
/// </summary>
internal override void Finish()
{
    System.Diagnostics.Debug.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.finish start"));

    int postingCount = termsHashPerField.numPostings;
    System.Diagnostics.Debug.Assert(postingCount >= 0);

    // Nothing to write when vectors are off or no term was collected.
    if (!doVectors || postingCount == 0)
    {
        return;
    }

    if (postingCount > maxNumPostings)
    {
        maxNumPostings = postingCount;
    }

    IndexOutput tvf = perThread.doc.tvf;

    // This is called once, after inverting all occurences
    // of a given field in the doc.  At this point we flush
    // our hash into the DocWriter.
    System.Diagnostics.Debug.Assert(fieldInfo.storeTermVector);
    System.Diagnostics.Debug.Assert(perThread.VectorFieldsInOrder(fieldInfo));

    perThread.doc.AddField(termsHashPerField.fieldInfo.number);

    RawPostingList[] sorted = termsHashPerField.SortPostings();

    // Per-field header: number of terms, then a flag byte for positions/offsets.
    tvf.WriteVInt(postingCount);
    byte flags = (byte)(0x0);
    if (doVectorPositions)
    {
        flags |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
    }
    if (doVectorOffsets)
    {
        flags |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
    }
    tvf.WriteByte(flags);

    int encoderSlot = 0;        // index of the UTF8 result used for the current term
    int previousByteCount = 0;  // UTF8 length of the previous term

    ByteSliceReader sliceReader = perThread.vectorSliceReader;
    char[][] charBlocks = perThread.termsHashPerThread.charPool.buffers;

    for (int termIndex = 0; termIndex < postingCount; termIndex++)
    {
        TermVectorsTermsWriter.PostingList posting = (TermVectorsTermsWriter.PostingList)sorted[termIndex];
        int freq = posting.freq;

        char[] termChars = charBlocks[posting.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
        int termStart = posting.textStart & DocumentsWriter.CHAR_BLOCK_MASK;

        // We swap between two encoders to save copying
        // last Term's byte array
        UnicodeUtil.UTF8Result utf8Result = perThread.utf8Results[encoderSlot];

        // TODO: we could do this incrementally
        UnicodeUtil.UTF16toUTF8(termChars, termStart, utf8Result);
        int currentByteCount = utf8Result.length;

        // TODO: UTF16toUTF8 could tell us this prefix
        // Compute common prefix between last term and
        // this term
        int prefix = 0;
        if (termIndex > 0)
        {
            byte[] previousBytes = perThread.utf8Results[1 - encoderSlot].result;
            byte[] currentBytes = perThread.utf8Results[encoderSlot].result;
            while (prefix < previousByteCount
                   && prefix < currentByteCount
                   && previousBytes[prefix] == currentBytes[prefix])
            {
                prefix++;
            }
        }

        // Flip to the other encoder for the next term.
        encoderSlot = 1 - encoderSlot;
        previousByteCount = currentByteCount;

        // Term entry: shared-prefix length, suffix length, suffix bytes, frequency.
        int suffix = currentByteCount - prefix;
        tvf.WriteVInt(prefix);
        tvf.WriteVInt(suffix);
        tvf.WriteBytes(utf8Result.result, prefix, suffix);
        tvf.WriteVInt(freq);

        if (doVectorPositions)
        {
            termsHashPerField.InitReader(sliceReader, posting, 0);
            sliceReader.WriteTo(tvf);
        }

        if (doVectorOffsets)
        {
            termsHashPerField.InitReader(sliceReader, posting, 1);
            sliceReader.WriteTo(tvf);
        }
    }

    termsHashPerField.Reset();
    perThread.termsHashPerThread.Reset(false);
}
/// <summary>
/// The little vint encoding used for var-deref: values below 128 take one byte;
/// larger values take two bytes, the first carrying the high bits with 0x80 set
/// and the second the low eight bits.
/// </summary>
/// <param name="o">Stream that receives the one or two bytes.</param>
/// <param name="i">Value to encode; asserted to lie in [0, short.MaxValue].</param>
private static void WriteVShort(IndexOutput o, int i)
{
    Debug.Assert(i >= 0 && i <= short.MaxValue);

    // The narrowing casts are wrapped in 'unchecked' as a whole, so the method also
    // works when compiled with overflow checking on. The previous form cast through
    // sbyte and then to byte outside 'unchecked', which would throw there for the
    // 0x80-flagged byte. The resulting byte values are the same.
    if (i < 128)
    {
        o.WriteByte(unchecked((byte)i));
    }
    else
    {
        o.WriteByte(unchecked((byte)(0x80 | (i >> 8))));
        o.WriteByte(unchecked((byte)(i & 0xff)));
    }
}
/// <summary>
/// Produce _X.nrm if any document had a field with norms not disabled.
/// </summary>
/// <param name="threadsAndFields">
/// Per-thread collections of <c>NormsWriterPerField</c> instances. Instances that
/// collected no norms (<c>upto == 0</c>) are removed from their collection.
/// </param>
/// <param name="state">Supplies the segment name, directory, flushed-file map and document count.</param>
internal override void flush(IDictionary<object, ICollection<object>> threadsAndFields, DocumentsWriter.FlushState state)
{
    IDictionary<object, object> byField = new Dictionary<object, object>();

    // Typically, each thread will have encountered the same
    // field.  So first we collate by field, ie, all
    // per-thread field instances that correspond to the
    // same FieldInfo
    foreach (KeyValuePair<object, ICollection<object>> entry in threadsAndFields)
    {
        ICollection<object> fields = entry.Value;

        // Removals are deferred so the collection is not modified while it is enumerated.
        List<object> fieldsToRemove = new List<object>(fields.Count);

        foreach (object candidate in fields)
        {
            NormsWriterPerField perField = (NormsWriterPerField)candidate;
            if (perField.upto > 0)
            {
                // It has some norms. A single TryGetValue replaces ContainsKey + indexer.
                IList<object> l;
                object existing;
                if (byField.TryGetValue(perField.fieldInfo, out existing))
                {
                    l = (IList<object>)existing;
                }
                else
                {
                    l = new List<object>();
                    byField[perField.fieldInfo] = l;
                }
                l.Add(perField);
            }
            else
            {
                // Remove this field since we haven't seen it
                // since the previous flush
                fieldsToRemove.Add(perField);
            }
        }

        for (int i = 0; i < fieldsToRemove.Count; i++)
        {
            fields.Remove(fieldsToRemove[i]);
        }
    }

    string normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;
    state.flushedFiles[normsFileName] = normsFileName;
    IndexOutput normsOut = state.directory.CreateOutput(normsFileName);

    try
    {
        normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);

        int numField = fieldInfos.Size();
        int normCount = 0; // number of fields for which a norms run has been written

        for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++)
        {
            FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

            int upto = 0; // next docID for which a norm byte must be written

            object merged;
            if (byField.TryGetValue(fieldInfo, out merged))
            {
                List<object> toMerge = (List<object>)merged;
                int numFields = toMerge.Count;

                normCount++;

                NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
                int[] uptos = new int[numFields];

                for (int j = 0; j < numFields; j++)
                {
                    fields[j] = (NormsWriterPerField)toMerge[j];
                }

                // Merge the per-thread streams by always taking the smallest pending docID.
                int numLeft = numFields;
                while (numLeft > 0)
                {
                    System.Diagnostics.Debug.Assert(uptos[0] < fields[0].docIDs.Length, " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.Length));

                    int minLoc = 0;
                    int minDocID = fields[0].docIDs[uptos[0]];

                    for (int j = 1; j < numLeft; j++)
                    {
                        int docID = fields[j].docIDs[uptos[j]];
                        if (docID < minDocID)
                        {
                            minDocID = docID;
                            minLoc = j;
                        }
                    }

                    System.Diagnostics.Debug.Assert(minDocID < state.numDocsInRAM);

                    // Fill hole
                    for (; upto < minDocID; upto++)
                    {
                        normsOut.WriteByte(defaultNorm);
                    }

                    normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]);
                    (uptos[minLoc])++;
                    upto++;

                    if (uptos[minLoc] == fields[minLoc].upto)
                    {
                        // This stream is exhausted: reset it and move the last live
                        // stream into its slot.
                        fields[minLoc].reset();
                        if (minLoc != numLeft - 1)
                        {
                            fields[minLoc] = fields[numLeft - 1];
                            uptos[minLoc] = uptos[numLeft - 1];
                        }
                        numLeft--;
                    }
                }

                // Fill final hole with defaultNorm
                for (; upto < state.numDocsInRAM; upto++)
                {
                    normsOut.WriteByte(defaultNorm);
                }
            }
            else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
            {
                normCount++;
                // Fill entire field with default norm:
                for (; upto < state.numDocsInRAM; upto++)
                {
                    normsOut.WriteByte(defaultNorm);
                }
            }

            // Expected size: 4 bytes (presumably the header length - confirm) plus one
            // byte per document for every field written so far.
            System.Diagnostics.Debug.Assert(4 + normCount * state.numDocsInRAM == normsOut.GetFilePointer(), ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocsInRAM) + " actual=" + normsOut.GetFilePointer());
        }
    }
    finally
    {
        normsOut.Close();
    }
}
/// <summary>
/// Writes the format marker and the field count, followed by the name and the
/// flag byte of every field.
/// </summary>
/// <param name="output">Stream that receives the field table.</param>
public void Write(IndexOutput output)
{
    output.WriteVInt(CURRENT_FORMAT);
    output.WriteVInt(Size());

    for (int fieldNumber = 0; fieldNumber < Size(); fieldNumber++)
    {
        FieldInfo info = FieldInfo(fieldNumber);

        // Each boolean property of the field contributes one bit to the flag byte.
        byte flags = (byte)(0x0);
        if (info.isIndexed)
        {
            flags |= IS_INDEXED;
        }
        if (info.storeTermVector)
        {
            flags |= STORE_TERMVECTOR;
        }
        if (info.storePositionWithTermVector)
        {
            flags |= STORE_POSITIONS_WITH_TERMVECTOR;
        }
        if (info.storeOffsetWithTermVector)
        {
            flags |= STORE_OFFSET_WITH_TERMVECTOR;
        }
        if (info.omitNorms)
        {
            flags |= OMIT_NORMS;
        }
        if (info.storePayloads)
        {
            flags |= STORE_PAYLOADS;
        }
        if (info.omitTermFreqAndPositions)
        {
            flags |= OMIT_TERM_FREQ_AND_POSITIONS;
        }

        output.WriteString(info.name);
        output.WriteByte(flags);
    }
}
/// <summary>
/// Saves this segment's info to <paramref name="output"/>.
/// </summary>
/// <param name="output">Stream that receives the values in the order written below.</param>
internal void Write(IndexOutput output)
{
    output.WriteString(name);
    output.WriteInt(docCount);
    output.WriteLong(delGen);
    output.WriteInt(docStoreOffset);

    // Doc-store segment name and compound flag are present only for an offset other than -1.
    if (docStoreOffset != -1)
    {
        output.WriteString(docStoreSegment);
        byte docStoreCompoundFlag = (byte)(docStoreIsCompoundFile ? 1 : 0);
        output.WriteByte(docStoreCompoundFlag);
    }

    byte singleNormFileFlag = (byte)(hasSingleNormFile ? 1 : 0);
    output.WriteByte(singleNormFileFlag);

    bool hasNormGens = normGen != null;
    if (!hasNormGens)
    {
        // NO is written in place of the array length.
        output.WriteInt(NO);
    }
    else
    {
        output.WriteInt(normGen.Length);
        int written = 0;
        while (written < normGen.Length)
        {
            output.WriteLong(normGen[written]);
            written++;
        }
    }

    output.WriteByte((byte)isCompoundFile);
    output.WriteInt(delCount);

    byte proxFlag = (byte)(hasProx ? 1 : 0);
    output.WriteByte(proxFlag);

    output.WriteStringStringMap(diagnostics);
}
/// <summary>Produce _X.nrm if any document had a field with norms
/// not disabled
/// </summary>
/// <param name="threadsAndFields">
/// Map whose values are hashtables keyed by <c>NormsWriterPerField</c>. Entries whose
/// writer collected no norms (<c>upto == 0</c>) are removed from their hashtable.
/// </param>
/// <param name="state">Supplies the segment name, directory, flushed-file map and document count.</param>
public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
{
    System.Collections.IDictionary byField = new System.Collections.Hashtable();

    // Typically, each thread will have encountered the same
    // field.  So first we collate by field, ie, all
    // per-thread field instances that correspond to the
    // same FieldInfo
    // (the iteration runs over a copy of the incoming map)
    foreach (System.Collections.DictionaryEntry threadEntry in new System.Collections.Hashtable(threadsAndFields))
    {
        System.Collections.ICollection fields = (System.Collections.ICollection)threadEntry.Value;
        System.Collections.ArrayList stale = new System.Collections.ArrayList();

        foreach (System.Collections.DictionaryEntry fieldEntry in fields)
        {
            NormsWriterPerField perField = (NormsWriterPerField)fieldEntry.Key;

            if (perField.upto <= 0)
            {
                // Remove this field since we haven't seen it
                // since the previous flush
                stale.Add(perField);
                continue;
            }

            // It has some norms
            System.Collections.IList sameField = (System.Collections.IList)byField[perField.fieldInfo];
            if (sameField == null)
            {
                sameField = new System.Collections.ArrayList();
                byField[perField.fieldInfo] = sameField;
            }
            sameField.Add(perField);
        }

        // Removals happen after the enumeration so the hashtable is not modified during it.
        System.Collections.Hashtable fieldsTable = (System.Collections.Hashtable)fields;
        for (int removeIndex = 0; removeIndex < stale.Count; removeIndex++)
        {
            fieldsTable.Remove(stale[removeIndex]);
        }
    }

    System.String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;
    state.flushedFiles[normsFileName] = normsFileName;
    IndexOutput normsOut = state.directory.CreateOutput(normsFileName);

    try
    {
        normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);

        int totalFields = fieldInfos.Size();
        int normCount = 0; // number of fields for which a norms run has been written

        for (int fieldNumber = 0; fieldNumber < totalFields; fieldNumber++)
        {
            FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);
            System.Collections.IList toMerge = (System.Collections.IList)byField[fieldInfo];

            int nextDoc = 0; // next docID for which a norm byte must be written

            if (toMerge != null)
            {
                int streamCount = toMerge.Count;

                normCount++;

                NormsWriterPerField[] streams = new NormsWriterPerField[streamCount];
                int[] cursors = new int[streamCount];

                for (int s = 0; s < streamCount; s++)
                {
                    streams[s] = (NormsWriterPerField)toMerge[s];
                }

                // Merge the per-thread streams by always taking the smallest pending docID.
                int live = streamCount;
                while (live > 0)
                {
                    System.Diagnostics.Debug.Assert(cursors[0] < streams[0].docIDs.Length, " uptos[0]=" + cursors[0] + " len=" + (streams[0].docIDs.Length));

                    int minLoc = 0;
                    int minDocID = streams[0].docIDs[cursors[0]];

                    for (int s = 1; s < live; s++)
                    {
                        int docID = streams[s].docIDs[cursors[s]];
                        if (docID < minDocID)
                        {
                            minDocID = docID;
                            minLoc = s;
                        }
                    }

                    System.Diagnostics.Debug.Assert(minDocID < state.numDocs);

                    // Fill hole
                    while (nextDoc < minDocID)
                    {
                        normsOut.WriteByte(defaultNorm);
                        nextDoc++;
                    }

                    normsOut.WriteByte(streams[minLoc].norms[cursors[minLoc]]);
                    cursors[minLoc]++;
                    nextDoc++;

                    if (cursors[minLoc] == streams[minLoc].upto)
                    {
                        // This stream is exhausted: reset it and move the last live
                        // stream into its slot.
                        streams[minLoc].Reset();
                        if (minLoc != live - 1)
                        {
                            streams[minLoc] = streams[live - 1];
                            cursors[minLoc] = cursors[live - 1];
                        }
                        live--;
                    }
                }

                // Fill final hole with defaultNorm
                while (nextDoc < state.numDocs)
                {
                    normsOut.WriteByte(defaultNorm);
                    nextDoc++;
                }
            }
            else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
            {
                normCount++;
                // Fill entire field with default norm:
                while (nextDoc < state.numDocs)
                {
                    normsOut.WriteByte(defaultNorm);
                    nextDoc++;
                }
            }

            System.Diagnostics.Debug.Assert(4 + normCount * state.numDocs == normsOut.GetFilePointer(), ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocs) + " actual=" + normsOut.GetFilePointer());
        }
    }
    finally
    {
        normsOut.Close();
    }
}
/// <summary>
/// Used only when writing norms to fill in default norm
/// value into the holes in docID stream for those docs
/// that didn't have this field.
/// </summary>
/// <param name="out_Renamed">Stream that receives the bytes.</param>
/// <param name="b">Byte value to repeat.</param>
/// <param name="numBytes">How many times <paramref name="b"/> is written; nothing is written when not positive.</param>
internal static void FillBytes(IndexOutput out_Renamed, byte b, int numBytes)
{
    int remaining = numBytes;
    while (remaining > 0)
    {
        out_Renamed.WriteByte(b);
        remaining--;
    }
}