/// <summary>
/// Called to complete TermInfos creation: seeks back to the size slot that was
/// reserved in the file header (offset 4, just after the format int) and writes
/// the final size, then disposes the underlying output. The nested try/finally
/// guarantees <c>Output</c> is always disposed, and that the companion writer
/// <c>Other</c> is disposed even if disposing <c>Output</c> throws.
/// </summary>
public void Dispose()
{
    try
    {
        Output.Seek(4); // write size after format
        Output.WriteInt64(Size);
    }
    finally
    {
        try
        {
            Output.Dispose();
        }
        finally
        {
            // Only the primary writer cascades disposal to its companion.
            // NOTE(review): presumably Other is the paired index/terms writer —
            // confirm against the enclosing class.
            if (!IsIndex)
            {
                Other.Dispose();
            }
        }
    }
}
/// <summary>
/// Called to complete TermInfos creation: seeks back to the size slot reserved
/// in the header (offset 4, immediately after the format int), backfills the
/// final size, and disposes the output. The nested try/finally guarantees
/// <c>output</c> is always disposed, and that <c>other</c> is disposed even if
/// disposing <c>output</c> throws.
/// </summary>
public void Dispose()
{
    try
    {
        output.Seek(4); // write size after format
        output.WriteInt64(size);
    }
    finally
    {
        try
        {
            output.Dispose();
        }
        finally
        {
            // Only the primary writer cascades disposal to its companion writer.
            // NOTE(review): presumably other is the paired index/terms writer —
            // confirm against the enclosing class.
            if (!isIndex)
            {
                other.Dispose();
            }
        }
    }
}
/// <summary>
/// Creates the terms (or terms-index) output file for the given segment and
/// writes its fixed header. If writing the header fails, the partially written
/// file is closed and deleted on a best-effort basis before the original
/// exception propagates.
/// </summary>
private void Initialize(Directory directory, string segment, FieldInfos fis, int interval, bool isi)
{
    indexInterval = interval;
    fieldInfos = fis;
    isIndex = isi;

    // The same file name is needed again below if creation fails and the
    // partial file must be deleted, so compute it once up front.
    string fileName = IndexFileNames.SegmentFileName(segment, "",
        isIndex ? Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION : Lucene3xPostingsFormat.TERMS_EXTENSION);
    output = directory.CreateOutput(fileName, IOContext.DEFAULT);

    bool headerWritten = false;
    try
    {
        output.WriteInt32(FORMAT_CURRENT); // write format
        output.WriteInt64(0); // leave space for size
        output.WriteInt32(indexInterval); // write indexInterval
        output.WriteInt32(skipInterval); // write skipInterval
        output.WriteInt32(maxSkipLevels); // write maxSkipLevels
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(InitUTF16Results());
        }
        headerWritten = true;
    }
    finally
    {
        if (!headerWritten)
        {
            // Close the stream while suppressing secondary errors, then try to
            // remove the partially written file; deletion is best-effort and
            // the original failure is what propagates.
            IOUtils.DisposeWhileHandlingException(output);
            try
            {
                directory.DeleteFile(fileName);
            }
            catch (IOException)
            {
                // ignored: best-effort cleanup only
            }
        }
    }
}
/// <summary>
/// Builds a randomized postings-writing chain for testing: picks a skip
/// interval, persists the random seed to a per-segment seed file, then wires a
/// randomly chosen postings writer (Sep or Lucene41, optionally wrapped in
/// Pulsing) into a randomly chosen terms dictionary (FST, FSTOrd, BlockTree, or
/// Block with a random terms index). On any failure the partially constructed
/// writers are disposed before rethrowing.
/// </summary>
public override FieldsConsumer FieldsConsumer(SegmentWriteState state)
{
    int minSkipInterval;
    if (state.SegmentInfo.DocCount > 1000000)
    {
        // Test2BPostings can OOME otherwise:
        minSkipInterval = 3;
    }
    else
    {
        minSkipInterval = 2;
    }

    // we pull this before the seed intentionally: because its not consumed at runtime
    // (the skipInterval is written into postings header)
    int skipInterval = TestUtil.NextInt32(seedRandom, minSkipInterval, 10);

    if (LuceneTestCase.Verbose)
    {
        Console.WriteLine("MockRandomCodec: skipInterval=" + skipInterval);
    }

    long seed = seedRandom.NextInt64();

    if (LuceneTestCase.Verbose)
    {
        Console.WriteLine("MockRandomCodec: writing to seg=" + state.SegmentInfo.Name + " formatID=" + state.SegmentSuffix + " seed=" + seed);
    }

    // Persist the seed so the matching FieldsProducer can rebuild the exact
    // same random chain when reading this segment back.
    string seedFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, SEED_EXT);
    IndexOutput @out = state.Directory.CreateOutput(seedFileName, state.Context);
    try
    {
        @out.WriteInt64(seed);
    }
    finally
    {
        @out.Dispose();
    }

    Random random = new J2N.Randomizer(seed);

    random.Next(); // consume a random for buffersize

    PostingsWriterBase postingsWriter;
    // BUGFIX: was random.nextBoolean() (Java-style casing); the correct member,
    // used elsewhere in this method, is NextBoolean().
    if (random.NextBoolean())
    {
        postingsWriter = new SepPostingsWriter(state, new MockInt32StreamFactory(random), skipInterval);
    }
    else
    {
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: writing Standard postings");
        }
        // TODO: randomize variables like acceptableOverhead?!
        postingsWriter = new Lucene41PostingsWriter(state, skipInterval);
    }

    if (random.NextBoolean())
    {
        int totTFCutoff = TestUtil.NextInt32(random, 1, 20);
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: writing pulsing postings with totTFCutoff=" + totTFCutoff);
        }
        postingsWriter = new PulsingPostingsWriter(state, totTFCutoff, postingsWriter);
    }

    FieldsConsumer fields;
    int t1 = random.Next(4);

    if (t1 == 0)
    {
        bool success = false;
        try
        {
            fields = new FSTTermsWriter(state, postingsWriter);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsWriter.Dispose();
            }
        }
    }
    else if (t1 == 1)
    {
        bool success = false;
        try
        {
            fields = new FSTOrdTermsWriter(state, postingsWriter);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsWriter.Dispose();
            }
        }
    }
    else if (t1 == 2)
    {
        // Use BlockTree terms dict
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: writing BlockTree terms dict");
        }

        // TODO: would be nice to allow 1 but this is very
        // slow to write
        int minTermsInBlock = TestUtil.NextInt32(random, 2, 100);
        int maxTermsInBlock = Math.Max(2, (minTermsInBlock - 1) * 2 + random.Next(100));

        bool success = false;
        try
        {
            fields = new BlockTreeTermsWriter(state, postingsWriter, minTermsInBlock, maxTermsInBlock);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsWriter.Dispose();
            }
        }
    }
    else
    {
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: writing Block terms dict");
        }

        bool success = false;

        TermsIndexWriterBase indexWriter;
        try
        {
            if (random.NextBoolean())
            {
                state.TermIndexInterval = TestUtil.NextInt32(random, 1, 100);
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: fixed-gap terms index (tii=" + state.TermIndexInterval + ")");
                }
                indexWriter = new FixedGapTermsIndexWriter(state);
            }
            else
            {
                VariableGapTermsIndexWriter.IndexTermSelector selector;
                int n2 = random.Next(3);
                if (n2 == 0)
                {
                    int tii = TestUtil.NextInt32(random, 1, 100);
                    selector = new VariableGapTermsIndexWriter.EveryNTermSelector(tii);
                    if (LuceneTestCase.Verbose)
                    {
                        Console.WriteLine("MockRandomCodec: variable-gap terms index (tii=" + tii + ")");
                    }
                }
                else if (n2 == 1)
                {
                    int docFreqThresh = TestUtil.NextInt32(random, 2, 100);
                    int tii = TestUtil.NextInt32(random, 1, 100);
                    selector = new VariableGapTermsIndexWriter.EveryNOrDocFreqTermSelector(docFreqThresh, tii);
                }
                else
                {
                    long seed2 = random.NextInt64();
                    int gap = TestUtil.NextInt32(random, 2, 40);
                    if (LuceneTestCase.Verbose)
                    {
                        Console.WriteLine("MockRandomCodec: random-gap terms index (max gap=" + gap + ")");
                    }
                    selector = new IndexTermSelectorAnonymousClass(seed2, gap);
                }
                indexWriter = new VariableGapTermsIndexWriter(state, selector);
            }
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsWriter.Dispose();
            }
        }

        success = false;
        try
        {
            fields = new BlockTermsWriter(indexWriter, state, postingsWriter);
            success = true;
        }
        finally
        {
            if (!success)
            {
                // Dispose both halves; the inner finally runs even if the
                // postings writer's Dispose throws.
                try
                {
                    postingsWriter.Dispose();
                }
                finally
                {
                    indexWriter.Dispose();
                }
            }
        }
    }

    return fields;
}
/// <summary>
/// Writes the trailer: the directory start offset as a fixed-width Int64 at the
/// current position of <paramref name="output"/>.
/// </summary>
private void WriteTrailer(IndexOutput output, long dirStart) => output.WriteInt64(dirStart);
/// <summary>
/// Serializes a single stored field: first the field number, then a bits byte
/// describing the value kind (numeric/binary/string), then the value itself.
/// Exactly one of the numeric, binary, or string representations is written;
/// a field with none of the three is rejected with an ArgumentException.
/// </summary>
public override void WriteField(FieldInfo info, IIndexableField field)
{
    fieldsStream.WriteVInt32(info.Number);
    int bits = 0;
    BytesRef bytes;
    string @string;
    // TODO: maybe a field should serialize itself?
    // this way we don't bake into indexer all these
    // specific encodings for different fields? and apps
    // can customize...

    // LUCENENET specific - To avoid boxing/unboxing, we don't
    // call GetNumericValue(). Instead, we check the field.NumericType and then
    // call the appropriate conversion method.
    if (field.NumericType != NumericFieldType.NONE)
    {
        // First pass: only record the kind in the bits byte; the actual value
        // is written after the bits byte below.
        switch (field.NumericType)
        {
            case NumericFieldType.BYTE:
            case NumericFieldType.INT16:
            case NumericFieldType.INT32:
                // byte/short/int all share the 32-bit integer encoding
                bits |= FIELD_IS_NUMERIC_INT;
                break;

            case NumericFieldType.INT64:
                bits |= FIELD_IS_NUMERIC_LONG;
                break;

            case NumericFieldType.SINGLE:
                bits |= FIELD_IS_NUMERIC_FLOAT;
                break;

            case NumericFieldType.DOUBLE:
                bits |= FIELD_IS_NUMERIC_DOUBLE;
                break;

            default:
                throw new ArgumentException("cannot store numeric type " + field.NumericType);
        }
        @string = null;
        bytes = null;
    }
    else
    {
        bytes = field.GetBinaryValue();
        if (bytes != null)
        {
            bits |= FIELD_IS_BINARY;
            @string = null;
        }
        else
        {
            @string = field.GetStringValue();
            if (@string == null)
            {
                throw new ArgumentException("field " + field.Name + " is stored but does not have binaryValue, stringValue nor numericValue");
            }
        }
    }

    fieldsStream.WriteByte((byte)(sbyte)bits);

    if (bytes != null)
    {
        // binary: length-prefixed raw bytes
        fieldsStream.WriteVInt32(bytes.Length);
        fieldsStream.WriteBytes(bytes.Bytes, bytes.Offset, bytes.Length);
    }
    else if (@string != null)
    {
        fieldsStream.WriteString(field.GetStringValue());
    }
    else
    {
        // Second pass for numerics: write the value with the width implied by
        // the bits byte; float/double are bit-cast to int/long first.
        switch (field.NumericType)
        {
            case NumericFieldType.BYTE:
            case NumericFieldType.INT16:
            case NumericFieldType.INT32:
                fieldsStream.WriteInt32(field.GetInt32Value().Value);
                break;

            case NumericFieldType.INT64:
                fieldsStream.WriteInt64(field.GetInt64Value().Value);
                break;

            case NumericFieldType.SINGLE:
                fieldsStream.WriteInt32(BitConversion.SingleToInt32Bits(field.GetSingleValue().Value));
                break;

            case NumericFieldType.DOUBLE:
                fieldsStream.WriteInt64(BitConversion.DoubleToInt64Bits(field.GetDoubleValue().Value));
                break;

            default:
                throw new InvalidOperationException("Cannot get here");
        }
    }
}
// Writes the contents of buffer into the fields stream
// and adds a new entry for this document into the index
// stream. this assumes the buffer was already written
// in the correct fields format.
public override void StartDocument(int numStoredFields)
{
    // Record the fields-stream position for this document BEFORE writing the
    // stored-field count, since the WriteVInt32 call advances that position.
    // NOTE(review): a sibling writer in this codebase uses the renamed
    // fieldsStream.Position API instead of GetFilePointer() — confirm whether
    // this stream type was also migrated.
    indexStream.WriteInt64(fieldsStream.GetFilePointer());
    fieldsStream.WriteVInt32(numStoredFields);
}
/// <summary>
/// Writes the trailer: the directory start offset as a fixed-width Int64 at the
/// current position of the owned output.
/// </summary>
private void WriteTrailer(long dirStart) => m_output.WriteInt64(dirStart);
/// <summary>
/// Serializes a single stored field: the field number, a bits byte describing
/// the value kind (numeric/binary/string), then the value itself. This variant
/// classifies numerics by boxing <c>GetNumericValue()</c> and type-testing the
/// result. A field with no numeric, binary, or string value is rejected with an
/// ArgumentException.
/// </summary>
public override void WriteField(FieldInfo info, IIndexableField field)
{
    fieldsStream.WriteVInt32(info.Number);
    int bits = 0;
    BytesRef bytes;
    string @string;
    // TODO: maybe a field should serialize itself?
    // this way we don't bake into indexer all these
    // specific encodings for different fields? and apps
    // can customize...

    object number = (object)field.GetNumericValue();
    if (number != null)
    {
        // Only the kind is recorded here; the value is written after the bits
        // byte below. sbyte/short/int all share the 32-bit integer encoding.
        if (number is sbyte || number is short || number is int)
        {
            bits |= FIELD_IS_NUMERIC_INT;
        }
        else if (number is long)
        {
            bits |= FIELD_IS_NUMERIC_LONG;
        }
        else if (number is float)
        {
            bits |= FIELD_IS_NUMERIC_FLOAT;
        }
        else if (number is double)
        {
            bits |= FIELD_IS_NUMERIC_DOUBLE;
        }
        else
        {
            throw new System.ArgumentException("cannot store numeric type " + number.GetType());
        }
        @string = null;
        bytes = null;
    }
    else
    {
        bytes = field.GetBinaryValue();
        if (bytes != null)
        {
            bits |= FIELD_IS_BINARY;
            @string = null;
        }
        else
        {
            @string = field.GetStringValue();
            if (@string == null)
            {
                throw new System.ArgumentException("field " + field.Name + " is stored but does not have binaryValue, stringValue nor numericValue");
            }
        }
    }

    fieldsStream.WriteByte((byte)(sbyte)bits);

    if (bytes != null)
    {
        // binary: length-prefixed raw bytes
        fieldsStream.WriteVInt32(bytes.Length);
        fieldsStream.WriteBytes(bytes.Bytes, bytes.Offset, bytes.Length);
    }
    else if (@string != null)
    {
        fieldsStream.WriteString(field.GetStringValue());
    }
    else
    {
        // numeric: unbox and write with the width implied by the bits byte;
        // float/double are bit-cast to int/long first.
        if (number is sbyte || number is short || number is int)
        {
            fieldsStream.WriteInt32((int)number);
        }
        else if (number is long)
        {
            fieldsStream.WriteInt64((long)number);
        }
        else if (number is float)
        {
            fieldsStream.WriteInt32(Number.SingleToInt32Bits((float)number));
        }
        else if (number is double)
        {
            fieldsStream.WriteInt64(BitConverter.DoubleToInt64Bits((double)number));
        }
        else
        {
            throw new InvalidOperationException("Cannot get here");
        }
    }
}
/// <summary>
/// Writes a Lucene 4.6-format field infos file for the given segment: a codec
/// header, the field count, then one record per field (name, number, a packed
/// flags byte, a packed doc-values/norms type byte, the doc-values generation,
/// and the attribute map), finished with a checksum footer. On success the
/// output is disposed normally; on failure it is disposed while suppressing
/// secondary exceptions so the original error propagates.
/// </summary>
public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
{
    string fileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
    IndexOutput output = directory.CreateOutput(fileName, context);
    bool success = false;
    try
    {
        CodecUtil.WriteHeader(output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT);
        output.WriteVInt32(infos.Count);
        foreach (FieldInfo fi in infos)
        {
            IndexOptions indexOptions = fi.IndexOptions;
            // Pack the per-field boolean flags into a single byte.
            sbyte bits = 0x0;
            if (fi.HasVectors)
            {
                bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR;
            }
            if (fi.OmitsNorms)
            {
                bits |= Lucene46FieldInfosFormat.OMIT_NORMS;
            }
            if (fi.HasPayloads)
            {
                bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS;
            }
            if (fi.IsIndexed)
            {
                bits |= Lucene46FieldInfosFormat.IS_INDEXED;
                // LUCENENET specific - to avoid boxing, changed from CompareTo() to IndexOptionsComparer.Compare()
                // Payloads only make sense when positions are indexed.
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(IndexOptionsComparer.Default.Compare(indexOptions, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.HasPayloads);
                }
                // Encode the index options as additional flag bits; the
                // DOCS_AND_FREQS_AND_POSITIONS case needs no extra bit.
                if (indexOptions == IndexOptions.DOCS_ONLY)
                {
                    bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
                }
                else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
                {
                    bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
                }
                else if (indexOptions == IndexOptions.DOCS_AND_FREQS)
                {
                    bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS;
                }
            }
            output.WriteString(fi.Name);
            output.WriteVInt32(fi.Number);
            output.WriteByte((byte)bits);

            // pack the DV types in one byte: norms type in the high nibble,
            // doc-values type in the low nibble.
            var dv = DocValuesByte(fi.DocValuesType);
            var nrm = DocValuesByte(fi.NormType);
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert((dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0);
            }
            var val = (byte)(0xff & ((nrm << 4) | (byte)dv));
            output.WriteByte(val);
            output.WriteInt64(fi.DocValuesGen);
            output.WriteStringStringMap(fi.Attributes);
        }
        CodecUtil.WriteFooter(output);
        success = true;
    }
    finally
    {
        if (success)
        {
            output.Dispose();
        }
        else
        {
            // Don't mask the original exception with one thrown by Dispose.
            IOUtils.DisposeWhileHandlingException(output);
        }
    }
}
/// <summary>
/// Begins a new document: records the current fields-stream position in the
/// index stream (so the document can be located later), then writes the number
/// of stored fields that will follow.
/// </summary>
public override void StartDocument(int numStoredFields)
{
    // The position must be captured BEFORE WriteVInt32 advances the stream.
    indexStream.WriteInt64(fieldsStream.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
    fieldsStream.WriteVInt32(numStoredFields);
}
/// <summary>
/// Writes a codec footer, which records both a checksum
/// algorithm ID and a checksum. This footer can
/// be parsed and validated with
/// <see cref="CheckFooter(ChecksumIndexInput)"/>.
/// <para/>
/// CodecFooter --> Magic,AlgorithmID,Checksum
/// <list type="bullet">
///     <item><description>Magic --> Uint32 (<see cref="DataOutput.WriteInt32(int)"/>). this
///         identifies the start of the footer. It is always <see cref="FOOTER_MAGIC"/>.</description></item>
///     <item><description>AlgorithmID --> Uint32 (<see cref="DataOutput.WriteInt32(int)"/>). this
///         indicates the checksum algorithm used. Currently this is always 0,
///         for zlib-crc32.</description></item>
///     <item><description>Checksum --> Uint64 (<see cref="DataOutput.WriteInt64(long)"/>). The
///         actual checksum value for all previous bytes in the stream, including
///         the bytes from Magic and AlgorithmID.</description></item>
/// </list>
/// </summary>
/// <param name="out"> Output stream </param>
/// <exception cref="IOException"> If there is an I/O error writing to the underlying medium. </exception>
public static void WriteFooter(IndexOutput @out)
{
    @out.WriteInt32(FOOTER_MAGIC);
    @out.WriteInt32(0); // algorithm ID: always 0 (zlib-crc32) at present
    // Checksum is written last and covers everything before it, including the
    // magic and algorithm ID just written. (Doc above corrected: this is a
    // 64-bit write, not Uint32.)
    @out.WriteInt64(@out.Checksum);
}
/// <summary>
/// Writes a numeric doc-values field, choosing the most compact of three
/// encodings after a single analysis pass over the values:
/// <list type="bullet">
///     <item><description>TABLE_COMPRESSED (or UNCOMPRESSED bytes) when there are
///         at most 256 unique values,</description></item>
///     <item><description>GCD_COMPRESSED when all deltas from the minimum share a
///         common divisor &gt; 1,</description></item>
///     <item><description>DELTA_COMPRESSED (block-packed) otherwise.</description></item>
/// </list>
/// Metadata (field number, entry type, data offset, packed-ints version) goes to
/// <c>meta</c>; the encoded values go to <c>data</c>.
/// </summary>
public override void AddNumericField(FieldInfo field, IEnumerable<long?> values)
{
    meta.WriteVInt32(field.Number);
    meta.WriteByte((byte)NUMBER);
    meta.WriteInt64(data.GetFilePointer());
    long minValue = long.MaxValue;
    long maxValue = long.MinValue;
    long gcd = 0;
    // Analysis pass: track min/max, running GCD of deltas, and the set of
    // unique values (abandoned once it exceeds 256 entries).
    // LUCENENET/review note: the original wrapped this pass in a dead
    // `if (true)` block; removed with no behavior change.
    HashSet<long> uniqueValues = new HashSet<long>();
    long count = 0;
    foreach (long? nv in values)
    {
        Debug.Assert(nv != null);
        long v = nv.Value;

        if (gcd != 1)
        {
            if (v < long.MinValue / 2 || v > long.MaxValue / 2)
            {
                // in that case v - minValue might overflow and make the GCD computation return
                // wrong results. Since these extreme values are unlikely, we just discard
                // GCD computation for them
                gcd = 1;
            } // minValue needs to be set first
            else if (count != 0)
            {
                gcd = MathUtil.Gcd(gcd, v - minValue);
            }
        }

        minValue = Math.Min(minValue, v);
        maxValue = Math.Max(maxValue, v);

        if (uniqueValues != null)
        {
            if (uniqueValues.Add(v))
            {
                if (uniqueValues.Count > 256)
                {
                    // too many distinct values for table compression
                    uniqueValues = null;
                }
            }
        }

        ++count;
    }
    Debug.Assert(count == maxDoc);

    if (uniqueValues != null)
    {
        // small number of unique values
        int bitsPerValue = PackedInt32s.BitsRequired(uniqueValues.Count - 1);
        FormatAndBits formatAndBits = PackedInt32s.FastestFormatAndBits(maxDoc, bitsPerValue, acceptableOverheadRatio);
        if (formatAndBits.BitsPerValue == 8 && minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue)
        {
            // A full byte per value would be needed anyway, so store raw bytes.
            meta.WriteByte((byte)UNCOMPRESSED); // uncompressed
            foreach (long? nv in values)
            {
                data.WriteByte((byte)nv.GetValueOrDefault());
            }
        }
        else
        {
            meta.WriteByte((byte)TABLE_COMPRESSED); // table-compressed
            // Write the decode table, then each value as a packed table index.
            var decode = uniqueValues.ToArray();
            var encode = new Dictionary<long, int>();
            data.WriteVInt32(decode.Length);
            for (int i = 0; i < decode.Length; i++)
            {
                data.WriteInt64(decode[i]);
                encode[decode[i]] = i;
            }

            meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
            data.WriteVInt32(formatAndBits.Format.Id);
            data.WriteVInt32(formatAndBits.BitsPerValue);

            PackedInt32s.Writer writer = PackedInt32s.GetWriterNoHeader(data, formatAndBits.Format, maxDoc, formatAndBits.BitsPerValue, PackedInt32s.DEFAULT_BUFFER_SIZE);
            foreach (long? nv in values)
            {
                writer.Add(encode[nv.GetValueOrDefault()]);
            }
            writer.Finish();
        }
    }
    else if (gcd != 0 && gcd != 1)
    {
        // All deltas share a common divisor: store (v - min) / gcd.
        meta.WriteByte((byte)GCD_COMPRESSED);
        meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
        data.WriteInt64(minValue);
        data.WriteInt64(gcd);
        data.WriteVInt32(BLOCK_SIZE);

        var writer = new BlockPackedWriter(data, BLOCK_SIZE);
        foreach (long? nv in values)
        {
            writer.Add((nv.GetValueOrDefault() - minValue) / gcd);
        }
        writer.Finish();
    }
    else
    {
        meta.WriteByte((byte)DELTA_COMPRESSED); // delta-compressed
        meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
        data.WriteVInt32(BLOCK_SIZE);

        var writer = new BlockPackedWriter(data, BLOCK_SIZE);
        foreach (long? nv in values)
        {
            writer.Add(nv.GetValueOrDefault());
        }
        writer.Finish();
    }
}
/// <summary>
/// Writes a sorted variable-length bytes field in the legacy 4.0 layout:
/// the concatenated term bytes go to <paramref name="data"/>; the index file
/// gets the total byte length, a packed array of start addresses (with one
/// sentinel entry at the end), and a packed per-document ordinal array.
/// Note that <paramref name="values"/> is enumerated twice (once for bytes,
/// once for addresses), so it must be repeatable.
/// </summary>
private void AddVarSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd)
{
    field.PutAttribute(legacyKey, LegacyDocValuesType.BYTES_VAR_SORTED.ToString());

    CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

    CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

    /* values */

    long startPos = data.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream

    int valueCount = 0;
    foreach (BytesRef v in values)
    {
        data.WriteBytes(v.Bytes, v.Offset, v.Length);
        valueCount++;
    }

    /* addresses */

    // Total number of value bytes written; also the upper bound for addresses.
    long maxAddress = data.Position - startPos; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
    index.WriteInt64(maxAddress);

    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(valueCount != int.MaxValue); // unsupported by the 4.0 impl
    }

    // valueCount + 1 entries: one start address per value plus a sentinel so
    // value i's length is address[i+1] - address[i].
    PackedInt32s.Writer w = PackedInt32s.GetWriter(index, valueCount + 1, PackedInt32s.BitsRequired(maxAddress), PackedInt32s.DEFAULT);
    long currentPosition = 0;
    foreach (BytesRef v in values)
    {
        w.Add(currentPosition);
        currentPosition += v.Length;
    }
    // write sentinel
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(currentPosition == maxAddress);
    }
    w.Add(currentPosition);
    w.Finish();

    /* ordinals */

    int maxDoc = state.SegmentInfo.DocCount;
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(valueCount > 0);
    }
    PackedInt32s.Writer ords = PackedInt32s.GetWriter(index, maxDoc, PackedInt32s.BitsRequired(valueCount - 1), PackedInt32s.DEFAULT);
    // NOTE(review): docToOrd is IEnumerable<long?> but is iterated as long,
    // so a null ordinal would throw here — presumably nulls cannot occur by
    // the time this is called; confirm against callers.
    foreach (long n in docToOrd)
    {
        ords.Add((long)n);
    }
    ords.Finish();
}