/// <summary>
/// Writes the header for one field: its number, name, the positions/offsets/payloads
/// flags and the term count, each on its own line, then remembers the three flags.
/// </summary>
/// <param name="info">Field whose number and name are written.</param>
/// <param name="numTerms">Value written after the term-count label.</param>
/// <param name="positions">Flag written after the positions label and stored.</param>
/// <param name="offsets">Flag written after the offsets label and stored.</param>
/// <param name="payloads">Flag written after the payloads label and stored.</param>
public override void StartField(FieldInfo info, int numTerms, bool positions, bool offsets, bool payloads)
{
    // field number
    Write(FIELD);
    Write(Convert.ToString(info.Number));
    NewLine();

    // field name
    Write(FIELDNAME);
    Write(info.Name);
    NewLine();

    // per-field flags
    Write(FIELDPOSITIONS);
    Write(Convert.ToString(positions));
    NewLine();

    Write(FIELDOFFSETS);
    Write(Convert.ToString(offsets));
    NewLine();

    Write(FIELDPAYLOADS);
    Write(Convert.ToString(payloads));
    NewLine();

    // number of terms
    Write(FIELDTERMCOUNT);
    Write(Convert.ToString(numTerms));
    NewLine();

    // Remember the flags only after the whole header has been written.
    _positions = positions;
    _offsets = offsets;
    _payloads = payloads;
}
/// <summary>
/// Caches which postings features the field has, forwards them to the skip writer,
/// resets the last-state tracker and reports how many file pointers a term needs.
/// </summary>
/// <param name="fieldInfo">Field whose index options and payload flag are inspected.</param>
/// <returns>
/// 1 (doc FP) without positions, 2 (doc + pos FP) with positions only,
/// 3 (doc + pos + pay FP) with positions and payloads or offsets.
/// </returns>
public override int SetField(FieldInfo fieldInfo)
{
    FieldInfo.IndexOptions? options = fieldInfo.FieldIndexOptions;

    FieldHasFreqs = options >= FieldInfo.IndexOptions.DOCS_AND_FREQS;
    FieldHasPositions = options >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
    FieldHasOffsets = options >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    FieldHasPayloads = fieldInfo.HasPayloads();

    SkipWriter.SetField(FieldHasPositions, FieldHasOffsets, FieldHasPayloads);
    LastState = EmptyState;

    if (!FieldHasPositions)
    {
        return 1; // doc FP
    }

    // doc + pos + pay FP, or doc + pos FP
    return (FieldHasPayloads || FieldHasOffsets) ? 3 : 2;
}
/// <summary>
/// Encodes a term's metadata: file pointers go into <paramref name="longs"/> as deltas
/// against the previously encoded state, the optional values go to <paramref name="out"/>.
/// </summary>
/// <param name="longs">Receives the doc (and, when applicable, pos and pay) file-pointer deltas.</param>
/// <param name="out">Receives singleton doc ID, last pos block offset and skip offset when they are not -1.</param>
/// <param name="fieldInfo">Not read by this method.</param>
/// <param name="_state">Term state; cast to <c>IntBlockTermState</c>.</param>
/// <param name="absolute">When true, deltas are taken against <c>EmptyState</c>.</param>
public override void EncodeTerm(long[] longs, DataOutput @out, FieldInfo fieldInfo, BlockTermState _state, bool absolute)
{
    var termState = (IntBlockTermState)_state;

    if (absolute)
    {
        LastState = EmptyState;
    }

    // File pointers as deltas from the previous term.
    longs[0] = termState.DocStartFP - LastState.DocStartFP;
    if (FieldHasPositions)
    {
        longs[1] = termState.PosStartFP - LastState.PosStartFP;
        if (FieldHasPayloads || FieldHasOffsets)
        {
            longs[2] = termState.PayStartFP - LastState.PayStartFP;
        }
    }

    // Optional values; -1 means "absent" and nothing is written.
    if (termState.SingletonDocID != -1)
    {
        @out.WriteVInt(termState.SingletonDocID);
    }

    if (FieldHasPositions && termState.LastPosBlockOffset != -1)
    {
        @out.WriteVLong(termState.LastPosBlockOffset);
    }

    if (termState.SkipOffset != -1)
    {
        @out.WriteVLong(termState.SkipOffset);
    }

    LastState = termState;
}
/// <summary>
/// Writes a numeric field as fixed-width text: the minimum value, a zero pattern wide
/// enough for the largest delta, then one delta line and one "T" line per value.
/// </summary>
/// <remarks>
/// Changes from the previous version: the second pass no longer references an undefined
/// variable (values here are non-nullable, so the marker is always "T"), the delta is
/// computed without <c>long</c> overflow, and all numbers are formatted with the
/// invariant culture so the output does not depend on the current thread culture.
/// </remarks>
/// <param name="field">Field being written.</param>
/// <param name="values">One value per document; enumerated twice.</param>
public override void AddNumericField(FieldInfo field, IEnumerable<long> values)
{
    Debug.Assert(FieldSeen(field.Name));
    Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.NUMERIC || field.NormType == FieldInfo.DocValuesType_e.NUMERIC);
    WriteFieldEntry(field, FieldInfo.DocValuesType_e.NUMERIC);

    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // first pass to find min/max
    long minValue = long.MaxValue;
    long maxValue = long.MinValue;
    foreach (long v in values)
    {
        minValue = Math.Min(minValue, v);
        maxValue = Math.Max(maxValue, v);
    }

    // write our minimum value to the .dat, all entries are deltas from that
    SimpleTextUtil.Write(data, MINVALUE);
    SimpleTextUtil.Write(data, minValue.ToString(invariant), scratch);
    SimpleTextUtil.WriteNewline(data);

    // build up our fixed-width "simple text packed ints" format;
    // BigInteger because max - min may not fit in a long
    System.Numerics.BigInteger diffBig = (System.Numerics.BigInteger)maxValue - minValue;
    int maxBytesPerValue = diffBig.ToString(invariant).Length;
    string patternString = new string('0', maxBytesPerValue);

    // write our pattern to the .dat
    SimpleTextUtil.Write(data, PATTERN);
    SimpleTextUtil.Write(data, patternString, scratch);
    SimpleTextUtil.WriteNewline(data);

    int numDocsWritten = 0;

    // second pass to write the values
    foreach (long value in values)
    {
        Debug.Assert(value >= minValue);

        // value >= minValue, so the true difference always fits in a ulong
        // even when it exceeds long.MaxValue.
        ulong delta = unchecked((ulong)value - (ulong)minValue);
        string s = delta.ToString(patternString, invariant);
        Debug.Assert(s.Length == patternString.Length);
        SimpleTextUtil.Write(data, s, scratch);
        SimpleTextUtil.WriteNewline(data);

        // Non-nullable input: every document has a value.
        SimpleTextUtil.Write(data, "T", scratch);
        SimpleTextUtil.WriteNewline(data);

        numDocsWritten++;
        Debug.Assert(numDocsWritten <= numDocs);
    }

    Debug.Assert(numDocs == numDocsWritten, "numDocs=" + numDocs + " numDocsWritten=" + numDocsWritten);
}
/// <summary>
/// Builds the <c>Bits</c> view for a binary field from the field's entry in
/// <c>FIELDS</c>, a clone of <c>DATA</c> and a fresh scratch buffer.
/// </summary>
/// <param name="fieldInfo">Field whose name is used as the lookup key.</param>
private Bits GetBinaryDocsWithField(FieldInfo fieldInfo)
{
    var entry = FIELDS[fieldInfo.Name];
    var dataClone = (IndexInput)DATA.Clone();
    var buffer = new BytesRef();

    return new BitsAnonymousInnerClassHelper2(this, entry, dataClone, buffer);
}
/// <summary>
/// Stores the owning reader, document count and terms start position, resolves the
/// field's <c>FieldInfo</c> from the reader, and then calls <c>LoadTerms</c>.
/// </summary>
/// <param name="outerInstance">Owning reader; also supplies the field infos.</param>
/// <param name="field">Name of the field to resolve.</param>
/// <param name="termsStart">Stored as the terms start position.</param>
/// <param name="maxDoc">Stored as the maximum document count.</param>
public SimpleTextTerms(SimpleTextFieldsReader outerInstance, string field, long termsStart, int maxDoc)
{
    _outerInstance = outerInstance;
    _maxDoc = maxDoc;
    _termsStart = termsStart;
    _fieldInfo = outerInstance._fieldInfos.FieldInfo(field);

    // All state above must be in place before the terms are loaded.
    LoadTerms();
}
/// <summary>
/// Writes a sorted field in two parts: the per-document ordinals as a numeric field,
/// then the distinct values as an FST.
/// </summary>
/// <param name="field">Field being written.</param>
/// <param name="values">Values handed to <c>WriteFST</c>.</param>
/// <param name="docToOrd">Ordinals handed to <c>AddNumericField</c>.</param>
public override void AddSortedField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd)
{
    // ordinals first, written as numerics
    AddNumericField(field, docToOrd, false);

    // then the values, written as an FST
    WriteFST(field, values);
}
// note: this might not be the most efficient... but it's fairly simple
/// <summary>
/// Writes a sorted-set field in two parts: the ordinals wrapped as a binary field,
/// then the distinct values as an FST.
/// </summary>
/// <param name="field">Field being written.</param>
/// <param name="values">Values handed to <c>WriteFST</c>.</param>
/// <param name="docToOrdCount">Per-document ordinal counts, wrapped for <c>AddBinaryField</c>.</param>
/// <param name="ords">Ordinals, wrapped together with the counts.</param>
public override void AddSortedSetField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrdCount, IEnumerable<long?> ords)
{
    // ordinals first, written as a binary field
    var ordinalsAsBinary = new IterableAnonymousInnerClassHelper(this, docToOrdCount, ords);
    AddBinaryField(field, ordinalsAsBinary);

    // then the values, written as an FST
    WriteFST(field, values);
}
/// <summary>
/// Prepares a per-field terms writer: registers the field with the postings writer to
/// learn the longs size, then creates the term outputs and the FST builder.
/// </summary>
/// <param name="outerInstance">Owning writer; supplies the postings writer.</param>
/// <param name="fieldInfo">Field this writer handles.</param>
internal TermsWriter(FSTTermsWriter outerInstance, FieldInfo fieldInfo)
{
    _outerInstance = outerInstance;
    _numTerms = 0;
    _fieldInfo = fieldInfo;

    // The postings writer decides how many longs each term carries.
    _longsSize = outerInstance._postingsWriter.SetField(fieldInfo);

    _outputs = new FSTTermOutputs(fieldInfo, _longsSize);
    _builder = new Builder<FSTTermOutputs.TermData>(FST.INPUT_TYPE.BYTE1, _outputs);
}
/// <summary>
/// Writes a binary field as fixed-width text: the maximum length, a zero pattern for
/// encoding lengths, then per value a length line, the space-padded raw bytes and a
/// "T"/"F" line telling whether the value was non-null.
/// </summary>
/// <param name="field">Field being written.</param>
/// <param name="values">One entry per document (null allowed); enumerated twice.</param>
public override void AddBinaryField(FieldInfo field, IEnumerable<BytesRef> values)
{
    Debug.Assert(FieldSeen(field.Name));
    Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.BINARY);

    // Longest value; null entries count as length 0.
    int maxLength = 0;
    foreach (BytesRef candidate in values)
    {
        if (candidate != null)
        {
            maxLength = Math.Max(maxLength, candidate.Length);
        }
    }

    WriteFieldEntry(field, FieldInfo.DocValuesType_e.BINARY);

    // write maxLength
    string maxLengthText = Convert.ToString(maxLength);
    SimpleTextUtil.Write(data, MAXLENGTH);
    SimpleTextUtil.Write(data, maxLengthText, scratch);
    SimpleTextUtil.WriteNewline(data);

    // write our pattern for encoding lengths: one '0' per digit of maxLength
    string lengthPattern = new string('0', maxLengthText.Length);
    SimpleTextUtil.Write(data, PATTERN);
    SimpleTextUtil.Write(data, lengthPattern, scratch);
    SimpleTextUtil.WriteNewline(data);

    int numDocsWritten = 0;
    foreach (BytesRef current in values)
    {
        bool hasValue = current != null;
        int length = hasValue ? current.Length : 0;

        SimpleTextUtil.Write(data, LENGTH);
        SimpleTextUtil.Write(data, length.ToString(lengthPattern), scratch);
        SimpleTextUtil.WriteNewline(data);

        // write bytes -- don't use SimpleText.Write because it escapes:
        if (hasValue)
        {
            data.WriteBytes(current.Bytes, current.Offset, current.Length);
        }

        // pad to fit
        for (int pad = length; pad < maxLength; pad++)
        {
            data.WriteByte((byte)' ');
        }
        SimpleTextUtil.WriteNewline(data);

        SimpleTextUtil.Write(data, hasValue ? "T" : "F", scratch);
        SimpleTextUtil.WriteNewline(data);
        numDocsWritten++;
    }

    Debug.Assert(numDocs == numDocsWritten);
}
/// <summary>
/// Writes the header for this field: a line with the field name followed by a line
/// with the doc-values type.
/// </summary>
/// <param name="field">Field whose name is written.</param>
/// <param name="type">Doc-values type; written via its <c>ToString()</c>.</param>
private void WriteFieldEntry(FieldInfo field, FieldInfo.DocValuesType_e type)
{
    // field name line
    SimpleTextUtil.Write(data, FIELD);
    SimpleTextUtil.Write(data, field.Name, scratch);
    SimpleTextUtil.WriteNewline(data);

    // type line
    string typeName = type.ToString();
    SimpleTextUtil.Write(data, TYPE);
    SimpleTextUtil.Write(data, typeName, scratch);
    SimpleTextUtil.WriteNewline(data);
}
/// <summary>
/// Returns a binary doc-values view for the field, backed by the field's entry in
/// <c>FIELDS</c>, a clone of <c>DATA</c> and a fresh scratch buffer.
/// </summary>
/// <param name="fieldInfo">Field whose name is used as the lookup key.</param>
public override BinaryDocValues GetBinary(FieldInfo fieldInfo)
{
    var entry = FIELDS[fieldInfo.Name];
    Debug.Assert(entry != null);

    var dataClone = (IndexInput)DATA.Clone();
    var buffer = new BytesRef();

    return new BinaryDocValuesAnonymousInnerClassHelper(this, entry, dataClone, buffer);
}
/// <summary>
/// Appends all non-null values to the data file, records start pointer, total byte
/// count and value count in the meta file, writes the missing bitset (or -1 when no
/// value was null), and finally writes the running start address of every value plus
/// the end address.
/// </summary>
/// <param name="field">Field being written; its name is used in the error message.</param>
/// <param name="values">Values to write (null allowed); enumerated more than once.</param>
/// <exception cref="ArgumentException">
/// The accumulated byte count exceeds <c>DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH</c>.
/// </exception>
private void AddBinaryFieldValues(FieldInfo field, IEnumerable<BytesRef> values)
{
    // write the byte[] data
    long dataStart = data.FilePointer;
    bool sawMissing = false;
    long byteTotal = 0;
    int valueCount = 0;

    foreach (BytesRef bytes in values)
    {
        if (bytes == null)
        {
            sawMissing = true;
        }
        else
        {
            data.WriteBytes(bytes.Bytes, bytes.Offset, bytes.Length);
            byteTotal += bytes.Length;
            if (byteTotal > DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH)
            {
                throw new ArgumentException("DocValuesField \"" + field.Name + "\" is too large, cannot have more than DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH (" + DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH + ") bytes");
            }
        }
        valueCount++;
    }

    meta.WriteLong(dataStart);
    meta.WriteInt((int)byteTotal);
    meta.WriteInt(valueCount);

    if (!sawMissing)
    {
        // -1 marks "no missing bitset"
        meta.WriteLong(-1L);
    }
    else
    {
        long bitsetStart = data.FilePointer;
        WriteMissingBitset(values);
        meta.WriteLong(bitsetStart);
        meta.WriteLong(data.FilePointer - bitsetStart);
    }

    // Start address of each value, then the end address of the last one.
    int address = 0;
    foreach (BytesRef bytes in values)
    {
        data.WriteInt(address);
        if (bytes != null)
        {
            address += bytes.Length;
        }
    }
    data.WriteInt(address);
}
/// <summary>
/// Writes a sorted field: the field number and the SORTED marker go to the meta file,
/// followed by the ordinals as numerics and the values as binary.
/// </summary>
/// <param name="field">Field being written.</param>
/// <param name="values">Values handed to <c>AddBinaryFieldValues</c>.</param>
/// <param name="docToOrd">Ordinals handed to <c>AddNumericFieldValues</c>.</param>
public override void AddSortedField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd)
{
    // header: field number + type marker
    meta.WriteVInt(field.Number);
    meta.WriteByte((byte)DirectDocValuesProducer.SORTED);

    // write the ordinals as numerics
    AddNumericFieldValues(field, docToOrd);

    // write the values as binary
    AddBinaryFieldValues(field, values);
}
/// <summary>
/// Stores the per-field statistics and reads the field's term dictionary FST from
/// <paramref name="in"/>.
/// </summary>
/// <param name="outerInstance">Owning reader.</param>
/// <param name="fieldInfo">Field this reader serves; also passed to the term outputs.</param>
/// <param name="in">Input the FST is read from.</param>
/// <param name="numTerms">Stored as-is.</param>
/// <param name="sumTotalTermFreq">Stored as-is.</param>
/// <param name="sumDocFreq">Stored as-is.</param>
/// <param name="docCount">Stored as-is.</param>
/// <param name="longsSize">Stored and passed to the term outputs.</param>
internal TermsReader(FSTTermsReader outerInstance, FieldInfo fieldInfo, IndexInput @in, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize)
{
    this.outerInstance = outerInstance;
    this.fieldInfo = fieldInfo;

    // statistics
    this.numTerms = numTerms;
    this.sumTotalTermFreq = sumTotalTermFreq;
    this.sumDocFreq = sumDocFreq;
    this.docCount = docCount;
    this.longsSize = longsSize;

    // term dictionary
    var termOutputs = new FSTTermOutputs(fieldInfo, longsSize);
    this.dict = new FST<FSTTermOutputs.TermData>(@in, termOutputs);
}
/// <summary>
/// Captures the metadata of one field: its info, statistics, longs size and FST.
/// </summary>
/// <param name="fieldInfo">Assigned to <c>FieldInfo</c>.</param>
/// <param name="numTerms">Assigned to <c>NumTerms</c>.</param>
/// <param name="sumTotalTermFreq">Assigned to <c>SumTotalTermFreq</c>.</param>
/// <param name="sumDocFreq">Assigned to <c>SumDocFreq</c>.</param>
/// <param name="docCount">Assigned to <c>DocCount</c>.</param>
/// <param name="longsSize">Assigned to <c>LongsSize</c>.</param>
/// <param name="fst">Assigned to <c>Dict</c>.</param>
public FieldMetaData(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<FSTTermOutputs.TermData> fst)
{
    this.FieldInfo = fieldInfo;
    this.NumTerms = numTerms;
    this.SumTotalTermFreq = sumTotalTermFreq;
    this.SumDocFreq = sumDocFreq;
    this.DocCount = docCount;
    this.LongsSize = longsSize;
    this.Dict = fst;
}
/// <summary>
/// Returns a sorted doc-values view for the field, backed by the field's entry in
/// <c>FIELDS</c>, a clone of <c>DATA</c> and a fresh scratch buffer.
/// </summary>
/// <param name="fieldInfo">Field whose name is used as the lookup key.</param>
public override SortedDocValues GetSorted(FieldInfo fieldInfo)
{
    var entry = FIELDS[fieldInfo.Name];

    // SegmentCoreReaders already verifies this field is valid:
    Debug.Assert(entry != null);

    var dataClone = (IndexInput)DATA.Clone();
    var buffer = new BytesRef();

    return new SortedDocValuesAnonymousInnerClassHelper(this, entry, dataClone, buffer);
}
/// <summary>
/// Decides whether the visitor wants the given stored field. The field's position in
/// <c>fields</c> is remembered in <c>currentField</c> for later callbacks.
/// </summary>
/// <param name="fieldInfo">Field being offered.</param>
/// <returns>
/// <c>NO</c> when the field is not in <c>fields</c>; when its builder is already longer
/// than <c>maxLength</c>, <c>STOP</c> if it is the only field and <c>NO</c> otherwise;
/// <c>YES</c> in all remaining cases.
/// </returns>
public override Status NeedsField(Index.FieldInfo fieldInfo)
{
    // NOTE(review): BinarySearch uses the default comparer, so `fields` must have been
    // sorted with the same ordering -- confirm where the array is built.
    currentField = Array.BinarySearch(fields, fieldInfo.Name);

    if (currentField < 0)
    {
        return Status.NO;
    }

    if (builders[currentField].Length <= maxLength)
    {
        return Status.YES;
    }

    // This field is full; with a single field nothing else can be of interest.
    return fields.Length == 1 ? Status.STOP : Status.NO;
}
/// <summary>
/// Returns a sorted-set doc-values view for the field, backed by the field's entry in
/// <c>FIELDS</c>, a clone of <c>DATA</c>, a fresh scratch buffer and a decoder built
/// from the field's pattern.
/// </summary>
/// <param name="fieldInfo">Field whose name is used as the lookup key.</param>
public override SortedSetDocValues GetSortedSet(FieldInfo fieldInfo)
{
    OneField entry = FIELDS[fieldInfo.Name];

    // SegmentCoreReaders already verifies this field is valid:
    Debug.Assert(entry != null);

    IndexInput dataClone = (IndexInput)DATA.Clone();
    BytesRef buffer = new BytesRef();

    // NOTE(review): DecimalFormat / DecimalFormatSymbols / Locale are not .NET BCL types;
    // confirm the project supplies them or drop the decoder from this call.
    DecimalFormatSymbols rootSymbols = new DecimalFormatSymbols(Locale.ROOT);
    DecimalFormat decoder = new DecimalFormat(entry.Pattern, rootSymbols);

    return new SortedSetDocValuesAnonymousInnerClassHelper(this, entry, dataClone, buffer, decoder);
}
/// <summary>
/// Returns a numeric doc-values view for the field, backed by the field's entry in
/// <c>FIELDS</c>, a clone of <c>DATA</c> and a fresh scratch buffer.
/// </summary>
/// <remarks>
/// The former message-less duplicate assertion was removed: it fired first and hid the
/// diagnostic message of the assertion kept below.
/// </remarks>
/// <param name="fieldInfo">Field whose name is used as the lookup key.</param>
public override NumericDocValues GetNumeric(FieldInfo fieldInfo)
{
    var field = FIELDS[fieldInfo.Name];

    // SegmentCoreReaders already verifies this field is valid:
    Debug.Assert(field != null, "field=" + fieldInfo.Name + " fields=" + FIELDS);

    var @in = (IndexInput)DATA.Clone();
    var scratch = new BytesRef();

    return new NumericDocValuesAnonymousInnerClassHelper(this, field, @in, scratch);
}
/// <summary>
/// Opens the terms file of a segment, validates its header (and checksum for versions
/// that have one), initialises the postings reader and loads one <c>TermsReader</c>
/// per field listed in the file's directory.
/// </summary>
/// <remarks>
/// Fix: the previously registered reader for a field name is now looked up before the
/// new reader is stored. The former chained assignment
/// <c>previous = fields[name] = current</c> always made <c>previous</c> equal to
/// <c>current</c>, so <c>CheckFieldSummary</c> never received the real previous entry.
/// </remarks>
/// <param name="state">Segment read state: directory, context, segment info and field infos.</param>
/// <param name="postingsReader">Postings reader, initialised from the terms input.</param>
public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader)
{
    string termsFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTTermsWriter.TERMS_EXTENSION);

    this.postingsReader = postingsReader;
    IndexInput @in = state.Directory.OpenInput(termsFileName, state.Context);

    bool success = false;
    try
    {
        version = ReadHeader(@in);
        if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM)
        {
            CodecUtil.ChecksumEntireFile(@in);
        }
        this.postingsReader.Init(@in);
        SeekDir(@in);

        FieldInfos fieldInfos = state.FieldInfos;
        int numFields = @in.ReadVInt();
        for (int i = 0; i < numFields; i++)
        {
            int fieldNumber = @in.ReadVInt();
            FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);
            long numTerms = @in.ReadVLong();
            // DOCS_ONLY fields do not store a total term frequency.
            long sumTotalTermFreq = fieldInfo.FieldIndexOptions == IndexOptions.DOCS_ONLY ? -1 : @in.ReadVLong();
            long sumDocFreq = @in.ReadVLong();
            int docCount = @in.ReadVInt();
            int longsSize = @in.ReadVInt();

            TermsReader current = new TermsReader(this, fieldInfo, @in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);

            // Capture any reader already registered under this name (null when there
            // is none) BEFORE overwriting it.
            TermsReader previous;
            fields.TryGetValue(fieldInfo.Name, out previous);
            fields[fieldInfo.Name] = current;

            CheckFieldSummary(state.SegmentInfo, @in, current, previous);
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Close(@in);
        }
        else
        {
            // Do not let a close failure mask the original exception.
            IOUtils.CloseWhileHandlingException(@in);
        }
    }
}
// note: this might not be the most efficient... but it's fairly simple
/// <summary>
/// Writes a sorted-set field: field number and SORTED_SET marker go to the meta file,
/// followed by the aggregated ord counts, the ordinals, and the values as binary.
/// </summary>
/// <param name="field">Field being written.</param>
/// <param name="values">Values handed to <c>AddBinaryFieldValues</c>.</param>
/// <param name="docToOrdCount">Per-document ordinal counts; wrapped before being written.</param>
/// <param name="ords">Ordinals of all documents.</param>
public override void AddSortedSetField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrdCount, IEnumerable<long?> ords)
{
    // header: field number + type marker
    meta.WriteVInt(field.Number);
    meta.WriteByte((byte)DirectDocValuesProducer.SORTED_SET);

    // First write docToOrdCounts, except we "aggregate" the
    // counts so they turn into addresses, and add a final
    // value = the total aggregate:
    var aggregatedCounts = new IterableAnonymousInnerClassHelper(this, docToOrdCount);
    AddNumericFieldValues(field, aggregatedCounts);

    // Write ordinals for all docs, appended into one big
    // numerics:
    AddNumericFieldValues(field, ords);

    // write the values as binary
    AddBinaryFieldValues(field, values);
}
/// <summary>
/// Appends a stored string value to the builder of the current field, inserting the
/// field's separator between values and truncating so the builder never grows beyond
/// <c>maxLength</c>.
/// </summary>
/// <param name="fieldInfo">Not read; the target is selected by <c>currentField</c>.</param>
/// <param name="value">Text to append.</param>
public override void StringField(Index.FieldInfo fieldInfo, string value)
{
    Debug.Assert(currentField >= 0);
    StringBuilder target = builders[currentField];

    // Separate from earlier content, but only while there is still room.
    bool hasContent = target.Length > 0;
    if (hasContent && target.Length < maxLength)
    {
        target.Append(valueSeparators[currentField]);
    }

    bool fits = !(target.Length + value.Length > maxLength);
    if (fits)
    {
        target.Append(value);
    }
    else
    {
        // Keep only the prefix that still fits.
        int room = maxLength - target.Length;
        target.Append(value, 0, room);
    }
}
/// <summary>
/// Reads the next line (expected to start with the VALUE label) and hands the payload
/// after the label to the visitor, converted according to <paramref name="type"/>.
/// Unrecognised types are ignored.
/// </summary>
/// <remarks>
/// Fix: string values are now decoded from UTF-8 via the same
/// <c>UnicodeUtil.UTF8toUTF16</c> path the numeric branches use. The former
/// <c>SubList(...).ToString()</c> call stringified a list object instead of decoding
/// the bytes.
/// </remarks>
/// <param name="type">Type label of the stored value.</param>
/// <param name="fieldInfo">Field passed through to the visitor.</param>
/// <param name="visitor">Receiver of the decoded value.</param>
private void ReadField(BytesRef type, FieldInfo fieldInfo, StoredFieldVisitor visitor)
{
    ReadLine();
    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.VALUE));

    // The payload is whatever follows the VALUE label on the line just read.
    int valueOffset = _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length;
    int valueLength = _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length;

    // NOTE(review): the numeric conversions below use the current culture; confirm the
    // writer formats with the same culture before switching either side to invariant.
    if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_STRING))
    {
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
        visitor.StringField(fieldInfo, _scratchUtf16.ToString());
    }
    else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_BINARY))
    {
        // Copy, because _scratch is reused for the next line.
        var copy = new byte[valueLength];
        Array.Copy(_scratch.Bytes, valueOffset, copy, 0, copy.Length);
        visitor.BinaryField(fieldInfo, copy);
    }
    else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_INT))
    {
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
        visitor.IntField(fieldInfo, Convert.ToInt32(_scratchUtf16.ToString()));
    }
    else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_LONG))
    {
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
        visitor.LongField(fieldInfo, Convert.ToInt64(_scratchUtf16.ToString()));
    }
    else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_FLOAT))
    {
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
        visitor.FloatField(fieldInfo, Convert.ToSingle(_scratchUtf16.ToString()));
    }
    else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_DOUBLE))
    {
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
        visitor.DoubleField(fieldInfo, Convert.ToDouble(_scratchUtf16.ToString()));
    }
}
/// <summary>
/// Returns the terms of the named field, or <c>null</c> when the field is unknown,
/// is not among this instance's fields, or has no terms.
/// </summary>
/// <param name="field">Field name to look up.</param>
/// <returns>A <c>TVTerms</c> over the field's slice of the suffix bytes, or <c>null</c>.</returns>
public override Terms GetTerms(string field)
{
    Index.FieldInfo fieldInfo = outerInstance.fieldInfos.FieldInfo(field);
    if (fieldInfo == null)
    {
        return null;
    }

    // Locate the slot whose field number matches.
    int idx = -1;
    for (int slot = 0; slot < fieldNumOffs.Length; ++slot)
    {
        if (fieldNums[fieldNumOffs[slot]] == fieldInfo.Number)
        {
            idx = slot;
            break;
        }
    }

    if (idx == -1 || numTerms[idx] == 0)
    {
        // no term
        return null;
    }

    // The field's suffix bytes start after the bytes of all preceding slots.
    int fieldOff = 0;
    for (int slot = 0; slot < idx; ++slot)
    {
        fieldOff += fieldLengths[slot];
    }
    int fieldLen = fieldLengths[idx];

    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(fieldLen >= 0);
    }

    var fieldSuffixes = new BytesRef(suffixBytes.Bytes, suffixBytes.Offset + fieldOff, fieldLen);
    return new TVTerms(numTerms[idx], fieldFlags[idx], prefixLengths[idx], suffixLengths[idx], termFreqs[idx], positionIndex[idx], positions[idx], startOffsets[idx], lengths[idx], payloadIndex[idx], payloadBytes, fieldSuffixes);
}
/// <summary>
/// Returns the <c>Bits</c> telling which documents have a value for the field,
/// dispatching on the field's doc-values type.
/// </summary>
/// <param name="field">Field to inspect.</param>
/// <exception cref="InvalidEnumArgumentException">The doc-values type is none of the four handled types.</exception>
public override Bits GetDocsWithField(FieldInfo field)
{
    var type = field.DocValuesType;

    if (type == FieldInfo.DocValuesType_e.SORTED_SET)
    {
        return DocValues.DocsWithValue(GetSortedSet(field), MAX_DOC);
    }
    if (type == FieldInfo.DocValuesType_e.SORTED)
    {
        return DocValues.DocsWithValue(GetSorted(field), MAX_DOC);
    }
    if (type == FieldInfo.DocValuesType_e.BINARY)
    {
        return GetBinaryDocsWithField(field);
    }
    if (type == FieldInfo.DocValuesType_e.NUMERIC)
    {
        return GetNumericDocsWithField(field);
    }

    throw new InvalidEnumArgumentException();
}
/// <summary>
/// Writes the values as an FST that maps each value to its position in the
/// enumeration. Field number, FST marker and data file pointer go to the meta file
/// first; the value count goes to the meta file last.
/// </summary>
/// <param name="field">Field being written.</param>
/// <param name="values">Values added to the FST in enumeration order.</param>
private void WriteFST(FieldInfo field, IEnumerable<BytesRef> values)
{
    // meta header
    meta.WriteVInt(field.Number);
    meta.WriteByte(MemoryDocValuesProducer.FST);
    meta.WriteLong(data.FilePointer);

    PositiveIntOutputs ordOutputs = PositiveIntOutputs.Singleton;
    var fstBuilder = new Builder<long?>(INPUT_TYPE.BYTE1, ordOutputs);
    var intsScratch = new IntsRef();

    long nextOrd = 0;
    foreach (BytesRef term in values)
    {
        fstBuilder.Add(Util.ToIntsRef(term, intsScratch), nextOrd);
        nextOrd++;
    }

    // Finish() may return null, in which case nothing is saved.
    FST<long?> built = fstBuilder.Finish();
    if (built != null)
    {
        built.Save(data);
    }

    meta.WriteVLong(nextOrd);
}
/// <summary>
/// Writes a sorted field as fixed-width text: value count, maximum value length, the
/// zero patterns for lengths and ordinals, every value (length line + space-padded raw
/// bytes), and finally one line per document holding its ordinal plus one.
/// </summary>
/// <remarks>
/// All numbers are formatted with the invariant culture so the file does not depend on
/// the current thread culture (<c>maxLength</c> is -1 when there are no values, and the
/// negative sign is culture-specific). A null ordinal is written as 0 via
/// <c>GetValueOrDefault()</c> instead of throwing.
/// </remarks>
/// <param name="field">Field being written.</param>
/// <param name="values">Distinct values; enumerated twice.</param>
/// <param name="docToOrd">Ordinal of each document.</param>
public override void AddSortedField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd)
{
    Debug.Assert(FieldSeen(field.Name));
    Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.SORTED);
    WriteFieldEntry(field, FieldInfo.DocValuesType_e.SORTED);

    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    int valueCount = 0;
    int maxLength = -1;
    foreach (BytesRef value in values)
    {
        maxLength = Math.Max(maxLength, value.Length);
        valueCount++;
    }

    // write numValues
    SimpleTextUtil.Write(data, NUMVALUES);
    SimpleTextUtil.Write(data, valueCount.ToString(invariant), scratch);
    SimpleTextUtil.WriteNewline(data);

    // write maxLength
    string maxLengthText = maxLength.ToString(invariant);
    SimpleTextUtil.Write(data, MAXLENGTH);
    SimpleTextUtil.Write(data, maxLengthText, scratch);
    SimpleTextUtil.WriteNewline(data);

    // write our pattern for encoding lengths
    string encoderFormat = new string('0', maxLengthText.Length);
    SimpleTextUtil.Write(data, PATTERN);
    SimpleTextUtil.Write(data, encoderFormat, scratch);
    SimpleTextUtil.WriteNewline(data);

    // write our pattern for ords; ordinals are stored shifted by one, hence valueCount + 1
    int maxOrdBytes = (valueCount + 1L).ToString(invariant).Length;
    string ordEncoderFormat = new string('0', maxOrdBytes);
    SimpleTextUtil.Write(data, ORDPATTERN);
    SimpleTextUtil.Write(data, ordEncoderFormat, scratch);
    SimpleTextUtil.WriteNewline(data);

    // for asserts:
    int valuesSeen = 0;

    foreach (BytesRef value in values)
    {
        // write length
        SimpleTextUtil.Write(data, LENGTH);
        SimpleTextUtil.Write(data, value.Length.ToString(encoderFormat, invariant), scratch);
        SimpleTextUtil.WriteNewline(data);

        // write bytes -- don't use SimpleText.Write because it escapes:
        data.WriteBytes(value.Bytes, value.Offset, value.Length);

        // pad to fit
        for (int i = value.Length; i < maxLength; i++)
        {
            data.WriteByte((byte)' ');
        }
        SimpleTextUtil.WriteNewline(data);

        valuesSeen++;
        Debug.Assert(valuesSeen <= valueCount);
    }

    Debug.Assert(valuesSeen == valueCount);

    foreach (var ord in docToOrd)
    {
        SimpleTextUtil.Write(data, (ord + 1).GetValueOrDefault().ToString(ordEncoderFormat, invariant), scratch);
        SimpleTextUtil.WriteNewline(data);
    }
}
/// <summary>
/// Prepares a per-field terms writer: registers the field with the postings writer to
/// learn the longs size, then creates the term outputs and the FST builder.
/// </summary>
/// <param name="outerInstance">Owning writer; supplies the postings writer.</param>
/// <param name="fieldInfo">Field this writer handles.</param>
internal TermsWriter(FSTTermsWriter outerInstance, FieldInfo fieldInfo)
{
    _outerInstance = outerInstance;
    _numTerms = 0;
    _fieldInfo = fieldInfo;

    // The postings writer decides how many longs each term carries.
    _longsSize = outerInstance._postingsWriter.SetField(fieldInfo);

    _outputs = new FSTTermOutputs(fieldInfo, _longsSize);
    _builder = new Builder<FSTTermOutputs.TermData>(FST.INPUT_TYPE.BYTE1, _outputs);
}
/// <summary>
/// Writes a binary field as fixed-width text: the maximum length, a zero pattern for
/// encoding lengths, then per value a length line, the space-padded raw bytes and a
/// "T"/"F" line telling whether the value was non-null.
/// </summary>
/// <param name="field">Field being written.</param>
/// <param name="values">One entry per document (null allowed); enumerated twice.</param>
public override void AddBinaryField(FieldInfo field, IEnumerable<BytesRef> values)
{
    Debug.Assert(FieldSeen(field.Name));
    Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.BINARY);

    // Longest value; null entries count as length 0.
    int maxLength = 0;
    foreach (BytesRef candidate in values)
    {
        if (candidate != null)
        {
            maxLength = Math.Max(maxLength, candidate.Length);
        }
    }

    WriteFieldEntry(field, FieldInfo.DocValuesType_e.BINARY);

    // write maxLength
    string maxLengthText = Convert.ToString(maxLength);
    SimpleTextUtil.Write(data, MAXLENGTH);
    SimpleTextUtil.Write(data, maxLengthText, scratch);
    SimpleTextUtil.WriteNewline(data);

    // write our pattern for encoding lengths: one '0' per digit of maxLength
    string lengthPattern = new string('0', maxLengthText.Length);
    SimpleTextUtil.Write(data, PATTERN);
    SimpleTextUtil.Write(data, lengthPattern, scratch);
    SimpleTextUtil.WriteNewline(data);

    int numDocsWritten = 0;
    foreach (BytesRef current in values)
    {
        bool hasValue = current != null;
        int length = hasValue ? current.Length : 0;

        SimpleTextUtil.Write(data, LENGTH);
        SimpleTextUtil.Write(data, length.ToString(lengthPattern), scratch);
        SimpleTextUtil.WriteNewline(data);

        // write bytes -- don't use SimpleText.Write because it escapes:
        if (hasValue)
        {
            data.WriteBytes(current.Bytes, current.Offset, current.Length);
        }

        // pad to fit
        for (int pad = length; pad < maxLength; pad++)
        {
            data.WriteByte((byte)' ');
        }
        SimpleTextUtil.WriteNewline(data);

        SimpleTextUtil.Write(data, hasValue ? "T" : "F", scratch);
        SimpleTextUtil.WriteNewline(data);
        numDocsWritten++;
    }

    Debug.Assert(numDocs == numDocsWritten);
}
/// <summary>
/// Creates the terms consumer for a field: a new <c>TermsWriter</c> bound to this
/// instance and the given field.
/// </summary>
/// <param name="field">Field whose terms will be consumed.</param>
public override TermsConsumer AddField(FieldInfo field)
{
    TermsConsumer consumer = new TermsWriter(this, field);
    return consumer;
}
/// <summary>
/// Captures the metadata of one field: its info, statistics, longs size and FST.
/// </summary>
/// <param name="fieldInfo">Assigned to <c>FieldInfo</c>.</param>
/// <param name="numTerms">Assigned to <c>NumTerms</c>.</param>
/// <param name="sumTotalTermFreq">Assigned to <c>SumTotalTermFreq</c>.</param>
/// <param name="sumDocFreq">Assigned to <c>SumDocFreq</c>.</param>
/// <param name="docCount">Assigned to <c>DocCount</c>.</param>
/// <param name="longsSize">Assigned to <c>LongsSize</c>.</param>
/// <param name="fst">Assigned to <c>Dict</c>.</param>
public FieldMetaData(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<FSTTermOutputs.TermData> fst)
{
    this.FieldInfo = fieldInfo;
    this.NumTerms = numTerms;
    this.SumTotalTermFreq = sumTotalTermFreq;
    this.SumDocFreq = sumDocFreq;
    this.DocCount = docCount;
    this.LongsSize = longsSize;
    this.Dict = fst;
}
/// <summary>
/// Writes a numeric field as fixed-width text: the minimum value, a zero pattern wide
/// enough for the largest delta, then per document a delta line and a "T"/"F" line
/// telling whether the document had a value. Missing (null) values count as 0.
/// </summary>
/// <remarks>
/// Changes from the previous version: the min/max pass treats null as 0 exactly like
/// the writing pass (it used to throw on null), the delta is computed without
/// <c>long</c> overflow, and all numbers are formatted with the invariant culture.
/// </remarks>
/// <param name="field">Field being written.</param>
/// <param name="values">One entry per document (null allowed); enumerated twice.</param>
public override void AddNumericField(FieldInfo field, IEnumerable<long?> values)
{
    Debug.Assert(FieldSeen(field.Name));
    Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.NUMERIC || field.NormType == FieldInfo.DocValuesType_e.NUMERIC);
    WriteFieldEntry(field, FieldInfo.DocValuesType_e.NUMERIC);

    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // first pass to find min/max; null is written as 0 below, so it must take part here
    long minValue = long.MaxValue;
    long maxValue = long.MinValue;
    foreach (var n in values)
    {
        long v = n.GetValueOrDefault();
        minValue = Math.Min(minValue, v);
        maxValue = Math.Max(maxValue, v);
    }

    // write our minimum value to the .dat, all entries are deltas from that
    SimpleTextUtil.Write(data, MINVALUE);
    SimpleTextUtil.Write(data, minValue.ToString(invariant), scratch);
    SimpleTextUtil.WriteNewline(data);

    // build up our fixed-width "simple text packed ints" format;
    // BigInteger because max - min may not fit in a long
    System.Numerics.BigInteger diffBig = (System.Numerics.BigInteger)maxValue - minValue;
    int maxBytesPerValue = diffBig.ToString(invariant).Length;
    string patternString = new string('0', maxBytesPerValue);

    // write our pattern to the .dat
    SimpleTextUtil.Write(data, PATTERN);
    SimpleTextUtil.Write(data, patternString, scratch);
    SimpleTextUtil.WriteNewline(data);

    int numDocsWritten = 0;

    // second pass to write the values
    foreach (var n in values)
    {
        long value = n.GetValueOrDefault();
        Debug.Assert(value >= minValue);

        // value >= minValue, so the true difference always fits in a ulong
        // even when it exceeds long.MaxValue.
        ulong delta = unchecked((ulong)value - (ulong)minValue);
        string s = delta.ToString(patternString, invariant);
        Debug.Assert(s.Length == patternString.Length);
        SimpleTextUtil.Write(data, s, scratch);
        SimpleTextUtil.WriteNewline(data);

        SimpleTextUtil.Write(data, n == null ? "F" : "T", scratch);
        SimpleTextUtil.WriteNewline(data);

        numDocsWritten++;
        Debug.Assert(numDocsWritten <= numDocs);
    }

    Debug.Assert(numDocs == numDocsWritten, "numDocs=" + numDocs + " numDocsWritten=" + numDocsWritten);
}
/// <summary>
/// Returns the <c>Bits</c> telling which documents have a value for the field,
/// dispatching on the field's doc-values type.
/// </summary>
/// <param name="field">Field to inspect.</param>
/// <exception cref="InvalidEnumArgumentException">The doc-values type is none of the four handled types.</exception>
public override Bits GetDocsWithField(FieldInfo field)
{
    Bits docsWithField;

    switch (field.DocValuesType)
    {
        case FieldInfo.DocValuesType_e.SORTED_SET:
            docsWithField = DocValues.DocsWithValue(GetSortedSet(field), MAX_DOC);
            break;

        case FieldInfo.DocValuesType_e.SORTED:
            docsWithField = DocValues.DocsWithValue(GetSorted(field), MAX_DOC);
            break;

        case FieldInfo.DocValuesType_e.BINARY:
            docsWithField = GetBinaryDocsWithField(field);
            break;

        case FieldInfo.DocValuesType_e.NUMERIC:
            docsWithField = GetNumericDocsWithField(field);
            break;

        default:
            throw new InvalidEnumArgumentException();
    }

    return docsWithField;
}
/// <summary>
/// Reads the next line (expected to start with the VALUE label) and hands the payload
/// after the label to the visitor, converted according to <paramref name="type"/>.
/// Unrecognised types are ignored.
/// </summary>
/// <remarks>
/// Fix: string values are now decoded from UTF-8 via the same
/// <c>UnicodeUtil.UTF8toUTF16</c> path the numeric branches use. The former
/// <c>SubList(...).ToString()</c> call stringified a list object instead of decoding
/// the bytes.
/// </remarks>
/// <param name="type">Type label of the stored value.</param>
/// <param name="fieldInfo">Field passed through to the visitor.</param>
/// <param name="visitor">Receiver of the decoded value.</param>
private void ReadField(BytesRef type, FieldInfo fieldInfo, StoredFieldVisitor visitor)
{
    ReadLine();
    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.VALUE));

    // The payload is whatever follows the VALUE label on the line just read.
    int valueOffset = _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length;
    int valueLength = _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length;

    // NOTE(review): the numeric conversions below use the current culture; confirm the
    // writer formats with the same culture before switching either side to invariant.
    if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_STRING))
    {
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
        visitor.StringField(fieldInfo, _scratchUtf16.ToString());
    }
    else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_BINARY))
    {
        // Copy, because _scratch is reused for the next line.
        var copy = new sbyte[valueLength];
        Array.Copy(_scratch.Bytes, valueOffset, copy, 0, copy.Length);
        visitor.BinaryField(fieldInfo, copy);
    }
    else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_INT))
    {
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
        visitor.IntField(fieldInfo, Convert.ToInt32(_scratchUtf16.ToString()));
    }
    else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_LONG))
    {
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
        visitor.LongField(fieldInfo, Convert.ToInt64(_scratchUtf16.ToString()));
    }
    else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_FLOAT))
    {
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
        visitor.FloatField(fieldInfo, Convert.ToSingle(_scratchUtf16.ToString()));
    }
    else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_DOUBLE))
    {
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
        visitor.DoubleField(fieldInfo, Convert.ToDouble(_scratchUtf16.ToString()));
    }
}
/// <summary>
/// Writes the header for this field: a line with the field name followed by a line
/// with the doc-values type.
/// </summary>
/// <param name="field">Field whose name is written.</param>
/// <param name="type">Doc-values type; written via its <c>ToString()</c>.</param>
private void WriteFieldEntry(FieldInfo field, FieldInfo.DocValuesType_e type)
{
    // field name line
    SimpleTextUtil.Write(data, FIELD);
    SimpleTextUtil.Write(data, field.Name, scratch);
    SimpleTextUtil.WriteNewline(data);

    // type line
    string typeName = type.ToString();
    SimpleTextUtil.Write(data, TYPE);
    SimpleTextUtil.Write(data, typeName, scratch);
    SimpleTextUtil.WriteNewline(data);
}
/// <summary>
/// Encodes a term's metadata: file pointers go into <paramref name="longs"/> as deltas
/// against the previously encoded state, the optional values go to <paramref name="out"/>.
/// </summary>
/// <param name="longs">Receives the doc (and, when applicable, pos and pay) file-pointer deltas.</param>
/// <param name="out">Receives singleton doc ID, last pos block offset and skip offset when they are not -1.</param>
/// <param name="fieldInfo">Not read by this method.</param>
/// <param name="_state">Term state; cast to <c>IntBlockTermState</c>.</param>
/// <param name="absolute">When true, deltas are taken against <c>EmptyState</c>.</param>
public override void EncodeTerm(long[] longs, DataOutput @out, FieldInfo fieldInfo, BlockTermState _state, bool absolute)
{
    var termState = (IntBlockTermState)_state;

    if (absolute)
    {
        LastState = EmptyState;
    }

    // File pointers as deltas from the previous term.
    longs[0] = termState.DocStartFP - LastState.DocStartFP;
    if (FieldHasPositions)
    {
        longs[1] = termState.PosStartFP - LastState.PosStartFP;
        if (FieldHasPayloads || FieldHasOffsets)
        {
            longs[2] = termState.PayStartFP - LastState.PayStartFP;
        }
    }

    // Optional values; -1 means "absent" and nothing is written.
    if (termState.SingletonDocID != -1)
    {
        @out.WriteVInt(termState.SingletonDocID);
    }

    if (FieldHasPositions && termState.LastPosBlockOffset != -1)
    {
        @out.WriteVLong(termState.LastPosBlockOffset);
    }

    if (termState.SkipOffset != -1)
    {
        @out.WriteVLong(termState.SkipOffset);
    }

    LastState = termState;
}
/// <summary>
/// Writes a sorted field as fixed-width text: value count, maximum value length, the
/// zero patterns for lengths and ordinals, every value (length line + space-padded raw
/// bytes), and finally one line per document holding its ordinal plus one. Numbers are
/// formatted with the invariant culture.
/// </summary>
/// <param name="field">Field being written.</param>
/// <param name="values">Distinct values; enumerated twice.</param>
/// <param name="docToOrd">Ordinal of each document; a null ordinal is written as 0.</param>
public override void AddSortedField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd)
{
    Debug.Assert(FieldSeen(field.Name));
    Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.SORTED);
    WriteFieldEntry(field, FieldInfo.DocValuesType_e.SORTED);

    // Count the values and find the longest one (-1 when there are none).
    int valueCount = 0;
    int maxLength = -1;
    foreach (BytesRef candidate in values)
    {
        maxLength = Math.Max(maxLength, candidate.Length);
        valueCount++;
    }

    // write numValues
    SimpleTextUtil.Write(data, NUMVALUES);
    SimpleTextUtil.Write(data, valueCount.ToString(CultureInfo.InvariantCulture), scratch);
    SimpleTextUtil.WriteNewline(data);

    // write maxLength
    string maxLengthText = maxLength.ToString(CultureInfo.InvariantCulture);
    SimpleTextUtil.Write(data, MAXLENGTH);
    SimpleTextUtil.Write(data, maxLengthText, scratch);
    SimpleTextUtil.WriteNewline(data);

    // write our pattern for encoding lengths
    string encoderFormat = new string('0', maxLengthText.Length);
    SimpleTextUtil.Write(data, PATTERN);
    SimpleTextUtil.Write(data, encoderFormat, scratch);
    SimpleTextUtil.WriteNewline(data);

    // write our pattern for ords
    int ordDigits = (valueCount + 1L).ToString(CultureInfo.InvariantCulture).Length;
    string ordEncoderFormat = new string('0', ordDigits);
    SimpleTextUtil.Write(data, ORDPATTERN);
    SimpleTextUtil.Write(data, ordEncoderFormat, scratch);
    SimpleTextUtil.WriteNewline(data);

    // for asserts:
    int valuesSeen = 0;

    foreach (BytesRef current in values)
    {
        // write length
        SimpleTextUtil.Write(data, LENGTH);
        SimpleTextUtil.Write(data, current.Length.ToString(encoderFormat, CultureInfo.InvariantCulture), scratch);
        SimpleTextUtil.WriteNewline(data);

        // write bytes -- don't use SimpleText.Write because it escapes:
        data.WriteBytes(current.Bytes, current.Offset, current.Length);

        // pad to fit
        int padding = maxLength - current.Length;
        while (padding-- > 0)
        {
            data.WriteByte((byte)' ');
        }
        SimpleTextUtil.WriteNewline(data);

        valuesSeen++;
        Debug.Assert(valuesSeen <= valueCount);
    }

    Debug.Assert(valuesSeen == valueCount);

    foreach (var ord in docToOrd)
    {
        long shifted = (ord + 1).GetValueOrDefault();
        SimpleTextUtil.Write(data, shifted.ToString(ordEncoderFormat, CultureInfo.InvariantCulture), scratch);
        SimpleTextUtil.WriteNewline(data);
    }
}
/// <summary>
/// Builds the <c>Bits</c> view for a numeric field from the field's entry in
/// <c>FIELDS</c>, a clone of <c>DATA</c> and a fresh scratch buffer.
/// </summary>
/// <param name="fieldInfo">Field whose name is used as the lookup key.</param>
private Bits GetNumericDocsWithField(FieldInfo fieldInfo)
{
    var entry = FIELDS[fieldInfo.Name];
    var dataClone = (IndexInput)DATA.Clone();
    var buffer = new BytesRef();

    return new BitsAnonymousInnerClassHelper(this, entry, dataClone, buffer);
}
/// <summary>
/// Returns a sorted-set doc-values view for the field, backed by the field's entry in
/// <c>FIELDS</c>, a clone of <c>DATA</c> and a fresh scratch buffer.
/// </summary>
/// <param name="fieldInfo">Field whose name is used as the lookup key.</param>
public override SortedSetDocValues GetSortedSet(FieldInfo fieldInfo)
{
    var entry = FIELDS[fieldInfo.Name];

    // SegmentCoreReaders already verifies this field is valid:
    Debug.Assert(entry != null);

    var dataClone = (IndexInput)DATA.Clone();
    var buffer = new BytesRef();

    return new SortedSetDocValuesAnonymousInnerClassHelper(this, entry, dataClone, buffer);
}
/// <summary>
/// Returns a numeric doc-values view for the field, backed by the field's entry in
/// <c>FIELDS</c>, a clone of <c>DATA</c> and a fresh scratch buffer.
/// </summary>
/// <remarks>
/// The former message-less duplicate assertion was removed: it fired first and hid the
/// diagnostic message of the assertion kept below.
/// </remarks>
/// <param name="fieldInfo">Field whose name is used as the lookup key.</param>
public override NumericDocValues GetNumeric(FieldInfo fieldInfo)
{
    var field = FIELDS[fieldInfo.Name];

    // SegmentCoreReaders already verifies this field is valid:
    Debug.Assert(field != null, "field=" + fieldInfo.Name + " fields=" + FIELDS);

    var @in = (IndexInput)DATA.Clone();
    var scratch = new BytesRef();

    return new NumericDocValuesAnonymousInnerClassHelper(this, field, @in, scratch);
}
/// <summary>
/// Writes a SORTED_SET doc-values field in fixed-width text form: the value
/// count, the maximum value length, the length pattern, the ord-list pattern,
/// every value padded with spaces to the maximum length, and finally one
/// comma-separated, space-padded ord list per entry of <paramref name="docToOrdCount"/>.
/// </summary>
/// <remarks>
/// All numbers are formatted with <see cref="CultureInfo.InvariantCulture"/> so
/// the output does not depend on the culture of the writing thread.
/// </remarks>
/// <param name="field">Field being written; asserted to be a SORTED_SET field already seen.</param>
/// <param name="values">The values; enumerated twice (once to measure, once to write).</param>
/// <param name="docToOrdCount">Number of ords per entry; enumerated twice. A null count makes the int cast throw.</param>
/// <param name="ords">Flat stream of ords consumed in step with <paramref name="docToOrdCount"/>; enumerated twice.</param>
public override void AddSortedSetField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrdCount, IEnumerable<long?> ords)
{
    Debug.Assert(FieldSeen(field.Name));
    Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.SORTED_SET);
    WriteFieldEntry(field, FieldInfo.DocValuesType_e.SORTED_SET);

    long valueCount = 0;
    int maxLength = 0;
    foreach (var value in values)
    {
        maxLength = Math.Max(maxLength, value.Length);
        valueCount++;
    }

    string maxLengthText = maxLength.ToString(CultureInfo.InvariantCulture);

    // write numValues
    SimpleTextUtil.Write(data, NUMVALUES);
    SimpleTextUtil.Write(data, valueCount.ToString(CultureInfo.InvariantCulture), scratch);
    SimpleTextUtil.WriteNewline(data);

    // write maxLength
    SimpleTextUtil.Write(data, MAXLENGTH);
    SimpleTextUtil.Write(data, maxLengthText, scratch);
    SimpleTextUtil.WriteNewline(data);

    // write our pattern for encoding lengths: one '0' per digit of maxLength
    string encoderFormat = new string('0', maxLengthText.Length);
    SimpleTextUtil.Write(data, PATTERN);
    SimpleTextUtil.Write(data, encoderFormat, scratch);
    SimpleTextUtil.WriteNewline(data);

    // compute ord pattern: this is funny, we encode all values for all docs to find the maximum length
    var maxOrdListLength = 0;
    var sb2 = new StringBuilder();
    using (var ordStream = ords.GetEnumerator())
    {
        foreach (var n in docToOrdCount)
        {
            sb2.Length = 0;
            var count = (int)n;
            for (int i = 0; i < count; i++)
            {
                ordStream.MoveNext();
                var ord = ordStream.Current;
                if (sb2.Length > 0)
                {
                    sb2.Append(',');
                }
                // Object overload: a null ord still yields an empty string, as before.
                sb2.Append(Convert.ToString(ord, CultureInfo.InvariantCulture));
            }
            maxOrdListLength = Math.Max(maxOrdListLength, sb2.Length);
        }
    }

    // write our pattern for ord lists
    SimpleTextUtil.Write(data, ORDPATTERN);
    SimpleTextUtil.Write(data, new string('X', maxOrdListLength), scratch);
    SimpleTextUtil.WriteNewline(data);

    // for asserts:
    long valuesSeen = 0;

    foreach (var value in values)
    {
        // write length
        SimpleTextUtil.Write(data, LENGTH);
        SimpleTextUtil.Write(data, value.Length.ToString(encoderFormat, CultureInfo.InvariantCulture), scratch);
        SimpleTextUtil.WriteNewline(data);

        // write bytes -- don't use SimpleText.Write because it escapes:
        data.WriteBytes(value.Bytes, value.Offset, value.Length);

        // pad to fit
        for (var i = value.Length; i < maxLength; i++)
        {
            data.WriteByte((byte)' ');
        }
        SimpleTextUtil.WriteNewline(data);
        valuesSeen++;
        Debug.Assert(valuesSeen <= valueCount);
    }

    Debug.Assert(valuesSeen == valueCount);

    // write the ords for each doc comma-separated
    using (var ordStream = ords.GetEnumerator())
    {
        foreach (var n in docToOrdCount)
        {
            sb2.Length = 0;
            var count = (int)n;
            for (var i = 0; i < count; i++)
            {
                ordStream.MoveNext();
                var ord = ordStream.Current;
                if (sb2.Length > 0)
                {
                    sb2.Append(',');
                }
                sb2.Append(Convert.ToString(ord, CultureInfo.InvariantCulture));
            }

            // now pad to fit: these are numbers so spaces work well. reader calls trim()
            var numPadding = maxOrdListLength - sb2.Length;
            for (var i = 0; i < numPadding; i++)
            {
                sb2.Append(' ');
            }
            SimpleTextUtil.Write(data, sb2.ToString(), scratch);
            SimpleTextUtil.WriteNewline(data);
        }
    }
}
/// <summary>
/// Returns the text form of a doc-values type: the enum member's name, or the
/// literal "false" when no type is set.
/// </summary>
private static string GetDocValuesType(FieldInfo.DocValuesType_e? type)
{
    if (!type.HasValue)
    {
        return "false";
    }

    return type.Value.ToString();
}
/// <summary>
/// Records which postings features the field has, forwards them to the skip
/// writer, resets the last term state, and reports how many file pointers are
/// tracked per term.
/// </summary>
/// <returns>
/// 1 (doc FP) without positions; 2 (doc + pos FP) with positions only;
/// 3 (doc + pos + pay FP) with positions plus payloads or offsets.
/// </returns>
public override int SetField(FieldInfo fieldInfo)
{
    // Lifted comparisons: a null index-options value makes every flag false.
    FieldInfo.IndexOptions? options = fieldInfo.FieldIndexOptions;
    FieldHasFreqs = options >= FieldInfo.IndexOptions.DOCS_AND_FREQS;
    FieldHasPositions = options >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
    FieldHasOffsets = options >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    FieldHasPayloads = fieldInfo.HasPayloads();

    SkipWriter.SetField(FieldHasPositions, FieldHasOffsets, FieldHasPayloads);
    LastState = EmptyState;

    if (!FieldHasPositions)
    {
        return 1; // doc FP
    }

    return (FieldHasPayloads || FieldHasOffsets)
        ? 3  // doc + pos + pay FP
        : 2; // doc + pos FP
}
/// <summary>
/// Captures the owning reader, the field's <c>FieldInfo</c>, the terms start
/// position and the max doc, then loads the terms.
/// </summary>
public SimpleTextTerms(SimpleTextFieldsReader outerInstance, string field, long termsStart, int maxDoc)
{
    _outerInstance = outerInstance;
    _fieldInfo = outerInstance._fieldInfos.FieldInfo(field);
    _termsStart = termsStart;
    _maxDoc = maxDoc;

    // Must run last: every field above is assigned before loading starts.
    LoadTerms();
}
/// <summary>
/// Writes the header lines for one field (number, name, the three lowercase
/// boolean flags and the term count) and remembers the flags.
/// </summary>
public override void StartField(FieldInfo info, int numTerms, bool positions, bool offsets, bool payloads)
{
    Write(FIELD);
    Write(info.Number.ToString(CultureInfo.InvariantCulture));
    NewLine();

    Write(FIELDNAME);
    Write(info.Name);
    NewLine();

    // Flags are emitted as lowercase "true"/"false".
    Write(FIELDPOSITIONS);
    Write(positions ? "true" : "false");
    NewLine();

    Write(FIELDOFFSETS);
    Write(offsets ? "true" : "false");
    NewLine();

    Write(FIELDPAYLOADS);
    Write(payloads ? "true" : "false");
    NewLine();

    Write(FIELDTERMCOUNT);
    Write(numTerms.ToString(CultureInfo.InvariantCulture));
    NewLine();

    _positions = positions;
    _offsets = offsets;
    _payloads = payloads;
}
/// <summary>
/// Writes one stored field as text: field number, name, a type tag and the
/// value. Numeric values are written culture-invariantly; floats and doubles
/// use the round-trip ("R") format.
/// </summary>
/// <param name="info">Supplies the field number.</param>
/// <param name="field">Supplies the name and the numeric, binary or string value.</param>
/// <exception cref="ArgumentException">
/// The numeric value has an unsupported type, or the field has neither a
/// numeric, binary nor string value.
/// </exception>
public override void WriteField(FieldInfo info, IndexableField field)
{
    Write(FIELD);
    Write(info.Number.ToString(CultureInfo.InvariantCulture));
    NewLine();

    Write(NAME);
    Write(field.Name);
    NewLine();

    Write(TYPE);
    var n = field.NumericValue;

    if (n != null)
    {
        if (n is sbyte || n is short || n is int)
        {
            Write(TYPE_INT);
            NewLine();

            Write(VALUE);
            // Widen via Convert: a direct (int) cast only unboxes a boxed int and
            // throws InvalidCastException for a boxed sbyte or short.
            Write(Convert.ToInt32(n, CultureInfo.InvariantCulture).ToString(CultureInfo.InvariantCulture));
            NewLine();
        }
        else if (n is long)
        {
            Write(TYPE_LONG);
            NewLine();

            Write(VALUE);
            Write(((long)n).ToString(CultureInfo.InvariantCulture));
            NewLine();
        }
        else if (n is float)
        {
            Write(TYPE_FLOAT);
            NewLine();

            Write(VALUE);
            // LUCENENET: Need to specify the "R" for round-trip: http://stackoverflow.com/a/611564/181087
            Write(((float)n).ToString("R", CultureInfo.InvariantCulture));
            NewLine();
        }
        else if (n is double)
        {
            Write(TYPE_DOUBLE);
            NewLine();

            Write(VALUE);
            // LUCENENET: Need to specify the "R" for round-trip: http://stackoverflow.com/a/611564/181087
            Write(((double)n).ToString("R", CultureInfo.InvariantCulture));
            NewLine();
        }
        else
        {
            throw new ArgumentException("cannot store numeric type " + n.GetType());
        }
    }
    else
    {
        BytesRef bytes = field.BinaryValue;
        if (bytes != null)
        {
            Write(TYPE_BINARY);
            NewLine();

            Write(VALUE);
            Write(bytes);
            NewLine();
        }
        else if (field.StringValue == null)
        {
            throw new ArgumentException("field " + field.Name + " is stored but does not have binaryValue, stringValue nor numericValue");
        }
        else
        {
            Write(TYPE_STRING);
            NewLine();

            Write(VALUE);
            Write(field.StringValue);
            NewLine();
        }
    }
}
/// <summary>
/// Sets up a writer for one field: asks the postings writer how many longs it
/// uses per term, sizes the two long buffers accordingly, creates the FST
/// builder, and zeroes all counters and file-pointer trackers.
/// </summary>
internal TermsWriter(FSTOrdTermsWriter outerInstance, FieldInfo fieldInfo)
{
    _outerInstance = outerInstance;
    _fieldInfo = fieldInfo;
    _numTerms = 0;

    // The postings writer decides the per-term longs count; both buffers share it.
    _longsSize = outerInstance.postingsWriter.SetField(fieldInfo);
    _lastLongs = new long[_longsSize];
    _lastBlockLongs = new long[_longsSize];

    _outputs = PositiveIntOutputs.Singleton;
    _builder = new Builder<long?>(FST.INPUT_TYPE.BYTE1, _outputs);

    _lastMetaBytesFp = 0;
    _lastBlockStatsFp = 0;
    _lastBlockMetaLongsFp = 0;
    _lastBlockMetaBytesFp = 0;
}
/// <summary>
/// Creates the terms consumer for <paramref name="field"/>: a new
/// <c>TermsWriter</c> bound to this instance.
/// </summary>
public override TermsConsumer AddField(FieldInfo field)
{
    TermsConsumer consumer = new TermsWriter(this, field);
    return consumer;
}
/// <summary>
/// Reads the text field-infos file of a segment and rebuilds its
/// <see cref="FieldInfos"/>.
/// </summary>
/// <remarks>
/// Lines are consumed in this order: the field count; then per field its name,
/// number, is-indexed flag, index options (only when indexed), term-vector
/// flag, payloads flag, norms flag, norms type, doc-values type, doc-values
/// generation, attribute count and that many key/value line pairs; then the
/// footer is checked. Numbers are parsed with
/// <see cref="CultureInfo.InvariantCulture"/> so reading does not depend on the
/// current thread culture (e.g. its negative sign).
/// </remarks>
/// <param name="directory">Directory the file is opened from.</param>
/// <param name="segmentName">Segment name used to build the file name.</param>
/// <param name="segmentSuffix">Segment suffix used to build the file name.</param>
/// <param name="iocontext">I/O context passed to the directory.</param>
/// <returns>The field infos read from the file.</returns>
public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext)
{
    var fileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, SimpleTextFieldInfosWriter.FIELD_INFOS_EXTENSION);
    var input = directory.OpenChecksumInput(fileName, iocontext);
    var scratch = new BytesRef();

    var success = false;
    try
    {
        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NUMFIELDS));
        var size = Convert.ToInt32(ReadString(SimpleTextFieldInfosWriter.NUMFIELDS.Length, scratch), CultureInfo.InvariantCulture);
        var infos = new FieldInfo[size];

        for (var i = 0; i < size; i++)
        {
            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NAME));
            string name = ReadString(SimpleTextFieldInfosWriter.NAME.Length, scratch);

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NUMBER));
            int fieldNumber = Convert.ToInt32(ReadString(SimpleTextFieldInfosWriter.NUMBER.Length, scratch), CultureInfo.InvariantCulture);

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.ISINDEXED));
            bool isIndexed = Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.ISINDEXED.Length, scratch));

            // The index-options line is only present for indexed fields.
            FieldInfo.IndexOptions? indexOptions;
            if (isIndexed)
            {
                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.INDEXOPTIONS));
                indexOptions = (FieldInfo.IndexOptions)Enum.Parse(typeof(FieldInfo.IndexOptions), ReadString(SimpleTextFieldInfosWriter.INDEXOPTIONS.Length, scratch));
            }
            else
            {
                indexOptions = null;
            }

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.STORETV));
            bool storeTermVector = Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.STORETV.Length, scratch));

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.PAYLOADS));
            bool storePayloads = Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.PAYLOADS.Length, scratch));

            // The file stores "has norms"; FieldInfo wants the inverse.
            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NORMS));
            bool omitNorms = !Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.NORMS.Length, scratch));

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NORMS_TYPE));
            string nrmType = ReadString(SimpleTextFieldInfosWriter.NORMS_TYPE.Length, scratch);
            FieldInfo.DocValuesType_e? normsType = DocValuesType(nrmType);

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.DOCVALUES));
            string dvType = ReadString(SimpleTextFieldInfosWriter.DOCVALUES.Length, scratch);
            FieldInfo.DocValuesType_e? docValuesType = DocValuesType(dvType);

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.DOCVALUES_GEN));
            long dvGen = Convert.ToInt64(ReadString(SimpleTextFieldInfosWriter.DOCVALUES_GEN.Length, scratch), CultureInfo.InvariantCulture);

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NUM_ATTS));
            int numAtts = Convert.ToInt32(ReadString(SimpleTextFieldInfosWriter.NUM_ATTS.Length, scratch), CultureInfo.InvariantCulture);
            IDictionary<string, string> atts = new Dictionary<string, string>();

            for (int j = 0; j < numAtts; j++)
            {
                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.ATT_KEY));
                string key = ReadString(SimpleTextFieldInfosWriter.ATT_KEY.Length, scratch);

                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.ATT_VALUE));
                string value = ReadString(SimpleTextFieldInfosWriter.ATT_VALUE.Length, scratch);
                atts[key] = value;
            }

            infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType, normsType, new ReadOnlyDictionary<string, string>(atts))
            {
                DocValuesGen = dvGen
            };
        }

        SimpleTextUtil.CheckFooter(input);

        var fieldInfos = new FieldInfos(infos);
        success = true;
        return fieldInfos;
    }
    finally
    {
        // On success dispose normally; on failure use the IOUtils close helper instead.
        if (success)
        {
            input.Dispose();
        }
        else
        {
            IOUtils.CloseWhileHandlingException(input);
        }
    }
}
/// <summary>
/// Returns the binary doc values for <paramref name="fieldInfo"/>, reading
/// through a private clone of DATA.
/// </summary>
public override BinaryDocValues GetBinary(FieldInfo fieldInfo)
{
    var entry = FIELDS[fieldInfo.Name];
    Debug.Assert(entry != null);

    var dataClone = (IndexInput)DATA.Clone();
    return new BinaryDocValuesAnonymousInnerClassHelper(this, entry, dataClone, new BytesRef());
}
/// <summary>
/// Writes one stored field as text: field number, name, a type tag and the
/// value. Numbers are formatted with <see cref="CultureInfo.InvariantCulture"/>
/// so the output does not depend on the writing thread's culture; floats and
/// doubles use the round-trip ("R") format.
/// </summary>
/// <param name="info">Supplies the field number.</param>
/// <param name="field">Supplies the name and the numeric, binary or string value.</param>
/// <exception cref="ArgumentException">
/// The numeric value has an unsupported type, or the field has neither a
/// numeric, binary nor string value.
/// </exception>
public override void WriteField(FieldInfo info, IndexableField field)
{
    Write(FIELD);
    Write(Convert.ToString(info.Number, CultureInfo.InvariantCulture));
    NewLine();

    Write(NAME);
    Write(field.Name());
    NewLine();

    Write(TYPE);
    var n = field.NumericValue;

    if (n != null)
    {
        if (n is sbyte || n is short || n is int)
        {
            Write(TYPE_INT);
            NewLine();

            Write(VALUE);
            // Widen via Convert: a direct (int) cast only unboxes a boxed int and
            // throws InvalidCastException for a boxed sbyte or short.
            Write(Convert.ToInt32(n, CultureInfo.InvariantCulture).ToString(CultureInfo.InvariantCulture));
            NewLine();
        }
        else if (n is long)
        {
            Write(TYPE_LONG);
            NewLine();

            Write(VALUE);
            Write(((long)n).ToString(CultureInfo.InvariantCulture));
            NewLine();
        }
        else if (n is float)
        {
            Write(TYPE_FLOAT);
            NewLine();

            Write(VALUE);
            // "R" keeps the text round-trippable; invariant culture keeps '.' as the decimal separator.
            Write(((float)n).ToString("R", CultureInfo.InvariantCulture));
            NewLine();
        }
        else if (n is double)
        {
            Write(TYPE_DOUBLE);
            NewLine();

            Write(VALUE);
            Write(((double)n).ToString("R", CultureInfo.InvariantCulture));
            NewLine();
        }
        else
        {
            throw new ArgumentException("cannot store numeric type " + n.GetType());
        }
    }
    else
    {
        BytesRef bytes = field.BinaryValue();
        if (bytes != null)
        {
            Write(TYPE_BINARY);
            NewLine();

            Write(VALUE);
            Write(bytes);
            NewLine();
        }
        else if (field.StringValue == null)
        {
            throw new ArgumentException("field " + field.Name() + " is stored but does not have binaryValue, stringValue nor numericValue");
        }
        else
        {
            Write(TYPE_STRING);
            NewLine();

            Write(VALUE);
            Write(field.StringValue);
            NewLine();
        }
    }
}
/// <summary>
/// Writes a SORTED_SET doc-values field in fixed-width text form: the value
/// count, the maximum value length, the length pattern, the ord-list pattern,
/// every value padded with spaces to the maximum length, and finally one
/// comma-separated, space-padded ord list per entry of <paramref name="docToOrdCount"/>.
/// </summary>
/// <remarks>
/// The sizing pass and the writing pass format each ord the same way
/// (invariant culture, null treated as 0), so the ord pattern width always
/// matches what is written.
/// </remarks>
/// <param name="field">Field being written; asserted to be a SORTED_SET field already seen.</param>
/// <param name="values">The values; enumerated twice (once to measure, once to write).</param>
/// <param name="docToOrdCount">Number of ords per entry; enumerated twice. A null count makes the int cast throw.</param>
/// <param name="ords">Flat stream of ords consumed in step with <paramref name="docToOrdCount"/>; enumerated twice.</param>
public override void AddSortedSetField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrdCount, IEnumerable<long?> ords)
{
    Debug.Assert(FieldSeen(field.Name));
    Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.SORTED_SET);
    WriteFieldEntry(field, FieldInfo.DocValuesType_e.SORTED_SET);

    long valueCount = 0;
    int maxLength = 0;
    foreach (var value in values)
    {
        maxLength = Math.Max(maxLength, value.Length);
        valueCount++;
    }

    // write numValues
    SimpleTextUtil.Write(data, NUMVALUES);
    SimpleTextUtil.Write(data, valueCount.ToString(CultureInfo.InvariantCulture), scratch);
    SimpleTextUtil.WriteNewline(data);

    // write maxLength
    SimpleTextUtil.Write(data, MAXLENGTH);
    SimpleTextUtil.Write(data, maxLength.ToString(CultureInfo.InvariantCulture), scratch);
    SimpleTextUtil.WriteNewline(data);

    int maxBytesLength = maxLength.ToString(CultureInfo.InvariantCulture).Length;
    var sb = new StringBuilder();
    for (int i = 0; i < maxBytesLength; i++)
    {
        sb.Append('0');
    }

    // write our pattern for encoding lengths
    SimpleTextUtil.Write(data, PATTERN);
    SimpleTextUtil.Write(data, sb.ToString(), scratch);
    SimpleTextUtil.WriteNewline(data);

    string encoderFormat = sb.ToString();

    // compute ord pattern: this is funny, we encode all values for all docs to find the maximum length
    var maxOrdListLength = 0;
    var sb2 = new StringBuilder();
    using (var ordStream = ords.GetEnumerator())
    {
        foreach (var n in docToOrdCount)
        {
            sb2.Length = 0;
            var count = (int)n;
            for (int i = 0; i < count; i++)
            {
                ordStream.MoveNext();
                var ord = ordStream.Current;
                if (sb2.Length > 0)
                {
                    sb2.Append(',');
                }
                sb2.Append(ord.GetValueOrDefault().ToString(CultureInfo.InvariantCulture));
            }
            maxOrdListLength = Math.Max(maxOrdListLength, sb2.Length);
        }
    }

    sb2.Length = 0;
    for (int i = 0; i < maxOrdListLength; i++)
    {
        sb2.Append('X');
    }

    // write our pattern for ord lists
    SimpleTextUtil.Write(data, ORDPATTERN);
    SimpleTextUtil.Write(data, sb2.ToString(), scratch);
    SimpleTextUtil.WriteNewline(data);

    // for asserts:
    long valuesSeen = 0;

    foreach (var value in values)
    {
        // write length
        SimpleTextUtil.Write(data, LENGTH);
        SimpleTextUtil.Write(data, value.Length.ToString(encoderFormat, CultureInfo.InvariantCulture), scratch);
        SimpleTextUtil.WriteNewline(data);

        // write bytes -- don't use SimpleText.Write because it escapes:
        data.WriteBytes(value.Bytes, value.Offset, value.Length);

        // pad to fit
        for (var i = value.Length; i < maxLength; i++)
        {
            data.WriteByte((byte)' ');
        }
        SimpleTextUtil.WriteNewline(data);
        valuesSeen++;
        Debug.Assert(valuesSeen <= valueCount);
    }

    Debug.Assert(valuesSeen == valueCount);

    // write the ords for each doc comma-separated
    using (var ordStream = ords.GetEnumerator())
    {
        foreach (var n in docToOrdCount)
        {
            sb2.Length = 0;
            var count = (int)n;
            for (var i = 0; i < count; i++)
            {
                ordStream.MoveNext();
                var ord = ordStream.Current;
                if (sb2.Length > 0)
                {
                    sb2.Append(',');
                }
                // Same formatting as the sizing pass above, so widths always agree.
                sb2.Append(ord.GetValueOrDefault().ToString(CultureInfo.InvariantCulture));
            }

            // now pad to fit: these are numbers so spaces work well. reader calls trim()
            var numPadding = maxOrdListLength - sb2.Length;
            for (var i = 0; i < numPadding; i++)
            {
                sb2.Append(' ');
            }
            SimpleTextUtil.Write(data, sb2.ToString(), scratch);
            SimpleTextUtil.WriteNewline(data);
        }
    }
}