/// <summary>
/// Reads the binary value stored for <paramref name="docId"/> into <paramref name="result"/>.
/// </summary>
/// <param name="docId">Document number; must lie in <c>0 .. MAX_DOC - 1</c>.</param>
/// <param name="result">Receives a newly allocated buffer (offset 0) filled from the input.</param>
/// <exception cref="IndexOutOfRangeException">If <paramref name="docId"/> is outside the valid range.</exception>
/// <exception cref="CorruptIndexException">If the length line does not contain a valid 32-bit integer.</exception>
public override void Get(int docId, BytesRef result)
{
    if (docId < 0 || docId >= _outerInstance.MAX_DOC)
    {
        throw new IndexOutOfRangeException("docID must be 0 .. " + (_outerInstance.MAX_DOC - 1) + "; got " + docId);
    }

    // Widen to 64 bits before multiplying: the per-document stride times docId can exceed int.MaxValue.
    long stride = 9L + _field.Pattern.Length + _field.MaxLength + 2;
    _input.Seek(_field.DataStartFilePointer + stride * docId);
    SimpleTextUtil.ReadLine(_input, _scratch);
    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH));

    int len;
    try
    {
        // LUCENENET: .NET has no integer pattern parser; the invariant-culture integer parse is used instead.
        len = int.Parse(
            Encoding.UTF8.GetString(
                _scratch.Bytes,
                _scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
                _scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length),
            NumberStyles.Integer,
            CultureInfo.InvariantCulture);
    }
    catch (FormatException ex)
    {
        throw new CorruptIndexException("failed to parse int value (resource=" + _input + ")", ex);
    }
    catch (OverflowException ex)
    {
        // A digit run too large for Int32 is just as corrupt as a non-numeric one.
        throw new CorruptIndexException("failed to parse int value (resource=" + _input + ")", ex);
    }

    result.Bytes = new byte[len];
    result.Offset = 0;
    result.Length = len;
    _input.ReadBytes(result.Bytes, 0, len);
}
/// <summary>
/// Resolves ordinal <paramref name="ord"/> to its value bytes and stores them in <paramref name="result"/>.
/// </summary>
/// <param name="ord">Ordinal to look up; must lie in <c>0 .. field.NumValues - 1</c>.</param>
/// <param name="result">Receives a newly allocated buffer (offset 0) filled from the input.</param>
/// <exception cref="IndexOutOfRangeException">If <paramref name="ord"/> is outside the valid range.</exception>
public override void LookupOrd(long ord, BytesRef result)
{
    bool inRange = ord >= 0 && ord < field.NumValues;
    if (!inRange)
    {
        throw new IndexOutOfRangeException("ord must be 0 .. " + (field.NumValues - 1) + "; got " + ord);
    }

    // The seek target is the data start plus ord times a per-field constant.
    @in.Seek(field.DataStartFilePointer + ord * (9 + field.Pattern.Length + field.MaxLength));
    SimpleTextUtil.ReadLine(@in, scratch);
    Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextDocValuesWriter.LENGTH), "got " + scratch.Utf8ToString() + " in=" + @in);

    int len;
    try
    {
        // NOTE(review): decoder.parse, ParseException and initCause look like untranslated Java APIs,
        // and SubList(...).ToString() may not yield the decoded text - confirm against this port's support library.
        len = (int) decoder.parse(scratch.Bytes.SubList(scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length, scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length).ToString());
    }
    catch (ParseException pe)
    {
        CorruptIndexException e = new CorruptIndexException("failed to parse int length (resource=" + @in + ")");
        e.initCause(pe);
        throw e;
    }

    result.Bytes = new sbyte[len];
    result.Length = len;
    result.Offset = 0;
    @in.ReadBytes(result.Bytes, 0, len);
}
/// <summary>
/// Reads bytes from <paramref name="input"/> into <paramref name="scratch"/> until an unescaped
/// NEWLINE is seen. The NEWLINE is not stored; a byte preceded by ESCAPE is stored verbatim.
/// </summary>
/// <param name="input">Source of the bytes.</param>
/// <param name="scratch">Buffer that is grown as needed; on return its offset is 0 and its length is the number of stored bytes.</param>
public static void ReadLine(DataInput input, BytesRef scratch)
{
    int count = 0;
    for (;;)
    {
        var current = input.ReadSByte();

        // Make room before deciding what to do with the byte.
        if (count == scratch.Bytes.Length)
        {
            scratch.Grow(count + 1);
        }

        if (current == ESCAPE)
        {
            // The byte after ESCAPE is taken literally, whatever it is.
            scratch.Bytes[count] = input.ReadSByte();
            count++;
            continue;
        }

        if (current == NEWLINE)
        {
            break;
        }

        scratch.Bytes[count] = current;
        count++;
    }

    scratch.Offset = 0;
    scratch.Length = count;
}
/// <summary>
/// Resolves ordinal <paramref name="ord"/> to its value bytes and stores them in <paramref name="result"/>.
/// </summary>
/// <param name="ord">Ordinal to look up; must lie in <c>0 .. NumValues - 1</c>.</param>
/// <param name="result">Receives a newly allocated buffer (offset 0) filled from the input.</param>
/// <exception cref="IndexOutOfRangeException">If <paramref name="ord"/> is outside the valid range.</exception>
/// <exception cref="CorruptIndexException">If the length line cannot be parsed as an integer.</exception>
public override void LookupOrd(long ord, BytesRef result)
{
    if (ord < 0 || ord >= _field.NumValues)
    {
        throw new IndexOutOfRangeException("ord must be 0 .. " + (_field.NumValues - 1) + "; got " + ord);
    }

    _input.Seek(_field.DataStartFilePointer + ord * (9 + _field.Pattern.Length + _field.MaxLength));
    SimpleTextUtil.ReadLine(_input, _scratch);
    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH), "got " + _scratch.Utf8ToString() + " in=" + _input);

    int len;
    try
    {
        // Decode the digits after the LENGTH prefix as UTF-8 and parse them culture-invariantly.
        // The previous SubList(...).ToString() call did not produce the decoded text, and the
        // Java-style pattern parser has no .NET counterpart.
        len = int.Parse(
            System.Text.Encoding.UTF8.GetString(
                _scratch.Bytes,
                _scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
                _scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length),
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture);
    }
    catch (Exception pe)
    {
        throw new CorruptIndexException("failed to parse int length (resource=" + _input + ")", pe);
    }

    result.Bytes = new byte[len];
    result.Offset = 0;
    result.Length = len;
    _input.ReadBytes(result.Bytes, 0, len);
}
/// <summary>
/// Reads the binary value stored for <paramref name="docId"/> into <paramref name="result"/>.
/// </summary>
/// <param name="docId">Document number; must lie in <c>0 .. MAX_DOC - 1</c>.</param>
/// <param name="result">Receives a newly allocated buffer (offset 0) filled from the input.</param>
/// <exception cref="IndexOutOfRangeException">If <paramref name="docId"/> is outside the valid range.</exception>
/// <exception cref="CorruptIndexException">If the length line does not contain a valid 32-bit integer.</exception>
public override void Get(int docId, BytesRef result)
{
    if (docId < 0 || docId >= _outerInstance.MAX_DOC)
    {
        throw new IndexOutOfRangeException("docID must be 0 .. " + (_outerInstance.MAX_DOC - 1) + "; got " + docId);
    }

    // Widen to 64 bits before multiplying: the per-document stride times docId can exceed int.MaxValue.
    long stride = 9L + _field.Pattern.Length + _field.MaxLength + 2;
    _input.Seek(_field.DataStartFilePointer + stride * docId);
    SimpleTextUtil.ReadLine(_input, _scratch);
    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH));

    int len;
    try
    {
        // Decode the digits after the LENGTH prefix as UTF-8 and parse them culture-invariantly;
        // SubList(...).ToString() did not produce the decoded text.
        len = int.Parse(
            System.Text.Encoding.UTF8.GetString(
                _scratch.Bytes,
                _scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
                _scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length),
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture);
    }
    catch (FormatException ex)
    {
        throw new CorruptIndexException("failed to parse int value (resource=" + _input + ")", ex);
    }
    catch (OverflowException ex)
    {
        // A digit run too large for Int32 is just as corrupt as a non-numeric one.
        throw new CorruptIndexException("failed to parse int value (resource=" + _input + ")", ex);
    }

    result.Bytes = new byte[len];
    result.Offset = 0;
    result.Length = len;
    _input.ReadBytes(result.Bytes, 0, len);
}
/// <summary>
/// Fills <paramref name="scratch"/> with the next line of <paramref name="input"/>. Reading stops at
/// the first unescaped NEWLINE, which is not stored; a byte that follows ESCAPE is stored as-is.
/// </summary>
/// <param name="input">Source of the bytes.</param>
/// <param name="scratch">Buffer that is grown as needed; on return its offset is 0 and its length is the number of stored bytes.</param>
public static void ReadLine(DataInput input, BytesRef scratch)
{
    int written = 0;
    bool endOfLine = false;
    while (!endOfLine)
    {
        var next = input.ReadByte();

        // Capacity is ensured for every byte read, including the terminating NEWLINE.
        if (written == scratch.Bytes.Length)
        {
            scratch.Grow(written + 1);
        }

        if (next == ESCAPE)
        {
            scratch.Bytes[written] = input.ReadByte();
            written++;
        }
        else if (next == NEWLINE)
        {
            endOfLine = true;
        }
        else
        {
            scratch.Bytes[written] = next;
            written++;
        }
    }

    scratch.Offset = 0;
    scratch.Length = written;
}
/// <summary>
/// Completes the current term: copies its statistics into a new postings term state, has the postings
/// writer encode the term metadata, and adds the term with that metadata to the FST builder.
/// </summary>
/// <param name="text">Bytes of the term being finished.</param>
/// <param name="stats">Document frequency and total term frequency of the term.</param>
public override void FinishTerm(BytesRef text, TermStats stats)
{
    var postingsWriter = _outerInstance._postingsWriter;

    // The term state and the FST metadata carry the same statistics.
    var state = postingsWriter.NewTermState();
    state.DocFreq = stats.DocFreq;
    state.TotalTermFreq = stats.TotalTermFreq;

    var meta = new FSTTermOutputs.TermData();
    meta.LONGS = new long[_longsSize];
    meta.BYTES = null;
    meta.DOC_FREQ = state.DocFreq;
    meta.TOTAL_TERM_FREQ = state.TotalTermFreq;

    postingsWriter.FinishTerm(state);
    postingsWriter.EncodeTerm(meta.LONGS, _metaWriter, _fieldInfo, state, true);

    // Whatever EncodeTerm wrote to the scratch writer becomes the metadata byte blob.
    var bytesSize = (int)_metaWriter.FilePointer;
    if (bytesSize > 0)
    {
        meta.BYTES = new byte[bytesSize];
        _metaWriter.WriteTo(meta.BYTES, 0);
        _metaWriter.Reset();
    }

    _builder.Add(Util.ToIntsRef(text, _scratchTerm), meta);
    _numTerms++;
}
/// <summary>
/// Validates the footer of a SimpleText file: the next line must be the checksum line, its value must
/// equal the checksum reported by <paramref name="input"/>, and nothing may follow it.
/// </summary>
/// <param name="input">Checksumming input positioned at the start of the footer line.</param>
/// <exception cref="CorruptIndexException">If the checksum line is missing, the checksum differs, or trailing data exists.</exception>
public static void CheckFooter(ChecksumIndexInput input)
{
    // The expected value is captured before the footer line is read
    // (presumably because reading advances the running checksum - confirm).
    string expected = string.Format("{0:D}", input.Checksum);

    var line = new BytesRef();
    ReadLine(input, line);

    if (!StringHelper.StartsWith(line, CHECKSUM))
    {
        throw new CorruptIndexException("SimpleText failure: expected checksum line but got " + line.Utf8ToString() + " (resource=" + input + ")");
    }

    // Everything after the CHECKSUM prefix is the recorded checksum text.
    var recorded = new BytesRef(line.Bytes, CHECKSUM.Length, line.Length - CHECKSUM.Length);
    string actual = recorded.Utf8ToString();
    if (expected != actual)
    {
        throw new CorruptIndexException("SimpleText checksum failure: " + actual + " != " + expected + " (resource=" + input + ")");
    }

    if (input.Length() != input.FilePointer)
    {
        throw new CorruptIndexException("Unexpected stuff at the end of file, please be careful with your text editor! (resource=" + input + ")");
    }
}
/// <summary>
/// Resolves ordinal <paramref name="ord"/> to its value bytes and stores them in <paramref name="result"/>.
/// </summary>
/// <param name="ord">Ordinal to look up; must lie in <c>0 .. NumValues - 1</c>.</param>
/// <param name="result">Receives a newly allocated buffer (offset 0) filled from the input.</param>
/// <exception cref="IndexOutOfRangeException">If <paramref name="ord"/> is outside the valid range.</exception>
/// <exception cref="CorruptIndexException">If the length line cannot be parsed as an integer.</exception>
public override void LookupOrd(long ord, BytesRef result)
{
    bool inRange = ord >= 0 && ord < _field.NumValues;
    if (!inRange)
    {
        throw new IndexOutOfRangeException("ord must be 0 .. " + (_field.NumValues - 1) + "; got " + ord);
    }

    _input.Seek(_field.DataStartFilePointer + ord * (9 + _field.Pattern.Length + _field.MaxLength));
    SimpleTextUtil.ReadLine(_input, _scratch);
    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH), "got " + _scratch.Utf8ToString() + " in=" + _input);

    int len;
    try
    {
        // The digits follow the LENGTH prefix; they are decoded as UTF-8 and parsed with the invariant culture.
        string digits = Encoding.UTF8.GetString(
            _scratch.Bytes,
            _scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
            _scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length);
        len = int.Parse(digits, NumberStyles.Integer, CultureInfo.InvariantCulture);
    }
    catch (Exception pe)
    {
        throw new CorruptIndexException("failed to parse int length (resource=" + _input + ")", pe);
    }

    result.Bytes = new byte[len];
    result.Length = len;
    result.Offset = 0;
    _input.ReadBytes(result.Bytes, 0, len);
}
/// <summary>
/// Creates the helper and stores the supplied collaborators in its fields.
/// </summary>
/// <param name="outerInstance">The enclosing doc-values reader.</param>
/// <param name="field">Per-field descriptor.</param>
/// <param name="in">Input the helper reads from.</param>
/// <param name="scratch">Reusable line buffer.</param>
public NumericDocValuesAnonymousInnerClassHelper(SimpleTextDocValuesReader outerInstance, OneField field, IndexInput @in, BytesRef scratch)
{
    this._scratch = scratch;
    this._input = @in;
    this._field = field;
    this._outerInstance = outerInstance;
}
/// <summary>
/// Builds a <see cref="Bits"/> view for the binary field described by <paramref name="fieldInfo"/>,
/// backed by a clone of the data input and a fresh scratch buffer.
/// </summary>
/// <param name="fieldInfo">Field whose name is looked up in FIELDS.</param>
/// <returns>A new <c>BitsAnonymousInnerClassHelper2</c> instance.</returns>
private Bits GetBinaryDocsWithField(FieldInfo fieldInfo)
{
    // Arguments are evaluated left to right: field lookup, then input clone, then scratch allocation.
    return new BitsAnonymousInnerClassHelper2(
        this,
        FIELDS[fieldInfo.Name],
        (IndexInput)DATA.Clone(),
        new BytesRef());
}
/// <summary>
/// Initializes the shared enumerator state: a new term state from the postings reader, an empty
/// byte reader, and no current term.
/// </summary>
/// <param name="outerInstance">The enclosing terms reader.</param>
internal BaseTermsEnum(FSTTermsReader.TermsReader outerInstance)
{
    this.outerInstance = outerInstance;

    var postingsReader = outerInstance.outerInstance.postingsReader;
    state = postingsReader.NewTermState();
    bytesReader = new ByteArrayDataInput();
    term_Renamed = null;
    // NOTE: metadata will only be initialized in child class
}
/// <summary>
/// Creates the helper and stores the supplied collaborators in its fields.
/// </summary>
/// <param name="outerInstance">The enclosing doc-values reader.</param>
/// <param name="field">Per-field descriptor.</param>
/// <param name="input">Input the helper reads from.</param>
/// <param name="scratch">Reusable line buffer.</param>
public BinaryDocValuesAnonymousInnerClassHelper(SimpleTextDocValuesReader outerInstance, OneField field, IndexInput input, BytesRef scratch)
{
    this._scratch = scratch;
    this._input = input;
    this._field = field;
    this._outerInstance = outerInstance;
}
/// <summary>
/// Add a new position &amp; payload. The entry is buffered; once
/// <c>Lucene41PostingsFormat.BLOCK_SIZE</c> entries are buffered they are written out as a block.
/// </summary>
/// <param name="position">Position of the occurrence; stored as a delta from the last position.</param>
/// <param name="payload">Payload bytes, or <c>null</c>/empty for none; only used when the field has payloads.</param>
/// <param name="startOffset">Start offset; only used when the field has offsets.</param>
/// <param name="endOffset">End offset; only used when the field has offsets.</param>
public override void AddPosition(int position, BytesRef payload, int startOffset, int endOffset)
{
    int slot = PosBufferUpto;
    PosDeltaBuffer[slot] = position - LastPosition;

    if (FieldHasPayloads)
    {
        bool hasPayload = payload != null && payload.Length != 0;
        if (hasPayload)
        {
            PayloadLengthBuffer[slot] = payload.Length;

            // Grow the payload byte buffer when the new payload would not fit.
            int required = PayloadByteUpto + payload.Length;
            if (required > PayloadBytes.Length)
            {
                PayloadBytes = ArrayUtil.Grow(PayloadBytes, required);
            }

            Array.Copy(payload.Bytes, payload.Offset, PayloadBytes, PayloadByteUpto, payload.Length);
            PayloadByteUpto += payload.Length;
        }
        else
        {
            // no payload
            PayloadLengthBuffer[slot] = 0;
        }
    }

    if (FieldHasOffsets)
    {
        Debug.Assert(startOffset >= LastStartOffset);
        Debug.Assert(endOffset >= startOffset);
        OffsetStartDeltaBuffer[slot] = startOffset - LastStartOffset;
        OffsetLengthBuffer[slot] = endOffset - startOffset;
        LastStartOffset = startOffset;
    }

    PosBufferUpto++;
    LastPosition = position;

    if (PosBufferUpto != Lucene41PostingsFormat.BLOCK_SIZE)
    {
        return;
    }

    // The buffers are full: write position deltas, then payloads and offsets if present.
    ForUtil.WriteBlock(PosDeltaBuffer, Encoded, PosOut);

    if (FieldHasPayloads)
    {
        ForUtil.WriteBlock(PayloadLengthBuffer, Encoded, PayOut);
        PayOut.WriteVInt(PayloadByteUpto);
        PayOut.WriteBytes(PayloadBytes, 0, PayloadByteUpto);
        PayloadByteUpto = 0;
    }

    if (FieldHasOffsets)
    {
        ForUtil.WriteBlock(OffsetStartDeltaBuffer, Encoded, PayOut);
        ForUtil.WriteBlock(OffsetLengthBuffer, Encoded, PayOut);
    }

    PosBufferUpto = 0;
}
/// <summary>
/// Positions the enumerator on <paramref name="target"/> using a previously captured term state.
/// Nothing happens when the enumerator is already on that term.
/// </summary>
/// <param name="target">Term to seek to.</param>
/// <param name="otherState">State to copy into this enumerator's term state.</param>
public override void SeekExact(BytesRef target, TermState otherState)
{
    if (target.Equals(term_Renamed))
    {
        return;
    }

    state.CopyFrom(otherState);
    // Keep a private copy of the term bytes rather than the caller's instance.
    term_Renamed = BytesRef.DeepCopyOf(target);
    seekPending = true;
}
/// <summary>
/// Writes the term text line followed by the term frequency line.
/// </summary>
/// <param name="term">Bytes of the term.</param>
/// <param name="freq">Frequency written after the TERMFREQ prefix.</param>
public override void StartTerm(BytesRef term, int freq)
{
    Write(TERMTEXT);
    Write(term);
    NewLine();

    Write(TERMFREQ);
    // Format with the invariant culture so the file content does not depend on the machine's locale.
    Write(freq.ToString(System.Globalization.CultureInfo.InvariantCulture));
    NewLine();
}
/// <summary>
/// Creates the helper, storing the supplied collaborators and taking both decoder patterns from
/// <paramref name="field"/>.
/// </summary>
/// <param name="outerInstance">The enclosing doc-values reader.</param>
/// <param name="field">Per-field descriptor; supplies <c>Pattern</c> and <c>OrdPattern</c>.</param>
/// <param name="input">Input the helper reads from.</param>
/// <param name="scratch">Reusable line buffer.</param>
public SortedDocValuesAnonymousInnerClassHelper(SimpleTextDocValuesReader outerInstance, OneField field, IndexInput input, BytesRef scratch)
{
    this._outerInstance = outerInstance;
    this._input = input;
    this._scratch = scratch;

    this._field = field;
    this._decoderFormat = field.Pattern;
    this._ordDecoderFormat = field.OrdPattern;
}
/// <summary>
/// Returns a binary doc-values reader for the field described by <paramref name="fieldInfo"/>,
/// backed by a clone of the data input and a fresh scratch buffer.
/// </summary>
/// <param name="fieldInfo">Field whose name is looked up in FIELDS.</param>
/// <returns>A new <c>BinaryDocValuesAnonymousInnerClassHelper</c> instance.</returns>
public override BinaryDocValues GetBinary(FieldInfo fieldInfo)
{
    var field = FIELDS[fieldInfo.Name];
    Debug.Assert(field != null);

    return new BinaryDocValuesAnonymousInnerClassHelper(
        this,
        field,
        (IndexInput)DATA.Clone(),
        new BytesRef());
}
/// <summary>
/// Writes the term text line followed by the term frequency line. The frequency is formatted
/// with the invariant culture.
/// </summary>
/// <param name="term">Bytes of the term.</param>
/// <param name="freq">Frequency written after the TERMFREQ prefix.</param>
public override void StartTerm(BytesRef term, int freq)
{
    // Line 1: term text.
    Write(TERMTEXT);
    Write(term);
    NewLine();

    // Line 2: term frequency.
    string freqText = freq.ToString(CultureInfo.InvariantCulture);
    Write(TERMFREQ);
    Write(freqText);
    NewLine();
}
/// <summary>
/// Creates the helper and stores the supplied collaborators, including both decoders, in its fields.
/// </summary>
/// <param name="outerInstance">The enclosing doc-values reader.</param>
/// <param name="field">Per-field descriptor.</param>
/// <param name="in">Input the helper reads from.</param>
/// <param name="scratch">Reusable line buffer.</param>
/// <param name="decoder">Decoder stored in the <c>decoder</c> field.</param>
/// <param name="ordDecoder">Decoder stored in the <c>ordDecoder</c> field.</param>
public SortedDocValuesAnonymousInnerClassHelper(SimpleTextDocValuesReader outerInstance, Lucene.Net.Codecs.SimpleText.SimpleTextDocValuesReader.OneField field, IndexInput @in, BytesRef scratch, DecimalFormat decoder, DecimalFormat ordDecoder)
{
    this.ordDecoder = ordDecoder;
    this.decoder = decoder;
    this.scratch = scratch;
    this.@in = @in;
    this.field = field;
    this.outerInstance = outerInstance;
}
/// <summary>
/// Writes the footer line: the CHECKSUM prefix, the checksum of <paramref name="output"/> formatted
/// with <c>"{0:D}"</c>, and a newline.
/// </summary>
/// <remarks>
/// NOTE(review): the <c>"D"</c> specifier without a precision applies no zero padding, so the line
/// width varies with the checksum value. Any change to the format has to be mirrored wherever the
/// footer is verified.
/// </remarks>
/// <param name="output">Output whose checksum is recorded and to which the footer is written.</param>
/// <param name="scratch">Reusable buffer for the UTF-8 conversion.</param>
public static void WriteChecksum(IndexOutput output, BytesRef scratch)
{
    // Snapshot the checksum before any footer bytes are written.
    string checksumText = string.Format("{0:D}", output.Checksum);

    Write(output, CHECKSUM);
    Write(output, checksumText, scratch);
    WriteNewline(output);
}
/// <summary>
/// Creates the helper, storing the supplied collaborators, taking the decoder pattern from
/// <paramref name="field"/>, and starting with an empty ordinal list at index 0.
/// </summary>
/// <param name="outerInstance">The enclosing doc-values reader.</param>
/// <param name="field">Per-field descriptor; supplies <c>Pattern</c>.</param>
/// <param name="input">Input the helper reads from.</param>
/// <param name="scratch">Reusable line buffer.</param>
public SortedSetDocValuesAnonymousInnerClassHelper(SimpleTextDocValuesReader outerInstance, OneField field, IndexInput input, BytesRef scratch)
{
    this._outerInstance = outerInstance;
    this._input = input;
    this._scratch = scratch;

    this._field = field;
    this._decoderFormat = field.Pattern;

    // No ordinals are loaded until a document is selected.
    this._currentIndex = 0;
    this._currentOrds = new string[0];
}
/// <summary>
/// Returns a sorted doc-values reader for the field described by <paramref name="fieldInfo"/>,
/// backed by a clone of the data input and a fresh scratch buffer.
/// </summary>
/// <param name="fieldInfo">Field whose name is looked up in FIELDS.</param>
/// <returns>A new <c>SortedDocValuesAnonymousInnerClassHelper</c> instance.</returns>
public override SortedDocValues GetSorted(FieldInfo fieldInfo)
{
    var field = FIELDS[fieldInfo.Name];

    // SegmentCoreReaders already verifies this field is valid:
    Debug.Assert(field != null);

    return new SortedDocValuesAnonymousInnerClassHelper(
        this,
        field,
        (IndexInput)DATA.Clone(),
        new BytesRef());
}
/// <summary>
/// Seeks the underlying FST enumerator to <paramref name="target"/> or the next entry after it.
/// </summary>
/// <param name="target">Term to seek to.</param>
/// <returns>
/// <c>END</c> when no current term results, <c>FOUND</c> when the current term equals
/// <paramref name="target"/>, otherwise <c>NOT_FOUND</c>.
/// </returns>
public override SeekStatus SeekCeil(BytesRef target)
{
    UpdateEnum(fstEnum.SeekCeil(target));

    if (term_Renamed != null)
    {
        if (term_Renamed.Equals(target))
        {
            return SeekStatus.FOUND;
        }
        return SeekStatus.NOT_FOUND;
    }

    return SeekStatus.END;
}
/// <summary>
/// Writes the bytes of <paramref name="b"/> to <paramref name="output"/>, prefixing every NEWLINE
/// or ESCAPE byte with an ESCAPE byte.
/// </summary>
/// <param name="output">Destination of the bytes.</param>
/// <param name="b">Slice to write; <c>Offset</c> and <c>Length</c> select the range.</param>
public static void Write(DataOutput output, BytesRef b)
{
    int end = b.Offset + b.Length;
    for (int pos = b.Offset; pos < end; pos++)
    {
        var value = b.Bytes[pos];

        bool needsEscape = value == NEWLINE || value == ESCAPE;
        if (needsEscape)
        {
            output.WriteByte(ESCAPE);
        }

        output.WriteByte(value);
    }
}
/// <summary>
/// Creates the helper, storing the supplied collaborators and starting with an empty ordinal
/// list at index 0.
/// </summary>
/// <param name="outerInstance">The enclosing doc-values reader.</param>
/// <param name="field">Per-field descriptor.</param>
/// <param name="in">Input the helper reads from.</param>
/// <param name="scratch">Reusable line buffer.</param>
/// <param name="decoder">Decoder stored in the <c>decoder</c> field.</param>
public SortedSetDocValuesAnonymousInnerClassHelper(SimpleTextDocValuesReader outerInstance, Lucene.Net.Codecs.SimpleText.SimpleTextDocValuesReader.OneField field, IndexInput @in, BytesRef scratch, DecimalFormat decoder)
{
    this.decoder = decoder;
    this.scratch = scratch;
    this.@in = @in;
    this.field = field;
    this.outerInstance = outerInstance;

    // No ordinals are loaded until a document is selected.
    this.currentIndex = 0;
    this.currentOrds = new string[0];
}
/// <summary>
/// Reads the next position record: the position line (when positions are read), the start and end
/// offset lines (when offsets are read), and an optional payload line.
/// </summary>
/// <returns>The parsed position, or <c>-1</c> when positions are not being read.</returns>
public override int NextPosition()
{
    int pos = -1;
    if (_readPositions)
    {
        SimpleTextUtil.ReadLine(_in, _scratch);
        Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS), "got line=" + _scratch.Utf8ToString());
        int prefix = SimpleTextFieldsWriter.POS.Length;
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + prefix, _scratch.Length - prefix, _scratchUtf162);
        pos = ArrayUtil.ParseInt(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
    }

    if (_readOffsets)
    {
        SimpleTextUtil.ReadLine(_in, _scratch);
        Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET), "got line=" + _scratch.Utf8ToString());
        int startPrefix = SimpleTextFieldsWriter.START_OFFSET.Length;
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + startPrefix, _scratch.Length - startPrefix, _scratchUtf162);
        _startOffset = ArrayUtil.ParseInt(_scratchUtf162.Chars, 0, _scratchUtf162.Length);

        SimpleTextUtil.ReadLine(_in, _scratch);
        Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET), "got line=" + _scratch.Utf8ToString());
        int endPrefix = SimpleTextFieldsWriter.END_OFFSET.Length;
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + endPrefix, _scratch.Length - endPrefix, _scratchUtf162);
        _endOffset = ArrayUtil.ParseInt(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
    }

    // Peek at the following line; if it is not a payload, rewind so the next reader sees it.
    long fp = _in.FilePointer;
    SimpleTextUtil.ReadLine(_in, _scratch);
    if (!StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
    {
        _payload = null;
        _in.Seek(fp);
        return pos;
    }

    int len = _scratch.Length - SimpleTextFieldsWriter.PAYLOAD.Length;
    if (_scratch2.Bytes.Length < len)
    {
        _scratch2.Grow(len);
    }
    Array.Copy(_scratch.Bytes, SimpleTextFieldsWriter.PAYLOAD.Length, _scratch2.Bytes, 0, len);
    _scratch2.Length = len;
    _payload = _scratch2;
    return pos;
}
/// <summary>
/// Repositions the iterator on the tail of the term map starting at <paramref name="text"/>.
/// </summary>
/// <param name="text">Term to seek to.</param>
/// <returns>
/// <c>END</c> when the tail is empty, <c>FOUND</c> when the term returned by <c>Next()</c> equals
/// <paramref name="text"/>, otherwise <c>NOT_FOUND</c>.
/// </returns>
public override SeekStatus SeekCeil(BytesRef text)
{
    // NOTE(review): TailMap/entrySet/HasNext look like untranslated Java collection calls - confirm they resolve in this port.
    _iterator = terms.TailMap(text).entrySet().GetEnumerator();

    if (_iterator.HasNext())
    {
        return Next().Equals(text) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
    }

    return SeekStatus.END;
}
/// <summary>
/// Seeks to <paramref name="target"/> or the next term after it, then loads the term metadata.
/// </summary>
/// <param name="target">Term to seek to.</param>
/// <returns>
/// <c>END</c> when no term results, <c>FOUND</c> when the resulting term equals
/// <paramref name="target"/>, otherwise <c>NOT_FOUND</c>.
/// </returns>
public override SeekStatus SeekCeil(BytesRef target)
{
    // Mark the metadata as not yet decoded before moving to the new term.
    decoded = false;
    term_Renamed = DoSeekCeil(target);
    LoadMetaData();

    if (term_Renamed != null)
    {
        if (term_Renamed.Equals(target))
        {
            return SeekStatus.FOUND;
        }
        return SeekStatus.NOT_FOUND;
    }

    return SeekStatus.END;
}
/// <summary>
/// Returns a numeric doc-values reader for the field described by <paramref name="fieldInfo"/>,
/// backed by a clone of the data input and a fresh scratch buffer.
/// </summary>
/// <param name="fieldInfo">Field whose name is looked up in FIELDS.</param>
/// <returns>A new <c>NumericDocValuesAnonymousInnerClassHelper</c> instance.</returns>
public override NumericDocValues GetNumeric(FieldInfo fieldInfo)
{
    var field = FIELDS[fieldInfo.Name];

    // SegmentCoreReaders already verifies this field is valid. A single assertion is kept so that
    // a failure reports the diagnostic message instead of being pre-empted by a message-less duplicate.
    Debug.Assert(field != null, "field=" + fieldInfo.Name + " fields=" + FIELDS);

    var @in = (IndexInput)DATA.Clone();
    var scratch = new BytesRef();
    return new NumericDocValuesAnonymousInnerClassHelper(this, field, @in, scratch);
}
/// <summary>
/// Returns a sorted-set doc-values reader for the field described by <paramref name="fieldInfo"/>,
/// backed by a clone of the data input, a fresh scratch buffer, and a decoder built from the
/// field's pattern.
/// </summary>
/// <param name="fieldInfo">Field whose name is looked up in FIELDS.</param>
/// <returns>A new <c>SortedSetDocValuesAnonymousInnerClassHelper</c> instance.</returns>
public override SortedSetDocValues GetSortedSet(FieldInfo fieldInfo)
{
    OneField field = FIELDS[fieldInfo.Name];

    // SegmentCoreReaders already verifies this field is valid:
    Debug.Assert(field != null);

    // Arguments are evaluated left to right: input clone, scratch buffer, then decoder.
    return new SortedSetDocValuesAnonymousInnerClassHelper(
        this,
        field,
        (IndexInput)DATA.Clone(),
        new BytesRef(),
        new DecimalFormat(field.Pattern, new DecimalFormatSymbols(Locale.ROOT)));
}
/// <summary>
/// Reads the live-docs file of a segment: a SIZE line, one DOC line per set bit, an END line, and
/// the checksum footer.
/// </summary>
/// <param name="dir">Directory containing the file.</param>
/// <param name="info">Segment commit info; supplies the segment name and deletion generation.</param>
/// <param name="context">IO context used to open the file.</param>
/// <returns>A <c>SimpleTextBits</c> wrapping the bit array that was read.</returns>
public override Bits ReadLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context)
{
    Debug.Assert(info.HasDeletions());

    var fileName = IndexFileNames.FileNameFromGeneration(info.Info.Name, LIVEDOCS_EXTENSION, info.DelGen);
    var scratch = new BytesRef();
    var scratchUtf16 = new CharsRef();

    ChecksumIndexInput input = null;
    var success = false;
    try
    {
        input = dir.OpenChecksumInput(fileName, context);

        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SIZE));
        var size = ParseIntAt(scratch, SIZE.Length, scratchUtf16);

        var bits = new BitArray(size);

        // One DOC line per set bit, up to the END marker.
        for (SimpleTextUtil.ReadLine(input, scratch); !scratch.Equals(END); SimpleTextUtil.ReadLine(input, scratch))
        {
            Debug.Assert(StringHelper.StartsWith(scratch, DOC));
            var docid = ParseIntAt(scratch, DOC.Length, scratchUtf16);
            bits.SafeSet(docid, true);
        }

        SimpleTextUtil.CheckFooter(input);

        success = true;
        return new SimpleTextBits(bits, size);
    }
    finally
    {
        // On failure, close without letting a secondary exception mask the original one.
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(input);
        }
        else
        {
            IOUtils.Close(input);
        }
    }
}
/// <summary>
/// Converts <paramref name="s"/> to UTF-8 in <paramref name="scratch"/> and writes the resulting
/// bytes through the <see cref="BytesRef"/> overload of <c>Write</c>.
/// </summary>
/// <param name="output">Destination of the bytes.</param>
/// <param name="s">Text to write.</param>
/// <param name="scratch">Reusable buffer that receives the UTF-8 bytes.</param>
public static void Write(DataOutput output, string s, BytesRef scratch)
{
    char[] chars = s.ToCharArray();
    UnicodeUtil.UTF16toUTF8(chars, 0, s.Length, scratch);
    Write(output, scratch);
}
/// <summary>
/// Re-targets this enumerator at a new set of per-term arrays and rewinds its iteration state.
/// </summary>
/// <param name="liveDocs">Live-docs bits stored for later use.</param>
/// <param name="positions">Positions array.</param>
/// <param name="startOffsets">Start offsets array.</param>
/// <param name="endOffsets">End offsets array.</param>
/// <param name="payloads">Payloads array.</param>
public void Reset(Bits liveDocs, int[] positions, int[] startOffsets, int[] endOffsets, BytesRef[] payloads)
{
    // Rewind the iteration state.
    this._doc = -1;
    this._didNext = false;
    this._nextPos = 0;

    // Swap in the new data.
    this._liveDocs = liveDocs;
    this._positions = positions;
    this._startOffsets = startOffsets;
    this._endOffsets = endOffsets;
    this._payloads = payloads;
}
/// <summary>
/// Reads the term vectors of document <paramref name="doc"/> from the text file: per field its
/// flags and term count, and per term its text, frequency and, depending on the flags,
/// positions, payloads and offsets.
/// </summary>
/// <param name="doc">Document number; used as an index into the offsets table.</param>
/// <returns>The fields of the document, or <c>null</c> when the document has no vectors.</returns>
public override Fields Get(int doc)
{
    // Field names are ordered ordinally, not by the current culture's collation rules.
    var fields = new SortedDictionary<string, SimpleTVTerms>(System.StringComparer.Ordinal);
    _input.Seek(_offsets[doc]);
    ReadLine();
    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.NUMFIELDS));
    var numFields = ParseIntAt(SimpleTextTermVectorsWriter.NUMFIELDS.Length);
    if (numFields == 0)
    {
        return null; // no vectors for this doc
    }

    for (var i = 0; i < numFields; i++)
    {
        ReadLine();
        Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELD));
        // skip fieldNumber:
        ParseIntAt(SimpleTextTermVectorsWriter.FIELD.Length);

        ReadLine();
        Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDNAME));
        var fieldName = ReadString(SimpleTextTermVectorsWriter.FIELDNAME.Length, _scratch);

        ReadLine();
        Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDPOSITIONS));
        var positions = Convert.ToBoolean(ReadString(SimpleTextTermVectorsWriter.FIELDPOSITIONS.Length, _scratch));

        ReadLine();
        Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDOFFSETS));
        var offsets = Convert.ToBoolean(ReadString(SimpleTextTermVectorsWriter.FIELDOFFSETS.Length, _scratch));

        ReadLine();
        Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDPAYLOADS));
        var payloads = Convert.ToBoolean(ReadString(SimpleTextTermVectorsWriter.FIELDPAYLOADS.Length, _scratch));

        ReadLine();
        Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDTERMCOUNT));
        var termCount = ParseIntAt(SimpleTextTermVectorsWriter.FIELDTERMCOUNT.Length);

        var terms = new SimpleTVTerms(offsets, positions, payloads);
        fields.Add(fieldName, terms);

        for (var j = 0; j < termCount; j++)
        {
            ReadLine();
            Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.TERMTEXT));

            // Copy the term bytes out of the shared scratch buffer, which the next ReadLine overwrites.
            var term = new BytesRef();
            var termLength = _scratch.Length - SimpleTextTermVectorsWriter.TERMTEXT.Length;
            term.Grow(termLength);
            term.Length = termLength;
            Array.Copy(_scratch.Bytes, _scratch.Offset + SimpleTextTermVectorsWriter.TERMTEXT.Length, term.Bytes, term.Offset, termLength);

            var postings = new SimpleTVPostings();
            terms.TERMS.Add(term, postings);

            ReadLine();
            Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.TERMFREQ));
            postings.FREQ = ParseIntAt(SimpleTextTermVectorsWriter.TERMFREQ.Length);

            // Without positions and offsets there are no per-occurrence lines to read.
            if (!positions && !offsets)
            {
                continue;
            }

            if (positions)
            {
                postings.POSITIONS = new int[postings.FREQ];
                if (payloads)
                {
                    postings.PAYLOADS = new BytesRef[postings.FREQ];
                }
            }

            if (offsets)
            {
                postings.START_OFFSETS = new int[postings.FREQ];
                postings.END_OFFSETS = new int[postings.FREQ];
            }

            for (var k = 0; k < postings.FREQ; k++)
            {
                if (positions)
                {
                    ReadLine();
                    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.POSITION));
                    postings.POSITIONS[k] = ParseIntAt(SimpleTextTermVectorsWriter.POSITION.Length);

                    if (payloads)
                    {
                        ReadLine();
                        Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.PAYLOAD));
                        if (_scratch.Length - SimpleTextTermVectorsWriter.PAYLOAD.Length == 0)
                        {
                            // An empty payload line is represented as null.
                            postings.PAYLOADS[k] = null;
                        }
                        else
                        {
                            var payloadBytes = new byte[_scratch.Length - SimpleTextTermVectorsWriter.PAYLOAD.Length];
                            Array.Copy(_scratch.Bytes, _scratch.Offset + SimpleTextTermVectorsWriter.PAYLOAD.Length, payloadBytes, 0, payloadBytes.Length);
                            postings.PAYLOADS[k] = new BytesRef(payloadBytes);
                        }
                    }
                }

                if (!offsets)
                {
                    continue;
                }

                ReadLine();
                Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.STARTOFFSET));
                postings.START_OFFSETS[k] = ParseIntAt(SimpleTextTermVectorsWriter.STARTOFFSET.Length);

                ReadLine();
                Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.ENDOFFSET));
                postings.END_OFFSETS[k] = ParseIntAt(SimpleTextTermVectorsWriter.ENDOFFSET.Length);
            }
        }
    }

    return new SimpleTVFields(this, fields);
}
/// <summary>
/// Reads the VALUE line of a stored field and passes the decoded value to the
/// <paramref name="visitor"/> method that matches <paramref name="type"/>.
/// </summary>
/// <param name="type">Type marker read from the file; compared against the writer's TYPE_* constants.</param>
/// <param name="fieldInfo">Field the value belongs to.</param>
/// <param name="visitor">Receiver of the decoded value.</param>
private void ReadField(BytesRef type, FieldInfo fieldInfo, StoredFieldVisitor visitor)
{
    ReadLine();
    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.VALUE));

    // The payload of the line starts right after the VALUE prefix.
    int valueOffset = _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length;
    int valueLength = _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length;

    // Numbers in the file are machine-readable text, so they are parsed culture-invariantly.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_STRING))
    {
        // Decode the UTF-8 bytes the same way the numeric branches do;
        // SubList(...).ToString() did not yield the decoded text.
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
        visitor.StringField(fieldInfo, _scratchUtf16.ToString());
    }
    else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_BINARY))
    {
        // NOTE(review): the copy is sbyte[]; confirm this matches the element type of _scratch.Bytes
        // and the BinaryField signature in this revision.
        var copy = new sbyte[valueLength];
        Array.Copy(_scratch.Bytes, valueOffset, copy, 0, copy.Length);
        visitor.BinaryField(fieldInfo, copy);
    }
    else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_INT))
    {
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
        visitor.IntField(fieldInfo, Convert.ToInt32(_scratchUtf16.ToString(), invariant));
    }
    else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_LONG))
    {
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
        visitor.LongField(fieldInfo, Convert.ToInt64(_scratchUtf16.ToString(), invariant));
    }
    else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_FLOAT))
    {
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
        visitor.FloatField(fieldInfo, Convert.ToSingle(_scratchUtf16.ToString(), invariant));
    }
    else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_DOUBLE))
    {
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
        visitor.DoubleField(fieldInfo, Convert.ToDouble(_scratchUtf16.ToString(), invariant));
    }
}
/// <summary>
/// Writes the field infos of a segment as text: the number of fields, then for every field its
/// name, number, flags, doc-values settings and attributes, followed by the checksum footer.
/// </summary>
/// <param name="directory">Directory in which the file is created.</param>
/// <param name="segmentName">Segment name used to build the file name.</param>
/// <param name="segmentSuffix">Segment suffix used to build the file name.</param>
/// <param name="infos">Field infos to write.</param>
/// <param name="context">IO context used to create the output.</param>
public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
{
    // Numbers are formatted with the invariant culture so the file does not depend on the
    // machine's locale (the sign of a negative DocValuesGen is culture-specific otherwise).
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var fileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, FIELD_INFOS_EXTENSION);
    var output = directory.CreateOutput(fileName, context);
    var scratch = new BytesRef();
    var success = false;
    try
    {
        SimpleTextUtil.Write(output, NUMFIELDS);
        SimpleTextUtil.Write(output, Convert.ToString(infos.Size(), invariant), scratch);
        SimpleTextUtil.WriteNewline(output);

        foreach (FieldInfo fi in infos)
        {
            SimpleTextUtil.Write(output, NAME);
            SimpleTextUtil.Write(output, fi.Name, scratch);
            SimpleTextUtil.WriteNewline(output);

            SimpleTextUtil.Write(output, NUMBER);
            SimpleTextUtil.Write(output, Convert.ToString(fi.Number, invariant), scratch);
            SimpleTextUtil.WriteNewline(output);

            SimpleTextUtil.Write(output, ISINDEXED);
            SimpleTextUtil.Write(output, Convert.ToString(fi.Indexed), scratch);
            SimpleTextUtil.WriteNewline(output);

            // The index options line is only present for indexed fields.
            if (fi.Indexed)
            {
                Debug.Assert(fi.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.HasPayloads());
                SimpleTextUtil.Write(output, INDEXOPTIONS);
                SimpleTextUtil.Write(output, fi.FieldIndexOptions.ToString(), scratch);
                SimpleTextUtil.WriteNewline(output);
            }

            SimpleTextUtil.Write(output, STORETV);
            SimpleTextUtil.Write(output, Convert.ToString(fi.HasVectors()), scratch);
            SimpleTextUtil.WriteNewline(output);

            SimpleTextUtil.Write(output, PAYLOADS);
            SimpleTextUtil.Write(output, Convert.ToString(fi.HasPayloads()), scratch);
            SimpleTextUtil.WriteNewline(output);

            SimpleTextUtil.Write(output, NORMS);
            SimpleTextUtil.Write(output, Convert.ToString(!fi.OmitsNorms()), scratch);
            SimpleTextUtil.WriteNewline(output);

            SimpleTextUtil.Write(output, NORMS_TYPE);
            SimpleTextUtil.Write(output, GetDocValuesType(fi.NormType), scratch);
            SimpleTextUtil.WriteNewline(output);

            SimpleTextUtil.Write(output, DOCVALUES);
            SimpleTextUtil.Write(output, GetDocValuesType(fi.DocValuesType), scratch);
            SimpleTextUtil.WriteNewline(output);

            SimpleTextUtil.Write(output, DOCVALUES_GEN);
            SimpleTextUtil.Write(output, Convert.ToString(fi.DocValuesGen, invariant), scratch);
            SimpleTextUtil.WriteNewline(output);

            IDictionary<string, string> atts = fi.Attributes();
            int numAtts = atts == null ? 0 : atts.Count;
            SimpleTextUtil.Write(output, NUM_ATTS);
            SimpleTextUtil.Write(output, Convert.ToString(numAtts, invariant), scratch);
            SimpleTextUtil.WriteNewline(output);

            if (numAtts <= 0 || atts == null)
            {
                continue;
            }

            foreach (var entry in atts)
            {
                SimpleTextUtil.Write(output, ATT_KEY);
                SimpleTextUtil.Write(output, entry.Key, scratch);
                SimpleTextUtil.WriteNewline(output);

                SimpleTextUtil.Write(output, ATT_VALUE);
                SimpleTextUtil.Write(output, entry.Value, scratch);
                SimpleTextUtil.WriteNewline(output);
            }
        }

        SimpleTextUtil.WriteChecksum(output, scratch);
        success = true;
    }
    finally
    {
        // On failure, close without letting a secondary exception mask the original one.
        if (success)
        {
            output.Dispose();
        }
        else
        {
            IOUtils.CloseWhileHandlingException(output);
        }
    }
}
/// <summary>
/// Reads a segment info file: version, document count, compound-file flag, diagnostics map and
/// file set, followed by the checksum footer.
/// </summary>
/// <param name="directory">Directory containing the file.</param>
/// <param name="segmentName">Segment name used to build the file name.</param>
/// <param name="context">IO context used to open the file.</param>
/// <returns>A <see cref="SegmentInfo"/> populated from the file.</returns>
public override SegmentInfo Read(Directory directory, string segmentName, IOContext context)
{
    var scratch = new BytesRef();
    string segFileName = IndexFileNames.SegmentFileName(segmentName, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
    ChecksumIndexInput input = directory.OpenChecksumInput(segFileName, context);
    bool success = false;
    try
    {
        // Header: version, doc count, compound flag.
        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_VERSION));
        string version = ReadString(SimpleTextSegmentInfoWriter.SI_VERSION.Length, scratch);

        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_DOCCOUNT));
        int docCount = Convert.ToInt32(ReadString(SimpleTextSegmentInfoWriter.SI_DOCCOUNT.Length, scratch));

        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_USECOMPOUND));
        bool isCompoundFile = Convert.ToBoolean(ReadString(SimpleTextSegmentInfoWriter.SI_USECOMPOUND.Length, scratch));

        // Diagnostics: a count followed by key/value line pairs.
        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_NUM_DIAG));
        int numDiag = Convert.ToInt32(ReadString(SimpleTextSegmentInfoWriter.SI_NUM_DIAG.Length, scratch));

        IDictionary<string, string> diagnostics = new Dictionary<string, string>();
        for (int remaining = numDiag; remaining > 0; remaining--)
        {
            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_DIAG_KEY));
            string key = ReadString(SimpleTextSegmentInfoWriter.SI_DIAG_KEY.Length, scratch);

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_DIAG_VALUE));
            string value = ReadString(SimpleTextSegmentInfoWriter.SI_DIAG_VALUE.Length, scratch);

            diagnostics[key] = value;
        }

        // Files: a count followed by one line per file name.
        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_NUM_FILES));
        int numFiles = Convert.ToInt32(ReadString(SimpleTextSegmentInfoWriter.SI_NUM_FILES.Length, scratch));

        var files = new HashSet<string>();
        for (int remaining = numFiles; remaining > 0; remaining--)
        {
            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_FILE));
            files.Add(ReadString(SimpleTextSegmentInfoWriter.SI_FILE.Length, scratch));
        }

        SimpleTextUtil.CheckFooter(input);

        var info = new SegmentInfo(directory, version, segmentName, docCount, isCompoundFile, null, diagnostics) {Files = files};
        success = true;
        return info;
    }
    finally
    {
        // On failure, close without letting a secondary exception mask the original one.
        if (success)
        {
            input.Dispose();
        }
        else
        {
            IOUtils.CloseWhileHandlingException(input);
        }
    }
}
/// <summary>
/// Scans this field's section of the postings file and builds the term FST. Each term maps to
/// the file pointer just after its TERM line, paired with its document frequency and total term
/// frequency. Field-level statistics are accumulated along the way.
/// </summary>
private void LoadTerms()
{
    var posIntOutputs = PositiveIntOutputs.Singleton;
    var outputsInner = new PairOutputs<long?, long?>(posIntOutputs, posIntOutputs);
    var outputs = new PairOutputs<long?, PairOutputs<long?,long?>.Pair>(posIntOutputs, outputsInner);
    // FST output type: (docs start pointer, (docFreq, totalTermFreq)).
    var fstBuilder = new Builder<PairOutputs<long?, PairOutputs<long?,long?>.Pair>.Pair>(FST.INPUT_TYPE.BYTE1, outputs);

    var termsInput = (IndexInput) _outerInstance._input.Clone();
    termsInput.Seek(_termsStart);

    var lastTerm = new BytesRef(10);
    long lastDocsStart = -1;
    int docFreq = 0;
    long totalTermFreq = 0;
    var visitedDocs = new FixedBitSet(_maxDoc);
    var scratchIntsRef = new IntsRef();

    while (true)
    {
        SimpleTextUtil.ReadLine(termsInput, _scratch);

        bool endOfField = _scratch.Equals(SimpleTextFieldsWriter.END) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD);
        if (endOfField)
        {
            // Flush the pending term, if any, before leaving the loop.
            if (lastDocsStart != -1)
            {
                fstBuilder.Add(Util.ToIntsRef(lastTerm, scratchIntsRef), outputs.NewPair(lastDocsStart, outputsInner.NewPair(docFreq, totalTermFreq)));
                _sumTotalTermFreq += totalTermFreq;
            }
            break;
        }

        if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC))
        {
            docFreq++;
            _sumDocFreq++;
            UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length, _scratch.Length - SimpleTextFieldsWriter.DOC.Length, _scratchUtf16);
            int docId = ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
            visitedDocs.Set(docId);
            continue;
        }

        if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
        {
            UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length, _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16);
            totalTermFreq += ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
            continue;
        }

        if (!StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM))
        {
            // Any other line is ignored here.
            continue;
        }

        // A new term starts: emit the previous one, then reset the per-term counters.
        if (lastDocsStart != -1)
        {
            fstBuilder.Add(Util.ToIntsRef(lastTerm, scratchIntsRef), outputs.NewPair(lastDocsStart, outputsInner.NewPair(docFreq, totalTermFreq)));
        }

        lastDocsStart = termsInput.FilePointer;

        int len = _scratch.Length - SimpleTextFieldsWriter.TERM.Length;
        if (len > lastTerm.Length)
        {
            lastTerm.Grow(len);
        }
        Array.Copy(_scratch.Bytes, SimpleTextFieldsWriter.TERM.Length, lastTerm.Bytes, 0, len);
        lastTerm.Length = len;

        docFreq = 0;
        _sumTotalTermFreq += totalTermFreq;
        totalTermFreq = 0;
        _termCount++;
    }

    _docCount = visitedDocs.Cardinality();
    _fst = fstBuilder.Finish();
}
/// <summary>
/// Reads the next position record from <c>_in</c>.
/// Depending on <c>_readPositions</c> and <c>_readOffsets</c> this consumes the
/// POS line and/or the START_OFFSET and END_OFFSET lines, then looks at one
/// more line to see whether it is a PAYLOAD line.
/// </summary>
/// <returns>The parsed position, or -1 when positions are not being read.</returns>
public override int NextPosition()
{
    int position = -1;

    if (_readPositions)
    {
        SimpleTextUtil.ReadLine(_in, _scratch);
        Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS), "got line=" + _scratch.Utf8ToString());
        int posPrefix = SimpleTextFieldsWriter.POS.Length;
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + posPrefix, _scratch.Length - posPrefix, _scratchUtf162);
        position = ArrayUtil.ParseInt(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
    }

    if (_readOffsets)
    {
        SimpleTextUtil.ReadLine(_in, _scratch);
        Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET), "got line=" + _scratch.Utf8ToString());
        int startPrefix = SimpleTextFieldsWriter.START_OFFSET.Length;
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + startPrefix, _scratch.Length - startPrefix, _scratchUtf162);
        _startOffset = ArrayUtil.ParseInt(_scratchUtf162.Chars, 0, _scratchUtf162.Length);

        SimpleTextUtil.ReadLine(_in, _scratch);
        Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET), "got line=" + _scratch.Utf8ToString());
        int endPrefix = SimpleTextFieldsWriter.END_OFFSET.Length;
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + endPrefix, _scratch.Length - endPrefix, _scratchUtf162);
        _endOffset = ArrayUtil.ParseInt(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
    }

    // Peek at the following line; remember where it starts so it can be un-read.
    long beforePeek = _in.FilePointer;
    SimpleTextUtil.ReadLine(_in, _scratch);

    if (!StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
    {
        // Not a payload line: clear the payload and rewind to the start of that line.
        _payload = null;
        _in.Seek(beforePeek);
        return position;
    }

    // Copy the bytes after the PAYLOAD prefix into the reusable payload buffer.
    int payloadLength = _scratch.Length - SimpleTextFieldsWriter.PAYLOAD.Length;
    if (_scratch2.Bytes.Length < payloadLength)
    {
        _scratch2.Grow(payloadLength);
    }
    Array.Copy(_scratch.Bytes, SimpleTextFieldsWriter.PAYLOAD.Length, _scratch2.Bytes, 0, payloadLength);
    _scratch2.Length = payloadLength;
    _payload = _scratch2;

    return position;
}
/// <summary>
/// Scans <paramref name="in"/> line by line and records, for every FIELD line,
/// the file pointer immediately after that line. Scanning stops at the END
/// line, after which the footer is checked via <c>SimpleTextUtil.CheckFooter</c>.
/// </summary>
/// <param name="in">Input to scan; it is wrapped in a <c>BufferedChecksumIndexInput</c>.</param>
/// <returns>Field name to file-pointer map, ordered by ordinal string comparison.</returns>
private SortedDictionary<string, long?> ReadFields(IndexInput @in)
{
    ChecksumIndexInput input = new BufferedChecksumIndexInput(@in);
    var scratch = new BytesRef(10);

    // Ordinal ordering keeps the field order independent of the current culture.
    var fields = new SortedDictionary<string, long?>(StringComparer.Ordinal);

    while (true)
    {
        SimpleTextUtil.ReadLine(input, scratch);

        if (scratch.Equals(SimpleTextFieldsWriter.END))
        {
            SimpleTextUtil.CheckFooter(input);
            return fields;
        }

        if (StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.FIELD))
        {
            // Decode the bytes after the FIELD prefix as UTF-8. Calling ToString()
            // on a byte sub-list does not decode the text.
            var nameStart = scratch.Offset + SimpleTextFieldsWriter.FIELD.Length;
            var nameLength = scratch.Length - SimpleTextFieldsWriter.FIELD.Length;
            var fieldName = Encoding.UTF8.GetString(scratch.Bytes, nameStart, nameLength);
            fields[fieldName] = input.FilePointer;
        }
    }
}
/// <summary>
/// Forwards <paramref name="bytes"/> to <c>SimpleTextUtil.Write</c>, using
/// <c>_output</c> as the destination.
/// </summary>
/// <param name="bytes">The bytes to write.</param>
private void Write(BytesRef bytes)
{
    SimpleTextUtil.Write(_output, bytes);
}
/// <summary>
/// Writes one occurrence of the current term.
/// When <c>_positions</c> is set, a POSITION line is written, followed by a
/// PAYLOAD line if <c>_payloads</c> is set (the line is written even when
/// <paramref name="payload"/> is null, in which case it carries no bytes).
/// When <c>_offsets</c> is set, STARTOFFSET and ENDOFFSET lines are written.
/// </summary>
/// <param name="position">Position value to write.</param>
/// <param name="startOffset">Start offset value to write.</param>
/// <param name="endOffset">End offset value to write.</param>
/// <param name="payload">Payload bytes, or null for none.</param>
public override void AddPosition(int position, int startOffset, int endOffset, BytesRef payload)
{
    Debug.Assert(_positions || _offsets);

    if (_positions)
    {
        Write(POSITION);
        Write(position.ToString(CultureInfo.InvariantCulture));
        NewLine();

        if (_payloads)
        {
            Write(PAYLOAD);
            if (payload != null)
            {
                Debug.Assert(payload.Length > 0);
                Write(payload);
            }
            NewLine();
        }
    }

    if (_offsets)
    {
        Write(STARTOFFSET);
        Write(startOffset.ToString(CultureInfo.InvariantCulture));
        NewLine();

        Write(ENDOFFSET);
        Write(endOffset.ToString(CultureInfo.InvariantCulture));
        NewLine();
    }
}
/// <summary>
/// Writes the live-docs file for a segment: a SIZE line, one DOC line per set
/// bit, an END line and a trailing checksum.
/// </summary>
/// <param name="bits">Live docs; cast to <c>SimpleTextBits</c> to reach the underlying bit set.</param>
/// <param name="dir">Directory in which the output file is created.</param>
/// <param name="info">Segment commit info; supplies the segment name and next deletion generation for the file name.</param>
/// <param name="newDelCount">Not used by this implementation.</param>
/// <param name="context">IO context passed to <c>dir.CreateOutput</c>.</param>
public override void WriteLiveDocs(MutableBits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context)
{
    var set = ((SimpleTextBits) bits).BITS;
    var size = bits.Length();
    var scratch = new BytesRef();

    var fileName = IndexFileNames.FileNameFromGeneration(info.Info.Name, LIVEDOCS_EXTENSION, info.NextDelGen);
    IndexOutput output = null;
    var success = false;
    try
    {
        output = dir.CreateOutput(fileName, context);

        // Numbers are formatted with the invariant culture so the file content
        // never depends on the culture of the writing thread.
        SimpleTextUtil.Write(output, SIZE);
        SimpleTextUtil.Write(output, Convert.ToString(size, CultureInfo.InvariantCulture), scratch);
        SimpleTextUtil.WriteNewline(output);

        for (int i = set.NextSetBit(0); i >= 0; i = set.NextSetBit(i + 1))
        {
            SimpleTextUtil.Write(output, DOC);
            SimpleTextUtil.Write(output, Convert.ToString(i, CultureInfo.InvariantCulture), scratch);
            SimpleTextUtil.WriteNewline(output);
        }

        SimpleTextUtil.Write(output, END);
        SimpleTextUtil.WriteNewline(output);
        SimpleTextUtil.WriteChecksum(output, scratch);
        success = true;
    }
    finally
    {
        // On failure, close without letting a secondary exception mask the original one.
        if (success)
        {
            IOUtils.Close(output);
        }
        else
        {
            IOUtils.CloseWhileHandlingException(output);
        }
    }
}
/// <summary>
/// Parses the integer found in <paramref name="bytes"/> starting
/// <paramref name="offset"/> bytes into its content and running to its end.
/// </summary>
/// <param name="bytes">Source bytes.</param>
/// <param name="offset">Number of leading bytes to skip.</param>
/// <param name="scratch">Reusable buffer that receives the UTF-16 conversion.</param>
/// <returns>The value returned by <c>ArrayUtil.ParseInt</c> for the converted characters.</returns>
private static int ParseIntAt(BytesRef bytes, int offset, CharsRef scratch)
{
    var start = bytes.Offset + offset;
    var count = bytes.Length - offset;
    UnicodeUtil.UTF8toUTF16(bytes.Bytes, start, count, scratch);

    return ArrayUtil.ParseInt(scratch.Chars, 0, scratch.Length);
}
/// <summary>
/// Positions the enum using <c>_fstEnum.SeekCeil</c> and, when an entry is
/// returned, loads <c>_docsStart</c>, <c>_docFreq</c> and
/// <c>_totalTermFreq</c> from its output.
/// </summary>
/// <param name="text">The term to seek to.</param>
/// <returns>
/// <c>END</c> when the FST enum returns null, <c>FOUND</c> when the returned
/// input equals <paramref name="text"/>, otherwise <c>NOT_FOUND</c>.
/// </returns>
public override SeekStatus SeekCeil(BytesRef text)
{
    var hit = _fstEnum.SeekCeil(text);
    if (hit == null)
    {
        return SeekStatus.END;
    }

    // Output layout: (docsStart, (docFreq, totalTermFreq)).
    var outer = hit.Output;
    var stats = outer.Output2;
    _docsStart = outer.Output1.Value;
    _docFreq = (int) stats.Output1;
    _totalTermFreq = stats.Output2.Value;

    if (hit.Input.Equals(text))
    {
        return SeekStatus.FOUND;
    }

    return SeekStatus.NOT_FOUND;
}
/// <summary>
/// Decodes, as UTF-8, the part of <paramref name="scratch"/> that begins
/// <paramref name="offset"/> bytes into its content and runs to its end.
/// </summary>
/// <param name="offset">Number of leading bytes to skip.</param>
/// <param name="scratch">Buffer holding the line that was read.</param>
/// <returns>The decoded string.</returns>
private static string ReadString(int offset, BytesRef scratch)
{
    // Decode explicitly. Calling ToString() on a byte sub-list does not
    // produce the text those bytes encode.
    return Encoding.UTF8.GetString(scratch.Bytes, scratch.Offset + offset, scratch.Length - offset);
}
/// <summary>
/// Looks up <paramref name="text"/> with <c>_fstEnum.SeekExact</c> and, when an
/// entry is returned, loads <c>_docsStart</c>, <c>_docFreq</c> and
/// <c>_totalTermFreq</c> from its output.
/// </summary>
/// <param name="text">The term to look up.</param>
/// <returns><c>false</c> when the FST enum returns null; otherwise <c>true</c>.</returns>
public override bool SeekExact(BytesRef text)
{
    var hit = _fstEnum.SeekExact(text);
    if (hit == null)
    {
        return false;
    }

    // Output layout: (docsStart, (docFreq, totalTermFreq)).
    var outer = hit.Output;
    var stats = outer.Output2;
    _docsStart = outer.Output1.Value;
    _docFreq = (int) stats.Output1;
    _totalTermFreq = stats.Output2.Value;

    return true;
}
/// <summary>
/// Tests whether <paramref name="a"/> has the same content as the part of
/// <paramref name="b"/> that follows its first <paramref name="bOffset"/> bytes.
/// </summary>
/// <param name="a">The bytes to compare.</param>
/// <param name="b">The bytes whose tail is compared against <paramref name="a"/>.</param>
/// <param name="bOffset">Number of leading bytes of <paramref name="b"/> to skip.</param>
/// <returns><c>true</c> when the lengths match and <c>ArrayUtil.Equals</c> reports equality.</returns>
private bool EqualsAt(BytesRef a, BytesRef b, int bOffset)
{
    var tailLength = b.Length - bOffset;
    if (a.Length != tailLength)
    {
        return false;
    }

    return ArrayUtil.Equals(a.Bytes, a.Offset, b.Bytes, b.Offset + bOffset, tailLength);
}
/// <summary>
/// Converts the part of <paramref name="scratch"/> that begins
/// <paramref name="offset"/> bytes into its content to UTF-16, using the shared
/// <c>_scratchUtf16</c> buffer, and returns it as a string.
/// </summary>
/// <param name="offset">Number of leading bytes to skip.</param>
/// <param name="scratch">Buffer holding the line that was read.</param>
/// <returns>The string form of the converted characters.</returns>
private string ReadString(int offset, BytesRef scratch)
{
    var start = scratch.Offset + offset;
    var count = scratch.Length - offset;
    UnicodeUtil.UTF8toUTF16(scratch.Bytes, start, count, _scratchUtf16);

    return _scratchUtf16.ToString();
}
/// <summary>
/// Finishes the current term: copies its statistics into a fresh term state
/// and a <c>FSTTermOutputs.TermData</c>, has the postings writer encode the
/// term metadata, and adds the term with that metadata to <c>_builder</c>.
/// </summary>
/// <param name="text">The term bytes.</param>
/// <param name="stats">Supplies <c>DocFreq</c> and <c>TotalTermFreq</c> for the term.</param>
public override void FinishTerm(BytesRef text, TermStats stats)
{
    var postingsWriter = _outerInstance._postingsWriter;
    var state = postingsWriter.NewTermState();

    var meta = new FSTTermOutputs.TermData();
    meta.LONGS = new long[_longsSize];
    meta.BYTES = null;
    meta.DOC_FREQ = state.DocFreq = stats.DocFreq;
    meta.TOTAL_TERM_FREQ = state.TotalTermFreq = stats.TotalTermFreq;

    postingsWriter.FinishTerm(state);
    postingsWriter.EncodeTerm(meta.LONGS, _metaWriter, _fieldInfo, state, true);

    // Move whatever EncodeTerm wrote into _metaWriter over to the term data,
    // then reset the writer for the next term.
    var bytesSize = (int) _metaWriter.FilePointer;
    if (bytesSize > 0)
    {
        meta.BYTES = new byte[bytesSize];
        _metaWriter.WriteTo(meta.BYTES, 0);
        _metaWriter.Reset();
    }

    _builder.Add(Util.ToIntsRef(text, _scratchTerm), meta);
    _numTerms++;
}
/// <summary>
/// Begins a new term by calling <c>StartTerm</c> on the outer instance's
/// postings writer.
/// </summary>
/// <param name="text">The term being started; not used by this method.</param>
/// <returns>The outer instance's postings writer.</returns>
public override PostingsConsumer StartTerm(BytesRef text)
{
    var postingsWriter = _outerInstance._postingsWriter;
    postingsWriter.StartTerm();
    return postingsWriter;
}
/// <summary>
/// Positions <c>_iterator</c> on the first entry of <c>_terms</c> whose key
/// compares greater than or equal to <paramref name="text"/>.
/// </summary>
/// <param name="text">The term to seek to.</param>
/// <returns>
/// <c>END</c> when no such entry exists, <c>FOUND</c> when the entry's key
/// equals <paramref name="text"/>, otherwise <c>NOT_FOUND</c>.
/// </returns>
public override SeekStatus SeekCeil(BytesRef text)
{
    // Collect every entry at or after the requested term.
    var tail = new SortedDictionary<BytesRef, SimpleTVPostings>();
    foreach (var entry in _terms.Where(candidate => candidate.Key.CompareTo(text) >= 0))
    {
        tail.Add(entry.Key, entry.Value);
    }

    _iterator = tail.EntrySet().GetEnumerator();

    // Use MoveNext's result to detect the empty case instead of relying on a
    // catch-all handler, which would also hide unrelated failures.
    if (!_iterator.MoveNext())
    {
        return SeekStatus.END;
    }

    return _iterator.Current.Key.Equals(text) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
}