/// <summary>
/// Reads the value stored for <paramref name="ord"/> into <paramref name="result"/>.
/// </summary>
/// <param name="ord">Ordinal to look up; must satisfy <c>0 &lt;= ord &lt; _field.NumValues</c>.</param>
/// <param name="result">Receives a newly allocated byte array holding the value (offset 0).</param>
/// <exception cref="IndexOutOfRangeException">If <paramref name="ord"/> is out of range.</exception>
/// <exception cref="CorruptIndexException">If the length line cannot be parsed as an integer.</exception>
public override void LookupOrd(long ord, BytesRef result)
{
    if (ord < 0 || ord >= _field.NumValues)
    {
        throw new IndexOutOfRangeException("ord must be 0 .. " + (_field.NumValues - 1) + "; got " + ord);
    }

    // Records are addressed by ordinal: the stride is 9 + pattern length + max value length.
    _input.Seek(_field.DataStartFilePointer + ord * (9 + _field.Pattern.Length + _field.MaxLength));
    SimpleTextUtil.ReadLine(_input, _scratch);
    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH),
        "got " + _scratch.Utf8ToString() + " in=" + _input);

    int len;
    try
    {
        // Decode the digits that follow the LENGTH prefix explicitly as UTF-8 and parse them with the
        // invariant culture. The previous code called ToString() on a byte sub-list, which does not
        // decode the bytes, and relied on a Java-style decimal-format parser.
        var prefixLength = SimpleTextDocValuesWriter.LENGTH.Length;
        var digits = System.Text.Encoding.UTF8.GetString(
            _scratch.Bytes, _scratch.Offset + prefixLength, _scratch.Length - prefixLength);
        len = int.Parse(digits, System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture);
    }
    catch (Exception pe)
    {
        // Any failure to obtain a length is reported as index corruption, keeping the cause attached.
        throw new CorruptIndexException("failed to parse int length (resource=" + _input + ")", pe);
    }

    result.Bytes = new byte[len];
    result.Offset = 0;
    result.Length = len;
    _input.ReadBytes(result.Bytes, 0, len);
}
/// <summary>
/// Verifies the checksum footer of a SimpleText file: the next line must start with the
/// CHECKSUM prefix, its value must equal the checksum reported by <paramref name="input"/>,
/// and nothing may follow it.
/// </summary>
/// <param name="input">Checksumming input positioned at the start of the footer line.</param>
/// <exception cref="CorruptIndexException">
/// If the footer line is missing, the checksum differs, or trailing data follows the footer.
/// </exception>
public static void CheckFooter(ChecksumIndexInput input)
{
    var scratch = new BytesRef();

    // The expected value is taken from the input before the footer line itself is consumed.
    // Format with the invariant culture so the text never depends on the current locale.
    var expectedChecksum = string.Format(
        System.Globalization.CultureInfo.InvariantCulture, "{0:D}", input.Checksum);

    ReadLine(input, scratch);
    if (!StringHelper.StartsWith(scratch, CHECKSUM))
    {
        throw new CorruptIndexException("SimpleText failure: expected checksum line but got " +
                                        scratch.Utf8ToString() + " (resource=" + input + ")");
    }

    // Slice the value relative to scratch.Offset so a non-zero offset cannot misalign the read.
    var actualChecksum = new BytesRef(
        scratch.Bytes, scratch.Offset + CHECKSUM.Length, scratch.Length - CHECKSUM.Length).Utf8ToString();
    if (!expectedChecksum.Equals(actualChecksum))
    {
        throw new CorruptIndexException("SimpleText checksum failure: " + actualChecksum + " != " +
                                        expectedChecksum + " (resource=" + input + ")");
    }

    if (input.Length() != input.FilePointer)
    {
        throw new CorruptIndexException(
            "Unexpected stuff at the end of file, please be careful with your text editor! (resource=" + input + ")");
    }
}
/// <summary>
/// Loads the bytes stored for ordinal <paramref name="ord"/> into <paramref name="result"/>.
/// </summary>
/// <param name="ord">Ordinal to read; valid values are <c>0 .. _field.NumValues - 1</c>.</param>
/// <param name="result">Target reference; its array is replaced with a new one of the parsed length.</param>
/// <exception cref="IndexOutOfRangeException">If <paramref name="ord"/> is outside the valid range.</exception>
/// <exception cref="CorruptIndexException">If the length line cannot be parsed.</exception>
public override void LookupOrd(long ord, BytesRef result)
{
    if (ord < 0 || ord >= _field.NumValues)
    {
        throw new IndexOutOfRangeException("ord must be 0 .. " + (_field.NumValues - 1) + "; got " + ord);
    }

    // Position the input at the record belonging to this ordinal.
    var bytesPerRecord = 9 + _field.Pattern.Length + _field.MaxLength;
    var recordStart = _field.DataStartFilePointer + ord * bytesPerRecord;
    _input.Seek(recordStart);

    SimpleTextUtil.ReadLine(_input, _scratch);
    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH),
        "got " + _scratch.Utf8ToString() + " in=" + _input);

    int valueLength;
    try
    {
        // The integer follows the LENGTH prefix; it is parsed with the built-in integer style.
        var prefixLength = SimpleTextDocValuesWriter.LENGTH.Length;
        var digits = Encoding.UTF8.GetString(
            _scratch.Bytes, _scratch.Offset + prefixLength, _scratch.Length - prefixLength);
        valueLength = int.Parse(digits, NumberStyles.Integer, CultureInfo.InvariantCulture);
    }
    catch (Exception parseFailure)
    {
        throw new CorruptIndexException("failed to parse int length (resource=" + _input + ")", parseFailure);
    }

    result.Bytes = new byte[valueLength];
    result.Offset = 0;
    result.Length = valueLength;
    _input.ReadBytes(result.Bytes, 0, valueLength);
}
/// <summary>
/// Reads the record at <paramref name="index"/> and reports whether its flag line starts with 'T'.
/// </summary>
/// <param name="index">Zero-based record index.</param>
/// <returns><c>true</c> if the first byte of the flag line is 'T'; otherwise <c>false</c>.</returns>
/// <exception cref="CorruptIndexException">If the length line is not a valid integer.</exception>
public bool Get(int index)
{
    // Multiply in 64-bit arithmetic so a large index cannot overflow before the seek.
    long recordWidth = 9 + _field.Pattern.Length + _field.MaxLength + 2;
    _input.Seek(_field.DataStartFilePointer + recordWidth * index);

    SimpleTextUtil.ReadLine(_input, _scratch);
    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH));

    int len;
    try
    {
        // Decode the digits after the LENGTH prefix as UTF-8 text. ToString() on a byte sub-list
        // does not decode its contents, so it cannot be used to obtain the number.
        var prefixLength = SimpleTextDocValuesWriter.LENGTH.Length;
        var digits = System.Text.Encoding.UTF8.GetString(
            _scratch.Bytes, _scratch.Offset + prefixLength, _scratch.Length - prefixLength);
        len = int.Parse(digits, System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture);
    }
    catch (FormatException ex)
    {
        throw new CorruptIndexException("failed to parse int value (resource=" + _input + ")", ex);
    }

    // Skip past the value bytes.
    var bytes = new byte[len];
    _input.ReadBytes(bytes, 0, len);
    SimpleTextUtil.ReadLine(_input, _scratch); // newline terminating the value
    SimpleTextUtil.ReadLine(_input, _scratch); // 'T' or 'F'
    return _scratch.Bytes[_scratch.Offset] == (byte)'T';
}
/// <summary>
/// Reads the binary value stored for <paramref name="docId"/> into <paramref name="result"/>.
/// </summary>
/// <param name="docId">Document id; must satisfy <c>0 &lt;= docId &lt; _outerInstance.MAX_DOC</c>.</param>
/// <param name="result">Receives a newly allocated byte array holding the value (offset 0).</param>
/// <exception cref="IndexOutOfRangeException">If <paramref name="docId"/> is out of range.</exception>
/// <exception cref="CorruptIndexException">If the length line is not a valid integer.</exception>
public override void Get(int docId, BytesRef result)
{
    if (docId < 0 || docId >= _outerInstance.MAX_DOC)
    {
        throw new IndexOutOfRangeException("docID must be 0 .. " + (_outerInstance.MAX_DOC - 1) + "; got " + docId);
    }

    // Multiply in 64-bit arithmetic: record width times docId can exceed int.MaxValue on large segments.
    long recordWidth = 9 + _field.Pattern.Length + _field.MaxLength + 2;
    _input.Seek(_field.DataStartFilePointer + recordWidth * docId);

    SimpleTextUtil.ReadLine(_input, _scratch);
    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH));

    int len;
    try
    {
        // LUCENENET: .NET has no integer pattern parser, but the standard integer style covers this.
        var prefixLength = SimpleTextDocValuesWriter.LENGTH.Length;
        len = int.Parse(
            Encoding.UTF8.GetString(_scratch.Bytes, _scratch.Offset + prefixLength, _scratch.Length - prefixLength),
            NumberStyles.Integer, CultureInfo.InvariantCulture);
    }
    catch (FormatException ex)
    {
        throw new CorruptIndexException("failed to parse int value (resource=" + _input + ")", ex);
    }

    result.Bytes = new byte[len];
    result.Offset = 0;
    result.Length = len;
    _input.ReadBytes(result.Bytes, 0, len);
}
/// <summary>
/// Reads the binary value stored for <paramref name="docId"/> into <paramref name="result"/>.
/// </summary>
/// <param name="docId">Document id; must satisfy <c>0 &lt;= docId &lt; _outerInstance.MAX_DOC</c>.</param>
/// <param name="result">Receives a newly allocated byte array holding the value (offset 0).</param>
/// <exception cref="IndexOutOfRangeException">If <paramref name="docId"/> is out of range.</exception>
/// <exception cref="CorruptIndexException">If the length line is not a valid integer.</exception>
public override void Get(int docId, BytesRef result)
{
    if (docId < 0 || docId >= _outerInstance.MAX_DOC)
    {
        throw new IndexOutOfRangeException("docID must be 0 .. " + (_outerInstance.MAX_DOC - 1) + "; got " + docId);
    }

    // Multiply in 64-bit arithmetic so the seek target cannot overflow on large segments.
    long recordWidth = 9 + _field.Pattern.Length + _field.MaxLength + 2;
    _input.Seek(_field.DataStartFilePointer + recordWidth * docId);

    SimpleTextUtil.ReadLine(_input, _scratch);
    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH));

    int len;
    try
    {
        // Decode the digits after the LENGTH prefix as UTF-8 text. ToString() on a byte sub-list
        // does not decode its contents, so it cannot be used to obtain the number.
        var prefixLength = SimpleTextDocValuesWriter.LENGTH.Length;
        var digits = System.Text.Encoding.UTF8.GetString(
            _scratch.Bytes, _scratch.Offset + prefixLength, _scratch.Length - prefixLength);
        len = int.Parse(digits, System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture);
    }
    catch (FormatException ex)
    {
        throw new CorruptIndexException("failed to parse int value (resource=" + _input + ")", ex);
    }

    result.Bytes = new byte[len];
    result.Offset = 0;
    result.Length = len;
    _input.ReadBytes(result.Bytes, 0, len);
}
/// <summary>
/// Reads the value stored for <paramref name="ord"/> into <paramref name="result"/>.
/// </summary>
/// <param name="ord">Ordinal to look up; rejected unless <c>0 &lt;= ord &lt; field.NumValues</c>.</param>
/// <param name="result">Receives a newly allocated array of the parsed length (offset 0).</param>
// NOTE(review): this variant still carries Java-converter artifacts (decoder.parse, ParseException,
// initCause, sbyte[]); confirm these members exist in this code base before relying on it.
public override void LookupOrd(long ord, BytesRef result)
{
    // Reject ordinals outside the field's value range.
    if (ord < 0 || ord >= field.NumValues)
    {
        throw new IndexOutOfRangeException("ord must be 0 .. " + (field.NumValues - 1) + "; got " + ord);
    }
    // Seek to the record for this ordinal; the stride is 9 + pattern length + max value length.
    @in.Seek(field.DataStartFilePointer + ord * (9 + field.Pattern.Length + field.MaxLength));
    SimpleTextUtil.ReadLine(@in, scratch);
    Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextDocValuesWriter.LENGTH), "got " + scratch.Utf8ToString() + " in=" + @in);
    int len;
    try
    {
        // Parse the text that follows the LENGTH prefix.
        // NOTE(review): ToString() on the sub-list presumably does not decode the bytes as text — verify.
        len = (int) decoder.parse(scratch.Bytes.SubList(
            scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
            scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length).ToString());
    }
    catch (ParseException pe)
    {
        // Report a parse failure as index corruption, attaching the original exception as the cause.
        CorruptIndexException e = new CorruptIndexException("failed to parse int length (resource=" + @in + ")");
        e.initCause(pe);
        throw e;
    }
    // Hand the caller a fresh buffer of exactly the parsed length and fill it from the input.
    result.Bytes = new sbyte[len];
    result.Offset = 0;
    result.Length = len;
    @in.ReadBytes(result.Bytes, 0, len);
}
/// <summary>
/// Advances to the next position of the current document, reading the optional offset and
/// payload lines that accompany it.
/// </summary>
/// <returns>The parsed position, or <c>-1</c> when positions are not being read.</returns>
public override int NextPosition()
{
    int pos;
    if (_readPositions)
    {
        SimpleTextUtil.ReadLine(_in, _scratch);
        Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS),
            "got line=" + _scratch.Utf8ToString());
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.POS.Length,
            _scratch.Length - SimpleTextFieldsWriter.POS.Length, _scratchUtf162);
        pos = ArrayUtil.ParseInt(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
    }
    else
    {
        pos = -1;
    }

    if (_readOffsets)
    {
        SimpleTextUtil.ReadLine(_in, _scratch);
        Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET),
            "got line=" + _scratch.Utf8ToString());
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.START_OFFSET.Length,
            _scratch.Length - SimpleTextFieldsWriter.START_OFFSET.Length, _scratchUtf162);
        _startOffset = ArrayUtil.ParseInt(_scratchUtf162.Chars, 0, _scratchUtf162.Length);

        SimpleTextUtil.ReadLine(_in, _scratch);
        Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET),
            "got line=" + _scratch.Utf8ToString());
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.END_OFFSET.Length,
            _scratch.Length - SimpleTextFieldsWriter.END_OFFSET.Length, _scratchUtf162);
        _endOffset = ArrayUtil.ParseInt(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
    }

    // Peek at the next line: it is consumed only if it is a payload line; otherwise rewind.
    long fp = _in.FilePointer;
    SimpleTextUtil.ReadLine(_in, _scratch);
    if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
    {
        int len = _scratch.Length - SimpleTextFieldsWriter.PAYLOAD.Length;
        if (_scratch2.Bytes.Length < len)
        {
            _scratch2.Grow(len);
        }

        // Copy relative to _scratch.Offset, consistent with every other read of _scratch in this method.
        Array.Copy(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.PAYLOAD.Length, _scratch2.Bytes, 0, len);
        _scratch2.Length = len;
        _payload = _scratch2;
    }
    else
    {
        _payload = null;
        _in.Seek(fp);
    }

    return pos;
}
/// <summary>
/// Reads the live-docs file of a segment: a SIZE line, one DOC line per set bit, an END line
/// and a checksum footer.
/// </summary>
/// <param name="dir">Directory the file is opened from.</param>
/// <param name="info">Segment commit info; supplies the segment name and deletion generation.</param>
/// <param name="context">I/O context passed to the directory.</param>
/// <returns>A <c>SimpleTextBits</c> wrapping the bits that were read.</returns>
public override Bits ReadLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context)
{
    Debug.Assert(info.HasDeletions());

    var lineBuffer = new BytesRef();
    var charBuffer = new CharsRef();
    var liveDocsFile = IndexFileNames.FileNameFromGeneration(info.Info.Name, LIVEDOCS_EXTENSION, info.DelGen);

    ChecksumIndexInput input = null;
    var completed = false;
    try
    {
        input = dir.OpenChecksumInput(liveDocsFile, context);

        SimpleTextUtil.ReadLine(input, lineBuffer);
        Debug.Assert(StringHelper.StartsWith(lineBuffer, SIZE));
        var size = ParseIntAt(lineBuffer, SIZE.Length, charBuffer);
        var bits = new BitArray(size);

        // Each line up to END names one document whose bit is set.
        for (SimpleTextUtil.ReadLine(input, lineBuffer);
             !lineBuffer.Equals(END);
             SimpleTextUtil.ReadLine(input, lineBuffer))
        {
            Debug.Assert(StringHelper.StartsWith(lineBuffer, DOC));
            var docid = ParseIntAt(lineBuffer, DOC.Length, charBuffer);
            bits.Set(docid, true);
        }

        SimpleTextUtil.CheckFooter(input);
        completed = true;
        return new SimpleTextBits(bits, size);
    }
    finally
    {
        // On failure, close without masking the exception already in flight.
        if (!completed)
        {
            IOUtils.CloseWhileHandlingException(input);
        }
        else
        {
            IOUtils.Close(input);
        }
    }
}
/// <summary>
/// Reads the VALUE line of the current stored field and passes it to <paramref name="visitor"/>
/// converted according to <paramref name="type"/>.
/// </summary>
/// <param name="type">One of the <c>SimpleTextStoredFieldsWriter.TYPE_*</c> markers.</param>
/// <param name="fieldInfo">Field the value belongs to.</param>
/// <param name="visitor">Receiver of the decoded value.</param>
private void ReadField(BytesRef type, FieldInfo fieldInfo, StoredFieldVisitor visitor)
{
    ReadLine();
    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.VALUE));

    // The value is everything after the VALUE prefix on the current line.
    var valueOffset = _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length;
    var valueLength = _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length;

    // Numbers in the file are machine-readable text, so they are parsed with the invariant culture;
    // the current culture would misread '.' in locales that use ',' as the decimal separator.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_STRING))
    {
        // Decode the bytes as UTF-8; ToString() on a byte sub-list does not decode its contents.
        visitor.StringField(fieldInfo,
            System.Text.Encoding.UTF8.GetString(_scratch.Bytes, valueOffset, valueLength));
    }
    else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_BINARY))
    {
        // Hand the visitor its own copy, since _scratch is reused for the next line.
        var copy = new byte[valueLength];
        Array.Copy(_scratch.Bytes, valueOffset, copy, 0, copy.Length);
        visitor.BinaryField(fieldInfo, copy);
    }
    else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_INT))
    {
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
        visitor.IntField(fieldInfo, Convert.ToInt32(_scratchUtf16.ToString(), invariant));
    }
    else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_LONG))
    {
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
        visitor.LongField(fieldInfo, Convert.ToInt64(_scratchUtf16.ToString(), invariant));
    }
    else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_FLOAT))
    {
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
        visitor.FloatField(fieldInfo, Convert.ToSingle(_scratchUtf16.ToString(), invariant));
    }
    else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_DOUBLE))
    {
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
        visitor.DoubleField(fieldInfo, Convert.ToDouble(_scratchUtf16.ToString(), invariant));
    }
}
// No separate .tvx-like index file is written. Instead the whole vectors file is scanned
// once up front and the file pointer following each DOC line is remembered, so the data
// for a document can be reached with a seek later on.
private void ReadIndex(int maxDoc)
{
    ChecksumIndexInput checksumInput = new BufferedChecksumIndexInput(_input);
    _offsets = new long[maxDoc];
    int docsSeen = 0;

    while (true)
    {
        // The END line terminates the scan.
        if (_scratch.Equals(END))
        {
            break;
        }

        SimpleTextUtil.ReadLine(checksumInput, _scratch);
        if (!StringHelper.StartsWith(_scratch, DOC))
        {
            continue;
        }

        _offsets[docsSeen] = checksumInput.FilePointer;
        docsSeen++;
    }

    SimpleTextUtil.CheckFooter(checksumInput);
    Debug.Assert(docsSeen == _offsets.Length);
}
/// <summary>
/// Builds the in-memory document offset table for the stored fields file.
/// </summary>
/// <remarks>
/// There is no .fdx-like index on disk: the stored fields file is read in full up front and
/// the file pointer after every DOC line is recorded so documents can be seeked to later.
/// </remarks>
/// <param name="size">Number of entries allocated for the offset table.</param>
private void ReadIndex(int size)
{
    ChecksumIndexInput checksumInput = new BufferedChecksumIndexInput(_input);
    _offsets = new long[size];

    var nextSlot = 0;
    for (; !_scratch.Equals(SimpleTextStoredFieldsWriter.END);)
    {
        SimpleTextUtil.ReadLine(checksumInput, _scratch);

        var startsDocument = StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.DOC);
        if (startsDocument)
        {
            _offsets[nextSlot] = checksumInput.FilePointer;
            nextSlot++;
        }
    }

    SimpleTextUtil.CheckFooter(checksumInput);
    Debug.Assert(nextSlot == _offsets.Length);
}
/// <summary>
/// Scans the postings file once and records, for every FIELD line, the file pointer that
/// follows it. Scanning stops at the END line, after which the checksum footer is verified.
/// </summary>
/// <param name="in">Input to scan; it is wrapped in a checksumming reader.</param>
/// <returns>Field names mapped to the file pointer following their FIELD line, in ordinal order.</returns>
private SortedDictionary<string, long?> ReadFields(IndexInput @in)
{
    ChecksumIndexInput input = new BufferedChecksumIndexInput(@in);
    var scratch = new BytesRef(10);

    // Field names are identifiers, not linguistic text. The default string comparer is
    // culture-sensitive, so ordering (and even key equality) could vary with the current
    // culture; an ordinal comparer keeps the map deterministic.
    var fields = new SortedDictionary<string, long?>(System.StringComparer.Ordinal);

    while (true)
    {
        SimpleTextUtil.ReadLine(input, scratch);
        if (scratch.Equals(SimpleTextFieldsWriter.END))
        {
            SimpleTextUtil.CheckFooter(input);
            return fields;
        }

        if (StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.FIELD))
        {
            var fieldName = Encoding.UTF8.GetString(
                scratch.Bytes,
                scratch.Offset + SimpleTextFieldsWriter.FIELD.Length,
                scratch.Length - SimpleTextFieldsWriter.FIELD.Length);
            fields[fieldName] = input.FilePointer;
        }
    }
}
/// <summary>
/// Used only in ctor: tests whether SCRATCH begins with <paramref name="prefix"/>.
/// </summary>
/// <param name="prefix">Prefix to test for.</param>
/// <returns>The result of <c>StringHelper.StartsWith</c> for SCRATCH and the prefix.</returns>
private bool StartsWith(BytesRef prefix)
{
    bool hasPrefix = StringHelper.StartsWith(SCRATCH, prefix);
    return hasPrefix;
}
/// <summary>
/// Advances to the next live document of the current term.
/// </summary>
/// <remarks>
/// Lines are scanned from <c>_nextDocStart</c>. A document is complete when the next DOC line
/// or a TERM/FIELD/END line is reached. The input is then left at the point recorded after the
/// document's FREQ line, and <c>_nextDocStart</c> is set to the line that ended the document.
/// </remarks>
/// <returns>The document id, or <c>NO_MORE_DOCS</c> when the postings are exhausted.</returns>
public override int NextDoc()
{
    var sawDoc = false;
    _in.Seek(_nextDocStart);
    long positionsStart = 0;

    for (;;)
    {
        long lineStart = _in.FilePointer;
        SimpleTextUtil.ReadLine(_in, _scratch);

        var isDocLine = StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC);
        if (!isDocLine)
        {
            if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
            {
                UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length,
                    _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16);
                _tf = ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                positionsStart = _in.FilePointer;
                continue;
            }

            // Position, offset and payload lines are skipped here.
            if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS)
                || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET)
                || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET)
                || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
            {
                continue;
            }

            // Anything else must end the current term's postings.
            Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM)
                         || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD)
                         || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END));
        }

        // A previously parsed document is complete now; return it if it is live.
        if (sawDoc && (_liveDocs == null || _liveDocs.Get(_docId)))
        {
            _nextDocStart = lineStart;
            _in.Seek(positionsStart);
            return _docId;
        }

        if (!isDocLine)
        {
            return _docId = NO_MORE_DOCS;
        }

        // Start tracking the document introduced by this DOC line.
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length,
            _scratch.Length - SimpleTextFieldsWriter.DOC.Length, _scratchUtf16);
        _docId = ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
        _tf = 0;
        sawDoc = true;
    }
}
/// <summary>
/// Reads a segment-info file: version, document count, compound-file flag, diagnostics and
/// file list, followed by the checksum footer.
/// </summary>
/// <param name="directory">Directory containing the segment-info file.</param>
/// <param name="segmentName">Name of the segment to read.</param>
/// <param name="context">I/O context passed to the directory.</param>
/// <returns>The populated <c>SegmentInfo</c>.</returns>
public override SegmentInfo Read(Directory directory, string segmentName, IOContext context)
{
    var line = new BytesRef();
    var siFileName = IndexFileNames.SegmentFileName(segmentName, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
    ChecksumIndexInput input = directory.OpenChecksumInput(siFileName, context);
    var completed = false;
    try
    {
        SimpleTextUtil.ReadLine(input, line);
        Debug.Assert(StringHelper.StartsWith(line, SI_VERSION));
        var version = ReadString(SI_VERSION.length, line);

        SimpleTextUtil.ReadLine(input, line);
        Debug.Assert(StringHelper.StartsWith(line, SI_DOCCOUNT));
        var docCount = Convert.ToInt32(ReadString(SI_DOCCOUNT.length, line));

        SimpleTextUtil.ReadLine(input, line);
        Debug.Assert(StringHelper.StartsWith(line, SI_USECOMPOUND));
        var isCompoundFile = Convert.ToBoolean(ReadString(SI_USECOMPOUND.length, line));

        // Diagnostics are stored as a count followed by key/value line pairs.
        SimpleTextUtil.ReadLine(input, line);
        Debug.Assert(StringHelper.StartsWith(line, SI_NUM_DIAG));
        var diagnosticCount = Convert.ToInt32(ReadString(SI_NUM_DIAG.length, line));
        IDictionary<string, string> diagnostics = new Dictionary<string, string>();
        for (var remaining = diagnosticCount; remaining > 0; remaining--)
        {
            SimpleTextUtil.ReadLine(input, line);
            Debug.Assert(StringHelper.StartsWith(line, SI_DIAG_KEY));
            var key = ReadString(SI_DIAG_KEY.length, line);

            SimpleTextUtil.ReadLine(input, line);
            Debug.Assert(StringHelper.StartsWith(line, SI_DIAG_VALUE));
            diagnostics[key] = ReadString(SI_DIAG_VALUE.length, line);
        }

        // The file list is stored as a count followed by one line per file name.
        SimpleTextUtil.ReadLine(input, line);
        Debug.Assert(StringHelper.StartsWith(line, SI_NUM_FILES));
        var fileCount = Convert.ToInt32(ReadString(SI_NUM_FILES.length, line));
        var files = new HashSet<string>();
        for (var remaining = fileCount; remaining > 0; remaining--)
        {
            SimpleTextUtil.ReadLine(input, line);
            Debug.Assert(StringHelper.StartsWith(line, SI_FILE));
            files.Add(ReadString(SI_FILE.length, line));
        }

        SimpleTextUtil.CheckFooter(input);

        var info = new SegmentInfo(directory, version, segmentName, docCount, isCompoundFile, null, diagnostics);
        info.Files = files;
        completed = true;
        return info;
    }
    finally
    {
        // On success close normally; on failure close without masking the original exception.
        if (completed)
        {
            input.Close();
        }
        else
        {
            IOUtils.CloseWhileHandlingException(input);
        }
    }
}
/// <summary>
/// Visits the stored fields of document <paramref name="n"/>, passing each field the visitor
/// asks for to <c>ReadField</c> and skipping the VALUE line of those it declines.
/// </summary>
/// <param name="n">Index into the document offset table.</param>
/// <param name="visitor">Visitor that decides, per field, whether to load, skip or stop.</param>
/// <exception cref="Exception">If a TYPE line names none of the known field types.</exception>
public override void VisitDocument(int n, StoredFieldVisitor visitor)
{
    _input.Seek(_offsets[n]);
    ReadLine();
    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.NUM));
    var fieldCount = ParseIntAt(SimpleTextStoredFieldsWriter.NUM.Length);

    // Type markers, tested in this order against the text following the TYPE prefix.
    BytesRef[] knownTypes =
    {
        SimpleTextStoredFieldsWriter.TYPE_STRING,
        SimpleTextStoredFieldsWriter.TYPE_BINARY,
        SimpleTextStoredFieldsWriter.TYPE_INT,
        SimpleTextStoredFieldsWriter.TYPE_LONG,
        SimpleTextStoredFieldsWriter.TYPE_FLOAT,
        SimpleTextStoredFieldsWriter.TYPE_DOUBLE
    };

    for (var fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++)
    {
        ReadLine();
        Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.FIELD));
        int fieldNumber = ParseIntAt(SimpleTextStoredFieldsWriter.FIELD.Length);
        FieldInfo fieldInfo = _fieldInfos.FieldInfo(fieldNumber);

        // The NAME line is read but its content is not used.
        ReadLine();
        Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.NAME));

        ReadLine();
        Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.TYPE));

        BytesRef type = null;
        var recognized = false;
        foreach (var candidate in knownTypes)
        {
            if (EqualsAt(candidate, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length))
            {
                type = candidate;
                recognized = true;
                break;
            }
        }

        if (!recognized)
        {
            throw new Exception("unknown field type");
        }

        var status = visitor.NeedsField(fieldInfo);
        if (status == StoredFieldVisitor.Status.YES)
        {
            ReadField(type, fieldInfo, visitor);
        }
        else if (status == StoredFieldVisitor.Status.NO)
        {
            // Consume the VALUE line without decoding it.
            ReadLine();
            Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.VALUE));
        }
        else if (status == StoredFieldVisitor.Status.STOP)
        {
            return;
        }
    }
}
/// <summary>
/// Scans this field's postings from <c>_termsStart</c> and builds the terms FST, mapping each
/// term to its docs start pointer, document frequency and total term frequency. Also
/// accumulates <c>_sumDocFreq</c>, <c>_sumTotalTermFreq</c>, <c>_termCount</c> and <c>_docCount</c>.
/// </summary>
private void LoadTerms()
{
    var positiveInts = PositiveIntOutputs.Singleton;
    var statsOutputs = new PairOutputs<long?, long?>(positiveInts, positiveInts);
    var termOutputs = new PairOutputs<long?, PairOutputs<long?, long?>.Pair>(positiveInts, statsOutputs);
    var fstBuilder = new Builder<PairOutputs<long?, PairOutputs<long?, long?>.Pair>.Pair>(
        FST.INPUT_TYPE.BYTE1, termOutputs);

    var input = (IndexInput)_outerInstance._input.Clone();
    input.Seek(_termsStart);

    var currentTerm = new BytesRef(10);
    long currentDocsStart = -1; // -1 until the first TERM line has been seen
    int docFreq = 0;
    long totalTermFreq = 0;
    var visitedDocs = new FixedBitSet(_maxDoc);
    var intsScratch = new IntsRef();

    for (;;)
    {
        SimpleTextUtil.ReadLine(input, _scratch);

        // END or the next FIELD line finishes this field: flush the pending term and stop.
        if (_scratch.Equals(SimpleTextFieldsWriter.END) ||
            StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD))
        {
            if (currentDocsStart != -1)
            {
                fstBuilder.Add(Util.ToIntsRef(currentTerm, intsScratch),
                    termOutputs.NewPair(currentDocsStart, statsOutputs.NewPair(docFreq, totalTermFreq)));
                _sumTotalTermFreq += totalTermFreq;
            }

            break;
        }

        if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC))
        {
            docFreq++;
            _sumDocFreq++;
            UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length,
                _scratch.Length - SimpleTextFieldsWriter.DOC.Length, _scratchUtf16);
            int docId = ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
            visitedDocs.Set(docId);
            continue;
        }

        if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
        {
            UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length,
                _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16);
            totalTermFreq += ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
            continue;
        }

        if (!StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM))
        {
            continue;
        }

        // A new TERM line: emit the previous term (if any), then start collecting the new one.
        if (currentDocsStart != -1)
        {
            fstBuilder.Add(Util.ToIntsRef(currentTerm, intsScratch),
                termOutputs.NewPair(currentDocsStart, statsOutputs.NewPair(docFreq, totalTermFreq)));
        }

        currentDocsStart = input.FilePointer;

        int termLength = _scratch.Length - SimpleTextFieldsWriter.TERM.Length;
        if (termLength > currentTerm.Length)
        {
            currentTerm.Grow(termLength);
        }

        Array.Copy(_scratch.Bytes, SimpleTextFieldsWriter.TERM.Length, currentTerm.Bytes, 0, termLength);
        currentTerm.Length = termLength;

        docFreq = 0;
        _sumTotalTermFreq += totalTermFreq;
        totalTermFreq = 0;
        _termCount++;
    }

    _docCount = visitedDocs.Cardinality();
    _fst = fstBuilder.Finish();
}
/// <summary>
/// Reads the term vectors of document <paramref name="doc"/> from the vectors file, starting at
/// the offset recorded for it in <c>_offsets</c>.
/// </summary>
/// <param name="doc">Index into the document offset table.</param>
/// <returns>
/// <c>null</c> when the document declares zero fields; otherwise a <c>SimpleTVFields</c>
/// wrapping the per-field terms that were read.
/// </returns>
// NOTE(review): this variant still uses Java-style members (SortedMap, put, lowercase
// length/bytes/offset/grow, sbyte[]); confirm they exist in this code base.
public override Fields Get(int doc)
{
    SortedMap <string, SimpleTVTerms> fields = new SortedDictionary <string, SimpleTVTerms>();
    _input.Seek(_offsets[doc]);
    ReadLine();
    Debug.Assert(StringHelper.StartsWith(_scratch, NUMFIELDS));
    int numFields = ParseIntAt(NUMFIELDS.length);
    if (numFields == 0)
    {
        return(null); // no vectors for this doc
    }
    for (int i = 0; i < numFields; i++)
    {
        ReadLine();
        Debug.Assert(StringHelper.StartsWith(_scratch, FIELD));
        // skip fieldNumber: the value is parsed but the result is discarded
        ParseIntAt(FIELD.length);
        ReadLine();
        Debug.Assert(StringHelper.StartsWith(_scratch, FIELDNAME));
        string fieldName = ReadString(FIELDNAME.length, _scratch);
        // Three boolean lines state whether positions, offsets and payloads were stored.
        ReadLine();
        Debug.Assert(StringHelper.StartsWith(_scratch, FIELDPOSITIONS));
        bool positions = Convert.ToBoolean(ReadString(FIELDPOSITIONS.length, _scratch));
        ReadLine();
        Debug.Assert(StringHelper.StartsWith(_scratch, FIELDOFFSETS));
        bool offsets = Convert.ToBoolean(ReadString(FIELDOFFSETS.length, _scratch));
        ReadLine();
        Debug.Assert(StringHelper.StartsWith(_scratch, FIELDPAYLOADS));
        bool payloads = Convert.ToBoolean(ReadString(FIELDPAYLOADS.length, _scratch));
        ReadLine();
        Debug.Assert(StringHelper.StartsWith(_scratch, FIELDTERMCOUNT));
        int termCount = ParseIntAt(FIELDTERMCOUNT.length);
        SimpleTVTerms terms = new SimpleTVTerms(offsets, positions, payloads);
        fields.put(fieldName, terms);
        for (int j = 0; j < termCount; j++)
        {
            ReadLine();
            Debug.Assert(StringHelper.StartsWith(_scratch, TERMTEXT));
            // Copy the term bytes out of the scratch buffer into a term-owned BytesRef.
            BytesRef term = new BytesRef();
            int termLength = _scratch.length - TERMTEXT.length;
            term.grow(termLength);
            term.length = termLength;
            Array.Copy(_scratch.bytes, _scratch.offset + TERMTEXT.length, term.bytes, term.offset, termLength);
            SimpleTVPostings postings = new SimpleTVPostings();
            terms.TERMS.put(term, postings);
            ReadLine();
            Debug.Assert(StringHelper.StartsWith(_scratch, TERMFREQ));
            postings.FREQ = ParseIntAt(TERMFREQ.length);
            if (positions || offsets)
            {
                // Allocate one slot per occurrence for whichever per-position data is present.
                if (positions)
                {
                    postings.POSITIONS = new int[postings.FREQ];
                    if (payloads)
                    {
                        postings.PAYLOADS = new
                            BytesRef[postings.FREQ];
                    }
                }
                if (offsets)
                {
                    postings.START_OFFSETS = new int[postings.FREQ];
                    postings.END_OFFSETS = new int[postings.FREQ];
                }
                for (int k = 0; k < postings.FREQ; k++)
                {
                    if (positions)
                    {
                        ReadLine();
                        Debug.Assert(StringHelper.StartsWith(_scratch, POSITION));
                        postings.POSITIONS[k] = ParseIntAt(POSITION.length);
                        if (payloads)
                        {
                            ReadLine();
                            Debug.Assert(StringHelper.StartsWith(_scratch, PAYLOAD));
                            // A PAYLOAD line with nothing after the prefix is stored as null.
                            if (_scratch.length - PAYLOAD.length == 0)
                            {
                                postings.PAYLOADS[k] = null;
                            }
                            else
                            {
                                sbyte[] payloadBytes = new sbyte[_scratch.length - PAYLOAD.length];
                                Array.Copy(_scratch.bytes, _scratch.offset + PAYLOAD.length, payloadBytes, 0, payloadBytes.Length);
                                postings.PAYLOADS[k] = new BytesRef(payloadBytes);
                            }
                        }
                    }
                    if (offsets)
                    {
                        ReadLine();
                        Debug.Assert(StringHelper.StartsWith(_scratch, STARTOFFSET));
                        postings.START_OFFSETS[k] = ParseIntAt(STARTOFFSET.length);
                        ReadLine();
                        Debug.Assert(StringHelper.StartsWith(_scratch, ENDOFFSET));
                        postings.END_OFFSETS[k] = ParseIntAt(ENDOFFSET.length);
                    }
                }
            }
        }
    }
    return(new SimpleTVFields(this, fields));
}
/// <summary>
/// Advances to the next live document of the current term.
/// </summary>
/// <remarks>
/// A document is complete when the next DOC line or a TERM/FIELD/END line is read. The input
/// is then rewound to the start of that line so the following call resumes from it. Unless
/// <c>_omitTf</c> is set, <c>_tf</c> receives the value parsed from the document's FREQ line
/// (0 if there was none).
/// </remarks>
/// <returns>The document id, or <c>NO_MORE_DOCS</c> when the postings are exhausted.</returns>
public override int NextDoc()
{
    if (_docId == NO_MORE_DOCS)
    {
        return _docId;
    }

    var sawDoc = false;
    int termFreq = 0;

    for (;;)
    {
        long lineStart = _in.FilePointer;
        SimpleTextUtil.ReadLine(_in, _scratch);

        var isDocLine = StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC);
        if (!isDocLine)
        {
            if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
            {
                UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length,
                    _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16);
                termFreq = ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                continue;
            }

            // Position, offset and payload lines are skipped; they do not change termFreq.
            if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS)
                || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET)
                || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET)
                || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
            {
                continue;
            }

            // Anything else must end the current term's postings.
            Debug.Assert(
                StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM)
                || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD)
                || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END),
                "scratch=" + _scratch.Utf8ToString());
        }

        // A previously parsed document is complete now; return it if it is live.
        if (sawDoc && (_liveDocs == null || _liveDocs.Get(_docId)))
        {
            _in.Seek(lineStart);
            if (!_omitTf)
            {
                _tf = termFreq;
            }

            return _docId;
        }

        if (!isDocLine)
        {
            return _docId = NO_MORE_DOCS;
        }

        // Start tracking the document introduced by this DOC line.
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length,
            _scratch.Length - SimpleTextFieldsWriter.DOC.Length, _scratchUtf16);
        _docId = ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
        termFreq = 0;
        sawDoc = true;
    }
}