public override int NextPosition() { int pos; if (_readPositions) { SimpleTextUtil.ReadLine(_in, _scratch); Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS), "got line=" + _scratch.Utf8ToString()); UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.POS.Length, _scratch.Length - SimpleTextFieldsWriter.POS.Length, _scratchUtf162); pos = ArrayUtil.ParseInt(_scratchUtf162.Chars, 0, _scratchUtf162.Length); } else { pos = -1; } if (_readOffsets) { SimpleTextUtil.ReadLine(_in, _scratch); Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET), "got line=" + _scratch.Utf8ToString()); UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.START_OFFSET.Length, _scratch.Length - SimpleTextFieldsWriter.START_OFFSET.Length, _scratchUtf162); _startOffset = ArrayUtil.ParseInt(_scratchUtf162.Chars, 0, _scratchUtf162.Length); SimpleTextUtil.ReadLine(_in, _scratch); Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET), "got line=" + _scratch.Utf8ToString()); UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.END_OFFSET.Length, _scratch.Length - SimpleTextFieldsWriter.END_OFFSET.Length, _scratchUtf162); _endOffset = ArrayUtil.ParseInt(_scratchUtf162.Chars, 0, _scratchUtf162.Length); } long fp = _in.FilePointer; SimpleTextUtil.ReadLine(_in, _scratch); if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD)) { int len = _scratch.Length - SimpleTextFieldsWriter.PAYLOAD.Length; if (_scratch2.Bytes.Length < len) { _scratch2.Grow(len); } Array.Copy(_scratch.Bytes, SimpleTextFieldsWriter.PAYLOAD.Length, _scratch2.Bytes, 0, len); _scratch2.Length = len; _payload = _scratch2; } else { _payload = null; _in.Seek(fp); } return(pos); }
private void ReadField(BytesRef type, FieldInfo fieldInfo, StoredFieldVisitor visitor) { ReadLine(); Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.VALUE)); if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_STRING)) { visitor.StringField(fieldInfo, _scratch.Bytes.SubList(_scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length, _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length).ToString()); // new string(_scratch.Bytes, _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length, _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length, // Encoding.UTF8)); } else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_BINARY)) { var copy = new byte[_scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length]; Array.Copy(_scratch.Bytes, _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length, copy, 0, copy.Length); visitor.BinaryField(fieldInfo, copy); } else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_INT)) { UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length, _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length, _scratchUtf16); visitor.IntField(fieldInfo, Convert.ToInt32(_scratchUtf16.ToString())); } else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_LONG)) { UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length, _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length, _scratchUtf16); visitor.LongField(fieldInfo, Convert.ToInt64(_scratchUtf16.ToString())); } else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_FLOAT)) { UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length, _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length, _scratchUtf16); visitor.FloatField(fieldInfo, Convert.ToSingle(_scratchUtf16.ToString())); } else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_DOUBLE)) { UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length, _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length, _scratchUtf16); visitor.DoubleField(fieldInfo, Convert.ToDouble(_scratchUtf16.ToString())); } }
public static void Write(DataOutput output, string s, BytesRef scratch) { UnicodeUtil.UTF16toUTF8(s.ToCharArray(), 0, s.Length, scratch); Write(output, scratch); }
private string ReadString(int offset, BytesRef scratch) { UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + offset, scratch.Length - offset, _scratchUtf16); return(_scratchUtf16.ToString()); }
private int ParseIntAt(int offset) { UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + offset, _scratch.Length - offset, _scratchUtf16); return(ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.length)); }
private static int ParseIntAt(BytesRef bytes, int offset, CharsRef scratch) { UnicodeUtil.UTF8toUTF16(bytes.Bytes, bytes.Offset + offset, bytes.Length - offset, scratch); return(ArrayUtil.ParseInt(scratch.Chars, 0, scratch.Length)); }
private void LoadTerms() { var posIntOutputs = PositiveIntOutputs.Singleton; var outputsInner = new PairOutputs <long?, long?>(posIntOutputs, posIntOutputs); var outputs = new PairOutputs <long?, PairOutputs <long?, long?> .Pair>(posIntOutputs, outputsInner); // honestly, wtf kind of generic mess is this. var b = new Builder <PairOutputs <long?, PairOutputs <long?, long?> .Pair> .Pair>(FST.INPUT_TYPE.BYTE1, outputs); var input = (IndexInput)_outerInstance._input.Clone(); input.Seek(_termsStart); var lastTerm = new BytesRef(10); long lastDocsStart = -1; int docFreq = 0; long totalTermFreq = 0; var visitedDocs = new FixedBitSet(_maxDoc); var scratchIntsRef = new IntsRef(); while (true) { SimpleTextUtil.ReadLine(input, _scratch); if (_scratch.Equals(SimpleTextFieldsWriter.END) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD)) { if (lastDocsStart != -1) { b.Add(Util.ToIntsRef(lastTerm, scratchIntsRef), outputs.NewPair(lastDocsStart, outputsInner.NewPair(docFreq, totalTermFreq))); _sumTotalTermFreq += totalTermFreq; } break; } if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC)) { docFreq++; _sumDocFreq++; UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length, _scratch.Length - SimpleTextFieldsWriter.DOC.Length, _scratchUtf16); int docId = ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length); visitedDocs.Set(docId); } else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ)) { UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length, _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16); totalTermFreq += ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length); } else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM)) { if (lastDocsStart != -1) { b.Add(Util.ToIntsRef(lastTerm, scratchIntsRef), outputs.NewPair(lastDocsStart, outputsInner.NewPair(docFreq, totalTermFreq))); } lastDocsStart = input.FilePointer; int len = _scratch.Length - SimpleTextFieldsWriter.TERM.Length; if (len > lastTerm.Length) { lastTerm.Grow(len); } Array.Copy(_scratch.Bytes, SimpleTextFieldsWriter.TERM.Length, lastTerm.Bytes, 0, len); lastTerm.Length = len; docFreq = 0; _sumTotalTermFreq += totalTermFreq; totalTermFreq = 0; _termCount++; } } _docCount = visitedDocs.Cardinality(); _fst = b.Finish(); }
public override int NextDoc() { bool first = true; _in.Seek(_nextDocStart); long posStart = 0; while (true) { long lineStart = _in.FilePointer; SimpleTextUtil.ReadLine(_in, _scratch); //System.out.println("NEXT DOC: " + scratch.utf8ToString()); if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC)) { if (!first && (_liveDocs == null || _liveDocs.Get(_docId))) { _nextDocStart = lineStart; _in.Seek(posStart); return(_docId); } UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length, _scratch.Length - SimpleTextFieldsWriter.DOC.Length, _scratchUtf16); _docId = ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length); _tf = 0; first = false; } else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ)) { UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length, _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16); _tf = ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length); posStart = _in.FilePointer; } else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS)) { // skip } else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET)) { // skip } else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET)) { // skip } else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD)) { // skip } else { Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END)); if (!first && (_liveDocs == null || _liveDocs.Get(_docId))) { _nextDocStart = lineStart; _in.Seek(posStart); return(_docId); } return(_docId = NO_MORE_DOCS); } } }
public override int NextDoc() { if (_docId == NO_MORE_DOCS) { return(_docId); } bool first = true; int termFreq = 0; while (true) { long lineStart = _in.FilePointer; SimpleTextUtil.ReadLine(_in, _scratch); if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC)) { if (!first && (_liveDocs == null || _liveDocs.Get(_docId))) { _in.Seek(lineStart); if (!_omitTf) { _tf = termFreq; } return(_docId); } UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length, _scratch.Length - SimpleTextFieldsWriter.DOC.Length, _scratchUtf16); _docId = ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length); termFreq = 0; first = false; } else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ)) { UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length, _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16); termFreq = ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length); } else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS)) { // skip termFreq++; } else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET)) { // skip } else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET)) { // skip } else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD)) { // skip } else { Debug.Assert( StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END), "scratch=" + _scratch.Utf8ToString()); if (!first && (_liveDocs == null || _liveDocs.Get(_docId))) { _in.Seek(lineStart); if (!_omitTf) { _tf = termFreq; } return(_docId); } return(_docId = NO_MORE_DOCS); } } }