/// <summary>
/// Writes a SORTED_SET doc-values field to <c>data</c> as fixed-width plain text.
/// </summary>
/// <remarks>
/// Output order: the field entry, NUMVALUES, MAXLENGTH, PATTERN (the zero-padded
/// width used for every value length), ORDPATTERN (as wide as the longest
/// comma-separated ord list), then a LENGTH line plus a space-padded bytes line
/// per value, and finally one space-padded ord-list line per document.
/// <paramref name="values"/>, <paramref name="docToOrdCount"/> and
/// <paramref name="ords"/> are each enumerated twice.
/// </remarks>
/// <param name="field">Field being written; asserted to be SORTED_SET.</param>
/// <param name="values">The values; each one's bytes are written verbatim.</param>
/// <param name="docToOrdCount">For each document, how many entries to take from <paramref name="ords"/>.</param>
/// <param name="ords">Ords for all documents, consumed in order.</param>
public override void AddSortedSetField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long> docToOrdCount, IEnumerable<long> ords)
{
    Debug.Assert(FieldSeen(field.Name));
    Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.SORTED_SET);
    WriteFieldEntry(field, FieldInfo.DocValuesType_e.SORTED_SET);

    long valueCount = 0;
    int maxLength = 0;
    foreach (BytesRef value in values)
    {
        maxLength = Math.Max(maxLength, value.Length);
        valueCount++;
    }

    // write numValues
    SimpleTextUtil.Write(data, NUMVALUES);
    SimpleTextUtil.Write(data, valueCount.ToString(CultureInfo.InvariantCulture), scratch);
    SimpleTextUtil.WriteNewline(data);

    // write maxLength
    string maxLengthText = maxLength.ToString(CultureInfo.InvariantCulture);
    SimpleTextUtil.Write(data, MAXLENGTH);
    SimpleTextUtil.Write(data, maxLengthText, scratch);
    SimpleTextUtil.WriteNewline(data);

    // write our pattern for encoding lengths: one '0' per digit of maxLength.
    // The same string doubles as the .NET custom numeric format for each length.
    string lengthPattern = new string('0', maxLengthText.Length);
    SimpleTextUtil.Write(data, PATTERN);
    SimpleTextUtil.Write(data, lengthPattern, scratch);
    SimpleTextUtil.WriteNewline(data);

    // compute ord pattern: this is funny, we encode all values for all docs to find the maximum length
    int maxOrdListLength = 0;
    var sb2 = new StringBuilder();
    using (IEnumerator<long> ordStream = ords.GetEnumerator())
    {
        foreach (long n in docToOrdCount)
        {
            sb2.Length = 0;
            int count = (int)n;
            for (int i = 0; i < count; i++)
            {
                ordStream.MoveNext();
                long ord = ordStream.Current;
                if (sb2.Length > 0)
                {
                    sb2.Append(",");
                }
                sb2.Append(ord.ToString(CultureInfo.InvariantCulture));
            }
            maxOrdListLength = Math.Max(maxOrdListLength, sb2.Length);
        }
    }

    // write our pattern for ord lists
    SimpleTextUtil.Write(data, ORDPATTERN);
    SimpleTextUtil.Write(data, new string('X', maxOrdListLength), scratch);
    SimpleTextUtil.WriteNewline(data);

    // for asserts:
    long valuesSeen = 0;

    foreach (BytesRef value in values)
    {
        // write length
        SimpleTextUtil.Write(data, LENGTH);
        SimpleTextUtil.Write(data, value.Length.ToString(lengthPattern, CultureInfo.InvariantCulture), scratch);
        SimpleTextUtil.WriteNewline(data);

        // write bytes -- don't use SimpleText.Write
        // because it escapes:
        data.WriteBytes(value.Bytes, value.Offset, value.Length);

        // pad to fit
        // NOTE(review): the sbyte cast is kept from the original; confirm it matches
        // the WriteByte overload available in this codebase.
        for (int i = value.Length; i < maxLength; i++)
        {
            data.WriteByte((sbyte)' ');
        }
        SimpleTextUtil.WriteNewline(data);
        valuesSeen++;
        Debug.Assert(valuesSeen <= valueCount);
    }

    Debug.Assert(valuesSeen == valueCount);

    // write the ords for each doc comma-separated
    using (IEnumerator<long> ordStream = ords.GetEnumerator())
    {
        foreach (long n in docToOrdCount)
        {
            sb2.Length = 0;
            int count = (int)n;
            for (int i = 0; i < count; i++)
            {
                ordStream.MoveNext();
                long ord = ordStream.Current;
                if (sb2.Length > 0)
                {
                    sb2.Append(",");
                }
                sb2.Append(ord.ToString(CultureInfo.InvariantCulture));
            }

            // now pad to fit: these are numbers so spaces work well. reader calls trim()
            int numPadding = maxOrdListLength - sb2.Length;
            for (int i = 0; i < numPadding; i++)
            {
                sb2.Append(' ');
            }
            SimpleTextUtil.Write(data, sb2.ToString(), scratch);
            SimpleTextUtil.WriteNewline(data);
        }
    }
}
/// <summary>
/// Reads the next line from <c>_input</c> into <c>_scratch</c>.
/// </summary>
private void ReadLine() => SimpleTextUtil.ReadLine(_input, _scratch);
/// <summary>
/// Writes a BINARY doc-values field to <c>data</c> as fixed-width plain text.
/// </summary>
/// <remarks>
/// Output order: the field entry, MAXLENGTH, PATTERN, then for every value a
/// LENGTH line, the raw bytes padded with spaces up to the maximum length, and
/// a "T"/"F" line telling whether the value was non-null.
/// <paramref name="values"/> is enumerated twice.
/// </remarks>
/// <param name="field">Field being written; asserted to be BINARY.</param>
/// <param name="values">The values; a null entry is written as zero-length and flagged "F".</param>
public override void AddBinaryField(FieldInfo field, IEnumerable<BytesRef> values)
{
    Debug.Assert(FieldSeen(field.Name));
    Debug.Assert(field.DocValuesType == DocValuesType.BINARY);

    // Pass 1: find the longest value; nulls count as length 0.
    int longest = 0;
    foreach (BytesRef candidate in values)
    {
        if (candidate != null && candidate.Length > longest)
        {
            longest = candidate.Length;
        }
    }

    WriteFieldEntry(field, DocValuesType.BINARY);

    // write maxLength
    string longestText = longest.ToString(CultureInfo.InvariantCulture);
    SimpleTextUtil.Write(data, MAXLENGTH);
    SimpleTextUtil.Write(data, longestText, scratch);
    SimpleTextUtil.WriteNewline(data);

    // write our pattern for encoding lengths: one '0' per digit of the maximum length
    string lengthPattern = new string('0', longestText.Length);
    SimpleTextUtil.Write(data, PATTERN);
    SimpleTextUtil.Write(data, lengthPattern, scratch);
    SimpleTextUtil.WriteNewline(data);

    // Pass 2: emit every value at fixed width.
    int written = 0;
    foreach (BytesRef current in values)
    {
        bool hasValue = current != null;
        int byteCount = hasValue ? current.Length : 0;

        SimpleTextUtil.Write(data, LENGTH);
        SimpleTextUtil.Write(data, byteCount.ToString(lengthPattern, CultureInfo.InvariantCulture), scratch);
        SimpleTextUtil.WriteNewline(data);

        // write bytes -- don't use SimpleText.Write
        // because it escapes:
        if (hasValue)
        {
            data.WriteBytes(current.Bytes, current.Offset, current.Length);
        }

        // pad to fit
        for (int remaining = longest - byteCount; remaining > 0; remaining--)
        {
            data.WriteByte((byte)' ');
        }
        SimpleTextUtil.WriteNewline(data);

        SimpleTextUtil.Write(data, hasValue ? "T" : "F", scratch);
        SimpleTextUtil.WriteNewline(data);
        written++;
    }

    Debug.Assert(numDocs == written);
}
/// <summary>
/// Scans the text lines starting at <c>_termsStart</c> and builds <c>_fst</c>,
/// mapping each term to (file position after its TERM line, (docFreq, totalTermFreq)).
/// </summary>
/// <remarks>
/// Also accumulates <c>_sumDocFreq</c>, <c>_sumTotalTermFreq</c> and <c>_termCount</c>,
/// and sets <c>_docCount</c> from the set of doc ids seen. Scanning stops at an END
/// line or at the next FIELD line.
/// </remarks>
private void LoadTerms()
{
    var positiveInts = PositiveInt32Outputs.Singleton;
    var statsOutputs = new PairOutputs<long?, long?>(positiveInts, positiveInts);
    var termOutputs = new PairOutputs<long?, PairOutputs<long?, long?>.Pair>(positiveInts, statsOutputs);

    // FST output type: Pair(docsStart, Pair(docFreq, totalTermFreq)).
    var fstBuilder = new Builder<PairOutputs<long?, PairOutputs<long?, long?>.Pair>.Pair>(FST.INPUT_TYPE.BYTE1, termOutputs);

    var input = (IndexInput)_outerInstance._input.Clone();
    input.Seek(_termsStart);

    var currentTerm = new BytesRef(10);
    long currentDocsStart = -1; // -1 until the first TERM line has been seen
    int docFreq = 0;
    long totalTermFreq = 0;
    var visitedDocs = new FixedBitSet(_maxDoc);
    var scratchInts = new Int32sRef();

    while (true)
    {
        SimpleTextUtil.ReadLine(input, _scratch);

        if (_scratch.Equals(SimpleTextFieldsWriter.END) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD))
        {
            // End of this field: flush the term that was being accumulated, if any.
            if (currentDocsStart != -1)
            {
                fstBuilder.Add(
                    Util.ToInt32sRef(currentTerm, scratchInts),
                    termOutputs.NewPair(currentDocsStart, statsOutputs.NewPair(docFreq, totalTermFreq)));
                _sumTotalTermFreq += totalTermFreq;
            }
            break;
        }

        if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC))
        {
            docFreq++;
            _sumDocFreq++;
            UnicodeUtil.UTF8toUTF16(
                _scratch.Bytes,
                _scratch.Offset + SimpleTextFieldsWriter.DOC.Length,
                _scratch.Length - SimpleTextFieldsWriter.DOC.Length,
                _scratchUtf16);
            int docId = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
            visitedDocs.Set(docId);
            continue;
        }

        if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
        {
            UnicodeUtil.UTF8toUTF16(
                _scratch.Bytes,
                _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length,
                _scratch.Length - SimpleTextFieldsWriter.FREQ.Length,
                _scratchUtf16);
            totalTermFreq += ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
            continue;
        }

        if (!StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM))
        {
            // Any other line kind is ignored here.
            continue;
        }

        // New TERM line: flush the previous term before overwriting its bytes.
        if (currentDocsStart != -1)
        {
            fstBuilder.Add(
                Util.ToInt32sRef(currentTerm, scratchInts),
                termOutputs.NewPair(currentDocsStart, statsOutputs.NewPair(docFreq, totalTermFreq)));
        }

        currentDocsStart = input.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream

        int termLength = _scratch.Length - SimpleTextFieldsWriter.TERM.Length;
        if (termLength > currentTerm.Length)
        {
            currentTerm.Grow(termLength);
        }
        Array.Copy(_scratch.Bytes, SimpleTextFieldsWriter.TERM.Length, currentTerm.Bytes, 0, termLength);
        currentTerm.Length = termLength;

        docFreq = 0;
        _sumTotalTermFreq += totalTermFreq;
        totalTermFreq = 0;
        _termCount++;
    }

    _docCount = visitedDocs.Cardinality;
    _fst = fstBuilder.Finish();
}
/// <summary>
/// Writes the segment info for <paramref name="si"/> as a plain-text file in <paramref name="dir"/>.
/// </summary>
/// <remarks>
/// The file holds, in order: version, document count, compound-file flag, the
/// diagnostics key/value pairs, the list of segment files, and a checksum.
/// If writing fails, the output is disposed with secondary exceptions suppressed
/// and deletion of the partial file is attempted; a failure of that delete is
/// ignored so the original exception is the one that propagates.
/// </remarks>
/// <param name="dir">Directory in which the file is created.</param>
/// <param name="si">Segment info to serialize; the new file name is added to it.</param>
/// <param name="fis">Not used by this implementation.</param>
/// <param name="ioContext">Context passed to <c>CreateOutput</c>.</param>
public override void Write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext)
{
    var segFileName = IndexFileNames.SegmentFileName(si.Name, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
    si.AddFile(segFileName);

    bool completed = false;
    var output = dir.CreateOutput(segFileName, ioContext);

    try
    {
        var scratch = new BytesRef();

        SimpleTextUtil.Write(output, SI_VERSION);
        SimpleTextUtil.Write(output, si.Version, scratch);
        SimpleTextUtil.WriteNewline(output);

        SimpleTextUtil.Write(output, SI_DOCCOUNT);
        SimpleTextUtil.Write(output, Convert.ToString(si.DocCount, CultureInfo.InvariantCulture), scratch);
        SimpleTextUtil.WriteNewline(output);

        SimpleTextUtil.Write(output, SI_USECOMPOUND);
        SimpleTextUtil.Write(output, Convert.ToString(si.UseCompoundFile, CultureInfo.InvariantCulture).ToLowerInvariant(), scratch);
        SimpleTextUtil.WriteNewline(output);

        // Diagnostics: a count line followed by key/value line pairs.
        IDictionary<string, string> diagnostics = si.Diagnostics;
        int diagnosticCount = 0;
        if (diagnostics != null)
        {
            diagnosticCount = diagnostics.Count;
        }

        SimpleTextUtil.Write(output, SI_NUM_DIAG);
        SimpleTextUtil.Write(output, diagnosticCount.ToString(CultureInfo.InvariantCulture), scratch);
        SimpleTextUtil.WriteNewline(output);

        if (diagnosticCount > 0)
        {
            foreach (var entry in diagnostics)
            {
                SimpleTextUtil.Write(output, SI_DIAG_KEY);
                SimpleTextUtil.Write(output, entry.Key, scratch);
                SimpleTextUtil.WriteNewline(output);

                SimpleTextUtil.Write(output, SI_DIAG_VALUE);
                SimpleTextUtil.Write(output, entry.Value, scratch);
                SimpleTextUtil.WriteNewline(output);
            }
        }

        // Files: a count line followed by one line per file name.
        var files = si.GetFiles();
        int fileCount = 0;
        if (files != null)
        {
            fileCount = files.Count;
        }

        SimpleTextUtil.Write(output, SI_NUM_FILES);
        SimpleTextUtil.Write(output, fileCount.ToString(CultureInfo.InvariantCulture), scratch);
        SimpleTextUtil.WriteNewline(output);

        if (fileCount > 0)
        {
            foreach (var fileName in files)
            {
                SimpleTextUtil.Write(output, SI_FILE);
                SimpleTextUtil.Write(output, fileName, scratch);
                SimpleTextUtil.WriteNewline(output);
            }
        }

        SimpleTextUtil.WriteChecksum(output, scratch);
        completed = true;
    }
    finally
    {
        if (completed)
        {
            output.Dispose();
        }
        else
        {
            IOUtils.DisposeWhileHandlingException(output);
            try
            {
                dir.DeleteFile(segFileName);
            }
            catch (Exception)
            {
                // Ensure we throw the original exception.
            }
        }
    }
}
/// <summary>
/// Scans the text lines starting at <c>termsStart</c> and builds <c>fst</c>,
/// mapping each term to (file position after its TERM line, (docFreq, totalTermFreq)).
/// </summary>
/// <remarks>
/// Also accumulates <c>sumDocFreq</c>, <c>sumTotalTermFreq</c> and <c>termCount</c>,
/// and sets <c>docCount</c> from the set of doc ids seen. Scanning stops at an END
/// line or at the next FIELD line.
/// </remarks>
internal virtual void LoadTerms()
{
    PositiveIntOutputs posIntOutputs = PositiveIntOutputs.Singleton;
    PairOutputs<long?, long?> outputsInner = new PairOutputs<long?, long?>(posIntOutputs, posIntOutputs);
    PairOutputs<long?, PairOutputs.Pair<long?, long?>> outputs = new PairOutputs<long?, PairOutputs.Pair<long?, long?>>(posIntOutputs, outputsInner);

    // C# has no diamond operator: the type arguments must be spelled out.
    Builder<PairOutputs.Pair<long?, PairOutputs.Pair<long?, long?>>> b =
        new Builder<PairOutputs.Pair<long?, PairOutputs.Pair<long?, long?>>>(FST.INPUT_TYPE.BYTE1, outputs);

    IndexInput @in = (IndexInput)outerInstance._input.Clone();
    @in.Seek(termsStart);

    BytesRef lastTerm = new BytesRef(10);
    long lastDocsStart = -1; // -1 until the first TERM line has been seen
    int docFreq = 0;
    long totalTermFreq = 0;
    FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
    IntsRef scratchIntsRef = new IntsRef();

    while (true)
    {
        SimpleTextUtil.ReadLine(@in, scratch);
        if (scratch.Equals(END) || StringHelper.StartsWith(scratch, FIELD))
        {
            // End of this field: flush the term that was being accumulated, if any.
            if (lastDocsStart != -1)
            {
                b.Add(Util.ToIntsRef(lastTerm, scratchIntsRef), outputs.NewPair(lastDocsStart, outputsInner.NewPair((long)docFreq, totalTermFreq)));
                sumTotalTermFreq += totalTermFreq;
            }
            break;
        }
        else if (StringHelper.StartsWith(scratch, DOC))
        {
            docFreq++;
            sumDocFreq++;
            UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + DOC.Length, scratch.Length - DOC.Length, scratchUTF16);
            int docID = ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.Length);
            visitedDocs.Set(docID);
        }
        else if (StringHelper.StartsWith(scratch, FREQ))
        {
            UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + FREQ.Length, scratch.Length - FREQ.Length, scratchUTF16);
            totalTermFreq += ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.Length);
        }
        else if (StringHelper.StartsWith(scratch, TERM))
        {
            // New TERM line: flush the previous term before overwriting its bytes.
            if (lastDocsStart != -1)
            {
                b.Add(Util.ToIntsRef(lastTerm, scratchIntsRef), outputs.NewPair(lastDocsStart, outputsInner.NewPair((long)docFreq, totalTermFreq)));
            }
            lastDocsStart = @in.FilePointer;
            int len = scratch.Length - TERM.Length;
            if (len > lastTerm.Length)
            {
                lastTerm.Grow(len);
            }
            Array.Copy(scratch.Bytes, TERM.Length, lastTerm.Bytes, 0, len);
            lastTerm.Length = len;
            docFreq = 0;
            sumTotalTermFreq += totalTermFreq;
            totalTermFreq = 0;
            termCount++;
        }
    }

    docCount = visitedDocs.Cardinality();
    fst = b.Finish();
}
/// <summary>
/// Advances to the next live document of the current term.
/// </summary>
/// <remarks>
/// Scanning starts at <c>_nextDocStart</c>. When a document is returned,
/// <c>_nextDocStart</c> is set to the line that ended it and the input is left at
/// the position recorded right after that document's FREQ line.
/// </remarks>
/// <returns>The document id, or <c>NO_MORE_DOCS</c> when a TERM, FIELD or END line is reached with no live document pending.</returns>
public override int NextDoc()
{
    bool haveDoc = false;
    _in.Seek(_nextDocStart);
    long positionsStart = 0;

    while (true)
    {
        long lineStart = _in.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
        SimpleTextUtil.ReadLine(_in, _scratch);

        if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC))
        {
            // A new DOC line ends the previous document; return that one if it is live.
            if (haveDoc && (_liveDocs == null || _liveDocs.Get(_docId)))
            {
                _nextDocStart = lineStart;
                _in.Seek(positionsStart);
                return _docId;
            }

            UnicodeUtil.UTF8toUTF16(
                _scratch.Bytes,
                _scratch.Offset + SimpleTextFieldsWriter.DOC.Length,
                _scratch.Length - SimpleTextFieldsWriter.DOC.Length,
                _scratchUtf16);
            _docId = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
            _tf = 0;
            haveDoc = true;
            continue;
        }

        if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
        {
            UnicodeUtil.UTF8toUTF16(
                _scratch.Bytes,
                _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length,
                _scratch.Length - SimpleTextFieldsWriter.FREQ.Length,
                _scratchUtf16);
            _tf = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
            positionsStart = _in.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
            continue;
        }

        // Position, offset and payload lines are skipped here.
        if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS)
            || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET)
            || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET)
            || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
        {
            continue;
        }

        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END));
        }

        // End of this term's postings: return the pending document if it is live.
        if (haveDoc && (_liveDocs == null || _liveDocs.Get(_docId)))
        {
            _nextDocStart = lineStart;
            _in.Seek(positionsStart);
            return _docId;
        }

        _docId = NO_MORE_DOCS;
        return _docId;
    }
}
/// <summary>
/// Advances to the next live document of the current term.
/// </summary>
/// <remarks>
/// When a document is returned the input is rewound to the start of the line that
/// ended it, and <c>_tf</c> is set from the document's FREQ line unless
/// <c>_omitTf</c> is set. Position, offset and payload lines are skipped.
/// </remarks>
/// <returns>The document id, or <c>NO_MORE_DOCS</c> once the enum is exhausted.</returns>
public override int NextDoc()
{
    if (_docId == NO_MORE_DOCS)
    {
        return _docId;
    }

    bool haveDoc = false;
    int freq = 0;

    for (;;)
    {
        long lineStart = _in.GetFilePointer();
        SimpleTextUtil.ReadLine(_in, _scratch);

        bool isDocLine = StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC);
        if (!isDocLine)
        {
            if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
            {
                UnicodeUtil.UTF8toUTF16(
                    _scratch.Bytes,
                    _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length,
                    _scratch.Length - SimpleTextFieldsWriter.FREQ.Length,
                    _scratchUtf16);
                freq = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                continue;
            }

            if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS)
                || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET)
                || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET)
                || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
            {
                continue; // skip
            }

            // LUCENENET TODO: This assert fails sometimes, which in turn causes
            // _scratch.Utf8ToString() to throw an index out of range exception,
            // so no message is attached.
            Debug.Assert(
                StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM)
                || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD)
                || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END));
        }

        // Boundary reached (next DOC line, or TERM/FIELD/END): return the pending document if live.
        if (haveDoc && (_liveDocs == null || _liveDocs.Get(_docId)))
        {
            _in.Seek(lineStart);
            if (!_omitTf)
            {
                _tf = freq;
            }
            return _docId;
        }

        if (!isDocLine)
        {
            _docId = NO_MORE_DOCS;
            return _docId;
        }

        UnicodeUtil.UTF8toUTF16(
            _scratch.Bytes,
            _scratch.Offset + SimpleTextFieldsWriter.DOC.Length,
            _scratch.Length - SimpleTextFieldsWriter.DOC.Length,
            _scratchUtf16);
        _docId = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
        freq = 0;
        haveDoc = true;
    }
}
/// <summary>
/// Reads the next position entry of the current document.
/// </summary>
/// <remarks>
/// Depending on <c>_readPositions</c> and <c>_readOffsets</c>, consumes a POS line
/// and a START_OFFSET/END_OFFSET pair. The following line is then read: if it is
/// a PAYLOAD line its bytes are copied into <c>_scratch2</c> and exposed through
/// <c>_payload</c>; otherwise <c>_payload</c> is cleared and the input is rewound
/// to the start of that line.
/// </remarks>
/// <returns>The parsed position, or -1 when positions are not being read.</returns>
public override int NextPosition()
{
    int position = -1;

    if (_readPositions)
    {
        SimpleTextUtil.ReadLine(_in, _scratch);
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS), () => "got line=" + _scratch.Utf8ToString());
        }
        UnicodeUtil.UTF8toUTF16(
            _scratch.Bytes,
            _scratch.Offset + SimpleTextFieldsWriter.POS.Length,
            _scratch.Length - SimpleTextFieldsWriter.POS.Length,
            _scratchUtf162);
        position = ArrayUtil.ParseInt32(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
    }

    if (_readOffsets)
    {
        SimpleTextUtil.ReadLine(_in, _scratch);
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET), () => "got line=" + _scratch.Utf8ToString());
        }
        UnicodeUtil.UTF8toUTF16(
            _scratch.Bytes,
            _scratch.Offset + SimpleTextFieldsWriter.START_OFFSET.Length,
            _scratch.Length - SimpleTextFieldsWriter.START_OFFSET.Length,
            _scratchUtf162);
        _startOffset = ArrayUtil.ParseInt32(_scratchUtf162.Chars, 0, _scratchUtf162.Length);

        SimpleTextUtil.ReadLine(_in, _scratch);
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET), () => "got line=" + _scratch.Utf8ToString());
        }
        UnicodeUtil.UTF8toUTF16(
            _scratch.Bytes,
            _scratch.Offset + SimpleTextFieldsWriter.END_OFFSET.Length,
            _scratch.Length - SimpleTextFieldsWriter.END_OFFSET.Length,
            _scratchUtf162);
        _endOffset = ArrayUtil.ParseInt32(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
    }

    // Peek at the next line: it is consumed only if it carries a payload.
    long peekStart = _in.GetFilePointer();
    SimpleTextUtil.ReadLine(_in, _scratch);

    if (!StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
    {
        _payload = null;
        _in.Seek(peekStart);
        return position;
    }

    int payloadLength = _scratch.Length - SimpleTextFieldsWriter.PAYLOAD.Length;
    if (_scratch2.Bytes.Length < payloadLength)
    {
        _scratch2.Grow(payloadLength);
    }
    Array.Copy(_scratch.Bytes, SimpleTextFieldsWriter.PAYLOAD.Length, _scratch2.Bytes, 0, payloadLength);
    _scratch2.Length = payloadLength;
    _payload = _scratch2;

    return position;
}
/// <summary>
/// Writes a NUMERIC doc-values field to <c>data</c> as fixed-width plain text.
/// </summary>
/// <remarks>
/// Output order: the field entry, MINVALUE, PATTERN (one '0' per digit of
/// max - min), then for every value its zero-padded delta from the minimum
/// followed by a "T"/"F" line telling whether the value was non-null.
/// Null values are treated as 0. <paramref name="values"/> is enumerated twice.
/// </remarks>
/// <param name="field">Field being written; asserted to have NUMERIC doc values or norms.</param>
/// <param name="values">The values to write.</param>
public override void AddNumericField(FieldInfo field, IEnumerable<long?> values)
{
    Debug.Assert(FieldSeen(field.Name));
    Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.NUMERIC || field.NormType == FieldInfo.DocValuesType_e.NUMERIC);
    WriteFieldEntry(field, FieldInfo.DocValuesType_e.NUMERIC);

    // first pass to find min/max
    long smallest = long.MaxValue;
    long largest = long.MinValue;
    foreach (long? entry in values)
    {
        long v = entry.GetValueOrDefault();
        if (v < smallest)
        {
            smallest = v;
        }
        if (v > largest)
        {
            largest = v;
        }
    }

    // write our minimum value to the .dat, all entries are deltas from that
    SimpleTextUtil.Write(data, MINVALUE);
    SimpleTextUtil.Write(data, smallest.ToString(CultureInfo.InvariantCulture), scratch);
    SimpleTextUtil.WriteNewline(data);

    // build up our fixed-width "simple text packed ints" format;
    // BigInteger because max - min may not fit in a long
    BigInteger range = new BigInteger(largest) - new BigInteger(smallest);
    int digitsPerValue = range.ToString(CultureInfo.InvariantCulture).Length;
    string patternString = new string('0', digitsPerValue);

    // write our pattern to the .dat
    SimpleTextUtil.Write(data, PATTERN);
    SimpleTextUtil.Write(data, patternString, scratch);
    SimpleTextUtil.WriteNewline(data);

    // second pass to write the values
    int written = 0;
    foreach (long? entry in values)
    {
        long value = entry.GetValueOrDefault();
        Debug.Assert(value >= smallest);

        BigInteger delta = new BigInteger(value) - new BigInteger(smallest);
        string encoded = delta.ToString(patternString, CultureInfo.InvariantCulture);
        Debug.Assert(encoded.Length == patternString.Length);

        SimpleTextUtil.Write(data, encoded, scratch);
        SimpleTextUtil.WriteNewline(data);

        SimpleTextUtil.Write(data, entry.HasValue ? "T" : "F", scratch);
        SimpleTextUtil.WriteNewline(data);

        written++;
        Debug.Assert(written <= numDocs);
    }

    Debug.Assert(numDocs == written, "numDocs=" + numDocs + " numDocsWritten=" + written);
}
/// <summary>
/// Writes <paramref name="b"/> to <c>output</c> via <c>SimpleTextUtil.Write</c>.
/// </summary>
/// <param name="b">The bytes to write.</param>
private void Write(BytesRef b) => SimpleTextUtil.Write(output, b);
/// <summary>
/// Scans the text lines starting at <c>termsStart</c> and builds <c>fst</c>,
/// mapping each term to (file position after its TERM line, (docFreq, totalTermFreq)).
/// </summary>
/// <remarks>
/// Also accumulates <c>sumDocFreq</c>, <c>sumTotalTermFreq</c> and <c>termCount</c>,
/// and sets <c>docCount</c> from the set of doc ids seen. Scanning stops at an END
/// line or at the next FIELD line.
/// </remarks>
private void LoadTerms()
{
    PositiveInt32Outputs positiveInts = PositiveInt32Outputs.Singleton;
    var statsOutputs = new PairOutputs<Int64, Int64>(positiveInts, positiveInts);
    var termOutputs = new PairOutputs<Int64, PairOutputs<Int64, Int64>.Pair>(positiveInts, statsOutputs);
    var fstBuilder = new Builder<PairOutputs<Int64, PairOutputs<Int64, Int64>.Pair>.Pair>(FST.INPUT_TYPE.BYTE1, termOutputs);

    IndexInput termsInput = (IndexInput)outerInstance.input.Clone();
    termsInput.Seek(termsStart);

    BytesRef pendingTerm = new BytesRef(10);
    long pendingDocsStart = -1; // -1 until the first TERM line has been seen
    int docFreq = 0;
    long totalTermFreq = 0;
    FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
    Int32sRef scratchInts = new Int32sRef();

    bool done = false;
    while (!done)
    {
        SimpleTextUtil.ReadLine(termsInput, scratch);

        if (scratch.Equals(SimpleTextFieldsWriter.END) || StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.FIELD))
        {
            // End of this field: flush the term that was being accumulated, if any.
            if (pendingDocsStart != -1)
            {
                fstBuilder.Add(
                    Util.ToInt32sRef(pendingTerm, scratchInts),
                    termOutputs.NewPair(pendingDocsStart, statsOutputs.NewPair((long)docFreq, totalTermFreq)));
                sumTotalTermFreq += totalTermFreq;
            }
            done = true;
        }
        else if (StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.DOC))
        {
            docFreq++;
            sumDocFreq++;
            UnicodeUtil.UTF8toUTF16(
                scratch.Bytes,
                scratch.Offset + SimpleTextFieldsWriter.DOC.Length,
                scratch.Length - SimpleTextFieldsWriter.DOC.Length,
                scratchUTF16);
            int docID = ArrayUtil.ParseInt32(scratchUTF16.Chars, 0, scratchUTF16.Length);
            visitedDocs.Set(docID);
        }
        else if (StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.FREQ))
        {
            UnicodeUtil.UTF8toUTF16(
                scratch.Bytes,
                scratch.Offset + SimpleTextFieldsWriter.FREQ.Length,
                scratch.Length - SimpleTextFieldsWriter.FREQ.Length,
                scratchUTF16);
            totalTermFreq += ArrayUtil.ParseInt32(scratchUTF16.Chars, 0, scratchUTF16.Length);
        }
        else if (StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.TERM))
        {
            // New TERM line: flush the previous term before overwriting its bytes.
            if (pendingDocsStart != -1)
            {
                fstBuilder.Add(
                    Util.ToInt32sRef(pendingTerm, scratchInts),
                    termOutputs.NewPair(pendingDocsStart, statsOutputs.NewPair((long)docFreq, totalTermFreq)));
            }

            pendingDocsStart = termsInput.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream

            int termLength = scratch.Length - SimpleTextFieldsWriter.TERM.Length;
            if (termLength > pendingTerm.Length)
            {
                pendingTerm.Grow(termLength);
            }
            System.Array.Copy(scratch.Bytes, SimpleTextFieldsWriter.TERM.Length, pendingTerm.Bytes, 0, termLength);
            pendingTerm.Length = termLength;

            docFreq = 0;
            sumTotalTermFreq += totalTermFreq;
            totalTermFreq = 0;
            termCount++;
        }
    }

    docCount = visitedDocs.Cardinality;
    fst = fstBuilder.Finish();
}
/// <summary>
/// Advances to the next live document of the current term.
/// </summary>
/// <remarks>
/// When a document is returned the input is rewound to the start of the line that
/// ended it, and <c>tf</c> is set from the document's FREQ line unless
/// <c>omitTF</c> is set. Position, offset and payload lines are skipped.
/// </remarks>
/// <returns>The document id, or <c>NO_MORE_DOCS</c> once the enum is exhausted.</returns>
public override int NextDoc()
{
    if (docID == NO_MORE_DOCS)
    {
        return docID;
    }

    bool sawDoc = false;
    int parsedFreq = 0;

    while (true)
    {
        long lineStart = input.Position;
        SimpleTextUtil.ReadLine(input, scratch);

        if (StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.DOC))
        {
            // A new DOC line ends the previous document; return that one if it is live.
            if (sawDoc && (liveDocs is null || liveDocs.Get(docID)))
            {
                input.Seek(lineStart);
                if (!omitTF)
                {
                    tf = parsedFreq;
                }
                return docID;
            }

            UnicodeUtil.UTF8toUTF16(
                scratch.Bytes,
                scratch.Offset + SimpleTextFieldsWriter.DOC.Length,
                scratch.Length - SimpleTextFieldsWriter.DOC.Length,
                scratchUTF16);
            docID = ArrayUtil.ParseInt32(scratchUTF16.Chars, 0, scratchUTF16.Length);
            parsedFreq = 0;
            sawDoc = true;
            continue;
        }

        if (StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.FREQ))
        {
            UnicodeUtil.UTF8toUTF16(
                scratch.Bytes,
                scratch.Offset + SimpleTextFieldsWriter.FREQ.Length,
                scratch.Length - SimpleTextFieldsWriter.FREQ.Length,
                scratchUTF16);
            parsedFreq = ArrayUtil.ParseInt32(scratchUTF16.Chars, 0, scratchUTF16.Length);
            continue;
        }

        if (StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.POS)
            || StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.START_OFFSET)
            || StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.END_OFFSET)
            || StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.PAYLOAD))
        {
            continue; // skip
        }

        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(
                StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.TERM)
                || StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.FIELD)
                || StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.END),
                "scratch={0}", new BytesRefFormatter(scratch, BytesRefFormat.UTF8));
        }

        // End of this term's postings: return the pending document if it is live.
        if (sawDoc && (liveDocs is null || liveDocs.Get(docID)))
        {
            input.Seek(lineStart);
            if (!omitTF)
            {
                tf = parsedFreq;
            }
            return docID;
        }

        docID = NO_MORE_DOCS;
        return docID;
    }
}
/// <summary>
/// Writes a NUMERIC doc-values field to <c>data</c> as fixed-width plain text.
/// </summary>
/// <remarks>
/// Output order: the field entry, MINVALUE, PATTERN (one '0' per digit of
/// max - min), then for every value its zero-padded delta from the minimum
/// followed by a "T" line. Values are non-nullable here, so the flag is always "T".
/// <paramref name="values"/> is enumerated twice.
/// </remarks>
/// <param name="field">Field being written; asserted to have NUMERIC doc values or norms.</param>
/// <param name="values">The values to write.</param>
public override void AddNumericField(FieldInfo field, IEnumerable<long> values)
{
    Debug.Assert(FieldSeen(field.Name));
    Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.NUMERIC || field.NormType == FieldInfo.DocValuesType_e.NUMERIC);
    WriteFieldEntry(field, FieldInfo.DocValuesType_e.NUMERIC);

    // first pass to find min/max
    var minValue = long.MaxValue;
    var maxValue = long.MinValue;
    foreach (var v in values)
    {
        minValue = Math.Min(minValue, v);
        maxValue = Math.Max(maxValue, v);
    }

    // write our minimum value to the .dat, all entries are deltas from that
    SimpleTextUtil.Write(data, MINVALUE);
    SimpleTextUtil.Write(data, minValue.ToString(CultureInfo.InvariantCulture), scratch);
    SimpleTextUtil.WriteNewline(data);

    // build up our fixed-width "simple text packed ints" format
    System.Numerics.BigInteger maxBig = maxValue;
    System.Numerics.BigInteger minBig = minValue;
    var diffBig = maxBig - minBig;
    var maxBytesPerValue = diffBig.ToString(CultureInfo.InvariantCulture).Length;
    var patternString = new string('0', maxBytesPerValue);

    // write our pattern to the .dat
    SimpleTextUtil.Write(data, PATTERN);
    SimpleTextUtil.Write(data, patternString, scratch);
    SimpleTextUtil.WriteNewline(data);

    int numDocsWritten = 0;

    // second pass to write the values
    foreach (var value in values)
    {
        Debug.Assert(value >= minValue);

        // Subtract as BigInteger: value - minValue can exceed long.MaxValue
        // (the pattern width above was sized with BigInteger for the same reason).
        System.Numerics.BigInteger delta = new System.Numerics.BigInteger(value) - minBig;

        // The all-zero pattern is used as a custom numeric format, so the delta is zero-padded to the fixed width.
        string s = delta.ToString(patternString, CultureInfo.InvariantCulture);
        Debug.Assert(s.Length == patternString.Length);
        SimpleTextUtil.Write(data, s, scratch);
        SimpleTextUtil.WriteNewline(data);

        // Non-nullable values: every document has a value.
        SimpleTextUtil.Write(data, "T", scratch);
        SimpleTextUtil.WriteNewline(data);

        numDocsWritten++;
        Debug.Assert(numDocsWritten <= numDocs);
    }

    Debug.Assert(numDocs == numDocsWritten, "numDocs=" + numDocs + " numDocsWritten=" + numDocsWritten);
}
/// <summary>
/// Advances to the next live document of the current term.
/// </summary>
/// <remarks>
/// When a document is returned the input is rewound to the start of the line that
/// ended it, and <c>tf</c> is set from the document's FREQ line unless
/// <c>omitTF</c> is set. Position, offset and payload lines are skipped.
/// </remarks>
/// <returns>The document id, or <c>NO_MORE_DOCS</c> once the enum is exhausted.</returns>
public override int NextDoc()
{
    if (docID_Renamed == NO_MORE_DOCS)
    {
        return docID_Renamed;
    }

    bool first = true;
    int termFreq = 0;
    while (true)
    {
        long lineStart = @in.FilePointer;
        SimpleTextUtil.ReadLine(@in, scratch);
        if (StringHelper.StartsWith(scratch, DOC))
        {
            // A new DOC line ends the previous document; return that one if it is live.
            if (!first && (liveDocs == null || liveDocs.Get(docID_Renamed)))
            {
                @in.Seek(lineStart);
                if (!omitTF)
                {
                    tf = termFreq;
                }
                return docID_Renamed;
            }
            UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + DOC.Length, scratch.Length - DOC.Length, scratchUTF16);
            docID_Renamed = ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.Length);
            termFreq = 0;
            first = false;
        }
        else if (StringHelper.StartsWith(scratch, FREQ))
        {
            UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + FREQ.Length, scratch.Length - FREQ.Length, scratchUTF16);
            termFreq = ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.Length);
        }
        else if (StringHelper.StartsWith(scratch, POS))
        {
            // skip
        }
        else if (StringHelper.StartsWith(scratch, START_OFFSET))
        {
            // skip
        }
        else if (StringHelper.StartsWith(scratch, END_OFFSET))
        {
            // skip
        }
        else if (StringHelper.StartsWith(scratch, PAYLOAD))
        {
            // skip
        }
        else
        {
            Debug.Assert(
                StringHelper.StartsWith(scratch, TERM) || StringHelper.StartsWith(scratch, FIELD) || StringHelper.StartsWith(scratch, END),
                "scratch=" + scratch.Utf8ToString());

            // End of this term's postings: return the pending document if it is live.
            if (!first && (liveDocs == null || liveDocs.Get(docID_Renamed)))
            {
                @in.Seek(lineStart);
                if (!omitTF)
                {
                    tf = termFreq;
                }
                return docID_Renamed;
            }
            return docID_Renamed = NO_MORE_DOCS;
        }
    }
}
/// <summary>
/// Reads the next line from <c>data</c> into <c>scratch</c>. Used only in ctor.
/// </summary>
private void readLine() => SimpleTextUtil.ReadLine(data, scratch);
/// <summary>
/// Advances to the next live document of the current term.
/// </summary>
/// <remarks>
/// Scanning starts at <c>nextDocStart</c>. When a document is returned,
/// <c>nextDocStart</c> is set to the line that ended it and the input is left at
/// the position recorded right after that document's FREQ line.
/// </remarks>
/// <returns>The document id, or <c>NO_MORE_DOCS</c> when a TERM, FIELD or END line is reached with no live document pending.</returns>
public override int NextDoc()
{
    bool first = true;
    @in.Seek(nextDocStart);
    long posStart = 0;
    while (true)
    {
        long lineStart = @in.FilePointer;
        SimpleTextUtil.ReadLine(@in, scratch);
        if (StringHelper.StartsWith(scratch, DOC))
        {
            // A new DOC line ends the previous document; return that one if it is live.
            if (!first && (liveDocs == null || liveDocs.Get(docID_Renamed)))
            {
                nextDocStart = lineStart;
                @in.Seek(posStart);
                return docID_Renamed;
            }
            UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + DOC.Length, scratch.Length - DOC.Length, scratchUTF16);
            docID_Renamed = ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.Length);
            tf = 0;
            first = false;
        }
        else if (StringHelper.StartsWith(scratch, FREQ))
        {
            UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + FREQ.Length, scratch.Length - FREQ.Length, scratchUTF16);
            tf = ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.Length);
            posStart = @in.FilePointer;
        }
        else if (StringHelper.StartsWith(scratch, POS))
        {
            // skip
        }
        else if (StringHelper.StartsWith(scratch, START_OFFSET))
        {
            // skip
        }
        else if (StringHelper.StartsWith(scratch, END_OFFSET))
        {
            // skip
        }
        else if (StringHelper.StartsWith(scratch, PAYLOAD))
        {
            // skip
        }
        else
        {
            Debug.Assert(StringHelper.StartsWith(scratch, TERM) || StringHelper.StartsWith(scratch, FIELD) || StringHelper.StartsWith(scratch, END));

            // End of this term's postings: return the pending document if it is live.
            if (!first && (liveDocs == null || liveDocs.Get(docID_Renamed)))
            {
                nextDocStart = lineStart;
                @in.Seek(posStart);
                return docID_Renamed;
            }
            return docID_Renamed = NO_MORE_DOCS;
        }
    }
}
/// <summary>
/// Writes a SORTED_SET doc-values field to <c>data</c> as fixed-width plain text.
/// </summary>
/// <remarks>
/// Output order: the field entry, NUMVALUES, MAXLENGTH, PATTERN (the zero-padded
/// width used for every value length), ORDPATTERN (as wide as the longest
/// comma-separated ord list), then a LENGTH line plus a space-padded bytes line
/// per value, and finally one space-padded ord-list line per document.
/// <paramref name="values"/>, <paramref name="docToOrdCount"/> and
/// <paramref name="ords"/> are each enumerated twice. Both ord passes format
/// ords the same way, so every written list fits the ORDPATTERN width.
/// </remarks>
/// <param name="field">Field being written; asserted to be SORTED_SET when asserts are enabled.</param>
/// <param name="values">The values; each one's bytes are written verbatim.</param>
/// <param name="docToOrdCount">For each document, how many entries to take from <paramref name="ords"/>.</param>
/// <param name="ords">Ords for all documents, consumed in order; a null ord is written as 0.</param>
public override void AddSortedSetField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrdCount, IEnumerable<long?> ords)
{
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(FieldSeen(field.Name));
        Debugging.Assert(field.DocValuesType == DocValuesType.SORTED_SET);
    }
    WriteFieldEntry(field, DocValuesType.SORTED_SET);

    long valueCount = 0;
    int maxLength = 0;
    foreach (var value in values)
    {
        maxLength = Math.Max(maxLength, value.Length);
        valueCount++;
    }

    // write numValues
    SimpleTextUtil.Write(data, NUMVALUES);
    SimpleTextUtil.Write(data, valueCount.ToString(CultureInfo.InvariantCulture), scratch);
    SimpleTextUtil.WriteNewline(data);

    // write maxLength
    SimpleTextUtil.Write(data, MAXLENGTH);
    SimpleTextUtil.Write(data, maxLength.ToString(CultureInfo.InvariantCulture), scratch);
    SimpleTextUtil.WriteNewline(data);

    int maxBytesLength = maxLength.ToString(CultureInfo.InvariantCulture).Length;
    var sb = new StringBuilder();
    for (int i = 0; i < maxBytesLength; i++)
    {
        sb.Append('0');
    }

    // write our pattern for encoding lengths
    string encoderFormat = sb.ToString();
    SimpleTextUtil.Write(data, PATTERN);
    SimpleTextUtil.Write(data, encoderFormat, scratch);
    SimpleTextUtil.WriteNewline(data);

    // compute ord pattern: this is funny, we encode all values for all docs to find the maximum length
    var maxOrdListLength = 0;
    var sb2 = new StringBuilder();
    using (var ordStream = ords.GetEnumerator())
    {
        foreach (var n in docToOrdCount)
        {
            sb2.Length = 0;
            var count = (int)n;
            for (int i = 0; i < count; i++)
            {
                ordStream.MoveNext();

                var ord = ordStream.Current;
                if (sb2.Length > 0)
                {
                    sb2.Append(",");
                }
                sb2.Append(ord.GetValueOrDefault().ToString(CultureInfo.InvariantCulture));
            }
            maxOrdListLength = Math.Max(maxOrdListLength, sb2.Length);
        }
    }

    sb2.Length = 0;
    for (int i = 0; i < maxOrdListLength; i++)
    {
        sb2.Append('X');
    }

    // write our pattern for ord lists
    SimpleTextUtil.Write(data, ORDPATTERN);
    SimpleTextUtil.Write(data, sb2.ToString(), scratch);
    SimpleTextUtil.WriteNewline(data);

    // for asserts:
    long valuesSeen = 0;

    foreach (var value in values)
    {
        // write length
        SimpleTextUtil.Write(data, LENGTH);
        SimpleTextUtil.Write(data, value.Length.ToString(encoderFormat, CultureInfo.InvariantCulture), scratch);
        SimpleTextUtil.WriteNewline(data);

        // write bytes -- don't use SimpleText.Write
        // because it escapes:
        data.WriteBytes(value.Bytes, value.Offset, value.Length);

        // pad to fit
        for (var i = value.Length; i < maxLength; i++)
        {
            data.WriteByte((byte)' ');
        }
        SimpleTextUtil.WriteNewline(data);
        valuesSeen++;
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(valuesSeen <= valueCount);
        }
    }

    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(valuesSeen == valueCount);
    }

    using (var ordStream = ords.GetEnumerator())
    {
        // write the ords for each doc comma-separated
        foreach (var n in docToOrdCount)
        {
            sb2.Length = 0;
            var count = (int)n;
            for (var i = 0; i < count; i++)
            {
                ordStream.MoveNext();
                var ord = ordStream.Current;
                if (sb2.Length > 0)
                {
                    sb2.Append(",");
                }

                // Format exactly as in the width-computing pass above. Appending the
                // nullable directly would box it, use the current culture, and emit
                // nothing for null, which could disagree with ORDPATTERN.
                sb2.Append(ord.GetValueOrDefault().ToString(CultureInfo.InvariantCulture));
            }

            // now pad to fit: these are numbers so spaces work well. reader calls trim()
            var numPadding = maxOrdListLength - sb2.Length;
            for (var i = 0; i < numPadding; i++)
            {
                sb2.Append(' ');
            }
            SimpleTextUtil.Write(data, sb2.ToString(), scratch);
            SimpleTextUtil.WriteNewline(data);
        }
    }
}
/// <summary>
/// Advances to the next document whose id passes <c>_liveDocs</c> (or to any next
/// document when <c>_liveDocs</c> is null) and returns its id, or
/// <c>NO_MORE_DOCS</c> once a line that does not belong to the current term's
/// postings is reached. When <c>_omitTf</c> is false, <c>_tf</c> is set to the
/// frequency parsed from the document's FREQ line (0 if none was read).
/// </summary>
public override int NextDoc()
{
    if (_docId == NO_MORE_DOCS)
    {
        return _docId;
    }

    bool sawDocLine = false;
    int freq = 0;
    for (;;)
    {
        long lineStartFp = _in.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
        SimpleTextUtil.ReadLine(_in, _scratch);

        bool isDocLine = StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC);
        if (!isDocLine)
        {
            if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
            {
                UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length,
                    _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16);
                freq = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                continue;
            }

            // Position, offset and payload lines carry nothing needed here: skip them.
            // (The original carried a commented-out "termFreq++" on the POS branch.)
            if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS)
                || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET)
                || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET)
                || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
            {
                continue;
            }

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(
                    StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM)
                    || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD)
                    || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END),
                    "scratch={0}", _scratch.Utf8ToString());
            }
        }

        // Reached on a DOC line or on a terminating line: the previously parsed
        // document (if any) is complete, so return it when it is live.
        if (sawDocLine && (_liveDocs == null || _liveDocs.Get(_docId)))
        {
            // Rewind so the line just read is seen again by the next call.
            _in.Seek(lineStartFp);
            if (!_omitTf)
            {
                _tf = freq;
            }
            return _docId;
        }

        if (!isDocLine)
        {
            return _docId = NO_MORE_DOCS;
        }

        // Start accumulating the document announced by this DOC line.
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length,
            _scratch.Length - SimpleTextFieldsWriter.DOC.Length, _scratchUtf16);
        _docId = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
        freq = 0;
        sawDocLine = true;
    }
}
/// <summary>
/// Writes a NUMERIC field as text: the minimum value, a zero-digit pattern wide
/// enough for the largest delta, then for each entry of <paramref name="values"/>
/// its delta from the minimum followed by a "T"/"F" line telling whether the entry
/// was non-null.
/// </summary>
/// <param name="field">Field being written; asserted to have NUMERIC doc values or NUMERIC norms.</param>
/// <param name="values">Per-document values; enumerated twice. Null entries are treated as 0.</param>
public override void AddNumericField(FieldInfo field, IEnumerable<long?> values)
{
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(FieldSeen(field.Name));
        Debugging.Assert(field.DocValuesType == DocValuesType.NUMERIC || field.NormType == DocValuesType.NUMERIC);
    }
    WriteFieldEntry(field, DocValuesType.NUMERIC);

    // Pass 1: establish the value range.
    long lowest = long.MaxValue;
    long highest = long.MinValue;
    foreach (long? item in values)
    {
        long current = item.GetValueOrDefault();
        lowest = Math.Min(lowest, current);
        highest = Math.Max(highest, current);
    }

    // The minimum goes to the .dat; every entry is stored as a delta from it.
    SimpleTextUtil.Write(data, MINVALUE);
    SimpleTextUtil.Write(data, lowest.ToString(CultureInfo.InvariantCulture), scratch);
    SimpleTextUtil.WriteNewline(data);

    // Fixed-width "simple text packed ints": one '0' per character of the widest delta.
    decimal span = (decimal)highest - (decimal)lowest; // LUCENENET specific - use decimal rather than BigInteger
    int width = span.ToString(CultureInfo.InvariantCulture).Length;
    string pattern = new string('0', width);

    SimpleTextUtil.Write(data, PATTERN);
    SimpleTextUtil.Write(data, pattern, scratch);
    SimpleTextUtil.WriteNewline(data);

    // Pass 2: emit each delta plus its presence marker.
    int written = 0;
    foreach (long? item in values)
    {
        long current = item.GetValueOrDefault();
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(current >= lowest);
        }

        decimal offsetFromMin = (decimal)current - (decimal)lowest; // LUCENENET specific - use decimal rather than BigInteger
        string encoded = offsetFromMin.ToString(pattern, CultureInfo.InvariantCulture);
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(encoded.Length == pattern.Length);
        }
        SimpleTextUtil.Write(data, encoded, scratch);
        SimpleTextUtil.WriteNewline(data);

        string presence = item.HasValue ? "T" : "F";
        SimpleTextUtil.Write(data, presence, scratch);
        SimpleTextUtil.WriteNewline(data);

        written++;
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(written <= numDocs);
        }
    }

    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(numDocs == written, "numDocs={0} numDocsWritten={1}", numDocs, written);
    }
}
/// <summary>
/// Reads the next position entry from <c>_in</c>.
/// <para/>
/// When <c>_readPositions</c> is set, a POS line is read and parsed; otherwise the
/// position is -1. When <c>_readOffsets</c> is set, a START_OFFSET line and an
/// END_OFFSET line are read into <c>_startOffset</c> and <c>_endOffset</c>. Finally
/// one more line is read: if it is a PAYLOAD line its bytes are copied into
/// <c>_scratch2</c> and exposed through <c>_payload</c>; otherwise <c>_payload</c>
/// is cleared and the input is rewound to before that line.
/// </summary>
/// <returns>The parsed position, or -1 when positions are not being read.</returns>
public override int NextPosition()
{
    int pos;
    if (_readPositions)
    {
        SimpleTextUtil.ReadLine(_in, _scratch);
        // LUCENENET specific - use wrapper BytesRefFormatter struct to defer building the string unless string.Format() is called
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS), "got line={0}", new BytesRefFormatter(_scratch, BytesRefFormat.UTF8));
        }
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.POS.Length,
            _scratch.Length - SimpleTextFieldsWriter.POS.Length, _scratchUtf162);
        pos = ArrayUtil.ParseInt32(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
    }
    else
    {
        pos = -1;
    }

    if (_readOffsets)
    {
        SimpleTextUtil.ReadLine(_in, _scratch);
        // LUCENENET specific - use wrapper BytesRefFormatter struct to defer building the string unless string.Format() is called
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET), "got line={0}", new BytesRefFormatter(_scratch, BytesRefFormat.UTF8));
        }
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.START_OFFSET.Length,
            _scratch.Length - SimpleTextFieldsWriter.START_OFFSET.Length, _scratchUtf162);
        _startOffset = ArrayUtil.ParseInt32(_scratchUtf162.Chars, 0, _scratchUtf162.Length);

        SimpleTextUtil.ReadLine(_in, _scratch);
        // LUCENENET specific - use wrapper BytesRefFormatter struct to defer building the string unless string.Format() is called
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET), "got line={0}", new BytesRefFormatter(_scratch, BytesRefFormat.UTF8));
        }
        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.END_OFFSET.Length,
            _scratch.Length - SimpleTextFieldsWriter.END_OFFSET.Length, _scratchUtf162);
        _endOffset = ArrayUtil.ParseInt32(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
    }

    // Remember where the next line starts so it can be "un-read" if it is not a payload.
    long fp = _in.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
    SimpleTextUtil.ReadLine(_in, _scratch);
    if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
    {
        int len = _scratch.Length - SimpleTextFieldsWriter.PAYLOAD.Length;
        if (_scratch2.Bytes.Length < len)
        {
            _scratch2.Grow(len);
        }
        // Honor _scratch.Offset like every other read of _scratch in this method:
        // the line's bytes begin at Offset, not necessarily at index 0.
        Array.Copy(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.PAYLOAD.Length, _scratch2.Bytes, 0, len);
        _scratch2.Length = len;
        _payload = _scratch2;
    }
    else
    {
        _payload = null;
        _in.Seek(fp);
    }
    return pos;
}
/// <summary>
/// Reads the text field-infos file of a segment and builds a <c>FieldInfos</c>
/// from it. Lines are consumed in a fixed order per field (name, number, indexed
/// flag, optional index options, term vectors, payloads, norms, norms type, doc
/// values type, doc values generation, attributes), followed by a footer check.
/// </summary>
/// <param name="directory">Directory the file is opened from.</param>
/// <param name="segmentName">Segment name used to derive the file name.</param>
/// <param name="segmentSuffix">Segment suffix used to derive the file name.</param>
/// <param name="iocontext">I/O context passed to the directory when opening the file.</param>
/// <returns>The field infos that were read.</returns>
public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext)
{
    var infosFileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, SimpleTextFieldInfosWriter.FIELD_INFOS_EXTENSION);
    var input = directory.OpenChecksumInput(infosFileName, iocontext);
    var scratch = new BytesRef();

    var completed = false;
    try
    {
        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NUMFIELDS));
        var fieldCount = Convert.ToInt32(ReadString(SimpleTextFieldInfosWriter.NUMFIELDS.Length, scratch), CultureInfo.InvariantCulture);
        var infos = new FieldInfo[fieldCount];

        for (var fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++)
        {
            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NAME));
            string name = ReadString(SimpleTextFieldInfosWriter.NAME.Length, scratch);

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NUMBER));
            int fieldNumber = Convert.ToInt32(ReadString(SimpleTextFieldInfosWriter.NUMBER.Length, scratch), CultureInfo.InvariantCulture);

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.ISINDEXED));
            bool isIndexed = Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.ISINDEXED.Length, scratch), CultureInfo.InvariantCulture);

            // The index-options line is only present for indexed fields.
            IndexOptions indexOptions = IndexOptions.NONE;
            if (isIndexed)
            {
                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.INDEXOPTIONS));
                indexOptions = (IndexOptions)Enum.Parse(typeof(IndexOptions), ReadString(SimpleTextFieldInfosWriter.INDEXOPTIONS.Length, scratch));
            }

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.STORETV));
            bool storeTermVector = Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.STORETV.Length, scratch), CultureInfo.InvariantCulture);

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.PAYLOADS));
            bool storePayloads = Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.PAYLOADS.Length, scratch), CultureInfo.InvariantCulture);

            // The file stores "has norms"; FieldInfo wants the inverse.
            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NORMS));
            bool hasNorms = Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.NORMS.Length, scratch), CultureInfo.InvariantCulture);
            bool omitNorms = !hasNorms;

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NORMS_TYPE));
            string normsTypeText = ReadString(SimpleTextFieldInfosWriter.NORMS_TYPE.Length, scratch);
            Index.DocValuesType normsType = DocValuesType(normsTypeText);

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.DOCVALUES));
            string docValuesTypeText = ReadString(SimpleTextFieldInfosWriter.DOCVALUES.Length, scratch);
            Index.DocValuesType docValuesType = DocValuesType(docValuesTypeText);

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.DOCVALUES_GEN));
            long dvGen = Convert.ToInt64(ReadString(SimpleTextFieldInfosWriter.DOCVALUES_GEN.Length, scratch), CultureInfo.InvariantCulture);

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NUM_ATTS));
            int attributeCount = Convert.ToInt32(ReadString(SimpleTextFieldInfosWriter.NUM_ATTS.Length, scratch), CultureInfo.InvariantCulture);

            // Attributes are stored as alternating key / value lines.
            IDictionary<string, string> attributes = new Dictionary<string, string>();
            for (int attIndex = 0; attIndex < attributeCount; attIndex++)
            {
                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.ATT_KEY));
                string attKey = ReadString(SimpleTextFieldInfosWriter.ATT_KEY.Length, scratch);

                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.ATT_VALUE));
                string attValue = ReadString(SimpleTextFieldInfosWriter.ATT_VALUE.Length, scratch);

                attributes[attKey] = attValue;
            }

            infos[fieldIndex] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads,
                indexOptions, docValuesType, normsType, new ReadOnlyDictionary<string, string>(attributes))
            {
                DocValuesGen = dvGen
            };
        }

        SimpleTextUtil.CheckFooter(input);

        var result = new FieldInfos(infos);
        completed = true;
        return result;
    }
    finally
    {
        if (!completed)
        {
            IOUtils.DisposeWhileHandlingException(input);
        }
        else
        {
            input.Dispose();
        }
    }
}
/// <summary>
/// Reads the text segment-info file of a segment: version, document count,
/// compound-file flag, diagnostics key/value pairs and the list of files, followed
/// by a footer check.
/// </summary>
/// <param name="directory">Directory the file is opened from; also passed to the resulting <c>SegmentInfo</c>.</param>
/// <param name="segmentName">Name of the segment to read.</param>
/// <param name="context">I/O context passed to the directory when opening the file.</param>
/// <returns>The segment info that was read, with its <c>Files</c> set.</returns>
public override SegmentInfo Read(Directory directory, string segmentName, IOContext context)
{
    var scratch = new BytesRef();
    string siFileName = IndexFileNames.SegmentFileName(segmentName, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
    ChecksumIndexInput input = directory.OpenChecksumInput(siFileName, context);

    bool completed = false;
    try
    {
        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_VERSION));
        string version = ReadString(SimpleTextSegmentInfoWriter.SI_VERSION.Length, scratch);

        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_DOCCOUNT));
        int docCount = Convert.ToInt32(ReadString(SimpleTextSegmentInfoWriter.SI_DOCCOUNT.Length, scratch), CultureInfo.InvariantCulture);

        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_USECOMPOUND));
        bool isCompoundFile = Convert.ToBoolean(ReadString(SimpleTextSegmentInfoWriter.SI_USECOMPOUND.Length, scratch), CultureInfo.InvariantCulture);

        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_NUM_DIAG));
        int diagnosticCount = Convert.ToInt32(ReadString(SimpleTextSegmentInfoWriter.SI_NUM_DIAG.Length, scratch), CultureInfo.InvariantCulture);

        // Diagnostics are stored as alternating key / value lines.
        IDictionary<string, string> diagnostics = new Dictionary<string, string>();
        for (int diagIndex = 0; diagIndex < diagnosticCount; diagIndex++)
        {
            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_DIAG_KEY));
            string diagKey = ReadString(SimpleTextSegmentInfoWriter.SI_DIAG_KEY.Length, scratch);

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_DIAG_VALUE));
            string diagValue = ReadString(SimpleTextSegmentInfoWriter.SI_DIAG_VALUE.Length, scratch);

            diagnostics[diagKey] = diagValue;
        }

        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_NUM_FILES));
        int fileCount = Convert.ToInt32(ReadString(SimpleTextSegmentInfoWriter.SI_NUM_FILES.Length, scratch), CultureInfo.InvariantCulture);

        var files = new HashSet<string>();
        for (int fileIndex = 0; fileIndex < fileCount; fileIndex++)
        {
            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_FILE));
            files.Add(ReadString(SimpleTextSegmentInfoWriter.SI_FILE.Length, scratch));
        }

        SimpleTextUtil.CheckFooter(input);

        var segmentInfo = new SegmentInfo(directory, version, segmentName, docCount, isCompoundFile, null, diagnostics)
        {
            Files = files
        };
        completed = true;
        return segmentInfo;
    }
    finally
    {
        if (completed)
        {
            input.Dispose();
        }
        else
        {
            IOUtils.CloseWhileHandlingException(input);
        }
    }
}
/// <summary>
/// Writes <paramref name="s"/> to <c>_output</c> through
/// <c>SimpleTextUtil.Write</c>, using <c>_scratch</c> as the scratch buffer.
/// </summary>
private void Write(string s) => SimpleTextUtil.Write(_output, s, _scratch);
/// <summary>
/// Writes <paramref name="infos"/> as a text field-infos file: the field count,
/// then for every field its name, number, indexed flag, index options (indexed
/// fields only), term-vector / payload / norms flags, norms type, doc values type,
/// doc values generation and attributes, followed by a checksum.
/// </summary>
/// <param name="directory">Directory the file is created in.</param>
/// <param name="segmentName">Segment name used to derive the file name.</param>
/// <param name="segmentSuffix">Segment suffix used to derive the file name.</param>
/// <param name="infos">The field infos to write.</param>
/// <param name="context">I/O context passed to the directory when creating the file.</param>
public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
{
    var infosFileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, FIELD_INFOS_EXTENSION);
    var output = directory.CreateOutput(infosFileName, context);
    var scratch = new BytesRef();
    var invariantText = CultureInfo.InvariantCulture.TextInfo;

    var completed = false;
    try
    {
        SimpleTextUtil.Write(output, NUMFIELDS);
        SimpleTextUtil.Write(output, infos.Count.ToString(CultureInfo.InvariantCulture), scratch);
        SimpleTextUtil.WriteNewline(output);

        foreach (FieldInfo fi in infos)
        {
            SimpleTextUtil.Write(output, NAME);
            SimpleTextUtil.Write(output, fi.Name, scratch);
            SimpleTextUtil.WriteNewline(output);

            SimpleTextUtil.Write(output, NUMBER);
            SimpleTextUtil.Write(output, fi.Number.ToString(CultureInfo.InvariantCulture), scratch);
            SimpleTextUtil.WriteNewline(output);

            // Booleans are written lower-cased.
            SimpleTextUtil.Write(output, ISINDEXED);
            SimpleTextUtil.Write(output, invariantText.ToLower(fi.IsIndexed.ToString()), scratch);
            SimpleTextUtil.WriteNewline(output);

            if (fi.IsIndexed)
            {
                Debug.Assert(fi.IndexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.HasPayloads);

                string indexOptionsText;
                if (fi.IndexOptions != IndexOptions.NONE)
                {
                    indexOptionsText = fi.IndexOptions.ToString();
                }
                else
                {
                    indexOptionsText = string.Empty;
                }
                SimpleTextUtil.Write(output, INDEXOPTIONS);
                SimpleTextUtil.Write(output, indexOptionsText, scratch);
                SimpleTextUtil.WriteNewline(output);
            }

            SimpleTextUtil.Write(output, STORETV);
            SimpleTextUtil.Write(output, invariantText.ToLower(fi.HasVectors.ToString()), scratch);
            SimpleTextUtil.WriteNewline(output);

            SimpleTextUtil.Write(output, PAYLOADS);
            SimpleTextUtil.Write(output, invariantText.ToLower(fi.HasPayloads.ToString()), scratch);
            SimpleTextUtil.WriteNewline(output);

            // The file stores "has norms", i.e. the inverse of OmitsNorms.
            bool hasNorms = !fi.OmitsNorms;
            SimpleTextUtil.Write(output, NORMS);
            SimpleTextUtil.Write(output, invariantText.ToLower(hasNorms.ToString()), scratch);
            SimpleTextUtil.WriteNewline(output);

            SimpleTextUtil.Write(output, NORMS_TYPE);
            SimpleTextUtil.Write(output, GetDocValuesType(fi.NormType), scratch);
            SimpleTextUtil.WriteNewline(output);

            SimpleTextUtil.Write(output, DOCVALUES);
            SimpleTextUtil.Write(output, GetDocValuesType(fi.DocValuesType), scratch);
            SimpleTextUtil.WriteNewline(output);

            SimpleTextUtil.Write(output, DOCVALUES_GEN);
            SimpleTextUtil.Write(output, fi.DocValuesGen.ToString(CultureInfo.InvariantCulture), scratch);
            SimpleTextUtil.WriteNewline(output);

            IDictionary<string, string> attributes = fi.Attributes;
            int attributeCount = 0;
            if (attributes != null)
            {
                attributeCount = attributes.Count;
            }
            SimpleTextUtil.Write(output, NUM_ATTS);
            SimpleTextUtil.Write(output, attributeCount.ToString(CultureInfo.InvariantCulture), scratch);
            SimpleTextUtil.WriteNewline(output);

            // Attributes, when present, are written as alternating key / value lines.
            if (attributeCount > 0 && attributes != null)
            {
                foreach (var pair in attributes)
                {
                    SimpleTextUtil.Write(output, ATT_KEY);
                    SimpleTextUtil.Write(output, pair.Key, scratch);
                    SimpleTextUtil.WriteNewline(output);

                    SimpleTextUtil.Write(output, ATT_VALUE);
                    SimpleTextUtil.Write(output, pair.Value, scratch);
                    SimpleTextUtil.WriteNewline(output);
                }
            }
        }

        SimpleTextUtil.WriteChecksum(output, scratch);
        completed = true;
    }
    finally
    {
        if (!completed)
        {
            IOUtils.CloseWhileHandlingException(output);
        }
        else
        {
            output.Dispose();
        }
    }
}
/// <summary>
/// Writes <paramref name="bytes"/> to <c>_output</c> through
/// <c>SimpleTextUtil.Write</c>.
/// </summary>
private void Write(BytesRef bytes) => SimpleTextUtil.Write(_output, bytes);
/// <summary>
/// Reads the next line from <c>data</c> into <c>scratch</c>.
/// Used only in ctor.
/// </summary>
private void ReadLine() => SimpleTextUtil.ReadLine(data, scratch);
/// <summary>
/// Writes a newline to <c>_output</c> through <c>SimpleTextUtil.WriteNewline</c>.
/// </summary>
private void NewLine() => SimpleTextUtil.WriteNewline(_output);
/// <summary>
/// Writes a SORTED doc values field as text: a header (value count, maximum value
/// length, length pattern, ord pattern), then every value prefixed by its
/// zero-padded length and space-padded to the maximum length, then one zero-padded
/// line per entry of <paramref name="docToOrd"/> holding the ord plus one.
/// </summary>
/// <param name="field">Field being written; asserted to be of type SORTED.</param>
/// <param name="values">The values; enumerated twice (once to size, once to write).</param>
/// <param name="docToOrd">Ord for each document; each is written incremented by one.</param>
public override void AddSortedField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd)
{
    Debug.Assert(FieldSeen(field.Name));
    Debug.Assert(field.DocValuesType == DocValuesType.SORTED);
    WriteFieldEntry(field, DocValuesType.SORTED);

    // Sizing pass over the values.
    int totalValues = 0;
    int longest = -1;
    foreach (BytesRef candidate in values)
    {
        if (candidate.Length > longest)
        {
            longest = candidate.Length;
        }
        totalValues++;
    }

    // write numValues
    SimpleTextUtil.Write(data, NUMVALUES);
    SimpleTextUtil.Write(data, totalValues.ToString(CultureInfo.InvariantCulture), scratch);
    SimpleTextUtil.WriteNewline(data);

    // write maxLength
    string longestText = longest.ToString(CultureInfo.InvariantCulture);
    SimpleTextUtil.Write(data, MAXLENGTH);
    SimpleTextUtil.Write(data, longestText, scratch);
    SimpleTextUtil.WriteNewline(data);

    // write our pattern for encoding lengths: one '0' per character of maxLength
    string lengthFormat = new string('0', longestText.Length);
    SimpleTextUtil.Write(data, PATTERN);
    SimpleTextUtil.Write(data, lengthFormat, scratch);
    SimpleTextUtil.WriteNewline(data);

    // write our pattern for ords: one '0' per character of (valueCount + 1)
    int ordDigits = (totalValues + 1L).ToString(CultureInfo.InvariantCulture).Length;
    string ordFormat = new string('0', ordDigits);
    SimpleTextUtil.Write(data, ORDPATTERN);
    SimpleTextUtil.Write(data, ordFormat, scratch);
    SimpleTextUtil.WriteNewline(data);

    // for asserts:
    int seen = 0;

    foreach (BytesRef current in values)
    {
        // write length
        SimpleTextUtil.Write(data, LENGTH);
        SimpleTextUtil.Write(data, current.Length.ToString(lengthFormat, CultureInfo.InvariantCulture), scratch);
        SimpleTextUtil.WriteNewline(data);

        // write bytes -- don't use SimpleText.Write
        // because it escapes:
        data.WriteBytes(current.Bytes, current.Offset, current.Length);

        // pad to fit
        int remaining = longest - current.Length;
        while (remaining > 0)
        {
            data.WriteByte((byte)' ');
            remaining--;
        }
        SimpleTextUtil.WriteNewline(data);

        seen++;
        Debug.Assert(seen <= totalValues);
    }

    Debug.Assert(seen == totalValues);

    foreach (var ord in docToOrd)
    {
        // Stored shifted by one; a null ord stays null after the shift and is written as 0.
        long? shifted = ord + 1;
        SimpleTextUtil.Write(data, shifted.GetValueOrDefault().ToString(ordFormat, CultureInfo.InvariantCulture), scratch);
        SimpleTextUtil.WriteNewline(data);
    }
}
/// <summary>
/// Writes a SORTED doc values field as text: a header (value count, maximum value
/// length, length pattern, ord pattern), then every value prefixed by its
/// zero-padded length and space-padded to the maximum length, then one zero-padded
/// line per entry of <paramref name="docToOrd"/> holding the ord plus one.
/// </summary>
/// <remarks>
/// Numbers are formatted with .NET zero-placeholder format strings under the
/// invariant culture; this replaces the Java <c>DecimalFormat</c> / <c>Locale.ROOT</c>
/// calls, which have no .NET counterpart.
/// </remarks>
/// <param name="field">Field being written; asserted to be of type SORTED.</param>
/// <param name="values">The values; enumerated twice (once to size, once to write).</param>
/// <param name="docToOrd">Ord for each document; each is written incremented by one.</param>
public override void AddSortedField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long> docToOrd)
{
    Debug.Assert(FieldSeen(field.Name));
    Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.SORTED);
    WriteFieldEntry(field, FieldInfo.DocValuesType_e.SORTED);

    // Fully qualified so this block does not depend on a using directive.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    int valueCount = 0;
    int maxLength = -1;
    foreach (BytesRef value in values)
    {
        maxLength = Math.Max(maxLength, value.Length);
        valueCount++;
    }

    // write numValues
    SimpleTextUtil.Write(data, NUMVALUES);
    SimpleTextUtil.Write(data, valueCount.ToString(invariant), scratch);
    SimpleTextUtil.WriteNewline(data);

    // write maxLength
    SimpleTextUtil.Write(data, MAXLENGTH);
    SimpleTextUtil.Write(data, maxLength.ToString(invariant), scratch);
    SimpleTextUtil.WriteNewline(data);

    int maxBytesLength = maxLength.ToString(invariant).Length;
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < maxBytesLength; i++)
    {
        sb.Append('0');
    }

    // write our pattern for encoding lengths
    string encoderFormat = sb.ToString();
    SimpleTextUtil.Write(data, PATTERN);
    SimpleTextUtil.Write(data, encoderFormat, scratch);
    SimpleTextUtil.WriteNewline(data);

    int maxOrdBytes = (valueCount + 1L).ToString(invariant).Length;
    sb.Length = 0;
    for (int i = 0; i < maxOrdBytes; i++)
    {
        sb.Append('0');
    }

    // write our pattern for ords
    string ordEncoderFormat = sb.ToString();
    SimpleTextUtil.Write(data, ORDPATTERN);
    SimpleTextUtil.Write(data, ordEncoderFormat, scratch);
    SimpleTextUtil.WriteNewline(data);

    // for asserts:
    int valuesSeen = 0;

    foreach (BytesRef value in values)
    {
        // write length (a run of '0' placeholders zero-pads to the pattern width)
        SimpleTextUtil.Write(data, LENGTH);
        SimpleTextUtil.Write(data, value.Length.ToString(encoderFormat, invariant), scratch);
        SimpleTextUtil.WriteNewline(data);

        // write bytes -- don't use SimpleText.Write
        // because it escapes:
        data.WriteBytes(value.Bytes, value.Offset, value.Length);

        // pad to fit
        for (int i = value.Length; i < maxLength; i++)
        {
            data.WriteByte((sbyte)' ');
        }
        SimpleTextUtil.WriteNewline(data);
        valuesSeen++;
        Debug.Assert(valuesSeen <= valueCount);
    }

    Debug.Assert(valuesSeen == valueCount);

    foreach (var ord in docToOrd)
    {
        SimpleTextUtil.Write(data, (ord + 1).ToString(ordEncoderFormat, invariant), scratch);
        SimpleTextUtil.WriteNewline(data);
    }
}