Beispiel #1
0
        /// <summary>
        /// Writes one SORTED_SET doc-values field to <c>data</c> as fixed-width text:
        /// a header (value count, max value length, length pattern, ord-list pattern),
        /// then every value as a zero-padded length line followed by its bytes padded
        /// with spaces to the max length, then one comma-separated, space-padded ord
        /// list per document.
        /// </summary>
        /// <param name="field">Field being written; debug-asserted to be of type SORTED_SET.</param>
        /// <param name="values">The values; enumerated twice (once to measure, once to write).</param>
        /// <param name="docToOrdCount">For each document, how many entries to consume from
        /// <paramref name="ords"/>; enumerated twice.</param>
        /// <param name="ords">Ordinals of all documents in one flat sequence; enumerated twice.</param>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="ords"/> ends before the counts in <paramref name="docToOrdCount"/> are satisfied.
        /// </exception>
        public override void AddSortedSetField(FieldInfo field, IEnumerable <BytesRef> values,
                                               IEnumerable <long> docToOrdCount, IEnumerable <long> ords)
        {
            Debug.Assert(FieldSeen(field.Name));
            Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.SORTED_SET);
            WriteFieldEntry(field, FieldInfo.DocValuesType_e.SORTED_SET);

            // The file is machine-readable, so numbers must never depend on the current culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // first pass over the values: count them and find the longest one
            long valueCount = 0;
            int  maxLength  = 0;

            foreach (BytesRef value in values)
            {
                maxLength = Math.Max(maxLength, value.Length);
                valueCount++;
            }

            // write numValues
            SimpleTextUtil.Write(data, NUMVALUES);
            SimpleTextUtil.Write(data, valueCount.ToString(invariant), scratch);
            SimpleTextUtil.WriteNewline(data);

            // write maxLength
            string maxLengthText = maxLength.ToString(invariant);
            SimpleTextUtil.Write(data, MAXLENGTH);
            SimpleTextUtil.Write(data, maxLengthText, scratch);
            SimpleTextUtil.WriteNewline(data);

            // One '0' per decimal digit of maxLength. Used as a custom numeric format string,
            // it zero-pads every length to the same width (the .NET equivalent of DecimalFormat).
            string lengthPattern = new string('0', maxLengthText.Length);

            // write our pattern for encoding lengths
            SimpleTextUtil.Write(data, PATTERN);
            SimpleTextUtil.Write(data, lengthPattern, scratch);
            SimpleTextUtil.WriteNewline(data);

            // compute ord pattern: we encode all ord lists for all docs once just to find the maximum length
            int maxOrdListLength = 0;
            var ordList          = new StringBuilder();

            using (IEnumerator<long> ordStream = ords.GetEnumerator())
            {
                foreach (long n in docToOrdCount)
                {
                    ordList.Length = 0;
                    AppendOrdList(ordList, ordStream, (int)n);
                    maxOrdListLength = Math.Max(maxOrdListLength, ordList.Length);
                }
            }

            // write our pattern for ord lists
            SimpleTextUtil.Write(data, ORDPATTERN);
            SimpleTextUtil.Write(data, new string('X', maxOrdListLength), scratch);
            SimpleTextUtil.WriteNewline(data);

            // for asserts:
            long valuesSeen = 0;

            foreach (BytesRef value in values)
            {
                // write length
                SimpleTextUtil.Write(data, LENGTH);
                SimpleTextUtil.Write(data, value.Length.ToString(lengthPattern, invariant), scratch);
                SimpleTextUtil.WriteNewline(data);

                // write bytes -- don't use SimpleText.Write
                // because it escapes:
                data.WriteBytes(value.Bytes, value.Offset, value.Length);

                // pad to fit
                for (int i = value.Length; i < maxLength; i++)
                {
                    data.WriteByte((byte)' ');
                }
                SimpleTextUtil.WriteNewline(data);
                valuesSeen++;
                Debug.Assert(valuesSeen <= valueCount);
            }

            Debug.Assert(valuesSeen == valueCount);

            // write the ords for each doc comma-separated
            using (IEnumerator<long> ordStream = ords.GetEnumerator())
            {
                foreach (long n in docToOrdCount)
                {
                    ordList.Length = 0;
                    AppendOrdList(ordList, ordStream, (int)n);

                    // now pad to fit: these are numbers so spaces work well. reader calls trim()
                    int numPadding = maxOrdListLength - ordList.Length;
                    if (numPadding > 0)
                    {
                        ordList.Append(' ', numPadding);
                    }
                    SimpleTextUtil.Write(data, ordList.ToString(), scratch);
                    SimpleTextUtil.WriteNewline(data);
                }
            }
        }

        // Appends the next `count` ords pulled from ordStream to sb as a comma-separated list.
        private static void AppendOrdList(StringBuilder sb, IEnumerator<long> ordStream, int count)
        {
            for (int i = 0; i < count; i++)
            {
                if (!ordStream.MoveNext())
                {
                    throw new InvalidOperationException("ords ended before the counts in docToOrdCount were satisfied");
                }
                if (sb.Length > 0)
                {
                    sb.Append(',');
                }
                sb.Append(ordStream.Current.ToString(System.Globalization.CultureInfo.InvariantCulture));
            }
        }
Beispiel #2
0
 /// <summary>
 /// Reads the next line from <c>_input</c> into <c>_scratch</c> by delegating to
 /// <c>SimpleTextUtil.ReadLine</c>.
 /// </summary>
 private void ReadLine() => SimpleTextUtil.ReadLine(_input, _scratch);
Beispiel #3
0
        /// <summary>
        /// Writes one BINARY doc-values field to <c>data</c> as fixed-width text: a header
        /// (max length and zero-pattern for lengths) followed by one record per value.
        /// Each record is a zero-padded length line, the raw bytes padded with spaces to
        /// the max length, and a flag line that is "T" for a real value and "F" for null.
        /// </summary>
        /// <param name="field">Field being written; debug-asserted to be of type BINARY.</param>
        /// <param name="values">Values to write; null entries are allowed. Enumerated twice.</param>
        public override void AddBinaryField(FieldInfo field, IEnumerable <BytesRef> values)
        {
            Debug.Assert(FieldSeen(field.Name));
            Debug.Assert(field.DocValuesType == DocValuesType.BINARY);

            // Pass 1: find the widest value (null counts as empty).
            int widest = 0;
            foreach (BytesRef candidate in values)
            {
                int candidateLength = candidate == null ? 0 : candidate.Length;
                widest = Math.Max(widest, candidateLength);
            }

            WriteFieldEntry(field, DocValuesType.BINARY);

            // Header line: the max length itself.
            string widestText = widest.ToString(CultureInfo.InvariantCulture);
            SimpleTextUtil.Write(data, MAXLENGTH);
            SimpleTextUtil.Write(data, widestText, scratch);
            SimpleTextUtil.WriteNewline(data);

            // Header line: one '0' per digit of the max length, used to zero-pad every length.
            string lengthFormat = new string('0', widestText.Length);
            SimpleTextUtil.Write(data, PATTERN);
            SimpleTextUtil.Write(data, lengthFormat, scratch);
            SimpleTextUtil.WriteNewline(data);

            // Pass 2: emit one fixed-width record per value.
            int written = 0;
            foreach (BytesRef current in values)
            {
                bool missing       = current == null;
                int  currentLength = missing ? 0 : current.Length;

                SimpleTextUtil.Write(data, LENGTH);
                SimpleTextUtil.Write(data, currentLength.ToString(lengthFormat, CultureInfo.InvariantCulture), scratch);
                SimpleTextUtil.WriteNewline(data);

                // Raw bytes go straight to the output: SimpleTextUtil.Write would escape them.
                if (!missing)
                {
                    data.WriteBytes(current.Bytes, current.Offset, current.Length);
                }

                // Space-pad up to the fixed width.
                for (int remaining = widest - currentLength; remaining > 0; remaining--)
                {
                    data.WriteByte((byte)' ');
                }
                SimpleTextUtil.WriteNewline(data);

                SimpleTextUtil.Write(data, missing ? "F" : "T", scratch);
                SimpleTextUtil.WriteNewline(data);

                written++;
            }

            Debug.Assert(numDocs == written);
        }
            /// <summary>
            /// Scans the lines starting at <c>_termsStart</c> on a clone of the outer input and
            /// builds <c>_fst</c>, which maps each term to the pair
            /// (position right after its TERM line, (docFreq, totalTermFreq)).
            /// While scanning it also accumulates <c>_sumDocFreq</c>, <c>_sumTotalTermFreq</c>,
            /// <c>_termCount</c> and, from the set of doc ids seen, <c>_docCount</c>.
            /// Scanning stops at the END line or at the next FIELD line.
            /// </summary>
            private void LoadTerms()
            {
                var posIntOutputs = PositiveInt32Outputs.Singleton;
                var outputsInner  = new PairOutputs <long?, long?>(posIntOutputs, posIntOutputs);
                var outputs       = new PairOutputs <long?, PairOutputs <long?, long?> .Pair>(posIntOutputs, outputsInner);

                // The FST output type is a nested pair: (docsStart, (docFreq, totalTermFreq)).
                var b     = new Builder <PairOutputs <long?, PairOutputs <long?, long?> .Pair> .Pair>(FST.INPUT_TYPE.BYTE1, outputs);
                // Work on a clone so the outer instance's input is not repositioned by this scan.
                var input = (IndexInput)_outerInstance._input.Clone();

                input.Seek(_termsStart);

                // State of the term currently being scanned; lastDocsStart == -1 means "no term seen yet".
                var  lastTerm      = new BytesRef(10);
                long lastDocsStart = -1;
                int  docFreq       = 0;
                long totalTermFreq = 0;
                var  visitedDocs   = new FixedBitSet(_maxDoc);

                var scratchIntsRef = new Int32sRef();

                while (true)
                {
                    SimpleTextUtil.ReadLine(input, _scratch);
                    if (_scratch.Equals(SimpleTextFieldsWriter.END) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD))
                    {
                        // End of this field's terms: flush the pending term (if any) and stop.
                        if (lastDocsStart != -1)
                        {
                            b.Add(Util.ToInt32sRef(lastTerm, scratchIntsRef),
                                  outputs.NewPair(lastDocsStart, outputsInner.NewPair(docFreq, totalTermFreq)));
                            _sumTotalTermFreq += totalTermFreq;
                        }
                        break;
                    }

                    if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC))
                    {
                        // DOC line: one more document for the current term; parse the id after the prefix.
                        docFreq++;
                        _sumDocFreq++;
                        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length, _scratch.Length - SimpleTextFieldsWriter.DOC.Length,
                                                _scratchUtf16);
                        int docId = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                        visitedDocs.Set(docId);
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
                    {
                        // FREQ line: add the parsed frequency to the current term's total.
                        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length,
                                                _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16);
                        totalTermFreq += ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM))
                    {
                        // TERM line: flush the previous term (if any), then start tracking the new one.
                        if (lastDocsStart != -1)
                        {
                            b.Add(Util.ToInt32sRef(lastTerm, scratchIntsRef),
                                  outputs.NewPair(lastDocsStart, outputsInner.NewPair(docFreq, totalTermFreq)));
                        }
                        // Position just past the TERM line that was read above.
                        lastDocsStart = input.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                        // Copy the bytes after the TERM prefix into lastTerm, growing it when needed.
                        int len = _scratch.Length - SimpleTextFieldsWriter.TERM.Length;
                        if (len > lastTerm.Length)
                        {
                            lastTerm.Grow(len);
                        }
                        Array.Copy(_scratch.Bytes, SimpleTextFieldsWriter.TERM.Length, lastTerm.Bytes, 0, len);
                        lastTerm.Length    = len;
                        docFreq            = 0;
                        // Adds the previous term's total (still 0 when this is the first term).
                        _sumTotalTermFreq += totalTermFreq;
                        totalTermFreq      = 0;
                        _termCount++;
                    }
                    // Any other line falls through untouched and the scan moves to the next line.
                }
                _docCount = visitedDocs.Cardinality;
                _fst      = b.Finish();
            }
Beispiel #5
0
        /// <summary>
        /// Writes the segment-info file for <paramref name="si"/> as plain text: version,
        /// doc count, compound-file flag, diagnostics key/value pairs, the segment's file
        /// list, and a trailing checksum. The file name is registered on the segment
        /// before the output is created.
        /// </summary>
        /// <param name="dir">Directory in which the file is created (and deleted again on failure).</param>
        /// <param name="si">Segment whose metadata is written.</param>
        /// <param name="fis">Not used by this method.</param>
        /// <param name="ioContext">Context passed to <c>dir.CreateOutput</c>.</param>
        public override void Write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext)
        {
            var siFileName = IndexFileNames.SegmentFileName(si.Name, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
            si.AddFile(siFileName);

            var output    = dir.CreateOutput(siFileName, ioContext);
            var completed = false;

            try
            {
                var scratch = new BytesRef();

                // version
                SimpleTextUtil.Write(output, SI_VERSION);
                SimpleTextUtil.Write(output, si.Version, scratch);
                SimpleTextUtil.WriteNewline(output);

                // document count
                var docCountText = Convert.ToString(si.DocCount, CultureInfo.InvariantCulture);
                SimpleTextUtil.Write(output, SI_DOCCOUNT);
                SimpleTextUtil.Write(output, docCountText, scratch);
                SimpleTextUtil.WriteNewline(output);

                // compound-file flag, lower-cased
                var compoundText = Convert.ToString(si.UseCompoundFile, CultureInfo.InvariantCulture).ToLowerInvariant();
                SimpleTextUtil.Write(output, SI_USECOMPOUND);
                SimpleTextUtil.Write(output, compoundText, scratch);
                SimpleTextUtil.WriteNewline(output);

                // diagnostics: a count line, then key/value line pairs
                IDictionary <string, string> diagnostics = si.Diagnostics;
                int diagnosticCount = diagnostics != null ? diagnostics.Count : 0;
                SimpleTextUtil.Write(output, SI_NUM_DIAG);
                SimpleTextUtil.Write(output, Convert.ToString(diagnosticCount, CultureInfo.InvariantCulture), scratch);
                SimpleTextUtil.WriteNewline(output);

                if (diagnosticCount > 0)
                {
                    foreach (var entry in diagnostics)
                    {
                        SimpleTextUtil.Write(output, SI_DIAG_KEY);
                        SimpleTextUtil.Write(output, entry.Key, scratch);
                        SimpleTextUtil.WriteNewline(output);

                        SimpleTextUtil.Write(output, SI_DIAG_VALUE);
                        SimpleTextUtil.Write(output, entry.Value, scratch);
                        SimpleTextUtil.WriteNewline(output);
                    }
                }

                // file list: a count line, then one line per file
                var segmentFiles = si.GetFiles();
                var fileCount    = segmentFiles != null ? segmentFiles.Count : 0;
                SimpleTextUtil.Write(output, SI_NUM_FILES);
                SimpleTextUtil.Write(output, Convert.ToString(fileCount, CultureInfo.InvariantCulture), scratch);
                SimpleTextUtil.WriteNewline(output);

                if (fileCount > 0)
                {
                    foreach (var file in segmentFiles)
                    {
                        SimpleTextUtil.Write(output, SI_FILE);
                        SimpleTextUtil.Write(output, file, scratch);
                        SimpleTextUtil.WriteNewline(output);
                    }
                }

                SimpleTextUtil.WriteChecksum(output, scratch);
                completed = true;
            }
            finally
            {
                if (completed)
                {
                    output.Dispose();
                }
                else
                {
                    // Something failed above: close quietly and try to remove the partial file.
                    IOUtils.DisposeWhileHandlingException(output);
                    try
                    {
                        dir.DeleteFile(siFileName);
                    }
                    catch (Exception)
                    {
                        // Deliberately ignored so that the original exception is the one that propagates.
                    }
                }
            }
        }
Beispiel #6
0
            /// <summary>
            /// Scans the lines starting at <c>termsStart</c> on a clone of the outer input and
            /// builds <c>fst</c>, which maps each term to the pair
            /// (file pointer right after its TERM line, (docFreq, totalTermFreq)).
            /// While scanning it also accumulates <c>sumDocFreq</c>, <c>sumTotalTermFreq</c>,
            /// <c>termCount</c> and, from the set of doc ids seen, <c>docCount</c>.
            /// Scanning stops at the END line or at the next FIELD line.
            /// </summary>
            internal virtual void LoadTerms()
            {
                PositiveIntOutputs posIntOutputs = PositiveIntOutputs.Singleton;
                PairOutputs <long?, long?> outputsInner = new PairOutputs <long?, long?>(posIntOutputs, posIntOutputs);
                PairOutputs <long?, PairOutputs.Pair <long?, long?> > outputs =
                    new PairOutputs <long?, PairOutputs.Pair <long?, long?> >(posIntOutputs, outputsInner);

                // C# has no diamond operator, so the generic argument is spelled out in full.
                Builder <PairOutputs.Pair <long?, PairOutputs.Pair <long?, long?> > > b =
                    new Builder <PairOutputs.Pair <long?, PairOutputs.Pair <long?, long?> > >(FST.INPUT_TYPE.BYTE1, outputs);

                // Work on a clone so the outer instance's input is not repositioned by this scan.
                IndexInput @in = (IndexInput)outerInstance._input.Clone();

                @in.Seek(termsStart);

                // State of the term currently being scanned; lastDocsStart == -1 means "no term seen yet".
                BytesRef    lastTerm      = new BytesRef(10);
                long        lastDocsStart = -1;
                int         docFreq       = 0;
                long        totalTermFreq = 0;
                FixedBitSet visitedDocs   = new FixedBitSet(maxDoc);

                IntsRef scratchIntsRef = new IntsRef();

                while (true)
                {
                    SimpleTextUtil.ReadLine(@in, scratch);
                    if (scratch.Equals(END) || StringHelper.StartsWith(scratch, FIELD))
                    {
                        // End of this field's terms: flush the pending term (if any) and stop.
                        if (lastDocsStart != -1)
                        {
                            b.Add(Util.ToIntsRef(lastTerm, scratchIntsRef),
                                  outputs.NewPair(lastDocsStart, outputsInner.NewPair((long)docFreq, totalTermFreq)));
                            sumTotalTermFreq += totalTermFreq;
                        }
                        break;
                    }
                    else if (StringHelper.StartsWith(scratch, DOC))
                    {
                        // DOC line: one more document for the current term; parse the id after the prefix.
                        docFreq++;
                        sumDocFreq++;
                        UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + DOC.Length, scratch.Length - DOC.Length,
                                                scratchUTF16);
                        int docID = ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.Length);
                        visitedDocs.Set(docID);
                    }
                    else if (StringHelper.StartsWith(scratch, FREQ))
                    {
                        // FREQ line: add the parsed frequency to the current term's total.
                        UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + FREQ.Length,
                                                scratch.Length - FREQ.Length, scratchUTF16);
                        totalTermFreq += ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.Length);
                    }
                    else if (StringHelper.StartsWith(scratch, TERM))
                    {
                        // TERM line: flush the previous term (if any), then start tracking the new one.
                        if (lastDocsStart != -1)
                        {
                            b.Add(Util.ToIntsRef(lastTerm, scratchIntsRef),
                                  outputs.NewPair(lastDocsStart, outputsInner.NewPair((long)docFreq, totalTermFreq)));
                        }
                        // File pointer just past the TERM line that was read above.
                        lastDocsStart = @in.FilePointer;
                        // Copy the bytes after the TERM prefix into lastTerm, growing it when needed.
                        int len = scratch.Length - TERM.Length;
                        if (len > lastTerm.Length)
                        {
                            lastTerm.Grow(len);
                        }
                        Array.Copy(scratch.Bytes, TERM.Length, lastTerm.Bytes, 0, len);
                        lastTerm.Length   = len;
                        docFreq           = 0;
                        // Adds the previous term's total (still 0 when this is the first term).
                        sumTotalTermFreq += totalTermFreq;
                        totalTermFreq     = 0;
                        termCount++;
                    }
                }
                docCount = visitedDocs.Cardinality();
                fst      = b.Finish();
            }
            /// <summary>
            /// Advances to the next document that is live (or to any document when
            /// <c>_liveDocs</c> is null), starting the scan at <c>_nextDocStart</c>.
            /// On success the input is left positioned right after that document's FREQ
            /// line, <c>_nextDocStart</c> points at the line that ended the document, and
            /// <c>_tf</c> holds the parsed frequency.
            /// </summary>
            /// <returns>The document id, or <c>NO_MORE_DOCS</c> once a TERM, FIELD or END line is reached.</returns>
            public override int NextDoc()
            {
                _in.Seek(_nextDocStart);

                bool haveDoc        = false; // true once a DOC line has been parsed in this call
                long positionsStart = 0;     // position right after the most recent FREQ line

                for (;;)
                {
                    long lineStart = _in.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                    SimpleTextUtil.ReadLine(_in, _scratch);

                    if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC))
                    {
                        // A new DOC line ends the document parsed so far; hand it out if it is live.
                        if (haveDoc && (_liveDocs == null || _liveDocs.Get(_docId)))
                        {
                            _nextDocStart = lineStart;
                            _in.Seek(positionsStart);
                            return _docId;
                        }

                        var docPrefixLength = SimpleTextFieldsWriter.DOC.Length;
                        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + docPrefixLength,
                                                _scratch.Length - docPrefixLength, _scratchUtf16);
                        _docId  = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                        _tf     = 0;
                        haveDoc = true;
                        continue;
                    }

                    if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
                    {
                        var freqPrefixLength = SimpleTextFieldsWriter.FREQ.Length;
                        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + freqPrefixLength,
                                                _scratch.Length - freqPrefixLength, _scratchUtf16);
                        _tf            = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                        positionsStart = _in.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                        continue;
                    }

                    // Position, offset and payload lines are not needed while looking for the next doc.
                    if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS) ||
                        StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET) ||
                        StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET) ||
                        StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
                    {
                        continue;
                    }

                    // Anything else must end this term's postings.
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD) ||
                                         StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END));
                    }

                    if (haveDoc && (_liveDocs == null || _liveDocs.Get(_docId)))
                    {
                        _nextDocStart = lineStart;
                        _in.Seek(positionsStart);
                        return _docId;
                    }

                    _docId = NO_MORE_DOCS;
                    return _docId;
                }
            }
Beispiel #8
0
            /// <summary>
            /// Advances to the next document that is live (or to any document when
            /// <c>_liveDocs</c> is null). On success the input is rewound to the start of
            /// the line that ended the document and, unless <c>_omitTf</c> is set,
            /// <c>_tf</c> receives the frequency parsed for it.
            /// </summary>
            /// <returns>The document id, or <c>NO_MORE_DOCS</c> once a TERM, FIELD or END line
            /// is reached (and on every later call).</returns>
            public override int NextDoc()
            {
                if (_docId == NO_MORE_DOCS)
                {
                    return _docId;
                }

                int  pendingFreq = 0;     // frequency parsed for the document being collected
                bool haveDoc     = false; // true once a DOC line has been parsed in this call

                for (;;)
                {
                    long lineStart = _in.GetFilePointer();
                    SimpleTextUtil.ReadLine(_in, _scratch);

                    bool isDocLine = StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC);

                    if (!isDocLine)
                    {
                        if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
                        {
                            var freqPrefixLength = SimpleTextFieldsWriter.FREQ.Length;
                            UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + freqPrefixLength,
                                                    _scratch.Length - freqPrefixLength, _scratchUtf16);
                            pendingFreq = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                            continue;
                        }

                        // Position, offset and payload lines carry nothing this enumerator needs.
                        if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS) ||
                            StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET) ||
                            StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET) ||
                            StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
                        {
                            continue;
                        }

                        // Anything else must end this term's postings.
                        // LUCENENET TODO: This assert fails sometimes, which in turn causes _scratch.Utf8ToString()
                        // to throw an index out of range exception; that is why no "scratch=..." message is passed.
                        Debug.Assert(
                            StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD) ||
                            StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END));
                    }

                    // Either a new DOC line or a terminator: both end the document collected so far.
                    if (haveDoc && (_liveDocs == null || _liveDocs.Get(_docId)))
                    {
                        _in.Seek(lineStart);
                        if (!_omitTf)
                        {
                            _tf = pendingFreq;
                        }
                        return _docId;
                    }

                    if (!isDocLine)
                    {
                        _docId = NO_MORE_DOCS;
                        return _docId;
                    }

                    // Start collecting the document announced by this DOC line.
                    var docPrefixLength = SimpleTextFieldsWriter.DOC.Length;
                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + docPrefixLength,
                                            _scratch.Length - docPrefixLength, _scratchUtf16);
                    _docId      = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                    pendingFreq = 0;
                    haveDoc     = true;
                }
            }
Beispiel #9
0
            /// <summary>
            /// Reads the next position entry from the input: the POS line when
            /// <c>_readPositions</c> is set, the START_OFFSET / END_OFFSET lines when
            /// <c>_readOffsets</c> is set, and then an optional PAYLOAD line. When the line
            /// after the entry is not a payload, the input is rewound to before it and
            /// <c>_payload</c> is cleared.
            /// </summary>
            /// <returns>The parsed position, or -1 when positions are not being read.</returns>
            public override int NextPosition()
            {
                int position = -1;

                if (_readPositions)
                {
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS), () => "got line=" + _scratch.Utf8ToString());
                    }
                    var posPrefixLength = SimpleTextFieldsWriter.POS.Length;
                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + posPrefixLength,
                                            _scratch.Length - posPrefixLength, _scratchUtf162);
                    position = ArrayUtil.ParseInt32(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
                }

                if (_readOffsets)
                {
                    // start offset line
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET), () => "got line=" + _scratch.Utf8ToString());
                    }
                    var startPrefixLength = SimpleTextFieldsWriter.START_OFFSET.Length;
                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + startPrefixLength,
                                            _scratch.Length - startPrefixLength, _scratchUtf162);
                    _startOffset = ArrayUtil.ParseInt32(_scratchUtf162.Chars, 0, _scratchUtf162.Length);

                    // end offset line
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET), () => "got line=" + _scratch.Utf8ToString());
                    }
                    var endPrefixLength = SimpleTextFieldsWriter.END_OFFSET.Length;
                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + endPrefixLength,
                                            _scratch.Length - endPrefixLength, _scratchUtf162);
                    _endOffset = ArrayUtil.ParseInt32(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
                }

                // Peek at the following line: it belongs to this entry only when it is a payload.
                long beforePayload = _in.GetFilePointer();
                SimpleTextUtil.ReadLine(_in, _scratch);

                if (!StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
                {
                    _payload = null;
                    _in.Seek(beforePayload);
                    return position;
                }

                // Copy the bytes after the PAYLOAD prefix into the reusable _scratch2 buffer.
                int payloadLength = _scratch.Length - SimpleTextFieldsWriter.PAYLOAD.Length;
                if (_scratch2.Bytes.Length < payloadLength)
                {
                    _scratch2.Grow(payloadLength);
                }
                Array.Copy(_scratch.Bytes, SimpleTextFieldsWriter.PAYLOAD.Length, _scratch2.Bytes, 0, payloadLength);
                _scratch2.Length = payloadLength;
                _payload         = _scratch2;

                return position;
            }
        /// <summary>
        /// Writes one NUMERIC doc-values field to <c>data</c> as fixed-width text: the
        /// minimum value, a zero-pattern as wide as (max - min) in decimal, and then for
        /// every entry its zero-padded delta from the minimum followed by a flag line
        /// that is "T" for a real value and "F" for null (null is written as value 0).
        /// </summary>
        /// <param name="field">Field being written; debug-asserted to have NUMERIC doc values or NUMERIC norms.</param>
        /// <param name="values">Values to write; null entries are allowed. Enumerated twice.</param>
        public override void AddNumericField(FieldInfo field, IEnumerable <long?> values)
        {
            Debug.Assert(FieldSeen(field.Name));
            Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.NUMERIC ||
                         field.NormType == FieldInfo.DocValuesType_e.NUMERIC);
            WriteFieldEntry(field, FieldInfo.DocValuesType_e.NUMERIC);

            // Pass 1: establish the value range (null counts as 0).
            long lowest  = long.MaxValue;
            long highest = long.MinValue;

            foreach (long? candidate in values)
            {
                long current = candidate.HasValue ? candidate.Value : 0L;
                lowest  = Math.Min(lowest, current);
                highest = Math.Max(highest, current);
            }

            // The minimum goes to the .dat; every entry is stored as a delta from it.
            SimpleTextUtil.Write(data, MINVALUE);
            SimpleTextUtil.Write(data, lowest.ToString(CultureInfo.InvariantCulture), scratch);
            SimpleTextUtil.WriteNewline(data);

            // Fixed-width "simple text packed ints": BigInteger is used so max - min cannot overflow.
            BigInteger range        = (BigInteger)highest - lowest;
            int        digitCount   = range.ToString(CultureInfo.InvariantCulture).Length;
            string     deltaPattern = new string('0', digitCount);

            // write our pattern to the .dat
            SimpleTextUtil.Write(data, PATTERN);
            SimpleTextUtil.Write(data, deltaPattern, scratch);
            SimpleTextUtil.WriteNewline(data);

            // Pass 2: one delta line plus one presence-flag line per entry.
            int written = 0;

            foreach (long? entry in values)
            {
                bool missing = entry == null;
                long current = missing ? 0 : entry.Value;

                Debug.Assert(current >= lowest);

                BigInteger delta     = new BigInteger(current) - lowest;
                string     deltaText = delta.ToString(deltaPattern, CultureInfo.InvariantCulture);
                Debug.Assert(deltaText.Length == deltaPattern.Length);

                SimpleTextUtil.Write(data, deltaText, scratch);
                SimpleTextUtil.WriteNewline(data);
                SimpleTextUtil.Write(data, missing ? "F" : "T", scratch);
                SimpleTextUtil.WriteNewline(data);

                written++;
                Debug.Assert(written <= numDocs);
            }

            Debug.Assert(numDocs == written, "numDocs=" + numDocs + " numDocsWritten=" + written);
        }
Beispiel #11
0
 /// <summary>
 /// Writes <paramref name="b"/> to <c>output</c> by delegating to <c>SimpleTextUtil.Write</c>.
 /// </summary>
 /// <param name="b">Bytes to write.</param>
 private void Write(BytesRef b) => SimpleTextUtil.Write(output, b);
Beispiel #12
0
            /// <summary>
            /// Scans the lines that follow <c>termsStart</c> on a clone of the outer reader's input and
            /// builds the term FST. Each term maps to the pair
            /// (position right after its TERM line, (docFreq, totalTermFreq)).
            /// <para/>
            /// While scanning, this also accumulates <c>sumDocFreq</c>, <c>sumTotalTermFreq</c> and
            /// <c>termCount</c>, and finally sets <c>docCount</c> and <c>fst</c>.
            /// </summary>
            private void LoadTerms()
            {
                PositiveInt32Outputs positiveOutputs = PositiveInt32Outputs.Singleton;
                var statsOutputs = new PairOutputs<Int64, Int64>(positiveOutputs, positiveOutputs);
                var termOutputs  = new PairOutputs<Int64, PairOutputs<Int64, Int64>.Pair>(positiveOutputs, statsOutputs);
                var builder      = new Builder<PairOutputs<Int64, PairOutputs<Int64, Int64>.Pair>.Pair>(FST.INPUT_TYPE.BYTE1, termOutputs);

                IndexInput termsInput = (IndexInput)outerInstance.input.Clone();
                termsInput.Seek(termsStart);

                // State of the term currently being accumulated; pendingDocsStart == -1 means "no term yet".
                var  pendingTerm          = new BytesRef(10);
                long pendingDocsStart     = -1;
                int  pendingDocFreq       = 0;
                long pendingTotalTermFreq = 0;
                var  seenDocs             = new FixedBitSet(maxDoc);
                var  termAsInts           = new Int32sRef();

                for (;;)
                {
                    SimpleTextUtil.ReadLine(termsInput, scratch);

                    // END or the next FIELD line closes this field: flush the pending term and stop.
                    if (scratch.Equals(SimpleTextFieldsWriter.END) || StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.FIELD))
                    {
                        if (pendingDocsStart != -1)
                        {
                            builder.Add(Util.ToInt32sRef(pendingTerm, termAsInts),
                                        termOutputs.NewPair(pendingDocsStart,
                                                            statsOutputs.NewPair((long)pendingDocFreq, pendingTotalTermFreq)));
                            sumTotalTermFreq += pendingTotalTermFreq;
                        }
                        break;
                    }

                    if (StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.DOC))
                    {
                        pendingDocFreq++;
                        sumDocFreq++;
                        UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + SimpleTextFieldsWriter.DOC.Length, scratch.Length - SimpleTextFieldsWriter.DOC.Length, scratchUTF16);
                        int parsedDocId = ArrayUtil.ParseInt32(scratchUTF16.Chars, 0, scratchUTF16.Length);
                        seenDocs.Set(parsedDocId);
                        continue;
                    }

                    if (StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.FREQ))
                    {
                        UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + SimpleTextFieldsWriter.FREQ.Length, scratch.Length - SimpleTextFieldsWriter.FREQ.Length, scratchUTF16);
                        pendingTotalTermFreq += ArrayUtil.ParseInt32(scratchUTF16.Chars, 0, scratchUTF16.Length);
                        continue;
                    }

                    if (!StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.TERM))
                    {
                        // Any other line kind is ignored here.
                        continue;
                    }

                    // A new TERM line: flush the previous term (if any), then start accumulating this one.
                    if (pendingDocsStart != -1)
                    {
                        builder.Add(Util.ToInt32sRef(pendingTerm, termAsInts),
                                    termOutputs.NewPair(pendingDocsStart,
                                                        statsOutputs.NewPair((long)pendingDocFreq, pendingTotalTermFreq)));
                    }

                    pendingDocsStart = termsInput.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream

                    int termLength = scratch.Length - SimpleTextFieldsWriter.TERM.Length;
                    if (termLength > pendingTerm.Length)
                    {
                        pendingTerm.Grow(termLength);
                    }
                    System.Array.Copy(scratch.Bytes, SimpleTextFieldsWriter.TERM.Length, pendingTerm.Bytes, 0, termLength);
                    pendingTerm.Length = termLength;

                    pendingDocFreq        = 0;
                    sumTotalTermFreq     += pendingTotalTermFreq;
                    pendingTotalTermFreq  = 0;
                    termCount++;
                }

                docCount = seenDocs.Cardinality;
                fst      = builder.Finish();
            }
Beispiel #13
0
            /// <summary>
            /// Reads forward until the current document's lines are exhausted and returns that
            /// document's id if <c>liveDocs</c> is <c>null</c> or reports it as live; otherwise keeps
            /// scanning. Before returning a document, the input is rewound to the start of the line
            /// that terminated it, and <c>tf</c> is set to the parsed frequency unless <c>omitTF</c> is set.
            /// </summary>
            /// <returns>The next document id, or <c>NO_MORE_DOCS</c> once a non-document line ends the scan.</returns>
            public override int NextDoc()
            {
                if (docID == NO_MORE_DOCS)
                {
                    return docID;
                }

                bool haveDoc     = false; // true once a DOC line has been parsed during this call
                int  pendingFreq = 0;

                for (;;)
                {
                    long lineStart = input.Position;
                    SimpleTextUtil.ReadLine(input, scratch);

                    bool isDocLine = StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.DOC);
                    if (!isDocLine)
                    {
                        if (StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.FREQ))
                        {
                            UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + SimpleTextFieldsWriter.FREQ.Length, scratch.Length - SimpleTextFieldsWriter.FREQ.Length, scratchUTF16);
                            pendingFreq = ArrayUtil.ParseInt32(scratchUTF16.Chars, 0, scratchUTF16.Length);
                            continue;
                        }

                        // Position, offset and payload lines carry nothing this enum needs.
                        if (StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.POS) ||
                            StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.START_OFFSET) ||
                            StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.END_OFFSET) ||
                            StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.PAYLOAD))
                        {
                            continue;
                        }

                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(
                                StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.TERM) ||
                                StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.FIELD) ||
                                StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.END),
                                "scratch={0}", new BytesRefFormatter(scratch, BytesRefFormat.UTF8));
                        }
                    }

                    // Here the line is either the next DOC line or a terminator; both end the pending doc.
                    if (haveDoc && (liveDocs is null || liveDocs.Get(docID)))
                    {
                        input.Seek(lineStart);
                        if (!omitTF)
                        {
                            tf = pendingFreq;
                        }
                        return docID;
                    }

                    if (!isDocLine)
                    {
                        return docID = NO_MORE_DOCS;
                    }

                    UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + SimpleTextFieldsWriter.DOC.Length, scratch.Length - SimpleTextFieldsWriter.DOC.Length, scratchUTF16);
                    docID       = ArrayUtil.ParseInt32(scratchUTF16.Chars, 0, scratchUTF16.Length);
                    pendingFreq = 0;
                    haveDoc     = true;
                }
            }
Beispiel #14
0
        /// <summary>
        /// Writes a NUMERIC doc-values (or norms) field as text: a MINVALUE line, a PATTERN line made of
        /// '0' characters, and then for every value a zero-padded delta from the minimum followed by a
        /// "T" line.
        /// </summary>
        /// <param name="field">The field being written; must already have been registered via <c>FieldSeen</c>.</param>
        /// <param name="values">The per-document values. The sequence is enumerated twice.</param>
        public override void AddNumericField(FieldInfo field, IEnumerable<long> values)
        {
            Debug.Assert(FieldSeen(field.Name));
            Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.NUMERIC ||
                         field.NormType == FieldInfo.DocValuesType_e.NUMERIC);
            WriteFieldEntry(field, FieldInfo.DocValuesType_e.NUMERIC);

            // The file is machine-readable, so all number formatting uses the invariant culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // first pass to find min/max
            var minValue = long.MaxValue;
            var maxValue = long.MinValue;

            foreach (var v in values)
            {
                minValue = Math.Min(minValue, v);
                maxValue = Math.Max(maxValue, v);
            }

            // write our minimum value to the .dat, all entries are deltas from that
            SimpleTextUtil.Write(data, MINVALUE);
            SimpleTextUtil.Write(data, minValue.ToString(invariant), scratch);
            SimpleTextUtil.WriteNewline(data);

            // build up our fixed-width "simple text packed ints" format.
            // decimal is used because (max - min) can exceed the range of long.
            decimal range = (decimal)maxValue - (decimal)minValue;
            int maxBytesPerValue = range.ToString(invariant).Length;
            string patternString = new string('0', maxBytesPerValue);

            // write our pattern to the .dat
            SimpleTextUtil.Write(data, PATTERN);
            SimpleTextUtil.Write(data, patternString, scratch);
            SimpleTextUtil.WriteNewline(data);

            int numDocsWritten = 0;

            // second pass to write the values
            foreach (var value in values)
            {
                Debug.Assert(value >= minValue);

                // Subtract in decimal so that e.g. long.MaxValue - long.MinValue does not overflow.
                decimal delta = (decimal)value - (decimal)minValue;
                string s = delta.ToString(patternString, invariant);
                Debug.Assert(s.Length == patternString.Length);
                SimpleTextUtil.Write(data, s, scratch);
                SimpleTextUtil.WriteNewline(data);

                // The values are non-nullable, so every document has a value.
                SimpleTextUtil.Write(data, "T", scratch);
                SimpleTextUtil.WriteNewline(data);
                numDocsWritten++;
                Debug.Assert(numDocsWritten <= numDocs);
            }

            Debug.Assert(numDocs == numDocsWritten, "numDocs=" + numDocs + " numDocsWritten=" + numDocsWritten);
        }
Beispiel #15
0
            /// <summary>
            /// Reads forward until the current document's lines are exhausted and returns that
            /// document's id if <c>liveDocs</c> is <c>null</c> or reports it as live; otherwise keeps
            /// scanning. Before returning a document, the input is rewound to the start of the line
            /// that terminated it, and <c>tf</c> is set to the parsed frequency unless <c>omitTF</c> is set.
            /// </summary>
            /// <returns>The next document id, or <c>NO_MORE_DOCS</c> once a non-document line ends the scan.</returns>
            public override int NextDoc()
            {
                if (docID_Renamed == NO_MORE_DOCS)
                {
                    return(docID_Renamed);
                }
                bool first    = true;
                int  termFreq = 0;

                while (true)
                {
                    // Remember where this line begins so the stream can be rewound to it.
                    long lineStart = @in.FilePointer;
                    SimpleTextUtil.ReadLine(@in, scratch);
                    if (StringHelper.StartsWith(scratch, DOC))
                    {
                        // A new DOC line ends the previous document; return it if it is live.
                        if (!first && (liveDocs == null || liveDocs.Get(docID_Renamed)))
                        {
                            @in.Seek(lineStart);
                            if (!omitTF)
                            {
                                tf = termFreq;
                            }
                            return(docID_Renamed);
                        }
                        UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + DOC.Length, scratch.Length - DOC.Length,
                                                scratchUTF16);
                        docID_Renamed = ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.Length);
                        termFreq      = 0;
                        first         = false;
                    }
                    else if (StringHelper.StartsWith(scratch, FREQ))
                    {
                        UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + FREQ.Length,
                                                scratch.Length - FREQ.Length, scratchUTF16);
                        termFreq = ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.Length);
                    }
                    else if (StringHelper.StartsWith(scratch, POS))
                    {
                        // skip termFreq++;
                    }
                    else if (StringHelper.StartsWith(scratch, START_OFFSET))
                    {
                        // skip
                    }
                    else if (StringHelper.StartsWith(scratch, END_OFFSET))
                    {
                        // skip
                    }
                    else if (StringHelper.StartsWith(scratch, PAYLOAD))
                    {
                        // skip
                    }
                    else
                    {
                        // Anything else is expected to be a TERM, FIELD or END line, which ends this term's postings.
                        Debug.Assert(
                            StringHelper.StartsWith(scratch, TERM) || StringHelper.StartsWith(scratch, FIELD) ||
                            StringHelper.StartsWith(scratch, END), "scratch=" + scratch.Utf8ToString());
                        if (!first && (liveDocs == null || liveDocs.Get(docID_Renamed)))
                        {
                            @in.Seek(lineStart);
                            if (!omitTF)
                            {
                                tf = termFreq;
                            }
                            return(docID_Renamed);
                        }
                        return(docID_Renamed = NO_MORE_DOCS);
                    }
                }
            }
        /// <summary>
        /// Reads the next line from <c>data</c> into <c>scratch</c>. Used only in the constructor.
        /// </summary>
        private void readLine() => SimpleTextUtil.ReadLine(data, scratch);
Beispiel #17
0
            /// <summary>
            /// Seeks to <c>nextDocStart</c> and reads forward until the current document's lines are
            /// exhausted. Returns that document's id if <c>liveDocs</c> is <c>null</c> or reports it as
            /// live; otherwise keeps scanning. Before returning a document, <c>nextDocStart</c> is set to
            /// the start of the line that terminated it and the input is positioned at <c>posStart</c>,
            /// the position recorded right after the document's FREQ line.
            /// </summary>
            /// <returns>The next document id, or <c>NO_MORE_DOCS</c> once a non-document line ends the scan.</returns>
            public override int NextDoc()
            {
                bool first = true;

                @in.Seek(nextDocStart);
                long posStart = 0;

                while (true)
                {
                    // Remember where this line begins; it becomes the resume point for the next call.
                    long lineStart = @in.FilePointer;
                    SimpleTextUtil.ReadLine(@in, scratch);
                    //System.out.println("NEXT DOC: " + scratch.utf8ToString());
                    if (StringHelper.StartsWith(scratch, DOC))
                    {
                        // A new DOC line ends the previous document; return it if it is live.
                        if (!first && (liveDocs == null || liveDocs.Get(docID_Renamed)))
                        {
                            nextDocStart = lineStart;
                            @in.Seek(posStart);
                            return(docID_Renamed);
                        }
                        UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + DOC.Length, scratch.Length - DOC.Length,
                                                scratchUTF16);
                        docID_Renamed = ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.Length);
                        tf            = 0;
                        first         = false;
                    }
                    else if (StringHelper.StartsWith(scratch, FREQ))
                    {
                        UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + FREQ.Length,
                                                scratch.Length - FREQ.Length, scratchUTF16);
                        tf       = ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.Length);
                        posStart = @in.FilePointer;
                    }
                    else if (StringHelper.StartsWith(scratch, POS))
                    {
                        // skip
                    }
                    else if (StringHelper.StartsWith(scratch, START_OFFSET))
                    {
                        // skip
                    }
                    else if (StringHelper.StartsWith(scratch, END_OFFSET))
                    {
                        // skip
                    }
                    else if (StringHelper.StartsWith(scratch, PAYLOAD))
                    {
                        // skip
                    }
                    else
                    {
                        // Anything else is expected to be a TERM, FIELD or END line, which ends this term's postings.
                        Debug.Assert(StringHelper.StartsWith(scratch, TERM) || StringHelper.StartsWith(scratch, FIELD) ||
                                     StringHelper.StartsWith(scratch, END));

                        if (!first && (liveDocs == null || liveDocs.Get(docID_Renamed)))
                        {
                            nextDocStart = lineStart;
                            @in.Seek(posStart);
                            return(docID_Renamed);
                        }
                        return(docID_Renamed = NO_MORE_DOCS);
                    }
                }
            }
        /// <summary>
        /// Writes a SORTED_SET doc-values field as text: NUMVALUES, MAXLENGTH, a '0' PATTERN for the
        /// value lengths, an 'X' ORDPATTERN sized to the longest per-document ord list, then every value
        /// (length line plus space-padded bytes), and finally one space-padded, comma-separated ord list
        /// per document.
        /// </summary>
        /// <param name="field">The field being written; must already have been registered via <c>FieldSeen</c>.</param>
        /// <param name="values">The distinct values. Enumerated twice.</param>
        /// <param name="docToOrdCount">The number of ords for each document. Enumerated twice.</param>
        /// <param name="ords">The ords of all documents, concatenated. Enumerated twice.</param>
        public override void AddSortedSetField(FieldInfo field, IEnumerable<BytesRef> values,
                                               IEnumerable<long?> docToOrdCount, IEnumerable<long?> ords)
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(FieldSeen(field.Name));
                Debugging.Assert(field.DocValuesType == DocValuesType.SORTED_SET);
            }
            WriteFieldEntry(field, DocValuesType.SORTED_SET);

            long valueCount = 0;
            int  maxLength  = 0;

            foreach (var value in values)
            {
                maxLength = Math.Max(maxLength, value.Length);
                valueCount++;
            }

            // write numValues
            SimpleTextUtil.Write(data, NUMVALUES);
            SimpleTextUtil.Write(data, valueCount.ToString(CultureInfo.InvariantCulture), scratch);
            SimpleTextUtil.WriteNewline(data);

            // write maxLength
            string maxLengthText = maxLength.ToString(CultureInfo.InvariantCulture);
            SimpleTextUtil.Write(data, MAXLENGTH);
            SimpleTextUtil.Write(data, maxLengthText, scratch);
            SimpleTextUtil.WriteNewline(data);

            // write our pattern for encoding lengths: one '0' per digit of maxLength
            string encoderFormat = new string('0', maxLengthText.Length);
            SimpleTextUtil.Write(data, PATTERN);
            SimpleTextUtil.Write(data, encoderFormat, scratch);
            SimpleTextUtil.WriteNewline(data);

            // compute ord pattern: this is funny, we encode all values for all docs to find the maximum length
            var maxOrdListLength = 0;
            var ordList = new StringBuilder();

            using (var ordStream = ords.GetEnumerator())
            {
                foreach (var n in docToOrdCount)
                {
                    EncodeOrdList(ordList, ordStream, (int)n);
                    maxOrdListLength = Math.Max(maxOrdListLength, ordList.Length);
                }
            }

            // write our pattern for ord lists
            SimpleTextUtil.Write(data, ORDPATTERN);
            SimpleTextUtil.Write(data, new string('X', maxOrdListLength), scratch);
            SimpleTextUtil.WriteNewline(data);

            // for asserts:
            long valuesSeen = 0;

            foreach (var value in values)
            {
                // write length
                SimpleTextUtil.Write(data, LENGTH);
                SimpleTextUtil.Write(data, value.Length.ToString(encoderFormat, CultureInfo.InvariantCulture), scratch);
                SimpleTextUtil.WriteNewline(data);

                // write bytes -- don't use SimpleText.Write
                // because it escapes:
                data.WriteBytes(value.Bytes, value.Offset, value.Length);

                // pad to fit
                for (var i = value.Length; i < maxLength; i++)
                {
                    data.WriteByte((byte)' ');
                }
                SimpleTextUtil.WriteNewline(data);
                valuesSeen++;
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(valuesSeen <= valueCount);
                }
            }

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(valuesSeen == valueCount);
            }

            using (var ordStream = ords.GetEnumerator())
            {
                // write the ords for each doc comma-separated, using the same encoder as the
                // sizing pass so every row fits the ORDPATTERN width computed above
                foreach (var n in docToOrdCount)
                {
                    EncodeOrdList(ordList, ordStream, (int)n);

                    // now pad to fit: these are numbers so spaces work well. reader calls trim()
                    var numPadding = maxOrdListLength - ordList.Length;
                    if (numPadding > 0)
                    {
                        ordList.Append(' ', numPadding);
                    }
                    SimpleTextUtil.Write(data, ordList.ToString(), scratch);
                    SimpleTextUtil.WriteNewline(data);
                }
            }
        }

        /// <summary>
        /// Replaces the content of <paramref name="target"/> with the next <paramref name="count"/>
        /// entries of <paramref name="ordStream"/>, comma-separated and formatted with the invariant
        /// culture. A <c>null</c> entry is written as 0.
        /// </summary>
        private static void EncodeOrdList(StringBuilder target, IEnumerator<long?> ordStream, int count)
        {
            target.Length = 0;
            for (int i = 0; i < count; i++)
            {
                ordStream.MoveNext();
                if (target.Length > 0)
                {
                    target.Append(',');
                }
                target.Append(ordStream.Current.GetValueOrDefault().ToString(CultureInfo.InvariantCulture));
            }
        }
            /// <summary>
            /// Reads forward until the current document's lines are exhausted and returns that
            /// document's id if <c>_liveDocs</c> is <c>null</c> or reports it as live; otherwise keeps
            /// scanning. Before returning a document, the input is rewound to the start of the line
            /// that terminated it, and <c>_tf</c> is set to the parsed frequency unless <c>_omitTf</c> is set.
            /// </summary>
            /// <returns>The next document id, or <c>NO_MORE_DOCS</c> once a non-document line ends the scan.</returns>
            public override int NextDoc()
            {
                if (_docId == NO_MORE_DOCS)
                {
                    return _docId;
                }

                bool haveDoc     = false; // true once a DOC line has been parsed during this call
                int  pendingFreq = 0;

                for (;;)
                {
                    long lineStart = _in.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                    SimpleTextUtil.ReadLine(_in, _scratch);

                    bool isDocLine = StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC);
                    if (!isDocLine)
                    {
                        if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
                        {
                            UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length,
                                                    _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16);
                            pendingFreq = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                            continue;
                        }

                        // Position, offset and payload lines carry nothing this enum needs.
                        if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS) ||
                            StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET) ||
                            StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET) ||
                            StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
                        {
                            continue;
                        }

                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(
                                StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD) ||
                                StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END), "scratch={0}", _scratch.Utf8ToString());
                        }
                    }

                    // Here the line is either the next DOC line or a terminator; both end the pending doc.
                    if (haveDoc && (_liveDocs == null || _liveDocs.Get(_docId)))
                    {
                        _in.Seek(lineStart);
                        if (!_omitTf)
                        {
                            _tf = pendingFreq;
                        }
                        return _docId;
                    }

                    if (!isDocLine)
                    {
                        return _docId = NO_MORE_DOCS;
                    }

                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length, _scratch.Length - SimpleTextFieldsWriter.DOC.Length,
                                            _scratchUtf16);
                    _docId      = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                    pendingFreq = 0;
                    haveDoc     = true;
                }
            }
        /// <summary>
        /// Writes a NUMERIC doc-values (or norms) field as text: a MINVALUE line, a PATTERN line made of
        /// '0' characters, and then for every document a zero-padded delta from the minimum followed by
        /// "T" when the document has a value or "F" when its entry is <c>null</c> (written as value 0).
        /// </summary>
        /// <param name="field">The field being written; must already have been registered via <c>FieldSeen</c>.</param>
        /// <param name="values">The per-document values. The sequence is enumerated twice.</param>
        public override void AddNumericField(FieldInfo field, IEnumerable<long?> values)
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(FieldSeen(field.Name));
                Debugging.Assert(field.DocValuesType == DocValuesType.NUMERIC ||
                                 field.NormType == DocValuesType.NUMERIC);
            }
            WriteFieldEntry(field, DocValuesType.NUMERIC);

            // Pass 1: determine the value range (a null entry counts as 0).
            long smallest = long.MaxValue;
            long largest  = long.MinValue;

            foreach (long? candidate in values)
            {
                long current = candidate.GetValueOrDefault();
                if (current < smallest)
                {
                    smallest = current;
                }
                if (current > largest)
                {
                    largest = current;
                }
            }

            // The minimum goes to the .dat; every entry below is a delta from it.
            SimpleTextUtil.Write(data, MINVALUE);
            SimpleTextUtil.Write(data, smallest.ToString(CultureInfo.InvariantCulture), scratch);
            SimpleTextUtil.WriteNewline(data);

            // Fixed-width "simple text packed ints": one '0' per character of the widest delta.
            decimal range = (decimal)largest - (decimal)smallest; // LUCENENET specific - use decimal rather than BigInteger
            int width = range.ToString(CultureInfo.InvariantCulture).Length;
            string pattern = new string('0', width);

            SimpleTextUtil.Write(data, PATTERN);
            SimpleTextUtil.Write(data, pattern, scratch);
            SimpleTextUtil.WriteNewline(data);

            // Pass 2: emit one delta line and one presence line per document.
            int written = 0;

            foreach (long? entry in values)
            {
                long current = entry.GetValueOrDefault();

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(current >= smallest);
                }

                decimal offsetFromMin = (decimal)current - (decimal)smallest; // LUCENENET specific - use decimal rather than BigInteger
                string encoded = offsetFromMin.ToString(pattern, CultureInfo.InvariantCulture);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(encoded.Length == pattern.Length);
                }

                SimpleTextUtil.Write(data, encoded, scratch);
                SimpleTextUtil.WriteNewline(data);
                SimpleTextUtil.Write(data, entry.HasValue ? "T" : "F", scratch);
                SimpleTextUtil.WriteNewline(data);

                written++;
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(written <= numDocs);
                }
            }

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(numDocs == written, "numDocs={0} numDocsWritten={1}", numDocs, written);
            }
        }
            /// <summary>
            /// Reads the next position entry from the input. Depending on <c>_readPositions</c> and
            /// <c>_readOffsets</c> this consumes a POS line and a START_OFFSET/END_OFFSET line pair, then
            /// peeks at the following line: a PAYLOAD line is copied into <c>_scratch2</c> and exposed via
            /// <c>_payload</c>; any other line is pushed back by seeking and <c>_payload</c> is cleared.
            /// </summary>
            /// <returns>The parsed position, or -1 when positions are not being read.</returns>
            public override int NextPosition()
            {
                int position = -1;

                if (_readPositions)
                {
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    // LUCENENET specific - use wrapper BytesRefFormatter struct to defer building the string unless string.Format() is called
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS), "got line={0}", new BytesRefFormatter(_scratch, BytesRefFormat.UTF8));
                    }
                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.POS.Length, _scratch.Length - SimpleTextFieldsWriter.POS.Length,
                                            _scratchUtf162);
                    position = ArrayUtil.ParseInt32(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
                }

                if (_readOffsets)
                {
                    // Start offset line.
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    // LUCENENET specific - use wrapper BytesRefFormatter struct to defer building the string unless string.Format() is called
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET), "got line={0}", new BytesRefFormatter(_scratch, BytesRefFormat.UTF8));
                    }
                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.START_OFFSET.Length,
                                            _scratch.Length - SimpleTextFieldsWriter.START_OFFSET.Length, _scratchUtf162);
                    _startOffset = ArrayUtil.ParseInt32(_scratchUtf162.Chars, 0, _scratchUtf162.Length);

                    // End offset line.
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    // LUCENENET specific - use wrapper BytesRefFormatter struct to defer building the string unless string.Format() is called
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET), "got line={0}", new BytesRefFormatter(_scratch, BytesRefFormat.UTF8));
                    }
                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.END_OFFSET.Length,
                                            _scratch.Length - SimpleTextFieldsWriter.END_OFFSET.Length, _scratchUtf162);
                    _endOffset = ArrayUtil.ParseInt32(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
                }

                // Peek at the next line; it is only consumed when it is a payload.
                long beforePeek = _in.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                SimpleTextUtil.ReadLine(_in, _scratch);

                if (!StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
                {
                    _payload = null;
                    _in.Seek(beforePeek);
                    return position;
                }

                int payloadLength = _scratch.Length - SimpleTextFieldsWriter.PAYLOAD.Length;
                if (_scratch2.Bytes.Length < payloadLength)
                {
                    _scratch2.Grow(payloadLength);
                }
                Array.Copy(_scratch.Bytes, SimpleTextFieldsWriter.PAYLOAD.Length, _scratch2.Bytes, 0, payloadLength);
                _scratch2.Length = payloadLength;
                _payload         = _scratch2;

                return position;
            }
        /// <summary>
        /// Reads the field infos file of a segment and returns the parsed <see cref="FieldInfos"/>.
        /// </summary>
        /// <remarks>
        /// The file starts with the number of fields. Every field is then stored as a fixed
        /// sequence of labelled lines which are consumed strictly in order: name, number,
        /// indexed flag, index options (only present when indexed), term vectors, payloads,
        /// norms, norms type, doc values type, doc values generation and attributes.
        /// The footer is checked once all fields have been read. The input is disposed in
        /// every case; on failure it is disposed via <c>IOUtils.DisposeWhileHandlingException</c>.
        /// </remarks>
        public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix,
                                        IOContext iocontext)
        {
            string infosFileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix,
                                                                  SimpleTextFieldInfosWriter.FIELD_INFOS_EXTENSION);
            var input = directory.OpenChecksumInput(infosFileName, iocontext);
            BytesRef line = new BytesRef();

            bool completed = false;

            try
            {
                // Header: number of fields stored in the file.
                SimpleTextUtil.ReadLine(input, line);
                Debug.Assert(StringHelper.StartsWith(line, SimpleTextFieldInfosWriter.NUMFIELDS));
                int fieldCount = Convert.ToInt32(ReadString(SimpleTextFieldInfosWriter.NUMFIELDS.Length, line), CultureInfo.InvariantCulture);
                FieldInfo[] parsed = new FieldInfo[fieldCount];

                for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++)
                {
                    SimpleTextUtil.ReadLine(input, line);
                    Debug.Assert(StringHelper.StartsWith(line, SimpleTextFieldInfosWriter.NAME));
                    string name = ReadString(SimpleTextFieldInfosWriter.NAME.Length, line);

                    SimpleTextUtil.ReadLine(input, line);
                    Debug.Assert(StringHelper.StartsWith(line, SimpleTextFieldInfosWriter.NUMBER));
                    int fieldNumber = Convert.ToInt32(ReadString(SimpleTextFieldInfosWriter.NUMBER.Length, line), CultureInfo.InvariantCulture);

                    SimpleTextUtil.ReadLine(input, line);
                    Debug.Assert(StringHelper.StartsWith(line, SimpleTextFieldInfosWriter.ISINDEXED));
                    bool isIndexed = Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.ISINDEXED.Length, line), CultureInfo.InvariantCulture);

                    // The index options line only exists for indexed fields.
                    IndexOptions indexOptions = IndexOptions.NONE;
                    if (isIndexed)
                    {
                        SimpleTextUtil.ReadLine(input, line);
                        Debug.Assert(StringHelper.StartsWith(line, SimpleTextFieldInfosWriter.INDEXOPTIONS));
                        string optionsText = ReadString(SimpleTextFieldInfosWriter.INDEXOPTIONS.Length, line);
                        indexOptions = (IndexOptions)Enum.Parse(typeof(IndexOptions), optionsText);
                    }

                    SimpleTextUtil.ReadLine(input, line);
                    Debug.Assert(StringHelper.StartsWith(line, SimpleTextFieldInfosWriter.STORETV));
                    bool storeTermVector = Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.STORETV.Length, line), CultureInfo.InvariantCulture);

                    SimpleTextUtil.ReadLine(input, line);
                    Debug.Assert(StringHelper.StartsWith(line, SimpleTextFieldInfosWriter.PAYLOADS));
                    bool storePayloads = Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.PAYLOADS.Length, line), CultureInfo.InvariantCulture);

                    // The file stores "has norms"; FieldInfo wants the negated "omit norms".
                    SimpleTextUtil.ReadLine(input, line);
                    Debug.Assert(StringHelper.StartsWith(line, SimpleTextFieldInfosWriter.NORMS));
                    bool hasNorms = Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.NORMS.Length, line), CultureInfo.InvariantCulture);
                    bool omitNorms = !hasNorms;

                    SimpleTextUtil.ReadLine(input, line);
                    Debug.Assert(StringHelper.StartsWith(line, SimpleTextFieldInfosWriter.NORMS_TYPE));
                    Index.DocValuesType normsType = DocValuesType(ReadString(SimpleTextFieldInfosWriter.NORMS_TYPE.Length, line));

                    SimpleTextUtil.ReadLine(input, line);
                    Debug.Assert(StringHelper.StartsWith(line, SimpleTextFieldInfosWriter.DOCVALUES));
                    Index.DocValuesType docValuesType = DocValuesType(ReadString(SimpleTextFieldInfosWriter.DOCVALUES.Length, line));

                    SimpleTextUtil.ReadLine(input, line);
                    Debug.Assert(StringHelper.StartsWith(line, SimpleTextFieldInfosWriter.DOCVALUES_GEN));
                    long dvGen = Convert.ToInt64(ReadString(SimpleTextFieldInfosWriter.DOCVALUES_GEN.Length, line), CultureInfo.InvariantCulture);

                    SimpleTextUtil.ReadLine(input, line);
                    Debug.Assert(StringHelper.StartsWith(line, SimpleTextFieldInfosWriter.NUM_ATTS));
                    int numAtts = Convert.ToInt32(ReadString(SimpleTextFieldInfosWriter.NUM_ATTS.Length, line), CultureInfo.InvariantCulture);
                    IDictionary <string, string> atts = new Dictionary <string, string>();

                    // Attributes are stored as alternating key / value lines.
                    for (int remaining = numAtts; remaining > 0; remaining--)
                    {
                        SimpleTextUtil.ReadLine(input, line);
                        Debug.Assert(StringHelper.StartsWith(line, SimpleTextFieldInfosWriter.ATT_KEY));
                        string attKey = ReadString(SimpleTextFieldInfosWriter.ATT_KEY.Length, line);

                        SimpleTextUtil.ReadLine(input, line);
                        Debug.Assert(StringHelper.StartsWith(line, SimpleTextFieldInfosWriter.ATT_VALUE));
                        string attValue = ReadString(SimpleTextFieldInfosWriter.ATT_VALUE.Length, line);

                        atts[attKey] = attValue;
                    }

                    parsed[fieldIndex] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads,
                                                       indexOptions, docValuesType, normsType, new ReadOnlyDictionary <string, string>(atts))
                    {
                        DocValuesGen = dvGen
                    };
                }

                SimpleTextUtil.CheckFooter(input);

                FieldInfos result = new FieldInfos(parsed);
                completed = true;
                return result;
            }
            finally
            {
                if (!completed)
                {
                    // Something went wrong above: dispose through the exception-handling helper.
                    IOUtils.DisposeWhileHandlingException(input);
                }
                else
                {
                    input.Dispose();
                }
            }
        }
Beispiel #23
0
        /// <summary>
        /// Reads the segment info file of a segment and returns the parsed <see cref="SegmentInfo"/>.
        /// </summary>
        /// <remarks>
        /// The labelled lines are consumed strictly in order: version, document count,
        /// compound-file flag, diagnostics (count followed by key / value pairs) and files
        /// (count followed by one line per file). The footer is checked afterwards.
        /// The input is disposed in every case; on failure it is closed via
        /// <c>IOUtils.CloseWhileHandlingException</c>.
        /// </remarks>
        public override SegmentInfo Read(Directory directory, string segmentName, IOContext context)
        {
            BytesRef line = new BytesRef();
            string siFileName = IndexFileNames.SegmentFileName(segmentName, "",
                                                               SimpleTextSegmentInfoFormat.SI_EXTENSION);
            ChecksumIndexInput input = directory.OpenChecksumInput(siFileName, context);
            bool completed = false;

            try
            {
                SimpleTextUtil.ReadLine(input, line);
                Debug.Assert(StringHelper.StartsWith(line, SimpleTextSegmentInfoWriter.SI_VERSION));
                string version = ReadString(SimpleTextSegmentInfoWriter.SI_VERSION.Length, line);

                SimpleTextUtil.ReadLine(input, line);
                Debug.Assert(StringHelper.StartsWith(line, SimpleTextSegmentInfoWriter.SI_DOCCOUNT));
                string docCountText = ReadString(SimpleTextSegmentInfoWriter.SI_DOCCOUNT.Length, line);
                int docCount = Convert.ToInt32(docCountText, CultureInfo.InvariantCulture);

                SimpleTextUtil.ReadLine(input, line);
                Debug.Assert(StringHelper.StartsWith(line, SimpleTextSegmentInfoWriter.SI_USECOMPOUND));
                string compoundText = ReadString(SimpleTextSegmentInfoWriter.SI_USECOMPOUND.Length, line);
                bool isCompoundFile = Convert.ToBoolean(compoundText, CultureInfo.InvariantCulture);

                SimpleTextUtil.ReadLine(input, line);
                Debug.Assert(StringHelper.StartsWith(line, SimpleTextSegmentInfoWriter.SI_NUM_DIAG));
                string numDiagText = ReadString(SimpleTextSegmentInfoWriter.SI_NUM_DIAG.Length, line);
                int numDiag = Convert.ToInt32(numDiagText, CultureInfo.InvariantCulture);
                IDictionary <string, string> diagnostics = new Dictionary <string, string>();

                // Diagnostics are stored as alternating key / value lines.
                for (int remaining = numDiag; remaining > 0; remaining--)
                {
                    SimpleTextUtil.ReadLine(input, line);
                    Debug.Assert(StringHelper.StartsWith(line, SimpleTextSegmentInfoWriter.SI_DIAG_KEY));
                    string diagKey = ReadString(SimpleTextSegmentInfoWriter.SI_DIAG_KEY.Length, line);

                    SimpleTextUtil.ReadLine(input, line);
                    Debug.Assert(StringHelper.StartsWith(line, SimpleTextSegmentInfoWriter.SI_DIAG_VALUE));
                    string diagValue = ReadString(SimpleTextSegmentInfoWriter.SI_DIAG_VALUE.Length, line);

                    diagnostics[diagKey] = diagValue;
                }

                SimpleTextUtil.ReadLine(input, line);
                Debug.Assert(StringHelper.StartsWith(line, SimpleTextSegmentInfoWriter.SI_NUM_FILES));
                string numFilesText = ReadString(SimpleTextSegmentInfoWriter.SI_NUM_FILES.Length, line);
                int numFiles = Convert.ToInt32(numFilesText, CultureInfo.InvariantCulture);
                HashSet <string> files = new HashSet <string>();

                // One line per file belonging to the segment.
                for (int remaining = numFiles; remaining > 0; remaining--)
                {
                    SimpleTextUtil.ReadLine(input, line);
                    Debug.Assert(StringHelper.StartsWith(line, SimpleTextSegmentInfoWriter.SI_FILE));
                    files.Add(ReadString(SimpleTextSegmentInfoWriter.SI_FILE.Length, line));
                }

                SimpleTextUtil.CheckFooter(input);

                SegmentInfo result = new SegmentInfo(directory, version, segmentName, docCount, isCompoundFile, null,
                                                     diagnostics)
                {
                    Files = files
                };
                completed = true;
                return result;
            }
            finally
            {
                if (completed)
                {
                    input.Dispose();
                }
                else
                {
                    // Something went wrong above: close through the exception-handling helper.
                    IOUtils.CloseWhileHandlingException(input);
                }
            }
        }
Beispiel #24
0
 /// <summary>
 /// Writes <paramref name="s"/> to <c>_output</c> through <c>SimpleTextUtil.Write</c>,
 /// passing <c>_scratch</c> as the scratch argument.
 /// </summary>
 private void Write(string s) => SimpleTextUtil.Write(_output, s, _scratch);
        /// <summary>
        /// Writes the field infos of a segment as labelled text lines, followed by a checksum.
        /// </summary>
        /// <remarks>
        /// The file starts with the number of fields. For every field the following lines are
        /// written in order: name, number, indexed flag, index options (only when indexed),
        /// term vectors, payloads, norms, norms type, doc values type, doc values generation,
        /// attribute count and the attribute key / value pairs. Booleans are written lower-cased.
        /// The output is disposed on success and closed via
        /// <c>IOUtils.CloseWhileHandlingException</c> on failure.
        /// </remarks>
        public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos,
                                   IOContext context)
        {
            var invariant = CultureInfo.InvariantCulture;
            string infosFileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, FIELD_INFOS_EXTENSION);
            var output = directory.CreateOutput(infosFileName, context);
            BytesRef buffer = new BytesRef();
            bool completed = false;

            try
            {
                // Header: number of fields.
                SimpleTextUtil.Write(output, NUMFIELDS);
                SimpleTextUtil.Write(output, infos.Count.ToString(invariant), buffer);
                SimpleTextUtil.WriteNewline(output);

                foreach (FieldInfo info in infos)
                {
                    SimpleTextUtil.Write(output, NAME);
                    SimpleTextUtil.Write(output, info.Name, buffer);
                    SimpleTextUtil.WriteNewline(output);

                    SimpleTextUtil.Write(output, NUMBER);
                    SimpleTextUtil.Write(output, info.Number.ToString(invariant), buffer);
                    SimpleTextUtil.WriteNewline(output);

                    SimpleTextUtil.Write(output, ISINDEXED);
                    SimpleTextUtil.Write(output, invariant.TextInfo.ToLower(info.IsIndexed.ToString()), buffer);
                    SimpleTextUtil.WriteNewline(output);

                    // The index options line is only emitted for indexed fields.
                    if (info.IsIndexed)
                    {
                        Debug.Assert(info.IndexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !info.HasPayloads);

                        string optionsText;
                        if (info.IndexOptions != IndexOptions.NONE)
                        {
                            optionsText = info.IndexOptions.ToString();
                        }
                        else
                        {
                            optionsText = string.Empty;
                        }

                        SimpleTextUtil.Write(output, INDEXOPTIONS);
                        SimpleTextUtil.Write(output, optionsText, buffer);
                        SimpleTextUtil.WriteNewline(output);
                    }

                    SimpleTextUtil.Write(output, STORETV);
                    SimpleTextUtil.Write(output, invariant.TextInfo.ToLower(info.HasVectors.ToString()), buffer);
                    SimpleTextUtil.WriteNewline(output);

                    SimpleTextUtil.Write(output, PAYLOADS);
                    SimpleTextUtil.Write(output, invariant.TextInfo.ToLower(info.HasPayloads.ToString()), buffer);
                    SimpleTextUtil.WriteNewline(output);

                    // Stored as "has norms", i.e. the negation of OmitsNorms.
                    SimpleTextUtil.Write(output, NORMS);
                    SimpleTextUtil.Write(output, invariant.TextInfo.ToLower((!info.OmitsNorms).ToString()), buffer);
                    SimpleTextUtil.WriteNewline(output);

                    SimpleTextUtil.Write(output, NORMS_TYPE);
                    SimpleTextUtil.Write(output, GetDocValuesType(info.NormType), buffer);
                    SimpleTextUtil.WriteNewline(output);

                    SimpleTextUtil.Write(output, DOCVALUES);
                    SimpleTextUtil.Write(output, GetDocValuesType(info.DocValuesType), buffer);
                    SimpleTextUtil.WriteNewline(output);

                    SimpleTextUtil.Write(output, DOCVALUES_GEN);
                    SimpleTextUtil.Write(output, info.DocValuesGen.ToString(invariant), buffer);
                    SimpleTextUtil.WriteNewline(output);

                    IDictionary <string, string> atts = info.Attributes;
                    int numAtts = 0;
                    if (atts != null)
                    {
                        numAtts = atts.Count;
                    }

                    SimpleTextUtil.Write(output, NUM_ATTS);
                    SimpleTextUtil.Write(output, numAtts.ToString(invariant), buffer);
                    SimpleTextUtil.WriteNewline(output);

                    // Attribute pairs, if any, are the last lines of a field entry.
                    if (numAtts > 0 && atts != null)
                    {
                        foreach (var pair in atts)
                        {
                            SimpleTextUtil.Write(output, ATT_KEY);
                            SimpleTextUtil.Write(output, pair.Key, buffer);
                            SimpleTextUtil.WriteNewline(output);

                            SimpleTextUtil.Write(output, ATT_VALUE);
                            SimpleTextUtil.Write(output, pair.Value, buffer);
                            SimpleTextUtil.WriteNewline(output);
                        }
                    }
                }

                SimpleTextUtil.WriteChecksum(output, buffer);
                completed = true;
            }
            finally
            {
                if (!completed)
                {
                    // Something went wrong above: close through the exception-handling helper.
                    IOUtils.CloseWhileHandlingException(output);
                }
                else
                {
                    output.Dispose();
                }
            }
        }
Beispiel #26
0
 /// <summary>
 /// Writes <paramref name="bytes"/> to <c>_output</c> through <c>SimpleTextUtil.Write</c>.
 /// </summary>
 private void Write(BytesRef bytes) => SimpleTextUtil.Write(_output, bytes);
Beispiel #27
0
 /// <summary>
 /// Reads the next line from <c>data</c> into <c>scratch</c> through
 /// <c>SimpleTextUtil.ReadLine</c>. Used only in the constructor.
 /// </summary>
 private void ReadLine() => SimpleTextUtil.ReadLine(data, scratch);
Beispiel #28
0
 /// <summary>
 /// Writes a newline to <c>_output</c> through <c>SimpleTextUtil.WriteNewline</c>.
 /// </summary>
 private void NewLine() => SimpleTextUtil.WriteNewline(_output);
Beispiel #29
0
        /// <summary>
        /// Writes a SORTED doc values field: a header (value count, maximum value length,
        /// length pattern, ord pattern), then every value preceded by its zero-padded length
        /// and padded with spaces up to the maximum length, then one zero-padded line per
        /// entry of <paramref name="docToOrd"/>.
        /// </summary>
        /// <param name="field">Field being written; asserted to have SORTED doc values type.</param>
        /// <param name="values">Values to write. Enumerated twice: once for statistics, once for output.</param>
        /// <param name="docToOrd">Ordinals to write; each is stored as <c>ord + 1</c>, a null entry as <c>0</c>.</param>
        public override void AddSortedField(FieldInfo field, IEnumerable <BytesRef> values, IEnumerable <long?> docToOrd)
        {
            Debug.Assert(FieldSeen(field.Name));
            Debug.Assert(field.DocValuesType == DocValuesType.SORTED);
            WriteFieldEntry(field, DocValuesType.SORTED);

            CultureInfo invariant = CultureInfo.InvariantCulture;

            // First pass: count the values and find the longest one.
            int total = 0;
            int longest = -1;
            foreach (BytesRef candidate in values)
            {
                if (candidate.Length > longest)
                {
                    longest = candidate.Length;
                }
                total++;
            }

            // Both patterns are runs of '0', as wide as the largest number they must hold.
            string lengthPattern = new string('0', longest.ToString(invariant).Length);
            string ordPattern = new string('0', (total + 1L).ToString(invariant).Length);

            // write numValues
            SimpleTextUtil.Write(data, NUMVALUES);
            SimpleTextUtil.Write(data, total.ToString(invariant), scratch);
            SimpleTextUtil.WriteNewline(data);

            // write maxLength
            SimpleTextUtil.Write(data, MAXLENGTH);
            SimpleTextUtil.Write(data, longest.ToString(invariant), scratch);
            SimpleTextUtil.WriteNewline(data);

            // write our pattern for encoding lengths
            SimpleTextUtil.Write(data, PATTERN);
            SimpleTextUtil.Write(data, lengthPattern, scratch);
            SimpleTextUtil.WriteNewline(data);

            // write our pattern for ords
            SimpleTextUtil.Write(data, ORDPATTERN);
            SimpleTextUtil.Write(data, ordPattern, scratch);
            SimpleTextUtil.WriteNewline(data);

            // Second pass: emit the values. The counter exists for the asserts only.
            int written = 0;
            foreach (BytesRef current in values)
            {
                // write length
                SimpleTextUtil.Write(data, LENGTH);
                SimpleTextUtil.Write(data, current.Length.ToString(lengthPattern, invariant), scratch);
                SimpleTextUtil.WriteNewline(data);

                // write the raw bytes directly -- SimpleTextUtil.Write would escape them
                data.WriteBytes(current.Bytes, current.Offset, current.Length);

                // pad with spaces so every value line has the same width
                for (int missing = longest - current.Length; missing > 0; missing--)
                {
                    data.WriteByte((byte)' ');
                }
                SimpleTextUtil.WriteNewline(data);

                written++;
                Debug.Assert(written <= total);
            }

            Debug.Assert(written == total);

            foreach (long? ord in docToOrd)
            {
                // ords are stored shifted by one; a null ord yields 0
                long shifted = (ord + 1) ?? 0L;
                SimpleTextUtil.Write(data, shifted.ToString(ordPattern, invariant), scratch);
                SimpleTextUtil.WriteNewline(data);
            }
        }
Beispiel #30
0
        /// <summary>
        /// Writes a SORTED doc values field: a header (value count, maximum value length,
        /// length pattern, ord pattern), then every value preceded by its zero-padded length
        /// and padded with spaces up to the maximum length, then one zero-padded line per
        /// entry of <paramref name="docToOrd"/>.
        /// </summary>
        /// <remarks>
        /// All numbers are formatted with the invariant culture so the bytes written do not
        /// depend on the culture of the calling thread. Zero padding uses .NET custom numeric
        /// format strings made of '0' placeholders, so no Java-style <c>DecimalFormat</c> is needed.
        /// </remarks>
        /// <param name="field">Field being written; asserted to have SORTED doc values type.</param>
        /// <param name="values">Values to write. Enumerated twice: once for statistics, once for output.</param>
        /// <param name="docToOrd">Ordinals to write; each is stored as <c>ord + 1</c>.</param>
        public override void AddSortedField(FieldInfo field, IEnumerable <BytesRef> values, IEnumerable <long> docToOrd)
        {
            Debug.Assert(FieldSeen(field.Name));
            Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.SORTED);
            WriteFieldEntry(field, FieldInfo.DocValuesType_e.SORTED);

            // Fully qualified so this method does not rely on a using directive being present.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            int valueCount = 0;
            int maxLength  = -1;

            foreach (BytesRef value in values)
            {
                maxLength = Math.Max(maxLength, value.Length);
                valueCount++;
            }

            // maxLength may be -1 here (no values), so the culture's negative sign matters:
            // always format with the invariant culture.
            string maxLengthText = Convert.ToString(maxLength, invariant);

            // write numValues
            SimpleTextUtil.Write(data, NUMVALUES);
            SimpleTextUtil.Write(data, Convert.ToString(valueCount, invariant), scratch);
            SimpleTextUtil.WriteNewline(data);

            // write maxLength
            SimpleTextUtil.Write(data, MAXLENGTH);
            SimpleTextUtil.Write(data, maxLengthText, scratch);
            SimpleTextUtil.WriteNewline(data);

            // A run of '0' as wide as maxLength's text: used both as the pattern line written
            // to the file and as the custom numeric format string for value lengths.
            string lengthPattern = new string('0', maxLengthText.Length);

            // write our pattern for encoding lengths
            SimpleTextUtil.Write(data, PATTERN);
            SimpleTextUtil.Write(data, lengthPattern, scratch);
            SimpleTextUtil.WriteNewline(data);

            // Ords are written as ord + 1, so the widest number is valueCount + 1.
            string ordPattern = new string('0', Convert.ToString(valueCount + 1L, invariant).Length);

            // write our pattern for ords
            SimpleTextUtil.Write(data, ORDPATTERN);
            SimpleTextUtil.Write(data, ordPattern, scratch);
            SimpleTextUtil.WriteNewline(data);

            // for asserts:
            int valuesSeen = 0;

            foreach (BytesRef value in values)
            {
                // write length
                SimpleTextUtil.Write(data, LENGTH);
                SimpleTextUtil.Write(data, value.Length.ToString(lengthPattern, invariant), scratch);
                SimpleTextUtil.WriteNewline(data);

                // write bytes -- don't use SimpleText.Write
                // because it escapes:
                data.WriteBytes(value.Bytes, value.Offset, value.Length);

                // pad to fit
                for (int i = value.Length; i < maxLength; i++)
                {
                    data.WriteByte((sbyte)' ');
                }
                SimpleTextUtil.WriteNewline(data);
                valuesSeen++;
                Debug.Assert(valuesSeen <= valueCount);
            }

            Debug.Assert(valuesSeen == valueCount);

            foreach (var ord in docToOrd)
            {
                SimpleTextUtil.Write(data, (ord + 1).ToString(ordPattern, invariant), scratch);
                SimpleTextUtil.WriteNewline(data);
            }
        }