Ejemplo n.º 1
0
            public override void LookupOrd(long ord, BytesRef result)
            {
                if (ord < 0 || ord >= _field.NumValues)
                {
                    throw new IndexOutOfRangeException("ord must be 0 .. " + (_field.NumValues - 1) + "; got " + ord);
                }

                _input.Seek(_field.DataStartFilePointer + ord * (9 + _field.Pattern.Length + _field.MaxLength));
                SimpleTextUtil.ReadLine(_input, _scratch);
                Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH),
                             "got " + _scratch.Utf8ToString() + " in=" + _input);
                int len;

                try
                {
                    len =
                        (int)
                        _decoderFormat.parse(_scratch.Bytes.SubList(
                                                 _scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
                                                 _scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length).ToString());
                }
                catch (Exception pe)
                {
                    var e = new CorruptIndexException("failed to parse int length (resource=" + _input + ")", pe);
                    throw e;
                }

                result.Bytes  = new byte[len];
                result.Offset = 0;
                result.Length = len;
                _input.ReadBytes(result.Bytes, 0, len);
            }
Ejemplo n.º 2
0
        public static void CheckFooter(ChecksumIndexInput input)
        {
            var scratch          = new BytesRef();
            var expectedChecksum = string.Format("{0:D}", input.Checksum);

            ReadLine(input, scratch);

            if (StringHelper.StartsWith(scratch, CHECKSUM) == false)
            {
                throw new CorruptIndexException("SimpleText failure: expected checksum line but got " +
                                                scratch.Utf8ToString() + " (resource=" + input + ")");
            }
            var actualChecksum =
                (new BytesRef(scratch.Bytes, CHECKSUM.Length, scratch.Length - CHECKSUM.Length)).Utf8ToString();

            if (!expectedChecksum.Equals(actualChecksum))
            {
                throw new CorruptIndexException("SimpleText checksum failure: " + actualChecksum + " != " +
                                                expectedChecksum + " (resource=" + input + ")");
            }
            if (input.Length() != input.FilePointer)
            {
                throw new CorruptIndexException(
                          "Unexpected stuff at the end of file, please be careful with your text editor! (resource=" + input +
                          ")");
            }
        }
            public override void LookupOrd(long ord, BytesRef result)
            {
                if (ord < 0 || ord >= _field.NumValues)
                {
                    throw new IndexOutOfRangeException("ord must be 0 .. " + (_field.NumValues - 1) + "; got " + ord);
                }

                _input.Seek(_field.DataStartFilePointer + ord * (9 + _field.Pattern.Length + _field.MaxLength));
                SimpleTextUtil.ReadLine(_input, _scratch);
                Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH),
                             "got " + _scratch.Utf8ToString() + " in=" + _input);
                int len;

                try
                {
                    // LUCNENENET: .NET doesn't have a way to specify a pattern with integer, but all of the standard ones are built in.
                    len = int.Parse(Encoding.UTF8.GetString(_scratch.Bytes, _scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
                                                            _scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length), NumberStyles.Integer, CultureInfo.InvariantCulture);
                }
                catch (Exception pe)
                {
                    var e = new CorruptIndexException("failed to parse int length (resource=" + _input + ")", pe);
                    throw e;
                }

                result.Bytes  = new byte[len];
                result.Offset = 0;
                result.Length = len;
                _input.ReadBytes(result.Bytes, 0, len);
            }
Ejemplo n.º 4
0
            public bool Get(int index)
            {
                _input.Seek(_field.DataStartFilePointer + (9 + _field.Pattern.Length + _field.MaxLength + 2) * index);
                SimpleTextUtil.ReadLine(_input, _scratch);
                Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH));
                int len;

                try
                {
                    len = int.Parse(_scratch.Bytes.SubList(_scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
                                                           _scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length).ToString());
                }
                catch (FormatException ex)
                {
                    throw new CorruptIndexException("failed to parse int value (resource=" + _input + ")", ex);
                }

                // skip past bytes
                var bytes = new byte[len];

                _input.ReadBytes(bytes, 0, len);
                SimpleTextUtil.ReadLine(_input, _scratch); // newline
                SimpleTextUtil.ReadLine(_input, _scratch); // 'T' or 'F'
                return(_scratch.Bytes[_scratch.Offset] == (sbyte)'T');
            }
            public override void Get(int docId, BytesRef result)
            {
                if (docId < 0 || docId >= _outerInstance.MAX_DOC)
                {
                    throw new IndexOutOfRangeException("docID must be 0 .. " + (_outerInstance.MAX_DOC - 1) +
                                                       "; got " + docId);
                }

                _input.Seek(_field.DataStartFilePointer + (9 + _field.Pattern.Length + _field.MaxLength + 2) * docId);
                SimpleTextUtil.ReadLine(_input, _scratch);
                Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH));
                int len;

                try
                {
                    // LUCNENENET: .NET doesn't have a way to specify a pattern with integer, but all of the standard ones are built in.
                    len = int.Parse(Encoding.UTF8.GetString(_scratch.Bytes, _scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
                                                            _scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length), NumberStyles.Integer, CultureInfo.InvariantCulture);
                }
                catch (FormatException ex)
                {
                    throw new CorruptIndexException("failed to parse int value (resource=" + _input + ")", ex);
                }

                result.Bytes  = new byte[len];
                result.Offset = 0;
                result.Length = len;
                _input.ReadBytes(result.Bytes, 0, len);
            }
Ejemplo n.º 6
0
            public override void Get(int docId, BytesRef result)
            {
                if (docId < 0 || docId >= _outerInstance.MAX_DOC)
                {
                    throw new IndexOutOfRangeException("docID must be 0 .. " + (_outerInstance.MAX_DOC - 1) +
                                                       "; got " + docId);
                }

                _input.Seek(_field.DataStartFilePointer + (9 + _field.Pattern.Length + _field.MaxLength + 2) * docId);
                SimpleTextUtil.ReadLine(_input, _scratch);
                Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH));
                int len;

                try
                {
                    len = int.Parse(_scratch.Bytes.SubList(
                                        _scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
                                        _scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length).ToString());
                }
                catch (FormatException ex)
                {
                    throw new CorruptIndexException("failed to parse int value (resource=" + _input + ")", ex);
                }

                result.Bytes  = new byte[len];
                result.Offset = 0;
                result.Length = len;
                _input.ReadBytes(result.Bytes, 0, len);
            }
            public override void LookupOrd(long ord, BytesRef result)
            {
                if (ord < 0 || ord >= field.NumValues)
                {
                    throw new IndexOutOfRangeException("ord must be 0 .. " + (field.NumValues - 1) + "; got " + ord);
                }

                @in.Seek(field.DataStartFilePointer + ord * (9 + field.Pattern.Length + field.MaxLength));
                SimpleTextUtil.ReadLine(@in, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextDocValuesWriter.LENGTH),
                             "got " + scratch.Utf8ToString() + " in=" + @in);
                int len;

                try
                {
                    len =
                        (int)
                        decoder.parse(scratch.Bytes.SubList(
                                          scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
                                          scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length).ToString());
                }
                catch (ParseException pe)
                {
                    CorruptIndexException e =
                        new CorruptIndexException("failed to parse int length (resource=" + @in + ")");
                    e.initCause(pe);
                    throw e;
                }
                result.Bytes  = new sbyte[len];
                result.Offset = 0;
                result.Length = len;
                @in.ReadBytes(result.Bytes, 0, len);
            }
Ejemplo n.º 8
0
            public override int NextPosition()
            {
                int pos;

                if (_readPositions)
                {
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS), "got line=" + _scratch.Utf8ToString());
                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.POS.Length, _scratch.Length - SimpleTextFieldsWriter.POS.Length,
                                            _scratchUtf162);
                    pos = ArrayUtil.ParseInt(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
                }
                else
                {
                    pos = -1;
                }

                if (_readOffsets)
                {
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET), "got line=" + _scratch.Utf8ToString());
                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.START_OFFSET.Length,
                                            _scratch.Length - SimpleTextFieldsWriter.START_OFFSET.Length, _scratchUtf162);
                    _startOffset = ArrayUtil.ParseInt(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET), "got line=" + _scratch.Utf8ToString());
                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.END_OFFSET.Length,
                                            _scratch.Length - SimpleTextFieldsWriter.END_OFFSET.Length, _scratchUtf162);
                    _endOffset = ArrayUtil.ParseInt(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
                }

                long fp = _in.FilePointer;

                SimpleTextUtil.ReadLine(_in, _scratch);
                if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
                {
                    int len = _scratch.Length - SimpleTextFieldsWriter.PAYLOAD.Length;
                    if (_scratch2.Bytes.Length < len)
                    {
                        _scratch2.Grow(len);
                    }
                    Array.Copy(_scratch.Bytes, SimpleTextFieldsWriter.PAYLOAD.Length, _scratch2.Bytes, 0, len);
                    _scratch2.Length = len;
                    _payload         = _scratch2;
                }
                else
                {
                    _payload = null;
                    _in.Seek(fp);
                }
                return(pos);
            }
        public override Bits ReadLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context)
        {
            Debug.Assert(info.HasDeletions());
            var scratch      = new BytesRef();
            var scratchUtf16 = new CharsRef();

            var fileName             = IndexFileNames.FileNameFromGeneration(info.Info.Name, LIVEDOCS_EXTENSION, info.DelGen);
            ChecksumIndexInput input = null;
            var success = false;

            try
            {
                input = dir.OpenChecksumInput(fileName, context);

                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SIZE));
                var size = ParseIntAt(scratch, SIZE.Length, scratchUtf16);

                var bits = new BitArray(size);

                SimpleTextUtil.ReadLine(input, scratch);
                while (!scratch.Equals(END))
                {
                    Debug.Assert(StringHelper.StartsWith(scratch, DOC));
                    var docid = ParseIntAt(scratch, DOC.Length, scratchUtf16);
                    bits.Set(docid, true);
                    SimpleTextUtil.ReadLine(input, scratch);
                }

                SimpleTextUtil.CheckFooter(input);

                success = true;
                return(new SimpleTextBits(bits, size));
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(input);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(input);
                }
            }
        }
Ejemplo n.º 10
0
        private void ReadField(BytesRef type, FieldInfo fieldInfo, StoredFieldVisitor visitor)
        {
            ReadLine();
            Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.VALUE));
            if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_STRING))
            {
                visitor.StringField(fieldInfo,
                                    _scratch.Bytes.SubList(_scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length,
                                                           _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length).ToString());

                // new string(_scratch.Bytes, _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length, _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length,
                //     Encoding.UTF8));
            }
            else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_BINARY))
            {
                var copy = new byte[_scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length];
                Array.Copy(_scratch.Bytes, _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length, copy, 0, copy.Length);
                visitor.BinaryField(fieldInfo, copy);
            }
            else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_INT))
            {
                UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length, _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length,
                                        _scratchUtf16);
                visitor.IntField(fieldInfo, Convert.ToInt32(_scratchUtf16.ToString()));
            }
            else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_LONG))
            {
                UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length, _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length,
                                        _scratchUtf16);
                visitor.LongField(fieldInfo, Convert.ToInt64(_scratchUtf16.ToString()));
            }
            else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_FLOAT))
            {
                UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length, _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length,
                                        _scratchUtf16);
                visitor.FloatField(fieldInfo, Convert.ToSingle(_scratchUtf16.ToString()));
            }
            else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_DOUBLE))
            {
                UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length, _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length,
                                        _scratchUtf16);
                visitor.DoubleField(fieldInfo, Convert.ToDouble(_scratchUtf16.ToString()));
            }
        }
        // we don't actually write a .tvx-like index, instead we read the
        // vectors file in entirety up-front and save the offsets
        // so we can seek to the data later.
        private void ReadIndex(int maxDoc)
        {
            ChecksumIndexInput input = new BufferedChecksumIndexInput(_input);

            _offsets = new long[maxDoc];
            int upto = 0;

            while (!_scratch.Equals(END))
            {
                SimpleTextUtil.ReadLine(input, _scratch);
                if (StringHelper.StartsWith(_scratch, DOC))
                {
                    _offsets[upto] = input.FilePointer;
                    upto++;
                }
            }
            SimpleTextUtil.CheckFooter(input);
            Debug.Assert(upto == _offsets.Length);
        }
Ejemplo n.º 12
0
        /// <remarks>
        /// we don't actually write a .fdx-like index, instead we read the
        /// stored fields file in entirety up-front and save the offsets
        /// so we can seek to the documents later.
        /// </remarks>
        private void ReadIndex(int size)
        {
            ChecksumIndexInput input = new BufferedChecksumIndexInput(_input);

            _offsets = new long[size];
            var upto = 0;

            while (!_scratch.Equals(SimpleTextStoredFieldsWriter.END))
            {
                SimpleTextUtil.ReadLine(input, _scratch);
                if (StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.DOC))
                {
                    _offsets[upto] = input.FilePointer;
                    upto++;
                }
            }
            SimpleTextUtil.CheckFooter(input);
            Debug.Assert(upto == _offsets.Length);
        }
Ejemplo n.º 13
0
        private SortedDictionary <string, long?> ReadFields(IndexInput @in)
        {
            ChecksumIndexInput input = new BufferedChecksumIndexInput(@in);
            var scratch = new BytesRef(10);
            var fields  = new SortedDictionary <string, long?>();

            while (true)
            {
                SimpleTextUtil.ReadLine(input, scratch);
                if (scratch.Equals(SimpleTextFieldsWriter.END))
                {
                    SimpleTextUtil.CheckFooter(input);
                    return(fields);
                }

                if (StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.FIELD))
                {
                    var fieldName = Encoding.UTF8.GetString(scratch.Bytes, scratch.Offset + SimpleTextFieldsWriter.FIELD.Length,
                                                            scratch.Length - SimpleTextFieldsWriter.FIELD.Length);
                    fields[fieldName] = input.FilePointer;
                }
            }
        }
Ejemplo n.º 14
0
 /// <summary> Used only in ctor: </summary>
 private bool StartsWith(BytesRef prefix)
 {
     return(StringHelper.StartsWith(SCRATCH, prefix));
 }
Ejemplo n.º 15
0
            public override int NextDoc()
            {
                bool first = true;

                _in.Seek(_nextDocStart);
                long posStart = 0;

                while (true)
                {
                    long lineStart = _in.FilePointer;
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    //System.out.println("NEXT DOC: " + scratch.utf8ToString());
                    if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC))
                    {
                        if (!first && (_liveDocs == null || _liveDocs.Get(_docId)))
                        {
                            _nextDocStart = lineStart;
                            _in.Seek(posStart);
                            return(_docId);
                        }
                        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length, _scratch.Length - SimpleTextFieldsWriter.DOC.Length,
                                                _scratchUtf16);
                        _docId = ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                        _tf    = 0;
                        first  = false;
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
                    {
                        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length,
                                                _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16);
                        _tf      = ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                        posStart = _in.FilePointer;
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS))
                    {
                        // skip
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET))
                    {
                        // skip
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET))
                    {
                        // skip
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
                    {
                        // skip
                    }
                    else
                    {
                        Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD) ||
                                     StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END));

                        if (!first && (_liveDocs == null || _liveDocs.Get(_docId)))
                        {
                            _nextDocStart = lineStart;
                            _in.Seek(posStart);
                            return(_docId);
                        }
                        return(_docId = NO_MORE_DOCS);
                    }
                }
            }
Ejemplo n.º 16
0
        public override SegmentInfo Read(Directory directory, string segmentName, IOContext context)
        {
            BytesRef scratch     = new BytesRef();
            string   segFileName = IndexFileNames.SegmentFileName(segmentName, "",
                                                                  SimpleTextSegmentInfoFormat.SI_EXTENSION);
            ChecksumIndexInput input = directory.OpenChecksumInput(segFileName, context);
            bool success             = false;

            try
            {
                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SI_VERSION));
                string version = ReadString(SI_VERSION.length, scratch);

                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SI_DOCCOUNT));
                int docCount = Convert.ToInt32(ReadString(SI_DOCCOUNT.length, scratch));

                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SI_USECOMPOUND));
                bool isCompoundFile = Convert.ToBoolean(ReadString(SI_USECOMPOUND.length, scratch));

                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SI_NUM_DIAG));
                int numDiag = Convert.ToInt32(ReadString(SI_NUM_DIAG.length, scratch));
                IDictionary <string, string> diagnostics = new Dictionary <string, string>();

                for (int i = 0; i < numDiag; i++)
                {
                    SimpleTextUtil.ReadLine(input, scratch);
                    Debug.Assert(StringHelper.StartsWith(scratch, SI_DIAG_KEY));
                    string key = ReadString(SI_DIAG_KEY.length, scratch);

                    SimpleTextUtil.ReadLine(input, scratch);
                    Debug.Assert(StringHelper.StartsWith(scratch, SI_DIAG_VALUE));
                    string value = ReadString(SI_DIAG_VALUE.length, scratch);
                    diagnostics[key] = value;
                }

                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SI_NUM_FILES));
                int numFiles           = Convert.ToInt32(ReadString(SI_NUM_FILES.length, scratch));
                HashSet <string> files = new HashSet <string>();

                for (int i = 0; i < numFiles; i++)
                {
                    SimpleTextUtil.ReadLine(input, scratch);
                    Debug.Assert(StringHelper.StartsWith(scratch, SI_FILE));
                    string fileName = ReadString(SI_FILE.length, scratch);
                    files.Add(fileName);
                }

                SimpleTextUtil.CheckFooter(input);

                SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount, isCompoundFile, null,
                                                   diagnostics);
                info.Files = files;
                success    = true;
                return(info);
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(input);
                }
                else
                {
                    input.Close();
                }
            }
        }
Ejemplo n.º 17
0
        public override void VisitDocument(int n, StoredFieldVisitor visitor)
        {
            _input.Seek(_offsets[n]);
            ReadLine();
            Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.NUM));
            var numFields = ParseIntAt(SimpleTextStoredFieldsWriter.NUM.Length);

            for (var i = 0; i < numFields; i++)
            {
                ReadLine();
                Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.FIELD));
                int       fieldNumber = ParseIntAt(SimpleTextStoredFieldsWriter.FIELD.Length);
                FieldInfo fieldInfo   = _fieldInfos.FieldInfo(fieldNumber);
                ReadLine();
                Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.NAME));
                ReadLine();
                Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.TYPE));

                BytesRef type;
                if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_STRING, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length))
                {
                    type = SimpleTextStoredFieldsWriter.TYPE_STRING;
                }
                else if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_BINARY, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length))
                {
                    type = SimpleTextStoredFieldsWriter.TYPE_BINARY;
                }
                else if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_INT, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length))
                {
                    type = SimpleTextStoredFieldsWriter.TYPE_INT;
                }
                else if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_LONG, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length))
                {
                    type = SimpleTextStoredFieldsWriter.TYPE_LONG;
                }
                else if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_FLOAT, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length))
                {
                    type = SimpleTextStoredFieldsWriter.TYPE_FLOAT;
                }
                else if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_DOUBLE, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length))
                {
                    type = SimpleTextStoredFieldsWriter.TYPE_DOUBLE;
                }
                else
                {
                    throw new Exception("unknown field type");
                }

                switch (visitor.NeedsField(fieldInfo))
                {
                case StoredFieldVisitor.Status.YES:
                    ReadField(type, fieldInfo, visitor);
                    break;

                case StoredFieldVisitor.Status.NO:
                    ReadLine();
                    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.VALUE));
                    break;

                case StoredFieldVisitor.Status.STOP:
                    return;
                }
            }
        }
Ejemplo n.º 18
0
            private void LoadTerms()
            {
                var posIntOutputs = PositiveIntOutputs.Singleton;
                var outputsInner  = new PairOutputs <long?, long?>(posIntOutputs, posIntOutputs);
                var outputs       = new PairOutputs <long?, PairOutputs <long?, long?> .Pair>(posIntOutputs, outputsInner);

                // honestly, wtf kind of generic mess is this.
                var b     = new Builder <PairOutputs <long?, PairOutputs <long?, long?> .Pair> .Pair>(FST.INPUT_TYPE.BYTE1, outputs);
                var input = (IndexInput)_outerInstance._input.Clone();

                input.Seek(_termsStart);

                var  lastTerm      = new BytesRef(10);
                long lastDocsStart = -1;
                int  docFreq       = 0;
                long totalTermFreq = 0;
                var  visitedDocs   = new FixedBitSet(_maxDoc);

                var scratchIntsRef = new IntsRef();

                while (true)
                {
                    SimpleTextUtil.ReadLine(input, _scratch);
                    if (_scratch.Equals(SimpleTextFieldsWriter.END) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD))
                    {
                        if (lastDocsStart != -1)
                        {
                            b.Add(Util.ToIntsRef(lastTerm, scratchIntsRef),
                                  outputs.NewPair(lastDocsStart, outputsInner.NewPair(docFreq, totalTermFreq)));
                            _sumTotalTermFreq += totalTermFreq;
                        }
                        break;
                    }

                    if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC))
                    {
                        docFreq++;
                        _sumDocFreq++;
                        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length, _scratch.Length - SimpleTextFieldsWriter.DOC.Length,
                                                _scratchUtf16);
                        int docId = ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                        visitedDocs.Set(docId);
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
                    {
                        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length,
                                                _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16);
                        totalTermFreq += ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM))
                    {
                        if (lastDocsStart != -1)
                        {
                            b.Add(Util.ToIntsRef(lastTerm, scratchIntsRef),
                                  outputs.NewPair(lastDocsStart, outputsInner.NewPair(docFreq, totalTermFreq)));
                        }
                        lastDocsStart = input.FilePointer;
                        int len = _scratch.Length - SimpleTextFieldsWriter.TERM.Length;
                        if (len > lastTerm.Length)
                        {
                            lastTerm.Grow(len);
                        }
                        Array.Copy(_scratch.Bytes, SimpleTextFieldsWriter.TERM.Length, lastTerm.Bytes, 0, len);
                        lastTerm.Length    = len;
                        docFreq            = 0;
                        _sumTotalTermFreq += totalTermFreq;
                        totalTermFreq      = 0;
                        _termCount++;
                    }
                }
                _docCount = visitedDocs.Cardinality();
                _fst      = b.Finish();
            }
        public override Fields Get(int doc)
        {
            SortedMap <string, SimpleTVTerms> fields = new SortedDictionary <string, SimpleTVTerms>();

            _input.Seek(_offsets[doc]);
            ReadLine();
            Debug.Assert(StringHelper.StartsWith(_scratch, NUMFIELDS));
            int numFields = ParseIntAt(NUMFIELDS.length);

            if (numFields == 0)
            {
                return(null); // no vectors for this doc
            }
            for (int i = 0; i < numFields; i++)
            {
                ReadLine();
                Debug.Assert(StringHelper.StartsWith(_scratch, FIELD));
                // skip fieldNumber:
                ParseIntAt(FIELD.length);

                ReadLine();
                Debug.Assert(StringHelper.StartsWith(_scratch, FIELDNAME));
                string fieldName = ReadString(FIELDNAME.length, _scratch);

                ReadLine();
                Debug.Assert(StringHelper.StartsWith(_scratch, FIELDPOSITIONS));
                bool positions = Convert.ToBoolean(ReadString(FIELDPOSITIONS.length, _scratch));

                ReadLine();
                Debug.Assert(StringHelper.StartsWith(_scratch, FIELDOFFSETS));
                bool offsets = Convert.ToBoolean(ReadString(FIELDOFFSETS.length, _scratch));

                ReadLine();
                Debug.Assert(StringHelper.StartsWith(_scratch, FIELDPAYLOADS));
                bool payloads = Convert.ToBoolean(ReadString(FIELDPAYLOADS.length, _scratch));

                ReadLine();
                Debug.Assert(StringHelper.StartsWith(_scratch, FIELDTERMCOUNT));
                int termCount = ParseIntAt(FIELDTERMCOUNT.length);

                SimpleTVTerms terms = new SimpleTVTerms(offsets, positions, payloads);
                fields.put(fieldName, terms);

                for (int j = 0; j < termCount; j++)
                {
                    ReadLine();
                    Debug.Assert(StringHelper.StartsWith(_scratch, TERMTEXT));
                    BytesRef term       = new BytesRef();
                    int      termLength = _scratch.length - TERMTEXT.length;
                    term.grow(termLength);
                    term.length = termLength;
                    Array.Copy(_scratch.bytes, _scratch.offset + TERMTEXT.length, term.bytes, term.offset, termLength);

                    SimpleTVPostings postings = new SimpleTVPostings();
                    terms.TERMS.put(term, postings);

                    ReadLine();
                    Debug.Assert(StringHelper.StartsWith(_scratch, TERMFREQ));
                    postings.FREQ = ParseIntAt(TERMFREQ.length);

                    if (positions || offsets)
                    {
                        if (positions)
                        {
                            postings.POSITIONS = new int[postings.FREQ];
                            if (payloads)
                            {
                                postings.PAYLOADS = new BytesRef[postings.FREQ];
                            }
                        }

                        if (offsets)
                        {
                            postings.START_OFFSETS = new int[postings.FREQ];
                            postings.END_OFFSETS   = new int[postings.FREQ];
                        }

                        for (int k = 0; k < postings.FREQ; k++)
                        {
                            if (positions)
                            {
                                ReadLine();
                                Debug.Assert(StringHelper.StartsWith(_scratch, POSITION));
                                postings.POSITIONS[k] = ParseIntAt(POSITION.length);
                                if (payloads)
                                {
                                    ReadLine();
                                    Debug.Assert(StringHelper.StartsWith(_scratch, PAYLOAD));
                                    if (_scratch.length - PAYLOAD.length == 0)
                                    {
                                        postings.PAYLOADS[k] = null;
                                    }
                                    else
                                    {
                                        sbyte[] payloadBytes = new sbyte[_scratch.length - PAYLOAD.length];
                                        Array.Copy(_scratch.bytes, _scratch.offset + PAYLOAD.length, payloadBytes, 0,
                                                   payloadBytes.Length);
                                        postings.PAYLOADS[k] = new BytesRef(payloadBytes);
                                    }
                                }
                            }

                            if (offsets)
                            {
                                ReadLine();
                                Debug.Assert(StringHelper.StartsWith(_scratch, STARTOFFSET));
                                postings.START_OFFSETS[k] = ParseIntAt(STARTOFFSET.length);

                                ReadLine();
                                Debug.Assert(StringHelper.StartsWith(_scratch, ENDOFFSET));
                                postings.END_OFFSETS[k] = ParseIntAt(ENDOFFSET.length);
                            }
                        }
                    }
                }
            }
            return(new SimpleTVFields(this, fields));
        }
Ejemplo n.º 20
0
            public override int NextDoc()
            {
                if (_docId == NO_MORE_DOCS)
                {
                    return(_docId);
                }
                bool first    = true;
                int  termFreq = 0;

                while (true)
                {
                    long lineStart = _in.FilePointer;
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC))
                    {
                        if (!first && (_liveDocs == null || _liveDocs.Get(_docId)))
                        {
                            _in.Seek(lineStart);
                            if (!_omitTf)
                            {
                                _tf = termFreq;
                            }
                            return(_docId);
                        }
                        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length, _scratch.Length - SimpleTextFieldsWriter.DOC.Length,
                                                _scratchUtf16);
                        _docId   = ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                        termFreq = 0;
                        first    = false;
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
                    {
                        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length,
                                                _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16);
                        termFreq = ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS))
                    {
                        // skip termFreq++;
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET))
                    {
                        // skip
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET))
                    {
                        // skip
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
                    {
                        // skip
                    }
                    else
                    {
                        Debug.Assert(
                            StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD) ||
                            StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END), "scratch=" + _scratch.Utf8ToString());
                        if (!first && (_liveDocs == null || _liveDocs.Get(_docId)))
                        {
                            _in.Seek(lineStart);
                            if (!_omitTf)
                            {
                                _tf = termFreq;
                            }
                            return(_docId);
                        }
                        return(_docId = NO_MORE_DOCS);
                    }
                }
            }