/// <summary>
            /// Loads the binary value stored for <paramref name="docId"/> into <paramref name="result"/>.
            /// </summary>
            /// <param name="docId">Document number; must lie in <c>0 .. MAX_DOC - 1</c>.</param>
            /// <param name="result">Receives a newly allocated buffer (offset 0) holding the value bytes.</param>
            /// <exception cref="IndexOutOfRangeException"><paramref name="docId"/> is out of range.</exception>
            /// <exception cref="CorruptIndexException">The stored length line cannot be parsed as an int.</exception>
            public override void Get(int docId, BytesRef result)
            {
                if (docId < 0 || docId >= _outerInstance.MAX_DOC)
                {
                    throw new IndexOutOfRangeException("docID must be 0 .. " + (_outerInstance.MAX_DOC - 1) +
                                                       "; got " + docId);
                }

                // Widen the per-document stride to long BEFORE multiplying by docId; an int * int
                // product would silently wrap for data files larger than 2 GB.
                // NOTE(review): the stride presumably mirrors the fixed record width emitted by the writer - confirm.
                long stride = 9 + _field.Pattern.Length + _field.MaxLength + 2;
                _input.Seek(_field.DataStartFilePointer + stride * docId);
                SimpleTextUtil.ReadLine(_input, _scratch);
                Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH));
                int len;

                try
                {
                    // LUCENENET: .NET doesn't have a way to specify a pattern with integer, but all of the standard ones are built in.
                    len = int.Parse(Encoding.UTF8.GetString(_scratch.Bytes, _scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
                                                            _scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length), NumberStyles.Integer, CultureInfo.InvariantCulture);
                }
                catch (FormatException ex)
                {
                    throw new CorruptIndexException("failed to parse int value (resource=" + _input + ")", ex);
                }
                catch (OverflowException ex)
                {
                    // A digit run that does not fit in an Int32 is corruption too, not a caller error.
                    throw new CorruptIndexException("failed to parse int value (resource=" + _input + ")", ex);
                }

                result.Bytes  = new byte[len];
                result.Offset = 0;
                result.Length = len;
                _input.ReadBytes(result.Bytes, 0, len);
            }
            /// <summary>
            /// Looks up the value stored for ordinal <paramref name="ord"/> and places it in
            /// <paramref name="result"/> as a newly allocated buffer starting at offset 0.
            /// </summary>
            /// <param name="ord">Ordinal to look up; must lie in <c>0 .. field.NumValues - 1</c>.</param>
            /// <param name="result">Receives the bytes read from the input.</param>
            // NOTE(review): decoder.parse, SubList(...).ToString(), ParseException and initCause look like
            // leftovers of an automatic Java-to-C# conversion - confirm that they resolve in this project.
            public override void LookupOrd(long ord, BytesRef result)
            {
                if (ord < 0 || ord >= field.NumValues)
                {
                    throw new IndexOutOfRangeException("ord must be 0 .. " + (field.NumValues - 1) + "; got " + ord);
                }

                // Jump to the record for this ordinal and read its first line into scratch.
                @in.Seek(field.DataStartFilePointer + ord * (9 + field.Pattern.Length + field.MaxLength));
                SimpleTextUtil.ReadLine(@in, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextDocValuesWriter.LENGTH),
                             "got " + scratch.Utf8ToString() + " in=" + @in);
                int len;

                try
                {
                    // Parse whatever follows the LENGTH prefix as the byte count of the value.
                    len =
                        (int)
                        decoder.parse(scratch.Bytes.SubList(
                                          scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
                                          scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length).ToString());
                }
                catch (ParseException pe)
                {
                    // Surface an unparsable length as index corruption, keeping the parse error as the cause.
                    CorruptIndexException e =
                        new CorruptIndexException("failed to parse int length (resource=" + @in + ")");
                    e.initCause(pe);
                    throw e;
                }
                // Read exactly len bytes of value data that follow the length line.
                result.Bytes  = new sbyte[len];
                result.Offset = 0;
                result.Length = len;
                @in.ReadBytes(result.Bytes, 0, len);
            }
예제 #3
0
 /// <summary>
 /// Reads bytes from <paramref name="input"/> up to (and consuming) the next unescaped
 /// NEWLINE and stores them in <paramref name="scratch"/>. A byte that follows ESCAPE is
 /// stored verbatim; the ESCAPE byte itself is dropped.
 /// </summary>
 /// <param name="input">Source of the bytes.</param>
 /// <param name="scratch">Receives the line; its offset is reset to 0 and its length to the number of stored bytes.</param>
 public static void ReadLine(DataInput input, BytesRef scratch)
 {
     var count = 0;
     for (;;)
     {
         var current = input.ReadSByte();

         // Make sure there is room for one more byte before deciding what to store.
         if (count == scratch.Bytes.Length)
         {
             scratch.Grow(count + 1);
         }

         if (current != ESCAPE)
         {
             if (current == NEWLINE)
             {
                 break;
             }

             scratch.Bytes[count++] = current;
             continue;
         }

         // Escaped: take the next byte as-is, whatever it is.
         scratch.Bytes[count++] = input.ReadSByte();
     }

     scratch.Offset = 0;
     scratch.Length = count;
 }
예제 #4
0
            /// <summary>
            /// Looks up the value stored for ordinal <paramref name="ord"/> and places it in
            /// <paramref name="result"/> as a newly allocated buffer starting at offset 0.
            /// </summary>
            /// <param name="ord">Ordinal to look up; must lie in <c>0 .. NumValues - 1</c>.</param>
            /// <param name="result">Receives the bytes read from the input.</param>
            /// <exception cref="IndexOutOfRangeException"><paramref name="ord"/> is out of range.</exception>
            /// <exception cref="CorruptIndexException">The stored length line cannot be parsed as an int.</exception>
            public override void LookupOrd(long ord, BytesRef result)
            {
                if (ord < 0 || ord >= _field.NumValues)
                {
                    throw new IndexOutOfRangeException("ord must be 0 .. " + (_field.NumValues - 1) + "; got " + ord);
                }

                _input.Seek(_field.DataStartFilePointer + ord * (9 + _field.Pattern.Length + _field.MaxLength));
                SimpleTextUtil.ReadLine(_input, _scratch);
                Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH),
                             "got " + _scratch.Utf8ToString() + " in=" + _input);
                int len;

                try
                {
                    // Decode the digits that follow the LENGTH prefix as UTF-8 text and parse them
                    // culture-invariantly. Calling ToString() on a byte sub-list would not yield the
                    // digits, and .NET has no pattern-driven integer parser to port the decoder to.
                    len = int.Parse(
                        System.Text.Encoding.UTF8.GetString(
                            _scratch.Bytes,
                            _scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
                            _scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length),
                        System.Globalization.NumberStyles.Integer,
                        System.Globalization.CultureInfo.InvariantCulture);
                }
                catch (Exception pe)
                {
                    // Any failure to obtain the length means the on-disk record is not what we expect.
                    throw new CorruptIndexException("failed to parse int length (resource=" + _input + ")", pe);
                }

                result.Bytes  = new byte[len];
                result.Offset = 0;
                result.Length = len;
                _input.ReadBytes(result.Bytes, 0, len);
            }
예제 #5
0
            /// <summary>
            /// Loads the binary value stored for <paramref name="docId"/> into <paramref name="result"/>.
            /// </summary>
            /// <param name="docId">Document number; must lie in <c>0 .. MAX_DOC - 1</c>.</param>
            /// <param name="result">Receives a newly allocated buffer (offset 0) holding the value bytes.</param>
            /// <exception cref="IndexOutOfRangeException"><paramref name="docId"/> is out of range.</exception>
            /// <exception cref="CorruptIndexException">The stored length line cannot be parsed as an int.</exception>
            public override void Get(int docId, BytesRef result)
            {
                if (docId < 0 || docId >= _outerInstance.MAX_DOC)
                {
                    throw new IndexOutOfRangeException("docID must be 0 .. " + (_outerInstance.MAX_DOC - 1) +
                                                       "; got " + docId);
                }

                // Widen the per-document stride to long BEFORE multiplying by docId; an int * int
                // product would silently wrap for data files larger than 2 GB.
                long stride = 9 + _field.Pattern.Length + _field.MaxLength + 2;
                _input.Seek(_field.DataStartFilePointer + stride * docId);
                SimpleTextUtil.ReadLine(_input, _scratch);
                Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH));
                int len;

                try
                {
                    // Decode the digits after the LENGTH prefix as UTF-8 text; ToString() on a byte
                    // sub-list would not produce them. Parse culture-invariantly because this is
                    // machine-written index data, not user-facing text.
                    len = int.Parse(
                        System.Text.Encoding.UTF8.GetString(
                            _scratch.Bytes,
                            _scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
                            _scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length),
                        System.Globalization.NumberStyles.Integer,
                        System.Globalization.CultureInfo.InvariantCulture);
                }
                catch (FormatException ex)
                {
                    throw new CorruptIndexException("failed to parse int value (resource=" + _input + ")", ex);
                }
                catch (OverflowException ex)
                {
                    // A digit run that does not fit in an Int32 is corruption too, not a caller error.
                    throw new CorruptIndexException("failed to parse int value (resource=" + _input + ")", ex);
                }

                result.Bytes  = new byte[len];
                result.Offset = 0;
                result.Length = len;
                _input.ReadBytes(result.Bytes, 0, len);
            }
예제 #6
0
        /// <summary>
        /// Fills <paramref name="scratch"/> with the bytes of the next line of <paramref name="input"/>.
        /// Reading stops at the first NEWLINE that is not preceded by ESCAPE; an ESCAPE byte is
        /// discarded and the byte after it is copied unconditionally.
        /// </summary>
        /// <param name="input">Source of the bytes.</param>
        /// <param name="scratch">Destination; ends with offset 0 and length equal to the bytes copied.</param>
        public static void ReadLine(DataInput input, BytesRef scratch)
        {
            var length = 0;
            var endOfLine = false;

            while (!endOfLine)
            {
                var next = input.ReadByte();

                // Capacity is checked after every read, including the terminating one.
                if (length == scratch.Bytes.Length)
                {
                    scratch.Grow(length + 1);
                }

                if (next == ESCAPE)
                {
                    scratch.Bytes[length++] = input.ReadByte();
                }
                else if (next == NEWLINE)
                {
                    endOfLine = true;
                }
                else
                {
                    scratch.Bytes[length++] = next;
                }
            }

            scratch.Offset = 0;
            scratch.Length = length;
        }
예제 #7
0
            /// <summary>
            /// Completes the current term: copies its statistics into a new term state, lets the
            /// postings writer encode that state, and adds the resulting metadata to the FST builder
            /// under <paramref name="text"/>.
            /// </summary>
            /// <param name="text">The term that was just finished.</param>
            /// <param name="stats">Document frequency and total term frequency of the term.</param>
            public override void FinishTerm(BytesRef text, TermStats stats)
            {
                var termState = _outerInstance._postingsWriter.NewTermState();

                // The term state and the FST output are given the same statistics in one step.
                var termData = new FSTTermOutputs.TermData
                {
                    LONGS           = new long[_longsSize],
                    BYTES           = null,
                    DOC_FREQ        = termState.DocFreq = stats.DocFreq,
                    TOTAL_TERM_FREQ = termState.TotalTermFreq = stats.TotalTermFreq
                };

                _outerInstance._postingsWriter.FinishTerm(termState);
                _outerInstance._postingsWriter.EncodeTerm(termData.LONGS, _metaWriter, _fieldInfo, termState, true);

                // Whatever EncodeTerm wrote to the meta writer becomes the BYTES part of the output.
                var encodedLength = (int)_metaWriter.FilePointer;
                if (encodedLength > 0)
                {
                    var encoded = new byte[encodedLength];
                    termData.BYTES = encoded;
                    _metaWriter.WriteTo(encoded, 0);
                    _metaWriter.Reset();
                }

                _builder.Add(Util.ToIntsRef(text, _scratchTerm), termData);
                _numTerms++;
            }
예제 #8
0
        /// <summary>
        /// Verifies the trailing checksum line of <paramref name="input"/>: the line must start with
        /// CHECKSUM, its remainder must equal the checksum computed so far, and nothing may follow it.
        /// </summary>
        /// <param name="input">Checksumming input positioned at the footer line.</param>
        /// <exception cref="CorruptIndexException">Any of the three checks fails.</exception>
        public static void CheckFooter(ChecksumIndexInput input)
        {
            var line = new BytesRef();

            // Capture the running checksum before the footer line itself is consumed.
            var expected = string.Format("{0:D}", input.Checksum);
            ReadLine(input, line);

            if (!StringHelper.StartsWith(line, CHECKSUM))
            {
                throw new CorruptIndexException(
                    "SimpleText failure: expected checksum line but got " + line.Utf8ToString() +
                    " (resource=" + input + ")");
            }

            var storedValue = new BytesRef(line.Bytes, CHECKSUM.Length, line.Length - CHECKSUM.Length);
            var actual = storedValue.Utf8ToString();
            if (!expected.Equals(actual))
            {
                throw new CorruptIndexException(
                    "SimpleText checksum failure: " + actual + " != " + expected +
                    " (resource=" + input + ")");
            }

            if (input.Length() != input.FilePointer)
            {
                throw new CorruptIndexException(
                    "Unexpected stuff at the end of file, please be careful with your text editor! (resource=" +
                    input + ")");
            }
        }
            /// <summary>
            /// Looks up the value stored for ordinal <paramref name="ord"/> and places it in
            /// <paramref name="result"/> as a newly allocated buffer starting at offset 0.
            /// </summary>
            /// <param name="ord">Ordinal to look up; must lie in <c>0 .. NumValues - 1</c>.</param>
            /// <param name="result">Receives the bytes read from the input.</param>
            /// <exception cref="IndexOutOfRangeException"><paramref name="ord"/> is out of range.</exception>
            /// <exception cref="CorruptIndexException">The stored length line cannot be parsed.</exception>
            public override void LookupOrd(long ord, BytesRef result)
            {
                if (!(ord >= 0 && ord < _field.NumValues))
                {
                    throw new IndexOutOfRangeException("ord must be 0 .. " + (_field.NumValues - 1) + "; got " + ord);
                }

                var recordStart = _field.DataStartFilePointer + ord * (9 + _field.Pattern.Length + _field.MaxLength);
                _input.Seek(recordStart);
                SimpleTextUtil.ReadLine(_input, _scratch);
                Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH),
                             "got " + _scratch.Utf8ToString() + " in=" + _input);

                int len;
                try
                {
                    // LUCENENET: .NET has no pattern-driven integer parser; the built-in invariant parse is used instead.
                    var prefixLength = SimpleTextDocValuesWriter.LENGTH.Length;
                    var digits = Encoding.UTF8.GetString(_scratch.Bytes, _scratch.Offset + prefixLength,
                                                         _scratch.Length - prefixLength);
                    len = int.Parse(digits, NumberStyles.Integer, CultureInfo.InvariantCulture);
                }
                catch (Exception pe)
                {
                    throw new CorruptIndexException("failed to parse int length (resource=" + _input + ")", pe);
                }

                // The value bytes follow the length line directly.
                result.Bytes  = new byte[len];
                result.Offset = 0;
                result.Length = len;
                _input.ReadBytes(result.Bytes, 0, len);
            }
예제 #10
0
        /// <summary>
        /// Validates the footer of a SimpleText file read through <paramref name="input"/>.
        /// </summary>
        /// <remarks>
        /// Three conditions are enforced, in this order: the next line starts with CHECKSUM, the text
        /// after that prefix equals the decimal checksum taken before the line was read, and the
        /// file pointer sits at the end of the input afterwards.
        /// </remarks>
        /// <param name="input">Checksumming input positioned at the footer line.</param>
        /// <exception cref="CorruptIndexException">One of the conditions does not hold.</exception>
        public static void CheckFooter(ChecksumIndexInput input)
        {
            var footer         = new BytesRef();
            var wantedChecksum = string.Format("{0:D}", input.Checksum);

            ReadLine(input, footer);

            var hasPrefix = StringHelper.StartsWith(footer, CHECKSUM);
            if (hasPrefix == false)
            {
                throw new CorruptIndexException("SimpleText failure: expected checksum line but got "
                                                + footer.Utf8ToString() + " (resource=" + input + ")");
            }

            var foundChecksum = new BytesRef(footer.Bytes, CHECKSUM.Length, footer.Length - CHECKSUM.Length)
                .Utf8ToString();

            if (wantedChecksum.Equals(foundChecksum) == false)
            {
                throw new CorruptIndexException("SimpleText checksum failure: " + foundChecksum + " != "
                                                + wantedChecksum + " (resource=" + input + ")");
            }

            var atEnd = input.Length() == input.FilePointer;
            if (!atEnd)
            {
                throw new CorruptIndexException(
                    "Unexpected stuff at the end of file, please be careful with your text editor! (resource="
                    + input + ")");
            }
        }
 /// <summary>
 /// Stores the owning reader, the field descriptor, the input and the scratch buffer for later use.
 /// </summary>
 /// <param name="outerInstance">Reader that created this instance.</param>
 /// <param name="field">Descriptor of the field being read.</param>
 /// <param name="in">Input the values are read from.</param>
 /// <param name="scratch">Reusable line buffer.</param>
 public NumericDocValuesAnonymousInnerClassHelper(
     SimpleTextDocValuesReader outerInstance,
     OneField field,
     IndexInput @in,
     BytesRef scratch)
 {
     _scratch       = scratch;
     _input         = @in;
     _field         = field;
     _outerInstance = outerInstance;
 }
예제 #12
0
        /// <summary>
        /// Builds a <c>BitsAnonymousInnerClassHelper2</c> for the field named by
        /// <paramref name="fieldInfo"/>, handing it a private clone of DATA and a fresh scratch buffer.
        /// </summary>
        /// <param name="fieldInfo">Identifies the field by name.</param>
        private Bits GetBinaryDocsWithField(FieldInfo fieldInfo)
        {
            // Arguments are evaluated left to right: field lookup, then clone, then scratch allocation.
            return new BitsAnonymousInnerClassHelper2(
                this,
                FIELDS[fieldInfo.Name],
                (IndexInput)DATA.Clone(),
                new BytesRef());
        }
예제 #13
0
 /// <summary>
 /// Initializes the enumerator with a new term state from the postings reader, an empty byte
 /// reader, and no current term.
 /// </summary>
 /// <param name="outerInstance">The terms reader this enumerator belongs to.</param>
 internal BaseTermsEnum(FSTTermsReader.TermsReader outerInstance)
 {
     this.outerInstance = outerInstance;

     var postingsReader = outerInstance.outerInstance.postingsReader;
     state        = postingsReader.NewTermState();
     bytesReader  = new ByteArrayDataInput();
     term_Renamed = null;
     // NOTE: metadata will only be initialized in child class
 }
예제 #14
0
 /// <summary>
 /// Stores the owning reader, the field descriptor, the input and the scratch buffer for later use.
 /// </summary>
 /// <param name="outerInstance">Reader that created this instance.</param>
 /// <param name="field">Descriptor of the field being read.</param>
 /// <param name="input">Input the values are read from.</param>
 /// <param name="scratch">Reusable line buffer.</param>
 public BinaryDocValuesAnonymousInnerClassHelper(
     SimpleTextDocValuesReader outerInstance,
     OneField field,
     IndexInput input,
     BytesRef scratch)
 {
     _scratch       = scratch;
     _input         = input;
     _field         = field;
     _outerInstance = outerInstance;
 }
예제 #15
0
        /// <summary>
        /// Add a new position &amp; payload. The position is buffered as a delta from the previous
        /// position; once <c>Lucene41PostingsFormat.BLOCK_SIZE</c> entries are buffered, the
        /// buffers are written out as a block and reset.
        /// </summary>
        /// <param name="position">Position of this occurrence.</param>
        /// <param name="payload">Payload bytes; <c>null</c> or zero length is recorded as "no payload".</param>
        /// <param name="startOffset">Start offset; only used when the field has offsets.</param>
        /// <param name="endOffset">End offset; only used when the field has offsets.</param>
        public override void AddPosition(int position, BytesRef payload, int startOffset, int endOffset)
        {
            // if (DEBUG) {
            //   System.out.println("FPW.addPosition pos=" + position + " posBufferUpto=" + posBufferUpto + (fieldHasPayloads ? " payloadByteUpto=" + payloadByteUpto: ""));
            // }
            PosDeltaBuffer[PosBufferUpto] = position - LastPosition;
            if (FieldHasPayloads)
            {
                if (payload == null || payload.Length == 0)
                {
                    // no payload
                    PayloadLengthBuffer[PosBufferUpto] = 0;
                }
                else
                {
                    PayloadLengthBuffer[PosBufferUpto] = payload.Length;
                    // Grow the shared payload byte buffer if this payload does not fit.
                    if (PayloadByteUpto + payload.Length > PayloadBytes.Length)
                    {
                        PayloadBytes = ArrayUtil.Grow(PayloadBytes, PayloadByteUpto + payload.Length);
                    }
                    Array.Copy(payload.Bytes, payload.Offset, PayloadBytes, PayloadByteUpto, payload.Length);
                    PayloadByteUpto += payload.Length;
                }
            }

            if (FieldHasOffsets)
            {
                Debug.Assert(startOffset >= LastStartOffset);
                Debug.Assert(endOffset >= startOffset);
                // Offsets are buffered as (delta from previous start, length).
                OffsetStartDeltaBuffer[PosBufferUpto] = startOffset - LastStartOffset;
                OffsetLengthBuffer[PosBufferUpto]     = endOffset - startOffset;
                LastStartOffset = startOffset;
            }

            PosBufferUpto++;
            LastPosition = position;
            // A full block has been buffered: write it out and start over at index 0.
            if (PosBufferUpto == Lucene41PostingsFormat.BLOCK_SIZE)
            {
                // if (DEBUG) {
                //   System.out.println("  write pos bulk block @ fp=" + posOut.getFilePointer());
                // }
                ForUtil.WriteBlock(PosDeltaBuffer, Encoded, PosOut);

                if (FieldHasPayloads)
                {
                    // Payload lengths, then the total byte count, then the raw payload bytes.
                    ForUtil.WriteBlock(PayloadLengthBuffer, Encoded, PayOut);
                    PayOut.WriteVInt(PayloadByteUpto);
                    PayOut.WriteBytes(PayloadBytes, 0, PayloadByteUpto);
                    PayloadByteUpto = 0;
                }
                if (FieldHasOffsets)
                {
                    ForUtil.WriteBlock(OffsetStartDeltaBuffer, Encoded, PayOut);
                    ForUtil.WriteBlock(OffsetLengthBuffer, Encoded, PayOut);
                }
                PosBufferUpto = 0;
            }
        }
예제 #16
0
 /// <summary>
 /// Positions the enumerator on <paramref name="target"/> using a previously captured term state.
 /// Nothing happens when <paramref name="target"/> already equals the current term.
 /// </summary>
 /// <param name="target">Term to position on.</param>
 /// <param name="otherState">State to copy into this enumerator's own state.</param>
 public override void SeekExact(BytesRef target, TermState otherState)
 {
     if (target.Equals(term_Renamed))
     {
         return;
     }

     state.CopyFrom(otherState);
     term_Renamed = BytesRef.DeepCopyOf(target);
     seekPending  = true;
 }
예제 #17
0
        /// <summary>
        /// Writes the header of a term: a TERMTEXT line carrying <paramref name="term"/> followed by
        /// a TERMFREQ line carrying <paramref name="freq"/>.
        /// </summary>
        /// <param name="term">Bytes of the term.</param>
        /// <param name="freq">Frequency written on the TERMFREQ line.</param>
        public override void StartTerm(BytesRef term, int freq)
        {
            Write(TERMTEXT);
            Write(term);
            NewLine();

            Write(TERMFREQ);
            // Machine-readable output: format with the invariant culture so the text written
            // does not depend on the current thread's culture settings.
            Write(Convert.ToString(freq, System.Globalization.CultureInfo.InvariantCulture));
            NewLine();
        }
예제 #18
0
 /// <summary>
 /// Stores the constructor arguments and takes the value and ordinal patterns from
 /// <paramref name="field"/>.
 /// </summary>
 /// <param name="outerInstance">Reader that created this instance.</param>
 /// <param name="field">Descriptor of the field; supplies <c>Pattern</c> and <c>OrdPattern</c>.</param>
 /// <param name="input">Input the values are read from.</param>
 /// <param name="scratch">Reusable line buffer.</param>
 public SortedDocValuesAnonymousInnerClassHelper(
     SimpleTextDocValuesReader outerInstance,
     OneField field,
     IndexInput input,
     BytesRef scratch)
 {
     _outerInstance = outerInstance;
     _input         = input;
     _scratch       = scratch;

     // Everything derived from the field descriptor is grouped together.
     _field            = field;
     _decoderFormat    = field.Pattern;
     _ordDecoderFormat = field.OrdPattern;
 }
예제 #19
0
        /// <summary>
        /// Returns a binary doc-values view for the field named by <paramref name="fieldInfo"/>,
        /// backed by a private clone of DATA and a fresh scratch buffer.
        /// </summary>
        /// <param name="fieldInfo">Identifies the field by name.</param>
        public override BinaryDocValues GetBinary(FieldInfo fieldInfo)
        {
            var oneField = FIELDS[fieldInfo.Name];
            Debug.Assert(oneField != null);

            return new BinaryDocValuesAnonymousInnerClassHelper(
                this,
                oneField,
                (IndexInput)DATA.Clone(),
                new BytesRef());
        }
예제 #20
0
        /// <summary>
        /// Writes the header of a term: a TERMTEXT line carrying <paramref name="term"/> followed by
        /// a TERMFREQ line carrying <paramref name="freq"/> formatted with the invariant culture.
        /// </summary>
        /// <param name="term">Bytes of the term.</param>
        /// <param name="freq">Frequency written on the TERMFREQ line.</param>
        public override void StartTerm(BytesRef term, int freq)
        {
            // First line: the term itself.
            Write(TERMTEXT);
            Write(term);
            NewLine();

            // Second line: its frequency as invariant decimal text.
            var frequencyText = freq.ToString(CultureInfo.InvariantCulture);
            Write(TERMFREQ);
            Write(frequencyText);
            NewLine();
        }
 /// <summary>
 /// Stores the owning reader, the field descriptor, the input, the scratch buffer and the two decoders.
 /// </summary>
 /// <param name="outerInstance">Reader that created this instance.</param>
 /// <param name="field">Descriptor of the field being read.</param>
 /// <param name="in">Input the values are read from.</param>
 /// <param name="scratch">Reusable line buffer.</param>
 /// <param name="decoder">Decoder kept in the <c>decoder</c> member.</param>
 /// <param name="ordDecoder">Decoder kept in the <c>ordDecoder</c> member.</param>
 public SortedDocValuesAnonymousInnerClassHelper(
     SimpleTextDocValuesReader outerInstance,
     Lucene.Net.Codecs.SimpleText.SimpleTextDocValuesReader.OneField field,
     IndexInput @in,
     BytesRef scratch,
     DecimalFormat decoder,
     DecimalFormat ordDecoder)
 {
     this.ordDecoder    = ordDecoder;
     this.decoder       = decoder;
     this.scratch       = scratch;
     this.@in           = @in;
     this.field         = field;
     this.outerInstance = outerInstance;
 }
예제 #22
0
        /// <summary>
        /// Writes the footer line: the CHECKSUM label followed by the decimal text of
        /// <c>output.Checksum</c> and a newline.
        /// </summary>
        /// <param name="output">Output to append the footer to.</param>
        /// <param name="scratch">Scratch buffer handed to the string-writing overload.</param>
        public static void WriteChecksum(IndexOutput output, BytesRef scratch)
        {
            // Intent: pad with zeros so different checksum values use the
            // same number of bytes
            // (BaseIndexFileFormatTestCase.testMergeStability cares).
            // NOTE(review): "{0:D}" carries no precision, so as written the value is NOT
            // zero-padded - confirm whether a fixed width (e.g. D20) was intended, and change
            // CheckFooter in the same commit if so, since it formats its expected value the same way.
            // The checksum is read before the label is written, so the footer line itself is
            // presumably excluded from the value - do not reorder these statements.
            var checksum = string.Format("{0:D}", output.Checksum);

            Write(output, CHECKSUM);
            Write(output, checksum, scratch);
            WriteNewline(output);
        }
예제 #23
0
 /// <summary>
 /// Stores the constructor arguments, takes the value pattern from <paramref name="field"/>,
 /// and starts with an empty ordinal list at index 0.
 /// </summary>
 /// <param name="outerInstance">Reader that created this instance.</param>
 /// <param name="field">Descriptor of the field; supplies <c>Pattern</c>.</param>
 /// <param name="input">Input the values are read from.</param>
 /// <param name="scratch">Reusable line buffer.</param>
 public SortedSetDocValuesAnonymousInnerClassHelper(
     SimpleTextDocValuesReader outerInstance,
     OneField field,
     IndexInput input,
     BytesRef scratch)
 {
     _outerInstance = outerInstance;
     _input         = input;
     _scratch       = scratch;

     _field         = field;
     _decoderFormat = field.Pattern;

     // No ordinals are loaded until a document is selected.
     _currentIndex  = 0;
     _currentOrds   = new string[0];
 }
예제 #24
0
        /// <summary>
        /// Returns a sorted doc-values view for the field named by <paramref name="fieldInfo"/>,
        /// backed by a private clone of DATA and a fresh scratch buffer.
        /// </summary>
        /// <param name="fieldInfo">Identifies the field by name.</param>
        public override SortedDocValues GetSorted(FieldInfo fieldInfo)
        {
            var oneField = FIELDS[fieldInfo.Name];

            // SegmentCoreReaders already verifies this field is valid:
            Debug.Assert(oneField != null);

            var dataClone   = (IndexInput)DATA.Clone();
            var lineScratch = new BytesRef();
            var docValues   = new SortedDocValuesAnonymousInnerClassHelper(this, oneField, dataClone, lineScratch);

            return docValues;
        }
예제 #25
0
 /// <summary>
 /// Seeks the underlying FST enumerator to <paramref name="target"/> or the next term after it
 /// and reports the outcome.
 /// </summary>
 /// <param name="target">Term to seek to.</param>
 /// <returns>
 /// <c>END</c> when no current term results, <c>FOUND</c> when the current term equals
 /// <paramref name="target"/>, otherwise <c>NOT_FOUND</c>.
 /// </returns>
 public override SeekStatus SeekCeil(BytesRef target)
 {
     UpdateEnum(fstEnum.SeekCeil(target));

     if (term_Renamed == null)
     {
         return SeekStatus.END;
     }

     if (term_Renamed.Equals(target))
     {
         return SeekStatus.FOUND;
     }

     return SeekStatus.NOT_FOUND;
 }
예제 #26
0
 /// <summary>
 /// Writes the bytes of <paramref name="b"/> to <paramref name="output"/>, emitting an ESCAPE
 /// byte in front of every NEWLINE or ESCAPE byte.
 /// </summary>
 /// <param name="output">Destination.</param>
 /// <param name="b">Bytes to write; the slice starting at <c>Offset</c> of length <c>Length</c> is used.</param>
 public static void Write(DataOutput output, BytesRef b)
 {
     var index = 0;
     while (index < b.Length)
     {
         var value = b.Bytes[b.Offset + index];
         var needsEscape = value == NEWLINE || value == ESCAPE;

         if (needsEscape)
         {
             output.WriteByte(ESCAPE);
         }

         output.WriteByte(value);
         index++;
     }
 }
 /// <summary>
 /// Stores the owning reader, the field descriptor, the input, the scratch buffer and the decoder,
 /// and starts with an empty ordinal list at index 0.
 /// </summary>
 /// <param name="outerInstance">Reader that created this instance.</param>
 /// <param name="field">Descriptor of the field being read.</param>
 /// <param name="in">Input the values are read from.</param>
 /// <param name="scratch">Reusable line buffer.</param>
 /// <param name="decoder">Decoder kept in the <c>decoder</c> member.</param>
 public SortedSetDocValuesAnonymousInnerClassHelper(
     SimpleTextDocValuesReader outerInstance,
     Lucene.Net.Codecs.SimpleText.SimpleTextDocValuesReader.OneField field,
     IndexInput @in,
     BytesRef scratch,
     DecimalFormat decoder)
 {
     this.decoder       = decoder;
     this.scratch       = scratch;
     this.@in           = @in;
     this.field         = field;
     this.outerInstance = outerInstance;

     // No ordinals are loaded until a document is selected.
     currentIndex = 0;
     currentOrds  = new string[0];
 }
예제 #28
0
 /// <summary>
 /// Copies the slice described by <paramref name="b"/> to <paramref name="output"/> one byte at
 /// a time. NEWLINE and ESCAPE bytes are written with a leading ESCAPE byte.
 /// </summary>
 /// <param name="output">Destination.</param>
 /// <param name="b">Source slice (<c>Bytes</c>, <c>Offset</c>, <c>Length</c>).</param>
 public static void Write(DataOutput output, BytesRef b)
 {
     for (var n = 0; n < b.Length; ++n)
     {
         var current = b.Bytes[b.Offset + n];

         if (current != NEWLINE && current != ESCAPE)
         {
             // Ordinary byte: written as-is.
             output.WriteByte(current);
             continue;
         }

         output.WriteByte(ESCAPE);
         output.WriteByte(current);
     }
 }
예제 #29
0
            /// <summary>
            /// Reads the next position record from the input: the position (when positions are
            /// read), the start and end offsets (when offsets are read) and an optional payload line.
            /// </summary>
            /// <returns>The parsed position, or -1 when positions are not being read.</returns>
            public override int NextPosition()
            {
                int pos;

                if (_readPositions)
                {
                    // The line is expected to start with POS; the rest is parsed as an int.
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS), "got line=" + _scratch.Utf8ToString());
                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.POS.Length, _scratch.Length - SimpleTextFieldsWriter.POS.Length,
                                            _scratchUtf162);
                    pos = ArrayUtil.ParseInt(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
                }
                else
                {
                    pos = -1;
                }

                if (_readOffsets)
                {
                    // Two consecutive lines: START_OFFSET, then END_OFFSET.
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET), "got line=" + _scratch.Utf8ToString());
                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.START_OFFSET.Length,
                                            _scratch.Length - SimpleTextFieldsWriter.START_OFFSET.Length, _scratchUtf162);
                    _startOffset = ArrayUtil.ParseInt(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET), "got line=" + _scratch.Utf8ToString());
                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.END_OFFSET.Length,
                                            _scratch.Length - SimpleTextFieldsWriter.END_OFFSET.Length, _scratchUtf162);
                    _endOffset = ArrayUtil.ParseInt(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
                }

                // Remember where the next line starts so it can be un-read if it is not a payload.
                long fp = _in.FilePointer;

                SimpleTextUtil.ReadLine(_in, _scratch);
                if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
                {
                    // Copy everything after the PAYLOAD prefix into the second scratch buffer.
                    // NOTE(review): the copy starts at PAYLOAD.Length without adding _scratch.Offset;
                    // that relies on ReadLine leaving the offset at 0 - confirm.
                    int len = _scratch.Length - SimpleTextFieldsWriter.PAYLOAD.Length;
                    if (_scratch2.Bytes.Length < len)
                    {
                        _scratch2.Grow(len);
                    }
                    Array.Copy(_scratch.Bytes, SimpleTextFieldsWriter.PAYLOAD.Length, _scratch2.Bytes, 0, len);
                    _scratch2.Length = len;
                    _payload         = _scratch2;
                }
                else
                {
                    // Not a payload line: clear the payload and rewind so the line is read again later.
                    _payload = null;
                    _in.Seek(fp);
                }
                return(pos);
            }
 /// <summary>
 /// Repositions the iterator on the tail of <c>terms</c> starting at <paramref name="text"/> and
 /// reports whether the first entry, if any, equals <paramref name="text"/>.
 /// </summary>
 /// <param name="text">Term to seek to.</param>
 /// <returns>
 /// <c>END</c> when the tail is empty; otherwise <c>FOUND</c> or <c>NOT_FOUND</c> depending on
 /// whether the term returned by <c>Next()</c> equals <paramref name="text"/>.
 /// </returns>
 // NOTE(review): TailMap(...).entrySet() and HasNext() look like Java collection APIs left over
 // from an automatic conversion - confirm that they resolve in this project.
 public override SeekStatus SeekCeil(BytesRef text)
 {
     _iterator = terms.TailMap(text).entrySet().GetEnumerator();
     //JAVA TO C# CONVERTER TODO TASK: Java iterators are only converted within the context of 'while' and 'for' loops:
     if (!_iterator.HasNext())
     {
         return(SeekStatus.END);
     }
     else
     {
         // Next() advances the iterator and yields the term that is compared against the target.
         return(Next().Equals(text) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND);
     }
 }
예제 #31
0
 /// <summary>
 /// Seeks to <paramref name="target"/> or the next term after it, reloads the term metadata,
 /// and reports the outcome.
 /// </summary>
 /// <param name="target">Term to seek to.</param>
 /// <returns>
 /// <c>END</c> when the seek yields no term, <c>FOUND</c> when the resulting term equals
 /// <paramref name="target"/>, otherwise <c>NOT_FOUND</c>.
 /// </returns>
 public override SeekStatus SeekCeil(BytesRef target)
 {
     decoded      = false;
     term_Renamed = DoSeekCeil(target);
     LoadMetaData();

     SeekStatus status;
     if (term_Renamed == null)
     {
         status = SeekStatus.END;
     }
     else if (term_Renamed.Equals(target))
     {
         status = SeekStatus.FOUND;
     }
     else
     {
         status = SeekStatus.NOT_FOUND;
     }

     return status;
 }
예제 #32
0
        /// <summary>
        /// Returns a numeric doc-values view for the field named by <paramref name="fieldInfo"/>,
        /// backed by a private clone of DATA and a fresh scratch buffer.
        /// </summary>
        /// <param name="fieldInfo">Identifies the field by name.</param>
        public override NumericDocValues GetNumeric(FieldInfo fieldInfo)
        {
            var field = FIELDS[fieldInfo.Name];

            // SegmentCoreReaders already verifies this field is valid.
            // A single assertion is kept - the one carrying the diagnostic message; a bare
            // duplicate placed before it would fire first and hide that message.
            Debug.Assert(field != null, "field=" + fieldInfo.Name + " fields=" + FIELDS);

            var @in     = (IndexInput)DATA.Clone();
            var scratch = new BytesRef();

            return(new NumericDocValuesAnonymousInnerClassHelper(this, field, @in, scratch));
        }
        /// <summary>
        /// Returns a sorted-set doc-values view for the field named by <paramref name="fieldInfo"/>,
        /// backed by a private clone of DATA, a fresh scratch buffer and a decoder built from the
        /// field's pattern.
        /// </summary>
        /// <param name="fieldInfo">Identifies the field by name.</param>
        public override SortedSetDocValues GetSortedSet(FieldInfo fieldInfo)
        {
            OneField oneField = FIELDS[fieldInfo.Name];

            // SegmentCoreReaders already verifies this field is
            // valid:
            Debug.Assert(oneField != null);

            // Arguments are evaluated left to right: clone, scratch buffer, then decoder.
            return new SortedSetDocValuesAnonymousInnerClassHelper(
                this,
                oneField,
                (IndexInput)DATA.Clone(),
                new BytesRef(),
                new DecimalFormat(oneField.Pattern, new DecimalFormatSymbols(Locale.ROOT)));
        }
        /// <summary>
        /// Reads the live-docs file of a segment: a SIZE line, then one DOC line per set bit until
        /// the END line, followed by the checksum footer.
        /// </summary>
        /// <param name="dir">Directory the file is opened from.</param>
        /// <param name="info">Segment commit info; supplies the segment name and deletion generation.</param>
        /// <param name="context">I/O context passed to the directory.</param>
        /// <returns>A <c>SimpleTextBits</c> wrapping the bits that were read.</returns>
        public override Bits ReadLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context)
        {
            Debug.Assert(info.HasDeletions());
            var line  = new BytesRef();
            var utf16 = new CharsRef();

            var liveDocsFile = IndexFileNames.FileNameFromGeneration(info.Info.Name, LIVEDOCS_EXTENSION, info.DelGen);
            ChecksumIndexInput input = null;
            var completed = false;

            try
            {
                input = dir.OpenChecksumInput(liveDocsFile, context);

                SimpleTextUtil.ReadLine(input, line);
                Debug.Assert(StringHelper.StartsWith(line, SIZE));
                var size = ParseIntAt(line, SIZE.Length, utf16);
                var bits = new BitArray(size);

                // One DOC line per entry until the END marker is reached.
                for (SimpleTextUtil.ReadLine(input, line); !line.Equals(END); SimpleTextUtil.ReadLine(input, line))
                {
                    Debug.Assert(StringHelper.StartsWith(line, DOC));
                    bits.SafeSet(ParseIntAt(line, DOC.Length, utf16), true);
                }

                SimpleTextUtil.CheckFooter(input);

                completed = true;
                return new SimpleTextBits(bits, size);
            }
            finally
            {
                // On failure, close without letting a secondary exception mask the original one.
                if (!completed)
                {
                    IOUtils.CloseWhileHandlingException(input);
                }
                else
                {
                    IOUtils.Close(input);
                }
            }
        }
예제 #35
0
 /// <summary>
 /// Converts <paramref name="s"/> from UTF-16 to UTF-8 into <paramref name="scratch"/> and
 /// writes the result to <paramref name="output"/> via the <c>BytesRef</c> overload.
 /// </summary>
 /// <param name="output">Destination.</param>
 /// <param name="s">Text to write.</param>
 /// <param name="scratch">Buffer that receives the UTF-8 bytes.</param>
 public static void Write(DataOutput output, string s, BytesRef scratch)
 {
     var chars = s.ToCharArray();
     UnicodeUtil.UTF16toUTF8(chars, 0, chars.Length, scratch);
     Write(output, scratch);
 }
 /// <summary>
 /// Re-targets this instance at a new set of postings data and rewinds its iteration state
 /// (document -1, nothing consumed, next position index 0).
 /// </summary>
 /// <param name="liveDocs">Live-docs bits to use.</param>
 /// <param name="positions">Positions array to iterate.</param>
 /// <param name="startOffsets">Start offsets array.</param>
 /// <param name="endOffsets">End offsets array.</param>
 /// <param name="payloads">Payloads array.</param>
 public void Reset(
     Bits liveDocs,
     int[] positions,
     int[] startOffsets,
     int[] endOffsets,
     BytesRef[] payloads)
 {
     // Iteration state first...
     _doc = -1;
     _didNext = false;
     _nextPos = 0;

     // ...then the data being iterated.
     _liveDocs = liveDocs;
     _positions = positions;
     _startOffsets = startOffsets;
     _endOffsets = endOffsets;
     _payloads = payloads;
 }
        public override Fields Get(int doc)
        {
            var fields = new SortedDictionary<string, SimpleTVTerms>();

            _input.Seek(_offsets[doc]);
            ReadLine();
            Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.NUMFIELDS));
            var numFields = ParseIntAt(SimpleTextTermVectorsWriter.NUMFIELDS.Length);
            if (numFields == 0)
            {
                return null; // no vectors for this doc
            }
            for (var i = 0; i < numFields; i++)
            {
                ReadLine();
                Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELD));
                // skip fieldNumber:
                ParseIntAt(SimpleTextTermVectorsWriter.FIELD.Length);

                ReadLine();
                Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDNAME));
                var fieldName = ReadString(SimpleTextTermVectorsWriter.FIELDNAME.Length, _scratch);

                ReadLine();
                Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDPOSITIONS));
                var positions = Convert.ToBoolean(ReadString(SimpleTextTermVectorsWriter.FIELDPOSITIONS.Length, _scratch));

                ReadLine();
                Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDOFFSETS));
                var offsets = Convert.ToBoolean(ReadString(SimpleTextTermVectorsWriter.FIELDOFFSETS.Length, _scratch));

                ReadLine();
                Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDPAYLOADS));
                var payloads = Convert.ToBoolean(ReadString(SimpleTextTermVectorsWriter.FIELDPAYLOADS.Length, _scratch));

                ReadLine();
                Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDTERMCOUNT));
                var termCount = ParseIntAt(SimpleTextTermVectorsWriter.FIELDTERMCOUNT.Length);

                var terms = new SimpleTVTerms(offsets, positions, payloads);
                fields.Add(fieldName, terms);

                for (var j = 0; j < termCount; j++)
                {
                    ReadLine();
                    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.TERMTEXT));
                    var term = new BytesRef();
                    var termLength = _scratch.Length - SimpleTextTermVectorsWriter.TERMTEXT.Length;
                    term.Grow(termLength);
                    term.Length = termLength;
                    Array.Copy(_scratch.Bytes, _scratch.Offset + SimpleTextTermVectorsWriter.TERMTEXT.Length, term.Bytes, term.Offset, termLength);

                    var postings = new SimpleTVPostings();
                    terms.TERMS.Add(term, postings);

                    ReadLine();
                    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.TERMFREQ));
                    postings.FREQ = ParseIntAt(SimpleTextTermVectorsWriter.TERMFREQ.Length);

                    if (!positions && !offsets) continue;

                    if (positions)
                    {
                        postings.POSITIONS = new int[postings.FREQ];
                        if (payloads)
                        {
                            postings.PAYLOADS = new BytesRef[postings.FREQ];
                        }
                    }

                    if (offsets)
                    {
                        postings.START_OFFSETS = new int[postings.FREQ];
                        postings.END_OFFSETS = new int[postings.FREQ];
                    }

                    for (var k = 0; k < postings.FREQ; k++)
                    {
                        if (positions)
                        {
                            ReadLine();
                            Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.POSITION));
                            postings.POSITIONS[k] = ParseIntAt(SimpleTextTermVectorsWriter.POSITION.Length);
                            if (payloads)
                            {
                                ReadLine();
                                Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.PAYLOAD));
                                if (_scratch.Length - SimpleTextTermVectorsWriter.PAYLOAD.Length == 0)
                                {
                                    postings.PAYLOADS[k] = null;
                                }
                                else
                                {
                                    var payloadBytes = new byte[_scratch.Length - SimpleTextTermVectorsWriter.PAYLOAD.Length];
                                    Array.Copy(_scratch.Bytes, _scratch.Offset + SimpleTextTermVectorsWriter.PAYLOAD.Length, payloadBytes, 0,
                                        payloadBytes.Length);
                                    postings.PAYLOADS[k] = new BytesRef(payloadBytes);
                                }
                            }
                        }

                        if (!offsets) continue;

                        ReadLine();
                        Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.STARTOFFSET));
                        postings.START_OFFSETS[k] = ParseIntAt(SimpleTextTermVectorsWriter.STARTOFFSET.Length);

                        ReadLine();
                        Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.ENDOFFSET));
                        postings.END_OFFSETS[k] = ParseIntAt(SimpleTextTermVectorsWriter.ENDOFFSET.Length);
                    }
                }
            }
            return new SimpleTVFields(this, fields);
        }
        /// <summary>
        /// Reads the next line, which must start with the VALUE marker, and passes the text
        /// that follows the marker to <paramref name="visitor"/> in the form selected by
        /// <paramref name="type"/>. If <paramref name="type"/> matches none of the known
        /// TYPE_* markers the line is consumed and nothing is reported.
        /// </summary>
        /// <param name="type">Type marker, compared against the writer's TYPE_* constants.</param>
        /// <param name="fieldInfo">Field metadata forwarded unchanged to the visitor.</param>
        /// <param name="visitor">Receiver of the decoded value.</param>
        private void ReadField(BytesRef type, FieldInfo fieldInfo, StoredFieldVisitor visitor)
        {
            ReadLine();
            Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.VALUE));

            // Everything after the VALUE marker is the payload of this line.
            var valueStart = _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length;
            var valueLength = _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length;

            if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_STRING))
            {
                // Decode the UTF-8 bytes explicitly, the same way the numeric branches do.
                UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueStart, valueLength, _scratchUtf16);
                visitor.StringField(fieldInfo, _scratchUtf16.ToString());
            }
            else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_BINARY))
            {
                // The visitor gets its own copy so later reads into _scratch cannot alter it.
                var copy = new sbyte[valueLength];
                Array.Copy(_scratch.Bytes, valueStart, copy, 0, copy.Length);
                visitor.BinaryField(fieldInfo, copy);
            }
            else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_INT))
            {
                UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueStart, valueLength, _scratchUtf16);
                visitor.IntField(fieldInfo, Convert.ToInt32(_scratchUtf16.ToString(), CultureInfo.InvariantCulture));
            }
            else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_LONG))
            {
                UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueStart, valueLength, _scratchUtf16);
                visitor.LongField(fieldInfo, Convert.ToInt64(_scratchUtf16.ToString(), CultureInfo.InvariantCulture));
            }
            else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_FLOAT))
            {
                // Invariant culture: the stored text must parse the same regardless of the
                // machine's decimal separator.
                UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueStart, valueLength, _scratchUtf16);
                visitor.FloatField(fieldInfo, Convert.ToSingle(_scratchUtf16.ToString(), CultureInfo.InvariantCulture));
            }
            else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_DOUBLE))
            {
                UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueStart, valueLength, _scratchUtf16);
                visitor.DoubleField(fieldInfo, Convert.ToDouble(_scratchUtf16.ToString(), CultureInfo.InvariantCulture));
            }
        }
        /// <summary>
        /// Writes <paramref name="infos"/> as labelled text lines to the field-infos file of
        /// the given segment, followed by a checksum line.
        /// </summary>
        /// <remarks>
        /// Numeric values are formatted with the invariant culture so the file contents do not
        /// depend on the culture of the machine that writes them.
        /// </remarks>
        /// <param name="directory">Directory in which the output file is created.</param>
        /// <param name="segmentName">Segment name used to build the file name.</param>
        /// <param name="segmentSuffix">Segment suffix used to build the file name.</param>
        /// <param name="infos">Field infos to serialize.</param>
        /// <param name="context">I/O context passed to <c>CreateOutput</c>.</param>
        public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos,
            IOContext context)
        {
            var fileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, FIELD_INFOS_EXTENSION);
            var output = directory.CreateOutput(fileName, context);
            var scratch = new BytesRef();
            var success = false;

            try
            {
                SimpleTextUtil.Write(output, NUMFIELDS);
                SimpleTextUtil.Write(output, Convert.ToString(infos.Size(), CultureInfo.InvariantCulture), scratch);
                SimpleTextUtil.WriteNewline(output);

                foreach (FieldInfo fi in infos)
                {
                    SimpleTextUtil.Write(output, NAME);
                    SimpleTextUtil.Write(output, fi.Name, scratch);
                    SimpleTextUtil.WriteNewline(output);

                    SimpleTextUtil.Write(output, NUMBER);
                    SimpleTextUtil.Write(output, Convert.ToString(fi.Number, CultureInfo.InvariantCulture), scratch);
                    SimpleTextUtil.WriteNewline(output);

                    SimpleTextUtil.Write(output, ISINDEXED);
                    SimpleTextUtil.Write(output, Convert.ToString(fi.Indexed), scratch);
                    SimpleTextUtil.WriteNewline(output);

                    // Index options are only written for indexed fields.
                    if (fi.Indexed)
                    {
                        Debug.Assert(fi.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.HasPayloads());
                        SimpleTextUtil.Write(output, INDEXOPTIONS);
                        SimpleTextUtil.Write(output, fi.FieldIndexOptions.ToString(), scratch);
                        SimpleTextUtil.WriteNewline(output);
                    }

                    SimpleTextUtil.Write(output, STORETV);
                    SimpleTextUtil.Write(output, Convert.ToString(fi.HasVectors()), scratch);
                    SimpleTextUtil.WriteNewline(output);

                    SimpleTextUtil.Write(output, PAYLOADS);
                    SimpleTextUtil.Write(output, Convert.ToString(fi.HasPayloads()), scratch);
                    SimpleTextUtil.WriteNewline(output);

                    // Stored as "has norms", i.e. the negation of OmitsNorms().
                    SimpleTextUtil.Write(output, NORMS);
                    SimpleTextUtil.Write(output, Convert.ToString(!fi.OmitsNorms()), scratch);
                    SimpleTextUtil.WriteNewline(output);

                    SimpleTextUtil.Write(output, NORMS_TYPE);
                    SimpleTextUtil.Write(output, GetDocValuesType(fi.NormType), scratch);
                    SimpleTextUtil.WriteNewline(output);

                    SimpleTextUtil.Write(output, DOCVALUES);
                    SimpleTextUtil.Write(output, GetDocValuesType(fi.DocValuesType), scratch);
                    SimpleTextUtil.WriteNewline(output);

                    SimpleTextUtil.Write(output, DOCVALUES_GEN);
                    SimpleTextUtil.Write(output, Convert.ToString(fi.DocValuesGen, CultureInfo.InvariantCulture), scratch);
                    SimpleTextUtil.WriteNewline(output);

                    IDictionary<string, string> atts = fi.Attributes();
                    int numAtts = atts == null ? 0 : atts.Count;
                    SimpleTextUtil.Write(output, NUM_ATTS);
                    SimpleTextUtil.Write(output, Convert.ToString(numAtts, CultureInfo.InvariantCulture), scratch);
                    SimpleTextUtil.WriteNewline(output);

                    if (numAtts <= 0 || atts == null) continue;
                    foreach (var entry in atts)
                    {
                        SimpleTextUtil.Write(output, ATT_KEY);
                        SimpleTextUtil.Write(output, entry.Key, scratch);
                        SimpleTextUtil.WriteNewline(output);

                        SimpleTextUtil.Write(output, ATT_VALUE);
                        SimpleTextUtil.Write(output, entry.Value, scratch);
                        SimpleTextUtil.WriteNewline(output);
                    }
                }
                SimpleTextUtil.WriteChecksum(output, scratch);
                success = true;
            }
            finally
            {
                // On failure, close without letting a secondary close error mask the original one.
                if (success)
                {
                    output.Dispose();
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(output);
                }
            }
        }
        /// <summary>
        /// Parses the segment-info file of <paramref name="segmentName"/> line by line
        /// (version, doc count, compound flag, diagnostics, file list), verifies the footer
        /// and returns the resulting <see cref="SegmentInfo"/>.
        /// </summary>
        /// <param name="directory">Directory the segment-info file is opened from.</param>
        /// <param name="segmentName">Name of the segment to read.</param>
        /// <param name="context">I/O context passed to <c>OpenChecksumInput</c>.</param>
        /// <returns>The segment info described by the file.</returns>
        public override SegmentInfo Read(Directory directory, string segmentName, IOContext context)
        {
            var line = new BytesRef();
            string siFileName = IndexFileNames.SegmentFileName(segmentName, "",
                SimpleTextSegmentInfoFormat.SI_EXTENSION);
            ChecksumIndexInput input = directory.OpenChecksumInput(siFileName, context);
            bool completed = false;
            try
            {
                // Header: version, document count, compound-file flag.
                SimpleTextUtil.ReadLine(input, line);
                Debug.Assert(StringHelper.StartsWith(line, SimpleTextSegmentInfoWriter.SI_VERSION));
                string version = ReadString(SimpleTextSegmentInfoWriter.SI_VERSION.Length, line);

                SimpleTextUtil.ReadLine(input, line);
                Debug.Assert(StringHelper.StartsWith(line, SimpleTextSegmentInfoWriter.SI_DOCCOUNT));
                int docCount = Convert.ToInt32(ReadString(SimpleTextSegmentInfoWriter.SI_DOCCOUNT.Length, line));

                SimpleTextUtil.ReadLine(input, line);
                Debug.Assert(StringHelper.StartsWith(line, SimpleTextSegmentInfoWriter.SI_USECOMPOUND));
                bool isCompoundFile = Convert.ToBoolean(ReadString(SimpleTextSegmentInfoWriter.SI_USECOMPOUND.Length, line));

                // Diagnostics: a count followed by that many key/value line pairs.
                SimpleTextUtil.ReadLine(input, line);
                Debug.Assert(StringHelper.StartsWith(line, SimpleTextSegmentInfoWriter.SI_NUM_DIAG));
                int numDiag = Convert.ToInt32(ReadString(SimpleTextSegmentInfoWriter.SI_NUM_DIAG.Length, line));
                IDictionary<string, string> diagnostics = new Dictionary<string, string>();

                int remainingDiag = numDiag;
                while (remainingDiag-- > 0)
                {
                    SimpleTextUtil.ReadLine(input, line);
                    Debug.Assert(StringHelper.StartsWith(line, SimpleTextSegmentInfoWriter.SI_DIAG_KEY));
                    string diagKey = ReadString(SimpleTextSegmentInfoWriter.SI_DIAG_KEY.Length, line);

                    SimpleTextUtil.ReadLine(input, line);
                    Debug.Assert(StringHelper.StartsWith(line, SimpleTextSegmentInfoWriter.SI_DIAG_VALUE));
                    string diagValue = ReadString(SimpleTextSegmentInfoWriter.SI_DIAG_VALUE.Length, line);

                    diagnostics[diagKey] = diagValue;
                }

                // File list: a count followed by one line per file name.
                SimpleTextUtil.ReadLine(input, line);
                Debug.Assert(StringHelper.StartsWith(line, SimpleTextSegmentInfoWriter.SI_NUM_FILES));
                int numFiles = Convert.ToInt32(ReadString(SimpleTextSegmentInfoWriter.SI_NUM_FILES.Length, line));
                var files = new HashSet<string>();

                int remainingFiles = numFiles;
                while (remainingFiles-- > 0)
                {
                    SimpleTextUtil.ReadLine(input, line);
                    Debug.Assert(StringHelper.StartsWith(line, SimpleTextSegmentInfoWriter.SI_FILE));
                    files.Add(ReadString(SimpleTextSegmentInfoWriter.SI_FILE.Length, line));
                }

                SimpleTextUtil.CheckFooter(input);

                var info = new SegmentInfo(directory, version, segmentName, docCount, isCompoundFile, null,
                    diagnostics);
                info.Files = files;
                completed = true;
                return info;
            }
            finally
            {
                // A failed read closes quietly so the original exception is the one that surfaces.
                if (completed)
                {
                    input.Dispose();
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(input);
                }
            }
        }
예제 #41
0
            /// <summary>
            /// Scans the postings text from <c>_termsStart</c> until an END line or the next
            /// FIELD line and builds the term FST for this field. Each term maps to the pair
            /// (file pointer after its TERM line, (docFreq, totalTermFreq)).
            /// Also updates <c>_sumTotalTermFreq</c>, <c>_sumDocFreq</c>, <c>_termCount</c>
            /// and <c>_docCount</c>.
            /// </summary>
            private void LoadTerms()
            {
                var posIntOutputs = PositiveIntOutputs.Singleton;
                var outputsInner = new PairOutputs<long?, long?>(posIntOutputs, posIntOutputs);
                var outputs = new PairOutputs<long?, PairOutputs<long?,long?>.Pair>(posIntOutputs, outputsInner);

                // FST output type: (docsStart, (docFreq, totalTermFreq)).
                var b = new Builder<PairOutputs<long?, PairOutputs<long?,long?>.Pair>.Pair>(FST.INPUT_TYPE.BYTE1, outputs);

                // Work on a clone so the shared input's position is left alone.
                var input = (IndexInput) _outerInstance._input.Clone();
                input.Seek(_termsStart);

                var lastTerm = new BytesRef(10);
                long lastDocsStart = -1; // -1 until the first TERM line has been seen
                int docFreq = 0;
                long totalTermFreq = 0;
                var visitedDocs = new FixedBitSet(_maxDoc);

                var scratchIntsRef = new IntsRef();
                while (true)
                {
                    SimpleTextUtil.ReadLine(input, _scratch);
                    if (_scratch.Equals(SimpleTextFieldsWriter.END) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD))
                    {
                        // End of this field: flush the term that is still pending, if any.
                        if (lastDocsStart != -1)
                        {
                            b.Add(Util.ToIntsRef(lastTerm, scratchIntsRef),
                                outputs.NewPair(lastDocsStart, outputsInner.NewPair(docFreq, totalTermFreq)));
                            _sumTotalTermFreq += totalTermFreq;
                        }
                        break;
                    }

                    if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC))
                    {
                        docFreq++;
                        _sumDocFreq++;
                        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length, _scratch.Length - SimpleTextFieldsWriter.DOC.Length,
                            _scratchUtf16);
                        int docId = ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                        visitedDocs.Set(docId);
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
                    {
                        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length,
                            _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16);
                        totalTermFreq += ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM))
                    {
                        // A new term starts: flush the previous one before overwriting lastTerm.
                        if (lastDocsStart != -1)
                        {
                            b.Add(Util.ToIntsRef(lastTerm, scratchIntsRef),
                                outputs.NewPair(lastDocsStart, outputsInner.NewPair(docFreq, totalTermFreq)));
                        }
                        lastDocsStart = input.FilePointer;
                        int len = _scratch.Length - SimpleTextFieldsWriter.TERM.Length;
                        if (len > lastTerm.Length)
                        {
                            lastTerm.Grow(len);
                        }
                        // Honor _scratch.Offset like the DOC/FREQ branches above do.
                        Array.Copy(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.TERM.Length, lastTerm.Bytes, 0, len);
                        lastTerm.Length = len;
                        docFreq = 0;
                        _sumTotalTermFreq += totalTermFreq;
                        totalTermFreq = 0;
                        _termCount++;
                    }
                }
                _docCount = visitedDocs.Cardinality();
                _fst = b.Finish();
            }
예제 #42
0
            /// <summary>
            /// Reads the next position record from <c>_in</c>: the position (when positions are
            /// being read), the start/end offsets (when offsets are being read) and an optional
            /// payload line.
            /// </summary>
            /// <returns>The parsed position, or -1 when positions are not being read.</returns>
            public override int NextPosition()
            {
                int pos;
                if (_readPositions)
                {
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS), "got line=" + _scratch.Utf8ToString());
                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.POS.Length, _scratch.Length - SimpleTextFieldsWriter.POS.Length,
                        _scratchUtf162);
                    pos = ArrayUtil.ParseInt(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
                }
                else
                {
                    pos = -1;
                }

                if (_readOffsets)
                {
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET), "got line=" + _scratch.Utf8ToString());
                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.START_OFFSET.Length,
                        _scratch.Length - SimpleTextFieldsWriter.START_OFFSET.Length, _scratchUtf162);
                    _startOffset = ArrayUtil.ParseInt(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET), "got line=" + _scratch.Utf8ToString());
                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.END_OFFSET.Length,
                        _scratch.Length - SimpleTextFieldsWriter.END_OFFSET.Length, _scratchUtf162);
                    _endOffset = ArrayUtil.ParseInt(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
                }

                // Peek at the following line: it is consumed only if it is a payload line,
                // otherwise the input is rewound to where it was.
                long fp = _in.FilePointer;
                SimpleTextUtil.ReadLine(_in, _scratch);
                if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
                {
                    int len = _scratch.Length - SimpleTextFieldsWriter.PAYLOAD.Length;
                    if (_scratch2.Bytes.Length < len)
                    {
                        _scratch2.Grow(len);
                    }
                    // Honor _scratch.Offset like the position/offset reads above do.
                    Array.Copy(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.PAYLOAD.Length, _scratch2.Bytes, 0, len);
                    _scratch2.Length = len;
                    _payload = _scratch2;
                }
                else
                {
                    _payload = null;
                    _in.Seek(fp);
                }
                return pos;
            }
예제 #43
0
        /// <summary>
        /// Scans <paramref name="in"/> line by line and records, for every FIELD line, the
        /// file pointer immediately after that line. Scanning stops at the END line, after
        /// which the footer is checked.
        /// </summary>
        /// <param name="in">Input to scan; it is wrapped in a checksumming input.</param>
        /// <returns>Map from field name to the file pointer following its FIELD line.</returns>
        private SortedDictionary<string, long?> ReadFields(IndexInput @in)
        {
            ChecksumIndexInput input = new BufferedChecksumIndexInput(@in);
            var scratch = new BytesRef(10);
            var fields = new SortedDictionary<string, long?>();

            while (true)
            {
                SimpleTextUtil.ReadLine(input, scratch);
                if (scratch.Equals(SimpleTextFieldsWriter.END))
                {
                    SimpleTextUtil.CheckFooter(input);
                    return fields;
                }

                if (StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.FIELD))
                {
                    // The name is the UTF-8 text after the FIELD marker; decode it explicitly
                    // instead of calling ToString() on a byte sub-list.
                    var fieldName = Encoding.UTF8.GetString(scratch.Bytes,
                        scratch.Offset + SimpleTextFieldsWriter.FIELD.Length,
                        scratch.Length - SimpleTextFieldsWriter.FIELD.Length);
                    fields[fieldName] = input.FilePointer;
                }
            }
        }
예제 #44
0
 /// <summary>
 /// Writes a CHECKSUM line to <paramref name="output"/>: the CHECKSUM label, the decimal
 /// text of <c>output.Checksum</c>, then a newline.
 /// </summary>
 /// <param name="output">Output whose checksum is written, and which receives the line.</param>
 /// <param name="scratch">Scratch buffer handed to the string-writing helper.</param>
 public static void WriteChecksum(IndexOutput output, BytesRef scratch)
 {
     // The checksum is read before anything of this line is written to the output.
     // NOTE(review): the original comment says the value is zero-padded so every checksum
     // occupies the same number of bytes (BaseIndexFileFormatTestCase.testMergeStability
     // cares), but the "D" format below emits no padding. Confirm against the footer
     // reader before changing the width.
     var checksumText = string.Format("{0:D}", output.Checksum);

     Write(output, CHECKSUM);
     Write(output, checksumText, scratch);
     WriteNewline(output);
 }
 /// <summary>
 /// Writes <paramref name="bytes"/> to this writer's output by delegating to
 /// <c>SimpleTextUtil.Write</c>.
 /// </summary>
 /// <param name="bytes">Bytes to write.</param>
 private void Write(BytesRef bytes)
 {
     SimpleTextUtil.Write(_output, bytes);
 }
        /// <summary>
        /// Emits the two lines that open a term: the TERMTEXT line carrying the term bytes
        /// and the TERMFREQ line carrying <paramref name="freq"/> in invariant-culture form.
        /// </summary>
        /// <param name="term">Term bytes to write.</param>
        /// <param name="freq">Term frequency to write.</param>
        public override void StartTerm(BytesRef term, int freq)
        {
            var freqText = freq.ToString(CultureInfo.InvariantCulture);

            // Term text line.
            Write(TERMTEXT);
            Write(term);
            NewLine();

            // Term frequency line.
            Write(TERMFREQ);
            Write(freqText);
            NewLine();
        }
        /// <summary>
        /// Writes one occurrence of the current term. Depending on what this writer records,
        /// that is a POSITION line (optionally followed by a PAYLOAD line) and/or a
        /// STARTOFFSET and ENDOFFSET line pair.
        /// </summary>
        /// <param name="position">Position value, written when positions are recorded.</param>
        /// <param name="startOffset">Start offset, written when offsets are recorded.</param>
        /// <param name="endOffset">End offset, written when offsets are recorded.</param>
        /// <param name="payload">Payload bytes or null; a null payload yields an empty PAYLOAD line.</param>
        public override void AddPosition(int position, int startOffset, int endOffset, BytesRef payload)
        {
            Debug.Assert(_positions || _offsets);

            if (_positions)
            {
                Write(POSITION);
                Write(position.ToString(CultureInfo.InvariantCulture));
                NewLine();
            }

            // A payload line is only present alongside a position line.
            if (_positions && _payloads)
            {
                Write(PAYLOAD);
                if (payload != null)
                {
                    Debug.Assert(payload.Length > 0);
                    Write(payload);
                }
                NewLine();
            }

            if (!_offsets)
            {
                return;
            }

            Write(STARTOFFSET);
            Write(startOffset.ToString(CultureInfo.InvariantCulture));
            NewLine();

            Write(ENDOFFSET);
            Write(endOffset.ToString(CultureInfo.InvariantCulture));
            NewLine();
        }
        /// <summary>
        /// Writes the live-docs file for the next deletion generation of <paramref name="info"/>:
        /// a SIZE line, one DOC line per set bit, an END line and a checksum line.
        /// </summary>
        /// <param name="bits">Live-docs bits; must be a <c>SimpleTextBits</c> instance.</param>
        /// <param name="dir">Directory in which the file is created.</param>
        /// <param name="info">Segment commit info supplying the segment name and next deletion generation.</param>
        /// <param name="newDelCount">Not used by this implementation.</param>
        /// <param name="context">I/O context passed to <c>CreateOutput</c>.</param>
        public override void WriteLiveDocs(MutableBits bits, Directory dir, SegmentCommitInfo info, int newDelCount,
            IOContext context)
        {
            var liveSet = ((SimpleTextBits) bits).BITS;
            var size = bits.Length();
            var scratch = new BytesRef();

            var fileName = IndexFileNames.FileNameFromGeneration(info.Info.Name, LIVEDOCS_EXTENSION, info.NextDelGen);
            IndexOutput output = null;
            var completed = false;
            try
            {
                output = dir.CreateOutput(fileName, context);

                SimpleTextUtil.Write(output, SIZE);
                SimpleTextUtil.Write(output, Convert.ToString(size), scratch);
                SimpleTextUtil.WriteNewline(output);

                // Walk the set bits in ascending order; NextSetBit returns a negative value when done.
                int docId = liveSet.NextSetBit(0);
                while (docId >= 0)
                {
                    SimpleTextUtil.Write(output, DOC);
                    SimpleTextUtil.Write(output, Convert.ToString(docId), scratch);
                    SimpleTextUtil.WriteNewline(output);

                    docId = liveSet.NextSetBit(docId + 1);
                }

                SimpleTextUtil.Write(output, END);
                SimpleTextUtil.WriteNewline(output);
                SimpleTextUtil.WriteChecksum(output, scratch);
                completed = true;
            }
            finally
            {
                // A failed write closes quietly so the original exception is the one that surfaces.
                if (!completed)
                {
                    IOUtils.CloseWhileHandlingException(output);
                }
                else
                {
                    IOUtils.Close(output);
                }
            }
        }
 /// <summary>
 /// Parses the integer stored as UTF-8 text in <paramref name="bytes"/>, starting
 /// <paramref name="offset"/> bytes into its content and running to its end.
 /// </summary>
 /// <param name="bytes">Source bytes.</param>
 /// <param name="offset">Number of leading content bytes to skip.</param>
 /// <param name="scratch">Buffer that receives the decoded characters.</param>
 /// <returns>The parsed integer.</returns>
 private static int ParseIntAt(BytesRef bytes, int offset, CharsRef scratch)
 {
     var start = bytes.Offset + offset;
     var count = bytes.Length - offset;

     UnicodeUtil.UTF8toUTF16(bytes.Bytes, start, count, scratch);
     return ArrayUtil.ParseInt(scratch.Chars, 0, scratch.Length);
 }
예제 #50
0
            /// <summary>
            /// Positions the enum via the FST enum's <c>SeekCeil</c> and loads the docs start
            /// pointer, doc frequency and total term frequency from the matched entry's output.
            /// </summary>
            /// <param name="text">Term to seek to.</param>
            /// <returns>
            /// END when the FST enum returns no entry, FOUND when the entry's input equals
            /// <paramref name="text"/>, NOT_FOUND otherwise.
            /// </returns>
            public override SeekStatus SeekCeil(BytesRef text)
            {
                var hit = _fstEnum.SeekCeil(text);
                if (hit == null)
                {
                    return SeekStatus.END;
                }

                // Output layout: (docsStart, (docFreq, totalTermFreq)).
                var outer = hit.Output;
                var stats = outer.Output2;
                _docsStart = outer.Output1.Value;
                _docFreq = (int) stats.Output1;
                _totalTermFreq = stats.Output2.Value;

                if (hit.Input.Equals(text))
                {
                    return SeekStatus.FOUND;
                }
                return SeekStatus.NOT_FOUND;
            }
 /// <summary>
 /// Decodes, as UTF-8, the content of <paramref name="scratch"/> that follows its first
 /// <paramref name="offset"/> bytes.
 /// </summary>
 /// <param name="offset">Number of leading content bytes to skip.</param>
 /// <param name="scratch">Buffer holding the line.</param>
 /// <returns>The decoded text.</returns>
 private static string ReadString(int offset, BytesRef scratch)
 {
     // Decode the bytes explicitly; ToString() on a byte sub-list is not a UTF-8 decode.
     return Encoding.UTF8.GetString(scratch.Bytes, scratch.Offset + offset, scratch.Length - offset);
 }
예제 #52
0
            /// <summary>
            /// Looks up <paramref name="text"/> via the FST enum's <c>SeekExact</c> and, when an
            /// entry is returned, loads the docs start pointer, doc frequency and total term
            /// frequency from its output.
            /// </summary>
            /// <param name="text">Term to look up.</param>
            /// <returns>True when the FST enum returned an entry; false otherwise.</returns>
            public override bool SeekExact(BytesRef text)
            {
                var hit = _fstEnum.SeekExact(text);
                if (hit == null)
                {
                    return false;
                }

                // Output layout: (docsStart, (docFreq, totalTermFreq)).
                var outer = hit.Output;
                var stats = outer.Output2;
                _docsStart = outer.Output1.Value;
                _docFreq = (int) stats.Output1;
                _totalTermFreq = stats.Output2.Value;

                return true;
            }
            /// <summary>
            /// Loads the value stored for ordinal <paramref name="ord"/> into
            /// <paramref name="result"/>. The record is found by seeking to a computed offset,
            /// reading its LENGTH line and then reading that many bytes.
            /// </summary>
            /// <param name="ord">Ordinal in the range 0 .. NumValues - 1.</param>
            /// <param name="result">Receives a newly allocated byte array holding the value.</param>
            /// <exception cref="IndexOutOfRangeException">If <paramref name="ord"/> is out of range.</exception>
            /// <exception cref="CorruptIndexException">If the length text cannot be parsed.</exception>
            public override void LookupOrd(long ord, BytesRef result)
            {
                var inRange = ord >= 0 && ord < _field.NumValues;
                if (!inRange)
                {
                    throw new IndexOutOfRangeException("ord must be 0 .. " + (_field.NumValues - 1) + "; got " + ord);
                }

                // Records have a fixed width, so the ordinal maps directly to a file position.
                var recordWidth = 9 + _field.Pattern.Length + _field.MaxLength;
                _input.Seek(_field.DataStartFilePointer + ord * recordWidth);

                SimpleTextUtil.ReadLine(_input, _scratch);
                Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH),
                    "got " + _scratch.Utf8ToString() + " in=" + _input);

                int len;
                try
                {
                    // The length is parsed as a plain invariant-culture integer; no custom
                    // numeric pattern is applied here.
                    var lengthText = Encoding.UTF8.GetString(_scratch.Bytes,
                        _scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length,
                        _scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length);
                    len = int.Parse(lengthText, NumberStyles.Integer, CultureInfo.InvariantCulture);
                }
                catch (Exception pe)
                {
                    throw new CorruptIndexException("failed to parse int length (resource=" + _input + ")", pe);
                }

                var value = new byte[len];
                result.Bytes = value;
                result.Offset = 0;
                result.Length = len;
                _input.ReadBytes(result.Bytes, 0, len);
            }
 /// <summary>
 /// Stores the supplied collaborators in fields and takes the value and ordinal
 /// decoder formats from <paramref name="field"/>.
 /// </summary>
 /// <param name="outerInstance">The enclosing doc-values reader.</param>
 /// <param name="field">Field description; supplies <c>Pattern</c> and <c>OrdPattern</c>.</param>
 /// <param name="input">Input stored for later reads.</param>
 /// <param name="scratch">Scratch buffer stored for later reads.</param>
 public SortedDocValuesAnonymousInnerClassHelper(SimpleTextDocValuesReader outerInstance,
     OneField field, IndexInput input, BytesRef scratch)
 {
     _outerInstance = outerInstance;
     _field = field;
     _input = input;
     _scratch = scratch;
     _decoderFormat = field.Pattern;
     _ordDecoderFormat = field.OrdPattern;
 }
 /// <summary>
 /// Tests whether the content of <paramref name="a"/> equals the content of
 /// <paramref name="b"/> that follows its first <paramref name="bOffset"/> bytes.
 /// </summary>
 /// <param name="a">Bytes to compare in full.</param>
 /// <param name="b">Bytes whose tail is compared.</param>
 /// <param name="bOffset">Number of leading content bytes of <paramref name="b"/> to skip.</param>
 /// <returns>True when the lengths match and the byte ranges are equal.</returns>
 private bool EqualsAt(BytesRef a, BytesRef b, int bOffset)
 {
     var tailLength = b.Length - bOffset;

     // Different lengths can never match; skip the byte comparison.
     if (a.Length != tailLength)
     {
         return false;
     }

     return ArrayUtil.Equals(a.Bytes, a.Offset, b.Bytes, b.Offset + bOffset, tailLength);
 }
 /// <summary>
 /// Decodes the UTF-8 content of <paramref name="scratch"/> that follows its first
 /// <paramref name="offset"/> bytes, using <c>_scratchUtf16</c> as the character buffer.
 /// </summary>
 /// <param name="offset">Number of leading content bytes to skip.</param>
 /// <param name="scratch">Buffer holding the line.</param>
 /// <returns>The decoded text.</returns>
 private string ReadString(int offset, BytesRef scratch)
 {
     var start = scratch.Offset + offset;
     var count = scratch.Length - offset;

     UnicodeUtil.UTF8toUTF16(scratch.Bytes, start, count, _scratchUtf16);
     return _scratchUtf16.ToString();
 }
 /// <summary>
 /// Stores the supplied collaborators in fields, takes the decoder format from
 /// <paramref name="field"/> and starts with an empty current-ordinals array at index 0.
 /// </summary>
 /// <param name="outerInstance">The enclosing doc-values reader.</param>
 /// <param name="field">Field description; supplies <c>Pattern</c>.</param>
 /// <param name="input">Input stored for later reads.</param>
 /// <param name="scratch">Scratch buffer stored for later reads.</param>
 public SortedSetDocValuesAnonymousInnerClassHelper(SimpleTextDocValuesReader outerInstance,
     OneField field, IndexInput input, BytesRef scratch)
 {
     _outerInstance = outerInstance;
     _field = field;
     _input = input;
     _scratch = scratch;
     _decoderFormat = field.Pattern;
     _currentOrds = new string[0];
     _currentIndex = 0;
 }
예제 #58
0
            /// <summary>
            /// Completes the current term: builds its metadata (longs, optional bytes and
            /// frequencies) through the postings writer and adds the term with that metadata
            /// to the FST builder.
            /// </summary>
            /// <param name="text">Term bytes used as the FST input.</param>
            /// <param name="stats">Supplies the doc frequency and total term frequency.</param>
            public override void FinishTerm(BytesRef text, TermStats stats)
            {
                var postingsWriter = _outerInstance._postingsWriter;
                var termState = postingsWriter.NewTermState();

                // The frequencies are recorded on both the term state and the FST metadata.
                var termData = new FSTTermOutputs.TermData();
                termData.LONGS = new long[_longsSize];
                termData.BYTES = null;
                termData.DOC_FREQ = termState.DocFreq = stats.DocFreq;
                termData.TOTAL_TERM_FREQ = termState.TotalTermFreq = stats.TotalTermFreq;

                postingsWriter.FinishTerm(termState);
                postingsWriter.EncodeTerm(termData.LONGS, _metaWriter, _fieldInfo, termState, true);

                // Whatever EncodeTerm wrote to the meta writer becomes the BYTES part; the
                // writer is reset afterwards so it can be reused.
                var encodedSize = (int) _metaWriter.FilePointer;
                if (encodedSize > 0)
                {
                    termData.BYTES = new byte[encodedSize];
                    _metaWriter.WriteTo(termData.BYTES, 0);
                    _metaWriter.Reset();
                }

                _builder.Add(Util.ToIntsRef(text, _scratchTerm), termData);
                _numTerms++;
            }
예제 #59
0
 /// <summary>
 /// Signals the start of a term to the outer instance's postings writer and returns
 /// that writer as the consumer for the term's postings.
 /// </summary>
 /// <param name="text">Term bytes; not used by this method.</param>
 /// <returns>The outer instance's postings writer.</returns>
 public override PostingsConsumer StartTerm(BytesRef text)
 {
     _outerInstance._postingsWriter.StartTerm();
     return _outerInstance._postingsWriter;
 }
            /// <summary>
            /// Repositions the enum on the smallest term that compares greater than or equal
            /// to <paramref name="text"/>. The matching terms are copied into a new sorted
            /// dictionary and <c>_iterator</c> is replaced by an enumerator over that copy,
            /// advanced to its first entry.
            /// </summary>
            /// <param name="text">Term to seek to.</param>
            /// <returns>
            /// END when no term is greater than or equal to <paramref name="text"/>, FOUND when
            /// the first such term equals it, NOT_FOUND otherwise.
            /// </returns>
            public override SeekStatus SeekCeil(BytesRef text)
            {
                var newTerms = new SortedDictionary<BytesRef, SimpleTVPostings>();
                foreach (var entry in _terms.Where(t => t.Key.CompareTo(text) >= 0))
                {
                    newTerms.Add(entry.Key, entry.Value);
                }

                _iterator = newTerms.EntrySet().GetEnumerator();

                // Check MoveNext() explicitly rather than relying on an exception (swallowed
                // by a bare catch) to detect an empty result.
                if (!_iterator.MoveNext())
                {
                    return SeekStatus.END;
                }

                return _iterator.Current.Key.Equals(text) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
            }