Ejemplo n.º 1
0
        /// <summary>
        /// Writes the header lines for one field (number, name, the positions / offsets / payloads
        /// flags and the term count, each followed by a newline) and then stores the three flags
        /// in the matching instance fields.
        /// </summary>
        /// <param name="info">Field whose <c>Number</c> and <c>Name</c> are written.</param>
        /// <param name="numTerms">Value written after the <c>FIELDTERMCOUNT</c> label.</param>
        /// <param name="positions">Value written after <c>FIELDPOSITIONS</c> and kept in <c>_positions</c>.</param>
        /// <param name="offsets">Value written after <c>FIELDOFFSETS</c> and kept in <c>_offsets</c>.</param>
        /// <param name="payloads">Value written after <c>FIELDPAYLOADS</c> and kept in <c>_payloads</c>.</param>
        public override void StartField(FieldInfo info, int numTerms, bool positions, bool offsets, bool payloads)
        {
            // field number
            Write(FIELD);
            string numberText = Convert.ToString(info.Number);
            Write(numberText);
            NewLine();

            // field name
            Write(FIELDNAME);
            Write(info.Name);
            NewLine();

            // feature flags, in the order positions / offsets / payloads
            Write(FIELDPOSITIONS);
            string positionsText = Convert.ToString(positions);
            Write(positionsText);
            NewLine();

            Write(FIELDOFFSETS);
            string offsetsText = Convert.ToString(offsets);
            Write(offsetsText);
            NewLine();

            Write(FIELDPAYLOADS);
            string payloadsText = Convert.ToString(payloads);
            Write(payloadsText);
            NewLine();

            // number of terms
            Write(FIELDTERMCOUNT);
            string termCountText = Convert.ToString(numTerms);
            Write(termCountText);
            NewLine();

            // remember the flags only after the whole header has been written
            _positions = positions;
            _offsets = offsets;
            _payloads = payloads;
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Derives the per-field feature flags from <paramref name="fieldInfo"/>, passes the
 /// positions / offsets / payloads flags to <c>SkipWriter</c>, resets <c>LastState</c> to
 /// <c>EmptyState</c>, and reports a count that depends on those flags.
 /// </summary>
 /// <param name="fieldInfo">Field whose index options and payload flag are inspected.</param>
 /// <returns>
 /// 3 when the field has positions and also payloads or offsets, 2 when it has positions
 /// only, and 1 otherwise.
 /// </returns>
 public override int SetField(FieldInfo fieldInfo)
 {
     FieldInfo.IndexOptions? options = fieldInfo.FieldIndexOptions;

     FieldHasFreqs = options >= FieldInfo.IndexOptions.DOCS_AND_FREQS;
     FieldHasPositions = options >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
     FieldHasOffsets = options >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
     FieldHasPayloads = fieldInfo.HasPayloads();

     SkipWriter.SetField(FieldHasPositions, FieldHasOffsets, FieldHasPayloads);
     LastState = EmptyState;

     if (!FieldHasPositions)
     {
         return 1; // doc FP
     }

     // doc + pos + pay FP when payloads or offsets are present, else doc + pos FP
     return (FieldHasPayloads || FieldHasOffsets) ? 3 : 2;
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Encodes the metadata of one term: start file pointers go into <paramref name="longs"/>
        /// as differences against <c>LastState</c>, and the optional singleton doc id,
        /// last-position-block offset and skip offset are written to <paramref name="out"/>
        /// whenever they are not <c>-1</c>.
        /// </summary>
        /// <param name="longs">Receives the doc delta at index 0, and the pos / pay deltas at
        /// indexes 1 and 2 when the corresponding field flags are set.</param>
        /// <param name="out">Destination for the variable-length values.</param>
        /// <param name="fieldInfo">Not read by this method.</param>
        /// <param name="_state">Term state; cast to <c>IntBlockTermState</c>.</param>
        /// <param name="absolute">When true, <c>LastState</c> is first reset to <c>EmptyState</c>,
        /// so the differences are taken against that state.</param>
        public override void EncodeTerm(long[] longs, DataOutput @out, FieldInfo fieldInfo, BlockTermState _state, bool absolute)
        {
            var current = (IntBlockTermState)_state;

            if (absolute)
            {
                LastState = EmptyState;
            }

            var previous = LastState;
            bool hasPositions = FieldHasPositions;

            longs[0] = current.DocStartFP - previous.DocStartFP;
            if (hasPositions)
            {
                longs[1] = current.PosStartFP - previous.PosStartFP;
                if (FieldHasPayloads || FieldHasOffsets)
                {
                    longs[2] = current.PayStartFP - previous.PayStartFP;
                }
            }

            // Optional values: -1 means "absent", in which case nothing is written.
            if (current.SingletonDocID != -1)
            {
                @out.WriteVInt(current.SingletonDocID);
            }
            if (hasPositions && current.LastPosBlockOffset != -1)
            {
                @out.WriteVLong(current.LastPosBlockOffset);
            }
            if (current.SkipOffset != -1)
            {
                @out.WriteVLong(current.SkipOffset);
            }

            LastState = current;
        }
        /// <summary>
        /// Writes a numeric field as fixed-width text: the field header, the minimum value,
        /// a zero-padding pattern, and then one delta line (value minus minimum) plus one
        /// presence-flag line per value.
        /// </summary>
        /// <param name="field">Field being written; its name must already have been seen.</param>
        /// <param name="values">Values to write. The sequence is enumerated twice (once to find
        /// min/max, once to write), so it must be re-enumerable and yield the same items.</param>
        public override void AddNumericField(FieldInfo field, IEnumerable<long> values)
        {
            Debug.Assert(FieldSeen(field.Name));
            Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.NUMERIC ||
                         field.NormType == FieldInfo.DocValuesType_e.NUMERIC);
            WriteFieldEntry(field, FieldInfo.DocValuesType_e.NUMERIC);

            // first pass to find min/max
            var minValue = long.MaxValue;
            var maxValue = long.MinValue;
            foreach (var v in values)
            {
                minValue = Math.Min(minValue, v);
                maxValue = Math.Max(maxValue, v);
            }

            // write our minimum value to the .dat, all entries are deltas from that
            SimpleTextUtil.Write(data, MINVALUE);
            SimpleTextUtil.Write(data, Convert.ToString(minValue), scratch);
            SimpleTextUtil.WriteNewline(data);

            // build up our fixed-width "simple text packed ints" format; the range is computed
            // with BigInteger because (max - min) may not fit in a long
            System.Numerics.BigInteger maxBig = maxValue;
            System.Numerics.BigInteger minBig = minValue;
            var diffBig = maxBig - minBig;

            var maxBytesPerValue = diffBig.ToString().Length;
            var patternString = new string('0', maxBytesPerValue);

            // write our pattern to the .dat
            SimpleTextUtil.Write(data, PATTERN);
            SimpleTextUtil.Write(data, patternString, scratch);
            SimpleTextUtil.WriteNewline(data);

            int numDocsWritten = 0;

            // second pass to write the values
            foreach (var value in values)
            {
                Debug.Assert(value >= minValue);

                // Subtract in BigInteger as well: a long subtraction would wrap around for
                // ranges wider than long.MaxValue and yield a negative, wrong-width delta.
                System.Numerics.BigInteger delta = value;
                delta -= minBig;
                string s = delta.ToString(patternString);
                Debug.Assert(s.Length == patternString.Length);
                SimpleTextUtil.Write(data, s, scratch);
                SimpleTextUtil.WriteNewline(data);

                // The element type is a non-nullable long, so every entry has a value ("T").
                SimpleTextUtil.Write(data, "T", scratch);
                SimpleTextUtil.WriteNewline(data);
                numDocsWritten++;
                Debug.Assert(numDocsWritten <= numDocs);
            }

            Debug.Assert(numDocs == numDocsWritten, "numDocs=" + numDocs + " numDocsWritten=" + numDocsWritten);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Creates the <c>Bits</c> helper for a binary field from the field's entry in
        /// <c>FIELDS</c>, a clone of <c>DATA</c> and a fresh scratch buffer.
        /// </summary>
        /// <param name="fieldInfo">Field whose name is used to look up the entry.</param>
        private Bits GetBinaryDocsWithField(FieldInfo fieldInfo)
        {
            var entry = FIELDS[fieldInfo.Name];

            // Each helper gets its own clone of the data input and its own buffer.
            var dataClone = (IndexInput)DATA.Clone();
            var buffer = new BytesRef();

            return new BitsAnonymousInnerClassHelper2(this, entry, dataClone, buffer);
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Stores the constructor arguments, resolves the field's <c>FieldInfo</c> through the outer
 /// reader, and then calls <c>LoadTerms</c>.
 /// </summary>
 /// <param name="outerInstance">Enclosing reader; also the source of the field infos.</param>
 /// <param name="field">Name of the field to resolve.</param>
 /// <param name="termsStart">Stored in <c>_termsStart</c>.</param>
 /// <param name="maxDoc">Stored in <c>_maxDoc</c>.</param>
 public SimpleTextTerms(SimpleTextFieldsReader outerInstance, string field, long termsStart, int maxDoc)
 {
     _outerInstance = outerInstance;
     _fieldInfo = outerInstance._fieldInfos.FieldInfo(field);
     _termsStart = termsStart;
     _maxDoc = maxDoc;

     // Called last, after every field above has been assigned.
     LoadTerms();
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Writes a sorted field in two steps: first the per-document ordinals through
        /// <c>AddNumericField</c>, then the values through <c>WriteFST</c>.
        /// </summary>
        /// <param name="field">Field being written.</param>
        /// <param name="values">Values handed to <c>WriteFST</c>.</param>
        /// <param name="docToOrd">Ordinals handed to <c>AddNumericField</c>.</param>
        public override void AddSortedField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd)
        {
            // Step 1: ordinals, stored as numerics.
            AddNumericField(field, docToOrd, false);

            // Step 2: values, stored as an FST.
            WriteFST(field, values);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Writes a sorted-set field: the ordinals are wrapped in an
        /// <c>IterableAnonymousInnerClassHelper</c> and written as a binary field, then the
        /// values are written through <c>WriteFST</c>.
        /// </summary>
        /// <remarks>This might not be the most efficient approach, but it is fairly simple.</remarks>
        public override void AddSortedSetField(FieldInfo field, IEnumerable<BytesRef> values,
                                               IEnumerable<long?> docToOrdCount, IEnumerable<long?> ords)
        {
            // Step 1: ordinals, stored as a binary field.
            var ordinalsAsBinary = new IterableAnonymousInnerClassHelper(this, docToOrdCount, ords);
            AddBinaryField(field, ordinalsAsBinary);

            // Step 2: values, stored as an FST.
            WriteFST(field, values);
        }
Ejemplo n.º 9
0
 /// <summary>
 /// Sets up a terms writer for one field: registers the field with the outer writer's postings
 /// writer, then creates the outputs and the FST builder from the size it returned.
 /// </summary>
 /// <param name="outerInstance">Enclosing writer that owns the postings writer.</param>
 /// <param name="fieldInfo">Field this writer handles.</param>
 internal TermsWriter(FSTTermsWriter outerInstance, FieldInfo fieldInfo)
 {
     _outerInstance = outerInstance;
     _fieldInfo = fieldInfo;
     _numTerms = 0;

     // Order matters from here on: each value feeds the next constructor call.
     _longsSize = outerInstance._postingsWriter.SetField(fieldInfo);
     _outputs = new FSTTermOutputs(fieldInfo, _longsSize);
     _builder = new Builder<FSTTermOutputs.TermData>(FST.INPUT_TYPE.BYTE1, _outputs);
 }
        /// <summary>
        /// Writes a binary field as fixed-width text: the field header, the maximum value length,
        /// a zero-padding pattern for lengths, and then for every value its padded length, its
        /// raw bytes padded with spaces up to the maximum length, and a "T"/"F" presence flag.
        /// </summary>
        /// <param name="field">Field being written; its name must already have been seen.</param>
        /// <param name="values">Values to write; a <c>null</c> entry is written with length 0
        /// and flag "F". The sequence is enumerated twice, so it must be re-enumerable.</param>
        public override void AddBinaryField(FieldInfo field, IEnumerable<BytesRef> values)
        {
            Debug.Assert(FieldSeen(field.Name));
            Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.BINARY);

            // first pass: find the longest value
            var maxLength = 0;
            foreach (var value in values)
            {
                var length = value == null ? 0 : value.Length;
                maxLength = Math.Max(maxLength, length);
            }
            WriteFieldEntry(field, FieldInfo.DocValuesType_e.BINARY);

            // write maxLength
            SimpleTextUtil.Write(data, MAXLENGTH);
            SimpleTextUtil.Write(data, Convert.ToString(maxLength), scratch);
            SimpleTextUtil.WriteNewline(data);

            // One '0' per digit of maxLength. Built once here rather than re-materialized
            // for every document inside the loop below.
            var maxBytesLength = Convert.ToString(maxLength).Length;
            var lengthPattern = new string('0', maxBytesLength);

            // write our pattern for encoding lengths
            SimpleTextUtil.Write(data, PATTERN);
            SimpleTextUtil.Write(data, lengthPattern, scratch);
            SimpleTextUtil.WriteNewline(data);

            int numDocsWritten = 0;
            foreach (BytesRef value in values)
            {
                int length = value == null ? 0 : value.Length;
                SimpleTextUtil.Write(data, LENGTH);
                SimpleTextUtil.Write(data, length.ToString(lengthPattern), scratch);
                SimpleTextUtil.WriteNewline(data);

                // write bytes -- don't use SimpleText.Write
                // because it escapes:
                if (value != null)
                {
                    data.WriteBytes(value.Bytes, value.Offset, value.Length);
                }

                // pad to fit
                for (int i = length; i < maxLength; i++)
                {
                    data.WriteByte((byte)(sbyte)' ');
                }
                SimpleTextUtil.WriteNewline(data);
                SimpleTextUtil.Write(data, value == null ? "F" : "T", scratch);
                SimpleTextUtil.WriteNewline(data);
                numDocsWritten++;
            }

            Debug.Assert(numDocs == numDocsWritten);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Writes the two header lines for a field: the <c>FIELD</c> label followed by the field
        /// name, then the <c>TYPE</c> label followed by the type's string form.
        /// </summary>
        /// <param name="field">Field whose name is written.</param>
        /// <param name="type">Doc-values type written via <c>ToString()</c>.</param>
        private void WriteFieldEntry(FieldInfo field, FieldInfo.DocValuesType_e type)
        {
            // name line
            SimpleTextUtil.Write(data, FIELD);
            string fieldName = field.Name;
            SimpleTextUtil.Write(data, fieldName, scratch);
            SimpleTextUtil.WriteNewline(data);

            // type line
            SimpleTextUtil.Write(data, TYPE);
            string typeName = type.ToString();
            SimpleTextUtil.Write(data, typeName, scratch);
            SimpleTextUtil.WriteNewline(data);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Creates the binary doc-values helper for a field from the field's entry in
        /// <c>FIELDS</c>, a clone of <c>DATA</c> and a fresh scratch buffer.
        /// </summary>
        /// <param name="fieldInfo">Field whose name is used to look up the entry.</param>
        public override BinaryDocValues GetBinary(FieldInfo fieldInfo)
        {
            var entry = FIELDS[fieldInfo.Name];
            Debug.Assert(entry != null);

            // Each helper gets its own clone of the data input and its own buffer.
            var dataClone = (IndexInput)DATA.Clone();
            var buffer = new BytesRef();

            return new BinaryDocValuesAnonymousInnerClassHelper(this, entry, dataClone, buffer);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Writes the bytes of all non-null values to <c>data</c>, records the start pointer,
        /// total byte count, value count and missing-bitset location (or -1) in <c>meta</c>, and
        /// finally writes the start address of every value followed by the end address to <c>data</c>.
        /// </summary>
        /// <param name="field">Field being written; its name appears in the size-limit error.</param>
        /// <param name="values">Values to write; <c>null</c> entries count as missing. The
        /// sequence is enumerated more than once.</param>
        /// <exception cref="ArgumentException">The accumulated byte count exceeds
        /// <c>DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH</c>.</exception>
        private void AddBinaryFieldValues(FieldInfo field, IEnumerable<BytesRef> values)
        {
            // write the byte[] data
            long dataStart = data.FilePointer;
            bool sawMissing = false;
            long byteTotal = 0;
            int valueCount = 0;

            foreach (BytesRef value in values)
            {
                valueCount++;

                if (value == null)
                {
                    sawMissing = true;
                    continue;
                }

                data.WriteBytes(value.Bytes, value.Offset, value.Length);
                byteTotal += value.Length;
                if (byteTotal > DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH)
                {
                    throw new ArgumentException("DocValuesField \"" + field.Name +
                                                "\" is too large, cannot have more than DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH (" +
                                                DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH + ") bytes");
                }
            }

            meta.WriteLong(dataStart);
            meta.WriteInt((int)byteTotal);
            meta.WriteInt(valueCount);

            if (!sawMissing)
            {
                // -1 marks "no missing bitset"
                meta.WriteLong(-1L);
            }
            else
            {
                long bitsetStart = data.FilePointer;
                WriteMissingBitset(values);
                meta.WriteLong(bitsetStart);
                meta.WriteLong(data.FilePointer - bitsetStart);
            }

            // Addresses: the running offset before each value, then the final total.
            int address = 0;
            foreach (BytesRef value in values)
            {
                data.WriteInt(address);
                if (value != null)
                {
                    address += value.Length;
                }
            }
            data.WriteInt(address);
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Writes a sorted field: the field number and the <c>SORTED</c> marker go to
        /// <c>meta</c>, followed by the ordinals (as numeric values) and the values (as binary).
        /// </summary>
        /// <param name="field">Field being written.</param>
        /// <param name="values">Values handed to <c>AddBinaryFieldValues</c>.</param>
        /// <param name="docToOrd">Ordinals handed to <c>AddNumericFieldValues</c>.</param>
        public override void AddSortedField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd)
        {
            // header: field number + type marker
            meta.WriteVInt(field.Number);
            byte typeMarker = (byte)DirectDocValuesProducer.SORTED;
            meta.WriteByte(typeMarker);

            // ordinals first, as numerics
            AddNumericFieldValues(field, docToOrd);

            // then the values, as binary
            AddBinaryFieldValues(field, values);
        }
Ejemplo n.º 15
0
 /// <summary>
 /// Stores the per-field statistics and builds the term dictionary FST from
 /// <paramref name="in"/>, using outputs created from the field info and
 /// <paramref name="longsSize"/>.
 /// </summary>
 internal TermsReader(FSTTermsReader outerInstance, FieldInfo fieldInfo, IndexInput @in, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize)
 {
     this.outerInstance = outerInstance;
     this.fieldInfo = fieldInfo;

     // statistics, stored as given
     this.numTerms = numTerms;
     this.sumTotalTermFreq = sumTotalTermFreq;
     this.sumDocFreq = sumDocFreq;
     this.docCount = docCount;
     this.longsSize = longsSize;

     // dictionary, constructed from the input
     var outputs = new FSTTermOutputs(fieldInfo, longsSize);
     this.dict = new FST<FSTTermOutputs.TermData>(@in, outputs);
 }
Ejemplo n.º 16
0
 /// <summary>
 /// Creates a metadata record; every argument is copied to the member of the same name
 /// (<paramref name="fst"/> is stored in <c>Dict</c>).
 /// </summary>
 public FieldMetaData(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq,
                      int docCount, int longsSize, FST<FSTTermOutputs.TermData> fst)
 {
     this.FieldInfo = fieldInfo;
     this.NumTerms = numTerms;
     this.SumTotalTermFreq = sumTotalTermFreq;
     this.SumDocFreq = sumDocFreq;
     this.DocCount = docCount;
     this.LongsSize = longsSize;
     this.Dict = fst;
 }
Ejemplo n.º 17
0
        /// <summary>
        /// Creates the sorted doc-values helper for a field from the field's entry in
        /// <c>FIELDS</c>, a clone of <c>DATA</c> and a fresh scratch buffer.
        /// </summary>
        /// <param name="fieldInfo">Field whose name is used to look up the entry.</param>
        public override SortedDocValues GetSorted(FieldInfo fieldInfo)
        {
            var entry = FIELDS[fieldInfo.Name];

            // SegmentCoreReaders already verifies this field is valid:
            Debug.Assert(entry != null);

            var dataClone = (IndexInput)DATA.Clone();
            var buffer = new BytesRef();

            return new SortedDocValuesAnonymousInnerClassHelper(this, entry, dataClone, buffer);
        }
Ejemplo n.º 18
0
 /// <summary>
 /// Decides whether the visitor wants the given stored field. The field name is located in
 /// <c>fields</c> by binary search and the resulting index is kept in <c>currentField</c>.
 /// </summary>
 /// <param name="fieldInfo">Field being offered to the visitor.</param>
 /// <returns>
 /// <c>NO</c> when the name is not found; <c>YES</c> while the field's builder is no longer
 /// than <c>maxLength</c>; otherwise <c>STOP</c> if there is exactly one field, else <c>NO</c>.
 /// </returns>
 public override Status NeedsField(Index.FieldInfo fieldInfo)
 {
     currentField = Array.BinarySearch(fields, fieldInfo.Name);

     if (currentField < 0)
     {
         return Status.NO;
     }

     if (builders[currentField].Length <= maxLength)
     {
         return Status.YES;
     }

     // The builder already exceeds maxLength.
     return fields.Length == 1 ? Status.STOP : Status.NO;
 }
        /// <summary>
        /// Creates the sorted-set doc-values helper for a field from the field's entry in
        /// <c>FIELDS</c>, a clone of <c>DATA</c>, a fresh scratch buffer and a decoder built
        /// from the entry's <c>Pattern</c>.
        /// </summary>
        /// <param name="fieldInfo">Field whose name is used to look up the entry.</param>
        public override SortedSetDocValues GetSortedSet(FieldInfo fieldInfo)
        {
            var entry = FIELDS[fieldInfo.Name];

            // SegmentCoreReaders already verifies this field is
            // valid:
            Debug.Assert(entry != null);

            var dataClone = (IndexInput)DATA.Clone();
            var buffer = new BytesRef();
            var symbols = new DecimalFormatSymbols(Locale.ROOT);
            var decoder = new DecimalFormat(entry.Pattern, symbols);

            return new SortedSetDocValuesAnonymousInnerClassHelper(this, entry, dataClone, buffer, decoder);
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Creates the numeric doc-values helper for a field from the field's entry in
        /// <c>FIELDS</c>, a clone of <c>DATA</c> and a fresh scratch buffer.
        /// </summary>
        /// <param name="fieldInfo">Field whose name is used to look up the entry.</param>
        public override NumericDocValues GetNumeric(FieldInfo fieldInfo)
        {
            var field = FIELDS[fieldInfo.Name];

            // SegmentCoreReaders already verifies this field is valid.
            // A single assertion is enough; it is the one carrying the diagnostic message, so a
            // failure reports which field was requested.
            Debug.Assert(field != null, "field=" + fieldInfo.Name + " fields=" + FIELDS);

            var @in     = (IndexInput)DATA.Clone();
            var scratch = new BytesRef();

            return(new NumericDocValuesAnonymousInnerClassHelper(this, field, @in, scratch));
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Opens the terms file of a segment, reads its header (verifying the checksum for
        /// versions that carry one), initializes the postings reader and then reads one
        /// <c>TermsReader</c> per field into <c>fields</c>. The input is closed before the
        /// constructor returns, whether or not reading succeeded.
        /// </summary>
        /// <param name="state">Segment read state supplying the directory, segment info,
        /// suffix, IO context and field infos.</param>
        /// <param name="postingsReader">Postings reader that is stored and initialized from the input.</param>
        public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader)
        {
            string termsFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTTermsWriter.TERMS_EXTENSION);

            this.postingsReader = postingsReader;
            IndexInput @in = state.Directory.OpenInput(termsFileName, state.Context);

            bool success = false;

            try
            {
                version = ReadHeader(@in);
                if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM)
                {
                    CodecUtil.ChecksumEntireFile(@in);
                }
                this.postingsReader.Init(@in);
                SeekDir(@in);

                FieldInfos fieldInfos = state.FieldInfos;
                int        numFields  = @in.ReadVInt();
                for (int i = 0; i < numFields; i++)
                {
                    int         fieldNumber      = @in.ReadVInt();
                    FieldInfo   fieldInfo        = fieldInfos.FieldInfo(fieldNumber);
                    long        numTerms         = @in.ReadVLong();
                    // DOCS_ONLY fields have no stored total term frequency; -1 stands in for it.
                    long        sumTotalTermFreq = fieldInfo.FieldIndexOptions == IndexOptions.DOCS_ONLY ? -1 : @in.ReadVLong();
                    long        sumDocFreq       = @in.ReadVLong();
                    int         docCount         = @in.ReadVInt();
                    int         longsSize        = @in.ReadVInt();
                    TermsReader current          = new TermsReader(this, fieldInfo, @in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);

                    // Fetch the entry that was registered under this name BEFORE overwriting it.
                    // Chaining the assignment (previous = fields[name] = current) would make
                    // 'previous' an alias of 'current' instead of the earlier reader, so the
                    // summary check would never see the real previous entry (or null).
                    TermsReader previous;
                    fields.TryGetValue(fieldInfo.Name, out previous);
                    fields[fieldInfo.Name] = current;

                    CheckFieldSummary(state.SegmentInfo, @in, current, previous);
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(@in);
                }
                else
                {
                    // Do not let a failure while closing mask the original exception.
                    IOUtils.CloseWhileHandlingException(@in);
                }
            }
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Writes a sorted-set field: the field number and the <c>SORTED_SET</c> marker go to
        /// <c>meta</c>, followed by three sections written in order - the wrapped per-document
        /// ordinal counts, the ordinals themselves, and the values.
        /// </summary>
        /// <remarks>This might not be the most efficient approach, but it is fairly simple.</remarks>
        public override void AddSortedSetField(FieldInfo field, IEnumerable<BytesRef> values,
                                               IEnumerable<long?> docToOrdCount, IEnumerable<long?> ords)
        {
            // header: field number + type marker
            meta.WriteVInt(field.Number);
            byte typeMarker = (byte)DirectDocValuesProducer.SORTED_SET;
            meta.WriteByte(typeMarker);

            // First write docToOrdCounts, except we "aggregate" the
            // counts so they turn into addresses, and add a final
            // value = the total aggregate:
            var aggregatedCounts = new IterableAnonymousInnerClassHelper(this, docToOrdCount);
            AddNumericFieldValues(field, aggregatedCounts);

            // Write ordinals for all docs, appended into one big
            // numerics:
            AddNumericFieldValues(field, ords);

            // write the values as binary
            AddBinaryFieldValues(field, values);
        }
Ejemplo n.º 23
0
            /// <summary>
            /// Appends <paramref name="value"/> to the builder of the current field. A separator
            /// is inserted first when the builder is non-empty and still shorter than
            /// <c>maxLength</c>; the value is cut off so that the builder does not grow past
            /// <c>maxLength</c>.
            /// </summary>
            /// <param name="fieldInfo">Not read by this method; the target is chosen by <c>currentField</c>.</param>
            /// <param name="value">Text to append.</param>
            public override void StringField(Index.FieldInfo fieldInfo, string value)
            {
                Debug.Assert(currentField >= 0);
                StringBuilder target = builders[currentField];

                bool needsSeparator = target.Length > 0 && target.Length < maxLength;
                if (needsSeparator)
                {
                    target.Append(valueSeparators[currentField]);
                }

                // Space left in the builder after the optional separator.
                int remaining = maxLength - target.Length;
                if (value.Length > remaining)
                {
                    target.Append(value, 0, remaining);
                }
                else
                {
                    target.Append(value);
                }
            }
Ejemplo n.º 24
0
        /// <summary>
        /// Reads the next line, which is asserted to start with the <c>VALUE</c> marker, and
        /// passes the payload after that marker to <paramref name="visitor"/> through the
        /// callback that matches <paramref name="type"/>.
        /// </summary>
        /// <param name="type">Type marker compared against the <c>TYPE_*</c> constants of
        /// <c>SimpleTextStoredFieldsWriter</c>; a marker matching none of them is ignored.</param>
        /// <param name="fieldInfo">Field the value belongs to; forwarded to the visitor.</param>
        /// <param name="visitor">Receiver of the decoded value.</param>
        private void ReadField(BytesRef type, FieldInfo fieldInfo, StoredFieldVisitor visitor)
        {
            ReadLine();
            Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.VALUE));

            // The payload is everything in the scratch buffer after the VALUE marker.
            var valueOffset = _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length;
            var valueLength = _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length;

            if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_STRING))
            {
                // Decode the payload bytes as UTF-8 text. Calling ToString() on a sub-list of
                // the bytes does not decode them.
                // NOTE(review): assumes _scratch.Bytes is a byte[] (it is copied into a byte[]
                // in the binary branch below) - confirm.
                visitor.StringField(fieldInfo,
                                    System.Text.Encoding.UTF8.GetString(_scratch.Bytes, valueOffset, valueLength));
            }
            else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_BINARY))
            {
                // Hand the visitor its own copy so it does not share the scratch buffer.
                var copy = new byte[valueLength];
                Array.Copy(_scratch.Bytes, valueOffset, copy, 0, copy.Length);
                visitor.BinaryField(fieldInfo, copy);
            }
            else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_INT))
            {
                UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
                visitor.IntField(fieldInfo, Convert.ToInt32(_scratchUtf16.ToString()));
            }
            else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_LONG))
            {
                UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
                visitor.LongField(fieldInfo, Convert.ToInt64(_scratchUtf16.ToString()));
            }
            else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_FLOAT))
            {
                // NOTE(review): Convert.ToSingle/ToDouble parse with the current culture;
                // confirm this matches how the writer formats these values.
                UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
                visitor.FloatField(fieldInfo, Convert.ToSingle(_scratchUtf16.ToString()));
            }
            else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_DOUBLE))
            {
                UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueOffset, valueLength, _scratchUtf16);
                visitor.DoubleField(fieldInfo, Convert.ToDouble(_scratchUtf16.ToString()));
            }
        }
Ejemplo n.º 25
0
            /// <summary>
            /// Returns the terms of the named field, or <c>null</c> when the field is unknown,
            /// is not among the stored field numbers, or has no terms.
            /// </summary>
            /// <param name="field">Name of the field to look up.</param>
            public override Terms GetTerms(string field)
            {
                Index.FieldInfo info = outerInstance.fieldInfos.FieldInfo(field);
                if (info == null)
                {
                    return null;
                }

                // Find the first slot whose field number matches.
                int slot = -1;
                for (int i = 0; i < fieldNumOffs.Length; ++i)
                {
                    if (fieldNums[fieldNumOffs[i]] == info.Number)
                    {
                        slot = i;
                        break;
                    }
                }

                if (slot == -1 || numTerms[slot] == 0)
                {
                    // no term
                    return null;
                }

                // The field's suffix bytes start after those of all preceding slots.
                int suffixStart = 0;
                for (int i = 0; i < slot; ++i)
                {
                    suffixStart += fieldLengths[i];
                }
                int suffixLength = fieldLengths[slot];

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(suffixLength >= 0);
                }

                var fieldSuffixes = new BytesRef(suffixBytes.Bytes, suffixBytes.Offset + suffixStart, suffixLength);
                return new TVTerms(numTerms[slot], fieldFlags[slot], prefixLengths[slot], suffixLengths[slot], termFreqs[slot], positionIndex[slot], positions[slot], startOffsets[slot], lengths[slot], payloadIndex[slot], payloadBytes, fieldSuffixes);
            }
Ejemplo n.º 26
0
        /// <summary>
        /// Returns the docs-with-field bits for a field, choosing the source by the field's
        /// doc-values type.
        /// </summary>
        /// <param name="field">Field whose <c>DocValuesType</c> selects the branch.</param>
        /// <exception cref="InvalidEnumArgumentException">The type is none of SORTED_SET,
        /// SORTED, BINARY or NUMERIC.</exception>
        public override Bits GetDocsWithField(FieldInfo field)
        {
            var kind = field.DocValuesType;

            if (kind == FieldInfo.DocValuesType_e.SORTED_SET)
            {
                return DocValues.DocsWithValue(GetSortedSet(field), MAX_DOC);
            }
            if (kind == FieldInfo.DocValuesType_e.SORTED)
            {
                return DocValues.DocsWithValue(GetSorted(field), MAX_DOC);
            }
            if (kind == FieldInfo.DocValuesType_e.BINARY)
            {
                return GetBinaryDocsWithField(field);
            }
            if (kind == FieldInfo.DocValuesType_e.NUMERIC)
            {
                return GetNumericDocsWithField(field);
            }

            throw new InvalidEnumArgumentException();
        }
Ejemplo n.º 27
0
        /// <summary>
        /// Builds an FST that maps each value to its position in <paramref name="values"/>
        /// (0, 1, 2, ...) and saves it to <c>data</c>. The field number, the <c>FST</c> marker
        /// and the data start pointer are written to <c>meta</c> before the FST, and the number
        /// of values after it.
        /// </summary>
        /// <param name="field">Field whose number is written to <c>meta</c>.</param>
        /// <param name="values">Values added to the builder in enumeration order.</param>
        private void WriteFST(FieldInfo field, IEnumerable<BytesRef> values)
        {
            // metadata header
            meta.WriteVInt(field.Number);
            meta.WriteByte(MemoryDocValuesProducer.FST);
            meta.WriteLong(data.FilePointer);

            PositiveIntOutputs ordOutputs = PositiveIntOutputs.Singleton;
            var fstBuilder = new Builder<long?>(INPUT_TYPE.BYTE1, ordOutputs);
            var inputScratch = new IntsRef();

            long nextOrd = 0;
            foreach (BytesRef value in values)
            {
                fstBuilder.Add(Util.ToIntsRef(value, inputScratch), nextOrd);
                nextOrd++;
            }

            // Finish() may yield null, in which case nothing is saved.
            FST<long?> built = fstBuilder.Finish();
            if (built != null)
            {
                built.Save(data);
            }

            // nextOrd now equals the number of values added
            meta.WriteVLong(nextOrd);
        }
Ejemplo n.º 28
0
        /// <summary>
        /// Writes a sorted field as fixed-width text: the field header, the value count, the
        /// maximum value length, the zero-padding patterns for lengths and for ordinals, then
        /// every value (padded length, raw bytes, space padding) and finally one line per
        /// document holding its ordinal plus one.
        /// </summary>
        /// <param name="field">Field being written; its name must already have been seen.</param>
        /// <param name="values">Values to write; enumerated twice.</param>
        /// <param name="docToOrd">Per-document ordinals; each is written as <c>ord + 1</c>.</param>
        public override void AddSortedField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd)
        {
            Debug.Assert(FieldSeen(field.Name));
            Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.SORTED);
            WriteFieldEntry(field, FieldInfo.DocValuesType_e.SORTED);

            // first pass: count the values and find the longest one
            int valueCount = 0;
            int maxLength = -1;
            foreach (BytesRef candidate in values)
            {
                maxLength = Math.Max(maxLength, candidate.Length);
                valueCount++;
            }

            // write numValues
            SimpleTextUtil.Write(data, NUMVALUES);
            SimpleTextUtil.Write(data, Convert.ToString(valueCount), scratch);
            SimpleTextUtil.WriteNewline(data);

            // write maxLength
            SimpleTextUtil.Write(data, MAXLENGTH);
            SimpleTextUtil.Write(data, Convert.ToString(maxLength), scratch);
            SimpleTextUtil.WriteNewline(data);

            // one '0' per character of maxLength's text form
            string lengthPattern = new string('0', Convert.ToString(maxLength).Length);

            // write our pattern for encoding lengths
            SimpleTextUtil.Write(data, PATTERN);
            SimpleTextUtil.Write(data, lengthPattern, scratch);
            SimpleTextUtil.WriteNewline(data);

            // one '0' per character of (valueCount + 1)'s text form
            string ordPattern = new string('0', Convert.ToString(valueCount + 1L).Length);

            // write our pattern for ords
            SimpleTextUtil.Write(data, ORDPATTERN);
            SimpleTextUtil.Write(data, ordPattern, scratch);
            SimpleTextUtil.WriteNewline(data);

            // for asserts:
            int written = 0;

            foreach (BytesRef current in values)
            {
                // write length
                SimpleTextUtil.Write(data, LENGTH);
                SimpleTextUtil.Write(data, current.Length.ToString(lengthPattern), scratch);
                SimpleTextUtil.WriteNewline(data);

                // write bytes -- don't use SimpleText.Write
                // because it escapes:
                data.WriteBytes(current.Bytes, current.Offset, current.Length);

                // pad to fit
                int padding = maxLength - current.Length;
                while (padding-- > 0)
                {
                    data.WriteByte((byte)(sbyte)' ');
                }
                SimpleTextUtil.WriteNewline(data);

                written++;
                Debug.Assert(written <= valueCount);
            }

            Debug.Assert(written == valueCount);

            // ordinals are stored shifted by one
            foreach (var ord in docToOrd)
            {
                SimpleTextUtil.Write(data, (ord + 1).Value.ToString(ordPattern), scratch);
                SimpleTextUtil.WriteNewline(data);
            }
        }
Ejemplo n.º 29
0
 /// <summary>
 /// Initializes a terms writer for a single field. The field is registered with the outer
 /// writer's postings writer, and the value it returns sizes the outputs used by the FST builder.
 /// </summary>
 /// <param name="outerInstance">Enclosing writer that owns the postings writer.</param>
 /// <param name="fieldInfo">Field this writer handles.</param>
 internal TermsWriter(FSTTermsWriter outerInstance, FieldInfo fieldInfo)
 {
     this._outerInstance = outerInstance;
     this._fieldInfo = fieldInfo;
     this._numTerms = 0;

     // The three assignments below depend on each other and must stay in this order.
     this._longsSize = outerInstance._postingsWriter.SetField(fieldInfo);
     this._outputs = new FSTTermOutputs(fieldInfo, this._longsSize);
     this._builder = new Builder<FSTTermOutputs.TermData>(FST.INPUT_TYPE.BYTE1, this._outputs);
 }
Ejemplo n.º 30
0
        /// <summary>
        /// Writes a binary field as fixed-width text: the field header, the maximum value length,
        /// a zero-padding pattern for lengths, and then for every value its padded length, its
        /// raw bytes padded with spaces up to the maximum length, and a "T"/"F" presence flag.
        /// </summary>
        /// <param name="field">Field being written; its name must already have been seen.</param>
        /// <param name="values">Values to write; a <c>null</c> entry is written with length 0
        /// and flag "F". The sequence is enumerated twice, so it must be re-enumerable.</param>
        public override void AddBinaryField(FieldInfo field, IEnumerable<BytesRef> values)
        {
            Debug.Assert(FieldSeen(field.Name));
            Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.BINARY);

            // first pass: find the longest value
            var maxLength = 0;

            foreach (var value in values)
            {
                var length = value == null ? 0 : value.Length;
                maxLength = Math.Max(maxLength, length);
            }
            WriteFieldEntry(field, FieldInfo.DocValuesType_e.BINARY);

            // write maxLength
            SimpleTextUtil.Write(data, MAXLENGTH);
            SimpleTextUtil.Write(data, Convert.ToString(maxLength), scratch);
            SimpleTextUtil.WriteNewline(data);

            // One '0' per digit of maxLength. Built once here rather than re-materialized
            // for every document inside the loop below.
            var maxBytesLength = Convert.ToString(maxLength).Length;
            var lengthPattern = new string('0', maxBytesLength);

            // write our pattern for encoding lengths
            SimpleTextUtil.Write(data, PATTERN);
            SimpleTextUtil.Write(data, lengthPattern, scratch);
            SimpleTextUtil.WriteNewline(data);

            int numDocsWritten = 0;

            foreach (BytesRef value in values)
            {
                int length = value == null ? 0 : value.Length;
                SimpleTextUtil.Write(data, LENGTH);
                SimpleTextUtil.Write(data, length.ToString(lengthPattern), scratch);
                SimpleTextUtil.WriteNewline(data);

                // write bytes -- don't use SimpleText.Write
                // because it escapes:
                if (value != null)
                {
                    data.WriteBytes(value.Bytes, value.Offset, value.Length);
                }

                // pad to fit
                for (int i = length; i < maxLength; i++)
                {
                    data.WriteByte((byte)(sbyte)' ');
                }
                SimpleTextUtil.WriteNewline(data);
                SimpleTextUtil.Write(data, value == null ? "F" : "T", scratch);
                SimpleTextUtil.WriteNewline(data);
                numDocsWritten++;
            }

            Debug.Assert(numDocs == numDocsWritten);
        }
Ejemplo n.º 31
0
 /// <summary>
 /// Creates the terms consumer for a field: a new <c>TermsWriter</c> bound to this instance.
 /// </summary>
 /// <param name="field">Field the returned writer handles.</param>
 public override TermsConsumer AddField(FieldInfo field)
 {
     var writer = new TermsWriter(this, field);
     return writer;
 }
Ejemplo n.º 32
0
 /// <summary>
 /// Creates a metadata record for one field. Each argument is stored in the member of the
 /// same name, except <paramref name="fst"/>, which is stored in <c>Dict</c>.
 /// </summary>
 public FieldMetaData(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq,
     int docCount, int longsSize, FST<FSTTermOutputs.TermData> fst)
 {
     // identity
     this.FieldInfo = fieldInfo;

     // statistics
     this.NumTerms = numTerms;
     this.SumTotalTermFreq = sumTotalTermFreq;
     this.SumDocFreq = sumDocFreq;
     this.DocCount = docCount;

     // term dictionary
     this.LongsSize = longsSize;
     this.Dict = fst;
 }
Ejemplo n.º 33
0
        /// <summary>
        /// Writes a numeric doc-values field as fixed-width text: the minimum value,
        /// a zero-padding pattern, then one zero-padded delta (value - minimum) per
        /// document followed by a "T"/"F" line telling whether the document had a value.
        /// </summary>
        /// <param name="field">Field being written; its header is emitted first.</param>
        /// <param name="values">
        /// One entry per document; <c>null</c> marks a missing value and is stored as 0.
        /// The sequence is enumerated twice.
        /// </param>
        public override void AddNumericField(FieldInfo field, IEnumerable <long?> values)
        {
            Debug.Assert(FieldSeen(field.Name));
            Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.NUMERIC ||
                         field.NormType == FieldInfo.DocValuesType_e.NUMERIC);
            WriteFieldEntry(field, FieldInfo.DocValuesType_e.NUMERIC);

            // The file is machine-readable: never let the thread culture change the digits or sign.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // first pass to find min/max
            var minValue = long.MaxValue;
            var maxValue = long.MinValue;

            foreach (var n in values)
            {
                // A missing value is written as 0 in the second pass, so it has to take
                // part in the min/max computation as 0 instead of throwing on .Value.
                long v = n.GetValueOrDefault();
                minValue = Math.Min(minValue, v);
                maxValue = Math.Max(maxValue, v);
            }

            // write our minimum value to the .dat, all entries are deltas from that
            SimpleTextUtil.Write(data, MINVALUE);
            SimpleTextUtil.Write(data, minValue.ToString(invariant), scratch);
            SimpleTextUtil.WriteNewline(data);

            // build up our fixed-width "simple text packed ints" format;
            // max - min may not fit in a long, hence BigInteger
            var diffBig = new System.Numerics.BigInteger(maxValue) - new System.Numerics.BigInteger(minValue);
            var patternString = new string('0', diffBig.ToString(invariant).Length);

            // write our pattern to the .dat
            SimpleTextUtil.Write(data, PATTERN);
            SimpleTextUtil.Write(data, patternString, scratch);
            SimpleTextUtil.WriteNewline(data);

            int numDocsWritten = 0;

            // second pass to write the values
            foreach (var n in values)
            {
                long value = n.GetValueOrDefault();

                Debug.Assert(value >= minValue);

                // Subtract in BigInteger: value - minValue can exceed long.MaxValue
                // (e.g. minValue negative, value large), which would wrap as a long.
                var delta = new System.Numerics.BigInteger(value) - new System.Numerics.BigInteger(minValue);
                string s = delta.ToString(invariant).PadLeft(patternString.Length, '0');
                Debug.Assert(s.Length == patternString.Length);
                SimpleTextUtil.Write(data, s, scratch);
                SimpleTextUtil.WriteNewline(data);
                SimpleTextUtil.Write(data, n.HasValue ? "T" : "F", scratch);
                SimpleTextUtil.WriteNewline(data);
                numDocsWritten++;
                Debug.Assert(numDocsWritten <= numDocs);
            }

            Debug.Assert(numDocs == numDocsWritten, "numDocs=" + numDocs + " numDocsWritten=" + numDocsWritten);
        }
 /// <summary>
 /// Returns the "documents that have a value" bit set for <paramref name="field"/>,
 /// dispatching on the field's doc-values type.
 /// </summary>
 /// <exception cref="InvalidEnumArgumentException">The doc-values type is none of the four handled kinds.</exception>
 public override Bits GetDocsWithField(FieldInfo field)
 {
     var kind = field.DocValuesType;

     if (kind == FieldInfo.DocValuesType_e.SORTED_SET)
     {
         return DocValues.DocsWithValue(GetSortedSet(field), MAX_DOC);
     }

     if (kind == FieldInfo.DocValuesType_e.SORTED)
     {
         return DocValues.DocsWithValue(GetSorted(field), MAX_DOC);
     }

     if (kind == FieldInfo.DocValuesType_e.BINARY)
     {
         return GetBinaryDocsWithField(field);
     }

     if (kind == FieldInfo.DocValuesType_e.NUMERIC)
     {
         return GetNumericDocsWithField(field);
     }

     throw new InvalidEnumArgumentException();
 }
        /// <summary>
        /// Reads the next line (expected to start with the VALUE prefix) and hands the
        /// payload after that prefix to <paramref name="visitor"/>, converted according
        /// to <paramref name="type"/>. An unrecognised type is silently ignored.
        /// </summary>
        /// <param name="type">Type tag previously read for this field.</param>
        /// <param name="fieldInfo">Field the value belongs to; passed through to the visitor.</param>
        /// <param name="visitor">Receiver of the decoded value.</param>
        private void ReadField(BytesRef type, FieldInfo fieldInfo, StoredFieldVisitor visitor)
        {
            ReadLine();
            Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.VALUE));

            // The payload is everything on the line after the VALUE prefix.
            int valueOffset = _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length;
            int valueLength = _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length;

            // Numbers are parsed with the invariant culture so the result does not
            // depend on the current thread culture (e.g. ',' as decimal separator).
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_STRING))
            {
                // Decode the UTF-8 payload; calling ToString() on a byte sub-list
                // does not yield the text.
                visitor.StringField(fieldInfo, DecodeScratchValue(valueOffset, valueLength));
            }
            else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_BINARY))
            {
                var copy = new sbyte[valueLength];
                Array.Copy(_scratch.Bytes, valueOffset, copy, 0, copy.Length);
                visitor.BinaryField(fieldInfo, copy);
            }
            else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_INT))
            {
                visitor.IntField(fieldInfo, Convert.ToInt32(DecodeScratchValue(valueOffset, valueLength), invariant));
            }
            else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_LONG))
            {
                visitor.LongField(fieldInfo, Convert.ToInt64(DecodeScratchValue(valueOffset, valueLength), invariant));
            }
            else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_FLOAT))
            {
                visitor.FloatField(fieldInfo, Convert.ToSingle(DecodeScratchValue(valueOffset, valueLength), invariant));
            }
            else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_DOUBLE))
            {
                visitor.DoubleField(fieldInfo, Convert.ToDouble(DecodeScratchValue(valueOffset, valueLength), invariant));
            }
        }

        /// <summary>Decodes a UTF-8 slice of the line buffer into a string via the shared UTF-16 scratch.</summary>
        private string DecodeScratchValue(int offset, int length)
        {
            UnicodeUtil.UTF8toUTF16(_scratch.Bytes, offset, length, _scratchUtf16);
            return _scratchUtf16.ToString();
        }
        /// <summary>
        /// Emits the two header lines of a field: the FIELD line carrying the field
        /// name, then the TYPE line carrying the doc-values type name.
        /// </summary>
        private void WriteFieldEntry(FieldInfo field, FieldInfo.DocValuesType_e type)
        {
            string fieldName = field.Name;
            string typeName = type.ToString();

            // field line
            SimpleTextUtil.Write(data, FIELD);
            SimpleTextUtil.Write(data, fieldName, scratch);
            SimpleTextUtil.WriteNewline(data);

            // type line
            SimpleTextUtil.Write(data, TYPE);
            SimpleTextUtil.Write(data, typeName, scratch);
            SimpleTextUtil.WriteNewline(data);
        }
Ejemplo n.º 37
0
 /// <summary>
 /// Encodes the metadata of one term: file-pointer deltas against the previously
 /// encoded term go into <paramref name="longs"/>, the optional singleton doc id,
 /// last position block offset and skip offset are written to <paramref name="out"/>.
 /// </summary>
 /// <param name="longs">Receives the doc (and, when enabled, pos and pay) start-pointer deltas.</param>
 /// <param name="out">Receives the variable-length values that are present (i.e. not -1).</param>
 /// <param name="fieldInfo">Not used by this implementation.</param>
 /// <param name="_state">Term state; must be an <c>IntBlockTermState</c>.</param>
 /// <param name="absolute">When true, deltas are taken against the empty state.</param>
 public override void EncodeTerm(long[] longs, DataOutput @out, FieldInfo fieldInfo, BlockTermState _state, bool absolute)
 {
     var current = (IntBlockTermState)_state;

     if (absolute)
     {
         LastState = EmptyState;
     }
     var previous = LastState;

     // file-pointer deltas
     longs[0] = current.DocStartFP - previous.DocStartFP;
     if (FieldHasPositions)
     {
         longs[1] = current.PosStartFP - previous.PosStartFP;
         if (FieldHasPayloads || FieldHasOffsets)
         {
             longs[2] = current.PayStartFP - previous.PayStartFP;
         }
     }

     // optional values, each written only when set
     if (current.SingletonDocID != -1)
     {
         @out.WriteVInt(current.SingletonDocID);
     }
     if (FieldHasPositions && current.LastPosBlockOffset != -1)
     {
         @out.WriteVLong(current.LastPosBlockOffset);
     }
     if (current.SkipOffset != -1)
     {
         @out.WriteVLong(current.SkipOffset);
     }

     LastState = current;
 }
        /// <summary>
        /// Writes a SORTED doc-values field as text: value count, maximum value length,
        /// the zero-padding patterns for lengths and ords, every value padded with
        /// spaces to the maximum length, and finally one line per document holding
        /// its ord plus one.
        /// </summary>
        /// <param name="field">Field being written; its header is emitted first.</param>
        /// <param name="values">The values; enumerated twice.</param>
        /// <param name="docToOrd">Per-document ord; a null entry is written as 0.</param>
        public override void AddSortedField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd)
        {
            Debug.Assert(FieldSeen(field.Name));
            Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.SORTED);
            WriteFieldEntry(field, FieldInfo.DocValuesType_e.SORTED);

            var invariant = CultureInfo.InvariantCulture;

            // first pass: count the values and find the longest one
            int termCount = 0;
            int longestTerm = -1;
            foreach (BytesRef term in values)
            {
                if (term.Length > longestTerm)
                {
                    longestTerm = term.Length;
                }
                termCount++;
            }

            // number of values
            SimpleTextUtil.Write(data, NUMVALUES);
            SimpleTextUtil.Write(data, termCount.ToString(invariant), scratch);
            SimpleTextUtil.WriteNewline(data);

            // maximum value length
            string longestTermText = longestTerm.ToString(invariant);
            SimpleTextUtil.Write(data, MAXLENGTH);
            SimpleTextUtil.Write(data, longestTermText, scratch);
            SimpleTextUtil.WriteNewline(data);

            // pattern used to encode lengths: one '0' per digit of the maximum length
            string encoderFormat = new string('0', longestTermText.Length);
            SimpleTextUtil.Write(data, PATTERN);
            SimpleTextUtil.Write(data, encoderFormat, scratch);
            SimpleTextUtil.WriteNewline(data);

            // pattern used to encode ords: one '0' per digit of (count + 1)
            string ordEncoderFormat = new string('0', (termCount + 1L).ToString(invariant).Length);
            SimpleTextUtil.Write(data, ORDPATTERN);
            SimpleTextUtil.Write(data, ordEncoderFormat, scratch);
            SimpleTextUtil.WriteNewline(data);

            // second pass: the values themselves (counter is for asserts only)
            int written = 0;
            foreach (BytesRef term in values)
            {
                SimpleTextUtil.Write(data, LENGTH);
                SimpleTextUtil.Write(data, term.Length.ToString(encoderFormat, invariant), scratch);
                SimpleTextUtil.WriteNewline(data);

                // raw bytes: SimpleTextUtil.Write would escape them
                data.WriteBytes(term.Bytes, term.Offset, term.Length);

                // space-pad up to the longest value
                for (int pad = longestTerm - term.Length; pad > 0; pad--)
                {
                    data.WriteByte((byte)' ');
                }
                SimpleTextUtil.WriteNewline(data);

                written++;
                Debug.Assert(written <= termCount);
            }

            Debug.Assert(written == termCount);

            // one line per document: ord + 1, or 0 when the entry is null
            foreach (var ord in docToOrd)
            {
                long shifted = ord.HasValue ? ord.Value + 1 : 0L;
                SimpleTextUtil.Write(data, shifted.ToString(ordEncoderFormat, invariant), scratch);
                SimpleTextUtil.WriteNewline(data);
            }
        }
 /// <summary>
 /// Builds the docs-with-field bit set for a numeric field from the field's entry
 /// in FIELDS, a clone of the data input and a fresh scratch buffer.
 /// </summary>
 private Bits GetNumericDocsWithField(FieldInfo fieldInfo)
 {
     return new BitsAnonymousInnerClassHelper(
         this,
         FIELDS[fieldInfo.Name],
         (IndexInput)DATA.Clone(),
         new BytesRef());
 }
        /// <summary>
        /// Returns a sorted-set doc-values view for <paramref name="fieldInfo"/>, backed
        /// by a clone of the data input and its own scratch buffer.
        /// </summary>
        public override SortedSetDocValues GetSortedSet(FieldInfo fieldInfo)
        {
            var entry = FIELDS[fieldInfo.Name];

            // The field is expected to have been validated already (SegmentCoreReaders).
            Debug.Assert(entry != null);

            var dataClone = (IndexInput) DATA.Clone();
            var lineBuffer = new BytesRef();
            return new SortedSetDocValuesAnonymousInnerClassHelper(this, entry, dataClone, lineBuffer);
        }
        /// <summary>
        /// Returns a numeric doc-values view for <paramref name="fieldInfo"/>, backed by
        /// a clone of the data input and its own scratch buffer.
        /// </summary>
        public override NumericDocValues GetNumeric(FieldInfo fieldInfo)
        {
            var field = FIELDS[fieldInfo.Name];

            // SegmentCoreReaders already verifies this field is valid. A single assert
            // with a message is kept: a preceding message-less duplicate would fire
            // first and hide the diagnostic text.
            Debug.Assert(field != null, "field=" + fieldInfo.Name + " fields=" + FIELDS);

            var @in = (IndexInput)DATA.Clone();
            var scratch = new BytesRef();

            return new NumericDocValuesAnonymousInnerClassHelper(this, field, @in, scratch);
        }
Ejemplo n.º 42
0
        /// <summary>
        /// Writes a SORTED_SET doc-values field as text: value count, maximum value
        /// length, the length pattern, the ord-list pattern, every value padded with
        /// spaces to the maximum length, and finally one line per document holding its
        /// comma-separated ords padded with spaces to the longest ord list.
        /// </summary>
        /// <param name="field">Field being written; its header is emitted first.</param>
        /// <param name="values">The values; enumerated twice.</param>
        /// <param name="docToOrdCount">Number of ords per document; enumerated twice.</param>
        /// <param name="ords">All ords, concatenated in document order; enumerated twice.</param>
        public override void AddSortedSetField(FieldInfo field, IEnumerable <BytesRef> values,
                                               IEnumerable <long?> docToOrdCount, IEnumerable <long?> ords)
        {
            Debug.Assert(FieldSeen(field.Name));
            Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.SORTED_SET);
            WriteFieldEntry(field, FieldInfo.DocValuesType_e.SORTED_SET);

            // The file is machine-readable: format every number culture-independently.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            long valueCount = 0;
            int  maxLength  = 0;

            foreach (var value in values)
            {
                maxLength = Math.Max(maxLength, value.Length);
                valueCount++;
            }

            // write numValues
            SimpleTextUtil.Write(data, NUMVALUES);
            SimpleTextUtil.Write(data, valueCount.ToString(invariant), scratch);
            SimpleTextUtil.WriteNewline(data);

            // write maxLength
            string maxLengthText = maxLength.ToString(invariant);
            SimpleTextUtil.Write(data, MAXLENGTH);
            SimpleTextUtil.Write(data, maxLengthText, scratch);
            SimpleTextUtil.WriteNewline(data);

            // write our pattern for encoding lengths: one '0' per digit of maxLength
            string encoderFormat = new string('0', maxLengthText.Length);
            SimpleTextUtil.Write(data, PATTERN);
            SimpleTextUtil.Write(data, encoderFormat, scratch);
            SimpleTextUtil.WriteNewline(data);

            // compute ord pattern: this is funny, we encode all values for all docs to find the maximum length
            var maxOrdListLength = 0;
            var sb2 = new StringBuilder();

            using (var ordStream = ords.GetEnumerator())
            {
                foreach (var n in docToOrdCount)
                {
                    FillOrdList(sb2, ordStream, (int)n);
                    maxOrdListLength = Math.Max(maxOrdListLength, sb2.Length);
                }
            }

            // write our pattern for ord lists
            SimpleTextUtil.Write(data, ORDPATTERN);
            SimpleTextUtil.Write(data, new string('X', maxOrdListLength), scratch);
            SimpleTextUtil.WriteNewline(data);

            // for asserts:
            long valuesSeen = 0;

            foreach (var value in values)
            {
                // write length
                SimpleTextUtil.Write(data, LENGTH);
                SimpleTextUtil.Write(data, value.Length.ToString(encoderFormat, invariant), scratch);
                SimpleTextUtil.WriteNewline(data);

                // write bytes -- don't use SimpleText.Write
                // because it escapes:
                data.WriteBytes(value.Bytes, value.Offset, value.Length);

                // pad to fit
                for (var i = value.Length; i < maxLength; i++)
                {
                    data.WriteByte((byte)(sbyte)' ');
                }
                SimpleTextUtil.WriteNewline(data);
                valuesSeen++;
                Debug.Assert(valuesSeen <= valueCount);
            }

            Debug.Assert(valuesSeen == valueCount);

            // write the ords for each doc comma-separated
            using (var ordStream = ords.GetEnumerator())
            {
                foreach (var n in docToOrdCount)
                {
                    FillOrdList(sb2, ordStream, (int)n);

                    // now pad to fit: these are numbers so spaces work well. reader calls trim()
                    var numPadding = maxOrdListLength - sb2.Length;
                    if (numPadding > 0)
                    {
                        sb2.Append(' ', numPadding);
                    }
                    SimpleTextUtil.Write(data, sb2.ToString(), scratch);
                    SimpleTextUtil.WriteNewline(data);
                }
            }
        }

        /// <summary>
        /// Resets <paramref name="target"/> and fills it with the next
        /// <paramref name="count"/> ords pulled from <paramref name="ordStream"/>,
        /// comma-separated and formatted with the invariant culture.
        /// </summary>
        private static void FillOrdList(StringBuilder target, IEnumerator<long?> ordStream, int count)
        {
            target.Length = 0;
            for (int i = 0; i < count; i++)
            {
                ordStream.MoveNext();
                if (target.Length > 0)
                {
                    target.Append(",");
                }
                target.Append(Convert.ToString(ordStream.Current, System.Globalization.CultureInfo.InvariantCulture));
            }
        }
 /// <summary>
 /// Text form of an optional doc-values type: the enum member's name, or the
 /// literal "false" when no type is set.
 /// </summary>
 private static string GetDocValuesType(FieldInfo.DocValuesType_e? type)
 {
     if (!type.HasValue)
     {
         return "false";
     }

     return type.ToString();
 }
Ejemplo n.º 44
0
 /// <summary>
 /// Caches which postings features (freqs, positions, offsets, payloads) the field
 /// has, forwards them to the skip writer, resets the last term state, and reports
 /// how many file pointers are tracked per term.
 /// </summary>
 /// <returns>
 /// 1 without positions (doc FP), 2 with positions only (doc + pos FP),
 /// 3 with positions plus payloads or offsets (doc + pos + pay FP).
 /// </returns>
 public override int SetField(FieldInfo fieldInfo)
 {
     FieldInfo.IndexOptions? options = fieldInfo.FieldIndexOptions;

     FieldHasFreqs = options >= FieldInfo.IndexOptions.DOCS_AND_FREQS;
     FieldHasPositions = options >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
     FieldHasOffsets = options >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
     FieldHasPayloads = fieldInfo.HasPayloads();

     SkipWriter.SetField(FieldHasPositions, FieldHasOffsets, FieldHasPayloads);
     LastState = EmptyState;

     if (!FieldHasPositions)
     {
         return 1; // doc FP
     }

     // doc + pos + pay FP when payloads/offsets exist, otherwise doc + pos FP
     return (FieldHasPayloads || FieldHasOffsets) ? 3 : 2;
 }
Ejemplo n.º 45
0
 /// <summary>
 /// Stores the owning reader, the start offset of the field's terms and the
 /// document count, resolves the field's <c>FieldInfo</c> by name, then loads
 /// the terms.
 /// </summary>
 public SimpleTextTerms(SimpleTextFieldsReader outerInstance, string field, long termsStart, int maxDoc)
 {
     this._outerInstance = outerInstance;
     this._maxDoc = maxDoc;
     this._termsStart = termsStart;
     this._fieldInfo = outerInstance._fieldInfos.FieldInfo(field);

     // every member above is assigned before loading starts
     LoadTerms();
 }
        /// <summary>
        /// Writes the header lines of a field (number, name, the three feature flags as
        /// lower-case "true"/"false", and the term count) and remembers the flags for
        /// the terms that follow.
        /// </summary>
        public override void StartField(FieldInfo info, int numTerms, bool positions, bool offsets, bool payloads)
        {
            var invariant = CultureInfo.InvariantCulture;

            Write(FIELD);
            Write(info.Number.ToString(invariant));
            NewLine();

            Write(FIELDNAME);
            Write(info.Name);
            NewLine();

            Write(FIELDPOSITIONS);
            Write(positions ? "true" : "false");
            NewLine();

            Write(FIELDOFFSETS);
            Write(offsets ? "true" : "false");
            NewLine();

            Write(FIELDPAYLOADS);
            Write(payloads ? "true" : "false");
            NewLine();

            Write(FIELDTERMCOUNT);
            Write(numTerms.ToString(invariant));
            NewLine();

            // remembered for the terms of this field
            _payloads = payloads;
            _offsets = offsets;
            _positions = positions;
        }
        /// <summary>
        /// Writes one stored field as text: field number, field name, a type tag and the
        /// value. Numeric values take precedence, then the binary value, then the
        /// string value.
        /// </summary>
        /// <param name="info">Supplies the field number.</param>
        /// <param name="field">Supplies the name and the value to store.</param>
        /// <exception cref="ArgumentException">
        /// The numeric value has an unsupported type, or the field has neither a
        /// numeric, binary nor string value.
        /// </exception>
        public override void WriteField(FieldInfo info, IndexableField field)
        {
            Write(FIELD);
            Write(info.Number.ToString(CultureInfo.InvariantCulture));
            NewLine();

            Write(NAME);
            Write(field.Name);
            NewLine();

            Write(TYPE);

            var n = field.NumericValue;

            if (n != null)
            {
                if (n is sbyte? || n is short? || n is int?)
                {
                    Write(TYPE_INT);
                    NewLine();

                    Write(VALUE);
                    // Convert rather than cast: unboxing a boxed sbyte/short straight
                    // to int throws InvalidCastException.
                    Write(Convert.ToInt32(n, CultureInfo.InvariantCulture).ToString(CultureInfo.InvariantCulture));
                    NewLine();
                }
                else if (n is long?)
                {
                    Write(TYPE_LONG);
                    NewLine();

                    Write(VALUE);
                    Write(((long)n).ToString(CultureInfo.InvariantCulture));
                    NewLine();
                }
                else if (n is float?)
                {
                    Write(TYPE_FLOAT);
                    NewLine();

                    Write(VALUE);
                    // LUCENENET: Need to specify the "R" for round-trip: http://stackoverflow.com/a/611564/181087
                    Write(((float)n).ToString("R", CultureInfo.InvariantCulture));
                    NewLine();
                }
                else if (n is double?)
                {
                    Write(TYPE_DOUBLE);
                    NewLine();

                    Write(VALUE);
                    // LUCENENET: Need to specify the "R" for round-trip: http://stackoverflow.com/a/611564/181087
                    Write(((double)n).ToString("R", CultureInfo.InvariantCulture));
                    NewLine();
                }
                else
                {
                    throw new ArgumentException("cannot store numeric type " + n.GetType());
                }
            }
            else
            {
                BytesRef bytes = field.BinaryValue;
                if (bytes != null)
                {
                    Write(TYPE_BINARY);
                    NewLine();

                    Write(VALUE);
                    Write(bytes);
                    NewLine();
                }
                else if (field.StringValue == null)
                {
                    throw new ArgumentException("field " + field.Name +
                                                       " is stored but does not have binaryValue, stringValue nor numericValue");
                }
                else
                {
                    Write(TYPE_STRING);
                    NewLine();

                    Write(VALUE);
                    Write(field.StringValue);
                    NewLine();
                }
            }
        }
Ejemplo n.º 48
0
            /// <summary>
            /// Prepares a writer for one field: asks the postings writer how many longs
            /// of metadata a term carries, creates the FST builder (BYTE1 input,
            /// positive-int outputs) and zeroes all block bookkeeping.
            /// </summary>
            internal TermsWriter(FSTOrdTermsWriter outerInstance, FieldInfo fieldInfo)
            {
                this._outerInstance = outerInstance;
                this._numTerms = 0;
                this._fieldInfo = fieldInfo;

                // number of longs of metadata per term, as reported by the postings writer
                this._longsSize = outerInstance.postingsWriter.SetField(fieldInfo);

                this._outputs = PositiveIntOutputs.Singleton;
                this._builder = new Builder<long?>(FST.INPUT_TYPE.BYTE1, this._outputs);

                // per-block bookkeeping starts at zero
                this._lastBlockStatsFp = 0;
                this._lastBlockMetaLongsFp = 0;
                this._lastBlockMetaBytesFp = 0;
                this._lastBlockLongs = new long[this._longsSize];

                // per-term bookkeeping starts at zero as well
                this._lastLongs = new long[this._longsSize];
                this._lastMetaBytesFp = 0;
            }
Ejemplo n.º 49
0
 /// <summary>
 /// Returns a new <c>TermsWriter</c>, created with this instance and
 /// <paramref name="field"/>, as the field's terms consumer.
 /// </summary>
 public override TermsConsumer AddField(FieldInfo field)
 {
     TermsConsumer consumer = new TermsWriter(this, field);
     return consumer;
 }
        /// <summary>
        /// Reads the field infos of a segment from its text file. The file is consumed
        /// strictly line by line: a field count, then for every field a fixed sequence
        /// of prefixed lines (name, number, indexed flag, optional index options,
        /// term-vector flag, payload flag, norms flag, norms type, doc-values type,
        /// doc-values generation, attribute count and the attribute key/value pairs),
        /// and finally the footer.
        /// </summary>
        /// <param name="directory">Directory the file is opened from.</param>
        /// <param name="segmentName">Segment name used to build the file name.</param>
        /// <param name="segmentSuffix">Segment suffix used to build the file name.</param>
        /// <param name="iocontext">I/O context passed to the directory when opening the file.</param>
        /// <returns>The field infos built from the file.</returns>
        public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix,
            IOContext iocontext)
        {
            var fileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix,
                SimpleTextFieldInfosWriter.FIELD_INFOS_EXTENSION);
            var input = directory.OpenChecksumInput(fileName, iocontext);
            var scratch = new BytesRef();

            // Set to true only after everything was read; selects how the input is closed in finally.
            var success = false;
            try
            {

                // first line: number of fields
                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NUMFIELDS));
                var size = Convert.ToInt32(ReadString(SimpleTextFieldInfosWriter.NUMFIELDS.Length, scratch));
                var infos = new FieldInfo[size];

                for (var i = 0; i < size; i++)
                {
                    // field name
                    SimpleTextUtil.ReadLine(input, scratch);
                    Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NAME));
                    string name = ReadString(SimpleTextFieldInfosWriter.NAME.Length, scratch);

                    // field number
                    SimpleTextUtil.ReadLine(input, scratch);
                    Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NUMBER));
                    int fieldNumber = Convert.ToInt32(ReadString(SimpleTextFieldInfosWriter.NUMBER.Length, scratch));

                    // indexed flag
                    SimpleTextUtil.ReadLine(input, scratch);
                    Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.ISINDEXED));
                    bool isIndexed = Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.ISINDEXED.Length, scratch));

                    // the index-options line is only read for indexed fields
                    FieldInfo.IndexOptions? indexOptions;
                    if (isIndexed)
                    {
                        SimpleTextUtil.ReadLine(input, scratch);
                        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.INDEXOPTIONS));
                        indexOptions = (FieldInfo.IndexOptions)Enum.Parse(typeof(FieldInfo.IndexOptions), ReadString(SimpleTextFieldInfosWriter.INDEXOPTIONS.Length,
                                scratch));
                    }
                    else
                    {
                        indexOptions = null;
                    }

                    // term-vector flag
                    SimpleTextUtil.ReadLine(input, scratch);
                    Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.STORETV));
                    bool storeTermVector =
                        Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.STORETV.Length, scratch));

                    // payload flag
                    SimpleTextUtil.ReadLine(input, scratch);
                    Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.PAYLOADS));
                    bool storePayloads =
                        Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.PAYLOADS.Length, scratch));

                    // the line holds the NORMS flag; omitNorms is its negation
                    SimpleTextUtil.ReadLine(input, scratch);
                    Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NORMS));
                    bool omitNorms = !Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.NORMS.Length, scratch));

                    // norms type, mapped through DocValuesType(string)
                    SimpleTextUtil.ReadLine(input, scratch);
                    Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NORMS_TYPE));
                    string nrmType = ReadString(SimpleTextFieldInfosWriter.NORMS_TYPE.Length, scratch);
                    FieldInfo.DocValuesType_e? normsType = DocValuesType(nrmType);

                    // doc-values type, mapped through DocValuesType(string)
                    SimpleTextUtil.ReadLine(input, scratch);
                    Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.DOCVALUES));
                    string dvType = ReadString(SimpleTextFieldInfosWriter.DOCVALUES.Length, scratch);
                    FieldInfo.DocValuesType_e? docValuesType = DocValuesType(dvType);

                    // doc-values generation
                    SimpleTextUtil.ReadLine(input, scratch);
                    Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.DOCVALUES_GEN));
                    long dvGen = Convert.ToInt64(ReadString(SimpleTextFieldInfosWriter.DOCVALUES_GEN.Length, scratch));

                    // attribute count, followed by that many key/value line pairs
                    SimpleTextUtil.ReadLine(input, scratch);
                    Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NUM_ATTS));
                    int numAtts = Convert.ToInt32(ReadString(SimpleTextFieldInfosWriter.NUM_ATTS.Length, scratch));
                    IDictionary<string, string> atts = new Dictionary<string, string>();

                    for (int j = 0; j < numAtts; j++)
                    {
                        SimpleTextUtil.ReadLine(input, scratch);
                        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.ATT_KEY));
                        string key = ReadString(SimpleTextFieldInfosWriter.ATT_KEY.Length, scratch);

                        SimpleTextUtil.ReadLine(input, scratch);
                        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.ATT_VALUE));
                        string value = ReadString(SimpleTextFieldInfosWriter.ATT_VALUE.Length, scratch);
                        // indexer assignment: a repeated key overwrites the earlier value
                        atts[key] = value;
                    }

                    // attributes are handed over wrapped in a read-only dictionary
                    infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads,
                        indexOptions, docValuesType, normsType, new ReadOnlyDictionary<string,string>(atts))
                    {
                        DocValuesGen = dvGen
                    };
                }

                // footer check happens after all fields were read
                SimpleTextUtil.CheckFooter(input);

                var fieldInfos = new FieldInfos(infos);
                success = true;
                return fieldInfos;
            }
            finally
            {
                // On success the input is disposed normally; otherwise it is closed via
                // IOUtils.CloseWhileHandlingException.
                if (success)
                {
                    input.Dispose();
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(input);
                }
            }
        }
        /// <summary>
        /// Returns a binary doc-values view for <paramref name="fieldInfo"/>, backed by
        /// a clone of the data input and its own scratch buffer.
        /// </summary>
        public override BinaryDocValues GetBinary(FieldInfo fieldInfo)
        {
            var entry = FIELDS[fieldInfo.Name];
            Debug.Assert(entry != null);

            var dataClone = (IndexInput)DATA.Clone();
            var lineBuffer = new BytesRef();
            return new BinaryDocValuesAnonymousInnerClassHelper(this, entry, dataClone, lineBuffer);
        }
        /// <summary>
        /// Writes one stored field as text: field number, field name, a type tag and the
        /// value. Numeric values take precedence, then the binary value, then the
        /// string value. All numbers are formatted with the invariant culture so the
        /// output does not depend on the current thread culture.
        /// </summary>
        /// <param name="info">Supplies the field number.</param>
        /// <param name="field">Supplies the name and the value to store.</param>
        /// <exception cref="ArgumentException">
        /// The numeric value has an unsupported type, or the field has neither a
        /// numeric, binary nor string value.
        /// </exception>
        public override void WriteField(FieldInfo info, IndexableField field)
        {
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            Write(FIELD);
            Write(Convert.ToString(info.Number, invariant));
            NewLine();

            Write(NAME);
            Write(field.Name());
            NewLine();

            Write(TYPE);

            var n = field.NumericValue;

            if (n != null)
            {
                if (n is sbyte? || n is short? || n is int?)
                {
                    Write(TYPE_INT);
                    NewLine();

                    Write(VALUE);
                    // Convert rather than cast: unboxing a boxed sbyte/short straight
                    // to int throws InvalidCastException.
                    Write(Convert.ToInt32(n, invariant).ToString(invariant));
                    NewLine();
                }
                else if (n is long?)
                {
                    Write(TYPE_LONG);
                    NewLine();

                    Write(VALUE);
                    Write(((long) n).ToString(invariant));
                    NewLine();
                }
                else if (n is float?)
                {
                    Write(TYPE_FLOAT);
                    NewLine();

                    Write(VALUE);
                    // "R" keeps enough digits for the text to parse back to the same value
                    Write(((float) n).ToString("R", invariant));
                    NewLine();
                }
                else if (n is double?)
                {
                    Write(TYPE_DOUBLE);
                    NewLine();

                    Write(VALUE);
                    // "R" keeps enough digits for the text to parse back to the same value
                    Write(((double) n).ToString("R", invariant));
                    NewLine();
                }
                else
                {
                    throw new ArgumentException("cannot store numeric type " + n.GetType());
                }
            }
            else
            {
                BytesRef bytes = field.BinaryValue();
                if (bytes != null)
                {
                    Write(TYPE_BINARY);
                    NewLine();

                    Write(VALUE);
                    Write(bytes);
                    NewLine();
                }
                else if (field.StringValue == null)
                {
                    throw new ArgumentException("field " + field.Name() +
                                                       " is stored but does not have binaryValue, stringValue nor numericValue");
                }
                else
                {
                    Write(TYPE_STRING);
                    NewLine();

                    Write(VALUE);
                    Write(field.StringValue);
                    NewLine();
                }
            }
        }
        /// <summary>
        /// Writes a SORTED_SET doc-values field in the fixed-width plain-text layout:
        /// a header (value count, max value length, length pattern, ord-list pattern),
        /// followed by one length line plus one space-padded value line per value,
        /// followed by one space-padded, comma-separated ord list per document.
        /// </summary>
        /// <param name="field">Field being written; must already be registered and be of type SORTED_SET.</param>
        /// <param name="values">The values of the field. Enumerated twice (measure, then write).</param>
        /// <param name="docToOrdCount">Number of ords for each document. Enumerated twice; entries must be non-null.</param>
        /// <param name="ords">Flat stream of ords consumed in document order. Enumerated twice.</param>
        public override void AddSortedSetField(FieldInfo field, IEnumerable<BytesRef> values,
            IEnumerable<long?> docToOrdCount, IEnumerable<long?> ords)
        {
            Debug.Assert(FieldSeen(field.Name));
            Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.SORTED_SET);
            WriteFieldEntry(field, FieldInfo.DocValuesType_e.SORTED_SET);

            // First pass over the values: count them and find the longest one so that
            // every value line can be padded to the same width.
            long valueCount = 0;
            int maxLength = 0;
            foreach (var value in values)
            {
                maxLength = Math.Max(maxLength, value.Length);
                valueCount++;
            }

            // write numValues
            SimpleTextUtil.Write(data, NUMVALUES);
            SimpleTextUtil.Write(data, valueCount.ToString(CultureInfo.InvariantCulture), scratch);
            SimpleTextUtil.WriteNewline(data);

            // write maxLength
            SimpleTextUtil.Write(data, MAXLENGTH);
            SimpleTextUtil.Write(data, maxLength.ToString(CultureInfo.InvariantCulture), scratch);
            SimpleTextUtil.WriteNewline(data);

            // One '0' per decimal digit of maxLength: used as a numeric format string so
            // that every encoded length is zero-padded to the same number of digits.
            int maxBytesLength = maxLength.ToString(CultureInfo.InvariantCulture).Length;
            string encoderFormat = new string('0', maxBytesLength);

            // write our pattern for encoding lengths
            SimpleTextUtil.Write(data, PATTERN);
            SimpleTextUtil.Write(data, encoderFormat, scratch);
            SimpleTextUtil.WriteNewline(data);

            // compute ord pattern: this is funny, we encode all values for all docs to find the maximum length
            var maxOrdListLength = 0;
            var ordList = new StringBuilder();
            using (var ordStream = ords.GetEnumerator())
            {
                foreach (var n in docToOrdCount)
                {
                    AppendOrdList(ordList, ordStream, (int) n);
                    maxOrdListLength = Math.Max(maxOrdListLength, ordList.Length);
                }
            }

            // write our pattern for ord lists
            SimpleTextUtil.Write(data, ORDPATTERN);
            SimpleTextUtil.Write(data, new string('X', maxOrdListLength), scratch);
            SimpleTextUtil.WriteNewline(data);

            // for asserts:
            long valuesSeen = 0;

            foreach (var value in values)
            {
                // write length
                SimpleTextUtil.Write(data, LENGTH);
                SimpleTextUtil.Write(data, value.Length.ToString(encoderFormat, CultureInfo.InvariantCulture), scratch);
                SimpleTextUtil.WriteNewline(data);

                // write bytes -- don't use SimpleText.Write
                // because it escapes:
                data.WriteBytes(value.Bytes, value.Offset, value.Length);

                // pad to fit
                for (var i = value.Length; i < maxLength; i++)
                {
                    data.WriteByte((byte)' ');
                }
                SimpleTextUtil.WriteNewline(data);
                valuesSeen++;
                Debug.Assert(valuesSeen <= valueCount);
            }

            Debug.Assert(valuesSeen == valueCount);

            // write the ords for each doc comma-separated
            using (var ordStream = ords.GetEnumerator())
            {
                foreach (var n in docToOrdCount)
                {
                    // Same formatting routine as the measuring pass, so the written
                    // list can never be wider than the ORDPATTERN computed above.
                    AppendOrdList(ordList, ordStream, (int) n);

                    // now pad to fit: these are numbers so spaces work well. reader calls trim()
                    var numPadding = maxOrdListLength - ordList.Length;
                    if (numPadding > 0)
                    {
                        ordList.Append(' ', numPadding);
                    }
                    SimpleTextUtil.Write(data, ordList.ToString(), scratch);
                    SimpleTextUtil.WriteNewline(data);
                }
            }
        }

        /// <summary>
        /// Clears <paramref name="sb"/> and fills it with the next <paramref name="count"/> ords
        /// pulled from <paramref name="ordStream"/>, comma-separated and formatted with the
        /// invariant culture. A null ord is written as 0.
        /// </summary>
        private static void AppendOrdList(StringBuilder sb, IEnumerator<long?> ordStream, int count)
        {
            sb.Length = 0;
            for (var i = 0; i < count; i++)
            {
                bool hasOrd = ordStream.MoveNext();
                Debug.Assert(hasOrd, "ords ended before docToOrdCount was satisfied");

                if (sb.Length > 0)
                {
                    sb.Append(',');
                }
                sb.Append(ordStream.Current.GetValueOrDefault().ToString(CultureInfo.InvariantCulture));
            }
        }