Example #1
 private void WriteHeader(IndexOutput output)
 {
     CodecUtil.WriteHeader(output, CODEC_NAME, VERSION_CURRENT);
 }
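The write side above is only half of the contract: the matching reader validates the same header with CodecUtil.CheckHeader, which verifies the codec name, checks that the stored version falls inside an accepted range, and returns the version it found. A minimal read-side sketch, assuming the codec also defines a VERSION_START constant for the oldest supported version (CODEC_NAME and VERSION_CURRENT are the constants used above):
 // Read-side counterpart (sketch): validate the codec name and version range,
 // then return the version that WriteHeader actually wrote.
 private int ReadHeader(IndexInput input)
 {
     return CodecUtil.CheckHeader(input, CODEC_NAME, VERSION_START, VERSION_CURRENT);
 }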
Example #2
        public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor,
                                        IComparer <BytesRef> termComp, String segmentSuffix, IOContext context)
        {
            _termComp = termComp;

            Debug.Assert(indexDivisor == -1 || indexDivisor > 0);

            _input =
                dir.OpenInput(
                    IndexFileNames.SegmentFileName(segment, segmentSuffix,
                                                   FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION),
                    context);

            var success = false;

            try
            {
                _version = ReadHeader(_input);

                if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
                {
                    CodecUtil.ChecksumEntireFile(_input);
                }

                indexInterval = _input.ReadInt();

                if (indexInterval < 1)
                {
                    throw new CorruptIndexException(String.Format("Invalid indexInterval: {0}, Resource: {1}",
                                                                  indexInterval, _input));
                }

                _indexDivisor = indexDivisor;

                if (indexDivisor < 0)
                {
                    _totalIndexInterval = indexInterval;
                }
                else
                {
                    // In case terms index gets loaded, later, on demand
                    _totalIndexInterval = indexInterval * indexDivisor;
                }

                Debug.Assert(_totalIndexInterval > 0);

                SeekDir(_input, _dirOffset);

                // Read directory
                int numFields = _input.ReadVInt();

                if (numFields < 0)
                {
                    throw new CorruptIndexException(String.Format("Invalid numFields: {0}, Resource: {1}", numFields,
                                                                  _input));
                }

                for (int i = 0; i < numFields; i++)
                {
                    int field         = _input.ReadVInt();
                    int numIndexTerms = _input.ReadVInt();
                    if (numIndexTerms < 0)
                    {
                        throw new CorruptIndexException(String.Format("Invalid numIndexTerms: {0}, Resource: {1}",
                                                                      numIndexTerms,
                                                                      _input));
                    }

                    long termsStart         = _input.ReadVLong();
                    long indexStart         = _input.ReadVLong();
                    long packedIndexStart   = _input.ReadVLong();
                    long packedOffsetsStart = _input.ReadVLong();

                    if (packedIndexStart < indexStart)
                    {
                        throw new CorruptIndexException(
                                  String.Format(
                                      "Invalid packedIndexStart: {0}, IndexStart: {1}, NumIndexTerms: {2}, Resource: {3}",
                                      packedIndexStart,
                                      indexStart, numIndexTerms, _input));
                    }

                    FieldInfo fieldInfo = fieldInfos.FieldInfo(field);

                    try
                    {
                        _fields.Add(fieldInfo,
                                    new FieldIndexData(numIndexTerms, indexStart, termsStart, packedIndexStart,
                                                       packedOffsetsStart, this));
                    }
                    catch (ArgumentException)
                    {
                        throw new CorruptIndexException(String.Format("Duplicate field: {0}, Resource {1}",
                                                                      fieldInfo.Name,
                                                                      _input));
                    }
                }
                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(_input);
                }
                if (indexDivisor > 0)
                {
                    _input.Dispose();
                    _input = null;
                    if (success)
                    {
                        _indexLoaded = true;
                    }

                    _termBytesReader = _termBytes.Freeze(true);
                }
            }
        }
        public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor,
                                           String segmentSuffix, IOContext context)
        {
            _input =
                dir.OpenInput(
                    IndexFileNames.SegmentFileName(segment, segmentSuffix,
                                                   VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION), new IOContext(context, true));
            var success = false;

            Debug.Assert(indexDivisor == -1 || indexDivisor > 0);

            try
            {
                _version      = ReadHeader(_input);
                _indexDivisor = indexDivisor;

                if (_version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM)
                {
                    CodecUtil.ChecksumEntireFile(_input);
                }

                SeekDir(_input, _dirOffset);

                // Read directory
                var numFields = _input.ReadVInt();
                if (numFields < 0)
                {
                    throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + _input + ")");
                }

                for (var i = 0; i < numFields; i++)
                {
                    var field      = _input.ReadVInt();
                    var indexStart = _input.ReadVLong();
                    var fieldInfo  = fieldInfos.FieldInfo(field);

                    try
                    {
                        _fields.Add(fieldInfo, new FieldIndexData(indexStart, this));
                    }
                    catch (ArgumentException)
                    {
                        throw new CorruptIndexException(String.Format("Duplicate Field: {0}, Resource: {1}",
                                                                      fieldInfo.Name, _input));
                    }
                }
                success = true;
            }
            finally
            {
                if (indexDivisor > 0)
                {
                    _input.Dispose();
                    _input = null;
                    if (success)
                    {
                        _indexLoaded = true;
                    }
                }
            }
        }
Example #4
        public override int Merge(MergeState mergeState)
        {
            int docCount = 0;
            int idx      = 0;

            foreach (AtomicReader reader in mergeState.Readers)
            {
                SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++];
                CompressingTermVectorsReader matchingVectorsReader = null;
                if (matchingSegmentReader != null)
                {
                    TermVectorsReader vectorsReader = matchingSegmentReader.TermVectorsReader;
                    // we can only bulk-copy if the matching reader is also a CompressingTermVectorsReader
                    if (vectorsReader != null && vectorsReader is CompressingTermVectorsReader)
                    {
                        matchingVectorsReader = (CompressingTermVectorsReader)vectorsReader;
                    }
                }

                int   maxDoc   = reader.MaxDoc;
                IBits liveDocs = reader.LiveDocs;

                if (matchingVectorsReader == null || matchingVectorsReader.Version != VERSION_CURRENT || matchingVectorsReader.CompressionMode != compressionMode || matchingVectorsReader.ChunkSize != chunkSize || matchingVectorsReader.PackedInt32sVersion != PackedInt32s.VERSION_CURRENT)
                {
                    // naive merge...
                    for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
                    {
                        Fields vectors = reader.GetTermVectors(i);
                        AddAllDocVectors(vectors, mergeState);
                        ++docCount;
                        mergeState.CheckAbort.Work(300);
                    }
                }
                else
                {
                    CompressingStoredFieldsIndexReader index = matchingVectorsReader.Index;
                    IndexInput vectorsStreamOrig             = matchingVectorsReader.VectorsStream;
                    vectorsStreamOrig.Seek(0);
                    ChecksumIndexInput vectorsStream = new BufferedChecksumIndexInput((IndexInput)vectorsStreamOrig.Clone());

                    for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc;)
                    {
                        // We make sure to move the checksum input in any case, otherwise the final
                        // integrity check might need to read the whole file a second time
                        long startPointer = index.GetStartPointer(i);
                        if (startPointer > vectorsStream.GetFilePointer())
                        {
                            vectorsStream.Seek(startPointer);
                        }
                        if ((pendingDocs.Count == 0) && (i == 0 || index.GetStartPointer(i - 1) < startPointer)) // start of a chunk
                        {
                            int docBase   = vectorsStream.ReadVInt32();
                            int chunkDocs = vectorsStream.ReadVInt32();
                            Debug.Assert(docBase + chunkDocs <= matchingSegmentReader.MaxDoc);
                            if (docBase + chunkDocs < matchingSegmentReader.MaxDoc && NextDeletedDoc(docBase, liveDocs, docBase + chunkDocs) == docBase + chunkDocs)
                            {
                                long chunkEnd    = index.GetStartPointer(docBase + chunkDocs);
                                long chunkLength = chunkEnd - vectorsStream.GetFilePointer();
                                indexWriter.WriteIndex(chunkDocs, this.vectorsStream.GetFilePointer());
                                this.vectorsStream.WriteVInt32(docCount);
                                this.vectorsStream.WriteVInt32(chunkDocs);
                                this.vectorsStream.CopyBytes(vectorsStream, chunkLength);
                                docCount     += chunkDocs;
                                this.numDocs += chunkDocs;
                                mergeState.CheckAbort.Work(300 * chunkDocs);
                                i = NextLiveDoc(docBase + chunkDocs, liveDocs, maxDoc);
                            }
                            else
                            {
                                for (; i < docBase + chunkDocs; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
                                {
                                    Fields vectors = reader.GetTermVectors(i);
                                    AddAllDocVectors(vectors, mergeState);
                                    ++docCount;
                                    mergeState.CheckAbort.Work(300);
                                }
                            }
                        }
                        else
                        {
                            Fields vectors = reader.GetTermVectors(i);
                            AddAllDocVectors(vectors, mergeState);
                            ++docCount;
                            mergeState.CheckAbort.Work(300);
                            i = NextLiveDoc(i + 1, liveDocs, maxDoc);
                        }
                    }

                    vectorsStream.Seek(vectorsStream.Length - CodecUtil.FooterLength());
                    CodecUtil.CheckFooter(vectorsStream);
                }
            }
            Finish(mergeState.FieldInfos, docCount);
            return(docCount);
        }
Example #5
        private readonly int version; // LUCENENET: marked readonly
        //static final boolean TEST = false;

        public FSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader)
        {
            string termsIndexFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTOrdTermsWriter.TERMS_INDEX_EXTENSION);
            string termsBlockFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTOrdTermsWriter.TERMS_BLOCK_EXTENSION);

            this.postingsReader = postingsReader;
            ChecksumIndexInput indexIn = null;
            IndexInput         blockIn = null;
            bool success = false;

            try
            {
                indexIn = state.Directory.OpenChecksumInput(termsIndexFileName, state.Context);
                blockIn = state.Directory.OpenInput(termsBlockFileName, state.Context);
                version = ReadHeader(indexIn);
                ReadHeader(blockIn);
                if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM)
                {
                    CodecUtil.ChecksumEntireFile(blockIn);
                }

                this.postingsReader.Init(blockIn);
                SeekDir(blockIn);

                FieldInfos fieldInfos = state.FieldInfos;
                int        numFields  = blockIn.ReadVInt32();
                for (int i = 0; i < numFields; i++)
                {
                    FieldInfo fieldInfo        = fieldInfos.FieldInfo(blockIn.ReadVInt32());
                    bool      hasFreq          = fieldInfo.IndexOptions != IndexOptions.DOCS_ONLY;
                    long      numTerms         = blockIn.ReadVInt64();
                    long      sumTotalTermFreq = hasFreq ? blockIn.ReadVInt64() : -1;
                    long      sumDocFreq       = blockIn.ReadVInt64();
                    int       docCount         = blockIn.ReadVInt32();
                    int       longsSize        = blockIn.ReadVInt32();
                    var       index            = new FST <Int64>(indexIn, PositiveInt32Outputs.Singleton);

                    var current = new TermsReader(this, fieldInfo, blockIn, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index);
                    // LUCENENET NOTE: This simulates a put operation in Java,
                    // getting the prior value first before setting it.
                    fields.TryGetValue(fieldInfo.Name, out TermsReader previous);
                    fields[fieldInfo.Name] = current;
                    CheckFieldSummary(state.SegmentInfo, indexIn, blockIn, current, previous);
                }
                if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM)
                {
                    CodecUtil.CheckFooter(indexIn);
                }
                else
                {
#pragma warning disable 612, 618
                    CodecUtil.CheckEOF(indexIn);
#pragma warning restore 612, 618
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Dispose(indexIn, blockIn);
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(indexIn, blockIn);
                }
            }
        }
        /// <summary>
        /// Sole constructor. </summary>
        public CompressingStoredFieldsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode)
        {
            this.compressionMode = compressionMode;
            string segment = si.Name;
            bool   success = false;

            fieldInfos = fn;
            numDocs    = si.DocCount;
            ChecksumIndexInput indexStream = null;

            try
            {
                string indexStreamFN  = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
                string fieldsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_EXTENSION);
                // Load the index into memory
                indexStream = d.OpenChecksumInput(indexStreamFN, context);
                string codecNameIdx = formatName + CompressingStoredFieldsWriter.CODEC_SFX_IDX;
                version = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT);
                Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.GetFilePointer());
                indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);

                long maxPointer = -1;

                if (version >= CompressingStoredFieldsWriter.VERSION_CHECKSUM)
                {
                    maxPointer = indexStream.ReadVInt64();
                    CodecUtil.CheckFooter(indexStream);
                }
                else
                {
#pragma warning disable 612, 618
                    CodecUtil.CheckEOF(indexStream);
#pragma warning restore 612, 618
                }
                indexStream.Dispose();
                indexStream = null;

                // Open the data file and read metadata
                fieldsStream = d.OpenInput(fieldsStreamFN, context);
                if (version >= CompressingStoredFieldsWriter.VERSION_CHECKSUM)
                {
                    if (maxPointer + CodecUtil.FooterLength() != fieldsStream.Length)
                    {
                        throw new CorruptIndexException("Invalid fieldsStream maxPointer (file truncated?): maxPointer=" + maxPointer + ", length=" + fieldsStream.Length);
                    }
                }
                else
                {
                    maxPointer = fieldsStream.Length;
                }
                this.maxPointer = maxPointer;
                string codecNameDat  = formatName + CompressingStoredFieldsWriter.CODEC_SFX_DAT;
                int    fieldsVersion = CodecUtil.CheckHeader(fieldsStream, codecNameDat, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT);
                if (version != fieldsVersion)
                {
                    throw new CorruptIndexException("Version mismatch between stored fields index and data: " + version + " != " + fieldsVersion);
                }
                Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == fieldsStream.GetFilePointer());

                if (version >= CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS)
                {
                    chunkSize = fieldsStream.ReadVInt32();
                }
                else
                {
                    chunkSize = -1;
                }
                packedIntsVersion = fieldsStream.ReadVInt32();
                decompressor      = compressionMode.NewDecompressor();
                this.bytes        = new BytesRef();

                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.DisposeWhileHandlingException(this, indexStream);
                }
            }
        }
Example #7
        public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
        {
            string      fileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
            IndexOutput output   = directory.CreateOutput(fileName, context);
            bool        success  = false;

            try
            {
                CodecUtil.WriteHeader(output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT);
                output.WriteVInt(infos.Size());
                foreach (FieldInfo fi in infos)
                {
                    FieldInfo.IndexOptions? indexOptions = fi.FieldIndexOptions;
                    sbyte bits = 0x0;
                    if (fi.HasVectors())
                    {
                        bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR;
                    }
                    if (fi.OmitsNorms())
                    {
                        bits |= Lucene46FieldInfosFormat.OMIT_NORMS;
                    }
                    if (fi.HasPayloads())
                    {
                        bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS;
                    }
                    if (fi.Indexed)
                    {
                        bits |= Lucene46FieldInfosFormat.IS_INDEXED;
                        Debug.Assert(indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.HasPayloads());
                        if (indexOptions == FieldInfo.IndexOptions.DOCS_ONLY)
                        {
                            bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
                        }
                        else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
                        {
                            bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
                        }
                        else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS)
                        {
                            bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS;
                        }
                    }
                    output.WriteString(fi.Name);
                    output.WriteVInt(fi.Number);
                    output.WriteByte((byte)bits);

                    // pack the DV types in one byte
                    sbyte dv  = DocValuesByte(fi.DocValuesType);
                    sbyte nrm = DocValuesByte(fi.NormType);
                    Debug.Assert((dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0);
                    sbyte val = unchecked ((sbyte)(0xff & ((nrm << 4) | dv)));
                    output.WriteByte((byte)val);
                    output.WriteLong(fi.DocValuesGen);
                    output.WriteStringStringMap(fi.Attributes());
                }
                CodecUtil.WriteFooter(output);
                success = true;
            }
            finally
            {
                if (success)
                {
                    output.Dispose();
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(output);
                }
            }
        }
 public override void Init(IndexOutput termsOut)
 {
     CodecUtil.WriteHeader(termsOut, TERMS_CODEC, VERSION_CURRENT);
     termsOut.WriteVInt32(Lucene41PostingsFormat.BLOCK_SIZE);
 }
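A matching reader-side Init would typically re-check this header and confirm that the block size recorded at write time equals the one the reader was compiled with. A minimal sketch, assuming the reader shares the writer's TERMS_CODEC, VERSION_START, and VERSION_CURRENT constants; this illustrates the pattern rather than the library's exact implementation:
 public override void Init(IndexInput termsIn)
 {
     // Verify the terms dictionary was written by a matching postings writer (sketch).
     CodecUtil.CheckHeader(termsIn, TERMS_CODEC, VERSION_START, VERSION_CURRENT);
     int indexBlockSize = termsIn.ReadVInt32();
     if (indexBlockSize != Lucene41PostingsFormat.BLOCK_SIZE)
     {
         // A mismatch means the index was written with a different block size than this reader expects.
         throw new InvalidOperationException("index-time BLOCK_SIZE (" + indexBlockSize + ") != read-time BLOCK_SIZE (" + Lucene41PostingsFormat.BLOCK_SIZE + ")");
     }
 }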
Example #9
 private void WriteHeader(IndexOutput @out)
 {
     CodecUtil.WriteHeader(@out, TERMS_CODEC_NAME, TERMS_VERSION_CURRENT);
 }
Example #10
        protected BinaryDictionary()
        {
            int[]      targetMapOffsets = null, targetMap = null;
            string[]   posDict          = null;
            string[]   inflFormDict     = null;
            string[]   inflTypeDict     = null;
            ByteBuffer buffer           = null;

            using (Stream mapIS = GetResource(TARGETMAP_FILENAME_SUFFIX))
            {
                DataInput @in = new InputStreamDataInput(mapIS);
                CodecUtil.CheckHeader(@in, TARGETMAP_HEADER, VERSION, VERSION);
                targetMap        = new int[@in.ReadVInt32()];
                targetMapOffsets = new int[@in.ReadVInt32()];
                int accum = 0, sourceId = 0;
                for (int ofs = 0; ofs < targetMap.Length; ofs++)
                {
                    int val = @in.ReadVInt32();
                    if ((val & 0x01) != 0)
                    {
                        targetMapOffsets[sourceId] = ofs;
                        sourceId++;
                    }
                    accum         += (int)((uint)val) >> 1;
                    targetMap[ofs] = accum;
                }
                if (sourceId + 1 != targetMapOffsets.Length)
                {
                    throw new IOException("targetMap file format broken");
                }
                targetMapOffsets[sourceId] = targetMap.Length;
            }

            using (Stream posIS = GetResource(POSDICT_FILENAME_SUFFIX))
            {
                DataInput @in = new InputStreamDataInput(posIS);
                CodecUtil.CheckHeader(@in, POSDICT_HEADER, VERSION, VERSION);
                int posSize = @in.ReadVInt32();
                posDict      = new string[posSize];
                inflTypeDict = new string[posSize];
                inflFormDict = new string[posSize];
                for (int j = 0; j < posSize; j++)
                {
                    posDict[j]      = @in.ReadString();
                    inflTypeDict[j] = @in.ReadString();
                    inflFormDict[j] = @in.ReadString();
                    // this is how we encode null inflections
                    if (inflTypeDict[j].Length == 0)
                    {
                        inflTypeDict[j] = null;
                    }
                    if (inflFormDict[j].Length == 0)
                    {
                        inflFormDict[j] = null;
                    }
                }
            }

            ByteBuffer tmpBuffer;

            using (Stream dictIS = GetResource(DICT_FILENAME_SUFFIX))
            {
                // no buffering here, as we load in one large buffer
                DataInput @in = new InputStreamDataInput(dictIS);
                CodecUtil.CheckHeader(@in, DICT_HEADER, VERSION, VERSION);
                int size = @in.ReadVInt32();
                tmpBuffer = ByteBuffer.Allocate(size); // AllocateDirect..?
                int read = dictIS.Read(tmpBuffer.Array, 0, size);
                if (read != size)
                {
                    throw new EndOfStreamException("Cannot read whole dictionary");
                }
            }
            buffer = tmpBuffer.AsReadOnlyBuffer();

            this.targetMap        = targetMap;
            this.targetMapOffsets = targetMapOffsets;
            this.posDict          = posDict;
            this.inflTypeDict     = inflTypeDict;
            this.inflFormDict     = inflFormDict;
            this.buffer           = buffer;
        }
Example #11
        /// <summary>
        /// Reads the snapshots information from the given <see cref="Directory"/>. This
        /// method can be used if the snapshots information is needed, however you
        /// cannot instantiate the deletion policy (because e.g., some other process
        /// keeps a lock on the snapshots directory).
        /// </summary>
        private void LoadPriorSnapshots()
        {
            UninterruptableMonitor.Enter(this);
            try
            {
                long           genLoaded     = -1;
                Exception      ioe           = null; // LUCENENET: No need to cast to IOException
                IList <string> snapshotFiles = new JCG.List <string>();
                foreach (string file in dir.ListAll())
                {
                    if (file.StartsWith(SNAPSHOTS_PREFIX, StringComparison.Ordinal))
                    {
                        // LUCENENET: Optimized to not allocate a substring during the parse
                        long gen = Long.Parse(file, SNAPSHOTS_PREFIX.Length, file.Length - SNAPSHOTS_PREFIX.Length, radix: 10);
                        if (genLoaded == -1 || gen > genLoaded)
                        {
                            snapshotFiles.Add(file);
                            IDictionary <long, int> m = new Dictionary <long, int>();
                            IndexInput @in            = dir.OpenInput(file, IOContext.DEFAULT);
                            try
                            {
                                CodecUtil.CheckHeader(@in, CODEC_NAME, VERSION_START, VERSION_START);
                                int count = @in.ReadVInt32();
                                for (int i = 0; i < count; i++)
                                {
                                    long commitGen = @in.ReadVInt64();
                                    int  refCount  = @in.ReadVInt32();
                                    m[commitGen] = refCount;
                                }
                            }
                            catch (Exception ioe2) when(ioe2.IsIOException())
                            {
                                // Save first exception & throw in the end
                                if (ioe is null)
                                {
                                    ioe = ioe2;
                                }
                            }
                            finally
                            {
                                @in.Dispose();
                            }

                            genLoaded = gen;
                            m_refCounts.Clear();
                            m_refCounts.PutAll(m);
                        }
                    }
                }

                if (genLoaded == -1)
                {
                    // Nothing was loaded...
                    if (ioe != null)
                    {
                        // ... not for lack of trying:
                        ExceptionDispatchInfo.Capture(ioe).Throw(); // LUCENENET: Rethrow to preserve stack details from the original throw
                    }
                }
                else
                {
                    if (snapshotFiles.Count > 1)
                    {
                        // Remove any broken / old snapshot files:
                        string curFileName = SNAPSHOTS_PREFIX + genLoaded;
                        foreach (string file in snapshotFiles)
                        {
                            if (!curFileName.Equals(file, StringComparison.Ordinal))
                            {
                                dir.DeleteFile(file);
                            }
                        }
                    }
                    nextWriteGen = 1 + genLoaded;
                }
            }
            finally
            {
                UninterruptableMonitor.Exit(this);
            }
        }
Example #12
        internal void Persist()
        {
            UninterruptableMonitor.Enter(this);
            try
            {
                string      fileName = SNAPSHOTS_PREFIX + nextWriteGen;
                IndexOutput @out     = dir.CreateOutput(fileName, IOContext.DEFAULT);
                bool        success  = false;
                try
                {
                    CodecUtil.WriteHeader(@out, CODEC_NAME, VERSION_CURRENT);
                    @out.WriteVInt32(m_refCounts.Count);
                    foreach (KeyValuePair <long, int> ent in m_refCounts)
                    {
                        @out.WriteVInt64(ent.Key);
                        @out.WriteVInt32(ent.Value);
                    }
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        IOUtils.DisposeWhileHandlingException(@out);
                        try
                        {
                            dir.DeleteFile(fileName);
                        }
                        catch (Exception e) when(e.IsException())
                        {
                            // Suppress so we keep throwing original exception
                        }
                    }
                    else
                    {
                        IOUtils.Dispose(@out);
                    }
                }

                dir.Sync(/*Collections.singletonList(*/ new[] { fileName } /*)*/);

                if (nextWriteGen > 0)
                {
                    string lastSaveFile = SNAPSHOTS_PREFIX + (nextWriteGen - 1);
                    try
                    {
                        dir.DeleteFile(lastSaveFile);
                    }
                    catch (Exception ioe) when(ioe.IsIOException())
                    {
                        // OK: likely it didn't exist
                    }
                }

                nextWriteGen++;
            }
            finally
            {
                UninterruptableMonitor.Exit(this);
            }
        }
Example #13
 protected override int ReadIndexHeader(IndexInput input)
 {
     return(CodecUtil.CheckHeader(input, APPENDING_TERMS_INDEX_CODEC_NAME,
                                  BlockTreeTermsWriter.VERSION_START,
                                  BlockTreeTermsWriter.VERSION_CURRENT));
 }
Example #14
        /// <summary>
        /// Reads the snapshots information from the given <see cref="Directory"/>. This
        /// method can be used if the snapshots information is needed, however you
        /// cannot instantiate the deletion policy (because e.g., some other process
        /// keeps a lock on the snapshots directory).
        /// </summary>
        private void LoadPriorSnapshots()
        {
            lock (this)
            {
                long           genLoaded     = -1;
                IOException    ioe           = null;
                IList <string> snapshotFiles = new List <string>();
                foreach (string file in dir.ListAll())
                {
                    if (file.StartsWith(SNAPSHOTS_PREFIX, StringComparison.Ordinal))
                    {
                        long gen = Convert.ToInt64(file.Substring(SNAPSHOTS_PREFIX.Length), CultureInfo.InvariantCulture);
                        if (genLoaded == -1 || gen > genLoaded)
                        {
                            snapshotFiles.Add(file);
                            IDictionary <long, int> m = new Dictionary <long, int>();
                            IndexInput @in            = dir.OpenInput(file, IOContext.DEFAULT);
                            try
                            {
                                CodecUtil.CheckHeader(@in, CODEC_NAME, VERSION_START, VERSION_START);
                                int count = @in.ReadVInt32();
                                for (int i = 0; i < count; i++)
                                {
                                    long commitGen = @in.ReadVInt64();
                                    int  refCount  = @in.ReadVInt32();
                                    m[commitGen] = refCount;
                                }
                            }
                            catch (IOException ioe2)
                            {
                                // Save first exception & throw in the end
                                if (ioe == null)
                                {
                                    ioe = ioe2;
                                }
                            }
                            finally
                            {
                                @in.Dispose();
                            }

                            genLoaded = gen;
                            m_refCounts.Clear();
                            m_refCounts.PutAll(m);
                        }
                    }
                }

                if (genLoaded == -1)
                {
                    // Nothing was loaded...
                    if (ioe != null)
                    {
                        // ... not for lack of trying:
                        throw ioe;
                    }
                }
                else
                {
                    if (snapshotFiles.Count > 1)
                    {
                        // Remove any broken / old snapshot files:
                        string curFileName = SNAPSHOTS_PREFIX + genLoaded;
                        foreach (string file in snapshotFiles)
                        {
                            if (!curFileName.Equals(file, StringComparison.Ordinal))
                            {
                                dir.DeleteFile(file);
                            }
                        }
                    }
                    nextWriteGen = 1 + genLoaded;
                }
            }
        }
        public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
        {
            string      fileName = IndexFileNames.SegmentFileName(segmentName, "", Lucene40FieldInfosFormat.FIELD_INFOS_EXTENSION);
            IndexOutput output   = directory.CreateOutput(fileName, context);
            bool        success  = false;

            try
            {
                CodecUtil.WriteHeader(output, Lucene40FieldInfosFormat.CODEC_NAME, Lucene40FieldInfosFormat.FORMAT_CURRENT);
                output.WriteVInt32(infos.Count);
                foreach (FieldInfo fi in infos)
                {
                    IndexOptions indexOptions = fi.IndexOptions;
                    sbyte        bits         = 0x0;
                    if (fi.HasVectors)
                    {
                        bits |= Lucene40FieldInfosFormat.STORE_TERMVECTOR;
                    }
                    if (fi.OmitsNorms)
                    {
                        bits |= Lucene40FieldInfosFormat.OMIT_NORMS;
                    }
                    if (fi.HasPayloads)
                    {
                        bits |= Lucene40FieldInfosFormat.STORE_PAYLOADS;
                    }
                    if (fi.IsIndexed)
                    {
                        bits |= Lucene40FieldInfosFormat.IS_INDEXED;
                        Debug.Assert(indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.HasPayloads);
                        if (indexOptions == IndexOptions.DOCS_ONLY)
                        {
                            bits |= Lucene40FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
                        }
                        else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
                        {
                            bits |= Lucene40FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
                        }
                        else if (indexOptions == IndexOptions.DOCS_AND_FREQS)
                        {
                            bits |= Lucene40FieldInfosFormat.OMIT_POSITIONS;
                        }
                    }
                    output.WriteString(fi.Name);
                    output.WriteVInt32(fi.Number);
                    output.WriteByte((byte)bits);

                    // pack the DV types in one byte
                    byte dv  = DocValuesByte(fi.DocValuesType, fi.GetAttribute(Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY));
                    byte nrm = DocValuesByte(fi.NormType, fi.GetAttribute(Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY));
                    Debug.Assert((dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0);
                    var val = (byte)(0xff & ((nrm << 4) | (byte)dv));
                    output.WriteByte(val);
                    output.WriteStringStringMap(fi.Attributes);
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    output.Dispose();
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(output);
                }
            }
        }
Example #16
        /// <summary>
        /// Creates a postings writer with the specified PackedInts overhead ratio </summary>
        // TODO: does this ctor even make sense?
        public Lucene41PostingsWriter(SegmentWriteState state, float acceptableOverheadRatio)
            : base()
        {
            docOut = state.Directory.CreateOutput(IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, Lucene41PostingsFormat.DOC_EXTENSION), state.Context);
            IndexOutput posOut  = null;
            IndexOutput payOut  = null;
            bool        success = false;

            try
            {
                CodecUtil.WriteHeader(docOut, DOC_CODEC, VERSION_CURRENT);
                forUtil = new ForUtil(acceptableOverheadRatio, docOut);
                if (state.FieldInfos.HasProx)
                {
                    posDeltaBuffer = new int[ForUtil.MAX_DATA_SIZE];
                    posOut         = state.Directory.CreateOutput(IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, Lucene41PostingsFormat.POS_EXTENSION), state.Context);
                    CodecUtil.WriteHeader(posOut, POS_CODEC, VERSION_CURRENT);

                    if (state.FieldInfos.HasPayloads)
                    {
                        payloadBytes        = new byte[128];
                        payloadLengthBuffer = new int[ForUtil.MAX_DATA_SIZE];
                    }
                    else
                    {
                        payloadBytes        = null;
                        payloadLengthBuffer = null;
                    }

                    if (state.FieldInfos.HasOffsets)
                    {
                        offsetStartDeltaBuffer = new int[ForUtil.MAX_DATA_SIZE];
                        offsetLengthBuffer     = new int[ForUtil.MAX_DATA_SIZE];
                    }
                    else
                    {
                        offsetStartDeltaBuffer = null;
                        offsetLengthBuffer     = null;
                    }

                    if (state.FieldInfos.HasPayloads || state.FieldInfos.HasOffsets)
                    {
                        payOut = state.Directory.CreateOutput(IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION), state.Context);
                        CodecUtil.WriteHeader(payOut, PAY_CODEC, VERSION_CURRENT);
                    }
                }
                else
                {
                    posDeltaBuffer         = null;
                    payloadLengthBuffer    = null;
                    offsetStartDeltaBuffer = null;
                    offsetLengthBuffer     = null;
                    payloadBytes           = null;
                }
                this.payOut = payOut;
                this.posOut = posOut;
                success     = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.DisposeWhileHandlingException(docOut, posOut, payOut);
                }
            }

            docDeltaBuffer = new int[ForUtil.MAX_DATA_SIZE];
            freqBuffer     = new int[ForUtil.MAX_DATA_SIZE];

            // TODO: should we try skipping every 2/4 blocks...?
            skipWriter = new Lucene41SkipWriter(maxSkipLevels, Lucene41PostingsFormat.BLOCK_SIZE, state.SegmentInfo.DocCount, docOut, posOut, payOut);

            encoded = new byte[ForUtil.MAX_ENCODED_SIZE];
        }
Example #17
        public override NumericDocValues GetNumeric(FieldInfo field)
        {
            UninterruptableMonitor.Enter(this);
            try
            {
                if (!numericInstances.TryGetValue(field.Number, out NumericDocValues instance))
                {
                    string     fileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "dat");
                    IndexInput input    = dir.OpenInput(fileName, state.Context);
                    bool       success  = false;
                    try
                    {
                        var type = field.GetAttribute(legacyKey).ToLegacyDocValuesType();

                        //switch (Enum.Parse(typeof(LegacyDocValuesType), field.GetAttribute(LegacyKey)))
                        //{
                        if (type == LegacyDocValuesType.VAR_INTS)
                        {
                            instance = LoadVarInt32sField(/* field, // LUCENENET: Never read */ input);
                        }
                        else if (type == LegacyDocValuesType.FIXED_INTS_8)
                        {
                            instance = LoadByteField(/* field, // LUCENENET: Never read */ input);
                        }
                        else if (type == LegacyDocValuesType.FIXED_INTS_16)
                        {
                            instance = LoadInt16Field(/* field, // LUCENENET: Never read */ input);
                        }
                        else if (type == LegacyDocValuesType.FIXED_INTS_32)
                        {
                            instance = LoadInt32Field(/* field, // LUCENENET: Never read */ input);
                        }
                        else if (type == LegacyDocValuesType.FIXED_INTS_64)
                        {
                            instance = LoadInt64Field(/* field, // LUCENENET: Never read */ input);
                        }
                        else if (type == LegacyDocValuesType.FLOAT_32)
                        {
                            instance = LoadSingleField(/* field, // LUCENENET: Never read */ input);
                        }
                        else if (type == LegacyDocValuesType.FLOAT_64)
                        {
                            instance = LoadDoubleField(/* field, // LUCENENET: Never read */ input);
                        }
                        else
                        {
                            throw AssertionError.Create();
                        }

                        CodecUtil.CheckEOF(input);
                        success = true;
                    }
                    finally
                    {
                        if (success)
                        {
                            IOUtils.Dispose(input);
                        }
                        else
                        {
                            IOUtils.DisposeWhileHandlingException(input);
                        }
                    }
                    numericInstances[field.Number] = instance;
                }
                return(instance);
            }
            finally
            {
                UninterruptableMonitor.Exit(this);
            }
        }
Example #18
        //static final boolean TEST = false;

        public FSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader)
        {
            string termsIndexFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTOrdTermsWriter.TERMS_INDEX_EXTENSION);
            string termsBlockFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTOrdTermsWriter.TERMS_BLOCK_EXTENSION);

            this.postingsReader = postingsReader;
            ChecksumIndexInput indexIn = null;
            IndexInput         blockIn = null;
            bool success = false;

            try
            {
                indexIn = state.Directory.OpenChecksumInput(termsIndexFileName, state.Context);
                blockIn = state.Directory.OpenInput(termsBlockFileName, state.Context);
                version = ReadHeader(indexIn);
                ReadHeader(blockIn);
                if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM)
                {
                    CodecUtil.ChecksumEntireFile(blockIn);
                }

                this.postingsReader.Init(blockIn);
                SeekDir(blockIn);

                FieldInfos fieldInfos = state.FieldInfos;
                int        numFields  = blockIn.ReadVInt();
                for (int i = 0; i < numFields; i++)
                {
                    FieldInfo fieldInfo        = fieldInfos.FieldInfo(blockIn.ReadVInt());
                    bool      hasFreq          = fieldInfo.FieldIndexOptions != FieldInfo.IndexOptions.DOCS_ONLY;
                    long      numTerms         = blockIn.ReadVLong();
                    long      sumTotalTermFreq = hasFreq ? blockIn.ReadVLong() : -1;
                    long      sumDocFreq       = blockIn.ReadVLong();
                    int       docCount         = blockIn.ReadVInt();
                    int       longsSize        = blockIn.ReadVInt();
                    var       index            = new FST <long?>(indexIn, PositiveIntOutputs.Singleton);

                    var current = new TermsReader(this, fieldInfo, blockIn, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index);

                    var previous = fields[fieldInfo.Name] = current;
                    CheckFieldSummary(state.SegmentInfo, indexIn, blockIn, current, previous);
                }
                if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM)
                {
                    CodecUtil.CheckFooter(indexIn);
                }
                else
                {
                    CodecUtil.CheckEOF(indexIn);
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(indexIn, blockIn);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(indexIn, blockIn);
                }
            }
        }
Example #19
        private void AddVarSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable <BytesRef> values, IEnumerable <long?> docToOrd)
        {
            field.PutAttribute(legacyKey, LegacyDocValuesType.BYTES_VAR_SORTED.ToString());

            CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

            CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

            /* values */

            long startPos = data.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream

            int valueCount = 0;

            foreach (BytesRef v in values)
            {
                data.WriteBytes(v.Bytes, v.Offset, v.Length);
                valueCount++;
            }

            /* addresses */

            long maxAddress = data.Position - startPos; // LUCENENET specific: Renamed from getFilePointer() to match FileStream

            index.WriteInt64(maxAddress);

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(valueCount != int.MaxValue);                           // unsupported by the 4.0 impl
            }
            PackedInt32s.Writer w = PackedInt32s.GetWriter(index, valueCount + 1, PackedInt32s.BitsRequired(maxAddress), PackedInt32s.DEFAULT);
            long currentPosition  = 0;

            foreach (BytesRef v in values)
            {
                w.Add(currentPosition);
                currentPosition += v.Length;
            }
            // write sentinel
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(currentPosition == maxAddress);
            }
            w.Add(currentPosition);
            w.Finish();

            /* ordinals */

            int maxDoc = state.SegmentInfo.DocCount;

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(valueCount > 0);
            }
            PackedInt32s.Writer ords = PackedInt32s.GetWriter(index, maxDoc, PackedInt32s.BitsRequired(valueCount - 1), PackedInt32s.DEFAULT);
            foreach (long n in docToOrd)
            {
                ords.Add((long)n);
            }
            ords.Finish();
        }
Example #20
        /// <summary>
        /// Sole constructor. </summary>
        public CompressingTermVectorsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode)
        {
            this.compressionMode = compressionMode;
            string segment = si.Name;
            bool   success = false;

            fieldInfos = fn;
            numDocs    = si.DocCount;
            ChecksumIndexInput indexStream = null;

            try
            {
                // Load the index into memory
                string indexStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION);
                indexStream = d.OpenChecksumInput(indexStreamFN, context);
                string codecNameIdx = formatName + CompressingTermVectorsWriter.CODEC_SFX_IDX;
                version = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingTermVectorsWriter.VERSION_START, CompressingTermVectorsWriter.VERSION_CURRENT);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.Position);                           // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                }
                indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);

                if (version >= CompressingTermVectorsWriter.VERSION_CHECKSUM)
                {
                    indexStream.ReadVInt64(); // the end of the data file
                    CodecUtil.CheckFooter(indexStream);
                }
                else
                {
#pragma warning disable 612, 618
                    CodecUtil.CheckEOF(indexStream);
#pragma warning restore 612, 618
                }
                indexStream.Dispose();
                indexStream = null;

                // Open the data file and read metadata
                string vectorsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, CompressingTermVectorsWriter.VECTORS_EXTENSION);
                vectorsStream = d.OpenInput(vectorsStreamFN, context);
                string codecNameDat = formatName + CompressingTermVectorsWriter.CODEC_SFX_DAT;
                int    version2     = CodecUtil.CheckHeader(vectorsStream, codecNameDat, CompressingTermVectorsWriter.VERSION_START, CompressingTermVectorsWriter.VERSION_CURRENT);
                if (version != version2)
                {
                    throw RuntimeException.Create("Version mismatch between stored fields index and data: " + version + " != " + version2);
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(CodecUtil.HeaderLength(codecNameDat) == vectorsStream.Position);                           // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                }
                packedIntsVersion = vectorsStream.ReadVInt32();
                chunkSize         = vectorsStream.ReadVInt32();
                decompressor      = compressionMode.NewDecompressor();
                this.reader       = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, CompressingTermVectorsWriter.BLOCK_SIZE, 0);

                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.DisposeWhileHandlingException(this, indexStream);
                }
            }
        }
Example #21
        /// <summary>
        /// expert: instantiates a new reader </summary>
        protected internal Lucene45DocValuesProducer(SegmentReadState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension)
        {
            string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension);
            // read in the entries from the metadata file.
            ChecksumIndexInput @in = state.Directory.OpenChecksumInput(metaName, state.Context);

            this.MaxDoc = state.SegmentInfo.DocCount;
            bool success = false;

            try
            {
                Version    = CodecUtil.CheckHeader(@in, metaCodec, Lucene45DocValuesFormat.VERSION_START, Lucene45DocValuesFormat.VERSION_CURRENT);
                Numerics   = new Dictionary <int, NumericEntry>();
                Ords       = new Dictionary <int, NumericEntry>();
                OrdIndexes = new Dictionary <int, NumericEntry>();
                Binaries   = new Dictionary <int, BinaryEntry>();
                SortedSets = new Dictionary <int, SortedSetEntry>();
                ReadFields(@in, state.FieldInfos);

                if (Version >= Lucene45DocValuesFormat.VERSION_CHECKSUM)
                {
                    CodecUtil.CheckFooter(@in);
                }
                else
                {
                    CodecUtil.CheckEOF(@in);
                }

                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(@in);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(@in);
                }
            }

            success = false;
            try
            {
                string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension);
                Data = state.Directory.OpenInput(dataName, state.Context);
                int version2 = CodecUtil.CheckHeader(Data, dataCodec, Lucene45DocValuesFormat.VERSION_START, Lucene45DocValuesFormat.VERSION_CURRENT);
                if (Version != version2)
                {
                    throw new Exception("Format versions mismatch");
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(this.Data);
                }
            }

            RamBytesUsed_Renamed = new AtomicLong(RamUsageEstimator.ShallowSizeOfInstance(this.GetType()));
        }
Ejemplo n.º 22
0
 private int ReadHeader(IndexInput @in)
 {
     return(CodecUtil.CheckHeader(@in, FSTTermsWriter.TERMS_CODEC_NAME, FSTTermsWriter.TERMS_VERSION_START, FSTTermsWriter.TERMS_VERSION_CURRENT));
 }
Ejemplo n.º 23
0
        public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext context)
        {
            string             fileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
            ChecksumIndexInput input    = directory.OpenChecksumInput(fileName, context);

            bool success = false;

            try
            {
                int codecVersion = CodecUtil.CheckHeader(input, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_START, Lucene46FieldInfosFormat.FORMAT_CURRENT);

                int         size  = input.ReadVInt32(); //read in the size
                FieldInfo[] infos = new FieldInfo[size];

                for (int i = 0; i < size; i++)
                {
                    string       name            = input.ReadString();
                    int          fieldNumber     = input.ReadVInt32();
                    byte         bits            = input.ReadByte();
                    bool         isIndexed       = (bits & Lucene46FieldInfosFormat.IS_INDEXED) != 0;
                    bool         storeTermVector = (bits & Lucene46FieldInfosFormat.STORE_TERMVECTOR) != 0;
                    bool         omitNorms       = (bits & Lucene46FieldInfosFormat.OMIT_NORMS) != 0;
                    bool         storePayloads   = (bits & Lucene46FieldInfosFormat.STORE_PAYLOADS) != 0;
                    IndexOptions indexOptions;
                    if (!isIndexed)
                    {
                        indexOptions = IndexOptions.NONE;
                    }
                    else if ((bits & Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0)
                    {
                        indexOptions = IndexOptions.DOCS_ONLY;
                    }
                    else if ((bits & Lucene46FieldInfosFormat.OMIT_POSITIONS) != 0)
                    {
                        indexOptions = IndexOptions.DOCS_AND_FREQS;
                    }
                    else if ((bits & Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0)
                    {
                        indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
                    }
                    else
                    {
                        indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
                    }

                    // DV Types are packed in one byte
                    byte          val                       = input.ReadByte();
                    DocValuesType docValuesType             = GetDocValuesType(input, (sbyte)(val & 0x0F));
                    DocValuesType normsType                 = GetDocValuesType(input, (sbyte)(((int)((uint)val >> 4)) & 0x0F));
                    long          dvGen                     = input.ReadInt64();
                    IDictionary <string, string> attributes = input.ReadStringStringMap();
                    infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType, normsType, Collections.UnmodifiableMap(attributes));
                    infos[i].DocValuesGen = dvGen;
                }

                if (codecVersion >= Lucene46FieldInfosFormat.FORMAT_CHECKSUM)
                {
                    CodecUtil.CheckFooter(input);
                }
                else
                {
#pragma warning disable 612, 618
                    CodecUtil.CheckEOF(input);
#pragma warning restore 612, 618
                }
                FieldInfos fieldInfos = new FieldInfos(infos);
                success = true;
                return(fieldInfos);
            }
            finally
            {
                if (success)
                {
                    input.Dispose();
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(input);
                }
            }
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Helper method that reads CFS entries from an input stream </summary>
        private static IDictionary <string, FileEntry> ReadEntries(IndexInputSlicer handle, Directory dir, string name)
        {
            IOException        priorE        = null;
            IndexInput         stream        = null;
            ChecksumIndexInput entriesStream = null;

            // read the first VInt. If it is negative, it's the version number
            // otherwise it's the count (pre-3.1 indexes)
            try
            {
                IDictionary <string, FileEntry> mapping;
#pragma warning disable 612, 618
                stream = handle.OpenFullSlice();
#pragma warning restore 612, 618
                int firstInt = stream.ReadVInt32();
                // impossible for 3.0 to have 63 files in a .cfs, CFS writer was not visible
                // and separate norms/etc are outside of cfs.
                if (firstInt == CODEC_MAGIC_BYTE1)
                {
                    sbyte secondByte = (sbyte)stream.ReadByte();
                    sbyte thirdByte  = (sbyte)stream.ReadByte();
                    sbyte fourthByte = (sbyte)stream.ReadByte();
                    if (secondByte != CODEC_MAGIC_BYTE2 || thirdByte != CODEC_MAGIC_BYTE3 || fourthByte != CODEC_MAGIC_BYTE4)
                    {
                        throw new CorruptIndexException("Illegal/impossible header for CFS file: " + secondByte + "," + thirdByte + "," + fourthByte);
                    }
                    int    version         = CodecUtil.CheckHeaderNoMagic(stream, CompoundFileWriter.DATA_CODEC, CompoundFileWriter.VERSION_START, CompoundFileWriter.VERSION_CURRENT);
                    string entriesFileName = IndexFileNames.SegmentFileName(
                        IndexFileNames.StripExtension(name), "",
                        IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION);
                    entriesStream = dir.OpenChecksumInput(entriesFileName, IOContext.READ_ONCE);
                    CodecUtil.CheckHeader(entriesStream, CompoundFileWriter.ENTRY_CODEC, CompoundFileWriter.VERSION_START, CompoundFileWriter.VERSION_CURRENT);
                    int numEntries = entriesStream.ReadVInt32();
                    mapping = new Dictionary <string, FileEntry>(numEntries);
                    for (int i = 0; i < numEntries; i++)
                    {
                        FileEntry fileEntry = new FileEntry();
                        string    id        = entriesStream.ReadString();
                        FileEntry previous  = mapping.Put(id, fileEntry);
                        if (previous != null)
                        {
                            throw new CorruptIndexException("Duplicate cfs entry id=" + id + " in CFS: " + entriesStream);
                        }
                        fileEntry.Offset = entriesStream.ReadInt64();
                        fileEntry.Length = entriesStream.ReadInt64();
                    }
                    if (version >= CompoundFileWriter.VERSION_CHECKSUM)
                    {
                        CodecUtil.CheckFooter(entriesStream);
                    }
                    else
                    {
#pragma warning disable 612, 618
                        CodecUtil.CheckEOF(entriesStream);
#pragma warning restore 612, 618
                    }
                }
                else
                {
                    // TODO remove once 3.x is not supported anymore
                    mapping = ReadLegacyEntries(stream, firstInt);
                }
                return(mapping);
            }
            catch (IOException ioe)
            {
                priorE = ioe;
            }
            finally
            {
                IOUtils.DisposeWhileHandlingException(priorE, stream, entriesStream);
            }
            // unreachable in practice: if the try block returned we never get here, and the
            // finally block rethrows priorE; the compiler still needs a terminating statement
            // (leftover from the Java try-with-resources original).
            throw new InvalidOperationException("impossible to get here");
        }
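A minimal sketch of how a compound-file reader might consume the map returned by ReadEntries. The OpenSlice signature and the local names (entries, nested) are assumptions for illustration, not taken from the example above:

            // Sketch only: iterate the offset/length map and open a bounded view per nested file.
            IDictionary<string, FileEntry> entries = ReadEntries(handle, dir, name);
            foreach (KeyValuePair<string, FileEntry> kvp in entries)
            {
                FileEntry entry = kvp.Value;
                // Each entry records where the nested file lives inside the .cfs data:
                // a slice covering [Offset, Offset + Length).
                IndexInput nested = handle.OpenSlice(kvp.Key, entry.Offset, entry.Length);
                // ... read the nested file through "nested" ...
                nested.Dispose();
            }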
Ejemplo n.º 25
0
        public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
        {
            string      fileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
            IndexOutput output   = directory.CreateOutput(fileName, context);
            bool        success  = false;

            try
            {
                CodecUtil.WriteHeader(output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT);
                output.WriteVInt32(infos.Count);
                foreach (FieldInfo fi in infos)
                {
                    IndexOptions indexOptions = fi.IndexOptions;
                    sbyte        bits         = 0x0;
                    if (fi.HasVectors)
                    {
                        bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR;
                    }
                    if (fi.OmitsNorms)
                    {
                        bits |= Lucene46FieldInfosFormat.OMIT_NORMS;
                    }
                    if (fi.HasPayloads)
                    {
                        bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS;
                    }
                    if (fi.IsIndexed)
                    {
                        bits |= Lucene46FieldInfosFormat.IS_INDEXED;
                        // LUCENENET specific - to avoid boxing, changed from CompareTo() to IndexOptionsComparer.Compare()
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(IndexOptionsComparer.Default.Compare(indexOptions, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.HasPayloads);
                        }
                        if (indexOptions == IndexOptions.DOCS_ONLY)
                        {
                            bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
                        }
                        else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
                        {
                            bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
                        }
                        else if (indexOptions == IndexOptions.DOCS_AND_FREQS)
                        {
                            bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS;
                        }
                    }
                    output.WriteString(fi.Name);
                    output.WriteVInt32(fi.Number);
                    output.WriteByte((byte)bits);

                    // pack the DV types in one byte
                    var dv  = DocValuesByte(fi.DocValuesType);
                    var nrm = DocValuesByte(fi.NormType);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert((dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0);
                    }
                    var val = (byte)(0xff & ((nrm << 4) | (byte)dv));
                    output.WriteByte(val);
                    output.WriteInt64(fi.DocValuesGen);
                    output.WriteStringStringMap(fi.Attributes);
                }
                CodecUtil.WriteFooter(output);
                success = true;
            }
            finally
            {
                if (success)
                {
                    output.Dispose();
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(output);
                }
            }
        }
Ejemplo n.º 26
0
        public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext)
        {
            string     fileName = IndexFileNames.SegmentFileName(segmentName, "", Lucene40FieldInfosFormat.FIELD_INFOS_EXTENSION);
            IndexInput input    = directory.OpenInput(fileName, iocontext);

            bool success = false;

            try
            {
                CodecUtil.CheckHeader(input, Lucene40FieldInfosFormat.CODEC_NAME, Lucene40FieldInfosFormat.FORMAT_START, Lucene40FieldInfosFormat.FORMAT_CURRENT);

                int         size  = input.ReadVInt32(); //read in the size
                FieldInfo[] infos = new FieldInfo[size];

                for (int i = 0; i < size; i++)
                {
                    string       name            = input.ReadString();
                    int          fieldNumber     = input.ReadVInt32();
                    byte         bits            = input.ReadByte();
                    bool         isIndexed       = (bits & Lucene40FieldInfosFormat.IS_INDEXED) != 0;
                    bool         storeTermVector = (bits & Lucene40FieldInfosFormat.STORE_TERMVECTOR) != 0;
                    bool         omitNorms       = (bits & Lucene40FieldInfosFormat.OMIT_NORMS) != 0;
                    bool         storePayloads   = (bits & Lucene40FieldInfosFormat.STORE_PAYLOADS) != 0;
                    IndexOptions indexOptions;
                    if (!isIndexed)
                    {
                        indexOptions = IndexOptions.NONE;
                    }
                    else if ((bits & Lucene40FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0)
                    {
                        indexOptions = IndexOptions.DOCS_ONLY;
                    }
                    else if ((bits & Lucene40FieldInfosFormat.OMIT_POSITIONS) != 0)
                    {
                        indexOptions = IndexOptions.DOCS_AND_FREQS;
                    }
                    else if ((bits & Lucene40FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0)
                    {
                        indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
                    }
                    else
                    {
                        indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
                    }

                    // LUCENE-3027: past indices were able to write
                    // storePayloads=true when omitTFAP is also true,
                    // which is invalid.  We correct that, here:
                    if (isIndexed && indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0)
                    {
                        storePayloads = false;
                    }
                    // DV Types are packed in one byte
                    byte val = input.ReadByte();
                    LegacyDocValuesType          oldValuesType = GetDocValuesType((sbyte)(val & 0x0F));
                    LegacyDocValuesType          oldNormsType  = GetDocValuesType((sbyte)(((int)((uint)val >> 4)) & 0x0F));
                    IDictionary <string, string> attributes    = input.ReadStringStringMap();
                    if (oldValuesType.GetMapping() != DocValuesType.NONE)
                    {
                        attributes[LEGACY_DV_TYPE_KEY] = oldValuesType.ToString();
                    }
                    if (oldNormsType.GetMapping() != DocValuesType.NONE)
                    {
                        if (oldNormsType.GetMapping() != DocValuesType.NUMERIC)
                        {
                            throw new CorruptIndexException("invalid norm type: " + oldNormsType + " (resource=" + input + ")");
                        }
                        attributes[LEGACY_NORM_TYPE_KEY] = oldNormsType.ToString();
                    }
                    infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, oldValuesType.GetMapping(), oldNormsType.GetMapping(), attributes);
                }
                CodecUtil.CheckEOF(input);
                FieldInfos fieldInfos = new FieldInfos(infos);
                success = true;
                return(fieldInfos);
            }
            finally
            {
                if (success)
                {
                    input.Dispose();
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(input);
                }
            }
        }
Ejemplo n.º 27
0
        public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, string segment, int indexDivisor,
                                        IComparer <BytesRef> termComp, string segmentSuffix, IOContext context)
        {
            this.termComp = termComp;

            Debug.Assert(indexDivisor == -1 || indexDivisor > 0);

            input = dir.OpenInput(IndexFileNames.SegmentFileName(segment, segmentSuffix, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION), context);

            bool success = false;

            try
            {
                version = ReadHeader(input);

                if (version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
                {
                    CodecUtil.ChecksumEntireFile(input);
                }

                indexInterval = input.ReadInt32();
                if (indexInterval < 1)
                {
                    throw new CorruptIndexException("invalid indexInterval: " + indexInterval + " (resource=" + input + ")");
                }
                this.indexDivisor = indexDivisor;

                if (indexDivisor < 0)
                {
                    totalIndexInterval = indexInterval;
                }
                else
                {
                    // In case terms index gets loaded, later, on demand
                    totalIndexInterval = indexInterval * indexDivisor;
                }
                Debug.Assert(totalIndexInterval > 0);

                SeekDir(input, dirOffset);

                // Read directory
                int numFields = input.ReadVInt32();
                if (numFields < 0)
                {
                    throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + input + ")");
                }
                //System.out.println("FGR: init seg=" + segment + " div=" + indexDivisor + " nF=" + numFields);
                for (int i = 0; i < numFields; i++)
                {
                    int field         = input.ReadVInt32();
                    int numIndexTerms = input.ReadVInt32();
                    if (numIndexTerms < 0)
                    {
                        throw new CorruptIndexException("invalid numIndexTerms: " + numIndexTerms + " (resource=" + input + ")");
                    }
                    long termsStart         = input.ReadVInt64();
                    long indexStart         = input.ReadVInt64();
                    long packedIndexStart   = input.ReadVInt64();
                    long packedOffsetsStart = input.ReadVInt64();
                    if (packedIndexStart < indexStart)
                    {
                        throw new CorruptIndexException("invalid packedIndexStart: " + packedIndexStart + " indexStart: " + indexStart + " numIndexTerms: " + numIndexTerms + " (resource=" + input + ")");
                    }
                    FieldInfo      fieldInfo = fieldInfos.FieldInfo(field);
                    FieldIndexData previous  = fields.Put(fieldInfo, new FieldIndexData(this, fieldInfo, numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart));
                    if (previous != null)
                    {
                        throw new CorruptIndexException("duplicate field: " + fieldInfo.Name + " (resource=" + input + ")");
                    }
                }
                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(input);
                }
                if (indexDivisor > 0)
                {
                    input.Dispose();
                    input = null;
                    if (success)
                    {
                        indexLoaded = true;
                    }
                    termBytesReader = termBytes.Freeze(true);
                }
            }
        }
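The SeekDir helper called above is not shown in these examples; a rough sketch of what such a helper typically does in readers of this style, assumed from the VERSION_CHECKSUM handling rather than taken verbatim from the class:

        // Sketch only (assumed): the writer records the directory start offset near the end
        // of the file, so the reader jumps there before reading the per-field directory.
        private void SeekDir(IndexInput input, long dirOffset)
        {
            if (version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
            {
                // Skip back over the codec footer plus the 8 bytes holding the offset.
                input.Seek(input.Length - CodecUtil.FooterLength() - 8);
                dirOffset = input.ReadInt64();
            }
            input.Seek(dirOffset);
        }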
Ejemplo n.º 28
0
        internal Lucene42DocValuesProducer(SegmentReadState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension)
        {
            maxDoc = state.SegmentInfo.DocCount;
            string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension);
            // read in the entries from the metadata file.
            ChecksumIndexInput @in = state.Directory.OpenChecksumInput(metaName, state.Context);
            bool success           = false;

            ramBytesUsed = new AtomicInt64(RamUsageEstimator.ShallowSizeOfInstance(this.GetType()));
            try
            {
                version  = CodecUtil.CheckHeader(@in, metaCodec, VERSION_START, VERSION_CURRENT);
                numerics = new Dictionary <int, NumericEntry>();
                binaries = new Dictionary <int, BinaryEntry>();
                fsts     = new Dictionary <int, FSTEntry>();
                ReadFields(@in /*, state.FieldInfos // LUCENENET: Never read */);

                if (version >= VERSION_CHECKSUM)
                {
                    CodecUtil.CheckFooter(@in);
                }
                else
                {
#pragma warning disable 612, 618
                    CodecUtil.CheckEOF(@in);
#pragma warning restore 612, 618
                }

                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Dispose(@in);
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(@in);
                }
            }

            success = false;
            try
            {
                string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension);
                data = state.Directory.OpenInput(dataName, state.Context);
                int version2 = CodecUtil.CheckHeader(data, dataCodec, VERSION_START, VERSION_CURRENT);
                if (version != version2)
                {
                    throw new CorruptIndexException("Format versions mismatch");
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.DisposeWhileHandlingException(this.data);
                }
            }
        }
Ejemplo n.º 29
0
        internal MemoryDocValuesProducer(SegmentReadState state, string dataCodec, string dataExtension,
                                         string metaCodec, string metaExtension)
        {
            maxDoc = state.SegmentInfo.DocCount;
            string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension);
            // read in the entries from the metadata file.
            ChecksumIndexInput @in = state.Directory.OpenChecksumInput(metaName, state.Context);
            bool success           = false;

            try
            {
                version  = CodecUtil.CheckHeader(@in, metaCodec, VERSION_START, VERSION_CURRENT);
                numerics = new Dictionary <int, NumericEntry>();
                binaries = new Dictionary <int, BinaryEntry>();
                fsts     = new Dictionary <int, FSTEntry>();
                ReadFields(@in, state.FieldInfos);
                if (version >= VERSION_CHECKSUM)
                {
                    CodecUtil.CheckFooter(@in);
                }
                else
                {
                    CodecUtil.CheckEOF(@in);
                }
                ramBytesUsed_Renamed = new AtomicLong(RamUsageEstimator.ShallowSizeOfInstance(this.GetType()));
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(@in);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(@in);
                }
            }

            success = false;
            try
            {
                string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix,
                                                                 dataExtension);
                data = state.Directory.OpenInput(dataName, state.Context);

                int version2 = CodecUtil.CheckHeader(data, dataCodec, VERSION_START, VERSION_CURRENT);
                if (version != version2)
                {
                    throw new CorruptIndexException("Format versions mismatch");
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(this.data);
                }
            }
        }
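Distilled from examples 21, 28 and 29 above, the shared two-file open pattern looks roughly like this; the codec names and the dvm/dvd extensions are placeholders, not taken from any of the formats shown:

            // Condensed sketch of the meta/data pattern; names are illustrative only.
            string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, "dvm");
            ChecksumIndexInput meta = state.Directory.OpenChecksumInput(metaName, state.Context);
            int metaVersion = CodecUtil.CheckHeader(meta, "MyMetaCodec", VERSION_START, VERSION_CURRENT);
            // ... read per-field entries from "meta", then CheckFooter (new) or CheckEOF (old) and dispose ...

            string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, "dvd");
            IndexInput data = state.Directory.OpenInput(dataName, state.Context);
            int dataVersion = CodecUtil.CheckHeader(data, "MyDataCodec", VERSION_START, VERSION_CURRENT);
            if (metaVersion != dataVersion)
            {
                // Both files are produced together, so a version mismatch means corruption.
                throw new CorruptIndexException("Format versions mismatch");
            }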