public static void ReadLegacyInfos(SegmentInfos infos, Directory directory, IndexInput input, int format)
        {
            infos.Version = input.ReadLong(); // read version
            infos.Counter = input.ReadInt(); // read counter
            Lucene3xSegmentInfoReader reader = new Lucene3xSegmentInfoReader();
            for (int i = input.ReadInt(); i > 0; i--) // read segmentInfos
            {
                SegmentCommitInfo siPerCommit = reader.ReadLegacySegmentInfo(directory, format, input);
                SegmentInfo si = siPerCommit.Info;

                if (si.Version == null)
                {
                    // Could be a 3.0 - try to open the doc stores - if it fails, it's a
                    // 2.x segment, and an IndexFormatTooOldException will be thrown,
                    // which is what we want.
                    Directory dir = directory;
                    if (Lucene3xSegmentInfoFormat.GetDocStoreOffset(si) != -1)
                    {
                        if (Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si))
                        {
                            dir = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(Lucene3xSegmentInfoFormat.GetDocStoreSegment(si), "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), IOContext.READONCE, false);
                        }
                    }
                    else if (si.UseCompoundFile)
                    {
                        dir = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), IOContext.READONCE, false);
                    }

                    try
                    {
                        Lucene3xStoredFieldsReader.CheckCodeVersion(dir, Lucene3xSegmentInfoFormat.GetDocStoreSegment(si));
                    }
                    finally
                    {
                        // If we opened the directory, close it
                        if (dir != directory)
                        {
                            dir.Dispose();
                        }
                    }

                    // Above call succeeded, so it's a 3.0 segment. Upgrade it so the next
                    // time the segment is read, its version won't be null and we won't
                    // need to open FieldsReader every time for each such segment.
                    si.Version = "3.0";
                }
                else if (si.Version.Equals("2.x"))
                {
                    // If it's a 3x index touched by 3.1+ code, then segments record their
                    // version, whether they are 2.x ones or not. We detect that and throw
                    // appropriate exception.
                    throw new IndexFormatTooOldException("segment " + si.Name + " in resource " + input, si.Version);
                }
                infos.Add(siPerCommit);
            }

            infos.UserData = input.ReadStringStringMap();
        }
예제 #2
0
        public SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi)
        {
            Input = i;
            FieldInfos = fis;
            IsIndex = isi;
            MaxSkipLevels = 1; // use single-level skip lists for formats > -3

            int firstInt = Input.ReadInt();
            if (firstInt >= 0)
            {
                // original-format file, without explicit format version number
                Format = 0;
                Size = firstInt;

                // back-compatible settings
                IndexInterval = 128;
                SkipInterval = int.MaxValue; // switch off skipTo optimization
            }
            else
            {
                // we have a format version number
                Format = firstInt;

                // check that it is a format we can understand
                if (Format > FORMAT_MINIMUM)
                {
                    throw new IndexFormatTooOldException(Input, Format, FORMAT_MINIMUM, FORMAT_CURRENT);
                }
                if (Format < FORMAT_CURRENT)
                {
                    throw new IndexFormatTooNewException(Input, Format, FORMAT_MINIMUM, FORMAT_CURRENT);
                }

                Size = Input.ReadLong(); // read the size

                IndexInterval = Input.ReadInt();
                SkipInterval = Input.ReadInt();
                MaxSkipLevels = Input.ReadInt();
                Debug.Assert(IndexInterval > 0, "indexInterval=" + IndexInterval + " is negative; must be > 0");
                Debug.Assert(SkipInterval > 0, "skipInterval=" + SkipInterval + " is negative; must be > 0");
            }
        }
        public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor,
            IComparer<BytesRef> termComp, String segmentSuffix, IOContext context)
        {
            _termComp = termComp;

            Debug.Assert(indexDivisor == -1 || indexDivisor > 0);

            _input =
                dir.OpenInput(
                    IndexFileNames.SegmentFileName(segment, segmentSuffix,
                        FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION),
                    context);

            var success = false;

            try
            {

                _version = ReadHeader(_input);

                if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
                    CodecUtil.ChecksumEntireFile(_input);
                
                indexInterval = _input.ReadInt();
                
                if (indexInterval < 1)
                {
                    throw new CorruptIndexException(String.Format("Invalid indexInterval: {0}, Resource: {1}",
                        indexInterval, _input));
                }

                _indexDivisor = indexDivisor;

                if (indexDivisor < 0)
                {
                    _totalIndexInterval = indexInterval;
                }
                else
                {
                    // In case terms index gets loaded, later, on demand
                    _totalIndexInterval = indexInterval*indexDivisor;
                }

                Debug.Assert(_totalIndexInterval > 0);

                SeekDir(_input, _dirOffset);

                // Read directory
                int numFields = _input.ReadVInt();

                if (numFields < 0)
                    throw new CorruptIndexException(String.Format("Invalid numFields: {0}, Resource: {1}", numFields,
                        _input));

                for (int i = 0; i < numFields; i++)
                {
                    int field = _input.ReadVInt();
                    int numIndexTerms = _input.ReadVInt();
                    if (numIndexTerms < 0)
                        throw new CorruptIndexException(String.Format("Invalid numIndexTerms: {0}, Resource: {1}",
                            numIndexTerms,
                            _input));

                    long termsStart = _input.ReadVLong();
                    long indexStart = _input.ReadVLong();
                    long packedIndexStart = _input.ReadVLong();
                    long packedOffsetsStart = _input.ReadVLong();

                    if (packedIndexStart < indexStart)
                        throw new CorruptIndexException(
                            String.Format(
                                "Invalid packedIndexStart: {0}, IndexStart: {1}, NumIndexTerms: {2}, Resource: {3}",
                                packedIndexStart,
                                indexStart, numIndexTerms, _input));

                    FieldInfo fieldInfo = fieldInfos.FieldInfo(field);

                    try
                    {
                        _fields.Add(fieldInfo,
                            new FieldIndexData(numIndexTerms, indexStart, termsStart, packedIndexStart,
                                packedOffsetsStart, this));
                    }
                    catch (ArgumentException)
                    {
                        throw new CorruptIndexException(String.Format("Duplicate field: {0}, Resource {1}",
                            fieldInfo.Name,
                            _input));
                    }


                }
                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(_input);
                }
                if (indexDivisor > 0)
                {
                    _input.Dispose();
                    _input = null;
                    if (success)
                        _indexLoaded = true;

                    _termBytesReader = _termBytes.Freeze(true);
                }
            }
        }
예제 #4
0
        public override void Init(IndexInput termsIn)
        {
            // Make sure we are talking to the matching past writer
            CodecUtil.CheckHeader(termsIn, TERMS_CODEC, VERSION_START, VERSION_CURRENT);

            SkipInterval = termsIn.ReadInt();
            MaxSkipLevels = termsIn.ReadInt();
            SkipMinimum = termsIn.ReadInt();
        }
        public Lucene3xStoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context)
        {
            string segment = Lucene3xSegmentInfoFormat.GetDocStoreSegment(si);
            int docStoreOffset = Lucene3xSegmentInfoFormat.GetDocStoreOffset(si);
            int size = si.DocCount;
            bool success = false;
            FieldInfos = fn;
            try
            {
                if (docStoreOffset != -1 && Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si))
                {
                    d = StoreCFSReader = new CompoundFileDirectory(si.Dir, IndexFileNames.SegmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false);
                }
                else
                {
                    StoreCFSReader = null;
                }
                FieldsStream = d.OpenInput(IndexFileNames.SegmentFileName(segment, "", FIELDS_EXTENSION), context);
                string indexStreamFN = IndexFileNames.SegmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
                IndexStream = d.OpenInput(indexStreamFN, context);

                Format = IndexStream.ReadInt();

                if (Format < FORMAT_MINIMUM)
                {
                    throw new IndexFormatTooOldException(IndexStream, Format, FORMAT_MINIMUM, FORMAT_CURRENT);
                }
                if (Format > FORMAT_CURRENT)
                {
                    throw new IndexFormatTooNewException(IndexStream, Format, FORMAT_MINIMUM, FORMAT_CURRENT);
                }

                long indexSize = IndexStream.Length() - FORMAT_SIZE;

                if (docStoreOffset != -1)
                {
                    // We read only a slice out of this shared fields file
                    this.DocStoreOffset = docStoreOffset;
                    this.Size = size;

                    // Verify the file is long enough to hold all of our
                    // docs
                    Debug.Assert(((int)(indexSize / 8)) >= size + this.DocStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
                }
                else
                {
                    this.DocStoreOffset = 0;
                    this.Size = (int)(indexSize >> 3);
                    // Verify two sources of "maxDoc" agree:
                    if (this.Size != si.DocCount)
                    {
                        throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.Size + " but segmentInfo shows " + si.DocCount);
                    }
                }
                NumTotalDocs = (int)(indexSize >> 3);
                success = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    try
                    {
                        Dispose();
                    } // keep our original exception
                    catch (Exception t)
                    {
                    }
                }
            }
        }
        /// <summary>
        /// reads from legacy 3.x segments_N </summary>
        private SegmentCommitInfo ReadLegacySegmentInfo(Directory dir, int format, IndexInput input)
        {
            // check that it is a format we can understand
            if (format > Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS)
            {
                throw new IndexFormatTooOldException(input, format, Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1);
            }
            if (format < Lucene3xSegmentInfoFormat.FORMAT_3_1)
            {
                throw new IndexFormatTooNewException(input, format, Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1);
            }
            string version;
            if (format <= Lucene3xSegmentInfoFormat.FORMAT_3_1)
            {
                version = input.ReadString();
            }
            else
            {
                version = null;
            }

            string name = input.ReadString();

            int docCount = input.ReadInt();
            long delGen = input.ReadLong();

            int docStoreOffset = input.ReadInt();
            IDictionary<string, string> attributes = new Dictionary<string, string>();

            // parse the docstore stuff and shove it into attributes
            string docStoreSegment;
            bool docStoreIsCompoundFile;
            if (docStoreOffset != -1)
            {
                docStoreSegment = input.ReadString();
                docStoreIsCompoundFile = input.ReadByte() == SegmentInfo.YES;
                attributes[Lucene3xSegmentInfoFormat.DS_OFFSET_KEY] = Convert.ToString(docStoreOffset);
                attributes[Lucene3xSegmentInfoFormat.DS_NAME_KEY] = docStoreSegment;
                attributes[Lucene3xSegmentInfoFormat.DS_COMPOUND_KEY] = Convert.ToString(docStoreIsCompoundFile);
            }
            else
            {
                docStoreSegment = name;
                docStoreIsCompoundFile = false;
            }

            // pre-4.0 indexes write a byte if there is a single norms file
            byte b = input.ReadByte();

            //System.out.println("version=" + version + " name=" + name + " docCount=" + docCount + " delGen=" + delGen + " dso=" + docStoreOffset + " dss=" + docStoreSegment + " dssCFs=" + docStoreIsCompoundFile + " b=" + b + " format=" + format);

            Debug.Assert(1 == b, "expected 1 but was: " + b + " format: " + format);
            int numNormGen = input.ReadInt();
            IDictionary<int, long> normGen;
            if (numNormGen == SegmentInfo.NO)
            {
                normGen = null;
            }
            else
            {
                normGen = new Dictionary<int, long>();
                for (int j = 0; j < numNormGen; j++)
                {
                    normGen[j] = input.ReadLong();
                }
            }
            bool isCompoundFile = input.ReadByte() == SegmentInfo.YES;

            int delCount = input.ReadInt();
            Debug.Assert(delCount <= docCount);

            bool hasProx = input.ReadByte() == 1;

            IDictionary<string, string> diagnostics = input.ReadStringStringMap();

            if (format <= Lucene3xSegmentInfoFormat.FORMAT_HAS_VECTORS)
            {
                // NOTE: unused
                int hasVectors = input.ReadByte();
            }

            // Replicate logic from 3.x's SegmentInfo.files():
            ISet<string> files = new HashSet<string>();
            if (isCompoundFile)
            {
                files.Add(IndexFileNames.SegmentFileName(name, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
            }
            else
            {
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xFieldInfosReader.FIELD_INFOS_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.FREQ_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.PROX_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.TERMS_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xNormsProducer.NORMS_EXTENSION));
            }

            if (docStoreOffset != -1)
            {
                if (docStoreIsCompoundFile)
                {
                    files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION));
                }
                else
                {
                    files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
                    files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION));
                    AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION));
                    AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
                    AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
                }
            }
            else if (!isCompoundFile)
            {
                files.Add(IndexFileNames.SegmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
                files.Add(IndexFileNames.SegmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
            }

            // parse the normgen stuff and shove it into attributes
            if (normGen != null)
            {
                attributes[Lucene3xSegmentInfoFormat.NORMGEN_KEY] = Convert.ToString(numNormGen);
                foreach (KeyValuePair<int, long> ent in normGen)
                {
                    long gen = ent.Value;
                    if (gen >= SegmentInfo.YES)
                    {
                        // Definitely a separate norm file, with generation:
                        files.Add(IndexFileNames.FileNameFromGeneration(name, "s" + ent.Key, gen));
                        attributes[Lucene3xSegmentInfoFormat.NORMGEN_PREFIX + ent.Key] = Convert.ToString(gen);
                    }
                    else if (gen == SegmentInfo.NO)
                    {
                        // No separate norm
                    }
                    else
                    {
                        // We should have already hit indexformat too old exception
                        Debug.Assert(false);
                    }
                }
            }

            SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, null, diagnostics, CollectionsHelper.UnmodifiableMap(attributes));
            info.Files = files;

            SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, delCount, delGen, -1);
            return infoPerCommit;
        }
        private SegmentInfo ReadUpgradedSegmentInfo(string name, Directory dir, IndexInput input)
        {
            CodecUtil.CheckHeader(input, Lucene3xSegmentInfoFormat.UPGRADED_SI_CODEC_NAME, Lucene3xSegmentInfoFormat.UPGRADED_SI_VERSION_START, Lucene3xSegmentInfoFormat.UPGRADED_SI_VERSION_CURRENT);
            string version = input.ReadString();

            int docCount = input.ReadInt();

            IDictionary<string, string> attributes = input.ReadStringStringMap();

            bool isCompoundFile = input.ReadByte() == SegmentInfo.YES;

            IDictionary<string, string> diagnostics = input.ReadStringStringMap();

            ISet<string> files = input.ReadStringSet();

            SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, null, diagnostics, CollectionsHelper.UnmodifiableMap(attributes));
            info.Files = files;
            return info;
        }
 private NumericDocValues LoadDoubleField(FieldInfo field, IndexInput input)
 {
     CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.FLOATS_CODEC_NAME, Lucene40DocValuesFormat.FLOATS_VERSION_START, Lucene40DocValuesFormat.FLOATS_VERSION_CURRENT);
     int valueSize = input.ReadInt();
     if (valueSize != 8)
     {
         throw new CorruptIndexException("invalid valueSize: " + valueSize);
     }
     int maxDoc = State.SegmentInfo.DocCount;
     long[] values = new long[maxDoc];
     for (int i = 0; i < values.Length; i++)
     {
         values[i] = input.ReadLong();
     }
     RamBytesUsed_Renamed.AddAndGet(RamUsageEstimator.SizeOf(values));
     return new NumericDocValuesAnonymousInnerClassHelper8(this, values);
 }
        private SortedDocValues LoadBytesFixedSorted(FieldInfo field, IndexInput data, IndexInput index)
        {
            CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);
            CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);

            int fixedLength = data.ReadInt();
            int valueCount = index.ReadInt();

            PagedBytes bytes = new PagedBytes(16);
            bytes.Copy(data, fixedLength * (long)valueCount);
            PagedBytes.Reader bytesReader = bytes.Freeze(true);
            PackedInts.Reader reader = PackedInts.GetReader(index);
            RamBytesUsed_Renamed.AddAndGet(bytes.RamBytesUsed() + reader.RamBytesUsed());

            return CorrectBuggyOrds(new SortedDocValuesAnonymousInnerClassHelper(this, fixedLength, valueCount, bytesReader, reader));
        }
예제 #10
0
        /// <summary>Returns a <see cref="Status" /> instance detailing
        /// the state of the index.
        ///
        /// </summary>
        /// <param name="onlySegments">list of specific segment names to check
        ///
        /// <p/>As this method checks every byte in the specified
        /// segments, on a large index it can take quite a long
        /// time to run.
        ///
        /// <p/><b>WARNING</b>: make sure
        /// you only call this when the index is not opened by any
        /// writer.
        /// </param>
        public virtual Status CheckIndex_Renamed_Method(List <string> onlySegments)
        {
            System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
            SegmentInfos sis    = new SegmentInfos();
            Status       result = new Status();

            result.dir = dir;
            try
            {
                sis.Read(dir);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not read any segments file in directory");
                result.missingSegments = true;
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                return(result);
            }

            int        numSegments      = sis.Count;
            var        segmentsFileName = sis.GetCurrentSegmentFileName();
            IndexInput input            = null;

            try
            {
                input = dir.OpenInput(segmentsFileName);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not open segments file in directory");
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                result.cantOpenSegments = true;
                return(result);
            }
            int format = 0;

            try
            {
                format = input.ReadInt();
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not read segment file version in directory");
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                result.missingSegmentVersion = true;
                return(result);
            }
            finally
            {
                if (input != null)
                {
                    input.Close();
                }
            }

            System.String sFormat = "";
            bool          skip    = false;

            if (format == SegmentInfos.FORMAT)
            {
                sFormat = "FORMAT [Lucene Pre-2.1]";
            }
            if (format == SegmentInfos.FORMAT_LOCKLESS)
            {
                sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
            }
            else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
            {
                sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
            }
            else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
            {
                sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
            }
            else
            {
                if (format == SegmentInfos.FORMAT_CHECKSUM)
                {
                    sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_DEL_COUNT)
                {
                    sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_HAS_PROX)
                {
                    sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_USER_DATA)
                {
                    sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
                }
                else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
                {
                    sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
                }
                else if (format < SegmentInfos.CURRENT_FORMAT)
                {
                    sFormat = "int=" + format + " [newer version of Lucene than this tool]";
                    skip    = true;
                }
                else
                {
                    sFormat = format + " [Lucene 1.3 or prior]";
                }
            }

            result.segmentsFileName = segmentsFileName;
            result.numSegments      = numSegments;
            result.segmentFormat    = sFormat;
            result.userData         = sis.UserData;
            System.String userDataString;
            if (sis.UserData.Count > 0)
            {
                userDataString = " userData=" + CollectionsHelper.CollectionToString(sis.UserData);
            }
            else
            {
                userDataString = "";
            }

            Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);

            if (onlySegments != null)
            {
                result.partial = true;
                if (infoStream != null)
                {
                    infoStream.Write("\nChecking only these segments:");
                }
                foreach (string s in onlySegments)
                {
                    if (infoStream != null)
                    {
                        infoStream.Write(" " + s);
                    }
                }
                result.segmentsChecked.AddRange(onlySegments);
                Msg(":");
            }

            if (skip)
            {
                Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
                result.toolOutOfDate = true;
                return(result);
            }


            result.newSegments = (SegmentInfos)sis.Clone();
            result.newSegments.Clear();

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info = sis.Info(i);
                if (onlySegments != null && !onlySegments.Contains(info.name))
                {
                    continue;
                }
                var segInfoStat = new Status.SegmentInfoStatus();
                result.segmentInfos.Add(segInfoStat);
                Msg("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
                segInfoStat.name     = info.name;
                segInfoStat.docCount = info.docCount;

                int toLoseDocCount = info.docCount;

                SegmentReader reader = null;

                try
                {
                    Msg("    compound=" + info.GetUseCompoundFile());
                    segInfoStat.compound = info.GetUseCompoundFile();
                    Msg("    hasProx=" + info.HasProx);
                    segInfoStat.hasProx = info.HasProx;
                    Msg("    numFiles=" + info.Files().Count);
                    segInfoStat.numFiles = info.Files().Count;
                    Msg(System.String.Format(nf, "    size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
                    segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
                    IDictionary <string, string> diagnostics = info.Diagnostics;
                    segInfoStat.diagnostics = diagnostics;
                    if (diagnostics.Count > 0)
                    {
                        Msg("    diagnostics = " + CollectionsHelper.CollectionToString(diagnostics));
                    }

                    int docStoreOffset = info.DocStoreOffset;
                    if (docStoreOffset != -1)
                    {
                        Msg("    docStoreOffset=" + docStoreOffset);
                        segInfoStat.docStoreOffset = docStoreOffset;
                        Msg("    docStoreSegment=" + info.DocStoreSegment);
                        segInfoStat.docStoreSegment = info.DocStoreSegment;
                        Msg("    docStoreIsCompoundFile=" + info.DocStoreIsCompoundFile);
                        segInfoStat.docStoreCompoundFile = info.DocStoreIsCompoundFile;
                    }
                    System.String delFileName = info.GetDelFileName();
                    if (delFileName == null)
                    {
                        Msg("    no deletions");
                        segInfoStat.hasDeletions = false;
                    }
                    else
                    {
                        Msg("    has deletions [delFileName=" + delFileName + "]");
                        segInfoStat.hasDeletions      = true;
                        segInfoStat.deletionsFileName = delFileName;
                    }
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: open reader.........");
                    }
                    reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);

                    segInfoStat.openReaderPassed = true;

                    int numDocs = reader.NumDocs();
                    toLoseDocCount = numDocs;
                    if (reader.HasDeletions)
                    {
                        if (reader.deletedDocs.Count() != info.GetDelCount())
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        }
                        if (reader.deletedDocs.Count() > reader.MaxDoc)
                        {
                            throw new System.SystemException("too many deleted docs: MaxDoc=" + reader.MaxDoc + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        }
                        if (info.docCount - numDocs != info.GetDelCount())
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                        }
                        segInfoStat.numDeleted = info.docCount - numDocs;
                        Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
                    }
                    else
                    {
                        if (info.GetDelCount() != 0)
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                        }
                        Msg("OK");
                    }
                    if (reader.MaxDoc != info.docCount)
                    {
                        throw new System.SystemException("SegmentReader.MaxDoc " + reader.MaxDoc + " != SegmentInfos.docCount " + info.docCount);
                    }

                    // Test getFieldNames()
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: fields..............");
                    }
                    ICollection <string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
                    Msg("OK [" + fieldNames.Count + " fields]");
                    segInfoStat.numFields = fieldNames.Count;

                    // Test Field Norms
                    segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);

                    // Test the Term Index
                    segInfoStat.termIndexStatus = TestTermIndex(info, reader);

                    // Test Stored Fields
                    segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);

                    // Test Term Vectors
                    segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);

                    // Rethrow the first exception we encountered
                    //  This will cause stats for failed segments to be incremented properly
                    if (segInfoStat.fieldNormStatus.error != null)
                    {
                        throw new SystemException("Field Norm test failed");
                    }
                    else if (segInfoStat.termIndexStatus.error != null)
                    {
                        throw new SystemException("Term Index test failed");
                    }
                    else if (segInfoStat.storedFieldStatus.error != null)
                    {
                        throw new SystemException("Stored Field test failed");
                    }
                    else if (segInfoStat.termVectorStatus.error != null)
                    {
                        throw new System.SystemException("Term Vector test failed");
                    }

                    Msg("");
                }
                catch (System.Exception t)
                {
                    Msg("FAILED");
                    const string comment = "fixIndex() would remove reference to this segment";
                    Msg("    WARNING: " + comment + "; full exception:");
                    if (infoStream != null)
                    {
                        infoStream.WriteLine(t.StackTrace);
                    }
                    Msg("");
                    result.totLoseDocCount += toLoseDocCount;
                    result.numBadSegments++;
                    continue;
                }
                finally
                {
                    if (reader != null)
                    {
                        reader.Close();
                    }
                }

                // Keeper
                result.newSegments.Add((SegmentInfo)info.Clone());
            }

            if (0 == result.numBadSegments)
            {
                result.clean = true;
                Msg("No problems were detected with this index.\n");
            }
            else
            {
                Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
            }

            return(result);
        }
 private NumericDocValues LoadByteField(FieldInfo field, IndexInput input)
 {
     CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.INTS_CODEC_NAME, Lucene40DocValuesFormat.INTS_VERSION_START, Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
     int valueSize = input.ReadInt();
     if (valueSize != 1)
     {
         throw new CorruptIndexException("invalid valueSize: " + valueSize);
     }
     int maxDoc = State.SegmentInfo.DocCount;
     var values = new byte[maxDoc];
     input.ReadBytes(values, 0, values.Length);
     RamBytesUsed_Renamed.AddAndGet(RamUsageEstimator.SizeOf(values));
     return new NumericDocValuesAnonymousInnerClassHelper3(this, values);
 }
예제 #12
0
        public Lucene3xStoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context)
        {
            string segment        = Lucene3xSegmentInfoFormat.GetDocStoreSegment(si);
            int    docStoreOffset = Lucene3xSegmentInfoFormat.GetDocStoreOffset(si);
            int    size           = si.DocCount;
            bool   success        = false;

            FieldInfos = fn;
            try
            {
                if (docStoreOffset != -1 && Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si))
                {
                    d = StoreCFSReader = new CompoundFileDirectory(si.Dir, IndexFileNames.SegmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false);
                }
                else
                {
                    StoreCFSReader = null;
                }
                FieldsStream = d.OpenInput(IndexFileNames.SegmentFileName(segment, "", FIELDS_EXTENSION), context);
                string indexStreamFN = IndexFileNames.SegmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
                IndexStream = d.OpenInput(indexStreamFN, context);

                Format = IndexStream.ReadInt();

                if (Format < FORMAT_MINIMUM)
                {
                    throw new IndexFormatTooOldException(IndexStream, Format, FORMAT_MINIMUM, FORMAT_CURRENT);
                }
                if (Format > FORMAT_CURRENT)
                {
                    throw new IndexFormatTooNewException(IndexStream, Format, FORMAT_MINIMUM, FORMAT_CURRENT);
                }

                long indexSize = IndexStream.Length() - FORMAT_SIZE;

                if (docStoreOffset != -1)
                {
                    // We read only a slice out of this shared fields file
                    this.DocStoreOffset = docStoreOffset;
                    this.Size           = size;

                    // Verify the file is long enough to hold all of our
                    // docs
                    Debug.Assert(((int)(indexSize / 8)) >= size + this.DocStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
                }
                else
                {
                    this.DocStoreOffset = 0;
                    this.Size           = (int)(indexSize >> 3);
                    // Verify two sources of "maxDoc" agree:
                    if (this.Size != si.DocCount)
                    {
                        throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.Size + " but segmentInfo shows " + si.DocCount);
                    }
                }
                NumTotalDocs = (int)(indexSize >> 3);
                success      = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    try
                    {
                        Dispose();
                    } // keep our original exception
                    catch (Exception t)
                    {
                    }
                }
            }
        }
예제 #13
0
        internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size)
        {
            bool success = false;

            isOriginal = true;
            try
            {
                fieldInfos = fn;

                cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize);
                cloneableIndexStream  = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize);

                // First version of fdx did not include a format
                // header, but, the first int will always be 0 in that
                // case
                int firstInt = cloneableIndexStream.ReadInt();
                format = firstInt == 0 ? 0 : firstInt;

                if (format > FieldsWriter.FORMAT_CURRENT)
                {
                    throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FieldsWriter.FORMAT_CURRENT + " or lower");
                }

                formatSize = format > FieldsWriter.FORMAT ? 4 : 0;

                if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
                {
                    cloneableFieldsStream.SetModifiedUTF8StringsMode();
                }

                fieldsStream = (IndexInput)cloneableFieldsStream.Clone();

                long indexSize = cloneableIndexStream.Length() - formatSize;

                if (docStoreOffset != -1)
                {
                    // We read only a slice out of this shared fields file
                    this.docStoreOffset = docStoreOffset;
                    this.size           = size;

                    // Verify the file is long enough to hold all of our
                    // docs
                    System.Diagnostics.Debug.Assert(((int)(indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
                }
                else
                {
                    this.docStoreOffset = 0;
                    this.size           = (int)(indexSize >> 3);
                }

                indexStream  = (IndexInput)cloneableIndexStream.Clone();
                numTotalDocs = (int)(indexSize >> 3);
                success      = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    Dispose();
                }
            }
        }
예제 #14
0
        public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor,
                                        IComparer <BytesRef> termComp, String segmentSuffix, IOContext context)
        {
            _termComp = termComp;

            Debug.Assert(indexDivisor == -1 || indexDivisor > 0);

            _input =
                dir.OpenInput(
                    IndexFileNames.SegmentFileName(segment, segmentSuffix,
                                                   FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION),
                    context);

            var success = false;

            try
            {
                _version = ReadHeader(_input);

                if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
                {
                    CodecUtil.ChecksumEntireFile(_input);
                }

                indexInterval = _input.ReadInt();

                if (indexInterval < 1)
                {
                    throw new CorruptIndexException(String.Format("Invalid indexInterval: {0}, Resource: {1}",
                                                                  indexInterval, _input));
                }

                _indexDivisor = indexDivisor;

                if (indexDivisor < 0)
                {
                    _totalIndexInterval = indexInterval;
                }
                else
                {
                    // In case terms index gets loaded, later, on demand
                    _totalIndexInterval = indexInterval * indexDivisor;
                }

                Debug.Assert(_totalIndexInterval > 0);

                SeekDir(_input, _dirOffset);

                // Read directory
                int numFields = _input.ReadVInt();

                if (numFields < 0)
                {
                    throw new CorruptIndexException(String.Format("Invalid numFields: {0}, Resource: {1}", numFields,
                                                                  _input));
                }

                for (int i = 0; i < numFields; i++)
                {
                    int field         = _input.ReadVInt();
                    int numIndexTerms = _input.ReadVInt();
                    if (numIndexTerms < 0)
                    {
                        throw new CorruptIndexException(String.Format("Invalid numIndexTerms: {0}, Resource: {1}",
                                                                      numIndexTerms,
                                                                      _input));
                    }

                    long termsStart         = _input.ReadVLong();
                    long indexStart         = _input.ReadVLong();
                    long packedIndexStart   = _input.ReadVLong();
                    long packedOffsetsStart = _input.ReadVLong();

                    if (packedIndexStart < indexStart)
                    {
                        throw new CorruptIndexException(
                                  String.Format(
                                      "Invalid packedIndexStart: {0}, IndexStart: {1}, NumIndexTerms: {2}, Resource: {3}",
                                      packedIndexStart,
                                      indexStart, numIndexTerms, _input));
                    }

                    FieldInfo fieldInfo = fieldInfos.FieldInfo(field);

                    try
                    {
                        _fields.Add(fieldInfo,
                                    new FieldIndexData(numIndexTerms, indexStart, termsStart, packedIndexStart,
                                                       packedOffsetsStart, this));
                    }
                    catch (ArgumentException)
                    {
                        throw new CorruptIndexException(String.Format("Duplicate field: {0}, Resource {1}",
                                                                      fieldInfo.Name,
                                                                      _input));
                    }
                }
                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(_input);
                }
                if (indexDivisor > 0)
                {
                    _input.Dispose();
                    _input = null;
                    if (success)
                    {
                        _indexLoaded = true;
                    }

                    _termBytesReader = _termBytes.Freeze(true);
                }
            }
        }
예제 #15
0
 public override void Init(IndexInput termsIn)
 {
     // Make sure we are talking to the matching past writer
     CodecUtil.CheckHeader(termsIn, SepPostingsWriter.CODEC, SepPostingsWriter.VERSION_START,
         SepPostingsWriter.VERSION_START);
     _skipInterval = termsIn.ReadInt();
     _maxSkipLevels = termsIn.ReadInt();
     _skipMinimum = termsIn.ReadInt();
 }
예제 #16
0
 /// <summary>
 /// Read as a bit set </summary>
 private void ReadBits(IndexInput input)
 {
     Count_Renamed = input.ReadInt();                     // read count
     Bits          = new byte[GetNumBytes(Size_Renamed)]; // allocate bits
     input.ReadBytes(Bits, 0, Bits.Length);
 }