Example #1
0
        /// <summary>
        /// Returns a <see cref="Status"/> instance detailing
        /// the state of the index.
        /// <para/>
        /// As this method checks every byte in the specified
        /// segments, on a large index it can take quite a long
        /// time to run.
        /// <para/>
        /// <b>WARNING</b>: make sure
        /// you only call this when the index is not opened by any
        /// writer.
        /// </summary>
        /// <param name="onlySegments"> list of specific segment names to check,
        /// or <c>null</c> to check every segment. When non-null the returned
        /// status is flagged as partial.</param>
        /// <returns> the populated <see cref="Status"/>. If the segments file cannot be
        /// read, opened, or its leading int cannot be read, the status is returned
        /// early with the matching flag set (<c>MissingSegments</c>,
        /// <c>CantOpenSegments</c> or <c>MissingSegmentVersion</c>).</returns>
        /// <exception cref="Exception"> if a segment whose version compares
        /// greater than or equal to "4.5" reports a document count of zero or less.
        /// Failures while testing an individual segment are not thrown; they are
        /// reported on the info stream and counted in the returned status.</exception>
        public virtual Status DoCheckIndex(IList<string> onlySegments)
        {
            NumberFormatInfo nf = CultureInfo.CurrentCulture.NumberFormat;
            SegmentInfos sis = new SegmentInfos();
            Status result = new Status();
            result.Dir = Dir;
            try
            {
                sis.Read(Dir);
            }
            catch (Exception t)
            {
                Msg(infoStream, "ERROR: could not read any segments file in directory");
                result.MissingSegments = true;
                if (infoStream != null)
                {
                    // LUCENENET NOTE: Some tests rely on the error type being in
                    // the message. We can't get the error type with StackTrace, we
                    // need ToString() for that.
                    infoStream.WriteLine(t.ToString());
                }
                return result;
            }

            // find the oldest and newest segment versions
            // The sentinels are formatted with the invariant culture so that the
            // negative sign of int.MinValue does not vary with the current culture.
            string oldest = int.MaxValue.ToString(CultureInfo.InvariantCulture);
            string newest = int.MinValue.ToString(CultureInfo.InvariantCulture);
            string oldSegs = null;
            bool foundNonNullVersion = false;
            IComparer<string> versionComparator = StringHelper.VersionComparator;
            foreach (SegmentCommitInfo si in sis.Segments)
            {
                string version = si.Info.Version;
                if (version == null)
                {
                    // pre-3.1 segment
                    oldSegs = "pre-3.1";
                }
                else
                {
                    foundNonNullVersion = true;
                    if (versionComparator.Compare(version, oldest) < 0)
                    {
                        oldest = version;
                    }
                    if (versionComparator.Compare(version, newest) > 0)
                    {
                        newest = version;
                    }
                }
            }

            int numSegments = sis.Size();
            string segmentsFileName = sis.SegmentsFileName;
            // note: we only read the format byte (required preamble) here!
            IndexInput input = null;
            try
            {
                input = Dir.OpenInput(segmentsFileName, IOContext.READONCE);
            }
            catch (Exception t)
            {
                Msg(infoStream, "ERROR: could not open segments file in directory");
                if (infoStream != null)
                {
                    // LUCENENET NOTE: Some tests rely on the error type being in
                    // the message. We can't get the error type with StackTrace, we
                    // need ToString() for that.
                    infoStream.WriteLine(t.ToString());
                }
                result.CantOpenSegments = true;
                return result;
            }
            try
            {
                // The value itself is not used by this method; the read only
                // verifies that the start of the segments file is readable.
                input.ReadInt();
            }
            catch (Exception t)
            {
                Msg(infoStream, "ERROR: could not read segment file version in directory");
                if (infoStream != null)
                {
                    // LUCENENET NOTE: Some tests rely on the error type being in
                    // the message. We can't get the error type with StackTrace, we
                    // need ToString() for that.
                    infoStream.WriteLine(t.ToString());
                }
                result.MissingSegmentVersion = true;
                return result;
            }
            finally
            {
                if (input != null)
                {
                    input.Dispose();
                }
            }

            // Neither value is reassigned anywhere in this method, so "format=" is
            // always printed empty and the "skip" branch below never executes.
            string sFormat = "";
            bool skip = false;

            result.SegmentsFileName = segmentsFileName;
            result.NumSegments = numSegments;
            result.UserData = sis.UserData;
            string userDataString;
            if (sis.UserData.Count > 0)
            {
                // NOTE(review): this relies on UserData's ToString(); a plain BCL
                // dictionary prints its type name rather than its entries - confirm.
                userDataString = " userData=" + sis.UserData;
            }
            else
            {
                userDataString = "";
            }

            string versionString = null;
            if (oldSegs != null)
            {
                if (foundNonNullVersion)
                {
                    versionString = "versions=[" + oldSegs + " .. " + newest + "]";
                }
                else
                {
                    versionString = "version=" + oldSegs;
                }
            }
            else
            {
                versionString = oldest.Equals(newest) ? ("version=" + oldest) : ("versions=[" + oldest + " .. " + newest + "]");
            }

            Msg(infoStream, "Segments file=" + segmentsFileName + " numSegments=" + numSegments + " " + versionString + " format=" + sFormat + userDataString);

            if (onlySegments != null)
            {
                result.Partial = true;
                if (infoStream != null)
                {
                    infoStream.Write("\nChecking only these segments:");
                    foreach (string s in onlySegments)
                    {
                        infoStream.Write(" " + s);
                    }
                }
                result.SegmentsChecked.AddRange(onlySegments);
                Msg(infoStream, ":");
            }

            if (skip)
            {
                Msg(infoStream, "\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
                result.ToolOutOfDate = true;
                return result;
            }

            // Start from a copy of the commit with no segments; every segment that
            // passes all checks below is added back to it.
            result.NewSegments = (SegmentInfos)sis.Clone();
            result.NewSegments.Clear();
            result.MaxSegmentName = -1;

            for (int i = 0; i < numSegments; i++)
            {
                SegmentCommitInfo info = sis.Info(i);

                // Segment names are "_" followed by a base-36 counter, so the suffix
                // must be parsed with radix 36. A name that cannot be parsed is
                // treated as 0 (best effort) instead of aborting the whole check.
                int segmentName;
                if (!TryParseSegmentNameBase36(info.Info.Name, out segmentName))
                {
                    segmentName = 0;
                }
                if (segmentName > result.MaxSegmentName)
                {
                    result.MaxSegmentName = segmentName;
                }
                // The max segment name is tracked for every segment, even those
                // that are filtered out of the detailed check.
                if (onlySegments != null && !onlySegments.Contains(info.Info.Name))
                {
                    continue;
                }
                Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
                result.SegmentInfos.Add(segInfoStat);
                Msg(infoStream, "  " + (1 + i) + " of " + numSegments + ": name=" + info.Info.Name + " docCount=" + info.Info.DocCount);
                segInfoStat.Name = info.Info.Name;
                segInfoStat.DocCount = info.Info.DocCount;

                string version = info.Info.Version;
                if (info.Info.DocCount <= 0 && version != null && versionComparator.Compare(version, "4.5") >= 0)
                {
                    // Thrown outside the per-segment try block, so this aborts the
                    // whole check rather than marking a single segment as bad.
                    throw new Exception("illegal number of documents: maxDoc=" + info.Info.DocCount);
                }

                // Documents that would be lost if this segment is dropped; refined
                // to the live document count once the reader has been opened.
                int toLoseDocCount = info.Info.DocCount;

                AtomicReader reader = null;

                try
                {
                    Codec codec = info.Info.Codec;
                    Msg(infoStream, "    codec=" + codec);
                    segInfoStat.Codec = codec;
                    Msg(infoStream, "    compound=" + info.Info.UseCompoundFile);
                    segInfoStat.Compound = info.Info.UseCompoundFile;
                    Msg(infoStream, "    numFiles=" + info.Files().Count);
                    segInfoStat.NumFiles = info.Files().Count;
                    segInfoStat.SizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
                    if (info.Info.GetAttribute(Lucene3xSegmentInfoFormat.DS_OFFSET_KEY) == null)
                    {
                        // don't print size in bytes if its a 3.0 segment with shared docstores
                        Msg(infoStream, "    size (MB)=" + segInfoStat.SizeMB.ToString(nf));
                    }
                    IDictionary<string, string> diagnostics = info.Info.Diagnostics;
                    segInfoStat.Diagnostics = diagnostics;
                    if (diagnostics.Count > 0)
                    {
                        // NOTE(review): relies on the dictionary's ToString(); confirm
                        // the runtime type prints its entries rather than its type name.
                        Msg(infoStream, "    diagnostics = " + diagnostics);
                    }

                    if (!info.HasDeletions())
                    {
                        Msg(infoStream, "    no deletions");
                        segInfoStat.HasDeletions = false;
                    }
                    else
                    {
                        Msg(infoStream, "    has deletions [delGen=" + info.DelGen + "]");
                        segInfoStat.HasDeletions = true;
                        segInfoStat.DeletionsGen = info.DelGen;
                    }
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: open reader.........");
                    }
                    reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IOContext.DEFAULT);
                    Msg(infoStream, "OK");

                    segInfoStat.OpenReaderPassed = true;

                    if (infoStream != null)
                    {
                        infoStream.Write("    test: check integrity.....");
                    }
                    reader.CheckIntegrity();
                    Msg(infoStream, "OK");

                    if (infoStream != null)
                    {
                        infoStream.Write("    test: check live docs.....");
                    }
                    int numDocs = reader.NumDocs;
                    toLoseDocCount = numDocs;
                    if (reader.HasDeletions)
                    {
                        // Cross-check the delete counts recorded in the segment
                        // info against what the reader reports.
                        if (reader.NumDocs != info.Info.DocCount - info.DelCount)
                        {
                            throw new Exception("delete count mismatch: info=" + (info.Info.DocCount - info.DelCount) + " vs reader=" + reader.NumDocs);
                        }
                        if ((info.Info.DocCount - reader.NumDocs) > reader.MaxDoc)
                        {
                            throw new Exception("too many deleted docs: maxDoc()=" + reader.MaxDoc + " vs del count=" + (info.Info.DocCount - reader.NumDocs));
                        }
                        if (info.Info.DocCount - numDocs != info.DelCount)
                        {
                            throw new Exception("delete count mismatch: info=" + info.DelCount + " vs reader=" + (info.Info.DocCount - numDocs));
                        }
                        Bits liveDocs = reader.LiveDocs;
                        if (liveDocs == null)
                        {
                            throw new Exception("segment should have deletions, but liveDocs is null");
                        }
                        else
                        {
                            // Count the live bits and compare with the reader's
                            // document count.
                            int numLive = 0;
                            for (int j = 0; j < liveDocs.Length(); j++)
                            {
                                if (liveDocs.Get(j))
                                {
                                    numLive++;
                                }
                            }
                            if (numLive != numDocs)
                            {
                                throw new Exception("liveDocs count mismatch: info=" + numDocs + ", vs bits=" + numLive);
                            }
                        }

                        segInfoStat.NumDeleted = info.Info.DocCount - numDocs;
                        Msg(infoStream, "OK [" + (segInfoStat.NumDeleted) + " deleted docs]");
                    }
                    else
                    {
                        if (info.DelCount != 0)
                        {
                            throw new Exception("delete count mismatch: info=" + info.DelCount + " vs reader=" + (info.Info.DocCount - numDocs));
                        }
                        Bits liveDocs = reader.LiveDocs;
                        if (liveDocs != null)
                        {
                            // its ok for it to be non-null here, as long as none are set right?
                            for (int j = 0; j < liveDocs.Length(); j++)
                            {
                                if (!liveDocs.Get(j))
                                {
                                    throw new Exception("liveDocs mismatch: info says no deletions but doc " + j + " is deleted.");
                                }
                            }
                        }
                        Msg(infoStream, "OK");
                    }
                    if (reader.MaxDoc != info.Info.DocCount)
                    {
                        throw new Exception("SegmentReader.maxDoc() " + reader.MaxDoc + " != SegmentInfos.docCount " + info.Info.DocCount);
                    }

                    // Test getFieldInfos()
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: fields..............");
                    }
                    FieldInfos fieldInfos = reader.FieldInfos;
                    Msg(infoStream, "OK [" + fieldInfos.Size() + " fields]");
                    segInfoStat.NumFields = fieldInfos.Size();

                    // Test Field Norms
                    segInfoStat.FieldNormStatus = TestFieldNorms(reader, infoStream);

                    // Test the Term Index
                    segInfoStat.TermIndexStatus = TestPostings(reader, infoStream, Verbose);

                    // Test Stored Fields
                    segInfoStat.StoredFieldStatus = TestStoredFields(reader, infoStream);

                    // Test Term Vectors
                    segInfoStat.TermVectorStatus = TestTermVectors(reader, infoStream, Verbose, CrossCheckTermVectors_Renamed);

                    // Test Doc Values
                    segInfoStat.DocValuesStatus = TestDocValues(reader, infoStream);

                    // Rethrow the first exception we encountered
                    //  this will cause stats for failed segments to be incremented properly
                    if (segInfoStat.FieldNormStatus.Error != null)
                    {
                        throw new Exception("Field Norm test failed");
                    }
                    else if (segInfoStat.TermIndexStatus.Error != null)
                    {
                        throw new Exception("Term Index test failed");
                    }
                    else if (segInfoStat.StoredFieldStatus.Error != null)
                    {
                        throw new Exception("Stored Field test failed");
                    }
                    else if (segInfoStat.TermVectorStatus.Error != null)
                    {
                        throw new Exception("Term Vector test failed");
                    }
                    else if (segInfoStat.DocValuesStatus.Error != null)
                    {
                        throw new Exception("DocValues test failed");
                    }

                    Msg(infoStream, "");
                }
                catch (Exception t)
                {
                    // Any failure marks this segment as bad: it is counted, its
                    // documents are added to the would-be-lost total, and it is
                    // not added to NewSegments.
                    Msg(infoStream, "FAILED");
                    string comment;
                    comment = "fixIndex() would remove reference to this segment";
                    Msg(infoStream, "    WARNING: " + comment + "; full exception:");
                    if (infoStream != null)
                    {
                        // LUCENENET NOTE: Some tests rely on the error type being in
                        // the message. We can't get the error type with StackTrace, we
                        // need ToString() for that.
                        infoStream.WriteLine(t.ToString());
                    }
                    Msg(infoStream, "");
                    result.TotLoseDocCount += toLoseDocCount;
                    result.NumBadSegments++;
                    continue;
                }
                finally
                {
                    if (reader != null)
                    {
                        reader.Dispose();
                    }
                }

                // Keeper
                result.NewSegments.Add((SegmentCommitInfo)info.Clone());
            }

            if (0 == result.NumBadSegments)
            {
                result.Clean = true;
            }
            else
            {
                Msg(infoStream, "WARNING: " + result.NumBadSegments + " broken segments (containing " + result.TotLoseDocCount + " documents) detected");
            }

            // The commit's segment counter must be greater than every existing
            // segment name; otherwise record a corrected counter in NewSegments.
            if (!(result.ValidCounter = (result.MaxSegmentName < sis.Counter)))
            {
                result.Clean = false;
                result.NewSegments.Counter = result.MaxSegmentName + 1;
                Msg(infoStream, "ERROR: Next segment name counter " + sis.Counter + " is not greater than max segment name " + result.MaxSegmentName);
            }

            if (result.Clean)
            {
                Msg(infoStream, "No problems were detected with this index.\n");
            }

            return result;
        }

        /// <summary>
        /// Parses the part of a segment name after its first character (the
        /// leading <c>'_'</c>) as a non-negative base-36 integer.
        /// </summary>
        /// <param name="segmentName"> full segment name, e.g. <c>"_a"</c>; may be <c>null</c></param>
        /// <param name="value"> the parsed value, or 0 when parsing fails</param>
        /// <returns> <c>true</c> if the suffix is non-empty, consists only of the
        /// characters 0-9, a-z or A-Z, and fits in an <see cref="int"/>;
        /// otherwise <c>false</c></returns>
        private static bool TryParseSegmentNameBase36(string segmentName, out int value)
        {
            value = 0;
            if (segmentName == null || segmentName.Length < 2)
            {
                return false;
            }

            // Accumulate in a long so overflow past int.MaxValue can be detected.
            long accumulated = 0;
            for (int i = 1; i < segmentName.Length; i++)
            {
                char c = segmentName[i];
                int digit;
                if (c >= '0' && c <= '9')
                {
                    digit = c - '0';
                }
                else if (c >= 'a' && c <= 'z')
                {
                    digit = c - 'a' + 10;
                }
                else if (c >= 'A' && c <= 'Z')
                {
                    digit = c - 'A' + 10;
                }
                else
                {
                    return false;
                }

                accumulated = accumulated * 36 + digit;
                if (accumulated > int.MaxValue)
                {
                    return false;
                }
            }

            value = (int)accumulated;
            return true;
        }