Example #1
        /// <summary> Copy contents of a directory src to a directory dest.
        /// If a file in src already exists in dest then the
        /// one in dest will be blindly overwritten.
        ///
        /// <p/><b>NOTE:</b> the source directory cannot change
        /// while this method is running.  Otherwise the results
        /// are undefined and you could easily hit a
        /// FileNotFoundException.
        ///
        /// <p/><b>NOTE:</b> this method only copies files that look
        /// like index files (ie, have extensions matching the
        /// known extensions of index files).
        ///
        /// </summary>
        /// <param name="src">source directory
        /// </param>
        /// <param name="dest">destination directory
        /// </param>
        /// <param name="closeDirSrc">if <c>true</c>, call <see cref="Close()" /> method on source directory
        /// </param>
        /// <throws>  IOException </throws>
        public static void  Copy(Directory src, Directory dest, bool closeDirSrc)
        {
            System.String[] files = src.ListAll();

            IndexFileNameFilter filter = IndexFileNameFilter.Filter;

            byte[] buf = new byte[BufferedIndexOutput.BUFFER_SIZE];
            for (int i = 0; i < files.Length; i++)
            {
                if (!filter.Accept(null, files[i]))
                {
                    continue;
                }

                IndexOutput os         = null;
                IndexInput  is_Renamed = null;
                try
                {
                    // create file in dest directory
                    os = dest.CreateOutput(files[i]);
                    // read current file
                    is_Renamed = src.OpenInput(files[i]);
                    // and copy to dest directory
                    long len       = is_Renamed.Length();
                    long readCount = 0;
                    while (readCount < len)
                    {
                        int toRead = readCount + BufferedIndexOutput.BUFFER_SIZE > len ? (int)(len - readCount) : BufferedIndexOutput.BUFFER_SIZE;
                        is_Renamed.ReadBytes(buf, 0, toRead);
                        os.WriteBytes(buf, toRead);
                        readCount += toRead;
                    }
                }
                finally
                {
                    // graceful cleanup
                    try
                    {
                        if (os != null)
                        {
                            os.Close();
                        }
                    }
                    finally
                    {
                        if (is_Renamed != null)
                        {
                            is_Renamed.Close();
                        }
                    }
                }
            }
            if (closeDirSrc)
            {
                src.Close();
            }
        }
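
A minimal usage sketch for the helper above; the paths and the FSDirectory.Open overload taking a DirectoryInfo are assumptions for illustration, not part of the example:

        // Hypothetical sketch: copy a live index into a backup Directory.
        // The paths and the FSDirectory.Open overload used here are assumptions.
        Directory source = FSDirectory.Open(new System.IO.DirectoryInfo("/indexes/live"));
        Directory backup = FSDirectory.Open(new System.IO.DirectoryInfo("/indexes/backup"));
        try
        {
            // closeDirSrc = false keeps the source directory open for further use
            Directory.Copy(source, backup, false);
        }
        finally
        {
            source.Close();
            backup.Close();
        }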
Example #2
		public CompoundFileReader(Directory dir, System.String name, int readBufferSize)
		{
			directory = dir;
			fileName = name;
			this.readBufferSize = readBufferSize;
			
			bool success = false;
			
			try
			{
				stream = dir.OpenInput(name, readBufferSize);
				
				// read the directory and init files
				int count = stream.ReadVInt();
				FileEntry entry = null;
				for (int i = 0; i < count; i++)
				{
					long offset = stream.ReadLong();
					System.String id = stream.ReadString();
					
					if (entry != null)
					{
						// set length of the previous entry
						entry.length = offset - entry.offset;
					}

					entry = new FileEntry {offset = offset};
					entries[id] = entry;
				}
				
				// set the length of the final entry
				if (entry != null)
				{
					entry.length = stream.Length() - entry.offset;
				}
				
				success = true;
			}
			finally
			{
				if (!success && (stream != null))
				{
					try
					{
						stream.Close();
					}
					catch (System.IO.IOException)
					{
					}
				}
			}
		}
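
The loop above builds the compound-file table of contents from a VInt entry count followed by (offset, name) pairs; each entry's length is the distance to the next offset (or to the end of the file for the last entry). A hedged usage sketch, where the .cfs and sub-file names are placeholders:

		// Hypothetical sketch: read one sub-file out of a compound (.cfs) file.
		// 'dir' is an already-open Directory; the file names are assumptions.
		var cfsReader = new CompoundFileReader(dir, "_0.cfs", 1024);
		try
		{
			IndexInput sub = cfsReader.OpenInput("_0.fnm");	// sub-file name is an assumption
			try
			{
				// ... read from 'sub' like any other IndexInput ...
			}
			finally
			{
				sub.Close();
			}
		}
		finally
		{
			cfsReader.Close();
		}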
Example #3
		/// <summary> Construct a FieldInfos object using the directory and the name of the file
		/// IndexInput
		/// </summary>
		/// <param name="d">The directory to open the IndexInput from
		/// </param>
		/// <param name="name">The name of the file to open the IndexInput from in the Directory
		/// </param>
		/// <throws>  IOException </throws>
		public /*internal*/ FieldInfos(Directory d, String name)
		{
			IndexInput input = d.OpenInput(name);
			try
			{
				try
				{
					Read(input, name);
				}
				catch (System.IO.IOException)
				{
					if (format == FORMAT_PRE)
					{
						// LUCENE-1623: FORMAT_PRE (before there was a
						// format) may be 2.3.2 (pre-utf8) or 2.4.x (utf8)
						// encoding; retry with input set to pre-utf8
						input.Seek(0);
						input.SetModifiedUTF8StringsMode();
						byNumber.Clear();
						byName.Clear();

					    bool rethrow = false;
						try
						{
							Read(input, name);
						}
						catch (Exception)
                        {
                            // Ignore any new exception & set to throw original IOE
						    rethrow = true;
						}
                        if(rethrow)
                        {
                            // Preserve stack trace
                            throw;
                        }
					}
					else
					{
						// The IOException cannot be caused by
						// LUCENE-1623, so re-throw it
						throw;
					}
				}
			}
			finally
			{
				input.Close();
			}
		}
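
A brief usage sketch for the constructor above; the segment file name and the Size()/FieldInfo(i) accessors are assumptions used only for illustration:

		// Hypothetical sketch: load and enumerate the field infos of one segment.
		// 'dir' is an already-open Directory; "_0.fnm" is an assumed file name.
		FieldInfos infos = new FieldInfos(dir, "_0.fnm");
		for (int i = 0; i < infos.Size(); i++)		// Size()/FieldInfo(i) are assumed accessors
		{
			FieldInfo fi = infos.FieldInfo(i);
			System.Console.WriteLine(fi.name);
		}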
Example #4
            public System.Object Run(IndexCommit commit)
            {
                if (commit != null)
                {
                    if (directory != commit.Directory)
                    {
                        throw new System.IO.IOException("the specified commit does not match the specified Directory");
                    }
                    return(DoBody(commit.SegmentsFileName));
                }

                System.String segmentFileName   = null;
                long          lastGen           = -1;
                long          gen               = 0;
                int           genLookaheadCount = 0;

                System.IO.IOException exc = null;
                bool retry = false;

                int method = 0;

                // Loop until we succeed in calling doBody() without
                // hitting an IOException.  An IOException most likely
                // means a commit was in process and has finished, in
                // the time it took us to load the now-old infos files
                // (and segments files).  It's also possible it's a
                // true error (corrupt index).  To distinguish these,
                // on each retry we must see "forward progress" on
                // which generation we are trying to load.  If we
                // don't, then the original error is real and we throw
                // it.

                // We have three methods for determining the current
                // generation.  We try the first two in parallel, and
                // fall back to the third when necessary.

                while (true)
                {
                    if (0 == method)
                    {
                        // Method 1: list the directory and use the highest
                        // segments_N file.  This method works well as long
                        // as there is no stale caching on the directory
                        // contents (NOTE: NFS clients often have such stale
                        // caching):
                        System.String[] files = null;

                        long genA = -1;

                        files = directory.ListAll();

                        if (files != null)
                        {
                            genA = Lucene.Net.Index.SegmentInfos.GetCurrentSegmentGeneration(files);
                        }

                        Lucene.Net.Index.SegmentInfos.Message("directory listing genA=" + genA);

                        // Method 2: open segments.gen and read its
                        // contents.  Then we take the larger of the two
                        // gens.  This way, if either approach is hitting
                        // a stale cache (NFS) we have a better chance of
                        // getting the right generation.
                        long genB = -1;
                        for (int i = 0; i < Lucene.Net.Index.SegmentInfos.defaultGenFileRetryCount; i++)
                        {
                            IndexInput genInput = null;
                            try
                            {
                                genInput = directory.OpenInput(IndexFileNames.SEGMENTS_GEN);
                            }
                            catch (System.IO.FileNotFoundException e)
                            {
                                Lucene.Net.Index.SegmentInfos.Message("segments.gen open: FileNotFoundException " + e);
                                break;
                            }
                            catch (System.IO.IOException e)
                            {
                                Lucene.Net.Index.SegmentInfos.Message("segments.gen open: IOException " + e);
                            }

                            if (genInput != null)
                            {
                                try
                                {
                                    int version = genInput.ReadInt();
                                    if (version == Lucene.Net.Index.SegmentInfos.FORMAT_LOCKLESS)
                                    {
                                        long gen0 = genInput.ReadLong();
                                        long gen1 = genInput.ReadLong();
                                        Lucene.Net.Index.SegmentInfos.Message("fallback check: " + gen0 + "; " + gen1);
                                        if (gen0 == gen1)
                                        {
                                            // The file is consistent.
                                            genB = gen0;
                                            break;
                                        }
                                    }
                                }
                                catch (System.IO.IOException)
                                {
                                    // will retry
                                }
                                finally
                                {
                                    genInput.Close();
                                }
                            }

                            System.Threading.Thread.Sleep(new TimeSpan((System.Int64) 10000 * Lucene.Net.Index.SegmentInfos.defaultGenFileRetryPauseMsec));
                        }

                        Lucene.Net.Index.SegmentInfos.Message(IndexFileNames.SEGMENTS_GEN + " check: genB=" + genB);

                        // Pick the larger of the two gen's:
                        if (genA > genB)
                        {
                            gen = genA;
                        }
                        else
                        {
                            gen = genB;
                        }

                        if (gen == -1)
                        {
                            throw new System.IO.FileNotFoundException("no segments* file found in " + directory + ": files:" + string.Join(" ", files));
                        }
                    }

                    // Third method (fallback if first & second methods
                    // are not reliable): since both directory cache and
                    // file contents cache seem to be stale, just
                    // advance the generation.
                    if (1 == method || (0 == method && lastGen == gen && retry))
                    {
                        method = 1;

                        if (genLookaheadCount < Lucene.Net.Index.SegmentInfos.defaultGenLookaheadCount)
                        {
                            gen++;
                            genLookaheadCount++;
                            Lucene.Net.Index.SegmentInfos.Message("look ahead increment gen to " + gen);
                        }
                    }

                    if (lastGen == gen)
                    {
                        // This means we're about to try the same
                        // segments_N last tried.  This is allowed,
                        // exactly once, because writer could have been in
                        // the process of writing segments_N last time.

                        if (retry)
                        {
                            // OK, we've tried the same segments_N file
                            // twice in a row, so this must be a real
                            // error.  We throw the original exception we
                            // got.
                            throw exc;
                        }

                        retry = true;
                    }
                    else if (0 == method)
                    {
                        // Segment file has advanced since our last loop, so
                        // reset retry:
                        retry = false;
                    }

                    lastGen = gen;

                    segmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);

                    try
                    {
                        System.Object v = DoBody(segmentFileName);
                        Lucene.Net.Index.SegmentInfos.Message("success on " + segmentFileName);

                        return(v);
                    }
                    catch (System.IO.IOException err)
                    {
                        // Save the original root cause:
                        if (exc == null)
                        {
                            exc = err;
                        }

                        Lucene.Net.Index.SegmentInfos.Message("primary Exception on '" + segmentFileName + "': " + err + "'; will retry: retry=" + retry + "; gen = " + gen);

                        if (!retry && gen > 1)
                        {
                            // This is our first time trying this segments
                            // file (because retry is false), and, there is
                            // possibly a segments_(N-1) (because gen > 1).
                            // So, check if the segments_(N-1) exists and
                            // try it if so:
                            System.String prevSegmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen - 1);

                            bool prevExists;
                            prevExists = directory.FileExists(prevSegmentFileName);

                            if (prevExists)
                            {
                                Lucene.Net.Index.SegmentInfos.Message("fallback to prior segment file '" + prevSegmentFileName + "'");
                                try
                                {
                                    System.Object v = DoBody(prevSegmentFileName);
                                    if (exc != null)
                                    {
                                        Lucene.Net.Index.SegmentInfos.Message("success on fallback " + prevSegmentFileName);
                                    }
                                    return(v);
                                }
                                catch (System.IO.IOException err2)
                                {
                                    Lucene.Net.Index.SegmentInfos.Message("secondary Exception on '" + prevSegmentFileName + "': " + err2 + "'; will retry");
                                }
                            }
                        }
                    }
                }
            }
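
The essence of methods 1 and 2 above is to take the larger of the two observed generations and derive the segments file name from it; a condensed, self-contained sketch with illustrative values:

                // Hypothetical sketch of the generation choice made above.
                long genFromListing = 4;   // e.g. highest segments_N generation seen in ListAll()
                long genFromGenFile = 5;   // e.g. value read twice, consistently, from segments.gen
                long chosenGen = System.Math.Max(genFromListing, genFromGenFile);
                string fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", chosenGen);
                // fileName now names the generation-5 segments file, which is what DoBody() is asked to load.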
Example #5
        /// <summary> Read a particular segmentFileName.  Note that this may
        /// throw an IOException if a commit is in process.
        ///
        /// </summary>
        /// <param name="directory">-- directory containing the segments file
        /// </param>
        /// <param name="segmentFileName">-- segment file to load
        /// </param>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        public void  Read(Directory directory, System.String segmentFileName)
        {
            bool success = false;

            // Clear any previous segments:
            Clear();

            var input = new ChecksumIndexInput(directory.OpenInput(segmentFileName));

            generation = GenerationFromSegmentsFileName(segmentFileName);

            lastGeneration = generation;

            try
            {
                int format = input.ReadInt();
                if (format < 0)
                {
                    // file contains explicit format info
                    // check that it is a format we can understand
                    if (format < CURRENT_FORMAT)
                    {
                        throw new CorruptIndexException("Unknown format version: " + format);
                    }
                    version = input.ReadLong();                    // read version
                    counter = input.ReadInt();                     // read counter
                }
                else
                {
                    // file is in old format without explicit format info
                    counter = format;
                }

                for (int i = input.ReadInt(); i > 0; i--)
                {
                    // read segmentInfos
                    Add(new SegmentInfo(directory, format, input));
                }

                if (format >= 0)
                {
                    // in old format the version number may be at the end of the file
                    if (input.FilePointer >= input.Length())
                    {
                        // old file format without version number
                        version = (DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond);
                    }
                    else
                    {
                        version = input.ReadLong();                         // read version
                    }
                }

                if (format <= FORMAT_USER_DATA)
                {
                    if (format <= FORMAT_DIAGNOSTICS)
                    {
                        userData = input.ReadStringStringMap();
                    }
                    else if (0 != input.ReadByte())
                    {
                        // TODO: Should be read-only map
                        userData = new HashMap<string, string> {
                            { "userData", input.ReadString() }
                        };
                    }
                    else
                    {
                        // TODO: Should be empty read-only map
                        userData = new HashMap<string, string>();
                    }
                }
                else
                {
                    // TODO: Should be empty read-only map
                    userData = new HashMap<string, string>();
                }

                if (format <= FORMAT_CHECKSUM)
                {
                    long checksumNow  = input.Checksum;
                    long checksumThen = input.ReadLong();
                    if (checksumNow != checksumThen)
                    {
                        throw new CorruptIndexException("checksum mismatch in segments file");
                    }
                }
                success = true;
            }
            finally
            {
                input.Close();
                if (!success)
                {
                    // Clear any segment infos we had loaded so we
                    // have a clean slate on retry:
                    Clear();
                }
            }
        }
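
Two conventions used above are worth spelling out: a negative first int is an explicit format version (newer formats are more negative), while a non-negative first int is actually the counter of a pre-header segments file. The trailing checksum check can be sketched in isolation as follows; the file name is a placeholder:

            // Hypothetical sketch of the trailing checksum check, mirroring Read() above.
            // ChecksumIndexInput keeps a running checksum of every byte read so far;
            // the file ends with that checksum stored as a long.
            var csInput = new ChecksumIndexInput(directory.OpenInput("segments_2"));   // placeholder file name
            try
            {
                // ... read the body exactly as Read() does ...
                long computed = csInput.Checksum;     // checksum of the bytes consumed so far
                long stored   = csInput.ReadLong();   // checksum written at the end of the file
                if (computed != stored)
                {
                    throw new CorruptIndexException("checksum mismatch in segments file");
                }
            }
            finally
            {
                csInput.Close();
            }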
Example #6
		internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize, int indexDivisor)
		{
			bool success = false;
			
			if (indexDivisor < 1 && indexDivisor != - 1)
			{
				throw new System.ArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
			}
			
			try
			{
				directory = dir;
				segment = seg;
				fieldInfos = fis;
				
				origEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_EXTENSION, readBufferSize), fieldInfos, false);
				size = origEnum.size;
				
				
				if (indexDivisor != - 1)
				{
					// Load terms index
					totalIndexInterval = origEnum.indexInterval * indexDivisor;
					var indexEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION, readBufferSize), fieldInfos, true);
					
					try
					{
						int indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor; // otherwise read index
						
						indexTerms = new Term[indexSize];
						indexInfos = new TermInfo[indexSize];
						indexPointers = new long[indexSize];
						
						for (int i = 0; indexEnum.Next(); i++)
						{
							indexTerms[i] = indexEnum.Term;
							indexInfos[i] = indexEnum.TermInfo();
							indexPointers[i] = indexEnum.indexPointer;
							
							for (int j = 1; j < indexDivisor; j++)
								if (!indexEnum.Next())
									break;
						}
					}
					finally
					{
						indexEnum.Close();
					}
				}
				else
				{
					// Do not load terms index:
					totalIndexInterval = - 1;
					indexTerms = null;
					indexInfos = null;
					indexPointers = null;
				}
				success = true;
			}
			finally
			{
				// With lock-less commits, it's entirely possible (and
				// fine) to hit a FileNotFound exception above. In
				// this case, we want to explicitly close any subset
				// of things that were opened so that we don't have to
				// wait for a GC to do so.
				if (!success)
				{
					Dispose();
				}
			}
		}
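
The indexDivisor above subsamples the terms index: only every indexDivisor-th index term is kept in memory, so the loaded index shrinks to 1 + (size - 1) / indexDivisor entries at the cost of scanning proportionally more terms per lookup. A small arithmetic sketch with illustrative numbers:

			// Hypothetical sketch of the index-size arithmetic used above.
			long termsInIndexFile = 1000;                                   // illustrative value of indexEnum.size
			int divisor = 4;
			int loadedEntries = 1 + ((int) termsInIndexFile - 1) / divisor;
			// loadedEntries == 250: one in-memory entry per 4 on-disk index terms,
			// so a lookup may scan up to 4x more terms in the main .tis file.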
Example #7
        /// <summary>Returns a <see cref="Status" /> instance detailing
        /// the state of the index.
        ///
        /// </summary>
        /// <param name="onlySegments">list of specific segment names to check
        ///
        /// <p/>As this method checks every byte in the specified
        /// segments, on a large index it can take quite a long
        /// time to run.
        ///
        /// <p/><b>WARNING</b>: make sure
        /// you only call this when the index is not opened by any
        /// writer.
        /// </param>
        public virtual Status CheckIndex_Renamed_Method(List<string> onlySegments)
        {
            System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
            SegmentInfos sis    = new SegmentInfos();
            Status       result = new Status();

            result.dir = dir;
            try
            {
                sis.Read(dir);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not read any segments file in directory");
                result.missingSegments = true;
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                return(result);
            }

            int        numSegments      = sis.Count;
            var        segmentsFileName = sis.GetCurrentSegmentFileName();
            IndexInput input            = null;

            try
            {
                input = dir.OpenInput(segmentsFileName);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not open segments file in directory");
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                result.cantOpenSegments = true;
                return(result);
            }
            int format = 0;

            try
            {
                format = input.ReadInt();
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not read segment file version in directory");
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                result.missingSegmentVersion = true;
                return(result);
            }
            finally
            {
                if (input != null)
                {
                    input.Close();
                }
            }

            System.String sFormat = "";
            bool          skip    = false;

            if (format == SegmentInfos.FORMAT)
            {
                sFormat = "FORMAT [Lucene Pre-2.1]";
            }
            if (format == SegmentInfos.FORMAT_LOCKLESS)
            {
                sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
            }
            else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
            {
                sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
            }
            else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
            {
                sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
            }
            else
            {
                if (format == SegmentInfos.FORMAT_CHECKSUM)
                {
                    sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_DEL_COUNT)
                {
                    sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_HAS_PROX)
                {
                    sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_USER_DATA)
                {
                    sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
                }
                else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
                {
                    sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
                }
                else if (format < SegmentInfos.CURRENT_FORMAT)
                {
                    sFormat = "int=" + format + " [newer version of Lucene than this tool]";
                    skip    = true;
                }
                else
                {
                    sFormat = format + " [Lucene 1.3 or prior]";
                }
            }

            result.segmentsFileName = segmentsFileName;
            result.numSegments      = numSegments;
            result.segmentFormat    = sFormat;
            result.userData         = sis.UserData;
            System.String userDataString;
            if (sis.UserData.Count > 0)
            {
                userDataString = " userData=" + CollectionsHelper.CollectionToString(sis.UserData);
            }
            else
            {
                userDataString = "";
            }

            Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);

            if (onlySegments != null)
            {
                result.partial = true;
                if (infoStream != null)
                {
                    infoStream.Write("\nChecking only these segments:");
                }
                foreach (string s in onlySegments)
                {
                    if (infoStream != null)
                    {
                        infoStream.Write(" " + s);
                    }
                }
                result.segmentsChecked.AddRange(onlySegments);
                Msg(":");
            }

            if (skip)
            {
                Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
                result.toolOutOfDate = true;
                return(result);
            }


            result.newSegments = (SegmentInfos)sis.Clone();
            result.newSegments.Clear();

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info = sis.Info(i);
                if (onlySegments != null && !onlySegments.Contains(info.name))
                {
                    continue;
                }
                var segInfoStat = new Status.SegmentInfoStatus();
                result.segmentInfos.Add(segInfoStat);
                Msg("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
                segInfoStat.name     = info.name;
                segInfoStat.docCount = info.docCount;

                int toLoseDocCount = info.docCount;

                SegmentReader reader = null;

                try
                {
                    Msg("    compound=" + info.GetUseCompoundFile());
                    segInfoStat.compound = info.GetUseCompoundFile();
                    Msg("    hasProx=" + info.HasProx);
                    segInfoStat.hasProx = info.HasProx;
                    Msg("    numFiles=" + info.Files().Count);
                    segInfoStat.numFiles = info.Files().Count;
                    Msg(System.String.Format(nf, "    size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
                    segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
                    IDictionary<string, string> diagnostics = info.Diagnostics;
                    segInfoStat.diagnostics = diagnostics;
                    if (diagnostics.Count > 0)
                    {
                        Msg("    diagnostics = " + CollectionsHelper.CollectionToString(diagnostics));
                    }

                    int docStoreOffset = info.DocStoreOffset;
                    if (docStoreOffset != -1)
                    {
                        Msg("    docStoreOffset=" + docStoreOffset);
                        segInfoStat.docStoreOffset = docStoreOffset;
                        Msg("    docStoreSegment=" + info.DocStoreSegment);
                        segInfoStat.docStoreSegment = info.DocStoreSegment;
                        Msg("    docStoreIsCompoundFile=" + info.DocStoreIsCompoundFile);
                        segInfoStat.docStoreCompoundFile = info.DocStoreIsCompoundFile;
                    }
                    System.String delFileName = info.GetDelFileName();
                    if (delFileName == null)
                    {
                        Msg("    no deletions");
                        segInfoStat.hasDeletions = false;
                    }
                    else
                    {
                        Msg("    has deletions [delFileName=" + delFileName + "]");
                        segInfoStat.hasDeletions      = true;
                        segInfoStat.deletionsFileName = delFileName;
                    }
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: open reader.........");
                    }
                    reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);

                    segInfoStat.openReaderPassed = true;

                    int numDocs = reader.NumDocs();
                    toLoseDocCount = numDocs;
                    if (reader.HasDeletions)
                    {
                        if (reader.deletedDocs.Count() != info.GetDelCount())
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        }
                        if (reader.deletedDocs.Count() > reader.MaxDoc)
                        {
                            throw new System.SystemException("too many deleted docs: MaxDoc=" + reader.MaxDoc + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        }
                        if (info.docCount - numDocs != info.GetDelCount())
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                        }
                        segInfoStat.numDeleted = info.docCount - numDocs;
                        Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
                    }
                    else
                    {
                        if (info.GetDelCount() != 0)
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                        }
                        Msg("OK");
                    }
                    if (reader.MaxDoc != info.docCount)
                    {
                        throw new System.SystemException("SegmentReader.MaxDoc " + reader.MaxDoc + " != SegmentInfos.docCount " + info.docCount);
                    }

                    // Test getFieldNames()
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: fields..............");
                    }
                    ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
                    Msg("OK [" + fieldNames.Count + " fields]");
                    segInfoStat.numFields = fieldNames.Count;

                    // Test Field Norms
                    segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);

                    // Test the Term Index
                    segInfoStat.termIndexStatus = TestTermIndex(info, reader);

                    // Test Stored Fields
                    segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);

                    // Test Term Vectors
                    segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);

                    // Rethrow the first exception we encountered
                    //  This will cause stats for failed segments to be incremented properly
                    if (segInfoStat.fieldNormStatus.error != null)
                    {
                        throw new SystemException("Field Norm test failed");
                    }
                    else if (segInfoStat.termIndexStatus.error != null)
                    {
                        throw new SystemException("Term Index test failed");
                    }
                    else if (segInfoStat.storedFieldStatus.error != null)
                    {
                        throw new SystemException("Stored Field test failed");
                    }
                    else if (segInfoStat.termVectorStatus.error != null)
                    {
                        throw new System.SystemException("Term Vector test failed");
                    }

                    Msg("");
                }
                catch (System.Exception t)
                {
                    Msg("FAILED");
                    const string comment = "fixIndex() would remove reference to this segment";
                    Msg("    WARNING: " + comment + "; full exception:");
                    if (infoStream != null)
                    {
                        infoStream.WriteLine(t.StackTrace);
                    }
                    Msg("");
                    result.totLoseDocCount += toLoseDocCount;
                    result.numBadSegments++;
                    continue;
                }
                finally
                {
                    if (reader != null)
                    {
                        reader.Close();
                    }
                }

                // Keeper
                result.newSegments.Add((SegmentInfo)info.Clone());
            }

            if (0 == result.numBadSegments)
            {
                result.clean = true;
                Msg("No problems were detected with this index.\n");
            }
            else
            {
                Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
            }

            return(result);
        }
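
A hedged usage sketch for the method above; the CheckIndex constructor is assumed to take the Directory being checked, and the Status fields are the ones populated in the code:

            // Hypothetical usage sketch; the constructor signature is an assumption.
            var checker = new CheckIndex(dir);
            Status status = checker.CheckIndex_Renamed_Method(null);   // null = check every segment
            if (status.clean)
            {
                System.Console.WriteLine("index is clean");
            }
            else
            {
                System.Console.WriteLine(status.numBadSegments + " bad segment(s); " +
                                         status.totLoseDocCount + " doc(s) would be lost by fixIndex()");
            }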
Example #8
        internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size)
        {
            bool success = false;

            isOriginal = true;
            try
            {
                fieldInfos = fn;

                cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize);
                cloneableIndexStream  = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize);

                // First version of fdx did not include a format
                // header, but, the first int will always be 0 in that
                // case
                int firstInt = cloneableIndexStream.ReadInt();
                format = firstInt == 0 ? 0 : firstInt;

                if (format > FieldsWriter.FORMAT_CURRENT)
                {
                    throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FieldsWriter.FORMAT_CURRENT + " or lower");
                }

                formatSize = format > FieldsWriter.FORMAT ? 4 : 0;

                if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
                {
                    cloneableFieldsStream.SetModifiedUTF8StringsMode();
                }

                fieldsStream = (IndexInput)cloneableFieldsStream.Clone();

                long indexSize = cloneableIndexStream.Length() - formatSize;

                if (docStoreOffset != -1)
                {
                    // We read only a slice out of this shared fields file
                    this.docStoreOffset = docStoreOffset;
                    this.size           = size;

                    // Verify the file is long enough to hold all of our
                    // docs
                    System.Diagnostics.Debug.Assert(((int)(indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
                }
                else
                {
                    this.docStoreOffset = 0;
                    this.size           = (int)(indexSize >> 3);
                }

                indexStream  = (IndexInput)cloneableIndexStream.Clone();
                numTotalDocs = (int)(indexSize >> 3);
                success      = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    Dispose();
                }
            }
        }
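
The size arithmetic above relies on the fields index (.fdx) storing one 8-byte pointer per document, so the document count is the payload length divided by 8 (indexSize >> 3). A tiny worked sketch:

            // Hypothetical sketch of the document-count arithmetic used above.
            long fdxPayloadBytes = 8000;                        // .fdx length minus the format header
            int docsInFile = (int)(fdxPayloadBytes >> 3);       // one 8-byte pointer per document
            // docsInFile == 1000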
Example #9
		/// <summary>Constructs a bit vector from the file <c>name</c> in Directory
		/// <c>d</c>, as written by the <see cref="Write" /> method.
		/// </summary>
		public BitVector(Directory d, System.String name)
		{
			IndexInput input = d.OpenInput(name);
			try
			{
				size = input.ReadInt(); // read size
				if (size == - 1)
				{
					ReadDgaps(input);
				}
				else
				{
					ReadBits(input);
				}
			}
			finally
			{
				input.Close();
			}
		}
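
A short usage sketch for the constructor above; the deletions file name and the Get/Count/Size accessors are assumptions for illustration:

		// Hypothetical sketch: load a deletions bit vector and query it.
		// 'dir' is an already-open Directory; "_0_1.del" is an assumed file name.
		BitVector deleted = new BitVector(dir, "_0_1.del");
		if (deleted.Get(42))					// Get(int) assumed to test one bit
		{
			System.Console.WriteLine("doc 42 is deleted");
		}
		System.Console.WriteLine(deleted.Count() + " of " + deleted.Size() + " bits set");	// assumed accessors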