Пример #1
0
        public /*internal*/ Document Doc(int n, FieldSelector fieldSelector)
        {
            SeekIndex(n);
            long position = indexStream.ReadLong();

            fieldsStream.Seek(position);

            Document doc       = new Document();
            int      numFields = fieldsStream.ReadVInt();

            for (int i = 0; i < numFields; i++)
            {
                int                 fieldNumber = fieldsStream.ReadVInt();
                FieldInfo           fi          = fieldInfos.FieldInfo(fieldNumber);
                FieldSelectorResult acceptField = fieldSelector == null?FieldSelectorResult.LOAD:fieldSelector.Accept(fi.name);

                byte bits = fieldsStream.ReadByte();
                System.Diagnostics.Debug.Assert(bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY);

                bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
                bool tokenize   = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
                bool binary     = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
                //TODO: Find an alternative approach here if this list continues to grow beyond the
                //list of 5 or 6 currently here.  See Lucene 762 for discussion
                if (acceptField.Equals(FieldSelectorResult.LOAD))
                {
                    AddField(doc, fi, binary, compressed, tokenize);
                }
                else if (acceptField.Equals(FieldSelectorResult.LOAD_FOR_MERGE))
                {
                    AddFieldForMerge(doc, fi, binary, compressed, tokenize);
                }
                else if (acceptField.Equals(FieldSelectorResult.LOAD_AND_BREAK))
                {
                    AddField(doc, fi, binary, compressed, tokenize);
                    break;                     //Get out of this loop
                }
                else if (acceptField.Equals(FieldSelectorResult.LAZY_LOAD))
                {
                    AddFieldLazy(doc, fi, binary, compressed, tokenize);
                }
                else if (acceptField.Equals(FieldSelectorResult.SIZE))
                {
                    SkipField(binary, compressed, AddFieldSize(doc, fi, binary, compressed));
                }
                else if (acceptField.Equals(FieldSelectorResult.SIZE_AND_BREAK))
                {
                    AddFieldSize(doc, fi, binary, compressed);
                    break;
                }
                else
                {
                    SkipField(binary, compressed);
                }
            }

            return(doc);
        }
Пример #2
0
        /// <summary>Retrieve the length (in bytes) of the tvd and tvf
        /// entries for the next numDocs starting with
        /// startDocID.  This is used for bulk copying when
        /// merging segments, if the field numbers are
        /// congruent.  Once this returns, the tvf &amp; tvd streams
        /// are seeked to the startDocID.
        /// </summary>
        internal void  RawDocs(int[] tvdLengths, int[] tvfLengths, int startDocID, int numDocs)
        {
            if (tvx == null)
            {
                for (int i = 0; i < tvdLengths.Length; i++)
                {
                    tvdLengths[i] = 0;
                }
                for (int i = 0; i < tvfLengths.Length; i++)
                {
                    tvfLengths[i] = 0;
                }
                return;
            }

            // SegmentMerger calls canReadRawDocs() first and should
            // not call us if that returns false.
            if (format < FORMAT_VERSION2)
            {
                throw new System.SystemException("cannot read raw docs with older term vector formats");
            }

            SeekTvx(startDocID);

            long tvdPosition = tvx.ReadLong();

            tvd.Seek(tvdPosition);

            long tvfPosition = tvx.ReadLong();

            tvf.Seek(tvfPosition);

            long lastTvdPosition = tvdPosition;
            long lastTvfPosition = tvfPosition;

            int count = 0;

            while (count < numDocs)
            {
                int docID = docStoreOffset + startDocID + count + 1;
                System.Diagnostics.Debug.Assert(docID <= numTotalDocs);
                if (docID < numTotalDocs)
                {
                    tvdPosition = tvx.ReadLong();
                    tvfPosition = tvx.ReadLong();
                }
                else
                {
                    tvdPosition = tvd.Length();
                    tvfPosition = tvf.Length();
                    System.Diagnostics.Debug.Assert(count == numDocs - 1);
                }
                tvdLengths[count] = (int)(tvdPosition - lastTvdPosition);
                tvfLengths[count] = (int)(tvfPosition - lastTvfPosition);
                count++;
                lastTvdPosition = tvdPosition;
                lastTvfPosition = tvfPosition;
            }
        }
Пример #3
0
        public void  Read(Directory directory)
        {
            IndexInput input = directory.OpenInput(IndexFileNames.SEGMENTS);

            try
            {
                int format = input.ReadInt();
                if (format < 0)
                {
                    // file contains explicit format info
                    // check that it is a format we can understand
                    if (format < FORMAT)
                    {
                        throw new System.IO.IOException("Unknown format version: " + format);
                    }
                    version = input.ReadLong();                    // read version
                    counter = input.ReadInt();                     // read counter
                }
                else
                {
                    // file is in old format without explicit format info
                    counter = format;
                }

                for (int i = input.ReadInt(); i > 0; i--)
                {
                    // read segmentInfos
                    SegmentInfo si = new SegmentInfo(input.ReadString(), input.ReadInt(), directory);
                    Add(si);
                }

                if (format >= 0)
                {
                    // in old format the version number may be at the end of the file
                    if (input.GetFilePointer() >= input.Length())
                    {
                        version = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
                    }
                    // old file format without version number
                    else
                    {
                        version = input.ReadLong();                         // read version
                    }
                }
            }
            finally
            {
                input.Close();
            }
        }
Пример #4
0
        internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi)
        {
            input = i;
            fieldInfos = fis;
            isIndex = isi;
            maxSkipLevels = 1; // use single-level skip lists for formats > -3

            int firstInt = input.ReadInt();
            if (firstInt >= 0)
            {
                // original-format file, without explicit format version number
                format = 0;
                size = firstInt;

                // back-compatible settings
                indexInterval = 128;
                skipInterval = System.Int32.MaxValue; // switch off skipTo optimization
            }
            else
            {
                // we have a format version number
                format = firstInt;

                // check that it is a format we can understand
                if (format < TermInfosWriter.FORMAT_CURRENT)
                    throw new CorruptIndexException("Unknown format version:" + format + " expected " + TermInfosWriter.FORMAT_CURRENT + " or higher");

                size = input.ReadLong(); // read the size

                if (format == - 1)
                {
                    if (!isIndex)
                    {
                        indexInterval = input.ReadInt();
                        formatM1SkipInterval = input.ReadInt();
                    }
                    // switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
                    // skipTo implementation of these versions
                    skipInterval = System.Int32.MaxValue;
                }
                else
                {
                    indexInterval = input.ReadInt();
                    skipInterval = input.ReadInt();
                    if (format <= TermInfosWriter.FORMAT)
                    {
                        // this new format introduces multi-level skipping
                        maxSkipLevels = input.ReadInt();
                    }
                }
                System.Diagnostics.Debug.Assert(indexInterval > 0, "indexInterval=" + indexInterval + " is negative; must be > 0");
                System.Diagnostics.Debug.Assert(skipInterval > 0, "skipInterval=" + skipInterval + " is negative; must be > 0");
            }
            if (format > TermInfosWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
            {
                termBuffer.SetPreUTF8Strings();
                scanBuffer.SetPreUTF8Strings();
                prevBuffer.SetPreUTF8Strings();
            }
        }
Пример #5
0
        internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi)
        {
            input         = i;
            fieldInfos    = fis;
            isIndex       = isi;
            maxSkipLevels = 1;             // use single-level skip lists for formats > -3

            int firstInt = input.ReadInt();

            if (firstInt >= 0)
            {
                // original-format file, without explicit format version number
                format = 0;
                size   = firstInt;

                // back-compatible settings
                indexInterval = 128;
                skipInterval  = System.Int32.MaxValue;                // switch off skipTo optimization
            }
            else
            {
                // we have a format version number
                format = firstInt;

                // check that it is a format we can understand
                if (format < TermInfosWriter.FORMAT)
                {
                    throw new CorruptIndexException("Unknown format version:" + format);
                }

                size = input.ReadLong();                 // read the size

                if (format == -1)
                {
                    if (!isIndex)
                    {
                        indexInterval        = input.ReadInt();
                        formatM1SkipInterval = input.ReadInt();
                    }
                    // switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
                    // skipTo implementation of these versions
                    skipInterval = System.Int32.MaxValue;
                }
                else
                {
                    indexInterval = input.ReadInt();
                    skipInterval  = input.ReadInt();
                    if (format == -3)
                    {
                        // this new format introduces multi-level skipping
                        maxSkipLevels = input.ReadInt();
                    }
                }
            }
        }
Пример #6
0
        public CompoundFileReader(Directory dir, string name, int readBufferSize)
        {
            directory           = dir;
            fileName            = name;
            this.readBufferSize = readBufferSize;

            bool success = false;

            try
            {
                stream = dir.OpenInput(name, readBufferSize);

                // read the directory and init files
                int       count = stream.ReadVInt();
                FileEntry entry = null;
                for (int i = 0; i < count; i++)
                {
                    long   offset = stream.ReadLong();
                    string id     = stream.ReadString();

                    if (entry != null)
                    {
                        // set length of the previous entry
                        entry.length = offset - entry.offset;
                    }

                    entry = new FileEntry {
                        offset = offset
                    };
                    entries[id] = entry;
                }

                // set the length of the final entry
                if (entry != null)
                {
                    entry.length = stream.Length() - entry.offset;
                }

                success = true;
            }
            finally
            {
                if (!success && (stream != null))
                {
                    try
                    {
                        stream.Close();
                    }
                    catch (System.IO.IOException)
                    {
                    }
                }
            }
        }
Пример #7
0
 /// <summary> Construct a new SegmentInfo instance by reading a
 /// previously saved SegmentInfo from input.
 ///
 /// </summary>
 /// <param name="dir">directory to load from
 /// </param>
 /// <param name="format">format of the segments info file
 /// </param>
 /// <param name="input">input handle to read segment info from
 /// </param>
 public SegmentInfo(Directory dir, int format, IndexInput input)
 {
     this.dir = dir;
     name     = input.ReadString();
     docCount = input.ReadInt();
     if (format <= SegmentInfos.FORMAT_LOCKLESS)
     {
         delGen = input.ReadLong();
         if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE)
         {
             hasSingleNormFile = (1 == input.ReadByte());
         }
         else
         {
             hasSingleNormFile = false;
         }
         int numNormGen = input.ReadInt();
         if (numNormGen == -1)
         {
             normGen = null;
         }
         else
         {
             normGen = new long[numNormGen];
             for (int j = 0; j < numNormGen; j++)
             {
                 normGen[j] = input.ReadLong();
             }
         }
         isCompoundFile = (sbyte)input.ReadByte();
         preLockless    = isCompoundFile == 0;
     }
     else
     {
         delGen            = 0;
         normGen           = null;
         isCompoundFile    = 0;
         preLockless       = true;
         hasSingleNormFile = false;
     }
 }
Пример #8
0
		public CompoundFileReader(Directory dir, System.String name, int readBufferSize)
		{
			directory = dir;
			fileName = name;
			this.readBufferSize = readBufferSize;
			
			bool success = false;
			
			try
			{
				stream = dir.OpenInput(name, readBufferSize);
				
				// read the directory and init files
				int count = stream.ReadVInt();
				FileEntry entry = null;
				for (int i = 0; i < count; i++)
				{
					long offset = stream.ReadLong();
					System.String id = stream.ReadString();
					
					if (entry != null)
					{
						// set length of the previous entry
						entry.length = offset - entry.offset;
					}
					
					entry = new FileEntry();
					entry.offset = offset;
					entries[id] = entry;
				}
				
				// set the length of the final entry
				if (entry != null)
				{
					entry.length = stream.Length() - entry.offset;
				}
				
				success = true;
			}
			finally
			{
				if (!success && (stream != null))
				{
					try
					{
						stream.Close();
					}
					catch (System.IO.IOException e)
					{
					}
				}
			}
		}
Пример #9
0
		internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi)
		{
			input = i;
			fieldInfos = fis;
			isIndex = isi;
			
			int firstInt = input.ReadInt();
			if (firstInt >= 0)
			{
				// original-format file, without explicit format version number
				format = 0;
				size = firstInt;
				
				// back-compatible settings
				indexInterval = 128;
				skipInterval = System.Int32.MaxValue; // switch off skipTo optimization
			}
			else
			{
				// we have a format version number
				format = firstInt;
				
				// check that it is a format we can understand
				if (format < TermInfosWriter.FORMAT)
					throw new System.IO.IOException("Unknown format version:" + format);
				
				size = input.ReadLong(); // read the size
				
				if (format == - 1)
				{
					if (!isIndex)
					{
						indexInterval = input.ReadInt();
						formatM1SkipInterval = input.ReadInt();
					}
					// switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in 
					// skipTo implementation of these versions
					skipInterval = System.Int32.MaxValue;
				}
				else
				{
					indexInterval = input.ReadInt();
					skipInterval = input.ReadInt();
				}
			}
		}
Пример #10
0
        public override long Get(int index)
        {
            int  blockOffset = index / ValuesPerBlock;
            long skip        = ((long)blockOffset) << 3;

            try
            {
                @in.Seek(StartPointer + skip);

                long block         = @in.ReadLong();
                int  offsetInBlock = index % ValuesPerBlock;
                return(((long)((ulong)block >> (offsetInBlock * bitsPerValue))) & Mask);
            }
            catch (System.IO.IOException e)
            {
                throw new InvalidOperationException("failed", e);
            }
        }
Пример #11
0
            public override System.Object DoBody(System.String segmentFileName)
            {
                IndexInput input = directory.OpenInput(segmentFileName);

                int  format  = 0;
                long version = 0;

                try
                {
                    format = input.ReadInt();
                    if (format < 0)
                    {
                        if (format < Lucene.Net.Index.SegmentInfos.FORMAT_SINGLE_NORM_FILE)
                        {
                            throw new System.IO.IOException("Unknown format version: " + format);
                        }
                        version = input.ReadLong();                         // read version
                    }
                }
                finally
                {
                    input.Close();
                }

                if (format < 0)
                {
                    return((long)version);
                }

                // We cannot be sure about the format of the file.
                // Therefore we have to read the whole file and cannot simply seek to the version entry.
                SegmentInfos sis = new SegmentInfos();

                sis.Read(directory, segmentFileName);
                return((long)sis.GetVersion());
            }
Пример #12
0
        /// <summary> Current version number from segments file.</summary>
        public static long ReadCurrentVersion(Directory directory)
        {
            IndexInput input   = directory.OpenInput(IndexFileNames.SEGMENTS);
            int        format  = 0;
            long       version = 0;

            try
            {
                format = input.ReadInt();
                if (format < 0)
                {
                    if (format < FORMAT)
                    {
                        throw new System.IO.IOException("Unknown format version: " + format);
                    }
                    version = input.ReadLong();                     // read version
                }
            }
            finally
            {
                input.Close();
            }

            if (format < 0)
            {
                return(version);
            }

            // We cannot be sure about the format of the file.
            // Therefore we have to read the whole file and cannot simply seek to the version entry.

            SegmentInfos sis = new SegmentInfos();

            sis.Read(directory);
            return(sis.GetVersion());
        }
Пример #13
0
 private NumericDocValues LoadLongField(FieldInfo field, IndexInput input)
 {
     CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.INTS_CODEC_NAME, Lucene40DocValuesFormat.INTS_VERSION_START, Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
     int valueSize = input.ReadInt();
     if (valueSize != 8)
     {
         throw new CorruptIndexException("invalid valueSize: " + valueSize);
     }
     int maxDoc = State.SegmentInfo.DocCount;
     long[] values = new long[maxDoc];
     for (int i = 0; i < values.Length; i++)
     {
         values[i] = input.ReadLong();
     }
     RamBytesUsed_Renamed.AddAndGet(RamUsageEstimator.SizeOf(values));
     return new NumericDocValuesAnonymousInnerClassHelper6(values);
 }
Пример #14
0
        /// <summary> Construct a new SegmentInfo instance by reading a
        /// previously saved SegmentInfo from input.
        ///
        /// </summary>
        /// <param name="dir">directory to load from
        /// </param>
        /// <param name="format">format of the segments info file
        /// </param>
        /// <param name="input">input handle to read segment info from
        /// </param>
        internal SegmentInfo(Directory dir, int format, IndexInput input)
        {
            this.dir = dir;
            name     = input.ReadString();
            docCount = input.ReadInt();
            if (format <= SegmentInfos.FORMAT_LOCKLESS)
            {
                delGen = input.ReadLong();
                if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE)
                {
                    docStoreOffset = input.ReadInt();
                    if (docStoreOffset != -1)
                    {
                        docStoreSegment        = input.ReadString();
                        docStoreIsCompoundFile = (1 == input.ReadByte());
                    }
                    else
                    {
                        docStoreSegment        = name;
                        docStoreIsCompoundFile = false;
                    }
                }
                else
                {
                    docStoreOffset         = -1;
                    docStoreSegment        = name;
                    docStoreIsCompoundFile = false;
                }
                if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE)
                {
                    hasSingleNormFile = (1 == input.ReadByte());
                }
                else
                {
                    hasSingleNormFile = false;
                }
                int numNormGen = input.ReadInt();
                if (numNormGen == NO)
                {
                    normGen = null;
                }
                else
                {
                    normGen = new long[numNormGen];
                    for (int j = 0; j < numNormGen; j++)
                    {
                        normGen[j] = input.ReadLong();
                    }
                }
                isCompoundFile = (sbyte)input.ReadByte();
                preLockless    = (isCompoundFile == CHECK_DIR);
                if (format <= SegmentInfos.FORMAT_DEL_COUNT)
                {
                    delCount = input.ReadInt();
                    System.Diagnostics.Debug.Assert(delCount <= docCount);
                }
                else
                {
                    delCount = -1;
                }
                if (format <= SegmentInfos.FORMAT_HAS_PROX)
                {
                    hasProx = input.ReadByte() == 1;
                }
                else
                {
                    hasProx = true;
                }

                if (format <= SegmentInfos.FORMAT_DIAGNOSTICS)
                {
                    diagnostics = input.ReadStringStringMap();
                }
                else
                {
                    diagnostics = new Dictionary <string, string>();
                }
            }
            else
            {
                delGen                 = CHECK_DIR;
                normGen                = null;
                isCompoundFile         = (sbyte)(CHECK_DIR);
                preLockless            = true;
                hasSingleNormFile      = false;
                docStoreOffset         = -1;
                docStoreIsCompoundFile = false;
                docStoreSegment        = null;
                delCount               = -1;
                hasProx                = true;
                diagnostics            = new Dictionary <string, string>();
            }
        }
Пример #15
0
 private NumericDocValues LoadVarIntsField(FieldInfo field, IndexInput input)
 {
     CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.VAR_INTS_CODEC_NAME, Lucene40DocValuesFormat.VAR_INTS_VERSION_START, Lucene40DocValuesFormat.VAR_INTS_VERSION_CURRENT);
     var header = (sbyte)input.ReadByte();
     if (header == Lucene40DocValuesFormat.VAR_INTS_FIXED_64)
     {
         int maxDoc = State.SegmentInfo.DocCount;
         var values = new long[maxDoc];
         for (int i = 0; i < values.Length; i++)
         {
             values[i] = input.ReadLong();
         }
         RamBytesUsed_Renamed.AddAndGet(RamUsageEstimator.SizeOf(values));
         return new NumericDocValuesAnonymousInnerClassHelper(values);
     }
     else if (header == Lucene40DocValuesFormat.VAR_INTS_PACKED)
     {
         long minValue = input.ReadLong();
         long defaultValue = input.ReadLong();
         PackedInts.Reader reader = PackedInts.GetReader(input);
         RamBytesUsed_Renamed.AddAndGet(reader.RamBytesUsed());
         return new NumericDocValuesAnonymousInnerClassHelper2(minValue, defaultValue, reader);
     }
     else
     {
         throw new CorruptIndexException("invalid VAR_INTS header byte: " + header + " (resource=" + input + ")");
     }
 }
Пример #16
0
            public System.Object Run(IndexCommit commit)
            {
                if (commit != null)
                {
                    if (directory != commit.GetDirectory())
                    {
                        throw new System.IO.IOException("the specified commit does not match the specified Directory");
                    }
                    return(DoBody(commit.GetSegmentsFileName()));
                }

                System.String segmentFileName   = null;
                long          lastGen           = -1;
                long          gen               = 0;
                int           genLookaheadCount = 0;

                System.IO.IOException exc = null;
                bool retry = false;

                int method = 0;

                // Loop until we succeed in calling doBody() without
                // hitting an IOException.  An IOException most likely
                // means a commit was in process and has finished, in
                // the time it took us to load the now-old infos files
                // (and segments files).  It's also possible it's a
                // true error (corrupt index).  To distinguish these,
                // on each retry we must see "forward progress" on
                // which generation we are trying to load.  If we
                // don't, then the original error is real and we throw
                // it.

                // We have three methods for determining the current
                // generation.  We try the first two in parallel, and
                // fall back to the third when necessary.

                while (true)
                {
                    if (0 == method)
                    {
                        // Method 1: list the directory and use the highest
                        // segments_N file.  This method works well as long
                        // as there is no stale caching on the directory
                        // contents (NOTE: NFS clients often have such stale
                        // caching):
                        System.String[] files = null;

                        long genA = -1;

                        files = directory.ListAll();

                        if (files != null)
                        {
                            genA = Lucene.Net.Index.SegmentInfos.GetCurrentSegmentGeneration(files);
                        }

                        Lucene.Net.Index.SegmentInfos.Message("directory listing genA=" + genA);

                        // Method 2: open segments.gen and read its
                        // contents.  Then we take the larger of the two
                        // gen's.  This way, if either approach is hitting
                        // a stale cache (NFS) we have a better chance of
                        // getting the right generation.
                        long genB = -1;
                        for (int i = 0; i < Lucene.Net.Index.SegmentInfos.defaultGenFileRetryCount; i++)
                        {
                            IndexInput genInput = null;
                            try
                            {
                                genInput = directory.OpenInput(IndexFileNames.SEGMENTS_GEN);
                            }
                            catch (System.IO.FileNotFoundException e)
                            {
                                Lucene.Net.Index.SegmentInfos.Message("segments.gen open: FileNotFoundException " + e);
                                break;
                            }
                            catch (System.IO.IOException e)
                            {
                                Lucene.Net.Index.SegmentInfos.Message("segments.gen open: IOException " + e);
                            }

                            if (genInput != null)
                            {
                                try
                                {
                                    int version = genInput.ReadInt();
                                    if (version == Lucene.Net.Index.SegmentInfos.FORMAT_LOCKLESS)
                                    {
                                        long gen0 = genInput.ReadLong();
                                        long gen1 = genInput.ReadLong();
                                        Lucene.Net.Index.SegmentInfos.Message("fallback check: " + gen0 + "; " + gen1);
                                        if (gen0 == gen1)
                                        {
                                            // The file is consistent.
                                            genB = gen0;
                                            break;
                                        }
                                    }
                                }
                                catch (System.IO.IOException err2)
                                {
                                    // will retry
                                }
                                finally
                                {
                                    genInput.Close();
                                }
                            }
                            try
                            {
                                System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * Lucene.Net.Index.SegmentInfos.defaultGenFileRetryPauseMsec));
                            }
                            catch (System.Threading.ThreadInterruptedException ie)
                            {
                                // In 3.0 we will change this to throw
                                // InterruptedException instead
                                SupportClass.ThreadClass.Current().Interrupt();
                                throw new System.SystemException(ie.Message, ie);
                            }
                        }

                        Lucene.Net.Index.SegmentInfos.Message(IndexFileNames.SEGMENTS_GEN + " check: genB=" + genB);

                        // Pick the larger of the two gen's:
                        if (genA > genB)
                        {
                            gen = genA;
                        }
                        else
                        {
                            gen = genB;
                        }

                        if (gen == -1)
                        {
                            // Neither approach found a generation
                            System.String s;
                            if (files != null)
                            {
                                s = "";
                                for (int i = 0; i < files.Length; i++)
                                {
                                    s += (" " + files[i]);
                                }
                            }
                            else
                            {
                                s = " null";
                            }
                            throw new System.IO.FileNotFoundException("no segments* file found in " + directory + ": files:" + s);
                        }
                    }

                    // Third method (fallback if first & second methods
                    // are not reliable): since both directory cache and
                    // file contents cache seem to be stale, just
                    // advance the generation.
                    if (1 == method || (0 == method && lastGen == gen && retry))
                    {
                        method = 1;

                        if (genLookaheadCount < Lucene.Net.Index.SegmentInfos.defaultGenLookaheadCount)
                        {
                            gen++;
                            genLookaheadCount++;
                            Lucene.Net.Index.SegmentInfos.Message("look ahead increment gen to " + gen);
                        }
                    }

                    if (lastGen == gen)
                    {
                        // This means we're about to try the same
                        // segments_N last tried.  This is allowed,
                        // exactly once, because writer could have been in
                        // the process of writing segments_N last time.

                        if (retry)
                        {
                            // OK, we've tried the same segments_N file
                            // twice in a row, so this must be a real
                            // error.  We throw the original exception we
                            // got.
                            throw exc;
                        }
                        else
                        {
                            retry = true;
                        }
                    }
                    else if (0 == method)
                    {
                        // Segment file has advanced since our last loop, so
                        // reset retry:
                        retry = false;
                    }

                    lastGen = gen;

                    segmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);

                    try
                    {
                        System.Object v = DoBody(segmentFileName);
                        if (exc != null)
                        {
                            Lucene.Net.Index.SegmentInfos.Message("success on " + segmentFileName);
                        }
                        return(v);
                    }
                    catch (System.IO.IOException err)
                    {
                        // Save the original root cause:
                        if (exc == null)
                        {
                            exc = err;
                        }

                        Lucene.Net.Index.SegmentInfos.Message("primary Exception on '" + segmentFileName + "': " + err + "'; will retry: retry=" + retry + "; gen = " + gen);

                        if (!retry && gen > 1)
                        {
                            // This is our first time trying this segments
                            // file (because retry is false), and, there is
                            // possibly a segments_(N-1) (because gen > 1).
                            // So, check if the segments_(N-1) exists and
                            // try it if so:
                            System.String prevSegmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen - 1);

                            bool prevExists;
                            prevExists = directory.FileExists(prevSegmentFileName);

                            if (prevExists)
                            {
                                Lucene.Net.Index.SegmentInfos.Message("fallback to prior segment file '" + prevSegmentFileName + "'");
                                try
                                {
                                    System.Object v = DoBody(prevSegmentFileName);
                                    if (exc != null)
                                    {
                                        Lucene.Net.Index.SegmentInfos.Message("success on fallback " + prevSegmentFileName);
                                    }
                                    return(v);
                                }
                                catch (System.IO.IOException err2)
                                {
                                    Lucene.Net.Index.SegmentInfos.Message("secondary Exception on '" + prevSegmentFileName + "': " + err2 + "'; will retry");
                                }
                            }
                        }
                    }
                }
            }
Пример #17
0
            public System.Object run()
            {
                System.String segmentFileName   = null;
                long          lastGen           = -1;
                long          gen               = 0;
                int           genLookaheadCount = 0;

                System.IO.IOException exc = null;
                bool retry = false;

                int method = 0;

                // Loop until we succeed in calling doBody() without
                // hitting an IOException.  An IOException most likely
                // means a commit was in process and has finished, in
                // the time it took us to load the now-old infos files
                // (and segments files).  It's also possible it's a
                // true error (corrupt index).  To distinguish these,
                // on each retry we must see "forward progress" on
                // which generation we are trying to load.  If we
                // don't, then the original error is real and we throw
                // it.

                // We have three methods for determining the current
                // generation.  We try each in sequence.

                while (true)
                {
                    // Method 1: list the directory and use the highest
                    // segments_N file.  This method works well as long
                    // as there is no stale caching on the directory
                    // contents:
                    System.String[] files = null;

                    if (0 == method)
                    {
                        if (directory != null)
                        {
                            files = directory.List();
                        }
                        else
                        {
                            files = System.IO.Directory.GetFileSystemEntries(fileDirectory.FullName);
                            for (int i = 0; i < files.Length; i++)
                            {
                                files[i] = System.IO.Path.GetFileName(files[i]);
                            }
                        }

                        gen = Lucene.Net.Index.SegmentInfos.GetCurrentSegmentGeneration(files);

                        if (gen == -1)
                        {
                            System.String s = "";
                            for (int i = 0; i < files.Length; i++)
                            {
                                s += (" " + files[i]);
                            }
                            throw new System.IO.FileNotFoundException("no segments* file found: files:" + s);
                        }
                    }

                    // Method 2 (fallback if Method 1 isn't reliable):
                    // if the directory listing seems to be stale, then
                    // try loading the "segments.gen" file.
                    if (1 == method || (0 == method && lastGen == gen && retry))
                    {
                        method = 1;

                        for (int i = 0; i < Lucene.Net.Index.SegmentInfos.defaultGenFileRetryCount; i++)
                        {
                            IndexInput genInput = null;
                            try
                            {
                                genInput = directory.OpenInput(IndexFileNames.SEGMENTS_GEN);
                            }
                            catch (System.IO.IOException e)
                            {
                                Lucene.Net.Index.SegmentInfos.Message("segments.gen open: IOException " + e);
                            }
                            if (genInput != null)
                            {
                                try
                                {
                                    int version = genInput.ReadInt();
                                    if (version == Lucene.Net.Index.SegmentInfos.FORMAT_LOCKLESS)
                                    {
                                        long gen0 = genInput.ReadLong();
                                        long gen1 = genInput.ReadLong();
                                        Lucene.Net.Index.SegmentInfos.Message("fallback check: " + gen0 + "; " + gen1);
                                        if (gen0 == gen1)
                                        {
                                            // The file is consistent.
                                            if (gen0 > gen)
                                            {
                                                Lucene.Net.Index.SegmentInfos.Message("fallback to '" + IndexFileNames.SEGMENTS_GEN + "' check: now try generation " + gen0 + " > " + gen);
                                                gen = gen0;
                                            }
                                            break;
                                        }
                                    }
                                }
                                catch (System.IO.IOException err2)
                                {
                                    // will retry
                                }
                                finally
                                {
                                    genInput.Close();
                                }
                            }
                            try
                            {
                                System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * Lucene.Net.Index.SegmentInfos.defaultGenFileRetryPauseMsec));
                            }
                            catch (System.Threading.ThreadInterruptedException e)
                            {
                                // will retry
                            }
                        }
                    }

                    // Method 3 (fallback if Methods 2 & 3 are not
                    // reliable): since both directory cache and file
                    // contents cache seem to be stale, just advance the
                    // generation.
                    if (2 == method || (1 == method && lastGen == gen && retry))
                    {
                        method = 2;

                        if (genLookaheadCount < Lucene.Net.Index.SegmentInfos.defaultGenLookaheadCount)
                        {
                            gen++;
                            genLookaheadCount++;
                            Lucene.Net.Index.SegmentInfos.Message("look ahead increment gen to " + gen);
                        }
                    }

                    if (lastGen == gen)
                    {
                        // This means we're about to try the same
                        // segments_N last tried.  This is allowed,
                        // exactly once, because writer could have been in
                        // the process of writing segments_N last time.

                        if (retry)
                        {
                            // OK, we've tried the same segments_N file
                            // twice in a row, so this must be a real
                            // error.  We throw the original exception we
                            // got.
                            throw exc;
                        }
                        else
                        {
                            retry = true;
                        }
                    }
                    else
                    {
                        // Segment file has advanced since our last loop, so
                        // reset retry:
                        retry = false;
                    }

                    lastGen = gen;

                    segmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);

                    try
                    {
                        System.Object v = DoBody(segmentFileName);
                        if (exc != null)
                        {
                            Lucene.Net.Index.SegmentInfos.Message("success on " + segmentFileName);
                        }
                        return(v);
                    }
                    catch (System.IO.IOException err)
                    {
                        // Save the original root cause:
                        if (exc == null)
                        {
                            exc = err;
                        }

                        Lucene.Net.Index.SegmentInfos.Message("primary Exception on '" + segmentFileName + "': " + err + "'; will retry: retry=" + retry + "; gen = " + gen);

                        if (!retry && gen > 1)
                        {
                            // This is our first time trying this segments
                            // file (because retry is false), and, there is
                            // possibly a segments_(N-1) (because gen > 1).
                            // So, check if the segments_(N-1) exists and
                            // try it if so:
                            System.String prevSegmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen - 1);

                            if (directory.FileExists(prevSegmentFileName))
                            {
                                Lucene.Net.Index.SegmentInfos.Message("fallback to prior segment file '" + prevSegmentFileName + "'");
                                try
                                {
                                    System.Object v = DoBody(prevSegmentFileName);
                                    if (exc != null)
                                    {
                                        Lucene.Net.Index.SegmentInfos.Message("success on fallback " + prevSegmentFileName);
                                    }
                                    return(v);
                                }
                                catch (System.IO.IOException err2)
                                {
                                    Lucene.Net.Index.SegmentInfos.Message("secondary Exception on '" + prevSegmentFileName + "': " + err2 + "'; will retry");
                                }
                            }
                        }
                    }
                }
            }
Пример #18
0
		/// <summary> Construct a new SegmentInfo instance by reading a
		/// previously saved SegmentInfo from input.
		/// 
		/// </summary>
		/// <param name="dir">directory to load from
		/// </param>
		/// <param name="format">format of the segments info file
		/// </param>
		/// <param name="input">input handle to read segment info from
		/// </param>
		public SegmentInfo(Directory dir, int format, IndexInput input)
		{
			this.dir = dir;
			name = input.ReadString();
			docCount = input.ReadInt();
			if (format <= SegmentInfos.FORMAT_LOCKLESS)
			{
				delGen = input.ReadLong();
				if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE)
				{
					hasSingleNormFile = (1 == input.ReadByte());
				}
				else
				{
					hasSingleNormFile = false;
				}
				int numNormGen = input.ReadInt();
				if (numNormGen == - 1)
				{
					normGen = null;
				}
				else
				{
					normGen = new long[numNormGen];
					for (int j = 0; j < numNormGen; j++)
					{
						normGen[j] = input.ReadLong();
					}
				}
				isCompoundFile = (sbyte) input.ReadByte();
				preLockless = isCompoundFile == 0;
			}
			else
			{
				delGen = 0;
				normGen = null;
				isCompoundFile = 0;
				preLockless = true;
				hasSingleNormFile = false;
			}
		}
Пример #19
0
        public /*internal*/ Document Doc(int n)
        {
            indexStream.Seek(n * 8L);
            long position = indexStream.ReadLong();

            fieldsStream.Seek(position);

            Document doc       = new Document();
            int      numFields = fieldsStream.ReadVInt();

            for (int i = 0; i < numFields; i++)
            {
                int       fieldNumber = fieldsStream.ReadVInt();
                FieldInfo fi          = fieldInfos.FieldInfo(fieldNumber);

                byte bits = fieldsStream.ReadByte();

                bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
                bool tokenize   = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;

                if ((bits & FieldsWriter.FIELD_IS_BINARY) != 0)
                {
                    byte[] b = new byte[fieldsStream.ReadVInt()];
                    fieldsStream.ReadBytes(b, 0, b.Length);
                    if (compressed)
                    {
                        doc.Add(new Field(fi.name, Uncompress(b), Field.Store.COMPRESS));
                    }
                    else
                    {
                        doc.Add(new Field(fi.name, b, Field.Store.YES));
                    }
                }
                else
                {
                    Field.Index index;
                    Field.Store store = Field.Store.YES;

                    if (fi.isIndexed && tokenize)
                    {
                        index = Field.Index.TOKENIZED;
                    }
                    else if (fi.isIndexed && !tokenize)
                    {
                        index = Field.Index.UN_TOKENIZED;
                    }
                    else
                    {
                        index = Field.Index.NO;
                    }

                    Field.TermVector termVector = null;
                    if (fi.storeTermVector)
                    {
                        if (fi.storeOffsetWithTermVector)
                        {
                            if (fi.storePositionWithTermVector)
                            {
                                termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
                            }
                            else
                            {
                                termVector = Field.TermVector.WITH_OFFSETS;
                            }
                        }
                        else if (fi.storePositionWithTermVector)
                        {
                            termVector = Field.TermVector.WITH_POSITIONS;
                        }
                        else
                        {
                            termVector = Field.TermVector.YES;
                        }
                    }
                    else
                    {
                        termVector = Field.TermVector.NO;
                    }

                    if (compressed)
                    {
                        store = Field.Store.COMPRESS;
                        byte[] b = new byte[fieldsStream.ReadVInt()];
                        fieldsStream.ReadBytes(b, 0, b.Length);
                        Field f = new Field(fi.name, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index, termVector);
                        f.SetOmitNorms(fi.omitNorms);
                        doc.Add(f);
                    }
                    else
                    {
                        Field f = new Field(fi.name, fieldsStream.ReadString(), store, index, termVector);
                        f.SetOmitNorms(fi.omitNorms);
                        doc.Add(f);
                    }
                }
            }

            return(doc);
        }
Пример #20
0
		/// <summary> Construct a new SegmentInfo instance by reading a
		/// previously saved SegmentInfo from input.
		/// 
		/// </summary>
		/// <param name="dir">directory to load from
		/// </param>
		/// <param name="format">format of the segments info file
		/// </param>
		/// <param name="input">input handle to read segment info from
		/// </param>
		internal SegmentInfo(Directory dir, int format, IndexInput input)
		{
			this.dir = dir;
			name = input.ReadString();
			docCount = input.ReadInt();
			if (format <= SegmentInfos.FORMAT_LOCKLESS)
			{
				delGen = input.ReadLong();
				if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE)
				{
					docStoreOffset = input.ReadInt();
					if (docStoreOffset != - 1)
					{
						docStoreSegment = input.ReadString();
						docStoreIsCompoundFile = (1 == input.ReadByte());
					}
					else
					{
						docStoreSegment = name;
						docStoreIsCompoundFile = false;
					}
				}
				else
				{
					docStoreOffset = - 1;
					docStoreSegment = name;
					docStoreIsCompoundFile = false;
				}
				if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE)
				{
					hasSingleNormFile = (1 == input.ReadByte());
				}
				else
				{
					hasSingleNormFile = false;
				}
				int numNormGen = input.ReadInt();
				if (numNormGen == NO)
				{
					normGen = null;
				}
				else
				{
					normGen = new long[numNormGen];
					for (int j = 0; j < numNormGen; j++)
					{
						normGen[j] = input.ReadLong();
					}
				}
				isCompoundFile = (sbyte) input.ReadByte();
				preLockless = (isCompoundFile == CHECK_DIR);
				if (format <= SegmentInfos.FORMAT_DEL_COUNT)
				{
					delCount = input.ReadInt();
					System.Diagnostics.Debug.Assert(delCount <= docCount);
				}
				else
					delCount = - 1;
				if (format <= SegmentInfos.FORMAT_HAS_PROX)
					hasProx = input.ReadByte() == 1;
				else
					hasProx = true;
				
				if (format <= SegmentInfos.FORMAT_DIAGNOSTICS)
				{
					diagnostics = input.ReadStringStringMap();
				}
				else
				{
					diagnostics = new System.Collections.Generic.Dictionary<string,string>();
				}
			}
			else
			{
				delGen = CHECK_DIR;
				normGen = null;
				isCompoundFile = (sbyte) (CHECK_DIR);
				preLockless = true;
				hasSingleNormFile = false;
				docStoreOffset = - 1;
				docStoreIsCompoundFile = false;
				docStoreSegment = null;
				delCount = - 1;
				hasProx = true;
				diagnostics = new System.Collections.Generic.Dictionary<string,string>();
			}
		}
Пример #21
0
		/// <summary> Construct a new SegmentInfo instance by reading a
		/// previously saved SegmentInfo from input.
		/// 
		/// </summary>
		/// <param name="dir">directory to load from
		/// </param>
		/// <param name="format">format of the segments info file
		/// </param>
		/// <param name="input">input handle to read segment info from
		/// </param>
		internal SegmentInfo(Directory dir, int format, IndexInput input)
		{
			this.dir = dir;
			name = input.ReadString();
			docCount = input.ReadInt();
			if (format <= SegmentInfos.FORMAT_LOCKLESS)
			{
				delGen = input.ReadLong();
				if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE)
				{
					docStoreOffset = input.ReadInt();
					if (docStoreOffset != - 1)
					{
						docStoreSegment = input.ReadString();
						docStoreIsCompoundFile = (1 == input.ReadByte());
					}
					else
					{
						docStoreSegment = name;
						docStoreIsCompoundFile = false;
					}
				}
				else
				{
					docStoreOffset = - 1;
					docStoreSegment = name;
					docStoreIsCompoundFile = false;
				}
				if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE)
				{
					hasSingleNormFile = (1 == input.ReadByte());
				}
				else
				{
					hasSingleNormFile = false;
				}
				int numNormGen = input.ReadInt();
				if (numNormGen == NO)
				{
					normGen = null;
				}
				else
				{
					normGen = new long[numNormGen];
					for (int j = 0; j < numNormGen; j++)
					{
						normGen[j] = input.ReadLong();
					}
				}
				isCompoundFile = (sbyte) input.ReadByte();
				preLockless = (isCompoundFile == CHECK_DIR);
			}
			else
			{
				delGen = CHECK_DIR;
				normGen = null;
				isCompoundFile = (sbyte) (CHECK_DIR);
				preLockless = true;
				hasSingleNormFile = false;
				docStoreOffset = - 1;
				docStoreIsCompoundFile = false;
				docStoreSegment = null;
			}
		}
Пример #22
0
 /// <summary> Construct a new SegmentInfo instance by reading a
 /// previously saved SegmentInfo from input.
 ///
 /// </summary>
 /// <param name="dir">directory to load from
 /// </param>
 /// <param name="format">format of the segments info file
 /// </param>
 /// <param name="input">input handle to read segment info from
 /// </param>
 internal SegmentInfo(Directory dir, int format, IndexInput input)
 {
     this.dir = dir;
     name     = input.ReadString();
     docCount = input.ReadInt();
     if (format <= SegmentInfos.FORMAT_LOCKLESS)
     {
         delGen = input.ReadLong();
         if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE)
         {
             docStoreOffset = input.ReadInt();
             if (docStoreOffset != -1)
             {
                 docStoreSegment        = input.ReadString();
                 docStoreIsCompoundFile = (1 == input.ReadByte());
             }
             else
             {
                 docStoreSegment        = name;
                 docStoreIsCompoundFile = false;
             }
         }
         else
         {
             docStoreOffset         = -1;
             docStoreSegment        = name;
             docStoreIsCompoundFile = false;
         }
         if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE)
         {
             hasSingleNormFile = (1 == input.ReadByte());
         }
         else
         {
             hasSingleNormFile = false;
         }
         int numNormGen = input.ReadInt();
         if (numNormGen == NO)
         {
             normGen = null;
         }
         else
         {
             normGen = new long[numNormGen];
             for (int j = 0; j < numNormGen; j++)
             {
                 normGen[j] = input.ReadLong();
             }
         }
         isCompoundFile = (sbyte)input.ReadByte();
         preLockless    = (isCompoundFile == CHECK_DIR);
     }
     else
     {
         delGen                 = CHECK_DIR;
         normGen                = null;
         isCompoundFile         = (sbyte)(CHECK_DIR);
         preLockless            = true;
         hasSingleNormFile      = false;
         docStoreOffset         = -1;
         docStoreIsCompoundFile = false;
         docStoreSegment        = null;
     }
 }
Пример #23
0
        /// <summary> Read a particular segmentFileName.  Note that this may
        /// throw an IOException if a commit is in process.
        ///
        /// </summary>
        /// <param name="directory">-- directory containing the segments file
        /// </param>
        /// <param name="segmentFileName">-- segment file to load
        /// </param>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        public void  Read(Directory directory, System.String segmentFileName)
        {
            bool success = false;

            // Clear any previous segments:
            Clear();

            IndexInput input = directory.OpenInput(segmentFileName);

            generation = GenerationFromSegmentsFileName(segmentFileName);

            lastGeneration = generation;

            try
            {
                int format = input.ReadInt();
                if (format < 0)
                {
                    // file contains explicit format info
                    // check that it is a format we can understand
                    if (format < CURRENT_FORMAT)
                    {
                        throw new CorruptIndexException("Unknown format version: " + format);
                    }
                    version = input.ReadLong();                    // read version
                    counter = input.ReadInt();                     // read counter
                }
                else
                {
                    // file is in old format without explicit format info
                    counter = format;
                }

                for (int i = input.ReadInt(); i > 0; i--)
                {
                    // read segmentInfos
                    Add(new SegmentInfo(directory, format, input));
                }

                if (format >= 0)
                {
                    // in old format the version number may be at the end of the file
                    if (input.GetFilePointer() >= input.Length())
                    {
                        version = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
                    }
                    // old file format without version number
                    else
                    {
                        version = input.ReadLong();                         // read version
                    }
                }
                success = true;
            }
            finally
            {
                input.Close();
                if (!success)
                {
                    // Clear any segment infos we had loaded so we
                    // have a clean slate on retry:
                    Clear();
                }
            }
        }
        internal static NumericEntry ReadNumericEntry(IndexInput meta)
        {
            NumericEntry entry = new NumericEntry();
            entry.Format = meta.ReadVInt();
            entry.MissingOffset = meta.ReadLong();
            entry.PackedIntsVersion = meta.ReadVInt();
            entry.Offset = meta.ReadLong();
            entry.Count = meta.ReadVLong();
            entry.BlockSize = meta.ReadVInt();
            switch (entry.Format)
            {
                case Lucene45DocValuesConsumer.GCD_COMPRESSED:
                    entry.MinValue = meta.ReadLong();
                    entry.Gcd = meta.ReadLong();
                    break;

                case Lucene45DocValuesConsumer.TABLE_COMPRESSED:
                    if (entry.Count > int.MaxValue)
                    {
                        throw new Exception("Cannot use TABLE_COMPRESSED with more than MAX_VALUE values, input=" + meta);
                    }
                    int uniqueValues = meta.ReadVInt();
                    if (uniqueValues > 256)
                    {
                        throw new Exception("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + meta);
                    }
                    entry.Table = new long[uniqueValues];
                    for (int i = 0; i < uniqueValues; ++i)
                    {
                        entry.Table[i] = meta.ReadLong();
                    }
                    break;

                case Lucene45DocValuesConsumer.DELTA_COMPRESSED:
                    break;

                default:
                    throw new Exception("Unknown format: " + entry.Format + ", input=" + meta);
            }
            return entry;
        }
        internal static BinaryEntry ReadBinaryEntry(IndexInput meta)
        {
            BinaryEntry entry = new BinaryEntry();
            entry.Format = meta.ReadVInt();
            entry.MissingOffset = meta.ReadLong();
            entry.MinLength = meta.ReadVInt();
            entry.MaxLength = meta.ReadVInt();
            entry.Count = meta.ReadVLong();
            entry.Offset = meta.ReadLong();
            switch (entry.Format)
            {
                case Lucene45DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED:
                    break;

                case Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED:
                    entry.AddressInterval = meta.ReadVInt();
                    entry.AddressesOffset = meta.ReadLong();
                    entry.PackedIntsVersion = meta.ReadVInt();
                    entry.BlockSize = meta.ReadVInt();
                    break;

                case Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED:
                    entry.AddressesOffset = meta.ReadLong();
                    entry.PackedIntsVersion = meta.ReadVInt();
                    entry.BlockSize = meta.ReadVInt();
                    break;

                default:
                    throw new Exception("Unknown format: " + entry.Format + ", input=" + meta);
            }
            return entry;
        }
Пример #26
0
        private SortedDocValues LoadBytesVarSorted(FieldInfo field, IndexInput data, IndexInput index)
        {
            CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
            CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

            long maxAddress = index.ReadLong();
            PagedBytes bytes = new PagedBytes(16);
            bytes.Copy(data, maxAddress);
            PagedBytes.Reader bytesReader = bytes.Freeze(true);
            PackedInts.Reader addressReader = PackedInts.GetReader(index);
            PackedInts.Reader ordsReader = PackedInts.GetReader(index);

            int valueCount = addressReader.Size() - 1;
            RamBytesUsed_Renamed.AddAndGet(bytes.RamBytesUsed() + addressReader.RamBytesUsed() + ordsReader.RamBytesUsed());

            return CorrectBuggyOrds(new SortedDocValuesAnonymousInnerClassHelper2(bytesReader, addressReader, ordsReader, valueCount));
        }
Пример #27
0
        public virtual void  Get(int docNum, System.String field, TermVectorMapper mapper)
        {
            if (tvx != null)
            {
                int fieldNumber = fieldInfos.FieldNumber(field);
                //We need to account for the FORMAT_SIZE at when seeking in the tvx
                //We don't need to do this in other seeks because we already have the
                // file pointer
                //that was written in another file
                tvx.Seek(((docNum + docStoreOffset) * 8L) + FORMAT_SIZE);
                //System.out.println("TVX Pointer: " + tvx.getFilePointer());
                long position = tvx.ReadLong();

                tvd.Seek(position);
                int fieldCount = tvd.ReadVInt();
                //System.out.println("Num Fields: " + fieldCount);
                // There are only a few fields per document. We opt for a full scan
                // rather then requiring that they be ordered. We need to read through
                // all of the fields anyway to get to the tvf pointers.
                int number = 0;
                int found  = -1;
                for (int i = 0; i < fieldCount; i++)
                {
                    if (tvdFormat == FORMAT_VERSION)
                    {
                        number = tvd.ReadVInt();
                    }
                    else
                    {
                        number += tvd.ReadVInt();
                    }

                    if (number == fieldNumber)
                    {
                        found = i;
                    }
                }

                // This field, although valid in the segment, was not found in this
                // document
                if (found != -1)
                {
                    // Compute position in the tvf file
                    position = 0;
                    for (int i = 0; i <= found; i++)
                    {
                        position += tvd.ReadVLong();
                    }

                    mapper.SetDocumentNumber(docNum);
                    ReadTermVector(field, position, mapper);
                }
                else
                {
                    //System.out.println("Fieldable not found");
                }
            }
            else
            {
                //System.out.println("No tvx file");
            }
        }
Пример #28
0
        /// <summary> Read a particular segmentFileName.  Note that this may
        /// throw an IOException if a commit is in process.
        ///
        /// </summary>
        /// <param name="directory">-- directory containing the segments file
        /// </param>
        /// <param name="segmentFileName">-- segment file to load
        /// </param>
        public void  Read(Directory directory, System.String segmentFileName)
        {
            bool success = false;

            IndexInput input = directory.OpenInput(segmentFileName);

            if (segmentFileName.Equals(IndexFileNames.SEGMENTS))
            {
                generation = 0;
            }
            else
            {
#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE
                generation = Lucene.Net.Documents.NumberTools.ToLong(segmentFileName.Substring(1 + IndexFileNames.SEGMENTS.Length));
#else
                generation = System.Convert.ToInt64(segmentFileName.Substring(1 + IndexFileNames.SEGMENTS.Length), 16);
#endif
            }
            lastGeneration = generation;

            try
            {
                int format = input.ReadInt();
                if (format < 0)
                {
                    // file contains explicit format info
                    // check that it is a format we can understand
                    if (format < FORMAT_SINGLE_NORM_FILE)
                    {
                        throw new System.IO.IOException("Unknown format version: " + format);
                    }
                    version = input.ReadLong();                    // read version
                    counter = input.ReadInt();                     // read counter
                }
                else
                {
                    // file is in old format without explicit format info
                    counter = format;
                }

                for (int i = input.ReadInt(); i > 0; i--)
                {
                    // read segmentInfos
                    Add(new SegmentInfo(directory, format, input));
                }

                if (format >= 0)
                {
                    // in old format the version number may be at the end of the file
                    if (input.GetFilePointer() >= input.Length())
                    {
                        version = System.DateTime.Now.Millisecond;
                    }
                    // old file format without version number
                    else
                    {
                        version = input.ReadLong();                         // read version
                    }
                }
                success = true;
            }
            finally
            {
                input.Close();
                if (!success)
                {
                    // Clear any segment infos we had loaded so we
                    // have a clean slate on retry:
                    Clear();
                }
            }
        }
Пример #29
0
        /// <summary> Retrieve the term vector for the given document and field</summary>
        /// <param name="docNum">The document number to retrieve the vector for
        /// </param>
        /// <param name="field">The field within the document to retrieve
        /// </param>
        /// <returns> The TermFreqVector for the document and field or null if there is no termVector for this field.
        /// </returns>
        /// <throws>  IOException if there is an error reading the term vector files </throws>
        public /*internal*/ virtual TermFreqVector Get(int docNum, System.String field)
        {
            // Check if no term vectors are available for this segment at all
            int            fieldNumber = fieldInfos.FieldNumber(field);
            TermFreqVector result      = null;

            if (tvx != null)
            {
                //We need to account for the FORMAT_SIZE at when seeking in the tvx
                //We don't need to do this in other seeks because we already have the
                // file pointer
                //that was written in another file
                tvx.Seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
                //System.out.println("TVX Pointer: " + tvx.getFilePointer());
                long position = tvx.ReadLong();

                tvd.Seek(position);
                int fieldCount = tvd.ReadVInt();
                //System.out.println("Num Fields: " + fieldCount);
                // There are only a few fields per document. We opt for a full scan
                // rather then requiring that they be ordered. We need to read through
                // all of the fields anyway to get to the tvf pointers.
                int number = 0;
                int found  = -1;
                for (int i = 0; i < fieldCount; i++)
                {
                    if (tvdFormat == TermVectorsWriter.FORMAT_VERSION)
                    {
                        number = tvd.ReadVInt();
                    }
                    else
                    {
                        number += tvd.ReadVInt();
                    }

                    if (number == fieldNumber)
                    {
                        found = i;
                    }
                }

                // This field, although valid in the segment, was not found in this
                // document
                if (found != -1)
                {
                    // Compute position in the tvf file
                    position = 0;
                    for (int i = 0; i <= found; i++)
                    {
                        position += tvd.ReadVLong();
                    }

                    result = ReadTermVector(field, position);
                }
                else
                {
                    //System.out.println("Field not found");
                }
            }
            else
            {
                //System.out.println("No tvx file");
            }
            return(result);
        }
Пример #30
0
        public override long Get(int index)
        {
            long majorBitPos = (long)index * bitsPerValue;
            long elementPos  = (long)((ulong)majorBitPos >> 3);

            try
            {
                @in.Seek(StartPointer + elementPos);

                int bitPos = (int)(majorBitPos & 7);
                // round up bits to a multiple of 8 to find total bytes needed to read
                int roundedBits = ((bitPos + bitsPerValue + 7) & ~7);
                // the number of extra bits read at the end to shift out
                int shiftRightBits = roundedBits - bitPos - bitsPerValue;

                long rawValue;
                switch ((int)((uint)roundedBits >> 3))
                {
                case 1:
                    rawValue = @in.ReadByte();
                    break;

                case 2:
                    rawValue = @in.ReadShort();
                    break;

                case 3:
                    rawValue = ((long)@in.ReadShort() << 8) | (@in.ReadByte() & 0xFFL);
                    break;

                case 4:
                    rawValue = @in.ReadInt();
                    break;

                case 5:
                    rawValue = ((long)@in.ReadInt() << 8) | (@in.ReadByte() & 0xFFL);
                    break;

                case 6:
                    rawValue = ((long)@in.ReadInt() << 16) | (@in.ReadShort() & 0xFFFFL);
                    break;

                case 7:
                    rawValue = ((long)@in.ReadInt() << 24) | ((@in.ReadShort() & 0xFFFFL) << 8) | (@in.ReadByte() & 0xFFL);
                    break;

                case 8:
                    rawValue = @in.ReadLong();
                    break;

                case 9:
                    // We must be very careful not to shift out relevant bits. So we account for right shift
                    // we would normally do on return here, and reset it.
                    rawValue       = (@in.ReadLong() << (8 - shiftRightBits)) | ((int)((uint)(@in.ReadByte() & 0xFFL) >> shiftRightBits));
                    shiftRightBits = 0;
                    break;

                default:
                    throw new InvalidOperationException("bitsPerValue too large: " + bitsPerValue);
                }
                return(((long)((ulong)rawValue >> shiftRightBits)) & ValueMask);
            }
            catch (System.IO.IOException ioe)
            {
                throw new InvalidOperationException("failed", ioe);
            }
        }
Пример #31
0
        internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi)
        {
            input         = i;
            fieldInfos    = fis;
            isIndex       = isi;
            maxSkipLevels = 1;             // use single-level skip lists for formats > -3

            int firstInt = input.ReadInt();

            if (firstInt >= 0)
            {
                // original-format file, without explicit format version number
                format = 0;
                size   = firstInt;

                // back-compatible settings
                indexInterval = 128;
                skipInterval  = System.Int32.MaxValue;                // switch off skipTo optimization
            }
            else
            {
                // we have a format version number
                format = firstInt;

                // check that it is a format we can understand
                if (format < TermInfosWriter.FORMAT_CURRENT)
                {
                    throw new CorruptIndexException("Unknown format version:" + format + " expected " + TermInfosWriter.FORMAT_CURRENT + " or higher");
                }

                size = input.ReadLong();                 // read the size

                if (format == -1)
                {
                    if (!isIndex)
                    {
                        indexInterval        = input.ReadInt();
                        formatM1SkipInterval = input.ReadInt();
                    }
                    // switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
                    // skipTo implementation of these versions
                    skipInterval = System.Int32.MaxValue;
                }
                else
                {
                    indexInterval = input.ReadInt();
                    skipInterval  = input.ReadInt();
                    if (format <= TermInfosWriter.FORMAT)
                    {
                        // this new format introduces multi-level skipping
                        maxSkipLevels = input.ReadInt();
                    }
                }
                System.Diagnostics.Debug.Assert(indexInterval > 0, "indexInterval=" + indexInterval + " is negative; must be > 0");
                System.Diagnostics.Debug.Assert(skipInterval > 0, "skipInterval=" + skipInterval + " is negative; must be > 0");
            }
            if (format > TermInfosWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
            {
                termBuffer.SetPreUTF8Strings();
                scanBuffer.SetPreUTF8Strings();
                prevBuffer.SetPreUTF8Strings();
            }
        }