Example #1
        public void  Read(IndexInput input, FieldInfos fieldInfos)
        {
            this.term = null;             // invalidate cache
            int start       = input.ReadVInt();
            int length      = input.ReadVInt();
            int totalLength = start + length;

            if (preUTF8Strings)
            {
                text.SetLength(totalLength);
                input.ReadChars(text.result, start, length);
            }
            else
            {
                if (dirty)
                {
                    // Fully convert all bytes since bytes is dirty
                    UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
                    bytes.SetLength(totalLength);
                    input.ReadBytes(bytes.result, start, length);
                    UnicodeUtil.UTF8toUTF16(bytes.result, 0, totalLength, text);
                    dirty = false;
                }
                else
                {
                    // Incrementally convert only the UTF8 bytes that are new:
                    bytes.SetLength(totalLength);
                    input.ReadBytes(bytes.result, start, length);
                    UnicodeUtil.UTF8toUTF16(bytes.result, start, length, text);
                }
            }
            this.field = fieldInfos.FieldName(input.ReadVInt());
        }
Example #2
        private int CopyFieldsNoDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
        {
            int maxDoc   = reader.MaxDoc();
            int docCount = 0;

            if (matchingFieldsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                while (docCount < maxDoc)
                {
                    int        len    = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len);
                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, len);
                    docCount += len;
                    checkAbort.Work(300 * len);
                }
            }
            else
            {
                for (; docCount < maxDoc; docCount++)
                {
                    // NOTE: it's very important to first assign to doc then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    Document doc = reader.Document(docCount, fieldSelectorMerge);
                    fieldsWriter.AddDocument(doc);
                    checkAbort.Work(300);
                }
            }
            return(docCount);
        }
Example #3
        protected internal override int ReadSkipData(int level, IndexInput skipStream)
        {
            int delta;

            if (currentFieldStoresPayloads)
            {
                // the current field stores payloads.
                // if the doc delta is odd then we have
                // to read the current payload length
                // because it differs from the length of the
                // previous payload
                delta = skipStream.ReadVInt();
                if ((delta & 1) != 0)
                {
                    payloadLength[level] = skipStream.ReadVInt();
                }
                delta = SupportClass.Number.URShift(delta, 1);
            }
            else
            {
                delta = skipStream.ReadVInt();
            }
            freqPointer[level] += skipStream.ReadVInt();
            proxPointer[level] += skipStream.ReadVInt();

            return(delta);
        }
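
For context, the skip entry read above packs a boolean ("payload length changed") into the low bit of the doc delta before it is written as a VInt, and SupportClass.Number.URShift(delta, 1) recovers the real delta. A minimal standalone sketch of that packing scheme (the Encode/Decode names are illustrative, not Lucene.Net API):

    internal static class DeltaFlagCoding
    {
        // Pack the "payload length changed" flag into the low bit of the delta
        internal static int Encode(int delta, bool payloadLengthChanged)
        {
            return (delta << 1) | (payloadLengthChanged ? 1 : 0);
        }

        // Recover the flag and the delta; the unsigned shift mirrors SupportClass.Number.URShift
        internal static void Decode(int encoded, out int delta, out bool payloadLengthChanged)
        {
            payloadLengthChanged = (encoded & 1) != 0;
            delta = (int)((uint)encoded >> 1);
        }
    }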
Example #4
		public void  Read(IndexInput input, FieldInfos fieldInfos)
		{
			this.term = null; // invalidate cache
			int start = input.ReadVInt();
			int length = input.ReadVInt();
			int totalLength = start + length;
			if (preUTF8Strings)
			{
				text.SetLength(totalLength);
				input.ReadChars(text.result, start, length);
			}
			else
			{
				
				if (dirty)
				{
					// Fully convert all bytes since bytes is dirty
					UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
					bytes.SetLength(totalLength);
					input.ReadBytes(bytes.result, start, length);
					UnicodeUtil.UTF8toUTF16(bytes.result, 0, totalLength, text);
					dirty = false;
				}
				else
				{
					// Incrementally convert only the UTF8 bytes that are new:
					bytes.SetLength(totalLength);
					input.ReadBytes(bytes.result, start, length);
					UnicodeUtil.UTF8toUTF16(bytes.result, start, length, text);
				}
			}
			this.field = fieldInfos.FieldName(input.ReadVInt());
		}
Example #5
        private int CopyFieldsWithDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
        {
            int docCount = 0;
            int maxDoc   = reader.MaxDoc();

            if (matchingFieldsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                for (int j = 0; j < maxDoc;)
                {
                    if (reader.IsDeleted(j))
                    {
                        // skip deleted docs
                        ++j;
                        continue;
                    }
                    // We can optimize this case (doing a bulk byte copy) since the field
                    // numbers are identical
                    int start = j, numDocs = 0;
                    do
                    {
                        j++;
                        numDocs++;
                        if (j >= maxDoc)
                        {
                            break;
                        }
                        if (reader.IsDeleted(j))
                        {
                            j++;
                            break;
                        }
                    } while (numDocs < MAX_RAW_MERGE_DOCS);

                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
                    docCount += numDocs;
                    checkAbort.Work(300 * numDocs);
                }
            }
            else
            {
                for (int j = 0; j < maxDoc; j++)
                {
                    if (reader.IsDeleted(j))
                    {
                        // skip deleted docs
                        continue;
                    }
                    // NOTE: it's very important to first assign to doc then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    Document doc = reader.Document(j, fieldSelectorMerge);
                    fieldsWriter.AddDocument(doc);
                    docCount++;
                    checkAbort.Work(300);
                }
            }
            return(docCount);
        }
Example #6
        private int CheckValidFormat(IndexInput in_Renamed)
        {
            int format = in_Renamed.ReadInt();

            if (format > FORMAT_CURRENT)
            {
                throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FORMAT_CURRENT + " or less");
            }
            return(format);
        }
Example #7
 public /*protected internal*/ SegmentTermDocs(SegmentReader parent)
 {
     this.parent     = parent;
     this.freqStream = (IndexInput)parent.core.freqStream.Clone();
     lock (parent)
     {
         this.deletedDocs = parent.deletedDocs;
     }
     this.skipInterval  = parent.core.GetTermsReader().GetSkipInterval();
     this.maxSkipLevels = parent.core.GetTermsReader().GetMaxSkipLevels();
 }
Example #8
            private IndexInput GetFieldStream()
            {
                IndexInput localFieldsStream = (IndexInput)Enclosing_Instance.fieldsStreamTL.Get();

                if (localFieldsStream == null)
                {
                    localFieldsStream = (IndexInput)Enclosing_Instance.cloneableFieldsStream.Clone();
                    Enclosing_Instance.fieldsStreamTL.Set(localFieldsStream);
                }
                return(localFieldsStream);
            }
Example #9
		public /*protected internal*/ SegmentTermDocs(SegmentReader parent)
		{
			this.parent = parent;
			this.freqStream = (IndexInput) parent.core.freqStream.Clone();
			lock (parent)
			{
				this.deletedDocs = parent.deletedDocs;
			}
			this.skipInterval = parent.core.GetTermsReader().GetSkipInterval();
			this.maxSkipLevels = parent.core.GetTermsReader().GetMaxSkipLevels();
		}
Example #10
		public CompoundFileReader(Directory dir, System.String name, int readBufferSize)
		{
			directory = dir;
			fileName = name;
			this.readBufferSize = readBufferSize;
			
			bool success = false;
			
			try
			{
				stream = dir.OpenInput(name, readBufferSize);
				
				// read the directory and init files
				int count = stream.ReadVInt();
				FileEntry entry = null;
				for (int i = 0; i < count; i++)
				{
					long offset = stream.ReadLong();
					System.String id = stream.ReadString();
					
					if (entry != null)
					{
						// set length of the previous entry
						entry.length = offset - entry.offset;
					}
					
					entry = new FileEntry();
					entry.offset = offset;
					entries[id] = entry;
				}
				
				// set the length of the final entry
				if (entry != null)
				{
					entry.length = stream.Length() - entry.offset;
				}
				
				success = true;
			}
			finally
			{
				if (!success && (stream != null))
				{
					try
					{
						stream.Close();
					}
					catch (System.IO.IOException)
					{
						// Ignore: best-effort close while propagating the original failure
					}
				}
			}
		}
Example #11
        public CompoundFileReader(Directory dir, System.String name, int readBufferSize)
        {
            directory           = dir;
            fileName            = name;
            this.readBufferSize = readBufferSize;

            bool success = false;

            try
            {
                stream = dir.OpenInput(name, readBufferSize);

                // read the directory and init files
                int       count = stream.ReadVInt();
                FileEntry entry = null;
                for (int i = 0; i < count; i++)
                {
                    long          offset = stream.ReadLong();
                    System.String id     = stream.ReadString();

                    if (entry != null)
                    {
                        // set length of the previous entry
                        entry.length = offset - entry.offset;
                    }

                    entry        = new FileEntry();
                    entry.offset = offset;
                    entries[id]  = entry;
                }

                // set the length of the final entry
                if (entry != null)
                {
                    entry.length = stream.Length() - entry.offset;
                }

                success = true;
            }
            finally
            {
                if (!success && (stream != null))
                {
                    try
                    {
                        stream.Close();
                    }
                    catch (System.IO.IOException)
                    {
                        // Ignore: best-effort close while propagating the original failure
                    }
                }
            }
        }
Example #12
            public override byte[] GetBinaryValue(byte[] result)
            {
                Enclosing_Instance.EnsureOpen();

                if (isBinary)
                {
                    if (fieldsData == null)
                    {
                        // Allocate new buffer if result is null or too small
                        byte[] b;
                        if (result == null || result.Length < toRead)
                        {
                            b = new byte[toRead];
                        }
                        else
                        {
                            b = result;
                        }

                        IndexInput localFieldsStream = GetFieldStream();

                        // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people
                        // since they are already handling this exception when getting the document
                        try
                        {
                            localFieldsStream.Seek(pointer);
                            localFieldsStream.ReadBytes(b, 0, toRead);
                            if (isCompressed)
                            {
                                fieldsData = Enclosing_Instance.Uncompress(b);
                            }
                            else
                            {
                                fieldsData = b;
                            }
                        }
                        catch (System.IO.IOException e)
                        {
                            throw new FieldReaderException(e);
                        }

                        binaryOffset = 0;
                        binaryLength = toRead;
                    }

                    return((byte[])fieldsData);
                }
                else
                {
                    return(null);
                }
            }
Example #13
        /// <summary>Bulk write a contiguous series of documents.  The
        /// lengths array is the length (in bytes) of each raw
        /// document.  The stream IndexInput is the
        /// fieldsStream from which we should bulk-copy all
        /// bytes.
        /// </summary>
        internal void  AddRawDocuments(IndexInput stream, int[] lengths, int numDocs)
        {
            long position = fieldsStream.GetFilePointer();
            long start    = position;

            for (int i = 0; i < numDocs; i++)
            {
                indexStream.WriteLong(position);
                position += lengths[i];
            }
            fieldsStream.CopyBytes(stream, position - start);
            System.Diagnostics.Debug.Assert(fieldsStream.GetFilePointer() == position);
        }
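
The bulk write above works because the index stream stores exactly one absolute long offset per document, so the writer only needs the per-document lengths to reconstruct every offset before blindly copying the concatenated bytes. A self-contained sketch of the same bookkeeping over plain System.IO streams (all names here are illustrative, not Lucene.Net API):

    using System.IO;

    internal static class RawDocCopy
    {
        internal static void AddRawDocuments(BinaryWriter indexStream, Stream fieldsStream,
                                             Stream source, int[] lengths, int numDocs)
        {
            long position = fieldsStream.Position;
            long start = position;
            for (int i = 0; i < numDocs; i++)
            {
                indexStream.Write(position);      // absolute start offset of document i
                position += lengths[i];
            }
            CopyBytes(source, fieldsStream, position - start);
            System.Diagnostics.Debug.Assert(fieldsStream.Position == position);
        }

        private static void CopyBytes(Stream src, Stream dst, long numBytes)
        {
            byte[] buffer = new byte[4096];
            while (numBytes > 0)
            {
                int len = (int)System.Math.Min(buffer.Length, numBytes);
                int read = src.Read(buffer, 0, len);
                if (read <= 0)
                {
                    throw new EndOfStreamException();
                }
                dst.Write(buffer, 0, read);
                numBytes -= read;
            }
        }
    }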
Example #14
		// Used only by clone
		private FieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int formatSize, int docStoreOffset, IndexInput cloneableFieldsStream, IndexInput cloneableIndexStream)
		{
			this.fieldInfos = fieldInfos;
			this.numTotalDocs = numTotalDocs;
			this.size = size;
			this.format = format;
			this.formatSize = formatSize;
			this.docStoreOffset = docStoreOffset;
			this.cloneableFieldsStream = cloneableFieldsStream;
			this.cloneableIndexStream = cloneableIndexStream;
			fieldsStream = (IndexInput) cloneableFieldsStream.Clone();
			indexStream = (IndexInput) cloneableIndexStream.Clone();
		}
Example #15
 // Used only by clone
 private FieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int formatSize, int docStoreOffset, IndexInput cloneableFieldsStream, IndexInput cloneableIndexStream)
 {
     this.fieldInfos            = fieldInfos;
     this.numTotalDocs          = numTotalDocs;
     this.size                  = size;
     this.format                = format;
     this.formatSize            = formatSize;
     this.docStoreOffset        = docStoreOffset;
     this.cloneableFieldsStream = cloneableFieldsStream;
     this.cloneableIndexStream  = cloneableIndexStream;
     fieldsStream               = (IndexInput)cloneableFieldsStream.Clone();
     indexStream                = (IndexInput)cloneableIndexStream.Clone();
 }
Example #16
        private void  Read(IndexInput input, System.String fileName)
        {
            int firstInt = input.ReadVInt();

            if (firstInt < 0)
            {
                // This is a real format
                format = firstInt;
            }
            else
            {
                format = FORMAT_PRE;
            }

            if (format != FORMAT_PRE && format != FORMAT_START)
            {
                throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
            }

            int size;

            if (format == FORMAT_PRE)
            {
                size = firstInt;
            }
            else
            {
                size = input.ReadVInt();                 //read in the size
            }

            for (int i = 0; i < size; i++)
            {
                System.String name            = StringHelper.Intern(input.ReadString());
                byte          bits            = input.ReadByte();
                bool          isIndexed       = (bits & IS_INDEXED) != 0;
                bool          storeTermVector = (bits & STORE_TERMVECTOR) != 0;
                bool          storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
                bool          storeOffsetWithTermVector    = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
                bool          omitNorms                = (bits & OMIT_NORMS) != 0;
                bool          storePayloads            = (bits & STORE_PAYLOADS) != 0;
                bool          omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;

                AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
            }

            if (input.GetFilePointer() != input.Length())
            {
                throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.GetFilePointer() + " vs size " + input.Length());
            }
        }
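
Each field's options are packed into a single flags byte and tested with bitmasks, as in the loop above. A standalone sketch of the same decoding; the bit values shown match the masks this version of FieldInfos uses (IS_INDEXED = 0x1 and so on), but treat the enum itself as an illustrative stand-in rather than Lucene.Net API:

    [System.Flags]
    internal enum FieldBits : byte
    {
        IsIndexed = 0x01,
        StoreTermVector = 0x02,
        StorePositionsWithTermVector = 0x04,
        StoreOffsetWithTermVector = 0x08,
        OmitNorms = 0x10,
        StorePayloads = 0x20,
        OmitTermFreqAndPositions = 0x40
    }

    internal static class FieldBitsReader
    {
        // Equivalent to the (bits & MASK) != 0 tests in Read()
        internal static bool Has(byte bits, FieldBits flag)
        {
            return (bits & (byte)flag) != 0;
        }
    }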
Example #17
        public override void  Close()
        {
            lock (this)
            {
                if (stream == null)
                {
                    throw new System.IO.IOException("Already closed");
                }

                entries.Clear();
                stream.Close();
                stream = null;
            }
        }
Example #18
        /// <summary>Copy the contents of the file with specified extension into the
        /// provided output stream. Use the provided buffer for moving data
        /// to reduce memory allocation.
        /// </summary>
        private void  CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
        {
            IndexInput is_Renamed = null;

            try
            {
                long startPtr = os.GetFilePointer();

                is_Renamed = directory.OpenInput(source.file);
                long length    = is_Renamed.Length();
                long remainder = length;
                int  chunk     = buffer.Length;

                while (remainder > 0)
                {
                    int len = (int)System.Math.Min(chunk, remainder);
                    is_Renamed.ReadBytes(buffer, 0, len, false);
                    os.WriteBytes(buffer, len);
                    remainder -= len;
                    if (checkAbort != null)
                    {
                        // Roughly every 2 MB we will check if
                        // it's time to abort
                        checkAbort.Work(80);
                    }
                }

                // Verify that remainder is 0
                if (remainder != 0)
                {
                    throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
                }

                // Verify that the output length diff is equal to original file
                long endPtr = os.GetFilePointer();
                long diff   = endPtr - startPtr;
                if (diff != length)
                {
                    throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
                }
            }
            finally
            {
                if (is_Renamed != null)
                {
                    is_Renamed.Close();
                }
            }
        }
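
CopyFile's loop reads at most buffer.Length bytes per pass and then double-checks both the remainder and the output offset delta. A standalone sketch of the same guard rails over System.IO streams (names illustrative):

    using System.IO;

    internal static class VerifiedCopy
    {
        internal static void Copy(Stream input, Stream output, byte[] buffer)
        {
            long startPtr = output.Position;
            long length = input.Length;
            long remainder = length;
            while (remainder > 0)
            {
                int len = (int)System.Math.Min(buffer.Length, remainder);
                int read = input.Read(buffer, 0, len);
                if (read <= 0)
                {
                    throw new IOException("Unexpected end of input with " + remainder + " bytes remaining");
                }
                output.Write(buffer, 0, read);
                remainder -= read;
            }
            // Verify that the output advanced by exactly the input length
            long diff = output.Position - startPtr;
            if (diff != length)
            {
                throw new IOException("Copied " + diff + " bytes but expected " + length);
            }
        }
    }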
Example #19
        /// <summary>read as a d-gaps list </summary>
        private void  ReadDgaps(IndexInput input)
        {
            size  = input.ReadInt();            // (re)read size
            count = input.ReadInt();            // read count
            bits  = new byte[(size >> 3) + 1];  // allocate bits
            int last = 0;
            int n    = Count();

            while (n > 0)
            {
                last      += input.ReadVInt();
                bits[last] = input.ReadByte();
                n         -= BYTE_COUNTS[bits[last] & 0xFF];
            }
        }
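
The d-gaps format stores only the non-zero bytes of the bit vector: each entry is the gap from the previous non-zero byte index (as a VInt) followed by the byte itself, which is why the reader above accumulates last += input.ReadVInt(). A self-contained round-trip sketch over plain streams, with minimal VInt helpers included (names illustrative):

    using System.IO;

    internal static class DgapCoding
    {
        internal static void WriteVInt(Stream s, int value)
        {
            uint v = (uint)value;
            while (v >= 0x80)
            {
                s.WriteByte((byte)((v & 0x7F) | 0x80));   // low 7 bits, continuation bit set
                v >>= 7;
            }
            s.WriteByte((byte)v);
        }

        internal static int ReadVInt(Stream s)
        {
            int value = 0, shift = 0, b;
            do
            {
                b = s.ReadByte();
                value |= (b & 0x7F) << shift;
                shift += 7;
            }
            while ((b & 0x80) != 0);
            return value;
        }

        // Write only the non-zero bytes, each as (gap from previous index, byte)
        internal static void WriteDgaps(Stream s, byte[] bits)
        {
            int last = 0;
            for (int i = 0; i < bits.Length; i++)
            {
                if (bits[i] != 0)
                {
                    WriteVInt(s, i - last);
                    s.WriteByte(bits[i]);
                    last = i;
                }
            }
        }

        internal static byte[] ReadDgaps(Stream s, int sizeInBytes)
        {
            byte[] bits = new byte[sizeInBytes];
            int last = 0;
            while (s.Position < s.Length)
            {
                last += ReadVInt(s);
                bits[last] = (byte)s.ReadByte();
            }
            return bits;
        }
    }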
Example #20
 /// <summary>The value of the field as a String, or null.  If null, the Reader value,
 /// binary value, or TokenStream value is used.  Exactly one of stringValue(),
 /// readerValue(), binaryValue(), and tokenStreamValue() must be set.
 /// </summary>
 public override System.String StringValue()
 {
     Enclosing_Instance.EnsureOpen();
     if (isBinary)
     {
         return(null);
     }
     else
     {
         if (fieldsData == null)
         {
             IndexInput localFieldsStream = GetFieldStream();
             try
             {
                 localFieldsStream.Seek(pointer);
                 if (isCompressed)
                 {
                     byte[] b = new byte[toRead];
                     localFieldsStream.ReadBytes(b, 0, b.Length);
                     fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(Enclosing_Instance.Uncompress(b));
                 }
                 else
                 {
                     if (Enclosing_Instance.format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
                     {
                         byte[] bytes = new byte[toRead];
                         localFieldsStream.ReadBytes(bytes, 0, toRead);
                         fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(bytes);
                     }
                     else
                     {
                         //read in chars b/c we already know the length we need to read
                         char[] chars = new char[toRead];
                         localFieldsStream.ReadChars(chars, 0, toRead);
                         fieldsData = new System.String(chars);
                     }
                 }
             }
             catch (System.IO.IOException e)
             {
                 throw new FieldReaderException(e);
             }
         }
         return((System.String)fieldsData);
     }
 }
Example #21
 public MultiLevelSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval)
 {
     this.skipStream            = new IndexInput[maxSkipLevels];
     this.skipPointer           = new long[maxSkipLevels];
     this.childPointer          = new long[maxSkipLevels];
     this.numSkipped            = new int[maxSkipLevels];
     this.maxNumberOfSkipLevels = maxSkipLevels;
     this.skipInterval          = new int[maxSkipLevels];
     this.skipStream[0]         = skipStream;
     this.inputIsBuffered       = (skipStream is BufferedIndexInput);
     this.skipInterval[0]       = skipInterval;
     for (int i = 1; i < maxSkipLevels; i++)
     {
         // cache skip intervals
         this.skipInterval[i] = this.skipInterval[i - 1] * skipInterval;
     }
     skipDoc = new int[maxSkipLevels];
 }
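
The loop in this constructor caches how many documents each level spans: level 0 skips skipInterval docs, and every higher level multiplies by skipInterval again, so level i covers skipInterval^(i+1) docs. A tiny sketch of just that computation:

    internal static class SkipLevels
    {
        // For skipInterval = 16 and maxSkipLevels = 3 this returns { 16, 256, 4096 }
        internal static int[] Intervals(int skipInterval, int maxSkipLevels)
        {
            int[] intervals = new int[maxSkipLevels];
            intervals[0] = skipInterval;
            for (int i = 1; i < maxSkipLevels; i++)
            {
                intervals[i] = intervals[i - 1] * skipInterval;
            }
            return intervals;
        }
    }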
Example #22
		public MultiLevelSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval)
		{
			this.skipStream = new IndexInput[maxSkipLevels];
			this.skipPointer = new long[maxSkipLevels];
			this.childPointer = new long[maxSkipLevels];
			this.numSkipped = new int[maxSkipLevels];
			this.maxNumberOfSkipLevels = maxSkipLevels;
			this.skipInterval = new int[maxSkipLevels];
			this.skipStream[0] = skipStream;
			this.inputIsBuffered = (skipStream is BufferedIndexInput);
			this.skipInterval[0] = skipInterval;
			for (int i = 1; i < maxSkipLevels; i++)
			{
				// cache skip intervals
				this.skipInterval[i] = this.skipInterval[i - 1] * skipInterval;
			}
			skipDoc = new int[maxSkipLevels];
		}
Example #23
        /// <summary> Construct a FieldInfos object using the directory and the name of the file
        /// IndexInput
        /// </summary>
        /// <param name="d">The directory to open the IndexInput from
        /// </param>
        /// <param name="name">The name of the file to open the IndexInput from in the Directory
        /// </param>
        /// <throws>  IOException </throws>
        public /*internal*/ FieldInfos(Directory d, System.String name)
        {
            IndexInput input = d.OpenInput(name);

            try
            {
                try
                {
                    Read(input, name);
                }
                catch (System.IO.IOException ioe)
                {
                    if (format == FORMAT_PRE)
                    {
                        // LUCENE-1623: FORMAT_PRE (before there was a
                        // format) may be 2.3.2 (pre-utf8) or 2.4.x (utf8)
                        // encoding; retry with input set to pre-utf8
                        input.Seek(0);
                        input.SetModifiedUTF8StringsMode();
                        byNumber.Clear();
                        byName.Clear();
                        try
                        {
                            Read(input, name);
                        }
                        catch (System.Exception t)
                        {
                            // Ignore any new exception & throw original IOE
                            throw ioe;
                        }
                    }
                    else
                    {
                        // The IOException cannot be caused by
                        // LUCENE-1623, so re-throw it
                        throw ioe;
                    }
                }
            }
            finally
            {
                input.Close();
            }
        }
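
The constructor above is a parse-retry pattern: on an IOException that might be explained by the older pre-UTF-8 encoding, it rewinds, switches the input to modified-UTF-8 mode, clears partial state, and parses again, rethrowing the original exception if the retry also fails. A generic sketch of that shape; note the real code additionally gates the retry on format == FORMAT_PRE, and the delegate parameter here is purely illustrative:

    internal static class LegacyFallback
    {
        // parse(false) tries the current encoding; parse(true) retries assuming the legacy one
        internal static T Parse<T>(System.Func<bool, T> parse)
        {
            try
            {
                return parse(false);
            }
            catch (System.IO.IOException ioe)
            {
                try
                {
                    return parse(true);
                }
                catch (System.Exception)
                {
                    // Ignore the retry's exception and surface the original failure
                    throw ioe;
                }
            }
        }
    }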
Example #24
        /// <summary>Constructs a bit vector from the file <code>name</code> in Directory
        /// <code>d</code>, as written by the {@link #write} method.
        /// </summary>
        public BitVector(Directory d, System.String name)
        {
            IndexInput input = d.OpenInput(name);

            try
            {
                size = input.ReadInt();                 // read size
                if (size == -1)
                {
                    ReadDgaps(input);
                }
                else
                {
                    ReadBits(input);
                }
            }
            finally
            {
                input.Close();
            }
        }
Example #25
        // It is not always necessary to move the prox pointer
        // to a new document after the freq pointer has been moved.
        // Consider for example a phrase query with two terms:
        // the freq pointer for term 1 has to move to document x
        // to answer the question of whether the term occurs in that
        // document. But the positions only have to be read if term 2
        // also matches document x, to figure out whether term 1 and
        // term 2 appear next to each other in document x and thus
        // satisfy the query. So we move the prox pointer lazily to
        // the document as soon as positions are requested.
        private void  LazySkip()
        {
            if (proxStream == null)
            {
                // clone lazily
                proxStream = (IndexInput)parent.core.proxStream.Clone();
            }

            // we might have to skip the current payload
            // if it was not read yet
            SkipPayload();

            if (lazySkipPointer != -1)
            {
                proxStream.Seek(lazySkipPointer);
                lazySkipPointer = -1;
            }

            if (lazySkipProxCount != 0)
            {
                SkipPositions(lazySkipProxCount);
                lazySkipProxCount = 0;
            }
        }
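
LazySkip defers both the seek and the pending position skips until positions are actually requested, recording only a target pointer and a count in the meantime. A standalone sketch of the same deferred-seek pattern over a plain stream (all names illustrative; the byte reads stand in for real position decoding):

    internal class LazyPositions
    {
        private readonly System.IO.Stream prox;
        private long lazySkipPointer = -1;    // target offset; -1 means no pending seek
        private int lazySkipCount = 0;        // positions to discard before the next real read

        internal LazyPositions(System.IO.Stream proxStream)
        {
            prox = proxStream;
        }

        // Cheap: just remember the work instead of seeking now
        internal void ScheduleSkip(long pointer, int count)
        {
            lazySkipPointer = pointer;
            lazySkipCount = count;
        }

        internal int NextPosition()
        {
            LazySkip();                       // pay the deferred cost only when positions are needed
            return prox.ReadByte();           // stand-in for decoding one real position
        }

        private void LazySkip()
        {
            if (lazySkipPointer != -1)
            {
                prox.Seek(lazySkipPointer, System.IO.SeekOrigin.Begin);
                lazySkipPointer = -1;
            }
            for (; lazySkipCount > 0; lazySkipCount--)
            {
                prox.ReadByte();              // stand-in for skipping one encoded position
            }
        }
    }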
Example #26
		internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size)
		{
			bool success = false;
			isOriginal = true;
			try
			{
				fieldInfos = fn;
				
				cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize);
				cloneableIndexStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize);
				
				// First version of fdx did not include a format
				// header, but, the first int will always be 0 in that
				// case
				int firstInt = cloneableIndexStream.ReadInt();
				if (firstInt == 0)
					format = 0;
				else
					format = firstInt;
				
				if (format > FieldsWriter.FORMAT_CURRENT
                    /* extra support for Lucene 3.0 indexes: */ && format != FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS
                    )
					throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FieldsWriter.FORMAT_CURRENT + " or lower");
				
				if (format > FieldsWriter.FORMAT)
					formatSize = 4;
				else
					formatSize = 0;
				
				if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
					cloneableFieldsStream.SetModifiedUTF8StringsMode();
				
				fieldsStream = (IndexInput) cloneableFieldsStream.Clone();
				
				long indexSize = cloneableIndexStream.Length() - formatSize;
				
				if (docStoreOffset != - 1)
				{
					// We read only a slice out of this shared fields file
					this.docStoreOffset = docStoreOffset;
					this.size = size;
					
					// Verify the file is long enough to hold all of our
					// docs
					System.Diagnostics.Debug.Assert(((int)(indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
				}
				else
				{
					this.docStoreOffset = 0;
					this.size = (int) (indexSize >> 3);
				}
				
				indexStream = (IndexInput) cloneableIndexStream.Clone();
				numTotalDocs = (int) (indexSize >> 3);
				success = true;
			}
			finally
			{
				// With lock-less commits, it's entirely possible (and
				// fine) to hit a FileNotFound exception above. In
				// this case, we want to explicitly close any subset
				// of things that were opened so that we don't have to
				// wait for a GC to do so.
				if (!success)
				{
					Close();
				}
			}
		}
Example #27
 internal SkipBuffer(IndexInput input, int length)
 {
     data    = new byte[length];
     pointer = input.GetFilePointer();
     input.ReadBytes(data, 0, length);
 }
Example #28
        internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi)
        {
            input         = i;
            fieldInfos    = fis;
            isIndex       = isi;
            maxSkipLevels = 1;             // use single-level skip lists for formats > -3

            int firstInt = input.ReadInt();

            if (firstInt >= 0)
            {
                // original-format file, without explicit format version number
                format = 0;
                size   = firstInt;

                // back-compatible settings
                indexInterval = 128;
                skipInterval  = System.Int32.MaxValue;                // switch off skipTo optimization
            }
            else
            {
                // we have a format version number
                format = firstInt;

                // check that it is a format we can understand
                if (format < TermInfosWriter.FORMAT_CURRENT)
                {
                    throw new CorruptIndexException("Unknown format version:" + format + " expected " + TermInfosWriter.FORMAT_CURRENT + " or higher");
                }

                size = input.ReadLong();                 // read the size

                if (format == -1)
                {
                    if (!isIndex)
                    {
                        indexInterval        = input.ReadInt();
                        formatM1SkipInterval = input.ReadInt();
                    }
                    // switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
                    // skipTo implementation of these versions
                    skipInterval = System.Int32.MaxValue;
                }
                else
                {
                    indexInterval = input.ReadInt();
                    skipInterval  = input.ReadInt();
                    if (format <= TermInfosWriter.FORMAT)
                    {
                        // this new format introduces multi-level skipping
                        maxSkipLevels = input.ReadInt();
                    }
                }
                System.Diagnostics.Debug.Assert(indexInterval > 0, "indexInterval=" + indexInterval + " is negative; must be > 0");
                System.Diagnostics.Debug.Assert(skipInterval > 0, "skipInterval=" + skipInterval + " is negative; must be > 0");
            }
            if (format > TermInfosWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
            {
                termBuffer.SetPreUTF8Strings();
                scanBuffer.SetPreUTF8Strings();
                prevBuffer.SetPreUTF8Strings();
            }
        }
Example #29
        internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size)
        {
            bool success = false;

            isOriginal = true;
            try
            {
                fieldInfos = fn;

                cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize);
                cloneableIndexStream  = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize);

                // First version of fdx did not include a format
                // header, but, the first int will always be 0 in that
                // case
                int firstInt = cloneableIndexStream.ReadInt();
                if (firstInt == 0)
                {
                    format = 0;
                }
                else
                {
                    format = firstInt;
                }

                if (format > FieldsWriter.FORMAT_CURRENT &&
                    /* extra support for Lucene 3.0 indexes: */ format != FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS
                    )
                {
                    throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FieldsWriter.FORMAT_CURRENT + " or lower");
                }

                if (format > FieldsWriter.FORMAT)
                {
                    formatSize = 4;
                }
                else
                {
                    formatSize = 0;
                }

                if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
                {
                    cloneableFieldsStream.SetModifiedUTF8StringsMode();
                }

                fieldsStream = (IndexInput)cloneableFieldsStream.Clone();

                long indexSize = cloneableIndexStream.Length() - formatSize;

                if (docStoreOffset != -1)
                {
                    // We read only a slice out of this shared fields file
                    this.docStoreOffset = docStoreOffset;
                    this.size           = size;

                    // Verify the file is long enough to hold all of our
                    // docs
                    System.Diagnostics.Debug.Assert(((int)(indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
                }
                else
                {
                    this.docStoreOffset = 0;
                    this.size           = (int)(indexSize >> 3);
                }

                indexStream  = (IndexInput)cloneableIndexStream.Clone();
                numTotalDocs = (int)(indexSize >> 3);
                success      = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    Close();
                }
            }
        }
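
The size arithmetic above works because each .fdx index entry is a single 8-byte long pointer: after subtracting the 4-byte format header when present, indexSize >> 3 gives the document count, and a docStoreOffset merely selects a slice of that range. A tiny sketch of the arithmetic (names illustrative):

    internal static class FdxMath
    {
        // headerSize is 4 when the fdx file begins with a format int, 0 otherwise
        internal static int NumTotalDocs(long fdxFileLength, int headerSize)
        {
            long indexSize = fdxFileLength - headerSize;
            return (int)(indexSize >> 3);     // one 8-byte pointer per document
        }

        // A shared doc store must be long enough to hold this segment's slice
        internal static bool SliceFits(long fdxFileLength, int headerSize, int docStoreOffset, int size)
        {
            return NumTotalDocs(fdxFileLength, headerSize) >= size + docStoreOffset;
        }
    }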
Example #30
 internal CSIndexInput(IndexInput base_Renamed, long fileOffset, long length, int readBufferSize) : base(readBufferSize)
 {
     this.base_Renamed = (IndexInput)base_Renamed.Clone();
     this.fileOffset   = fileOffset;
     this.length       = length;
 }
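
CSIndexInput exposes a fixed [fileOffset, fileOffset + length) window of the compound file through a private clone of the base input, translating slice-relative positions into absolute ones. A standalone sketch of the same windowing over System.IO (names illustrative):

    using System.IO;

    // Reads a fixed [offset, offset + length) window of a base stream
    internal class SlicedStream
    {
        private readonly Stream baseStream;
        private readonly long fileOffset;
        private readonly long length;
        private long position;                // position within the slice

        internal SlicedStream(Stream baseStream, long fileOffset, long length)
        {
            this.baseStream = baseStream;
            this.fileOffset = fileOffset;
            this.length = length;
        }

        internal int Read(byte[] buffer, int offset, int count)
        {
            long remaining = length - position;
            if (remaining <= 0)
            {
                return 0;                     // never read past the end of the slice
            }
            if (count > remaining)
            {
                count = (int)remaining;
            }
            baseStream.Seek(fileOffset + position, SeekOrigin.Begin);  // translate to an absolute offset
            int read = baseStream.Read(buffer, offset, count);
            position += read;
            return read;
        }
    }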
Example #31
		/// <summary> Construct a new SegmentInfo instance by reading a
		/// previously saved SegmentInfo from input.
		/// 
		/// </summary>
		/// <param name="dir">directory to load from
		/// </param>
		/// <param name="format">format of the segments info file
		/// </param>
		/// <param name="input">input handle to read segment info from
		/// </param>
		internal SegmentInfo(Directory dir, int format, IndexInput input)
		{
			this.dir = dir;
			name = input.ReadString();
			docCount = input.ReadInt();
			if (format <= SegmentInfos.FORMAT_LOCKLESS)
			{
				delGen = input.ReadLong();
				if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE)
				{
					docStoreOffset = input.ReadInt();
					if (docStoreOffset != - 1)
					{
						docStoreSegment = input.ReadString();
						docStoreIsCompoundFile = (1 == input.ReadByte());
					}
					else
					{
						docStoreSegment = name;
						docStoreIsCompoundFile = false;
					}
				}
				else
				{
					docStoreOffset = - 1;
					docStoreSegment = name;
					docStoreIsCompoundFile = false;
				}
				if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE)
				{
					hasSingleNormFile = (1 == input.ReadByte());
				}
				else
				{
					hasSingleNormFile = false;
				}
				int numNormGen = input.ReadInt();
				if (numNormGen == NO)
				{
					normGen = null;
				}
				else
				{
					normGen = new long[numNormGen];
					for (int j = 0; j < numNormGen; j++)
					{
						normGen[j] = input.ReadLong();
					}
				}
				isCompoundFile = (sbyte) input.ReadByte();
				preLockless = (isCompoundFile == CHECK_DIR);
				if (format <= SegmentInfos.FORMAT_DEL_COUNT)
				{
					delCount = input.ReadInt();
					System.Diagnostics.Debug.Assert(delCount <= docCount);
				}
				else
					delCount = - 1;
				if (format <= SegmentInfos.FORMAT_HAS_PROX)
					hasProx = input.ReadByte() == 1;
				else
					hasProx = true;
				
				if (format <= SegmentInfos.FORMAT_DIAGNOSTICS)
				{
					diagnostics = input.ReadStringStringMap();
				}
				else
				{
					diagnostics = new System.Collections.Generic.Dictionary<string,string>();
				}
			}
			else
			{
				delGen = CHECK_DIR;
				normGen = null;
				isCompoundFile = (sbyte) (CHECK_DIR);
				preLockless = true;
				hasSingleNormFile = false;
				docStoreOffset = - 1;
				docStoreIsCompoundFile = false;
				docStoreSegment = null;
				delCount = - 1;
				hasProx = true;
				diagnostics = new System.Collections.Generic.Dictionary<string,string>();
			}
		}
Example #32
            internal CoreReaders(SegmentReader origInstance, Directory dir, SegmentInfo si, int readBufferSize, int termsIndexDivisor)
			{
				segment = si.name;
				this.readBufferSize = readBufferSize;
				this.dir = dir;
				
				bool success = false;
				
				try
				{
					Directory dir0 = dir;
					if (si.GetUseCompoundFile())
					{
						cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
						dir0 = cfsReader;
					}
					cfsDir = dir0;
					
					fieldInfos = new FieldInfos(cfsDir, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION);
					
					this.termsIndexDivisor = termsIndexDivisor;
					TermInfosReader reader = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor);
					if (termsIndexDivisor == - 1)
					{
						tisNoIndex = reader;
					}
					else
					{
						tis = reader;
						tisNoIndex = null;
					}
					
					// make sure that all index files have been read or are kept open
					// so that if an index update removes them we'll still have them
					freqStream = cfsDir.OpenInput(segment + "." + IndexFileNames.FREQ_EXTENSION, readBufferSize);
					
					if (fieldInfos.HasProx())
					{
						proxStream = cfsDir.OpenInput(segment + "." + IndexFileNames.PROX_EXTENSION, readBufferSize);
					}
					else
					{
						proxStream = null;
					}
					success = true;
				}
				finally
				{
					if (!success)
					{
						DecRef();
					}
				}

				// Must assign this at the end -- if we hit an
				// exception above core, we don't want to attempt to
				// purge the FieldCache (will hit NPE because core is
				// not assigned yet).
				this.origInstance = origInstance;
			}
Example #33
			internal CSIndexInput(IndexInput base_Renamed, long fileOffset, long length, int readBufferSize):base(readBufferSize)
			{
				this.base_Renamed = (IndexInput) base_Renamed.Clone();
				this.fileOffset = fileOffset;
				this.length = length;
			}
Example #34
		public override void  Close()
		{
			lock (this)
			{
				if (stream == null)
					throw new System.IO.IOException("Already closed");
				
				entries.Clear();
				stream.Close();
				stream = null;
			}
		}
Example #35
		/// <summary>read as a d-gaps list </summary>
		private void  ReadDgaps(IndexInput input)
		{
			size = input.ReadInt(); // (re)read size
			count = input.ReadInt(); // read count
			bits = new byte[(size >> 3) + 1]; // allocate bits
			int last = 0;
			int n = Count();
			while (n > 0)
			{
				last += input.ReadVInt();
				bits[last] = input.ReadByte();
				n -= BYTE_COUNTS[bits[last] & 0xFF];
			}
		}
Example #36
        internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize, int docStoreOffset, int size)
        {
            bool success = false;

            try
            {
                if (d.FileExists(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION))
                {
                    tvx    = d.OpenInput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION, readBufferSize);
                    format = CheckValidFormat(tvx);
                    tvd    = d.OpenInput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION, readBufferSize);
                    int tvdFormat = CheckValidFormat(tvd);
                    tvf = d.OpenInput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION, readBufferSize);
                    int tvfFormat = CheckValidFormat(tvf);

                    System.Diagnostics.Debug.Assert(format == tvdFormat);
                    System.Diagnostics.Debug.Assert(format == tvfFormat);

                    if (format >= FORMAT_VERSION2)
                    {
                        System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 16 == 0);
                        numTotalDocs = (int)(tvx.Length() >> 4);
                    }
                    else
                    {
                        System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 8 == 0);
                        numTotalDocs = (int)(tvx.Length() >> 3);
                    }

                    if (-1 == docStoreOffset)
                    {
                        this.docStoreOffset = 0;
                        this.size           = numTotalDocs;
                        System.Diagnostics.Debug.Assert(size == 0 || numTotalDocs == size);
                    }
                    else
                    {
                        this.docStoreOffset = docStoreOffset;
                        this.size           = size;
                        // Verify the file is long enough to hold all of our
                        // docs
                        System.Diagnostics.Debug.Assert(numTotalDocs >= size + docStoreOffset, "numTotalDocs=" + numTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset);
                    }
                }
                else
                {
                    // If all documents flushed in a segment had hit
                    // non-aborting exceptions, it's possible that
                    // FieldInfos.hasVectors returns true yet the term
                    // vector files don't exist.
                    format = 0;
                }

                this.fieldInfos = fieldInfos;
                success         = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    Close();
                }
            }
        }
Example #37
 internal SegmentTermPositions(SegmentReader p) : base(p)
 {
     this.proxStream = null;             // the proxStream will be cloned lazily when nextPosition() is called for the first time
 }
Example #38
		private int CheckValidFormat(IndexInput in_Renamed)
		{
			int format = in_Renamed.ReadInt();
			if (format > FORMAT_CURRENT)
			{
				throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FORMAT_CURRENT + " or less");
			}
			return format;
		}
Example #39
 /// <summary>Read as a bit set </summary>
 private void  ReadBits(IndexInput input)
 {
     count = input.ReadInt();             // read count
     bits  = new byte[(size >> 3) + 1];   // allocate bits
     input.ReadBytes(bits, 0, bits.Length);
 }
Example #40
		private void  OpenNorms(Directory cfsDir, int readBufferSize)
		{
			long nextNormSeek = SegmentMerger.NORMS_HEADER.Length; //skip header (header unused for now)
			int maxDoc = MaxDoc();
			for (int i = 0; i < core.fieldInfos.Size(); i++)
			{
				FieldInfo fi = core.fieldInfos.FieldInfo(i);
				if (norms.Contains(fi.name))
				{
					// in case this SegmentReader is being re-opened, we might be able to
					// reuse some norm instances and skip loading them here
					continue;
				}
				if (fi.isIndexed && !fi.omitNorms)
				{
					Directory d = Directory();
					System.String fileName = si.GetNormFileName(fi.number);
					if (!si.HasSeparateNorms(fi.number))
					{
						d = cfsDir;
					}
					
					// singleNormFile means multiple norms share this file
					bool singleNormFile = fileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION);
					IndexInput normInput = null;
					long normSeek;
					
					if (singleNormFile)
					{
						normSeek = nextNormSeek;
						if (singleNormStream == null)
						{
							singleNormStream = d.OpenInput(fileName, readBufferSize);
							singleNormRef = new Ref();
						}
						else
						{
							singleNormRef.IncRef();
						}
						// All norms in the .nrm file can share a single IndexInput since
						// they are only used in a synchronized context.
						// If this were to change in the future, a clone could be done here.
						normInput = singleNormStream;
					}
					else
					{
						normSeek = 0;
						normInput = d.OpenInput(fileName);
					}
					
					norms[fi.name] = new Norm(this, normInput, fi.number, normSeek);
					nextNormSeek += maxDoc; // increment also if some norms are separate
				}
			}
		}
Example #41
			internal SkipBuffer(IndexInput input, int length)
			{
				data = new byte[length];
				pointer = input.GetFilePointer();
				input.ReadBytes(data, 0, length);
			}
Example #42
			public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, int number, long normSeek)
			{
				InitBlock(enclosingInstance);
				this.in_Renamed = in_Renamed;
				this.number = number;
				this.normSeek = normSeek;
			}
Example #43
 internal CSIndexInput(IndexInput base_Renamed, long fileOffset, long length) : this(base_Renamed, fileOffset, length, BufferedIndexInput.BUFFER_SIZE)
 {
 }
Example #44
			private void  CloseInput()
			{
				if (in_Renamed != null)
				{
					if (in_Renamed != Enclosing_Instance.singleNormStream)
					{
						// It's private to us -- just close it
						in_Renamed.Close();
					}
					else
					{
						// We are sharing this with others -- decRef and
						// maybe close the shared norm stream
						if (Enclosing_Instance.singleNormRef.DecRef() == 0)
						{
							Enclosing_Instance.singleNormStream.Close();
							Enclosing_Instance.singleNormStream = null;
						}
					}
					
					in_Renamed = null;
				}
			}
Example #45
			internal CSIndexInput(IndexInput base_Renamed, long fileOffset, long length):this(base_Renamed, fileOffset, length, BufferedIndexInput.BUFFER_SIZE)
			{
			}
Example #46
		/// <summary>Bulk write a contiguous series of documents.  The
		/// lengths array is the length (in bytes) of each raw
		/// document.  The stream IndexInput is the
		/// fieldsStream from which we should bulk-copy all
		/// bytes. 
		/// </summary>
		internal void  AddRawDocuments(IndexInput stream, int[] lengths, int numDocs)
		{
			long position = fieldsStream.GetFilePointer();
			long start = position;
			for (int i = 0; i < numDocs; i++)
			{
				indexStream.WriteLong(position);
				position += lengths[i];
			}
			fieldsStream.CopyBytes(stream, position - start);
			System.Diagnostics.Debug.Assert(fieldsStream.GetFilePointer() == position);
		}
Example #47
		internal DefaultSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval):base(skipStream, maxSkipLevels, skipInterval)
		{
			freqPointer = new long[maxSkipLevels];
			proxPointer = new long[maxSkipLevels];
			payloadLength = new int[maxSkipLevels];
		}
Example #48
        /// <summary>Returns a {@link Status} instance detailing
        /// the state of the index.
        ///
        /// </summary>
        /// <param name="onlySegments">list of specific segment names to check
        ///
        /// <p/>As this method checks every byte in the specified
        /// segments, on a large index it can take quite a long
        /// time to run.
        ///
        /// <p/><b>WARNING</b>: make sure
        /// you only call this when the index is not opened by any
        /// writer.
        /// </param>
        public virtual Status CheckIndex_Renamed_Method(System.Collections.IList onlySegments)
        {
            System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
            SegmentInfos sis    = new SegmentInfos();
            Status       result = new Status();

            result.dir = dir;
            try
            {
                sis.Read(dir);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not read any segments file in directory");
                result.missingSegments = true;
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                return(result);
            }

            int numSegments = sis.Count;

            System.String segmentsFileName = sis.GetCurrentSegmentFileName();
            IndexInput    input            = null;

            try
            {
                input = dir.OpenInput(segmentsFileName);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not open segments file in directory");
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                result.cantOpenSegments = true;
                return(result);
            }
            int format = 0;

            try
            {
                format = input.ReadInt();
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not read segment file version in directory");
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                result.missingSegmentVersion = true;
                return(result);
            }
            finally
            {
                if (input != null)
                {
                    input.Close();
                }
            }

            System.String sFormat = "";
            bool          skip    = false;

            if (format == SegmentInfos.FORMAT)
            {
                sFormat = "FORMAT [Lucene Pre-2.1]";
            }
            else if (format == SegmentInfos.FORMAT_LOCKLESS)
            {
                sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
            }
            else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
            {
                sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
            }
            else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
            {
                sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
            }
            else
            {
                if (format == SegmentInfos.FORMAT_CHECKSUM)
                {
                    sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_DEL_COUNT)
                {
                    sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_HAS_PROX)
                {
                    sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_USER_DATA)
                {
                    sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
                }
                else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
                {
                    sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
                }
                else if (format < SegmentInfos.CURRENT_FORMAT)
                {
                    sFormat = "int=" + format + " [newer version of Lucene than this tool]";
                    skip    = true;
                }
                else
                {
                    sFormat = format + " [Lucene 1.3 or prior]";
                }
            }

            result.segmentsFileName = segmentsFileName;
            result.numSegments      = numSegments;
            result.segmentFormat    = sFormat;
            result.userData         = sis.GetUserData();
            System.String userDataString;
            if (sis.GetUserData().Count > 0)
            {
                userDataString = " userData=" + SupportClass.CollectionsHelper.CollectionToString(sis.GetUserData());
            }
            else
            {
                userDataString = "";
            }

            Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);

            if (onlySegments != null)
            {
                result.partial = true;
                if (infoStream != null)
                {
                    infoStream.Write("\nChecking only these segments:");
                }
                System.Collections.IEnumerator it = onlySegments.GetEnumerator();
                while (it.MoveNext())
                {
                    if (infoStream != null)
                    {
                        infoStream.Write(" " + it.Current);
                    }
                }
                System.Collections.IEnumerator e = onlySegments.GetEnumerator();
                while (e.MoveNext())
                {
                    result.segmentsChecked.Add(e.Current);
                }
                Msg(":");
            }

            if (skip)
            {
                Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
                result.toolOutOfDate = true;
                return(result);
            }


            result.newSegments = (SegmentInfos)sis.Clone();
            result.newSegments.Clear();

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info = sis.Info(i);
                if (onlySegments != null && !onlySegments.Contains(info.name))
                {
                    continue;
                }
                Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
                result.segmentInfos.Add(segInfoStat);
                Msg("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
                segInfoStat.name     = info.name;
                segInfoStat.docCount = info.docCount;

                int toLoseDocCount = info.docCount;

                SegmentReader reader = null;

                try
                {
                    Msg("    compound=" + info.GetUseCompoundFile());
                    segInfoStat.compound = info.GetUseCompoundFile();
                    Msg("    hasProx=" + info.GetHasProx());
                    segInfoStat.hasProx = info.GetHasProx();
                    Msg("    numFiles=" + info.Files().Count);
                    segInfoStat.numFiles = info.Files().Count;
                    Msg(System.String.Format(nf, "    size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
                    segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
                    System.Collections.Generic.IDictionary <string, string> diagnostics = info.GetDiagnostics();
                    segInfoStat.diagnostics = diagnostics;
                    if (diagnostics.Count > 0)
                    {
                        Msg("    diagnostics = " + SupportClass.CollectionsHelper.CollectionToString(diagnostics));
                    }

                    int docStoreOffset = info.GetDocStoreOffset();
                    if (docStoreOffset != -1)
                    {
                        Msg("    docStoreOffset=" + docStoreOffset);
                        segInfoStat.docStoreOffset = docStoreOffset;
                        Msg("    docStoreSegment=" + info.GetDocStoreSegment());
                        segInfoStat.docStoreSegment = info.GetDocStoreSegment();
                        Msg("    docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
                        segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile();
                    }
                    System.String delFileName = info.GetDelFileName();
                    if (delFileName == null)
                    {
                        Msg("    no deletions");
                        segInfoStat.hasDeletions = false;
                    }
                    else
                    {
                        Msg("    has deletions [delFileName=" + delFileName + "]");
                        segInfoStat.hasDeletions      = true;
                        segInfoStat.deletionsFileName = delFileName;
                    }
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: open reader.........");
                    }
                    reader = SegmentReader.Get(info);

                    segInfoStat.openReaderPassed = true;

                    int numDocs = reader.NumDocs();
                    toLoseDocCount = numDocs;
                    if (reader.HasDeletions())
                    {
                        if (reader.deletedDocs.Count() != info.GetDelCount())
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        }
                        if (reader.deletedDocs.Count() > reader.MaxDoc())
                        {
                            throw new System.SystemException("too many deleted docs: maxDoc()=" + reader.MaxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        }
                        if (info.docCount - numDocs != info.GetDelCount())
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                        }
                        segInfoStat.numDeleted = info.docCount - numDocs;
                        Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
                    }
                    else
                    {
                        if (info.GetDelCount() != 0)
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                        }
                        Msg("OK");
                    }
                    if (reader.MaxDoc() != info.docCount)
                    {
                        throw new System.SystemException("SegmentReader.maxDoc() " + reader.MaxDoc() + " != SegmentInfos.docCount " + info.docCount);
                    }

                    // Test getFieldNames()
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: fields..............");
                    }
                    System.Collections.Generic.ICollection <string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
                    Msg("OK [" + fieldNames.Count + " fields]");
                    segInfoStat.numFields = fieldNames.Count;

                    // Test Field Norms
                    segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);

                    // Test the Term Index
                    segInfoStat.termIndexStatus = TestTermIndex(info, reader);

                    // Test Stored Fields
                    segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);

                    // Test Term Vectors
                    segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);

                    // Rethrow the first exception we encountered
                    //  This will cause stats for failed segments to be incremented properly
                    if (segInfoStat.fieldNormStatus.error != null)
                    {
                        throw new System.SystemException("Field Norm test failed");
                    }
                    else if (segInfoStat.termIndexStatus.error != null)
                    {
                        throw new System.SystemException("Term Index test failed");
                    }
                    else if (segInfoStat.storedFieldStatus.error != null)
                    {
                        throw new System.SystemException("Stored Field test failed");
                    }
                    else if (segInfoStat.termVectorStatus.error != null)
                    {
                        throw new System.SystemException("Term Vector test failed");
                    }

                    Msg("");
                }
                catch (System.Exception t)
                {
                    Msg("FAILED");
                    System.String comment = "fixIndex() would remove reference to this segment";
                    Msg("    WARNING: " + comment + "; full exception:");
                    if (infoStream != null)
                    {
                        infoStream.WriteLine(t.StackTrace);
                    }
                    Msg("");
                    result.totLoseDocCount += toLoseDocCount;
                    result.numBadSegments++;
                    continue;
                }
                finally
                {
                    if (reader != null)
                    {
                        reader.Close();
                    }
                }

                // Keeper
                result.newSegments.Add(info.Clone());
            }

            if (0 == result.numBadSegments)
            {
                result.clean = true;
                Msg("No problems were detected with this index.\n");
            }
            else
            {
                Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
            }

            return(result);
        }
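The Status object assembled above tells a caller everything needed to decide what to do next. A minimal consumer sketch, using only field names visible in this snippet (the public entry point that produces the Status varies across Lucene.NET versions, so obtaining it is left out):

        private static void Report(Status status)
        {
            if (status.toolOutOfDate)
            {
                System.Console.WriteLine("index written by a newer Lucene; rebuild this tool first");
                return;
            }
            if (status.clean)
            {
                System.Console.WriteLine("no problems detected");
            }
            else
            {
                // A repair would rewrite the segments file from status.newSegments,
                // i.e. keep only the segments that passed every test above.
                System.Console.WriteLine(status.numBadSegments + " broken segments; repairing would drop " + status.totLoseDocCount + " documents");
            }
        }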
Example #49
		/// <summary> Subclasses must implement the actual skip data encoding in this method.
		/// 
		/// </summary>
		/// <param name="level">the level skip data shall be read from
		/// </param>
		/// <param name="skipStream">the skip stream to read from
		/// </param>
		protected internal abstract int ReadSkipData(int level, IndexInput skipStream);
Example #50
		internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize, int docStoreOffset, int size)
		{
			bool success = false;
			
			try
			{
                if (d.FileExists(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION))
                {
                    tvx = d.OpenInput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION, readBufferSize);
                    format = CheckValidFormat(tvx);
                    tvd = d.OpenInput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION, readBufferSize);
                    int tvdFormat = CheckValidFormat(tvd);
                    tvf = d.OpenInput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION, readBufferSize);
                    int tvfFormat = CheckValidFormat(tvf);

                    System.Diagnostics.Debug.Assert(format == tvdFormat);
                    System.Diagnostics.Debug.Assert(format == tvfFormat);

                    if (format >= FORMAT_VERSION2)
                    {
                        System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 16 == 0);
                        numTotalDocs = (int)(tvx.Length() >> 4);
                    }
                    else
                    {
                        System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 8 == 0);
                        numTotalDocs = (int)(tvx.Length() >> 3);
                    }

                    if (-1 == docStoreOffset)
                    {
                        this.docStoreOffset = 0;
                        this.size = numTotalDocs;
                        System.Diagnostics.Debug.Assert(size == 0 || numTotalDocs == size);
                    }
                    else
                    {
                        this.docStoreOffset = docStoreOffset;
                        this.size = size;
                        // Verify the file is long enough to hold all of our
                        // docs
                        System.Diagnostics.Debug.Assert(numTotalDocs >= size + docStoreOffset, "numTotalDocs=" + numTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset);
                    }
                }
                else
                {
                    // If all documents flushed in a segment had hit
                    // non-aborting exceptions, it's possible that
                    // FieldInfos.hasVectors returns true yet the term
                    // vector files don't exist.
                    format = 0;
                }

				
				this.fieldInfos = fieldInfos;
				success = true;
			}
			finally
			{
				// With lock-less commits, it's entirely possible (and
				// fine) to hit a FileNotFound exception above. In
				// this case, we want to explicitly close any subset
				// of things that were opened so that we don't have to
				// wait for a GC to do so.
				if (!success)
				{
					Close();
				}
			}
		}
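The shift arithmetic encodes the on-disk layout of the .tvx file: from FORMAT_VERSION2 on, each document contributes two 8-byte pointers (one into the .tvd file, one into .tvf), i.e. 16 bytes per entry, while older formats stored a single 8-byte pointer, hence >> 4 versus >> 3. A quick worked example, assuming FORMAT_SIZE is the 4-byte format header:

        long tvxLength = 4 + 10 * 16;      // a version-2 .tvx holding 10 documents
        int docs = (int)(tvxLength >> 4);  // == 10; the sub-16-byte header vanishes in the shift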
Example #51
		protected internal override int ReadSkipData(int level, IndexInput skipStream)
		{
			int delta;
			if (currentFieldStoresPayloads)
			{
				// the current field stores payloads.
				// if the doc delta is odd then we have
				// to read the current payload length
				// because it differs from the length of the
				// previous payload
				delta = skipStream.ReadVInt();
				if ((delta & 1) != 0)
				{
					payloadLength[level] = skipStream.ReadVInt();
				}
				delta = SupportClass.Number.URShift(delta, 1);
			}
			else
			{
				delta = skipStream.ReadVInt();
			}
			freqPointer[level] += skipStream.ReadVInt();
			proxPointer[level] += skipStream.ReadVInt();
			
			return delta;
		}
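For contrast, here is a sketch of the matching write side, loosely after DefaultSkipListWriter in the same code base (the cur*/lastSkip* field names are assumptions for illustration): when the field stores payloads, the doc delta is shifted left one bit so the low bit can flag that a new payload length follows.

		protected internal override void  WriteSkipData(int level, IndexOutput skipBuffer)
		{
			int delta = curDoc - lastSkipDoc[level];
			if (curStorePayloads)
			{
				if (curPayloadLength == lastSkipPayloadLength[level])
				{
					// payload length unchanged: low bit 0, no length written
					skipBuffer.WriteVInt(delta << 1);
				}
				else
				{
					// payload length changed: low bit 1, then the new length
					skipBuffer.WriteVInt((delta << 1) | 1);
					skipBuffer.WriteVInt(curPayloadLength);
					lastSkipPayloadLength[level] = curPayloadLength;
				}
			}
			else
			{
				skipBuffer.WriteVInt(delta);
			}
			skipBuffer.WriteVInt((int) (curFreqPointer - lastSkipFreqPointer[level]));
			skipBuffer.WriteVInt((int) (curProxPointer - lastSkipProxPointer[level]));
			lastSkipDoc[level] = curDoc;
			lastSkipFreqPointer[level] = curFreqPointer;
			lastSkipProxPointer[level] = curProxPointer;
		}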
Example #52
        /// <summary> Construct a new SegmentInfo instance by reading a
        /// previously saved SegmentInfo from input.
        ///
        /// </summary>
        /// <param name="dir">directory to load from
        /// </param>
        /// <param name="format">format of the segments info file
        /// </param>
        /// <param name="input">input handle to read segment info from
        /// </param>
        internal SegmentInfo(Directory dir, int format, IndexInput input)
        {
            this.dir = dir;
            name     = input.ReadString();
            docCount = input.ReadInt();
            if (format <= SegmentInfos.FORMAT_LOCKLESS)
            {
                delGen = input.ReadLong();
                if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE)
                {
                    docStoreOffset = input.ReadInt();
                    if (docStoreOffset != -1)
                    {
                        docStoreSegment        = input.ReadString();
                        docStoreIsCompoundFile = (1 == input.ReadByte());
                    }
                    else
                    {
                        docStoreSegment        = name;
                        docStoreIsCompoundFile = false;
                    }
                }
                else
                {
                    docStoreOffset         = -1;
                    docStoreSegment        = name;
                    docStoreIsCompoundFile = false;
                }
                if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE)
                {
                    hasSingleNormFile = (1 == input.ReadByte());
                }
                else
                {
                    hasSingleNormFile = false;
                }
                int numNormGen = input.ReadInt();
                if (numNormGen == NO)
                {
                    normGen = null;
                }
                else
                {
                    normGen = new long[numNormGen];
                    for (int j = 0; j < numNormGen; j++)
                    {
                        normGen[j] = input.ReadLong();
                    }
                }
                isCompoundFile = (sbyte)input.ReadByte();
                preLockless    = (isCompoundFile == CHECK_DIR);
                if (format <= SegmentInfos.FORMAT_DEL_COUNT)
                {
                    delCount = input.ReadInt();
                    System.Diagnostics.Debug.Assert(delCount <= docCount);
                }
                else
                {
                    delCount = -1;
                }
                if (format <= SegmentInfos.FORMAT_HAS_PROX)
                {
                    hasProx = input.ReadByte() == 1;
                }
                else
                {
                    hasProx = true;
                }

                if (format <= SegmentInfos.FORMAT_DIAGNOSTICS)
                {
                    diagnostics = input.ReadStringStringMap();
                }
                else
                {
                    diagnostics = new System.Collections.Generic.Dictionary <string, string>();
                }
            }
            else
            {
                delGen                 = CHECK_DIR;
                normGen                = null;
                isCompoundFile         = (sbyte)(CHECK_DIR);
                preLockless            = true;
                hasSingleNormFile      = false;
                docStoreOffset         = -1;
                docStoreIsCompoundFile = false;
                docStoreSegment        = null;
                delCount               = -1;
                hasProx                = true;
                diagnostics            = new System.Collections.Generic.Dictionary <string, string>();
            }
        }
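The cascade of format checks reads naturally once you know that these format constants are negative and count downward as the file format evolves (in the 2.9-era sources they run from FORMAT = -1 down to FORMAT_DIAGNOSTICS = -9), so format <= SegmentInfos.FORMAT_X means "written by a version at least as new as X"; each else branch supplies defaults for older indexes. A tiny illustration with stand-in values:

        const int FORMAT_LOCKLESS  = -2;               // stand-ins showing the scheme,
        const int FORMAT_DEL_COUNT = -6;               // not authoritative constants

        int format = -7;                               // e.g. an index written at FORMAT_HAS_PROX
        bool hasDelCount = format <= FORMAT_DEL_COUNT; // true: -7 <= -6, at least as new
        bool lockless    = format <= FORMAT_LOCKLESS;  // also true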
Example #53
		// It is not always necessary to move the prox pointer
		// to a new document after the freq pointer has been moved.
		// Consider, for example, a phrase query with two terms:
		// the freq pointer for term 1 has to move to document x
		// to answer the question of whether the term occurs in that
		// document. But only if term 2 also matches document x do
		// the positions have to be read, to figure out whether
		// term 1 and term 2 appear next to each other in document x
		// and thus satisfy the query. So we move the prox pointer
		// lazily to the document, as soon as positions are requested.
		private void  LazySkip()
		{
			if (proxStream == null)
			{
				// clone lazily
				proxStream = (IndexInput) parent.core.proxStream.Clone();
			}
			
			// we might have to skip the current payload
			// if it was not read yet
			SkipPayload();
			
			if (lazySkipPointer != - 1)
			{
				proxStream.Seek(lazySkipPointer);
				lazySkipPointer = - 1;
			}
			
			if (lazySkipProxCount != 0)
			{
				SkipPositions(lazySkipProxCount);
				lazySkipProxCount = 0;
			}
		}
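The other half of this laziness is where a skip is recorded instead of performed. A sketch following SegmentTermPositions.SkipProx in the same sources (field names partly assumed from the surrounding code): the prox file position is only remembered, and LazySkip above performs the actual Seek the first time positions are requested.

		private void  SkipProx(long proxPointer, int payloadLength)
		{
			// remember where positions start; LazySkip() seeks here on demand
			lazySkipPointer = proxPointer;
			lazySkipProxCount = 0;
			proxCount = 0;
			this.payloadLength = payloadLength;
			needToLoadPayload = false;
		}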
Example #54
		/// <summary>Read as a bit set </summary>
		private void  ReadBits(IndexInput input)
		{
			count = input.ReadInt(); // read count
			bits = new byte[(size >> 3) + 1]; // allocate bits
			input.ReadBytes(bits, 0, bits.Length);
		}
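Given this layout, bit n lives in byte n >> 3 at position n & 7, and the +1 rounds the allocation up (size = 10 allocates (10 >> 3) + 1 = 2 bytes). A membership test in the style of Lucene's BitVector.Get:

		public bool Get(int bit)
		{
			// select the byte, then mask the bit within it
			return (bits[bit >> 3] & (1 << (bit & 7))) != 0;
		}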
Example #55
		private void  Read(IndexInput input, System.String fileName)
		{
			int firstInt = input.ReadVInt();
			
			if (firstInt < 0)
			{
				// This is a real format
				format = firstInt;
			}
			else
			{
				format = FORMAT_PRE;
			}
			
			if (format != FORMAT_PRE && format != FORMAT_START)
			{
				throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
			}
			
			int size;
			if (format == FORMAT_PRE)
			{
				size = firstInt;
			}
			else
			{
				size = input.ReadVInt(); //read in the size
			}
			
			for (int i = 0; i < size; i++)
			{
				System.String name = StringHelper.Intern(input.ReadString());
				byte bits = input.ReadByte();
				bool isIndexed = (bits & IS_INDEXED) != 0;
				bool storeTermVector = (bits & STORE_TERMVECTOR) != 0;
				bool storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
				bool storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
				bool omitNorms = (bits & OMIT_NORMS) != 0;
				bool storePayloads = (bits & STORE_PAYLOADS) != 0;
				bool omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;
				
				AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
			}
			
			if (input.GetFilePointer() != input.Length())
			{
				throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.GetFilePointer() + " vs size " + input.Length());
			}
		}
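The write side is the mirror image: each field is persisted as its interned name plus one flag byte assembled from the same masks decoded above. A sketch loosely after FieldInfos.Write in the same sources (the WriteField name and the FieldInfo member names are assumptions here):

		private void  WriteField(IndexOutput output, FieldInfo fi)
		{
			byte bits = 0x0;
			if (fi.isIndexed) bits |= IS_INDEXED;
			if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
			if (fi.storePositionWithTermVector) bits |= STORE_POSITIONS_WITH_TERMVECTOR;
			if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
			if (fi.omitNorms) bits |= OMIT_NORMS;
			if (fi.storePayloads) bits |= STORE_PAYLOADS;
			if (fi.omitTermFreqAndPositions) bits |= OMIT_TERM_FREQ_AND_POSITIONS;
			
			output.WriteString(fi.name);
			output.WriteByte(bits);
		}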
Example #56
		internal SegmentTermPositions(SegmentReader p):base(p)
		{
			this.proxStream = null; // the proxStream will be cloned lazily when nextPosition() is called for the first time
		}