/// <summary>Read norms into a pre-allocated array. </summary>
        public override void  Norms(System.String field, byte[] bytes, int offset)
        {
            lock (this)
            {
                Norm norm = (Norm)norms[field];
                if (norm == null)
                {
                    return; // use zeros in array
                }
                if (norm.bytes != null)
                {
                    // can copy from cache
                    Array.Copy(norm.bytes, 0, bytes, offset, MaxDoc());
                    return;
                }

                InputStream normStream = (InputStream)norm.in_Renamed.Clone();
                try
                {
                    // read from disk
                    normStream.Seek(0);
                    normStream.ReadBytes(bytes, offset, MaxDoc());
                }
                finally
                {
                    normStream.Close();
                }
            }
        }
		public /*internal*/ SegmentTermDocs(SegmentReader parent)
		{
			this.parent = parent;
			this.freqStream = (InputStream) parent.freqStream.Clone();
			this.deletedDocs = parent.deletedDocs;
			this.skipInterval = parent.tis.GetSkipInterval();
		}
 public /*internal*/ SegmentTermDocs(SegmentReader parent)
 {
     this.parent       = parent;
     this.freqStream   = (InputStream)parent.freqStream.Clone();
     this.deletedDocs  = parent.deletedDocs;
     this.skipInterval = parent.tis.GetSkipInterval();
 }
Exemple #4
0
		private void  CheckValidFormat(InputStream in_Renamed)
		{
			int format = in_Renamed.ReadInt();
			if (format > TermVectorsWriter.FORMAT_VERSION)
			{
				throw new System.IO.IOException("Incompatible format version: " + format + " expected " + TermVectorsWriter.FORMAT_VERSION + " or less");
			}
		}
Exemple #5
0
        public /*internal*/ FieldsReader(Directory d, System.String segment, FieldInfos fn)
        {
            fieldInfos = fn;

            fieldsStream = d.OpenFile(segment + ".fdt");
            indexStream  = d.OpenFile(segment + ".fdx");

            size = (int)(indexStream.Length() / 8);
        }
		public /*internal*/ FieldsReader(Directory d, System.String segment, FieldInfos fn)
		{
			fieldInfos = fn;
			
			fieldsStream = d.OpenFile(segment + ".fdt");
			indexStream = d.OpenFile(segment + ".fdx");
			
			size = (int) (indexStream.Length() / 8);
		}
Exemple #7
0
        public CompoundFileReader(Directory dir, System.String name)
        {
            directory = dir;
            fileName  = name;

            bool success = false;

            try
            {
                stream = dir.OpenFile(name);

                // read the directory and init files
                int       count = stream.ReadVInt();
                FileEntry entry = null;
                for (int i = 0; i < count; i++)
                {
                    long          offset = stream.ReadLong();
                    System.String id     = stream.ReadString();

                    if (entry != null)
                    {
                        // set length of the previous entry
                        entry.length = offset - entry.offset;
                    }

                    entry        = new FileEntry();
                    entry.offset = offset;
                    entries[id]  = entry;
                }

                // set the length of the final entry
                if (entry != null)
                {
                    entry.length = stream.Length() - entry.offset;
                }

                success = true;
            }
            finally
            {
                if (!success && (stream != null))
                {
                    try
                    {
                        stream.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                    }
                }
            }
        }
		public CompoundFileReader(Directory dir, System.String name)
		{
			directory = dir;
			fileName = name;
			
			bool success = false;
			
			try
			{
				stream = dir.OpenFile(name);
				
				// read the directory and init files
				int count = stream.ReadVInt();
				FileEntry entry = null;
				for (int i = 0; i < count; i++)
				{
					long offset = stream.ReadLong();
					System.String id = stream.ReadString();
					
					if (entry != null)
					{
						// set length of the previous entry
						entry.length = offset - entry.offset;
					}
					
					entry = new FileEntry();
					entry.offset = offset;
					entries[id] = entry;
				}
				
				// set the length of the final entry
				if (entry != null)
				{
					entry.length = stream.Length() - entry.offset;
				}
				
				success = true;
			}
			finally
			{
				if (!success && (stream != null))
				{
					try
					{
						stream.Close();
					}
					catch (System.IO.IOException e)
					{
					}
				}
			}
		}
Exemple #9
0
        /// <summary> Construct a FieldInfos object using the directory and the name of the file
        /// InputStream
        /// </summary>
        /// <param name="d">The directory to open the InputStream from
        /// </param>
        /// <param name="name">The name of the file to open the InputStream from in the Directory
        /// </param>
        /// <throws>  IOException </throws>
        /// <summary>
        /// </summary>
        /// <seealso cref="#read">
        /// </seealso>
        public /*internal*/ FieldInfos(Directory d, System.String name)
        {
            InputStream input = d.OpenFile(name);

            try
            {
                Read(input);
            }
            finally
            {
                input.Close();
            }
        }
Exemple #10
0
        private void  Read(InputStream input)
        {
            int size = input.ReadVInt(); //read in the size

            for (int i = 0; i < size; i++)
            {
                System.String name            = String.Intern(input.ReadString());
                byte          bits            = input.ReadByte();
                bool          isIndexed       = (bits & 0x1) != 0;
                bool          storeTermVector = (bits & 0x2) != 0;
                AddInternal(name, isIndexed, storeTermVector);
            }
        }
        public void  Read(Directory directory)
        {
            InputStream input = directory.OpenFile("segments");

            try
            {
                int format = input.ReadInt();
                if (format < 0)
                {
                    // file contains explicit format info
                    // check that it is a format we can understand
                    if (format < FORMAT)
                    {
                        throw new System.IO.IOException("Unknown format version: " + format);
                    }
                    version = input.ReadLong(); // read version
                    counter = input.ReadInt();  // read counter
                }
                else
                {
                    // file is in old format without explicit format info
                    counter = format;
                }

                for (int i = input.ReadInt(); i > 0; i--)
                {
                    // read segmentInfos
                    SegmentInfo si = new SegmentInfo(input.ReadString(), input.ReadInt(), directory);
                    Add(si);
                }

                if (format >= 0)
                {
                    // in old format the version number may be at the end of the file
                    if (input.GetFilePointer() >= input.Length())
                    {
                        version = 0;
                    }
                    // old file format without version number
                    else
                    {
                        version = input.ReadLong(); // read version
                    }
                }
            }
            finally
            {
                input.Close();
            }
        }
Exemple #12
0
        public override void  Close()
        {
            lock (this)
            {
                if (stream == null)
                {
                    throw new System.IO.IOException("Already closed");
                }

                entries.Clear();
                stream.Close();
                stream = null;
            }
        }
Exemple #13
0
        internal SegmentTermEnum(InputStream i, FieldInfos fis, bool isi)
        {
            input      = i;
            fieldInfos = fis;
            isIndex    = isi;

            int firstInt = input.ReadInt();

            if (firstInt >= 0)
            {
                // original-format file, without explicit format version number
                format = 0;
                size   = firstInt;

                // back-compatible settings
                indexInterval = 128;
                skipInterval  = System.Int32.MaxValue;                // switch off skipTo optimization
            }
            else
            {
                // we have a format version number
                format = firstInt;

                // check that it is a format we can understand
                if (format < TermInfosWriter.FORMAT)
                {
                    throw new System.IO.IOException("Unknown format version:" + format);
                }

                size = input.ReadLong();                 // read the size

                if (format == -1)
                {
                    if (!isIndex)
                    {
                        indexInterval        = input.ReadInt();
                        formatM1SkipInterval = input.ReadInt();
                    }
                    // switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
                    // skipTo implementation of these versions
                    skipInterval = System.Int32.MaxValue;
                }
                else
                {
                    indexInterval = input.ReadInt();
                    skipInterval  = input.ReadInt();
                }
            }
        }
		public /*internal*/ TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos)
		{
			if (d.FileExists(segment + TermVectorsWriter.TVX_EXTENSION))
			{
				tvx = d.OpenFile(segment + TermVectorsWriter.TVX_EXTENSION);
				CheckValidFormat(tvx);
				tvd = d.OpenFile(segment + TermVectorsWriter.TVD_EXTENSION);
				CheckValidFormat(tvd);
				tvf = d.OpenFile(segment + TermVectorsWriter.TVF_EXTENSION);
				CheckValidFormat(tvf);
				size = (int) tvx.Length() / 8;
			}
			
			this.fieldInfos = fieldInfos;
		}
Exemple #15
0
        public /*internal*/ TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos)
        {
            if (d.FileExists(segment + TermVectorsWriter.TVX_EXTENSION))
            {
                tvx = d.OpenFile(segment + TermVectorsWriter.TVX_EXTENSION);
                CheckValidFormat(tvx);
                tvd = d.OpenFile(segment + TermVectorsWriter.TVD_EXTENSION);
                CheckValidFormat(tvd);
                tvf = d.OpenFile(segment + TermVectorsWriter.TVF_EXTENSION);
                CheckValidFormat(tvf);
                size = (int)tvx.Length() / 8;
            }

            this.fieldInfos = fieldInfos;
        }
        /// <summary>Constructs a bit vector from the file <code>name</code> in Directory
        /// <code>d</code>, as written by the {@link #write} method.
        /// </summary>
        public BitVector(Directory d, System.String name)
        {
            InputStream input = d.OpenFile(name);

            try
            {
                size  = input.ReadInt();               // read size
                count = input.ReadInt();               // read count
                bits  = new byte[(size >> 3) + 1];     // allocate bits
                input.ReadBytes(bits, 0, bits.Length); // read bits
            }
            finally
            {
                input.Close();
            }
        }
		internal SegmentTermEnum(InputStream i, FieldInfos fis, bool isi)
		{
			input = i;
			fieldInfos = fis;
			isIndex = isi;
			
			int firstInt = input.ReadInt();
			if (firstInt >= 0)
			{
				// original-format file, without explicit format version number
				format = 0;
				size = firstInt;
				
				// back-compatible settings
				indexInterval = 128;
				skipInterval = System.Int32.MaxValue; // switch off skipTo optimization
			}
			else
			{
				// we have a format version number
				format = firstInt;
				
				// check that it is a format we can understand
				if (format < TermInfosWriter.FORMAT)
					throw new System.IO.IOException("Unknown format version:" + format);
				
				size = input.ReadLong(); // read the size
				
				if (format == - 1)
				{
					if (!isIndex)
					{
						indexInterval = input.ReadInt();
						formatM1SkipInterval = input.ReadInt();
					}
					// switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in 
					// skipTo implementation of these versions
					skipInterval = System.Int32.MaxValue;
				}
				else
				{
					indexInterval = input.ReadInt();
					skipInterval = input.ReadInt();
				}
			}
		}
Exemple #18
0
        /// <summary>Copy the contents of the file with specified extension into the
        /// provided output stream. Use the provided buffer for moving data
        /// to reduce memory allocation.
        /// </summary>
        private void  CopyFile(FileEntry source, OutputStream os, byte[] buffer)
        {
            InputStream is_Renamed = null;

            try
            {
                long startPtr = os.GetFilePointer();

                is_Renamed = directory.OpenFile(source.file);
                long length    = is_Renamed.Length();
                long remainder = length;
                int  chunk     = buffer.Length;

                while (remainder > 0)
                {
                    int len = (int)System.Math.Min(chunk, remainder);
                    is_Renamed.ReadBytes(buffer, 0, len);
                    os.WriteBytes(buffer, len);
                    remainder -= len;
                }

                // Verify that remainder is 0
                if (remainder != 0)
                {
                    throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
                }

                // Verify that the output length diff is equal to original file
                long endPtr = os.GetFilePointer();
                long diff   = endPtr - startPtr;
                if (diff != length)
                {
                    throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
                }
            }
            finally
            {
                if (is_Renamed != null)
                {
                    is_Renamed.Close();
                }
            }
        }
        private void  Initialize(SegmentInfo si)
        {
            segment = si.name;

            // Use compound file directory for some files, if it exists
            Directory cfsDir = Directory();

            if (Directory().FileExists(segment + ".cfs"))
            {
                cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
                cfsDir    = cfsReader;
            }

            // No compound file exists - use the multi-file format
            fieldInfos   = new FieldInfos(cfsDir, segment + ".fnm");
            fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);

            tis = new TermInfosReader(cfsDir, segment, fieldInfos);

            // NOTE: the bitvector is stored using the regular directory, not cfs
            if (HasDeletions(si))
            {
                deletedDocs = new BitVector(Directory(), segment + ".del");
            }

            // make sure that all index files have been read or are kept open
            // so that if an index update removes them we'll still have them
            freqStream = cfsDir.OpenFile(segment + ".frq");
            proxStream = cfsDir.OpenFile(segment + ".prx");
            OpenNorms(cfsDir);

            if (fieldInfos.HasVectors())
            {
                // open term vector files only as needed
                termVectorsReader = new TermVectorsReader(cfsDir, segment, fieldInfos);
            }
        }
        /// <summary> Current version number from segments file.</summary>
        public static long ReadCurrentVersion(Directory directory)
        {
            InputStream input   = directory.OpenFile("segments");
            int         format  = 0;
            long        version = 0;

            try
            {
                format = input.ReadInt();
                if (format < 0)
                {
                    if (format < FORMAT)
                    {
                        throw new System.IO.IOException("Unknown format version: " + format);
                    }
                    version = input.ReadLong(); // read version
                }
            }
            finally
            {
                input.Close();
            }

            if (format < 0)
            {
                return(version);
            }

            // We cannot be sure about the format of the file.
            // Therefore we have to read the whole file and cannot simply seek to the version entry.

            SegmentInfos sis = new SegmentInfos();

            sis.Read(directory);
            return(sis.GetVersion());
        }
        private System.Collections.ArrayList ReadDeleteableFiles()
        {
            System.Collections.ArrayList result = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
            if (!directory.FileExists("deletable"))
            {
                return(result);
            }

            InputStream input = directory.OpenFile("deletable");

            try
            {
                for (int i = input.ReadInt(); i > 0; i--)
                {
                    // read file names
                    result.Add(input.ReadString());
                }
            }
            finally
            {
                input.Close();
            }
            return(result);
        }
Exemple #22
0
 internal CSInputStream(InputStream base_Renamed, long fileOffset, long length)
 {
     this.base_Renamed = base_Renamed;
     this.fileOffset   = fileOffset;
     this.length       = length; // variable in the superclass
 }
Exemple #23
0
		private void  Read(InputStream input)
		{
			int size = input.ReadVInt(); //read in the size
			for (int i = 0; i < size; i++)
			{
				System.String name = String.Intern(input.ReadString());
				byte bits = input.ReadByte();
				bool isIndexed = (bits & 0x1) != 0;
				bool storeTermVector = (bits & 0x2) != 0;
				AddInternal(name, isIndexed, storeTermVector);
			}
		}
		private void  Initialize(SegmentInfo si)
		{
			segment = si.name;
			
			// Use compound file directory for some files, if it exists
			Directory cfsDir = Directory();
			if (Directory().FileExists(segment + ".cfs"))
			{
				cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
				cfsDir = cfsReader;
			}
			
			// No compound file exists - use the multi-file format
			fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
			fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);
			
			tis = new TermInfosReader(cfsDir, segment, fieldInfos);
			
			// NOTE: the bitvector is stored using the regular directory, not cfs
			if (HasDeletions(si))
				deletedDocs = new BitVector(Directory(), segment + ".del");
			
			// make sure that all index files have been read or are kept open
			// so that if an index update removes them we'll still have them
			freqStream = cfsDir.OpenFile(segment + ".frq");
			proxStream = cfsDir.OpenFile(segment + ".prx");
			OpenNorms(cfsDir);
			
			if (fieldInfos.HasVectors())
			{
				// open term vector files only as needed
				termVectorsReader = new TermVectorsReader(cfsDir, segment, fieldInfos);
			}
		}
			public Norm(SegmentReader enclosingInstance, InputStream in_Renamed, int number)
			{
				InitBlock(enclosingInstance);
				this.in_Renamed = in_Renamed;
				this.number = number;
			}
Exemple #26
0
 internal SegmentTermPositions(SegmentReader p) : base(p)
 {
     this.proxStream = (InputStream)parent.proxStream.Clone();
 }
        /// <summary>Optimized implementation. </summary>
        public virtual bool SkipTo(int target)
        {
            if (df >= skipInterval)
            {
                // optimized case

                if (skipStream == null)
                {
                    skipStream = (InputStream)freqStream.Clone(); // lazily clone
                }
                if (!haveSkipped)
                {
                    // lazily seek skip stream
                    skipStream.Seek(skipPointer);
                    haveSkipped = true;
                }

                // scan skip data
                int  lastSkipDoc     = skipDoc;
                long lastFreqPointer = freqStream.GetFilePointer();
                long lastProxPointer = -1;
                int  numSkipped      = -1 - (count % skipInterval);

                while (target > skipDoc)
                {
                    lastSkipDoc     = skipDoc;
                    lastFreqPointer = freqPointer;
                    lastProxPointer = proxPointer;

                    if (skipDoc != 0 && skipDoc >= doc)
                    {
                        numSkipped += skipInterval;
                    }

                    if (skipCount >= numSkips)
                    {
                        break;
                    }

                    skipDoc     += skipStream.ReadVInt();
                    freqPointer += skipStream.ReadVInt();
                    proxPointer += skipStream.ReadVInt();

                    skipCount++;
                }

                // if we found something to skip, then skip it
                if (lastFreqPointer > freqStream.GetFilePointer())
                {
                    freqStream.Seek(lastFreqPointer);
                    SkipProx(lastProxPointer);

                    doc    = lastSkipDoc;
                    count += numSkipped;
                }
            }

            // done skipping, now just scan
            do
            {
                if (!Next())
                {
                    return(false);
                }
            }while (target > doc);
            return(true);
        }
		/// <summary>Optimized implementation. </summary>
		public virtual bool SkipTo(int target)
		{
			if (df >= skipInterval)
			{
				// optimized case
				
				if (skipStream == null)
					skipStream = (InputStream) freqStream.Clone(); // lazily clone
				
				if (!haveSkipped)
				{
					// lazily seek skip stream
					skipStream.Seek(skipPointer);
					haveSkipped = true;
				}
				
				// scan skip data
				int lastSkipDoc = skipDoc;
				long lastFreqPointer = freqStream.GetFilePointer();
				long lastProxPointer = - 1;
				int numSkipped = - 1 - (count % skipInterval);
				
				while (target > skipDoc)
				{
					lastSkipDoc = skipDoc;
					lastFreqPointer = freqPointer;
					lastProxPointer = proxPointer;
					
					if (skipDoc != 0 && skipDoc >= doc)
						numSkipped += skipInterval;
					
					if (skipCount >= numSkips)
						break;
					
					skipDoc += skipStream.ReadVInt();
					freqPointer += skipStream.ReadVInt();
					proxPointer += skipStream.ReadVInt();
					
					skipCount++;
				}
				
				// if we found something to skip, then skip it
				if (lastFreqPointer > freqStream.GetFilePointer())
				{
					freqStream.Seek(lastFreqPointer);
					SkipProx(lastProxPointer);
					
					doc = lastSkipDoc;
					count += numSkipped;
				}
			}
			
			// done skipping, now just scan
			do 
			{
				if (!Next())
					return false;
			}
			while (target > doc);
			return true;
		}
 public Norm(SegmentReader enclosingInstance, InputStream in_Renamed, int number)
 {
     InitBlock(enclosingInstance);
     this.in_Renamed = in_Renamed;
     this.number     = number;
 }
			internal CSInputStream(InputStream base_Renamed, long fileOffset, long length)
			{
				this.base_Renamed = base_Renamed;
				this.fileOffset = fileOffset;
				this.length = length; // variable in the superclass
			}
		public override void  Close()
		{
			lock (this)
			{
				if (stream == null)
					throw new System.IO.IOException("Already closed");
				
				entries.Clear();
				stream.Close();
				stream = null;
			}
		}
		internal SegmentTermPositions(SegmentReader p):base(p)
		{
			this.proxStream = (InputStream) parent.proxStream.Clone();
		}