Class for accessing a compound stream. This class implements a directory, but is limited to only read operations. Directory methods that would normally modify data throw an exception.

All files belonging to a segment have the same name with varying extensions. The extensions correspond to the different file formats used by the Codec. When using the Compound File format these files are collapsed into a single .cfs file (except for the LiveDocsFormat), with a corresponding .cfe file indexing its sub-files.

Files:

  • .cfs: An optional "virtual" file consisting of all the other index files for systems that frequently run out of file handles.
  • .cfe: The "virtual" compound file's entry table holding all entries in the corresponding .cfs file.

Description:

  • Compound (.cfs) --> Header, FileData^FileCount
  • Compound Entry Table (.cfe) --> Header, FileCount, <FileName, DataOffset, DataLength>^FileCount, Footer
  • Header --> CodecUtil#writeHeader CodecHeader
  • FileCount --> DataOutput#writeVInt VInt
  • DataOffset,DataLength --> DataOutput#writeLong UInt64
  • FileName --> DataOutput#writeString String
  • FileData --> raw file data
  • Footer --> CodecUtil#writeFooter CodecFooter

Notes:

  • FileCount indicates how many files are contained in this compound file. The entry table that follows has that many entries.
  • Each directory entry contains a long pointer to the start of this file's data section, the file's length, and a String with that file's name.
@lucene.experimental
Inheritance: Lucene.Net.Store.BaseDirectory
        /// <summary>
        /// Writes one entry ("d.xyz") into a compound file that lives on top of an
        /// <see cref="NRTCachingDirectory"/>, then reopens the compound file for reading
        /// and checks that exactly that entry is listed.
        /// </summary>
        public virtual void TestCompoundFileAppendTwice()
        {
            Directory cachingDir = new NRTCachingDirectory(NewDirectory(), 2.0, 25.0);
            var writer = new CompoundFileDirectory(cachingDir, "d.cfs", NewIOContext(Random()), true);

            // "d1" is created next to the compound file; it is never copied into it.
            CreateSequenceFile(cachingDir, "d1", (sbyte)0, 15);

            IndexOutput entryOut = writer.CreateOutput("d.xyz", NewIOContext(Random()));
            entryOut.WriteInt(0);
            entryOut.Dispose();

            // The writer must report only the entry written through it.
            Assert.AreEqual(1, writer.ListAll().Length);
            Assert.AreEqual("d.xyz", writer.ListAll()[0]);
            writer.Dispose();

            // Reopen read-only and expect the very same listing.
            var reader = new CompoundFileDirectory(cachingDir, "d.cfs", NewIOContext(Random()), false);
            Assert.AreEqual(1, reader.ListAll().Length);
            Assert.AreEqual("d.xyz", reader.ListAll()[0]);
            reader.Dispose();

            cachingDir.Dispose();
        }
示例#2
0
        /// <summary>
        /// Copies a file that lives in <c>Dir</c> into a compound file stored in a
        /// different directory, then verifies that reading it back through the
        /// compound file yields the same stream content and seek behavior.
        /// </summary>
        public virtual void TestAddExternalFile()
        {
            CreateSequenceFile(Dir, "d1", (sbyte)0, 15);

            Directory targetDir = NewDirectory();

            // Write phase: pull "d1" from Dir into the compound file.
            var writer = new CompoundFileDirectory(targetDir, "d.cfs", NewIOContext(Random()), true);
            Dir.Copy(writer, "d1", "d1", NewIOContext(Random()));
            writer.Dispose();

            // Read phase: compare the plain file against its compound copy.
            var reader = new CompoundFileDirectory(targetDir, "d.cfs", NewIOContext(Random()), false);
            IndexInput plain = Dir.OpenInput("d1", NewIOContext(Random()));
            IndexInput fromCompound = reader.OpenInput("d1", NewIOContext(Random()));
            AssertSameStreams("d1", plain, fromCompound);
            AssertSameSeekBehavior("d1", plain, fromCompound);
            plain.Dispose();
            fromCompound.Dispose();
            reader.Dispose();

            targetDir.Dispose();
        }
        /// <summary>
        /// Disposes a compound file directory twice, both in write mode and in read
        /// mode, and checks that the second call is harmless and the written data
        /// is still readable.
        /// </summary>
        public virtual void TestDoubleClose()
        {
            Directory baseDir = NewDirectory();

            var writer = new CompoundFileDirectory(baseDir, "d.cfs", NewIOContext(Random()), true);
            IndexOutput entryOut = writer.CreateOutput("d.xyz", NewIOContext(Random()));
            entryOut.WriteInt(0);
            entryOut.Dispose();

            writer.Dispose();
            // second Dispose on the writer - must have no effect according to IDisposable
            writer.Dispose();

            var reader = new CompoundFileDirectory(baseDir, "d.cfs", NewIOContext(Random()), false);
            IndexInput entryIn = reader.OpenInput("d.xyz", NewIOContext(Random()));
            Assert.AreEqual(0, entryIn.ReadInt());
            entryIn.Dispose();

            reader.Dispose();
            // second Dispose on the reader - must have no effect according to IDisposable
            reader.Dispose();

            baseDir.Dispose();
        }
        /// <summary>
        /// Builds a compound file ("b.cfs"/"b.cfe"), embeds it inside another compound
        /// file ("d.cfs"), and verifies that the inner compound file can be opened and
        /// read through the outer one.
        /// </summary>
        public virtual void TestReadNestedCFP()
        {
            Directory baseDir = NewDirectory();
            var outerWriter = new CompoundFileDirectory(baseDir, "d.cfs", NewIOContext(Random()), true);
            var innerWriter = new CompoundFileDirectory(baseDir, "b.cfs", NewIOContext(Random()), true);

            // Two one-int entries go into the inner compound file.
            IndexOutput firstOut = innerWriter.CreateOutput("b.xyz", NewIOContext(Random()));
            IndexOutput secondOut = innerWriter.CreateOutput("b_1.xyz", NewIOContext(Random()));
            firstOut.WriteInt(0);
            secondOut.WriteInt(1);
            firstOut.Dispose();
            secondOut.Dispose();
            innerWriter.Dispose();

            // Move the inner compound file and its entry table into the outer one.
            baseDir.Copy(outerWriter, "b.cfs", "b.cfs", NewIOContext(Random()));
            baseDir.Copy(outerWriter, "b.cfe", "b.cfe", NewIOContext(Random()));
            baseDir.DeleteFile("b.cfs");
            baseDir.DeleteFile("b.cfe");
            outerWriter.Dispose();

            Assert.AreEqual(2, baseDir.ListAll().Length);

            var outerReader = new CompoundFileDirectory(baseDir, "d.cfs", NewIOContext(Random()), false);
            Assert.AreEqual(2, outerReader.ListAll().Length);

            var innerReader = new CompoundFileDirectory(outerReader, "b.cfs", NewIOContext(Random()), false);
            Assert.AreEqual(2, innerReader.ListAll().Length);

            IndexInput entryIn = innerReader.OpenInput("b.xyz", NewIOContext(Random()));
            Assert.AreEqual(0, entryIn.ReadInt());
            entryIn.Dispose();

            entryIn = innerReader.OpenInput("b_1.xyz", NewIOContext(Random()));
            Assert.AreEqual(1, entryIn.ReadInt());
            entryIn.Dispose();

            innerReader.Dispose();
            outerReader.Dispose();
            baseDir.Dispose();
        }
        /// <summary>
        /// Creates a compound file without any entries and checks that reopening it
        /// yields an empty listing.
        /// </summary>
        public virtual void TestEmptyCFS()
        {
            Directory baseDir = NewDirectory();

            // Open for writing and close right away: nothing is added.
            var writer = new CompoundFileDirectory(baseDir, "d.cfs", NewIOContext(Random()), true);
            writer.Dispose();

            var reader = new CompoundFileDirectory(baseDir, "d.cfs", NewIOContext(Random()), false);
            Assert.AreEqual(0, reader.ListAll().Length);
            reader.Dispose();

            baseDir.Dispose();
        }
        /// <summary>
        /// Appends two directly written entries and one copied file to a compound
        /// file, checking after each step that the underlying directory only shows
        /// "d.cfs", and then reads everything back for verification.
        /// </summary>
        public virtual void TestAppend()
        {
            Directory baseDir = NewDirectory();
            var writer = new CompoundFileDirectory(baseDir, "d.cfs", NewIOContext(Random()), true);
            int intCount = 5 + Random().Next(128);

            for (int fileNo = 0; fileNo < 2; fileNo++)
            {
                IndexOutput entryOut = writer.CreateOutput("seg_" + fileNo + "_foo.txt", NewIOContext(Random()));
                for (int k = 0; k < intCount; k++)
                {
                    entryOut.WriteInt(k * fileNo);
                }
                entryOut.Dispose();

                // While the writer is open only the data file is visible below it.
                string[] visible = baseDir.ListAll();
                Assert.AreEqual(1, visible.Length);
                Assert.AreEqual("d.cfs", visible[0]);
            }

            // Additionally pull an externally created file into the compound file.
            CreateSequenceFile(Dir, "d1", (sbyte)0, 15);
            Dir.Copy(writer, "d1", "d1", NewIOContext(Random()));
            string[] visibleAfterCopy = baseDir.ListAll();
            Assert.AreEqual(1, visibleAfterCopy.Length);
            Assert.AreEqual("d.cfs", visibleAfterCopy[0]);
            writer.Dispose();

            var reader = new CompoundFileDirectory(baseDir, "d.cfs", NewIOContext(Random()), false);
            for (int fileNo = 0; fileNo < 2; fileNo++)
            {
                IndexInput entryIn = reader.OpenInput("seg_" + fileNo + "_foo.txt", NewIOContext(Random()));
                // intCount values were written with WriteInt; the test expects 4 bytes per value.
                Assert.AreEqual(intCount * 4, entryIn.Length());
                for (int k = 0; k < intCount; k++)
                {
                    Assert.AreEqual(k * fileNo, entryIn.ReadInt());
                }

                entryIn.Dispose();
            }

            IndexInput plain = Dir.OpenInput("d1", NewIOContext(Random()));
            IndexInput fromCompound = reader.OpenInput("d1", NewIOContext(Random()));
            AssertSameStreams("d1", plain, fromCompound);
            AssertSameSeekBehavior("d1", plain, fromCompound);
            plain.Dispose();
            fromCompound.Dispose();
            reader.Dispose();
            baseDir.Dispose();
        }
        /// <summary>
        /// Checks that reading beyond the end of a sub-file of the compound file
        /// raises an <see cref="IOException"/>, both for a single-byte read and for a
        /// block read.
        /// </summary>
        public virtual void TestReadPastEOF()
        {
            SetUp_2();
            CompoundFileDirectory compound = new CompoundFileDirectory(Dir, "f.comp", NewIOContext(Random()), false);
            IndexInput input = compound.OpenInput("f2", NewIOContext(Random()));

            // Consume the final 10 bytes so the file pointer sits exactly at the end.
            input.Seek(input.Length() - 10);
            byte[] scratch = new byte[100];
            input.ReadBytes(scratch, 0, 10);

            try
            {
                input.ReadByte();
                Assert.Fail("Single byte read past end of file");
            }
            catch (IOException)
            {
                // expected: a single byte read past the end of the file must fail
            }

            // Ten bytes remain, so asking for 50 has to run past the end.
            input.Seek(input.Length() - 10);
            try
            {
                input.ReadBytes(scratch, 0, 50);
                Assert.Fail("Block read past end of file");
            }
            catch (IOException)
            {
                // expected: a block read past the end of the file must fail
            }

            input.Dispose();
            compound.Dispose();
        }
        /// <summary>
        /// Verifies that opening a sub-file that is not contained in the compound
        /// file fails with an exception.
        /// </summary>
        /// <remarks>
        /// The assertion is made outside of the try/catch block. A failure raised
        /// inside a <c>catch (Exception)</c> guarded region would itself be caught,
        /// so the test could not reliably fail.
        /// </remarks>
        public virtual void TestFileNotFound()
        {
            SetUp_2();
            CompoundFileDirectory cr = new CompoundFileDirectory(Dir, "f.comp", NewIOContext(Random()), false);
            try
            {
                bool threw = false;
                IndexInput unexpected = null;

                // Try to open a file that was never added to the compound file.
                try
                {
                    unexpected = cr.OpenInput("bogus", NewIOContext(Random()));
                }
                catch (Exception)
                {
                    // Any exception counts as success, exactly as before.
                    threw = true;
                }

                if (unexpected != null)
                {
                    // The open unexpectedly succeeded: release the input before failing.
                    unexpected.Dispose();
                }

                Assert.IsTrue(threw, "File not found");
            }
            finally
            {
                cr.Dispose();
            }
        }
        /// <summary>
        /// Command-line entry point: lists the sub-files contained in a compound
        /// file or, with <c>-extract</c>, writes each of them to the current
        /// working directory.
        /// </summary>
        /// <param name="args">
        /// <c>[-extract] [-dir-impl X] &lt;cfsfile&gt;</c>. The first argument that is
        /// not an option is taken as the compound file name; further ones are ignored.
        /// </param>
        public static void Main(string[] args)
        {
            string filename = null;
            bool extract = false;
            string dirImpl = null;

            int j = 0;
            while (j < args.Length)
            {
                string arg = args[j];
                if ("-extract".Equals(arg))
                {
                    extract = true;
                }
                else if ("-dir-impl".Equals(arg))
                {
                    if (j == args.Length - 1)
                    {
                        Console.WriteLine("ERROR: missing value for -dir-impl option");
                        Environment.Exit(1);
                    }
                    // The option value is the next argument; consume it here.
                    j++;
                    dirImpl = args[j];
                }
                else if (filename == null)
                {
                    filename = arg;
                }
                j++;
            }

            if (filename == null)
            {
                Console.WriteLine("Usage: org.apache.lucene.index.CompoundFileExtractor [-extract] [-dir-impl X] <cfsfile>");
                return;
            }

            Store.Directory dir = null;
            CompoundFileDirectory cfr = null;
            IOContext context = IOContext.READ;

            try
            {
                // Split the argument into the containing directory and the bare file name.
                FileInfo file = new FileInfo(filename);
                string dirname = file.DirectoryName;
                filename = file.Name;
                if (dirImpl == null)
                {
                    dir = FSDirectory.Open(new DirectoryInfo(dirname));
                }
                else
                {
                    dir = CommandLineUtil.NewFSDirectory(dirImpl, new DirectoryInfo(dirname));
                }

                cfr = new CompoundFileDirectory(dir, filename, IOContext.DEFAULT, false);

                string[] files = cfr.ListAll();
                ArrayUtil.TimSort(files); // sort the array of filename so that the output is more readable

                for (int i = 0; i < files.Length; ++i)
                {
                    long len = cfr.FileLength(files[i]);

                    if (extract)
                    {
                        Console.WriteLine("extract " + files[i] + " with " + len + " bytes to local directory...");
                        using (IndexInput ii = cfr.OpenInput(files[i], context))
                        {
                            // FileMode.Create creates the target or truncates an existing one.
                            // FileMode.Open would throw because the extracted file normally
                            // does not exist yet.
                            using (FileStream f = new FileStream(files[i], FileMode.Create, FileAccess.Write))
                            {
                                // read and write with a small buffer, which is more effective than reading byte by byte
                                byte[] buffer = new byte[1024];
                                int chunk = buffer.Length;
                                while (len > 0)
                                {
                                    int bufLen = (int)Math.Min(chunk, len);
                                    ii.ReadBytes(buffer, 0, bufLen);
                                    f.Write(buffer, 0, bufLen);
                                    len -= bufLen;
                                }
                            }
                        }
                    }
                    else
                    {
                        Console.WriteLine(files[i] + ": " + len + " bytes");
                    }
                }
            }
            catch (IOException ioe)
            {
                Console.WriteLine(ioe.ToString());
                Console.Write(ioe.StackTrace);
            }
            finally
            {
                try
                {
                    // Close the compound reader before the directory it reads from.
                    if (cfr != null)
                    {
                        cfr.Dispose();
                    }
                }
                catch (IOException ioe)
                {
                    Console.WriteLine(ioe.ToString());
                    Console.Write(ioe.StackTrace);
                }

                try
                {
                    // Separate try block so the directory is closed even if closing the reader failed.
                    if (dir != null)
                    {
                        dir.Dispose();
                    }
                }
                catch (IOException ioe)
                {
                    Console.WriteLine(ioe.ToString());
                    Console.Write(ioe.StackTrace);
                }
            }
        }
        /// <summary>
        /// Exercises a cloned input of a compound sub-file ("f11") while the input it
        /// was cloned from, the compound reader, and finally the clone itself are
        /// disposed one after another. The clone is compared against the same file
        /// opened directly from <c>Dir</c>.
        /// </summary>
        public virtual void TestClonedStreamsClosing()
        {
            SetUp_2();
            CompoundFileDirectory cr = new CompoundFileDirectory(Dir, "f.comp", NewIOContext(Random()), false);

            // basic clone
            IndexInput expected = Dir.OpenInput("f11", NewIOContext(Random()));

            // this test only works for FSIndexInput
            Assert.IsTrue(TestHelper.IsSimpleFSIndexInput(expected));
            Assert.IsTrue(TestHelper.IsSimpleFSIndexInputOpen(expected));

            IndexInput one = cr.OpenInput("f11", NewIOContext(Random()));

            IndexInput two = (IndexInput)one.Clone();

            // Compare the reference stream first with the original input, then
            // (after rewinding the reference) with its clone.
            AssertSameStreams("basic clone one", expected, one);
            expected.Seek(0);
            AssertSameStreams("basic clone two", expected, two);

            // Now close the first stream
            one.Dispose();

            // The following should really fail since we couldn't expect to
            // access a file once close has been called on it (regardless of
            // buffering and/or clone magic)
            expected.Seek(0);
            two.Seek(0);
            AssertSameStreams("basic clone two/2", expected, two);

            // Now close the compound reader
            cr.Dispose();

            // The following may also fail since the compound stream is closed
            // (only the seeks are performed; the content comparison is disabled)
            expected.Seek(0);
            two.Seek(0);
            //assertSameStreams("basic clone two/3", expected, two);

            // Now close the second clone
            two.Dispose();
            // NOTE(review): the clone is sought after it has been disposed; whether
            // that is tolerated depends on the IndexInput implementation - confirm.
            expected.Seek(0);
            two.Seek(0);
            //assertSameStreams("basic clone two/4", expected, two);

            expected.Dispose();
        }
 /// <summary>
 /// Builds the compound file "f.comp" in <c>Dir</c> from 20 component files
 /// named f0 to f19. Every component is a sequential file (so that a reader
 /// can easily tell whether it is looking at the right byte) created with a
 /// size argument of 2000.
 /// </summary>
 private void SetUp_2()
 {
     var writer = new CompoundFileDirectory(Dir, "f.comp", NewIOContext(Random()), true);

     int index = 0;
     while (index < 20)
     {
         // Create the component in Dir first, then copy it into the compound file.
         string component = "f" + index;
         CreateSequenceFile(Dir, component, (sbyte)0, 2000);
         Dir.Copy(writer, component, component, NewIOContext(Random()));
         index++;
     }

     writer.Dispose();
 }
        /// <summary>
        /// Packs random files of various sizes (around multiples of the 1024 byte
        /// buffer size) into "test.cfs" and verifies that each one reads back
        /// identically to its stand-alone original. A few extra files that are not
        /// part of the compound file are created alongside.
        /// </summary>
        public virtual void TestRandomFiles()
        {
            // Setup the test segment
            string segment = "test";
            int chunk = 1024; // internal buffer size used by the stream

            // Extension and size of every file that goes into the compound file.
            string[] suffixes = new string[] { ".zero", ".one", ".ten", ".hundred", ".big1", ".big2", ".big3", ".big4", ".big5", ".big6", ".big7" };
            int[] sizes = new int[] { 0, 1, 10, 100, chunk, chunk - 1, chunk + 1, 3 * chunk, 3 * chunk - 1, 3 * chunk + 1, 1000 * chunk };
            for (int k = 0; k < suffixes.Length; k++)
            {
                CreateRandomFile(Dir, segment + suffixes[k], sizes[k]);
            }

            // Setup extraneous files
            CreateRandomFile(Dir, "onetwothree", 100);
            CreateRandomFile(Dir, segment + ".notIn", 50);
            CreateRandomFile(Dir, segment + ".notIn2", 51);

            // Now test: copy every segment file into the compound file ...
            var writer = new CompoundFileDirectory(Dir, "test.cfs", NewIOContext(Random()), true);
            foreach (string suffix in suffixes)
            {
                string fileName = segment + suffix;
                Dir.Copy(writer, fileName, fileName, NewIOContext(Random()));
            }
            writer.Dispose();

            // ... and compare each copy with its original.
            var reader = new CompoundFileDirectory(Dir, "test.cfs", NewIOContext(Random()), false);
            foreach (string suffix in suffixes)
            {
                IndexInput original = Dir.OpenInput(segment + suffix, NewIOContext(Random()));
                IndexInput packed = reader.OpenInput(segment + suffix, NewIOContext(Random()));
                AssertSameStreams(suffix, original, packed);
                AssertSameSeekBehavior(suffix, original, packed);
                packed.Dispose();
                original.Dispose();
            }
            reader.Dispose();
        }
        /// <summary>
        /// For each of the sizes 0, 1, 10 and 100, creates a compound file holding
        /// exactly one sequential file and verifies that the file reads back
        /// unchanged through the compound file.
        /// </summary>
        public virtual void TestSingleFile()
        {
            int[] sizes = { 0, 1, 10, 100 };
            foreach (int size in sizes)
            {
                string name = "t" + size;
                string compoundName = name + ".cfs";
                CreateSequenceFile(Dir, name, (sbyte)0, size);

                var writer = new CompoundFileDirectory(Dir, compoundName, NewIOContext(Random()), true);
                Dir.Copy(writer, name, name, NewIOContext(Random()));
                writer.Dispose();

                var reader = new CompoundFileDirectory(Dir, compoundName, NewIOContext(Random()), false);
                IndexInput plain = Dir.OpenInput(name, NewIOContext(Random()));
                IndexInput packed = reader.OpenInput(name, NewIOContext(Random()));
                AssertSameStreams(name, plain, packed);
                AssertSameSeekBehavior(name, plain, packed);
                plain.Dispose();
                packed.Dispose();
                reader.Dispose();
            }
        }
 /// <summary>
 /// Creates the slicer helper, remembering the owning compound directory and
 /// the file entry it was created for.
 /// </summary>
 /// <param name="outerInstance">Compound directory that owns this helper; also passed to the base constructor.</param>
 /// <param name="entry">File entry stored on this helper.</param>
 public IndexInputSlicerAnonymousInnerClassHelper(CompoundFileDirectory outerInstance, Lucene.Net.Store.CompoundFileDirectory.FileEntry entry)
     : base(outerInstance)
 {
     Entry = entry;
     OuterInstance = outerInstance;
 }
        /// <summary>
        /// Packs at least 500 one-byte files into a single compound file, opens all
        /// of them at the same time and verifies the byte stored in each.
        /// </summary>
        public virtual void TestManySubFiles()
        {
            Directory fsDir = NewFSDirectory(CreateTempDir("CFSManySubFiles"));
            int fileCount = AtLeast(500);

            // Each file holds one byte: its own index truncated to 8 bits.
            for (int n = 0; n < fileCount; n++)
            {
                IndexOutput single = fsDir.CreateOutput("file." + n, NewIOContext(Random()));
                single.WriteByte((byte)(sbyte)n);
                single.Dispose();
            }

            var writer = new CompoundFileDirectory(fsDir, "c.cfs", NewIOContext(Random()), true);
            for (int n = 0; n < fileCount; n++)
            {
                string fileName = "file." + n;
                fsDir.Copy(writer, fileName, fileName, NewIOContext(Random()));
            }
            writer.Dispose();

            // Keep every sub-file open simultaneously before reading any of them.
            var reader = null as CompoundFileDirectory;
            IndexInput[] inputs = new IndexInput[fileCount];
            reader = new CompoundFileDirectory(fsDir, "c.cfs", NewIOContext(Random()), false);
            for (int n = 0; n < fileCount; n++)
            {
                inputs[n] = reader.OpenInput("file." + n, NewIOContext(Random()));
            }

            for (int n = 0; n < fileCount; n++)
            {
                Assert.AreEqual((byte)n, inputs[n].ReadByte());
            }

            foreach (IndexInput input in inputs)
            {
                input.Dispose();
            }
            reader.Dispose();
            fsDir.Dispose();
        }
 /// <summary>
 /// Checks that every file returned by <c>ListAll()</c> can be opened. Files
 /// whose name ends with <c>IndexFileNames.COMPOUND_FILE_EXTENSION</c> are
 /// additionally opened as compound files and checked recursively.
 /// </summary>
 /// <param name="dir">Directory whose files are opened.</param>
 private void CheckFiles(Directory dir)
 {
     foreach (string file in dir.ListAll())
     {
         // File extensions are identifiers, not linguistic text: compare ordinally
         // instead of with the current culture.
         if (file.EndsWith(IndexFileNames.COMPOUND_FILE_EXTENSION, System.StringComparison.Ordinal))
         {
             CompoundFileDirectory cfsDir = new CompoundFileDirectory(dir, file, NewIOContext(Random()), false);
             try
             {
                 CheckFiles(cfsDir); // recurse into cfs
             }
             finally
             {
                 // Release the nested directory even when the recursive check throws.
                 cfsDir.Dispose();
             }
         }

         // Opening is the whole check; the input is closed again immediately.
         using (IndexInput @in = dir.OpenInput(file, NewIOContext(Random())))
         {
         }
     }
 }
        /// <summary>
        /// Opens two sub-files of the compound file, clones each input, and then
        /// seeks and reads the pairs in an interleaved fashion. After every step
        /// the test checks that an input and its clone agree on file pointer and
        /// data, and that working on one pair does not move the other pair.
        /// </summary>
        public virtual void TestRandomAccessClones()
        {
            SetUp_2();
            CompoundFileDirectory compound = new CompoundFileDirectory(Dir, "f.comp", NewIOContext(Random()), false);

            // Open two files
            IndexInput first = compound.OpenInput("f11", NewIOContext(Random()));
            IndexInput second = compound.OpenInput("f3", NewIOContext(Random()));

            IndexInput firstClone = (IndexInput)first.Clone();
            IndexInput secondClone = (IndexInput)second.Clone();

            // Seek the first pair
            first.Seek(100);
            firstClone.Seek(100);
            Assert.AreEqual(100, first.FilePointer);
            Assert.AreEqual(100, firstClone.FilePointer);
            byte firstByte = first.ReadByte();
            byte firstCloneByte = firstClone.ReadByte();
            Assert.AreEqual(firstByte, firstCloneByte);

            // Now seek the second pair
            second.Seek(1027);
            secondClone.Seek(1027);
            Assert.AreEqual(1027, second.FilePointer);
            Assert.AreEqual(1027, secondClone.FilePointer);
            byte secondByte = second.ReadByte();
            byte secondCloneByte = secondClone.ReadByte();
            Assert.AreEqual(secondByte, secondCloneByte);

            // Now make sure the first pair didn't move
            Assert.AreEqual(101, first.FilePointer);
            Assert.AreEqual(101, firstClone.FilePointer);
            firstByte = first.ReadByte();
            firstCloneByte = firstClone.ReadByte();
            Assert.AreEqual(firstByte, firstCloneByte);

            // Now move the first pair again, past the buffer length
            first.Seek(1910);
            firstClone.Seek(1910);
            Assert.AreEqual(1910, first.FilePointer);
            Assert.AreEqual(1910, firstClone.FilePointer);
            firstByte = first.ReadByte();
            firstCloneByte = firstClone.ReadByte();
            Assert.AreEqual(firstByte, firstCloneByte);

            // Now make sure the second pair didn't move
            Assert.AreEqual(1028, second.FilePointer);
            Assert.AreEqual(1028, secondClone.FilePointer);
            secondByte = second.ReadByte();
            secondCloneByte = secondClone.ReadByte();
            Assert.AreEqual(secondByte, secondCloneByte);

            // Move the second pair back, again crossing the buffer size
            second.Seek(17);
            secondClone.Seek(17);
            Assert.AreEqual(17, second.FilePointer);
            Assert.AreEqual(17, secondClone.FilePointer);
            secondByte = second.ReadByte();
            secondCloneByte = secondClone.ReadByte();
            Assert.AreEqual(secondByte, secondCloneByte);

            // Finally, make sure the first pair didn't move
            Assert.AreEqual(1911, first.FilePointer);
            Assert.AreEqual(1911, firstClone.FilePointer);
            firstByte = first.ReadByte();
            firstCloneByte = firstClone.ReadByte();
            Assert.AreEqual(firstByte, firstCloneByte);

            first.Dispose();
            second.Dispose();
            firstClone.Dispose();
            secondClone.Dispose();
            compound.Dispose();
        }