Пример #1
0
        /// <summary>
        /// Looks up the chunk entry with the given hash and, when one is found,
        /// increments its <c>Count</c> and persists the change.
        /// </summary>
        /// <param name="hash">The hash to look up.</param>
        /// <returns>The matching entry, or <c>null</c> if not found.</returns>
        public DataChunk GetChunkEntry(byte[] hash)
        {
            var txnProvider = new NHTransactionProvider(
                new NHSessionProvider(_sessionFactory));

            using (txnProvider) {
                using (var transaction = txnProvider.BeginTransaction()) {
                    var       session = txnProvider.SessionProvider.CurrentSession;
                    ICriteria crit    = session.CreateCriteria(typeof(DataChunk));
                    crit.Add(Expression.Eq(
                                 Projections.Property <DataChunk>(x => x.Hash), hash));

                    var entry = (DataChunk)crit.UniqueResult();
                    if (entry != null)
                    {
                        entry.Count++;
                        session.Update(entry);
                        // The other write paths in this file commit explicitly; an
                        // NHibernate transaction that is disposed without a commit is
                        // rolled back, which would silently discard the increment.
                        transaction.Commit();
                    }
                    return entry;
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Gets a list of the tuples &lt;path, chunk index, chunk size&gt; for each input (virtual) file chunk.
        /// </summary>
        /// <param name="filePath">The file path.</param>
        /// <param name="fileIndices">The file indices.</param>
        /// <returns>
        /// One tuple per element of <paramref name="fileIndices"/>, in input order.
        /// The path component is always <paramref name="filePath"/>; the chunk index
        /// and chunk size come from the file's chunk map entry for that file index.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="fileIndices"/> is <c>null</c>.
        /// </exception>
        public List <Tuple <string, int, int> > GetChunkIndices(string filePath, int[] fileIndices)
        {
            if (fileIndices == null)
            {
                throw new ArgumentNullException("fileIndices");
            }

            // Only build the joined index list when it is actually going to be logged.
            if (logger.IsDebugEnabled)
            {
                logger.DebugFormat("Going to get chunks indices for file indices {0} for file {1}.",
                                   string.Join(",", System.Array.ConvertAll <int, string>(fileIndices, x => x.ToString())),
                                   filePath);
            }

            var sessionProvider = new NHSessionProvider(_sessionFactory);

            using (sessionProvider) {
                var         helper      = new ChunkDbHelper(sessionProvider.CurrentSession);
                var         txnProvider = new NHTransactionProvider(sessionProvider);
                ManagedFile file;
                using (txnProvider.BeginTransaction()) {
                    file = helper.GetManagedFile(filePath);
                }

                // The chunk map is read after the transaction ends but while the
                // session is still open.
                var chunkMap = file.ChunkMap;
                var ret      = new List <Tuple <string, int, int> >(fileIndices.Length);
                foreach (int fileIndex in fileIndices)
                {
                    var entry = chunkMap.GetByFileIndex(fileIndex);
                    logger.DebugFormat("File index {0} maps to chunk index {1}",
                                       fileIndex, entry.ChunkIndex);
                    ret.Add(Tuple.Create <string, int, int>(
                                filePath,
                                entry.ChunkIndex,
                                entry.ChunkSize));
                }
                return ret;
            }
        }
Пример #3
0
        /// <summary>
        /// Gets a list of the tuples &lt;path, chunk index, chunk size&gt; for each input chunk.
        /// </summary>
        /// <param name="filePath">The file path.</param>
        /// <param name="chunkIndices">The chunk indices.</param>
        /// <returns>
        /// One tuple per element of <paramref name="chunkIndices"/>, in input order.
        /// The path and chunk index are taken from the <c>DataChunk</c> row whose hash
        /// equals the hash of the requested chunk; the size is the EOF chunk's size
        /// for the file's EOF chunk and <c>DataChunk.ChunkSize</c> otherwise.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="chunkIndices"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="ChunkNotInDbException">
        /// No <c>DataChunk</c> row exists for the hash of one of the requested chunks.
        /// </exception>
        public List <Tuple <string, int, int> > GetChunkLocations(string filePath, int[] chunkIndices)
        {
            if (chunkIndices == null)
            {
                throw new ArgumentNullException("chunkIndices");
            }

            // Only build the joined index list when it is actually going to be logged.
            if (logger.IsDebugEnabled)
            {
                logger.DebugFormat("Going to get deduplicated locations of data chunks {0} for file {1}.",
                                   string.Join(",", System.Array.ConvertAll <int, string>(chunkIndices, x => x.ToString())),
                                   filePath);
            }

            var sessionProvider = new NHSessionProvider(_sessionFactory);

            using (sessionProvider) {
                var         helper      = new ChunkDbHelper(sessionProvider.CurrentSession);
                var         txnProvider = new NHTransactionProvider(sessionProvider);
                ManagedFile file;
                using (txnProvider.BeginTransaction()) {
                    file = helper.GetManagedFile(filePath);
                }

                var chunkMap = file.ChunkMap;
                var eofChunk = chunkMap.EofChunk;
                var ret      = new List <Tuple <string, int, int> >(chunkIndices.Length);

                // All lookups share one transaction on the same session.
                using (txnProvider.BeginTransaction()) {
                    var dao = new Dao <DataChunk>(sessionProvider.CurrentSession);
                    foreach (int chunkIndex in chunkIndices)
                    {
                        byte[] chunkHash = chunkMap.HashAt(chunkIndex);

                        // Every chunk is full-sized except the one at the end of the file.
                        int chunkSize = chunkIndex == eofChunk.ChunkIndex
                                        ? eofChunk.ChunkSize
                                        : DataChunk.ChunkSize;

                        // Query by example: only the hash is populated on the probe object.
                        var chunkInfo = dao.UniqueResultByExample(new DataChunk {
                            Hash = chunkHash
                        });

                        if (chunkInfo == null)
                        {
                            throw new ChunkNotInDbException(string.Format(
                                                                "Chunk {0} in the file hasn't been added yet.",
                                                                chunkIndex))
                                  {
                                      File       = filePath,
                                      ChunkIndex = chunkIndex
                                  };
                        }

                        logger.DebugFormat("Chunk index {0} maps to chunk {1} in file {2}.",
                                           chunkIndex, chunkInfo.ChunkIndex, chunkInfo.File.Path);
                        ret.Add(Tuple.Create <string, int, int>(
                                    chunkInfo.File.Path,
                                    chunkInfo.ChunkIndex,
                                    chunkSize));
                    }
                }
                return ret;
            }
        }
Пример #4
0
        /// <summary>
        /// Adds the file and all of its chunks to the chunk DB (through
        /// <c>AddFileAllChunks</c>), builds a basic chunk map from the collected
        /// chunk indices and hashes, stores it on the managed file, and finally
        /// pads the file on disk with zeros.
        /// </summary>
        /// <param name="filePath">The path of the local file to add.</param>
        public void AddFileWithBasicChunkMap(string filePath)
        {
            var    fileIndices  = new List <int>();
            byte[] hashBytes;
            int    eofIndex     = 0;
            int    eofChunkSize = 0;

            // The stream only accumulates the concatenated chunk hashes. The using
            // block guarantees it is released even if adding the chunks throws.
            using (var hashes = new MemoryStream()) {
                AddFileAllChunks(filePath,
                                 delegate(int chunkIndex, byte[] hash) {
                    fileIndices.Add(chunkIndex);
                    hashes.Write(hash, 0, hash.Length);
                },
                                 (i, s) => { eofIndex = i; eofChunkSize = s; });
                hashBytes = hashes.ToArray();
            }

            logger.DebugFormat("(EofIndex, EofChunkSize) = ({0}, {1})",
                               eofIndex, eofChunkSize);

            var txnProvider = new NHTransactionProvider(
                new NHSessionProvider(_sessionFactory));
            var session = txnProvider.SessionProvider.CurrentSession;

            using (txnProvider) {
                using (var transaction = txnProvider.BeginTransaction()) {
                    var helper = new ChunkDbHelper(session);
                    // File should have been inserted already.
                    var file = helper.GetManagedFile(filePath);
                    var dto  = new ChunkMapDto {
                        FileIndices        = fileIndices.ToArray(),
                        Hashes             = hashBytes,
                        EofChunkIndex      = eofIndex,
                        EofChunkSize       = eofChunkSize,
                        LastPieceInProfile = 0
                    };
                    file.ChunkMap = new ChunkMap(dto);

                    if (logger.IsDebugEnabled)
                    {
                        // GetTempFileName already creates the file and returns its
                        // full path, so no Path.Combine with the temp dir is needed.
                        var tempFilePath = Path.GetTempFileName();
                        ChunkMapSerializer.SerializeToXml(tempFilePath + ".xml", file.ChunkMap);
                        ChunkMapSerializer.Serialize(tempFilePath, dto);
                        logger.DebugFormat("ChunkMap is logged to file {0}", tempFilePath);
                    }

                    // Have the file committed to DB.
                    transaction.Commit();
                }
                logger.DebugFormat("ChunkMap added to file.");
                FileUtil.PadFileWithZeros(filePath);
            }
        }
Пример #5
0
        /// <summary>
        /// Looks up the managed file registered under the given path, using a
        /// fresh session and transaction.
        /// </summary>
        /// <param name="filePath">The file path.</param>
        /// <returns>
        /// Whatever <c>ChunkDbHelper.GetManagedFile</c> returns for the path.
        /// </returns>
        public ManagedFile GetManagedFile(string filePath)
        {
            var sessions = new NHSessionProvider(_sessionFactory);
            var provider = new NHTransactionProvider(sessions);

            using (provider)
            using (var txn = provider.BeginTransaction())
            {
                var currentSession = provider.SessionProvider.CurrentSession;
                var dbHelper       = new ChunkDbHelper(currentSession);
                ManagedFile found  = dbHelper.GetManagedFile(filePath);
                return found;
            }
        }
Пример #6
0
        /// <summary>
        /// Gets the chunk entry that belongs to the given file and has the given
        /// chunk index.
        /// </summary>
        /// <param name="filePath">The path of the file the chunk belongs to.</param>
        /// <param name="chunkIndex">The index of the chunk within that file.</param>
        /// <returns>The unique result of the query on (file, chunk index).</returns>
        public DataChunk GetChunkEntry(string filePath, int chunkIndex)
        {
            var provider = new NHTransactionProvider(
                new NHSessionProvider(_sessionFactory));

            using (provider)
            using (var txn = provider.BeginTransaction())
            {
                var dbHelper       = new ChunkDbHelper(provider.SessionProvider.CurrentSession);
                var currentSession = provider.SessionProvider.CurrentSession;

                // Resolve the owning file first; it is one of the two query criteria.
                ManagedFile owner = dbHelper.GetManagedFile(filePath);

                ICriteria query = currentSession.CreateCriteria <DataChunk>();
                query.Add(Expression.Eq(Projections.Property <DataChunk>(x => x.File), owner));
                query.Add(Expression.Eq(Projections.Property <DataChunk>(x => x.ChunkIndex), chunkIndex));

                return query.UniqueResult <DataChunk>();
            }
        }
Пример #7
0
        /// <summary>
        /// Creates a chunk entry for the given file and chunk index and adds it
        /// to the chunk DB. The new entry starts with a count of zero.
        /// </summary>
        /// <param name="hash">The hash of the chunk.</param>
        /// <param name="filePath">The path of the file that contains the chunk.</param>
        /// <param name="chunkIndex">The index of the chunk within the file.</param>
        /// <exception cref="DuplicateNameException">Thrown when an entry with the
        /// same hash already exists (raised below this method, presumably by
        /// <c>ChunkDbHelper.AddChunk</c>, which is not visible here).</exception>
        public void AddChunk(byte[] hash, string filePath, int chunkIndex)
        {
            var provider = new NHTransactionProvider(
                new NHSessionProvider(_sessionFactory));

            // Disposing the provider at the end of this block releases the session.
            using (provider)
            using (var txn = provider.BeginTransaction())
            {
                var dbHelper = new ChunkDbHelper(provider.SessionProvider.CurrentSession);
                ManagedFile owner = dbHelper.GetManagedFile(filePath);

                var chunk = new DataChunk();
                chunk.File       = owner;
                chunk.ChunkIndex = chunkIndex;
                chunk.Hash       = hash;
                chunk.Count      = 0;

                dbHelper.AddChunk(chunk);
                txn.Commit();
            }
        }
Пример #8
0
        /// <summary>
        /// Registers the given chunks of a file in the chunk DB. The hash of each
        /// chunk is taken from the file's chunk map; chunks that are already
        /// present are skipped and only counted for the debug log.
        /// </summary>
        /// <param name="filePath">The file path.</param>
        /// <param name="chunks">The indices of the chunks to register.</param>
        public void AddChunks(string filePath, int[] chunks)
        {
            string chunkList = string.Join(
                ",", System.Array.ConvertAll <int, string>(chunks, x => x.ToString()));
            logger.DebugFormat("Adding chunks {0} for file {1}.", chunkList, filePath);

            var provider = new NHTransactionProvider(
                new NHSessionProvider(_sessionFactory));

            using (provider)
            using (var txn = provider.BeginTransaction())
            {
                var dbHelper = new ChunkDbHelper(provider.SessionProvider.CurrentSession);
                ManagedFile owner = dbHelper.GetManagedFile(filePath);
                var         map   = owner.ChunkMap;
                int         skipped = 0;

                for (int i = 0; i < chunks.Length; i++)
                {
                    int index     = chunks[i];
                    var candidate = new DataChunk {
                        File       = owner,
                        ChunkIndex = index,
                        Hash       = map.HashAt(index),
                        Count      = 0
                    };

                    // A false return means the chunk was already registered.
                    if (dbHelper.AddChunkIfNotExists(candidate))
                    {
                        continue;
                    }
                    skipped++;
                }

                txn.Commit();
                logger.DebugFormat(
                    "Chunks added. {0} out of {1} chunks already exist.",
                    skipped, chunks.Length);
            }
        }
Пример #9
0
        /// <summary>
        /// Adds a file to the Chunk DB: first creates and commits the managed
        /// file record, then reads the file chunk by chunk, hashes every chunk
        /// with SHA-1 and inserts it (if not present yet) through a stateless
        /// session in a single transaction.
        /// </summary>
        /// <param name="filePath">The file path.</param>
        /// <param name="forEachChunk">
        /// Optional callback invoked as (index, hash) for every chunk, before the
        /// chunk is inserted.
        /// </param>
        /// <param name="forEofChunk">
        /// Optional callback invoked once as (index, chunk size) for the last
        /// chunk. When the file length is an exact multiple of the chunk size the
        /// callback receives (last index, <c>DataChunk.ChunkSize</c>); for an
        /// empty file that index is -1.
        /// </param>
        void AddFileAllChunks(string filePath, Action <int, byte[]> forEachChunk, Action <int, int> forEofChunk)
        {
            var txnProvider = new NHTransactionProvider(
                new NHSessionProvider(_sessionFactory));

            var         session = txnProvider.SessionProvider.CurrentSession;
            ManagedFile file;

            // In a stateful session.
            using (txnProvider) {
                using (var transaction = txnProvider.BeginTransaction()) {
                    var helper = new ChunkDbHelper(session);
                    file = helper.CreateManagedFileFromLocalFile(filePath);
                    // Have the file committed to DB.
                    transaction.Commit();
                }
            }

            // Choose stateless session for bulk insert. The session and the hash
            // provider are both disposable and are released here on every path.
            using (var statelessSession = _sessionFactory.OpenStatelessSession())
            using (var transaction = statelessSession.BeginTransaction())
            using (SHA1 sha = new SHA1CryptoServiceProvider())
            using (var stream = File.OpenRead(filePath)) {
                var chunk      = new byte[DataChunk.ChunkSize];
                int duplicates = 0;

                for (int chunkIndex = 0; ; chunkIndex++)
                {
                    // Stream.Read may return fewer bytes than requested before the
                    // end of the stream, so keep reading until the buffer is full.
                    int readLength = ReadFullChunk(stream, chunk);

                    if (readLength == 0)
                    {
                        // The previous chunk ended exactly at the end of the file.
                        if (forEofChunk != null)
                        {
                            forEofChunk(chunkIndex - 1, DataChunk.ChunkSize);
                        }
                        break;
                    }

                    bool isEofChunk = readLength < DataChunk.ChunkSize;
                    if (isEofChunk)
                    {
                        // Last chunk: the rest of the buffer is padded with 0s.
                        System.Array.Clear(chunk, readLength,
                                           chunk.Length - readLength);
                        if (forEofChunk != null)
                        {
                            forEofChunk(chunkIndex, readLength);
                        }
                    }

                    // Hash is computed over the full chunk buffer with
                    // padding in case of a small chunk.
                    byte[] hash = sha.ComputeHash(chunk);

                    if (forEachChunk != null)
                    {
                        forEachChunk(chunkIndex, hash);
                    }

                    // NOTE(review): AddChunks in this file treats a true return from
                    // AddChunkIfNotExists as "added"; this assumes the static
                    // overload follows the same convention, so a false return is
                    // what counts as a duplicate. Confirm against ChunkDbHelper.
                    bool added = ChunkDbHelper.AddChunkIfNotExists(statelessSession,
                                                                   new DataChunk {
                        Hash       = hash,
                        File       = file,
                        ChunkIndex = chunkIndex,
                        Count      = 0
                    });
                    if (!added)
                    {
                        duplicates++;
                    }

                    if (isEofChunk)
                    {
                        break;
                    }
                }

                transaction.Commit();
                logger.DebugFormat("File {0} added to ChunkDb.", filePath);
                logger.DebugFormat("Number of duplicates {0}", duplicates);
            }
        }

        /// <summary>
        /// Reads from the stream until the buffer is full or the end of the
        /// stream is reached.
        /// </summary>
        /// <param name="stream">The stream to read from.</param>
        /// <param name="buffer">The buffer to fill from offset 0.</param>
        /// <returns>
        /// The number of bytes placed in the buffer; less than the buffer length
        /// only when the end of the stream was reached.
        /// </returns>
        private static int ReadFullChunk(Stream stream, byte[] buffer)
        {
            int total = 0;
            while (total < buffer.Length)
            {
                int read = stream.Read(buffer, total, buffer.Length - total);
                if (read == 0)
                {
                    break;
                }
                total += read;
            }
            return total;
        }