示例#1
0
        /// <summary>
        /// Creates the default set of hash functions: <paramref name="num"/> instances of
        /// <c>MurmurHash3_x86_32</c> whose seeds are 0, 1, ..., <paramref name="num"/> - 1.
        /// </summary>
        /// <param name="num">How many hash functions to create; zero or a negative value yields an empty collection.</param>
        /// <returns>A newly allocated list holding the seeded instances in ascending seed order.</returns>
        public static ICollection<HashAlgorithm> DefaultHashFuncs(int num = 5)
        {
            var hashFuncs = new List<HashAlgorithm>();

            var seed = 0;
            while (seed < num)
            {
                // Each instance differs from the others only by its seed.
                hashFuncs.Add(new MurmurHash3_x86_32 { Seed = (uint)seed });
                seed++;
            }

            return hashFuncs;
        }
示例#2
0
        /// <summary>
        /// Runs the file-partitioning steps one after another on the calling thread:
        /// <c>FileHash.StreamToHashValuesNaive</c> over the file content,
        /// <c>LocalMaxima.CalcUsingNaive</c> over the resulting hash values, then
        /// <c>FileParitionHash.ProcessStream</c> over the file using the computed positions.
        /// Every value produced by the last step is cast to <c>int</c> and appended to
        /// <paramref name="partitionHash"/>.
        /// </summary>
        /// <param name="filename">Path of the file to process; it is read fully into memory and then opened again as a stream.</param>
        /// <param name="partitionHash">Destination list that receives the partition hash values.</param>
        static void ProcessFileNaive(string filename, List<int> partitionHash)
        {
            // Step 1: hash values computed from an in-memory copy of the file.
            var hashValues = new List<uint>();
            var contents = File.ReadAllBytes(filename);
            using (var memory = new MemoryStream(contents, 0, contents.Length, true, true))
            {
                new FileHash(1024).StreamToHashValuesNaive(memory, hashValues);
            }

            // Step 2: positions derived from the hash values.
            var maximaPositions = new List<int>();
            new LocalMaxima(4 * 1024).CalcUsingNaive(hashValues, maximaPositions);

            // Step 3: repackage the positions into the collection type ProcessStream consumes.
            var positionQueue = new BlockingCollectionDataChunk<int>();
            for (var p = 0; p < maximaPositions.Count; p++)
            {
                positionQueue.Add(maximaPositions[p]);
            }
            positionQueue.CompleteAdding();

            // Step 4: hash the partitions. The chunk-info output is required by the call
            // but is never read back in this method.
            var chunkInfoQueue = new BlockingCollectionDataChunk<FileChunkInfo>();
            var partitionQueue = new BlockingCollectionDataChunk<uint>();
            var partitioner = new FileParitionHash(new MurmurHash3_x86_32());
            using (var input = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                partitioner.ProcessStream(input, positionQueue, partitionQueue, chunkInfoQueue);
            }

            // Step 5: copy the results out.
            // NOTE(review): presumably ProcessStream marks partitionQueue as complete,
            // otherwise this enumeration would not terminate — confirm.
            foreach (var chunk in partitionQueue.BlockingCollection.GetConsumingEnumerable())
            {
                var offset = 0;
                while (offset < chunk.DataSize)
                {
                    partitionHash.Add((int)chunk.Data[offset]);
                    offset++;
                }
            }
        }
示例#3
0
        /// <summary>
        /// Pipelined variant of the file-partitioning steps. Three background tasks are started:
        /// <c>FileHash.StreamToHashValues</c>, <c>LocalMaxima.CalcUsingBlockAlgo</c> and
        /// <c>FileParitionHash.ProcessStream</c>, each handing its output to the next through a
        /// <c>BlockingCollectionDataChunk</c>. The calling thread then drains the partition
        /// hashes into <paramref name="partitionHash"/> (cast to <c>int</c>) and afterwards the
        /// chunk descriptors into <paramref name="fci"/>.
        /// </summary>
        /// <param name="filename">Path of the file to process; it is opened twice for shared reading.</param>
        /// <param name="partitionHash">Destination list that receives the partition hash values.</param>
        /// <param name="fci">Destination list that receives the <c>FileChunkInfo</c> entries.</param>
        /// <remarks>
        /// NOTE(review): the started tasks are neither awaited nor otherwise observed here, so an
        /// exception thrown inside a stage is not surfaced by this method.
        /// </remarks>
        static void ProcessFile(string filename, List<int> partitionHash, List<FileChunkInfo> fci)
        {
            // Hand-off collections between the stages, in pipeline order.
            var hashQueue = new BlockingCollectionDataChunk<uint>();
            var positionQueue = new BlockingCollectionDataChunk<int>();
            var partitionQueue = new BlockingCollectionDataChunk<uint>();
            var chunkInfoQueue = new BlockingCollectionDataChunk<FileChunkInfo>();

            // The window is a fixed value rather than one derived from the file length.
            const int localMaximaWindow = 32 * 1024;

            // Stage 1: file -> hash values.
            Task.Run(() =>
            {
                using (var input = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read))
                {
                    var hasher = new FileHash(16);
                    hasher.StreamToHashValues(input, hashQueue);
                }
            });

            // Stage 2: hash values -> positions.
            Task.Run(() =>
            {
                var maxima = new LocalMaxima(localMaximaWindow);
                maxima.CalcUsingBlockAlgo(hashQueue, positionQueue);
            });

            // Stage 3: positions + file -> partition hashes and chunk descriptors.
            Task.Run(() =>
            {
                var partitioner = new FileParitionHash(new MurmurHash3_x86_32());
                using (var input = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read))
                {
                    partitioner.ProcessStream(input, positionQueue, partitionQueue, chunkInfoQueue);
                }
            });

            // Partition hashes are drained completely before the chunk descriptors are touched.
            foreach (var chunk in partitionQueue.BlockingCollection.GetConsumingEnumerable())
            {
                var offset = 0;
                while (offset < chunk.DataSize)
                {
                    partitionHash.Add((int)chunk.Data[offset]);
                    offset++;
                }
            }

            foreach (var chunk in chunkInfoQueue.BlockingCollection.GetConsumingEnumerable())
            {
                var offset = 0;
                while (offset < chunk.DataSize)
                {
                    fci.Add(chunk.Data[offset]);
                    offset++;
                }
            }
        }
示例#4
0
        /// <summary>
        /// Builds a delta description of <paramref name="currFile"/> relative to a serialized
        /// Bloom filter and writes it to <paramref name="deltaFile"/>.
        /// The file is scanned from the start: whenever the filter reports the <c>BlockSize</c>
        /// bytes at the current offset as present, an entry carrying only that block's hash is
        /// emitted and the scan skips the block; otherwise the current byte is accumulated as raw
        /// data and the scan advances by one byte. Accumulated raw bytes are emitted as a
        /// data-carrying entry before each hash entry and once more at the end.
        /// </summary>
        /// <param name="currFile">Path of the current version of the file.</param>
        /// <param name="bfFile">Path of the serialized <c>BloomFilter</c>.</param>
        /// <param name="deltaFile">Path of the delta file to create (an existing file is overwritten).</param>
        public static void GenDeltaFileFromBFFixedSize(string currFile, string bfFile, string deltaFile)
        {
            BloomFilter bf;
            using (var file = File.OpenRead(bfFile))
            {
                bf = Serializer.Deserialize<BloomFilter>(file);
            }
            // The hash functions are installed explicitly after deserialization.
            bf.SetHashFunctions(BloomFilter.DefaultHashFuncs());

            // Hack: the whole file is loaded into memory, so this does not work for very large files.
            var fileBytes = File.ReadAllBytes(currFile);
            var currIdx = 0;
            var deltaDataList = new List<DeltaData>();
            var currRawData = new List<byte>();

            // The hash algorithm is disposable; scope it to the scan that uses it.
            using (var hFunc = new MurmurHash3_x86_32())
            {
                // NOTE(review): with '<' the scan stops when exactly BlockSize bytes remain, so a
                // final full block is always emitted as raw data instead of being tested against
                // the filter. Left unchanged because the consumers of the delta file may rely on
                // the same boundary — confirm before relaxing it to '<='.
                while (currIdx + BlockSize < fileBytes.Length)
                {
                    if (bf.Contains(fileBytes, currIdx, BlockSize))
                    {
                        // Flush pending raw bytes first so entries stay in file order.
                        if (currRawData.Count != 0)
                        {
                            deltaDataList.Add(new DeltaData() { Data = currRawData.ToArray() });
                            currRawData.Clear();
                        }

                        deltaDataList.Add(new DeltaData()
                        {
                            HashValue = BitConverter.ToInt32(hFunc.ComputeHash(fileBytes, currIdx, BlockSize), 0)
                        });
                        currIdx += BlockSize;
                    }
                    else
                    {
                        currRawData.Add(fileBytes[currIdx]);
                        currIdx++;
                    }
                }
            }

            // Whatever was not covered by the scan is carried verbatim.
            for (; currIdx < fileBytes.Length; ++currIdx)
            {
                currRawData.Add(fileBytes[currIdx]);
            }

            if (currRawData.Count != 0)
            {
                deltaDataList.Add(new DeltaData() { Data = currRawData.ToArray() });
                currRawData.Clear();
            }

            using (var file = File.Create(deltaFile))
            {
                Serializer.Serialize(file, deltaDataList);
            }
        }
示例#5
0
        /// <summary>
        /// Reconstructs a file at <paramref name="outFile"/> from an old local copy plus delta data.
        /// Each delta entry either carries raw data, which is written as-is, or carries only a
        /// hash value. A hash-only entry is resolved against the hashes of the
        /// <c>BlockSize</c>-sized blocks of <paramref name="oldFile"/>; if no block has that
        /// hash, the bytes are taken from the missing-delta dictionary instead.
        /// </summary>
        /// <param name="oldFile">Path of the existing local file whose blocks can be reused.</param>
        /// <param name="deltaFile">Path of the serialized <c>List&lt;DeltaData&gt;</c>.</param>
        /// <param name="deltaMissingFile">Path of the serialized hash-to-bytes dictionary for blocks not present locally.</param>
        /// <param name="outFile">Path of the file to create (an existing file is overwritten).</param>
        /// <exception cref="KeyNotFoundException">
        /// A hash-only entry matches neither a block of <paramref name="oldFile"/> nor an entry
        /// of the missing-delta dictionary.
        /// </exception>
        public static void PatchFile(string oldFile, string deltaFile, string deltaMissingFile, string outFile)
        {
            // Delta data from server.
            List<DeltaData> deltaDataList;
            using (var fs = File.OpenRead(deltaFile))
            {
                deltaDataList = Serializer.Deserialize<List<DeltaData>>(fs);
            }
            // Missing delta data from server.
            Dictionary<int, byte[]> missingDeltaData;
            using (var fs = File.OpenRead(deltaMissingFile))
            {
                missingDeltaData = Serializer.Deserialize<Dictionary<int, byte[]>>(fs);
            }

            // Map each block hash of the old file to the index of the first block producing it.
            var existingHashValues = new Dictionary<int, int>();
            var currBlockIdx = 0;
            var buff = new byte[BlockSize];
            using (var hFunc = new MurmurHash3_x86_32())
            using (var fs = new FileStream(oldFile, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                int byteRead;
                while ((byteRead = fs.Read(buff, 0, BlockSize)) != 0)
                {
                    // The last block may be shorter than BlockSize; only the bytes read are hashed.
                    var hv = BitConverter.ToInt32(hFunc.ComputeHash(buff, 0, byteRead), 0);
                    if (!existingHashValues.ContainsKey(hv))
                    {
                        existingHashValues.Add(hv, currBlockIdx);
                    }
                    currBlockIdx++;
                }
            }

            // Construct the new file.
            using (var fsout = new FileStream(outFile, FileMode.Create, FileAccess.Write, FileShare.None))
            using (var fsOld = new FileStream(oldFile, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                foreach (var currPatch in deltaDataList)
                {
                    if (currPatch.Data != null)
                    {
                        // New data shipped inside the delta entry itself.
                        var newdataBytes = currPatch.Data.ToArray();
                        fsout.Write(newdataBytes, 0, newdataBytes.Length);
                        continue;
                    }

                    int idx;
                    if (existingHashValues.TryGetValue(currPatch.HashValue, out idx))
                    {
                        // Existing data: copy the block from the old file. The offset is computed
                        // in 64-bit arithmetic so it cannot overflow for files of 2 GiB and above.
                        fsOld.Position = (long)idx * BlockSize;

                        // The hashing buffer is no longer needed at this point, so it is reused
                        // instead of allocating a fresh block buffer per entry.
                        var bRead = fsOld.Read(buff, 0, BlockSize);
                        fsout.Write(buff, 0, bRead);
                    }
                    else
                    {
                        // Should be in the missing delta file; the indexer throws if it is not.
                        var d = missingDeltaData[currPatch.HashValue];
                        fsout.Write(d, 0, d.Length);
                    }
                }
            }
        }
示例#6
0
        /// <summary>
        /// Determines which hash-only entries of a delta file cannot be satisfied from
        /// <paramref name="oldFile"/> and writes those hash values to
        /// <paramref name="missingHashFile"/>.
        /// The old file is read in <c>BlockSize</c>-sized blocks and every block is hashed; a
        /// delta entry without data whose hash is not among those block hashes is reported.
        /// Each missing hash is written once, in order of first appearance in the delta.
        /// </summary>
        /// <param name="oldFile">Path of the existing local file.</param>
        /// <param name="deltaFile">Path of the serialized <c>List&lt;DeltaData&gt;</c>.</param>
        /// <param name="missingHashFile">Path of the missing-hash list to create (an existing file is overwritten).</param>
        public static void GenMissingHashFile(string oldFile, string deltaFile, string missingHashFile)
        {
            // Delta data from server.
            List<DeltaData> deltaDataList;
            using (var fs = File.OpenRead(deltaFile))
            {
                deltaDataList = Serializer.Deserialize<List<DeltaData>>(fs);
            }

            // Construct all hash values from client.
            var existingHashValues = new HashSet<int>();
            var buff = new byte[BlockSize];
            using (var hFunc = new MurmurHash3_x86_32())
            using (var fs = new FileStream(oldFile, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                int byteRead;
                while ((byteRead = fs.Read(buff, 0, BlockSize)) != 0)
                {
                    // The last block may be shorter than BlockSize; only the bytes read are hashed.
                    existingHashValues.Add(BitConverter.ToInt32(hFunc.ComputeHash(buff, 0, byteRead), 0));
                }
            }

            var missingList = new List<int>();
            // Tracks hashes already reported so a block referenced several times is listed once.
            var alreadyReported = new HashSet<int>();
            foreach (var dd in deltaDataList)
            {
                if (dd.Data != null)
                {
                    // Entries that carry their own data never need a lookup.
                    continue;
                }

                var currHash = dd.HashValue;
                if (!existingHashValues.Contains(currHash) && alreadyReported.Add(currHash))
                {
                    missingList.Add(currHash);
                }
            }

            using (var fs = File.Create(missingHashFile))
            {
                Serializer.Serialize(fs, missingList);
            }
        }
示例#7
0
        /// <summary>
        /// Produces the block contents for a list of requested hash values and writes them, keyed
        /// by hash, to <paramref name="missingDeltaFile"/>.
        /// <paramref name="currFile"/> is scanned from the start: the <c>BlockSize</c> bytes at the
        /// current offset are hashed; if the hash is still wanted, the block is recorded and the
        /// scan skips past it, otherwise the scan advances by one byte. The scan ends as soon as
        /// every requested hash has been found or no full block remains.
        /// </summary>
        /// <param name="currFile">Path of the current version of the file.</param>
        /// <param name="missingHashFile">Path of the serialized <c>List&lt;int&gt;</c> of requested hash values.</param>
        /// <param name="missingDeltaFile">Path of the hash-to-bytes dictionary to create (an existing file is overwritten).</param>
        public static void GenDeltaFromMissing(string currFile, string missingHashFile, string missingDeltaFile)
        {
            List<int> missingList;
            using (var fs = File.OpenRead(missingHashFile))
            {
                missingList = Serializer.Deserialize<List<int>>(fs);
            }

            var missingSet = new HashSet<int>(missingList);

            // HACK: the whole file is loaded into memory.
            var fileBytes = File.ReadAllBytes(currFile);
            var currIdx = 0;
            var deltaDataList = new Dictionary<int, byte[]>();

            using (var hFunc = new MurmurHash3_x86_32())
            {
                // '<=' so that a block ending exactly at the end of the file is examined too.
                while (missingSet.Count != 0 && currIdx + BlockSize <= fileBytes.Length)
                {
                    var hv = BitConverter.ToInt32(hFunc.ComputeHash(fileBytes, currIdx, BlockSize), 0);
                    if (missingSet.Contains(hv))
                    {
                        var data = new byte[BlockSize];
                        Array.Copy(fileBytes, currIdx, data, 0, BlockSize);

                        // The hash is removed from the wanted set right below, so the same key
                        // is never added twice.
                        deltaDataList.Add(hv, data);
                        currIdx += BlockSize;

                        missingSet.Remove(hv);
                    }
                    else
                    {
                        currIdx++;
                    }
                }
            }

            using (var fs = File.Create(missingDeltaFile))
            {
                Serializer.Serialize(fs, deltaDataList);
            }
        }