/// <summary>
/// Builds the two nested lookup tables over the files of previous backups.
/// </summary>
/// <param name="currentBkp">Backup that is currently being created; kept in a field.</param>
/// <param name="prevBackupFiles">
/// Pairs of a file record and the backup it was read from. When several pairs share the same
/// keys, the first one in list order is the one stored in the lookup.
/// </param>
public SessionFileFindHelper(BackupInfo currentBkp, List<Tuple<BackupFileInfo, BackupInfo>> prevBackupFiles)
{
    _currentBkp = currentBkp;

    // length -> hash -> first entry with that (length, hash) pair
    var entriesByLength =
        from entry in prevBackupFiles
        group entry by entry.Item1.Length;
    _prevBackupFilesLookupByLengthByHash = entriesByLength.ToDictionary(
        sameLength => sameLength.Key,
        sameLength => sameLength
            .GroupBy(entry => entry.Item1.Hash)
            .ToDictionary(sameHash => sameHash.Key, sameHash => sameHash.First()));

    // path -> length -> first entry with that (path, length) pair
    var entriesByPath =
        from entry in prevBackupFiles
        group entry by entry.Item1.Path;
    _prevBackupFilesLookupBypathByLength = entriesByPath.ToDictionary(
        samePath => samePath.Key,
        samePath => samePath
            .GroupBy(entry => entry.Item1.Length)
            .ToDictionary(sameLength => sameLength.Key, sameLength => sameLength.First()));
}
/// <summary>
/// Lazily enumerates the direct sub-directories of <paramref name="path"/> and yields a
/// <see cref="BackupInfo"/> for every one whose backup info file could be read and parsed.
/// </summary>
/// <remarks>
/// Discovery is best-effort: a directory that has no info directory/file, has an unrecognised
/// header, or fails while being read or parsed is skipped without reporting an error.
/// Expected info file layout, fields separated by <c>|</c>:
/// header line <c>date</c> or <c>date|CM</c>; then one line per file, either
/// <c>path|hash|length</c> or <c>path|hash|length|created|modified</c>.
/// </remarks>
/// <param name="path">Directory whose sub-directories are inspected.</param>
/// <returns>One <see cref="BackupInfo"/> per successfully parsed backup directory.</returns>
public static IEnumerable<BackupInfo> DiscoverBackups(string path)
{
    foreach (var dir in Directory.EnumerateDirectories(path))
    {
        var info = new BackupInfo(dir);
        try
        {
            var bkpInfoDir = Directory.EnumerateDirectories(dir)
                .FirstOrDefault(d => new DirectoryInfo(d).Name == BackupInfoDir);
            if (string.IsNullOrEmpty(bkpInfoDir))
            {
                continue;
            }

            var bkpInfoFile = Path.Combine(bkpInfoDir, BackupInfoFile);
            if (!File.Exists(bkpInfoFile))
            {
                continue;
            }

            using (var file = File.OpenText(bkpInfoFile))
            {
                var headerRaw = file.ReadLine();
                if (string.IsNullOrEmpty(headerRaw))
                {
                    continue;
                }

                // Split always returns at least one element, so a header without '|'
                // simply yields a single part (the date).
                var headerParts = headerRaw.Split('|');
                if (headerParts.Length > 2)
                {
                    continue;
                }

                if (!DateTime.TryParseExact(headerParts[0], DateSerializationFormat, CultureInfo.InvariantCulture, DateTimeStyles.None, out var dt))
                {
                    continue;
                }

                info.DateTime = dt;
                info.AttributesAvailable = headerParts.Length == 2 && headerParts[1] == "CM";

                while (!file.EndOfStream)
                {
                    info._files.Add(ParseFileRecord(file.ReadLine()));
                }
            }
        }
        catch (Exception)
        {
            // Deliberate best-effort: any I/O or format problem means this directory
            // is not treated as a usable backup and is skipped.
            continue;
        }

        // Must stay outside the try block: C# does not allow 'yield return' inside try/catch.
        yield return info;
    }
}

/// <summary>
/// Parses one file line of the backup info file into a <see cref="BackupFileInfo"/>.
/// </summary>
/// <param name="line">Line with 3 or 5 fields separated by <c>|</c>.</param>
/// <returns>The parsed file record.</returns>
/// <exception cref="FormatException">
/// The line is null, has an unexpected number of fields, or a field cannot be parsed.
/// </exception>
private static BackupFileInfo ParseFileRecord(string line)
{
    var parts = line?.Split('|');
    if (parts == null)
    {
        throw new FormatException("Failed to parse backup files list. Empty file line");
    }

    if (parts.Length != 3 && parts.Length != 5)
    {
        throw new FormatException($"Failed to parse backup files list. Unknown file line '{line}'");
    }

    var fi = new BackupFileInfo
    {
        Path = parts[0],
        Hash = parts[1],
        // Machine-written data: parse with the invariant culture, like the dates below.
        Length = long.Parse(parts[2], CultureInfo.InvariantCulture),
    };

    if (parts.Length == 5)
    {
        fi.Created = DateTime.ParseExact(parts[3], DateSerializationFormat, CultureInfo.InvariantCulture, DateTimeStyles.None);
        fi.Modified = DateTime.ParseExact(parts[4], DateSerializationFormat, CultureInfo.InvariantCulture, DateTimeStyles.None);
    }

    return fi;
}
/// <summary>
/// Processes the batch of small files: a file for which <paramref name="findHelper"/> finds an
/// existing copy is queued as a hard link, every other file is added to one temporary tar.gz.
/// If the archive was created it is renamed to <paramref name="smallFilesTarPath"/> and passed
/// to <c>_hardLinkHelper.UnpackTar</c>.
/// </summary>
/// <param name="smallFiles">Files to process; nothing happens when the list is empty.</param>
/// <param name="category">Running log category counter, advanced for every log entry.</param>
/// <param name="smallFilesTarPath">Final path of the archive; <c>.tmp</c> is appended while it is being written.</param>
/// <param name="currentBkpDir">Root directory of the backup being created.</param>
/// <param name="findHelper">Lookup used to find an already backed-up copy of a file.</param>
/// <param name="filesCount">Total number of files, used only in progress messages.</param>
/// <param name="currentBkp">Backup that receives a record for every successfully handled file.</param>
/// <param name="linkedCount">Incremented for every file queued as a hard link.</param>
/// <param name="processed">Running counter of handled files, used in progress messages.</param>
private void ProcessSmallFiles(List<BackupFileModel> smallFiles, ref int category, string smallFilesTarPath, string currentBkpDir, SessionFileFindHelper findHelper, int filesCount, BackupInfo currentBkp, ref int linkedCount, ref int processed)
{
    if (smallFiles.Count == 0)
    {
        return;
    }

    // Reads a file's hash; a failure is printed and replaced by a marker string.
    string HashOrMarker(BackupFileModel model)
    {
        try
        {
            return model.FileInfo.FastHashStr;
        }
        catch (Exception exception)
        {
            Console.WriteLine(exception);
            return "invalid hash: " + exception.Message;
        }
    }

    WriteLog("Small files hash calculation...", ++category);
    var invalidCount = smallFiles
        .AsParallel()
        .Select(model => HashOrMarker(model))
        .Where(hash => hash.StartsWith("invalid hash"))
        .Count();
    if (invalidCount > 0)
    {
        WriteLog($"Found {invalidCount} invalid records", ++category);
    }

    WriteLog($"{smallFiles.Count} files will be transferred in a batch as tar.gz", ++category);

    var packTimer = System.Diagnostics.Stopwatch.StartNew();
    var tempTarPath = smallFilesTarPath + ".tmp";
    var archivedCount = 0;
    bool archiveCreated;

    using (var archive = new TarGzHelper(tempTarPath))
    {
        foreach (var item in smallFiles)
        {
            try
            {
                var ordinal = Interlocked.Increment(ref processed);
                var targetPath = Path.Combine(currentBkpDir, item.RelativePathWin);
                var targetRelativeName = targetPath.Replace(currentBkpDir, string.Empty);
                var linkSource = findHelper.FindByLengthAndHash(item.FileInfo);
                var isLink = linkSource != null;

                if (!isLink)
                {
                    // No earlier copy: stream the file into the archive.
                    var entryName = item.RelativePathUnix;
                    using (var input = item.FileInfo.FileInfo.OpenRead())
                    {
                        archive.AddFile(entryName, input);
                        ++archivedCount;
                    }

                    WriteLog($"[{ordinal} of {filesCount}] {{tar}} {targetRelativeName} ", Interlocked.Increment(ref category));
                }
                else
                {
                    // An earlier copy exists: only a hard link has to be created later.
                    _hardLinkHelper.AddHardLinkToQueue(linkSource, targetPath);
                    linkedCount++;
                    WriteLog($"[{ordinal} of {filesCount}] {{link}} {targetRelativeName} to {linkSource}", Interlocked.Increment(ref category));
                }

                var record = new BackupFileInfo
                {
                    Path = targetRelativeName,
                    Hash = item.FileInfo.FastHashStr,
                    Length = item.FileInfo.FileInfo.Length,
                    IsLink = isLink,
                };
                currentBkp.AddFile(record);
            }
            catch (Exception e)
            {
                // A failing file is reported on the console and the batch continues.
                Console.WriteLine(e);
            }
        }

        archiveCreated = archive.IsArchiveCreated;
    }

    packTimer.Stop();
    if (!archiveCreated)
    {
        return;
    }

    var tarAndSendDuration = packTimer.Elapsed;
    WriteLog("Unpacking small files", Interlocked.Increment(ref category));

    var unpackTimer = System.Diagnostics.Stopwatch.StartNew();
    File.Move(tempTarPath, smallFilesTarPath);
    _hardLinkHelper.UnpackTar(smallFilesTarPath);
    unpackTimer.Stop();

    WriteLog($"{archivedCount} files archived and transferred in {tarAndSendDuration:g} and unpacked in {unpackTimer.Elapsed:g}", ++category);
}
/// <summary>
/// Runs one backup session: discovers previous backups under <c>_destination</c> and
/// <c>_backupRoots</c>, creates a new backup folder named after the current time, and for every
/// local file either queues a hard link to an existing copy or copies the file. Finally the
/// queued hard links are created and the backup description is written to disk.
/// </summary>
/// <remarks>
/// A failure on an individual file is printed to the console and does not stop the session.
/// The backup description is written and the "incomplete" marker removed even when creating
/// the hard links throws.
/// </remarks>
/// <returns>A task that completes when the backup session has finished.</returns>
public async Task DoBackup()
{
    await Task.Yield();
    Validate();

    var category = 0;
    var localFiles = GetFilesToBackup(ref category);

    WriteLog("Discovering backups...", ++category);
    var newBkpDate = DateTime.Now;
    var newBkpName = newBkpDate.ToString(BackupFolderNamingDateFormat, CultureInfo.InvariantCulture);
    var prevBkps = BackupInfo.DiscoverBackups(_destination).ToList();
    if (_backupRoots != null)
    {
        foreach (var root in _backupRoots)
        {
            prevBkps.AddRange(BackupInfo.DiscoverBackups(root));
        }
    }

    WriteLog($"Found {prevBkps.Count} backups", ++category);

    var currentBkpDir = Path.Combine(_destination, newBkpName);
    var filesCount = localFiles.Count;
    var currentBkp = new BackupInfo(currentBkpDir)
    {
        DateTime = newBkpDate,
        AttributesAvailable = true,
    };
    var prevBackupFiles = GetFilesFromPrevBackups(prevBkps, ref category);
    var copiedCount = 0;
    var linkedCount = 0;

    var svcDir = currentBkp.CreateFolders();
    currentBkp.CreateIncompleteAttribute();
    var smallFilesTarPath = Path.Combine(svcDir, "small-files.tar.gz");
    var directoriesTarPath = Path.Combine(svcDir, "dir-tree.tar.gz");
    var findHelper = new SessionFileFindHelper(currentBkp, prevBackupFiles);

    WriteLog("Backing up...", ++category);
    var processed = 0;

    {
        // Collect change statistics: how many local files have a length and a hash that
        // already occur in previous backups. Only the counters are used (for the log line).
        var byLength = prevBackupFiles
            .GroupBy(x => x.Item1.Length)
            .ToDictionary(x => x.Key, x => x.ToList());

        var lengthMatch = 0;
        var lengthFailMatch = 0;
        var lengthMismatch = 0;
        var hashMatch = 0;
        var fails = 0;

        foreach (var localFileInfo in localFiles)
        {
            if (!byLength.TryGetValue(localFileInfo.FileInfo.FileInfo.Length, out var files))
            {
                lengthMismatch++;
                continue;
            }

            lengthMatch++;
            try
            {
                var backupFile = files.FirstOrDefault(x => x.Item1.Hash == localFileInfo.FileInfo.FastHashStr);
                if (backupFile == null)
                {
                    lengthFailMatch++;
                }
                else
                {
                    hashMatch++;
                }
            }
            catch
            {
                // Reading the hash failed; count it and keep going.
                fails++;
            }
        }

        WriteLog($"match: {lengthMatch}; mismatch: {lengthMismatch}; fail match: {lengthFailMatch}; hash: {hashMatch}; fails: {fails}",
            Interlocked.Increment(ref category));
    }

    // NOTE: a stray 'throw null;' used to sit here; it aborted every session with a
    // NullReferenceException before any file was backed up.
    CreateDirectories(localFiles, directoriesTarPath, ref category);

    var smallFiles = GetFilesForCompression(localFiles);
    ProcessSmallFiles(smallFiles, ref category, smallFilesTarPath, currentBkpDir, findHelper, filesCount, currentBkp, ref linkedCount, ref processed);

    foreach (var localFileInfo in localFiles)
    {
        try
        {
            var processedLocal = Interlocked.Increment(ref processed);
            var newFile = Path.Combine(currentBkpDir, localFileInfo.RelativePathWin);
            var newFileRelativeName = newFile.Replace(currentBkpDir, string.Empty);

            var newDir = Path.GetDirectoryName(newFile);
            if (newDir == null)
            {
                throw new InvalidOperationException("Cannot get file's directory");
            }

            if (!Directory.Exists(newDir))
            {
                Directory.CreateDirectory(newDir);
            }

            var existingFile = findHelper.FindByLengthAndHash(localFileInfo.FileInfo);
            if (existingFile != null)
            {
                WriteLog($"[{processedLocal} of {filesCount}] {{link}} {localFileInfo.RelativePathWin} ", Interlocked.Increment(ref category));
                _hardLinkHelper.AddHardLinkToQueue(existingFile, newFile);
                linkedCount++;
            }
            else
            {
                void ProgressCallback(double progress)
                {
                    WriteLogExt($"{progress:F2} %");
                }

                WriteLog($"[{processedLocal} of {filesCount}] {localFileInfo.RelativePathWin} ", Interlocked.Increment(ref category));

                // Awaited instead of blocking on .Result: this method is already async.
                var copiedHash = await HashSumHelper.CopyUnbufferedAndComputeHashAsyncXX(localFileInfo.FileInfo.FileName, newFile, ProgressCallback, _allowSimultaneousReadWrite);

                // NOTE(review): the "X" format does not zero-pad; confirm FastHashStr is
                // produced with the same formatting.
                if (localFileInfo.FileInfo.FastHashStr == string.Concat(copiedHash.Select(b => $"{b:X}")))
                {
                    copiedCount++;
                }
                else
                {
                    WriteLog($"{localFileInfo.RelativePathWin} copy failed", Interlocked.Increment(ref category));

                    // Only break into a debugger that is actually attached.
                    if (System.Diagnostics.Debugger.IsAttached)
                    {
                        System.Diagnostics.Debugger.Break();
                    }
                }

                new FileInfo(newFile).Attributes |= FileAttributes.ReadOnly;
            }

            var o = new BackupFileInfo
            {
                Path = newFileRelativeName,
                Hash = localFileInfo.FileInfo.FastHashStr,
                Length = localFileInfo.FileInfo.FileInfo.Length,
                IsLink = existingFile != null,
            };
            currentBkp.AddFile(o);
        }
        catch (Exception e)
        {
            // Report the failing file and continue with the next one.
            Console.WriteLine();
            Console.WriteLine(e);
            Console.WriteLine();
        }
    }

    WriteLog("Writing hardlinks to target", Interlocked.Increment(ref category));
    try
    {
        _hardLinkHelper.CreateHardLinks();
    }
    finally
    {
        currentBkp.WriteToDisk();
        currentBkp.DeleteIncompleteAttribute();
    }

    var log = "Backup done.";
    if (copiedCount > 0)
    {
        log += $" {copiedCount} files copied";
    }

    if (linkedCount > 0)
    {
        log += $" {linkedCount} files linked";
    }

    WriteLog(log, ++category);
}