/// <summary>
/// Walks one segment of a blob listing. Directories are handed to
/// <c>DownloadBlobsFromDirectory</c>; every other blob is filtered by the ticks embedded in its
/// uri, by <c>Config.UriFilter</c>, by <c>Config.FileType</c> and by its last-modified time, and
/// is then listed, reported through <c>IngestCallback</c>, or passed to <c>InvokeCallback</c>.
/// </summary>
/// <remarks>
/// Counters (<c>TotalFilesEnumerated</c>, <c>TotalFilesSkipped</c>, <c>TotalFilesMatched</c>,
/// <c>TotalErrors</c>) are only ever changed through <see cref="Interlocked"/>.
/// A blob whose reference cannot be fetched is counted as an error and skipped; the remaining
/// blobs in the segment are still processed.
/// </remarks>
/// <param name="blobResultSegment">Listing segment whose <c>Results</c> are enumerated.</param>
private void QueueBlobSegmentDownload(BlobResultSegment blobResultSegment)
{
    int parentId = Thread.CurrentThread.ManagedThreadId;
    Log.Debug($"enter. current id:{parentId}. results count: {blobResultSegment.Results.Count()}");

    // Local copies of the discovered date range; SetMinMaxDate receives them by reference.
    long segmentMinDateTicks = Interlocked.Read(ref DiscoveredMinDateTicks);
    long segmentMaxDateTicks = Interlocked.Read(ref DiscoveredMaxDateTicks);

    foreach (IListBlobItem blob in blobResultSegment.Results)
    {
        ICloudBlob blobRef = null;
        Log.Debug($"parent id:{parentId} current Id:{Thread.CurrentThread.ManagedThreadId}");

        // The uri text feeds several regex checks below; build it once per blob.
        string blobUri = blob.Uri.ToString();

        CloudBlobDirectory directory = blob as CloudBlobDirectory;
        if (directory != null)
        {
            if (!string.IsNullOrEmpty(Config.NodeFilter) && !Regex.IsMatch(blobUri, Config.NodeFilter, RegexOptions.IgnoreCase))
            {
                Log.Debug($"blob:{blob.Uri} does not match nodeFilter pattern:{Config.NodeFilter}, skipping...");
                continue;
            }

            DownloadBlobsFromDirectory(directory);
            Log.Debug("blob is directory.");
            continue;
        }

        Interlocked.Increment(ref TotalFilesEnumerated);

        // Cheap pre-filter: when the uri carries a ticks value (capture group 1), blobs outside
        // the requested window are rejected without a round trip to the storage service.
        Match fileFilterMatch = Regex.Match(blobUri, FileFilterPattern, RegexOptions.IgnoreCase);
        if (fileFilterMatch.Success)
        {
            long ticks = Convert.ToInt64(fileFilterMatch.Groups[1].Value);

            if (ticks < Config.StartTimeUtc.Ticks || ticks > Config.EndTimeUtc.Ticks)
            {
                Interlocked.Increment(ref TotalFilesSkipped);
                Log.Debug($"exclude:bloburi file ticks {new DateTime(ticks).ToString("o")} outside of time range:{blob.Uri}");
                SetMinMaxDate(ref segmentMinDateTicks, ref segmentMaxDateTicks, ticks);
                continue;
            }
        }
        else
        {
            Log.Debug($"regex not matched: {blobUri} pattern: {FileFilterPattern}");
        }

        try
        {
            Log.Debug($"file Blob: {blob.Uri}");

            // GetAwaiter().GetResult() rethrows the original exception. Task.Result would wrap it
            // in an AggregateException and the StorageException handler below would never run.
            blobRef = blob.Container.ServiceClient.GetBlobReferenceFromServerAsync(blob.Uri).GetAwaiter().GetResult();
        }
        catch (StorageException se)
        {
            Interlocked.Increment(ref TotalErrors);
            // Line break written as an escape sequence, as in the other messages of this method.
            Log.Exception($"getting ref for {blob.Uri}, skipping. \r\n{se.Message}");
            continue;
        }

        if (!blobRef.Properties.LastModified.HasValue)
        {
            Log.Error("unable to read blob modified date", blobRef);
            // Same atomic update as every other counter change in this method.
            Interlocked.Increment(ref TotalErrors);
            continue;
        }

        DateTimeOffset lastModified = blobRef.Properties.LastModified.Value;

        // Recorded before the uri / file type filters, so filtered-out blobs still contribute
        // to the min/max dates.
        SetMinMaxDate(ref segmentMinDateTicks, ref segmentMaxDateTicks, lastModified.Ticks);

        if (!string.IsNullOrEmpty(Config.UriFilter) && !Regex.IsMatch(blobUri, Config.UriFilter, RegexOptions.IgnoreCase))
        {
            Interlocked.Increment(ref TotalFilesSkipped);
            Log.Debug($"blob:{blob.Uri} does not match uriFilter pattern:{Config.UriFilter}, skipping...");
            continue;
        }

        if (Config.FileType != FileTypesEnum.any)
        {
            var mappedFileType = FileTypes.MapFileTypeUri(blob.Uri.AbsolutePath);

            if (!mappedFileType.Equals(Config.FileType))
            {
                Interlocked.Increment(ref TotalFilesSkipped);
                Log.Debug($"skipping uri with incorrect file type: {mappedFileType}");
                continue;
            }
        }

        if (lastModified < Config.StartTimeUtc || lastModified > Config.EndTimeUtc)
        {
            Interlocked.Increment(ref TotalFilesSkipped);
            Log.Debug($"exclude:bloburi {lastModified.ToString("o")} outside of time range:{blob.Uri}");
            // NOTE(review): same value was already passed to SetMinMaxDate above; call kept
            // because SetMinMaxDate is not visible here.
            SetMinMaxDate(ref segmentMinDateTicks, ref segmentMaxDateTicks, lastModified.Ticks);
            continue;
        }

        Interlocked.Increment(ref TotalFilesMatched);

        if (Config.List)
        {
            // List mode only reports the blob.
            Log.Info($"listing file with timestamp: {lastModified}\r\n file: {blob.Uri.AbsolutePath}");
            continue;
        }

        if (ReturnSourceFileLink)
        {
            // Report the blob against the blob endpoint instead of the local cache location.
            IngestCallback?.Invoke(new FileObject(blob.Uri.AbsolutePath, Config.SasEndpointInfo.BlobEndpoint)
            {
                Length = blobRef.Properties.Length,
                LastModified = lastModified
            });
            continue;
        }

        FileObject fileObject = new FileObject(blob.Uri.AbsolutePath, Config.CacheLocation)
        {
            Length = blobRef.Properties.Length,
            LastModified = lastModified
        };

        Log.Info($"queueing blob with timestamp: {lastModified}\r\n file: {blob.Uri.AbsolutePath}");
        InvokeCallback(blob, fileObject);
    }
}
/// <summary>
/// Walks one segment of a blob listing. Directories are handed to
/// <c>DownloadBlobsFromDirectory</c>; every other blob is filtered by <c>Config.UriFilter</c>,
/// by the ticks embedded in its uri, by <c>Config.FileType</c> and by its last-modified time,
/// and is then listed or reported through <c>IngestCallback</c>, with a download action
/// attached when the destination file does not exist yet.
/// </summary>
/// <remarks>
/// Counters (<c>TotalFilesEnumerated</c>, <c>TotalFilesSkipped</c>, <c>TotalFilesMatched</c>,
/// <c>TotalFilesDownloaded</c>, <c>TotalErrors</c>) are only ever changed through
/// <see cref="Interlocked"/>. A blob whose reference cannot be fetched is counted as an error
/// and skipped; the remaining blobs in the segment are still processed.
/// </remarks>
/// <param name="blobResultSegment">Listing segment whose <c>Results</c> are enumerated.</param>
private void QueueBlobSegmentDownload(BlobResultSegment blobResultSegment)
{
    int parentId = Thread.CurrentThread.ManagedThreadId;
    Log.Debug($"enter. current id:{parentId}. results count: {blobResultSegment.Results.Count()}");

    foreach (var blob in blobResultSegment.Results)
    {
        ICloudBlob blobRef = null;
        Log.Debug($"parent id:{parentId} current Id:{Thread.CurrentThread.ManagedThreadId}");

        // The uri text feeds several regex checks below; build it once per blob.
        string blobUri = blob.Uri.ToString();

        CloudBlobDirectory directory = blob as CloudBlobDirectory;
        if (directory != null)
        {
            if (!string.IsNullOrEmpty(Config.NodeFilter) && !Regex.IsMatch(blobUri, Config.NodeFilter, RegexOptions.IgnoreCase))
            {
                Log.Warning($"blob:{blob.Uri} does not match nodeFilter pattern:{Config.NodeFilter}, skipping...", ConsoleColor.Yellow);
                continue;
            }

            DownloadBlobsFromDirectory(directory);
            Log.Debug("blob is directory.");
            continue;
        }

        Interlocked.Increment(ref TotalFilesEnumerated);

        if (!string.IsNullOrEmpty(Config.UriFilter) && !Regex.IsMatch(blobUri, Config.UriFilter, RegexOptions.IgnoreCase))
        {
            Interlocked.Increment(ref TotalFilesSkipped);
            Log.Warning($"blob:{blob.Uri} does not match uriFilter pattern:{Config.UriFilter}, skipping...", ConsoleColor.Yellow);
            continue;
        }

        // Cheap pre-filter: when the uri carries a ticks value (capture group 1), blobs outside
        // the requested window are rejected without a round trip to the storage service.
        Match fileFilterMatch = Regex.Match(blobUri, FileFilterPattern, RegexOptions.IgnoreCase);
        if (fileFilterMatch.Success)
        {
            long ticks = Convert.ToInt64(fileFilterMatch.Groups[1].Value);

            if (ticks < Config.StartTimeUtc.Ticks || ticks > Config.EndTimeUtc.Ticks)
            {
                Interlocked.Increment(ref TotalFilesSkipped);
                Log.Debug($"exclude:bloburi ticks outside of time range:{blob.Uri}");
                continue;
            }
        }

        try
        {
            Log.Debug($"file Blob: {blob.Uri}");

            // GetAwaiter().GetResult() rethrows the original exception. Task.Result would wrap it
            // in an AggregateException and the StorageException handler below would never run.
            blobRef = blob.Container.ServiceClient.GetBlobReferenceFromServerAsync(blob.Uri).GetAwaiter().GetResult();
        }
        catch (StorageException se)
        {
            Interlocked.Increment(ref TotalErrors);
            // Line break written as an escape sequence, as in the other messages of this method.
            Log.Exception($"getting ref for {blob.Uri}, skipping. \r\n{se.Message}");
            continue;
        }

        if (!blobRef.Properties.LastModified.HasValue)
        {
            Log.Error("unable to read blob modified date", blobRef);
            // Same atomic update as every other counter change in this method.
            Interlocked.Increment(ref TotalErrors);
            continue;
        }

        DateTimeOffset lastModified = blobRef.Properties.LastModified.Value;

        if (Config.FileType != FileTypesEnum.any)
        {
            var mappedFileType = FileTypes.MapFileTypeUri(blob.Uri.AbsolutePath);

            if (!mappedFileType.Equals(Config.FileType))
            {
                Interlocked.Increment(ref TotalFilesSkipped);
                Log.Debug($"skipping uri with incorrect file type: {mappedFileType}");
                continue;
            }
        }

        // Blobs modified outside the requested window are dropped here without being counted
        // as skipped.
        if (lastModified < Config.StartTimeUtc || lastModified > Config.EndTimeUtc)
        {
            continue;
        }

        Interlocked.Increment(ref TotalFilesMatched);

        if (Config.List)
        {
            // List mode only reports the blob.
            Log.Info($"listing file with timestamp: {lastModified}\r\n file: {blob.Uri.AbsolutePath}");
            continue;
        }

        if (ReturnSourceFileLink)
        {
            // Report the blob against the blob endpoint instead of the local cache location.
            IngestCallback?.Invoke(new FileObject(blob.Uri.AbsolutePath, Config.SasEndpointInfo.BlobEndpoint)
            {
                Length = blobRef.Properties.Length
            });
            continue;
        }

        FileObject fileObject = new FileObject(blob.Uri.AbsolutePath, Config.CacheLocation);
        Log.Info($"queueing blob with timestamp: {lastModified}\r\n file: {blob.Uri.AbsolutePath}");

        if (fileObject.Exists)
        {
            Log.Warning($"destination file exists. skipping download:\r\n file: {fileObject}");
            IngestCallback?.Invoke(fileObject);
            continue;
        }

        // The download itself is deferred: it only runs when DownloadAction is invoked.
        // NOTE(review): the cast assumes every listed file is a block blob; a page or append
        // blob would fail here with InvalidCastException - confirm against the container contents.
        fileObject.DownloadAction = () =>
        {
            BlobRequestOptions requestOptions = new BlobRequestOptions()
            {
                RetryPolicy = new IngestRetryPolicy(),
                ParallelOperationThreadCount = Config.Threads
            };

            ((CloudBlockBlob)blob).DownloadToStreamAsync(fileObject.Stream.Get(), null, requestOptions, null).Wait();
        };

        IngestCallback?.Invoke(fileObject);
        Interlocked.Increment(ref TotalFilesDownloaded);
    }
}