Пример #1
0
        /// <summary>
        /// Reads a table in chunks and passes each non-empty chunk to the ingest callback as a file object.
        /// </summary>
        /// <param name="cloudTable">Table to enumerate.</param>
        /// <param name="urlFilterPattern">
        /// Regex matched case-insensitively against the table URI. When null or empty, no filtering is applied.
        /// </param>
        private void EnumerateTableRecords(CloudTable cloudTable, string urlFilterPattern)
        {
            bool tableSelected = string.IsNullOrEmpty(urlFilterPattern)
                || Regex.IsMatch(cloudTable.Uri.ToString(), urlFilterPattern, RegexOptions.IgnoreCase);

            if (!tableSelected)
            {
                // The table URI is excluded by the filter: count it and do nothing else.
                _instance.TotalFilesSkipped++;
                return;
            }

            int chunkIndex = 0;

            foreach (IList<CsvTableRecord> records in EnumerateTable(cloudTable, TableMaxResults))
            {
                if (records.Count < 1)
                {
                    continue;
                }

                if (Config.List)
                {
                    // List mode only reports the chunk size; nothing is written or ingested.
                    Log.Info($"cloudtable: {cloudTable.Name} results: {records.Count}");
                    continue;
                }

                // The chunk index is only consumed for chunks that are actually written.
                string chunkUri = $"{Config.StartTimeUtc.Ticks}-{Config.EndTimeUtc.Ticks}-{cloudTable.Name}.{chunkIndex++}{TableExtension}";
                FileObject chunkFile = new FileObject(chunkUri, Config.CacheLocation);

                // Stamp every record with the relative URI of the chunk it is written to.
                records.ToList().ForEach(record => record.RelativeUri = chunkUri);
                chunkFile.Stream.Write(records);

                _instance.TotalFilesDownloaded++;
                IngestCallback?.Invoke(chunkFile);
            }
        }
        /// <summary>
        /// Reads a table in chunks and passes each non-empty chunk to the ingest callback as a file object.
        /// </summary>
        /// <param name="cloudTable">Table to enumerate.</param>
        /// <param name="urlFilterPattern">
        /// Regex matched case-insensitively against the table URI. When null or empty, no filtering is applied.
        /// </param>
        private void EnumerateTableRecords(CloudTable cloudTable, string urlFilterPattern)
        {
            // Match against the caller-supplied pattern. Testing the argument for null/empty and then
            // matching a different pattern would ignore the filter the caller asked for.
            if (string.IsNullOrEmpty(urlFilterPattern) || Regex.IsMatch(cloudTable.Uri.ToString(), urlFilterPattern, RegexOptions.IgnoreCase))
            {
                int chunkCount = 0;

                foreach (IList <CsvTableRecord> resultsChunk in EnumerateTable(cloudTable, TableMaxResults))
                {
                    if (resultsChunk.Count < 1)
                    {
                        continue;
                    }

                    if (Config.List)
                    {
                        // List mode only reports the chunk size; nothing is written or ingested.
                        Log.Info($"cloudtable: {cloudTable.Name} results: {resultsChunk.Count}");
                        continue;
                    }

                    // The chunk index is only consumed for chunks that are actually written.
                    FileObject fileObject = new FileObject($"{cloudTable.Name}.{chunkCount++}{TableExtension}", Config.CacheLocation);
                    fileObject.Stream.Write(resultsChunk.ToList());

                    _instance.TotalFilesDownloaded++;
                    IngestCallback?.Invoke(fileObject);
                }
            }
            else
            {
                // The table URI is excluded by the filter.
                _instance.TotalFilesSkipped++;
            }
        }
        /// <summary>
        /// Reads a table in chunks, registers each new non-empty chunk as a file object and passes it
        /// to the ingest callback. Chunks already tracked with status <c>existing</c> are skipped.
        /// </summary>
        /// <param name="cloudTable">Table to enumerate.</param>
        /// <param name="urlFilterPattern">
        /// Regex matched case-insensitively against the table URI. When null or empty, no filtering is applied.
        /// </param>
        private void EnumerateTableRecords(CloudTable cloudTable, string urlFilterPattern)
        {
            bool tableSelected = string.IsNullOrEmpty(urlFilterPattern)
                || Regex.IsMatch(cloudTable.Uri.ToString(), urlFilterPattern, RegexOptions.IgnoreCase);

            if (!tableSelected)
            {
                // The table URI is excluded by the filter: count it and do nothing else.
                _instance.TotalFilesSkipped++;
                return;
            }

            int chunkIndex = 0;

            foreach (IList<CsvTableRecord> records in EnumerateTable(cloudTable, Constants.TableMaxResults))
            {
                if (records.Count < 1)
                {
                    continue;
                }

                if (_config.List)
                {
                    // List mode only reports the chunk size; nothing is written or ingested.
                    Log.Info($"cloudtable: {cloudTable.Name} results: {records.Count}");
                    continue;
                }

                // The chunk index advances here, so a chunk skipped below still consumes its index.
                string chunkUri = $"{_config.StartTimeUtc.Ticks}-{_config.EndTimeUtc.Ticks}-{cloudTable.Name}.{chunkIndex++}{Constants.TableExtension}";
                FileObject chunkFile = new FileObject(chunkUri, _config.CacheLocation)
                {
                    Status = FileStatus.enumerated
                };

                // NOTE(review): assumes FindByUriFirstOrDefault never returns null - confirm.
                FileStatus trackedStatus = _instance.FileObjects.FindByUriFirstOrDefault(chunkUri).Status;

                if (trackedStatus == FileStatus.existing)
                {
                    Log.Info($"{chunkUri} already exists. skipping", ConsoleColor.DarkYellow);
                    continue;
                }

                _instance.FileObjects.Add(chunkFile);

                // Stamp every record with the relative URI of the chunk it is written to.
                records.ToList().ForEach(record => record.RelativeUri = chunkUri);
                chunkFile.Stream.Write(records);

                _instance.TotalFilesDownloaded++;
                IngestCallback?.Invoke(chunkFile);
            }
        }
        /// <summary>
        /// Attaches a download action to the file object unless the destination already exists,
        /// then passes the file object to the ingest callback in either case.
        /// </summary>
        /// <param name="blob">Source blob; it is cast to <c>CloudBlockBlob</c> when the download action runs.</param>
        /// <param name="fileObject">Destination descriptor that receives the download action.</param>
        /// <param name="sourceLength">
        /// Blob size used to pick the strategy: above <c>Constants.MaxStreamTransmitBytes</c> the blob is
        /// downloaded to a file, otherwise into the file object's stream.
        /// </param>
        private void InvokeCallback(IListBlobItem blob, FileObject fileObject, int sourceLength)
        {
            if (fileObject.Exists)
            {
                Log.Warning($"destination file exists. skipping download:\r\n file: {fileObject}");
                _instance.TotalFilesSkipped++;
            }
            else
            {
                BlobRequestOptions downloadOptions = new BlobRequestOptions()
                {
                    RetryPolicy = new IngestRetryPolicy(),
                    ParallelOperationThreadCount = _config.Threads
                };

                bool downloadToFile = sourceLength > Constants.MaxStreamTransmitBytes;

                if (downloadToFile)
                {
                    fileObject.DownloadAction = () =>
                    {
                        // Make sure the target directory exists before the SDK creates the file.
                        string targetDirectory = Path.GetDirectoryName(fileObject.FileUri);

                        if (!Directory.Exists(targetDirectory))
                        {
                            Directory.CreateDirectory(targetDirectory);
                        }

                        CloudBlockBlob blockBlob = (CloudBlockBlob)blob;
                        blockBlob.DownloadToFileAsync(fileObject.FileUri, FileMode.Create, null, downloadOptions, null).Wait();
                    };
                }
                else
                {
                    fileObject.DownloadAction = () =>
                    {
                        CloudBlockBlob blockBlob = (CloudBlockBlob)blob;
                        blockBlob.DownloadToStreamAsync(fileObject.Stream.Get(), null, downloadOptions, null).Wait();
                    };
                }

                // Counted when the download is queued, not when the action actually runs.
                _instance.TotalFilesDownloaded++;
            }

            IngestCallback?.Invoke(fileObject);
        }
        /// <summary>
        /// Walks one batch of blob listing results. Directories are recursed into; each blob that passes
        /// the configured filters and time range is listed, linked, or queued for download.
        /// </summary>
        /// <param name="blobResults">Listing results to process; may contain blobs and blob directories.</param>
        private void QueueBlobSegmentDownload(IEnumerable <IListBlobItem> blobResults)
        {
            int parentId = Thread.CurrentThread.ManagedThreadId;

            Log.Debug($"enter. current id:{parentId}. results count: {blobResults.Count()}");

            foreach (IListBlobItem blob in blobResults)
            {
                ICloudBlob blobRef = null;
                Log.Debug($"parent id:{parentId} current Id:{Thread.CurrentThread.ManagedThreadId}");

                if (blob is CloudBlobDirectory)
                {
                    if (!string.IsNullOrEmpty(_config.NodeFilter) && !Regex.IsMatch(blob.Uri.ToString(), _config.NodeFilter, RegexOptions.IgnoreCase))
                    {
                        Log.Debug($"blob:{blob.Uri} does not match nodeFilter pattern:{_config.NodeFilter}, skipping...");
                        continue;
                    }

                    DownloadBlobsFromDirectory(blob as CloudBlobDirectory);
                    Log.Debug("blob is directory.");
                    continue;
                }

                _instance.TotalFilesEnumerated++;

                // Evaluate the file-name pattern once and reuse the match for the tick extraction.
                Match tickMatch = Regex.Match(blob.Uri.ToString(), _fileFilterPattern, RegexOptions.IgnoreCase);

                if (tickMatch.Success)
                {
                    long ticks = Convert.ToInt64(tickMatch.Groups[1].Value);

                    // Cheap pre-filter on the ticks embedded in the URI, before any server round trip.
                    if (ticks < _config.StartTimeUtc.Ticks || ticks > _config.EndTimeUtc.Ticks)
                    {
                        _instance.TotalFilesSkipped++;
                        Log.Debug($"exclude:bloburi file ticks {new DateTime(ticks).ToString("o")} outside of time range:{blob.Uri}");

                        _instance.SetMinMaxDate(ticks);
                        continue;
                    }
                }
                else
                {
                    Log.Debug($"regex not matched: {blob.Uri.ToString()} pattern: {_fileFilterPattern}");
                }

                try
                {
                    Log.Debug($"file Blob: {blob.Uri}");

                    // GetAwaiter().GetResult() rethrows the task's original exception. Task.Result would
                    // wrap it in AggregateException, which the StorageException handler would not catch.
                    blobRef = blob.Container.ServiceClient.GetBlobReferenceFromServerAsync(blob.Uri).GetAwaiter().GetResult();
                }
                catch (StorageException se)
                {
                    _instance.TotalErrors++;
                    Log.Exception($"getting ref for {blob.Uri}, skipping. {se.Message}");
                    continue;
                }

                if (blobRef.Properties.LastModified.HasValue)
                {
                    DateTimeOffset lastModified = blobRef.Properties.LastModified.Value;
                    _instance.SetMinMaxDate(lastModified.Ticks);

                    if (!string.IsNullOrEmpty(_config.UriFilter) && !Regex.IsMatch(blob.Uri.ToString(), _config.UriFilter, RegexOptions.IgnoreCase))
                    {
                        _instance.TotalFilesSkipped++;
                        Log.Debug($"blob:{blob.Uri} does not match uriFilter pattern:{_config.UriFilter}, skipping...");
                        continue;
                    }

                    if (_config.FileType != FileTypesEnum.any &&
                        !FileTypes.MapFileTypeUri(blob.Uri.AbsolutePath).Equals(_config.FileType))
                    {
                        _instance.TotalFilesSkipped++;
                        Log.Debug($"skipping uri with incorrect file type: {FileTypes.MapFileTypeUri(blob.Uri.AbsolutePath)}");
                        continue;
                    }

                    if (lastModified >= _config.StartTimeUtc && lastModified <= _config.EndTimeUtc)
                    {
                        _instance.TotalFilesMatched++;

                        if (_config.List)
                        {
                            // List mode only reports the match; nothing is queued.
                            Log.Info($"listing file with timestamp: {lastModified}\r\n file: {blob.Uri.AbsolutePath}");
                            continue;
                        }

                        FileObject fileObject = new FileObject(blob.Uri.AbsolutePath, _config.CacheLocation)
                        {
                            LastModified = lastModified,
                            Status       = FileStatus.enumerated
                        };

                        // NOTE(review): assumes FindByUriFirstOrDefault never returns null - confirm.
                        if (_instance.FileObjects.FindByUriFirstOrDefault(fileObject.RelativeUri).Status == FileStatus.existing)
                        {
                            Log.Info($"{fileObject} already exists. skipping", ConsoleColor.DarkYellow);
                            continue;
                        }

                        _instance.FileObjects.Add(fileObject);

                        if (ReturnSourceFileLink && fileObject.IsSourceFileLinkCompliant())
                        {
                            // Hand back a link to the source blob instead of downloading it.
                            fileObject.BaseUri = _config.SasEndpointInfo.BlobEndpoint;
                            fileObject.FileUri = blob.Uri.AbsolutePath;
                            IngestCallback?.Invoke(fileObject);
                            continue;
                        }

                        // Clamp before narrowing: an unchecked (int) cast wraps for blobs over int.MaxValue
                        // bytes, which could make a huge blob look small to the size check in InvokeCallback.
                        int sourceLength = (int)Math.Min(blobRef.Properties.Length, int.MaxValue);

                        Log.Info($"queueing blob with timestamp: {lastModified}\r\n file: {blob.Uri.AbsolutePath}");
                        InvokeCallback(blob, fileObject, sourceLength);
                    }
                    else
                    {
                        _instance.TotalFilesSkipped++;
                        Log.Debug($"exclude:bloburi {lastModified.ToString("o")} outside of time range:{blob.Uri}");

                        _instance.SetMinMaxDate(lastModified.Ticks);
                        continue;
                    }
                }
                else
                {
                    Log.Error("unable to read blob modified date", blobRef);
                    _instance.TotalErrors++;
                }
            }
        }
Пример #6
0
        /// <summary>
        /// Walks one blob listing segment. Directories are recursed into; each blob that passes the
        /// configured filters and time range is listed, linked, or queued for download.
        /// </summary>
        /// <param name="blobResultSegment">Listing segment whose <c>Results</c> are processed.</param>
        private void QueueBlobSegmentDownload(BlobResultSegment blobResultSegment)
        {
            int parentId = Thread.CurrentThread.ManagedThreadId;

            Log.Debug($"enter. current id:{parentId}. results count: {blobResultSegment.Results.Count()}");

            // Local snapshot of the discovered date range; passed by ref to SetMinMaxDate below.
            long segmentMinDateTicks = Interlocked.Read(ref DiscoveredMinDateTicks);
            long segmentMaxDateTicks = Interlocked.Read(ref DiscoveredMaxDateTicks);

            foreach (IListBlobItem blob in blobResultSegment.Results)
            {
                ICloudBlob blobRef = null;
                Log.Debug($"parent id:{parentId} current Id:{Thread.CurrentThread.ManagedThreadId}");

                if (blob is CloudBlobDirectory)
                {
                    if (!string.IsNullOrEmpty(Config.NodeFilter) && !Regex.IsMatch(blob.Uri.ToString(), Config.NodeFilter, RegexOptions.IgnoreCase))
                    {
                        Log.Debug($"blob:{blob.Uri} does not match nodeFilter pattern:{Config.NodeFilter}, skipping...");
                        continue;
                    }

                    DownloadBlobsFromDirectory(blob as CloudBlobDirectory);
                    Log.Debug("blob is directory.");
                    continue;
                }

                Interlocked.Increment(ref TotalFilesEnumerated);

                // Evaluate the file-name pattern once and reuse the match for the tick extraction.
                Match tickMatch = Regex.Match(blob.Uri.ToString(), FileFilterPattern, RegexOptions.IgnoreCase);

                if (tickMatch.Success)
                {
                    long ticks = Convert.ToInt64(tickMatch.Groups[1].Value);

                    // Cheap pre-filter on the ticks embedded in the URI, before any server round trip.
                    if (ticks < Config.StartTimeUtc.Ticks || ticks > Config.EndTimeUtc.Ticks)
                    {
                        Interlocked.Increment(ref TotalFilesSkipped);
                        Log.Debug($"exclude:bloburi file ticks {new DateTime(ticks).ToString("o")} outside of time range:{blob.Uri}");

                        SetMinMaxDate(ref segmentMinDateTicks, ref segmentMaxDateTicks, ticks);
                        continue;
                    }
                }
                else
                {
                    Log.Debug($"regex not matched: {blob.Uri.ToString()} pattern: {FileFilterPattern}");
                }

                try
                {
                    Log.Debug($"file Blob: {blob.Uri}");

                    // GetAwaiter().GetResult() rethrows the task's original exception. Task.Result would
                    // wrap it in AggregateException, which the StorageException handler would not catch.
                    blobRef = blob.Container.ServiceClient.GetBlobReferenceFromServerAsync(blob.Uri).GetAwaiter().GetResult();
                }
                catch (StorageException se)
                {
                    Interlocked.Increment(ref TotalErrors);
                    Log.Exception($"getting ref for {blob.Uri}, skipping. {se.Message}");
                    continue;
                }

                if (blobRef.Properties.LastModified.HasValue)
                {
                    DateTimeOffset lastModified = blobRef.Properties.LastModified.Value;
                    SetMinMaxDate(ref segmentMinDateTicks, ref segmentMaxDateTicks, lastModified.Ticks);

                    if (!string.IsNullOrEmpty(Config.UriFilter) && !Regex.IsMatch(blob.Uri.ToString(), Config.UriFilter, RegexOptions.IgnoreCase))
                    {
                        Interlocked.Increment(ref TotalFilesSkipped);
                        Log.Debug($"blob:{blob.Uri} does not match uriFilter pattern:{Config.UriFilter}, skipping...");
                        continue;
                    }

                    if (Config.FileType != FileTypesEnum.any &&
                        !FileTypes.MapFileTypeUri(blob.Uri.AbsolutePath).Equals(Config.FileType))
                    {
                        Interlocked.Increment(ref TotalFilesSkipped);
                        Log.Debug($"skipping uri with incorrect file type: {FileTypes.MapFileTypeUri(blob.Uri.AbsolutePath)}");
                        continue;
                    }

                    if (lastModified >= Config.StartTimeUtc && lastModified <= Config.EndTimeUtc)
                    {
                        Interlocked.Increment(ref TotalFilesMatched);

                        if (Config.List)
                        {
                            // List mode only reports the match; nothing is queued.
                            Log.Info($"listing file with timestamp: {lastModified}\r\n file: {blob.Uri.AbsolutePath}");
                            continue;
                        }

                        if (ReturnSourceFileLink)
                        {
                            // Hand back a link rooted at the blob endpoint instead of downloading.
                            IngestCallback?.Invoke(new FileObject(blob.Uri.AbsolutePath, Config.SasEndpointInfo.BlobEndpoint)
                            {
                                Length       = blobRef.Properties.Length,
                                LastModified = lastModified
                            });
                            continue;
                        }

                        FileObject fileObject = new FileObject(blob.Uri.AbsolutePath, Config.CacheLocation)
                        {
                            Length       = blobRef.Properties.Length,
                            LastModified = lastModified
                        };

                        Log.Info($"queueing blob with timestamp: {lastModified}\r\n file: {blob.Uri.AbsolutePath}");
                        InvokeCallback(blob, fileObject);
                    }
                    else
                    {
                        Interlocked.Increment(ref TotalFilesSkipped);
                        Log.Debug($"exclude:bloburi {lastModified.ToString("o")} outside of time range:{blob.Uri}");

                        SetMinMaxDate(ref segmentMinDateTicks, ref segmentMaxDateTicks, lastModified.Ticks);
                        continue;
                    }
                }
                else
                {
                    Log.Error("unable to read blob modified date", blobRef);

                    // Updated atomically, like every other counter in this method.
                    Interlocked.Increment(ref TotalErrors);
                }
            }
        }
        /// <summary>
        /// Walks one blob listing segment. Directories are recursed into; each blob that passes the
        /// configured filters and time range is listed, linked, or given a download action and passed
        /// to the ingest callback.
        /// </summary>
        /// <param name="blobResultSegment">Listing segment whose <c>Results</c> are processed.</param>
        private void QueueBlobSegmentDownload(BlobResultSegment blobResultSegment)
        {
            int parentId = Thread.CurrentThread.ManagedThreadId;

            Log.Debug($"enter. current id:{parentId}. results count: {blobResultSegment.Results.Count()}");

            foreach (var blob in blobResultSegment.Results)
            {
                ICloudBlob blobRef = null;
                Log.Debug($"parent id:{parentId} current Id:{Thread.CurrentThread.ManagedThreadId}");

                if (blob is CloudBlobDirectory)
                {
                    if (!string.IsNullOrEmpty(Config.NodeFilter) && !Regex.IsMatch(blob.Uri.ToString(), Config.NodeFilter, RegexOptions.IgnoreCase))
                    {
                        Log.Warning($"blob:{blob.Uri} does not match nodeFilter pattern:{Config.NodeFilter}, skipping...", ConsoleColor.Yellow);
                        continue;
                    }

                    DownloadBlobsFromDirectory(blob as CloudBlobDirectory);
                    Log.Debug("blob is directory.");
                    continue;
                }

                Interlocked.Increment(ref TotalFilesEnumerated);

                if (!string.IsNullOrEmpty(Config.UriFilter) && !Regex.IsMatch(blob.Uri.ToString(), Config.UriFilter, RegexOptions.IgnoreCase))
                {
                    Interlocked.Increment(ref TotalFilesSkipped);
                    Log.Warning($"blob:{blob.Uri} does not match uriFilter pattern:{Config.UriFilter}, skipping...", ConsoleColor.Yellow);
                    continue;
                }

                // Evaluate the file-name pattern once and reuse the match for the tick extraction.
                Match tickMatch = Regex.Match(blob.Uri.ToString(), FileFilterPattern, RegexOptions.IgnoreCase);

                if (tickMatch.Success)
                {
                    long ticks = Convert.ToInt64(tickMatch.Groups[1].Value);

                    // Cheap pre-filter on the ticks embedded in the URI, before any server round trip.
                    if (ticks < Config.StartTimeUtc.Ticks || ticks > Config.EndTimeUtc.Ticks)
                    {
                        Interlocked.Increment(ref TotalFilesSkipped);
                        Log.Debug($"exclude:bloburi ticks outside of time range:{blob.Uri}");
                        continue;
                    }
                }

                try
                {
                    Log.Debug($"file Blob: {blob.Uri}");

                    // GetAwaiter().GetResult() rethrows the task's original exception. Task.Result would
                    // wrap it in AggregateException, which the StorageException handler would not catch.
                    blobRef = blob.Container.ServiceClient.GetBlobReferenceFromServerAsync(blob.Uri).GetAwaiter().GetResult();
                }
                catch (StorageException se)
                {
                    Interlocked.Increment(ref TotalErrors);
                    Log.Exception($"getting ref for {blob.Uri}, skipping. {se.Message}");
                    continue;
                }

                if (blobRef.Properties.LastModified.HasValue)
                {
                    DateTimeOffset lastModified = blobRef.Properties.LastModified.Value;
                    if (Config.FileType != FileTypesEnum.any && !FileTypes.MapFileTypeUri(blob.Uri.AbsolutePath).Equals(Config.FileType))
                    {
                        Interlocked.Increment(ref TotalFilesSkipped);
                        Log.Debug($"skipping uri with incorrect file type: {FileTypes.MapFileTypeUri(blob.Uri.AbsolutePath)}");
                        continue;
                    }

                    // Blobs modified outside the configured window fall through without being counted.
                    if (lastModified >= Config.StartTimeUtc && lastModified <= Config.EndTimeUtc)
                    {
                        Interlocked.Increment(ref TotalFilesMatched);

                        if (Config.List)
                        {
                            // List mode only reports the match; nothing is queued.
                            Log.Info($"listing file with timestamp: {lastModified}\r\n file: {blob.Uri.AbsolutePath}");
                            continue;
                        }

                        if (ReturnSourceFileLink)
                        {
                            // Hand back a link rooted at the blob endpoint instead of downloading.
                            IngestCallback?.Invoke(new FileObject(blob.Uri.AbsolutePath, Config.SasEndpointInfo.BlobEndpoint)
                            {
                                Length = blobRef.Properties.Length
                            });
                            continue;
                        }

                        FileObject fileObject = new FileObject(blob.Uri.AbsolutePath, Config.CacheLocation);
                        Log.Info($"queueing blob with timestamp: {lastModified}\r\n file: {blob.Uri.AbsolutePath}");

                        if (!fileObject.Exists)
                        {
                            // The download is deferred: the action runs whenever the consumer invokes it.
                            fileObject.DownloadAction = () =>
                            {
                                ((CloudBlockBlob)blob).DownloadToStreamAsync(fileObject.Stream.Get(), null,
                                                                             new BlobRequestOptions()
                                {
                                    RetryPolicy = new IngestRetryPolicy(),
                                    ParallelOperationThreadCount = Config.Threads
                                }, null).Wait();
                            };

                            IngestCallback?.Invoke(fileObject);
                            Interlocked.Increment(ref TotalFilesDownloaded);
                        }
                        else
                        {
                            // The destination already exists: ingest it without attaching a download action.
                            Log.Warning($"destination file exists. skipping download:\r\n file: {fileObject}");
                            IngestCallback?.Invoke(fileObject);
                        }
                    }
                }
                else
                {
                    Log.Error("unable to read blob modified date", blobRef);

                    // Updated atomically, like every other counter in this method.
                    Interlocked.Increment(ref TotalErrors);
                }
            }
        }