Ejemplo n.º 1
0
        private void DownloadAzureData()
        {
            string containerPrefix = null;
            string tablePrefix     = null;
            string clusterId       = DetermineClusterId();

            if (!Config.FileType.Equals(FileTypesEnum.any))
            {
                containerPrefix = FileTypes.MapFileTypeUriPrefix(Config.FileType);
                tablePrefix     = containerPrefix + clusterId.Replace("-", "");
            }

            if (!string.IsNullOrEmpty(clusterId))
            {
                // 's-' in prefix may not always be correct
                containerPrefix += "s-" + clusterId;
            }

            if (Config.FileType == FileTypesEnum.table)
            {
                TableManager tableMgr = new TableManager()
                {
                    IngestCallback = (exportedFile) => { QueueForIngest(exportedFile); }
                };

                if (tableMgr.Connect())
                {
                    tableMgr.DownloadTables(tablePrefix);
                }
            }
            else
            {
                BlobManager blobMgr = new BlobManager()
                {
                    IngestCallback       = (sourceFileUri) => { QueueForIngest(sourceFileUri); },
                    ReturnSourceFileLink = Config.IsKustoConfigured() & Config.KustoUseBlobAsSource
                };

                if (blobMgr.Connect())
                {
                    blobMgr.DownloadContainers(containerPrefix);
                }
            }
        }
Ejemplo n.º 2
0
        private void QueueBlobSegmentDownload(BlobResultSegment blobResultSegment)
        {
            int parentId = Thread.CurrentThread.ManagedThreadId;

            Log.Debug($"enter. current id:{parentId}. results count: {blobResultSegment.Results.Count()}");
            long segmentMinDateTicks = Interlocked.Read(ref DiscoveredMinDateTicks);
            long segmentMaxDateTicks = Interlocked.Read(ref DiscoveredMaxDateTicks);

            foreach (IListBlobItem blob in blobResultSegment.Results)
            {
                ICloudBlob blobRef = null;
                Log.Debug($"parent id:{parentId} current Id:{Thread.CurrentThread.ManagedThreadId}");

                if (blob is CloudBlobDirectory)
                {
                    if (!string.IsNullOrEmpty(Config.NodeFilter) && !Regex.IsMatch(blob.Uri.ToString(), Config.NodeFilter, RegexOptions.IgnoreCase))
                    {
                        Log.Debug($"blob:{blob.Uri} does not match nodeFilter pattern:{Config.NodeFilter}, skipping...");
                        continue;
                    }

                    DownloadBlobsFromDirectory(blob as CloudBlobDirectory);
                    Log.Debug("blob is directory.");
                    continue;
                }

                Interlocked.Increment(ref TotalFilesEnumerated);

                if (Regex.IsMatch(blob.Uri.ToString(), FileFilterPattern, RegexOptions.IgnoreCase))
                {
                    long ticks = Convert.ToInt64(Regex.Match(blob.Uri.ToString(), FileFilterPattern, RegexOptions.IgnoreCase).Groups[1].Value);

                    if (ticks <Config.StartTimeUtc.Ticks | ticks> Config.EndTimeUtc.Ticks)
                    {
                        Interlocked.Increment(ref TotalFilesSkipped);
                        Log.Debug($"exclude:bloburi file ticks {new DateTime(ticks).ToString("o")} outside of time range:{blob.Uri}");

                        SetMinMaxDate(ref segmentMinDateTicks, ref segmentMaxDateTicks, ticks);
                        continue;
                    }
                }
                else
                {
                    Log.Debug($"regex not matched: {blob.Uri.ToString()} pattern: {FileFilterPattern}");
                }

                try
                {
                    Log.Debug($"file Blob: {blob.Uri}");
                    blobRef = blob.Container.ServiceClient.GetBlobReferenceFromServerAsync(blob.Uri).Result;
                }
                catch (StorageException se)
                {
                    Interlocked.Increment(ref TotalErrors);
                    Log.Exception($"getting ref for {blob.Uri}, skipping. {se.Message}");
                    continue;
                }

                if (blobRef.Properties.LastModified.HasValue)
                {
                    DateTimeOffset lastModified = blobRef.Properties.LastModified.Value;
                    SetMinMaxDate(ref segmentMinDateTicks, ref segmentMaxDateTicks, lastModified.Ticks);

                    if (!string.IsNullOrEmpty(Config.UriFilter) && !Regex.IsMatch(blob.Uri.ToString(), Config.UriFilter, RegexOptions.IgnoreCase))
                    {
                        Interlocked.Increment(ref TotalFilesSkipped);
                        Log.Debug($"blob:{blob.Uri} does not match uriFilter pattern:{Config.UriFilter}, skipping...");
                        continue;
                    }

                    if (Config.FileType != FileTypesEnum.any &&
                        !FileTypes.MapFileTypeUri(blob.Uri.AbsolutePath).Equals(Config.FileType))
                    {
                        Interlocked.Increment(ref TotalFilesSkipped);
                        Log.Debug($"skipping uri with incorrect file type: {FileTypes.MapFileTypeUri(blob.Uri.AbsolutePath)}");
                        continue;
                    }

                    if (lastModified >= Config.StartTimeUtc && lastModified <= Config.EndTimeUtc)
                    {
                        Interlocked.Increment(ref TotalFilesMatched);

                        if (Config.List)
                        {
                            Log.Info($"listing file with timestamp: {lastModified}\r\n file: {blob.Uri.AbsolutePath}");
                            continue;
                        }

                        if (ReturnSourceFileLink)
                        {
                            IngestCallback?.Invoke(new FileObject(blob.Uri.AbsolutePath, Config.SasEndpointInfo.BlobEndpoint)
                            {
                                Length       = blobRef.Properties.Length,
                                LastModified = lastModified
                            });
                            continue;
                        }

                        FileObject fileObject = new FileObject(blob.Uri.AbsolutePath, Config.CacheLocation)
                        {
                            Length       = blobRef.Properties.Length,
                            LastModified = lastModified
                        };

                        Log.Info($"queueing blob with timestamp: {lastModified}\r\n file: {blob.Uri.AbsolutePath}");
                        InvokeCallback(blob, fileObject);
                    }
                    else
                    {
                        Interlocked.Increment(ref TotalFilesSkipped);
                        Log.Debug($"exclude:bloburi {lastModified.ToString("o")} outside of time range:{blob.Uri}");

                        SetMinMaxDate(ref segmentMinDateTicks, ref segmentMaxDateTicks, lastModified.Ticks);
                        continue;
                    }
                }
                else
                {
                    Log.Error("unable to read blob modified date", blobRef);
                    TotalErrors++;
                }
            }
        }
        private void QueueBlobSegmentDownload(BlobResultSegment blobResultSegment)
        {
            int parentId = Thread.CurrentThread.ManagedThreadId;

            Log.Debug($"enter. current id:{parentId}. results count: {blobResultSegment.Results.Count()}");

            foreach (var blob in blobResultSegment.Results)
            {
                ICloudBlob blobRef = null;
                Log.Debug($"parent id:{parentId} current Id:{Thread.CurrentThread.ManagedThreadId}");

                if (blob is CloudBlobDirectory)
                {
                    if (!string.IsNullOrEmpty(Config.NodeFilter) && !Regex.IsMatch(blob.Uri.ToString(), Config.NodeFilter, RegexOptions.IgnoreCase))
                    {
                        Log.Warning($"blob:{blob.Uri} does not match nodeFilter pattern:{Config.NodeFilter}, skipping...", ConsoleColor.Yellow);
                        continue;
                    }

                    DownloadBlobsFromDirectory(blob as CloudBlobDirectory);
                    Log.Debug("blob is directory.");
                    continue;
                }

                Interlocked.Increment(ref TotalFilesEnumerated);

                if (!string.IsNullOrEmpty(Config.UriFilter) && !Regex.IsMatch(blob.Uri.ToString(), Config.UriFilter, RegexOptions.IgnoreCase))
                {
                    Interlocked.Increment(ref TotalFilesSkipped);
                    Log.Warning($"blob:{blob.Uri} does not match uriFilter pattern:{Config.UriFilter}, skipping...", ConsoleColor.Yellow);
                    continue;
                }

                if (Regex.IsMatch(blob.Uri.ToString(), FileFilterPattern, RegexOptions.IgnoreCase))
                {
                    long ticks = Convert.ToInt64(Regex.Match(blob.Uri.ToString(), FileFilterPattern, RegexOptions.IgnoreCase).Groups[1].Value);

                    if (ticks <Config.StartTimeUtc.Ticks | ticks> Config.EndTimeUtc.Ticks)
                    {
                        Interlocked.Increment(ref TotalFilesSkipped);
                        Log.Debug($"exclude:bloburi ticks outside of time range:{blob.Uri}");
                        continue;
                    }
                }

                try
                {
                    Log.Debug($"file Blob: {blob.Uri}");
                    blobRef = blob.Container.ServiceClient.GetBlobReferenceFromServerAsync(blob.Uri).Result;
                }
                catch (StorageException se)
                {
                    Interlocked.Increment(ref TotalErrors);
                    Log.Exception($"getting ref for {blob.Uri}, skipping. {se.Message}");
                    continue;
                }

                if (blobRef.Properties.LastModified.HasValue)
                {
                    DateTimeOffset lastModified = blobRef.Properties.LastModified.Value;
                    if (Config.FileType != FileTypesEnum.any && !FileTypes.MapFileTypeUri(blob.Uri.AbsolutePath).Equals(Config.FileType))
                    {
                        Interlocked.Increment(ref TotalFilesSkipped);
                        Log.Debug($"skipping uri with incorrect file type: {FileTypes.MapFileTypeUri(blob.Uri.AbsolutePath)}");
                        continue;
                    }

                    if (lastModified >= Config.StartTimeUtc && lastModified <= Config.EndTimeUtc)
                    {
                        Interlocked.Increment(ref TotalFilesMatched);

                        if (Config.List)
                        {
                            Log.Info($"listing file with timestamp: {lastModified}\r\n file: {blob.Uri.AbsolutePath}");
                            continue;
                        }

                        if (ReturnSourceFileLink)
                        {
                            IngestCallback?.Invoke(new FileObject(blob.Uri.AbsolutePath, Config.SasEndpointInfo.BlobEndpoint)
                            {
                                Length = blobRef.Properties.Length
                            });
                            continue;
                        }

                        FileObject fileObject = new FileObject(blob.Uri.AbsolutePath, Config.CacheLocation);
                        Log.Info($"queueing blob with timestamp: {lastModified}\r\n file: {blob.Uri.AbsolutePath}");

                        if (!fileObject.Exists)
                        {
                            fileObject.DownloadAction = () =>
                            {
                                ((CloudBlockBlob)blob).DownloadToStreamAsync(fileObject.Stream.Get(), null,
                                                                             new BlobRequestOptions()
                                {
                                    RetryPolicy = new IngestRetryPolicy(),
                                    ParallelOperationThreadCount = Config.Threads
                                }, null).Wait();
                            };

                            IngestCallback?.Invoke(fileObject);
                            Interlocked.Increment(ref TotalFilesDownloaded);
                        }
                        else
                        {
                            Log.Warning($"destination file exists. skipping download:\r\n file: {fileObject}");
                            IngestCallback?.Invoke(fileObject);
                        }
                    }
                }
                else
                {
                    Log.Error("unable to read blob modified date", blobRef);
                    TotalErrors++;
                }
            }
        }