Esempio n. 1
0
        /// <summary>
        /// Entry point for handing a file to ingestion. The file is skipped (with a warning)
        /// when <c>CanIngest</c> rejects its relative uri; otherwise it is ingested directly via
        /// <c>IngestSingleFile</c> when blob-as-source is enabled and the file is source-link
        /// compliant, or processed through the file manager and passed to <c>IngestMultipleFiles</c>.
        /// </summary>
        /// <param name="fileObject">File to ingest.</param>
        public void AddFile(FileObject fileObject)
        {
            Log.Debug("enter");

            bool ingestable = CanIngest(fileObject.RelativeUri);

            if (!ingestable)
            {
                Log.Warning($"file already ingested. skipping: {fileObject.RelativeUri}");
                return;
            }

            // Short-circuit: the compliance check only runs when blob-as-source is enabled.
            bool ingestDirectlyFromBlob = _config.KustoUseBlobAsSource && fileObject.IsSourceFileLinkCompliant();

            if (!ingestDirectlyFromBlob)
            {
                IngestMultipleFiles(_instance.FileMgr.ProcessFile(fileObject));
                return;
            }

            IngestSingleFile(fileObject);
        }
Esempio n. 2
0
        /// <summary>
        /// Builds an ingestion message for one file and posts it to the next ingestion queue.
        /// When blob-as-source is enabled and the file is source-link compliant, the message points at
        /// the file's own uri with the configured SAS token appended; otherwise the file is uploaded
        /// with <c>UploadFileToBlobContainer</c> and the value it returns is used instead.
        /// </summary>
        /// <param name="fileObject">File to ingest.</param>
        private void IngestSingleFile(FileObject fileObject)
        {
            // The mapping is resolved before the next queue pair is requested.
            string mapping = SetIngestionMapping(fileObject);
            Tuple<string, string> queuePair = GetNextIngestionQueue();
            string targetQueue = queuePair.Item1;
            string uploadContainer = queuePair.Item2;
            string sourceUri;

            bool useOriginalBlob = _config.KustoUseBlobAsSource && fileObject.IsSourceFileLinkCompliant();

            if (!useOriginalBlob)
            {
                string uploadName = Path.GetFileName(fileObject.FileUri);
                sourceUri = UploadFileToBlobContainer(fileObject, uploadContainer, fileObject.NodeName, uploadName);
            }
            else
            {
                sourceUri = $"{fileObject.FileUri}{_config.SasEndpointInfo.SasToken}";
            }

            var message = PrepareIngestionMessage(sourceUri, fileObject.Length, mapping);
            PostMessageToQueue(targetQueue, message, fileObject);
        }
        /// <summary>
        /// Processes one batch of blob listing results. Directories are filtered by the configured
        /// node filter and handed to <c>DownloadBlobsFromDirectory</c>. Every other entry is counted,
        /// filtered by the ticks captured from its uri (when the file filter pattern matches), by the
        /// uri filter, by file type and by last-modified time, and is then either listed, passed to
        /// <c>IngestCallback</c> as a source file link, or queued through <c>InvokeCallback</c>.
        /// </summary>
        /// <param name="blobResults">Listing entries to process.</param>
        private void QueueBlobSegmentDownload(IEnumerable<IListBlobItem> blobResults)
        {
            int parentId = Thread.CurrentThread.ManagedThreadId;

            // Materialize once: the sequence is counted for the log line and then iterated, so a
            // lazily evaluated source would otherwise be enumerated twice.
            List<IListBlobItem> blobItems = blobResults.ToList();

            Log.Debug($"enter. current id:{parentId}. results count: {blobItems.Count}");

            foreach (IListBlobItem blob in blobItems)
            {
                ICloudBlob blobRef = null;
                Log.Debug($"parent id:{parentId} current Id:{Thread.CurrentThread.ManagedThreadId}");

                CloudBlobDirectory blobDirectory = blob as CloudBlobDirectory;

                if (blobDirectory != null)
                {
                    if (!string.IsNullOrEmpty(_config.NodeFilter) && !Regex.IsMatch(blob.Uri.ToString(), _config.NodeFilter, RegexOptions.IgnoreCase))
                    {
                        Log.Debug($"blob:{blob.Uri} does not match nodeFilter pattern:{_config.NodeFilter}, skipping...");
                        continue;
                    }

                    DownloadBlobsFromDirectory(blobDirectory);
                    Log.Debug("blob is directory.");
                    continue;
                }

                _instance.TotalFilesEnumerated++;

                // Evaluate the file filter once and reuse the match for the captured ticks.
                Match fileFilterMatch = Regex.Match(blob.Uri.ToString(), _fileFilterPattern, RegexOptions.IgnoreCase);

                if (fileFilterMatch.Success)
                {
                    // The first capture group of the file filter pattern is parsed as a tick count.
                    long ticks = Convert.ToInt64(fileFilterMatch.Groups[1].Value);

                    if (ticks < _config.StartTimeUtc.Ticks || ticks > _config.EndTimeUtc.Ticks)
                    {
                        _instance.TotalFilesSkipped++;
                        Log.Debug($"exclude:bloburi file ticks {new DateTime(ticks).ToString("o")} outside of time range:{blob.Uri}");

                        _instance.SetMinMaxDate(ticks);
                        continue;
                    }
                }
                else
                {
                    Log.Debug($"regex not matched: {blob.Uri.ToString()} pattern: {_fileFilterPattern}");
                }

                try
                {
                    Log.Debug($"file Blob: {blob.Uri}");

                    // GetAwaiter().GetResult() rethrows the original exception. Task.Result would wrap
                    // it in an AggregateException, which the StorageException handler below would
                    // never catch. This still blocks the calling thread, as before.
                    blobRef = blob.Container.ServiceClient.GetBlobReferenceFromServerAsync(blob.Uri).GetAwaiter().GetResult();
                }
                catch (StorageException se)
                {
                    _instance.TotalErrors++;
                    Log.Exception($"getting ref for {blob.Uri}, skipping. {se.Message}");
                    continue;
                }

                if (blobRef.Properties.LastModified.HasValue)
                {
                    DateTimeOffset lastModified = blobRef.Properties.LastModified.Value;
                    _instance.SetMinMaxDate(lastModified.Ticks);

                    if (!string.IsNullOrEmpty(_config.UriFilter) && !Regex.IsMatch(blob.Uri.ToString(), _config.UriFilter, RegexOptions.IgnoreCase))
                    {
                        _instance.TotalFilesSkipped++;
                        Log.Debug($"blob:{blob.Uri} does not match uriFilter pattern:{_config.UriFilter}, skipping...");
                        continue;
                    }

                    if (_config.FileType != FileTypesEnum.any &&
                        !FileTypes.MapFileTypeUri(blob.Uri.AbsolutePath).Equals(_config.FileType))
                    {
                        _instance.TotalFilesSkipped++;
                        Log.Debug($"skipping uri with incorrect file type: {FileTypes.MapFileTypeUri(blob.Uri.AbsolutePath)}");
                        continue;
                    }

                    if (lastModified >= _config.StartTimeUtc && lastModified <= _config.EndTimeUtc)
                    {
                        _instance.TotalFilesMatched++;

                        // List mode only reports the match; nothing is queued.
                        if (_config.List)
                        {
                            Log.Info($"listing file with timestamp: {lastModified}\r\n file: {blob.Uri.AbsolutePath}");
                            continue;
                        }

                        FileObject fileObject = new FileObject(blob.Uri.AbsolutePath, _config.CacheLocation)
                        {
                            LastModified = lastModified,
                            Status       = FileStatus.enumerated
                        };

                        if (_instance.FileObjects.FindByUriFirstOrDefault(fileObject.RelativeUri).Status == FileStatus.existing)
                        {
                            Log.Info($"{fileObject} already exists. skipping", ConsoleColor.DarkYellow);
                            continue;
                        }

                        _instance.FileObjects.Add(fileObject);

                        if (ReturnSourceFileLink && fileObject.IsSourceFileLinkCompliant())
                        {
                            fileObject.BaseUri = _config.SasEndpointInfo.BlobEndpoint;
                            fileObject.FileUri = blob.Uri.AbsolutePath;
                            IngestCallback?.Invoke(fileObject);
                            continue;
                        }

                        Log.Info($"queueing blob with timestamp: {lastModified}\r\n file: {blob.Uri.AbsolutePath}");

                        // NOTE(review): the length is narrowed to int here; if Length can exceed
                        // int.MaxValue the value passed on would be wrong - confirm against InvokeCallback.
                        InvokeCallback(blob, fileObject, (int)blobRef.Properties.Length);
                    }
                    else
                    {
                        _instance.TotalFilesSkipped++;
                        Log.Debug($"exclude:bloburi {lastModified.ToString("o")} outside of time range:{blob.Uri}");

                        _instance.SetMinMaxDate(lastModified.Ticks);
                        continue;
                    }
                }
                else
                {
                    Log.Error("unable to read blob modified date", blobRef);
                    _instance.TotalErrors++;
                }
            }
        }