/// <summary>
/// Routes a file to ingestion. A file whose <c>RelativeUri</c> is rejected by
/// <c>CanIngest</c> is logged as a warning and skipped. Otherwise the file goes to
/// <c>IngestSingleFile</c> when <c>KustoUseBlobAsSource</c> is set and the file is
/// source-file-link compliant; in every other case the output of
/// <c>FileMgr.ProcessFile</c> is handed to <c>IngestMultipleFiles</c>.
/// </summary>
/// <param name="fileObject">File to ingest.</param>
public void AddFile(FileObject fileObject)
{
    Log.Debug("enter");

    if (CanIngest(fileObject.RelativeUri))
    {
        bool ingestFromSourceBlob = _config.KustoUseBlobAsSource && fileObject.IsSourceFileLinkCompliant();

        if (!ingestFromSourceBlob)
        {
            // Not eligible for direct blob ingestion: process the file first.
            IngestMultipleFiles(_instance.FileMgr.ProcessFile(fileObject));
            return;
        }

        IngestSingleFile(fileObject);
        return;
    }

    Log.Warning($"file already ingested. skipping: {fileObject.RelativeUri}");
}
/// <summary>
/// Posts one ingestion message for <paramref name="fileObject"/>. The blob URI placed in
/// the message is either the file's own URI with the configured SAS token appended (when
/// <c>KustoUseBlobAsSource</c> is set and the file is source-file-link compliant) or the
/// value returned by <c>UploadFileToBlobContainer</c>.
/// </summary>
/// <param name="fileObject">File to ingest.</param>
private void IngestSingleFile(FileObject fileObject)
{
    string mapping = SetIngestionMapping(fileObject);

    // Item1 is used as the queue to post to, Item2 as the container for uploads.
    Tuple<string, string> queuePair = GetNextIngestionQueue();
    string targetQueue = queuePair.Item1;
    string uploadContainer = queuePair.Item2;

    string sourceUri;
    bool useSourceBlob = _config.KustoUseBlobAsSource && fileObject.IsSourceFileLinkCompliant();

    if (!useSourceBlob)
    {
        string uploadName = Path.GetFileName(fileObject.FileUri);
        sourceUri = UploadFileToBlobContainer(fileObject, uploadContainer, fileObject.NodeName, uploadName);
    }
    else
    {
        sourceUri = $"{fileObject.FileUri}{_config.SasEndpointInfo.SasToken}";
    }

    PostMessageToQueue(targetQueue, PrepareIngestionMessage(sourceUri, fileObject.Length, mapping), fileObject);
}
/// <summary>
/// Processes one batch of blob listing results. Directory entries are recursed into via
/// <c>DownloadBlobsFromDirectory</c> (subject to <c>NodeFilter</c>). File entries are filtered
/// by the ticks captured from the blob name, by <c>UriFilter</c>, by <c>FileType</c> and by
/// the blob's last-modified time; entries that pass are listed, handed to
/// <c>IngestCallback</c>, or queued through <c>InvokeCallback</c>.
/// </summary>
/// <remarks>
/// Counters on <c>_instance</c> (<c>TotalFilesEnumerated</c>, <c>TotalFilesSkipped</c>,
/// <c>TotalFilesMatched</c>, <c>TotalErrors</c>) are updated as entries are classified.
/// A blob whose reference cannot be fetched because of a <c>StorageException</c> is counted
/// as an error and skipped; any other failure propagates to the caller.
/// </remarks>
/// <param name="blobResults">Blob listing entries to examine.</param>
private void QueueBlobSegmentDownload(IEnumerable<IListBlobItem> blobResults)
{
    int parentId = Thread.CurrentThread.ManagedThreadId;

    // Materialize once so a lazily evaluated sequence is not enumerated twice
    // (once for the count in the log line and once by the loop).
    IList<IListBlobItem> blobs = blobResults as IList<IListBlobItem> ?? blobResults.ToList();
    Log.Debug($"enter. current id:{parentId}. results count: {blobs.Count}");

    foreach (IListBlobItem blob in blobs)
    {
        ICloudBlob blobRef = null;
        Log.Debug($"parent id:{parentId} current Id:{Thread.CurrentThread.ManagedThreadId}");

        CloudBlobDirectory directory = blob as CloudBlobDirectory;
        if (directory != null)
        {
            if (!string.IsNullOrEmpty(_config.NodeFilter) && !Regex.IsMatch(blob.Uri.ToString(), _config.NodeFilter, RegexOptions.IgnoreCase))
            {
                Log.Debug($"blob:{blob.Uri} does not match nodeFilter pattern:{_config.NodeFilter}, skipping...");
                continue;
            }

            DownloadBlobsFromDirectory(directory);
            Log.Debug("blob is directory.");
            continue;
        }

        _instance.TotalFilesEnumerated++;
        string blobUri = blob.Uri.ToString();

        // Cheap pre-filter: when the name matches the file pattern, group 1 is read as ticks
        // and compared with the configured window before any call is made for the blob itself.
        Match nameMatch = Regex.Match(blobUri, _fileFilterPattern, RegexOptions.IgnoreCase);
        if (nameMatch.Success)
        {
            long ticks = Convert.ToInt64(nameMatch.Groups[1].Value);
            if (ticks < _config.StartTimeUtc.Ticks || ticks > _config.EndTimeUtc.Ticks)
            {
                _instance.TotalFilesSkipped++;
                Log.Debug($"exclude:bloburi file ticks {new DateTime(ticks).ToString("o")} outside of time range:{blob.Uri}");
                _instance.SetMinMaxDate(ticks);
                continue;
            }
        }
        else
        {
            Log.Debug($"regex not matched: {blobUri} pattern: {_fileFilterPattern}");
        }

        try
        {
            Log.Debug($"file Blob: {blob.Uri}");
            blobRef = blob.Container.ServiceClient.GetBlobReferenceFromServerAsync(blob.Uri).Result;
        }
        catch (Exception e) when (e is StorageException || (e is AggregateException && e.InnerException is StorageException))
        {
            // Task<T>.Result surfaces failures wrapped in AggregateException, so a bare
            // catch (StorageException) would miss them and abort the whole segment.
            // Both the direct and the wrapped form are treated as "skip this blob".
            StorageException se = e as StorageException ?? (StorageException)e.InnerException;
            _instance.TotalErrors++;
            Log.Exception($"getting ref for {blob.Uri}, skipping. {se.Message}");
            continue;
        }

        if (!blobRef.Properties.LastModified.HasValue)
        {
            Log.Error("unable to read blob modified date", blobRef);
            _instance.TotalErrors++;
            continue;
        }

        DateTimeOffset lastModified = blobRef.Properties.LastModified.Value;
        _instance.SetMinMaxDate(lastModified.Ticks);

        if (!string.IsNullOrEmpty(_config.UriFilter) && !Regex.IsMatch(blobUri, _config.UriFilter, RegexOptions.IgnoreCase))
        {
            _instance.TotalFilesSkipped++;
            Log.Debug($"blob:{blob.Uri} does not match uriFilter pattern:{_config.UriFilter}, skipping...");
            continue;
        }

        if (_config.FileType != FileTypesEnum.any && !FileTypes.MapFileTypeUri(blob.Uri.AbsolutePath).Equals(_config.FileType))
        {
            _instance.TotalFilesSkipped++;
            Log.Debug($"skipping uri with incorrect file type: {FileTypes.MapFileTypeUri(blob.Uri.AbsolutePath)}");
            continue;
        }

        if (lastModified < _config.StartTimeUtc || lastModified > _config.EndTimeUtc)
        {
            _instance.TotalFilesSkipped++;
            Log.Debug($"exclude:bloburi {lastModified.ToString("o")} outside of time range:{blob.Uri}");
            _instance.SetMinMaxDate(lastModified.Ticks);
            continue;
        }

        _instance.TotalFilesMatched++;

        if (_config.List)
        {
            // List-only mode: report the match and do nothing else with it.
            Log.Info($"listing file with timestamp: {lastModified}\r\n file: {blob.Uri.AbsolutePath}");
            continue;
        }

        FileObject fileObject = new FileObject(blob.Uri.AbsolutePath, _config.CacheLocation)
        {
            LastModified = lastModified,
            Status = FileStatus.enumerated
        };

        if (_instance.FileObjects.FindByUriFirstOrDefault(fileObject.RelativeUri).Status == FileStatus.existing)
        {
            Log.Info($"{fileObject} already exists. skipping", ConsoleColor.DarkYellow);
            continue;
        }

        _instance.FileObjects.Add(fileObject);

        if (ReturnSourceFileLink && fileObject.IsSourceFileLinkCompliant())
        {
            // Hand the blob location to the ingest callback instead of queueing a download.
            fileObject.BaseUri = _config.SasEndpointInfo.BlobEndpoint;
            fileObject.FileUri = blob.Uri.AbsolutePath;
            IngestCallback?.Invoke(fileObject);
            continue;
        }

        Log.Info($"queueing blob with timestamp: {lastModified}\r\n file: {blob.Uri.AbsolutePath}");

        // NOTE(review): Properties.Length is narrowed to int here; a length above int.MaxValue
        // would not survive the cast. Confirm whether InvokeCallback can accept a long.
        InvokeCallback(blob, fileObject, (int)blobRef.Properties.Length);
    }
}