/// <summary>
/// Runs the "ZipCodes.dtsx" package through <c>ExecutePackageAsync</c>, applying
/// <c>ConfigureCommonParameters</c> to it before execution.
/// </summary>
/// <param name="fetch">Not used by this method.</param>
/// <returns>The task returned by <c>ExecutePackageAsync</c>.</returns>
private T.Task LoadZipCodesAsync(DataSourceFetche fetch)
    => ExecutePackageAsync("ZipCodes.dtsx", package => { ConfigureCommonParameters(package); });
/// <summary>
/// Runs the "InternationalClassificationDiseases.dtsx" package through
/// <c>ExecutePackageAsync</c>, applying <c>ConfigureCommonParameters</c> to it before execution.
/// </summary>
/// <param name="fetch">Not used by this method.</param>
/// <returns>The task returned by <c>ExecutePackageAsync</c>.</returns>
private T.Task LoadInternationalClassificationDiseasesAsync(DataSourceFetche fetch)
    => ExecutePackageAsync("InternationalClassificationDiseases.dtsx", package => { ConfigureCommonParameters(package); });
/// <summary>
/// Fetches a single web resource: issues a HEAD request to build the <c>FileDetails</c> for
/// <paramref name="u"/>, then hands off to <c>FetchTheItemAsync</c> with a callback that
/// downloads the body into a file under <c>Runner.TempFolderPath</c>.
/// </summary>
/// <param name="fetch">The fetch record the resulting item is attached to.</param>
/// <param name="u">The URL to download.</param>
/// <param name="handler">Shared handler used for every client created here.</param>
private async Task FetchTheWebItemAsync(DataSourceFetche fetch, Uri u, HttpClientHandler handler)
{
    FileDetails details;
    using (var client = Runner.HttpClientFactory.Create(handler, false))
    using (var request = new HttpRequestMessage(HttpMethod.Head, u))
    using (var resp = await client.SendAsync(request))
    {
        // The request and response are disposable; release them once the details are captured.
        // NOTE(review): assumes FileDetails copies what it needs from the response in its
        // constructor rather than reading the response body later - confirm.
        details = new FileDetails(resp);
    }

    await FetchTheItemAsync(
        fetch,
        details,
        DataSourceFetchItem.DataSourceFetchItemTypes.Original,
        null,
        async _ =>
        {
            var tfn = Stuff.FindOrigFileName(Path.Combine(Runner.TempFolderPath, details.Name));
            using (var client = Runner.HttpClientFactory.Create(handler, false))
            using (var st = await client.GetStreamAsync(u))
            using (var dst = File.Create(tfn))
            {
                await st.CopyToAsync(dst);
            }
            return tfn;
        });
}
/// <summary>
/// Runs the "Cmsgov.dtsx" package through <c>ExecutePackageAsync</c>, applying
/// <c>ConfigureCommonParameters</c> to it before execution.
/// </summary>
/// <param name="fetch">Not used by this method.</param>
/// <returns>The task returned by <c>ExecutePackageAsync</c>.</returns>
private T.Task LoadCmsGovAsync(DataSourceFetche fetch)
    => ExecutePackageAsync("Cmsgov.dtsx", package => { ConfigureCommonParameters(package); });
/// <summary>
/// Runs the "NationalDrugCode.dtsx" package through <c>ExecutePackageAsync</c>. The package
/// receives the common parameters and, when the configuration supplies one, a "DataUrl"
/// parameter override.
/// </summary>
/// <param name="fetch">Not used by this method.</param>
/// <returns>The task returned by <c>ExecutePackageAsync</c>.</returns>
private T.Task LoadNationalDrugCodeAsync(DataSourceFetche fetch)
{
    return ExecutePackageAsync("NationalDrugCode.dtsx", package =>
    {
        var ndcConfig = ConfigOptions.Value?.NationalDrugCode;
        ConfigureCommonParameters(package);

        // Leave the package's own DataUrl untouched when no override is configured.
        if (ndcConfig?.DataUrl == null)
        {
            return;
        }

        package.Parameters["DataUrl"].Value = ndcConfig.DataUrl.ToString();
    });
}
/// <summary>
/// Processes an FTP-configured data source: looks up credentials, connects an
/// <c>SftpClient</c> with password authentication, and fetches every configured folder path,
/// keeping only files whose names match <c>settings.FilePattern</c> (case-insensitive;
/// ".+" when no pattern is configured).
/// </summary>
/// <param name="fetch">The fetch record that downloaded items are attached to.</param>
/// <param name="settings">FTP settings; must not be null.</param>
async Task ProcessFetchAsync(DataSourceFetche fetch, DataSourceSettings.FtpSettings settings)
{
    Requires.NonNull(settings, nameof(settings));

    var credentials = await Runner.Vault.GetCredentialsAsync(settings.CredentialsKeyUri);
    var connectionInfo = new ConnectionInfo(
        settings.Hostname,
        settings.Port,
        credentials.Username,
        new PasswordAuthenticationMethod(credentials.Username, credentials.Password));

    var namePattern = new Regex(settings.FilePattern ?? ".+", RegexOptions.Compiled | RegexOptions.IgnoreCase);
    Predicate<string> filenameMatcher = candidate => namePattern.IsMatch(candidate);

    using (var sftp = new SftpClient(connectionInfo))
    {
        sftp.Connect();
        var folderTasks = settings.FolderPaths.ConvertAll(
            folder => FetchFtpFolderFilesAsync(sftp, fetch, filenameMatcher, folder));
        await Task.WhenAll(folderTasks);
    }
}
/// <summary>
/// Recursively walks <paramref name="path"/> on the SFTP server and passes every regular file
/// whose name satisfies <paramref name="filenameMatcher"/> to <c>FetchTheItemAsync</c>, with a
/// callback that downloads the file into a temp file under <c>Runner.TempFolderPath</c>.
/// Paths and files for which <c>IsAlreadyVisited</c> returns true are skipped.
/// </summary>
/// <param name="client">A connected SFTP client.</param>
/// <param name="fetch">The fetch record that downloaded items are attached to.</param>
/// <param name="filenameMatcher">Filter applied to each file's name (not its full path).</param>
/// <param name="path">The remote folder to list.</param>
private async Task FetchFtpFolderFilesAsync(SftpClient client, DataSourceFetche fetch, Predicate<string> filenameMatcher, string path)
{
    if (IsAlreadyVisited(path))
    {
        return;
    }

    client.ChangeDirectory(path);
    var entries = client.ListDirectory(path);
    await TaskWhenAllOneAtATime(
        entries.ConvertAll(async file =>
        {
            if (file.IsDirectory)
            {
                // SFTP listings commonly include the "." and ".." pseudo-entries. Recursing into
                // them would re-list this folder or climb into the parent, outside the
                // configured folder, so they are never followed.
                if (file.Name == "." || file.Name == "..")
                {
                    return;
                }

                await FetchFtpFolderFilesAsync(client, fetch, filenameMatcher, file.FullName);
            }
            else if (file.IsRegularFile)
            {
                if (IsAlreadyVisited(file.FullName))
                {
                    return;
                }

                if (!filenameMatcher(file.Name))
                {
                    return;
                }

                await FetchTheItemAsync(
                    fetch,
                    new FileDetails(file),
                    DataSourceFetchItem.DataSourceFetchItemTypes.Original,
                    null,
                    async fd =>
                    {
                        var fn = Stuff.GetTempFileName(Path.GetExtension(fd.FullName), Runner.TempFolderPath);
                        using (var st = File.Create(fn))
                        {
                            Trace.WriteLine($"Starting {fd.FullName} to [{fn}]");
                            // Wrap the Begin/End download pair in a task so it can be awaited.
                            await Task.Factory.FromAsync(
                                client.BeginDownloadFile(fd.FullName, st, null, null, amt => Trace.WriteLine($"Downloading {fd.FullName} to [{fn}] => {amt}/{fd.Size}")),
                                client.EndDownloadFile);
                            Trace.WriteLine($"Finishing {fd.FullName} to [{fn}]");
                        }
                        return fn;
                    });
            }
        }));
}
/// <summary>
/// Creates a new fetch record for <c>DS</c>, adds it to <c>Gdb.DataSourceFetches</c>, runs the
/// FTP or web fetch according to the data source settings, and then saves changes.
/// </summary>
/// <exception cref="InvalidOperationException">
/// The data source settings are neither FTP nor web.
/// </exception>
public async Task FetchAsync()
{
    var fetchRecord = new DataSourceFetche { DataSource = DS };
    Gdb.DataSourceFetches.Add(fetchRecord);

    Task processing;
    if (DS.DataSourceSettings.IsFtp)
    {
        processing = ProcessFetchAsync(fetchRecord, DS.DataSourceSettings.FTP);
    }
    else if (DS.DataSourceSettings.IsWeb)
    {
        processing = ProcessFetchAsync(fetchRecord, DS.DataSourceSettings.Web);
    }
    else
    {
        throw new InvalidOperationException("Unrecognized datasource");
    }

    await processing;
    await Gdb.SaveChangesAsync();
}
/// <summary>
/// Records one fetched item. Skips the download when matching evidence already exists,
/// otherwise obtains a local copy via <paramref name="fetchAsync"/>, hashes it, uploads it to
/// blob storage and persists a <c>DataSourceFetchItem</c>. Afterwards, PGP-named items are
/// decrypted and multi-entry zip archives are unpacked, each result being fed back through
/// this method as a child item.
/// </summary>
/// <param name="fetch">The fetch record the item belongs to.</param>
/// <param name="details">Name, size and other metadata of the item.</param>
/// <param name="dataSourceFetchItemType">Initial type recorded for the item.</param>
/// <param name="parentFetchItem">Parent item for decrypted/unpacked children; otherwise null.</param>
/// <param name="fetchAsync">Produces a local file holding the item's content and returns its path.</param>
/// <remarks>
/// Any exception raised while checking, downloading, hashing or uploading is caught and stored
/// on the item, which is then saved with type <c>Errored</c>.
/// </remarks>
private async Task FetchTheItemAsync(DataSourceFetche fetch, FileDetails details, DataSourceFetchItem.DataSourceFetchItemTypes dataSourceFetchItemType, DataSourceFetchItem parentFetchItem, Func<FileDetails, Task<string>> fetchAsync)
{
    string tfn = null;
    var item = new DataSourceFetchItem
    {
        DataSourceFetch = fetch,
        DataSourceFetchItemType = dataSourceFetchItemType,
        ParentDataSourceFetchItem = parentFetchItem,
        Size = details.Size,
        Name = details.Name,
    };
    item.DataSourceFetchItemProperties.LastModifiedAtUtc = details.LastModifiedAtUtc;
    item.DataSourceFetchItemProperties.ContentMD5 = details.ContentMD5;
    item.DataSourceFetchItemProperties.ETag = details.ETag;
    try
    {
        Trace.WriteLine($"Checking {details.FullName} size={details.Size} LastWriteTimeUtc={details.LastModifiedAtUtc}");

        // First duplicate check: evidence derived from the metadata alone, before any download.
        var sameDataSourceReplicatedDataSourceFetchItem = FindEvidenceItems(details.CreateEvidence()).FirstOrDefault();
        if (sameDataSourceReplicatedDataSourceFetchItem != null)
        {
            item.DataSourceFetchItemType = DataSourceFetchItem.DataSourceFetchItemTypes.Duplicate;
            item.SameDataSourceReplicatedDataSourceFetchItem = sameDataSourceReplicatedDataSourceFetchItem;
            return;
        }

        tfn = await fetchAsync(details);
        using (var st = File.OpenRead(tfn))
        {
            item.Size = st.Length;
            using (var muxer = new StreamMuxer(st, true))
            {
                var p = new BlobStorageServices.FileProperties { LastModifiedAtUtc = details.LastModifiedAtUtc };
                p.Metadata[BlobStorageServices.MetaKeyNames.SourcePath] = details.Folder;
                p.Metadata[BlobStorageServices.MetaKeyNames.SourceFullName] = details.FullName;

                var urns = new List<string>();
                // List<T> is not safe for concurrent writers, so adds from the parallel hash
                // workers are serialized through this gate.
                var urnsGate = new object();
                Parallel.ForEach(
                    new[]
                    {
                        Hash.CommonHashAlgorithmNames.Md5,
                        Hash.CommonHashAlgorithmNames.Sha1,
                        Hash.CommonHashAlgorithmNames.Sha512,
                    },
                    hashAlgName =>
                    {
                        var urn = Hash.Compute(muxer.OpenRead(), hashAlgName).Urn;
                        if (urn == null)
                        {
                            return; // yes... in some cases this somehow happens...
                        }
                        lock (urnsGate)
                        {
                            urns.Add(urn);
                        }
                    });

                if (urns.Count > 0)
                {
                    p.Metadata[BlobStorageServices.MetaKeyNames.Urns] = CSV.FormatLine(urns, false);

                    // Second duplicate check: by content hash, now that the bytes are local.
                    sameDataSourceReplicatedDataSourceFetchItem = FindEvidenceItems(urns).FirstOrDefault();
                    if (sameDataSourceReplicatedDataSourceFetchItem != null)
                    {
                        item.DataSourceFetchItemType = DataSourceFetchItem.DataSourceFetchItemTypes.Duplicate;
                        item.SameDataSourceReplicatedDataSourceFetchItem = sameDataSourceReplicatedDataSourceFetchItem;
                        return;
                    }
                }

                var res = await BlobStorageServices.StoreStreamAsync(
                    Runner.BlobConfig,
                    BlobStorageServices.ContainerNames.Secure,
                    $"{BlobRootPath}{details.Folder.Substring(1)}{details.Name}",
                    muxer.OpenRead(),
                    p,
                    amt => Trace.WriteLine($"Uploading {amt}/{muxer.Length}")
                );
                item.DataSourceFetchItemProperties = new DataSourceFetchItemProperties();
                item.DataSourceFetchItemProperties.Set(p);
                item.Url = res.Uri.ToString();
                PopulateEvidence(item);
            }
        }
    }
    catch (Exception ex)
    {
        item.DataSourceFetchItemType = DataSourceFetchItem.DataSourceFetchItemTypes.Errored;
        item.DataSourceFetchItemProperties.Error = new ExceptionError(ex);
        Trace.WriteLine(ex);
    }
    finally
    {
        // Runs on every path (success, duplicate early-return, error) so the item is always saved.
        await GdbLocker.GoAsync(async () =>
        {
            Gdb.DataSourceFetchItems.Add(item);
            await Gdb.SaveChangesAsync();
        });
    }

    // No local file means the fetch failed before producing one; the error is already stored
    // on the item. There is nothing to decrypt or unpack, and continuing would pass a null
    // path to the file APIs below.
    if (tfn == null)
    {
        return;
    }

    var ext = Path.GetExtension(details.Name).ToLower();
    if (ext == ".pgp" || details.Name.ToLower().Contains(".pgp."))
    {
        // Derive the decrypted item's name by removing the PGP marker from the original name.
        var name = details.Name;
        if (name.ToLower().EndsWith(".pgp"))
        {
            name = name.Left(name.Length - 4);
        }
        else if (name.ToLower().EndsWith(".pgp.asc"))
        {
            name = name.Left(name.Length - 8);
        }
        else if (name.ToLower().Contains(".pgp."))
        {
            name = new Regex(@"\.pgp\.", RegexOptions.IgnoreCase).Replace(name, ".");
        }

        await FetchTheItemAsync(
            fetch,
            new FileDetails(details, name),
            DataSourceFetchItem.DataSourceFetchItemTypes.Decrypted,
            item,
            async _ =>
            {
                var utfp = Path.GetTempFileName();
                using (var st = File.OpenRead(tfn))
                {
                    await Runner.DecryptAsync(st, utfp);
                }
                return utfp;
            }
        );
    }
    else if (
        MimeType.Application.Zip.DoesExtensionMatch(details.Name) &&
        DS.DataSourceSettings.DecompressItems &&
        dataSourceFetchItemType != DataSourceFetchItem.DataSourceFetchItemTypes.UnpackedRecompressedSingleton)
    {
        var relUnzipFolder = Path.GetFileNameWithoutExtension(details.Name);
        var unzipFolder = Path.Combine(Path.GetDirectoryName(tfn), relUnzipFolder);

        // Archives with fewer than two entries are left as they are.
        using (var st = File.OpenRead(tfn))
        {
            using (var za = new ZipArchive(st, ZipArchiveMode.Read))
            {
                if (za.Entries.Count < 2)
                {
                    return;
                }
            }
        }

        ZipFile.ExtractToDirectory(tfn, unzipFolder);
        await TaskWhenAllOneAtATime(
            Directory.GetFiles(unzipFolder, "*.*", SearchOption.AllDirectories).ConvertAll(
                unzipped =>
                {
                    // Every extracted file that is not itself a zip is re-wrapped in a
                    // single-entry zip; that type is excluded from unpacking above, so the
                    // re-wrapped file is not unpacked again.
                    string rezipped = unzipped;
                    bool isRezipped = false;
                    if (!MimeType.Application.Zip.DoesExtensionMatch(unzipped))
                    {
                        rezipped = unzipped + MimeType.Application.Zip.PrimaryFileExtension;
                        using (var st = File.Create(rezipped))
                        {
                            using (var za = new ZipArchive(st, ZipArchiveMode.Create))
                            {
                                za.CreateEntryFromFile(unzipped, Path.GetFileName(unzipped));
                            }
                            isRezipped = true;
                        }
                    }

                    return FetchTheItemAsync(
                        fetch,
                        new FileDetails(new FileInfo(rezipped), Path.Combine(details.Folder, relUnzipFolder)),
                        isRezipped ? DataSourceFetchItem.DataSourceFetchItemTypes.UnpackedRecompressedSingleton : DataSourceFetchItem.DataSourceFetchItemTypes.Unpacked,
                        item,
                        _ => Task.FromResult(rezipped)
                    );
                }));
        Stuff.Noop();
    }
}
/// <summary>
/// Processes a web-configured data source. When a login page is configured, the page is
/// downloaded, the first form containing both the username and password fields is filled in
/// and posted, and the resulting cookies are kept in the shared handler. Every configured
/// download URL is then fetched with that handler.
/// </summary>
/// <param name="fetch">The fetch record that downloaded items are attached to.</param>
/// <param name="settings">Web settings; must not be null.</param>
/// <exception cref="Exception">
/// A login page is configured but no form with both credential fields was found.
/// </exception>
async Task ProcessFetchAsync(DataSourceFetche fetch, DataSourceSettings.WebSettings settings)
{
    Requires.NonNull(settings, nameof(settings));
    var cookieContainer = new CookieContainer();
    var handler = new HttpClientHandler { CookieContainer = cookieContainer, UseCookies = true };

    if (settings.LoginPageConfig != null)
    {
        var cred = await Runner.Vault.GetCredentialsAsync(settings.CredentialsKeyUri);
        var authenticated = false;

        // Clients are disposed here; the shared handler is reused for the downloads below.
        using (var client = Runner.HttpClientFactory.Create(handler, false))
        using (var st = await client.GetStreamAsync(settings.LoginPageConfig.LoginPage))
        {
            var doc = new H.HtmlDocument();
            doc.Load(st);
            foreach (var formNode in doc.DocumentNode.SelectNodesOrEmpty("//form"))
            {
                var d = new Dictionary<string, string>();
                string action = formNode.GetAttributeValue("action", settings.LoginPageConfig.LoginPage.ToString());

                // NOTE(review): "//input" is a document-wide XPath, so this collects the
                // controls of every form on the page, not only formNode's. Left unchanged
                // because a relative path may return nothing if the HTML parser treats <form>
                // as an empty element - confirm against the parser version before narrowing.
                foreach (var inputNode in formNode.SelectNodesOrEmpty("//input|//textarea|//select"))
                {
                    string val = null;
                    var fieldName = inputNode.GetAttributeValue("name", null);
                    if (string.IsNullOrEmpty(fieldName))
                    {
                        // Unnamed controls are not submitted with a form, and a null name
                        // cannot be used as a dictionary key.
                        continue;
                    }

                    if (fieldName == settings.LoginPageConfig.PasswordFieldName)
                    {
                        val = cred.Password;
                    }
                    else if (fieldName == settings.LoginPageConfig.UsernameFieldName)
                    {
                        val = cred.Username;
                    }
                    else
                    {
                        switch (inputNode.Name)
                        {
                            case "input":
                                var inputType = inputNode.GetAttributeValue("type", "text");
                                if (inputType == "submit")
                                {
                                    continue;
                                }
                                val = inputNode.GetAttributeValue("value", null);
                                break;

                            case "textarea":
                                val = inputNode.InnerText;
                                break;

                            case "select":
                                // Selects are posted with a null value.
                                break;
                        }
                    }
                    d[fieldName] = val;
                }

                if (d.ContainsKey(settings.LoginPageConfig.PasswordFieldName) && d.ContainsKey(settings.LoginPageConfig.UsernameFieldName))
                {
                    using (var postClient = Runner.HttpClientFactory.Create(handler, false))
                    {
                        var postAction = new Uri(settings.LoginPageConfig.LoginPage, action);
                        using (var content = new FormUrlEncodedContent(d))
                        using (var response = await postClient.PostAsync(postAction, content))
                        {
                            // The response is not inspected; it is only disposed.
                        }
                    }
                    authenticated = true;
                    break;
                }
            }
        }

        if (!authenticated)
        {
            throw new Exception($"Form was not there or missing fields [{settings.LoginPageConfig.UsernameFieldName}] or [{settings.LoginPageConfig.PasswordFieldName}]");
        }
    }

    await Task.WhenAll(settings.DownloadUrls.ConvertAll(u => FetchTheWebItemAsync(fetch, u, handler)));
}