示例#1
0
 /// <summary>
 /// Executes the "ZipCodes.dtsx" package, applying the common parameters to it first.
 /// </summary>
 /// <param name="fetch">Not referenced by the method body.</param>
 /// <returns>The task produced by <c>ExecutePackageAsync</c>.</returns>
 private T.Task LoadZipCodesAsync(DataSourceFetche fetch)
 {
     const string packageName = "ZipCodes.dtsx";

     return ExecutePackageAsync(packageName, package =>
     {
         ConfigureCommonParameters(package);
     });
 }
示例#2
0
 /// <summary>
 /// Executes the "InternationalClassificationDiseases.dtsx" package, applying the
 /// common parameters to it first.
 /// </summary>
 /// <param name="fetch">Not referenced by the method body.</param>
 /// <returns>The task produced by <c>ExecutePackageAsync</c>.</returns>
 private T.Task LoadInternationalClassificationDiseasesAsync(DataSourceFetche fetch)
 {
     const string packageName = "InternationalClassificationDiseases.dtsx";

     return ExecutePackageAsync(packageName, package =>
     {
         ConfigureCommonParameters(package);
     });
 }
示例#3
0
            /// <summary>
            /// Fetches one web resource: probes it with an HTTP HEAD request to build its
            /// <c>FileDetails</c>, then delegates to <c>FetchTheItemAsync</c> with a callback
            /// that streams the resource body into a file under <c>Runner.TempFolderPath</c>.
            /// </summary>
            /// <param name="fetch">The fetch run the resulting item is attached to.</param>
            /// <param name="u">The URL of the resource to download.</param>
            /// <param name="handler">Shared handler (carries the cookie container) used for every client created here.</param>
            private async Task FetchTheWebItemAsync(DataSourceFetche fetch, Uri u, HttpClientHandler handler)
            {
                FileDetails details;

                // The request and response messages are disposable; release them as soon as the
                // details have been built from the response.
                // NOTE(review): assumes FileDetails copies what it needs from the response in its
                // constructor (the original already used it after the client was disposed) - confirm.
                using (var client = Runner.HttpClientFactory.Create(handler, false))
                using (var headRequest = new HttpRequestMessage(HttpMethod.Head, u))
                using (var resp = await client.SendAsync(headRequest))
                {
                    details = new FileDetails(resp);
                }

                await FetchTheItemAsync(fetch, details, DataSourceFetchItem.DataSourceFetchItemTypes.Original, null, async _ =>
                {
                    // Local file the body is written to; derived from the details' name inside the temp folder.
                    var tfn = Stuff.FindOrigFileName(Path.Combine(Runner.TempFolderPath, details.Name));
                    using (var client = Runner.HttpClientFactory.Create(handler, false))
                    using (var st = await client.GetStreamAsync(u))
                    using (var dst = File.Create(tfn))
                    {
                        await st.CopyToAsync(dst);
                    }
                    return tfn;
                });
            }
示例#4
0
 /// <summary>
 /// Executes the "Cmsgov.dtsx" package, applying the common parameters to it first.
 /// </summary>
 /// <param name="fetch">Not referenced by the method body.</param>
 /// <returns>The task produced by <c>ExecutePackageAsync</c>.</returns>
 private T.Task LoadCmsGovAsync(DataSourceFetche fetch)
 {
     const string packageName = "Cmsgov.dtsx";

     return ExecutePackageAsync(packageName, package =>
     {
         ConfigureCommonParameters(package);
     });
 }
示例#5
0
 /// <summary>
 /// Executes the "NationalDrugCode.dtsx" package. The common parameters are applied first;
 /// the package's "DataUrl" parameter is then overridden when a data URL is configured.
 /// </summary>
 /// <param name="fetch">Not referenced by the method body.</param>
 /// <returns>The task produced by <c>ExecutePackageAsync</c>.</returns>
 private T.Task LoadNationalDrugCodeAsync(DataSourceFetche fetch)
 {
     return ExecutePackageAsync("NationalDrugCode.dtsx", package =>
     {
         var drugCodeOptions = ConfigOptions.Value?.NationalDrugCode;

         ConfigureCommonParameters(package);

         // Leave the package's own DataUrl untouched when none is configured.
         var configuredUrl = drugCodeOptions?.DataUrl;
         if (configuredUrl == null)
         {
             return;
         }
         package.Parameters["DataUrl"].Value = configuredUrl.ToString();
     });
 }
示例#6
0
            /// <summary>
            /// Processes an SFTP-based fetch: obtains credentials from the vault, connects with
            /// password authentication and walks every configured folder path, fetching files
            /// whose names match the configured pattern.
            /// </summary>
            /// <param name="fetch">The fetch run the downloaded items are attached to.</param>
            /// <param name="settings">SFTP settings; must not be null.</param>
            async Task ProcessFetchAsync(DataSourceFetche fetch, DataSourceSettings.FtpSettings settings)
            {
                Requires.NonNull(settings, nameof(settings));

                var credentials    = await Runner.Vault.GetCredentialsAsync(settings.CredentialsKeyUri);
                var passwordAuth   = new PasswordAuthenticationMethod(credentials.Username, credentials.Password);
                var connectionInfo = new ConnectionInfo(settings.Hostname, settings.Port, credentials.Username, passwordAuth);

                // No pattern configured means "match any non-empty name"; matching ignores case.
                var namePattern = new Regex(settings.FilePattern ?? ".+", RegexOptions.Compiled | RegexOptions.IgnoreCase);
                Predicate <string> filenameMatcher = candidate => namePattern.IsMatch(candidate);

                using (var client = new SftpClient(connectionInfo))
                {
                    client.Connect();

                    var folderTasks = settings.FolderPaths.ConvertAll(folder => FetchFtpFolderFilesAsync(client, fetch, filenameMatcher, folder));
                    await Task.WhenAll(folderTasks);
                }
            }
示例#7
0
            /// <summary>
            /// Lists <paramref name="path"/> on the SFTP server and processes each entry:
            /// directories are walked recursively, regular files whose name satisfies
            /// <paramref name="filenameMatcher"/> are downloaded to a temp file and passed to
            /// <c>FetchTheItemAsync</c>. Paths reported as already visited are skipped.
            /// </summary>
            /// <param name="client">Connected SFTP client used for listing and downloading.</param>
            /// <param name="fetch">The fetch run the downloaded items are attached to.</param>
            /// <param name="filenameMatcher">Decides, by file name, whether a file is fetched.</param>
            /// <param name="path">Remote folder to list.</param>
            private async Task FetchFtpFolderFilesAsync(SftpClient client, DataSourceFetche fetch, Predicate <string> filenameMatcher, string path)
            {
                if (IsAlreadyVisited(path))
                {
                    return;
                }

                client.ChangeDirectory(path);
                var listing = client.ListDirectory(path);

                var entryTasks = listing.ConvertAll(async entry =>
                {
                    if (entry.IsDirectory)
                    {
                        await FetchFtpFolderFilesAsync(client, fetch, filenameMatcher, entry.FullName);
                        return;
                    }
                    if (!entry.IsRegularFile)
                    {
                        return;
                    }
                    // The visited check runs first; the name filter is only consulted for unvisited files.
                    if (IsAlreadyVisited(entry.FullName) || !filenameMatcher(entry.Name))
                    {
                        return;
                    }

                    await FetchTheItemAsync(
                        fetch,
                        new FileDetails(entry),
                        DataSourceFetchItem.DataSourceFetchItemTypes.Original,
                        null,
                        async remote =>
                    {
                        var localPath = Stuff.GetTempFileName(Path.GetExtension(remote.FullName), Runner.TempFolderPath);
                        using (var target = File.Create(localPath))
                        {
                            Trace.WriteLine($"Starting {remote.FullName} to [{localPath}]");

                            // Wrap the client's Begin/End download pair in an awaitable task.
                            var pendingDownload = client.BeginDownloadFile(
                                remote.FullName,
                                target,
                                null,
                                null,
                                amt => Trace.WriteLine($"Downloading {remote.FullName} to [{localPath}] => {amt}/{remote.Size}"));
                            await Task.Factory.FromAsync(pendingDownload, client.EndDownloadFile);

                            Trace.WriteLine($"Finishing {remote.FullName} to [{localPath}]");
                        }
                        return localPath;
                    });
                });

                await TaskWhenAllOneAtATime(entryTasks);
            }
示例#8
0
            /// <summary>
            /// Runs one fetch for the data source <c>DS</c>: registers a new fetch record,
            /// dispatches to the FTP or Web processing routine according to the data source
            /// settings, and finally saves the pending changes.
            /// </summary>
            /// <exception cref="InvalidOperationException">The data source is neither FTP nor Web.</exception>
            public async Task FetchAsync()
            {
                var fetch = new DataSourceFetche();
                fetch.DataSource = DS;
                Gdb.DataSourceFetches.Add(fetch);

                Task processing;
                if (DS.DataSourceSettings.IsFtp)
                {
                    processing = ProcessFetchAsync(fetch, DS.DataSourceSettings.FTP);
                }
                else if (DS.DataSourceSettings.IsWeb)
                {
                    processing = ProcessFetchAsync(fetch, DS.DataSourceSettings.Web);
                }
                else
                {
                    throw new InvalidOperationException("Unrecognized datasource");
                }

                await processing;
                await Gdb.SaveChangesAsync();
            }
示例#9
0
            /// <summary>
            /// Fetches a single item and records it as a <c>DataSourceFetchItem</c>.
            /// Steps: (1) skip the download when evidence built from <paramref name="details"/>
            /// matches an existing item; (2) obtain a local file via <paramref name="fetchAsync"/>;
            /// (3) hash it (MD5, SHA1, SHA512) and skip the upload when a hash matches an existing
            /// item; (4) upload it to blob storage; (5) always persist the item, flagged as
            /// Duplicate or Errored where applicable; (6) if the item is PGP-encrypted or a
            /// multi-entry zip (and decompression is enabled), recursively fetch the decrypted /
            /// unpacked children with this item as their parent.
            /// </summary>
            /// <param name="fetch">The fetch run the item belongs to.</param>
            /// <param name="details">Name, size and related metadata of the item.</param>
            /// <param name="dataSourceFetchItemType">Initial type recorded for the item.</param>
            /// <param name="parentFetchItem">Parent item for derived items; null for originals.</param>
            /// <param name="fetchAsync">Produces the path of a local file holding the item's content.</param>
            private async Task FetchTheItemAsync(DataSourceFetche fetch, FileDetails details, DataSourceFetchItem.DataSourceFetchItemTypes dataSourceFetchItemType, DataSourceFetchItem parentFetchItem, Func <FileDetails, Task <string> > fetchAsync)
            {
                string tfn  = null;
                var    item = new DataSourceFetchItem
                {
                    DataSourceFetch           = fetch,
                    DataSourceFetchItemType   = dataSourceFetchItemType,
                    ParentDataSourceFetchItem = parentFetchItem,
                    Size = details.Size,
                    Name = details.Name,
                };

                item.DataSourceFetchItemProperties.LastModifiedAtUtc = details.LastModifiedAtUtc;
                item.DataSourceFetchItemProperties.ContentMD5        = details.ContentMD5;
                item.DataSourceFetchItemProperties.ETag = details.ETag;
                try
                {
                    Trace.WriteLine($"Checking {details.FullName} size={details.Size} LastWriteTimeUtc={details.LastModifiedAtUtc}");

                    // Cheap duplicate check before downloading anything.
                    var sameDataSourceReplicatedDataSourceFetchItem = FindEvidenceItems(details.CreateEvidence()).FirstOrDefault();
                    if (sameDataSourceReplicatedDataSourceFetchItem != null)
                    {
                        item.DataSourceFetchItemType = DataSourceFetchItem.DataSourceFetchItemTypes.Duplicate;
                        item.SameDataSourceReplicatedDataSourceFetchItem = sameDataSourceReplicatedDataSourceFetchItem;
                        return;
                    }
                    tfn = await fetchAsync(details);

                    using (var st = File.OpenRead(tfn))
                    {
                        // The real on-disk length replaces the size reported by the source.
                        item.Size = st.Length;
                        using (var muxer = new StreamMuxer(st, true))
                        {
                            var p = new BlobStorageServices.FileProperties
                            {
                                LastModifiedAtUtc = details.LastModifiedAtUtc
                            };
                            p.Metadata[BlobStorageServices.MetaKeyNames.SourcePath]     = details.Folder;
                            p.Metadata[BlobStorageServices.MetaKeyNames.SourceFullName] = details.FullName;
                            var urns = new List <string>();
                            Parallel.ForEach(
                                new[]
                            {
                                Hash.CommonHashAlgorithmNames.Md5,
                                Hash.CommonHashAlgorithmNames.Sha1,
                                Hash.CommonHashAlgorithmNames.Sha512,
                            },
                                hashAlgName =>
                            {
                                var urn = Hash.Compute(muxer.OpenRead(), hashAlgName).Urn;
                                if (urn == null)
                                {
                                    return;                  //yes... in some cases this somehow happens...
                                }
                                // List<T> is not safe for concurrent writers; the hashes are computed
                                // in parallel, so serialize the Add.
                                lock (urns)
                                {
                                    urns.Add(urn);
                                }
                            });
                            if (urns.Count > 0)
                            {
                                p.Metadata[BlobStorageServices.MetaKeyNames.Urns] = CSV.FormatLine(urns, false);

                                // Content-based duplicate check using the computed hashes.
                                sameDataSourceReplicatedDataSourceFetchItem       = FindEvidenceItems(urns).FirstOrDefault();
                                if (sameDataSourceReplicatedDataSourceFetchItem != null)
                                {
                                    item.DataSourceFetchItemType = DataSourceFetchItem.DataSourceFetchItemTypes.Duplicate;
                                    item.SameDataSourceReplicatedDataSourceFetchItem = sameDataSourceReplicatedDataSourceFetchItem;
                                    return;
                                }
                            }
                            var res = await BlobStorageServices.StoreStreamAsync(
                                Runner.BlobConfig,
                                BlobStorageServices.ContainerNames.Secure,
                                $"{BlobRootPath}{details.Folder.Substring(1)}{details.Name}",
                                muxer.OpenRead(),
                                p,
                                amt => Trace.WriteLine($"Uploading {amt}/{muxer.Length}")
                                );

                            item.DataSourceFetchItemProperties = new DataSourceFetchItemProperties();
                            item.DataSourceFetchItemProperties.Set(p);
                            item.Url = res.Uri.ToString();
                            PopulateEvidence(item);
                        }
                    }
                }
                catch (Exception ex)
                {
                    // Failures are recorded on the item rather than propagated.
                    item.DataSourceFetchItemType             = DataSourceFetchItem.DataSourceFetchItemTypes.Errored;
                    item.DataSourceFetchItemProperties.Error = new ExceptionError(ex);
                    Trace.WriteLine(ex);
                }
                finally
                {
                    // Persist the item whatever the outcome (success, duplicate or error).
                    await GdbLocker.GoAsync(async() => {
                        Gdb.DataSourceFetchItems.Add(item);
                        await Gdb.SaveChangesAsync();
                    });
                }

                // Without a local file there is nothing to decrypt or unpack. Continuing would
                // pass a null path to File.OpenRead / Path.Combine; on the zip path that throws
                // outside the try/catch above and aborts the caller.
                if (tfn == null)
                {
                    return;
                }

                var ext = Path.GetExtension(details.Name).ToLower();

                if (ext == ".pgp" || details.Name.ToLower().Contains(".pgp."))
                {
                    // Derive the decrypted file's name by removing the PGP marker from the name.
                    var name = details.Name;
                    if (name.ToLower().EndsWith(".pgp"))
                    {
                        name = name.Left(name.Length - 4);
                    }
                    else if (name.ToLower().EndsWith(".pgp.asc"))
                    {
                        name = name.Left(name.Length - 8);
                    }
                    else if (name.ToLower().Contains(".pgp."))
                    {
                        name = new Regex(@"\.pgp\.", RegexOptions.IgnoreCase).Replace(name, ".");
                    }
                    await FetchTheItemAsync(
                        fetch,
                        new FileDetails(details, name),
                        DataSourceFetchItem.DataSourceFetchItemTypes.Decrypted,
                        item,
                        async _ =>
                    {
                        var utfp = Path.GetTempFileName();
                        using (var st = File.OpenRead(tfn))
                        {
                            await Runner.DecryptAsync(st, utfp);
                        }
                        return(utfp);
                    }
                        );
                }
                else if (
                    MimeType.Application.Zip.DoesExtensionMatch(details.Name) &&
                    DS.DataSourceSettings.DecompressItems &&
                    dataSourceFetchItemType != DataSourceFetchItem.DataSourceFetchItemTypes.UnpackedRecompressedSingleton)
                {
                    var relUnzipFolder = Path.GetFileNameWithoutExtension(details.Name);
                    var unzipFolder    = Path.Combine(Path.GetDirectoryName(tfn), relUnzipFolder);

                    // Archives with fewer than two entries are left as they are.
                    using (var st = File.OpenRead(tfn))
                    {
                        using (var za = new ZipArchive(st, ZipArchiveMode.Read))
                        {
                            if (za.Entries.Count < 2)
                            {
                                return;
                            }
                        }
                    }
                    ZipFile.ExtractToDirectory(tfn, unzipFolder);
                    await TaskWhenAllOneAtATime(
                        Directory.GetFiles(unzipFolder, "*.*", SearchOption.AllDirectories).ConvertAll(
                            unzipped =>
                    {
                        // Every extracted non-zip file is re-wrapped in its own single-entry zip;
                        // extracted zips are passed through unchanged.
                        string rezipped = unzipped;
                        bool isRezipped = false;
                        if (!MimeType.Application.Zip.DoesExtensionMatch(unzipped))
                        {
                            rezipped = unzipped + MimeType.Application.Zip.PrimaryFileExtension;
                            using (var st = File.Create(rezipped))
                            {
                                using (var za = new ZipArchive(st, ZipArchiveMode.Create))
                                {
                                    za.CreateEntryFromFile(unzipped, Path.GetFileName(unzipped));
                                }
                                isRezipped = true;
                            }
                        }
                        return(FetchTheItemAsync(
                                   fetch,
                                   new FileDetails(new FileInfo(rezipped), Path.Combine(details.Folder, relUnzipFolder)),
                                   isRezipped ? DataSourceFetchItem.DataSourceFetchItemTypes.UnpackedRecompressedSingleton : DataSourceFetchItem.DataSourceFetchItemTypes.Unpacked,
                                   item,
                                   _ => Task.FromResult(rezipped)
                                   ));
                    }));

                    Stuff.Noop();
                }
            }
示例#10
0
            /// <summary>
            /// Processes a web-based fetch. When a login page is configured, the page is loaded,
            /// the first form containing both the configured username and password fields is
            /// filled in (credentials from the vault, other fields with their page values) and
            /// posted, so that session cookies end up in the shared handler. All configured
            /// download URLs are then fetched through that handler.
            /// </summary>
            /// <param name="fetch">The fetch run the downloaded items are attached to.</param>
            /// <param name="settings">Web settings; must not be null.</param>
            /// <exception cref="Exception">
            /// A login page is configured but no form with both credential fields was found.
            /// </exception>
            async Task ProcessFetchAsync(DataSourceFetche fetch, DataSourceSettings.WebSettings settings)
            {
                Requires.NonNull(settings, nameof(settings));
                var cookieContainer = new CookieContainer();
                var handler         = new HttpClientHandler
                {
                    CookieContainer = cookieContainer,
                    UseCookies      = true
                };

                if (settings.LoginPageConfig != null)
                {
                    var cred     = await Runner.Vault.GetCredentialsAsync(settings.CredentialsKeyUri);
                    var loggedIn = false;

                    // Clients created the same way are disposed while the handler is reused in
                    // FetchTheWebItemAsync, so the clients are disposed here as well.
                    using (var client = Runner.HttpClientFactory.Create(handler, false))
                    using (var st = await client.GetStreamAsync(settings.LoginPageConfig.LoginPage))
                    {
                        var doc = new H.HtmlDocument();
                        doc.Load(st);
                        foreach (var formNode in doc.DocumentNode.SelectNodesOrEmpty("//form"))
                        {
                            var    d      = new Dictionary <string, string>();
                            string action = formNode.GetAttributeValue("action", settings.LoginPageConfig.LoginPage.ToString());

                            // ".//" keeps the query relative to this form; a leading "//" is
                            // evaluated from the document root and would collect the fields of
                            // every form on the page.
                            foreach (var inputNode in formNode.SelectNodesOrEmpty(".//input|.//textarea|.//select"))
                            {
                                string val       = null;
                                var    fieldName = inputNode.GetAttributeValue("name", null);
                                if (string.IsNullOrEmpty(fieldName))
                                {
                                    // Unnamed controls are not part of a form submission, and a
                                    // null key would make the dictionary throw.
                                    continue;
                                }
                                if (fieldName == settings.LoginPageConfig.PasswordFieldName)
                                {
                                    val = cred.Password;
                                }
                                else if (fieldName == settings.LoginPageConfig.UsernameFieldName)
                                {
                                    val = cred.Username;
                                }
                                else
                                {
                                    switch (inputNode.Name)
                                    {
                                    case "input":
                                        var inputType = inputNode.GetAttributeValue("type", "text");
                                        if (inputType == "submit")
                                        {
                                            // Submit buttons are not sent.
                                            continue;
                                        }
                                        val = inputNode.GetAttributeValue("value", null);
                                        break;

                                    case "textarea":
                                        val = inputNode.InnerText;
                                        break;

                                    case "select":
                                        // Selects are posted with no value.
                                        break;
                                    }
                                }
                                d[fieldName] = val;
                            }
                            if (d.ContainsKey(settings.LoginPageConfig.PasswordFieldName) &&
                                d.ContainsKey(settings.LoginPageConfig.UsernameFieldName))
                            {
                                // The form action may be relative; resolve it against the login page.
                                var postAction = new Uri(settings.LoginPageConfig.LoginPage, action);
                                using (var postClient = Runner.HttpClientFactory.Create(handler, false))
                                using (var content = new FormUrlEncodedContent(d))
                                using (var loginResponse = await postClient.PostAsync(postAction, content))
                                {
                                    // Only the cookies captured by the shared handler matter here;
                                    // as before, the response itself is not inspected.
                                }

                                loggedIn = true;
                                break;
                            }
                        }
                        if (!loggedIn)
                        {
                            throw new Exception($"Form was not there or missing fields [{settings.LoginPageConfig.UsernameFieldName}] or [{settings.LoginPageConfig.PasswordFieldName}]");
                        }
                    }
                }

                await Task.WhenAll(settings.DownloadUrls.ConvertAll(u => FetchTheWebItemAsync(fetch, u, handler)));
            }