public void TestFile()
{
    // Download a known zip archive to a fixed local path, then verify both the
    // reported success flag and that the file actually landed on disk.
    // NOTE(review): hits the real network and a hard-coded C:\temp path — this is
    // an integration test, not a unit test.
    const string localPath = @"C:\temp\master.zip";

    var downloadSucceeded = WebDataRetriever.TryDownloadFile(
        "https://github.com/gabriele-ricci-kyklos/FantasyFootballStatistics/archive/master.zip",
        localPath);

    Assert.IsTrue(downloadSucceeded);
    Assert.IsTrue(File.Exists(localPath));
}
/// <summary>
/// Downloads the zip archive for the given architecture and saves a backup copy
/// of it under <paramref name="backupPath"/> before returning the raw bytes.
/// </summary>
/// <param name="arch">Target architecture; only X86 and X64 are supported.</param>
/// <param name="backupPath">Existing directory the archive is written into; must be non-empty text.</param>
/// <param name="progress">Optional download progress reporter (presumably a 0..1 fraction — TODO confirm against WebDataRetriever).</param>
/// <returns>The downloaded zip archive bytes.</returns>
/// <exception cref="ArgumentException">
/// Thrown when the architecture is unsupported, or when the download fails
/// (the underlying exception is preserved as InnerException).
/// </exception>
private async Task<byte[]> RetrieveZipFileWithBackup(Architecture arch, string backupPath, IProgress<double> progress = null)
{
    backupPath.AssertHasText(nameof(backupPath));

    if (arch != Architecture.X86 && arch != Architecture.X64)
    {
        throw new ArgumentException($"Architecture {arch} not supported");
    }

    string url = GetDownloadUrl(arch);
    string fileName = Utilities.GetFileNameFromUriString(url);

    Logger.Info("Downloading the zip archive");

    byte[] zipArchive;
    try
    {
        zipArchive = await WebDataRetriever.DownloadFileAsync(url, progress);
    }
    catch (Exception ex)
    {
        // NOTE(review): ArgumentException is kept so existing callers' catch blocks
        // still work, but a download failure is not an argument problem —
        // InvalidOperationException would be the more accurate type if callers
        // can be migrated.
        throw new ArgumentException("An error occurred while downloading the file", ex);
    }
    finally
    {
        // Runs on failure too, so "finished" here means "stopped", not "succeeded".
        Logger.Info("Finished downloading the zip archive");
    }

    string localPath = Path.Combine(backupPath, fileName);
    Logger.Info($"Writing the zip archive locally to {backupPath}");

    // Was a blocking File.WriteAllBytes inside an async method; the async overload
    // avoids tying up the calling thread on disk I/O.
    await File.WriteAllBytesAsync(localPath, zipArchive);

    Logger.Info("The zip archive has been successfully saved locally");
    return zipArchive;
}
public void TestPage()
{
    // Fetch the Google homepage and confirm that some content was returned.
    // NOTE(review): requires network access — integration test, not a unit test.
    var pageHtml = WebDataRetriever.RetrievePage("https://google.it/");

    Assert.IsNotNull(pageHtml);
}
/// <summary>
/// Crawls the site at <paramref name="root"/> starting from "/" (plus any additional URLs
/// provided) and writes a static copy of it into <paramref name="destination"/>. Html and
/// css responses are passed through <paramref name="urlRewriter"/>-driven content rewriters
/// (each optionally wrapped by <paramref name="optionalRewriteIntercepter"/>) and any
/// relative URLs they reference are queued for retrieval; everything else is saved as raw
/// binary, untouched.
/// </summary>
/// <param name="root">Absolute base URL of the site to crawl.</param>
/// <param name="destination">Existing folder that the static files are written into.</param>
/// <param name="urlRewriter">Maps a crawled URL onto the URL/file name to save it as; absolute results are skipped.</param>
/// <param name="optionalRewriteIntercepter">Optional wrapper applied to the html and css rewriters; may be null.</param>
/// <param name="optionalAdditionalUrlsToRetrieve">Extra URLs to seed the crawl with (null entries are ignored).</param>
/// <exception cref="ArgumentNullException">If root, destination or urlRewriter is null.</exception>
/// <exception cref="ArgumentException">If root is not absolute or destination does not exist.</exception>
public static void Regenerate(
    Uri root,
    DirectoryInfo destination,
    UrlRewriter urlRewriter,
    Func<IRewriteContent, IRewriteContent> optionalRewriteIntercepter,
    IEnumerable<Uri> optionalAdditionalUrlsToRetrieve = null)
{
    if (root == null)
        throw new ArgumentNullException(nameof(root));
    if (!root.IsAbsoluteUri)
        throw new ArgumentException("root must be an absolute url");
    if (destination == null)
        throw new ArgumentNullException(nameof(destination));
    destination.Refresh();
    if (!destination.Exists)
        throw new ArgumentException("The specified destination folder does not exist");
    if (urlRewriter == null)
        throw new ArgumentNullException(nameof(urlRewriter));

    var webRequester = new WebDataRetriever();

    // Html and css each get a dedicated content rewriter, both optionally wrapped by the
    // intercepter; all other content types are copied down as raw binary
    IRewriteContent htmlContentRewriter = new HtmlContentRewriter(urlRewriter);
    if (optionalRewriteIntercepter != null)
        htmlContentRewriter = optionalRewriteIntercepter(htmlContentRewriter);
    IRewriteContent cssContentRewriter = new CssContentRewriter(urlRewriter);
    if (optionalRewriteIntercepter != null)
        cssContentRewriter = optionalRewriteIntercepter(cssContentRewriter);

    var processedUrls = new HashSet<Uri>();
    var urlsToProcess = new HashSet<Uri> { new Uri("/", UriKind.Relative) };
    if (optionalAdditionalUrlsToRetrieve != null)
    {
        foreach (var url in optionalAdditionalUrlsToRetrieve)
        {
            if (url != null)
                urlsToProcess.Add(url);
        }
    }

    // Processing a page may discover new relative URLs, which are added to urlsToProcess as
    // we go — hence the outer while loop and the ToArray copy inside (so the set is not
    // mutated while being enumerated)
    while (urlsToProcess.Any(u => !processedUrls.Contains(u)))
    {
        foreach (var url in urlsToProcess.ToArray().Where(u => !processedUrls.Contains(u)))
        {
            var urlToRequest = new Uri(root, url);
            var rewrittenUrl = urlRewriter(url);
            if (rewrittenUrl.IsAbsoluteUri)
            {
                // An absolute rewritten URL points off-site — nothing to save locally
                processedUrls.Add(url);
                continue;
            }
            var rewrittenUrlString = rewrittenUrl.ToString();
            if (rewrittenUrlString.Contains("?"))
            {
                // If the rewritten URL still contains QueryString content then don't pull it down - presumably it varies by QueryString (if
                // we want to always return the same content then the URL Rewriter should have pulled the QueryString content into the page
                // name)
                processedUrls.Add(url);
                continue;
            }

            // Map the relative URL onto a relative Windows file path (the '?' split is
            // redundant after the Contains("?") guard above but kept as a cheap safety net)
            var pageName = rewrittenUrlString.Split('?')[0].Split('#')[0].Replace('/', '\\').TrimEnd('\\');
            if (pageName == "")
            {
                pageName = "index.html";
            }
            else
            {
                // FIX: this previously split on '/', which could never match because every
                // '/' was just replaced with '\\' — so the "last segment" was the entire
                // path and a dot anywhere in it would incorrectly suppress the ".html"
                // suffix for an extensionless final segment
                var lastUrlSegment = pageName.Split('\\').Last();
                if (!lastUrlSegment.Contains(".") || (lastUrlSegment.Split('.').Last().Any(c => char.IsUpper(c)))) // TODO: Hack workaround to catch "ASP.Net" and make it "ASP.Net.html"
                {
                    pageName += ".html";
                }
            }

            // If it's html or css content then pass it through the appropriate default content rewriter and then the optionalRewriteIntercepter
            // (if non-null). Otherwise just pull it as binary content, regardless of whether it's an image or a javascript file (no custom
            // rewriting is done in this case, the optionalRewriteIntercepter is not used even if it isn't null)
            IRewriteContent contentRewriter;
            if (pageName.EndsWith(".html", StringComparison.InvariantCultureIgnoreCase))
                contentRewriter = htmlContentRewriter;
            else if (pageName.EndsWith(".css", StringComparison.InvariantCultureIgnoreCase))
                contentRewriter = cssContentRewriter;
            else
                contentRewriter = null;

            // 2021-04-07 DWR: Most page names won't have any URL-encoded characters because they are generally formed as "URL friendly" names
            // but there are some, such as the Archive-by-Tag pages, that DO have characters that will be URL-encoded in the content that is
            // extracted now but we want the file name that's written to be the NON-encoded version
            pageName = HttpUtility.UrlDecode(pageName.TrimStart('\\'));

            var destinationFile = new FileInfo(Path.Combine(destination.FullName, pageName));
            if (!destinationFile.Directory.Exists)
                destinationFile.Directory.Create();
            if (contentRewriter == null)
            {
                File.WriteAllBytes(
                    destinationFile.FullName,
                    webRequester.GetBinary(urlToRequest)
                );
            }
            else
            {
                var rewrittenContent = contentRewriter.Rewrite(
                    webRequester.GetText(urlToRequest),
                    urlToRequest
                );
                File.WriteAllText(
                    destinationFile.FullName,
                    rewrittenContent.Content
                );
                // Queue any newly-discovered relative URLs for a later pass of the while loop
                foreach (var urlToAdd in rewrittenContent.ReferencedRelativeUrls)
                    urlsToProcess.Add(urlToAdd);
            }
            processedUrls.Add(url);
        }
    }
    // TODO: Have to deal with any files referenced by javascript!
}