/// <summary>
/// Follows the intermediate page at <paramref name="url"/> to find the address it finally points to.
/// The page is requested with a Googlebot user agent, the value of its <c>ysmm</c> script variable is
/// decoded with <c>Reverse</c>, and any intermediate HTML pages are unwrapped by taking the first
/// link whose text contains “click”.
/// </summary>
/// <param name="url">Address of the page to resolve.</param>
/// <param name="client">Web client used for all requests; its user agent is overridden for the duration of the call.</param>
/// <param name="cancellation">Checked after each request; when cancelled, the best address found so far is returned.</param>
/// <returns>The resolved address, or the last intermediate address if nothing further could be unwrapped.</returns>
protected override async Task<string> GetRedirectOverrideAsync(string url, CookieAwareWebClient client, CancellationToken cancellation) {
    using (client.SetUserAgent("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)")) {
        var redirectTo = Reverse(Regex.Match(await client.DownloadStringTaskAsync(url), @"ysmm\s=\s'(.*?)'").Groups[1].Value);
        if (cancellation.IsCancellationRequested) {
            return redirectTo;
        }

        // Some decoded addresses lead to one more intermediate page; keep the decoded address
        // if that page does not contain a usable link.
        if (Test(redirectTo)) {
            redirectTo = Unwrap(await client.DownloadStringTaskAsync(redirectTo)) ?? redirectTo;
            if (cancellation.IsCancellationRequested) {
                return redirectTo;
            }
        }

        using (var stream = await client.OpenReadTaskAsync(redirectTo)) {
            if (cancellation.IsCancellationRequested) {
                return redirectTo;
            }

            // The Content-Type header might be missing altogether, in which case Get() returns null:
            // treat that the same way as a non-HTML response instead of failing.
            var contentType = client.ResponseHeaders?.Get("Content-Type");
            if (contentType?.Contains(@"text/html", StringComparison.OrdinalIgnoreCase) == true) {
                redirectTo = Unwrap((await stream.ReadAsBytesAsync()).ToUtf8String()) ?? redirectTo;
            }
        }

        return redirectTo;
    }

    // Returns the href of the first <a> element whose text contains “click”, or null if there is none.
    string Unwrap(string html) {
        var doc = new HtmlDocument();
        doc.LoadHtml(html);
        return doc.DocumentNode.Descendants(@"a")
                .FirstOrDefault(x => x.InnerText.Contains(@"click"))?
                .Attributes[@"href"]?.Value;
    }
}
/// <summary>
/// Requests the given address with a GET request and returns the response body as text.
/// </summary>
/// <param name="url">Address of the page to request</param>
/// <returns>The response body, read to the end</returns>
public async Task<string> GetHTML(string url) {
    // The cookie container is created on first use and kept on this instance,
    // so it is handed to every client created here.
    if (this.p_Cookies == null) {
        this.p_Cookies = new CookieContainer();
    }

    using (var webClient = new CookieAwareWebClient(this.p_Cookies)) {
        webClient.Method = "GET";

        using (Stream responseStream = await webClient.OpenReadTaskAsync(url))
        using (var textReader = new StreamReader(responseStream)) {
            return await textReader.ReadToEndAsync();
        }
    }
}
/// <summary>
/// Prepares a Google Drive “uc” link for downloading: checks whether the server redirects straight to
/// the file, and if it answers with a web page instead, parses that page for the actual download link,
/// file name and total size. Updates <c>Url</c>, <c>FileName</c> and <c>TotalSize</c> accordingly.
/// </summary>
/// <param name="client">Web client used for the requests; auto-redirect is disabled while probing.</param>
/// <param name="cancellation">Checked after each request.</param>
/// <returns>
/// <c>true</c> if the loader is ready to download (including the case when the URL is not a Google Drive
/// “uc” link and nothing has to be done); <c>false</c> if cancelled or if the page could not be parsed.
/// </returns>
public override async Task<bool> PrepareAsync(CookieAwareWebClient client, CancellationToken cancellation) {
    Logging.Debug(Url);
    if (!Url.Contains("://drive.google.com/uc?", StringComparison.OrdinalIgnoreCase)) {
        return true;
    }

    // First of all, let’s see if there is an HTML-file under that link
    Logging.Debug("GET request is coming…");
    string webPageContent;

    using (client.SetAutoRedirect(false))
    using (var stream = await client.OpenReadTaskAsync(Url)) {
        if (cancellation.IsCancellationRequested) {
            return false;
        }

        // If file is freely available to download, server should redirect user to downloading
        var location = client.ResponseHeaders?.Get("Location");
        if (location != null) {
            Url = location;
            FileName = new Uri(Url, UriKind.RelativeOrAbsolute).GetQueryParam("id");
            Logging.Debug("Download URL is ready: " + location);
            client.LogResponseHeaders();
            return true;
        }

        // The header might be absent, so it is read once and every access below is null-safe.
        // With no Content-Type at all, the response is still treated as a possible web page.
        var contentType = client.ResponseHeaders?.Get("Content-Type");
        Logging.Debug("Content-Type: " + contentType);
        if (contentType?.Contains("text/html", StringComparison.OrdinalIgnoreCase) == false) {
            return true;
        }

        // Looks like it’s a webpage, now we need to download and parse it
        webPageContent = (await stream.ReadAsBytesAsync()).ToUtf8String();
        if (cancellation.IsCancellationRequested) {
            return false;
        }

        Logging.Debug("…done");
    }

    var doc = new HtmlDocument();
    doc.LoadHtml(webPageContent);

    // SelectSingleNode() returns null if nothing matches, so the lookup has to be null-safe
    // for the error below to be reported instead of a NullReferenceException.
    var link = doc.DocumentNode.SelectSingleNode(@"//a[contains(@href, 'export=download')]")?.Attributes[@"href"]?.Value;
    if (link == null) {
        NonfatalError.Notify(ToolsStrings.Common_CannotDownloadFile, ToolsStrings.DirectLoader_GoogleDriveChanged);
        return false;
    }

    Url = @"https://drive.google.com" + HttpUtility.HtmlDecode(link);
    FileName = HttpUtility.HtmlDecode(doc.DocumentNode.SelectSingleNode(@"//span[@class='uc-name-size']/a")?.InnerText?.Trim());
    Logging.Write($"Google Drive download link: {Url}");

    // Total size is optional information, so any failure here is only logged
    try {
        var totalSize = HttpUtility.HtmlDecode(
                doc.DocumentNode.SelectSingleNode(@"//span[@class='uc-name-size']/text()")?.InnerText?.Trim(' ', '(', ')'));
        Logging.Write($"Total size: {totalSize}");
        if (totalSize != null && LocalizationHelper.TryParseReadableSize(totalSize, null, out var size)) {
            Logging.Write($"Parsed size: {size} bytes");
            TotalSize = size;
        }
    } catch (Exception e) {
        Logging.Warning(e);
    }

    if (OptionManualRedirect) {
        // Follow the redirect by hand: request the page without auto-redirect and take the target
        // address from the first href of the “Moved Temporarily” response body
        using (client.SetDebugMode(OptionDebugMode))
        using (client.SetAutoRedirect(false)) {
            var redirect = await client.DownloadStringTaskAsync(Url);
            Logging.Debug(redirect);

            if (!redirect.Contains("<TITLE>Moved Temporarily</TITLE>")) {
                NonfatalError.Notify(ToolsStrings.Common_CannotDownloadFile, ToolsStrings.DirectLoader_GoogleDriveChanged);
                return false;
            }

            var redirectMatch = Regex.Match(redirect, @"href=""([^""]+)", RegexOptions.IgnoreCase);
            if (!redirectMatch.Success) {
                NonfatalError.Notify(ToolsStrings.Common_CannotDownloadFile, ToolsStrings.DirectLoader_GoogleDriveChanged);
                return false;
            }

            Url = HttpUtility.HtmlDecode(redirectMatch.Groups[1].Value);
            Logging.Debug(Url);
        }
    }

    return true;
}
/// <summary>
/// Downloads the file from <c>Url</c> to a destination chosen by <paramref name="getPreferredDestination"/>,
/// resuming a previously interrupted download when the stored resume data still matches. If the download
/// is interrupted by a <see cref="WebException"/> or by cancellation, details needed to resume later are
/// saved to <c>CacheStorage</c> and the exception is rethrown.
/// </summary>
/// <param name="client">Web client used for all requests.</param>
/// <param name="getPreferredDestination">Callback choosing the destination from the URL and the collected meta information.</param>
/// <param name="reportDestination">Optional callback receiving the final file name before any data is written.</param>
/// <param name="checkIfPaused">Optional pause check, passed through to <c>CopyToAsync</c>.</param>
/// <param name="progress">Progress receiver; when resuming, reported values include the bytes already on disk.</param>
/// <param name="cancellation">Cancellation token; cancelling also calls <c>client.CancelAsync()</c> while the download runs.</param>
/// <returns>Full name of the downloaded file.</returns>
private async Task<string> DownloadResumeSupportAsync([NotNull] CookieAwareWebClient client,
        [NotNull] FlexibleLoaderGetPreferredDestinationCallback getPreferredDestination,
        [CanBeNull] FlexibleLoaderReportDestinationCallback reportDestination, [CanBeNull] Func<bool> checkIfPaused,
        IProgress<long> progress, CancellationToken cancellation) {
    // Common variables
    string filename = null, selectedDestination = null, actualFootprint = null;
    Stream remoteData = null;
    var resumeSupported = ResumeSupported;

    // Disposing a default registration is a no-op, so it is safe to dispose it unconditionally in finally
    var cancellationRegistration = default(CancellationTokenRegistration);

    try {
        // Read resume-related data and remove it to avoid conflicts
        var resumeDestination = CacheStorage.Get<string>(_keyDestination);
        var resumePartiallyLoadedFilename = CacheStorage.Get<string>(_keyPartiallyLoadedFilename);
        var resumeLastWriteDate = CacheStorage.Get<DateTime?>(_keyLastWriteDate);
        var resumePreviousFootprint = CacheStorage.Get<string>(_keyFootprint);
        ClearResumeData();

        // Collect known information for destination callback
        var information = FlexibleLoaderMetaInformation.FromLoader(this);

        // Opening stream to read…
        var headRequest = HeadRequestSupported && resumeDestination != null;
        using (headRequest ? client.SetMethod("HEAD") : null) {
            Logging.Warning($"Initial request: {(headRequest ? "HEAD" : "GET")}");
            remoteData = await client.OpenReadTaskAsync(Url);
        }

        cancellation.ThrowIfCancellationRequested();

        // Maybe we’ll be lucky enough to load the most accurate data
        if (client.ResponseHeaders != null) {
            if (long.TryParse(client.ResponseHeaders[HttpResponseHeader.ContentLength] ?? "",
                    NumberStyles.Any, CultureInfo.InvariantCulture, out var length)) {
                TotalSize = information.TotalSize = length;
            }

            if (TryGetFileName(client.ResponseHeaders, out var fileName)) {
                FileName = information.FileName = fileName;
            }

            // For example, Google Drive responds with “none” and yet allows to download file partially,
            // so this header will only be checked if value is not defined.
            if (resumeSupported == null) {
                var accept = client.ResponseHeaders[HttpResponseHeader.AcceptRanges] ?? "";
                if (accept.Contains("bytes")) {
                    resumeSupported = true;
                } else if (accept.Contains("none")) {
                    resumeSupported = false;
                }
            }

            client.LogResponseHeaders();
        }

        // Was the file partially loaded before? The local value is used here rather than the property,
        // so that a server explicitly declaring no range support is not resumed: with the flag set to
        // false the “beginning of a file” check below is skipped and the whole file would be appended
        // to the partial one.
        var partiallyLoaded = resumeSupported != false && resumePartiallyLoadedFilename != null
                ? new FileInfo(FileUtils.EnsureFilenameIsValid(resumePartiallyLoadedFilename)) : null;
        if (partiallyLoaded != null) {
            Logging.Warning("Not finished: " + partiallyLoaded);
        }

        // Does it still exist
        if (partiallyLoaded != null && !partiallyLoaded.Exists) {
            Logging.Warning($"Partially downloaded file “{partiallyLoaded.FullName}” does not exist");
            partiallyLoaded = null;
        }

        // If so, wasn’t it changed since the last time?
        if (partiallyLoaded?.LastWriteTime > resumeLastWriteDate + TimeSpan.FromMinutes(5)) {
            Logging.Warning($"Partially downloaded file is newer that it should be: {partiallyLoaded.LastWriteTime}, expected: {resumeLastWriteDate}");
            partiallyLoaded = null;
        }

        // Looks like file is partially downloaded, but let’s ensure link still leads to the same content
        actualFootprint = GetFootprint(information, client.ResponseHeaders);
        if (partiallyLoaded != null && resumePreviousFootprint != actualFootprint) {
            Logging.Warning($"Footprints don’t match: {resumePreviousFootprint}≠{actualFootprint}");
            partiallyLoaded = null;
        }

        // Let’s check where to load data, which is potentially the most actual data at this point
        var destination = getPreferredDestination(Url, information);
        selectedDestination = destination.Filename;
        if (partiallyLoaded != null
                && (!destination.CanResumeDownload || !FileUtils.ArePathsEqual(selectedDestination, resumeDestination))) {
            Logging.Warning($"Different destination chosen: {selectedDestination} (before: {resumeDestination})");
            partiallyLoaded = null;
        }

        // TODO: Check that header?

        // Where to write?
        // ReSharper disable once MergeConditionalExpression
        filename = partiallyLoaded != null ? partiallyLoaded.FullName : FileUtils.EnsureUnique(true, destination.Filename);
        reportDestination?.Invoke(filename);

        // Set cancellation token; the registration is released in finally so the callback
        // does not stay attached to the token after this call is over
        cancellationRegistration = cancellation.Register(o => client.CancelAsync(), null);

        // Open write stream
        if (partiallyLoaded != null) {
            var rangeFrom = partiallyLoaded.Length;
            using (client.SetRange(new Tuple<long, long>(rangeFrom, -1))) {
                Logging.Warning($"Trying to resume download from {rangeFrom} bytes…");
                remoteData.Dispose();
                remoteData = await client.OpenReadTaskAsync(Url);
                cancellation.ThrowIfCancellationRequested();
                client.LogResponseHeaders();

                // Bytes already consumed from the response while sniffing; they still belong to the file
                byte[] prefetched = null;
                var prefetchedCount = 0;

                // It’s unknown if resume is supported or not at this point
                if (resumeSupported == null) {
                    var bytes = new byte[16];
                    var firstBytes = await remoteData.ReadAsync(bytes, 0, bytes.Length);
                    cancellation.ThrowIfCancellationRequested();

                    if (CouldBeBeginningOfAFile(bytes)) {
                        using (var file = File.Create(filename)) {
                            Logging.Warning("File beginning found, restart download");
                            file.Write(bytes, 0, firstBytes);
                            await CopyToAsync(remoteData, file, checkIfPaused, progress, cancellation);
                            cancellation.ThrowIfCancellationRequested();
                        }

                        Logging.Write("Download finished");
                        return filename;
                    }

                    prefetched = bytes;
                    prefetchedCount = firstBytes;
                    rangeFrom += firstBytes;
                }

                using (var file = new FileStream(filename, FileMode.Append, FileAccess.Write)) {
                    // Sniffed bytes were taken out of the stream, so they have to be written first,
                    // otherwise the resumed file would have a gap
                    if (prefetchedCount > 0) {
                        file.Write(prefetched, 0, prefetchedCount);
                    }

                    await CopyToAsync(remoteData, file, checkIfPaused, new Progress<long>(v => {
                        progress?.Report(v + rangeFrom);
                    }), cancellation);
                    cancellation.ThrowIfCancellationRequested();
                }
            }
        } else {
            if (headRequest) {
                Logging.Warning("Re-open request to be GET");
                remoteData.Dispose();
                remoteData = await client.OpenReadTaskAsync(Url);
            }

            using (var file = File.Create(filename)) {
                Logging.Debug("Downloading the whole file…");
                await CopyToAsync(remoteData, file, checkIfPaused, progress, cancellation);
                cancellation.ThrowIfCancellationRequested();
            }
        }

        Logging.Write("Download finished");
        return filename;
    } catch (Exception e) when (e is WebException || e.IsCancelled()) {
        Logging.Write("Download is interrupted! Saving details to resume later…");
        var download = filename == null ? null : new FileInfo(filename);

        // NOTE(review): filename.Length is the length of the path string, not the size of the file —
        // possibly download.Length was meant; kept as is, since it changes which files are resumed
        if (download?.Exists == true && filename.Length > 0) {
            CacheStorage.Set(_keyDestination, selectedDestination);
            CacheStorage.Set(_keyPartiallyLoadedFilename, filename);
            CacheStorage.Set(_keyFootprint, actualFootprint);
            CacheStorage.Set(_keyLastWriteDate, download.LastWriteTime);
        } else {
            ClearResumeData();
        }

        throw;
    } finally {
        cancellationRegistration.Dispose();
        remoteData?.Dispose();
    }
}