示例#1
0
        /// <summary>
        /// Resolves the address a shortened link finally leads to. The page at <paramref name="url"/> is
        /// downloaded with a Googlebot user agent, the value assigned to <c>ysmm</c> is extracted and passed
        /// through <c>Reverse</c>, and up to two intermediate HTML pages are searched for a “click” link.
        /// </summary>
        /// <param name="url">Address of the page to resolve.</param>
        /// <param name="client">Web client used for all requests; its user agent is overridden for the duration of the call.</param>
        /// <param name="cancellation">When cancellation is requested, the best address found so far is returned instead of throwing.</param>
        /// <returns>The resolved address, or the last intermediate address if nothing better was found.</returns>
        protected override async Task <string> GetRedirectOverrideAsync(string url, CookieAwareWebClient client, CancellationToken cancellation)
        {
            using (client.SetUserAgent("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)")) {
                // The encoded target is stored in the page as: ysmm = '…'
                var redirectTo = Reverse(Regex.Match(await client.DownloadStringTaskAsync(url), @"ysmm\s=\s'(.*?)'").Groups[1].Value);
                if (cancellation.IsCancellationRequested)
                {
                    return redirectTo;
                }

                if (Test(redirectTo))
                {
                    // Intermediate page: follow its “click” link if there is one
                    redirectTo = Unwrap(await client.DownloadStringTaskAsync(redirectTo)) ?? redirectTo;
                    if (cancellation.IsCancellationRequested)
                    {
                        return redirectTo;
                    }
                }

                using (var stream = await client.OpenReadTaskAsync(redirectTo)) {
                    if (cancellation.IsCancellationRequested)
                    {
                        return redirectTo;
                    }

                    // Get() returns null when the header is absent, so the whole chain has to be null-conditional;
                    // a response without Content-Type is simply treated as “not HTML”.
                    var contentType = client.ResponseHeaders?.Get("Content-Type");
                    if (contentType?.Contains(@"text/html", StringComparison.OrdinalIgnoreCase) == true)
                    {
                        redirectTo = Unwrap((await stream.ReadAsBytesAsync()).ToUtf8String()) ?? redirectTo;
                    }
                }

                return redirectTo;
            }

            // Returns the href of the first <a> element whose text contains “click”, or null if there is none.
            string Unwrap(string html)
            {
                var doc = new HtmlDocument();

                doc.LoadHtml(html);
                return doc.DocumentNode.Descendants(@"a")
                          .FirstOrDefault(x => x.InnerText.Contains(@"click"))?
                          .Attributes[@"href"]?.Value;
            }
        }
示例#2
0
文件: Client.cs 项目: discorly/OWA
        /// <summary>
        /// Issues a GET request for the given URL and returns the response body as text.
        /// </summary>
        /// <param name="url">Address of the page to fetch</param>
        /// <returns>Everything read from the response stream</returns>
        public async Task<string> GetHTML(string url)
        {
            // Create the cookie container on first use
            if (this.p_Cookies == null)
            {
                this.p_Cookies = new CookieContainer();
            }

            using (var webClient = new CookieAwareWebClient(this.p_Cookies))
            {
                webClient.Method = "GET";

                using (var responseStream = await webClient.OpenReadTaskAsync(url))
                using (var responseReader = new StreamReader(responseStream))
                {
                    return await responseReader.ReadToEndAsync();
                }
            }
        }
示例#3
0
        /// <summary>
        /// Prepares a Google Drive “uc?” link for downloading: follows the direct redirect if the server offers
        /// one, otherwise parses the confirmation page to find the real download link, file name and size.
        /// Links not containing <c>://drive.google.com/uc?</c> are accepted without any request.
        /// </summary>
        /// <param name="client">Web client used for all requests; auto-redirect is temporarily disabled on it.</param>
        /// <param name="cancellation">Checked after each request; a cancelled preparation returns <c>false</c>.</param>
        /// <returns>
        /// <c>true</c> if <c>Url</c> is ready to be downloaded; <c>false</c> if the operation was cancelled or
        /// the page could not be understood (in which case a non-fatal error is reported).
        /// </returns>
        public override async Task <bool> PrepareAsync(CookieAwareWebClient client, CancellationToken cancellation)
        {
            Logging.Debug(Url);
            if (!Url.Contains("://drive.google.com/uc?", StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }

            // First of all, let’s see if there is an HTML-file under that link
            Logging.Debug("GET request is coming…");
            string webPageContent;

            using (client.SetAutoRedirect(false))
                using (var stream = await client.OpenReadTaskAsync(Url)) {
                    if (cancellation.IsCancellationRequested)
                    {
                        return false;
                    }

                    // If file is freely available to download, server should redirect user to downloading
                    var location = client.ResponseHeaders?.Get("Location");
                    if (location != null)
                    {
                        Url      = location;
                        FileName = new Uri(Url, UriKind.RelativeOrAbsolute).GetQueryParam("id");
                        Logging.Debug("Download URL is ready: " + location);
                        client.LogResponseHeaders();
                        return true;
                    }

                    // Get() returns null for a missing header, so it must not be dereferenced directly.
                    // With no Content-Type at all, fall through and try to parse the response as a page.
                    var contentType = client.ResponseHeaders?.Get("Content-Type");
                    Logging.Debug("Content-Type: " + contentType);
                    if (contentType?.Contains("text/html", StringComparison.OrdinalIgnoreCase) == false)
                    {
                        return true;
                    }

                    // Looks like it’s a webpage, now we need to download and parse it
                    webPageContent = (await stream.ReadAsBytesAsync()).ToUtf8String();
                    if (cancellation.IsCancellationRequested)
                    {
                        return false;
                    }

                    Logging.Debug("…done");
                }

            var doc = new HtmlDocument();

            doc.LoadHtml(webPageContent);

            // SelectSingleNode() returns null when nothing matches; keep the chain null-safe so that a changed
            // page layout ends up in the error branch below instead of a NullReferenceException.
            var link = doc.DocumentNode.SelectSingleNode(@"//a[contains(@href, 'export=download')]")?.Attributes[@"href"]?.Value;

            if (link == null)
            {
                NonfatalError.Notify(ToolsStrings.Common_CannotDownloadFile, ToolsStrings.DirectLoader_GoogleDriveChanged);
                return false;
            }

            Url      = @"https://drive.google.com" + HttpUtility.HtmlDecode(link);
            FileName = HttpUtility.HtmlDecode(doc.DocumentNode.SelectSingleNode(@"//span[@class='uc-name-size']/a")?.InnerText?.Trim());
            Logging.Write($"Google Drive download link: {Url}");

            // Size is optional information: any failure here is logged and ignored
            try {
                var totalSize = HttpUtility.HtmlDecode(
                    doc.DocumentNode.SelectSingleNode(@"//span[@class='uc-name-size']/text()")?.InnerText?.Trim(' ', '(', ')'));
                Logging.Write($"Total size: {totalSize}");
                if (totalSize != null && LocalizationHelper.TryParseReadableSize(totalSize, null, out var size))
                {
                    Logging.Write($"Parsed size: {size} bytes");
                    TotalSize = size;
                }
            } catch (Exception e) {
                Logging.Warning(e);
            }

            if (OptionManualRedirect)
            {
                // Resolve the final redirect by hand: request the link without following redirects
                // and take the target out of the “Moved Temporarily” page
                using (client.SetDebugMode(OptionDebugMode))
                    using (client.SetAutoRedirect(false)) {
                        var redirect = await client.DownloadStringTaskAsync(Url);

                        Logging.Debug(redirect);

                        if (!redirect.Contains("<TITLE>Moved Temporarily</TITLE>"))
                        {
                            NonfatalError.Notify(ToolsStrings.Common_CannotDownloadFile, ToolsStrings.DirectLoader_GoogleDriveChanged);
                            return false;
                        }

                        var redirectMatch = Regex.Match(redirect, @"href=""([^""]+)", RegexOptions.IgnoreCase);
                        if (!redirectMatch.Success)
                        {
                            NonfatalError.Notify(ToolsStrings.Common_CannotDownloadFile, ToolsStrings.DirectLoader_GoogleDriveChanged);
                            return false;
                        }

                        Url = HttpUtility.HtmlDecode(redirectMatch.Groups[1].Value);
                        Logging.Debug(Url);
                    }
            }

            return true;
        }
示例#4
0
        /// <summary>
        /// Downloads <c>Url</c> to a file, resuming a previously interrupted download when the stored resume
        /// data, the partially written file, the content footprint and the chosen destination all still match.
        /// If the download is interrupted by a <see cref="WebException"/> or cancellation, details needed to
        /// resume later are saved and the exception is rethrown.
        /// </summary>
        /// <param name="client">Web client used for all requests.</param>
        /// <param name="getPreferredDestination">Callback choosing the destination from the collected meta information.</param>
        /// <param name="reportDestination">Optional callback receiving the file name data is actually written to.</param>
        /// <param name="checkIfPaused">Optional pause check, passed through to the copying routine.</param>
        /// <param name="progress">Progress receiver; when resuming, reported values include the already loaded part.</param>
        /// <param name="cancellation">Cancellation token; cancelling it also cancels the pending client operation.</param>
        /// <returns>Name of the file the data was written to.</returns>
        private async Task <string> DownloadResumeSupportAsync([NotNull] CookieAwareWebClient client,
                                                               [NotNull] FlexibleLoaderGetPreferredDestinationCallback getPreferredDestination,
                                                               [CanBeNull] FlexibleLoaderReportDestinationCallback reportDestination, [CanBeNull] Func <bool> checkIfPaused,
                                                               IProgress <long> progress, CancellationToken cancellation)
        {
            // Common variables
            string filename = null, selectedDestination = null, actualFootprint = null;
            Stream remoteData = null;

            // Kept so the callback can be detached from the token once this method is done
            var cancellationRegistration = default(CancellationTokenRegistration);

            var resumeSupported = ResumeSupported;

            try {
                // Read resume-related data and remove it to avoid conflicts
                var resumeDestination             = CacheStorage.Get <string>(_keyDestination);
                var resumePartiallyLoadedFilename = CacheStorage.Get <string>(_keyPartiallyLoadedFilename);
                var resumeLastWriteDate           = CacheStorage.Get <DateTime?>(_keyLastWriteDate);
                var resumePreviousFootprint       = CacheStorage.Get <string>(_keyFootprint);
                ClearResumeData();

                // Collect known information for destination callback
                var information = FlexibleLoaderMetaInformation.FromLoader(this);

                // Opening stream to read…
                var headRequest = HeadRequestSupported && resumeDestination != null;
                using (headRequest ? client.SetMethod("HEAD") : null) {
                    Logging.Warning($"Initial request: {(headRequest ? "HEAD" : "GET")}");
                    remoteData = await client.OpenReadTaskAsync(Url);
                }

                cancellation.ThrowIfCancellationRequested();

                // Maybe we’ll be lucky enough to load the most accurate data
                if (client.ResponseHeaders != null)
                {
                    if (long.TryParse(client.ResponseHeaders[HttpResponseHeader.ContentLength] ?? "",
                                      NumberStyles.Any, CultureInfo.InvariantCulture, out var length))
                    {
                        TotalSize = information.TotalSize = length;
                    }

                    if (TryGetFileName(client.ResponseHeaders, out var fileName))
                    {
                        FileName = information.FileName = fileName;
                    }

                    // For example, Google Drive responds with “none” and yet allows to download file partially,
                    // so this header will only be checked if value is not defined.
                    if (resumeSupported == null)
                    {
                        var accept = client.ResponseHeaders[HttpResponseHeader.AcceptRanges] ?? "";
                        if (accept.Contains("bytes"))
                        {
                            resumeSupported = true;
                        }
                        else if (accept.Contains("none"))
                        {
                            resumeSupported = false;
                        }
                    }

                    client.LogResponseHeaders();
                }

                // Was the file partially loaded before?
                var partiallyLoaded = ResumeSupported != false && resumePartiallyLoadedFilename != null
                        ? new FileInfo(FileUtils.EnsureFilenameIsValid(resumePartiallyLoadedFilename)) : null;
                if (partiallyLoaded != null)
                {
                    Logging.Warning("Not finished: " + partiallyLoaded);
                }

                // Does it still exist
                if (partiallyLoaded?.Exists != true)
                {
                    Logging.Warning($"Partially downloaded file “{partiallyLoaded?.FullName}” does not exist");
                    partiallyLoaded = null;
                }

                // If so, wasn’t it changed since the last time?
                if (partiallyLoaded?.LastWriteTime > resumeLastWriteDate + TimeSpan.FromMinutes(5))
                {
                    Logging.Warning($"Partially downloaded file is newer that it should be: {partiallyLoaded.LastWriteTime}, expected: {resumeLastWriteDate}");
                    partiallyLoaded = null;
                }

                // Looks like file is partially downloaded, but let’s ensure link still leads to the same content
                actualFootprint = GetFootprint(information, client.ResponseHeaders);
                if (partiallyLoaded != null && resumePreviousFootprint != actualFootprint)
                {
                    Logging.Warning($"Footprints don’t match: {resumePreviousFootprint}≠{actualFootprint}");
                    partiallyLoaded = null;
                }

                // Let’s check where to load data, which is potentially the most actual data at this point
                var destination = getPreferredDestination(Url, information);
                selectedDestination = destination.Filename;
                if (partiallyLoaded != null && (!destination.CanResumeDownload || !FileUtils.ArePathsEqual(selectedDestination, resumeDestination)))
                {
                    Logging.Warning($"Different destination chosen: {selectedDestination} (before: {resumeDestination})");
                    partiallyLoaded = null;
                }

                // TODO: Check that header?

                // Where to write?
                // ReSharper disable once MergeConditionalExpression
                filename = partiallyLoaded != null ? partiallyLoaded.FullName : FileUtils.EnsureUnique(true, destination.Filename);
                reportDestination?.Invoke(filename);

                // Set cancellation token; the registration is disposed in the finally block
                cancellationRegistration = cancellation.Register(o => client.CancelAsync(), null);

                // Open write stream
                if (partiallyLoaded != null)
                {
                    var rangeFrom = partiallyLoaded.Length;
                    using (client.SetRange(new Tuple <long, long>(rangeFrom, -1))) {
                        Logging.Warning($"Trying to resume download from {rangeFrom} bytes…");

                        remoteData.Dispose();
                        remoteData = await client.OpenReadTaskAsync(Url);

                        cancellation.ThrowIfCancellationRequested();
                        client.LogResponseHeaders();

                        // It’s unknown if resume is supported or not at this point
                        if (resumeSupported == null)
                        {
                            // Peek at the first bytes: if they look like the start of a file, the server
                            // ignored the range and the download has to be restarted from scratch
                            var bytes      = new byte[16];
                            var firstBytes = await remoteData.ReadAsync(bytes, 0, bytes.Length);

                            cancellation.ThrowIfCancellationRequested();

                            if (CouldBeBeginningOfAFile(bytes))
                            {
                                using (var file = File.Create(filename)) {
                                    Logging.Warning("File beginning found, restart download");
                                    file.Write(bytes, 0, firstBytes);
                                    await CopyToAsync(remoteData, file, checkIfPaused, progress, cancellation);

                                    cancellation.ThrowIfCancellationRequested();
                                }

                                Logging.Write("Download finished");
                                return filename;
                            }

                            rangeFrom += firstBytes;
                        }

                        using (var file = new FileStream(filename, FileMode.Append, FileAccess.Write)) {
                            // Shift reported progress by the amount which was already there
                            await CopyToAsync(remoteData, file, checkIfPaused, new Progress <long>(v => {
                                progress?.Report(v + rangeFrom);
                            }), cancellation);

                            cancellation.ThrowIfCancellationRequested();
                        }
                    }
                }
                else
                {
                    if (headRequest)
                    {
                        Logging.Warning("Re-open request to be GET");
                        remoteData.Dispose();
                        remoteData = await client.OpenReadTaskAsync(Url);
                    }

                    using (var file = File.Create(filename)) {
                        Logging.Debug("Downloading the whole file…");
                        await CopyToAsync(remoteData, file, checkIfPaused, progress, cancellation);

                        cancellation.ThrowIfCancellationRequested();
                    }
                }

                Logging.Write("Download finished");
                return filename;
            } catch (Exception e) when(e is WebException || e.IsCancelled())
            {
                Logging.Write("Download is interrupted! Saving details to resume later…");
                var download = filename == null ? null : new FileInfo(filename);

                // Only remember the download if something was actually written to disk: check the size of
                // the file itself, not the length of its path (which is never zero here)
                if (download?.Exists == true && download.Length > 0)
                {
                    CacheStorage.Set(_keyDestination, selectedDestination);
                    CacheStorage.Set(_keyPartiallyLoadedFilename, filename);
                    CacheStorage.Set(_keyFootprint, actualFootprint);
                    CacheStorage.Set(_keyLastWriteDate, download.LastWriteTime);
                }
                else
                {
                    ClearResumeData();
                }

                throw;
            } finally {
                // Detach the callback so the token no longer references the client after this call
                cancellationRegistration.Dispose();
                remoteData?.Dispose();
            }
        }