/// <summary>
/// Convenience overload: forwards the <c>Cookies</c> member of
/// <paramref name="cookies"/> to the <c>AppendCookies</c> overload that accepts it.
/// </summary>
/// <param name="cookies">Container whose <c>Cookies</c> member is appended.</param>
public void AppendCookies(IsolatedCookieContainer cookies)
    => AppendCookies(cookies.Cookies);
/// <summary>
/// Makes one attempt at locating and downloading the PDF of the paper referenced by
/// <paramref name="url"/>, returning the path of the local file.
/// </summary>
/// <remarks>
/// Steps, in order:
/// <list type="number">
/// <item>Derive a cache key: the first 16 lowercase hex characters of the SHA-1 of the absolute URL.</item>
/// <item>Return the cached file if the key is already present in <c>cachedPapers</c>
/// (lazily populated from the <c>*.pdf</c> files in <c>/Awdee/SciHub</c>).</item>
/// <item>Try <c>TryGetLibgenAsync</c> with the DOI taken from a <c>dx.doi.org</c> URL (or <c>null</c>).</item>
/// <item>For <c>academia.edu</c>, fetch the page with the configured cookie and use its download link.</item>
/// <item>Otherwise fetch the plain page, look for a DOI (with extra lookups for <c>nih.gov</c> and
/// <c>sciencedirect.com</c>), retry <c>TryGetLibgenAsync</c>, and for <c>researchgate.net</c> look for a PDF link.</item>
/// <item>As a last resort, request the page through the <c>.sci-hub.cc</c> host suffix
/// (except for <c>scielo.br</c>, which is queried directly) and extract the PDF link.</item>
/// <item>Download the file to <c>/Awdee/SciHub</c>, record it in the cache and append a line to
/// <c>/Awdee/SciHubDownloads.csv</c>.</item>
/// </list>
/// </remarks>
/// <param name="url">Address of the paper's page (or of the PDF itself).</param>
/// <param name="force">Not read anywhere in this method body. NOTE(review): presumably meant to bypass the cache — confirm with callers.</param>
/// <param name="progress">Receives status messages and is passed on to the download helpers.</param>
/// <returns>The path returned by the download, or the cached path when the paper was already downloaded.</returns>
/// <exception cref="CaptchaException">A fetched page contains an <c>img#captcha</c> element.</exception>
/// <exception cref="Exception">No PDF link could be found by any of the strategies.</exception>
private static async Task<string> DownloadAttemptAsync(Uri url, bool force, IProgress<DataTransferProgress> progress)
{
    var originalUrl = url;

    // Cache key: first 16 hex characters of SHA-1(absolute URL).
    // SHA1.Create() yields the same digest as SHA1Cng without binding to the Windows CNG implementation.
    string hash;
    using (var sha1 = System.Security.Cryptography.SHA1.Create())
    {
        hash = ToHex(sha1.ComputeHash(Encoding.UTF8.GetBytes(url.AbsoluteUri))).ToLowerInvariant().Substring(0, 16);
    }

    // NOTE(review): locking on typeof(Paper) is kept as-is because other members may
    // synchronize on the same object — confirm before switching to a private lock object.
    string cachedPath;
    lock (typeof(Paper))
    {
        if (cachedPapers == null)
        {
            cachedPapers = new Dictionary<string, string>();
            foreach (var x in Directory.EnumerateFiles("/Awdee/SciHub", "*.pdf"))
            {
                // Files are named "<hash>-<title>.pdf"; the part before the first '-' is the key.
                var name = Path.GetFileName(x);
                var p = name.IndexOf('-');
                cachedPapers[p == -1 ? x : name.Substring(0, p)] = x;
            }
        }

        // The lookup happens under the lock: the dictionary is written under this same lock
        // further down, and Dictionary<,> does not support a reader racing a writer.
        cachedPath = cachedPapers.TryGetValue(hash);
    }

    if (cachedPath != null)
    {
        return cachedPath;
    }

    progress.Report("Initializing");

    WebFile pdfFile = null;
    string title = null;
    HtmlNode original = null;
    var cookies = new IsolatedCookieContainer();
    string doi = null;

    if (url.IsHostedOn("dx.doi.org"))
    {
        // The DOI is the URL path without its leading '/'.
        doi = HttpUtils.UnescapeDataString(url.AbsolutePath.Substring(1));
    }

    (pdfFile, title) = await TryGetLibgenAsync(doi, null, progress);

    if (pdfFile == null && url.IsHostedOn("academia.edu"))
    {
        cookies.AddRange(HttpUtils.ParseCookies(Configuration_AcademiaEduCookie.Trim()));
        progress.Report("Retrieving from Academia.edu");
        original = await url.GetHtmlNodeAsync(null, cookies);
        var u = new LazyUri(original.GetLinkUrl("a[data-download],.js-swp-download-button"));
        u.AppendCookies(cookies);
        u.AppendFragmentParameter("$header-Referer", url.AbsoluteUri);
        title = original.GetValue(":property('citation_title')");
        pdfFile = WebFile.FromUrlUntracked(u.Url);
    }

    if (pdfFile == null)
    {
        try
        {
            progress.Report("Retrieving plain page");
            original = await url.GetHtmlNodeAsync();
            doi = original.TryGetValue(":property('citation_doi'),meta[scheme='doi']:property('dc.Identifier')");

            if (doi == null && url.IsHostedOn("nih.gov"))
            {
                doi = original.TryGetValue("a[ref='aid_type=doi'],.doi > a");
                if (doi == null && url.AbsolutePath.StartsWith("/pubmed/", StringComparison.Ordinal))
                {
                    progress.Report("Finding DOI on EuropePMC.org");
                    var alt = await HttpUtils.FormatEscaped("http://europepmc.org/abstract/med/{0}", url.GetPathComponent(1)).GetHtmlNodeAsync();
                    doi = alt.TryGetValue("meta[name='citation_doi']", "content");
                }
            }

            if (doi == null && url.IsHostedOn("sciencedirect.com"))
            {
                doi = original.TryGetValue("script:json-token('SDM.doi = ')");
            }

            if (doi != null)
            {
                (pdfFile, title) = await TryGetLibgenAsync(doi, null, progress);
            }

            if (pdfFile == null && url.IsHostedOn("researchgate.net"))
            {
                var u = FindPdfLink(original);
                if (u != null)
                {
                    pdfFile = WebFile.FromUrlUntracked(u);
                }
            }

            if (title == null)
            {
                title = original.TryGetValue(":property('citation_title')");
                if (title == null)
                {
                    title = original.TryGetValue("title")?.TrimEnd(" - PubMed - NCBI");
                }
            }
        }
        catch (NotSupportedResponseException ex) when (ex.ContentType == "application/pdf")
        {
            // The URL served a PDF instead of an HTML page: download it directly.
            pdfFile = WebFile.FromUrlUntracked(url);
        }
    }

    if (pdfFile == null)
    {
        if (url.IsHostedOn("nlm.nih.gov"))
        {
            // Prefer the link found under ".portlet a" as the new lookup URL;
            // otherwise look for a PDF link on the page itself.
            var a = original.TryGetLinkUrl(".portlet a");
            if (a != null)
            {
                url = a;
            }
            else
            {
                var k = FindPdfLink(original);
                if (k != null)
                {
                    pdfFile = WebFile.FromUrlUntracked(k);
                }
            }
        }

        if (pdfFile == null)
        {
            if (!url.IsHostedOn("scielo.br"))
            {
                // Re-request the same path through the ".sci-hub.cc" host suffix.
                var u = new LazyUri("http://" + url.Host + ".sci-hub.cc" + url.AbsolutePath + url.Query + url.Fragment);
                progress.Report("Trying on SciHub");
                u.AppendFragmentParameter("$allow-same-redirect", "1");
                url = u.Url;
            }
            else
            {
                progress.Report("Trying on " + url.Host);
            }

            var scihub = await url.GetHtmlNodeAsync(null, cookies);
            if (scihub.FindSingle("img#captcha") != null)
            {
                throw new CaptchaException(scihub.OwnerDocument.PageUrl);
            }

            if (scihub.OwnerDocument.PageUrl.IsHostedOn("libgen.io"))
            {
                var u = scihub.GetLinkUrl("a[href*='/ads.php?']");
                progress.Report("Found on LibGen.IO");
                (pdfFile, title) = await TryGetLibgenAsync(null, u, progress);
            }
            else
            {
                var pdflink = scihub.TryGetLinkUrl("iframe#pdf") ?? FindPdfLink(scihub);
                if (pdflink != null)
                {
                    var u = new LazyUri(pdflink);
                    u.AppendCookies(cookies);
                    pdfFile = WebFile.FromUrlUntracked(u.Url);
                }
            }
        }
    }

    if (pdfFile != null)
    {
        var uu = new LazyUri(pdfFile.Url);
        uu.AppendFragmentParameter("$allow-same-redirect", "1");
        uu.AppendFragmentParameter("$forbid-html", "1");
        pdfFile = WebFile.FromUrlUntracked(uu.Url);

        if (title == null)
        {
            // No title found on any page: fall back to the file name suggested for the download.
            var z = pdfFile.SuggestedFileName;
            if (z != null)
            {
                title = Path.GetFileNameWithoutExtension(z);
            }
        }
        else
        {
            // Strip a trailing "." and collapse whitespace runs to '-' for use in the file name.
            title = title.Trim().TrimEnd(".").RegexReplace(@"\s+", "-");
        }

        progress.Report("Downloading from " + pdfFile.Url.Host);

        string path;
        try
        {
            path = await pdfFile.DownloadAsync("/Awdee/SciHub", hash + "-" + title + ".pdf", WebFile.FileOverwriteMode.Skip, CancellationToken.None, progress);
        }
        catch (NotSupportedResponseException ex)
        {
            // A captcha page returned instead of the PDF is reported as a CaptchaException.
            if (ex.Page != null && ex.Page.FindSingle("img#captcha") != null)
            {
                throw new CaptchaException(ex.Page.OwnerDocument.PageUrl);
            }

            throw;
        }

        var filename = Path.GetFileName(path);
        lock (typeof(Paper))
        {
            cachedPapers[hash] = path;
            File.AppendAllText("/Awdee/SciHubDownloads.csv", string.Join("\t", originalUrl, title, doi, filename, new FileInfo(path).Length) + "\r\n", Encoding.UTF8);
        }

        progress.Report("Done.");
        return path;
    }

    throw new Exception("Could not find any PDF links.");
}