// Loads this photo's details by scraping its mobile Facebook page
// ("https://m.facebook.com/photo.php?fbid=" + Id).
//
// SHAMAN builds: the c_user / xs values from Blog.Configuration_* are attached to the LazyUri as
// "$cookie-..." fragment parameters before the page is fetched.
// Other builds: the page is fetched through `client` (with AllowCachingEvenWithCustomRequestOptions set)
// when one is supplied, otherwise through GetNodeAsync.
//
// Results: LargestImage is built from the "View Full Size" link, and Date from the value of the `abbr`
// selector via Conversions.TryParseDateTime.
// The trailing block comment is a disabled alternative that read the same data from graph.facebook.com.
//
// NOTE(review): the conditional-compilation directives are unbalanced in this excerpt (an `#endif` appears
// before any `#if`, and the body opens with two braces) - the matching `#if` branch presumably sits above
// this line; confirm before editing.
public async Task LoadDetailsAsync(HttpClient client = null) { { #endif var l = new LazyUri("https://m.facebook.com/photo.php?fbid=" + Id); #if SHAMAN l.AppendFragmentParameter("$cookie-c_user", Blog.Configuration_FacebookUserId.ToString()); l.AppendFragmentParameter("$cookie-xs", Blog.Configuration_FacebookXs); var page = await l.GetHtmlNodeAsync(); #else HtmlNode page; if (client != null) { page = await l.Url.GetHtmlNodeAsync(new WebRequestOptions() { CustomHttpClient = client, AllowCachingEvenWithCustomRequestOptions = true }); } else { page = await GetNodeAsync(l.Url); } #endif var url = page.GetLinkUrl("a:text-is('View Full Size')"); #if SHAMAN LargestImage = WebImage.FromUrlUntracked(url); #else LargestImage = WebFile.FromUrl(url); #endif Date = Conversions.TryParseDateTime(page.TryGetValue("abbr"), null, false, null); /* * var k = await ("https://graph.facebook.com/" + Id + "?fields=images,from,created_time,backdated_time&access_token=" + Utils.EscapeDataString(Blog.Configuration_FacebookUserAccessToken)).AsLazyUri().GetJsonAsync<JObject>(); * * var img = ((JArray)k["images"]).MaxByOrDefault(x => ((JObject)x).Value<int>("height")); * LargestImage = WebImage.FromUrl(img.Value<string>("source").AsUri()); * var backdated = img.Value<string>("backdated_time"); * var created = img.Value<string>("created_time"); * * if (created != null) DateCreated = Conversions.ParseDateTime(created, null, null); * if (backdated != null) DateBackdated = Conversions.ParseDateTime(backdated, null, null); */ } }
// Computes the URL of the "next" page from the current page and a rule string.
//
// Supported rule formats:
//   a=1&b=c                        (only with isUnprefixedExtraParameters)
//   §a=1&b=c
//   .link-next
//   .link-next§§preserve
//   .link-next                     (with alwaysPreserveRemainingParameters)
//   .link-next§§preserve§§a={z}
//
// A rule that does not start with "§" begins with a selector for the next-page link; the link it
// yields replaces modifiableUrl. With "§§preserve" (or alwaysPreserveRemainingParameters) the query and
// fragment parameters of the previous URL are copied over unless the new URL already defines them.
// Any remaining part of the rule is a parameter list. A value of "{delete}" removes the parameter, a
// value of "{selector}" is replaced by the value that selector yields on `node` (prefixes "optional:"
// and "unchanged:" decide what happens when it yields nothing), and any other value is appended as is.
// A leading "£" in a parameter name stands for "$"; "$"-prefixed names are fragment parameters.
//
// Returns true when the URL was taken from a link or a "{selector}" value was looked up, false otherwise.
// modifiableUrl is set to null when the link is missing or not HTTP(S) (returns false), or when a
// required "{selector}" value is missing (returns true).
public static bool UpdateNextLink(ref LazyUri modifiableUrl, HtmlNode node, string rule, bool isUnprefixedExtraParameters = false, bool alwaysPreserveRemainingParameters = false)
{
    var changed = false;
    var keepOldParameters = alwaysPreserveRemainingParameters;

    if (!isUnprefixedExtraParameters)
    {
        string trailingParameters = null;

        if (!rule.StartsWith("§"))
        {
            if (rule.Contains("§§preserve"))
            {
                keepOldParameters = true;
                rule = rule.Replace("§§preserve", string.Empty);
            }

            if (rule.Contains("§§"))
            {
                trailingParameters = rule.CaptureAfter("§§");
                rule = rule.CaptureBefore("§§");
            }

            var linkNode = node.FindSingle(rule);
            if (linkNode == null)
            {
                modifiableUrl = null;
                return false;
            }

            // Prefer the node's link target; fall back to interpreting its value as a URL.
            var target = linkNode.TryGetLinkUrl() ?? linkNode.TryGetValue()?.AsUri();
            if (!HttpUtils.IsHttp(target))
            {
                modifiableUrl = null;
                return false;
            }

            if (!string.IsNullOrEmpty(target.Fragment))
            {
                target = target.GetLeftPart_UriPartial_Query().AsUri();
            }

            // Snapshot the old parameters before modifiableUrl is replaced.
            var inherited = keepOldParameters
                ? modifiableUrl.QueryParameters.Concat(modifiableUrl.FragmentParameters).ToList()
                : null;
            modifiableUrl = new LazyUri(target);

            if (inherited != null)
            {
                foreach (var old in inherited)
                {
                    var shadowedByQuery = old.Key.StartsWith("$json-query-")
                        && modifiableUrl.GetQueryParameter(old.Key.CaptureBetween("-query-", "-")) != null;
                    if (shadowedByQuery)
                    {
                        continue;
                    }

                    var alreadyDefined = modifiableUrl.GetQueryParameter(old.Key) != null
                        || modifiableUrl.GetFragmentParameter(old.Key) != null;
                    if (alreadyDefined)
                    {
                        continue;
                    }

                    if (old.Key.StartsWith("$"))
                    {
                        modifiableUrl.AppendFragmentParameter(old.Key, old.Value);
                    }
                    else
                    {
                        modifiableUrl.AppendQueryParameter(old.Key, old.Value);
                    }
                }
            }

            changed = true;
            if (trailingParameters == null)
            {
                return true;
            }
        }

        // Either the part after "§§", or the whole rule minus its leading "§".
        rule = trailingParameters ?? rule.Substring(1);
    }

    foreach (var parameter in HttpUtils.GetParameters(rule))
    {
        var name = parameter.Key;
        var value = parameter.Value;

        if (name.StartsWith("£"))
        {
            name = "$" + name.Substring(1);
        }

        if (value == "{delete}")
        {
            if (name.StartsWith("$"))
            {
                modifiableUrl.RemoveFragmentParameter(name);
            }
            else
            {
                modifiableUrl.RemoveQueryParameter(name);
            }
            continue;
        }

        var isPlaceholder = value.StartsWith("{") && value.EndsWith("}");
        if (isPlaceholder)
        {
            var selector = value.Substring(1, value.Length - 2);
            var removeWhenMissing = false;
            var keepWhenMissing = false;

            if (selector.StartsWith("optional:"))
            {
                removeWhenMissing = true;
                selector = selector.CaptureAfter(":");
            }

            if (selector.StartsWith("unchanged:"))
            {
                keepWhenMissing = true;
                selector = selector.CaptureAfter(":");
            }

            var extracted = node.TryGetValue(selector);
            changed = true;

            if (extracted == null)
            {
                if (keepWhenMissing)
                {
                    continue;
                }

                if (removeWhenMissing)
                {
                    if (name.StartsWith("$"))
                    {
                        modifiableUrl.RemoveFragmentParameter(name);
                    }
                    else
                    {
                        modifiableUrl.RemoveQueryParameter(name);
                    }
                    continue;
                }

                // A required value is missing: there is no next page.
                modifiableUrl = null;
                return true;
            }

            value = extracted;
        }

        if (name.StartsWith("$"))
        {
            modifiableUrl.AppendFragmentParameter(name, value);
        }
        else
        {
            modifiableUrl.AppendQueryParameter(name, value);
        }
    }

    return changed;
}
/// <summary>
/// Tries to obtain a PDF for the paper at <paramref name="url"/> and store it under "/Awdee/SciHub".
/// Sources are tried in order: LibGen by DOI, Academia.edu (with the configured cookie), the plain
/// page (to discover a DOI, title, or direct PDF link), and finally a Sci-Hub proxy of the URL.
/// </summary>
/// <param name="url">Address of the paper's page (or of the PDF itself).</param>
/// <param name="force">NOTE(review): not read anywhere in this method - confirm whether it was meant to bypass the cache.</param>
/// <param name="progress">Receives status messages and is passed on to the download.</param>
/// <returns>Path of the downloaded (or previously cached) PDF file.</returns>
/// <exception cref="CaptchaException">A fetched page contains an <c>img#captcha</c> element.</exception>
/// <exception cref="Exception">No PDF link could be found.</exception>
private static async Task<string> DownloadAttemptAsync(Uri url, bool force, IProgress<DataTransferProgress> progress)
{
    var originalUrl = url;

    // The cache key is the first 16 hex digits of the SHA-1 of the requested URL.
    string hash;
    using (var sha1 = new System.Security.Cryptography.SHA1Cng())
    {
        hash = ToHex(sha1.ComputeHash(Encoding.UTF8.GetBytes(url.AbsoluteUri))).ToLower().Substring(0, 16);
    }

    // cachedPapers is a plain Dictionary that is written under this same lock once a download
    // finishes, so the lookup is done inside the lock too; reading it unlocked could race with
    // a concurrent insert. The lock object is left unchanged.
    string cachedPath;
    lock (typeof(Paper))
    {
        if (cachedPapers == null)
        {
            cachedPapers = new Dictionary<string, string>();
            foreach (var x in Directory.EnumerateFiles("/Awdee/SciHub", "*.pdf"))
            {
                // Files are named "<hash>-<title>.pdf"; the part before the first '-' is the key.
                var name = Path.GetFileName(x);
                var p = name.IndexOf('-');
                cachedPapers[p == -1 ? x : name.Substring(0, p)] = x;
            }
        }

        cachedPath = cachedPapers.TryGetValue(hash);
    }

    if (cachedPath != null)
    {
        return cachedPath;
    }

    progress.Report("Initializing");

    WebFile pdfFile = null;
    string title = null;
    HtmlNode original = null;
    var cookies = new IsolatedCookieContainer();
    string doi = null;

    if (url.IsHostedOn("dx.doi.org"))
    {
        doi = HttpUtils.UnescapeDataString(url.AbsolutePath.Substring(1));
    }

    (pdfFile, title) = await TryGetLibgenAsync(doi, null, progress);

    if (pdfFile == null && url.IsHostedOn("academia.edu"))
    {
        cookies.AddRange(HttpUtils.ParseCookies(Configuration_AcademiaEduCookie.Trim()));
        progress.Report("Retrieving from Academia.edu");
        original = await url.GetHtmlNodeAsync(null, cookies);
        var u = new LazyUri(original.GetLinkUrl("a[data-download],.js-swp-download-button"));
        u.AppendCookies(cookies);
        u.AppendFragmentParameter("$header-Referer", url.AbsoluteUri);
        title = original.GetValue(":property('citation_title')");
        pdfFile = WebFile.FromUrlUntracked(u.Url);
    }

    if (pdfFile == null)
    {
        try
        {
            progress.Report("Retrieving plain page");
            original = await url.GetHtmlNodeAsync();

            // NOTE(review): this overwrites a DOI already taken from a dx.doi.org URL, even with null.
            doi = original.TryGetValue(":property('citation_doi'),meta[scheme='doi']:property('dc.Identifier')");
            if (doi == null && url.IsHostedOn("nih.gov"))
            {
                doi = original.TryGetValue("a[ref='aid_type=doi'],.doi > a");
                if (doi == null && url.AbsolutePath.StartsWith("/pubmed/"))
                {
                    progress.Report("Finding DOI on EuropePMC.org");
                    var alt = await HttpUtils.FormatEscaped("http://europepmc.org/abstract/med/{0}", url.GetPathComponent(1)).GetHtmlNodeAsync();
                    doi = alt.TryGetValue("meta[name='citation_doi']", "content");
                }
            }

            if (doi == null && url.IsHostedOn("sciencedirect.com"))
            {
                doi = original.TryGetValue("script:json-token('SDM.doi = ')");
            }

            if (doi != null)
            {
                (pdfFile, title) = await TryGetLibgenAsync(doi, null, progress);
            }

            if (pdfFile == null && url.IsHostedOn("researchgate.net"))
            {
                var u = FindPdfLink(original);
                if (u != null)
                {
                    pdfFile = WebFile.FromUrlUntracked(u);
                }
            }

            if (title == null)
            {
                title = original.TryGetValue(":property('citation_title')");
                if (title == null)
                {
                    title = original.TryGetValue("title")?.TrimEnd(" - PubMed - NCBI");
                }
            }
        }
        catch (NotSupportedResponseException ex) when (ex.ContentType == "application/pdf")
        {
            // The URL itself served a PDF instead of an HTML page.
            pdfFile = WebFile.FromUrlUntracked(url);
        }
    }

    if (pdfFile == null)
    {
        if (url.IsHostedOn("nlm.nih.gov"))
        {
            var a = original.TryGetLinkUrl(".portlet a");
            if (a != null)
            {
                // Continue with the linked page; it is sent through the proxy below.
                url = a;
            }
            else
            {
                var k = FindPdfLink(original);
                if (k != null)
                {
                    pdfFile = WebFile.FromUrlUntracked(k);
                }
            }
        }

        if (pdfFile == null)
        {
            if (!url.IsHostedOn("scielo.br"))
            {
                // Rewrite the host as "<host>.sci-hub.cc", keeping path, query and fragment.
                var u = new LazyUri("http://" + url.Host + ".sci-hub.cc" + url.AbsolutePath + url.Query + url.Fragment);
                progress.Report("Trying on SciHub");
                u.AppendFragmentParameter("$allow-same-redirect", "1");
                url = u.Url;
            }
            else
            {
                progress.Report("Trying on " + url.Host);
            }

            var scihub = await url.GetHtmlNodeAsync(null, cookies);
            if (scihub.FindSingle("img#captcha") != null)
            {
                throw new CaptchaException(scihub.OwnerDocument.PageUrl);
            }

            if (scihub.OwnerDocument.PageUrl.IsHostedOn("libgen.io"))
            {
                var u = scihub.GetLinkUrl("a[href*='/ads.php?']");
                progress.Report("Found on LibGen.IO");
                (pdfFile, title) = await TryGetLibgenAsync(null, u, progress);
            }
            else
            {
                var pdflink = scihub.TryGetLinkUrl("iframe#pdf") ?? FindPdfLink(scihub);
                if (pdflink != null)
                {
                    var u = new LazyUri(pdflink);
                    u.AppendCookies(cookies);
                    pdfFile = WebFile.FromUrlUntracked(u.Url);
                }
            }
        }
    }

    if (pdfFile == null)
    {
        throw new Exception("Could not find any PDF links.");
    }

    var uu = new LazyUri(pdfFile.Url);
    uu.AppendFragmentParameter("$allow-same-redirect", "1");
    uu.AppendFragmentParameter("$forbid-html", "1");
    pdfFile = WebFile.FromUrlUntracked(uu.Url);

    if (title == null)
    {
        var z = pdfFile.SuggestedFileName;
        if (z != null)
        {
            title = Path.GetFileNameWithoutExtension(z);
        }
    }
    else
    {
        title = title.Trim().TrimEnd(".").RegexReplace(@"\s+", "-");
    }

    progress.Report("Downloading from " + pdfFile.Url.Host);

    string path;
    try
    {
        // NOTE(review): title is not stripped of characters that are invalid in file names here;
        // confirm that DownloadAsync sanitises the name.
        path = await pdfFile.DownloadAsync("/Awdee/SciHub", hash + "-" + title + ".pdf", WebFile.FileOverwriteMode.Skip, CancellationToken.None, progress);
    }
    catch (NotSupportedResponseException ex)
    {
        if (ex.Page != null && ex.Page.FindSingle("img#captcha") != null)
        {
            throw new CaptchaException(ex.Page.OwnerDocument.PageUrl);
        }

        throw;
    }

    var filename = Path.GetFileName(path);
    lock (typeof(Paper))
    {
        cachedPapers[hash] = path;
        File.AppendAllText("/Awdee/SciHubDownloads.csv", string.Join("\t", originalUrl, title, doi, filename, new FileInfo(path).Length) + "\r\n", Encoding.UTF8);
    }

    progress.Report("Done.");
    return path;
}