/// <summary>
/// Resolves the Azure blob URL that backs the cached copy of <paramref name="url"/>.
/// </summary>
/// <param name="url">The URL whose container and blob name are derived.</param>
/// <returns>Whatever <c>AzureApi.GetUrlForBlob</c> returns for the derived container and blob name.</returns>
internal static Uri GetAzureUrl(LazyUri url)
{
    var azureContainer = GetAzureContainer(url);
    var blobName = GetFileCachePath(url);
    return AzureApi.GetUrlForBlob(azureContainer, blobName);
}
/// <summary>
/// Deletes the cached entry for <paramref name="url"/> from the given cache directory.
/// </summary>
/// <param name="url">The URL whose cache entry is removed.</param>
/// <param name="cachedir">The cache directory; must not be null.</param>
/// <param name="extension">The file extension used for the cache entry.</param>
/// <param name="fileCache">When true and an Azure API is configured (non-STANDALONE builds), the Azure blob is deleted instead of local files.</param>
/// <exception cref="InvalidOperationException">Thrown when <paramref name="cachedir"/> is null.</exception>
private static async Task DeleteFileCacheAsync(LazyUri url, string cachedir, string extension, bool fileCache)
{
    if (cachedir == null)
    {
        throw new InvalidOperationException("The cache path is not configured.");
    }

    // The name is computed before the Azure check, exactly as before, so any
    // exception raised while building it still surfaces on both paths.
    var cachedFile = GetFileSystemName(url, cachedir, extension, false, false);

#if !STANDALONE
    if (fileCache && AzureApi != null)
    {
        await DeleteAzureBlobAsync(url);
        return;
    }
#endif

    BlobStore.Delete(cachedFile);
    if (cachedFile.Contains('\u2194')) // ↔
    {
        // Names containing the marker also get their ".lng" sibling removed.
        BlobStore.Delete(Path.ChangeExtension(cachedFile, ".lng"));
    }
    BlobStore.Delete(Path.ChangeExtension(cachedFile, ".err"));
}
/// <summary>
/// Converts an exception raised by a web request into a <c>WebCache</c> record describing the failure.
/// </summary>
/// <param name="ex">The exception to describe; its inner-exception chain is searched for the most relevant one.</param>
/// <param name="respondingUrl">Stored as the record's <c>RedirectUrl</c>.</param>
/// <param name="responseSize">Not used by this method.</param>
/// <returns>
/// The populated record, or null when (DESKTOP, non-CORECLR builds only) the chain contains a <c>ThreadAbortException</c>.
/// </returns>
internal static WebCache GetWebCacheForException(Exception ex, LazyUri respondingUrl, long responseSize)
{
#if DESKTOP && !CORECLR
    if (ex.RecursiveEnumeration(x => x.InnerException).Any(x => x is ThreadAbortException))
    {
        return null;
    }
#endif

    // Preference order: a web/timeout/unsupported-response exception anywhere in the chain,
    // then the first non-AggregateException after leading aggregates, then the original.
    var relevant = ex.RecursiveEnumeration(x => x.InnerException)
        .FirstOrDefault(x => (x is WebException) || (x is TimeoutException) || (x is NotSupportedResponseException));
    if (relevant == null)
    {
        relevant = ex.RecursiveEnumeration(x => x.InnerException)
            .SkipWhile(x => x is AggregateException)
            .FirstOrDefault();
    }
    if (relevant == null)
    {
        relevant = ex;
    }

    var errorCode = 0;
    var webException = relevant as WebException;
    if (webException != null)
    {
        errorCode = (int)webException.GetResponseStatusCode();
        if (errorCode == 0)
        {
            // No response status available: fall back to the WebExceptionStatus value.
            errorCode = (int)webException.Status;
        }
    }

    var unsupportedResponse = relevant as NotSupportedResponseException;

    var cacheEntry = new WebCache();
    cacheEntry.ErrorCode = errorCode;
    cacheEntry.ExceptionMessage = relevant.Message;
    cacheEntry.ExceptionType = relevant.GetType().FullName;
    cacheEntry.ContentType = unsupportedResponse != null ? unsupportedResponse.ContentType : null;
    cacheEntry.RedirectUrl = respondingUrl;
    return cacheEntry;
}
/// <summary>
/// On DESKTOP builds, attaches a synthetic <c>HttpWebResponse</c> (carrying only a URL and a status code)
/// to a <c>WebException</c> that has no response, using reflection on private fields.
/// Does nothing when any required private field cannot be found.
/// </summary>
/// <param name="w">The exception to patch; ignored when null or when it already has a response.</param>
/// <param name="url">The URL to store in the synthetic response; ignored when null.</param>
/// <param name="statusCode">The status code to store in the synthetic response.</param>
internal static void SetDummyResponseWithUrl(WebException w, LazyUri url, HttpStatusCode statusCode)
{
#if DESKTOP
    if (w == null || url == null || w.Response != null)
    {
        return;
    }

    const BindingFlags privateInstance = BindingFlags.Instance | BindingFlags.NonPublic;

    var responseField = typeof(WebException).GetField("response", privateInstance); // m_Response on netfx
    if (responseField == null)
    {
        return;
    }
#if CORECLR
    Sanity.NotImplementedButTryToContinue();
    return;
#else
    var fakeResponse = FormatterServices.GetUninitializedObject(typeof(HttpWebResponse));

    var responseUriField = typeof(HttpWebResponse).GetField("uri", privateInstance); // m_Uri on netfx
    if (responseUriField == null)
    {
        return;
    }

    var responseStatusField = typeof(HttpWebResponse).GetField("statusCode", privateInstance); // m_StatusCode on netfx
    if (responseStatusField == null)
    {
        return;
    }

    responseStatusField.SetValue(fakeResponse, statusCode);
    responseUriField.SetValue(fakeResponse, url.Url);
    responseField.SetValue(w, fakeResponse);
#endif
#endif
}
private static HttpRequestMessage CreateRequestInternal(LazyUri url, WebRequestOptions options, bool forceSameUrl, int redirectionIndex #if WEBCLIENT , out HttpContent requestContent #endif ) { if (!forceSameUrl) { url = MaybeAddAdditionalQueryParameters(url, options); } var pathAndQueryConsistentUrl = url.GetPathAndQueryConsistentUrlIfCached(); if (pathAndQueryConsistentUrl == null) { if ( #if NET35 url.FragmentParameters.Count() #else url.FragmentParameters.Count #endif >= Configuration_MinNumberOfFragmentParametersForAdHocRequestUri) { pathAndQueryConsistentUrl = url.GetPathAndQueryAsUri(); } else { pathAndQueryConsistentUrl = url.PathAndQueryConsistentUrl; } }
/// <summary>
/// Returns the file-cache path for <paramref name="url"/>, optionally verifying that a usable cached file exists.
/// </summary>
/// <param name="url">The URL whose cache path is requested.</param>
/// <param name="checkExistence">
/// When true, null is returned if the file is missing, or if it is empty and a sibling ".err" file exists.
/// </param>
/// <returns>The cache path, or null as described above.</returns>
internal static string GetPrefetchedFilePath(LazyUri url, bool checkExistence)
{
    if (Caching.AzureApi != null)
    {
        Sanity.NotImplemented();
    }

    var cachedFile = Caching.GetFileCachePath(url);
    if (!checkExistence)
    {
        return cachedFile;
    }

    if (!BlobStore.Exists(cachedFile))
    {
        return null;
    }

    // An empty file accompanied by an ".err" file is treated as unavailable.
    var hasRecordedError = BlobStore.GetLength(cachedFile) == 0
        && BlobStore.Exists(Path.ChangeExtension(cachedFile, ".err"));
    return hasRecordedError ? null : cachedFile;
}
/// <summary>
/// Requests this object's URL, optionally starting at a byte offset, and records the response via <c>SaveResponseInfo</c>.
/// </summary>
/// <param name="startPosition">Byte offset to start from; a non-zero value adds a <c>Range</c> header.</param>
/// <returns>
/// The pending <c>partialDownload</c> response when one exists and <paramref name="startPosition"/> is 0
/// (the field is cleared); otherwise a freshly requested response.
/// </returns>
public async Task<HttpResponseMessage> GetResponseAsync(long startPosition)
{
    if (startPosition == 0 && partialDownload != null)
    {
        var pending = partialDownload;
        partialDownload = null;
        return pending;
    }

    var requestOptions = new WebRequestOptions();
    requestOptions.Timeout = 30000;
    requestOptions.TimeoutSecondRetrialAfterError = 10000;
    requestOptions.TimeoutStartSecondRetrial = null;

    var lazyUrl = new LazyUri(this.Url);
    HttpExtensionMethods.ProcessMetaParameters(lazyUrl, requestOptions);

    if (startPosition != 0)
    {
        var rangeValue = "bytes=" + startPosition + "-";
        requestOptions.AddHeader("Range", rangeValue);
    }

    var httpResponse = await HttpExtensionMethods.GetResponseAsync(lazyUrl, requestOptions);
    SaveResponseInfo(httpResponse, null);
    return httpResponse;
}
/// <summary>
/// Returns <paramref name="resultsPage"/> with the "ei" and "sei" query parameters removed.
/// </summary>
/// <param name="resultsPage">The results page URL to normalize.</param>
/// <returns>The URL without those two query parameters.</returns>
private Uri NormalizeGoogleResultsUrl(Uri resultsPage)
{
    var normalized = new LazyUri(resultsPage);
    foreach (var parameterName in new[] { "ei", "sei" })
    {
        normalized.RemoveQueryParameter(parameterName);
    }
    return normalized.Url;
}
/// <summary>
/// Computes the ".awc" web-cache file path for <paramref name="url"/>.
/// </summary>
/// <param name="url">The URL to map to a cache file.</param>
/// <param name="hasExtraOptions">When true, no cache path is produced and null is returned.</param>
/// <param name="createLng">Forwarded to <c>Caching.GetFileSystemName</c>.</param>
/// <returns>The cache path (using <c>ThreadWebCache</c> if set, otherwise <c>WebCachePath</c>), or null.</returns>
internal static string GetWebCachePath(LazyUri url, bool hasExtraOptions, bool createLng)
{
    if (hasExtraOptions)
    {
        return null;
    }

    var cacheFolder = ThreadWebCache ?? WebCachePath;
    return Caching.GetFileSystemName(url, cacheFolder, ".awc", createLng, false);
}
/// <summary>
/// Synchronously deletes the ".awc" web-cache entry for <paramref name="url"/>.
/// </summary>
/// <remarks>
/// The underlying deletion is called with <c>fileCache: false</c> and is expected to finish
/// without suspending; this method refuses to block on a task that is still running.
/// </remarks>
/// <param name="url">The URL whose web-cache entry is removed.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when the deletion task has not completed by the time it is returned.
/// Errors raised by the deletion itself are rethrown as-is.
/// </exception>
public static void DeleteWebCache(LazyUri url)
{
    var task = DeleteFileCacheAsync(url, ThreadWebCache ?? WebCachePath, ".awc", false);

    // Task.IsCompleted is already true for faulted and canceled tasks, so a single check suffices.
    if (!task.IsCompleted)
    {
        throw new InvalidOperationException("Task did not complete synchronously.");
    }

    // The task is complete, so this never blocks; it only surfaces the original exception, if any.
    task.GetAwaiter().GetResult();
}
/// <summary>
/// Creates the exception for a response whose content type cannot be handled.
/// </summary>
/// <param name="retrievedContentType">
/// The Content-Type reported by the server. If it contains "html" (case-insensitive) the message says the
/// data does not look like HTML; otherwise it reports an unsupported Content-Type.
/// </param>
/// <param name="finalUrl">The URL that produced the response.</param>
public NotSupportedResponseException(string retrievedContentType, LazyUri finalUrl)
    : base(
        (retrievedContentType == null || !retrievedContentType.Contains("html", StringComparison.OrdinalIgnoreCase)
            ? "The server returned an unsupported Content-Type: " + retrievedContentType + "."
            : "The server returned data which, although marked as " + retrievedContentType + ", doesn't look like actual HTML.")
        + " If the response is supposed to be interpreted as plain text, add the #$assume-text=1 meta parameter. If it is HTML, add #$assume-html=1",
        HttpUtils.UnexpectedResponseType)
{
    ContentType = retrievedContentType;
    ResponseUrl = finalUrl;
}
/// <summary>
/// Creates the exception for a response whose content type cannot be handled.
/// </summary>
/// <param name="retrievedContentType">
/// The Content-Type reported by the server. If it contains "html" (case-insensitive) the message says the
/// data does not look like HTML; otherwise it reports an unsupported Content-Type.
/// </param>
/// <param name="finalUrl">The URL that produced the response.</param>
public NotSupportedResponseException(string retrievedContentType, LazyUri finalUrl)
    : base(
        (retrievedContentType == null || !retrievedContentType.Contains("html", StringComparison.OrdinalIgnoreCase)
            ? "The server returned an unsupported Content-Type: " + retrievedContentType + "."
            : "The server returned data which, although marked as " + retrievedContentType + ", doesn't look like actual HTML.")
        + " If the response is supposed to be interpreted as plain text, add the #$assume-text=1 meta parameter. If it is HTML, add #$assume-html=1",
        HttpUtils.Error_UnexpectedResponseType)
{
    ContentType = retrievedContentType;
    ResponseUrl = finalUrl;
}
/// <summary>
/// Deletes the Azure blob for <paramref name="url"/> if it appears in the container's cached file list.
/// </summary>
/// <param name="url">The URL whose blob is removed.</param>
private static async Task DeleteAzureBlobAsync(LazyUri url)
{
    var azureContainer = GetAzureContainer(url);
    var knownBlobs = await GetAzureCachedFiles(azureContainer);
    var blobName = Caching.GetFileCachePath(url);

    if (!knownBlobs.Contains(blobName))
    {
        return;
    }

    await AzureApi.DeleteBlob(azureContainer, blobName);
}
/// <summary>
/// Clears the stream manager (and, on DESKTOP builds, the prefetching task and the file-cache entry
/// for this object's URL), then raises <c>OnChanged</c>.
/// </summary>
public virtual void ResetError()
{
    manager = null;
#if DESKTOP
    prefetchingTask = null;

    var lazyUrl = new LazyUri(Url);
    if (Caching.GetFileCachePath(lazyUrl) != null)
    {
        // The deletion is started but not awaited.
        Caching.DeleteFileCacheAsync(lazyUrl).FireAndForget();
    }
#endif
    OnChanged();
}
/// <summary>
/// Derives the Azure container name from the host of <paramref name="url"/>.
/// </summary>
/// <param name="url">The URL whose <c>DnsSafeHost</c> is inspected.</param>
/// <returns>
/// The last three dot-separated labels when the host has more than three; the host without its leading
/// "www" label when it has exactly three labels starting with "www"; otherwise the host unchanged.
/// </returns>
private static string GetAzureContainerInternal(LazyUri url)
{
    var hostName = url.DnsSafeHost;
    var labels = hostName.SplitFast('.');

    if (labels.Length > 3)
    {
        return string.Join(".", labels.Skip(labels.Length - 3));
    }

    if (labels.Length == 3 && labels[0] == "www")
    {
        return labels[1] + "." + labels[2];
    }

    return hostName;
}
/// <summary>
/// Attempts to rebuild an <c>HtmlNode</c> from the cache file at <paramref name="cachePath"/>.
/// </summary>
/// <param name="cachePath">Path of the cache file to read.</param>
/// <param name="url">The URL the cache entry belongs to.</param>
/// <param name="cookieDestination">Forwarded to <c>RecreateNode</c>.</param>
/// <returns>
/// The recreated node, or null when the cache file cannot be read, or when it records an exception
/// and <c>Caching.IgnoreCachedFailedRequests</c> is set.
/// </returns>
internal static HtmlNode TryReadFromCache(string cachePath, LazyUri url, WebRequestOptions cookieDestination)
{
    var cached = Caching.TryReadCacheFile(cachePath);
    if (cached == null)
    {
        return null;
    }

    var isIgnoredFailure = cached.ExceptionType != null && Caching.IgnoreCachedFailedRequests;
    if (isIgnoredFailure)
    {
        return null;
    }

    Utils.RaiseWebRequestEvent(url, true);
    return cached.RecreateNode(url, cookieDestination, cachePath);
}
/// <summary>
/// Loads the mobile Facebook photo page for this object's <c>Id</c> and fills in
/// <c>LargestImage</c> (from the "View Full Size" link) and <c>Date</c> (from the page's "abbr" value).
/// </summary>
/// <param name="client">
/// Optional HTTP client. In non-SHAMAN builds, a non-null client is passed as <c>CustomHttpClient</c>;
/// otherwise the page is fetched through <c>GetNodeAsync</c>.
/// </param>
// NOTE(review): the "#endif" below has no matching "#if" within this excerpt, and the body opens two
// braces; presumably an alternative signature was guarded by a directive that is not visible here — confirm.
public async Task LoadDetailsAsync(HttpClient client = null)
{
    {
#endif
        var l = new LazyUri("https://m.facebook.com/photo.php?fbid=" + Id);
#if SHAMAN
        // SHAMAN builds pass the configured Facebook cookies as fragment meta-parameters.
        l.AppendFragmentParameter("$cookie-c_user", Blog.Configuration_FacebookUserId.ToString());
        l.AppendFragmentParameter("$cookie-xs", Blog.Configuration_FacebookXs);
        var page = await l.GetHtmlNodeAsync();
#else
        HtmlNode page;
        if (client != null)
        {
            page = await l.Url.GetHtmlNodeAsync(new WebRequestOptions() { CustomHttpClient = client, AllowCachingEvenWithCustomRequestOptions = true });
        }
        else
        {
            page = await GetNodeAsync(l.Url);
        }
#endif
        var url = page.GetLinkUrl("a:text-is('View Full Size')");
#if SHAMAN
        LargestImage = WebImage.FromUrlUntracked(url);
#else
        LargestImage = WebFile.FromUrl(url);
#endif
        Date = Conversions.TryParseDateTime(page.TryGetValue("abbr"), null, false, null);

        // Disabled alternative that queried the Graph API instead of the mobile page.
        /*
         * var k = await ("https://graph.facebook.com/" + Id + "?fields=images,from,created_time,backdated_time&access_token=" + Utils.EscapeDataString(Blog.Configuration_FacebookUserAccessToken)).AsLazyUri().GetJsonAsync<JObject>();
         *
         * var img = ((JArray)k["images"]).MaxByOrDefault(x => ((JObject)x).Value<int>("height"));
         * LargestImage = WebImage.FromUrl(img.Value<string>("source").AsUri());
         * var backdated = img.Value<string>("backdated_time");
         * var created = img.Value<string>("created_time");
         *
         * if (created != null) DateCreated = Conversions.ParseDateTime(created, null, null);
         * if (backdated != null) DateBackdated = Conversions.ParseDateTime(backdated, null, null);
         */
    }
}
/// <summary>
/// Appends the additional query parameters configured in <paramref name="options"/> to a copy of <paramref name="url"/>.
/// </summary>
/// <param name="url">The source URL; never modified in place.</param>
/// <param name="options">Request options; may be null or carry no additional parameters.</param>
/// <returns>
/// The original instance when there is nothing to append; otherwise a clone with the flattened parameters appended.
/// </returns>
private static LazyUri MaybeAddAdditionalQueryParameters(LazyUri url, WebRequestOptions options)
{
    if (options == null || options.AdditionalQueryParameters == null)
    {
        return url;
    }

    var alreadyCloned = false;
    foreach (var parameter in Flatten(options.AdditionalQueryParameters))
    {
        if (!alreadyCloned)
        {
            // Clone lazily so an empty parameter list leaves the caller's instance untouched.
            url = url.Clone();
            alreadyCloned = true;
        }
        url.AppendQueryParameter(parameter.Key, ToString(parameter.Value));
    }
    return url;
}
/// <summary>
/// Recreates an exception from the failure information stored in a <c>WebCache</c> record.
/// </summary>
/// <param name="data">The cached failure record. Its <c>ExceptionType</c> may be rewritten when it holds the legacy
/// "Shaman.Runtime.NotSupportedResponseException" name.</param>
/// <param name="url">The URL used for the synthetic response attached to rebuilt <c>WebException</c>s.</param>
/// <returns>
/// A <c>NotSupportedResponseException</c>, an instance of the recorded type when it can be found in the
/// assemblies of <c>int</c> or <c>Uri</c>, or a <c>WebException</c> describing the cached failure.
/// </returns>
internal static Exception RebuildException(WebCache data, LazyUri url)
{
    if (data.ExceptionType == typeof(NotSupportedResponseException).FullName)
    {
        return new NotSupportedResponseException(data.ContentType, data.RedirectUrl);
    }

    var isWebException = data.ExceptionType == "System.Net.WebException"
        || data.ExceptionType == "System.Net.Reimpl.WebException";

    if (!isWebException)
    {
        try
        {
            if (data.ExceptionType == "Shaman.Runtime.NotSupportedResponseException")
            {
                data.ExceptionType = typeof(NotSupportedResponseException).FullName;
            }

            var candidateAssemblies = new[] { typeof(int).GetTypeInfo().Assembly, typeof(Uri).GetTypeInfo().Assembly };
            var exceptionType = candidateAssemblies
                .Select(x => x.GetType(data.ExceptionType))
                .FirstOrDefault(x => x != null);

            if (exceptionType != null)
            {
                return (Exception)Activator.CreateInstance(exceptionType);
            }
            return new WebException(data.ExceptionMessage + ": " + data.ExceptionType);
        }
        catch (Exception)
        {
            // Failure to rebuild the recorded type falls through to the WebException paths below.
        }
    }

    if (data.ErrorCode != 0 || isWebException)
    {
        var statusCode = (HttpStatusCode)data.ErrorCode;
        var rebuilt = new WebException(data.ExceptionMessage, (WebExceptionStatus)(int)statusCode);
        SetDummyResponseWithUrl(rebuilt, url, statusCode);
        return rebuilt;
    }

    return new WebException(data.ExceptionMessage + " (" + data.ExceptionType + ")");
}
/// <summary>
/// Downloads this object's content into the cache: an Azure blob when <c>Caching.AzureApi</c> is configured
/// (non-STANDALONE builds), otherwise a local cache file. Always clears <c>prefetchingTask</c> when done.
/// </summary>
/// <exception cref="InvalidOperationException">Thrown when no file-cache path can be computed.</exception>
private async Task PrefetchAsyncInternal()
{
    Utils.AssertMainThread();
    try
    {
        // allow the task to be saved to the object;
        await TaskEx.Yield();
#if !STANDALONE
        if (Caching.AzureApi != null)
        {
            var lazyUrl = new LazyUri(Url);
            var azureContainer = Caching.GetAzureContainer(lazyUrl);
            var knownBlobs = await Caching.GetAzureCachedFiles(azureContainer);
            var blobName = Caching.GetFileCachePath(lazyUrl);
            if (!knownBlobs.Contains(blobName))
            {
                try
                {
                    await Caching.AzureApi.UploadAzureFileAsync(azureContainer, blobName, this);
                }
                finally
                {
                    // The name is recorded whether or not the upload succeeded.
                    knownBlobs.Add(blobName);
                }
            }
            return;
        }
#endif
        var path = Caching.GetFileCachePath(new LazyUri(Url), true);
        if (path == null)
        {
            throw new InvalidOperationException("Caching.FileCachePath must be configured for file prefetching to work.");
        }
        if (File.Exists(path))
        {
            return;
        }

        var folder = Path.GetDirectoryName(path);

        // Content is written to "<path>$" first and moved over the final name afterwards.
        var temporaryPath = path + "$";
        using (var output = new FileStream(temporaryPath, FileMode.Create, FileAccess.Write, FileShare.Delete))
        {
            try
            {
                using (var input = await OpenStreamAsync())
                {
                    await input.CopyToAsync(output);
                }
            }
            catch (Exception ex)
            {
                lock (this)
                {
                    // On failure: leave an empty data file plus an ".err" file holding the exception summary.
                    output.SetLength(0);
                    output.Dispose();
                    File.WriteAllText(Path.ChangeExtension(path, ".err"), GetExceptionSummary(ex), Encoding.UTF8);
                    File.Delete(path);
                    File.Move(temporaryPath, path);
                    return;
                }
            }
        }

        lock (this)
        {
            File.Delete(path);
            File.Move(temporaryPath, path);
            File.Delete(Path.ChangeExtension(path, ".err"));
        }
    }
    finally
    {
        prefetchingTask = null;
    }
}
/// <summary>
/// Creates an extraction exception, forwarding the shared diagnostic details to the base class and
/// storing the node, entity and extraction attributes on this instance.
/// </summary>
/// <param name="node">The HTML node involved in the extraction.</param>
/// <param name="obj">The entity being populated; stored in <c>Entity</c>.</param>
/// <param name="extraction">The extraction attribute involved.</param>
/// <param name="innerException">The underlying exception, if any.</param>
/// <param name="sourceData">Forwarded to the base constructor.</param>
/// <param name="beginString">Forwarded to the base constructor.</param>
/// <param name="endString">Forwarded to the base constructor.</param>
/// <param name="nodeQuery">Forwarded to the base constructor.</param>
/// <param name="attribute">Forwarded to the base constructor.</param>
/// <param name="regex">Forwarded to the base constructor.</param>
/// <param name="userQuery">Forwarded to the base constructor.</param>
/// <param name="url">Forwarded to the base constructor.</param>
/// <param name="message">Forwarded to the base constructor.</param>
/// <param name="listExtraction">The list extraction attribute involved.</param>
public ExtractionException(
    HtmlNode node = null,
    Entity obj = null,
    ExtractionAttribute extraction = null,
    Exception innerException = null,
    string sourceData = null,
    string beginString = null,
    string endString = null,
    string nodeQuery = null,
    string attribute = null,
    string regex = null,
    string userQuery = null,
    LazyUri url = null,
    string message = null,
    ListExtractionAttribute listExtraction = null)
    : base(
        innerException: innerException,
        sourceData: sourceData,
        beginString: beginString,
        endString: endString,
        nodeQuery: nodeQuery,
        attribute: attribute,
        regex: regex,
        userQuery: userQuery,
        url: url,
        message: message)
{
    Node = node;
    Entity = obj;
    Extraction = extraction;
    ListExtraction = listExtraction;
}
/// <summary>
/// Opens a readable stream over this object's content, preferring a cached copy (Azure blob or local
/// cache file, depending on build flags and configuration) and otherwise streaming through a
/// <c>MediaStreamManager</c>.
/// </summary>
/// <param name="synchronous">
/// Selects how the Azure cached-file list is obtained: via <c>SynchronizationContext.Send</c> when true,
/// via an awaited <c>SendAsync</c> when false.
/// </param>
/// <param name="skipCache">When true, neither the Azure cache nor the local file cache is consulted.</param>
/// <param name="linger">Forwarded to <c>MediaStreamManager.TryCreateStream</c>.</param>
/// <returns>
/// A <c>FileStream</c> over the local cache file, a <c>MediaStream</c> wrapping a cached error, or a
/// stream created by the manager.
/// </returns>
internal async Task<Stream> OpenStreamAsync(bool synchronous, bool skipCache = false, bool linger = true)
{
    var url = new LazyUri(Url);
    await Utils.CheckLocalFileAccessAsync(url);
    var mgr = this.manager;
    // When set, the manager fetches data from the Azure cache instead of issuing a web request.
    Func<long, Task<HttpResponseMessage>> createStream = null;
#if !STANDALONE && DESKTOP
    // Azure path: only consulted when there is no live manager and caching is allowed.
    if (Caching.AzureApi != null && (mgr == null || !mgr.IsAlive) && !skipCache)
    {
        var container = Caching.GetAzureContainer(url);
        HashSet<string> files = null;
        if (synchronous)
        {
            // NOTE(review): the lambda is async; whether Send waits for the awaited assignment to
            // `files` is not visible here. If it does not, `files` may still be null below — confirm.
            ObjectManager.SynchronizationContext.Send(async () =>
            {
                files = await Caching.GetAzureCachedFiles(container);
            });
        }
        else
        {
            await ObjectManager.SynchronizationContext.SendAsync(async () =>
            {
                files = await Caching.GetAzureCachedFiles(container);
            });
        }
        var name = Caching.GetFileCachePath(url);
        if (files.Contains(name))
        {
            createStream = offset => Caching.GetAzureResponseAsync(container, name, offset, this);
        }
    }
    else
#endif
    if (
#if !STANDALONE && DESKTOP
        Caching.AzureApi == null &&
#endif
        !skipCache)
    {
#if DESKTOP
        // Local file cache path.
        var cache = Caching.GetFileCachePath(url);
        if (File.Exists(cache))
        {
            var str = new FileStream(cache, FileMode.Open, FileAccess.Read, FileShare.Read | FileShare.Delete);
            if (str.Length == 0)
            {
                // An empty cache file with a sibling ".err" file represents a cached failure.
                var errfile = Path.ChangeExtension(cache, ".err");
                if (File.Exists(errfile))
                {
                    str.Dispose();
                    var errlines = File.ReadAllText(errfile);
                    return new MediaStream(MediaStream.ExceptionFromCachedResponse(errlines), this);
                }
            }
            Sanity.AssertFastReadByte(str);
            return str;
        }
#endif
    }
    lock (this)
    {
        if (manager == null)
            manager = new MediaStreamManager(createStream ?? GetResponseAsync, true);
        var stream = manager.TryCreateStream(this, 0, linger);
        if (stream == null)
        {
            // The existing manager could not provide a stream: replace it and try once more.
            manager = new MediaStreamManager(createStream ?? GetResponseAsync, true);
            stream = manager.TryCreateStream(this, 0, linger);
            Sanity.Assert(stream != null);
        }
        Sanity.AssertFastReadByte(stream);
        return stream;
    }
}
/// <summary>
/// Requests this object's URL, optionally starting at a byte offset.
/// </summary>
/// <param name="startPosition">Byte offset to start from; a non-zero value adds a <c>Range</c> header.</param>
/// <returns>
/// The pending <c>partialDownload</c> response when one exists and <paramref name="startPosition"/> is 0
/// (the field is cleared); otherwise a freshly requested response.
/// </returns>
public async Task<HttpResponseMessage> GetResponseAsync(long startPosition)
{
    if (startPosition == 0 && partialDownload != null)
    {
        var pending = partialDownload;
        partialDownload = null;
        return pending;
    }

    var requestOptions = new WebRequestOptions();
    requestOptions.Timeout = 30000;
    requestOptions.TimeoutSecondRetrialAfterError = 10000;
    requestOptions.TimeoutStartSecondRetrial = null;

    var lazyUrl = new LazyUri(this.Url);
    HttpExtensionMethods.ProcessMetaParameters(lazyUrl, requestOptions);

    if (startPosition != 0)
    {
        var rangeValue = "bytes=" + startPosition + "-";
        requestOptions.AddHeader("Range", rangeValue);
    }

    var httpResponse = await HttpExtensionMethods.GetResponseAsync(lazyUrl, requestOptions);
    return httpResponse;
}
/// <summary>
/// Computes the ".dat" file-cache path for <paramref name="url"/> under <c>FileCachePath</c>.
/// </summary>
/// <param name="url">The URL to map to a cache file.</param>
/// <param name="createLng">Forwarded to <c>GetFileSystemName</c>.</param>
/// <returns>The value returned by <c>GetFileSystemName</c>.</returns>
public static string GetFileCachePath(LazyUri url, bool createLng = false)
{
    const string dataExtension = ".dat";
    return GetFileSystemName(url, FileCachePath, dataExtension, createLng);
}
/// <summary>
/// Computes the cache file path for <paramref name="url"/> inside <paramref name="folder"/>.
/// </summary>
/// <param name="url">The URL to map to a file.</param>
/// <param name="folder">The folder that holds the file.</param>
/// <param name="extension">The file extension; ".dat" is used when null.</param>
/// <returns>The value returned by <c>GetFileSystemName</c> (called with createLng = true, partition = false).</returns>
public static string GetPath(LazyUri url, string folder, string extension = null)
{
    if (extension == null)
    {
        extension = ".dat";
    }
    return GetFileSystemName(url, folder, extension, true, false);
}
// Supported formats:
// a=1&b=c (isUnprefixedExtraParameters)
// §a=1&b=c
// .link-next
// .link-next§§preserve
// .link-next (alwaysPreserveRemainingParameters)
// .link-next§§preserve§§a={z}
/// <summary>
/// Applies a "next link" rule to <paramref name="modifiableUrl"/>: either follows a link found in
/// <paramref name="node"/>, or rewrites URL parameters, or both (see the supported formats above).
/// </summary>
/// <param name="modifiableUrl">
/// The URL to update. Replaced with a new <c>LazyUri</c> when a link is followed; set to null when the
/// link cannot be found, is not HTTP, or a required <c>{selector}</c> value is missing.
/// </param>
/// <param name="node">The node used to locate the link and to resolve <c>{selector}</c> values.</param>
/// <param name="rule">The rule text, in one of the supported formats.</param>
/// <param name="isUnprefixedExtraParameters">When true, the whole rule is treated as a parameter list.</param>
/// <param name="alwaysPreserveRemainingParameters">When true, behaves as if "§§preserve" were present.</param>
/// <returns>True when a link was followed or at least one <c>{selector}</c> parameter value was evaluated.</returns>
public static bool UpdateNextLink(ref LazyUri modifiableUrl, HtmlNode node, string rule, bool isUnprefixedExtraParameters = false, bool alwaysPreserveRemainingParameters = false)
{
    var anyVarying = false;
    bool preserve = alwaysPreserveRemainingParameters;
    if (!isUnprefixedExtraParameters)
    {
        string additionalChanges = null;
        if (!rule.StartsWith("§"))
        {
            // Selector form: "<selector>[§§preserve][§§<parameters>]".
            if (rule.Contains("§§preserve"))
            {
                preserve = true;
                rule = rule.Replace("§§preserve", string.Empty);
            }
            if (rule.Contains("§§"))
            {
                additionalChanges = rule.CaptureAfter("§§");
                rule = rule.CaptureBefore("§§");
            }
            var nextlink = node.FindSingle(rule);
            if (nextlink == null)
            {
                modifiableUrl = null;
                return(false);
            }
            var url = nextlink.TryGetLinkUrl();
            if (url == null)
            {
                // No link URL on the element: fall back to interpreting its value as a URI.
                url = nextlink?.TryGetValue()?.AsUri();
            }
            if (!HttpUtils.IsHttp(url))
            {
                modifiableUrl = null;
                return(false);
            }
            if (!string.IsNullOrEmpty(url.Fragment))
            {
                // Drop the fragment of the link that was found.
                url = url.GetLeftPart_UriPartial_Query().AsUri();
            }
            // Snapshot the current parameters before the URL is replaced, if they must be preserved.
            var defaults = preserve ? modifiableUrl.QueryParameters.Concat(modifiableUrl.FragmentParameters).ToList() : null;
            modifiableUrl = new LazyUri(url);
            if (defaults != null)
            {
                foreach (var kv in defaults)
                {
                    // Skip "$json-query-<name>-…" entries when the new URL already has query parameter <name>.
                    if (kv.Key.StartsWith("$json-query-") && modifiableUrl.GetQueryParameter(kv.Key.CaptureBetween("-query-", "-")) != null)
                    {
                        continue;
                    }
                    // Only carry over parameters the new URL does not define itself.
                    if (modifiableUrl.GetQueryParameter(kv.Key) == null && modifiableUrl.GetFragmentParameter(kv.Key) == null)
                    {
                        if (kv.Key.StartsWith("$"))
                        {
                            modifiableUrl.AppendFragmentParameter(kv.Key, kv.Value);
                        }
                        else
                        {
                            modifiableUrl.AppendQueryParameter(kv.Key, kv.Value);
                        }
                    }
                }
            }
            anyVarying = true;
            if (additionalChanges == null)
            {
                return(anyVarying);
            }
        }
        if (additionalChanges != null)
        {
            rule = additionalChanges;
        }
        else
        {
            // "§a=1&b=c" form: strip the leading marker.
            rule = rule.Substring(1);
        }
    }
    // From here on, `rule` is a parameter list to apply to the URL.
    var z = HttpUtils.GetParameters(rule);
    foreach (var kv in z)
    {
        var val = kv.Value;
        var key = kv.Key;
        if (key.StartsWith("£"))
        {
            // "£name" is an alternative spelling of the "$name" fragment parameter.
            key = "$" + key.Substring(1);
        }
        if (val == "{delete}")
        {
            if (key.StartsWith("$"))
            {
                modifiableUrl.RemoveFragmentParameter(key);
            }
            else
            {
                modifiableUrl.RemoveQueryParameter(key);
            }
            continue;
        }
        if (val.StartsWith("{") && val.EndsWith("}"))
        {
            // "{[optional:][unchanged:]<selector>}": the value is read from the node.
            val = val.Substring(1, val.Length - 2);
            var optional = false;
            var leaveUnchanged = false;
            if (val.StartsWith("optional:"))
            {
                optional = true;
                val = val.CaptureAfter(":");
            }
            if (val.StartsWith("unchanged:"))
            {
                leaveUnchanged = true;
                val = val.CaptureAfter(":");
            }
            var v = node.TryGetValue(val);
            anyVarying = true;
            if (v == null)
            {
                if (leaveUnchanged)
                {
                    continue;
                }
                if (optional)
                {
                    if (key.StartsWith("$"))
                    {
                        modifiableUrl.RemoveFragmentParameter(key);
                    }
                    else
                    {
                        modifiableUrl.RemoveQueryParameter(key);
                    }
                    continue;
                }
                // A required value is missing: there is no next URL.
                modifiableUrl = null;
                return(anyVarying);
            }
            val = v;
        }
        if (key.StartsWith("$"))
        {
            modifiableUrl.AppendFragmentParameter(key, val);
        }
        else
        {
            modifiableUrl.AppendQueryParameter(key, val);
        }
    }
    return(anyVarying);
}
/// <summary>
/// Downloads this object's content into the cache: an Azure blob when <c>Caching.AzureApi</c> is configured
/// (non-STANDALONE builds), otherwise a local cache file. Always clears <c>prefetchingTask</c> when done.
/// </summary>
/// <exception cref="InvalidOperationException">Thrown when no file-cache path can be computed.</exception>
private async Task PrefetchAsyncInternal()
{
    Utils.AssertMainThread();
    try
    {
        // allow the task to be saved to the object;
        await TaskEx.Yield();
#if !STANDALONE
        if (Caching.AzureApi != null)
        {
            var url = new LazyUri(Url);
            var container = Caching.GetAzureContainer(url);
            var files = await Caching.GetAzureCachedFiles(container);
            var name = Caching.GetFileCachePath(url);
            if (files.Contains(name)) return;
            try
            {
                await Caching.AzureApi.UploadAzureFileAsync(container, name, this);
            }
            finally
            {
                // The name is recorded whether or not the upload succeeded.
                files.Add(name);
            }
            return;
        }
#endif
        var path = Caching.GetFileCachePath(new LazyUri(Url), true);
        if (path == null) throw new InvalidOperationException("Caching.FileCachePath must be configured for file prefetching to work.");
        if (File.Exists(path)) return;
        // NOTE(review): `folder` is not read anywhere in this method.
        var folder = Path.GetDirectoryName(path);
        // Content is written to "<path>$" first and moved over the final name afterwards.
        using (var c = new FileStream(path + "$", FileMode.Create, FileAccess.Write, FileShare.Delete))
        {
            try
            {
                using (var s = await OpenStreamAsync())
                {
                    await s.CopyToAsync(c);
                }
            }
            catch (Exception ex)
            {
                lock (this)
                {
                    // On failure: leave an empty data file plus an ".err" file holding the exception summary.
                    c.SetLength(0);
                    c.Dispose();
                    File.WriteAllText(Path.ChangeExtension(path, ".err"), GetExceptionSummary(ex), Encoding.UTF8);
                    File.Delete(path);
                    File.Move(path + "$", path);
                    return;
                }
            }
        }
        lock (this)
        {
            // Success: publish the file and remove any stale error marker.
            File.Delete(path);
            File.Move(path + "$", path);
            File.Delete(Path.ChangeExtension(path, ".err"));
        }
    }
    finally
    {
        prefetchingTask = null;
    }
}
private static HttpRequestMessage CreateRequestInternal(LazyUri url, WebRequestOptions options, bool forceSameUrl, int redirectionIndex #if WEBCLIENT , out HttpContent requestContent #endif ) { if (!forceSameUrl) url = MaybeAddAdditionalQueryParameters(url, options); #if WEBCLIENT var message = (HttpWebRequest)WebRequest.Create(url.PathAndQueryConsistentUrl); message.CachePolicy = new System.Net.Cache.RequestCachePolicy(System.Net.Cache.RequestCacheLevel.NoCacheNoStore); message.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.Deflate; requestContent = null; #else var message = new HttpRequestMessage(); message.RequestUri = url.PathAndQueryConsistentUrl; #endif if (options != null) { if (redirectionIndex != 0) message.Method = HttpMethod.Get; else if (options._method != null) #if WEBCLIENT message.Method = options._method; #else message.Method = new HttpMethod(options._method); #endif else message.Method = (options.PostData != null || options.PostString != null || options.WriteRequest != null) ? HttpMethod.Post : HttpMethod.Get; }
/// <summary>
/// Deletes the ".dat" file-cache entry for <paramref name="url"/> under <c>FileCachePath</c>.
/// </summary>
/// <param name="url">The URL whose cache entry is removed.</param>
/// <returns>The task returned by the four-argument overload (called with fileCache = true).</returns>
public static Task DeleteFileCacheAsync(LazyUri url)
{
    var deletion = DeleteFileCacheAsync(url, FileCachePath, ".dat", true);
    return deletion;
}
/// <summary>
/// Sends a request for <paramref name="url"/> and follows redirects manually until a response without a
/// <c>Location</c> header is received.
/// </summary>
/// <param name="url">The URL to request.</param>
/// <param name="options">Request options; must not be the shared <c>WebRequestOptions.DefaultOptions</c> instance.</param>
/// <param name="messageBox">Optional holder that receives each request message created by this method.</param>
/// <param name="alwaysCatchAndForbidRedirects">
/// When true, a redirect or any exception is returned inside the <c>HttpResponseInfo</c> instead of being followed or thrown.
/// </param>
/// <param name="keepResponseAliveOnError">When true, the response is not disposed before throwing for a status of 400 or above.</param>
/// <returns>The final response together with the URL that produced it.</returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="options"/> is <c>WebRequestOptions.DefaultOptions</c>.</exception>
/// <exception cref="WebException">
/// Thrown for a status of 400 or above, a disallowed redirect, too many redirects, or a detected redirect loop.
/// </exception>
internal static async Task<HttpResponseInfo> SendAsync(this LazyUri url, WebRequestOptions options, HttpRequestMessageBox messageBox, bool alwaysCatchAndForbidRedirects = false, bool keepResponseAliveOnError = false)
{
    await Utils.CheckLocalFileAccessAsync(url);
    Utils.RaiseWebRequestEvent(url, true);
    HttpResponseMessage result = null;
    // previousResponse2 is the URL currently being requested; previousResponse1 is the one before it.
    LazyUri previousResponse2 = null;
    try
    {
        if (options == WebRequestOptions.DefaultOptions) throw new ArgumentException();
        if (options.WaitBefore.Ticks != 0)
            await TaskEx.Delay(options.WaitBefore);
        LazyUri previousResponse1 = null;
        previousResponse2 = url.Clone();
        previousResponse2 = MaybeAddAdditionalQueryParameters(previousResponse2, options);
        var redirectIndex = 0;
        while (true)
        {
#if WEBCLIENT
            HttpContent requestContent;
#endif
            var message = CreateRequestInternal(previousResponse2, options, true, redirectIndex
#if WEBCLIENT
                , out requestContent
#endif
                );
            if (messageBox != null)
            {
                messageBox.Dispose();
                messageBox.Message = message;
            }
#if WEBCLIENT
            if(requestContent != null)
            {
                if(requestContent.ContentType != null) message.ContentType = requestContent.ContentType;
                if(requestContent.ContentDisposition != null) message.Headers["Content-Disposition"] = requestContent.ContentDisposition;
                using(var req = await message.GetRequestStreamAsync())
                {
                    await requestContent.CopyToAsync(req);
                }
            }
            result = (HttpWebResponse)await message.GetResponseAsync();
#else
            // NOTE(review): a client is created for every request in the redirect chain.
            var client = CreateHttpClient();
            result = await client.SendAsync(message, HttpCompletionOption.ResponseHeadersRead);
#endif
#if !WEBCLIENT
            // Rewrite the non-standard "utf8" charset label to "utf-8".
            if (result.Content != null && result.Content.Headers.ContentType != null && result.Content.Headers.ContentType.CharSet == "utf8")
                result.Content.Headers.ContentType.CharSet = "utf-8";
#endif
            if ((int)result.StatusCode >= 400)
            {
                if(!keepResponseAliveOnError) result.Dispose();
                // Hackish, change purpose of enumeration type
                throw new WebException("The web server returned: " + result.StatusCode.ToString(), (WebExceptionStatus)result.StatusCode);
            }
#if WEBCLIENT
            var zz = result.Headers["Location"];
            var redirectUrlNative = zz != null ? HttpUtils.GetAbsoluteUri(url.PathConsistentUrl, zz) : null;
#else
            var redirectUrlNative = result.Headers.Location;
#endif
            if (redirectUrlNative == null)
            {
                // No Location header: this is the final response.
                return new HttpResponseInfo() { RespondingUrl = previousResponse2, Response = result };
            }
            else
            {
                if (alwaysCatchAndForbidRedirects) return new HttpResponseInfo() { Response = result, RespondingUrl = previousResponse2, Exception = new WebException("Unexpected redirect", HttpUtils.UnexpectedRedirect) };
                result.Dispose();
                var redirectUrl = new LazyUri(redirectUrlNative);
                // Relative redirect targets are resolved against the URL that was just requested.
                if (!redirectUrl.IsAbsoluteUri) redirectUrl = new LazyUri(new Uri(previousResponse2.PathConsistentUrl, redirectUrlNative));
                if (options != null && !options.AllowRedirects) throw new WebException("Unexpected redirect was received.", HttpUtils.UnexpectedRedirect);
                if (redirectIndex == Configuration_MaximumNumberOfRedirects) throw new WebException("The maximum number of redirects has been reached.", HttpUtils.MaximumNumberOfRedirectsExceeded);
                // Loop detection is skipped for the first redirect after a request with PostData/PostString.
                if (!(redirectIndex == 0 && options != null && (options.PostData != null || options.PostString != null)))
                    if ((previousResponse1 != null && HttpUtils.UrisEqual(redirectUrl.PathAndQueryConsistentUrl, previousResponse1.PathAndQueryConsistentUrl)) || HttpUtils.UrisEqual(redirectUrl, previousResponse2)) throw new WebException("The server isn't redirecting the requested resource properly.", HttpUtils.RedirectLoopDetected);
                previousResponse1 = previousResponse2;
                previousResponse2 = redirectUrl;
                redirectIndex++;
            }
        }
    }
    catch (Exception ex)
    {
        var orig = ex;
#if !WEBCLIENT
        // Unwrap HttpRequestException so callers see its inner exception.
        var hre = ex as HttpRequestException;
        if (hre != null && hre.InnerException != null) ex = hre.InnerException;
#endif
        if (alwaysCatchAndForbidRedirects) return new HttpResponseInfo() { Exception = ex, Response = result, RespondingUrl = previousResponse2 };
        else if (ex == orig) throw;
        else throw ex;
    }
}
/// <summary>
/// Computes the web-cache file path for <paramref name="url"/> with no extra options and without creating ".lng" files.
/// </summary>
/// <param name="url">The URL to map to a cache file.</param>
/// <returns>The value returned by the three-argument overload.</returns>
public static string GetWebCachePath(LazyUri url)
{
    return GetWebCachePath(url, hasExtraOptions: false, createLng: false);
}
/// <summary>
/// Maps a URL to the name of its cache entry: a local file path, or an Azure blob name when
/// <paramref name="cacheFolder"/> starts with "azure:".
/// </summary>
/// <param name="url">The http/https URL to map.</param>
/// <param name="cacheFolder">The cache folder, or an "azure:"-prefixed value; null yields a null result.</param>
/// <param name="extension">The extension appended to the generated name.</param>
/// <param name="createLng">
/// When true (local paths only), the target folder is created and, for hashed names, a ".lng" file holding
/// the full URL is written if it does not exist yet.
/// </param>
/// <param name="partition">
/// When true (local paths only), the file is placed in a three-digit subfolder derived from the URL's hash code modulo 1000.
/// </param>
/// <returns>The local path or Azure blob name, or null when <paramref name="cacheFolder"/> is null.</returns>
/// <exception cref="ArgumentException">Thrown when a local cache folder path exceeds <c>CacheFolderMaxLength</c>.</exception>
/// <exception cref="NotSupportedException">Thrown when the URL scheme is neither http nor https.</exception>
internal static string GetFileSystemName(LazyUri url, string cacheFolder, string extension, bool createLng, bool partition = true)
{
    var hashed = false;
    var isazure = cacheFolder != null && cacheFolder.StartsWith("azure:");
    if (cacheFolder == null)
    {
        return(null);
    }
    if (!isazure && cacheFolder.Length > CacheFolderMaxLength)
    {
        throw new ArgumentException("The path of the file cache folder must not be longer than " + CacheFolderMaxLength + " characters.");
    }
    var str = url.AbsoluteUri;
    // NOTE(review): the partition folder depends on string.GetHashCode(); presumably this must be stable
    // across runs for the cache to be found again — confirm on runtimes with randomized string hashing.
    var hashcode = Math.Abs((long)str.GetHashCode());
    var sb = ReseekableStringBuilder.AcquirePooledStringBuilder();
    // NOTE(review): the builder acquired above is not released on this exception path.
    if (url.Scheme != "http" && url.Scheme != "https")
    {
        throw new NotSupportedException("URI scheme is not supported.");
    }
    var https = url.Scheme == "https";
    // Readable name: host [∴port] (₰ for https | ℓ for http) path-without-leading-slash query fragment.
    sb.Append((string)url.DnsSafeHost);
    if (!url.IsDefaultPort)
    {
        sb.Append("∴");
        sb.AppendFast((int)url.Port);
    }
    sb.Append(https ? "₰" : "ℓ");
    var abspath = url.AbsolutePath;
    sb.Append(abspath, 1, abspath.Length - 1);
    sb.Append((string)url.Query);
    sb.Append((string)url.Fragment);
    if (sb.Length <= CacheFileNameMaxLength)
    {
        FixupFabulousUrl(sb);
        // Remaining invalid file-name characters become "℅" followed by the character code in hex.
        foreach (var item in Path.GetInvalidFileNameChars())
        {
            if (sb.IndexOf(item) != -1)
            {
                sb.Replace(item.ToString(), "℅" + ((int)item).ToString("X2"));
            }
        }
        sb.Append(extension);
    }
    var folder = isazure ? null : partition?Path.Combine(cacheFolder, (hashcode % 1000).ToString("000")) : cacheFolder;
    // Checked again after escaping and appending the extension, which may have pushed the name over the limit.
    if (sb.Length > CacheFileNameMaxLength)
    {
        // Name too long: fall back to host [∴port] scheme-marker ↔ SHA-256(url) in lowercase hex.
#if NET35
        sb.Length = 0;
#else
        sb.Clear();
#endif
        sb.Append(url.DnsSafeHost.TrimSize(60, 0, false));
        if (!url.IsDefaultPort)
        {
            sb.Append("∴");
            sb.AppendFast((int)url.Port);
        }
        sb.Append(https ? "₰" : "ℓ");
        sb.Append("↔");
        using (var hashAlgorithm =
#if NATIVE_HTTP
            System.Security.Cryptography.SHA256.Create()
#else
            System.Security.Cryptography.Reimpl.SHA256.Create()
#endif
            )
        {
            byte[] inputBytes = System.Text.Encoding.UTF8.GetBytes(str);
            byte[] hash = hashAlgorithm.ComputeHash(inputBytes);
            for (int i = 0; i < hash.Length; i++)
            {
                sb.Append(hash[i].ToString("x2"));
            }
        }
        // IPv6 addresses
        FixupFabulousUrl(sb);
        if (!isazure)
        {
            sb.Append(extension);
        }
        hashed = true;
    }
    if (isazure)
    {
        // NOTE(review): in the hashed branch no extension was appended for Azure, so this removes the
        // last four hex characters of the hash instead; changing it would rename existing blobs — confirm intent.
        sb.Length -= 4; // remove .dat
        sb.Replace("₰", "/s/");
        sb.Replace("ℓ", "/h/");
        sb.Replace("↑", "");
        sb.Replace('\u222F', '/'); // ∯
        return(ReseekableStringBuilder.GetValueAndRelease(sb));
    }
    var path = Path.Combine(folder, ReseekableStringBuilder.GetValueAndRelease(sb));
    if (createLng)
    {
        Directory.CreateDirectory(folder);
        if (hashed)
        {
            // Hashed names are not reversible, so the full URL is stored next to the entry.
            var p = Path.ChangeExtension(path, ".lng");
            if (!BlobStore.Exists(p))
            {
                BlobStore.WriteAllText(p, (string)url.AbsoluteUri, Encoding.UTF8);
            }
        }
    }
    return(path);
}
/// <summary>
/// Sends a request for <paramref name="url"/> and follows redirects manually until a response without a
/// <c>Location</c> header is received.
/// </summary>
/// <param name="url">The URL to request.</param>
/// <param name="options">Request options; must not be the shared <c>WebRequestOptions.DefaultOptions</c> instance.</param>
/// <param name="messageBox">
/// Optional holder that receives each request message; it may also supply a prebuilt request and,
/// when the default client is used, a prebuilt response.
/// </param>
/// <param name="alwaysCatchAndForbidRedirects">
/// When true, a redirect or any exception is returned inside the <c>HttpResponseInfo</c> instead of being followed or thrown.
/// </param>
/// <param name="keepResponseAliveOnError">When true, the response is not disposed before throwing for a status of 400 or above.</param>
/// <param name="synchronous">
/// When true, the local-file access check, the <c>WaitBefore</c> delay and the same-redirect delay are skipped.
/// </param>
/// <returns>The final response together with the URL that produced it.</returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="options"/> is <c>WebRequestOptions.DefaultOptions</c>.</exception>
/// <exception cref="WebException">
/// Thrown for a status of 400 or above, a disallowed redirect, too many redirects, or a detected redirect loop.
/// </exception>
internal static async Task<HttpResponseInfo> SendAsync(this LazyUri url, WebRequestOptions options, HttpRequestMessageBox messageBox, bool alwaysCatchAndForbidRedirects = false, bool keepResponseAliveOnError = false, bool synchronous = false)
{
    HttpUtils.EnsureInitialized();
    if (!synchronous)
    {
        await Utils.CheckLocalFileAccessAsync(url);
    }
    Utils.RaiseWebRequestEvent(url, false);
    HttpResponseMessage result = null;
    // previousResponse2 is the URL currently being requested; previousResponse1 is the one before it.
    LazyUri previousResponse2 = null;
    try
    {
        if (options == WebRequestOptions.DefaultOptions)
        {
            throw new ArgumentException();
        }
        if (options.WaitBefore.Ticks != 0 && !synchronous)
        {
            await TaskEx.Delay(options.WaitBefore);
        }
        LazyUri previousResponse1 = null;
        previousResponse2 = url.Clone();
        previousResponse2 = MaybeAddAdditionalQueryParameters(previousResponse2, options);
        var redirectIndex = 0;
        while (true)
        {
#if WEBCLIENT
            HttpContent requestContent = null;
#endif
            var message = messageBox?.PrebuiltRequest ?? CreateRequestInternal(previousResponse2, options, true, redirectIndex
#if WEBCLIENT
                , out requestContent
#endif
                );
            if (messageBox != null)
            {
                messageBox.Dispose();
                messageBox.Message = message;
            }
#if WEBCLIENT
            if (requestContent != null)
            {
                if (requestContent.ContentType != null)
                {
                    message.ContentType = requestContent.ContentType;
                }
                if (requestContent.ContentDisposition != null)
                {
                    message.Headers["Content-Disposition"] = requestContent.ContentDisposition;
                }
                using (var req = await message.GetRequestStreamAsync())
                {
                    await requestContent.CopyToAsync(req);
                }
            }
            result = (HttpWebResponse)await message.GetResponseAsync();
#else
            message.Properties["ShamanURL"] = url;
            if (options.CustomHttpClient != null)
            {
                result = await options.CustomHttpClient.SendAsync(message, HttpCompletionOption.ResponseHeadersRead);
            }
            else
            {
                // The shared default client is created on first use.
                if (defaultHttpClient == null)
                {
                    defaultHttpClient = CreateHttpClient();
                }
                result = messageBox?.PrebuiltResponse ?? await defaultHttpClient.SendAsync(message, HttpCompletionOption.ResponseHeadersRead);
            }
#endif
#if !WEBCLIENT
            // Rewrite the non-standard "utf8" charset label to "utf-8".
            if (result.Content != null && result.Content.Headers.ContentType != null && result.Content.Headers.ContentType.CharSet == "utf8")
            {
                result.Content.Headers.ContentType.CharSet = "utf-8";
            }
#endif
            if ((int)result.StatusCode >= 400)
            {
                if (!keepResponseAliveOnError)
                {
                    result.Dispose();
                }
                // Hackish, change purpose of enumeration type
                throw new WebException("The web server returned: " + result.StatusCode.ToString(), (WebExceptionStatus)result.StatusCode);
            }
#if WEBCLIENT
            var zz = result.Headers["Location"];
            var redirectUrlNative = zz != null ? HttpUtils.GetAbsoluteUri(url.PathConsistentUrl, zz) : null;
#else
            var redirectUrlNative = result.Headers.Location;
#endif
            if (redirectUrlNative == null)
            {
                // No Location header: this is the final response.
                return(new HttpResponseInfo() { RespondingUrl = previousResponse2, Response = result });
            }
            else
            {
                if (alwaysCatchAndForbidRedirects)
                {
                    // Fixed: the statement terminator belongs inside the block, right after the initializer.
                    return new HttpResponseInfo() { Response = result, RespondingUrl = previousResponse2, Exception = new WebException("Unexpected redirect", HttpUtils.Error_UnexpectedRedirect) };
                }
                result.Dispose();
                var redirectUrl = new LazyUri(redirectUrlNative);
                // Relative redirect targets are resolved against the URL that was just requested.
                if (!redirectUrl.IsAbsoluteUri)
                {
                    redirectUrl = new LazyUri(new Uri(previousResponse2.PathConsistentUrl, redirectUrlNative));
                }
                if (options != null && !options.AllowRedirects)
                {
                    throw new WebException("Unexpected redirect was received.", HttpUtils.Error_UnexpectedRedirect);
                }
                if (redirectIndex == Configuration_MaximumNumberOfRedirects)
                {
                    throw new WebException("The maximum number of redirects has been reached.", HttpUtils.Error_MaximumNumberOfRedirectsExceeded);
                }
                // Loop detection is skipped for the first redirect after a request with PostData/PostString.
                if (!(redirectIndex == 0 && options != null && (options.PostData != null || options.PostString != null)))
                {
                    if ((
                            (previousResponse1 != null && HttpUtils.UrisEqual(redirectUrl.PathAndQueryConsistentUrl, previousResponse1.PathAndQueryConsistentUrl)) ||
                            HttpUtils.UrisEqual(redirectUrl, previousResponse2)))
                    {
                        if (url.GetFragmentParameter("$allow-same-redirect") == "1")
                        {
                            // The URL explicitly allows being redirected to itself: wait, then retry.
                            if (!synchronous)
                            {
#if NET35
                                await TaskEx.Delay(Configuration_SameRedirectDelayTimeMs);
#else
                                await Task.Delay(Configuration_SameRedirectDelayTimeMs);
#endif
                            }
                        }
                        else
                        {
                            throw new WebException("The server isn't redirecting the requested resource properly.", HttpUtils.Error_RedirectLoopDetected);
                        }
                    }
                }
                previousResponse1 = previousResponse2;
                previousResponse2 = redirectUrl;
                redirectIndex++;
            }
        }
    }
    catch (Exception ex)
    {
        var orig = ex;
#if !WEBCLIENT
        // Unwrap HttpRequestException so callers see its inner exception.
        var hre = ex as HttpRequestException;
        if (hre != null && hre.InnerException != null)
        {
            ex = hre.InnerException;
        }
#endif
        if (alwaysCatchAndForbidRedirects)
        {
            // Fixed: terminator moved inside the block so the else-chain below attaches to this if.
            return new HttpResponseInfo() { Exception = ex, Response = result, RespondingUrl = previousResponse2 };
        }
        else if (ex == orig)
        {
            throw;
        }
        else
        {
            throw ex.Rethrow();
        }
    }
}
/// <summary>
/// Writes one WARC/1.0 record (request or response) containing the raw HTTP message in
/// <paramref name="req"/>. For response records the stored HTTP message is then re-read to
/// build a <see cref="WarcItem"/>, which is added to <c>recordedResponses</c> and passed to
/// <c>onNewWarcItem</c>.
/// </summary>
/// <param name="url">Value written as the WARC-Target-URI header; also the item URL when <paramref name="shamanUrl"/> is null.</param>
/// <param name="isresponse">True to write a response record, false to write a request record.</param>
/// <param name="req">Raw HTTP message bytes; its full length is written as the record body.</param>
/// <param name="date">
/// Timestamp for the WARC-Date header, emitted as the first 19 characters of the round-trip ("o")
/// format followed by a literal "Z". NOTE(review): no UTC conversion is performed here, so callers
/// presumably pass a UTC value - confirm.
/// </param>
/// <param name="ip">Value written as the WARC-IP-Address header.</param>
/// <param name="recordId">Identifier written inside the urn:uuid of the WARC-Record-ID header.</param>
/// <param name="concurrentTo">Optional identifier for the WARC-Concurrent-To header; the header is omitted when null.</param>
/// <param name="shamanUrl">
/// Optional URL; when its absolute form differs from <paramref name="url"/> an extra
/// WARC-Shaman-URI header is written, and it takes precedence as the URL of the returned item.
/// </param>
/// <returns>The <see cref="WarcItem"/> describing the response, or null for request records.</returns>
public WarcItem WriteRecord(string url, bool isresponse, MemoryStream req, DateTime date, string ip, string recordId, string concurrentTo, LazyUri shamanUrl)
{
    // Remember where this record starts so its offset and length in the output can be reported.
    var initialPosition = outstream.Position;
    StartRecord();

    currentRecord.WriteClrStringLine("WARC/1.0");
    if (isresponse)
    {
        currentRecord.WriteClrStringLine("WARC-Type: response");
        currentRecord.WriteClrStringLine("Content-Type: application/http;msgtype=response");
    }
    else
    {
        currentRecord.WriteClrStringLine("WARC-Type: request");
        currentRecord.WriteClrStringLine("Content-Type: application/http;msgtype=request");
    }

    currentRecord.WriteClrString("WARC-Date: ");
    currentRecord.WriteClrString(date.ToString("o").Substring(0, 19));
    currentRecord.WriteClrStringLine("Z");

    currentRecord.WriteClrString("WARC-Record-ID: <urn:uuid:");
    currentRecord.WriteClrString(recordId);
    currentRecord.WriteClrStringLine(">");

    currentRecord.WriteClrString("WARC-Target-URI: ");
    currentRecord.WriteClrStringLine(url);

    if (shamanUrl != null)
    {
        var shamanAbsolute = shamanUrl.AbsoluteUri;
        if (shamanAbsolute != url)
        {
            currentRecord.WriteClrString("WARC-Shaman-URI: ");
            currentRecord.WriteClrStringLine(shamanAbsolute);
        }
    }

    currentRecord.WriteClrString("WARC-IP-Address: ");
    currentRecord.WriteClrStringLine(ip);

    if (concurrentTo != null)
    {
        currentRecord.WriteClrString("WARC-Concurrent-To: <urn:uuid:");
        currentRecord.WriteClrString(concurrentTo);
        currentRecord.WriteClrStringLine(">");
    }

    currentRecord.WriteClrString("Content-Length: ");
    currentRecord.Write(req.Length);
    currentRecord.WriteLine();

    currentRecord.WriteClrString("WARC-Warcinfo-ID: <urn:uuid:");
    currentRecord.WriteClrString(WarcInfoId);
    currentRecord.WriteClrStringLine(">");

    // Blank line separating the WARC headers from the record body.
    currentRecord.WriteLine();

    // TryGetBuffer returns false (leaving a default segment with a null Array) when the stream
    // does not expose its backing buffer; fall back to a copy instead of dereferencing null.
    if (req.TryGetBuffer(out var buf))
    {
        currentRecord.Write(buf.Array.Slice(buf.Offset, (int)req.Length));
    }
    else
    {
        var copy = req.ToArray();
        currentRecord.Write(copy.Slice(0, copy.Length));
    }

    EndRecord();

    if (!isresponse)
    {
        return(null);
    }

    // Re-read the HTTP message that was just stored to extract the response metadata.
    req.Seek(0, SeekOrigin.Begin);
    scratchpad.Reset();
    using (var http = new Utf8StreamReader(req, true))
    using (var s = WarcItem.OpenHttp(http, scratchpad, url.AsUri(), req.Length, out var payloadLength, out var location, out var responseCode, out var contentType, out var lastModified, null))
    {
        if (payloadLength == -1)
        {
            // OpenHttp reported -1 for the payload length: measure it by draining the stream.
            var total = 0;
            while (true)
            {
                var read = s.Read(lengthCalculationBuffer, 0, lengthCalculationBuffer.Length);
                if (read == 0)
                {
                    break;
                }
                total += read;
            }
            payloadLength = total;
        }

        var warcItem = new WarcItem()
        {
            Url = shamanUrl?.AbsoluteUri ?? url,
            Date = date,
            ContentType = contentType.ToStringCached(),
            LastModified = lastModified,
            PayloadLength = payloadLength,
            ResponseCode = (HttpStatusCode)responseCode,
            CompressedLength = outstream.Position - initialPosition,
            CompressedOffset = initialPosition,
            WarcFile = WarcName
        };
        recordedResponses.Add(warcItem);
        onNewWarcItem?.Invoke(warcItem);
        return(warcItem);
    }
}
/// <summary>
/// Derives the Azure container name for <paramref name="url"/>: takes the value produced by
/// <c>GetAzureContainerInternal</c>, lowercases it, replaces every '.' with '-', and passes the
/// result through <c>TrimSize(40)</c>.
/// </summary>
/// <param name="url">URL whose container name is requested.</param>
/// <returns>The normalized container name.</returns>
internal static string GetAzureContainer(LazyUri url)
{
    var rawName = GetAzureContainerInternal(url);
    var lowered = rawName.ToLowerFast();
    var dashed = lowered.Replace('.', '-');
    // NOTE(review): TrimSize presumably caps the name at 40 characters - confirm against its definition.
    return dashed.TrimSize(40);
}
/// <summary>
/// Opens a readable stream for this object's <c>Url</c>. Unless <paramref name="skipCache"/> is set,
/// a cached copy is looked up first (Azure blob listing or local cache file, depending on the build
/// configuration and on whether <c>Caching.AzureApi</c> is set); otherwise, or when nothing is
/// returned from the cache, the stream is obtained from a <c>MediaStreamManager</c>.
/// </summary>
/// <param name="synchronous">
/// When true, the Azure file listing is requested through <c>SynchronizationContext.Send</c> instead of
/// the awaited <c>SendAsync</c>. The initial local-file-access check is awaited in both cases.
/// </param>
/// <param name="skipCache">When true, neither the Azure lookup nor the local cache file lookup is performed.</param>
/// <param name="linger">Forwarded unchanged to <c>MediaStreamManager.TryCreateStream</c>.</param>
/// <returns>A cached file stream, a stream wrapping a cached error, or a stream created by the manager.</returns>
internal async Task <Stream> OpenStreamAsync(bool synchronous, bool skipCache = false, bool linger = true)
{
    var url = new LazyUri(Url);
    await Utils.CheckLocalFileAccessAsync(url);
    // Snapshot of the current manager; only read by the Azure branch below.
    var mgr = this.manager;
    // Stays null unless the Azure branch finds a cached blob; null means "use GetResponseAsync".
    Func <long, Task <HttpResponseMessage> > createStream = null;
#if !STANDALONE && DESKTOP
    // Azure cache lookup: only when an Azure API is configured, there is no manager (or its IsAlive is false),
    // and the caller did not ask to skip the cache.
    if (Caching.AzureApi != null && (mgr == null || !mgr.IsAlive) && !skipCache)
    {
        var container = Caching.GetAzureContainer(url);
        HashSet <string> files = null;
        if (synchronous)
        {
            // NOTE(review): an async lambda is handed to Send; whether `files` is assigned by the time Send
            // returns depends on how this project's Send treats async delegates - confirm. If it is not,
            // `files.Contains` below dereferences null.
            ObjectManager.SynchronizationContext.Send(async() => { files = await Caching.GetAzureCachedFiles(container); });
        }
        else
        {
            await ObjectManager.SynchronizationContext.SendAsync(async() => { files = await Caching.GetAzureCachedFiles(container); });
        }
        var name = Caching.GetFileCachePath(url);
        if (files.Contains(name))
        {
            // The blob is listed in the container: have the manager read from Azure instead of the network.
            createStream = offset => Caching.GetAzureResponseAsync(container, name, offset, this);
        }
    }
    else
#endif
    // Local file cache lookup. In builds with the Azure branch compiled in, this is its `else` and
    // additionally requires that no Azure API is configured.
    if (
#if !STANDALONE && DESKTOP
        Caching.AzureApi == null &&
#endif
        !skipCache)
    {
#if DESKTOP
        var cache = Caching.GetFileCachePath(url);
        if (File.Exists(cache))
        {
            // FileShare.Delete lets the cache file be deleted while this stream is still open.
            var str = new FileStream(cache, FileMode.Open, FileAccess.Read, FileShare.Read | FileShare.Delete);
            if (str.Length == 0)
            {
                // A zero-length cache file with a sibling ".err" file is turned into a stream built from
                // the exception that MediaStream.ExceptionFromCachedResponse creates from the file's text.
                var errfile = Path.ChangeExtension(cache, ".err");
                if (File.Exists(errfile))
                {
                    str.Dispose();
                    var errlines = File.ReadAllText(errfile);
                    return(new MediaStream(MediaStream.ExceptionFromCachedResponse(errlines), this));
                }
            }
            Sanity.AssertFastReadByte(str);
            return(str);
        }
#endif
    }
    // No cached file was returned: obtain the stream from the (possibly newly created) manager.
    lock (this)
    {
        if (manager == null)
        {
            manager = new MediaStreamManager(createStream ?? GetResponseAsync, true);
        }
        var stream = manager.TryCreateStream(this, 0, linger);
        if (stream == null)
        {
            // The existing manager did not provide a stream: replace it with a fresh one and retry once.
            manager = new MediaStreamManager(createStream ?? GetResponseAsync, true);
            stream = manager.TryCreateStream(this, 0, linger);
            Sanity.Assert(stream != null);
        }
        Sanity.AssertFastReadByte(stream);
        return(stream);
    }
}