/// <summary>
/// Gives a response-less <see cref="WebException"/> a placeholder <see cref="HttpWebResponse"/>
/// that carries the supplied URL and status code.
/// </summary>
/// <remarks>
/// The body is only compiled when DESKTOP is defined. The placeholder is created without running
/// a constructor and is populated through reflection over non-public fields; if any of the
/// expected fields cannot be found the exception is left untouched. On CORECLR builds the method
/// calls <c>Sanity.NotImplementedButTryToContinue()</c> and returns without attaching anything.
/// </remarks>
/// <param name="w">Exception to patch; ignored when null or when it already has a response.</param>
/// <param name="url">URL to store in the placeholder response; ignored when null.</param>
/// <param name="statusCode">Status code to store in the placeholder response.</param>
internal static void SetDummyResponseWithUrl(WebException w, LazyUri url, HttpStatusCode statusCode)
{
#if DESKTOP
    // Nothing to do unless we have both inputs and the exception is still missing its response.
    if (w == null || url == null || w.Response != null)
    {
        return;
    }

    const BindingFlags PrivateInstance = BindingFlags.Instance | BindingFlags.NonPublic;

    var exceptionResponseField = typeof(WebException).GetField("response", PrivateInstance); // m_Response on netfx
    if (exceptionResponseField == null)
    {
        return;
    }
#if CORECLR
    Sanity.NotImplementedButTryToContinue();
    return;
#else
    // Allocate the response without invoking any constructor.
    var placeholder = FormatterServices.GetUninitializedObject(typeof(HttpWebResponse));

    var responseUriField = typeof(HttpWebResponse).GetField("uri", PrivateInstance); // m_Uri on netfx
    if (responseUriField == null)
    {
        return;
    }

    var responseStatusField = typeof(HttpWebResponse).GetField("statusCode", PrivateInstance); // m_StatusCode on netfx
    if (responseStatusField == null)
    {
        return;
    }

    responseStatusField.SetValue(placeholder, statusCode);
    responseUriField.SetValue(placeholder, url.Url);
    exceptionResponseField.SetValue(w, placeholder);
#endif
#endif
}
/// <summary>
/// Returns the file-cache path that corresponds to <paramref name="url"/>.
/// </summary>
/// <param name="url">URL whose cache path is requested.</param>
/// <param name="checkExistence">
/// When true, the path is only returned if the blob exists and is not an empty blob that is
/// accompanied by a sibling blob with the ".err" extension.
/// </param>
/// <returns>
/// The cache path, or null when <paramref name="checkExistence"/> is true and the blob is
/// missing, or is zero-length with an ".err" sibling.
/// </returns>
internal static string GetPrefetchedFilePath(LazyUri url, bool checkExistence)
{
    if (Caching.AzureApi != null)
    {
        // NOTE(review): the result of NotImplemented() is not thrown here — confirm whether that is intended.
        Sanity.NotImplemented();
    }

    var cachePath = Caching.GetFileCachePath(url);
    if (!checkExistence)
    {
        return cachePath;
    }

    if (!BlobStore.Exists(cachePath))
    {
        return null;
    }

    // The ".err" sibling is only consulted for zero-length blobs.
    var isEmptyBlob = BlobStore.GetLength(cachePath) == 0;
    if (isEmptyBlob && BlobStore.Exists(Path.ChangeExtension(cachePath, ".err")))
    {
        return null;
    }

    return cachePath;
}
/// <summary>
/// Attempts to copy already-available bytes, starting at <paramref name="position"/>, into
/// <paramref name="buffer"/>.
/// </summary>
/// <param name="position">Offset of the first requested byte.</param>
/// <param name="buffer">Destination array.</param>
/// <param name="bufferOffset">Offset in <paramref name="buffer"/> at which to start writing.</param>
/// <param name="count">Maximum number of bytes to copy.</param>
/// <param name="readFully">
/// When true and the data is not yet completed, the call yields -1 instead of a partial read.
/// </param>
/// <returns>
/// 0 when <paramref name="position"/> equals the known size; -1 when the position is not yet
/// available, or when a full read was requested but cannot be satisfied yet; otherwise the
/// number of bytes copied. A single call never copies past the end of the slot that contains
/// <paramref name="position"/>.
/// </returns>
/// <exception cref="Exception">
/// The stored error, if one has been recorded, or a generic exception when the slot holding the
/// requested data is no longer present.
/// </exception>
public int TryReadFromCache(int position, byte[] buffer, int bufferOffset, int count, bool readFully)
{
    // Record the furthest byte any caller has asked for so far.
    maxRequestedByte = Math.Max(maxRequestedByte, position + count);

    if (error != null)
    {
        throw error;
    }

    if (size.HasValue && position == size.Value)
    {
        return 0;
    }

    var availableNow = Interlocked.Read(ref availableBytes);
    if (position >= availableNow)
    {
        return -1;
    }

    // Locate the requested position and the first not-yet-available byte in slot coordinates.
    var requestedSlot = (int)(position / SlotSize);
    var offsetInSlot = (int)(position % SlotSize);
    var firstMissingSlot = (int)(availableNow / SlotSize);
    var firstMissingOffset = (int)(availableNow % SlotSize);

    // Never read across a slot boundary...
    var bytesToCopy = Math.Min(count, SlotSize - offsetInSlot);
    if (requestedSlot == firstMissingSlot)
    {
        // ...nor past the last available byte of a partially filled slot.
        bytesToCopy = Math.Min(bytesToCopy, firstMissingOffset - offsetInSlot);
    }

    var isPartial = bytesToCopy != count;
    if (readFully && isPartial && !completed)
    {
        return -1;
    }

    var source = data[requestedSlot];
    if (source == null)
    {
        throw new Exception("The requested data interval has been discarded and is no longer available.");
    }

    Buffer.BlockCopy(source, offsetInSlot, buffer, bufferOffset, bytesToCopy);

    // NOTE(review): this check runs after the copy has already been attempted — confirm whether it
    // was meant to guard the Buffer.BlockCopy call above.
    Sanity.Assert(bytesToCopy >= 0);
    return bytesToCopy;
}
/// <summary>
/// Unregisters the consumer with the given id and, when it was the last one, schedules delayed
/// disposal.
/// </summary>
/// <param name="id">Index of the consumer's entry in <c>mediaStreams</c>; the entry is cleared.</param>
/// <param name="linger">Forwarded unchanged to <c>DelayedDisposalAsync</c>.</param>
internal void NotifyConsumerRemoved(int id, bool linger)
{
    lock (syncObj)
    {
        // Prefix decrement: we need the count AFTER this consumer has been removed.
        // (The postfix form returned the previous value, so removing the last consumer
        // produced 1 rather than 0 and disposal was never scheduled.)
        var newCount = --consumers;
        Sanity.Assert(newCount >= 0);

        mediaStreams[id] = null;

        if (newCount == 0)
        {
            // A fresh token source is stored in the field for the disposal scheduled below.
            disposalCancellation = new CancellationTokenSource();

            // Started without being awaited.
            DelayedDisposalAsync(disposalCancellation.Token, false, linger);
        }
    }
}
/// <summary>
/// Re-requests every cached response that was stored with an exception.
/// </summary>
/// <remarks>
/// For each entry returned by <c>GetCachedResponses()</c> whose cache file records an exception
/// type, the cached entry is deleted and the page is requested again. Failures of the retried
/// request are reported through <c>Sanity.ReportError</c> with
/// <c>ErrorCategory.CacheRetry</c> and do not stop the remaining retries. Concurrency is bounded
/// by <c>Configuration_RetryFailedRequestsConcurrency</c>.
/// </remarks>
public static async Task RetryFailedRequestsAsync()
{
    await GetCachedResponses().ForEachThrottledAsync(
        async item =>
        {
            // Second argument: only return entries that recorded a failure.
            var failedEntry = TryReadCacheFile(item.Value, true);
            if (failedEntry == null || failedEntry.ExceptionType == null)
            {
                return;
            }

            DeleteWebCache(item.Key);
            try
            {
                // Only the side effect of performing the request matters; the node is discarded.
                await item.Key.GetHtmlNodeAsync();
            }
            catch (Exception ex)
            {
                Sanity.ReportError(ex, ErrorCategory.CacheRetry);
            }
        },
        Configuration_RetryFailedRequestsConcurrency);
}
/// <summary>
/// Serializes <paramref name="webCache"/> to <paramref name="cachePath"/> as a gzip-compressed
/// binary record and periodically flushes the containing directory.
/// </summary>
/// <remarks>
/// The record starts with the byte 52 (read back by <c>TryReadCacheFile</c> as the format
/// version) followed by the fields in the fixed order written below. Nothing is written when
/// either argument is null. After writing, the containing directory is flushed if more than
/// <c>Configuration_FlushIntervalMs</c> milliseconds have elapsed since the previous flush
/// (or since the first call, which only starts the timer).
/// </remarks>
/// <param name="cachePath">Blob path to write to.</param>
/// <param name="webCache">Cache entry to persist.</param>
internal static void SaveCache(string cachePath, WebCache webCache)
{
    if (cachePath == null || webCache == null)
    {
        return;
    }

    using (var stream = BlobStore.OpenWrite(cachePath))
    using (var gz = new GZipStream2(stream, CompressionMode.Compress))
    using (var bw = new BinaryWriter(gz, Encoding.UTF8))
    {
        Sanity.AssertFastWriteByte(gz.BaseStream);

        bw.Write((byte)52);
        bw.WriteNullableString(webCache.ContentType);
        bw.Write(webCache.DateRetrieved.Ticks);
        bw.Write(webCache.ErrorCode);
        bw.WriteNullableString(webCache.ExceptionMessage);
        bw.WriteNullableString(webCache.ExceptionType);

        // Headers: count followed by key/value pairs (a null collection is written as count 0).
        bw.Write(webCache.Headers != null ? webCache.Headers.Count : 0);
        if (webCache.Headers != null)
        {
            foreach (var item in webCache.Headers)
            {
                bw.Write(item.Key);
                bw.Write(item.Value);
            }
        }

        // Cookies: same layout as headers.
        bw.Write(webCache.Cookies != null ? webCache.Cookies.Count : 0);
        if (webCache.Cookies != null)
        {
            foreach (var item in webCache.Cookies)
            {
                bw.Write(item.Key);
                bw.Write(item.Value);
            }
        }

        bw.Write((byte)webCache.DataType);
        bw.WriteNullableString(webCache.RedirectUrl != null ? webCache.RedirectUrl.AbsoluteUri : null);
        bw.WriteNullableString(webCache.Url != null ? webCache.Url.AbsoluteUri : null);
        bw.WriteNullableString(webCache.Result);
        bw.WriteNullableString(webCache.JsExecutionResults);
        bw.WriteNullableString(webCache.PageUrl?.AbsoluteUri);
    }

    if (lastFlush == null)
    {
        lastFlush = Stopwatch.StartNew();
    }
    else if (lastFlush.ElapsedMilliseconds > Configuration_FlushIntervalMs)
    {
        BlobStore.FlushDirectory(Path.GetDirectoryName(cachePath));

        // Reset + Start zeroes the elapsed time on every target framework. The previous NET35
        // branch used Stop + Start, which keeps accumulating, so once the interval had been
        // exceeded every later save flushed the directory again.
        lastFlush.Reset();
        lastFlush.Start();
    }
}
/// <summary>
/// Reads a cache entry from <paramref name="path"/>.
/// </summary>
/// <remarks>
/// Two on-disk layouts are handled. A file whose first byte is 0xEF (presumably a UTF-8 byte
/// order mark) is read as JSON, immediately re-saved through <c>SaveCache</c>, and returned.
/// A file whose first byte is 0x1F is read as a gzip-compressed binary record whose first
/// decompressed byte is a version in the range 50..80; versions 51 and above carry
/// <c>JsExecutionResults</c>, versions 52 and above also carry <c>PageUrl</c>.
/// </remarks>
/// <param name="path">Path of the cache file.</param>
/// <param name="onlyIfFailed">
/// When true, null is returned for binary entries that have no exception type recorded
/// (the rest of the record is not read in that case).
/// </param>
/// <param name="fromFileSystem">
/// When true the file is opened through <see cref="File"/>; otherwise through <c>BlobStore</c>.
/// </param>
/// <returns>The deserialized entry, or null when the file does not exist or was filtered out.</returns>
/// <exception cref="ArgumentException">The file has neither of the recognised layouts.</exception>
internal static WebCache TryReadCacheFile(string path, bool onlyIfFailed = false, bool fromFileSystem = false)
{
#if STANDALONE
    HttpUtils.EnsureInitialized();
#else
    Utils.EnsureInitialized();
#endif

    Stream input;
    if (fromFileSystem)
    {
        if (!File.Exists(path))
        {
            return null;
        }

        try
        {
            input = File.Open(path, FileMode.Open, FileAccess.Read, FileShare.Delete | FileShare.Read);
        }
        catch (FileNotFoundException)
        {
            // The file disappeared between the existence check and the open.
            return null;
        }
    }
    else
    {
        if (!BlobStore.Exists(path))
        {
            return null;
        }

        try
        {
            input = BlobStore.OpenRead(path);
        }
        catch (FileNotFoundException)
        {
            return null;
        }
    }

    Sanity.AssertFastReadByte(input);
    using (input)
    {
        var firstByte = input.ReadByte();

        if (firstByte == 0xEF)
        {
            // Skip the two bytes that follow the 0xEF marker, then treat the rest as JSON.
            input.ReadByte();
            input.ReadByte();
            using (var jsonReader = new StreamReader(input, Encoding.UTF8))
            {
                var json = jsonReader.ReadToEnd();
                var legacyEntry = JsonConvert.DeserializeObject<WebCache>(json);

                // Close the source before SaveCache rewrites the same path in the binary layout.
                input.Dispose();
                SaveCache(path, legacyEntry);
                return legacyEntry;
            }
        }

        if (firstByte != 0x1F)
        {
            throw new ArgumentException("Invalid cache file.");
        }

        // Rewind so the decompressor sees the stream from its first byte.
        input.Seek(0, SeekOrigin.Begin);
        var gz = new GZipStream2(input, CompressionMode.Decompress);

        int version = gz.ReadByte();
        if (version < 50 || version > 80)
        {
            throw new ArgumentException("Invalid cache file.");
        }

        using (var br = new BinaryReader(gz, Encoding.UTF8))
        {
            Sanity.AssertFastReadByte(br.BaseStream);

            // Field order below must mirror the order in which SaveCache writes them.
            var entry = new WebCache();
            entry.ContentType = br.ReadNullableString();
            entry.DateRetrieved = new DateTime(br.ReadInt64(), DateTimeKind.Utc);
            entry.ErrorCode = br.ReadInt32();
            entry.ExceptionMessage = br.ReadNullableString();
            entry.ExceptionType = br.ReadNullableString();

            if (onlyIfFailed && entry.ExceptionType == null)
            {
                return null;
            }

            var headerCount = br.ReadInt32();
            entry.Headers = new Dictionary<string, string>(headerCount);
            for (var remaining = headerCount; remaining > 0; remaining--)
            {
                var headerName = br.ReadString();
                var headerValue = br.ReadString();
                entry.Headers[headerName] = headerValue;
            }

            var cookieCount = br.ReadInt32();
            entry.Cookies = new Dictionary<string, string>(cookieCount);
            for (var remaining = cookieCount; remaining > 0; remaining--)
            {
                var cookieName = br.ReadString();
                var cookieValue = br.ReadString();
                entry.Cookies[cookieName] = cookieValue;
            }

            entry.DataType = (WebCacheDataType)br.ReadByte();
            entry.RedirectUrl = br.ReadNullableString()?.AsLazyUri();

            var urlText = br.ReadNullableString();
            entry.Url = urlText != null ? new LazyUri(urlText) : null;

            entry.Result = br.ReadNullableString();

            // Fields appended by later format versions.
            if (version >= 51)
            {
                entry.JsExecutionResults = br.ReadNullableString();
            }

            if (version >= 52)
            {
                var pageUrlText = br.ReadNullableString();
                entry.PageUrl = pageUrlText != null ? new LazyUri(pageUrlText) : null;
            }

            return entry;
        }
    }
}
/// <summary>
/// Not implemented: always throws the exception obtained from <c>Sanity.NotImplemented()</c>.
/// </summary>
/// <param name="doc">Unused.</param>
internal void PopulateHeaders(HtmlDocument doc) => throw Sanity.NotImplemented();
/// <summary>
/// Performs <paramref name="request"/> through a pooled curl handle and wraps the raw HTTP
/// response in an <see cref="HttpResponseMessage"/>.
/// </summary>
/// <remarks>
/// If <c>TryGetCached</c> is set and returns a response, that response is returned without any
/// network activity. Otherwise a curl handle and two memory streams are borrowed from the pools.
/// They are handed back through <c>Release</c> either when the returned content stream is
/// disposed, or immediately if anything fails before the response content has been attached.
/// </remarks>
/// <param name="request">Request to send.</param>
/// <param name="cancellationToken">Forwarded to <c>WebsiteScraper.ScrapeAsync</c>.</param>
/// <returns>The response, with headers and a streaming body.</returns>
/// <exception cref="ObjectDisposedException">The handler has been disposed.</exception>
/// <exception cref="WebException">
/// Curl reported a code other than <c>CurlCode.Ok</c>; the status is 800 plus the curl code.
/// </exception>
private async Task<HttpResponseMessage> SendAsyncInternal(HttpRequestMessage request, CancellationToken cancellationToken)
{
    if (disposed)
    {
        throw new ObjectDisposedException(nameof(CurlWarcHandler));
    }

    // Strip the "$assume-text" fragment parameter from the attached LazyUri, if there is one.
    if (request.Properties.TryGetValue("ShamanURL", out var shamanUrlObj) && shamanUrlObj is LazyUri shamanUrl)
    {
        shamanUrl.RemoveFragmentParameter("$assume-text");
        request.Properties["ShamanURL"] = shamanUrl;
    }

    if (TryGetCached != null)
    {
        var cached = TryGetCached(request);
        if (cached != null)
        {
            return cached;
        }
    }

    CurlEasy easy = null;
    MemoryStream requestMs = null;
    MemoryStream responseMs = null;
    lock (lockObj)
    {
        easy = BorrowPooled(pooledEasyHandles);
        requestMs = BorrowPooled(pooledRequestMemoryStreams);
        responseMs = BorrowPooled(pooledResponseMemoryStreams);
    }

    // Until the response content is attached, this method owns the borrowed resources and must
    // give them back on every failure path (exceptions from the scrape, cancellation, parsing).
    var handedOffToResponse = false;
    try
    {
        Sanity.Assert(requestMs != null);
        var response = new HttpResponseMessage();

        var (httpCode, curlCode, _) = await WebsiteScraper.ScrapeAsync(
            easy,
            request,
            request.RequestUri.AbsoluteUri,
            requestMs,
            responseMs,
            ea => { return GetDestinationWarc(request.RequestUri, easy, requestMs, responseMs); },
            syncObj,
            cancellationToken);

        if (curlCode != CurlCode.Ok)
        {
            // The catch block below releases the pooled resources before this propagates.
            throw new WebException("Curl: " + curlCode, (WebExceptionStatus)(800 + curlCode));
        }

        responseMs.Seek(0, SeekOrigin.Begin);
        var httpResponse = new Utf8StreamReader(responseMs);
        response.RequestMessage = request;
        response.StatusCode = httpCode;

        using (var scratchpad = new Scratchpad())
        {
            var stream = WarcItem.OpenHttp(
                httpResponse,
                scratchpad,
                request.RequestUri,
                responseMs.Length,
                out long payloadLength,
                out var _,
                out var _,
                out var contentType,
                out var _,
                (key, val) => { response.Headers.TryAddWithoutValidation(key.ToString(), val.ToString()); });

            // The release callback is handed to the content stream wrapper here.
            response.Content = new System.Net.Http.StreamContent(
                new DisposeCallbackStream(stream, () => { Release(easy, requestMs, responseMs); }));
            handedOffToResponse = true;
        }

        OnResponseReceived?.Invoke(response, easy, requestMs, responseMs);
        return response;
    }
    catch
    {
        // After hand-off the release callback belongs to the response stream; releasing here
        // as well could return the same objects to the pool twice.
        if (!handedOffToResponse)
        {
            Release(easy, requestMs, responseMs);
        }

        throw;
    }
}