Exemple #1
0
        /// <summary>
        /// Attaches a placeholder <see cref="HttpWebResponse"/> carrying a URL and a status code to a
        /// <see cref="WebException"/> that has no response of its own.
        /// </summary>
        /// <remarks>
        /// The body is only compiled when DESKTOP is defined. Private fields are looked up by name through
        /// reflection; when any lookup fails the exception is left untouched. When CORECLR is also defined the
        /// method reports "not implemented" through <c>Sanity</c> and returns without patching anything.
        /// </remarks>
        /// <param name="w">Exception to patch. Ignored when null or when it already has a response.</param>
        /// <param name="url">URL whose <c>Url</c> value is stored in the placeholder. Ignored when null.</param>
        /// <param name="statusCode">Status code stored in the placeholder response.</param>
        internal static void SetDummyResponseWithUrl(WebException w, LazyUri url, HttpStatusCode statusCode)
        {
#if DESKTOP
            if (w == null || url == null || w.Response != null)
            {
                return;
            }

            const BindingFlags PrivateInstance = BindingFlags.Instance | BindingFlags.NonPublic;

            var exceptionResponseField = typeof(WebException).GetField("response", PrivateInstance); // m_Response on netfx
            if (exceptionResponseField == null)
            {
                return;
            }
#if CORECLR
            Sanity.NotImplementedButTryToContinue();
            return;
#else
            // The placeholder is created without running any constructor; only the two fields below are populated.
            var placeholder = FormatterServices.GetUninitializedObject(typeof(HttpWebResponse));

            var responseUriField = typeof(HttpWebResponse).GetField("uri", PrivateInstance); // m_Uri on netfx
            if (responseUriField == null)
            {
                return;
            }

            var responseStatusField = typeof(HttpWebResponse).GetField("statusCode", PrivateInstance); // m_StatusCode on netfx
            if (responseStatusField == null)
            {
                return;
            }

            responseStatusField.SetValue(placeholder, statusCode);
            responseUriField.SetValue(placeholder, url.Url);
            exceptionResponseField.SetValue(w, placeholder);
#endif
#endif
        }
Exemple #2
0
        /// <summary>
        /// Returns the file-cache path for <paramref name="url"/>, optionally checking that a usable entry exists.
        /// </summary>
        /// <param name="url">URL passed to <c>Caching.GetFileCachePath</c>.</param>
        /// <param name="checkExistence">
        /// When true, the blob store is consulted and null is returned if the entry is missing, or if it is
        /// zero-length and a sibling file with the <c>.err</c> extension exists.
        /// </param>
        /// <returns>The cache path, or null when <paramref name="checkExistence"/> is set and the checks fail.</returns>
        internal static string GetPrefetchedFilePath(LazyUri url, bool checkExistence)
        {
            if (Caching.AzureApi != null)
            {
                Sanity.NotImplemented();
            }

            var cachePath = Caching.GetFileCachePath(url);
            if (!checkExistence)
            {
                return cachePath;
            }

            if (!BlobStore.Exists(cachePath))
            {
                return null;
            }

            // An empty entry accompanied by an ".err" sibling is treated as unavailable.
            var isEmptyWithErrorMarker =
                BlobStore.GetLength(cachePath) == 0 &&
                BlobStore.Exists(Path.ChangeExtension(cachePath, ".err"));

            return isEmptyWithErrorMarker ? null : cachePath;
        }
        /// <summary>
        /// Attempts to copy already-available bytes starting at <paramref name="position"/> into
        /// <paramref name="buffer"/>. A single call never reads past the end of the slot containing
        /// <paramref name="position"/>.
        /// </summary>
        /// <param name="position">Absolute offset to read from.</param>
        /// <param name="buffer">Destination array.</param>
        /// <param name="bufferOffset">Offset inside <paramref name="buffer"/> where copying starts.</param>
        /// <param name="count">Maximum number of bytes wanted.</param>
        /// <param name="readFully">
        /// When true and fewer than <paramref name="count"/> bytes could be returned while the source is not yet
        /// completed, nothing is copied and -1 is returned.
        /// </param>
        /// <returns>
        /// 0 when the size is known and <paramref name="position"/> equals it; -1 when the requested data is not
        /// available yet; otherwise the number of bytes copied.
        /// </returns>
        /// <exception cref="Exception">
        /// Rethrows the stored error if there is one, or is thrown when the slot holding the data is null.
        /// </exception>
        public int TryReadFromCache(
            int position,
            byte[] buffer,
            int bufferOffset,
            int count,
            bool readFully
            )
        {
            // Track the furthest byte any caller has asked for.
            maxRequestedByte = Math.Max(maxRequestedByte, position + count);

            if (error != null)
            {
                throw error;
            }

            if (size.HasValue && position == size.Value)
            {
                return 0;
            }

            var availableSnapshot = Interlocked.Read(ref availableBytes);
            if (position >= availableSnapshot)
            {
                return -1;
            }

            var slotIndex    = (int)(position / SlotSize);
            var offsetInSlot = (int)(position % SlotSize);

            var firstMissingSlot   = (int)(availableSnapshot / SlotSize);
            var firstMissingOffset = (int)(availableSnapshot % SlotSize);

            // Clamp to the end of the slot, and additionally to the available bytes when this is the slot
            // that is still being filled.
            var bytesToCopy = Math.Min(count, SlotSize - offsetInSlot);
            if (slotIndex == firstMissingSlot)
            {
                bytesToCopy = Math.Min(bytesToCopy, firstMissingOffset - offsetInSlot);
            }

            var isShortRead = bytesToCopy != count;
            if (readFully && isShortRead && !completed)
            {
                return -1;
            }

            var slotBytes = data[slotIndex];
            if (slotBytes == null)
            {
                throw new Exception("The requested data interval has been discarded and is no longer available.");
            }

            Buffer.BlockCopy(slotBytes, offsetInSlot, buffer, bufferOffset, bytesToCopy);

            Sanity.Assert(bytesToCopy >= 0);
            return bytesToCopy;
        }
        /// <summary>
        /// Records that the consumer registered under <paramref name="id"/> has gone away and, when it was the
        /// last one, starts the delayed disposal.
        /// </summary>
        /// <param name="id">Index of the consumer's entry in <c>mediaStreams</c>; the entry is cleared.</param>
        /// <param name="linger">Forwarded unchanged to <c>DelayedDisposalAsync</c>.</param>
        internal void NotifyConsumerRemoved(int id, bool linger)
        {
            lock (syncObj)
            {
                // Pre-decrement so that newCount is the number of consumers remaining AFTER this removal.
                // With a post-decrement the value would be the old count, and the "last consumer left"
                // branch below would not run when going from 1 to 0.
                var newCount = --consumers;
                Sanity.Assert(newCount >= 0);

                mediaStreams[id] = null;

                if (newCount == 0)
                {
                    disposalCancellation = new CancellationTokenSource();

                    // Deliberately not awaited: the disposal runs in the background and is controlled
                    // through disposalCancellation.
                    DelayedDisposalAsync(disposalCancellation.Token, false, linger);
                }
            }
        }
Exemple #5
0
        /// <summary>
        /// Walks the cached responses and, for every entry whose cache file records an exception type, deletes
        /// the cached entry and issues the request again.
        /// </summary>
        /// <remarks>
        /// Failures of the repeated request are reported through <c>Sanity.ReportError</c> with the
        /// <c>CacheRetry</c> category and do not stop the iteration. The second argument to
        /// <c>ForEachThrottledAsync</c> is <c>Configuration_RetryFailedRequestsConcurrency</c>.
        /// </remarks>
        public static async Task RetryFailedRequestsAsync()
        {
            await GetCachedResponses().ForEachThrottledAsync(
                async entry =>
                {
                    // Second argument: only entries that recorded a failure are returned.
                    var failedEntry = TryReadCacheFile(entry.Value, true);
                    if (failedEntry == null || failedEntry.ExceptionType == null)
                    {
                        return;
                    }

                    DeleteWebCache(entry.Key);
                    try
                    {
                        // The result itself is not needed; the call is made for its effect of re-fetching.
                        await entry.Key.GetHtmlNodeAsync();
                    }
                    catch (Exception ex)
                    {
                        Sanity.ReportError(ex, ErrorCategory.CacheRetry);
                    }
                },
                Configuration_RetryFailedRequestsConcurrency);
        }
Exemple #6
0
        /// <summary>
        /// Serializes <paramref name="webCache"/> to <paramref name="cachePath"/> in the blob store as a
        /// gzip-compressed binary record, then periodically flushes the containing directory.
        /// </summary>
        /// <remarks>
        /// Layout: format version byte (52), content type, retrieval ticks, error code, exception message,
        /// exception type, header count and key/value pairs, cookie count and key/value pairs, data type byte,
        /// redirect URL, URL, result, JS execution results, page URL. The field order must stay in sync with
        /// <c>TryReadCacheFile</c>.
        /// </remarks>
        /// <param name="cachePath">Destination path; nothing is written when null.</param>
        /// <param name="webCache">Entry to serialize; nothing is written when null.</param>
        internal static void SaveCache(string cachePath, WebCache webCache)
        {
            if (cachePath == null || webCache == null)
            {
                return;
            }

            using (var stream = BlobStore.OpenWrite(cachePath))
            using (var gz = new GZipStream2(stream, CompressionMode.Compress))
            using (var bw = new BinaryWriter(gz, Encoding.UTF8))
            {
                Sanity.AssertFastWriteByte(gz.BaseStream);

                bw.Write((byte)52); // format version
                bw.WriteNullableString(webCache.ContentType);
                bw.Write(webCache.DateRetrieved.Ticks);
                bw.Write(webCache.ErrorCode);
                bw.WriteNullableString(webCache.ExceptionMessage);
                bw.WriteNullableString(webCache.ExceptionType);

                // A missing dictionary is written as a zero count.
                bw.Write(webCache.Headers?.Count ?? 0);
                if (webCache.Headers != null)
                {
                    foreach (var header in webCache.Headers)
                    {
                        bw.Write(header.Key);
                        bw.Write(header.Value);
                    }
                }

                bw.Write(webCache.Cookies?.Count ?? 0);
                if (webCache.Cookies != null)
                {
                    foreach (var cookie in webCache.Cookies)
                    {
                        bw.Write(cookie.Key);
                        bw.Write(cookie.Value);
                    }
                }

                bw.Write((byte)webCache.DataType);
                bw.WriteNullableString(webCache.RedirectUrl?.AbsoluteUri);
                bw.WriteNullableString(webCache.Url?.AbsoluteUri);
                bw.WriteNullableString(webCache.Result);
                bw.WriteNullableString(webCache.JsExecutionResults);
                bw.WriteNullableString(webCache.PageUrl?.AbsoluteUri);
            }

            if (lastFlush == null)
            {
                // First save: start timing, no flush yet.
                lastFlush = Stopwatch.StartNew();
            }
            else if (lastFlush.ElapsedMilliseconds > Configuration_FlushIntervalMs)
            {
                BlobStore.FlushDirectory(Path.GetDirectoryName(cachePath));
#if NET35
                // Stopwatch.Restart() does not exist on .NET 3.5. Stop() followed by Start() keeps the
                // accumulated elapsed time, which would make every later save flush again; Reset() + Start()
                // is the actual equivalent of Restart().
                lastFlush.Reset();
                lastFlush.Start();
#else
                lastFlush.Restart();
#endif
            }
        }
Exemple #7
0
        /// <summary>
        /// Reads a cache entry from <paramref name="path"/>, accepting either the gzip-compressed binary format
        /// or a JSON document that starts with a UTF-8 byte order mark.
        /// </summary>
        /// <remarks>
        /// A JSON entry is deserialized, re-saved through <c>SaveCache</c> and returned as is;
        /// <paramref name="onlyIfFailed"/> is not applied on that path.
        /// </remarks>
        /// <param name="path">Location of the cache file.</param>
        /// <param name="onlyIfFailed">
        /// When true, a binary entry with no recorded exception type yields null and the rest of the record is
        /// not read.
        /// </param>
        /// <param name="fromFileSystem">
        /// When true the file is opened with <see cref="File"/>; otherwise it is opened through <c>BlobStore</c>.
        /// </param>
        /// <returns>The entry, or null when the file does not exist or is filtered out.</returns>
        /// <exception cref="ArgumentException">
        /// The first byte is neither 0xEF nor 0x1F, or the version byte is outside 50..80.
        /// </exception>
        internal static WebCache TryReadCacheFile(string path, bool onlyIfFailed = false, bool fromFileSystem = false)
        {
#if STANDALONE
            HttpUtils.EnsureInitialized();
#else
            Utils.EnsureInitialized();
#endif
            Stream input;
            if (fromFileSystem)
            {
                if (!File.Exists(path))
                {
                    return null;
                }

                try
                {
                    input = File.Open(path, FileMode.Open, FileAccess.Read, FileShare.Delete | FileShare.Read);
                }
                catch (FileNotFoundException)
                {
                    // The file disappeared between the existence check and the open.
                    return null;
                }
            }
            else
            {
                if (!BlobStore.Exists(path))
                {
                    return null;
                }

                try
                {
                    input = BlobStore.OpenRead(path);
                }
                catch (FileNotFoundException)
                {
                    return null;
                }
            }

            Sanity.AssertFastReadByte(input);

            using (input)
            {
                var leadByte = input.ReadByte();

                // 0xEF is the first byte of a UTF-8 byte order mark: JSON entry.
                if (leadByte == 0xEF)
                {
                    // Skip the remaining two BOM bytes.
                    input.ReadByte();
                    input.ReadByte();
                    using (var jsonReader = new StreamReader(input, Encoding.UTF8))
                    {
                        var jsonEntry = JsonConvert.DeserializeObject<WebCache>(jsonReader.ReadToEnd());

                        // Close the input before the entry is written back to the same path.
                        input.Dispose();
                        SaveCache(path, jsonEntry);
                        return jsonEntry;
                    }
                }

                // 0x1F is the first byte of a gzip header.
                if (leadByte != 0x1F)
                {
                    throw new ArgumentException("Invalid cache file.");
                }

                input.Seek(0, SeekOrigin.Begin);
                var gz = new GZipStream2(input, CompressionMode.Decompress);

                var version = gz.ReadByte();
                if (version < 50 || version > 80)
                {
                    throw new ArgumentException("Invalid cache file.");
                }

                using (var br = new BinaryReader(gz, Encoding.UTF8))
                {
                    Sanity.AssertFastReadByte(br.BaseStream);

                    var entry = new WebCache();
                    entry.ContentType = br.ReadNullableString();
                    entry.DateRetrieved = new DateTime(br.ReadInt64(), DateTimeKind.Utc);
                    entry.ErrorCode = br.ReadInt32();
                    entry.ExceptionMessage = br.ReadNullableString();
                    entry.ExceptionType = br.ReadNullableString();

                    if (onlyIfFailed && entry.ExceptionType == null)
                    {
                        return null;
                    }

                    var headerCount = br.ReadInt32();
                    entry.Headers = new Dictionary<string, string>(headerCount);
                    for (var remaining = headerCount; remaining > 0; remaining--)
                    {
                        var headerName = br.ReadString();
                        var headerValue = br.ReadString();
                        entry.Headers[headerName] = headerValue;
                    }

                    var cookieCount = br.ReadInt32();
                    entry.Cookies = new Dictionary<string, string>(cookieCount);
                    for (var remaining = cookieCount; remaining > 0; remaining--)
                    {
                        var cookieName = br.ReadString();
                        var cookieValue = br.ReadString();
                        entry.Cookies[cookieName] = cookieValue;
                    }

                    entry.DataType = (WebCacheDataType)br.ReadByte();
                    entry.RedirectUrl = br.ReadNullableString()?.AsLazyUri();

                    var urlText = br.ReadNullableString();
                    entry.Url = urlText != null ? new LazyUri(urlText) : null;
                    entry.Result = br.ReadNullableString();

                    // Fields appended by later format versions.
                    if (version >= 51)
                    {
                        entry.JsExecutionResults = br.ReadNullableString();
                    }

                    if (version >= 52)
                    {
                        var pageUrlText = br.ReadNullableString();
                        entry.PageUrl = pageUrlText != null ? new LazyUri(pageUrlText) : null;
                    }

                    return entry;
                }
            }
        }
Exemple #8
0
 /// <summary>
 /// Not implemented: always throws the exception returned by <c>Sanity.NotImplemented()</c>.
 /// </summary>
 /// <param name="doc">Currently unused.</param>
 internal void PopulateHeaders(HtmlDocument doc)
 {
     throw Sanity.NotImplemented();
 }
        /// <summary>
        /// Sends <paramref name="request"/> through <c>WebsiteScraper.ScrapeAsync</c> using a borrowed curl handle
        /// and borrowed request/response buffers, and wraps the raw HTTP response in an
        /// <see cref="HttpResponseMessage"/>.
        /// </summary>
        /// <remarks>
        /// When <c>TryGetCached</c> is set and returns a response, that response is returned and no transfer
        /// takes place. The borrowed handle and buffers are passed to <c>Release</c> either immediately on a
        /// curl failure or when the returned content stream is disposed.
        /// </remarks>
        /// <param name="request">Request to send.</param>
        /// <param name="cancellationToken">Token forwarded to the scrape operation.</param>
        /// <returns>The response, with status code, headers and a streaming content body.</returns>
        /// <exception cref="ObjectDisposedException">The handler has been disposed.</exception>
        /// <exception cref="WebException">curl reported a code other than <c>CurlCode.Ok</c>.</exception>
        private async Task<HttpResponseMessage> SendAsyncInternal(HttpRequestMessage request, CancellationToken cancellationToken)
        {
            if (disposed)
            {
                throw new ObjectDisposedException(nameof(CurlWarcHandler));
            }

            if (request.Properties.TryGetValue("ShamanURL", out var shamanUrlObj) && shamanUrlObj is LazyUri shamanUrl)
            {
                shamanUrl.RemoveFragmentParameter("$assume-text");
                request.Properties["ShamanURL"] = shamanUrl;
            }

            if (TryGetCached != null)
            {
                var cachedResponse = TryGetCached(request);
                if (cachedResponse != null)
                {
                    return cachedResponse;
                }
            }

            CurlEasy easy = null;
            MemoryStream requestMs = null;
            MemoryStream responseMs = null;

            lock (lockObj)
            {
                easy = BorrowPooled(pooledEasyHandles);
                requestMs = BorrowPooled(pooledRequestMemoryStreams);
                responseMs = BorrowPooled(pooledResponseMemoryStreams);
            }

            Sanity.Assert(requestMs != null);

            var (httpCode, curlCode, _) = await WebsiteScraper.ScrapeAsync(
                easy,
                request,
                request.RequestUri.AbsoluteUri,
                requestMs,
                responseMs,
                ea =>
                {
                    return GetDestinationWarc(request.RequestUri, easy, requestMs, responseMs);
                },
                syncObj,
                cancellationToken);

            if (curlCode != CurlCode.Ok)
            {
                Release(easy, requestMs, responseMs);
                throw new WebException("Curl: " + curlCode, (WebExceptionStatus)(800 + curlCode));
            }

            // Rewind so the raw response can be parsed from its first byte.
            responseMs.Seek(0, SeekOrigin.Begin);
            var rawResponseReader = new Utf8StreamReader(responseMs);

            var response = new HttpResponseMessage
            {
                RequestMessage = request,
                StatusCode = httpCode
            };

            using (var scratchpad = new Scratchpad())
            {
                var bodyStream = WarcItem.OpenHttp(
                    rawResponseReader,
                    scratchpad,
                    request.RequestUri,
                    responseMs.Length,
                    out long _,
                    out var _,
                    out var _,
                    out var _,
                    out var _,
                    (key, val) =>
                    {
                        response.Headers.TryAddWithoutValidation(key.ToString(), val.ToString());
                    });

                // The handle and buffers stay borrowed until the caller disposes the content stream.
                response.Content = new System.Net.Http.StreamContent(new DisposeCallbackStream(bodyStream, () =>
                {
                    Release(easy, requestMs, responseMs);
                }));
            }

            OnResponseReceived?.Invoke(response, easy, requestMs, responseMs);
            return response;
        }