/// <summary>
/// Initializes a new instance of the <see cref="Crawler"/> class by storing each
/// argument in its backing field. No validation is performed here.
/// </summary>
/// <param name="inpParams">URI / user-agent pair, stored in <c>_inpParams</c>.</param>
/// <param name="logMessanger">Stored in <c>_logMessanger</c>.</param>
/// <param name="httpParser">Stored in <c>_httpParser</c>.</param>
/// <param name="output">Stored in <c>_output</c>.</param>
public Crawler(
    KeyValuePair<Uri, IUserAgent> inpParams,
    LogMessanger logMessanger,
    IHttpParser httpParser,
    string output)
{
    _output = output;
    _httpParser = httpParser;
    _logMessanger = logMessanger;
    _inpParams = inpParams;
}
readonly string BackupPath;     // ditto to write revised (localised) *.html

/// <summary>
/// Initializes a new instance of the <see cref="Localiser"/> class, storing the
/// supplied collaborators and paths as given (no validation or normalisation).
/// </summary>
/// <param name="httpserver">HTML parser, stored in <c>Httpserver</c>.</param>
/// <param name="htmlPath">Stored in <c>HtmlPath</c>.</param>
/// <param name="backupPath">Optional; stored in <c>BackupPath</c>. Defaults to <c>null</c>.</param>
/// <param name="download">Optional downloader, stored in <c>Downloader</c>. Defaults to <c>null</c>.</param>
public Localiser(
    IHttpParser httpserver,
    string htmlPath,
    string backupPath = null,
    Downloader download = null)
{
    HtmlPath = htmlPath;
    BackupPath = backupPath;
    Httpserver = httpserver;
    Downloader = download;
}
/// <summary>
/// Builds the fixture used by the start-line tests: a memory pool, a duplex pipe
/// pair whose transport side is kept, an <c>Http1Connection</c> wired to that
/// transport, and a parser / parsing handler pair for that connection.
/// </summary>
public StartLineTests()
{
    MemoryPool = PinnedBlockMemoryPoolFactory.Create();

    // One options instance is deliberately shared by both ends of the pair.
    var pipeOptions = new PipeOptions(
        MemoryPool,
        readerScheduler: PipeScheduler.Inline,
        writerScheduler: PipeScheduler.Inline,
        useSynchronizationContext: false);
    Transport = DuplexPipe.CreateConnectionPair(pipeOptions, pipeOptions).Transport;

    var httpConnectionContext = TestContextFactory.CreateHttpConnectionContext(
        serviceContext: TestContextFactory.CreateServiceContext(
            serverOptions: new KestrelServerOptions(),
            httpParser: new HttpParser<Http1ParsingHandler>()),
        connectionContext: Mock.Of<ConnectionContext>(),
        transport: Transport,
        timeoutControl: new TimeoutControl(timeoutHandler: null),
        memoryPool: MemoryPool,
        connectionFeatures: new FeatureCollection());

    Http1Connection = new Http1Connection(httpConnectionContext);

    // The parser under test is separate from the one given to the service context
    // and is created with showErrorDetails enabled.
    Parser = new HttpParser<Http1ParsingHandler>(showErrorDetails: true);
    ParsingHandler = new Http1ParsingHandler(Http1Connection);
}
/// <summary>
/// Builds the fixture used by the start-line tests: a memory pool, a duplex pipe
/// pair whose transport side is kept, an <c>Http1Connection</c> built from an
/// <c>HttpConnectionContext</c>, and a parser / parsing handler pair for it.
/// </summary>
public StartLineTests()
{
    MemoryPool = KestrelMemoryPool.Create();

    // One options instance is deliberately shared by both ends of the pair.
    var pipeOptions = new PipeOptions(
        MemoryPool,
        readerScheduler: PipeScheduler.Inline,
        writerScheduler: PipeScheduler.Inline,
        useSynchronizationContext: false);
    Transport = DuplexPipe.CreateConnectionPair(pipeOptions, pipeOptions).Transport;

    var connectionContext = new HttpConnectionContext
    {
        ServiceContext = new ServiceContext
        {
            ServerOptions = new KestrelServerOptions(),
            Log = _trace,
            HttpParser = new HttpParser<Http1ParsingHandler>()
        },
        ConnectionFeatures = new FeatureCollection(),
        MemoryPool = MemoryPool,
        Transport = Transport,
        TimeoutControl = new TimeoutControl(timeoutHandler: null)
    };
    Http1Connection = new Http1Connection(context: connectionContext);

    // The parser under test is separate from the one placed in the service context
    // and is created with showErrorDetails enabled.
    Parser = new HttpParser<Http1ParsingHandler>(showErrorDetails: true);
    ParsingHandler = new Http1ParsingHandler(Http1Connection);
}
/// <summary>
/// Initializes a new instance of the <see cref="HeaderDecoder"/> class.
/// </summary>
/// <param name="parser">HTTP parser to use.</param>
/// <exception cref="ArgumentNullException"><paramref name="parser"/> is <c>null</c>.</exception>
public HeaderDecoder(IHttpParser parser)
{
    if (parser == null)
    {
        // nameof keeps the reported parameter name correct if the parameter is renamed.
        throw new ArgumentNullException(nameof(parser));
    }

    _parser = parser;
}
/// <summary>
/// Initializes a new HTTP/1 connection from <paramref name="context"/>: keeps the
/// context, takes the parser from the service context, caches the keep-alive and
/// request-headers timeouts as ticks, and creates the output producer.
/// </summary>
/// <param name="context">Connection context; also passed to the base constructor.</param>
public Http1Connection(Http1ConnectionContext context)
    : base(context)
{
    _context = context;
    _parser = ServiceContext.HttpParser;

    // Timeouts are cached as raw ticks taken from the configured server limits.
    var limits = ServerOptions.Limits;
    _keepAliveTicks = limits.KeepAliveTimeout.Ticks;
    _requestHeadersTimeoutTicks = limits.RequestHeadersTimeout.Ticks;

    Output = new Http1OutputProducer(
        context.Application.Input,
        context.Transport.Output,
        context.ConnectionId,
        context.ServiceContext.Log,
        context.TimeoutControl);
}
/// <summary>
/// Initializes a new instance of the <see cref="Downloader"/> class, storing its
/// collaborators, resolving the target paths and then calling
/// <c>SetDefaultHeaders()</c>.
/// </summary>
/// <param name="dataserver">Repository, stored in <c>Dataserver</c>.</param>
/// <param name="httpclient">HTTP client, stored in <c>Client</c>.</param>
/// <param name="policy">HTTP retry policy, stored in <c>_httpRetryPolicy</c>.</param>
/// <param name="httpserver">HTML parser, stored in <c>Httpserver</c>.</param>
/// <param name="htmlPath">Passed through <c>Utils.TrimOrNull</c>; a <c>null</c> result is rejected.</param>
/// <param name="otherPath">Passed through <c>Utils.TrimOrNull</c>; falls back to <c>HtmlPath</c> when the result is <c>null</c>.</param>
/// <param name="backupPath">Stored in <c>BackupPath</c> as given.</param>
/// <param name="maxfilesize">Stored in <c>MaxFileSize</c>; defaults to 10,000,000.</param>
/// <exception cref="InvalidOperationException">
/// <c>Utils.TrimOrNull(htmlPath)</c> returned <c>null</c>.
/// </exception>
public Downloader(
    IRepository dataserver,
    HttpClient httpclient,
    IAsyncPolicy<HttpResponseMessage> policy,
    IHttpParser httpserver,
    string htmlPath,
    string otherPath = null,
    string backupPath = null,
    long maxfilesize = 10_000_000)
{
    Client = httpclient;
    _httpRetryPolicy = policy;
    Httpserver = httpserver;
    Dataserver = dataserver;

    var normalisedHtmlPath = Utils.TrimOrNull(htmlPath);
    if (normalisedHtmlPath is null)
    {
        throw new InvalidOperationException($"DownloadPage(htmlPath) cannot be null");
    }

    HtmlPath = normalisedHtmlPath;

    // Non-HTML content shares the HTML folder unless a separate one was supplied.
    var normalisedOtherPath = Utils.TrimOrNull(otherPath);
    OtherPath = normalisedOtherPath is null ? HtmlPath : normalisedOtherPath;

    BackupPath = backupPath;
    MaxFileSize = maxfilesize;

    SetDefaultHeaders();
}
/// <summary>
/// Initializes a new HTTP/1 connection from <paramref name="context"/>: keeps the
/// context, takes the parser from the service context, caches the keep-alive and
/// request-headers timeouts as ticks, and creates the output producer.
/// </summary>
/// <param name="context">Connection context; also passed to the base constructor.</param>
public Http1Connection(Http1ConnectionContext context)
    : base(context)
{
    _context = context;
    _parser = ServiceContext.HttpParser;

    // Timeouts are cached as raw ticks taken from the configured server limits.
    var limits = ServerOptions.Limits;
    _keepAliveTicks = limits.KeepAliveTimeout.Ticks;
    _requestHeadersTimeoutTicks = limits.RequestHeadersTimeout.Ticks;

    // The bytes-written feature is looked up from the connection's feature collection.
    Output = new Http1OutputProducer(
        context.Transport.Output,
        context.ConnectionId,
        context.ConnectionContext,
        context.ServiceContext.Log,
        context.TimeoutControl,
        context.ConnectionFeatures.Get<IBytesWrittenFeature>());
}
/// <summary>
/// Initializes a new instance of the <see cref="BaseRequestCacheManager"/> class.
/// All arguments are run through the <c>Requires</c> guards before any field is
/// assigned; the guards run in the same order as before.
/// </summary>
/// <param name="name">Cache manager name; checked for null, empty and whitespace.</param>
/// <param name="cache">Backing cache; checked for null.</param>
/// <param name="parser">HTTP parser; checked for null.</param>
/// <param name="requestLogger">Request logger; checked for null.</param>
public BaseRequestCacheManager(
    string name,
    ICache<string, byte[]> cache,
    IHttpParser parser,
    IRequestLogger requestLogger)
{
    // Guard clauses (exception types are determined by the Requires helper).
    Requires.NotNull(name, "name");
    Requires.NotNullOrEmpty<ArgumentException>(name);
    Requires.NotNullOrWhiteSpace<ArgumentException>(name);
    Requires.NotNull(cache, "cache");
    Requires.NotNull(parser, "parser");
    Requires.NotNull(requestLogger, "requestLogger");

    _name = name;
    _cache = cache;
    _parser = parser;
    _requestLogger = requestLogger;
}
/// <summary>
/// Initializes a new instance of the <see cref="HttpProxyHandlerFactory"/> class.
/// Every dependency is null-checked through <c>Requires.NotNull</c> before any
/// field is assigned; the checks run in the same order as before.
/// </summary>
/// <param name="parser">HTTP parser.</param>
/// <param name="requestSender">HTTP request sender.</param>
/// <param name="hashProvider">Hash provider.</param>
/// <param name="cache">Cache keyed by string with byte-array values.</param>
/// <param name="requestLogger">Request logger.</param>
public HttpProxyHandlerFactory(
    IHttpParser parser,
    IHttpRequestSender requestSender,
    IHashProvider hashProvider,
    ICache<string, byte[]> cache,
    IRequestLogger requestLogger)
{
    // Guard clauses (exception types are determined by the Requires helper).
    Requires.NotNull(parser, "parser");
    Requires.NotNull(requestSender, "requestSender");
    Requires.NotNull(hashProvider, "hashProvider");
    Requires.NotNull(cache, "cache");
    Requires.NotNull(requestLogger, "requestLogger");

    _parser = parser;
    _requestSender = requestSender;
    _hashProvider = hashProvider;
    _cache = cache;
    _requestLogger = requestLogger;
}
/// <summary>
/// Initializes a new HTTP/1 connection from <paramref name="context"/>: keeps the
/// context, takes the parser from the service context, caches the keep-alive and
/// request-headers timeouts as ticks, creates the request body pipe, and creates
/// the output producer that is exposed through <c>Output</c>.
/// </summary>
/// <param name="context">Connection context; also passed to the base constructor.</param>
public Http1Connection(HttpConnectionContext context)
    : base(context)
{
    _context = context;
    _parser = ServiceContext.HttpParser;

    // Timeouts are cached as raw ticks taken from the configured server limits.
    var limits = ServerOptions.Limits;
    _keepAliveTicks = limits.KeepAliveTimeout.Ticks;
    _requestHeadersTimeoutTicks = limits.RequestHeadersTimeout.Ticks;

    RequestBodyPipe = CreateRequestBodyPipe();

    // The producer is kept both as its concrete type and as the public Output.
    var outputProducer = new Http1OutputProducer(
        context.Transport.Output,
        context.ConnectionId,
        context.ConnectionContext,
        context.ServiceContext.Log,
        context.TimeoutControl,
        this);
    _http1Output = outputProducer;
    Output = outputProducer;
}
/// <summary>
/// Initializes a new HTTP/1 connection from <paramref name="context"/>: runs
/// <c>Initialize</c>, keeps the context, takes the parser from the service
/// context, caches the keep-alive and request-headers timeouts as ticks, creates
/// the output producer, and exposes the transport input, the output producer and
/// the memory pool.
/// </summary>
/// <param name="context">Connection context supplying transport, logging, timeouts and memory pool.</param>
public Http1Connection(HttpConnectionContext context)
{
    Initialize(context);

    _context = context;
    _parser = ServiceContext.HttpParser;

    // Timeouts are cached as raw ticks taken from the configured server limits.
    var limits = ServerOptions.Limits;
    _keepAliveTicks = limits.KeepAliveTimeout.Ticks;
    _requestHeadersTimeoutTicks = limits.RequestHeadersTimeout.Ticks;

    // The producer is kept both as its concrete type and as the public Output.
    var outputProducer = new Http1OutputProducer(
        context.Transport.Output,
        context.ConnectionId,
        context.ConnectionContext,
        context.ServiceContext.Log,
        context.TimeoutControl,
        this,
        context.MemoryPool);
    _http1Output = outputProducer;

    Input = context.Transport.Input;
    Output = outputProducer;
    MemoryPool = context.MemoryPool;
}
/// <summary>
/// Creates a <c>ServiceContext</c> whose properties are set from the supplied
/// arguments. <c>Log</c> is always a new <c>KestrelTrace</c> over
/// <c>NullLoggerFactory.Instance</c>; every omitted optional argument leaves the
/// corresponding property set to <c>null</c>.
/// </summary>
/// <param name="serverOptions">Assigned to <c>ServerOptions</c>.</param>
/// <param name="httpParser">Assigned to <c>HttpParser</c>.</param>
/// <param name="scheduler">Assigned to <c>Scheduler</c>.</param>
/// <param name="systemClock">Assigned to <c>SystemClock</c>.</param>
/// <param name="dateHeaderValueManager">Assigned to <c>DateHeaderValueManager</c>.</param>
/// <param name="connectionManager">Assigned to <c>ConnectionManager</c>.</param>
/// <param name="heartbeat">Assigned to <c>Heartbeat</c>.</param>
/// <returns>The newly created context.</returns>
public static ServiceContext CreateServiceContext(
    KestrelServerOptions serverOptions,
    IHttpParser<Http1ParsingHandler> httpParser = null,
    PipeScheduler scheduler = null,
    ISystemClock systemClock = null,
    DateHeaderValueManager dateHeaderValueManager = null,
    ConnectionManager connectionManager = null,
    Heartbeat heartbeat = null)
    => new ServiceContext
    {
        Log = new KestrelTrace(NullLoggerFactory.Instance),
        Scheduler = scheduler,
        HttpParser = httpParser,
        SystemClock = systemClock,
        DateHeaderValueManager = dateHeaderValueManager,
        ConnectionManager = connectionManager,
        Heartbeat = heartbeat,
        ServerOptions = serverOptions
    };
/// <summary>
/// Initializes a new instance of the <see cref="HeaderDecoder"/> class.
/// </summary>
/// <param name="parser">HTTP parser to use.</param>
/// <exception cref="ArgumentNullException"><paramref name="parser"/> is <c>null</c>.</exception>
public HeaderDecoder(IHttpParser parser)
{
    // Braces added around the guard; nameof keeps the reported parameter name
    // correct if the parameter is renamed.
    if (parser == null)
    {
        throw new ArgumentNullException(nameof(parser));
    }

    _parser = parser;
}
/// <summary>
/// Initializes a new instance of the <see cref="MemoryRequestCacheManager"/> class.
/// All work is delegated to the base constructor, which receives the fixed name
/// "Memory Cache" together with the supplied arguments.
/// </summary>
/// <param name="cache">Memory cache forwarded to the base constructor.</param>
/// <param name="parser">HTTP parser forwarded to the base constructor.</param>
/// <param name="requestLogger">Request logger forwarded to the base constructor.</param>
public MemoryRequestCacheManager(MemoryCache<string, byte[]> cache, IHttpParser parser, IRequestLogger requestLogger) : base("Memory Cache", cache, parser, requestLogger) { }
/// <summary>
/// Initializes a new instance of the <see cref="HttpProxyHandler"/> class: null-checks
/// the injected services, stores them, and then creates the TCP listener, the
/// cancellation source, the (not yet started) long-running worker task and the
/// wait handle. Caching is enabled by default.
/// </summary>
/// <param name="address">Local address passed to the <see cref="TcpListener"/>.</param>
/// <param name="port">Local port passed to the <see cref="TcpListener"/>.</param>
/// <param name="parser">HTTP parser.</param>
/// <param name="requestSender">HTTP request sender.</param>
/// <param name="hashProvider">Hash provider.</param>
/// <param name="cache">Cache keyed by string with byte-array values.</param>
/// <param name="requestLogger">Request logger.</param>
public HttpProxyHandler(
    IPAddress address,
    int port,
    IHttpParser parser,
    IHttpRequestSender requestSender,
    IHashProvider hashProvider,
    ICache<string, byte[]> cache,
    IRequestLogger requestLogger)
{
    // Guard clauses (exception types are determined by the Requires helper).
    Requires.NotNull(parser, "parser");
    Requires.NotNull(requestSender, "requestSender");
    Requires.NotNull(hashProvider, "hashProvider");
    Requires.NotNull(cache, "cache");
    Requires.NotNull(requestLogger, "requestLogger");

    _parser = parser;
    _requestSender = requestSender;
    _hashProvider = hashProvider;
    _cache = cache;
    _requestLogger = requestLogger;

    _listener = new TcpListener(address, port);
    // _clientQueue = new ActionQueue<TcpClient>(Handle);

    // The token source must exist before Token is read for the worker task
    // (presumably Token exposes _tokenSource.Token; confirm against its definition).
    _tokenSource = new CancellationTokenSource();
    _workingTask = new Task(Run, Token, TaskCreationOptions.LongRunning);
    _waitHandle = new AutoResetEvent(false);

    _enableCache = true;
}
static int MaxFileSize;     // don't download files bigger than 10 MB

/// <summary>
/// Console entry point. Creates the EF context and repository, loads the MIME
/// table, reads directory and limit settings from the application configuration
/// (creating missing directories), builds the retry policies and the HTTP client,
/// runs the download/parse phase (at most three attempts, with a fresh context and
/// repository after each failure), runs the HTML localisation phase, and finally
/// blocks on <c>Console.ReadLine()</c>.
/// </summary>
/// <param name="_">Command-line arguments; not used.</param>
static async Task Main(string[] _)
{
    //string fs1 = @"C:\Ligonier\webcache\state - theology - does - sin - deserve - damnation.html",
    //    fs2 = @"C:\Ligonier\webcache\assets\bible - plan.pdf";
    //var rel = Utils.GetRelativePath(fs1, fs2);
    //Console.WriteLine(rel);

    dbctx = new WebModel();     // EF context defaults to config: "name=DefaultConnection"

    // Database retry policy: up to 5 retries with exponentially growing waits.
    IAsyncPolicy AdoRetryPolicy =                                   // TODO: probably should configure based on App.config
        Policy.Handle <Exception>(ex => true)                       // retry every exception! TODO: improve
        .WaitAndRetryAsync(5, retryAttempt => TimeSpan.FromSeconds(Math.Pow(2, retryAttempt) / 4));   // i.e. 0.5, 1, 2, 4, 8 second retries
    //IRepository repo = new Repository(dbctx);
    IRepository repo = new BulkRepository(dbctx, AdoRetryPolicy);
    MimeCollection.Load(await repo.GetContentTypeToExtnsAsync());

    //var ct = new CancellationToken();

    // Working directories: each setting falls back to a default, and the directory is created if absent.
    htmldir = ConfigurationManager.AppSettings["htmldir"] ?? @"C:\Ligonier\webcache";
    if (!Directory.Exists(htmldir))
    {
        Directory.CreateDirectory(htmldir);
    }
    var otherdir = ConfigurationManager.AppSettings["otherdir"] ?? (htmldir + Path.DirectorySeparatorChar + OTHFOLDER);
    if (!Directory.Exists(otherdir))
    {
        Directory.CreateDirectory(otherdir);
    }
    backupdir = ConfigurationManager.AppSettings["backupdir"] ?? (htmldir + Path.DirectorySeparatorChar + BACKUPFOLDER);
    if (!Directory.Exists(backupdir))
    {
        Directory.CreateDirectory(backupdir);
    }

    // Numeric limits: a missing or unparsable setting falls back to the hard-coded default.
    if (!int.TryParse(ConfigurationManager.AppSettings["batchsize"], out var batchSize))
    {
        batchSize = 4;
    }
    if (!int.TryParse(ConfigurationManager.AppSettings["maxlinks"], out MaxLinks))
    {
        MaxLinks = 1500;
    }
    if (!int.TryParse(ConfigurationManager.AppSettings["maxfilesize"], out MaxFileSize))
    {
        MaxFileSize = 10_000_000;       // 10 MB
    }

    // HTTP status codes for which the HTTP retry policy treats the response as retryable.
    var ValidRetry = new HttpStatusCode[] {
        HttpStatusCode.Ambiguous,                                   // 300
        HttpStatusCode.Conflict,                                    // 409
        HttpStatusCode.InternalServerError,                         // 500
        HttpStatusCode.NotImplemented,                              // 501
        HttpStatusCode.BadGateway,                                  // 502
        HttpStatusCode.ServiceUnavailable,                          // 503
        HttpStatusCode.GatewayTimeout };                            // 504
    // NOTE(review): the retry count passed below is 0, which presumably disables retries altogether
    // and does not match the "1, 2, 4 seconds" comment - confirm whether this is intentional.
    IAsyncPolicy <HttpResponseMessage> HttpRetryPolicy =            // TODO: probably should configure based on App.config
        Policy.HandleResult <HttpResponseMessage>(rsp => ValidRetry.Contains(rsp.StatusCode))
        .WaitAndRetryAsync(0, retryAttempt => TimeSpan.FromSeconds(Math.Pow(2, retryAttempt) / 2));   // i.e. 1, 2, 4 seconds

    // The client is handed to every Downloader created below and is never disposed in this method;
    // the analyzer warning about that is suppressed around its creation.
#pragma warning disable GCop302 // Since '{0}' implements IDisposable, wrap it in a using() statement
    //TODO: plug-in Polly as MessageProcessingHandler / whatever !
    var Client = new HttpClient(
        new HttpClientHandler { AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate, AllowAutoRedirect = true })
    { Timeout = new TimeSpan(0, 0, 20) };
#pragma warning restore GCop302 // Since '{0}' implements IDisposable, wrap it in a using() statement

    var p = new Program();

    // Download phase: retrycount starts at 2 and the loop runs while it is >= 0, i.e. at most three attempts.
    // Each attempt gets a new parser and downloader; the last ones created are reused for localisation below.
    var retrycount = 2;
    Downloader download;
    do
    {
        HParser = new HapParser(MaxLinks);
        download = new Downloader(repo, Client, HttpRetryPolicy, HParser, htmldir, otherdir, backupdir, MaxFileSize);
        var dlresult = await p.DownloadAndParse(repo, batchSize, download);
        if (!dlresult) // failure may be due to tainted EF context so have to reset all these
        {
            dbctx = new WebModel();     // EF context defaults to config: "name=DefaultConnection"
            repo = new BulkRepository(dbctx, AdoRetryPolicy);
            retrycount--;
        }
        else
        {
            break;
        }
    } while (retrycount >= 0);
    // This message is printed whether the loop ended through success or through exhausted attempts.
    Console.WriteLine("*** DownloadAndParse FINISHED ***");

    // Localisation phase.
    var localise = new Localiser(HParser, htmldir, backupdir, download);
    await p.HtmlLocalise(repo, batchSize, localise, getMissing : true);
    Console.WriteLine("*** HtmlLocalise FINISHED ***");

#if DEBUG
    // Debug builds list the entries of MimeCollection.MissingExtns in sorted order.
    foreach (var extn in MimeCollection.MissingExtns.OrderBy(e => e))
    {
        Console.WriteLine($"missing extn\t{extn}");
    }
#endif

    // Keep the console window open until a line is entered.
    Console.ReadLine();
}
/// <summary>
/// Initializes a new instance of the <see cref="IsolatedStorageCacheManager"/> class.
/// All work is delegated to the base constructor, which receives the fixed name
/// "Isolated Storage Cache" together with the supplied arguments.
/// </summary>
/// <param name="cache">Isolated-storage cache forwarded to the base constructor.</param>
/// <param name="parser">HTTP parser forwarded to the base constructor.</param>
/// <param name="requestLogger">Request logger forwarded to the base constructor.</param>
public IsolatedStorageCacheManager(IsolatedStorageCache<string, byte[]> cache, IHttpParser parser, IRequestLogger requestLogger) : base("Isolated Storage Cache", cache, parser, requestLogger) { }