Esempio n. 1
0
 /// <summary>
 /// Creates a crawler and stores every supplied argument unchanged in the matching private field.
 /// No validation is performed on any argument.
 /// </summary>
 /// <param name="inpParams">Pair of a <see cref="Uri"/> and the <see cref="IUserAgent"/> that goes with it.</param>
 /// <param name="logMessanger">Log messenger kept for later use.</param>
 /// <param name="httpParser">HTTP parser kept for later use.</param>
 /// <param name="output">Output string kept for later use (NOTE(review): presumably a destination path - confirm).</param>
 public Crawler(
     KeyValuePair<Uri, IUserAgent> inpParams,
     LogMessanger logMessanger,
     IHttpParser httpParser,
     string output)
 {
     _output = output;
     _httpParser = httpParser;
     _logMessanger = logMessanger;
     _inpParams = inpParams;
 }
Esempio n. 2
0
 readonly string BackupPath;             //  ditto to write revised (localised) *.html

 /// <summary>
 /// Creates a localiser and stores the supplied parser, paths and downloader as given.
 /// </summary>
 /// <param name="httpserver">HTTP parser stored in <c>Httpserver</c>.</param>
 /// <param name="htmlPath">Path stored in <c>HtmlPath</c>.</param>
 /// <param name="backupPath">Optional path stored in <c>BackupPath</c>; stays <c>null</c> when omitted.</param>
 /// <param name="download">Optional downloader stored in <c>Downloader</c>; stays <c>null</c> when omitted.</param>
 public Localiser(
     IHttpParser httpserver,
     string htmlPath,
     string backupPath = null,
     Downloader download = null)
 {
     HtmlPath = htmlPath;
     BackupPath = backupPath;
     Httpserver = httpserver;
     Downloader = download;
 }
Esempio n. 3
0
    /// <summary>
    /// Builds the shared fixture: a memory pool, an in-memory duplex pipe transport,
    /// an <see cref="Http1Connection"/> wired to that transport, and a parser/handler pair
    /// targeting the connection.
    /// </summary>
    public StartLineTests()
    {
        MemoryPool = PinnedBlockMemoryPoolFactory.Create();

        // Both ends of the pipe run continuations inline and ignore any synchronization context;
        // the same options object is used for both directions of the pair.
        var pipeOptions = new PipeOptions(
            MemoryPool,
            readerScheduler: PipeScheduler.Inline,
            writerScheduler: PipeScheduler.Inline,
            useSynchronizationContext: false);
        Transport = DuplexPipe.CreateConnectionPair(pipeOptions, pipeOptions).Transport;

        var services = TestContextFactory.CreateServiceContext(
            serverOptions: new KestrelServerOptions(),
            httpParser: new HttpParser<Http1ParsingHandler>());

        Http1Connection = new Http1Connection(
            TestContextFactory.CreateHttpConnectionContext(
                serviceContext: services,
                connectionContext: Mock.Of<ConnectionContext>(),
                transport: Transport,
                timeoutControl: new TimeoutControl(timeoutHandler: null),
                memoryPool: MemoryPool,
                connectionFeatures: new FeatureCollection()));

        // The parser under test is a separate instance from the one given to the service context
        // and is created with error details switched on.
        Parser = new HttpParser<Http1ParsingHandler>(showErrorDetails: true);
        ParsingHandler = new Http1ParsingHandler(Http1Connection);
    }
Esempio n. 4
0
        /// <summary>
        /// Builds the shared fixture: a memory pool, an in-memory duplex pipe transport,
        /// an <see cref="Http1Connection"/> wired to that transport, and a parser/handler pair
        /// targeting the connection.
        /// </summary>
        public StartLineTests()
        {
            MemoryPool = KestrelMemoryPool.Create();

            // Both ends of the pipe run continuations inline and ignore any synchronization context;
            // the same options object is used for both directions of the pair.
            var pipeOptions = new PipeOptions(
                MemoryPool,
                readerScheduler: PipeScheduler.Inline,
                writerScheduler: PipeScheduler.Inline,
                useSynchronizationContext: false);
            Transport = DuplexPipe.CreateConnectionPair(pipeOptions, pipeOptions).Transport;

            var services = new ServiceContext
            {
                ServerOptions = new KestrelServerOptions(),
                Log = _trace,
                HttpParser = new HttpParser<Http1ParsingHandler>()
            };

            var connectionContext = new HttpConnectionContext
            {
                ServiceContext = services,
                ConnectionFeatures = new FeatureCollection(),
                MemoryPool = MemoryPool,
                Transport = Transport,
                TimeoutControl = new TimeoutControl(timeoutHandler: null)
            };
            Http1Connection = new Http1Connection(context: connectionContext);

            // The parser under test is a separate instance from the one placed on the service context
            // and is created with error details switched on.
            Parser = new HttpParser<Http1ParsingHandler>(showErrorDetails: true);
            ParsingHandler = new Http1ParsingHandler(Http1Connection);
        }
Esempio n. 5
0
 /// <summary>
 /// Initializes a new instance of the <see cref="HeaderDecoder"/> class.
 /// </summary>
 /// <param name="parser">HTTP parser to use.</param>
 /// <exception cref="ArgumentNullException"><paramref name="parser"/> is <c>null</c>.</exception>
 public HeaderDecoder(IHttpParser parser)
 {
     // nameof keeps the reported parameter name in step with the signature if it is ever renamed.
     _parser = parser ?? throw new ArgumentNullException(nameof(parser));
 }
Esempio n. 6
0
        /// <summary>
        /// Creates an HTTP/1.x connection over the given context: keeps the context, takes the parser
        /// from the service context, caches the keep-alive and request-headers timeouts as ticks, and
        /// creates the output producer.
        /// </summary>
        /// <param name="context">Connection context; also passed to the base constructor.</param>
        public Http1Connection(Http1ConnectionContext context)
            : base(context)
        {
            _context = context;
            _parser = ServiceContext.HttpParser;

            // Timeouts are stored as raw tick counts.
            var limits = ServerOptions.Limits;
            _keepAliveTicks = limits.KeepAliveTimeout.Ticks;
            _requestHeadersTimeoutTicks = limits.RequestHeadersTimeout.Ticks;

            Output = new Http1OutputProducer(
                _context.Application.Input,
                _context.Transport.Output,
                _context.ConnectionId,
                _context.ServiceContext.Log,
                _context.TimeoutControl);
        }
Esempio n. 7
0
 /// <summary>
 /// Creates a downloader: stores the collaborators, resolves the target folders, records the size
 /// limit and finally calls <c>SetDefaultHeaders()</c>.
 /// </summary>
 /// <param name="dataserver">Repository stored in <c>Dataserver</c>.</param>
 /// <param name="httpclient">HTTP client stored in <c>Client</c>.</param>
 /// <param name="policy">Retry policy for HTTP responses, stored in <c>_httpRetryPolicy</c>.</param>
 /// <param name="httpserver">HTTP parser stored in <c>Httpserver</c>.</param>
 /// <param name="htmlPath">Required path; passed through <c>Utils.TrimOrNull</c> and must not come back <c>null</c>.</param>
 /// <param name="otherPath">Optional path; when <c>Utils.TrimOrNull</c> yields <c>null</c>, <c>HtmlPath</c> is used instead.</param>
 /// <param name="backupPath">Optional backup path, stored as given.</param>
 /// <param name="maxfilesize">Value stored in <c>MaxFileSize</c>; defaults to 10,000,000.</param>
 /// <exception cref="InvalidOperationException"><c>Utils.TrimOrNull(htmlPath)</c> returned <c>null</c>.</exception>
 public Downloader(
     IRepository dataserver,
     HttpClient httpclient,
     IAsyncPolicy<HttpResponseMessage> policy,
     IHttpParser httpserver,
     string htmlPath,
     string otherPath = null,
     string backupPath = null,
     long maxfilesize = 10_000_000)
 {
     Client = httpclient;
     _httpRetryPolicy = policy;
     Httpserver = httpserver;
     Dataserver = dataserver;

     var trimmedHtmlPath = Utils.TrimOrNull(htmlPath);
     if (trimmedHtmlPath == null)
     {
         throw new InvalidOperationException("DownloadPage(htmlPath) cannot be null");
     }
     HtmlPath = trimmedHtmlPath;

     // OtherPath falls back to the (already resolved) HtmlPath.
     var trimmedOtherPath = Utils.TrimOrNull(otherPath);
     OtherPath = trimmedOtherPath ?? HtmlPath;

     BackupPath = backupPath;
     MaxFileSize = maxfilesize;

     SetDefaultHeaders();
 }
Esempio n. 8
0
        /// <summary>
        /// Creates an HTTP/1.x connection over the given context: keeps the context, takes the parser
        /// from the service context, caches the keep-alive and request-headers timeouts as ticks, and
        /// creates the output producer.
        /// </summary>
        /// <param name="context">Connection context; also passed to the base constructor.</param>
        public Http1Connection(Http1ConnectionContext context)
            : base(context)
        {
            _context = context;
            _parser = ServiceContext.HttpParser;

            // Timeouts are stored as raw tick counts.
            var limits = ServerOptions.Limits;
            _keepAliveTicks = limits.KeepAliveTimeout.Ticks;
            _requestHeadersTimeoutTicks = limits.RequestHeadersTimeout.Ticks;

            // The bytes-written feature is looked up from the connection's feature collection.
            var bytesWritten = _context.ConnectionFeatures.Get<IBytesWrittenFeature>();
            Output = new Http1OutputProducer(
                _context.Transport.Output,
                _context.ConnectionId,
                _context.ConnectionContext,
                _context.ServiceContext.Log,
                _context.TimeoutControl,
                bytesWritten);
        }
        /// <summary>
        /// Creates the cache manager after checking every argument with <c>Requires</c>.
        /// Checks run in the order name, cache, parser, requestLogger; the fields are stored
        /// only once all checks have passed.
        /// </summary>
        /// <param name="name">Manager name; checked for null, empty and whitespace-only.</param>
        /// <param name="cache">Cache of byte arrays keyed by string; checked for null.</param>
        /// <param name="parser">HTTP parser; checked for null.</param>
        /// <param name="requestLogger">Request logger; checked for null.</param>
        public BaseRequestCacheManager(
            string name,
            ICache<string, byte[]> cache,
            IHttpParser parser,
            IRequestLogger requestLogger)
        {
            Requires.NotNull(name, nameof(name));
            Requires.NotNullOrEmpty<ArgumentException>(name);
            Requires.NotNullOrWhiteSpace<ArgumentException>(name);
            Requires.NotNull(cache, nameof(cache));
            Requires.NotNull(parser, nameof(parser));
            Requires.NotNull(requestLogger, nameof(requestLogger));

            _name = name;
            _cache = cache;
            _parser = parser;
            _requestLogger = requestLogger;
        }
        /// <summary>
        /// Creates the factory after checking each dependency for null with <c>Requires.NotNull</c>.
        /// Checks run in the order parser, requestSender, hashProvider, cache, requestLogger; the
        /// fields are stored only once all checks have passed.
        /// </summary>
        /// <param name="parser">HTTP parser.</param>
        /// <param name="requestSender">HTTP request sender.</param>
        /// <param name="hashProvider">Hash provider.</param>
        /// <param name="cache">Cache of byte arrays keyed by string.</param>
        /// <param name="requestLogger">Request logger.</param>
        public HttpProxyHandlerFactory(
            IHttpParser parser,
            IHttpRequestSender requestSender,
            IHashProvider hashProvider,
            ICache<string, byte[]> cache,
            IRequestLogger requestLogger)
        {
            Requires.NotNull(parser, nameof(parser));
            Requires.NotNull(requestSender, nameof(requestSender));
            Requires.NotNull(hashProvider, nameof(hashProvider));
            Requires.NotNull(cache, nameof(cache));
            Requires.NotNull(requestLogger, nameof(requestLogger));

            _parser = parser;
            _requestSender = requestSender;
            _hashProvider = hashProvider;
            _cache = cache;
            _requestLogger = requestLogger;
        }
Esempio n. 11
0
        /// <summary>
        /// Creates an HTTP/1.x connection over the given context: keeps the context, takes the parser
        /// from the service context, caches the keep-alive and request-headers timeouts as ticks,
        /// creates the request body pipe and then the output producer.
        /// </summary>
        /// <param name="context">Connection context; also passed to the base constructor.</param>
        public Http1Connection(HttpConnectionContext context)
            : base(context)
        {
            _context = context;
            _parser = ServiceContext.HttpParser;

            // Timeouts are stored as raw tick counts.
            var limits = ServerOptions.Limits;
            _keepAliveTicks = limits.KeepAliveTimeout.Ticks;
            _requestHeadersTimeoutTicks = limits.RequestHeadersTimeout.Ticks;

            RequestBodyPipe = CreateRequestBodyPipe();

            // The producer receives this connection as its last argument and is exposed both through
            // the typed field and through Output.
            var producer = new Http1OutputProducer(
                _context.Transport.Output,
                _context.ConnectionId,
                _context.ConnectionContext,
                _context.ServiceContext.Log,
                _context.TimeoutControl,
                this);
            _http1Output = producer;
            Output = producer;
        }
Esempio n. 12
0
        /// <summary>
        /// Creates an HTTP/1.x connection: calls <c>Initialize(context)</c> first, then keeps the
        /// context, takes the parser from the service context, caches the keep-alive and
        /// request-headers timeouts as ticks, creates the output producer and publishes the
        /// transport input, the output and the memory pool.
        /// </summary>
        /// <param name="context">Connection context supplying transport, logging, timeouts and memory pool.</param>
        public Http1Connection(HttpConnectionContext context)
        {
            Initialize(context);

            _context = context;
            _parser = ServiceContext.HttpParser;

            // Timeouts are stored as raw tick counts.
            var limits = ServerOptions.Limits;
            _keepAliveTicks = limits.KeepAliveTimeout.Ticks;
            _requestHeadersTimeoutTicks = limits.RequestHeadersTimeout.Ticks;

            // The producer receives this connection and the context's memory pool as its last arguments.
            var producer = new Http1OutputProducer(
                _context.Transport.Output,
                _context.ConnectionId,
                _context.ConnectionContext,
                _context.ServiceContext.Log,
                _context.TimeoutControl,
                this,
                _context.MemoryPool);
            _http1Output = producer;

            Input = _context.Transport.Input;
            Output = producer;
            MemoryPool = _context.MemoryPool;
        }
Esempio n. 13
0
    /// <summary>
    /// Builds a <see cref="ServiceContext"/> from the supplied parts. <c>Log</c> is always a new
    /// <see cref="KestrelTrace"/> over <c>NullLoggerFactory.Instance</c>; every other property is
    /// set to the matching argument, so an omitted optional argument leaves that property <c>null</c>.
    /// </summary>
    /// <param name="serverOptions">Server options assigned to <c>ServerOptions</c>.</param>
    /// <param name="httpParser">Optional HTTP/1 parser.</param>
    /// <param name="scheduler">Optional pipe scheduler.</param>
    /// <param name="systemClock">Optional system clock.</param>
    /// <param name="dateHeaderValueManager">Optional date header value manager.</param>
    /// <param name="connectionManager">Optional connection manager.</param>
    /// <param name="heartbeat">Optional heartbeat.</param>
    /// <returns>The newly created service context.</returns>
    public static ServiceContext CreateServiceContext(
        KestrelServerOptions serverOptions,
        IHttpParser<Http1ParsingHandler> httpParser = null,
        PipeScheduler scheduler = null,
        ISystemClock systemClock = null,
        DateHeaderValueManager dateHeaderValueManager = null,
        ConnectionManager connectionManager = null,
        Heartbeat heartbeat = null)
    {
        return new ServiceContext
        {
            Log = new KestrelTrace(NullLoggerFactory.Instance),
            Scheduler = scheduler,
            HttpParser = httpParser,
            SystemClock = systemClock,
            DateHeaderValueManager = dateHeaderValueManager,
            ConnectionManager = connectionManager,
            Heartbeat = heartbeat,
            ServerOptions = serverOptions
        };
    }
 /// <summary>
 /// Initializes a new instance of the <see cref="HeaderDecoder"/> class.
 /// </summary>
 /// <param name="parser">HTTP parser to use.</param>
 /// <exception cref="ArgumentNullException"><paramref name="parser"/> is <c>null</c>.</exception>
 public HeaderDecoder(IHttpParser parser)
 {
     // nameof keeps the reported parameter name in step with the signature if it is ever renamed.
     _parser = parser ?? throw new ArgumentNullException(nameof(parser));
 }
 /// <summary>
 /// Creates a request cache manager over a memory cache. All arguments are forwarded to the base
 /// constructor together with the fixed name "Memory Cache".
 /// </summary>
 /// <param name="cache">Memory cache of byte arrays keyed by string.</param>
 /// <param name="parser">HTTP parser.</param>
 /// <param name="requestLogger">Request logger.</param>
 public MemoryRequestCacheManager(
     MemoryCache<string, byte[]> cache,
     IHttpParser parser,
     IRequestLogger requestLogger)
     : base("Memory Cache", cache, parser, requestLogger)
 {
     // Nothing to do here: the base constructor performs all initialisation.
 }
Esempio n. 16
0
        /// <summary>
        /// Creates a proxy handler for the given address and port. Each dependency is checked for null
        /// with <c>Requires.NotNull</c> (in the order parser, requestSender, hashProvider, cache,
        /// requestLogger) before anything is stored. The listener and the worker task are created
        /// here but neither is started in this constructor. Caching is switched on.
        /// </summary>
        /// <param name="address">Address handed to the <see cref="TcpListener"/>.</param>
        /// <param name="port">Port handed to the <see cref="TcpListener"/>.</param>
        /// <param name="parser">HTTP parser.</param>
        /// <param name="requestSender">HTTP request sender.</param>
        /// <param name="hashProvider">Hash provider.</param>
        /// <param name="cache">Cache of byte arrays keyed by string.</param>
        /// <param name="requestLogger">Request logger.</param>
        public HttpProxyHandler(
            IPAddress address,
            int port,
            IHttpParser parser,
            IHttpRequestSender requestSender,
            IHashProvider hashProvider,
            ICache<string, byte[]> cache,
            IRequestLogger requestLogger)
        {
            Requires.NotNull(parser, nameof(parser));
            Requires.NotNull(requestSender, nameof(requestSender));
            Requires.NotNull(hashProvider, nameof(hashProvider));
            Requires.NotNull(cache, nameof(cache));
            Requires.NotNull(requestLogger, nameof(requestLogger));

            _parser = parser;
            _requestSender = requestSender;
            _hashProvider = hashProvider;
            _cache = cache;
            _requestLogger = requestLogger;

            _listener = new TcpListener(address, port);
            // _clientQueue = new ActionQueue<TcpClient>(Handle);

            // The token source is created before the task because the task is built with Token
            // (NOTE(review): Token presumably exposes _tokenSource.Token - confirm).
            _tokenSource = new CancellationTokenSource();
            _workingTask = new Task(Run, Token, TaskCreationOptions.LongRunning);
            _waitHandle = new AutoResetEvent(false);

            _enableCache = true;
        }
Esempio n. 17
0
        static int MaxFileSize;                 // don't download files bigger than 10 MB

        /// <summary>
        /// Console entry point. Reads folder and limit settings from App.config (falling back to
        /// built-in defaults), makes sure the working folders exist, runs the download-and-parse pass
        /// (re-creating the EF context and repository after each failed pass, up to three passes),
        /// then localises the downloaded HTML and waits for a line of console input before returning.
        /// </summary>
        /// <param name="_">Command-line arguments; not used.</param>
        static async Task Main(string[] _)
        {
            //string fs1 = @"C:\Ligonier\webcache\state - theology - does - sin - deserve - damnation.html",
            //    fs2 = @"C:\Ligonier\webcache\assets\bible - plan.pdf";
            //var rel = Utils.GetRelativePath(fs1, fs2);
            //Console.WriteLine(rel);

            dbctx = new WebModel();             // EF context defaults to config: "name=DefaultConnection"

            // TODO: probably should configure based on App.config
            IAsyncPolicy adoRetryPolicy =
                Policy.Handle<Exception>(ex => true)                                                        // retry every exception! TODO: improve
                      .WaitAndRetryAsync(5, retryAttempt => TimeSpan.FromSeconds(Math.Pow(2, retryAttempt) / 4));  // i.e. 0.5, 1, 2, 4, 8 second retries

            //IRepository repo = new Repository(dbctx);
            IRepository repo = new BulkRepository(dbctx, adoRetryPolicy);

            MimeCollection.Load(await repo.GetContentTypeToExtnsAsync());

            //var ct = new CancellationToken();

            // Directory.CreateDirectory is a no-op for a folder that already exists, so no
            // Directory.Exists pre-check is needed (and the check-then-create race goes away).
            htmldir = ConfigurationManager.AppSettings["htmldir"] ?? @"C:\Ligonier\webcache";
            Directory.CreateDirectory(htmldir);

            var otherdir = ConfigurationManager.AppSettings["otherdir"] ?? (htmldir + Path.DirectorySeparatorChar + OTHFOLDER);
            Directory.CreateDirectory(otherdir);

            backupdir = ConfigurationManager.AppSettings["backupdir"] ?? (htmldir + Path.DirectorySeparatorChar + BACKUPFOLDER);
            Directory.CreateDirectory(backupdir);

            // Numeric settings: a missing or unparsable value falls back to the default.
            if (!int.TryParse(ConfigurationManager.AppSettings["batchsize"], out var batchSize))
            {
                batchSize = 4;
            }
            if (!int.TryParse(ConfigurationManager.AppSettings["maxlinks"], out MaxLinks))
            {
                MaxLinks = 1500;
            }
            if (!int.TryParse(ConfigurationManager.AppSettings["maxfilesize"], out MaxFileSize))
            {
                MaxFileSize = 10_000_000;       // 10 MB
            }

            // HTTP status codes that the HTTP retry policy treats as retryable results.
            var retryableStatuses = new HttpStatusCode[]
            {
                HttpStatusCode.Ambiguous,               // 300
                HttpStatusCode.Conflict,                // 409
                HttpStatusCode.InternalServerError,     // 500
                HttpStatusCode.NotImplemented,          // 501
                HttpStatusCode.BadGateway,              // 502
                HttpStatusCode.ServiceUnavailable,      // 503
                HttpStatusCode.GatewayTimeout           // 504
            };

            // TODO: probably should configure based on App.config
            // NOTE(review): the retry count passed here is 0, which looks like it disables retries
            // altogether, whereas the delay formula is annotated for waits of 1, 2, 4 seconds
            // (i.e. three retries) - confirm which one is intended.
            IAsyncPolicy<HttpResponseMessage> httpRetryPolicy =
                Policy.HandleResult<HttpResponseMessage>(rsp => retryableStatuses.Contains(rsp.StatusCode))
                      .WaitAndRetryAsync(0, retryAttempt => TimeSpan.FromSeconds(Math.Pow(2, retryAttempt) / 2));  // i.e. 1, 2, 4 seconds

#pragma warning disable GCop302 // Since '{0}' implements IDisposable, wrap it in a using() statement
            //TODO: plug-in Polly as MessageProcessingHandler / whatever !
            // A single client is created here and handed to the Downloader; it is not disposed in this method.
            var httpClient = new HttpClient(
                new HttpClientHandler
                {
                    AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate,
                    AllowAutoRedirect = true
                })
            {
                Timeout = new TimeSpan(0, 0, 20)
            };
#pragma warning restore GCop302 // Since '{0}' implements IDisposable, wrap it in a using() statement

            var p = new Program();
            var retrycount = 2;                 // two retries after the first pass => at most three passes
            Downloader download;
            do
            {
                // A fresh parser and downloader are built for every pass.
                HParser = new HapParser(MaxLinks);
                download = new Downloader(repo, httpClient, httpRetryPolicy, HParser, htmldir, otherdir, backupdir, MaxFileSize);
                var dlresult = await p.DownloadAndParse(repo, batchSize, download);

                if (dlresult)
                {
                    break;
                }

                // failure may be due to tainted EF context so have to reset all these
                dbctx = new WebModel();         // EF context defaults to config: "name=DefaultConnection"
                repo = new BulkRepository(dbctx, adoRetryPolicy);
                retrycount--;
            } while (retrycount >= 0);
            Console.WriteLine("*** DownloadAndParse FINISHED ***");

            // Localisation runs with the parser and downloader from the last pass, whether or not that pass succeeded.
            var localise = new Localiser(HParser, htmldir, backupdir, download);
            await p.HtmlLocalise(repo, batchSize, localise, getMissing: true);

            Console.WriteLine("*** HtmlLocalise FINISHED ***");

#if DEBUG
            foreach (var extn in MimeCollection.MissingExtns.OrderBy(e => e))
            {
                Console.WriteLine($"missing extn\t{extn}");
            }
#endif

            Console.ReadLine();
        }
 /// <summary>
 /// Creates a request cache manager over an isolated-storage cache. All arguments are forwarded to
 /// the base constructor together with the fixed name "Isolated Storage Cache".
 /// </summary>
 /// <param name="cache">Isolated-storage cache of byte arrays keyed by string.</param>
 /// <param name="parser">HTTP parser.</param>
 /// <param name="requestLogger">Request logger.</param>
 public IsolatedStorageCacheManager(
     IsolatedStorageCache<string, byte[]> cache,
     IHttpParser parser,
     IRequestLogger requestLogger)
     : base("Isolated Storage Cache", cache, parser, requestLogger)
 {
     // Nothing to do here: the base constructor performs all initialisation.
 }