Inheritance: IUriDiscovery
Beispiel #1
0
        /// <summary>
        /// Walks a range of index pages, extracts URIs from each page and hands them
        /// to <c>CacheAll</c> together with the configured cache provider.
        /// </summary>
        /// <param name="task">
        /// Task description supplying the pagination settings (navigation URI format,
        /// start page, page count, XPath lookup, optional URI filter, basic URI),
        /// the page encoding name and the cache settings (root folder and the
        /// URI-to-path transform).
        /// </param>
        /// <returns>A task that completes once every page in the range has been processed.</returns>
        public async Task RunAsync(WebCacheTask task)
        {
            // HttpClient is IDisposable; scope it so the handler and its connections
            // are released when this run finishes or fails part-way through.
            using (var httpClient = new HttpClient())
            {
                Utility.SetGeneralHttpHeaders(httpClient);

                var frame = new IndexPageNavigation(task.Pagination.NavigationUriFormat);

                var encoding = Encoding.GetEncoding(task.Encoding);

                // Configure the shared cache provider: root folder plus a regex-based
                // resolver built from the task's URI-to-path transform.
                var cacheProvider = DefaultLocalFileSystemHttpCacheProvider.Current;

                Regex urlRegex = new Regex(task.Cache.UriToPathTransform.Pattern);

                var resolver = new RegexPathResolver(urlRegex, task.Cache.UriToPathTransform.TargetFormat);

                cacheProvider.Configure(task.Cache.RootFolder, resolver);

                // The URI filter is optional: an empty or missing pattern passes null.
                var discovery = new XPathUriDiscovery(task.Pagination.Lookup.XPath,
                    task.Pagination.Lookup.Attribute,
                    string.IsNullOrEmpty(task.Pagination.UriFilter)
                    ? null
                    : new Regex(task.Pagination.UriFilter),
                    task.Pagination.BasicUri
                    );

                // Visit PageLength consecutive pages starting at StartPage.
                for (int i = task.Pagination.StartPage;
                     i < task.Pagination.StartPage + task.Pagination.PageLength;
                     i++)
                {
                    frame.NavigateTo(i);

                    string text;

                    // Read the whole page body using the task's encoding.
                    using (var content = await frame.GetAsync())
                    using (var sr = new StreamReader(content, encoding))
                    {
                        text = await sr.ReadToEndAsync();
                    }

                    var uris = discovery.Discover(text);

                    uris = TransformIfNeeded(task, uris);

                    // Awaited inside the using scope, so the client is still alive
                    // for the whole call.
                    await CacheAll(httpClient, cacheProvider, uris);
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Walks a range of index pages, extracts URIs from each page via an XPath
        /// lookup and writes every discovered URI to the console.
        /// </summary>
        /// <param name="task">
        /// Task description supplying the page navigation URI format, encoding name,
        /// cache provider settings, XPath/attribute lookup, optional URI filter
        /// pattern, start page and page count.
        /// </param>
        /// <returns>A task that completes once every page in the range has been processed.</returns>
        public async Task RunAsync(WebCacheServiceTask task)
        {
            var navigator = new IndexPageNavigation(task.PageNavigationUriFormat);
            var pageEncoding = Encoding.GetEncoding(task.Encoding);

            // Configure the shared cache provider with a regex-based path resolver.
            var cache = DefaultLocalFileSystemHttpCacheProvider.Current;
            var pathResolver = new RegexPathResolver(
                new Regex(task.CacheProviderUriPattern),
                task.CacheProviderPathFormat);
            cache.Configure(task.CacheProviderRootFolder, pathResolver);

            // The URI filter is optional: an empty or missing pattern passes null.
            var discovery = new XPathUriDiscovery(
                task.HtmlNodeXPath,
                task.HtmlNodeAttribute,
                string.IsNullOrEmpty(task.UriFilterPattern) ? null : new Regex(task.UriFilterPattern));

            // Visit PageLength consecutive pages starting at StartPage.
            int page = task.StartPage;
            while (page < task.StartPage + task.PageLength)
            {
                navigator.NavigateTo(page);

                // Read the whole page body using the task's encoding.
                string html = string.Empty;
                using (var stream = await navigator.GetAsync())
                using (var reader = new StreamReader(stream, pageEncoding))
                {
                    html = await reader.ReadToEndAsync();
                }

                foreach (var uri in discovery.Discover(html))
                {
                    Console.WriteLine(uri);
                }

                page++;
            }
        }