示例#1
0
        /// <summary>
        /// Walks the index pages described by <paramref name="task"/>, discovers URIs on each page
        /// and hands them to <c>CacheAll</c> together with the configured cache provider.
        /// </summary>
        /// <param name="task">
        /// Task definition supplying the text encoding, the pagination settings (navigation format,
        /// start page, page count, lookup XPath/attribute, optional URI filter and transform) and the
        /// cache settings (root folder and URI-to-path transform).
        /// </param>
        /// <returns>A task that completes once every page in the configured range has been processed.</returns>
        public async Task RunAsync(WebCacheTask task)
        {
            // The client is created for this run only, so it is disposed when the run ends
            // (normally or through an exception) instead of being left for finalization.
            using (var httpClient = new HttpClient())
            {
                Utility.SetGeneralHttpHeaders(httpClient);

                var frame = new IndexPageNavigation(task.Pagination.NavigationUriFormat);

                var encoding = Encoding.GetEncoding(task.Encoding);

                // Point the shared cache provider at the task's root folder, resolving
                // paths from URIs with the task's regex pattern and target format.
                var cacheProvider = DefaultLocalFileSystemHttpCacheProvider.Current;

                Regex urlRegex = new Regex(task.Cache.UriToPathTransform.Pattern);

                var resolver = new RegexPathResolver(urlRegex, task.Cache.UriToPathTransform.TargetFormat);

                cacheProvider.Configure(task.Cache.RootFolder, resolver);

                // A null filter is passed when the task does not configure a URI filter.
                var discovery = new XPathUriDiscovery(task.Pagination.Lookup.XPath,
                    task.Pagination.Lookup.Attribute,
                    string.IsNullOrEmpty(task.Pagination.UriFilter)
                    ? null
                    : new Regex(task.Pagination.UriFilter),
                    task.Pagination.BasicUri
                    );

                // Visit PageLength consecutive pages starting at StartPage.
                for (int i = task.Pagination.StartPage;
                     i < task.Pagination.StartPage + task.Pagination.PageLength;
                     i++)
                {
                    frame.NavigateTo(i);

                    string text;

                    // Read the whole page as text using the task's encoding.
                    using (var content = await frame.GetAsync())
                    using (var sr = new StreamReader(content, encoding))
                    {
                        text = await sr.ReadToEndAsync();
                    }

                    var uris = discovery.Discover(text);

                    uris = TransformIfNeeded(task, uris);

                    await CacheAll(httpClient, cacheProvider, uris);
                }
            }
        }
示例#2
0
        /// <summary>
        /// Rewrites the discovered URIs with the task's pagination URI transform, when one is configured.
        /// </summary>
        /// <param name="task">Task whose <c>Pagination.UriTransform</c> supplies the pattern and target format.</param>
        /// <param name="uris">URIs to rewrite.</param>
        /// <returns>
        /// The input sequence unchanged when no transform is configured; otherwise a new list holding
        /// a URI built from the transform output for each input where <c>TryTransform</c> returned true.
        /// </returns>
        private static IEnumerable<Uri> TransformIfNeeded(WebCacheTask task, IEnumerable<Uri> uris)
        {
            var transform = task.Pagination.UriTransform;
            if (transform == null)
            {
                return uris;
            }

            var pattern = new Regex(transform.Pattern);
            var rewritten = new List<Uri>();

            // Work on a snapshot so the incoming sequence is enumerated exactly once.
            foreach (var candidate in uris.ToList())
            {
                string target;
                if (!TryTransform(pattern, transform.TargetFormat, candidate.ToString(), out target))
                {
                    // Inputs the transform rejects are dropped from the result.
                    continue;
                }

                rewritten.Add(new Uri(target));
            }

            return rewritten;
        }