/// <summary>
/// Runs a web-cache task: for every page number from <c>task.Pagination.StartPage</c> up to
/// (but not including) <c>StartPage + PageLength</c>, navigates to that index page, reads its
/// text with the task's encoding, discovers URIs in it, passes them through
/// <c>TransformIfNeeded</c> and hands the result to <c>CacheAll</c>.
/// </summary>
/// <param name="task">Task description supplying the pagination, encoding and cache settings.</param>
/// <returns>A task that completes once the last page's <c>CacheAll</c> call has been awaited.</returns>
public async Task RunAsync(WebCacheTask task)
{
    // The client is created for this run only, so it is disposed when the run ends
    // (including on exceptions) instead of being left to leak its handler/connections.
    // NOTE(review): assumes CacheAll no longer uses the client once its task completes.
    using (var httpClient = new HttpClient())
    {
        Utility.SetGeneralHttpHeaders(httpClient);

        var frame = new IndexPageNavigation(task.Pagination.NavigationUriFormat);
        var encoding = Encoding.GetEncoding(task.Encoding);

        // Configure the provider returned by DefaultLocalFileSystemHttpCacheProvider.Current
        // with the root folder and the regex-based URI-to-path resolver from the task.
        var cacheProvider = DefaultLocalFileSystemHttpCacheProvider.Current;
        Regex urlRegex = new Regex(task.Cache.UriToPathTransform.Pattern);
        var resolver = new RegexPathResolver(urlRegex, task.Cache.UriToPathTransform.TargetFormat);
        cacheProvider.Configure(task.Cache.RootFolder, resolver);

        // The URI filter is optional: an empty/null pattern means no filter regex is passed.
        var discovery = new XPathUriDiscovery(
            task.Pagination.Lookup.XPath,
            task.Pagination.Lookup.Attribute,
            string.IsNullOrEmpty(task.Pagination.UriFilter) ? null : new Regex(task.Pagination.UriFilter),
            task.Pagination.BasicUri);

        for (int i = task.Pagination.StartPage; i < task.Pagination.StartPage + task.Pagination.PageLength; i++)
        {
            frame.NavigateTo(i);

            // Read the whole index page as text using the task's encoding.
            string text = string.Empty;
            using (var content = await frame.GetAsync())
            using (var sr = new StreamReader(content, encoding))
            {
                text = await sr.ReadToEndAsync();
            }

            var uris = discovery.Discover(text);
            uris = TransformIfNeeded(task, uris);
            await CacheAll(httpClient, cacheProvider, uris);
        }
    }
}
/// <summary>
/// Walks the index pages described by <paramref name="task"/> (page numbers from
/// <c>task.StartPage</c> up to, but not including, <c>StartPage + PageLength</c>), reads each
/// page's text with the task's encoding, runs URI discovery over it and writes every
/// discovered URI to the console. The cache provider is configured but nothing is handed to it here.
/// </summary>
/// <param name="task">Flat task description supplying navigation, encoding, cache and lookup settings.</param>
/// <returns>A task that completes after the last page has been read and its URIs printed.</returns>
public async Task RunAsync(WebCacheServiceTask task)
{
    var navigator = new IndexPageNavigation(task.PageNavigationUriFormat);
    var pageEncoding = Encoding.GetEncoding(task.Encoding);

    // Configure the provider returned by DefaultLocalFileSystemHttpCacheProvider.Current.
    var provider = DefaultLocalFileSystemHttpCacheProvider.Current;
    var pathResolver = new RegexPathResolver(
        new Regex(task.CacheProviderUriPattern),
        task.CacheProviderPathFormat);
    provider.Configure(task.CacheProviderRootFolder, pathResolver);

    // Read the lookup settings in their original order; the filter regex is optional.
    var nodeXPath = task.HtmlNodeXPath;
    var nodeAttribute = task.HtmlNodeAttribute;
    var uriFilter = string.IsNullOrEmpty(task.UriFilterPattern)
        ? null
        : new Regex(task.UriFilterPattern);
    var uriDiscovery = new XPathUriDiscovery(nodeXPath, nodeAttribute, uriFilter);

    int page = task.StartPage;
    while (page < task.StartPage + task.PageLength)
    {
        navigator.NavigateTo(page);

        // Pull the whole page into memory as text.
        string html;
        using (var stream = await navigator.GetAsync())
        {
            using (var reader = new StreamReader(stream, pageEncoding))
            {
                html = await reader.ReadToEndAsync();
            }
        }

        foreach (var found in uriDiscovery.Discover(html))
        {
            Console.WriteLine(found);
        }

        page++;
    }
}