/// <summary>
/// Downloads every pending (not done) transfer page that belongs to
/// <c>TransferTask</c>, hands each download to a <c>DownloadManager</c>, waits
/// until the manager reports completion and then marks the task as done.
/// Does nothing when <c>SiteDb</c> or <c>TransferTask</c> is null.
/// </summary>
/// <returns>A task that completes once the transfer task has been flagged as done.</returns>
public async Task Execute()
{
    if (this.SiteDb == null || this.TransferTask == null)
    {
        return;
    }

    List<TransferPage> pagelist = SiteDb.TransferPages.TableScan.Where(o => o.taskid == TransferTask.Id && o.done == false).SelectAll();

    DownloadManager manager = new DownloadManager() { SiteDb = SiteDb, UserId = this.TransferTask.UserId };

    List<TransferPage> transpages = new List<TransferPage>();

    // The URL of the first pending page is used as the base URL for all pages of this run.
    string baseurl = null;

    // Only the first page that reaches AddDownload is passed defaultstart = true.
    bool defaultstart = true;

    foreach (var item in pagelist)
    {
        if (baseurl == null)
        {
            baseurl = item.absoluteUrl;
        }

        var down = await DownloadHelper.DownloadUrlAsync(item.absoluteUrl, manager.CookieContainer, "GET", null, null);

        // Read the body once; a failed or empty download is closed out as done with nothing to store.
        string downloadbody = down == null ? null : down.GetString();
        if (string.IsNullOrEmpty(downloadbody))
        {
            item.done = true;
            SiteDb.TransferPages.AddOrUpdate(item);
            continue;
        }

        Guid sourcehash = Lib.Security.Hash.ComputeHashGuid(downloadbody);
        item.HtmlSourceHash = sourcehash;

        // If a transfer page with the same source hash is already stored, do not download the
        // content again: add a route for this URL to the existing page and reuse its PageId.
        var result = SiteDb.TransferPages.Query.Where(o => o.HtmlSourceHash == sourcehash).SelectAll();
        if (result != null && result.Count > 0)
        {
            var transferpage = result[0];
            TransferHelper.AddPageRoute(SiteDb, transferpage.PageId, item.absoluteUrl, baseurl);
            item.done = true;
            item.PageId = transferpage.PageId;
            SiteDb.TransferPages.AddOrUpdate(item);
            continue;
        }

        transpages.Add(item);

        SiteObject downloadobject = TransferHelper.AddDownload(manager, down, item.absoluteUrl, defaultstart, true, baseurl);

        // 'as' yields null both for a null result and for a non-Page site object.
        Page page = downloadobject as Page;
        if (page != null)
        {
            item.PageId = page.Id;
        }

        UpdateTransferPage(transpages, manager);

        defaultstart = false;
    }

    // Poll asynchronously so that no thread is blocked while the manager finishes its work.
    while (!manager.IsComplete)
    {
        await Task.Delay(20);
    }

    this.SiteDb.TransferTasks.SetDone(this.TransferTask.Id);
}
/// <summary>
/// Repeatedly loads the pending (not done) transfer pages of the task described by
/// <paramref name="progress"/>, downloads each one, hands it to the
/// <paramref name="manager"/> and queues the same-host page links found on it as new
/// transfer pages, until no pending page is left. Links classified as lower priority are
/// held back and only queued when nothing else is pending and the page budget
/// (<c>progress.TotalPages</c>) has not been reached.
/// </summary>
/// <param name="siteDb">Site database that stores the transfer pages and transfer tasks.</param>
/// <param name="progress">Task id, base URL, page counter, page limit and depth limit of the transfer.</param>
/// <param name="manager">Download manager that receives the downloads and supplies the cookie container.</param>
/// <returns>A task that completes once the list of in-flight pages has been emptied.</returns>
private async Task Downloads(SiteDb siteDb, TransferProgress progress, DownloadManager manager)
{
    List<TransferPage> transferingPages = new List<TransferPage>();
    List<TransferPage> lowerPriorityPages = new List<TransferPage>();

    var query = siteDb.TransferPages.Query.Where(o => o.taskid == progress.TaskId && o.done == false);

    while (true)
    {
        List<TransferPage> pagelist = query.SelectAll();

        // Drop pages that were already handled in this run. The null guard has to come
        // before the filter, otherwise a null result would throw here.
        if (pagelist != null)
        {
            pagelist.RemoveAll(o => DoneUrlHash.Contains(o.Id));
        }

        if (pagelist == null || pagelist.Count == 0)
        {
            if (progress.counter < progress.TotalPages && lowerPriorityPages.Count > 0)
            {
                var needed = progress.TotalPages - progress.counter;

                // Snapshot the batch first: removing from the list while enumerating a lazy
                // Take() over that same list either throws or skips elements.
                List<TransferPage> neededpages = lowerPriorityPages.Take(needed).ToList();

                foreach (var item in neededpages)
                {
                    progress.counter += 1;
                    siteDb.TransferPages.AddOrUpdate(item);
                }

                // The snapshot is exactly the leading elements of the list.
                lowerPriorityPages.RemoveRange(0, neededpages.Count);
                continue;
            }
            else
            {
                break;
            }
        }

        foreach (var item in pagelist)
        {
            DoneUrlHash.Add(item.Id);

            var down = await DownloadHelper.DownloadUrlAsync(item.absoluteUrl, manager.CookieContainer);

            siteDb.TransferTasks.UpdateCookie(progress.TaskId, manager.CookieContainer);

            // Read the body once; a failed or empty download is closed out as done.
            string downloadbody = down == null ? null : down.GetString();
            if (string.IsNullOrEmpty(downloadbody))
            {
                item.done = true;
                siteDb.TransferPages.AddOrUpdate(item);
                continue;
            }

            Guid sourcehash = Lib.Security.Hash.ComputeHashGuid(downloadbody);
            item.HtmlSourceHash = sourcehash;

            // NOTE(review): this lookup goes through the SiteDb member while the rest of the
            // method uses the siteDb parameter; presumably both are the same database - confirm.
            var result = SiteDb.TransferPages.Query.Where(o => o.HtmlSourceHash == sourcehash).SelectAll();
            if (result != null && result.Count > 0)
            {
                // Same source hash already stored: add a route for this URL to the existing
                // page and reuse its PageId instead of downloading the content again.
                var transferpage = result[0];
                TransferHelper.AddPageRoute(SiteDb, transferpage.PageId, item.absoluteUrl, progress.BaseUrl);
                item.done = true;
                item.PageId = transferpage.PageId;
                SiteDb.TransferPages.AddOrUpdate(item);
                continue;
            }

            transferingPages.Add(item);

            SiteObject downloadobject = TransferHelper.AddDownload(manager, down, item.absoluteUrl, item.DefaultStartPage, true, progress.BaseUrl);

            // 'as' yields null both for a null result and for a non-Page site object.
            Page page = downloadobject as Page;
            if (page != null)
            {
                item.PageId = page.Id;
            }

            if (page == null || page.Dom == null)
            {
                // Without a page DOM there are no links to follow.
                item.done = true;
                manager.SiteDb.TransferPages.AddOrUpdate(item);
                continue;
            }

            if (progress.counter < progress.TotalPages && item.depth < progress.Levels)
            {
                page.Dom.URL = item.absoluteUrl;

                var links = TransferHelper.GetAbsoluteLinks(page.Dom, page.Dom.getBaseUrl());

                foreach (var linkitem in links)
                {
                    if (progress.counter >= progress.TotalPages)
                    {
                        break;
                    }

                    if (!UrlHelper.isSameHost(linkitem, progress.BaseUrl))
                    {
                        continue;
                    }

                    if (!TransferHelper.IsPageUrl(linkitem))
                    {
                        continue;
                    }

                    TransferPage newpage = new TransferPage();
                    newpage.absoluteUrl = linkitem;
                    newpage.depth = item.depth + 1;
                    newpage.taskid = progress.TaskId;

                    if (!IsDuplicate(siteDb, newpage))
                    {
                        if (TransferHelper.IsLowerPrioUrl(linkitem))
                        {
                            // Held back; only counted against the budget when it is queued later.
                            if (lowerPriorityPages.Find(o => o.Id == newpage.Id) == null)
                            {
                                lowerPriorityPages.Add(newpage);
                            }
                        }
                        else
                        {
                            progress.counter += 1;
                            siteDb.TransferPages.AddOrUpdate(newpage);
                        }
                    }
                }
            }

            UpdateTransferPage(transferingPages, manager);
        }
    }

    // Keep refreshing until the in-flight list is empty; wait asynchronously so that no
    // thread is blocked between polls.
    while (transferingPages.Count > 0)
    {
        await Task.Delay(1000);
        UpdateTransferPage(transferingPages, manager);
    }
}