コード例 #1
0
        /// <summary>
        /// Downloads every not-yet-done transfer page that belongs to <c>TransferTask</c>,
        /// registers each download with a <c>DownloadManager</c>, waits for the manager to
        /// report completion and finally marks the task as done.
        /// </summary>
        /// <remarks>
        /// Does nothing when <c>SiteDb</c> or <c>TransferTask</c> is null.
        /// The absolute URL of the first page in the list is used as the base URL for all pages.
        /// </remarks>
        /// <returns>A task that completes after the transfer task has been marked as done.</returns>
        public async Task Execute()
        {
            if (this.SiteDb == null || this.TransferTask == null)
            {
                return;
            }

            List<TransferPage> pagelist = SiteDb.TransferPages.TableScan.Where(o => o.taskid == TransferTask.Id && o.done == false).SelectAll();

            DownloadManager manager = new DownloadManager()
            {
                SiteDb = SiteDb, UserId = this.TransferTask.UserId
            };
            List<TransferPage> transpages = new List<TransferPage>();

            string baseurl = null;

            // Stays true until the first page has actually been handed to AddDownload.
            bool defaultstart = true;

            foreach (var item in pagelist)
            {
                if (baseurl == null)
                {
                    baseurl = item.absoluteUrl;
                }

                var down = await DownloadHelper.DownloadUrlAsync(item.absoluteUrl, manager.CookieContainer, "GET", null, null);

                // Read the body once and reuse it for the emptiness check and the hash.
                // NOTE(review): assumes GetString() is idempotent - confirm.
                string downloadbody = down == null ? null : down.GetString();

                if (string.IsNullOrEmpty(downloadbody))
                {
                    // Nothing usable was downloaded: mark the page as done so it is not picked up again.
                    item.done = true;
                    SiteDb.TransferPages.AddOrUpdate(item);
                    continue;
                }

                Guid sourcehash = Lib.Security.Hash.ComputeHashGuid(downloadbody);
                item.HtmlSourceHash = sourcehash;

                // A transfer page with the same source hash already exists: reuse its page
                // and only add a route for this URL instead of importing the content again.
                var result = SiteDb.TransferPages.Query.Where(o => o.HtmlSourceHash == sourcehash).SelectAll();
                if (result != null && result.Count > 0)
                {
                    var transferpage = result[0];
                    TransferHelper.AddPageRoute(SiteDb, transferpage.PageId, item.absoluteUrl, baseurl);
                    item.done   = true;
                    item.PageId = transferpage.PageId;
                    SiteDb.TransferPages.AddOrUpdate(item);
                    continue;
                }

                transpages.Add(item);

                SiteObject downloadobject = TransferHelper.AddDownload(manager, down, item.absoluteUrl, defaultstart, true, baseurl);

                // "as" yields null both for a null result and for a non-Page result.
                Page page = downloadobject as Page;
                if (page != null)
                {
                    item.PageId = page.Id;
                }

                UpdateTransferPage(transpages, manager);

                defaultstart = false;
            }

            // Poll until the manager reports completion. Task.Delay keeps the wait
            // asynchronous instead of blocking a thread inside an async method.
            while (!manager.IsComplete)
            {
                await Task.Delay(20);
            }

            this.SiteDb.TransferTasks.SetDone(this.TransferTask.Id);
        }
コード例 #2
0
        /// <summary>
        /// Repeatedly downloads the pending transfer pages of a task, queues links found on the
        /// downloaded pages as new transfer pages, and keeps going until no pending page is left.
        /// </summary>
        /// <param name="siteDb">Site database that holds the transfer pages and transfer tasks.</param>
        /// <param name="progress">Progress state of the task: task id, base URL, page counter, page limit and level limit.</param>
        /// <param name="manager">Download manager that receives the downloads and supplies the cookie container.</param>
        /// <returns>A task that completes once the list of in-flight transfer pages is empty.</returns>
        private async Task Downloads(SiteDb siteDb, TransferProgress progress, DownloadManager manager)
        {
            List<TransferPage> transferingPages   = new List<TransferPage>();
            List<TransferPage> lowerPriorityPages = new List<TransferPage>();

            var query = siteDb.TransferPages.Query.Where(o => o.taskid == progress.TaskId && o.done == false);

            while (true)
            {
                List<TransferPage> pagelist = query.SelectAll();

                // Check for null before touching the list; pages already attempted are skipped.
                if (pagelist != null)
                {
                    pagelist.RemoveAll(o => DoneUrlHash.Contains(o.Id));
                }

                if (pagelist == null || pagelist.Count == 0)
                {
                    if (progress.counter >= progress.TotalPages || lowerPriorityPages.Count == 0)
                    {
                        break;
                    }

                    // No regular pages left but the page budget is not used up:
                    // promote as many lower-priority pages as still fit.
                    var needed = progress.TotalPages - progress.counter;

                    // Snapshot first: removing from the list while lazily enumerating
                    // Take(...) over it invalidates the enumerator and throws.
                    List<TransferPage> neededpages = lowerPriorityPages.Take(needed).ToList();
                    lowerPriorityPages.RemoveRange(0, neededpages.Count);

                    foreach (var item in neededpages)
                    {
                        progress.counter += 1;
                        siteDb.TransferPages.AddOrUpdate(item);
                    }
                    continue;
                }

                foreach (var item in pagelist)
                {
                    DoneUrlHash.Add(item.Id);

                    var down = await DownloadHelper.DownloadUrlAsync(item.absoluteUrl, manager.CookieContainer);

                    siteDb.TransferTasks.UpdateCookie(progress.TaskId, manager.CookieContainer);

                    // Read the body once and reuse it for the emptiness check and the hash.
                    // NOTE(review): assumes GetString() is idempotent - confirm.
                    string downloadbody = down == null ? null : down.GetString();

                    if (string.IsNullOrEmpty(downloadbody))
                    {
                        item.done = true;
                        siteDb.TransferPages.AddOrUpdate(item);
                        continue;
                    }

                    Guid sourcehash = Lib.Security.Hash.ComputeHashGuid(downloadbody);
                    item.HtmlSourceHash = sourcehash;

                    // NOTE(review): the duplicate handling below uses the SiteDb member while the
                    // rest of the method uses the siteDb parameter - confirm both refer to the same database.
                    var result = SiteDb.TransferPages.Query.Where(o => o.HtmlSourceHash == sourcehash).SelectAll();
                    if (result != null && result.Count > 0)
                    {
                        // Same content was already transferred: reuse that page and only add a route.
                        var transferpage = result[0];
                        TransferHelper.AddPageRoute(SiteDb, transferpage.PageId, item.absoluteUrl, progress.BaseUrl);
                        item.done   = true;
                        item.PageId = transferpage.PageId;
                        SiteDb.TransferPages.AddOrUpdate(item);
                        continue;
                    }

                    transferingPages.Add(item);

                    SiteObject downloadobject = TransferHelper.AddDownload(manager, down, item.absoluteUrl, item.DefaultStartPage, true, progress.BaseUrl);

                    // "as" yields null both for a null result and for a non-Page result.
                    Page page = downloadobject as Page;
                    if (page != null)
                    {
                        item.PageId = page.Id;
                    }

                    if (page == null || page.Dom == null)
                    {
                        // No DOM to scan for links: finish this page here.
                        item.done = true;
                        manager.SiteDb.TransferPages.AddOrUpdate(item);
                        continue;
                    }

                    if (progress.counter < progress.TotalPages && item.depth < progress.Levels)
                    {
                        page.Dom.URL = item.absoluteUrl;

                        var links = TransferHelper.GetAbsoluteLinks(page.Dom, page.Dom.getBaseUrl());

                        foreach (var linkitem in links)
                        {
                            if (progress.counter >= progress.TotalPages)
                            {
                                break;
                            }

                            // Only follow page links on the same host as the base URL.
                            if (!UrlHelper.isSameHost(linkitem, progress.BaseUrl) || !TransferHelper.IsPageUrl(linkitem))
                            {
                                continue;
                            }

                            TransferPage newpage = new TransferPage();
                            newpage.absoluteUrl = linkitem;
                            newpage.depth       = item.depth + 1;
                            newpage.taskid      = progress.TaskId;

                            if (IsDuplicate(siteDb, newpage))
                            {
                                continue;
                            }

                            if (TransferHelper.IsLowerPrioUrl(linkitem))
                            {
                                // Lower-priority links are parked and only used when the budget allows.
                                if (lowerPriorityPages.Find(o => o.Id == newpage.Id) == null)
                                {
                                    lowerPriorityPages.Add(newpage);
                                }
                            }
                            else
                            {
                                progress.counter += 1;
                                siteDb.TransferPages.AddOrUpdate(newpage);
                            }
                        }
                    }

                    UpdateTransferPage(transferingPages, manager);
                }
            }

            // Wait for the in-flight pages to drain without blocking a thread.
            // NOTE(review): presumably UpdateTransferPage removes finished pages from the list;
            // otherwise this loop would never end - confirm.
            while (transferingPages.Count > 0)
            {
                await Task.Delay(1000);
                UpdateTransferPage(transferingPages, manager);
            }
        }