Beispiel #1
0
        /// <summary>
        /// Manual smoke test: builds the Panda series parser, opens one hard-coded chapter
        /// and requests its page count plus three image URLs through <c>Client2.GetDocumentAsync</c>.
        /// The results are not used afterwards; the method exists to exercise the calls.
        /// </summary>
        private static async Task Download()
        {
            var barOptions = new ProgressBarOptions
            {
                ForegroundColor = ConsoleColor.Yellow,
                BackgroundColor = ConsoleColor.DarkYellow,
                ProgressCharacter = '─'
            };

            Console.WriteLine("Hello World!");
            Console.WriteLine(string.Empty);

            IFileSystem fileSystem = new FileSystem();
            var pandaParser = new Panda.SeriesParser();
            var emptyDownloader = new MangaDownloader(null, new List<ISeriesParser>());
            PageGetter fetchPage = Client2.GetDocumentAsync;

            const string chapterUrl = "https://www.mangapanda.com/the-heroic-legend-of-arslan-arakawa-hiromu/63";
            var chapter = pandaParser.CreateChapter(chapterUrl);

            // Each call is awaited in sequence, exactly one request after another.
            var pageCount = await chapter.GetPageCount(fetchPage);
            var imageOnPage6 = await chapter.GetImageUrl(6, fetchPage);
            var imageOnPage7 = await chapter.GetImageUrl(7, fetchPage);
            var imageOnPage19 = await chapter.GetImageUrl(19, fetchPage);

            var downloader = new MangaDownloader(fileSystem, new List<ISeriesParser> { pandaParser });
        }
        /// <summary>
        /// Loads the page at <c>Url/pageNumber</c> and returns the value of the <c>src</c>
        /// attribute of the element whose id is <c>img</c>.
        /// </summary>
        /// <param name="pageNumber">Page number appended to <c>Url</c>.</param>
        /// <param name="getPage">Delegate used to fetch the document.</param>
        /// <returns>The raw <c>src</c> attribute value.</returns>
        public async Task<string> GetImageUrl(int pageNumber, PageGetter getPage)
        {
            var document = await getPage($"{Url}/{pageNumber}");
            var imageElement = document.GetElementById("img");

            // First(...) throws if the element has no "src" attribute.
            return imageElement.Attributes.First(attr => attr.Name == "src").Value;
        }
Beispiel #3
0
        /// <summary>
        /// Fetches <c>Url</c> and returns the number of <c>option</c> children of the first
        /// <c>select</c> element in the document, minus one.
        /// </summary>
        /// <param name="getPage">Delegate used to fetch the document.</param>
        public async Task<int> GetPageCount(PageGetter getPage)
        {
            var document = await getPage(Url);

            // Relies on the first <select> in the document being the right one;
            // the original author already flagged this as fragile.
            var pageSelector = document.GetElementsByTagName("select").First();
            var optionCount = pageSelector.Children.Count(child => child.LocalName == "option");

            return optionCount - 1;
        }
        /// <summary>
        /// Fetches <c>Url</c> and counts the <c>img</c> children of the reader container.
        /// The container is the element with id <c>vungdoc</c>, or, when that id is absent,
        /// the first element with class <c>container-chapter-reader</c>.
        /// </summary>
        /// <param name="getPage">Delegate used to fetch the document.</param>
        public async Task<int> GetPageCount(PageGetter getPage)
        {
            var document = await getPage(Url);

            var reader = document.GetElementById("vungdoc")
                         ?? document.GetElementsByClassName("container-chapter-reader").First();

            var imageCount = reader.Children.Count(child => child.LocalName == "img");
            return imageCount;
        }
        /// <summary>
        /// Fetches <c>Url</c> and returns how many <c>option</c> children the element with
        /// id <c>pageMenu</c> has.
        /// </summary>
        /// <param name="getPage">Delegate used to fetch the document.</param>
        public async Task<int> GetPageCount(PageGetter getPage)
        {
            var document = await getPage(Url);
            var pageMenu = document.GetElementById("pageMenu");

            return pageMenu.Children.Count(child => child.LocalName == "option");
        }
        /// <summary>
        /// Downloads <c>http://funmanga.com/manga-list/{letter}</c> and projects every
        /// <c>li &gt; a</c> found under a <c>ul</c> with class <c>manga-list</c> into a
        /// (text, href) pair. The href is <c>null</c> when the anchor has no such attribute.
        /// </summary>
        /// <param name="getter">Delegate used to fetch the document.</param>
        /// <param name="letter">Value inserted into the list URL.</param>
        /// <returns>A lazily evaluated sequence of (text, href) tuples.</returns>
        private static async Task<IEnumerable<(string, string)>> GetForUrl(PageGetter getter, string letter)
        {
            var listUrl = $"http://funmanga.com/manga-list/{letter}";
            var listPage = await getter(listUrl).ConfigureAwait(false);

            var listItems = listPage
                .GetElementsByTagName("ul")
                .Where(list => list.HasClass("manga-list"))
                .SelectMany(list => list.Elements("li"));

            // Still deferred: nothing below runs until the caller enumerates the result.
            return listItems.Select(item =>
            {
                var link = item.Element("a");
                var text = link.TextContent;
                var href = link.Attributes.FirstOrDefault(attr => attr.Name == "href")?.Value;
                return (text, href);
            });
        }
 /// <summary>
 /// Creates the two page getters used by this instance: one whose <c>Store</c> is the
 /// fixed value <c>"upc"</c> and one whose <c>Store</c> is <c>StoreName</c>.
 /// </summary>
 public virtual void InitPageGetter()
 {
     _upcGetter = new PageGetter { Store = "upc" };
     _getter = new PageGetter { Store = StoreName };
 }
        /// <summary>
        /// Fetches <c>Url</c>, walks <c>#top_bar &gt; div &gt; div &gt; select</c> and returns
        /// the number of its <c>option</c> elements minus one.
        /// </summary>
        /// <param name="getPage">Delegate used to fetch the document.</param>
        public async Task<int> GetPageCount(PageGetter getPage)
        {
            var document = await getPage(Url);

            var pageSelect = document
                .GetElementById("top_bar")
                .Element("div")
                .Element("div")
                .Element("select");

            var options = pageSelect.Elements("option");
            return options.Count() - 1;
        }
Beispiel #9
0
        /// <summary>
        /// Fetches the first page to learn the total item count, then requests all remaining
        /// pages concurrently and merges every page into one de-duplicated list.
        /// </summary>
        /// <param name="pageGetter">Delegate invoked as <c>pageGetter(pageSize, offset)</c> to fetch one page.</param>
        /// <param name="pageSize">Number of items requested per page; also the step between offsets.</param>
        /// <param name="token">Checked after each page fetch completes; it is not passed to <paramref name="pageGetter"/>.</param>
        /// <param name="log">Receives a debug line per page and warnings for failures or a count mismatch.</param>
        /// <param name="throwIfCountMismatch">When true, a mismatch between the reported total and the collected count throws.</param>
        /// <returns>The collected items as a list.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <paramref name="throwIfCountMismatch"/> is true and the collected count differs from the reported total.
        /// </exception>
        public async Task <IReadOnlyList <T> > GetAllPagesAsync <T>(PageGetter <T> pageGetter, decimal pageSize, CancellationToken token, ILog log, bool throwIfCountMismatch = false)
        {
            // The first request (offset 0) supplies the total used to plan the remaining requests.
            var firstPage = await pageGetter(pageSize, 0);

            var total = firstPage.TotalCount;

            log.Debug($"{0}/{total}, {firstPage.Count} items");

            // Pre-sized with the reported total.
            var result = new HashSet <T>((int)total);

            // AddUnique is an extension defined elsewhere; presumably it skips duplicates and logs them - TODO confirm.
            result.AddUnique(firstPage, log);

            // One task per further page; the offsets are pageSize, 2 * pageSize, ... (the cast to int truncates).
            var remainingPages = total / pageSize;
            var pageTasks      = Enumerable.Range(1, (int)remainingPages)
                                 .Select(pageNumber => (ulong)pageNumber * pageSize)
                                 .Select(async offset =>
            {
                VkCollection <T> page = null;
                try
                {
                    page = await pageGetter(pageSize, offset);
                    log.Debug($"{offset}/{total}, {page.Count} items");
                }
                catch (Exception e)
                {
                    // A failed page is retried through GetSinglePage for the same offset/length window.
                    log.Warn($"Error getting page {offset}/{total}. Getting page by smaller chunks...", e);
                    var items = await GetSinglePage(pageGetter, offset, pageSize, token, log);
                    page      = new VkCollection <T>(0, items);
                }

                token.ThrowIfCancellationRequested();
                // Substitute an empty collection if the getter returned null.
                return(page ?? new VkCollection <T>(0, new List <T>()));
            });
            // Task.WhenAll enumerates the lazy query, which starts every page request.
            var pages = await Task.WhenAll(pageTasks);

            foreach (var page in pages)
            {
                result.AddUnique(page, log);
            }

            // The reported total and the collected count can disagree; warn, and throw only on request.
            if ((int)total != result.Count)
            {
                var message = $"Expected {total} items, got {result.Count}. Maybe they were created/deleted, or it's VK bugs again.";
                log.Warn(message);
                if (throwIfCountMismatch)
                {
                    throw new InvalidOperationException(message);
                }
            }

            return(result.ToList());
        }
Beispiel #10
0
        /// <summary>
        /// Downloads <paramref name="url"/> and, for each <c>li</c> under
        /// <c>#mangalist &gt; ul</c>, takes the anchor inside the first <c>manga_text</c>
        /// element and returns its text together with its <c>href</c> prefixed by <c>http:</c>.
        /// </summary>
        /// <param name="getter">Delegate used to fetch the document.</param>
        /// <param name="url">Address of the index page to read.</param>
        /// <returns>A materialized list of (text, url) tuples.</returns>
        private static async Task<IEnumerable<(string, string)>> GetForUrl(PageGetter getter, string url)
        {
            var indexPage = await getter(url).ConfigureAwait(false);

            var entries = indexPage
                .GetElementById("mangalist")
                .Element("ul")
                .Elements("li");

            var results = new List<(string, string)>();
            foreach (var entry in entries)
            {
                var link = entry.GetElementsByClassName("manga_text").First().Element("a");
                results.Add((link.TextContent, $"http:{link.GetAttribute("href")}"));
            }

            return results;
        }
        /// <summary>
        /// Requests every page returned by <c>PagesRepository.GetAllMonitor()</c>, stores one
        /// <c>PageStatus</c> row per request and returns the rows that were stored.
        /// </summary>
        /// <remarks>
        /// Best-effort: if anything throws, the run stops, the failure is written to
        /// <see cref="System.Diagnostics.Trace"/> and the rows collected so far are returned.
        /// Previously the exception was dropped without any record.
        /// </remarks>
        /// <returns>The statuses saved before completion or before the first failure.</returns>
        private static async Task<IEnumerable<PageStatus>> HitPages()
        {
            var listPageStatus = new List<PageStatus>();

            try
            {
                using (var repoPages = new PagesRepository())
                using (var repoPageStatus = new PageStatusRepository())
                {
                    // Only pages with the Monitor bit on.
                    var pages = repoPages.GetAllMonitor();

                    var pageGetter = new PageGetter();

                    foreach (var page in pages)
                    {
                        var stats = await pageGetter.HTTP_GET(new PageStats { Url = page.Url });

                        var pageStatus = new PageStatus
                        {
                            Url              = stats.Url,
                            ResponseTime     = stats.ResponseTime,
                            ContentLength    = stats.ContentLength,
                            ExceptionMessage = stats.ExceptionMessage,
                            Status           = stats.Status.ToString(),
                            Created          = DateTime.Now
                        };

                        // Saved per page so earlier rows survive a later failure.
                        repoPageStatus.Add(pageStatus);
                        repoPageStatus.SaveChanges();

                        listPageStatus.Add(pageStatus);
                    }
                }
            }
            catch (Exception ex)
            {
                // Keep the best-effort contract (no rethrow) but leave a trace of what went wrong.
                System.Diagnostics.Trace.TraceError(
                    "HitPages stopped after {0} page(s): {1}",
                    listPageStatus.Count,
                    ex);
            }

            return listPageStatus;
        }
        /// <summary>
        /// Fetches <c>Url</c> and returns the <c>src</c> of the <paramref name="pageNumber"/>-th
        /// (1-based) <c>img</c> child of the reader container. The container is the element with
        /// id <c>vungdoc</c>, or else the first element with class <c>container-chapter-reader</c>.
        /// </summary>
        /// <param name="pageNumber">1-based position of the image inside the container.</param>
        /// <param name="getPage">Delegate used to fetch the document.</param>
        public async Task<string> GetImageUrl(int pageNumber, PageGetter getPage)
        {
            var document = await getPage(Url);

            var reader = document.GetElementById("vungdoc")
                         ?? document.GetElementsByClassName("container-chapter-reader").First();

            var images = reader.Children.Where(child => child.LocalName == "img");

            // Skip + First throws when fewer than pageNumber images exist.
            var target = images.Skip(pageNumber - 1).First();
            return target.GetAttribute("src");
        }
        /// <summary>
        /// Reads the first directory page, parses the second-to-last child of the bottom
        /// pagination element as the highest page number, then processes every directory
        /// page URL in batches of 20 through <c>GetMangaList</c>.
        /// </summary>
        /// <param name="pageGetter">Delegate used to fetch documents.</param>
        /// <param name="progress">Optional progress sink handed to <c>Transform</c>.</param>
        public async Task<IEnumerable<(string name, string url)>> ListInstances(PageGetter pageGetter, IProgress<double> progress = null)
        {
            var firstPage = await pageGetter("https://www.mangaeden.com/en/en-directory/?page=1");

            var pagination = firstPage.GetElementsByClassName("pagination pagination_bottom").First();
            var pagerItems = pagination.Children;

            // The second-to-last pager entry carries the highest page number.
            var lastPageNumber = int.Parse(pagerItems[pagerItems.Length - 2].TextContent);

            var pageUrls = Enumerable
                .Range(1, lastPageNumber)
                .Select(i => $"https://www.mangaeden.com/en/en-directory/?page={i}");

            return await pageUrls
                .Batch(20)
                .Transform(pageUrl => GetMangaList(pageGetter, pageUrl), progress);
        }
Beispiel #14
0
        /// <summary>
        /// Reads the fanfox directory page, parses the second-to-last <c>#nav &gt; ul &gt; li</c>
        /// anchor text as the number of directory pages, then calls <c>GetForUrl</c> for each
        /// <c>{i}.htm</c> page and flattens the results into one list.
        /// </summary>
        /// <param name="pageGetter">Delegate used to fetch documents.</param>
        /// <param name="progress">Optional progress sink handed to <c>WhenAll</c>.</param>
        public async Task<IEnumerable<(string name, string url)>> ListInstances(PageGetter pageGetter, IProgress<double> progress = null)
        {
            var directory = await pageGetter("http://fanfox.net/directory/").ConfigureAwait(false);

            var navItems = directory.GetElementById("nav").Element("ul").Elements("li").ToList();
            var lastPageLink = navItems[navItems.Count - 2].Element("a");
            var pageTotal = int.Parse(lastPageLink.TextContent.Trim());

            var downloads = Enumerable
                .Range(1, pageTotal)
                .Select(i => GetForUrl(pageGetter, $"http://fanfox.net/directory/{i}.htm"));

            return await downloads
                .WhenAll(progress)
                .Flatten()
                .ToListAsync();
        }
        /// <summary>
        /// Replaces the last <c>/</c>-separated segment of <c>Url</c> with
        /// <c>{pageNumber}.html</c>, loads that page and returns the <c>src</c> of the image at
        /// <c>#viewer &gt; div &gt; a &gt; img</c>.
        /// </summary>
        /// <param name="pageNumber">Page number used as the file name of the requested page.</param>
        /// <param name="getPage">Delegate used to fetch the document.</param>
        public async Task<string> GetImageUrl(int pageNumber, PageGetter getPage)
        {
            var segments = Url.Split('/');
            var baseUrl = string.Join("/", segments.Take(segments.Length - 1));

            var document = await getPage($"{baseUrl}/{pageNumber}.html");

            var image = document
                .GetElementById("viewer")
                .Element("div") // class read_img
                .Element("a")
                .Element("img");

            return image.Attributes.First(attr => attr.Name == "src").Value;
        }
        public MetaDataService(ICollection <IMetaDataParser> metaDataParsers, PageGetter getter)
        {
            MetaDataParsers = metaDataParsers.ToDictionary(p => p.ProviderName);

            PageGetter = getter;

            Path = Combine(DirectoryPath, "meta.data");

            if (!Directory.Exists(DirectoryPath))
            {
                Directory.CreateDirectory(DirectoryPath);
            }

            if (!File.Exists(Path))
            {
                WriteToDisk(new (string, MetaData)[0]).GetAwaiter().GetResult();
        /// <summary>
        /// Entry point. <c>args[0]</c> is the site URL; an optional <c>-f</c>/<c>--full</c> as
        /// <c>args[1]</c> enables full logging. Collects the site's pages, sorts them and runs
        /// the page checker over each one.
        /// </summary>
        /// <param name="args">Command-line arguments; no arguments, <c>-h</c> or <c>--help</c> shows the help text.</param>
        static void Main(string[] args)
        {
            // Stop after showing help. Without the return, an empty argument list went on to
            // index args[0], and "-h"/"--help" was then used as the URL. ShowHelp is defined
            // elsewhere; the return is harmless if it already terminates the process.
            if (args.Length == 0 || args[0] == "-h" || args[0] == "--help")
            {
                ShowHelp();
                return;
            }

            var url = args[0];

            var enableFullLogging = args.Length > 1 &&
                                    (args[1] == "-f" ||
                                     args[1] == "--full");

            ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12 | SecurityProtocolType.Tls11;

            var getter = new PageGetter(
                new WebClientWrapper(),
                new HtmlDocumentWrapper(),
                new Website(url),
                new Log(enableFullLogging));

            var pages = getter.GetPages(url);

            pages.Sort();

            var checker = new PageChecker(
                new LinkDictionary(),
                new WebClientWrapper(),
                new HtmlDocumentWrapper(),
                new Log(enableFullLogging),
                new Website(url));

            foreach (var page in pages)
            {
                checker.Check(page);
            }

            Console.WriteLine("Press any key to exit...");
            Console.ReadKey();
        }
Beispiel #18
0
        /// <summary>
        /// Polls every page returned by <c>PagesRepository.GetAllStgMonitor()</c> for 1000
        /// rounds, sleeping 10 seconds after each round. Each request is stored as a
        /// <c>PageStatus</c> row and echoed to the console.
        /// </summary>
        /// <remarks>
        /// Both repositories are now wrapped in <c>using</c>, as <c>HitPages</c> already does,
        /// so they are disposed when the run ends or an exception escapes.
        /// </remarks>
        private static void DirectAccess()
        {
            using (var repoPages = new PagesRepository())
            using (var repoPageStatus = new PageStatusRepository())
            {
                var pages = repoPages.GetAllStgMonitor();

                var pageGetter = new PageGetter();

                for (var remainingRounds = 1000; remainingRounds > 0; remainingRounds--)
                {
                    foreach (var page in pages)
                    {
                        Console.WriteLine($"Page: {page.Url}");

                        var pageStats = new PageStats {
                            Url = page.Url
                        };

                        // The method is synchronous, so the request is blocked on deliberately.
                        var stats = pageGetter.HTTP_GET(pageStats).Result;

                        var pageStatus = new PageStatus
                        {
                            Url              = stats.Url,
                            ResponseTime     = stats.ResponseTime,
                            ContentLength    = stats.ContentLength,
                            ExceptionMessage = stats.ExceptionMessage,
                            Status           = stats.Status.ToString(),
                            Created          = DateTime.Now
                        };

                        repoPageStatus.Add(pageStatus);
                        repoPageStatus.SaveChanges();

                        Console.WriteLine($"seconds: {stats.ResponseTime}  length: {stats.ContentLength} time: {DateTime.Now}");
                        Console.WriteLine();
                    }

                    // Pause between rounds.
                    Thread.Sleep(10000);
                }
            }
        }
        /// <summary>
        /// Downloads the mangapanda alphabetical index and returns the distinct (text, url)
        /// pairs of every anchor found under the <c>series_col</c> columns, with each
        /// <c>href</c> prefixed by the site root.
        /// </summary>
        /// <param name="pageGetter">Delegate used to fetch the document.</param>
        /// <param name="progress">Optional sink; receives 0.9 after the download and 1 when done.</param>
        public async Task<IEnumerable<(string name, string url)>> ListInstances(PageGetter pageGetter, IProgress<double> progress = null)
        {
            var indexPage = await pageGetter("http://www.mangapanda.com/alphabetical").ConfigureAwait(false);
            progress?.Report(0.9);

            var seriesColumns = indexPage
                .GetElementById("wrapper_body")
                .Element("div")                                   // content_bloc2
                .Elements("div")
                .Where(column => column.HasClass("series_col"));

            var links = seriesColumns
                .SelectMany(column => column.Elements("div"))     // series_alpha
                .SelectMany(block => block.Elements("ul"))
                .SelectMany(list => list.Elements("li"))
                .Select(item => item.Element("a"));

            var series = links
                .Select(a => (a.TextContent, $"http://www.mangapanda.com{a.GetAttribute("href")}"))
                .Distinct()
                .ToList();

            progress?.Report(1);
            return series;
        }
Beispiel #20
0
        /// <summary>
        /// Requests a single page on behalf of <paramref name="user"/>, stores the outcome as an
        /// <c>AdHocPageStatu</c> row and returns it in a one-element list.
        /// </summary>
        /// <remarks>
        /// Best-effort: on any exception the failure is written to
        /// <see cref="System.Diagnostics.Trace"/> and an empty list is returned. Previously the
        /// exception was dropped without any record.
        /// </remarks>
        /// <param name="pageUrl">Address of the page to request.</param>
        /// <param name="user">Value stored in the row's <c>User</c> field.</param>
        /// <returns>A list holding the saved status, or an empty list when the attempt failed.</returns>
        private static async Task<IEnumerable<AdHocPageStatu>> HitPage(string pageUrl, string user)
        {
            var listPageStatus = new List<AdHocPageStatu>();

            try
            {
                using (var adHocPageStatusRepository = new AdHocPageStatusRepository())
                {
                    var pageGetter = new PageGetter();
                    var stats = await pageGetter.HTTP_GET(new PageStats { Url = pageUrl });

                    var pageStatus = new AdHocPageStatu
                    {
                        Url              = stats.Url,
                        ResponseTime     = stats.ResponseTime,
                        ContentLength    = stats.ContentLength,
                        ExceptionMessage = stats.ExceptionMessage,
                        Status           = stats.Status.ToString(),
                        Created          = DateTime.Now,
                        User             = user
                    };

                    adHocPageStatusRepository.Add(pageStatus);
                    adHocPageStatusRepository.SaveChanges();

                    listPageStatus.Add(pageStatus);
                }
            }
            catch (Exception ex)
            {
                // Keep the best-effort contract (no rethrow) but leave a trace of what went wrong.
                System.Diagnostics.Trace.TraceError("HitPage failed for {0}: {1}", pageUrl, ex);
            }

            return listPageStatus;
        }
Beispiel #21
0
        /// <summary>
        /// Fetches the window <c>[offset, offset + length)</c> one item at a time, with all
        /// single-item requests running concurrently, and returns the concatenated results.
        /// Items whose request fails are logged and skipped.
        /// </summary>
        /// <param name="pageGetter">Delegate invoked as <c>pageGetter(1, itemOffset)</c> for each item.</param>
        /// <param name="offset">Offset of the first item in the window (truncated to int).</param>
        /// <param name="length">Number of items in the window (truncated to int).</param>
        /// <param name="token">Checked after each single-item request completes.</param>
        /// <param name="log">Receives a debug line per fetched item and a warning per failure.</param>
        /// <returns>A flattened sequence of the items that could be fetched.</returns>
        private async Task<IEnumerable<T>> GetSinglePage<T>(PageGetter<T> pageGetter, decimal offset, decimal length, CancellationToken token, ILog log)
        {
            var pageTasks = Enumerable.Range((int)offset, (int)length)
                            .Select(async x =>
            {
                VkCollection<T> page = null;
                try
                {
                    // Request the item at position x. The previous code passed `offset` here,
                    // so every task asked for the same first item of the window.
                    page = await pageGetter(1, x);
                    log.Debug($"{x}/{length} item from bad page");
                }
                catch (Exception e)
                {
                    // A single failing item must not abort the rest of the window.
                    log.Warn($"Error getting item {x}", e);
                }

                token.ThrowIfCancellationRequested();
                return page ?? new VkCollection<T>(0, new List<T>());
            });
            var pages = await Task.WhenAll(pageTasks);

            return pages.SelectMany(items => items);
        }
Beispiel #22
0
        /// <summary>
        /// Returns the result of <c>PageGetter.GetMinimizedDiaryPages</c> for the given diary,
        /// using a freshly created <c>PageGetter</c>.
        /// </summary>
        /// <param name="diaryId">Identifier passed through to the getter.</param>
        public List<MinimizedDiaryPageDto> GetDiaryMinimizedPageList(long diaryId)
        {
            return new PageGetter().GetMinimizedDiaryPages(diaryId);
        }
        /// <summary>
        /// Fetches <c>Url</c> and returns the number of <c>option</c> elements under the element
        /// with id <c>pageSelect</c>, or 0 when the fetch yields no document.
        /// </summary>
        /// <param name="getPage">Delegate used to fetch the document.</param>
        public async Task<int> GetPageCount(PageGetter getPage)
        {
            var document = await getPage(Url);
            if (document == null)
            {
                return 0;
            }

            return document.GetElementById("pageSelect").Elements("option").Count();
        }
Beispiel #24
0
        /// <summary>
        /// Returns the result of <c>PageGetter.GetPageById</c> for the given diary and page,
        /// using a freshly created <c>PageGetter</c>.
        /// </summary>
        /// <param name="diaryId">Diary identifier passed through to the getter.</param>
        /// <param name="pageId">Page identifier passed through to the getter.</param>
        public DiaryPageDto GetPageById(long diaryId, long pageId)
        {
            return new PageGetter().GetPageById(diaryId, pageId);
        }
 /// <summary>
 /// Calls <c>GetForUrl</c> once for every value produced by <c>GetAlphabet()</c>, awaits all
 /// of the calls through <c>WhenAll</c> and returns the flattened result.
 /// </summary>
 /// <param name="pageGetter">Delegate passed to each <c>GetForUrl</c> call.</param>
 /// <param name="progress">Optional progress sink handed to <c>WhenAll</c>.</param>
 public async Task<IEnumerable<(string name, string url)>> ListInstances(PageGetter pageGetter, IProgress<double> progress = null)
 {
     var downloads = GetAlphabet().Select(item => GetForUrl(pageGetter, item));
     var combined = downloads.WhenAll(progress).Flatten();

     return await combined.ConfigureAwait(false);
 }
Beispiel #26
0
 public MangaDownloader(IFileSystem fileSystem, IEnumerable <ISeriesParser> parsers, PageGetter getter) : base(fileSystem, getter) =>
Beispiel #27
0
        /// <summary>
        /// Manual test driver for a metadata parser: parses one hard-coded series page, runs
        /// <c>MetaDataService.Start</c> for the parser's provider with console progress bars,
        /// then loads all stored metadata and works out which <c>Genre</c> values never occur.
        /// Waits for console input before returning.
        /// </summary>
        /// <param name="parser">Parser under test; also the only parser given to the service.</param>
        private static async Task GetMetaData(IMetaDataParser parser)
        {
            var barOptions = new ProgressBarOptions
            {
                ForegroundColor = ConsoleColor.Yellow,
                BackgroundColor = ConsoleColor.DarkYellow,
                ProgressCharacter = '─'
            };

            var cancellation = new CancellationTokenSource();
            var downloader = new MangaDownloader(null, new List<ISeriesParser>());
            PageGetter fetchPage = Client2.GetDocumentAsync;
            var document = await fetchPage("https://www.mangaeden.com/en/en-manga/naruto/");

            var parsedMetaData = parser.GetMetaData(document);

            var service = new MetaDataService(new List<IMetaDataParser> { parser });

            var instancesSeen = false;

            // Supplies a progress bar for the first "Instances" phase and for every "MetaData"
            // phase; any other request cancels the run and gets no progress sink.
            IProgress<double> CreateProgress(string context)
            {
                switch (context)
                {
                    case "Instances" when !instancesSeen:
                        instancesSeen = true;
                        Console.WriteLine($"Handling {context}");
                        Console.WriteLine();
                        return new ConsoleProgress(barOptions, context);

                    case "MetaData":
                        Console.WriteLine();
                        Console.WriteLine();
                        Console.WriteLine($"Handling {context}");
                        Console.WriteLine();
                        return new ConsoleProgress(barOptions, context);

                    default:
                        cancellation.Cancel();
                        return null;
                }
            }

            service.ReportProgressFactory = CreateProgress;
            var run = service.Start(parser.ProviderName, cancellation.Token);

            try
            {
                await run;
            }
            catch (Exception e)
            {
                // Failures, including the cancellation triggered above, are only printed.
                Console.WriteLine(e.Message);
            }

            var storedMetaData = await service.GetMetaData();

            var martialArts = storedMetaData
                .Where(stored => stored.metaData.Genres.HasFlag(Genre.MartialArts))
                .ToList();

            // Start with every genre except None marked as unseen.
            var genreSeen = Enum.GetValues(typeof(Genre))
                .Cast<Genre>()
                .Where(g => g != Genre.None)
                .ToDictionary(g => g, _ => false);

            foreach (var record in storedMetaData)
            {
                foreach (var genre in record.metaData.Genres.Split())
                {
                    if (!genreSeen.ContainsKey(genre))
                    {
                        continue;
                    }

                    genreSeen[genre] = true;
                }
            }

            var neverUsed = genreSeen.Where(pair => !pair.Value).Select(pair => pair.Key).ToList();

            Console.Read();
        }
        /// <summary>
        /// Loads the page at <c>Url/pageNumber</c> and returns the <c>src</c> attribute of the
        /// element with id <c>mainImg</c>, prefixed with <c>https:</c>.
        /// </summary>
        /// <param name="pageNumber">Page number appended to <c>Url</c>.</param>
        /// <param name="getPage">Delegate used to fetch the document.</param>
        public async Task<string> GetImageUrl(int pageNumber, PageGetter getPage)
        {
            var document = await getPage($"{Url}/{pageNumber}");
            var source = document.GetElementById("mainImg").GetAttribute("src");

            return "https:" + source;
        }
        /// <summary>
        /// Downloads <paramref name="url"/> and, for each <c>tr</c> in the first <c>tbody</c>
        /// under <c>#mangaList</c>, returns the text of its <c>td &gt; a</c> anchor together with
        /// the anchor's <c>href</c> prefixed by the mangaeden site root.
        /// </summary>
        /// <param name="doc">Delegate used to fetch the document.</param>
        /// <param name="url">Address of the directory page to read.</param>
        /// <returns>A lazily evaluated sequence of (name, url) tuples.</returns>
        private static async Task<IEnumerable<(string name, string url)>> GetMangaList(PageGetter doc, string url)
        {
            var document = await doc(url);

            var tableBody = document
                .GetElementById("mangaList")
                .GetElementsByTagName("tbody")
                .First();

            // Still deferred: rows are projected only when the caller enumerates the result.
            return tableBody
                .Elements("tr")
                .Select(row =>
                {
                    var link = row.Element("td").Element("a");
                    return (link.TextContent, $"https://www.mangaeden.com{link.GetAttribute("href")}");
                });
        }
Beispiel #30
0
 private ParserServiceBase(PageGetter getter) =>