/// <summary>
/// Ad-hoc console driver: builds a chapter from a fixed mangapanda.com URL, asks it for its
/// page count and three image URLs, and constructs two <c>MangaDownloader</c> instances.
/// None of the fetched values are used afterwards.
/// </summary>
private static async Task Download()
{
    var barOptions = new ProgressBarOptions
    {
        ForegroundColor = ConsoleColor.Yellow,
        BackgroundColor = ConsoleColor.DarkYellow,
        ProgressCharacter = '─'
    };

    Console.WriteLine("Hello World!");
    Console.WriteLine("");

    IFileSystem fileSystem = new FileSystem();
    var pandaParser = new Panda.SeriesParser();
    var emptyDownloader = new MangaDownloader(null, new List<ISeriesParser>());
    PageGetter fetchDocument = Client2.GetDocumentAsync;

    const string chapterUrl = "https://www.mangapanda.com/the-heroic-legend-of-arslan-arakawa-hiromu/63";
    var sampleChapter = pandaParser.CreateChapter(chapterUrl);

    // The requests are awaited one after another, in the same order as before.
    var pageCount = await sampleChapter.GetPageCount(fetchDocument);
    var imageOfPage6 = await sampleChapter.GetImageUrl(6, fetchDocument);
    var imageOfPage7 = await sampleChapter.GetImageUrl(7, fetchDocument);
    var imageOfPage19 = await sampleChapter.GetImageUrl(19, fetchDocument);

    var pandaDownloader = new MangaDownloader(fileSystem, new List<ISeriesParser> { pandaParser });
}
/// <summary>
/// Loads the document at <c>Url + "/" + pageNumber</c> and returns the value of the
/// <c>src</c> attribute of the element whose id is "img".
/// </summary>
/// <param name="pageNumber">Page number appended to the chapter URL.</param>
/// <param name="getPage">Delegate used to fetch the document.</param>
/// <returns>The value of the image element's <c>src</c> attribute.</returns>
public async Task<string> GetImageUrl(int pageNumber, PageGetter getPage)
{
    var document = await getPage(Url + "/" + pageNumber);
    var imageElement = document.GetElementById("img");

    // First(...) throws when the element has no src attribute.
    var sourceAttribute = imageElement.Attributes.First(attribute => attribute.Name == "src");
    return sourceAttribute.Value;
}
/// <summary>
/// Loads the chapter document and derives the page count from the first
/// <c>select</c> element: the number of its direct <c>option</c> children minus one.
/// </summary>
/// <param name="getPage">Delegate used to fetch the document at <c>Url</c>.</param>
/// <returns>Option count of the first select element, minus one.</returns>
public async Task<int> GetPageCount(PageGetter getPage)
{
    var document = await getPage(Url);
    var firstSelect = document.GetElementsByTagName("select").First();

    // Original author's note: "this is a bit iffy".
    // NOTE(review): presumably one option is not a real page, hence the "- 1" — confirm.
    var optionCount = firstSelect.Children.Count(child => child.LocalName == "option");
    return optionCount - 1;
}
/// <summary>
/// Loads the chapter document and counts the <c>img</c> elements that are direct children
/// of the reader container.
/// </summary>
/// <param name="getPage">Delegate used to fetch the document at <c>Url</c>.</param>
/// <returns>Number of direct <c>img</c> children of the container.</returns>
public async Task<int> GetPageCount(PageGetter getPage)
{
    var document = await getPage(Url);

    // Prefer the element with id "vungdoc"; otherwise use the first element
    // carrying the class "container-chapter-reader".
    var container = document.GetElementById("vungdoc");
    if (container == null)
    {
        container = document.GetElementsByClassName("container-chapter-reader").First();
    }

    return container.Children.Count(child => child.LocalName == "img");
}
/// <summary>
/// Loads the chapter document and counts the direct <c>option</c> children of the
/// element whose id is "pageMenu".
/// </summary>
/// <param name="getPage">Delegate used to fetch the document at <c>Url</c>.</param>
/// <returns>Number of <c>option</c> children of the page menu.</returns>
public async Task<int> GetPageCount(PageGetter getPage)
{
    var document = await getPage(Url);
    var pageMenu = document.GetElementById("pageMenu");
    var menuEntries = pageMenu.Children;
    return menuEntries.Count(entry => entry.LocalName == "option");
}
/// <summary>
/// Fetches the funmanga.com manga list page for <paramref name="letter"/> and projects every
/// list entry to a tuple of link text and <c>href</c> value.
/// </summary>
/// <param name="getter">Delegate used to fetch the document.</param>
/// <param name="letter">Value inserted into the manga-list URL.</param>
/// <returns>
/// A lazily evaluated sequence of (text, href) tuples; the href part is null when the
/// anchor has no <c>href</c> attribute.
/// </returns>
private static async Task<IEnumerable<(string, string)>> GetForUrl(PageGetter getter, string letter)
{
    var document = await getter($"http://funmanga.com/manga-list/{letter}").ConfigureAwait(false);

    // Only <ul> elements carrying the "manga-list" class are considered.
    var mangaLists = document
        .GetElementsByTagName("ul")
        .Where(list => list.HasClass("manga-list"));

    var anchors = mangaLists
        .SelectMany(list => list.Elements("li"))
        .Select(item => item.Element("a"));

    return anchors.Select(anchor =>
        (anchor.TextContent, anchor.Attributes.FirstOrDefault(attribute => attribute.Name == "href")?.Value));
}
/// <summary>
/// Creates the two page getters used by this instance: one whose <c>Store</c> is "upc"
/// and one whose <c>Store</c> is <c>StoreName</c>.
/// </summary>
public virtual void InitPageGetter()
{
    var upcGetter = new PageGetter();
    upcGetter.Store = "upc";
    _upcGetter = upcGetter;

    var storeGetter = new PageGetter();
    storeGetter.Store = StoreName;
    _getter = storeGetter;
}
/// <summary>
/// Loads the chapter document and derives the page count from the select element found
/// under the element with id "top_bar" (div &gt; div &gt; select): its option count minus one.
/// </summary>
/// <param name="getPage">Delegate used to fetch the document at <c>Url</c>.</param>
/// <returns>Number of <c>option</c> elements in that select, minus one.</returns>
public async Task<int> GetPageCount(PageGetter getPage)
{
    var document = await getPage(Url);

    var pageSelect = document
        .GetElementById("top_bar")
        .Element("div")
        .Element("div")
        .Element("select");

    var optionCount = pageSelect.Elements("option").Count();
    return optionCount - 1;
}
/// <summary>
/// Retrieves a paged collection: fetches the first page to learn the total count, then
/// requests the remaining pages concurrently and merges all items into one de-duplicated list.
/// </summary>
/// <param name="pageGetter">Delegate invoked with (page size, offset) to fetch one page.</param>
/// <param name="pageSize">Number of items requested per page.</param>
/// <param name="token">Checked after each page has been obtained.</param>
/// <param name="log">Receives debug and warning messages.</param>
/// <param name="throwIfCountMismatch">
/// When true, a difference between the reported total and the number of collected items
/// raises <see cref="InvalidOperationException"/> instead of only being logged.
/// </param>
/// <returns>All collected items.</returns>
public async Task<IReadOnlyList<T>> GetAllPagesAsync<T>(PageGetter<T> pageGetter, decimal pageSize, CancellationToken token, ILog log, bool throwIfCountMismatch = false)
{
    var head = await pageGetter(pageSize, 0);
    var expectedTotal = head.TotalCount;
    log.Debug($"{0}/{expectedTotal}, {head.Count} items");

    var collected = new HashSet<T>((int)expectedTotal);
    collected.AddUnique(head, log);

    // Fetches one page; when that fails, falls back to item-by-item retrieval of the same range.
    Func<decimal, Task<VkCollection<T>>> fetchAt = async offset =>
    {
        VkCollection<T> chunk = null;
        try
        {
            chunk = await pageGetter(pageSize, offset);
            log.Debug($"{offset}/{expectedTotal}, {chunk.Count} items");
        }
        catch (Exception e)
        {
            log.Warn($"Error getting page {offset}/{expectedTotal}. Getting page by smaller chunks...", e);
            var items = await GetSinglePage(pageGetter, offset, pageSize, token, log);
            chunk = new VkCollection<T>(0, items);
        }

        token.ThrowIfCancellationRequested();
        return chunk ?? new VkCollection<T>(0, new List<T>());
    };

    var remainingPageCount = (int)(expectedTotal / pageSize);
    var pending = Enumerable.Range(1, remainingPageCount)
        .Select(pageNumber => (ulong)pageNumber * pageSize)
        .Select(fetchAt);

    // Task.WhenAll enumerates the query, which is what actually starts the requests.
    var fetched = await Task.WhenAll(pending);
    foreach (var chunk in fetched)
    {
        collected.AddUnique(chunk, log);
    }

    if ((int)expectedTotal != collected.Count)
    {
        var message = $"Expected {expectedTotal} items, got {collected.Count}. Maybe they were created/deleted, or it's VK bugs again.";
        log.Warn(message);
        if (throwIfCountMismatch)
        {
            throw new InvalidOperationException(message);
        }
    }

    return collected.ToList();
}
/// <summary>
/// Fetches the directory page at <paramref name="url"/> and returns, for every list item under
/// the element with id "mangalist", the link text and its <c>href</c> prefixed with "http:".
/// </summary>
/// <param name="getter">Delegate used to fetch the document.</param>
/// <param name="url">Address of the directory page.</param>
/// <returns>A materialized list of (text, url) tuples.</returns>
private static async Task<IEnumerable<(string, string)>> GetForUrl(PageGetter getter, string url)
{
    var document = await getter(url).ConfigureAwait(false);
    var listItems = document
        .GetElementById("mangalist")
        .Element("ul")
        .Elements("li");

    var entries = new List<(string, string)>();
    foreach (var item in listItems)
    {
        // The link sits inside the first element carrying the "manga_text" class.
        var anchor = item.GetElementsByClassName("manga_text").First().Element("a");
        entries.Add((anchor.TextContent, $"http:{anchor.GetAttribute("href")}"));
    }

    return entries;
}
/// <summary>
/// Requests every monitored page, stores one <c>PageStatus</c> row per page and returns the
/// rows that were recorded.
/// </summary>
/// <returns>
/// The statuses saved so far. The method is best-effort: when an exception interrupts the run
/// it is reported through <see cref="System.Diagnostics.Trace"/> and the partial list is returned.
/// </returns>
private static async Task<IEnumerable<PageStatus>> HitPages()
{
    var listPageStatus = new List<PageStatus>();
    try
    {
        using (var repoPages = new PagesRepository())
        using (var repoPageStatus = new PageStatusRepository())
        {
            // Get pages with the Monitor bit on.
            // Alternatives kept from the original: GetAllProdMonitor() / GetAllStgMonitor().
            var pages = repoPages.GetAllMonitor();
            var pageGetter = new PageGetter();

            foreach (var page in pages)
            {
                var pageStats = new PageStats { Url = page.Url };
                var stats = await pageGetter.HTTP_GET(pageStats);

                var pageStatus = new PageStatus
                {
                    Url = stats.Url,
                    ResponseTime = stats.ResponseTime,
                    ContentLength = stats.ContentLength,
                    ExceptionMessage = stats.ExceptionMessage,
                    Status = stats.Status.ToString(),
                    Created = DateTime.Now
                };

                // Each status is saved immediately, so earlier rows survive a later failure.
                repoPageStatus.Add(pageStatus);
                repoPageStatus.SaveChanges();
                listPageStatus.Add(pageStatus);
            }
        }
    }
    catch (Exception ex)
    {
        // Previously the exception was dropped without a trace; still do not rethrow,
        // but make the failure visible to any attached trace listener.
        System.Diagnostics.Trace.TraceError(
            $"HitPages failed after recording {listPageStatus.Count} page(s): {ex}");
    }

    return listPageStatus;
}
/// <summary>
/// Loads the chapter document and returns the <c>src</c> of the
/// <paramref name="pageNumber"/>-th (1-based) <c>img</c> child of the reader container.
/// </summary>
/// <param name="pageNumber">1-based position of the image inside the container.</param>
/// <param name="getPage">Delegate used to fetch the document at <c>Url</c>.</param>
/// <returns>The <c>src</c> attribute of the selected image.</returns>
public async Task<string> GetImageUrl(int pageNumber, PageGetter getPage)
{
    var document = await getPage(Url);

    // Prefer the element with id "vungdoc"; otherwise use the first element
    // carrying the class "container-chapter-reader".
    var container = document.GetElementById("vungdoc");
    if (container == null)
    {
        container = document.GetElementsByClassName("container-chapter-reader").First();
    }

    var images = container.Children.Where(child => child.LocalName == "img");
    var requested = images.Skip(pageNumber - 1).First();
    return requested.GetAttribute("src");
}
/// <summary>
/// Lists the mangaeden.com directory: reads the highest page number from the pagination
/// element of page 1, then processes all directory pages in batches of 20 via
/// <c>GetMangaList</c>.
/// </summary>
/// <param name="pageGetter">Delegate used to fetch documents.</param>
/// <param name="progress">Optional progress sink forwarded to the batch transformation.</param>
/// <returns>The (name, url) tuples produced by the batch transformation.</returns>
public async Task<IEnumerable<(string name, string url)>> ListInstances(PageGetter pageGetter, IProgress<double> progress = null)
{
    var firstPage = await pageGetter("https://www.mangaeden.com/en/en-directory/?page=1");
    var pagination = firstPage.GetElementsByClassName("pagination pagination_bottom").First();

    // The second-to-last child of the pagination element is parsed as the last page number.
    var paginationEntries = pagination.Children;
    var lastPageEntry = paginationEntries[paginationEntries.Length - 2];
    var lastPageNumber = int.Parse(lastPageEntry.TextContent);

    var directoryUrls = Enumerable.Range(1, lastPageNumber)
        .Select(pageNumber => $"https://www.mangaeden.com/en/en-directory/?page={pageNumber}");

    return await directoryUrls
        .Batch(20)
        .Transform(directoryUrl => GetMangaList(pageGetter, directoryUrl), progress);
}
/// <summary>
/// Lists the fanfox.net directory: reads the highest page number from the navigation list
/// of the directory start page, then collects the entries of every directory page via
/// <c>GetForUrl</c>.
/// </summary>
/// <param name="pageGetter">Delegate used to fetch documents.</param>
/// <param name="progress">Optional progress sink forwarded to <c>WhenAll</c>.</param>
/// <returns>The flattened (name, url) tuples of all directory pages.</returns>
public async Task<IEnumerable<(string name, string url)>> ListInstances(PageGetter pageGetter, IProgress<double> progress = null)
{
    var directory = await pageGetter("http://fanfox.net/directory/").ConfigureAwait(false);
    var navigationItems = directory
        .GetElementById("nav")
        .Element("ul")
        .Elements("li")
        .ToList();

    // The second-to-last navigation item's link text is parsed as the last page number.
    var lastPageItem = navigationItems[navigationItems.Count - 2];
    var pageCount = int.Parse(lastPageItem.Element("a").TextContent.Trim());

    var pageRequests = Enumerable.Range(1, pageCount)
        .Select(pageNumber => $"http://fanfox.net/directory/{pageNumber}.htm")
        .Select(pageUrl => GetForUrl(pageGetter, pageUrl));

    return await pageRequests
        .WhenAll(progress)
        .Flatten()
        .ToListAsync();
}
/// <summary>
/// Replaces the last path segment of <c>Url</c> with "<paramref name="pageNumber"/>.html",
/// loads that document and returns the <c>src</c> of the image found under the element with
/// id "viewer" (div &gt; a &gt; img).
/// </summary>
/// <param name="pageNumber">Page number used to build the page file name.</param>
/// <param name="getPage">Delegate used to fetch the document.</param>
/// <returns>The value of the image's <c>src</c> attribute.</returns>
public async Task<string> GetImageUrl(int pageNumber, PageGetter getPage)
{
    // Drop the final segment of the chapter URL.
    var segments = Url.Split('/');
    var chapterRoot = string.Join("/", segments.Take(segments.Length - 1));

    var document = await getPage(chapterRoot + "/" + pageNumber + ".html");

    // Original note: the first div under "viewer" has the class read_img.
    var image = document
        .GetElementById("viewer")
        .Element("div")
        .Element("a")
        .Element("img");

    var sourceAttribute = image.Attributes.First(attribute => attribute.Name == "src");
    return sourceAttribute.Value;
}
public MetaDataService(ICollection <IMetaDataParser> metaDataParsers, PageGetter getter) { MetaDataParsers = metaDataParsers.ToDictionary(p => p.ProviderName); PageGetter = getter; Path = Combine(DirectoryPath, "meta.data"); if (!Directory.Exists(DirectoryPath)) { Directory.CreateDirectory(DirectoryPath); } if (!File.Exists(Path)) { WriteToDisk(new (string, MetaData)[0]).GetAwaiter().GetResult();
/// <summary>
/// Entry point: collects the pages reachable from the given URL and runs the page checker on each.
/// </summary>
/// <param name="args">
/// <c>args[0]</c> is the site URL, or <c>-h</c>/<c>--help</c> to print usage;
/// an optional <c>args[1]</c> of <c>-f</c>/<c>--full</c> enables full logging.
/// </param>
static void Main(string[] args)
{
    // Without a URL, or when help is requested, only print usage. Returning here prevents
    // the fall-through to args[0], which would throw on an empty argument list and would
    // otherwise treat the help switch as the URL to crawl.
    if (args.Length == 0 || args[0] == "-h" || args[0] == "--help")
    {
        ShowHelp();
        return;
    }

    var url = args[0];
    var enableFullLogging = args.Length > 1 && (args[1] == "-f" || args[1] == "--full");

    ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12 | SecurityProtocolType.Tls11;

    var getter = new PageGetter(
        new WebClientWrapper(),
        new HtmlDocumentWrapper(),
        new Website(url),
        new Log(enableFullLogging));

    var pages = getter.GetPages(url);
    pages.Sort();

    var checker = new PageChecker(
        new LinkDictionary(),
        new WebClientWrapper(),
        new HtmlDocumentWrapper(),
        new Log(enableFullLogging),
        new Website(url));

    foreach (var page in pages)
    {
        checker.Check(page);
    }

    Console.WriteLine("Press any key to exit...");
    Console.ReadKey();
}
/// <summary>
/// Polling loop: for 1000 rounds, requests every page returned by <c>GetAllStgMonitor</c>,
/// stores one <c>PageStatus</c> row per request, prints the timing to the console and then
/// sleeps for 10 seconds.
/// </summary>
private static void DirectAccess()
{
    // Both repositories are disposable (they are used with `using` elsewhere in this code);
    // dispose them when the loop ends or an exception escapes.
    using (var repoPages = new PagesRepository())
    using (var repoPageStatus = new PageStatusRepository())
    {
        // Alternative kept from the original: repoPages.GetAllProdMonitor().
        var pages = repoPages.GetAllStgMonitor();
        var pageGetter = new PageGetter();

        var counter = 1000;
        while (counter > 0)
        {
            foreach (var page in pages)
            {
                Console.WriteLine($"Page: {page.Url}");

                var pageStats = new PageStats { Url = page.Url };

                // This method is synchronous, so the request is blocked on deliberately.
                var stats = pageGetter.HTTP_GET(pageStats).Result;

                var pageStatus = new PageStatus
                {
                    Url = stats.Url,
                    ResponseTime = stats.ResponseTime,
                    ContentLength = stats.ContentLength,
                    ExceptionMessage = stats.ExceptionMessage,
                    Status = stats.Status.ToString(),
                    Created = DateTime.Now
                };
                repoPageStatus.Add(pageStatus);
                repoPageStatus.SaveChanges();

                Console.WriteLine($"seconds: {stats.ResponseTime} length: {stats.ContentLength} time: {DateTime.Now}");
                Console.WriteLine();
            }

            Thread.Sleep(10000);
            counter--;
        }
    }
}
/// <summary>
/// Lists the series on the mangapanda.com alphabetical index: collects every link found in
/// the "series_col" columns and returns distinct (text, absolute url) tuples.
/// </summary>
/// <param name="pageGetter">Delegate used to fetch the index document.</param>
/// <param name="progress">Optional sink; receives 0.9 after the download and 1 when parsing is done.</param>
/// <returns>A materialized list of distinct (name, url) tuples.</returns>
public async Task<IEnumerable<(string name, string url)>> ListInstances(PageGetter pageGetter, IProgress<double> progress = null)
{
    var document = await pageGetter("http://www.mangapanda.com/alphabetical").ConfigureAwait(false);
    progress?.Report(0.9);

    // Original notes name the levels: content_bloc2 > series_col > series_alpha > ul > li > a.
    var seriesColumns = document
        .GetElementById("wrapper_body")
        .Element("div")
        .Elements("div")
        .Where(column => column.HasClass("series_col"));

    var anchors = seriesColumns
        .SelectMany(column => column.Elements("div"))
        .SelectMany(group => group.Elements("ul"))
        .SelectMany(list => list.Elements("li"))
        .Select(item => item.Element("a"));

    var instances = anchors
        .Select(anchor => (anchor.TextContent, $"http://www.mangapanda.com{anchor.GetAttribute("href")}"))
        .Distinct()
        .ToList();

    progress?.Report(1);
    return instances;
}
/// <summary>
/// Requests a single page on behalf of <paramref name="user"/>, stores the resulting
/// <c>AdHocPageStatu</c> row and returns it in a list.
/// </summary>
/// <param name="pageUrl">Address of the page to request.</param>
/// <param name="user">Value written to the <c>User</c> field of the stored status.</param>
/// <returns>
/// A list with the recorded status. The method is best-effort: when an exception occurs it is
/// reported through <see cref="System.Diagnostics.Trace"/> and the list (possibly empty) is returned.
/// </returns>
private static async Task<IEnumerable<AdHocPageStatu>> HitPage(string pageUrl, string user)
{
    var listPageStatus = new List<AdHocPageStatu>();
    try
    {
        using (var adHocPageStatusRepository = new AdHocPageStatusRepository())
        {
            var pageGetter = new PageGetter();
            var pageStats = new PageStats { Url = pageUrl };
            var stats = await pageGetter.HTTP_GET(pageStats);

            var pageStatus = new AdHocPageStatu
            {
                Url = stats.Url,
                ResponseTime = stats.ResponseTime,
                ContentLength = stats.ContentLength,
                ExceptionMessage = stats.ExceptionMessage,
                Status = stats.Status.ToString(),
                Created = DateTime.Now,
                User = user
            };

            adHocPageStatusRepository.Add(pageStatus);
            adHocPageStatusRepository.SaveChanges();
            listPageStatus.Add(pageStatus);
        }
    }
    catch (Exception ex)
    {
        // Previously the exception was dropped without a trace; still do not rethrow,
        // but make the failure visible to any attached trace listener.
        System.Diagnostics.Trace.TraceError($"HitPage failed for '{pageUrl}': {ex}");
    }

    return listPageStatus;
}
/// <summary>
/// Fallback for a page that could not be fetched in one request: asks for each position in
/// the range [<paramref name="offset"/>, <paramref name="offset"/> + <paramref name="length"/>)
/// separately, one item per request, and concatenates whatever could be retrieved.
/// </summary>
/// <param name="pageGetter">Delegate invoked with (count, offset) to fetch items.</param>
/// <param name="offset">Offset of the first item of the failed page.</param>
/// <param name="length">Number of items the failed page should have contained.</param>
/// <param name="token">Checked after each single-item request.</param>
/// <param name="log">Receives debug and warning messages.</param>
/// <returns>The items that were retrieved; positions that failed contribute nothing.</returns>
private async Task<IEnumerable<T>> GetSinglePage<T>(PageGetter<T> pageGetter, decimal offset, decimal length, CancellationToken token, ILog log)
{
    var pageTasks = Enumerable.Range((int)offset, (int)length)
        .Select(async x =>
        {
            VkCollection<T> page = null;
            try
            {
                // Request the item at position x. Passing `offset` here (as the code did
                // before) made every task fetch the same first item of the range.
                page = await pageGetter(1, x);
                log.Debug($"{x}/{length} item from bad page");
            }
            catch (Exception e)
            {
                // A single unreadable item is skipped rather than failing the whole range.
                log.Warn($"Error getting item {x}", e);
            }

            token.ThrowIfCancellationRequested();
            return page ?? new VkCollection<T>(0, new List<T>());
        });

    var pages = await Task.WhenAll(pageTasks);
    return pages.SelectMany(x => x);
}
/// <summary>
/// Returns the minimized pages of the given diary by delegating to a new <c>PageGetter</c>.
/// </summary>
/// <param name="diaryId">Identifier of the diary.</param>
/// <returns>The result of <c>PageGetter.GetMinimizedDiaryPages</c> for that diary.</returns>
public List<MinimizedDiaryPageDto> GetDiaryMinimizedPageList(long diaryId)
{
    return new PageGetter().GetMinimizedDiaryPages(diaryId);
}
/// <summary>
/// Loads the chapter document and counts the <c>option</c> elements of the element whose id
/// is "pageSelect".
/// </summary>
/// <param name="getPage">Delegate used to fetch the document at <c>Url</c>.</param>
/// <returns>
/// The number of options, or 0 when no document was returned or the document has no
/// "pageSelect" element.
/// </returns>
public async Task<int> GetPageCount(PageGetter getPage)
{
    var page = await getPage(Url);

    // The null document was already mapped to 0; treat a missing select element the same way
    // instead of failing on a null reference.
    var pageSelect = page?.GetElementById("pageSelect");
    if (pageSelect == null)
    {
        return 0;
    }

    return pageSelect.Elements("option").Count();
}
/// <summary>
/// Returns one page of a diary by delegating to a new <c>PageGetter</c>.
/// </summary>
/// <param name="diaryId">Identifier of the diary.</param>
/// <param name="pageId">Identifier of the page.</param>
/// <returns>The result of <c>PageGetter.GetPageById</c> for the given identifiers.</returns>
public DiaryPageDto GetPageById(long diaryId, long pageId)
{
    return new PageGetter().GetPageById(diaryId, pageId);
}
/// <summary>
/// Calls <c>GetForUrl</c> once for every value produced by <c>GetAlphabet</c> and returns the
/// flattened results.
/// </summary>
/// <param name="pageGetter">Delegate used to fetch documents.</param>
/// <param name="progress">Optional progress sink forwarded to <c>WhenAll</c>.</param>
/// <returns>The flattened (name, url) tuples of all requests.</returns>
public async Task<IEnumerable<(string name, string url)>> ListInstances(PageGetter pageGetter, IProgress<double> progress = null)
{
    var requests = GetAlphabet().Select(letter => GetForUrl(pageGetter, letter));
    var combined = requests.WhenAll(progress).Flatten();
    return await combined.ConfigureAwait(false);
}
public MangaDownloader(IFileSystem fileSystem, IEnumerable <ISeriesParser> parsers, PageGetter getter) : base(fileSystem, getter) =>
/// <summary>
/// Ad-hoc console driver for the metadata pipeline: parses one sample page with
/// <paramref name="parser"/>, runs a <c>MetaDataService</c> for the parser's provider with
/// console progress reporting, then inspects which <c>Genre</c> values occur in the result.
/// Waits for console input before returning.
/// </summary>
/// <param name="parser">Parser under test; also the only parser handed to the service.</param>
private static async Task GetMetaData(IMetaDataParser parser)
{
    var barOptions = new ProgressBarOptions
    {
        ForegroundColor = ConsoleColor.Yellow,
        BackgroundColor = ConsoleColor.DarkYellow,
        ProgressCharacter = '─'
    };
    var cancellation = new CancellationTokenSource();
    var downloader = new MangaDownloader(null, new List<ISeriesParser>());
    PageGetter fetchDocument = Client2.GetDocumentAsync;

    // Alternative sample page from the original: http://manganelo.com/manga/read_naruto_manga_online_free3
    var samplePage = await fetchDocument("https://www.mangaeden.com/en/en-manga/naruto/");
    var sampleMetaData = parser.GetMetaData(samplePage);

    var metaDataService = new MetaDataService(new List<IMetaDataParser>() { parser });
    var instancesHandled = false;

    // Supplies a console progress bar for the first "Instances" request and for every
    // "MetaData" request; any other request cancels the run and yields no progress sink.
    IProgress<double> CreateProgress(string context)
    {
        if (context == "Instances" && !instancesHandled)
        {
            instancesHandled = true;
            Console.WriteLine($"Handling {context}");
            Console.WriteLine();
        }
        else if (context == "MetaData")
        {
            Console.WriteLine();
            Console.WriteLine();
            Console.WriteLine($"Handling {context}");
            Console.WriteLine();
        }
        else
        {
            cancellation.Cancel();
            return null;
        }

        return new ConsoleProgress(barOptions, context);
    }

    metaDataService.ReportProgressFactory = CreateProgress;

    var run = metaDataService.Start(parser.ProviderName, cancellation.Token);
    try
    {
        await run;
    }
    catch (Exception e)
    {
        Console.WriteLine(e.Message);
    }

    var allMetaData = await metaDataService.GetMetaData();
    var martialArts = allMetaData
        .Where(entry => entry.metaData.Genres.HasFlag(Genre.MartialArts))
        .ToList();

    // Start with every genre except None marked as unseen, then tick off the ones that occur.
    var genreSeen = Enum.GetValues(typeof(Genre))
        .Cast<Genre>()
        .Where(genre => genre != Genre.None)
        .ToDictionary(genre => genre, _ => false);

    foreach (var entry in allMetaData)
    {
        foreach (var genre in entry.metaData.Genres.Split())
        {
            if (genreSeen.ContainsKey(genre))
            {
                genreSeen[genre] = true;
            }
        }
    }

    var neverSeen = genreSeen
        .Where(pair => !pair.Value)
        .Select(pair => pair.Key)
        .ToList();

    Console.Read();
}
/// <summary>
/// Loads the document at <c>Url + "/" + pageNumber</c> and returns the <c>src</c> of the
/// element with id "mainImg", prefixed with "https:".
/// </summary>
/// <param name="pageNumber">Page number appended to the chapter URL.</param>
/// <param name="getPage">Delegate used to fetch the document.</param>
/// <returns>The image address with the "https:" prefix.</returns>
public async Task<string> GetImageUrl(int pageNumber, PageGetter getPage)
{
    var document = await getPage(Url + "/" + pageNumber);
    var mainImage = document.GetElementById("mainImg");
    var source = mainImage.GetAttribute("src");
    return $"https:{source}";
}
/// <summary>
/// Fetches one mangaeden.com directory page and projects every table row under the element
/// with id "mangaList" to a tuple of link text and absolute URL.
/// </summary>
/// <param name="doc">Delegate used to fetch the document.</param>
/// <param name="url">Address of the directory page.</param>
/// <returns>A lazily evaluated sequence of (name, url) tuples.</returns>
private static async Task<IEnumerable<(string name, string url)>> GetMangaList(PageGetter doc, string url)
{
    var document = await doc(url);

    var tableBody = document
        .GetElementById("mangaList")
        .GetElementsByTagName("tbody")
        .First();

    // The link is taken from the first cell of each row.
    var anchors = tableBody
        .Elements("tr")
        .Select(row => row.Element("td").Element("a"));

    return anchors.Select(anchor =>
        (anchor.TextContent, $"https://www.mangaeden.com{anchor.GetAttribute("href")}"));
}
private ParserServiceBase(PageGetter getter) =>