/// <summary>
/// Reads the profile page that lists all courses and returns the key of the semester entry
/// that is matched by at least one course link.
/// </summary>
/// <remarks>
/// <c>SemesterCoursesLongFormNameRegex</c> is walked in reverse order, so the result is the
/// last entry (in that collection's enumeration order) whose pattern matches the inner text
/// of some course link — presumably the most recent semester; confirm against the map's definition.
/// </remarks>
/// <returns>The key of the first entry, in reverse order, that matches a course link.</returns>
/// <exception cref="InvalidOperationException">No entry matches any course link.</exception>
public static async Task<int> ExtractSemestersCount()
{
    await CoursesClient.LazyRefresh();

    var allCoursesHtml = await CoursesClient.SessionClient.GetStringAsyncHttp(CoursesProfileAllCoursesUrl);
    var page = new HtmlDocument();
    page.LoadHtml(allCoursesHtml);
    var linkNodes = page.DocumentNode.SelectNodes(XPathFilterProfileCoursesLinks);

    // Reverse order: the first hit is the last entry that has at least one matching link.
    var matchedSemester = SemesterCoursesLongFormNameRegex
        .Reverse()
        .First(entry => linkNodes.Any(link => Regex.IsMatch(link.InnerText, entry.Value)));

    return matchedSemester.Key;
}
/// <summary>
/// Loads the profile page that lists all courses and builds a course link for every link
/// whose text matches <c>CurrentSemesterCourseLongFormNameRegex</c>.
/// </summary>
/// <remarks>
/// For each matching link the course page itself is fetched to look up the short name; these
/// requests are started together and awaited with <see cref="Task.WhenAll(System.Collections.Generic.IEnumerable{Task})"/>.
/// The resulting list is also stored in <c>SharedVars.Courses</c>.
/// </remarks>
/// <returns>
/// The list of current-semester course links; an empty list when the page contains no
/// course links at all.
/// </returns>
public static async Task<List<ICourseLink>> ExtractCourses()
{
    await CoursesClient.LazyRefresh();

    var coursesPageText = await CoursesClient.SessionClient.GetStringAsyncHttp(CoursesProfileAllCoursesUrl);
    CoursesClient.FindSessKey(coursesPageText);

    var doc = new HtmlDocument();
    doc.LoadHtml(coursesPageText);

    // HtmlAgilityPack's SelectNodes returns null (not an empty collection) when nothing matches,
    // which would make the LINQ query below throw ArgumentNullException.
    var coursesLinksNodes = doc.DocumentNode.SelectNodes(XPathFilterProfileCoursesLinks);
    if (coursesLinksNodes == null)
    {
        SharedVars.Courses = new List<ICourseLink>();
        return SharedVars.Courses;
    }

    var coursesLinksTasks = coursesLinksNodes
        .Where(l => Regex.IsMatch(l.InnerText, CurrentSemesterCourseLongFormNameRegex))
        .Select(async l =>
        {
            var longName = ExtractName(l);
            var url = ExtractHref(l);

            string shortName;
            using (var courseHtml = await CoursesClient.SessionClient.GetStreamAsyncHttp(url))
            {
                shortName = LazyHtmlParser.FindShortNameInHtml(courseHtml);
                shortName = CleanName(shortName, true);
            }

            // Short name is left-aligned and padded to 8 characters in front of the long name.
            return new CourseLink($"{shortName,-8}{longName}", url);
        });

    var coursesLinks = await Task.WhenAll(coursesLinksTasks);
    SharedVars.Courses = coursesLinks.ToList<ICourseLink>();
    return SharedVars.Courses;
}
/// <summary>
/// One-time startup: switches console output to UTF-8, awaits
/// <c>CoursesClient.LazyRefresh</c>, and stores the value returned by
/// <c>CoursesExtractor.ExtractSemestersCount</c> in <c>SharedVars.CurrentSemesterNumber</c>.
/// </summary>
public static async Task Init()
{
    Console.OutputEncoding = Encoding.UTF8;

    await CoursesClient.LazyRefresh();

    var semesterNumber = await CoursesExtractor.ExtractSemestersCount();
    SharedVars.CurrentSemesterNumber = semesterNumber;
}
/// <summary>
/// Downloads the page at <c>Url</c> as a stream, extracts its title with
/// <c>LazyHtmlParser.FindTitleInHtml</c>, stores it in <c>Title</c> and sets
/// <c>IsTitleExtracted</c>.
/// </summary>
private async Task ExtractExternalUrlAndTitle()
{
    await CoursesClient.LazyRefresh();

    using (var pageStream = await CoursesClient.SessionClient.GetStreamAsyncHttp(Url))
    {
        Title = LazyHtmlParser.FindTitleInHtml(pageStream);
        IsTitleExtracted = true;
    }
}
/// <summary>
/// Fetches the main HTML via <c>ExtractMainHtml</c>, converts it with <c>GeneratePdf</c>
/// and writes the result to <paramref name="filename"/>, reporting progress along the way.
/// </summary>
/// <param name="filename">Path of the file to write the generated PDF bytes to.</param>
protected override async Task GetAndSaveFile(string filename)
{
    await CoursesClient.LazyRefresh();

    // Initial progress report before any content size is known.
    ReportProgress(0, 1024);

    var mainHtml = await ExtractMainHtml();
    var htmlLength = mainHtml.Length;
    // HTML fetched: report the halfway point.
    ReportProgress(htmlLength / 2.0, htmlLength);

    var pdfBytes = GeneratePdf(mainHtml);
    var pdfLength = pdfBytes.Length;
    // PDF generated but not yet written: report 90%.
    ReportProgress(pdfLength * 0.9, pdfLength);

    File.WriteAllBytes(filename, pdfBytes);
    ReportProgress(pdfLength, pdfLength);
}
/// <summary>
/// Follows <c>Url</c> to find the external address it leads to and the title of that page,
/// storing them in <c>ExternalUrl</c> and <c>Title</c> and setting
/// <c>AreExternalUrlAndTitleExtracted</c>.
/// </summary>
/// <remarks>
/// If the request ends on the courses host (same host as the session client's base address),
/// the real target is taken from the page via <c>LazyHtmlParser.FindUrlWorkaroundInHtml</c>
/// and requested separately. Otherwise the final request URI is the external URL.
/// </remarks>
private async Task ExtractExternalUrlAndTitle()
{
    await CoursesClient.LazyRefresh();

    // Request the link the same way a click on the courses site would.
    using (var initialResponse = await CoursesClient.SessionClient.GetHeadersAsyncHttp(Url))
    {
        var landedUri = initialResponse.RequestMessage.RequestUri;
        var coursesHost = CoursesClient.SessionClient.BaseAddress.Host;

        // By default the title is read from the response we already have.
        var titleResponse = initialResponse;

        if (landedUri.Host != coursesHost)
        {
            // Redirected straight to the external site: the final URI is the answer.
            ExternalUrl = landedUri.ToString();
        }
        else
        {
            // Still on the courses site: pull the workaround URL out of the page and follow it.
            using (var coursesPage = await initialResponse.Content.ReadAsStreamAsync())
            {
                var workaroundUrl = LazyHtmlParser.FindUrlWorkaroundInHtml(coursesPage);
                ExternalUrl = workaroundUrl;
                titleResponse = await CoursesClient.SessionClient.GetHeadersAsyncHttp(workaroundUrl);
            }
        }

        // Disposes the follow-up response when one was made (or the initial one a second time).
        using (titleResponse)
        using (var externalPage = await titleResponse.Content.ReadAsStreamAsync())
        {
            Title = LazyHtmlParser.FindTitleInHtml(externalPage);
            AreExternalUrlAndTitleExtracted = true;
        }
    }
}
/// <summary>
/// Resolves the target file name and path, makes sure the destination directory exists,
/// and then delegates the actual transfer to <c>GetAndSaveFile</c>.
/// </summary>
/// <param name="middlePath">
/// Path segments passed on to <c>FileNameHelpers.FullyPrepareFile</c> together with the file name.
/// </param>
public async Task Download(string[] middlePath)
{
    await CoursesClient.LazyRefresh();
    await GetNameFromUrlNow();

    // Only CoursesName selects the courses-side name; every other naming method uses the URL name.
    var targetName = SharedVars.NamingMethod == NamingMethod.CoursesName
        ? FileFromCourses.FileNameAndExtensionOnly
        : FileFromUrl.FileNameAndExtensionOnly;

    var targetPath = FileNameHelpers.FullyPrepareFile(targetName, middlePath);
    FileFromUrl.FullPathAndFileAndExtension = FileFromCourses.FullPathAndFileAndExtension = targetPath;

    var parentDirectory = new FileInfo(targetPath).Directory;
    if (parentDirectory != null && !parentDirectory.Exists)
    {
        parentDirectory.Create();
    }

    await GetAndSaveFile(targetPath);
}
/// <summary>
/// Streams the resource at <c>DownloadUrl</c> into a newly created file at
/// <paramref name="filename"/> while <c>DownloadProgressTracker</c> is registered on
/// <c>CoursesClient</c>.
/// </summary>
/// <param name="filename">Path of the file to create and fill with the downloaded content.</param>
protected override async Task GetAndSaveFile(string filename)
{
    await CoursesClient.LazyRefresh();

    CoursesClient.AddEvent(DownloadProgressTracker);
    try
    {
        // We request headers only, because otherwise the file is first put into memory and we
        // lose the whole point of streams. Since we use ReadAsStreamAsync, nothing is loaded
        // into memory up front. We can't reuse an earlier headers response, because that way
        // progress could not be tracked.
        using (var file = await CoursesClient.SessionClient.GetHeadersAsyncHttp(DownloadUrl))
        using (var fileStream = File.Create(filename))
        using (var content = await file.Content.ReadAsStreamAsync())
        {
            await content.CopyToAsync(fileStream);
            await fileStream.FlushAsync();
        }
    }
    finally
    {
        // Always unregister, even when the download fails; otherwise the tracker stays
        // subscribed after this call has ended.
        CoursesClient.RemoveEvent(DownloadProgressTracker);
    }
}
/// <summary>
/// Downloads a course page and groups its links (files, folders, URLs, pages) into sections,
/// starting a new section at every header node.
/// </summary>
/// <remarks>
/// Links that appear before the first header go into an initial header-less section.
/// Sections without any links are dropped from the result. The result is also stored in
/// <c>SharedVars.Sections</c>.
/// </remarks>
/// <param name="courseLink">The course whose page (<c>courseLink.Url</c>) is parsed.</param>
/// <returns>
/// The non-empty sections of the course; an empty list when the page contains no matching nodes.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// A node's item type is none of Header, File, Folder, Url or Page.
/// </exception>
public static async Task<List<ISection>> ExtractSectionsForCourse(ICourseLink courseLink)
{
    await CoursesClient.LazyRefresh();

    var coursePageText = await CoursesClient.SessionClient.GetStringAsyncHttp(courseLink.Url);
    CoursesClient.FindSessKey(coursePageText);

    var doc = new HtmlDocument();
    doc.LoadHtml(coursePageText);
    var headersLinks = doc.DocumentNode.SelectNodes(XPathFilterLinksHeadersFolders);

    SharedVars.Sections = new List<ISection>();

    // Header-less section that collects links appearing before the first header.
    var currentSection = new Section();
    SharedVars.Sections.Add(currentSection);

    // HtmlAgilityPack's SelectNodes returns null (not an empty collection) when nothing matches;
    // in that case there is simply nothing to iterate.
    if (headersLinks != null)
    {
        foreach (var headerLink in headersLinks)
        {
            var itemType = TryGetItemType(headerLink);

            // Only link-like items carry a text node and an href; headers are handled separately.
            string innerText = null;
            string href = null;
            if (itemType != ItemType.Header)
            {
                innerText = headerLink.Descendants().First(d => d.Name == "#text").InnerText.DecodeHtml();
                href = headerLink.Attributes.First(l => l.Name == "href").Value;
            }

            switch (itemType)
            {
                case ItemType.Header:
                    var headerName = headerLink.InnerText.DecodeHtml();
                    var headerTag = headerLink.OriginalName;
                    var headerId = FindIdFromAncestors(headerLink);
                    currentSection = new Section(new Header(headerName, headerTag, headerId), courseLink);
                    SharedVars.Sections.Add(currentSection);
                    break;
                case ItemType.File:
                    currentSection.Links.Add(new FileLink(innerText, href, currentSection));
                    break;
                case ItemType.Folder:
                    currentSection.Links.Add(new FolderLink(innerText, href, currentSection));
                    break;
                case ItemType.Url:
                    currentSection.Links.Add(new ExternalLink(innerText, href, currentSection));
                    break;
                case ItemType.Page:
                    currentSection.Links.Add(new PageLink(innerText, href, currentSection));
                    break;
                default:
                    throw new ArgumentOutOfRangeException(nameof(itemType), itemType, "Unsupported item type.");
            }
        }
    }

    // Drop sections that ended up without any links (including the initial one, if unused).
    SharedVars.Sections = SharedVars.Sections.Where(s => s.Links.Any()).ToList();
    return SharedVars.Sections;
}