Beispiel #1
0
        /// <summary>
        /// Downloads the profile "all courses" page and determines a semester number from
        /// the course links found on it.
        /// </summary>
        /// <returns>
        /// The key of the first entry of <c>SemesterCoursesLongFormNameRegex</c>, taken in
        /// reversed enumeration order, whose pattern matches the inner text of at least
        /// one course link. (Presumably this is the highest semester the user has courses
        /// in; that depends on how the map is ordered, which is not visible here.)
        /// </returns>
        /// <exception cref="System.InvalidOperationException">
        /// The page contains no course links, or no semester pattern matches any of them.
        /// </exception>
        public static async Task <int> ExtractSemestersCount()
        {
            await CoursesClient.LazyRefresh();

            var profileHtml = await CoursesClient.SessionClient.GetStringAsyncHttp(CoursesProfileAllCoursesUrl);

            var doc = new HtmlDocument();
            doc.LoadHtml(profileHtml);

            // SelectNodes yields null, not an empty collection, when the XPath matches
            // nothing; fail with a clear message instead of a null-argument error
            // surfacing from inside the LINQ pipeline below.
            var coursesLinks = doc.DocumentNode.SelectNodes(XPathFilterProfileCoursesLinks);
            if (coursesLinks == null)
            {
                throw new System.InvalidOperationException(
                          "No course links were found on the profile courses page.");
            }

            var matchedSemester =
                SemesterCoursesLongFormNameRegex
                .Reverse()
                .Select(pair => new
            {
                Number  = pair.Key,
                Courses = coursesLinks
                          .Where(node => Regex.IsMatch(node.InnerText, pair.Value))
            })
                .FirstOrDefault(semester => semester.Courses.Any());

            // Same exception type First() used to raise, but with an actionable message.
            if (matchedSemester == null)
            {
                throw new System.InvalidOperationException(
                          "None of the known semester name patterns matched a course link.");
            }

            return(matchedSemester.Number);
        }
Beispiel #2
0
        /// <summary>
        /// Downloads the profile "all courses" page, keeps the course links whose text
        /// matches <c>CurrentSemesterCourseLongFormNameRegex</c>, and resolves a short
        /// name for each of them by fetching the individual course page.
        /// </summary>
        /// <returns>
        /// The extracted course links. The same list is stored in
        /// <c>SharedVars.Courses</c>. It is empty when the page has no course links.
        /// </returns>
        public static async Task <List <ICourseLink> > ExtractCourses()
        {
            await CoursesClient.LazyRefresh();

            var coursesPageText = await CoursesClient.SessionClient.GetStringAsyncHttp(CoursesProfileAllCoursesUrl);

            CoursesClient.FindSessKey(coursesPageText);

            var doc = new HtmlDocument();
            doc.LoadHtml(coursesPageText);

            var coursesLinksNodes = doc.DocumentNode.SelectNodes(XPathFilterProfileCoursesLinks);

            // SelectNodes yields null, not an empty collection, when the XPath matches
            // nothing; treat that as "no courses" instead of crashing in Where().
            if (coursesLinksNodes == null)
            {
                SharedVars.Courses = new List <ICourseLink>();

                return(SharedVars.Courses);
            }

            // One task per course; Task.WhenAll below enumerates the query, which starts
            // all of the course-page requests before any of them is awaited.
            var coursesLinksTasks = coursesLinksNodes
                                    .Where(l => Regex.IsMatch(l.InnerText, CurrentSemesterCourseLongFormNameRegex))
                                    .Select(async l =>
            {
                var longName = ExtractName(l);
                var url      = ExtractHref(l);

                string shortName;
                using (var courseHtml = await CoursesClient.SessionClient.GetStreamAsyncHttp(url))
                {
                    shortName = LazyHtmlParser.FindShortNameInHtml(courseHtml);
                    shortName = CleanName(shortName, true);
                }

                // Short name is left-aligned and padded to 8 characters before the long name.
                return(new CourseLink($"{shortName,-8}{longName}", url));
            });

            var coursesLinks = await Task.WhenAll(coursesLinksTasks);

            SharedVars.Courses = coursesLinks.ToList <ICourseLink>();

            return(SharedVars.Courses);
        }
Beispiel #3
0
        /// <summary>
        /// Switches console output to UTF-8, refreshes the courses client and stores the
        /// result of <c>CoursesExtractor.ExtractSemestersCount</c> in
        /// <c>SharedVars.CurrentSemesterNumber</c>.
        /// </summary>
        public static async Task Init()
        {
            Console.OutputEncoding = Encoding.UTF8;

            await CoursesClient.LazyRefresh();

            var semesterNumber = await CoursesExtractor.ExtractSemestersCount();

            SharedVars.CurrentSemesterNumber = semesterNumber;
        }
        /// <summary>
        /// Fetches the page behind <c>Url</c>, stores the title found in its HTML in
        /// <c>Title</c> and marks the title as extracted.
        /// </summary>
        private async Task ExtractExternalUrlAndTitle()
        {
            await CoursesClient.LazyRefresh();

            var pageStream = await CoursesClient.SessionClient.GetStreamAsyncHttp(Url);

            // The stream is released as soon as the title has been read from it.
            using (pageStream)
            {
                Title = LazyHtmlParser.FindTitleInHtml(pageStream);

                IsTitleExtracted = true;
            }
        }
        /// <summary>
        /// Extracts the main HTML, converts it to a PDF and writes the PDF to disk,
        /// reporting progress after each stage.
        /// </summary>
        /// <param name="filename">Path of the file the generated PDF bytes are written to.</param>
        protected override async Task GetAndSaveFile(string filename)
        {
            await CoursesClient.LazyRefresh();

            // Initial "nothing done yet" report against a placeholder total.
            ReportProgress(0, 1024);

            var mainHtml   = await ExtractMainHtml();
            var htmlLength = mainHtml.Length;

            // HTML is available: report the halfway mark.
            ReportProgress(htmlLength / 2.0, htmlLength);

            var pdfBytes  = GeneratePdf(mainHtml);
            var pdfLength = pdfBytes.Length;

            // PDF is generated but not yet written: report 90 %.
            ReportProgress(pdfLength * 0.9, pdfLength);

            File.WriteAllBytes(filename, pdfBytes);

            ReportProgress(pdfLength, pdfLength);
        }
Beispiel #6
0
        /// <summary>
        /// Follows <c>Url</c> to find out where it finally leads, stores that address in
        /// <c>ExternalUrl</c>, then reads the target page's title into <c>Title</c> and
        /// marks both values as extracted.
        /// </summary>
        private async Task ExtractExternalUrlAndTitle()
        {
            await CoursesClient.LazyRefresh();

            // Request the link the same way a click on the courses page would.
            using (var firstResponse = await CoursesClient.SessionClient.GetHeadersAsyncHttp(Url))
            {
                var targetResponse = firstResponse;

                var landedOnCourses =
                    firstResponse.RequestMessage.RequestUri.Host == CoursesClient.SessionClient.BaseAddress.Host;

                if (!landedOnCourses)
                {
                    // The request was redirected to the external site: its final URI is the link.
                    var finalUrl = firstResponse.RequestMessage.RequestUri.ToString();
                    ExternalUrl = finalUrl;
                }
                else
                {
                    // Still on the courses host: the page carries a workaround URL that
                    // has to be requested separately to reach the external site.
                    using (var coursesPage = await firstResponse.Content.ReadAsStreamAsync())
                    {
                        var workaroundUrl = LazyHtmlParser.FindUrlWorkaroundInHtml(coursesPage);
                        ExternalUrl = workaroundUrl;

                        targetResponse = await CoursesClient.SessionClient.GetHeadersAsyncHttp(workaroundUrl);
                    }
                }

                // targetResponse may be the very same object as firstResponse; it is then
                // disposed by both using statements, exactly as before.
                using (targetResponse)
                {
                    using (var targetPage = await targetResponse.Content.ReadAsStreamAsync())
                    {
                        Title = LazyHtmlParser.FindTitleInHtml(targetPage);

                        AreExternalUrlAndTitleExtracted = true;
                    }
                }
            }
        }
Beispiel #7
0
        /// <summary>
        /// Resolves the target file name according to <c>SharedVars.NamingMethod</c>,
        /// prepares the full path, makes sure the containing directory exists and then
        /// downloads the file to that path.
        /// </summary>
        /// <param name="middlePath">Path segments handed to <c>FileNameHelpers.FullyPrepareFile</c> together with the file name.</param>
        public async Task Download(string[] middlePath)
        {
            await CoursesClient.LazyRefresh();

            await GetNameFromUrlNow();

            // Only CoursesName selects the courses-derived name; UrlName and every other
            // value use the name taken from the URL.
            string filename = SharedVars.NamingMethod == NamingMethod.CoursesName
                              ? FileFromCourses.FileNameAndExtensionOnly
                              : FileFromUrl.FileNameAndExtensionOnly;

            var filepath = FileNameHelpers.FullyPrepareFile(filename, middlePath);

            // Both file descriptions point at the same location on disk.
            FileFromCourses.FullPathAndFileAndExtension = filepath;
            FileFromUrl.FullPathAndFileAndExtension     = filepath;

            var targetDirectory = new FileInfo(filepath).Directory;

            if (targetDirectory != null && !targetDirectory.Exists)
            {
                targetDirectory.Create();
            }

            await GetAndSaveFile(filepath);
        }
        /// <summary>
        /// Streams the content behind <c>DownloadUrl</c> into a newly created file while
        /// <c>DownloadProgressTracker</c> is registered with the courses client.
        /// </summary>
        /// <param name="filename">Path of the file to create (an existing file is overwritten by <c>File.Create</c>).</param>
        protected override async Task GetAndSaveFile(string filename)
        {
            await CoursesClient.LazyRefresh();

            CoursesClient.AddEvent(DownloadProgressTracker);

            try
            {
                // we request headers because otherwise the file is first put into memory so we lose the whole point of streams
                // since we are using ReadAsStreamAsync, nothing is loaded into memory
                // although we can't use HeadersResponse from previously because that way we can't track progress
                using (var file = await CoursesClient.SessionClient.GetHeadersAsyncHttp(DownloadUrl))
                using (var fileStream = File.Create(filename))
                using (var content = await file.Content.ReadAsStreamAsync())
                {
                    await content.CopyToAsync(fileStream);

                    await fileStream.FlushAsync();
                }
            }
            finally
            {
                // Always unregister the tracker, even when the request or the copy fails;
                // otherwise the handler stays attached after a failed download.
                CoursesClient.RemoveEvent(DownloadProgressTracker);
            }
        }
        /// <summary>
        /// Downloads a course page and groups the links found on it into sections. Every
        /// header node starts a new section; file, folder, URL and page links are added to
        /// the section that is current when they are encountered.
        /// </summary>
        /// <param name="courseLink">Course whose page (<c>courseLink.Url</c>) is downloaded; it is also passed to every section created from a header.</param>
        /// <returns>
        /// The sections that contain at least one link. The same list is stored in
        /// <c>SharedVars.Sections</c>. It is empty when the page has no matching nodes.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">A node resolves to an item type this method has no case for.</exception>
        public static async Task <List <ISection> > ExtractSectionsForCourse(ICourseLink courseLink)
        {
            await CoursesClient.LazyRefresh();

            var coursePageText = await CoursesClient.SessionClient.GetStringAsyncHttp(courseLink.Url);

            CoursesClient.FindSessKey(coursePageText);

            var doc = new HtmlDocument();
            doc.LoadHtml(coursePageText);

            var headersLinks = doc.DocumentNode.SelectNodes(XPathFilterLinksHeadersFolders);

            SharedVars.Sections = new List <ISection>();

            // SelectNodes yields null, not an empty collection, when the XPath matches
            // nothing; report "no sections" instead of failing in the foreach below.
            if (headersLinks == null)
            {
                return(SharedVars.Sections);
            }

            // Links that appear before the first header are collected in this header-less section.
            var currentSection = new Section();

            SharedVars.Sections.Add(currentSection);

            foreach (var headerLink in headersLinks)
            {
                var itemType = TryGetItemType(headerLink);

                string innerText = null;
                string href      = null;

                // Only link-like items carry a text node and an href; headers have neither.
                if (itemType != ItemType.Header)
                {
                    innerText = headerLink.Descendants().First(d => d.Name == "#text").InnerText.DecodeHtml();
                    href      = headerLink.Attributes.First(l => l.Name == "href").Value;
                }

                switch (itemType)
                {
                case ItemType.Header:
                    var headerName = headerLink.InnerText.DecodeHtml();
                    var headerTag  = headerLink.OriginalName;
                    var headerId   = FindIdFromAncestors(headerLink);
                    currentSection = new Section(new Header(headerName, headerTag, headerId), courseLink);
                    SharedVars.Sections.Add(currentSection);
                    break;

                case ItemType.File:
                    currentSection.Links.Add(new FileLink(innerText, href, currentSection));
                    break;

                case ItemType.Folder:
                    currentSection.Links.Add(new FolderLink(innerText, href, currentSection));
                    break;

                case ItemType.Url:
                    currentSection.Links.Add(new ExternalLink(innerText, href, currentSection));
                    break;

                case ItemType.Page:
                    currentSection.Links.Add(new PageLink(innerText, href, currentSection));
                    break;

                default:
                    throw new ArgumentOutOfRangeException(nameof(itemType), itemType, "Unsupported course item type.");
                }
            }

            // Drop sections that ended up without any links.
            SharedVars.Sections = SharedVars.Sections.Where(s => s.Links.Any()).ToList();

            return(SharedVars.Sections);
        }