/// <summary>
/// Loads the page at <c>cwmIssue.IssuePageAbsoluteUrl</c> and yields one
/// <see cref="CWMArticle"/> per non-empty entry found in the product
/// description block of that page.
/// </summary>
/// <remarks>
/// This is an iterator method: nothing is downloaded until the result is
/// enumerated, and the page is loaded again on every enumeration. The page
/// load itself blocks the calling thread (<c>GetAwaiter().GetResult()</c>).
/// </remarks>
/// <param name="cwmIssue">The issue whose page is scraped; it is also passed
/// to every article that is produced.</param>
/// <returns>A lazily evaluated sequence of the articles listed on the page.</returns>
/// <exception cref="System.InvalidOperationException">
/// Thrown during enumeration when the page does not contain the element
/// structure this scraper relies on.
/// </exception>
public static IEnumerable<CWMArticle> ScrapeIssueArticles(CWMIssue cwmIssue)
{
    var context = BrowsingContext.New(
        Configuration.Default.WithDefaultLoader());
    var downloadPageUrl = cwmIssue.IssuePageAbsoluteUrl;

    using (var document = context
        .OpenAsync(downloadPageUrl)
        .GetAwaiter()
        .GetResult())
    {
        // Each lookup below can come back empty when the site layout changes.
        // Fail with a message naming the missing piece instead of letting a
        // NullReferenceException surface from the middle of the chain.
        var canvasWrapper = document.GetElementById("canvas-wrapper");
        if (canvasWrapper == null)
        {
            throw new System.InvalidOperationException(
                "Issue page has no element with id 'canvas-wrapper': "
                + downloadPageUrl);
        }

        var contentDiv = canvasWrapper.QuerySelector(
            "div#canvas > " +
            "div#page-body-wrapper > " +
            "div#page-body > " +
            "div#content-wrapper > " +
            "div#content");
        if (contentDiv == null)
        {
            throw new System.InvalidOperationException(
                "Issue page has no 'div#content' container: "
                + downloadPageUrl);
        }

        // The scraper relies on the third child of div#content.
        if (contentDiv.Children.Length < 3)
        {
            throw new System.InvalidOperationException(
                "Issue page 'div#content' has fewer than three children: "
                + downloadPageUrl);
        }

        var mainContentDiv = contentDiv.Children[2];

        var productBlockContentElement = mainContentDiv.QuerySelector(
            "div#productWrapper > " +
            "div.product-description > " +
            "div.sqs-layout > " +
            "div.row.sqs-row > " +
            "div.col > " +
            "div.sqs-block.html-block > " +
            "div.sqs-block-content");
        if (productBlockContentElement == null)
        {
            throw new System.InvalidOperationException(
                "Issue page has no product description block: "
                + downloadPageUrl);
        }

        // Entries that appear before the first recognised section heading
        // are reported under this category.
        var currentArticleCategory = "Unknown";

        var magazineSections = ValueEnum
            .EnumerateValues<MagazineSection, string>()
            .ToArray();

        // Created once rather than per entry.
        // NOTE(review): assumes the comparer is stateless; the argument 2 is
        // presumably a match tolerance - confirm against its definition.
        var sectionComparer = new FuzzyStringMatchingComparer(2);

        foreach (var productBlockElement in productBlockContentElement.Children)
        {
            // NOTE(review): the replaced character is reproduced exactly as it
            // appears in this file; if a non-breaking space was intended,
            // confirm the literal.
            var articleInfoStr = productBlockElement
                .TextContent
                .Replace(" ", "")
                .Trim();

            // Elements without any text would otherwise become articles with
            // an empty title (or be fed to the fuzzy section match).
            if (articleInfoStr.Length == 0)
            {
                continue;
            }

            // A section heading only switches the category applied to the
            // entries that follow it; it is not an article itself.
            if (magazineSections.Contains(articleInfoStr, sectionComparer))
            {
                currentArticleCategory = articleInfoStr;
                continue;
            }

            // Exactly one '-' is read as "<title>-<author>". Entries with no
            // hyphen, or with more than one, are kept whole as the title.
            var splitTerms = articleInfoStr.Split('-');

            if (splitTerms.Length == 2)
            {
                var articleName = splitTerms[0].Trim();
                var articleAuthor = splitTerms[1].Trim();

                yield return new CWMArticle(
                    currentArticleCategory,
                    articleName,
                    articleAuthor,
                    cwmIssue);
            }
            else
            {
                // articleInfoStr is already trimmed above.
                yield return new CWMArticle(
                    currentArticleCategory,
                    articleInfoStr,
                    "unknown",
                    cwmIssue);
            }
        }
    }
}
/// <summary>
/// Extension-method form of <c>CWMInterpreter.ScrapeIssueArticles</c> that
/// enumerates the scraped sequence once and returns it as an array.
/// </summary>
/// <param name="this">The issue whose articles are requested.</param>
/// <returns>The scraped articles, fully materialized.</returns>
public static IReadOnlyList<CWMArticle> GetIssueArticles(this CWMIssue @this)
{
    var scrapedArticles = CWMInterpreter.ScrapeIssueArticles(@this);

    // ToArray drives the lazy scraper to completion before returning.
    return scrapedArticles.ToArray();
}