/// <summary>
/// Asynchronously builds the D3 tree node that holds the news headlines of one site.
/// The page at <paramref name="url"/> is fetched, the headline texts matching
/// <paramref name="headlineSelector"/> are extracted, cleaned and capped at a fixed
/// number of entries, and the result is wrapped in a node named after
/// <paramref name="siteName"/>.
/// If any step throws, the failure is written to the trace output and an empty
/// <see cref="D3TreeNode"/> is returned instead of propagating the exception.
/// </summary>
/// <param name="siteName">Name given to the node that groups the headlines.</param>
/// <param name="url">Address of the web page to read the headlines from.</param>
/// <param name="headlineSelector">Selector handed to the scraper to locate the headline elements.</param>
/// <returns>
/// A task whose result is the headline node for the site, or the value of
/// <c>D3TreeNode.Empty()</c> when the page could not be fetched or processed.
/// </returns>
private async Task<D3TreeNode> ConstructHeadlineData(string siteName, string url, string headlineSelector)
{
    // Upper bound on the number of headlines kept per site.
    const int MaxHeadlineCount = 12;

    try
    {
        /* Asynchronously read the web page and extract the news headlines. */
        // TODO: Refactor methods to form a piped chain of functions
        var doc = await WebScraper.GetHtmlDocument(url);
        var rawHeadlines = WebScraper.GetRawHeadlines(doc, headlineSelector);
        var cleanedHeadlines = WebScraper.CleanHeadlines(rawHeadlines);
        var headlines = WebScraper.GetLimitedHeadlines(cleanedHeadlines, MaxHeadlineCount);

        /* Construct the D3 tree data from the headline list. */
        var headlineNodes = D3TreeNodeMaker.GetNodesFromStringList(headlines);

        // The literal string "null" is passed as the second argument
        // (presumably the parent marker for a root node - confirm against D3TreeNodeMaker).
        return D3TreeNodeMaker.ConstructNode(siteName, "null", headlineNodes);
    }
    catch (Exception ex)
    {
        // Best-effort contract: callers receive an empty node rather than an exception,
        // but the failure is recorded so that it is not lost silently.
        // TODO: Propagate error to caller
        System.Diagnostics.Trace.TraceError(
            "ConstructHeadlineData failed for site '{0}' ({1}): {2}", siteName, url, ex);
        return D3TreeNode.Empty();
    }
}
/// <summary>
/// Runs the dependency injection of the scraper service helpers:
/// first <c>WebScraper.InjectDependencies()</c>, then
/// <c>D3TreeNodeMaker.InjectDependencies()</c>.
/// </summary>
/// <returns>Always <c>0</c>; any exception thrown by the helpers is not caught here.</returns>
public static int InjectScraperServiceDependecies()
{
    // Order is kept as in the original: scraper first, node maker second.
    WebScraper.InjectDependencies();
    D3TreeNodeMaker.InjectDependencies();

    return 0;
}