/// <summary>
/// Calculates number of occurrences of each word listed in keywords Meta Tag.
/// </summary>
/// <returns>
/// The dictionary produced by <c>CoreUtil.ProcessMetaTag</c> from the page's
/// <c>meta</c> elements, the stop-word dictionary and the all-word dictionary.
/// </returns>
/// <exception cref="Exception">
/// Thrown when the page has to be fetched and the response status is not OK.
/// </exception>
public override Dictionary<string, int> CalculateOccuranceInMetaTag()
{
    // Build the stop-word dictionary once, and only when filtering is enabled.
    if (IsFilterStopWords && stopwordDictionary == null)
    {
        stopwordDictionary = CoreUtil.ProcessStopWords(StopWords);
    }

    if (htmlPage == null)
    {
        var htmlWeb = new HtmlWeb();
        var lastStatusCode = HttpStatusCode.OK;

        // The response status is observed through the PostResponse callback.
        htmlWeb.PostResponse = (request, response) =>
        {
            if (response != null)
            {
                lastStatusCode = response.StatusCode;
            }
        };

        var loadedPage = htmlWeb.Load(Input);
        if (lastStatusCode != HttpStatusCode.OK)
        {
            throw new Exception($"{Constant.WEB_REQUEST_ERROR_MESSAGE} {lastStatusCode}");
        }

        // Cache the page only after the status check passed; otherwise a failed
        // response would be reused by later calls instead of being re-requested.
        htmlPage = loadedPage;
    }

    // NOTE(review): SelectNodes yields null (not an empty collection) when no
    // node matches; ProcessMetaTag is assumed to cope with that - confirm.
    var metaTagCollection = htmlPage.DocumentNode.SelectNodes("//meta");
    return CoreUtil.ProcessMetaTag(metaTagCollection, stopwordDictionary, allWordDictionary);
}
/// <summary>
/// Calculates number of occurrences of each word in a webpage.
/// </summary>
/// <returns>
/// The dictionary produced by <c>CoreUtil.ProcessInput</c> from the text of the
/// page's <c>body</c> element; the same instance is stored in
/// <c>allWordDictionary</c>.
/// </returns>
/// <exception cref="Exception">
/// Thrown when the page has to be fetched and the response status is not OK.
/// </exception>
public override Dictionary<string, int> CalculateOccuranceInText()
{
    // Build the stop-word dictionary once, and only when filtering is enabled.
    if (IsFilterStopWords && stopwordDictionary == null)
    {
        stopwordDictionary = CoreUtil.ProcessStopWords(StopWords);
    }

    if (htmlPage == null)
    {
        var htmlWeb = new HtmlWeb();
        var lastStatusCode = HttpStatusCode.OK;

        // The response status is observed through the PostResponse callback.
        htmlWeb.PostResponse = (request, response) =>
        {
            if (response != null)
            {
                lastStatusCode = response.StatusCode;
            }
        };

        var loadedPage = htmlWeb.Load(Input);
        if (lastStatusCode != HttpStatusCode.OK)
        {
            throw new Exception($"{Constant.WEB_REQUEST_ERROR_MESSAGE} {lastStatusCode}");
        }

        // Cache the page only after the status check passed; otherwise a failed
        // response would be reused by later calls instead of being re-requested.
        htmlPage = loadedPage;
    }

    // SelectSingleNode yields null when the document has no <body>; treat that
    // as empty text rather than failing with a NullReferenceException.
    // NOTE(review): assumes ProcessInput accepts an empty string - confirm.
    var bodyNode = htmlPage.DocumentNode.SelectSingleNode("//body");
    var bodyText = bodyNode?.InnerText ?? string.Empty;

    allWordDictionary = CoreUtil.ProcessInput(bodyText, stopwordDictionary);
    return allWordDictionary;
}
/// <summary>
/// Counts how often each word occurs in the plain-text input.
/// </summary>
/// <returns>
/// The dictionary returned by <c>CoreUtil.ProcessInput</c> for <c>Input</c>
/// and the current stop-word dictionary.
/// </returns>
public override Dictionary<string, int> CalculateOccuranceInText()
{
    // The stop-word dictionary is built on first use, and only when filtering is on.
    var mustBuildStopWords = IsFilterStopWords && stopwordDictionary == null;
    if (mustBuildStopWords)
    {
        stopwordDictionary = CoreUtil.ProcessStopWords(StopWords);
    }

    var wordCounts = CoreUtil.ProcessInput(Input, stopwordDictionary);
    return wordCounts;
}
// Checks that CoreUtil.ProcessStopWords returns an empty dictionary for the given input.
// NOTE(review): the input values presumably come from test-case attributes outside this view.
public void ProcessStopWord_TextWithNonLetterOrWhiteSpace_ReturnEmptyDictionary(string input)
{
    // arrange
    var expected = new Dictionary<string, int>(StringComparer.InvariantCultureIgnoreCase);

    // act
    var actual = CoreUtil.ProcessStopWords(input);

    // assert
    // Expected value goes first so that failure messages label the values correctly.
    Assert.AreEqual(expected, actual);
    Assert.IsEmpty(actual);
}
// Checks that CoreUtil.ProcessStopWords returns the six expected stop words, each mapped to 1.
// NOTE(review): the input values presumably come from test-case attributes outside this view.
public void ProcessStopWord_WithEnglishText_ReturnDictionaryOfSixElements(string input)
{
    // arrange
    var expected = new Dictionary<string, int>(StringComparer.InvariantCultureIgnoreCase)
    {
        { "don't", 1 },
        { "or", 1 },
        { "and", 1 },
        { "are", 1 },
        { "the", 1 },
        { "I'm", 1 }
    };

    // act
    var actual = CoreUtil.ProcessStopWords(input);

    // assert
    // Expected value goes first so that failure messages label the values correctly.
    Assert.AreEqual(expected, actual);
}