/// <summary>
/// Scores a single HtmlNode as lunch menu content.
/// </summary>
/// <remarks>
/// The node's inner text is lower-cased, HTML-decoded and trimmed, then compared against
/// the lunch menu keywords: first looking for a keyword equal to the whole text, then for
/// a keyword contained in the text. The first keyword found determines the score. A matched
/// keyword is passed to UpdateLunchMenuKeyword and then removed from the keyword collection
/// so that it is not matched again.
/// </remarks>
/// <param name="node">Node to be scored.</param>
/// <returns>
/// A score point describing the match, or a score point with
/// <c>StringMatchType.NoMatch</c> when the node is null, has no text, or matches no keyword.
/// </returns>
public LunchMenuScorePoint ScoreNode(HtmlNode node)
{
    var scorePoint = new LunchMenuScorePoint
    {
        DetectionLocation = LunchMenuDetectionLocation.Unknown,
        DetectionType = StringMatchType.NoMatch
    };

    // A missing node is treated the same way as a node without text.
    if (node == null || node.InnerText == null)
    {
        return scorePoint;
    }

    // Invariant lower-casing keeps keyword matching independent of the machine's culture.
    var nodeText = Utils.HtmlDecode(node.InnerText.ToLowerInvariant()).Trim();
    if (string.IsNullOrEmpty(nodeText))
    {
        return scorePoint;
    }

    // An exact match over all keywords takes precedence over any partial match.
    var matchType = StringMatchType.Exact;
    var match = _lunchMenuKeywords.FirstOrDefault(keyword => keyword.Word.Equals(nodeText));
    if (match == null)
    {
        matchType = StringMatchType.Partial;
        match = _lunchMenuKeywords.FirstOrDefault(keyword => nodeText.Contains(keyword.Word));
    }

    if (match == null)
    {
        return scorePoint;
    }

    scorePoint.DetectionLocation = LunchMenuDetectionLocation.Content;
    scorePoint.DetectionType = matchType;
    scorePoint.DetectedText = node.InnerText;
    scorePoint.DetectedKeyword = match.Word;
    scorePoint.PointsGiven = match.Weight;

    UpdateLunchMenuKeyword(match);

    // Remove the keyword so it is not matched again. The previous code called
    // ToList().RemoveAll(...), which only modified a temporary copy and removed nothing.
    var matchedWord = match.Word;
    RemoveMatchedLunchMenuKeywords(_lunchMenuKeywords, keyword => keyword.Word == matchedWord);

    return scorePoint;
}

/// <summary>
/// Removes every keyword satisfying <paramref name="isMatch"/> from <paramref name="keywords"/>.
/// </summary>
/// <remarks>
/// Nothing is removed when <paramref name="keywords"/> is not a writable collection
/// (for example an array or a lazily evaluated sequence).
/// </remarks>
/// <param name="keywords">Keyword collection to remove from.</param>
/// <param name="isMatch">Predicate selecting the keywords to remove.</param>
private static void RemoveMatchedLunchMenuKeywords<TKeyword>(IEnumerable<TKeyword> keywords, Func<TKeyword, bool> isMatch)
{
    var collection = keywords as ICollection<TKeyword>;
    if (collection == null || collection.IsReadOnly)
    {
        return;
    }

    // Snapshot the matches first; removing while enumerating the collection would throw.
    foreach (var keyword in collection.Where(isMatch).ToList())
    {
        collection.Remove(keyword);
    }
}
/// <summary>
/// Decides whether a scored link qualifies as a deep link.
/// </summary>
/// <param name="link">The link that was scored.</param>
/// <param name="scoreForLink">Score given to the link; may be null.</param>
/// <param name="checkedDeepLinks">Links that have already been checked.</param>
/// <returns>
/// True when a score exists with at least 5 points, the link is not among
/// <paramref name="checkedDeepLinks"/>, and the link is shorter than 200 characters.
/// </returns>
private static bool IsValidDeepLink(string link, LunchMenuScorePoint scoreForLink, ICollection<string> checkedDeepLinks)
{
    // The checks run in the same order as a short-circuiting && chain would.
    if (scoreForLink == null)
    {
        return false;
    }

    var scoresHighEnough = scoreForLink.PointsGiven >= 5;
    if (!scoresHighEnough)
    {
        return false;
    }

    if (checkedDeepLinks.Contains(link))
    {
        return false;
    }

    return link.Length < 200;
}
/// <summary>
/// Scores a single link as a deep link.
/// </summary>
/// <remarks>
/// The link is lower-cased, HTML-decoded and trimmed, then compared against the deep link
/// keywords: first looking for a keyword equal to the whole text, then for a keyword
/// contained in the text. The first keyword found determines the score and the content type.
/// A matched keyword is passed to UpdateDeepLinkKeyword and then removed from the keyword
/// collection so that it is not matched again.
/// </remarks>
/// <param name="link">Link to be scored.</param>
/// <returns>
/// A score point describing the match, or a score point with
/// <c>StringMatchType.NoMatch</c> when the link is null, empty, or matches no keyword.
/// </returns>
public LunchMenuScorePoint ScoreDeepLink(string link)
{
    var scorePoint = new LunchMenuScorePoint
    {
        DetectionLocation = LunchMenuDetectionLocation.Unknown,
        DetectionType = StringMatchType.NoMatch
    };

    // A missing link is treated the same way as an empty one.
    if (link == null)
    {
        return scorePoint;
    }

    // Invariant lower-casing keeps keyword matching independent of the machine's culture.
    var linkText = Utils.HtmlDecode(link.ToLowerInvariant()).Trim();
    if (string.IsNullOrEmpty(linkText))
    {
        return scorePoint;
    }

    // An exact match over all keywords takes precedence over any partial match.
    var matchType = StringMatchType.Exact;
    var match = _deepLinkKeywords.FirstOrDefault(keyword => keyword.Word.Equals(linkText));
    if (match == null)
    {
        matchType = StringMatchType.Partial;
        match = _deepLinkKeywords.FirstOrDefault(keyword => linkText.Contains(keyword.Word));
    }

    if (match == null)
    {
        return scorePoint;
    }

    scorePoint.DetectionLocation = LunchMenuDetectionLocation.UrlPath;
    scorePoint.DetectionType = matchType;
    scorePoint.DeepLinkContentType = (DeepLinkContentType)Enum.ToObject(typeof(DeepLinkContentType), match.ContentType);
    scorePoint.DetectedText = link;
    scorePoint.DetectedKeyword = match.Word;
    scorePoint.PointsGiven = match.Weight;

    UpdateDeepLinkKeyword(match);

    // Remove the keyword so it is not matched again. The previous code called
    // ToList().RemoveAll(...), which only modified a temporary copy and removed nothing.
    var matchedWord = match.Word;
    RemoveMatchedDeepLinkKeywords(_deepLinkKeywords, keyword => keyword.Word == matchedWord);

    return scorePoint;
}

/// <summary>
/// Removes every keyword satisfying <paramref name="isMatch"/> from <paramref name="keywords"/>.
/// </summary>
/// <remarks>
/// Nothing is removed when <paramref name="keywords"/> is not a writable collection
/// (for example an array or a lazily evaluated sequence).
/// </remarks>
/// <param name="keywords">Keyword collection to remove from.</param>
/// <param name="isMatch">Predicate selecting the keywords to remove.</param>
private static void RemoveMatchedDeepLinkKeywords<TKeyword>(IEnumerable<TKeyword> keywords, Func<TKeyword, bool> isMatch)
{
    var collection = keywords as ICollection<TKeyword>;
    if (collection == null || collection.IsReadOnly)
    {
        return;
    }

    // Snapshot the matches first; removing while enumerating the collection would throw.
    foreach (var keyword in collection.Where(isMatch).ToList())
    {
        collection.Remove(keyword);
    }
}