Ejemplo n.º 1
0
        /// <summary>
        /// Scores a single HtmlNode as lunch menu content by comparing its lower-cased,
        /// HTML-decoded and trimmed inner text against the entries of <c>_lunchMenuKeywords</c>.
        /// </summary>
        /// <remarks>
        /// An exact match (the whole text equals a keyword) takes precedence over a partial
        /// match (the text contains a keyword). In both cases the first matching keyword wins
        /// and is passed to <c>UpdateLunchMenuKeyword</c>.
        /// </remarks>
        /// <param name="node">Node to be scored. A null node is treated as "no match".</param>
        /// <returns>
        /// A score point describing the match. When nothing matches, the detection type is
        /// <c>StringMatchType.NoMatch</c> and the location is <c>LunchMenuDetectionLocation.Unknown</c>.
        /// </returns>
        public LunchMenuScorePoint ScoreNode(HtmlNode node)
        {
            var scorePoint = new LunchMenuScorePoint
            {
                DetectionLocation = LunchMenuDetectionLocation.Unknown,
                DetectionType = StringMatchType.NoMatch
            };

            // Read InnerText once; it is needed both for matching and for reporting.
            var innerText = node == null ? null : node.InnerText;
            if (innerText == null)
            {
                return scorePoint;
            }

            // Lower-case with the invariant culture so matching does not depend on the
            // current thread culture (e.g. the Turkish dotted/dotless 'i' rules).
            var nodeText = Utils.HtmlDecode(innerText.ToLowerInvariant()).Trim();
            if (string.IsNullOrEmpty(nodeText))
            {
                return scorePoint;
            }

            // ----------------------------------------------------------------------------------------------

            // try exact match first, then fall back to partial match
            var matchType = StringMatchType.Exact;
            var match = _lunchMenuKeywords.FirstOrDefault(keyword => keyword.Word == nodeText);
            if (match == null)
            {
                matchType = StringMatchType.Partial;

                // Skip keywords without text: an empty keyword is contained in every string
                // and a null one makes string.Contains throw.
                match = _lunchMenuKeywords.FirstOrDefault(
                    keyword => !string.IsNullOrEmpty(keyword.Word) && nodeText.Contains(keyword.Word));
            }

            if (match == null)
            {
                return scorePoint;
            }

            scorePoint.DetectionLocation = LunchMenuDetectionLocation.Content;
            scorePoint.DetectionType = matchType;
            scorePoint.DetectedText = innerText;
            scorePoint.DetectedKeyword = match.Word;
            scorePoint.PointsGiven = match.Weight;
            UpdateLunchMenuKeyword(match);

            // NOTE(review): the previous code called _lunchMenuKeywords.ToList().RemoveAll(...)
            // here "not to match it again". That only filtered a temporary copy, so the keyword
            // was never actually removed. The no-op has been dropped and the effective behaviour
            // (keywords stay available for later calls) is unchanged. If one-shot matching is
            // really wanted, the backing collection itself has to be modified - confirm first.

            return scorePoint;
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Decides whether a scored link qualifies as a deep link.
 /// </summary>
 /// <param name="link">The link text that was scored.</param>
 /// <param name="scoreForLink">Score computed for <paramref name="link"/>; null disqualifies the link.</param>
 /// <param name="checkedDeepLinks">Links that have already been checked; a link found here is rejected.</param>
 /// <returns>
 /// True only when a score exists, it was given at least 5 points, the link is not in
 /// <paramref name="checkedDeepLinks"/> and the link is shorter than 200 characters.
 /// </returns>
 private static bool IsValidDeepLink(string link, LunchMenuScorePoint scoreForLink, ICollection<string> checkedDeepLinks)
 {
     // No score at all: nothing to validate.
     if (scoreForLink == null)
     {
         return false;
     }

     // The score must reach the minimum number of points.
     var hasEnoughPoints = scoreForLink.PointsGiven >= 5;
     if (!hasEnoughPoints)
     {
         return false;
     }

     // Reject links that were already checked.
     if (checkedDeepLinks.Contains(link))
     {
         return false;
     }

     // Finally, reject overly long links.
     return link.Length < 200;
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Scores a link as a deep link candidate by comparing its lower-cased, HTML-decoded
        /// and trimmed text against the entries of <c>_deepLinkKeywords</c>.
        /// </summary>
        /// <remarks>
        /// An exact match (the whole text equals a keyword) takes precedence over a partial
        /// match (the text contains a keyword). In both cases the first matching keyword wins
        /// and is passed to <c>UpdateDeepLinkKeyword</c>.
        /// </remarks>
        /// <param name="link">Link text to be scored. A null link is treated as "no match".</param>
        /// <returns>
        /// A score point describing the match. When nothing matches, the detection type is
        /// <c>StringMatchType.NoMatch</c> and the location is <c>LunchMenuDetectionLocation.Unknown</c>.
        /// </returns>
        public LunchMenuScorePoint ScoreDeepLink(string link)
        {
            var scorePoint = new LunchMenuScorePoint
            {
                DetectionLocation = LunchMenuDetectionLocation.Unknown,
                DetectionType = StringMatchType.NoMatch
            };

            if (link == null)
            {
                return scorePoint;
            }

            // Lower-case with the invariant culture so matching does not depend on the
            // current thread culture (e.g. the Turkish dotted/dotless 'i' rules).
            var linkText = Utils.HtmlDecode(link.ToLowerInvariant()).Trim();
            if (string.IsNullOrEmpty(linkText))
            {
                return scorePoint;
            }

            // ----------------------------------------------------------------------------------------------

            // try exact match first, then fall back to partial match
            var matchType = StringMatchType.Exact;
            var match = _deepLinkKeywords.FirstOrDefault(keyword => keyword.Word == linkText);
            if (match == null)
            {
                matchType = StringMatchType.Partial;

                // Skip keywords without text: an empty keyword is contained in every string
                // and a null one makes string.Contains throw.
                match = _deepLinkKeywords.FirstOrDefault(
                    keyword => !string.IsNullOrEmpty(keyword.Word) && linkText.Contains(keyword.Word));
            }

            if (match == null)
            {
                return scorePoint;
            }

            scorePoint.DetectionLocation = LunchMenuDetectionLocation.UrlPath;
            scorePoint.DetectionType = matchType;
            scorePoint.DeepLinkContentType = (DeepLinkContentType)Enum.ToObject(typeof(DeepLinkContentType), match.ContentType);
            scorePoint.DetectedText = link;
            scorePoint.DetectedKeyword = match.Word;
            scorePoint.PointsGiven = match.Weight;
            UpdateDeepLinkKeyword(match);

            // NOTE(review): the previous code called _deepLinkKeywords.ToList().RemoveAll(...)
            // here "not to match it again". That only filtered a temporary copy, so the keyword
            // was never actually removed. The no-op has been dropped and the effective behaviour
            // (keywords stay available for later calls) is unchanged. If one-shot matching is
            // really wanted, the backing collection itself has to be modified - confirm first.

            return scorePoint;
        }