Beispiel #1
0
        /// <summary>
        /// Finds the link in <c>wikiLinks</c> that best matches <paramref name="linkTitle"/>.
        /// </summary>
        /// <remarks>
        /// Resolution order:
        /// 1. the single link whose LinkText equals the title exactly;
        /// 2. the single link whose PageTitle equals the title exactly;
        /// 3. otherwise the link whose LinkText or PageTitle has the smallest Levenshtein
        ///    distance to the title, provided that distance is within 10% of the title's
        ///    length (rounded up). When several links tie, the one latest in the list wins.
        /// </remarks>
        /// <param name="linkTitle">The text to look up; may contain small typos.</param>
        /// <returns>The matching link, or <c>null</c> when nothing is close enough.</returns>
        public WikiLink FindLink(string linkTitle)
        {
            // An exact match only wins outright when it is unambiguous.
            var matchingLinkTexts = wikiLinks.Where(x => x.LinkText == linkTitle).ToList();

            if (matchingLinkTexts.Count == 1)
            {
                return matchingLinkTexts[0];
            }

            var matchingPageTitles = wikiLinks.Where(x => x.PageTitle == linkTitle).ToList();

            if (matchingPageTitles.Count == 1)
            {
                return matchingPageTitles[0];
            }

            // Fuzzy matching needs a real title: a null title would throw on .Length below, and
            // an empty one yields a threshold of 0, which only non-empty candidates are tested
            // against and so can never be met by an edit distance.
            if (string.IsNullOrEmpty(linkTitle))
            {
                return null;
            }

            // Attempt to identify typos. The threshold is 10% of the title length, measured as
            // Levenshtein distance (number of corrections needed to get the strings to match).
            // It does not depend on the candidate, so compute it once.
            var maxLevenshteinDistance = (int)Math.Ceiling((decimal)linkTitle.Length * 0.10m);

            WikiLink bestLink     = null;
            int      bestDistance = int.MaxValue;

            foreach (var link in wikiLinks)
            {
                // Missing fields can never match; int.MaxValue keeps them out of the running.
                int linkTextDistance = string.IsNullOrEmpty(link.LinkText)
                                       ? int.MaxValue
                                       : LevenshteinDistance(link.LinkText, linkTitle);

                int pageTitleDistance = string.IsNullOrEmpty(link.PageTitle)
                                        ? int.MaxValue
                                        : LevenshteinDistance(link.PageTitle, linkTitle);

                int distance = Math.Min(linkTextDistance, pageTitleDistance);

                // Keep the closest candidate instead of whichever close one comes last, so an
                // (ambiguous) exact match is never displaced by a merely similar link.
                if (distance <= maxLevenshteinDistance && distance <= bestDistance)
                {
                    bestLink     = link;
                    bestDistance = distance;
                }
            }

            return bestLink;
        }
Beispiel #2
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/> and builds one <c>WikiLink</c> for every
        /// anchor element found inside the document body.
        /// </summary>
        /// <remarks>
        /// For each anchor, LinkText is the anchor's inner text, PageTitle is its <c>title</c>
        /// attribute (or <c>null</c>) and Url is its <c>href</c> attribute (or <c>null</c>),
        /// rewritten as follows:
        /// protocol-relative (<c>//host/path</c>) hrefs get an <c>https:</c> scheme;
        /// root-relative (<c>/path</c>) hrefs are prefixed with <c>https://en.wikipedia.org</c>;
        /// fragment (<c>#id</c>) hrefs are appended to <paramref name="url"/>;
        /// anything else is kept as-is.
        /// </remarks>
        /// <param name="url">Address of the page to fetch.</param>
        /// <returns>The links found on the page; empty when the page has no anchors.</returns>
        public async Task <List <WikiLink> > GeneratePageLinks(string url)
        {
            var wikiLinks = new List <WikiLink>();

            string pageContents;

            // Dispose the response once the body has been read so the connection is released.
            using (var response = await _httpClient.GetAsync(url))
            {
                pageContents = await response.Content.ReadAsStringAsync();
            }

            var pageDocument = new HtmlDocument();

            pageDocument.LoadHtml(pageContents);

            var links = pageDocument.DocumentNode.SelectNodes("//body//a");

            // SelectNodes yields null (not an empty collection) when nothing matches.
            if (links == null)
            {
                return wikiLinks;
            }

            foreach (var link in links)
            {
                var href = link.GetAttributeValue("href", null);

                if (string.IsNullOrEmpty(href) == false)
                {
                    if (href[0] == '/')
                    {
                        // "//host/path" is protocol-relative and already names its host; only a
                        // scheme is missing. A single leading slash is relative to the wiki root.
                        href = (href.Length > 1 && href[1] == '/')
                               ? $"https:{href}"
                               : $"https://en.wikipedia.org{href}";
                    }
                    else if (href[0] == '#')
                    {
                        // In-page anchor: resolve against the page that was requested.
                        href = $"{url}{href}";
                    }
                }

                var newWikiLink = new WikiLink();
                newWikiLink.LinkText  = link.InnerText;
                newWikiLink.Url       = href;
                newWikiLink.PageTitle = link.GetAttributeValue("title", null);
                wikiLinks.Add(newWikiLink);
            }

            return wikiLinks;
        }