/// <summary>
/// Looks up a link in <c>wikiLinks</c> by link text or page title, falling back to the closest
/// near-match when no unambiguous exact match exists.
/// </summary>
/// <param name="linkTitle">The link text or page title to search for.</param>
/// <returns>
/// The only link whose link text equals <paramref name="linkTitle"/>; otherwise the only link whose
/// page title equals it; otherwise the link whose link text or page title has the smallest Levenshtein
/// distance to <paramref name="linkTitle"/> within the typo threshold (the later link wins a tie);
/// or <c>null</c> when nothing qualifies.
/// </returns>
public WikiLink FindLink(string linkTitle)
{
    // Check for ambiguity: an exact match is only trusted when it is the single match.
    var matchingLinkTexts = wikiLinks.Where(x => x.LinkText == linkTitle).ToList();
    if (matchingLinkTexts.Count == 1)
    {
        return matchingLinkTexts[0];
    }

    var matchingPageTitles = wikiLinks.Where(x => x.PageTitle == linkTitle).ToList();
    if (matchingPageTitles.Count == 1)
    {
        return matchingPageTitles[0];
    }

    // A null title cannot be measured (linkTitle.Length would throw), and an empty title yields a
    // threshold of zero while empty candidates are skipped below, so there is nothing to fuzzy-match.
    if (string.IsNullOrEmpty(linkTitle))
    {
        return null;
    }

    // Attempt to identify typos. The threshold is 10% of the title length (rounded up), measured as
    // the Levenshtein distance (number of corrections needed to get the strings to match).
    // It does not depend on the candidate link, so it is computed once.
    var maxLevenshteinDistance = (int)Math.Ceiling(linkTitle.Length * 0.10m);

    WikiLink bestLink = null;
    int bestDistance = int.MaxValue;

    foreach (var link in wikiLinks)
    {
        // int.MaxValue marks "no usable text", which can never satisfy the threshold.
        int distance = int.MaxValue;

        if (!string.IsNullOrEmpty(link.LinkText))
        {
            distance = LevenshteinDistance(link.LinkText, linkTitle);
        }

        if (!string.IsNullOrEmpty(link.PageTitle))
        {
            distance = Math.Min(distance, LevenshteinDistance(link.PageTitle, linkTitle));
        }

        // Don't return the first similar match: a closer (or exact) match may appear later.
        // "<=" keeps the later link on equal distance.
        if (distance <= maxLevenshteinDistance && distance <= bestDistance)
        {
            bestDistance = distance;
            bestLink = link;
        }
    }

    return bestLink;
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and builds a <c>WikiLink</c> for every
/// <c>&lt;a&gt;</c> element found inside its <c>&lt;body&gt;</c>.
/// </summary>
/// <param name="url">Address of the page to download; also used as the base for fragment-only links.</param>
/// <returns>
/// One entry per anchor, carrying its inner text, its <c>href</c> and its <c>title</c> attribute
/// (either attribute is <c>null</c> when absent). The list is empty when the page has no anchors.
/// </returns>
public async Task<List<WikiLink>> GeneratePageLinks(string url)
{
    var wikiLinks = new List<WikiLink>();

    string pageContents;
    // Dispose the response so its content stream and connection are released promptly.
    using (var response = await _httpClient.GetAsync(url))
    {
        pageContents = await response.Content.ReadAsStringAsync();
    }

    var pageDocument = new HtmlDocument();
    pageDocument.LoadHtml(pageContents);

    // SelectNodes returns null (not an empty collection) when the XPath matches nothing.
    var links = pageDocument.DocumentNode.SelectNodes("//body//a");
    if (links == null)
    {
        return wikiLinks;
    }

    foreach (var link in links)
    {
        var newWikiLink = new WikiLink
        {
            LinkText = link.InnerText,
            Url = link.GetAttributeValue("href", null),
            PageTitle = link.GetAttributeValue("title", null)
        };

        if (!string.IsNullOrEmpty(newWikiLink.Url))
        {
            if (newWikiLink.Url.StartsWith("//", StringComparison.Ordinal))
            {
                // Protocol-relative link ("//host/path"): it already names its host, so only add the scheme.
                newWikiLink.Url = $"https:{newWikiLink.Url}";
            }
            else if (newWikiLink.Url[0] == '/')
            {
                // Site-relative link: resolve against the English Wikipedia host.
                newWikiLink.Url = $"https://en.wikipedia.org{newWikiLink.Url}";
            }
            else if (newWikiLink.Url[0] == '#')
            {
                // Fragment-only link: points at an anchor within the requested page.
                newWikiLink.Url = $"{url}{newWikiLink.Url}";
            }
        }

        wikiLinks.Add(newWikiLink);
    }

    return wikiLinks;
}