/// <summary>
/// Finds the country name that has the smallest Levenshtein distance to any of the words
/// passed within the message body.
/// </summary>
/// <param name="words">The words contained within the message.</param>
/// <param name="countries">The candidate country names.</param>
/// <returns>
/// The most probable country name. On a tie the first pair encountered wins (words are the outer
/// loop, countries the inner loop). An empty string is returned when no pair was compared.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="words"/> or <paramref name="countries"/> is <c>null</c>.
/// </exception>
private string CalculateBestCountryName(IEnumerable<string> words, IEnumerable<string> countries)
{
    if (words == null)
    {
        throw new ArgumentNullException(nameof(words));
    }

    if (countries == null)
    {
        throw new ArgumentNullException(nameof(countries));
    }

    // Snapshot the candidates once: the inner loop runs for every word, and re-enumerating a
    // lazily evaluated sequence each time would repeat whatever work produces it.
    var countryCandidates = new List<string>(countries);

    var bestCountryName = "";
    var lowestDistance = int.MaxValue;
    foreach (var word in words)
    {
        foreach (var countryCandidate in countryCandidates)
        {
            var distance = LevenshteinDistance.Calculate(word, countryCandidate);

            // Strict comparison keeps the earliest candidate when distances are equal.
            if (distance < lowestDistance)
            {
                lowestDistance = distance;
                bestCountryName = countryCandidate;
            }
        }
    }

    return bestCountryName;
}
/// <inheritdoc cref="IEditor.Edit"/>
/// <remarks>
/// Dictionary words at distance 1 take precedence. Words at distance 2 are only considered when no
/// distance-1 word exists and their edit route contains neither "DD" nor "II".
/// A single suggestion is returned as-is, several are returned as <c>{a b c}</c>, and when nothing
/// qualifies the input is returned as <c>{word?}</c>.
/// </remarks>
public string Edit(string word, string[] dictionaryWords)
{
    var singlePrescriptionList = new List<string>();
    var doublePrescriptionList = new List<string>();

    foreach (var dictionaryWord in dictionaryWords)
    {
        var prescription = LevenshteinDistance.Calculate(word, dictionaryWord);
        if (prescription.Distance == 1)
        {
            singlePrescriptionList.Add(dictionaryWord);
        }
        // Short-circuit '&&' throughout: the route is only inspected once the distance is known to be 2.
        // The original mixed in a non-short-circuit '&', which evaluated Route.Contains for every word.
        // NOTE(review): "DD"/"II" presumably mark two adjacent deletions/insertions - confirm against
        // the route encoding produced by LevenshteinDistance.
        else if (prescription.Distance == 2
                 && !prescription.Route.Contains("DD")
                 && !prescription.Route.Contains("II"))
        {
            doublePrescriptionList.Add(dictionaryWord);
        }
    }

    var resultCollection = singlePrescriptionList.Count > 0 ? singlePrescriptionList : doublePrescriptionList;

    if (resultCollection.Count == 0)
    {
        // Nothing close enough: echo the word marked as unknown.
        return $"{{{word}?}}";
    }

    if (resultCollection.Count == 1)
    {
        return resultCollection[0];
    }

    return $"{{{String.Join(' ', resultCollection)}}}";
}
/// <summary>
/// Resolves the township for a city by taking the first postal-code record whose city starts with
/// the given name or with the alternative name registered for it in mapCityToAlternativeName.
/// </summary>
/// <param name="city">City name to look up; must not be null.</param>
/// <returns>The township of the matching postal-code record.</returns>
/// <exception cref="Exception">No postal-code record matches; the failure is logged first.</exception>
private township GetTownship(string city)
{
    // Lower-cased once; used both as the dictionary key and for the comparison further down.
    string cityLower = city.ToLower();

    // Single lookup instead of ContainsKey followed by the indexer.
    string cityNameAlternative;
    if (!mapCityToAlternativeName.TryGetValue(cityLower, out cityNameAlternative))
    {
        cityNameAlternative = city;
    }

    postalcode matchedPostalCode = this.database.postalcodes.FirstOrDefault(c => c.city.StartsWith(city) || c.city.StartsWith(cityNameAlternative));
    if (matchedPostalCode == null)
    {
        Logger.Instance.LogError(this.GetFilePath(), string.Format("Could not find township for city {0}", city));
        throw new Exception("Township not found.");
    }

    // Report a correction only when the stored city name differs by more than two edits
    // (case-insensitively) from what was asked for.
    string matchedCityLower = matchedPostalCode.city.ToLower();
    if (!cityLower.Equals(matchedCityLower) && LevenshteinDistance.Calculate(cityLower, matchedCityLower) > 2)
    {
        Logger.Instance.LogCorrection(this.GetFilePath(), city, matchedPostalCode.city);
    }

    return matchedPostalCode.township;
}
/// <summary>
/// Scans <paramref name="elements"/> for the element whose lower-cased ComparableString has the
/// smallest Levenshtein distance to <paramref name="name"/>.
/// </summary>
/// <param name="name">Name to match; compared as given (the candidates are lower-cased).</param>
/// <param name="elements">Candidates to scan, in order.</param>
/// <returns>
/// The closest element when its distance is within _settings.AcceptedNumberOfErrors and either
/// exactly one acceptable improvement was seen during the scan or _settings.AcceptMultipleResults
/// is set; otherwise <c>default(T)</c>.
/// </returns>
private T getElementClosestToName(string name, List<T> elements)
{
    T bestMatch = default(T);
    int bestDistance = 100; // only distances below this starting value can ever be selected
    int acceptableHits = 0;

    foreach (var candidate in elements)
    {
        var candidateName = candidate.ComparableString.ToLower();
        var candidateDistance = LevenshteinDistance.Calculate(name, candidateName);

        if (candidateDistance >= bestDistance)
        {
            // Not a strict improvement; ties keep the earlier element and are not counted.
            continue;
        }

        bestDistance = candidateDistance;
        bestMatch = candidate;

        if (candidateDistance <= _settings.AcceptedNumberOfErrors)
        {
            acceptableHits++;
        }
    }

    if (bestDistance > _settings.AcceptedNumberOfErrors)
    {
        return default(T);
    }

    if (acceptableHits != 1 && !_settings.AcceptMultipleResults)
    {
        return default(T);
    }

    return bestMatch;
}
/// <summary>
/// Checks that a Cyrillic word and the same word with one inserted hyphen are one edit apart.
/// </summary>
public void StringComparisonTest()
{
    const string hyphenated = "собствен-ности";
    const string plain = "собственности";

    int distance = LevenshteinDistance.Calculate(hyphenated, plain);

    Assert.AreEqual(1, distance);
}
/// <summary>
/// Searches the configured actor providers for a person and returns the matches ordered by how
/// closely their name resembles the requested one (case-insensitive Levenshtein distance).
/// </summary>
/// <param name="searchInfo">Lookup request; a null request yields an empty result.</param>
/// <param name="cancellationToken">Token forwarded to the provider search and error reporting.</param>
/// <returns>The provider results, best match first; empty when nothing was found.</returns>
public async Task<IEnumerable<RemoteSearchResult>> GetSearchResults(PersonLookupInfo searchInfo, CancellationToken cancellationToken)
{
    var result = new List<RemoteSearchResult>();
    if (searchInfo == null)
    {
        return result;
    }

    var providerList = new List<string> { "Freeones" };
    foreach (var siteName in providerList)
    {
        var title = $"{siteName} {searchInfo.Name}";
        var site = Helper.GetSiteFromTitle(title);
        if (site.siteNum == null)
        {
            // The title did not resolve to a known site. Without this guard the indexing below
            // would throw a NullReferenceException.
            continue;
        }

        string actorName = Helper.GetClearTitle(title, site.siteName);
        Logger.Info($"site: {site.siteNum[0]}:{site.siteNum[1]} ({site.siteName})");
        Logger.Info($"actorName: {actorName}");

        var provider = Helper.GetActorProviderBySiteID(site.siteNum[0]);
        if (provider != null)
        {
            Logger.Info($"provider: {provider}");
            try
            {
                result = await provider.Search(site.siteNum, actorName, cancellationToken).ConfigureAwait(false);
            }
            catch (Exception e)
            {
                // Best effort: log and report the failure, then continue with whatever is in result.
                Logger.Error($"Actor Search error: \"{e}\"");
                await Analitycs.Send(title, site.siteNum, site.siteName, actorName, null, provider.ToString(), e, cancellationToken).ConfigureAwait(false);
            }

            if (result.Any())
            {
                foreach (var scene in result)
                {
                    // Prefix the provider id with the site coordinates.
                    scene.ProviderIds[this.Name] = $"{site.siteNum[0]}#{site.siteNum[1]}#" + scene.ProviderIds[this.Name];
                    scene.Name = scene.Name.Trim();
                }

                result = result.OrderByDescending(o => 100 - LevenshteinDistance.Calculate(searchInfo.Name, o.Name, StringComparison.OrdinalIgnoreCase)).ToList();
            }

            // The first site that has a provider ends the search, whether or not it returned results.
            break;
        }
    }

    return result;
}
/// <summary>
/// Queries the site's search page and converts every <c>li</c> node whose class contains
/// <c>item-list</c> into a <see cref="RemoteSearchResult"/>.
/// </summary>
/// <param name="siteNum">Site coordinates; a null value yields an empty result.</param>
/// <param name="searchTitle">
/// Title to search for. When its second whitespace-separated token is an integer, the first two
/// tokens are joined as <c>first-second</c> and used as the search term and ranking key.
/// </param>
/// <param name="searchDate">Not used by this method.</param>
/// <param name="cancellationToken">Token forwarded to the HTTP request.</param>
/// <returns>The parsed results; IndexNumber is only set when an id could be derived from the title.</returns>
public async Task<List<RemoteSearchResult>> Search(int[] siteNum, string searchTitle, DateTime? searchDate, CancellationToken cancellationToken)
{
    var result = new List<RemoteSearchResult>();
    if (siteNum == null || string.IsNullOrEmpty(searchTitle))
    {
        return result;
    }

    string searchJAVID = null;
    var titleParts = searchTitle.Split();
    if (titleParts.Length > 1 && int.TryParse(titleParts[1], out _))
    {
        searchJAVID = $"{titleParts[0]}-{titleParts[1]}";
        searchTitle = searchJAVID;
    }

    var url = Helper.GetSearchSearchURL(siteNum) + searchTitle.Replace("-", " ", 1, StringComparison.OrdinalIgnoreCase);
    var data = await HTML.ElementFromURL(url, cancellationToken).ConfigureAwait(false);

    var itemNodes = data.SelectNodes("//li[contains(@class, 'item-list')]");
    if (itemNodes == null)
    {
        return result;
    }

    foreach (var itemNode in itemNodes)
    {
        string sceneURL = itemNode.SelectSingleNode(".//a").Attributes["href"].Value;
        string sceneName = Decensor(itemNode.SelectSingleNode(".//dt").InnerText);

        // Poster URL and id both come from the same <img> node.
        var imageNode = itemNode.SelectSingleNode(".//img");
        string scenePoster = imageNode.Attributes["data-original"].Value;
        string javID = imageNode.Attributes["alt"].Value;

        // Remove the last path segment from the link before encoding it into the id.
        string lastSegment = "/" + sceneURL.Split('/').Last();
        sceneURL = sceneURL.Replace(lastSegment, string.Empty, StringComparison.OrdinalIgnoreCase);
        string curID = $"{siteNum[0]}#{siteNum[1]}#{Helper.Encode(sceneURL)}";

        var res = new RemoteSearchResult();
        res.ProviderIds.Add(Plugin.Instance.Name, curID);
        res.Name = $"{javID} {sceneName}";
        res.ImageUrl = scenePoster;

        if (!string.IsNullOrEmpty(searchJAVID))
        {
            // Higher IndexNumber means a closer id match.
            res.IndexNumber = 100 - LevenshteinDistance.Calculate(searchJAVID, javID, StringComparison.OrdinalIgnoreCase);
        }

        result.Add(res);
    }

    return result;
}
/// <summary>
/// Queries the site's search page and converts every <c>a</c> node whose class contains
/// <c>thumbnail</c> into a <see cref="RemoteSearchResult"/>.
/// </summary>
/// <param name="siteNum">Site coordinates; a null value yields an empty result.</param>
/// <param name="searchTitle">Title appended to the search URL and used for ranking.</param>
/// <param name="searchDate">When present, appended to each result id as <c>#yyyy-MM-dd</c>.</param>
/// <param name="cancellationToken">Token forwarded to the HTTP request.</param>
/// <returns>
/// The parsed results. IndexNumber starts from 100 for scenes of the requested site and from 60 for
/// scenes of other sub-sites, minus the case-insensitive title distance.
/// </returns>
public async Task<List<RemoteSearchResult>> Search(int[] siteNum, string searchTitle, DateTime? searchDate, CancellationToken cancellationToken)
{
    var result = new List<RemoteSearchResult>();
    if (siteNum == null || string.IsNullOrEmpty(searchTitle))
    {
        return result;
    }

    var url = Helper.GetSearchSearchURL(siteNum) + searchTitle;
    var data = await HTML.ElementFromURL(url, cancellationToken).ConfigureAwait(false);

    var thumbnailNodes = data.SelectNodes("//a[contains(@class, 'thumbnail')]");
    if (thumbnailNodes == null)
    {
        return result;
    }

    foreach (var thumbnailNode in thumbnailNodes)
    {
        // The query string is dropped from the link before it becomes part of the id.
        string sceneURL = Helper.GetSearchBaseURL(siteNum) + thumbnailNode.Attributes["href"].Value.Split('?')[0];
        string curID = $"{siteNum[0]}#{siteNum[1]}#{Helper.Encode(sceneURL)}";
        string sceneName = thumbnailNode.SelectSingleNode(".//div/h3[@class='scene-title']").InnerText;
        string posterURL = $"https:{thumbnailNode.SelectSingleNode(".//img").Attributes["src"].Value}";
        string subSite = thumbnailNode.SelectSingleNode(".//div/p[@class='help-block']").InnerText.Replace(".com", string.Empty, StringComparison.OrdinalIgnoreCase);

        var res = new RemoteSearchResult
        {
            Name = $"{sceneName} from {subSite}",
            ImageUrl = posterURL,
        };

        if (searchDate.HasValue)
        {
            curID += $"#{searchDate.Value.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture)}";
        }

        res.ProviderIds.Add(Plugin.Instance.Name, curID);

        // Scenes from the requested site get a head start over scenes from other sub-sites.
        int baseScore = subSite == Helper.GetSearchSiteName(siteNum) ? 100 : 60;
        res.IndexNumber = baseScore - LevenshteinDistance.Calculate(searchTitle, sceneName, StringComparison.OrdinalIgnoreCase);

        result.Add(res);
    }

    return result;
}
/// <summary>
/// Recomputes the character error rate label whenever the transcription text changes: the
/// Levenshtein distance between the recognized text and the text box content, as a percentage of
/// the recognized text length. The label is cleared when there is no recognized text.
/// </summary>
private void textBoxTranscriptions_TextChanged(object sender, EventArgs e)
{
    if (String.IsNullOrEmpty(this._recognizedText))
    {
        this.labelCharacterErrorRate.Text = String.Empty;
        return;
    }

    int editDistance = LevenshteinDistance.Calculate(this._recognizedText, this.textBoxTranscriptions.Text);

    // The length is non-zero here because of the guard above.
    double errorRatePercent = ((double)editDistance / (double)this._recognizedText.Length) * 100;
    this.labelCharacterErrorRate.Text = errorRatePercent.ToString("N6") + "%";
}
/// <summary>
/// Calculates a value representing how similar two lists of words are by summing the similarity of
/// every pair of words taken from both lists. The similarity of a pair is its Levenshtein distance
/// normalized by the length of the longer word and then inverted (1 - normalized distance).
/// </summary>
/// <param name="list1">First list of words.</param>
/// <param name="list2">Second list of words; enumerated once for every word of <paramref name="list1"/>.</param>
/// <returns>The sum of all pairwise similarities; 0 when either list is empty.</returns>
private double Closeness(IEnumerable<string> list1, IEnumerable<string> list2)
{
    double closeness = 0;
    foreach (var item1 in list1)
    {
        foreach (var item2 in list2)
        {
            var distance = LevenshteinDistance.Calculate(item1, item2);
            var distanceMax = Math.Max(item1.Length, item2.Length);

            if (distanceMax == 0)
            {
                // Both words are empty, so they are identical. Dividing by the zero length would
                // produce NaN and turn the whole sum into NaN.
                closeness += 1;
                continue;
            }

            var similarity = 1 - ((double)distance) / ((double)distanceMax);
            closeness += similarity;
        }
    }

    return closeness;
}
/// <summary>
/// Returns the entry of the built-in station list with the smallest Levenshtein distance to
/// <paramref name="input"/>. The first entry is the fallback, and ties keep the earlier entry.
/// </summary>
private string closestPossibleString(string input)
{
    IReadOnlyCollection<string> stations = new List<string>() { "test" };

    string bestStation = stations.First();
    int bestDistance = 99999999;

    foreach (string station in stations)
    {
        int stationDistance = LevenshteinDistance.Calculate(input, station);
        if (stationDistance >= bestDistance)
        {
            continue;
        }

        bestDistance = stationDistance;
        bestStation = station;
    }

    return bestStation;
}
/// <summary>
/// Scores every stored question tag against a fixed sample sentence and materializes the ten
/// highest-scoring entries. The computed list is not passed to the view.
/// </summary>
public ActionResult Index22()
{
    // NOTE(review): this instance and the word set below are never read here; they are kept because
    // the constructor may have side effects - confirm before removing.
    AskCaro_QuestionnaireAspirateur.StackOverflowAspirateur aspirateur = new AskCaro_QuestionnaireAspirateur.StackOverflowAspirateur();

    var target = "Is there a best practice and recommended alternative to Session variables in MVC";
    var text = AskCaro_Web_MVC.Tools.WordsAnalyzer.StringWords2Remove(target);
    var targetWords = text.Split(' ').ToHashSet();

    // AsEnumerable pulls the rows into memory; everything after it runs as LINQ-to-objects.
    // The score is a weighted, length-normalized distance (case-sensitive weight 0.15,
    // case-insensitive weight 0.35). An earlier similarity formula, 1 - distance / length,
    // was commented out in place of this one.
    var scoredQuestions = _dbContext.Questions
        .AsEnumerable()
        .Select(h => new
        {
            h,
            score = (0.15 * LevenshteinDistance.Calculate(text, h.Tag) + 0.35 * LevenshteinDistance.Calculate(text.ToLower(), h.Tag.ToLower())) / Math.Max(text.Length, h.Tag.Length),
        })
        .Where(entry => entry.score > 0.25);

    var topTen = scoredQuestions.OrderByDescending(entry => entry.score).Take(10).ToList();

    return View();
}
/// <summary>
/// Writes a report that compares the OCR text of the original image (first entry of
/// <paramref name="ocrResults"/>) with the OCR text of each distorted variant.
/// </summary>
/// <param name="engineName">OCR engine name; used in the report file name.</param>
/// <param name="imageName">Image name; key into distortedImages and part of the file name.</param>
/// <param name="ocrResults">Index 0 holds the original image's text, index i the text of distortion i - 1.</param>
/// <remarks>
/// One line per distorted image: <c>image | distortion | value | distance/originalLength</c>.
/// The file is written to the temp folder as <c>{imageName}_{engineName}.txt</c>.
/// </remarks>
private void AnalizeOcrResults(string engineName, string imageName, List<string> ocrResults)
{
    var report = new StringBuilder();
    string baselineText = ocrResults[0];

    for (int index = 1; index < ocrResults.Count; index++)
    {
        DistortedImage distorted = distortedImages[imageName][index - 1];

        report.Append(imageName)
              .Append(" | ")
              .Append(distorted.distortion)
              .Append(" | ")
              .Append(distorted.value)
              .Append(" | ")
              .Append(LevenshteinDistance.Calculate(baselineText, ocrResults[index]).ToString())
              .Append("/")
              .Append((baselineText.Length).ToString())
              .Append(Environment.NewLine);
    }

    string reportPath = Path.Combine(Path.GetTempPath(), imageName + "_" + engineName + ".txt");
    File.WriteAllText(reportPath, report.ToString());
}
/// <summary>
/// Resolves the site, title and date from a movie lookup request, runs the matching provider's
/// search and returns the results in ranked order.
/// </summary>
/// <param name="searchInfo">Lookup request; a null request or an empty name yields an empty result.</param>
/// <param name="cancellationToken">Token forwarded to the provider search and error reporting.</param>
/// <returns>
/// The provider results, ordered by IndexNumber when any result has one, otherwise by distance in
/// days to the search date when every result has a premiere date and at least one differs from it,
/// otherwise by case-insensitive title distance. Empty when no site, title or provider is found.
/// </returns>
public async Task<IEnumerable<RemoteSearchResult>> GetSearchResults(MovieInfo searchInfo, CancellationToken cancellationToken)
{
    var result = new List<RemoteSearchResult>();

    if (searchInfo == null || string.IsNullOrEmpty(searchInfo.Name))
    {
        return result;
    }

    Logger.Info($"searchInfo.Name: {searchInfo.Name}");

    var title = Helper.ReplaceAbbrieviation(searchInfo.Name);
    var site = Helper.GetSiteFromTitle(title);

    if (site.siteNum == null)
    {
        // No site recognised in the name: retry with the configured default site as prefix, or
        // with whatever site name the helper can derive from the title.
        var defaultSiteName = Plugin.Instance.Configuration.DefaultSiteName;
        string newTitle = !string.IsNullOrEmpty(defaultSiteName)
            ? $"{defaultSiteName} {searchInfo.Name}"
            : Helper.GetSiteNameFromTitle(searchInfo.Name);

        if (!string.IsNullOrEmpty(newTitle) && !newTitle.Equals(searchInfo.Name, StringComparison.OrdinalIgnoreCase))
        {
            Logger.Info($"newTitle: {newTitle}");

            title = Helper.ReplaceAbbrieviation(newTitle);
            site = Helper.GetSiteFromTitle(title);
        }

        if (site.siteNum == null)
        {
            return result;
        }
    }

    // Split the cleaned title into the actual title and an optional embedded date.
    var titleAfterDate = Helper.GetDateFromTitle(Helper.GetClearTitle(title, site.siteName));
    string searchTitle = titleAfterDate.searchTitle;
    DateTime? searchDateObj = titleAfterDate.searchDateObj;
    string searchDate = string.Empty;

    if (searchDateObj.HasValue)
    {
        searchDate = searchDateObj.Value.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);
    }
    else if (searchInfo.PremiereDate.HasValue)
    {
        // Fall back to the premiere date supplied with the request.
#if __EMBY__
        searchDateObj = searchInfo.PremiereDate.Value.DateTime;
#else
        searchDateObj = searchInfo.PremiereDate.Value;
#endif
        searchDate = searchInfo.PremiereDate.Value.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);
    }

    if (string.IsNullOrEmpty(searchTitle))
    {
        return result;
    }

    Logger.Info($"site: {site.siteNum[0]}:{site.siteNum[1]} ({site.siteName})");
    Logger.Info($"searchTitle: {searchTitle}");
    Logger.Info($"searchDate: {searchDate}");

    var provider = Helper.GetProviderBySiteID(site.siteNum[0]);
    if (provider == null)
    {
        return result;
    }

    Logger.Info($"provider: {provider}");

    try
    {
        result = await provider.Search(site.siteNum, searchTitle, searchDateObj, cancellationToken).ConfigureAwait(false);
    }
    catch (Exception e)
    {
        // Best effort: log and report the failure, then continue with whatever is in result.
        Logger.Error($"Search error: \"{e}\"");

        await Analitycs.Send(searchInfo.Name, site.siteNum, site.siteName, searchTitle, searchDateObj, provider.ToString(), e, cancellationToken).ConfigureAwait(false);
    }

    if (!result.Any())
    {
        return result;
    }

    foreach (var scene in result)
    {
        // Prefix the provider id with the site coordinates.
        scene.ProviderIds[this.Name] = $"{site.siteNum[0]}#{site.siteNum[1]}#" + scene.ProviderIds[this.Name];
        scene.Name = scene.Name.Trim();

        if (scene.PremiereDate.HasValue)
        {
            scene.ProductionYear = scene.PremiereDate.Value.Year;
        }
    }

    if (result.Any(scene => scene.IndexNumber.HasValue))
    {
        // Provider-supplied scores win; results without a score go last.
        result = result.OrderByDescending(o => o.IndexNumber.HasValue).ThenByDescending(o => o.IndexNumber).ToList();
    }
    else if (!string.IsNullOrEmpty(searchDate) && result.All(o => o.PremiereDate.HasValue) && result.Any(o => o.PremiereDate.Value != searchDateObj))
    {
        // Closest premiere date first.
        result = result.OrderBy(o => Math.Abs((searchDateObj - o.PremiereDate).Value.TotalDays)).ToList();
    }
    else
    {
        // Closest title first.
        result = result.OrderByDescending(o => 100 - LevenshteinDistance.Calculate(searchTitle, o.Name, StringComparison.OrdinalIgnoreCase)).ToList();
    }

    return result;
}
/// <summary>
/// Tries every site registered under <c>siteNum[0]</c> in turn and returns the results of the first
/// one that yields any.
/// </summary>
/// <param name="siteNum">
/// Site coordinates; a null value yields an empty result. Element 1 is overwritten with the key of
/// each site tried, so the caller's array is modified.
/// </param>
/// <param name="searchTitle">
/// Title to search for. When its second whitespace-separated token is an integer, the first two
/// tokens are joined as <c>first-second</c> and used as the search term and ranking key.
/// </param>
/// <param name="searchDate">Forwarded to the update-based lookup used when no result list is found.</param>
/// <param name="cancellationToken">Token forwarded to the HTTP requests.</param>
/// <returns>The results of the first site that produced any; otherwise an empty list.</returns>
public async Task<List<RemoteSearchResult>> Search(int[] siteNum, string searchTitle, DateTime? searchDate, CancellationToken cancellationToken)
{
    var result = new List<RemoteSearchResult>();
    if (siteNum == null || string.IsNullOrEmpty(searchTitle))
    {
        return result;
    }

    string searchJAVID = null;
    var titleParts = searchTitle.Split();
    if (titleParts.Length > 1 && int.TryParse(titleParts[1], out _))
    {
        // The joined id always contains the dash, so it is never empty once assigned.
        searchJAVID = $"{titleParts[0]}-{titleParts[1]}";
        searchTitle = searchJAVID;
    }

    foreach (var site in Database.SiteList.Sites[siteNum[0]])
    {
        siteNum[1] = site.Key;

        var url = Helper.GetSearchSearchURL(siteNum) + searchTitle;
        var data = await HTML.ElementFromURL(url, cancellationToken).ConfigureAwait(false);

        var videoNodes = data.SelectNodesSafe("//div[@class='videos']//div[@class='video']");
        if (!videoNodes.Any())
        {
            // No result list: take the link from the video_title element and resolve it through
            // the update path.
            var directURL = new Uri(Helper.GetSearchBaseURL(siteNum) + data.SelectSingleText("//div[@id='video_title']//a/@href"));
            var sceneID = new string[] { Helper.Encode(directURL.PathAndQuery) };

            var updateResults = await Helper.GetSearchResultsFromUpdate(this, siteNum, sceneID, searchDate, cancellationToken).ConfigureAwait(false);
            if (updateResults.Any())
            {
                result.AddRange(updateResults);
            }
        }
        else
        {
            foreach (var videoNode in videoNodes)
            {
                var sceneURL = new Uri(Helper.GetSearchBaseURL(siteNum) + $"/en/?v={videoNode.SelectSingleText(".//a/@id")}");
                string curID = Helper.Encode(sceneURL.PathAndQuery);
                string sceneName = videoNode.SelectSingleText(".//div[@class='title']");
                string scenePoster = $"http:{videoNode.SelectSingleText(".//img/@src").Replace("ps.", "pl.", StringComparison.OrdinalIgnoreCase)}";
                string javID = videoNode.SelectSingleText(".//div[@class='id']");

                var res = new RemoteSearchResult();
                res.ProviderIds.Add(Plugin.Instance.Name, curID);
                res.Name = $"{javID} {sceneName}";
                res.ImageUrl = scenePoster;

                if (!string.IsNullOrEmpty(searchJAVID))
                {
                    // Higher IndexNumber means a closer id match.
                    res.IndexNumber = 100 - LevenshteinDistance.Calculate(searchJAVID, javID, StringComparison.OrdinalIgnoreCase);
                }

                result.Add(res);
            }
        }

        if (result.Any())
        {
            break;
        }
    }

    return result;
}
/// <summary>
/// Parameterized check that the distance computed for the two literals equals the expected value.
/// </summary>
public void CalculationEqualsExpectedResult(double expectedResult, string firstLiteral, string secondLiteral)
{
    Assert.Equal(expectedResult, LevenshteinDistance.Calculate(firstLiteral, secondLiteral));
}
/// <summary>
/// Orders a sequence by ascending Levenshtein distance between each item's selected string and
/// <paramref name="pattern"/>, so the most similar items come first.
/// </summary>
/// <typeparam name="TSource">Type of the items.</typeparam>
/// <param name="sequence">Items to order.</param>
/// <param name="selector">Extracts the string to compare from an item.</param>
/// <param name="pattern">The string every item is compared against.</param>
/// <returns>The ordered sequence.</returns>
public static IOrderedEnumerable<TSource> OrderBySimilarity<TSource>(this IEnumerable<TSource> sequence, Func<TSource, string> selector, string pattern)
{
    return sequence.OrderBy(item =>
    {
        string candidate = selector(item);
        return LevenshteinDistance.Calculate(candidate, pattern);
    });
}
/// <summary>
/// Finds the element that best matches <paramref name="name"/>, trying progressively looser
/// strategies and returning as soon as one succeeds:
/// 1. exact match of the lower-cased, trimmed full name;
/// 2. closest full name by Levenshtein distance (getElementClosestToName);
/// 3. exact match between any multi-word combination of the name and of an element's name,
///    accepted only when exactly one hit was counted;
/// 4. the same combination match with up to _settings.AcceptedNumberOfErrors edits, accepted when
///    one hit was counted or _settings.AcceptMultipleResults is set.
/// </summary>
/// <param name="name">Name to look for; compared in lower case.</param>
/// <param name="elements">Candidates to search.</param>
/// <returns>The matching element, or <c>null</c> when no strategy produced an acceptable match.</returns>
public T GetSpecificWithName(string name, List<T> elements)
{
    name = name.ToLower();
    var separators = new string[] { " " };

    // Splits a name into its words and reports whether there are fewer than two.
    Func<string, bool> isSingleWord = text => text.Split(separators, StringSplitOptions.RemoveEmptyEntries).Length <= 1;

    // Strategy 1: exact match on the full name.
    var trimmedName = name.Trim();
    var exactMatch = elements.FirstOrDefault(t => t.ComparableString.ToLower().Trim() == trimmedName);
    if (exactMatch != null)
    {
        return exactMatch;
    }

    // Strategy 2: closest full name by Levenshtein distance.
    var closestMatch = getElementClosestToName(name, elements);
    if (closestMatch != null)
    {
        return closestMatch;
    }

    string[] nameCombinations = StringHelpers.GetAllArrayCombinations_Recursive(name.Split(separators, StringSplitOptions.RemoveEmptyEntries));

    // Strategy 3: exact match between multi-word combinations.
    var hitCount = 0;
    T hit = null;
    foreach (var nameCombination in nameCombinations)
    {
        if (isSingleWord(nameCombination))
        {
            continue;
        }

        foreach (var element in elements)
        {
            if (isSingleWord(element.ComparableString))
            {
                continue;
            }

            var elementName = element.ComparableString.ToLower();
            string[] elementCombinations = StringHelpers.GetAllArrayCombinations_Recursive(elementName.Split(separators, StringSplitOptions.RemoveEmptyEntries));

            // At most one hit per element for each name combination.
            if (elementCombinations.Any(candidate => candidate.Equals(nameCombination)))
            {
                hitCount++;
                hit = element;
            }
        }
    }

    if (hit != null && hitCount == 1)
    {
        return hit;
    }

    // Strategy 4: multi-word combinations within the accepted number of errors.
    hitCount = 0;
    hit = null;
    foreach (var nameCombination in nameCombinations)
    {
        if (isSingleWord(nameCombination))
        {
            continue;
        }

        foreach (var element in elements)
        {
            if (isSingleWord(element.ComparableString))
            {
                continue;
            }

            var elementName = element.ComparableString.ToLower();
            string[] elementCombinations = StringHelpers.GetAllArrayCombinations_Recursive(elementName.Split(separators, StringSplitOptions.RemoveEmptyEntries));

            foreach (var elementCombination in elementCombinations)
            {
                var distance = LevenshteinDistance.Calculate(nameCombination, elementCombination);
                if (distance > _settings.AcceptedNumberOfErrors)
                {
                    continue;
                }

                // A further hit on the element that is already the current match is not counted again.
                bool alreadyCurrentHit = hit != null && hit == element;
                if (!alreadyCurrentHit)
                {
                    hit = element;
                    hitCount++;
                }
            }
        }
    }

    if (hit != null && (hitCount == 1 || _settings.AcceptMultipleResults))
    {
        return hit;
    }

    return null;
}
/// <summary>
/// Parameterized check that the distance between <paramref name="a"/> and <paramref name="b"/>
/// equals <paramref name="distance"/>.
/// </summary>
public void TestCalculate(string a, string b, int distance)
{
    Assert.AreEqual(distance, LevenshteinDistance.Calculate(a, b));
}
/// <summary>
/// Checks that the distance between the InputRam and InputArm fixtures is 2.
/// </summary>
public void ShouldReturnLevenshteinDistanceOf2()
{
    const int expectedDistance = 2;

    var actualDistance = LevenshteinDistance.Calculate(InputRam, InputArm);

    Assert.AreEqual(expectedDistance, actualDistance);
}