/// <summary>
/// Collects every grade listed in the grade header of a route page.
/// </summary>
/// <remarks>
/// The grade header is the first &lt;h2&gt; whose class attribute is exactly "inline-block mr-2".
/// Each &lt;span&gt; inside it that has a class attribute and a non-empty &lt;a&gt; label yields one or more
/// grades; the span's class selects the grade system. Text of the header that lies outside of its
/// tags is then added as an ice, aid or unlabeled grade, depending on its prefix.
/// </remarks>
/// <param name="doc">The parsed route page.</param>
/// <returns>The grades that were found. Empty when the page has no grade header.</returns>
public static List<Grade> ParseRouteGrades(IHtmlDocument doc)
{
    List<Grade> grades = new List<Grade>();

    IElement gradesSection = doc.GetElementsByTagName("h2")
        .FirstOrDefault(p => p.Attributes["class"] != null && p.Attributes["class"].Value == "inline-block mr-2");

    //FirstOrDefault yields null when the page has no grade header; there is nothing to parse in that case
    if (gradesSection == null)
    {
        return grades;
    }

    foreach (IElement spanElement in gradesSection.GetElementsByTagName("span"))
    {
        //The text of the <a> inside the span is the label; it is stripped from the span's text to get the bare grade value
        string labelText = spanElement.GetElementsByTagName("a").FirstOrDefault()?.TextContent;
        if (spanElement.Attributes["class"] == null || string.IsNullOrEmpty(labelText))
        {
            continue;
        }

        string gradeValue = HttpUtility.HtmlDecode(spanElement.TextContent.Replace(labelText, "")).Trim();

        switch (spanElement.Attributes["class"].Value)
        {
            case "rateYDS":
            case "rateHueco":
                List<Grade> parsedGrades = Grade.ParseString(gradeValue);
                if (parsedGrades.Count > 0)
                {
                    grades.AddRange(parsedGrades);
                }
                else
                {
                    //I think there's an issue with the MountainProject website where Hueco grades are listed as YDS
                    //(eg /route/111259770/three-pipe-problem). I've reported this to them (I think) but for now
                    //I'm "coding around it".
                    GradeSystem fallbackSystem = gradeValue.Contains("V") ? GradeSystem.Hueco : GradeSystem.YDS;
                    grades.Add(new Grade(fallbackSystem, gradeValue, false));
                }
                break;
            case "rateFrench":
                grades.Add(new Grade(GradeSystem.French, gradeValue, false));
                break;
            case "rateEwbanks":
                grades.Add(new Grade(GradeSystem.Ewbanks, gradeValue, false));
                break;
            case "rateUIAA":
                grades.Add(new Grade(GradeSystem.UIAA, gradeValue, false));
                break;
            case "rateZA":
                grades.Add(new Grade(GradeSystem.SouthAfrica, gradeValue, false));
                break;
            case "rateBritish":
                grades.Add(new Grade(GradeSystem.Britsh, gradeValue, false));
                break;
            case "rateFont":
                grades.Add(new Grade(GradeSystem.Fontainebleau, gradeValue, false));
                break;
        }
    }

    //The greedy "<.*>" (with Singleline) removes everything from the first '<' to the last '>',
    //so only text before the first tag and after the last tag of the header remains
    string gradeInnerText = Regex.Replace(gradesSection.InnerHtml, "<.*>", "", RegexOptions.Singleline).Trim();
    if (!string.IsNullOrWhiteSpace(gradeInnerText))
    {
        //The prefixes are plain ASCII identifiers, so compare ordinally rather than with the current culture
        const System.StringComparison ordinal = System.StringComparison.Ordinal;

        GradeSystem trailingSystem;
        if (gradeInnerText.StartsWith("AI", ordinal) || gradeInnerText.StartsWith("WI", ordinal) || gradeInnerText.StartsWith("M", ordinal))
        {
            trailingSystem = GradeSystem.Ice;
        }
        else if (gradeInnerText.StartsWith("A", ordinal) || gradeInnerText.StartsWith("C", ordinal))
        {
            trailingSystem = GradeSystem.Aid;
        }
        else
        {
            trailingSystem = GradeSystem.Unlabled;
        }

        grades.Add(new Grade(trailingSystem, HttpUtility.HtmlDecode(gradeInnerText)));
    }

    return grades;
}
/// <summary>
/// Tries to work out which single route a piece of free text (eg a post title) refers to.
/// </summary>
/// <remarks>
/// Every name returned by GetPossibleRouteNames is searched (routes only) and the hits are kept as
/// candidates when they can be tied to the input through a parent location, the route name or a grade.
/// The candidates are then narrowed to one chosen route and given a confidence level
/// (1 = high, 2 = medium, 3 = low).
/// </remarks>
/// <param name="inputString">The text to analyze.</param>
/// <returns>
/// A SearchResult with the chosen route, its location, all remaining candidates, the confidence and
/// (when set) the reason for a lower confidence. When no candidate is found, a SearchResult created by
/// the parameterless constructor with only TimeTakenMS set.
/// </returns>
public static SearchResult ParseRouteFromString(string inputString)
{
    Stopwatch stopwatch = Stopwatch.StartNew();
    SearchResult finalResult = new SearchResult();

    //Todo: in the future support returning multiple routes (but only if there are multiple grades in the title?
    //Maybe only if all of the routes' full names are in the title?)
    List<PossibleRouteResult> possibleResults = new List<PossibleRouteResult>();

    List<Grade> postGrades = Grade.ParseString(inputString);
    WriteToConsole($"\tRecognized grade(s): {string.Join(" | ", postGrades)}");

    List<string> possibleRouteNames = GetPossibleRouteNames(inputString);
    WriteToConsole($"\tRecognized name(s): {string.Join(" | ", possibleRouteNames)}");

    //True when a parent of the route is found in the input (third argument = allowPartialParents) AND the route's name is in the input.
    //NOTE(review): ParentsInString is deliberately called in exactly the same places and order as before,
    //because the later use of FoundParents suggests it records its findings on the result - confirm.
    bool NameAndParentInInput(PossibleRouteResult r) =>
        ParentsInString(r, false, true).Any() && Utilities.StringContainsWithFilters(inputString, r.Route.Name, true);

    //True when one of the route's grades equals one of the grades recognized in the input
    bool GradeInInput(PossibleRouteResult r) =>
        r.Route.Grades.Any(g => postGrades.Any(p => g.Equals(p, true, true)));

    foreach (string possibleRouteName in possibleRouteNames)
    {
        string inputWithoutName = inputString.Replace(possibleRouteName, "");
        SearchResult searchResult = Search(possibleRouteName, new SearchParameters() { OnlyRoutes = true });

        //If there are 75 or more matching results, it was probably a very generic word for a search (eg "There")
        if (searchResult.IsEmpty() || searchResult.AllResults.Count >= 75)
        {
            continue;
        }

        List<PossibleRouteResult> allSearchResults = searchResult.AllResults.Select(r => new PossibleRouteResult
        {
            Route = r as Route,
            Area = searchResult.RelatedLocation,
            RemainingInputString = inputWithoutName,
        }).ToList();

        if (allSearchResults.Count == 1 && ParentsInString(allSearchResults.First(), false, true).Any())
        {
            possibleResults.Add(allSearchResults.First());
        }
        else if (allSearchResults.Count(NameAndParentInInput) == 1)
        {
            possibleResults.Add(allSearchResults.First(NameAndParentInInput));
        }
        else if (postGrades.Any())
        {
            //If some routes have a location in the inputString, work with those. Otherwise work with all of them
            IEnumerable<PossibleRouteResult> gradeCandidates = allSearchResults.Any(r => ParentsInString(r).Any())
                ? allSearchResults.Where(r => ParentsInString(r).Any())
                : allSearchResults;

            possibleResults.AddRange(gradeCandidates.Where(GradeInInput));
        }
        else if (allSearchResults.Any(NameAndParentInInput))
        {
            //Append (rather than assign) so the candidates collected for previously processed names are not thrown away
            possibleResults.AddRange(allSearchResults.Where(NameAndParentInInput));
        }
    }

    possibleResults = possibleResults.GroupBy(x => x.Route).Select(g => g.First()).ToList(); //Distinct routes

    if (possibleResults.Any())
    {
        //Todo: prioritize routes where the grade matches exactly (eg 5.11a matches 5.11a rather than matching 5.11a-b).
        //      Also prioritize 5.11a towards 5.11a-b (or vice-versa) rather than 5.11c
        //Todo: for matching parents in string, maybe give higher priority to results that match MORE parents.
        //      EG "Once Upon a Time - Black Mountain, California" gives 2 routes named "Once upon a time" in California.
        //      But only one is also at "Black Mountain"
        //Todo: rather than trying to match the whole route name in filteredResults, we should just prioritize matching
        //      the largest string of words in a row (in the case of "Birthing Experience V1 Bishop, CA" we should
        //      prioritize "The Womb (Birthing Experience)" over routes just named "Bishop")

        //Prioritize routes where the full name is in the input string
        //(Additionally, we could also prioritize how close - within the input string - the name is to the grade)
        List<PossibleRouteResult> filteredResults = possibleResults.Where(p => Utilities.StringContainsWithFilters(inputString, p.Route.Name, true)).ToList();

        if (filteredResults.Count > 1)
        {
            //Try to filter down to "most parents matched".
            //DefaultIfEmpty keeps Max() from throwing when a result has no FoundParents entries (counts as 0 matches)
            int MostMatchesForOneParent(PossibleRouteResult r) =>
                r.FoundParents.Select(p => p.Value.Count).DefaultIfEmpty(0).Max();

            int maxParentsMatched = filteredResults.Max(r => MostMatchesForOneParent(r));
            filteredResults = filteredResults.Where(r => MostMatchesForOneParent(r) == maxParentsMatched).ToList();
        }

        const int highConfidence = 1;
        const int medConfidence = 2;
        const int lowConfidence = 3;
        int confidence = lowConfidence;
        string unconfidentReason = null;

        if (filteredResults.Count == 1)
        {
            if (ParentsInString(filteredResults.First(), true).Any() ||
                Grade.ParseString(inputString, false).Any(g => filteredResults.First().Route.Grades.Any(p => g.Equals(p))))
            {
                confidence = highConfidence; //Highest confidence when we also match a location in the string or if we match a full grade
            }
            else
            {
                confidence = medConfidence; //Medium confidence when we have only found one match with that exact name but can't match a location in the string
                unconfidentReason = "Single result found, but no parents or grades matched";
            }
        }
        else if (filteredResults.Count > 1)
        {
            //Prioritize routes where one of the parents (locations) is also in the input string
            List<PossibleRouteResult> routesWithMatchingLocations = filteredResults.Where(r => ParentsInString(r).Any()).ToList();
            if (routesWithMatchingLocations.Any())
            {
                filteredResults = routesWithMatchingLocations;

                if (postGrades.Any())
                {
                    confidence = highConfidence; //Highest confidence when we have found the location in the string
                }
                else
                {
                    confidence = medConfidence;
                    unconfidentReason = $"{filteredResults.Count} EXACTLY matching routes (name & location w/o abbrev). No grades matched";
                }
            }
            else
            {
                //Second attempt: same check with the second ParentsInString argument enabled (abbreviations, per the reason text below)
                routesWithMatchingLocations = filteredResults.Where(r => ParentsInString(r, true).Any()).ToList();
                if (routesWithMatchingLocations.Any())
                {
                    filteredResults = routesWithMatchingLocations;

                    if (postGrades.Any())
                    {
                        confidence = highConfidence; //Highest confidence when we have found the location in the string
                    }
                    else
                    {
                        confidence = medConfidence;
                        unconfidentReason = $"{filteredResults.Count} EXACTLY matching routes (name & location w/ abbrev). No grades matched";
                    }
                }
                else
                {
                    //Last attempt: additionally allow partial parent names
                    routesWithMatchingLocations = filteredResults.Where(r => ParentsInString(r, true, true).Any()).ToList();
                    if (routesWithMatchingLocations.Any())
                    {
                        filteredResults = routesWithMatchingLocations;
                        confidence = medConfidence; //Medium confidence when we have matched only part of a parent's name
                        unconfidentReason = $"{filteredResults.Count} EXACTLY matching routes (name & PARTIAL location w/ abbrev)";
                    }
                }
            }
        }
        else
        {
            //No route's full name is in the input string, so fall back to ALL possible results.
            //Prioritize routes where one of the parents (locations) is also in the input string
            List<PossibleRouteResult> routesWithMatchingLocations = possibleResults.Where(r => ParentsInString(r).Any()).ToList();
            if (routesWithMatchingLocations.Any())
            {
                filteredResults = routesWithMatchingLocations;
                confidence = medConfidence; //Medium confidence when we didn't match a full route name, but have found a parent location in the string
                unconfidentReason = $"{filteredResults.Count} PARTIALLY matching routes (name & location w/o abbrev)";
            }
            else
            {
                routesWithMatchingLocations = possibleResults.Where(r => ParentsInString(r, true).Any()).ToList();
                if (routesWithMatchingLocations.Any())
                {
                    filteredResults = routesWithMatchingLocations;
                    confidence = medConfidence; //Medium confidence when we didn't match a full route name, but have found a parent location in the string (including the possibility of the state abbrv)
                    unconfidentReason = $"{filteredResults.Count} PARTIALLY matching routes (name & location w/ abbrev)";
                }
                else
                {
                    routesWithMatchingLocations = possibleResults.Where(r => ParentsInString(r, true, true).Any()).ToList();
                    if (routesWithMatchingLocations.Any())
                    {
                        filteredResults = routesWithMatchingLocations;
                        confidence = medConfidence; //Medium confidence when we didn't match a full route name and have matched only part of a parent's name in the string
                        unconfidentReason = $"{filteredResults.Count} PARTIALLY matching routes (name & PARTIAL location w/ abbrev)";
                    }
                }
            }
        }

        //Todo: temporary fix for posts about "covid-19" where the route is called "COVID-19" AND the parent is "Covid 19 Boulder"
        if (filteredResults.Count == 1 && filteredResults[0].Route.ID == "119484798" && filteredResults[0].FoundParents.All(kvp => kvp.Value.Count == 1))
        {
            return new SearchResult() { TimeTakenMS = stopwatch.ElapsedMilliseconds };
        }

        PossibleRouteResult chosenRoute;
        Area location;
        List<MPObject> allResults = new List<MPObject>();

        if (filteredResults.Count == 1)
        {
            chosenRoute = filteredResults.First();
            allResults.Add(chosenRoute.Route);
        }
        else if (filteredResults.Count > 1)
        {
            chosenRoute = filteredResults.OrderByDescending(p => p.Route.Popularity).First();
            allResults.AddRange(filteredResults.Select(p => p.Route));
            confidence = medConfidence; //Medium confidence when we have matched the string exactly, but there are multiple results
            unconfidentReason ??= $"Too many filtered results ({filteredResults.Count})";
        }
        else
        {
            chosenRoute = possibleResults.OrderByDescending(p => p.Route.Popularity).First();
            allResults.AddRange(possibleResults.Select(p => p.Route));
            confidence = lowConfidence; //Low confidence when we can't match the string exactly, haven't matched any locations, and there are multiple results
            unconfidentReason ??= "No filtered results. Chose most popular partial match instead";
        }

        location = chosenRoute.Area;
        if (location == null)
        {
            //No related location came back from the search: use the first parent found in the input that isn't the route's outer parent
            location = ParentsInString(chosenRoute, allowPartialParents: true).FirstOrDefault(p => p.ID != GetOuterParent(chosenRoute.Route).ID) as Area;
        }

        finalResult = new SearchResult(chosenRoute.Route, location)
        {
            AllResults = allResults,
            Confidence = confidence,
            UnconfidentReason = unconfidentReason
        };
    }

    finalResult.TimeTakenMS = stopwatch.ElapsedMilliseconds;
    return finalResult;
}