/// <summary>
/// Classifies a query unit using the snippet of a web search result as its description.
/// </summary>
/// <param name="queryUnit">Query pattern to classify. Its <c>Description</c> is overwritten with the snippet.</param>
/// <param name="classifierName">Name of the classifier, forwarded to <c>ClassifyQueryUnit</c>.</param>
/// <param name="searchResult">Web search result that supplies the snippet and is attached to every returned item.</param>
/// <returns>
/// One <c>WebSearchClassifyResult</c> per classifier result, or <c>null</c> when the search result
/// or its snippet is missing, or when the classifier returned null or an empty collection.
/// </returns>
public static List<WebSearchClassifyResult> ClassifyWebSearchQueryUnit(RestaurantQueryPattern queryUnit, string classifierName, WebSearchResult searchResult)
{
    if (searchResult == null || string.IsNullOrEmpty(searchResult.Snippet))
    {
        log.ErrorFormat("[ClassifyWebSearchQueryUnit] WebSearchResult Snippet can't be null or empty.");
        return null;
    }

    queryUnit.Description = searchResult.Snippet;
    log.DebugFormat("[ClassifyWebSearchQueryUnit] classifierName={0}, queryUnit={1}.", classifierName, queryUnit.ToString());

    var classifierResults = ClassifyQueryUnit(queryUnit, classifierName);

    // The original test was "!= null || Count > 0": it dereferenced a null result and let an
    // empty one through, so this warning branch could never run.
    if (classifierResults == null || classifierResults.Count == 0)
    {
        log.WarnFormat("[ClassifyWebSearchQueryUnit] Classifier results is null or empty. queryUnit.Name={0}, queryUnit.Description={1}.", queryUnit.Name, queryUnit.Description);
        return null;
    }

    List<WebSearchClassifyResult> returnList = new List<WebSearchClassifyResult>();
    foreach (var result in classifierResults)
    {
        returnList.Add(new WebSearchClassifyResult(result, searchResult));
    }

    return returnList;
}
/// <summary>
/// Runs the named classifier over the text form of a query unit.
/// </summary>
/// <param name="queryUnit">Query pattern whose <c>ToString()</c> output is the text to classify.</param>
/// <param name="classifierName">Name of the classifier, forwarded to <c>Classify</c>.</param>
/// <returns>
/// Whatever <c>Classify</c> returns, or <c>null</c> when any exception is thrown
/// (the exception message is logged and the exception is not rethrown).
/// </returns>
public static List<Class> ClassifyUnit(RestaurantQueryPattern queryUnit, string classifierName)
{
    List<Class> classes = null;

    try
    {
        string textToClassify = queryUnit.ToString();
        classes = Classify(textToClassify, classifierName);
    }
    catch (Exception ex)
    {
        // Best-effort: failures are reported to the log and surfaced to the caller as null.
        log.ErrorFormat("[ClassifyUnit] Exception={0}.", ex.Message);
    }

    return classes;
}
///// <summary>
///// Classifies all restaurants with empty menus and updates each restaurant's cuisine.
///// </summary>
//public void ClassifyEmptyRestaurants()
//{
//    List<RestaurantBasicData> restsList = restaurantsSearchUtilty.GetAllRestaurantsWithEmptyMenu();
//    List<string> safeCuisineList = new List<string>() {
//        "italian",
//        "falafel",
//        "cafe",
//        "shawarma",
//        "meat",
//        "kebab"
//    };
//    log.InfoFormat("[ClassifyEmptyRestaurants] Restaurants.Count={0}.", restsList.Count);
//    foreach(var rest in restsList)
//    {
//        var tempQueryPattern = new RestaurantQueryPattern(rest);
//        var result = ClassifyUtility.ClassifyUnit(tempQueryPattern, "CuisinesUnionTest");
//        if (result != null)
//        {
//            log.InfoFormat("[ClassifyEmptyRestaurants] Restaurant.Name={0}, Restaurant.Id={1}, BestResult={2}, ResultProbability={3}.", rest.Name, rest.Id.ToString(), result[0].className, result[0].p.ToString());
//            if (result[0].p > 0.2 && safeCuisineList.Contains(result[0].className))
//            {
//                rest.Source = "CuisineClassifier_2014.05.29";
//                if (rest.Cuisines == null) rest.Cuisines = new List<string>();
//                rest.Cuisines.Add(result[0].className);
//                m_serviceLayer.UpdateRestaurant(rest);
//            }
//        }
//        else
//        {
//            log.WarnFormat("[ClassifyEmptyRestaurants] Null result for Restaurant.Name={0}, Restaurant.Id={1}.", rest.Name, rest.Id.ToString());
//        }
//    }
//}
/// <summary>
/// Classifies each restaurant in the input list.
/// First step: try to classify by the restaurant's name and description.
/// If no good result is found (probability not above the threshold), go to the second step.
/// Second step: execute a Google search and try to classify each search result,
/// then choose the best result.
/// </summary>
/// <param name="classifierName">Name of the classifier passed to <c>ClassifyUtility.ClassifyUnit</c>.</param>
/// <param name="restsList">
/// Restaurants to classify. Entries that are null, whose <c>Source</c> is null, or whose
/// <c>Source</c> contains "BackOffice" (case-insensitive) are skipped.
/// </param>
/// <param name="googleSearchLimit">Maximum number of Google searches to execute; 0 (the default) means no limit.</param>
/// <remarks>
/// The run stops early once 5000 classifier calls have been made or the Google search limit is hit.
/// Any exception is logged (message only) and ends the run; it is not rethrown.
/// A classifier result that is null or empty is treated as "no result" for that restaurant.
/// </remarks>
public void ClassifyRestaurants(string classifierName, List<RestaurantBasicData> restsList, int googleSearchLimit = 0)
{
    try
    {
        log.InfoFormat("[ClassifyRestaurants] classifierName={0}, Restaurants.Count={1}.", classifierName, restsList.Count);

        const int classifyLimit = 5000;
        const double threshold = 0.5;

        int restCount = 0;      // restaurants that passed the Source filter
        int count = 0;          // Google searches executed
        int classifyCount = 0;  // classifier calls made

        List<string> unsafeSourceList = new List<string>() { "plus.google.com" };
        WebSearchServices webSearchServ = new WebSearchServices();

        foreach (var rest in restsList)
        {
            if (rest == null || rest.Source == null || rest.Source.IndexOf("BackOffice", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                continue;
            }

            restCount++;
            var tempQueryPattern = new RestaurantQueryPattern(rest);

            if (classifyCount >= classifyLimit)
            {
                log.WarnFormat("[ClassifyRestaurants] Reach classify limit, classifyCount={0}.", classifyCount);
                break;
            }

            // First step: classify using the restaurant's own data.
            var result = ClassifyUtility.ClassifyUnit(tempQueryPattern, classifierName);
            classifyCount++;

            // An empty list is handled like null; indexing [0] on it would throw and abort the whole batch.
            if (result == null || result.Count == 0)
            {
                log.WarnFormat("[ClassifyRestaurants] Null result for Restaurant.Name={0}, Restaurant.Id={1}, classifierName={2}.", rest.Name, rest.Id.ToString(), classifierName);
                continue;
            }

            bool needsWebSearch = result[0].p <= threshold;
            if (!needsWebSearch)
            {
                AddCuisineIfMissing(rest, result[0].className);
                log.InfoFormat("[ClassifyRestaurants] Classified only by rest info: Restaurant.Name={0}, Restaurant.Id={1}, BestResult={2}, ResultProbability={3}, classifierName={4}, classifyText={5}.", rest.Name, rest.Id.ToString(), result[0].className, result[0].p.ToString(), classifierName, tempQueryPattern.ToString());
                MarkClassifiedAndSave(rest);
                continue;
            }

            // Second step: reuse the search results stored on the restaurant, otherwise run a Google search.
            List<WebSearchResult> searchResults = rest.SearchResults;
            if (searchResults == null)
            {
                if (googleSearchLimit != 0 && count >= googleSearchLimit)
                {
                    log.WarnFormat("[ClassifyRestaurants] Reach google search limit, count={0}.", count);
                    break;
                }

                count++;
                searchResults = webSearchServ.GoogleSearchRestaurantDescription(rest);
            }

            if (searchResults == null)
            {
                log.WarnFormat("[ClassifyRestaurants] searchResults is null, rest.Name={0}, rest.Id={1}.", rest.Name, rest.Id.ToString());
                continue;
            }

            List<Classifier.uClassify.Models.Response.Class> bestClassifyResult = null;
            WebSearchResult bestGoogleSearch = null;

            foreach (var item in searchResults)
            {
                if (item == null || unsafeSourceList.Contains(item.Source) || string.IsNullOrEmpty(item.Snippet))
                {
                    continue;
                }

                tempQueryPattern.Description = item.Snippet;

                if (classifyCount >= classifyLimit)
                {
                    // Leaves only the inner loop; the outer loop stops at its own limit check.
                    log.WarnFormat("[ClassifyRestaurants] Reach classify limit, classifyCount={0}.", classifyCount);
                    break;
                }

                var tempResult = ClassifyUtility.ClassifyUnit(tempQueryPattern, classifierName);
                classifyCount++;

                // Keep only candidates above the threshold, and of those the highest probability.
                if (tempResult != null && tempResult.Count > 0 && tempResult[0].p > threshold
                    && (bestClassifyResult == null || bestClassifyResult[0].p < tempResult[0].p))
                {
                    bestClassifyResult = new List<Classifier.uClassify.Models.Response.Class>(tempResult);
                    bestGoogleSearch = item;
                }
            }

            if (bestClassifyResult != null && bestGoogleSearch != null)
            {
                log.InfoFormat("[ClassifyRestaurants] After Google Search, Restaurant.Name={0}, Restaurant.Id={1}, BestResult={2}, ResultProbability={3}, item.DisplayLink={4}, item.Snippet={5}.", rest.Name, rest.Id.ToString(), bestClassifyResult[0].className, bestClassifyResult[0].p.ToString(), bestGoogleSearch.Source, bestGoogleSearch.Snippet);

                if (rest.SearchResults == null)
                {
                    rest.SearchResults = new List<WebSearchResult>();
                }

                // The winner may have come from rest.SearchResults itself; do not store it twice.
                if (!rest.SearchResults.Contains(bestGoogleSearch))
                {
                    rest.SearchResults.Add(bestGoogleSearch);
                }

                AddCuisineIfMissing(rest, bestClassifyResult[0].className);
                MarkClassifiedAndSave(rest);
            }
        }

        log.WarnFormat("[ClassifyRestaurants] Google searches count={0}, classifyCount ={1}, restCount={2}.", count, classifyCount, restCount);
    }
    catch (Exception e)
    {
        log.ErrorFormat("[ClassifyRestaurants] Exception={0}.", e.Message);
    }
}

/// <summary>
/// Adds a cuisine name to the restaurant's cuisine list, creating the list when it is null
/// and skipping the add when the name is already present.
/// </summary>
private static void AddCuisineIfMissing(RestaurantBasicData rest, string cuisineName)
{
    if (rest.Cuisines == null)
    {
        rest.Cuisines = new List<string>();
    }

    if (!rest.Cuisines.Contains(cuisineName))
    {
        rest.Cuisines.Add(cuisineName);
    }
}

/// <summary>
/// Stamps the restaurant's <c>Source</c> with "CuisineClassifier_" followed by the current UTC
/// date as year.month.day (no zero padding) and saves it through <c>m_serviceLayer.UpdateRestaurant</c>.
/// </summary>
private void MarkClassifiedAndSave(RestaurantBasicData rest)
{
    DateTime date = DateTime.UtcNow;
    rest.Source = "CuisineClassifier_" + date.Year.ToString() + "." + date.Month.ToString() + "." + date.Day.ToString();
    m_serviceLayer.UpdateRestaurant(rest);
}