/// <summary>
/// Classifies a query built from a name and a description with the
/// "CuisinesSupervised1" classifier and expects a non-null response.
/// </summary>
public void ClassifyUtilityTest_ResponseShouldNotBeNull()
{
    // Arrange
    var query = new RestaurantQueryPattern();
    query.Name = "פלאפל";
    query.Description = "פלאפל אכי טעים";

    // Act
    var response = Classifier.uClassify.ClassifyUtility.ClassifyUnit(query, "CuisinesSupervised1");

    // Assert
    Assert.IsNotNull(response);
}
/// <summary>
/// Creates a classificator for one restaurant: validates the arguments,
/// stores the restaurant, builds its query pattern and remembers the
/// classifier name.
/// </summary>
/// <param name="rest">Restaurant to classify; must not be null and must have a non-empty Name.</param>
/// <param name="classifierName">Name of the classifier to use; must not be null or empty.</param>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="rest"/> is null, its Name is null or empty,
/// or <paramref name="classifierName"/> is null or empty.
/// </exception>
public RestaurantInfoClassificator(RestaurantBasicData rest, string classifierName)
{
    if (rest == null)
    {
        throw new ArgumentException("Can't create RestaurantInfoClassificator: Source cannot be null.");
    }

    if (string.IsNullOrEmpty(rest.Name))
    {
        throw new ArgumentException("Can't create RestaurantInfoClassificator: Restaurant Name cannot be null or empty.");
    }

    if (string.IsNullOrEmpty(classifierName))
    {
        throw new ArgumentException("Can't create RestaurantInfoClassificator: classifierName cannot be null or empty.");
    }

    string restaurantId = rest.Id.ToString();
    log.DebugFormat(
        "[RestaurantInfoClassificator] Constructor: rest.Name={0}, rest.Id={1}, classifierName={2}.",
        rest.Name,
        restaurantId,
        classifierName);

    restaurant = rest;
    classifyQuery = new RestaurantQueryPattern(restaurant);
    this.classifier = classifierName;
}
/// <summary>
/// Classifies a query unit and converts every returned class into a
/// classifier result, skipping classes whose conversion yields null.
/// </summary>
/// <param name="queryUnit">Query pattern passed on to ClassifyUnit.</param>
/// <param name="classifierName">Name of the classifier passed on to ClassifyUnit.</param>
/// <returns>
/// The converted results, or null when ClassifyUnit returned null or no
/// class could be converted (an empty list is never returned).
/// </returns>
public static List<ClassifyResult> ClassifyQueryUnit(RestaurantQueryPattern queryUnit, string classifierName)
{
    List<Class> classes = ClassifyUnit(queryUnit, classifierName);
    if (classes == null)
    {
        return null;
    }

    var converted = new List<ClassifyResult>();
    foreach (var item in classes)
    {
        var asResult = item.ToClassifierResult();
        if (asResult == null)
        {
            continue;
        }

        converted.Add(asResult);
    }

    // Callers rely on null (not an empty list) to signal "no results".
    return converted.Count > 0 ? converted : null;
}
//public List<ClassifyResult> bestResults { get; private set; }
//private WebSearchResult bestSearchResult{ get; set; }

/// <summary>
/// Creates a classificator that works on search engine results for one
/// restaurant: validates the arguments, stores the restaurant, builds its
/// query pattern and search query string, remembers the classifier name
/// and starts with an empty list of classification results.
/// </summary>
/// <param name="rest">Restaurant to classify; must not be null and must have a non-empty Name.</param>
/// <param name="classifierName">Name of the classifier to use; must not be null or empty.</param>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="rest"/> is null, its Name is null or empty,
/// or <paramref name="classifierName"/> is null or empty.
/// </exception>
public SearchEngineResultClassificator(RestaurantBasicData rest, string classifierName)
{
    if (rest == null)
    {
        throw new ArgumentException("Can't create SearchEngineResultClassificator: Source cannot be null.");
    }

    if (string.IsNullOrEmpty(rest.Name))
    {
        throw new ArgumentException("Can't create SearchEngineResultClassificator: Restaurant Name cannot be null or empty.");
    }

    if (string.IsNullOrEmpty(classifierName))
    {
        throw new ArgumentException("Can't create SearchEngineResultClassificator: classifierName cannot be null or empty.");
    }

    string restaurantId = rest.Id.ToString();
    log.DebugFormat(
        "[SearchEngineResultClassificator] Constructor: rest.Name={0}, rest.Id={1}, classifierName={2}.",
        rest.Name,
        restaurantId,
        classifierName);

    restaurant = rest;
    classifyQuery = new RestaurantQueryPattern(rest);
    searchQuery = BuildSearchQuery.BuildSearchQueryString(rest);
    classifier = classifierName;
    classificationResults = new List<WebSearchClassifyResult>();
}
/// <summary>
/// Classifies a query together with a web search result that has a
/// non-empty snippet and expects a non-null response.
/// </summary>
public void ClassifyWebSearchQueryUnitTests_ResponseShouldNotBeNull()
{
    // Arrange
    var webSearchRes = new WebSearchResult();
    webSearchRes.SearchEngine = "Google";
    webSearchRes.Snippet = "מנות פלאפל בפיתה, בלפה, בצלחת, הזמנת משלוחים.";
    webSearchRes.Source = "www.test-source.com";

    var query = new RestaurantQueryPattern();
    query.Name = "פלאפל";
    query.Description = "פלאפל אכי טעים";

    // Act
    var response = Classifier.uClassify.ClassifyUtility.ClassifyWebSearchQueryUnit(query, "CuisinesSupervised1", webSearchRes);

    // Assert
    Assert.IsNotNull(response);
}
/// <summary>
/// Verifies that every classification result returned for a web search
/// query carries a search result whose SearchEngine, Source, Snippet and
/// IsTrained values equal those of the input search result.
/// </summary>
public void ClassifyWebSearchQueryUnitTests_SearchResultInClassifierResultShouldBeSaved()
{
    // Arrange
    var webSearchRes = new WebSearchResult();
    webSearchRes.SearchEngine = "Google";
    webSearchRes.Snippet = "מנות פלאפל בפיתה, בלפה, בצלחת, הזמנת משלוחים.";
    webSearchRes.Source = "www.test-source.com";

    var query = new RestaurantQueryPattern();
    query.Name = "פלאפל";
    query.Description = "פלאפל אכי טעים";

    // Act
    var response = Classifier.uClassify.ClassifyUtility.ClassifyWebSearchQueryUnit(query, "CuisinesSupervised1", webSearchRes);

    // Assert: each check is evaluated right before its assertion so the
    // null check on SearchResult still guards the property comparisons.
    bool anyWithoutSearchResult = response.Any(r => r.SearchResult == null);
    Assert.IsFalse(anyWithoutSearchResult);

    bool anyEngineMismatch = response.Any(r => r.SearchResult.SearchEngine != webSearchRes.SearchEngine);
    Assert.IsFalse(anyEngineMismatch);

    bool anySourceMismatch = response.Any(r => r.SearchResult.Source != webSearchRes.Source);
    Assert.IsFalse(anySourceMismatch);

    bool anySnippetMismatch = response.Any(r => r.SearchResult.Snippet != webSearchRes.Snippet);
    Assert.IsFalse(anySnippetMismatch);

    bool anyTrainedFlagMismatch = response.Any(r => r.SearchResult.IsTrained != webSearchRes.IsTrained);
    Assert.IsFalse(anyTrainedFlagMismatch);
}
/// <summary>
/// Classifies the text form of a query unit with the given classifier.
/// Any exception is logged (message only) and turned into a null result.
/// </summary>
/// <param name="queryUnit">Query pattern whose ToString() output is classified.</param>
/// <param name="classifierName">Name of the classifier passed on to Classify.</param>
/// <returns>The value returned by Classify, or null when an exception was thrown.</returns>
public static List<Class> ClassifyUnit(RestaurantQueryPattern queryUnit, string classifierName)
{
    try
    {
        //log.InfoFormat("[ClassifyUnit] queryUnit.Name={0}, queryUnit.Description={1}.", queryUnit.Name, queryUnit.Description);
        string classifyText = queryUnit.ToString();
        return Classify(classifyText, classifierName);
    }
    catch (Exception ex)
    {
        log.ErrorFormat("[ClassifyUnit] Exception={0}.", ex.Message);
        return null;
    }
}
/// <summary>
/// Classifies a query unit using the snippet of a web search result as the
/// description, and pairs every classifier result with that search result.
/// </summary>
/// <param name="queryUnit">
/// Query pattern to classify. Its Description is overwritten with the
/// snippet of <paramref name="searchResult"/>.
/// </param>
/// <param name="classifierName">Name of the classifier passed on to ClassifyQueryUnit.</param>
/// <param name="searchResult">Web search result whose Snippet is classified.</param>
/// <returns>
/// One WebSearchClassifyResult per classifier result, or null when the
/// snippet is null/empty or the classifier produced no results.
/// </returns>
public static List<WebSearchClassifyResult> ClassifyWebSearchQueryUnit(RestaurantQueryPattern queryUnit, string classifierName, WebSearchResult searchResult)
{
    if (string.IsNullOrEmpty(searchResult.Snippet))
    {
        log.ErrorFormat("[ClassifyWebSearchQueryUnit] WebSearchResult Snippet can't be null or empty.");
        return null;
    }

    queryUnit.Description = searchResult.Snippet;
    log.DebugFormat("[ClassifyWebSearchQueryUnit] classifierName={0}, queryUnit={1}.", classifierName, queryUnit.ToString());

    var classifierResults = ClassifyQueryUnit(queryUnit, classifierName);

    // The original test was "!= null || Count > 0", which dereferenced a null
    // list whenever classification failed. Both conditions must hold.
    if (classifierResults == null || classifierResults.Count == 0)
    {
        log.WarnFormat("[ClassifyWebSearchQueryUnit] Classifier results is null or empty. queryUnit.Name={0}, queryUnit.Description={1}.", queryUnit.Name, queryUnit.Description);
        return null;
    }

    List<WebSearchClassifyResult> returnList = new List<WebSearchClassifyResult>(classifierResults.Count);
    foreach (var result in classifierResults)
    {
        returnList.Add(new WebSearchClassifyResult(result, searchResult));
    }

    return returnList;
}
/// <summary>
/// Classifies a query together with a web search result whose snippet is
/// an empty string and expects a null response.
/// </summary>
public void ClassifyWebSearchQueryUnitTests_WithEmptySnippetInputResultShouldBeNull()
{
    // Arrange
    var webSearchRes = new WebSearchResult();
    webSearchRes.SearchEngine = "Google";
    webSearchRes.Snippet = "";
    webSearchRes.Source = "www.test-source.com";

    var query = new RestaurantQueryPattern();
    query.Name = "פלאפל";
    query.Description = "פלאפל אכי טעים";

    // Act
    var response = Classifier.uClassify.ClassifyUtility.ClassifyWebSearchQueryUnit(query, "CuisinesSupervised1", webSearchRes);

    // Assert
    Assert.IsNull(response);
}
///// <summary>
///// Function classify all Restaurants with empty menus, and update restaurants cuisine
///// </summary>
//public void ClassifyEmptyRestaurants()
//{
//    List<RestaurantBasicData> restsList = restaurantsSearchUtilty.GetAllRestaurantsWithEmptyMenu();
//    List<string> safeCuisineList = new List<string>() {
//        "italian",
//        "falafel",
//        "cafe",
//        "shawarma",
//        "meat",
//        "kebab"
//    };
//    log.InfoFormat("[ClassifyEmptyRestaurants] Restaurants.Count={0}.", restsList.Count);
//    foreach(var rest in restsList)
//    {
//        var tempQueryPattern = new RestaurantQueryPattern(rest);
//        var result = ClassifyUtility.ClassifyUnit(tempQueryPattern, "CuisinesUnionTest");
//        if (result != null)
//        {
//            log.InfoFormat("[ClassifyEmptyRestaurants] Restaurant.Name={0}, Restaurant.Id={1}, BestResult={2}, ResultProbability={3}.", rest.Name, rest.Id.ToString(), result[0].className, result[0].p.ToString());
//            if (result[0].p > 0.2 && safeCuisineList.Contains(result[0].className))
//            {
//                rest.Source = "CuisineClassifier_2014.05.29";
//                if (rest.Cuisines == null) rest.Cuisines = new List<string>();
//                rest.Cuisines.Add(result[0].className);
//                m_serviceLayer.UpdateRestaurant(rest);
//            }
//        }
//        else
//        {
//            log.WarnFormat("[ClassifyEmptyRestaurants] Null result for Restaurant.Name={0}, Restaurant.Id={1}.", rest.Name, rest.Id.ToString());
//        }
//    }
//}

/// <summary>
/// Classifies each restaurant in the input list whose Source is set and
/// does not contain "BackOffice" (case-insensitive).
/// First step: classify by the restaurant's own query pattern. If the top
/// result's probability is above the threshold (0.5), its class name is
/// added to the restaurant's cuisines and the restaurant is updated.
/// Second step (top probability not above the threshold): take the
/// restaurant's stored search results, or run a Google search when there
/// are none, classify each usable snippet and keep the result with the
/// highest probability above the threshold. That result's class name and
/// search result are stored on the restaurant and the restaurant is updated.
/// Processing of the whole list stops when the classify limit (5000 calls)
/// or the Google search limit is reached. Any exception is logged (message
/// only) and ends the run.
/// </summary>
/// <param name="classifierName">Name of the classifier used for every classification call.</param>
/// <param name="restsList">Restaurants to classify.</param>
/// <param name="googleSearchLimit">Maximum number of Google searches to execute; 0 means no limit.</param>
public void ClassifyRestaurants(string classifierName, List<RestaurantBasicData> restsList, int googleSearchLimit = 0)
{
    try
    {
        log.InfoFormat("[ClassifyRestaurants] classifierName={0}, Restaurants.Count={1}.", classifierName, restsList.Count);

        int restCount = 0;
        int count = 0;          // number of Google searches executed
        int classifyCount = 0;  // number of classifier calls executed
        int classifyLimit = 5000;
        double threshold = 0.5;

        // Collected for bookkeeping only; no decision below reads this list.
        List<string> safeSourceList = new List<string>() { "www.mouse.co.il", "www.rest.co.il" };
        List<string> unsafeSourceList = new List<string>() { "plus.google.com", };
        WebSearchServices webSearchServ = new WebSearchServices();

        foreach (var rest in restsList)
        {
            // Skip restaurants without a source and those coming from BackOffice.
            if (rest.Source == null || rest.Source.IndexOf("BackOffice", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                continue;
            }

            restCount++;
            List<Classifier.uClassify.Models.Response.Class> bestClassifyResult = null;
            WebSearchResult bestGoogleSearch = null;
            var tempQueryPattern = new RestaurantQueryPattern(rest);

            if (classifyCount >= classifyLimit)
            {
                log.WarnFormat("[ClassifyRestaurants] Reach classify limit, classifyCount={0}.", classifyCount);
                break;
            }

            var result = ClassifyUtility.ClassifyUnit(tempQueryPattern, classifierName);
            classifyCount++;

            // An empty list is treated like a null result: indexing it would
            // throw and the outer catch would abort the whole batch.
            if (result == null || result.Count == 0)
            {
                log.WarnFormat("[ClassifyRestaurants] Null result for Restaurant.Name={0}, Restaurant.Id={1}, classifierName={2}.", rest.Name, rest.Id.ToString(), classifierName);
                continue;
            }

            bool needsWebSearch = result[0].p <= threshold;
            if (!needsWebSearch)
            {
                // First step succeeded: classified by restaurant info only.
                if (rest.Cuisines == null)
                {
                    rest.Cuisines = new List<string>();
                }
                rest.Cuisines.Add(result[0].className);

                log.InfoFormat("[ClassifyRestaurants] Classified only by rest info: Restaurant.Name={0}, Restaurant.Id={1}, BestResult={2}, ResultProbability={3}, classifierName={4}, classifyText={5}.", rest.Name, rest.Id.ToString(), result[0].className, result[0].p.ToString(), classifierName, tempQueryPattern.ToString());

                DateTime date = DateTime.UtcNow;
                rest.Source = "CuisineClassifier_" + date.Year.ToString() + "." + date.Month.ToString() + "." + date.Day.ToString();
                m_serviceLayer.UpdateRestaurant(rest);
                continue;
            }

            // Second step: use stored search results, or execute a Google search.
            List<WebSearchResult> searchResults = rest.SearchResults;
            if (searchResults == null)
            {
                if (googleSearchLimit != 0 && count >= googleSearchLimit)
                {
                    log.WarnFormat("[ClassifyRestaurants] Reach google search limit, count={0}.", count);
                    break;
                }

                count++;
                searchResults = webSearchServ.GoogleSearchRestaurantDescription(rest);
            }

            if (searchResults == null)
            {
                log.WarnFormat("[ClassifyRestaurants] searchResults is null, rest.Name={0}, rest.Id={1}.", rest.Name, rest.Id.ToString());
                continue;
            }

            foreach (var item in searchResults)
            {
                if (unsafeSourceList.Contains(item.Source) || string.IsNullOrEmpty(item.Snippet))
                {
                    continue;
                }

                tempQueryPattern.Description = item.Snippet;

                if (classifyCount >= classifyLimit)
                {
                    log.WarnFormat("[ClassifyRestaurants] Reach classify limit, classifyCount={0}.", classifyCount);
                    break; // leaves only the search-result loop
                }

                var tempResult = ClassifyUtility.ClassifyUnit(tempQueryPattern, classifierName);
                classifyCount++;

                if (tempResult != null
                    && tempResult.Count > 0
                    && tempResult[0].p > threshold
                    && (bestClassifyResult == null || bestClassifyResult[0].p < tempResult[0].p))
                {
                    bestClassifyResult = new List<Classifier.uClassify.Models.Response.Class>(tempResult);
                    bestGoogleSearch = item;
                }
            }

            if (bestClassifyResult == null || bestGoogleSearch == null)
            {
                continue;
            }

            if (!safeSourceList.Contains(bestGoogleSearch.Source))
            {
                safeSourceList.Add(bestGoogleSearch.Source);
            }

            log.InfoFormat("[ClassifyRestaurants] After Google Search, Restaurant.Name={0}, Restaurant.Id={1}, BestResult={2}, ResultProbability={3}, item.DisplayLink={4}, item.Snippet={5}.", rest.Name, rest.Id.ToString(), bestClassifyResult[0].className, bestClassifyResult[0].p.ToString(), bestGoogleSearch.Source, bestGoogleSearch.Snippet);

            if (bestClassifyResult[0].p > threshold)
            {
                if (rest.SearchResults == null)
                {
                    rest.SearchResults = new List<WebSearchResult>();
                }

                // When the candidates came from rest.SearchResults the best one
                // is already stored there; adding it again would duplicate it.
                if (!rest.SearchResults.Contains(bestGoogleSearch))
                {
                    rest.SearchResults.Add(bestGoogleSearch);
                }

                if (rest.Cuisines == null)
                {
                    rest.Cuisines = new List<string>();
                }
                rest.Cuisines.Add(bestClassifyResult[0].className);

                DateTime date = DateTime.UtcNow;
                rest.Source = "CuisineClassifier_" + date.Year.ToString() + "." + date.Month.ToString() + "." + date.Day.ToString();
                m_serviceLayer.UpdateRestaurant(rest);
            }
        }

        log.WarnFormat("[ClassifyRestaurants] Google searches count={0}, classifyCount ={1}, restCount={2}.", count, classifyCount, restCount);
    }
    catch (Exception e)
    {
        log.ErrorFormat("[ClassifyRestaurants] Exception={0}.", e.Message);
    }
}
/// <summary>
/// Trains the given classifier from stored search results of restaurants
/// that have a BackOffice source. For each restaurant, the restaurant's own
/// query pattern is classified with "CuisinesByDescription"; every stored
/// search snippet is classified with "CuisinesTest"; the snippet with the
/// highest probability above 0.2 whose top class equals the restaurant's
/// top class is used as a training sample for that class.
/// </summary>
/// <param name="classifierName">Name of the classifier that receives the training samples.</param>
public void TrainByGoogleSearchResult(string classifierName)
{
    List<RestaurantBasicData> restsList = restaurantsSearchUtilty.GetAllRestaurantsWithBackOfficeSource();
    if (restsList == null)
    {
        // Previously this case threw a NullReferenceException on restsList.Count.
        log.WarnFormat("[TrainByGoogleSearchResult] Restaurants list is null.");
        return;
    }

    List<string> safeSourceList = new List<string>();
    log.InfoFormat("[TrainByGoogleSearchResult] Restaurants.Count={0}.", restsList.Count);

    // NOTE(review): this instance is never used below; kept because the
    // constructor's side effects are not visible from here — confirm and remove.
    GoogleAPIs.CustomSearch.SearchUtilities customSearchUtil = new GoogleAPIs.CustomSearch.SearchUtilities();

    foreach (var rest in restsList)
    {
        var tempQueryPattern = new RestaurantQueryPattern(rest);
        var result = ClassifyUtility.ClassifyUnit(tempQueryPattern, "CuisinesByDescription");
        List<Classifier.uClassify.Models.Response.Class> bestClassifyResult = null;
        WebSearchResult bestGoogleSearch = null;

        // An empty result list is handled like a null one: result[0] would throw.
        if (result == null || result.Count == 0 || rest.SearchResults == null)
        {
            log.WarnFormat("[TrainByGoogleSearchResult] Null result for Restaurant.Name={0}, Restaurant.Id={1}.", rest.Name, rest.Id.ToString());
            continue;
        }

        log.InfoFormat("[TrainByGoogleSearchResult] Restaurant.Name={0}, Restaurant.Id={1}, BestResult={2}, ResultProbability={3}.", rest.Name, rest.Id.ToString(), result[0].className, result[0].p.ToString());

        foreach (var item in rest.SearchResults)
        {
            if (string.IsNullOrEmpty(item.Snippet))
            {
                continue;
            }

            var tempResult = ClassifyUtility.Classify(item.Snippet, "CuisinesTest");

            // Keep the most probable snippet that agrees with the restaurant's own top class.
            if (tempResult != null
                && tempResult.Count > 0
                && tempResult[0].p > 0.2
                && (bestClassifyResult == null || bestClassifyResult[0].p < tempResult[0].p)
                && result[0].className == tempResult[0].className)
            {
                bestClassifyResult = new List<Classifier.uClassify.Models.Response.Class>(tempResult);
                bestGoogleSearch = item;
            }
        }

        if (bestClassifyResult != null && bestGoogleSearch != null)
        {
            if (!safeSourceList.Contains(bestGoogleSearch.Source))
            {
                safeSourceList.Add(bestGoogleSearch.Source);
            }

            Classifier.uClassify.TrainUtility.Train(bestGoogleSearch.Snippet, bestClassifyResult[0].className, classifierName);
            log.InfoFormat("[TrainByGoogleSearchResult] After Google Search, Restaurant.Name={0}, Restaurant.Id={1}, BestResult={2}, ResultProbability={3}, item.DisplayLink={4}, item.Snippet={5}.", rest.Name, rest.Id.ToString(), bestClassifyResult[0].className, bestClassifyResult[0].p.ToString(), bestGoogleSearch.Source, bestGoogleSearch.Snippet);
        }
    }

    log.InfoFormat("[TrainByGoogleSearchResult] safeSourceList={0}.", String.Join(", ", safeSourceList.ToArray()));
}
/// <summary>
/// Builds the set of query patterns to classify from the restaurants
/// returned by FindAllRestaurantsWithoutCusine. A pattern is kept when its
/// Name or its Description is not null.
/// </summary>
/// <returns>
/// The non-empty list of query patterns, or null when the restaurant list
/// is null or no pattern was kept (a warning is logged in both cases).
/// </returns>
public List<RestaurantQueryPattern> GetClassifySet()
{
    //List<RestaurantBasicData> restsList = restaurantsSearchUtilty.GetAllRestaurantsWithEmptyMenu();
    List<RestaurantBasicData> restaurants = restaurantsSearchUtilty.FindAllRestaurantsWithoutCusine();
    if (restaurants == null)
    {
        log.WarnFormat("[GetClassifySet] Can't get restaurants with empty menus.");
        return null;
    }

    var patterns = new List<RestaurantQueryPattern>();
    foreach (var restaurant in restaurants)
    {
        var pattern = new RestaurantQueryPattern(restaurant);
        bool hasNoText = pattern.Name == null && pattern.Description == null;
        if (hasNoText)
        {
            continue;
        }

        patterns.Add(pattern);
    }

    if (patterns.Count == 0)
    {
        log.WarnFormat("[GetClassifySet] querySet is empty.");
        return null;
    }

    log.InfoFormat("[GetClassifySet] querySet.Count={0}.", patterns.Count);
    return patterns;
}