// Evaluate Modified KNN (dynamic k).
// Requests modified-KNN recommendations for every recipe in the KNN test set,
// folds each answer into a running Results value via GetResults, then prints
// the minimum, maximum and average k reported back for the recipes followed by
// the accumulated results.
//   distance_choice - distance measure forwarded to the recommender
//   voting          - voting scheme forwarded to the recommender
// When the test set contains no recipes a message is printed and the method
// returns without showing statistics (Min/Max/Average would throw on an empty
// sequence).
public void EvaluateModifiedKNN(DistanceChoice distance_choice, Voting voting)
{
    Console.WriteLine("Evalulating Modified KNN...");

    Results results = new Results(0);
    KNN knn = new KNN();
    DataManager dm = new DataManager();

    // get test data
    Data[] test_data = dm.GetRecipes(ModelChoice.KNN, DataPurpose.TEST);

    // get features (ingredients)
    // NOTE(review): the returned names are never read in this method; the call
    // is kept only in case GetFeatures initialises state inside DataManager - confirm
    // and drop it if it is a pure getter.
    string[] ingrNames = dm.GetFeatures();

    // group data by recipeId
    IGrouping<int, Data>[] recipes = test_data.GroupBy(d => d.recipeId).ToArray();

    if (recipes.Length == 0)
    {
        Console.WriteLine("\nNo test recipes to evaluate.\n");
        return;
    }

    // keep track of the k value reported for each recipe
    int[] new_ks = new int[recipes.Length];

    // iterate through all test recipes
    for (int r = 0; r < recipes.Length; r++)
    {
        // current recipe
        int[] current_recipe = dm.GetRecipe(recipes[r].ToArray());

        // get recommendations; the recommender writes its k into new_ks[r]
        Recommendation[] recommendations = knn.GetRecommendations_ModifiedKNN(current_recipe, distance_choice, voting, ref new_ks[r]);

        results = GetResults(results, recommendations, current_recipe);
    }

    Console.WriteLine("\nMin k: " + new_ks.Min());
    Console.WriteLine("Max k: " + new_ks.Max());
    Console.WriteLine("Avg k: " + new_ks.Average() + "\n");

    results.ShowResults();
}
// Get the ingredient recommendations for a recipe from the selected model.
//   model_choice - which recommender to use (NB, KNN or MKNN)
//   recipe       - the recipe, passed through unchanged to the recommender
// Returns whatever the chosen recommender produces, or null when model_choice
// is none of the three handled values.
// NOTE(review): the ordering of the returned array is decided by the
// recommender implementations, which are not visible here.
public Recommendation[] GetRecommendations(ModelChoice model_choice, int[] recipe)
{
    // Naive Bayes
    if (model_choice.Equals(ModelChoice.NB))
    {
        NaiveBayes bayes = new NaiveBayes();
        return bayes.RecipeRecommendations(bayes.GetModel(), recipe, true, true, false);
    }

    // k Nearest Neighbors with a fixed k of 6
    if (model_choice.Equals(ModelChoice.KNN))
    {
        KNN fixed_knn = new KNN();
        return fixed_knn.GetRecommendations(6, DistanceChoice.Jaccard, recipe, Voting.Unweighted);
    }

    // Modified k Nearest Neighbors (dynamic k); the k it reports back is discarded
    if (model_choice.Equals(ModelChoice.MKNN))
    {
        KNN dynamic_knn = new KNN();
        int reported_k = 0;
        return dynamic_knn.GetRecommendations_ModifiedKNN(recipe, DistanceChoice.Jaccard_Similarity, Voting.Unweighted, ref reported_k);
    }

    // unknown model choice
    return null;
}
// Ingredients to ADD to or REMOVE from a recipe.
// Resolves the ingredient names in recipe_str to ids using the training data,
// asks the selected model for recommendations and prints either the top
// ingredients to add or the recipe's own ingredients ordered by ascending score.
//   top                  - number of ingredients to print when adding
//   recipe_str           - ingredient names of the input recipe; entries are
//                          overwritten in place with the trimmed, lower-cased
//                          (or fuzzy-matched) name that was actually used
//   model_choice         - recommender to use; values GetRecommendations does not
//                          handle make the method return after echoing the recipe
//   add                  - true: print ingredients to add, false: print removal order
//   include_recipe_ingrs - when false, ingredients already in the recipe are
//                          skipped and do not count towards `top`
// Prints "Ingredient [...] was not found" and returns when a name cannot be
// resolved (a null entry is reported the same way).
public void TopRecommendations(int top, string[] recipe_str, ModelChoice model_choice, bool add, bool include_recipe_ingrs)
{
    DataManager dm = new DataManager();

    // get training data
    Data[] data = dm.GetRecipes(model_choice, DataPurpose.TRAIN);
    Console.WriteLine("You model choice: " + model_choice.ToString());

    // input recipe
    int[] recipe = new int[recipe_str.Length];

    // features (ingredients) are only needed for the fuzzy fallback, so load them on demand
    string[] features = null;

    for (int i = 0; i < recipe_str.Length; i++)
    {
        int match = -1;

        if (recipe_str[i] != null)
        {
            // trim and make lowercase
            string wanted = recipe_str[i].Trim().ToLower();
            recipe_str[i] = wanted;

            // find ingredient by exact name
            match = Array.FindIndex(data, d => string.Equals(d.ingredient.name, wanted));

            if (match < 0)
            {
                if (features == null)
                {
                    features = GetAllIngredients(false);
                }

                // try finding a similar ingredient
                foreach (string ingr in features)
                {
                    if (ingr.StartsWith(wanted) || ingr.Contains(wanted))
                    {
                        int candidate = Array.FindIndex(data, d => string.Equals(d.ingredient.name, ingr));

                        // a feature that does not occur in the training data cannot
                        // be mapped to an id, so keep looking
                        if (candidate >= 0)
                        {
                            recipe_str[i] = ingr;
                            match = candidate;
                            break;
                        }
                    }
                }
            }
        }

        // ingredient not found
        if (match < 0)
        {
            Console.WriteLine("Ingredient [" + recipe_str[i] + "] was not found");
            return;
        }

        recipe[i] = data[match].ingredient.id;
    }

    Console.Write("Your recipe: ");
    PrintRecipe(recipe_str);

    // ingredient recommendations from the selected model (null for an unhandled model)
    Recommendation[] recommendations = GetRecommendations(model_choice, recipe);
    if (recommendations == null)
    {
        return;
    }

    // Ingredients to Add
    if (add)
    {
        Console.WriteLine("Your recommendations:");

        int shown = 0;
        foreach (Recommendation candidate in recommendations)
        {
            // stop once `top` ingredients were printed; running out of
            // recommendations simply ends the list early
            if (shown >= top)
            {
                break;
            }

            // skip ingredients in recipe
            if (!include_recipe_ingrs && recipe_str.Contains(candidate.ingredient.name))
            {
                continue;
            }

            Console.WriteLine(candidate.ingredient.name);
            shown++;
        }
    }
    // Ingredients to Remove
    else
    {
        Console.WriteLine("Ingredients ordered by what to remove first:");

        // only keep scores of ingredients in input recipe, sorted by score
        Recommendation[] removal_order = recommendations
            .Where(d => recipe.Contains(d.ingredient.id))
            .OrderBy(d => d.score)
            .ToArray();

        // the filtered list can be shorter than the recipe (duplicate input
        // ingredients, or ingredients the model returned no score for)
        int to_print = Math.Min(recipe.Length, removal_order.Length);
        for (int i = 0; i < to_print; i++)
        {
            Console.WriteLine(removal_order[i].ingredient.name);
        }
    }

    Console.WriteLine();
}
// Update results (TP, TN, FP, FN) for KNN.
// For every test recipe and every feature index, the first k neighbours vote on
// whether the ingredient is recommended; the ingredient is recommended when the
// "recommended" votes are at least as large as the "not recommended" votes
// (ties recommend). Each decision is folded into the running Results value
// through UpdateResults together with whether the recipe really contains it.
//   k               - number of neighbours that vote; capped at the number of
//                     neighbours actually returned
//   test_data       - test rows, grouped here by recipeId
//   ingrNames       - feature names; only the count is used, the feature index
//                     itself is what gets looked up in the recipes
//   distance_choice - distance measure forwarded to KNN.GetDistances
//   train_data      - training rows forwarded to KNN.GetDistances
//   voting          - Unweighted counts one vote per neighbour, anything else
//                     adds the neighbour's `distance` value as its vote
// Returns the accumulated results.
static Results GetKNNResults(int k, Data[] test_data, string[] ingrNames, DistanceChoice distance_choice, Data[] train_data, Voting voting)
{
    DataManager dm = new DataManager();
    KNN knn = new KNN();

    // keep track of results
    Results results = new Results(0);

    // the voting scheme does not change while evaluating
    bool unweighted = voting.Equals(Voting.Unweighted);

    // group test data by recipeId
    IGrouping<int, Data>[] recipes = test_data.GroupBy(d => d.recipeId).ToArray();

    // iterate through all test recipes
    foreach (IGrouping<int, Data> recipe in recipes)
    {
        // current test recipe
        int[] current_recipe = dm.GetRecipe(recipe.ToArray());

        // calculate all distances to the training recipes
        // (presumably returned nearest-first; verify against KNN.GetDistances)
        Neighbors[] distances = knn.GetDistances(distance_choice, current_recipe, train_data, voting);

        // never read past the neighbours that exist when k exceeds their number
        int voters = Math.Min(k, distances.Length);

        // iterate through all features (unique ingredients)
        for (int i = 0; i < ingrNames.Length; i++)
        {
            // keep track of votes from neighboring recipes
            double recommended = 0;
            double not_recommended = 0;

            // k nearest neighbors vote
            for (int top = 0; top < voters; top++)
            {
                double vote = unweighted ? 1 : distances[top].distance;

                if (distances[top].recipe.Contains(i))
                {
                    // recommend ingredient
                    recommended += vote;
                }
                else
                {
                    // do not recommend
                    not_recommended += vote;
                }
            }

            results = UpdateResults(results, current_recipe.Contains(i), recommended >= not_recommended);
        }
    }

    return results;
}
// find optimal k
// Runs 5-fold cross validation (split on "recipeId") over the KNN training data
// for every k from 1 to max_k, prints the average F1 score of the folds for each
// k, and finally prints the k with the highest average F1.
//   distance_choice - distance measure forwarded to KNN.GetDistances
//   voting          - Unweighted counts one vote per neighbour, anything else
//                     adds the neighbour's `distance` value as its vote
//   max_k           - largest k to try; must be at least 1
// Throws ArgumentOutOfRangeException when max_k is smaller than 1.
public void GetOptimalK(DistanceChoice distance_choice, Voting voting, int max_k)
{
    // with no k to try there is no score to take a maximum of
    if (max_k < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(max_k), max_k, "max_k must be at least 1");
    }

    Console.WriteLine("Determining optimal k for " + distance_choice.ToString() + " distance");
    Console.WriteLine(DateTime.Now.ToLongTimeString());

    KNN knn = new KNN();
    MLContext ml = new MLContext();
    DataManager dm = new DataManager();

    // get training data
    IDataView train_dataView = dm.GetDataView(ModelChoice.KNN, ml, DataPurpose.TRAIN);

    // get features
    IDataView features = dm.GetDataView(ModelChoice.KNN, ml, DataPurpose.FEATURES);
    string[] ingrNames = features.GetColumn<string>(features.Schema["ingrName"]).ToArray();

    // set number of folds to 5
    int num_folds = 5;
    Console.WriteLine(num_folds + "-fold cross validation...");

    // Cross validation split
    var folds = ml.Data.CrossValidationSplit(train_dataView, num_folds, samplingKeyColumnName: "recipeId");

    // The folds are the same for every k, so extract the training rows and the
    // validation recipes of each fold once instead of once per k.
    Data[][] fold_train = new Data[num_folds][];
    int[][][] fold_recipes = new int[num_folds][][];
    for (int fold = 0; fold < num_folds; fold++)
    {
        // training data for this fold
        fold_train[fold] = dm.GetData(folds[fold].TrainSet, features);

        // validation data for this fold, grouped into recipes by recipeId
        Data[] validation_data = dm.GetData(folds[fold].TestSet, features);
        fold_recipes[fold] = validation_data
            .GroupBy(d => d.recipeId)
            .Select(g => dm.GetRecipe(g.ToArray()))
            .ToArray();
    }

    // the voting scheme does not change while searching
    bool unweighted = voting.Equals(Voting.Unweighted);

    // keep track of f1 scores for each value of k
    double[] f1s = new double[max_k];

    // try different values of k
    for (int k = 1; k <= max_k; k++)
    {
        // show progress
        Console.WriteLine("\nk = " + k + "\t" + DateTime.Now.ToLongTimeString());

        // keep track of fold results (update TP, TN, FP, FN to later determine f1 score)
        // NOTE(review): the elements are never assigned before the first
        // UpdateResults call, so it receives default(Results) - confirm that
        // UpdateResults / Results cope with that.
        Results[] fold_results = new Results[num_folds];

        // iterate through each fold
        for (int fold = 0; fold < num_folds; fold++)
        {
            Data[] train_data = fold_train[fold];

            // iterate through validation recipes for current fold
            foreach (int[] recipe in fold_recipes[fold])
            {
                // calculate distances between validation recipe and training recipes
                // (presumably returned nearest-first; verify against KNN.GetDistances)
                Neighbors[] distances = knn.GetDistances(distance_choice, recipe, train_data, voting);

                // never read past the neighbours that exist when k exceeds their number
                int voters = Math.Min(k, distances.Length);

                // iterate through all features (unique ingredients)
                for (int i = 0; i < ingrNames.Length; i++)
                {
                    // keep track of votes
                    double recommended = 0;
                    double not_recommended = 0;

                    // k nearest neighbors vote
                    for (int top = 0; top < voters; top++)
                    {
                        double vote = unweighted ? 1 : distances[top].distance;

                        if (distances[top].recipe.Contains(i))
                        {
                            // recommend ingredient
                            recommended += vote;
                        }
                        else
                        {
                            // do not recommend ingredient
                            not_recommended += vote;
                        }
                    }

                    // update results for current fold (ties recommend)
                    fold_results[fold] = UpdateResults(fold_results[fold], recipe.Contains(i), recommended >= not_recommended);
                }
            }
        }

        f1s[k - 1] = fold_results.Average(a => a.getF1());
        Console.WriteLine("Average f1: " + f1s[k - 1]);
    }

    // display the optimal k (first k that reaches the best average f1)
    double best_f1 = f1s.Max();
    int best_k = Array.IndexOf(f1s, best_f1) + 1;
    Console.WriteLine("\nOPTIMAL k = " + best_k + " with an f1 score of " + best_f1);
}