// Update results (TP, TN, FP, FN) for KNN.
// An ingredient is recommended for a test recipe when the "recommend" vote total of the
// voting neighbors is at least the "do not recommend" total (ties count as recommended).
//
// k               - number of nearest neighbors that vote; capped at the number of
//                   neighbors returned by knn.GetDistances so a large k cannot index
//                   past the end of the neighbor array
// test_data       - test rows; grouped by recipeId to rebuild each test recipe
// ingrNames       - one entry per feature (unique ingredient); only its length is used
// distance_choice - forwarded to knn.GetDistances
// train_data      - forwarded to knn.GetDistances
// voting          - Voting.Unweighted: every neighbor casts a vote of 1;
//                   otherwise every neighbor's vote is its Neighbors.distance value
// returns         - the Results accumulated through UpdateResults over every
//                   (test recipe, ingredient) pair, starting from new Results(0)
static Results GetKNNResults(int k, Data[] test_data, string[] ingrNames, DistanceChoice distance_choice, Data[] train_data, Voting voting)
{
    DataManager dm = new DataManager();
    KNN knn = new KNN();

    // keep track of results
    Results results = new Results(0);

    // the voting scheme is fixed for the whole call, so resolve it once
    // instead of once per (recipe, ingredient, neighbor)
    bool unweighted = voting.Equals(Voting.Unweighted);

    // iterate through all test recipes (test rows grouped by recipeId)
    foreach (IGrouping<int, Data> recipe in test_data.GroupBy(d => d.recipeId))
    {
        // current test recipe
        int[] current_recipe = dm.GetRecipe(recipe.ToArray());

        // neighbors of the current recipe; the voting loop below takes the first
        // entries as the nearest ones
        // NOTE(review): assumes GetDistances returns neighbors ordered nearest-first - confirm
        Neighbors[] distances = knn.GetDistances(distance_choice, current_recipe, train_data, voting);

        // never let more neighbors vote than actually exist
        int voters = Math.Min(k, distances.Length);

        // iterate through all features (unique ingredients)
        for (int i = 0; i < ingrNames.Length; i++)
        {
            // keep track of votes from neighboring recipes
            double recommended = 0;
            double not_recommended = 0;

            // nearest neighbors vote
            for (int top = 0; top < voters; top++)
            {
                // NOTE(review): in weighted mode the raw `distance` value is the vote weight;
                // that favors closer neighbors only if GetDistances stores a similarity or
                // weight in that field - confirm
                double weight = unweighted ? 1.0 : distances[top].distance;

                if (distances[top].recipe.Contains(i))
                {
                    // neighbor contains the ingredient: recommend
                    recommended += weight;
                }
                else
                {
                    // neighbor lacks the ingredient: do not recommend
                    not_recommended += weight;
                }
            }

            // with no voters (k <= 0 or no neighbors) both totals are 0 and the
            // ingredient counts as recommended, same as the tie rule
            results = UpdateResults(results, current_recipe.Contains(i), recommended >= not_recommended);
        }
    }

    return results;
}
// Find the optimal k for KNN.
// Runs 5-fold cross validation over the training data for every k in 1..max_k,
// averages the per-fold f1 scores for each k, and prints the k with the highest average
// (the smallest such k when several share the maximum).
//
// distance_choice - distance measure, forwarded to GetKNNResults
// voting          - voting scheme, forwarded to GetKNNResults
// max_k           - largest k to try; must be at least 1
//
// Throws ArgumentOutOfRangeException when max_k is less than 1.
public void GetOptimalK(DistanceChoice distance_choice, Voting voting, int max_k)
{
    // without at least one candidate k there is no f1 score to maximize
    if (max_k < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(max_k), max_k, "max_k must be at least 1.");
    }

    Console.WriteLine("Determining optimal k for " + distance_choice.ToString() + " distance");
    Console.WriteLine(DateTime.Now.ToLongTimeString());

    MLContext ml = new MLContext();
    DataManager dm = new DataManager();

    // get training data
    IDataView train_dataView = dm.GetDataView(ModelChoice.KNN, ml, DataPurpose.TRAIN);

    // get features
    IDataView features = dm.GetDataView(ModelChoice.KNN, ml, DataPurpose.FEATURES);
    string[] ingrNames = features.GetColumn<string>(features.Schema["ingrName"]).ToArray();

    // set number of folds to 5
    int num_folds = 5;
    Console.WriteLine(num_folds + "-fold cross validation...");

    // cross validation split, using recipeId as the sampling key
    var folds = ml.Data.CrossValidationSplit(train_dataView, num_folds, samplingKeyColumnName: "recipeId");

    // keep track of f1 scores for each value of k (index k - 1)
    double[] f1s = new double[max_k];

    // try different values of k
    for (int k = 1; k <= max_k; k++)
    {
        // show progress
        Console.WriteLine("\nk = " + k + "\t" + DateTime.Now.ToLongTimeString());

        // TP, TN, FP, FN per fold, used to derive the f1 score of each fold
        Results[] fold_results = new Results[num_folds];

        // NOTE(review): neither the fold data nor the neighbor distances depend on k,
        // yet both are recomputed for every k; caching them per fold would cut the runtime
        for (int fold = 0; fold < num_folds; fold++)
        {
            // training and validation data for the current fold
            Data[] train_data = dm.GetData(folds[fold].TrainSet, features);
            Data[] validation_data = dm.GetData(folds[fold].TestSet, features);

            // evaluate the validation recipes against this fold's training recipes;
            // GetKNNResults starts every fold from a fresh Results(0)
            fold_results[fold] = GetKNNResults(k, validation_data, ingrNames, distance_choice, train_data, voting);
        }

        f1s[k - 1] = fold_results.Average(a => a.getF1());
        Console.WriteLine("Average f1: " + f1s[k - 1]);
    }

    // display the optimal k (first k that reaches the maximum average f1)
    double best_f1 = f1s.Max();
    int best_k = Array.IndexOf(f1s, best_f1) + 1;
    Console.WriteLine("\nOPTIMAL k = " + best_k + " with an f1 score of " + best_f1);
}