Example #1
0
        // Evaluate Modified KNN (dynamic k)
        //
        // Loads the KNN test set, groups it by recipeId, asks the modified KNN for
        // recommendations for every test recipe and folds each outcome into one
        // Results accumulator via GetResults. The int passed by ref to
        // GetRecommendations_ModifiedKNN is collected per recipe (presumably the k
        // the model settled on - confirm against KNN) and its min / max / average
        // are printed before the accumulated results are shown.
        //
        // distance_choice : distance measure forwarded to the modified KNN
        // voting          : voting scheme forwarded to the modified KNN
        public void EvaluateModifiedKNN(DistanceChoice distance_choice, Voting voting)
        {
            Console.WriteLine("Evalulating Modified KNN...");
            Results results = new Results(0);

            KNN         knn = new KNN();
            DataManager dm  = new DataManager();

            // get test data and group it by recipeId
            Data[] test_data = dm.GetRecipes(ModelChoice.KNN, DataPurpose.TEST);
            IGrouping<int, Data>[] recipes = test_data.GroupBy(d => d.recipeId).ToArray();

            // Min/Max/Average throw on an empty sequence, so bail out early when
            // there is nothing to evaluate instead of crashing after the loop.
            if (recipes.Length == 0)
            {
                Console.WriteLine("\nNo test recipes found; nothing to evaluate.\n");
                return;
            }

            // one k value per test recipe, filled in through the ref argument
            int[] new_ks = new int[recipes.Length];

            // iterate through all test recipes
            for (int index = 0; index < recipes.Length; index++)
            {
                // current recipe
                int[] current_recipe = dm.GetRecipe(recipes[index].ToArray());

                // get recommendations
                Recommendation[] recommendations = knn.GetRecommendations_ModifiedKNN(current_recipe, distance_choice, voting, ref new_ks[index]);

                results = GetResults(results, recommendations, current_recipe);
            }

            Console.WriteLine("\nMin k: " + new_ks.Min());
            Console.WriteLine("Max k: " + new_ks.Max());
            Console.WriteLine("Avg k: " + new_ks.Average() + "\n");

            results.ShowResults();
        }
        // Get ingredients sorted by what is most recommended
        //
        // Routes the recipe to the model selected by model_choice and hands back
        // whatever that model returns:
        //   NB   -> NaiveBayes.RecipeRecommendations on a freshly built model
        //   KNN  -> KNN.GetRecommendations with a fixed first argument of 6
        //           (presumably k - confirm), Jaccard distance, unweighted voting
        //   MKNN -> KNN.GetRecommendations_ModifiedKNN with Jaccard similarity and
        //           unweighted voting; the value written to the ref argument is discarded
        // Any other model_choice yields null.
        public Recommendation[] GetRecommendations(ModelChoice model_choice, int[] recipe)
        {
            if (model_choice.Equals(ModelChoice.NB))
            {
                NaiveBayes bayes = new NaiveBayes();
                return bayes.RecipeRecommendations(bayes.GetModel(), recipe, true, true, false);
            }

            if (model_choice.Equals(ModelChoice.KNN))
            {
                KNN classifier = new KNN();
                return classifier.GetRecommendations(6, DistanceChoice.Jaccard, recipe, Voting.Unweighted);
            }

            if (model_choice.Equals(ModelChoice.MKNN))
            {
                KNN classifier = new KNN();
                int chosen_k   = 0;
                return classifier.GetRecommendations_ModifiedKNN(recipe, DistanceChoice.Jaccard_Similarity, Voting.Unweighted, ref chosen_k);
            }

            // unrecognised model: callers receive null, exactly as before
            return null;
        }
        // Ingredients to ADD to or REMOVE from a recipe
        //
        // Resolves every entry of recipe_str to an ingredient id using the training
        // data, runs the selected model and prints either the best ingredients to
        // add or the recipe's own ingredients ordered by what to remove first.
        //
        // top                  : number of recommendations to print when adding
        // recipe_str           : ingredient names typed by the user; the array is
        //                        normalised IN PLACE (trimmed, lower-cased, and replaced
        //                        by the matched ingredient name on a fuzzy match)
        // model_choice         : NB, KNN or MKNN; any other value prints nothing further
        // add                  : true -> list ingredients to add, false -> to remove
        // include_recipe_ingrs : when false, ingredients already in the recipe are
        //                        skipped (and do not count towards top)
        //
        // Prints "Ingredient [x] was not found" and returns if a name cannot be
        // resolved. Blank or null names are treated as not found.
        public void TopRecommendations(int top, string[] recipe_str, ModelChoice model_choice, bool add, bool include_recipe_ingrs)
        {
            DataManager dm = new DataManager();

            // get training data
            Data[] data = dm.GetRecipes(model_choice, DataPurpose.TRAIN);

            Console.WriteLine("You model choice: " + model_choice.ToString());

            // input recipe
            int[]    recipe   = new int[recipe_str.Length];
            string[] features = null; // all ingredient names, loaded only if a fuzzy match is needed

            for (int i = 0; i < recipe_str.Length; i++)
            {
                // trim and make lowercase
                string wanted = (recipe_str[i] ?? string.Empty).Trim().ToLower();
                recipe_str[i] = wanted;

                // index into data of a row carrying the resolved ingredient, -1 if none
                int match = -1;

                // a blank name would "match" every ingredient via Contains(""), so never search for it
                if (wanted.Length > 0)
                {
                    // exact match first
                    match = Array.FindIndex(data, d => d.ingredient.name.Equals(wanted));

                    if (match < 0)
                    {
                        if (features == null)
                        {
                            features = GetAllIngredients(false);
                        }

                        // try finding a similar ingredient: first feature that contains the
                        // typed text AND is actually present in the training data
                        foreach (string ingr in features)
                        {
                            if (!ingr.Contains(wanted))
                            {
                                continue;
                            }

                            int candidate = Array.FindIndex(data, d => d.ingredient.name.Equals(ingr));
                            if (candidate >= 0)
                            {
                                recipe_str[i] = ingr;
                                match         = candidate;
                                break;
                            }
                        }
                    }
                }

                // ingredient not found
                if (match < 0)
                {
                    Console.WriteLine("Ingredient [" + recipe_str[i] + "] was not found");
                    return;
                }

                recipe[i] = data[match].ingredient.id;
            }

            Console.Write("Your recipe: ");
            PrintRecipe(recipe_str);

            // keep track of ingredient recommendations
            Recommendation[] recommendations = null;
            // Naive Bayes
            if (model_choice.Equals(ModelChoice.NB))
            {
                NaiveBayes nb = new NaiveBayes();
                recommendations = nb.RecipeRecommendations(nb.GetModel(), recipe, true, true, false);
            }
            // k Nearest Neighbors
            else if (model_choice.Equals(ModelChoice.KNN))
            {
                KNN knn = new KNN();
                recommendations = knn.GetRecommendations(6, DistanceChoice.Jaccard, recipe, Voting.Unweighted);
            }
            // Modified k Nearest Neighbors (dynamic k)
            else if (model_choice.Equals(ModelChoice.MKNN))
            {
                KNN knn   = new KNN();
                int new_k = 0;
                recommendations = knn.GetRecommendations_ModifiedKNN(recipe, DistanceChoice.Jaccard_Similarity, Voting.Unweighted, ref new_k);
            }
            else
            {
                return;
            }

            // Ingredients to Add
            if (add)
            {
                Console.WriteLine("Your recommendations:");

                // Print up to `top` entries. Skipped recipe ingredients do not count,
                // and the walk stops at the end of the array instead of running past it.
                int shown = 0;
                for (int i = 0; i < recommendations.Length && shown < top; i++)
                {
                    string name = recommendations[i].ingredient.name;

                    // skip ingredients in recipe
                    if (!include_recipe_ingrs && recipe_str.Contains(name))
                    {
                        continue;
                    }

                    Console.WriteLine(name);
                    shown++;
                }
            }
            // Ingredients to Remove
            else
            {
                Console.WriteLine("Ingredients ordered by what to remove first:");

                // only keep scores of ingredients in input recipe, lowest score first
                Recommendation[] removable = recommendations
                    .Where(d => recipe.Contains(d.ingredient.id))
                    .OrderBy(d => d.score)
                    .ToArray();

                // iterate the filtered array itself: its length can differ from
                // recipe.Length (duplicate inputs, ingredients without a score)
                foreach (Recommendation candidate in removable)
                {
                    Console.WriteLine(candidate.ingredient.name);
                }
            }
            Console.WriteLine();
        }
Example #4
0
        // Update results (TP, TN, FP, FN) for KNN
        // ingredients are recommended if the majority of neighbors contain the ingredient
        //
        // For every recipe in test_data (grouped by recipeId) the neighbours returned
        // by KNN.GetDistances are consulted, and for every feature index i in
        // [0, ingrNames.Length) the first k neighbours vote on whether ingredient i
        // belongs in the recipe. The vote (ties count as "recommended") is compared
        // with whether the test recipe really contains i and fed to UpdateResults.
        //
        // k               : number of neighbours allowed to vote; clamped to the number
        //                   of neighbours actually returned
        // test_data       : rows of the recipes to evaluate
        // ingrNames       : feature list; only its length is used here
        //                   (assumes ingredient ids are 0..Length-1 - TODO confirm)
        // distance_choice : distance measure forwarded to GetDistances
        // train_data      : rows of the recipes that act as neighbours
        // voting          : Unweighted -> each neighbour counts 1,
        //                   otherwise each neighbour counts its `distance` value
        // returns         : the accumulated Results
        static Results GetKNNResults(int k, Data[] test_data, string[] ingrNames, DistanceChoice distance_choice, Data[] train_data, Voting voting)
        {
            DataManager dm  = new DataManager();
            KNN         knn = new KNN();
            // keep track of results
            Results results = new Results(0);

            // the voting mode does not change while evaluating, so decide it once
            bool unweighted = voting.Equals(Voting.Unweighted);

            // group test data by recipeId
            IGrouping<int, Data>[] recipes = test_data.GroupBy(d => d.recipeId).ToArray();

            // iterate through all test recipes
            foreach (IGrouping<int, Data> recipe in recipes)
            {
                // current test recipe
                int[] current_recipe = dm.GetRecipe(recipe.ToArray());

                // calculate all distances; neighbours are expected to come back sorted
                // by closeness to the current recipe (per GetDistances - confirm)
                Neighbors[] distances = knn.GetDistances(distance_choice, current_recipe, train_data, voting);

                // never index past the neighbours we actually have
                int voters = Math.Min(k, distances.Length);

                // iterate through all features (unique ingredients)
                for (int i = 0; i < ingrNames.Length; i++)
                {
                    // keep track of votes from neighboring recipes
                    double recommended     = 0;
                    double not_recommended = 0;

                    // k nearest neighbors vote
                    for (int top = 0; top < voters; top++)
                    {
                        // NOTE(review): weighted voting adds the raw `distance`; if that is a
                        // dissimilarity, farther neighbours weigh more - confirm intent.
                        double weight = unweighted ? 1.0 : distances[top].distance;

                        if (distances[top].recipe.Contains(i))
                        {
                            recommended += weight;
                        }
                        else
                        {
                            not_recommended += weight;
                        }
                    }
                    results = UpdateResults(results, current_recipe.Contains(i), recommended >= not_recommended);
                }
            }
            return results;
        }
Example #5
0
        // find optimal k
        //
        // Runs 5-fold cross validation (split on recipeId) over the KNN training data
        // for every k in [1, max_k]. For each k, every validation recipe's neighbours
        // vote on every feature index, the outcome is accumulated per fold through
        // UpdateResults, and the fold F1 scores are averaged. Progress and the k with
        // the highest average F1 (lowest such k on ties) are written to the console.
        //
        // distance_choice : distance measure forwarded to KNN.GetDistances
        // voting          : Unweighted -> each neighbour counts 1,
        //                   otherwise each neighbour counts its `distance` value
        // max_k           : largest k to try; must be at least 1
        //
        // Throws ArgumentOutOfRangeException when max_k < 1.
        public void GetOptimalK(DistanceChoice distance_choice, Voting voting, int max_k)
        {
            // without at least one k there is no score to maximise
            if (max_k < 1)
            {
                throw new ArgumentOutOfRangeException(nameof(max_k), max_k, "max_k must be at least 1.");
            }

            Console.WriteLine("Determining optimal k for " + distance_choice.ToString() + " distance");
            Console.WriteLine(DateTime.Now.ToLongTimeString());

            KNN         knn = new KNN();
            MLContext   ml  = new MLContext();
            DataManager dm  = new DataManager();

            // get training data
            IDataView train_dataView = dm.GetDataView(ModelChoice.KNN, ml, DataPurpose.TRAIN);
            // get features
            IDataView features = dm.GetDataView(ModelChoice.KNN, ml, DataPurpose.FEATURES);

            string[] ingrNames = features.GetColumn <string>(features.Schema["ingrName"]).ToArray();
            // set number of folds to 5
            int num_folds = 5;

            Console.WriteLine(num_folds + "-fold cross validation...");
            // Cross validation split
            var folds = ml.Data.CrossValidationSplit(train_dataView, num_folds, samplingKeyColumnName: "recipeId");

            // The fold contents do not depend on k, so materialise them once instead
            // of re-reading every fold for every k.
            Data[][] fold_train = new Data[num_folds][];
            IGrouping<int, Data>[][] fold_recipes = new IGrouping<int, Data>[num_folds][];
            for (int fold = 0; fold < num_folds; fold++)
            {
                // training rows for this fold
                fold_train[fold] = dm.GetData(folds[fold].TrainSet, features);
                // validation rows for this fold, grouped by recipeId
                fold_recipes[fold] = dm.GetData(folds[fold].TestSet, features).GroupBy(d => d.recipeId).ToArray();
            }

            bool unweighted = voting.Equals(Voting.Unweighted);

            // keep track of f1 scores for each value of k
            double[] f1s = new double[max_k];
            // try different values of k
            for (int k = 1; k <= max_k; k++)
            {
                // show progress
                Console.WriteLine("\nk = " + k + "\t" + DateTime.Now.ToLongTimeString());

                // keep track of fold results (update TP, TN, FP, FN to later determine f1 score)
                Results[] fold_results = new Results[num_folds];

                // iterate through each fold
                for (int fold = 0; fold < num_folds; fold++)
                {
                    // start from an empty accumulator, as the other evaluators do, so a
                    // fold without validation recipes still has a Results to score
                    // (assumes Results(0) is the empty accumulator - confirm)
                    fold_results[fold] = new Results(0);

                    // iterate through test recipes for current fold
                    foreach (IGrouping<int, Data> current in fold_recipes[fold])
                    {
                        // current recipe
                        int[] recipe = dm.GetRecipe(current.ToArray());

                        // calculate distances between test recipe and training recipes, and get sorted neighbors
                        Neighbors[] distances = knn.GetDistances(distance_choice, recipe, fold_train[fold], voting);

                        // never index past the neighbours we actually have
                        int voters = Math.Min(k, distances.Length);

                        // iterate through all features (unique ingredients)
                        for (int i = 0; i < ingrNames.Length; i++)
                        {
                            // keep track of votes
                            double recommended     = 0;
                            double not_recommended = 0;

                            // the k nearest neighbors vote
                            for (int top = 0; top < voters; top++)
                            {
                                double weight = unweighted ? 1.0 : distances[top].distance;

                                if (distances[top].recipe.Contains(i))
                                {
                                    recommended += weight;
                                }
                                else
                                {
                                    not_recommended += weight;
                                }
                            }
                            // update results for current fold (ties count as recommended)
                            fold_results[fold] = UpdateResults(fold_results[fold], recipe.Contains(i), recommended >= not_recommended);
                        }
                    }
                }
                f1s[k - 1] = fold_results.Average(a => a.getF1());
                Console.WriteLine("Average f1: " + f1s[k - 1]);
            }

            // display the optimal k
            double best_f1 = f1s.Max();
            int    best_k  = Array.IndexOf(f1s, best_f1) + 1;
            Console.WriteLine("\nOPTIMAL k = " + best_k + " with an f1 score of " + best_f1);
        }