示例#1
0
        /// <summary>
        /// Runs KNN ingredient recommendation over a test set and accumulates the
        /// outcome of every (recipe, ingredient) decision into one <c>Results</c> value
        /// (TP, TN, FP, FN bookkeeping is delegated to <c>UpdateResults</c>).
        /// </summary>
        /// <remarks>
        /// An ingredient is recommended when the "recommend" tally of the voting
        /// neighbors is greater than or equal to the "do not recommend" tally, so a
        /// tie (including the case of zero voters) counts as a recommendation.
        /// </remarks>
        /// <param name="k">Number of nearest neighbors allowed to vote. Capped at the number of neighbors returned by <c>KNN.GetDistances</c>.</param>
        /// <param name="test_data">Test rows; rows sharing a <c>recipeId</c> form one test recipe.</param>
        /// <param name="ingrNames">Ingredient names; only the length is used, each index is treated as an ingredient id.</param>
        /// <param name="distance_choice">Distance measure forwarded to <c>KNN.GetDistances</c>.</param>
        /// <param name="train_data">Training rows forwarded to <c>KNN.GetDistances</c>.</param>
        /// <param name="voting">Voting scheme: unweighted votes count 1 each, otherwise each vote counts the neighbor's <c>distance</c> value.</param>
        /// <returns>The accumulated results over all test recipes and all ingredient indices.</returns>
        static Results GetKNNResults(int k, Data[] test_data, string[] ingrNames, DistanceChoice distance_choice, Data[] train_data, Voting voting)
        {
            DataManager dm  = new DataManager();
            KNN         knn = new KNN();
            // running tally over every test recipe
            Results results = new Results(0);

            // the voting scheme is fixed for the whole evaluation, so resolve it once
            bool unweighted = voting.Equals(Voting.Unweighted);

            // group test data by recipeId and iterate through all test recipes
            foreach (IGrouping <int, Data> recipe in test_data.GroupBy(d => d.recipeId).ToArray())
            {
                // current test recipe
                int[] current_recipe = dm.GetRecipe(recipe.ToArray());

                // neighbors of the current recipe; the first k entries are treated as the nearest
                // (presumably GetDistances returns them sorted nearest-first - confirm against KNN)
                Neighbors[] distances = knn.GetDistances(distance_choice, current_recipe, train_data, voting);

                // never index past the neighbors that actually exist (k may exceed the training set size)
                int voters = k < distances.Length ? k : distances.Length;

                // iterate through all features (unique ingredients)
                for (int i = 0; i < ingrNames.Length; i++)
                {
                    // votes from neighboring recipes
                    double recommended     = 0;
                    double not_recommended = 0;

                    for (int top = 0; top < voters; top++)
                    {
                        // NOTE(review): in weighted mode the neighbor's `distance` is used directly as the
                        // vote weight; confirm GetDistances returns a "larger = closer" value in that mode.
                        double vote = unweighted ? 1.0 : distances[top].distance;

                        if (distances[top].recipe.Contains(i))
                        {
                            recommended += vote;
                        }
                        else
                        {
                            not_recommended += vote;
                        }
                    }
                    results = UpdateResults(results, current_recipe.Contains(i), recommended >= not_recommended);
                }
            }
            return(results);
        }
示例#2
0
        /// <summary>
        /// Searches k = 1..<paramref name="max_k"/> for the value that gives the best
        /// average F1 score under 5-fold cross validation, printing progress and the
        /// winning k to the console.
        /// </summary>
        /// <remarks>
        /// Training data and the feature list are loaded through <c>DataManager.GetDataView</c>.
        /// Folds are split with <c>recipeId</c> as the sampling key. For each k, every
        /// validation recipe of every fold is scored ingredient by ingredient: the first k
        /// neighbors returned by <c>KNN.GetDistances</c> vote, and the ingredient is
        /// recommended when the "recommend" tally is greater than or equal to the
        /// "do not recommend" tally. When several k share the best score, the smallest is reported.
        /// </remarks>
        /// <param name="distance_choice">Distance measure forwarded to <c>KNN.GetDistances</c>.</param>
        /// <param name="voting">Voting scheme: unweighted votes count 1 each, otherwise each vote counts the neighbor's <c>distance</c> value.</param>
        /// <param name="max_k">Largest k to try; must be at least 1.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="max_k"/> is less than 1.</exception>
        public void GetOptimalK(DistanceChoice distance_choice, Voting voting, int max_k)
        {
            // without at least one candidate there is no score to choose from
            if (max_k < 1)
            {
                throw new ArgumentOutOfRangeException(nameof(max_k), max_k, "max_k must be at least 1.");
            }

            Console.WriteLine("Determining optimal k for " + distance_choice.ToString() + " distance");
            Console.WriteLine(DateTime.Now.ToLongTimeString());

            KNN         knn = new KNN();
            MLContext   ml  = new MLContext();
            DataManager dm  = new DataManager();

            // get training data
            IDataView train_dataView = dm.GetDataView(ModelChoice.KNN, ml, DataPurpose.TRAIN);
            // get features
            IDataView features = dm.GetDataView(ModelChoice.KNN, ml, DataPurpose.FEATURES);

            string[] ingrNames = features.GetColumn <string>(features.Schema["ingrName"]).ToArray();
            // set number of folds to 5
            int num_folds = 5;

            Console.WriteLine(num_folds + "-fold cross validation...");
            // Cross validation split, using recipeId as the sampling key
            var folds = ml.Data.CrossValidationSplit(train_dataView, num_folds, samplingKeyColumnName: "recipeId");

            // the voting scheme is fixed for the whole search, so resolve it once
            bool unweighted = voting.Equals(Voting.Unweighted);

            // average f1 score for each value of k (index k - 1)
            double[] f1s = new double[max_k];
            // try different values of k
            for (int k = 1; k <= max_k; k++)
            {
                // show progress
                Console.WriteLine("\nk = " + k + "\t" + DateTime.Now.ToLongTimeString());

                // per-fold results, later reduced to an f1 score each
                Results[] fold_results = new Results[num_folds];

                // iterate through each fold
                // NOTE(review): the fold data and the neighbor distances do not depend on k, yet they are
                // rebuilt for every k; caching them would need GetData/GetDistances to be side-effect free.
                for (int fold = 0; fold < num_folds; fold++)
                {
                    // start every fold from the same explicit empty tally GetKNNResults uses
                    fold_results[fold] = new Results(0);

                    // training and validation data for current fold
                    Data[] train_data      = dm.GetData(folds[fold].TrainSet, features);
                    Data[] validation_data = dm.GetData(folds[fold].TestSet, features);

                    // iterate through validation recipes (rows grouped by recipeId) for current fold
                    foreach (IGrouping <int, Data> current in validation_data.GroupBy(d => d.recipeId).ToArray())
                    {
                        // current recipe
                        int[] recipe = dm.GetRecipe(current.ToArray());

                        // neighbors of the current recipe; the first k entries are treated as the nearest
                        // (presumably GetDistances returns them sorted nearest-first - confirm against KNN)
                        Neighbors[] distances = knn.GetDistances(distance_choice, recipe, train_data, voting);

                        // never index past the neighbors that actually exist (k may exceed the fold's training size)
                        int voters = k < distances.Length ? k : distances.Length;

                        // iterate through all features (unique ingredients)
                        for (int i = 0; i < ingrNames.Length; i++)
                        {
                            // keep track of votes
                            double recommended     = 0;
                            double not_recommended = 0;

                            for (int top = 0; top < voters; top++)
                            {
                                // NOTE(review): in weighted mode the neighbor's `distance` is used directly as the
                                // vote weight; confirm GetDistances returns a "larger = closer" value in that mode.
                                double vote = unweighted ? 1.0 : distances[top].distance;

                                if (distances[top].recipe.Contains(i))
                                {
                                    recommended += vote;
                                }
                                else
                                {
                                    not_recommended += vote;
                                }
                            }
                            // update results for current fold
                            fold_results[fold] = UpdateResults(fold_results[fold], recipe.Contains(i), recommended >= not_recommended);
                        }
                    }
                }
                f1s[k - 1] = fold_results.Average(a => a.getF1());
                Console.WriteLine("Average f1: " + f1s[k - 1]);
            }

            // display the optimal k (first k that reaches the best average f1)
            double best_f1 = f1s.Max();
            int    best_k  = Array.IndexOf(f1s, best_f1) + 1;
            Console.WriteLine("\nOPTIMAL k = " + best_k + " with an f1 score of " + best_f1);
        }