示例#1
0
        // Get recommendations
        // Scores every ingredient index in [0, ingrNames.Length) against the given recipe and
        // returns one Recommendation per ingredient, ordered by descending score.
        //   k               - how many leading entries of the GetDistances result are allowed to vote
        //                     (capped at the number of entries actually returned)
        //   distance_choice - passed through to GetDistances
        //   recipe          - ingredient indices of the recipe to get recommendations for
        //   voting          - Unweighted: each voter counts as 1; otherwise each voter contributes
        //                     the value of its `distance` field
        public Recommendation[] GetRecommendations(int k, DistanceChoice distance_choice, int[] recipe, Voting voting)
        {
            DataManager dm = new DataManager();

            // features (ingredient names) and the training recipes used as neighbors
            string[] ingrNames = dm.GetFeatures();
            Data[]   data      = dm.GetRecipes(ModelChoice.KNN, DataPurpose.TRAIN);

            // The neighbor list does not depend on the ingredient being scored, so it is
            // computed once here instead of once per ingredient.
            // NOTE(review): only the first k entries vote, so GetDistances is presumably
            // returning neighbors ordered nearest-first - confirm.
            Neighbors[] distances = GetDistances(distance_choice, recipe, data, voting);

            // never index past the neighbors that are actually available
            int  voters     = k < distances.Length ? k : distances.Length;
            bool unweighted = voting.Equals(Voting.Unweighted);

            Recommendation[] recommendations = new Recommendation[ingrNames.Length];
            for (int i = 0; i < ingrNames.Length; i++)
            {
                double recommended     = 0;
                double not_recommended = 0;

                // every voter adds its vote to one side, depending on whether
                // its recipe contains ingredient i
                for (int top = 0; top < voters; top++)
                {
                    double vote = unweighted ? 1.0 : distances[top].distance;

                    if (distances[top].recipe.Contains(i))
                    {
                        recommended += vote;
                    }
                    else
                    {
                        not_recommended += vote;
                    }
                }

                // score = (recommended + 1) / (not_recommended + 2)
                double score = (recommended + 1.0) / (not_recommended + 2.0);
                recommendations[i] = new Recommendation(new Ingredient(i, ingrNames[i]), score);
            }

            // highest score first
            return recommendations.OrderByDescending(r => r.score).ToArray();
        }
示例#2
0
        // Hamming Distance
        // Counts the ingredients that appear in exactly one of the two recipes:
        //   a.Length + b.Length - 2 * |a intersect b|
        // Unweighted voting returns that count as-is.
        // Weighted voting returns 1 / count^2; for a count of 0 the floating-point
        // division yields positive infinity.
        // Intersect() is set-based, so the count assumes neither array repeats an
        // ingredient - TODO confirm with callers.
        static double HammingDistance(int[] a, int[] b, Voting voting)
        {
            int shared = a.Intersect(b).Count();

            // every shared ingredient was counted once in each array length
            double distance = a.Length + b.Length - (shared * 2);

            return voting.Equals(Voting.Weighted)
                ? 1 / (distance * distance)   // weighted: inverse-square
                : distance;                   // unweighted: raw difference count
        }
示例#3
0
        // Jaccard Similarity
        // Ratio |a intersect b| / |a union b| of two recipes.
        // Unweighted voting returns the ratio itself.
        // Weighted voting returns 1 / ratio^2.
        // Edge cases that follow from the floating-point arithmetic below:
        //   - both arrays empty: 0 / 0, so the ratio is NaN
        //   - no shared ingredient: ratio is 0, so the weighted result is positive infinity
        // NOTE(review): in weighted mode a smaller similarity produces a larger return
        // value - confirm this is what the voting code expects.
        static double JaccardSimilarity(int[] a, int[] b, Voting voting)
        {
            double shared     = a.Intersect(b).Count();
            double combined   = a.Union(b).Count();
            double similarity = shared / combined;

            // Unweighted voting
            if (!voting.Equals(Voting.Weighted))
            {
                return similarity;
            }

            // Weighted voting
            return 1 / (similarity * similarity);
        }
示例#4
0
        // Levenshtein distance (minimum number of edits)
        // Dynamic-programming edit distance between two ingredient-index sequences, where an
        // edit is an insertion, a deletion or a substitution of one element.
        // Unweighted voting returns the distance as-is.
        // Weighted voting returns 1 / distance^2; for a distance of 0 the floating-point
        // division yields positive infinity.
        static double LevenshteinDistance(int[] a, int[] b, Voting voting)
        {
            // matrix[i][j] = edit distance between the first i elements of a
            //                and the first j elements of b
            int[][] matrix = new int[a.Length + 1][];

            for (int i = 0; i <= a.Length; i++)
            {
                matrix[i] = new int[b.Length + 1];

                for (int j = 0; j <= b.Length; j++)
                {
                    if (i == 0 || j == 0)
                    {
                        // one prefix is empty: the distance is the length of the other prefix.
                        // The prefix lengths are exactly i and j, so the base row covers the
                        // whole of b even when b is longer than a.
                        matrix[i][j] = Math.Max(i, j);
                    }
                    else
                    {
                        int deletion     = matrix[i - 1][j] + 1;
                        int insertion    = matrix[i][j - 1] + 1;
                        // substitution is free when the two elements already match
                        int substitution = matrix[i - 1][j - 1] + (a[i - 1] != b[j - 1] ? 1 : 0);

                        matrix[i][j] = Math.Min(Math.Min(deletion, insertion), substitution);
                    }
                }
            }
            double distance = matrix[a.Length][b.Length];

            // Weighted voting
            if (voting.Equals(Voting.Weighted))
            {
                return(1 / (distance * distance));
            }
            // Unweighted voting
            else
            {
                return(distance);
            }
        }
示例#5
0
        // Update results (TP, TN, FP, FN) for KNN
        // ingredients are recommended if the majority of neighbors contain the ingredient
        // For every test recipe (rows of test_data grouped by recipeId) and every ingredient
        // index in [0, ingrNames.Length), lets the first k entries of the GetDistances result
        // vote and feeds the outcome into UpdateResults together with whether the test recipe
        // really contains that ingredient.
        //   k     - number of voters, capped at the number of neighbors actually returned
        //   voting - Unweighted: each voter counts as 1; otherwise each voter contributes the
        //            value of its `distance` field
        // An ingredient counts as recommended when recommended >= not_recommended, so ties
        // (including the case of zero voters) count as recommended.
        static Results GetKNNResults(int k, Data[] test_data, string[] ingrNames, DistanceChoice distance_choice, Data[] train_data, Voting voting)
        {
            DataManager dm  = new DataManager();
            KNN         knn = new KNN();
            // accumulated results
            Results results = new Results(0);

            // the voting mode is the same for every vote, so evaluate it once
            bool unweighted = voting.Equals(Voting.Unweighted);

            // one group per test recipe
            foreach (IGrouping<int, Data> recipe in test_data.GroupBy(d => d.recipeId))
            {
                // current test recipe
                int[] current_recipe = dm.GetRecipe(recipe.ToArray());

                // neighbors of the current recipe among the training data
                Neighbors[] distances = knn.GetDistances(distance_choice, current_recipe, train_data, voting);

                // never index past the neighbors that are actually available
                int voters = k < distances.Length ? k : distances.Length;

                // iterate through all features (unique ingredients)
                for (int i = 0; i < ingrNames.Length; i++)
                {
                    double recommended     = 0;
                    double not_recommended = 0;

                    for (int top = 0; top < voters; top++)
                    {
                        double vote = unweighted ? 1.0 : distances[top].distance;

                        if (distances[top].recipe.Contains(i))
                        {
                            recommended += vote;
                        }
                        else
                        {
                            not_recommended += vote;
                        }
                    }

                    // second argument: the recipe actually contains ingredient i
                    // third argument:  the vote recommends ingredient i
                    results = UpdateResults(results, current_recipe.Contains(i), recommended >= not_recommended);
                }
            }
            return(results);
        }
示例#6
0
        // find optimal k
        // Runs 5-fold cross validation on the KNN training data for every k in [1, max_k],
        // prints the average F1 score per k and finally the k with the highest average.
        //   distance_choice - passed through to KNN.GetDistances
        //   voting          - Unweighted: each voter counts as 1; otherwise each voter contributes
        //                     the value of its `distance` field
        //   max_k           - largest k to try; must be at least 1
        // Throws ArgumentOutOfRangeException when max_k is smaller than 1.
        public void GetOptimalK(DistanceChoice distance_choice, Voting voting, int max_k)
        {
            if (max_k < 1)
            {
                throw new ArgumentOutOfRangeException(nameof(max_k), "max_k must be at least 1");
            }

            Console.WriteLine("Determining optimal k for " + distance_choice.ToString() + " distance");
            Console.WriteLine(DateTime.Now.ToLongTimeString());

            KNN         knn = new KNN();
            MLContext   ml  = new MLContext();
            DataManager dm  = new DataManager();

            // get training data
            IDataView train_dataView = dm.GetDataView(ModelChoice.KNN, ml, DataPurpose.TRAIN);
            // get features
            IDataView features = dm.GetDataView(ModelChoice.KNN, ml, DataPurpose.FEATURES);

            string[] ingrNames = features.GetColumn<string>(features.Schema["ingrName"]).ToArray();
            // set number of folds to 5
            int num_folds = 5;

            Console.WriteLine(num_folds + "-fold cross validation...");
            // Cross validation split; recipeId is the sampling key
            var folds = ml.Data.CrossValidationSplit(train_dataView, num_folds, samplingKeyColumnName: "recipeId");

            // The contents of a fold do not depend on k, so the training rows and the
            // validation recipes of every fold are extracted once, before the k loop.
            // NOTE(review): assumes dm.GetData and dm.GetRecipe return the same result on
            // every call for the same input - confirm.
            Data[][]  fold_train   = new Data[num_folds][];
            int[][][] fold_recipes = new int[num_folds][][];
            for (int fold = 0; fold < num_folds; fold++)
            {
                fold_train[fold] = dm.GetData(folds[fold].TrainSet, features);

                Data[] validation_data = dm.GetData(folds[fold].TestSet, features);
                // one int[] per validation recipe (rows grouped by recipeId)
                fold_recipes[fold] = validation_data
                                     .GroupBy(d => d.recipeId)
                                     .Select(g => dm.GetRecipe(g.ToArray()))
                                     .ToArray();
            }

            bool unweighted = voting.Equals(Voting.Unweighted);

            // average f1 score for each value of k (index k - 1)
            double[] f1s = new double[max_k];
            // try different values of k
            for (int k = 1; k <= max_k; k++)
            {
                // show progress
                Console.WriteLine("\nk = " + k + "\t" + DateTime.Now.ToLongTimeString());

                // per-fold results; every element starts as default(Results) and is passed
                // to UpdateResults in that state on first use
                Results[] fold_results = new Results[num_folds];

                // iterate through each fold
                for (int fold = 0; fold < num_folds; fold++)
                {
                    Data[] train_data = fold_train[fold];

                    // iterate through validation recipes for current fold
                    foreach (int[] recipe in fold_recipes[fold])
                    {
                        // neighbors of the validation recipe among the fold's training data
                        Neighbors[] distances = knn.GetDistances(distance_choice, recipe, train_data, voting);

                        // never index past the neighbors that are actually available
                        int voters = k < distances.Length ? k : distances.Length;

                        // iterate through all features (unique ingredients)
                        for (int i = 0; i < ingrNames.Length; i++)
                        {
                            double recommended     = 0;
                            double not_recommended = 0;

                            for (int top = 0; top < voters; top++)
                            {
                                double vote = unweighted ? 1.0 : distances[top].distance;

                                if (distances[top].recipe.Contains(i))
                                {
                                    recommended += vote;
                                }
                                else
                                {
                                    not_recommended += vote;
                                }
                            }
                            // ties count as recommended
                            fold_results[fold] = UpdateResults(fold_results[fold], recipe.Contains(i), recommended >= not_recommended);
                        }
                    }
                }
                f1s[(k - 1)] = fold_results.Average(a => a.getF1());
                Console.WriteLine("Average f1: " + f1s[(k - 1)]);
            }

            // display the optimal k (the first k that reaches the maximum average f1)
            double best_f1 = f1s.Max();
            int    best_k  = Array.IndexOf(f1s, best_f1) + 1;
            Console.WriteLine("\nOPTIMAL k = " + best_k + " with an f1 score of " + best_f1);
        }