Ejemplo n.º 1
0
        public void Test_LoadSave()
        {
            // Trains a K-Means model on generated data, checks the prediction for the
            // generating centers, saves the API, loads it again and re-runs the pipeline.

            // Name under which the data-producing action module is registered; it is
            // needed again later to replace that module on the loaded instance.
            const string actionName = "test-action";

            // Centers around which the sample data is generated.
            var centers = new double[][]
            {
                new double[] { 5.0, 5.0 },
                new double[] { 15.0, 15.0 },
                new double[] { 30.0, 30.0 },
            };

            var featureNames = new[] { "Height", "Weight" };

            int featureCount  = featureNames.Length; // two attributes per sample
            int clusterCount  = 3;                   // number of clusters requested
            int maxIterations = 300;                 // found by trial and error

            var kmeansSettings = new ClusteringSettings(maxIterations, clusterCount, featureCount, KmeansAlgorithm: 1);

            // Build the pipeline: action module that supplies the training data, followed by K-Means.
            var api = new LearningApi(loadDescriptor());
            api.UseActionModule<object, double[][]>(
                (data, ctx) => Helpers.CreateSampleData(centers, 2, 10000, 0.5),
                actionName);
            api.UseKMeans(kmeansSettings);

            KMeansScore score = api.Run() as KMeansScore;

            Assert.True(score.Model.Clusters != null);
            Assert.True(score.Model.Clusters.Length == centers.Length);

            // Predicting the generating centers themselves must yield clusters 0, 1 and 2 in order.
            KMeansResult prediction = api.Algorithm.Predict(centers, api.Context) as KMeansResult;
            for (int i = 0; i < centers.Length; i++)
            {
                Assert.True(prediction.PredictedClusters[i] == i);
            }

            // Persist the trained model and read it back.
            api.Save(nameof(TestLoadSave));
            var restored = LearningApi.Load(nameof(TestLoadSave));

            // The action module is code and has to be registered again on the loaded
            // instance. Per the original author's note this is not a limitation of the
            // API design but of .NET, which cannot persist code.
            restored.ReplaceActionModule<object, double[][]>(
                actionName,
                (data, ctx) => Helpers.CreateSampleData(centers, 2, 10000, 0.5));

            restored.Run();
        }
Ejemplo n.º 2
0
        public void Test_LoadSave()
        {
            // Round-trip test: train K-Means, verify predictions, save the API under the
            // name "LoadSaveTests", load it back and run the restored pipeline once.
            string actionModuleName = "test-action";

            // Points used both as generator centers and as prediction input.
            double[][] seeds =
            {
                new[] { 5.0, 5.0 },
                new[] { 15.0, 15.0 },
                new[] { 30.0, 30.0 }
            };

            string[] columns = { "Height", "Weight" };

            int dimensions = columns.Length; // one dimension per attribute
            int k          = 3;              // requested number of clusters
            int iterations = 300;            // upper bound passed to the clustering settings

            ClusteringSettings config = new ClusteringSettings(iterations, k, dimensions, KmeansAlgorithm: 1);

            LearningApi trainingApi = new LearningApi(loadDescriptor());

            // First pipeline stage: produce the training samples.
            trainingApi.UseActionModule<object, double[][]>((data, ctx) =>
            {
                return Helpers.CreateSampleData(seeds, 2, 10000, 0.5);
            }, actionModuleName);

            // Second pipeline stage: the clustering algorithm.
            trainingApi.UseKMeans(config);

            var trained = trainingApi.Run() as KMeansScore;

            Assert.True(trained.Model.Clusters != null);
            Assert.True(trained.Model.Clusters.Length == seeds.Length);

            var predicted = trainingApi.Algorithm.Predict(seeds, trainingApi.Context) as KMeansResult;

            // Each seed is expected to be assigned to the cluster with the same index.
            int seedIndex = 0;
            while (seedIndex < 3)
            {
                Assert.True(predicted.PredictedClusters[seedIndex] == seedIndex);
                seedIndex++;
            }

            // Save, then load into a fresh instance.
            trainingApi.Save(nameof(LoadSaveTests));
            var reloadedApi = LearningApi.Load(nameof(LoadSaveTests));

            // The data-producing delegate is supplied again for the loaded instance
            // before the pipeline is run.
            reloadedApi.ReplaceActionModule<object, double[][]>(actionModuleName, (data, ctx) =>
            {
                return Helpers.CreateSampleData(seeds, 2, 10000, 0.5);
            });

            reloadedApi.Run();
        }
        public void Test_OptimalNumberOfCLusters()
        {
            // Loads the "SIN X" function data, normalizes it, trains K-Means with a
            // cluster count of 0 and writes the model's D, DPrime and Fmin arrays to a CSV file.

            // Input:  <rootFolder>Functions\SIN X\SIN X.csv
            // Output: <rootFolder>Optimal Clusters\SIN X Results.csv
            string inputDirectory = rootFolder + "Functions\\";
            string name           = "SIN X"; // file name without extension
            string resultsFile    = rootFolder + "Optimal Clusters\\" + name + " Results.csv";

            double[][] loaded     = Helpers.LoadFunctionData(inputDirectory + name + "\\" + name + ".csv");
            double[][] normalized = TestFunctionGenerators.normalizeData(loaded);

            int dimensions    = 2;
            int clusterCount  = 0;   // NOTE(review): presumably 0 lets the algorithm pick the count itself - confirm
            int maxIterations = 300; // found by trial and error

            var config = new ClusteringSettings(maxIterations, clusterCount, dimensions, KmeansAlgorithm: 2);

            var api = new LearningApi();

            // The data source hands the transposed function data to the algorithm.
            api.UseActionModule<object, double[][]>((data, ctx) => KMeansAlgorithm.transposeFunction(normalized));
            api.UseKMeans(config);

            // Train.
            KMeansScore score = api.Run() as KMeansScore;

            Assert.True(score.Model.NumberOfClusters > 1);

            // Row 0 holds the values 2..10 (presumably the candidate cluster counts - confirm);
            // rows 1-3 hold the measures reported by the model.
            double[][] table =
            {
                new double[] { 2, 3, 4, 5, 6, 7, 8, 9, 10 },
                score.Model.D,
                score.Model.DPrime,
                score.Model.Fmin
            };

            Helpers.Write2CSVFile(table, resultsFile);
        }
Ejemplo n.º 4
0
        public void Test_FunctionRecognition()
        {
            // Forms clusters from the first centroid file, assembles a testing set from
            // both centroid files, runs patternTesting against the formed clusters and
            // writes every intermediate result to CSV files.

            int clusterCount = 2;
            // Tolerance towards possible outliers, expressed in percent.
            double outlierTolerance = 0;

            string inputDirectory  = rootFolder + "Functions\\";
            string outputDirectory = rootFolder + "Function Recognition\\";

            // Centroid files of the functions under test.
            string[] centroidFiles =
            {
                inputDirectory + "TestFile01\\NRP10\\TestFile01 SimilarFunctions Normalized Centroids NRP10 KA2 C" + clusterCount + " I500 R1.csv",
                inputDirectory + "TestFile02\\NRP10\\TestFile02 SimilarFunctions Normalized Centroids NRP10 KA2 C" + clusterCount + " I500 R1.csv"
            };

            int trainingFunctions = 800;
            int testingFunctions  = 200;

            // Item1: cluster centroids, Item2: values saved below as "Calculated Max Distance".
            Tuple<double[][], double[]> trained = formClusters(centroidFiles[0], clusterCount, trainingFunctions);
            double[][] formedCentroids = trained.Item1;
            double[]   maxDistances    = trained.Item2;

            // Persist the formed clusters.
            Helpers.Write2CSVFile(formedCentroids, outputDirectory + "Calculated Centroids.csv");
            Helpers.Write2CSVFile(new double[][] { maxDistances }, outputDirectory + "Calculated Max Distance.csv");

            // Assemble the testing data: the same number of rows is taken from every file.
            int rowsPerFile = testingFunctions * clusterCount;
            double[][] testingSet = new double[centroidFiles.Length * rowsPerFile][];

            // Rows of the first file that were consumed by training are skipped.
            int skip = trainingFunctions * clusterCount;

            for (int file = 0; file < centroidFiles.Length; file++)
            {
                double[][] fileRows = Helpers.LoadFunctionData(centroidFiles[file]);
                int target = file * rowsPerFile;

                for (int row = 0; row < rowsPerFile; row++)
                {
                    testingSet[target + row] = fileRows[skip + row];
                }

                // Only the first file contains training rows; later files are read from the start.
                skip = 0;
            }

            Helpers.Write2CSVFile(testingSet, outputDirectory + "Testing Centroids.csv");

            // Pipeline whose data source yields nothing: the clusters are supplied directly below.
            var api = new LearningApi();
            api.UseActionModule<object, double[][]>((data, ctx) => null);

            // Minimal settings, sufficient for prediction.
            var config = new ClusteringSettings(0, clusterCount, 0, tolerance: outlierTolerance);

            // Construct the clusters from the already computed centroids and distances.
            api.UseKMeans(config, formedCentroids, maxDistances);

            double[] recognition = patternTesting(api, config.NumberOfClusters, testingSet);

            // Persist the recognition results.
            Helpers.Write2CSVFile(new double[][] { recognition }, outputDirectory + "Results.csv");
        }
        public void Test_TrainingSimilarFunctions()
        {
            // For every cluster count from 2 to numClusters (inclusive) and every run, trains
            // K-Means on each function contained in the loaded data file. Each training is seeded
            // with the centroids calculated for the previous function (none for the first one),
            // and the resulting centroids of all functions are collected in one CSV file.

            // Settings to import the functions (NRP should match the desired loading file)
            string FunctionName = "SIN 2X"; // without extension
            string directory    = rootFolder + FunctionName + "\\";
            int    NRPmin       = 5;
            int    NRPmax       = 10;
            string NRP          = NRPmin + "-" + NRPmax;

            // Settings for the K-Means algorithm
            int maxCount      = 500;
            int numClusters   = 10;
            int numAttributes = 2;
            int KAlg          = 2;
            int Runs          = 1;

            // Holds the data of all functions. Every attribute of a function occupies one row,
            // so a function with N dimensions takes N rows.
            // NOTE(review): 'directory' already ends with a separator, so this path contains a doubled
            // backslash before "NRP" (the save path below does not). Left unchanged on purpose.
            double[][] allFunctionsData = Helpers.LoadFunctionData(directory + "\\NRP" + NRP + "\\" + FunctionName + " SimilarFunctions NRP" + NRP + ".csv");

            int numFunc = allFunctionsData.Length / numAttributes;

            for (int k = 2; k <= numClusters; k++)
            {
                // One settings object per cluster count; its initial centroids are updated per function.
                ClusteringSettings clusterSettings = new ClusteringSettings(maxCount, k, numAttributes, KmeansAlgorithm: KAlg);

                for (int run = 1; run <= Runs; run++)
                {
                    // Target file for the centroids of this (cluster count, run) combination.
                    string savePath = directory + "NRP" + NRP + "\\" + FunctionName + " SimilarFunctions Centroids NRP" + NRP + " KA" + KAlg + " C" + k + " I" + maxCount + " R" + run + ".csv";

                    // No seed centroids for the first function of a run.
                    double[][] lastCalculatedCentroids = null;

                    for (int funcIndx = 0; funcIndx < numFunc; funcIndx++)
                    {
                        // Data of the function with index funcIndx (the helper is called with funcIndx + 1).
                        double[][] rawData = getSimilarFunctionsData(allFunctionsData, numAttributes, funcIndx + 1);

                        LearningApi api = new LearningApi();
                        api.UseActionModule<object, double[][]>((data, ctx) =>
                        {
                            return rawData;
                        });

                        // Seed with the centroids of the previously trained function.
                        clusterSettings.InitialCentroids = lastCalculatedCentroids;
                        api.UseKMeans(clusterSettings);

                        // Train. A direct cast reports an unexpected result type immediately
                        // instead of failing later with a NullReferenceException.
                        KMeansScore resp = (KMeansScore)api.Run();

                        // Collect the resulting centroids.
                        lastCalculatedCentroids = new double[k][];
                        for (int i = 0; i < k; i++)
                        {
                            lastCalculatedCentroids[i] = resp.Model.Clusters[i].Centroid;
                        }

                        if (funcIndx == 0)
                        {
                            // First function: create or overwrite the file.
                            Helpers.Write2CSVFile(lastCalculatedCentroids, savePath);
                        }
                        else
                        {
                            // Subsequent functions: append to the file.
                            Helpers.Write2CSVFile(lastCalculatedCentroids, savePath, true);
                        }
                    }
                }
            }
        }
        public void Test_OptimalNumberOfCLustersBasic()
        {
            // Trains K-Means with a cluster count of 0 on two data sets (generated samples
            // around three centers, then a generated function) and checks that the model
            // reports more than one cluster in both cases.
            var centers = new double[][]
            {
                new double[] { 5.0, 5.0 },
                new double[] { 15.0, 15.0 },
                new double[] { 30.0, 30.0 },
            };

            var featureNames = new[] { "Height", "Weight" };

            int featureCount  = featureNames.Length; // two attributes per sample
            int clusterCount  = 0;                   // NOTE(review): presumably 0 lets the algorithm pick the count - confirm
            int maxIterations = 300;                 // found by trial and error

            // The same settings instance is used for both pipelines below.
            var config = new ClusteringSettings(maxIterations, clusterCount, featureCount, KmeansAlgorithm: 2);

            // --- First data set: generated samples around the centers ---
            var sampleApi = new LearningApi();
            sampleApi.UseActionModule<object, double[][]>((data, ctx) => Helpers.CreateSampleData(centers, 2, 10000, 0.5));
            sampleApi.UseKMeans(config);

            KMeansScore sampleScore = sampleApi.Run() as KMeansScore;

            Assert.True(sampleScore.Model.NumberOfClusters > 1);

            // --- Second data set: a generated function ---
            int    sampleCount = 150;
            double step        = 2 * Math.PI / 100;
            List<double[]> dimensions = LearningFoundation.Helpers.FunctionGenerator.CreateFunction(sampleCount, 2, step);

            // The generator output is indexed [dimension][sample]; the pipeline input
            // built here is indexed [sample][dimension].
            var functionSamples = new double[sampleCount][];
            for (int sample = 0; sample < sampleCount; sample++)
            {
                functionSamples[sample] = new[] { dimensions[0][sample], dimensions[1][sample] };
            }

            var functionApi = new LearningApi();
            functionApi.UseActionModule<object, double[][]>((data, ctx) => functionSamples);
            functionApi.UseKMeans(config);

            KMeansScore functionScore = functionApi.Run() as KMeansScore;

            Assert.True(functionScore.Model.NumberOfClusters > 1);
        }
        public void Test_TrainPartials()
        {
            // Verifies partial (incremental) training: one API instance is trained on a first data
            // set and then run again on a second one, while a second API instance is trained on the
            // second data set only. The centroids after the partial run must equal the
            // sample-weighted combination of the first-run centroids and the second-only centroids.

            double[][] clusterCenters = new double[3][];
            clusterCenters[0] = new double[] { 5.0, 5.0 };
            clusterCenters[1] = new double[] { 15.0, 15.0 };
            clusterCenters[2] = new double[] { 30.0, 30.0 };

            double[][] clusterCenters2 = new double[3][];
            clusterCenters2[0] = new double[] { 6, 5 };
            clusterCenters2[1] = new double[] { 17, 18 };
            clusterCenters2[2] = new double[] { 28, 30 };

            string[] attributes = new string[] { "Height", "Weight" };

            int numAttributes = attributes.Length; // two attributes per sample
            int numClusters   = 3;                 // number of clusters requested
            int maxCount      = 300;               // trial and error

            // Snapshot of the first run of 'api', taken before the partial run.
            double[][] apiResp1Centroid    = new double[numClusters][];
            double[]   apiResp1MaxDistance = new double[numClusters];
            double[]   apiResp1NumSamples  = new double[numClusters];

            ClusteringSettings settings = new ClusteringSettings(maxCount, numClusters, numAttributes, KmeansAlgorithm: 2);

            // Creates learning api objects
            LearningApi api  = new LearningApi(loadDescriptor());
            LearningApi api2 = new LearningApi(loadDescriptor());

            double[][] rawData  = Helpers.CreateSampleData(clusterCenters, 2, 10000, 0.5);
            double[][] rawData2 = Helpers.CreateSampleData(clusterCenters2, 2, 5000, 0.5);

            // Captured by the data source of 'api': 0 selects the first data set, anything else the second.
            int runNum = 0;

            api.UseActionModule<object, double[][]>((data, ctx) =>
            {
                if (runNum == 0)
                {
                    return rawData;
                }
                else
                {
                    return rawData2;
                }
            });

            api2.UseActionModule<object, double[][]>((data, ctx) =>
            {
                return rawData2;
            });

            // api2 is trained on the second data set (rawData2) only
            api2.UseKMeans(settings);

            // Direct casts report an unexpected result type immediately instead of
            // failing later with a NullReferenceException.
            var api2Resp = (KMeansScore)api2.Run();

            Assert.True(api2Resp.Model.Clusters != null);
            Assert.True(api2Resp.Model.Clusters.Length == clusterCenters.Length);

            // api is first trained on the first data set (rawData)
            api.UseKMeans(settings);

            var apiResp = (KMeansScore)api.Run();

            Assert.True(apiResp.Model.Clusters != null);
            Assert.True(apiResp.Model.Clusters.Length == clusterCenters.Length);

            // Save the first-run results.
            // NOTE(review): the centroid arrays are stored by reference, not copied. If the second
            // Run() updates them in place, the saved values change as well - confirm.
            for (int i = 0; i < numClusters; i++)
            {
                apiResp1Centroid[i]    = apiResp.Model.Clusters[i].Centroid;
                apiResp1MaxDistance[i] = apiResp.Model.Clusters[i].InClusterMaxDistance;
                apiResp1NumSamples[i]  = apiResp.Model.Clusters[i].NumberOfSamples;
            }

            // From now on the data source of 'api' returns the second data set.
            runNum++;

            // NOTE(review): this new settings object is never handed to 'api' (UseKMeans received the
            // previous instance), so the assignment has no visible effect on the run below. Kept
            // as in the original; confirm whether it should be registered or removed.
            settings = new ClusteringSettings(maxCount, numClusters, numAttributes, KmeansAlgorithm: 2, initialCentroids: apiResp1Centroid);

            // Partial run of 'api' with the second data set
            apiResp = (KMeansScore)api.Run();

            Assert.True(apiResp.Model.Clusters != null);
            Assert.True(apiResp.Model.Clusters.Length == clusterCenters.Length);

            // Compare the results. Computed doubles are compared with a relative tolerance:
            // exact equality depends on the order of floating-point operations.
            const double relativeTolerance = 1e-9;

            for (int i = 0; i < numClusters; i++)
            {
                // Expected centroid = (1 / total samples) * (first centroid * first samples + second centroid * second samples)
                double f = 1.0 / apiResp.Model.Clusters[i].NumberOfSamples;
                for (int j = 0; j < numAttributes; j++)
                {
                    double res = apiResp1Centroid[i][j] * apiResp1NumSamples[i] + api2Resp.Model.Clusters[i].Centroid[j] * api2Resp.Model.Clusters[i].NumberOfSamples;

                    double expected = f * res;
                    double actual   = apiResp.Model.Clusters[i].Centroid[j];

                    // partial centroid check
                    Assert.True(System.Math.Abs(actual - expected) <= relativeTolerance * System.Math.Max(1.0, System.Math.Abs(expected)));
                }

                // max distance in cluster check
                Assert.True(apiResp.Model.Clusters[i].InClusterMaxDistance >= apiResp1MaxDistance[i] + KMeansAlgorithm.calculateDistance(apiResp1Centroid[i], apiResp.Model.Clusters[i].Centroid));
            }
        }