Example #1
        private void ClusterWith99BlogsSomeBlogsShouldAlwaysBeClusteredTogether(string[] blogsThatShouldBeClusteredTogether)
        {
            // Arrange

            var sut = new KMeansClustering(new PearsonCorrelationSimilarityAlgorithm());

            for (var iteration = 0; iteration <= 10; iteration++)
            {
                // Act

                var result = sut.Cluster(this.blogs, 5, -1).ToArray();

                // Assert

                Assert.Equal(5, result.Count());

                // Find the centroid that contains the first of the expected blogs; it must exist.
                var centroidWithBlogsThatShouldBeClusteredTogether = result.FirstOrDefault(
                    centroid => centroid.Blogs.Any(b => b.Name == blogsThatShouldBeClusteredTogether.First()));

                Assert.NotNull(centroidWithBlogsThatShouldBeClusteredTogether);

                foreach (var blogName in blogsThatShouldBeClusteredTogether)
                {
                    Assert.Contains(centroidWithBlogsThatShouldBeClusteredTogether.Blogs, b => b.Name == blogName);
                }
            }
        }
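The helper above is presumably invoked by parameterised xUnit tests; a minimal, hypothetical caller might look like the sketch below (the blog names are placeholders, not the original test data).

        // Hypothetical caller; the blog names are illustrative only.
        [Fact]
        public void ClusterWith99Blogs_RelatedBlogsEndUpInTheSameCentroid()
        {
            this.ClusterWith99BlogsSomeBlogsShouldAlwaysBeClusteredTogether(
                new[] { "Blog A", "Blog B", "Blog C" });
        }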
Example #2
        public ActionResult<IEnumerable<Centroid>> Get(int numberOfClusters, int numberOfIterations)
        {
            var blogs            = blogDataRepository.GetBlogData();
            var pearson          = new PearsonCorrelationSimilarityAlgorithm();
            var kMeansClustering = new KMeansClustering(pearson);

            return kMeansClustering.Cluster(blogs, numberOfClusters, numberOfIterations).ToArray();
        }
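A minimal sketch of calling this action from a client; the base address and route below are assumptions for illustration, not taken from the original project (System, System.Net.Http, and System.Threading.Tasks assumed).

        // Hypothetical client call; the base address and route are assumptions.
        public static async Task<string> FetchClustersAsync()
        {
            using var http = new HttpClient { BaseAddress = new Uri("https://localhost:5001/") };

            // numberOfClusters and numberOfIterations bind to the action's query-string parameters.
            return await http.GetStringAsync("api/clustering?numberOfClusters=5&numberOfIterations=10");
        }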
Example #3
        public static Dictionary<string, int> Cluster(IEnumerable<string> itemIds, double[,] features, int numClusters, string centerPath = "",
                                                       ClusteringAlgorithm algorithm = ClusteringAlgorithm.KMeans)
        {
            Console.WriteLine("Clustering...");

            //features = Normalized(features);

            Console.WriteLine("Features normalized.");

            var        dm       = new DoubleMatrix(features);
            ClusterSet clusters = null;

            if (algorithm == ClusteringAlgorithm.KMeans)
            {
                var km = new KMeansClustering(dm);
                km.Cluster(numClusters);

                Console.WriteLine("Num Clusters: {0}, Num Items: {1}, Num Iterations: {2}", km.K, km.N, km.Iterations);

                if (centerPath != "")
                {
                    var cWriter = new StreamWriter(centerPath);
                    km.FinalCenters.WriteAsCSV(cWriter);
                    cWriter.Close();
                }

                clusters = km.Clusters;
            }
            else
            {
                var nmf = new NMFClustering<NMFDivergenceUpdate>();

                nmf.Factor(dm, numClusters);

                if (nmf.Converged)
                {
                    var uWriter = new StreamWriter(Paths.AmazonBooksUsersCluster + ".nmf");
                    var iWriter = new StreamWriter(Paths.AmazonBooksItemsCluster + ".nmf");

                    nmf.W.WriteAsCSV(uWriter);
                    nmf.H.WriteAsCSV(iWriter);

                    uWriter.Flush();
                    iWriter.Flush();

                    uWriter.Close();
                    iWriter.Close();

                    File.WriteAllLines(Paths.AmazonBooksUsersCluster + ".con", nmf.Connectivity.ToTabDelimited().Split('\n'));

                    clusters = nmf.ClusterSet;

                    File.WriteAllLines(Paths.AmazonBooksUsersCluster + ".cluster", clusters.Clusters.Select(c => c.ToString()));

                    Console.WriteLine("Successfully wrote decompose matrixes.");
                }
                else
                {
                    Console.WriteLine("Factorization failed to converge in {0} iterations.", nmf.MaxFactorizationIterations);
                }
            }

            if (clusters == null)
            {
                // NMF did not converge, so there are no cluster assignments to return.
                return new Dictionary<string, int>();
            }

            return itemIds.Zip(clusters.Clusters, (i, c) => new { ItemId = i, Cluster = c }).ToDictionary(i => i.ItemId, i => i.Cluster);
        }
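The method returns a map from item id to cluster index. A small illustrative way to consume it (the itemIds and features arguments are placeholders) is to invert the map into per-cluster groups:

            // Illustrative only: group item ids by their assigned cluster index.
            Dictionary<string, int> assignments = Cluster(itemIds, features, numClusters: 10);
            Dictionary<int, List<string>> byCluster = assignments
                .GroupBy(kvp => kvp.Value, kvp => kvp.Key)
                .ToDictionary(g => g.Key, g => g.ToList());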
Example #4
        static void Main( string[] args )
        {
              // Class NMathChart and NMathStatsChart provide static methods for plotting NMath
              // types using Syncfusion Essential Chart for Windows Forms controls.

              // EXAMPLE 1: CURVE FITTING

              // This NMath code fits a 4-parameter logistic function to data measuring the evolution
              // of an algal bloom in the Adriatic Sea.
              DoubleVector x = new DoubleVector( 11, 15, 18, 23, 26, 31, 39, 44, 54, 64, 74 );
              DoubleVector y = new DoubleVector( 0.00476, 0.0105, 0.0207, 0.0619, 0.337, 0.74, 1.7, 2.45, 3.5, 4.5, 5.09 );
              DoubleVector start = new DoubleVector( 4, 0.1 );
              OneVariableFunctionFitter<TrustRegionMinimizer> fitter =
                  new OneVariableFunctionFitter<TrustRegionMinimizer>( AnalysisFunctions.FourParameterLogistic );
              DoubleVector solution = fitter.Fit( x, y, start );

              // For prototyping and debugging console applications, Show() plots common NMath types
              // and displays the chart in a default form.
              int numInterpolatedValues = 100;
              NMathChart.Show( fitter, x, y, solution, numInterpolatedValues );

              // The default look of the chart is governed by static properties: DefaultSize,
              // DefaultTitleFont, DefaultAxisTitleFont, DefaultMajorGridLineColor, and DefaultMarker.

              // For more control, ToChart() returns an instance of Syncfusion.Windows.Forms.Chart.ChartControl,
              // which can be customized as desired.
              ChartControl chart = NMathChart.ToChart( fitter, x, y, solution, numInterpolatedValues );
              chart.Titles[0].Text = "Algal Bloom in the Adriatic Sea";
              chart.PrimaryXAxis.Title = "Days";
              chart.PrimaryYAxis.Title = "Size (mm2)";
              chart.Series[0].Text = "Observed";
              chart.Series[1].Text = "Fitted 4PL";
              chart.BackColor = Color.Beige;
              NMathChart.Show( chart );

              // If you are developing a Windows Forms application using the Designer, add a ChartControl
              // to your form, then update it with an NMath object using the appropriate Update() function
              // after initialization.

              // InitializeComponent();
              // NMathChart.Update( ref this.chart1, fitter, x, y, solution, numInterpolatedValues );

              // EXAMPLE 2: FFT

              // This chart shows a complex signal vector with three component sine waves.
              int n = 100;
              DoubleVector t = new DoubleVector( n, 0, 0.1 );
              DoubleVector signal = new DoubleVector( n );
              for( int i = 0; i < n; i++ )
              {
                  signal[i] = Math.Sin( 2 * Math.PI * t[i] ) + 2 * Math.Sin( 2 * Math.PI * 2 * t[i] ) + 3 * Math.Sin( 2 * Math.PI * 3 * t[i] );
              }
              chart = NMathChart.ToChart( signal, new NMathChart.Unit( 0, 0.1, "Time (s)" ) );
              chart.Titles[0].Text = "Signal";
              chart.ChartArea.PrimaryYAxis.Title = "Voltage";
              NMathChart.Show( chart );

              // We use NMath to compute the forward discrete Fourier transform, then plot the power in the frequency domain.
              DoubleForward1DFFT fft = new DoubleForward1DFFT( n );
              fft.FFTInPlace( signal );
              DoubleSymmetricSignalReader reader = fft.GetSignalReader( signal );
              DoubleComplexVector unpacked = reader.UnpackSymmetricHalfToVector();
              chart = NMathChart.ToChart( unpacked, new NMathChart.Unit( 0, 0.1, "Frequency (Hz)" ) );
              chart.Titles[0].Text = "FFT";
              chart.ChartArea.PrimaryYAxis.Title = "Power";
              NMathChart.Show( chart );

              // EXAMPLE 3: PEAK FINDING

              // NMath class PeakFinderSavitzkyGolay uses smooth Savitzky-Golay derivatives to find peaks in data.
              // A peak is defined as a smoothed derivative zero crossing.
              double step_size = 0.1;
              x = new DoubleVector( 1000, 0.01, step_size );
              y = NMathFunctions.Sin( x ) / x;
              int width = 5;
              int polynomial_degree = 4;
              PeakFinderSavitzkyGolay pf = new PeakFinderSavitzkyGolay( y, width, polynomial_degree );
              pf.AbscissaInterval = step_size;
              pf.SlopeSelectivity = 0;
              pf.RootFindingTolerance = 0.0001;
              pf.LocatePeaks();

              // Plot the peaks.
              double xmin = 20;
              double xmax = 50;
              NMathChart.Show( pf, xmin, xmax );

              // EXAMPLE 4: K-MEANS CLUSTERING

              // The k-means clustering method assigns data points into k groups such that the sum of squares from points
              // to the computed cluster centers is minimized. Here we cluster 30 points in 3-dimensional space into 5 clusters.
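              // Formally, k-means chooses assignments minimizing sum_j sum_{x in cluster j} ||x - mu_j||^2,
              // where mu_j is the mean of the points assigned to cluster j.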
              DoubleMatrix data = new DoubleMatrix( @"30 x 3 [
            0.62731478808400   0.71654239725005   0.11461282117064
            0.69908013774534   0.51131144816890   0.66485556714021
            0.39718395379261   0.77640121193349   0.36537389168912
            0.41362889533818   0.48934547589850   0.14004445653473
            0.65521294635567   0.18590445122522   0.56677280030311
            0.83758509883186   0.70063540514612   0.82300831429067
            0.37160803224266   0.98270880190626   0.67394863209536
            0.42525315848265   0.80663774928874   0.99944730494940
            0.59466337145257   0.70356765500360   0.96163640714857
            0.56573857208571   0.48496371932457   0.05886216545559
            1.36031117091978   1.43187338560697   1.73265064912939
            1.54851281373460   1.63426595631548   1.42222658611939
            1.26176956987179   1.80302634023193   1.96136999885631
            1.59734484793384   1.08388100700103   1.07205923855201
            1.04927799659601   1.94546278791039   1.55340796803039
            1.57105749438466   1.91594245989412   1.29198392114244
            1.70085723323733   1.60198742363800   1.85796351308408
            1.96228825871716   1.25356057873233   1.33575513868621
            1.75051823194427   1.87345080554039   1.68020385037051
            1.73999304537847   1.51340070999628   1.05344442131849
            2.35665553727760   2.67000386489368   2.90898934903532
            2.49830459603553   2.20087641229516   2.59624713810572
            2.43444053822029   2.27308816154697   2.32895530216404
            2.56245841710735   2.62623463865051   2.47819442572535
            2.61662113016546   2.53685169481751   2.59717077926034
            2.11333998089856   2.05950405092050   2.16144875489995
            2.89825174061313   2.08896175947532   2.82947425087386
            2.75455137523865   2.27130817438170   2.95612240635488
            2.79112319571067   2.40907231577105   2.59554799520203
            2.81495206793323   2.47404145037448   2.02874821321149 ]" );
              KMeansClustering km = new KMeansClustering( data );
              ClusterSet clusters = km.Cluster( 5 );

              // We have to specify which plane to plot.
              int xColIndex = 0;
              int yColIndex = 1;
              NMathStatsChart.Show( clusters, data, xColIndex, yColIndex );
        }
Example #6
        public async Task<HttpResponseMessage> GetTrainSOM()
        {
            try
            {
                if (!Request.Content.IsMimeMultipartContent())
                {
                    throw new HttpResponseException(HttpStatusCode.UnsupportedMediaType);
                }

                var root = HttpContext.Current.Server.MapPath(FILE_UPLOAD_PATH);
                Directory.CreateDirectory(root);
                var provider = new MultipartFormDataStreamProvider(root);
                var result   = await Request.Content.ReadAsMultipartAsync(provider);

                var jsonModel = result.FormData["model"];
                if (jsonModel == null)
                {
                    throw new HttpResponseException(HttpStatusCode.BadRequest);
                }

                JObject parsedModel = JObject.Parse(jsonModel);

                var epoch        = (int)parsedModel["Epoch"];
                var learningRate = (double)parsedModel["LearningRate"];
                var height       = (int)parsedModel["Height"];
                var width        = (int)parsedModel["Width"];
                var kmeans       = (int)parsedModel["KMeans"];
                var k            = (int)parsedModel["K"];
                var regions      = JsonConvert.DeserializeObject<List<Region>>(parsedModel["Regions"].ToString());

                var csvFile = result.FileData.First();

                SSOM model = new SSOM(width, height, learningRate, epoch, k);
                model.Regions = regions;

                var featureLabel = (string)parsedModel["FeatureLabel"];
                var labels       = ((string)parsedModel["Labels"]).Split(',').ToList();

                IReader reader = new CSVReader(csvFile.LocalFileName);

                model.GetData(reader);

                foreach (var item in labels)
                {
                    model.Dataset.SetLabel(item);
                }

                model.FeatureLabel = featureLabel;
                model.InitializeMap();
                model.Train();
                model.LabelNodes();

                IClusterer cluster = new KMeansClustering();

                var flattenedMap = ArrayHelper<Node>.FlattenMap(model.Map);

                var clusteredNodes = cluster.Cluster(flattenedMap, kmeans);

                foreach (var node in clusteredNodes)
                {
                    model.Map[node.Coordinate.X, node.Coordinate.Y].ClusterGroup = node.ClusterGroup;
                }

                // Remove the uploaded temp file once training is finished.
                File.Delete(csvFile.LocalFileName);

                TrainSOMResponse response = new TrainSOMResponse()
                {
                    MapId = Guid.NewGuid(),
                    Model = model
                };

                var message = Request.CreateResponse(HttpStatusCode.OK, response);

                return message;
            }
            catch (Exception ex)
            {
                var message = Request.CreateResponse(HttpStatusCode.InternalServerError, ex);
                return message;
            }
        }
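For reference, a minimal sketch of posting to this endpoint with HttpClient; the route, file path, and model JSON are assumptions for illustration, not taken from the original project (System.Net.Http assumed).

        // Hypothetical client; the field layout mirrors the handler above,
        // but the endpoint URL and file name are assumptions.
        public static async Task<string> TrainSomAsync(string csvPath, string modelJson)
        {
            using var http = new HttpClient();
            using var form = new MultipartFormDataContent();

            // The handler reads its settings from the "model" form field...
            form.Add(new StringContent(modelJson), "model");
            // ...and uses the first uploaded file as the training CSV.
            form.Add(new ByteArrayContent(File.ReadAllBytes(csvPath)), "file", "dataset.csv");

            var response = await http.PostAsync("https://localhost/api/som/train", form);
            return await response.Content.ReadAsStringAsync();
        }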
Example #7
        public static void Program2(string[] args)
        {
            string filePath = @"C:\Users\Vilson\Desktop\Datasets\Kalaw-Dataset\config.json";

            if (args.Length > 0)
            {
                filePath = args[0];
            }

            var content = System.IO.File.ReadAllText(filePath);
            var config  = ReadToObject(content);

            // Build the Model
            SSOM model = new SSOM(config.Width, config.Height, config.ConstantLearningRate, config.Epoch, config.K);

            model.Regions = config.Regions;

            // Subscribe to OnTrainingEvent
            model.Training += _model_Training;

            // Instantiate the reader
            IReader _reader = new CSVReader(config.Dataset);

            // Instantiate the clusterer
            IClusterer clusterer = new KMeansClustering();

            model.GetData(_reader);

            // Get the labels
            string[] labels = config.Labels.Split(',');

            foreach (var label in labels)
            {
                model.Dataset.SetLabel(label);
            }

            // Set the feature label
            model.FeatureLabel = config.FeatureLabel;

            // Initialize the training
            Stopwatch stopwatch = new Stopwatch();

            Console.WriteLine("Start initializing map...");
            stopwatch.Start();
            model.InitializeMap();
            stopwatch.Stop();
            Console.WriteLine("Completed initialization...");
            Console.WriteLine("Time elapsed: {0:hh\\:mm\\:ss}", stopwatch.Elapsed);

            Console.WriteLine("Start training model...");
            stopwatch.Restart();
            model.Train();
            stopwatch.Stop();
            Console.WriteLine("Completed training model...");
            Console.WriteLine("Time elapsed: {0:hh\\:mm\\:ss}", stopwatch.Elapsed);

            Console.WriteLine("Start labelling node...");
            stopwatch.Restart();
            model.LabelNodes();
            stopwatch.Stop();
            Console.WriteLine("Completed labelling node...");
            Console.WriteLine("Time elapsed: {0:hh\\:mm\\:ss}", stopwatch.Elapsed);

            if (config.Clusters > 0)
            {
                Console.WriteLine("Start clustering nodes...");
                stopwatch.Restart();
                var flattenedMap = ArrayHelper<Node>.FlattenMap(model.Map);

                var clusteredNodes = clusterer.Cluster(flattenedMap, config.Clusters);

                foreach (var node in clusteredNodes)
                {
                    model.Map[node.Coordinate.X, node.Coordinate.Y].ClusterGroup = node.ClusterGroup;
                }

                stopwatch.Stop();
                Console.WriteLine("Completed clustering nodes...");
                Console.WriteLine("Time elapsed: {0:hh\\:mm\\:ss}", stopwatch.Elapsed);
            }

            // Export the model
            Console.WriteLine("Exporting model...");
            var guid = Guid.NewGuid();

            model.MapId   = guid;
            model.Dataset = null;

            var serializeObject = JsonConvert.SerializeObject(model, Formatting.Indented);

            string exportFileName = string.Format("{0}Map_{1}.json", config.Export, guid);

            System.IO.File.WriteAllText(exportFileName, serializeObject);

            Console.WriteLine("Training completed...");

            Console.ReadLine();
        }
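The config object is only visible through its usages above; a hedged reconstruction of its shape is sketched below (property names come from the code, the class name and comments are assumptions, System.Collections.Generic assumed).

        // Hypothetical settings class inferred from the properties accessed in Program2.
        public class SomConfig
        {
            public int Width { get; set; }
            public int Height { get; set; }
            public double ConstantLearningRate { get; set; }
            public int Epoch { get; set; }
            public int K { get; set; }
            public List<Region> Regions { get; set; }
            public string Dataset { get; set; }       // path to the training CSV
            public string Labels { get; set; }        // comma-separated column labels
            public string FeatureLabel { get; set; }
            public int Clusters { get; set; }         // k for clustering the trained map; 0 skips clustering
            public string Export { get; set; }        // output path prefix for the exported map JSON
        }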
Example #8
        public static void Program1(string[] args)
        {
            Stopwatch stopwatch = new Stopwatch();


            string filepath = @"C:\Users\Vilson\Desktop\Datasets\Financial Distress\FD_Training.csv";

            SOM _model = new SOM(10, 10, 0.3, 20);

            _model.Training += _model_Training;
            IReader    _reader = new CSVReader(filepath);
            IClusterer _kmeans = new KMeansClustering();

            _model.GetData(_reader);
            _model.Dataset.SetLabel("Company");
            _model.Dataset.SetLabel("Time");
            _model.Dataset.SetLabel("Financial Distress");
            _model.Dataset.SetLabel("Status");

            _model.FeatureLabel = "Status";

            Console.WriteLine("Start initializing map...");
            stopwatch.Start();
            _model.InitializeMap();
            stopwatch.Stop();
            Console.WriteLine("Completed initialization...");
            Console.WriteLine("Time elapsed: {0:hh\\:mm\\:ss}", stopwatch.Elapsed);

            Console.WriteLine("Start training model...");
            stopwatch.Restart();
            _model.Train();
            stopwatch.Stop();
            Console.WriteLine("Completed training model...");
            Console.WriteLine("Time elapsed: {0:hh\\:mm\\:ss}", stopwatch.Elapsed);

            Console.WriteLine("Start labelling node...");
            stopwatch.Restart();
            _model.LabelNodes();
            stopwatch.Stop();
            Console.WriteLine("Completed labelling node...");
            Console.WriteLine("Time elapsed: {0:hh\\:mm\\:ss}", stopwatch.Elapsed);

            Console.WriteLine("Start clustering nodes...");
            stopwatch.Restart();
            var flattenedMap = ArrayHelper<Node>.FlattenMap(_model.Map);

            var clusteredNodes = _kmeans.Cluster(flattenedMap, 3);

            stopwatch.Stop();
            Console.WriteLine("Completed clustering nodes...");
            Console.WriteLine("Time elapsed: {0:hh\\:mm\\:ss}", stopwatch.Elapsed);

            string trainingPath = @"C:\Users\Vilson\Desktop\Datasets\Financial Distress\Training";

            List <TrajectoryMapper> dbTrajectories = new List <TrajectoryMapper>();

            Console.WriteLine("Start plotting trajectories...");
            stopwatch.Restart();

            foreach (var file in Directory.EnumerateFiles(trainingPath))
            {
                TrajectoryMapper trajectoryMapper = new TrajectoryMapper(_model);
                IReader          trajectoryReader = new CSVReader(file);

                trajectoryMapper.GetData(trajectoryReader);
                trajectoryMapper.GetTrajectories();

                dbTrajectories.Add(trajectoryMapper);
            }

            stopwatch.Stop();
            Console.WriteLine("Completed plotting trajectories...");
            Console.WriteLine("Time elapsed: {0:hh\\:mm\\:ss}", stopwatch.Elapsed);


            string testingPath = @"C:\Users\Vilson\Desktop\Datasets\Financial Distress\Test\fd_297.csv";

            TrajectoryMapper testMapper           = new TrajectoryMapper(_model);
            IReader          trajectoryDataReader = new CSVReader(testingPath);

            testMapper.GetData(trajectoryDataReader);
            var unknownTrajectory = testMapper.GetTrajectories();

            IFileHelper        fileHelper        = new FileHelper();
            ISimilarityMeasure similarityMeasure = new CompressionDissimilarityMeasure(fileHelper);

            foreach (var trajectory in dbTrajectories)
            {
                var currentTrajectory = trajectory.GetTrajectories();

                var score = similarityMeasure.MeasureSimilarity(currentTrajectory, unknownTrajectory);
                Console.WriteLine("{0}: {1}", trajectory.FileName, score);
            }

            Console.ReadLine();
        }