예제 #1
0
        /// <summary>
        ///     Cuts the best cluster analysis at the given distance and hands the resulting cluster set
        ///     to the console writer and to the chart creator (chart name "clustersByDistance").
        /// </summary>
        /// <param name="clusterDistance">The distance passed to <c>CutTree</c>.</param>
        public void GetClustersByDistance(double clusterDistance)
        {
            ClusterSet partition = bestClusterAnalysis.CutTree(clusterDistance);

            // report first, then draw; both consume the same partition
            ConsoleWriter.WriteClusterCountries(countryStatistics, partition);
            ChartCreator.CreateChart(countryStatistics, partition, "clustersByDistance");
        }
예제 #2
0
        /// <inheritdoc />
        public double Evaluate(ClusterSet <TInstance> clusterSet, IDictionary <TInstance, TClass> instanceClasses)
        {
            // running total, over all clusters, of the size of each cluster's most frequent class
            var majorityTotal = 0d;

            foreach (var cluster in clusterSet)
            {
                var frequencies = new Dictionary <TClass, uint>();
                var largest     = 0u;

                foreach (var instance in cluster)
                {
                    var label = instanceClasses[instance];

                    // 'seen' stays 0 when the label has not been counted in this cluster yet
                    uint seen;
                    frequencies.TryGetValue(label, out seen);
                    var updated = seen + 1u;
                    frequencies[label] = updated;

                    // tracks the highest frequency reached so far within this cluster
                    if (updated > largest)
                    {
                        largest = updated;
                    }
                }

                majorityTotal += largest;
            }

            // normalizes by the number of labelled instances
            return majorityTotal / instanceClasses.Count;
        }
예제 #3
0
        /// <inheritdoc />
        public double Evaluate(ClusterSet <TInstance> clusterSet, IDictionary <TInstance, TClass> instanceClasses)
        {
            var pairsTogether          = 0L; // positives: pairs placed in the same cluster
            var sameClassPairsTogether = 0L; // true positives: such pairs that also share a class

            foreach (var cluster in clusterSet)
            {
                // tallies how many members of this cluster carry each class
                var tally = new Dictionary <TClass, int>();
                foreach (var member in cluster)
                {
                    var label = instanceClasses[member];

                    int soFar;
                    tally.TryGetValue(label, out soFar);
                    tally[label] = soFar + 1;
                }

                // every unordered pair inside the cluster counts as a positive
                pairsTogether += Combinatorics.GetCombinations(cluster.Count, 2);

                // classes occurring more than once contribute their within-cluster pairs
                var repeatedClassSizes = tally.Values.Where(size => size > 1);
                sameClassPairsTogether += repeatedClassSizes.Sum(size => Combinatorics.GetCombinations(size, 2));
            }

            // precision = true positives / positives
            return (double)sameClassPairsTogether / pairsTogether;
        }
예제 #4
0
        /// <inheritdoc />
        public double Evaluate(ClusterSet <TInstance> clusterSet)
        {
            // a single cluster leaves the criterion undefined
            if (clusterSet.Count < 2)
            {
                return double.NaN;
            }

            // one centroid per cluster, in the set's enumeration order
            var clusterCentroids = clusterSet.Select(c => this._centroidFunc(c)).ToList();

            var totalInstances = 0;
            var squaredDistSum = 0d;

            for (var idx = 0; idx < clusterSet.Count; idx++)
            {
                var members = clusterSet[idx];
                totalInstances += members.Count;

                // accumulates the squared distance of each member to its cluster's centroid
                foreach (var member in members)
                {
                    var toCentroid = this.DissimilarityMetric.Calculate(member, clusterCentroids[idx]);
                    squaredDistSum += toCentroid * toCentroid;
                }
            }

            // root of the mean squared distance
            var meanSquared = squaredDistSum / totalInstances;
            return Math.Sqrt(meanSquared);
        }
        /// <inheritdoc />
        /// <remarks>
        ///     Returns <see cref="double.NaN" /> when the set has fewer than two clusters. Otherwise every pair of
        ///     instances from two different clusters contributes its dissimilarity multiplied by the distance between
        ///     the two clusters' centroids; the total is multiplied by twice the smallest centroid distance and
        ///     divided by <c>n * (n - 1)</c>, where <c>n</c> is the total number of instances.
        /// </remarks>
        public double Evaluate(ClusterSet <TInstance> clusterSet)
        {
            // undefined if only one cluster
            if (clusterSet.Count < 2)
            {
                return(double.NaN);
            }

            // gets clusters' centroids
            var centroids = clusterSet.Select(t => this._centroidFunc(t)).ToList();

            var n           = 0;
            var sum         = 0d;
            var minCentDist = double.MaxValue;

            for (var i = 0; i < clusterSet.Count; i++)
            {
                n += clusterSet[i].Count;
                for (var j = i + 1; j < clusterSet.Count; j++)
                {
                    // updates weighted pairwise distances
                    var betweenClusterDist = this.DissimilarityMetric.Calculate(centroids[i], centroids[j]);
                    minCentDist = Math.Min(minCentDist, betweenClusterDist);
                    sum        += clusterSet[i].Sum(
                        inst1 => clusterSet[j]
                        .Sum(inst2 => this.DissimilarityMetric.Calculate(inst1, inst2) * betweenClusterDist));
                }
            }

            // the ordered-pair count is computed in floating point: as an int product, n * (n - 1)
            // silently wraps around once n exceeds 46,341 and would corrupt the result
            var orderedPairs = (double)n * (n - 1);

            return(2 * sum * minCentDist / orderedPairs);
        }
예제 #6
0
        /// <summary>
        ///     Cuts the best cluster analysis using the given cluster count and hands the resulting cluster set
        ///     to the console writer and to the chart creator (chart name "clustersByAmount").
        /// </summary>
        /// <param name="clusterCount">The count passed to <c>CutTree</c>.</param>
        public void GetClustersByAmount(int clusterCount)
        {
            ClusterSet partition = bestClusterAnalysis.CutTree(clusterCount);

            // report first, then draw; both consume the same partition
            ConsoleWriter.WriteClusterCountries(countryStatistics, partition);
            ChartCreator.CreateChart(countryStatistics, partition, "clustersByAmount");
        }
예제 #7
0
        /// <inheritdoc />
        public double Evaluate(ClusterSet <TInstance> clusterSet)
        {
            // a single cluster leaves the criterion undefined
            if (clusterSet.Count < 2)
            {
                return double.NaN;
            }

            // first pass: one centroid per cluster plus the overall number of instances
            var clusterCentroids = new List <TInstance>();
            var totalInstances   = 0d;

            foreach (var cluster in clusterSet)
            {
                totalInstances += cluster.Count;
                clusterCentroids.Add(this._centroidFunc(cluster));
            }

            // second pass: squared distance of every instance to its own cluster's centroid
            var withinSquares = 0d;

            for (var c = 0; c < clusterSet.Count; c++)
            {
                var ownCentroid = clusterCentroids[c];
                foreach (var instance in clusterSet[c])
                {
                    var toCentroid = this.DissimilarityMetric.Calculate(instance, ownCentroid);
                    withinSquares += toCentroid * toCentroid;
                }
            }

            // base-2 log of the scaled scatter plus natural log of the number of clusters, negated
            var scatterTerm = Math.Log(Math.Sqrt(withinSquares / (totalInstances * totalInstances)), 2);
            var countTerm   = Math.Log(clusterSet.Count);

            return -(scatterTerm + countTerm);
        }
예제 #8
0
        /// <summary>
        ///     Builds a cluster-set from two clusters and checks that its count is 2.
        /// </summary>
        public void ClusterSetLengthTest()
        {
            // two clusters built from the same three elements
            var first      = new Cluster <int>(new[] { 1, 2, 3 });
            var second     = new Cluster <int>(new[] { 1, 2, 3 });
            var clusterSet = new ClusterSet <int>(new[] { first, second });

            Console.WriteLine(clusterSet);

            var hasTwoClusters = clusterSet.Count == 2;
            Assert.IsTrue(hasTwoClusters, $"Cluster-set {clusterSet} should have length of 2.");
        }
예제 #9
0
        /// <inheritdoc />
        /// <remarks>
        ///     Computes the mutual information between the clustering and the class assignment and divides it by
        ///     the mean of the cluster entropy and the class entropy. All logarithms are natural
        ///     (<see cref="Math.Log(double)" />). The denominator is zero when both entropies are zero, in which
        ///     case the result is not a finite number.
        /// </remarks>
        public double Evaluate(ClusterSet <TInstance> clusterSet, IDictionary <TInstance, TClass> instanceClasses)
        {
            // the number of labelled instances is used as the total for every probability below
            var numPoints = instanceClasses.Count;

            // organizes by class counts (class -> number of instances carrying it)
            var classCounts = new Dictionary <TClass, uint>();

            foreach (var pointClass in instanceClasses.Values)
            {
                if (classCounts.ContainsKey(pointClass))
                {
                    classCounts[pointClass]++;
                }
                else
                {
                    classCounts[pointClass] = 1;
                }
            }

            // gets class entropy: -sum(p * ln p) with p = class count / numPoints
            var classEntropy = -classCounts
                               .Where(classCount => classCount.Value > 0)
                               .Sum(classCount => (double)classCount.Value / numPoints *
                                    Math.Log((double)classCount.Value / numPoints));

            // gets mutual information and cluster entropy
            var mi             = 0d;
            var clusterEntropy = 0d;

            foreach (var cluster in clusterSet)
            {
                foreach (var classCount in classCounts)
                {
                    // gets intersection between class and group (num points in group that belong to the class)
                    var clusterClassCount = cluster.Count(idx => instanceClasses[idx].Equals(classCount.Key));

                    // updates mutual information; empty intersections are skipped so that Log(0) is never taken
                    if (clusterClassCount > 0)
                    {
                        mi += (double)clusterClassCount / numPoints *
                              Math.Log((double)numPoints * clusterClassCount / (cluster.Count * classCount.Value));
                    }
                }

                // updates cluster entropy; empty clusters are skipped so that Log(0) is never taken
                if (cluster.Count > 0)
                {
                    clusterEntropy -= (double)cluster.Count / numPoints *
                                      Math.Log((double)cluster.Count / numPoints);
                }
            }

            // normalizes the mutual information by the arithmetic mean of the two entropies
            return(mi / (0.5 * (clusterEntropy + classEntropy)));
        }
예제 #10
0
        /// <summary>
        ///     Computes one silhouette coefficient per element of the given <see cref="ClusterSet{TInstance}" />.
        /// </summary>
        /// <param name="clusterSet">The clustering partition.</param>
        /// <returns>A dictionary mapping each element of the partition to its silhouette coefficient.</returns>
        public IDictionary <TInstance, double> EvaluateEach(ClusterSet <TInstance> clusterSet)
        {
            var silhouettes = new Dictionary <TInstance, double>();

            for (var own = 0; own < clusterSet.Count; own++)
            {
                var members = clusterSet[own].ToList();

                for (var pos = 0; pos < members.Count; pos++)
                {
                    var instance = members[pos];

                    // silhouette is undefined for singletons or when there is only one cluster; 0 is stored instead
                    if (clusterSet.Count == 1 || members.Count == 1)
                    {
                        silhouettes.Add(instance, 0);
                        continue;
                    }

                    // mean dissimilarity to the other members of the same cluster
                    var meanWithin = 0d;
                    for (var peer = 0; peer < members.Count; peer++)
                    {
                        if (peer == pos)
                        {
                            continue;
                        }
                        meanWithin += this.DissimilarityMetric.Calculate(instance, members[peer]);
                    }
                    meanWithin /= members.Count - 1;

                    // smallest mean dissimilarity to the members of any other cluster
                    var nearestOther = double.MaxValue;
                    for (var other = 0; other < clusterSet.Count; other++)
                    {
                        if (other != own)
                        {
                            var meanToOther = clusterSet[other]
                                              .Average(elem => this.DissimilarityMetric.Calculate(instance, elem));
                            nearestOther = Math.Min(nearestOther, meanToOther);
                        }
                    }

                    // silhouette coefficient for the instance
                    // (when both distances are 0 the clusters should be together, so -1 is used)
                    var scale = Math.Max(nearestOther, meanWithin);
                    double coefficient;
                    if (Math.Abs(scale) < double.Epsilon)
                    {
                        coefficient = -1;
                    }
                    else
                    {
                        coefficient = (nearestOther - meanWithin) / scale;
                    }

                    silhouettes.Add(instance, coefficient);
                }
            }

            return silhouettes;
        }
예제 #11
0
        /// <inheritdoc />
        public double Evaluate(ClusterSet <TInstance> clusterSet, IDictionary <TInstance, TClass> instanceClasses)
        {
            var sameClassPairsTogether = 0L; // true positives
            var sameClassPairsApart    = 0L; // false negatives
            var pairsTogether          = 0L; // positives

            for (var current = 0; current < clusterSet.Count; current++)
            {
                var members = clusterSet[current];

                // tallies how many members of this cluster carry each class
                var tally = new Dictionary <TClass, int>();
                foreach (var member in members)
                {
                    var label = instanceClasses[member];

                    int soFar;
                    tally.TryGetValue(label, out soFar);
                    tally[label] = soFar + 1;

                    // same-class instances sitting in any later cluster are false negatives
                    for (var later = current + 1; later < clusterSet.Count; later++)
                    {
                        sameClassPairsApart += clusterSet[later]
                                               .Count(other => label.Equals(instanceClasses[other]));
                    }
                }

                // every unordered pair inside the cluster counts as a positive
                pairsTogether += Combinatorics.GetCombinations(members.Count, 2);

                // classes occurring more than once contribute their within-cluster pairs
                var repeatedClassSizes = tally.Values.Where(size => size > 1);
                sameClassPairsTogether += repeatedClassSizes.Sum(size => Combinatorics.GetCombinations(size, 2));
            }

            var precision = (double)sameClassPairsTogether / pairsTogether;
            var recall    = (double)sameClassPairsTogether / (sameClassPairsTogether + sameClassPairsApart);

            // weighted f-measure: (w^2 + 1) * P * R / (w^2 * P + R)
            var weight        = this.RecallWeight;
            var weightSquared = weight * weight;
            var numerator     = (weightSquared + 1) * precision * recall;
            var denominator   = weightSquared * precision + recall;

            return numerator / denominator;
        }
예제 #12
0
        /// <summary>
        ///     Adds one chart point per clustered country (alcohol consumption, fruit consumption, country name,
        ///     cluster index), saves the chart, and writes progress messages through the console writer.
        /// </summary>
        /// <param name="countryStatistics">Statistics looked up by the indices each cluster yields.</param>
        /// <param name="clusters">The cluster set whose clusters are drawn.</param>
        /// <param name="name">Chart name; also used in the "saved" message.</param>
        public static void CreateChart(CountryStatistics[] countryStatistics, ClusterSet clusters, string name)
        {
            ConsoleWriter.WriteSystemMessage("Drawing diagrams for displaying cluster data...");

            var chart = new ChartCreator("Alcohol", "Fruit", name);

            for (int clusterIndex = 0; clusterIndex < clusters.NumberOfClusters; clusterIndex++)
            {
                // each cluster yields indices into countryStatistics
                foreach (var stat in clusters.Cluster(clusterIndex).Select(index => countryStatistics[index]))
                {
                    chart.AddPoint(stat.AlcoholConsumption, stat.FruitConsumption, stat.Country, clusterIndex);
                }
            }

            chart.SaveAsFile();
            ConsoleWriter.WriteSystemMessage($"Saved at the app resources directory as '{name}.png'.");
        }
예제 #13
0
        /// <inheritdoc />
        public double Evaluate(ClusterSet <TInstance> clusterSet)
        {
            // a single cluster leaves the criterion undefined
            if (clusterSet.Count < 2)
            {
                return double.NaN;
            }

            // builds one centroid per cluster and a cluster merging all of them
            var clusterCentroids = new List <TInstance>();
            Cluster <TInstance> everything = null;

            foreach (var cluster in clusterSet)
            {
                if (everything == null)
                {
                    everything = new Cluster <TInstance>(cluster);
                }
                else
                {
                    everything = new Cluster <TInstance>(everything, cluster, 0);
                }

                clusterCentroids.Add(this._centroidFunc(cluster));
            }

            var globalCentroid = this._centroidFunc(everything);

            var distToGlobal     = 0d;
            var distToOwn        = 0d;
            var widestSeparation = double.MinValue;

            for (var a = 0; a < clusterSet.Count; a++)
            {
                // distance of each instance to its own centroid and to the global centroid
                foreach (var instance in clusterSet[a])
                {
                    distToOwn    += this.DissimilarityMetric.Calculate(instance, clusterCentroids[a]);
                    distToGlobal += this.DissimilarityMetric.Calculate(instance, globalCentroid);
                }

                // largest distance between any two cluster centroids
                for (var b = a + 1; b < clusterSet.Count; b++)
                {
                    var separation = this.DissimilarityMetric.Calculate(clusterCentroids[a], clusterCentroids[b]);
                    widestSeparation = Math.Max(widestSeparation, separation);
                }
            }

            return widestSeparation * distToGlobal / (clusterSet.Count * distToOwn);
        }
        /// <summary>
        ///     For every cluster in the given set, builds a custom cluster from the locations of its points,
        ///     structurizes it, and inserts it through the database service.
        /// </summary>
        /// <param name="dbscanClusters">The cluster set whose clusters are converted and stored.</param>
        /// <returns>Always <c>true</c>.</returns>
        private bool PostProcessClusters(ClusterSet <IPointData> dbscanClusters)
        {
            foreach (var found in dbscanClusters.Clusters)
            {
                var toStore = new Cluster();
                var members = found.Objects;

                for (int idx = 0; idx < members.Count; idx++)
                {
                    var point = (PointData)members[idx];

                    // NOTE(review): FirstOrDefault yields the default value when no location matches,
                    // and that value is passed to AddLocation unchanged - confirm AddLocation tolerates it
                    Location location = DatabaseService.Select_Locations_By_ID(point.Location_ID).FirstOrDefault();

                    toStore.AddLocation(location);
                }

                toStore.Structurize();
                DatabaseService.Insert_Cluster(toStore);
            }

            return true;
        }
        /// <inheritdoc />
        public double Evaluate(ClusterSet <TInstance> clusterSet)
        {
            // a single cluster leaves the criterion undefined
            if (clusterSet.Count < 2)
            {
                return double.NaN;
            }

            // builds one centroid per cluster and a cluster merging all of them
            var clusterCentroids = new List <TInstance>();
            Cluster <TInstance> everything = null;

            for (var c = 0; c < clusterSet.Count; c++)
            {
                var cluster = clusterSet[c];
                if (everything == null)
                {
                    everything = new Cluster <TInstance>(cluster);
                }
                else
                {
                    everything = new Cluster <TInstance>(everything, cluster, 0);
                }

                clusterCentroids.Add(this._centroidFunc(cluster));
            }

            var globalCentroid = this._centroidFunc(everything);

            var between = 0d;
            var within  = 0d;

            for (var c = 0; c < clusterSet.Count; c++)
            {
                var cluster     = clusterSet[c];
                var ownCentroid = clusterCentroids[c];

                // between-cluster part: squared centroid-to-global distance weighted by cluster size
                var toGlobal = this.DissimilarityMetric.Calculate(ownCentroid, globalCentroid);
                between += toGlobal * toGlobal * cluster.Count;

                // within-cluster part: squared distance of each member to its centroid
                foreach (var instance in cluster)
                {
                    var toOwn = this.DissimilarityMetric.Calculate(instance, ownCentroid);
                    within += toOwn * toOwn;
                }
            }

            // no within-cluster variance: the ratio is undefined
            if (Math.Abs(within) < double.Epsilon)
            {
                return double.NaN;
            }

            var numerator   = between * (everything.Count - clusterSet.Count);
            var denominator = within * (clusterSet.Count - 1);

            return numerator / denominator;
        }
예제 #16
0
        /// <inheritdoc />
        public double Evaluate(ClusterSet <TInstance> clusterSet, IDictionary <TInstance, TClass> instanceClasses)
        {
            var sameClassPairsTogether = 0L; // true positives
            var diffClassPairsApart    = 0L; // true negatives

            for (var current = 0; current < clusterSet.Count; current++)
            {
                var members = clusterSet[current];

                // tallies how many members of this cluster carry each class
                var tally = new Dictionary <TClass, int>();
                foreach (var member in members)
                {
                    var label = instanceClasses[member];

                    int soFar;
                    tally.TryGetValue(label, out soFar);
                    tally[label] = soFar + 1;

                    // different-class instances sitting in any later cluster are true negatives
                    for (var later = current + 1; later < clusterSet.Count; later++)
                    {
                        diffClassPairsApart += clusterSet[later]
                                               .Count(other => !label.Equals(instanceClasses[other]));
                    }
                }

                // classes occurring more than once contribute their within-cluster pairs
                var repeatedClassSizes = tally.Values.Where(size => size > 1);
                sameClassPairsTogether += repeatedClassSizes.Sum(size => Combinatorics.GetCombinations(size, 2));
            }

            // accuracy: correctly treated pairs over all pairs of labelled instances
            var correctPairs = sameClassPairsTogether + diffClassPairsApart;
            var allPairs     = Combinatorics.GetCombinations(instanceClasses.Count, 2);

            return (double)correctPairs / allPairs;
        }
        /// <inheritdoc />
        public double Evaluate(ClusterSet <TInstance> clusterSet)
        {
            // a single cluster leaves the criterion undefined
            if (clusterSet.Count < 2)
            {
                return double.NaN;
            }

            // builds one centroid per cluster and a cluster merging all of them
            var clusterCentroids = new List <TInstance>();
            Cluster <TInstance> everything = null;

            foreach (var cluster in clusterSet)
            {
                if (everything == null)
                {
                    everything = new Cluster <TInstance>(cluster);
                }
                else
                {
                    everything = new Cluster <TInstance>(everything, cluster, 0);
                }

                clusterCentroids.Add(this._centroidFunc(cluster));
            }

            var globalCentroid = this._centroidFunc(everything);

            var ssBetween = 0d;
            var ssWithin  = 0d;

            for (var c = 0; c < clusterSet.Count; c++)
            {
                var members     = clusterSet[c];
                var ownCentroid = clusterCentroids[c];

                // squared distance of each member to its cluster's centroid
                foreach (var instance in members)
                {
                    var toOwn = this.DissimilarityMetric.Calculate(instance, ownCentroid);
                    ssWithin += toOwn * toOwn;
                }

                // squared centroid-to-global distance, weighted by cluster size
                var toGlobal = this.DissimilarityMetric.Calculate(ownCentroid, globalCentroid);
                ssBetween += toGlobal * toGlobal * members.Count;
            }

            // - m * (SSW / SSB)
            var ratio = ssWithin / ssBetween;

            return -clusterSet.Count * ratio;
        }
예제 #18
0
        /// <inheritdoc />
        public double Evaluate(ClusterSet <TInstance> clusterSet)
        {
            // a single cluster leaves the criterion undefined
            if (clusterSet.Count < 2)
            {
                return double.NaN;
            }

            var closestAcross = double.MaxValue; // smallest distance between instances of different clusters
            var widestInside  = double.MinValue; // largest distance between instances of the same cluster

            for (var c = 0; c < clusterSet.Count; c++)
            {
                var members = clusterSet[c].ToList();

                for (var m = 0; m < members.Count; m++)
                {
                    var instance = members[m];

                    // compares with the remaining members of the same cluster
                    for (var peer = m + 1; peer < members.Count; peer++)
                    {
                        var intra = this.DissimilarityMetric.Calculate(instance, members[peer]);
                        widestInside = Math.Max(widestInside, intra);
                    }

                    // compares with every member of every later cluster
                    for (var later = c + 1; later < clusterSet.Count; later++)
                    {
                        foreach (var outsider in clusterSet[later])
                        {
                            var inter = this.DissimilarityMetric.Calculate(instance, outsider);
                            closestAcross = Math.Min(closestAcross, inter);
                        }
                    }
                }
            }

            return closestAcross / widestInside;
        }
예제 #19
0
        /// <inheritdoc />
        public double Evaluate(ClusterSet <TInstance> clusterSet)
        {
            // a single cluster leaves the criterion undefined
            if (clusterSet.Count < 2)
            {
                return double.NaN;
            }

            // builds one centroid per cluster and a cluster merging all of them
            var clusterCentroids = new List <TInstance>();
            Cluster <TInstance> everything = null;

            foreach (var cluster in clusterSet)
            {
                if (everything == null)
                {
                    everything = new Cluster <TInstance>(cluster);
                }
                else
                {
                    everything = new Cluster <TInstance>(everything, cluster, 0);
                }

                clusterCentroids.Add(this._centroidFunc(cluster));
            }

            var globalCentroid = this._centroidFunc(everything);

            var ssTotal  = 0d;
            var ssWithin = 0d;

            for (var c = 0; c < clusterSet.Count; c++)
            {
                var ownCentroid = clusterCentroids[c];

                // squared distance of each instance to its own centroid and to the global centroid
                foreach (var instance in clusterSet[c])
                {
                    var toOwn = this.DissimilarityMetric.Calculate(instance, ownCentroid);
                    ssWithin += toOwn * toOwn;

                    var toGlobal = this.DissimilarityMetric.Calculate(instance, globalCentroid);
                    ssTotal += toGlobal * toGlobal;
                }
            }

            // share of the total squared distance not accounted for within clusters
            var explained = ssTotal - ssWithin;

            return explained / ssTotal;
        }
        /// <inheritdoc />
        public double Evaluate(ClusterSet <TInstance> clusterSet)
        {
            // a single cluster leaves the criterion undefined
            if (clusterSet.Count < 2)
            {
                return double.NaN;
            }

            // one centroid and one dispersion value per cluster
            var clusterCentroids = new List <TInstance>();
            var clusterSpreads   = new List <double>();

            for (var c = 0; c < clusterSet.Count; c++)
            {
                var cluster = clusterSet[c];
                var center  = this._centroidFunc(cluster);

                clusterCentroids.Add(center);
                clusterSpreads.Add(CalcDispersion(cluster.ToList(), center));
            }

            var worstRatioTotal = 0d;

            for (var c = 0; c < clusterSet.Count; c++)
            {
                // largest (combined dispersion / centroid distance) against any other cluster
                var worstRatio = double.MinValue;
                for (var other = 0; other < clusterSet.Count; other++)
                {
                    if (other == c)
                    {
                        continue;
                    }

                    var separation = this.DissimilarityMetric.Calculate(clusterCentroids[c], clusterCentroids[other]);
                    var ratio      = (clusterSpreads[c] + clusterSpreads[other]) / separation;
                    worstRatio = Math.Max(worstRatio, ratio);
                }

                worstRatioTotal += worstRatio;
            }

            // negated mean of the per-cluster worst ratios
            return -worstRatioTotal / clusterSet.Count;
        }
예제 #21
0
        /// <inheritdoc />
        public double Evaluate(ClusterSet <TInstance> clusterSet)
        {
            // a single cluster leaves the criterion undefined
            if (clusterSet.Count < 2)
            {
                return double.NaN;
            }

            // one centroid per cluster, in the set's enumeration order
            var clusterCentroids = clusterSet.Select(c => this._centroidFunc(c)).ToList();

            var totalInstances       = 0;
            var withinSquares        = 0d;
            var smallestSeparationSq = double.MaxValue;

            for (var a = 0; a < clusterSet.Count; a++)
            {
                var members = clusterSet[a];
                totalInstances += members.Count;

                // squared distance of each member to its cluster's centroid
                foreach (var instance in members)
                {
                    var toOwn = this.DissimilarityMetric.Calculate(instance, clusterCentroids[a]);
                    withinSquares += toOwn * toOwn;
                }

                // smallest squared distance between any two cluster centroids
                for (var b = a + 1; b < clusterSet.Count; b++)
                {
                    var separation = this.DissimilarityMetric.Calculate(clusterCentroids[a], clusterCentroids[b]);
                    smallestSeparationSq = Math.Min(smallestSeparationSq, separation * separation);
                }
            }

            var denominator = totalInstances * smallestSeparationSq;

            return -withinSquares / denominator;
        }
예제 #22
0
        /// <summary>
        ///     Clusters the given feature matrix with the selected algorithm and pairs the resulting cluster
        ///     assignments with <paramref name="itemIds" />, position by position.
        /// </summary>
        /// <param name="itemIds">Item identifiers, zipped in order with the cluster assignments.</param>
        /// <param name="features">Feature matrix; wrapped in a <c>DoubleMatrix</c> before clustering.</param>
        /// <param name="numClusters">Value passed to <c>KMeansClustering.Cluster</c> or <c>NMFClustering.Factor</c>.</param>
        /// <param name="centerPath">
        ///     Optional file path. When it is neither null nor empty and k-means is used, the final centers are
        ///     written there as CSV.
        /// </param>
        /// <param name="algorithm">Algorithm to run; any value other than <c>KMeans</c> takes the NMF branch.</param>
        /// <returns>A dictionary from item id to the cluster assignment at the same position.</returns>
        /// <exception cref="InvalidOperationException">
        ///     No cluster set was produced, e.g. because the NMF factorization did not converge.
        /// </exception>
        public static Dictionary <string, int> Cluster(IEnumerable <string> itemIds, double[,] features, int numClusters, string centerPath = "",
                                                       ClusteringAlgorithm algorithm = ClusteringAlgorithm.KMeans)
        {
            Console.WriteLine("Clustering...");

            // NOTE(review): normalization is disabled, yet the message below is still printed
            //features = Normalized(features);

            Console.WriteLine("Features normalized.");

            var        dm       = new DoubleMatrix(features);
            ClusterSet clusters = null;

            if (algorithm == ClusteringAlgorithm.KMeans)
            {
                var km = new KMeansClustering(dm);
                km.Cluster(numClusters);

                Console.WriteLine("Num Clusters: {0}, Num Items: {1}, Num Iterations: {2}", km.K, km.N, km.Iterations);

                // a null path is treated like the empty default: no centers file is written
                if (!string.IsNullOrEmpty(centerPath))
                {
                    // 'using' closes the file even when writing throws
                    using (var cWriter = new StreamWriter(centerPath))
                    {
                        km.FinalCenters.WriteAsCSV(cWriter);
                    }
                }

                clusters = km.Clusters;
            }
            else
            {
                var nmf = new NMFClustering <NMFDivergenceUpdate>();

                nmf.Factor(dm, numClusters);

                if (nmf.Converged)
                {
                    // disposing flushes and closes both writers, also on failure
                    using (var uWriter = new StreamWriter(Paths.AmazonBooksUsersCluster + ".nmf"))
                    using (var iWriter = new StreamWriter(Paths.AmazonBooksItemsCluster + ".nmf"))
                    {
                        nmf.W.WriteAsCSV(uWriter);
                        nmf.H.WriteAsCSV(iWriter);
                    }

                    File.WriteAllLines(Paths.AmazonBooksUsersCluster + ".con", nmf.Connectivity.ToTabDelimited().Split('\n'));

                    clusters = nmf.ClusterSet;

                    File.WriteAllLines(Paths.AmazonBooksUsersCluster + ".cluster", clusters.Clusters.Select(c => c.ToString()));

                    Console.WriteLine("Successfully wrote decompose matrixes.");
                }
                else
                {
                    Console.WriteLine("Factorization failed to converge in {0} iterations.", nmf.MaxFactorizationIterations);
                }
            }

            // without a cluster set there is nothing to pair the item ids with; fail with a clear message
            // instead of dereferencing null below
            if (clusters == null)
            {
                throw new InvalidOperationException("Clustering did not produce a cluster set; no item assignments are available.");
            }

            return(itemIds.Zip(clusters.Clusters, (i, c) => new { ItemId = i, Cluster = c }).ToDictionary(i => i.ItemId, i => i.Cluster));
        }
예제 #23
0
 /// <inheritdoc />
 public double Evaluate(ClusterSet <TInstance> clusterSet)
 {
     // the average is undefined with fewer than two clusters
     if (clusterSet.Count < 2)
     {
         return double.NaN;
     }

     // mean of the per-instance silhouette coefficients over all clusters
     var perInstance = this.EvaluateEach(clusterSet);
     return perInstance.Values.Average();
 }
        /// <summary>
        ///     Writes to the console, for each cluster that has at least one member, a 1-based
        ///     "Cluster N:" header followed by the comma-separated countries of its members and a blank line.
        ///     Clusters without members produce no output.
        /// </summary>
        /// <param name="countryStatistics">Statistics indexed in parallel with <c>clustersBySetNumber.Clusters</c>.</param>
        /// <param name="clustersBySetNumber">
        ///     Cluster set; entry <c>j</c> of its <c>Clusters</c> array is compared with each cluster number to
        ///     decide whether item <c>j</c> belongs to that cluster.
        /// </param>
        public static void WriteClusterCountries(CountryStatistics[] countryStatistics, ClusterSet clustersBySetNumber)
        {
            int clusterCount = clustersBySetNumber.NumberOfClusters;

            for (int i = 0; i < clusterCount; i++)
            {
                // collects the statistics of every item assigned to cluster i
                List <CountryStatistics> clusterCountries = new List <CountryStatistics>();
                for (int j = 0; j < clustersBySetNumber.Clusters.Length; j++)
                {
                    if (clustersBySetNumber.Clusters[j] == i)
                    {
                        clusterCountries.Add(countryStatistics[j]);
                    }
                }

                // skip clusters without members: there is no last element to print, and indexing
                // position Count - 1 of an empty list would throw
                if (clusterCountries.Count == 0)
                {
                    continue;
                }

                Console.WriteLine($"Cluster {i + 1}:");
                Console.Write(string.Join(", ", clusterCountries.Select(stat => stat.Country)) + "\r\n\r\n");
            }
        }
예제 #25
0
        /// <summary>
        /// Intersects every pair of the given lines, clusters the distinct intersection points, feeds the
        /// mean of the best cluster into <c>Filtering</c>, and publishes the outcome as <c>LatestResponse</c>.
        /// </summary>
        /// <param name="lines">Lines whose pairwise intersections are clustered.</param>
        /// <param name="frame">Frame the filtered point is rendered onto; it is also displayed in the
        /// "frame" window.</param>
        /// <remarks>
        /// Nothing happens when the lines yield no intersection. <c>Confidence</c> grows by 5 per accepted
        /// sample up to a maximum of 100 and is reset to 10 when <c>Filtering.Add</c> rejects the sample.
        /// </remarks>
        protected void Clustering(List <Line> lines, Image <Bgr, byte> frame)
        {
            List <Point> intersections = new List <Point>();

            foreach (Line inLine in lines)
            {
                foreach (Line cmpLine in lines)
                {
                    // a line is never intersected with itself
                    if (inLine == cmpLine)
                    {
                        continue;
                    }

                    Point intersection = inLine.Intersect(cmpLine);

                    // keep each intersection point only once
                    if (intersection != null && !intersections.Contains(intersection))
                    {
                        intersections.Add(intersection);
                    }
                }
            }

            if (intersections.Count == 0)
            {
                return;
            }

            // NOTE(review): 20 and "10% of the intersection count" are presumably the DBSCAN neighbourhood
            // radius and minimum cluster size - confirm against DBSCAN.CalculateClusters.
            ClusterSet clusters = DBSCAN.DBSCAN.CalculateClusters(
                intersections.Select(p => new PointContainer(p)).ToList(),
                20,
                (int)Math.Round(0.1 * intersections.Count, 0)
                );

            if (clusters.IsValid())
            {
                if (Filtering.Add(clusters.GetBestCluster().GetMean()))
                {
                    // accepted sample: raise the confidence, but never past 100
                    if (Confidence < 100)
                    {
                        Confidence = Math.Min(100f, Confidence + 5f);
                    }
                }
                else
                {
                    Confidence = 10;
                }
            }

            // evaluated once and reused for the response below
            var hit = BoxContainer.Hit(Filtering.GetMean());

            try
            {
                ((RenderPoint)Filtering.GetMean()).Render(frame);
            }
            catch (Exception renderError)
            {
                // rendering stays best-effort (a failure must not abort processing), but leave a trace
                System.Diagnostics.Debug.WriteLine("Clustering: rendering the filtered point failed: " + renderError);
            }

            LatestResponse = !hit.IsNull()
                ? new Response(true, hit, Confidence)
                : new Response(false, null);

            CvInvoke.Imshow("frame", frame);
            CvInvoke.WaitKey(1);
        }
		/// <summary>
		/// Placeholder: setting an affinity is not implemented, so every call throws.
		/// </summary>
		/// <param name="p">The process argument; not used.</param>
		/// <param name="affinity">The affinity argument; not used.</param>
		/// <exception cref="NotImplementedException">Always thrown.</exception>
		public void SetAffinity (IMulticlusterProcess p, ClusterSet affinity)
			=> throw new NotImplementedException ();
예제 #27
0
 /// <inheritdoc />
 public double Evaluate(ClusterSet <TInstance> clusterSet, IDictionary <TInstance, TClass> instanceClasses)
 {
     // adds up every criterion's score multiplied by the value paired with it in _criteria
     var total = 0d;
     foreach (var entry in this._criteria)
     {
         total += entry.Key.Evaluate(clusterSet, instanceClasses) * entry.Value;
     }

     return total;
 }
 /// <summary>
 /// Builds a chart for the given clusters and shows it in a default form.
 /// </summary>
 /// <param name="clusters">The cluster assignments.</param>
 /// <param name="data">A matrix of data; each row represents one clustered object.</param>
 /// <param name="xColIndex">Index of the matrix column supplying the x values.</param>
 /// <param name="yColIndex">Index of the matrix column supplying the y values.</param>
 /// <exception cref="Core.IndexOutOfRangeException">Thrown if either column index lies outside the
 /// columns of the given data matrix.</exception>
 /// <remarks>
 /// ClusterSet instances are produced by ClusterAnalysis, KMeanClustering, and NMFClustering objects
 /// and cannot be constructed independently.
 /// <br/>
 /// Objects are plotted in the chosen x,y plane and colored by cluster assignment.
 /// <br/>
 /// This is shorthand for:
 /// <code>
 /// NMathStatsChart.Show( ToChart( clusters, data, xColIndex, yColIndex ) );
 /// </code>
 /// </remarks>
 public static void Show( ClusterSet clusters, DoubleMatrix data, int xColIndex, int yColIndex )
 {
     // build the chart first, then hand it to the chart-displaying overload
     ChartControl clusterChart = ToChart( clusters, data, xColIndex, yColIndex );
     Show( clusterChart );
 }
        /// <summary>
        /// Updates the given chart with the specified clusters.
        /// </summary>
        /// <param name="chart">A chart.</param>
        /// <param name="clusters">The cluster assignments.</param>
        /// <param name="data">A matrix of data. Each row in the matrix represents an object that was clustered.</param>
        /// <param name="xColIndex">The index of the matrix column containing the x data.</param>
        /// <param name="yColIndex">The index of the matrix column containing the y data.</param>
        /// <exception cref="Core.IndexOutOfRangeException">Thrown if either column index is outside the
        /// range of the columns of the given data matrix, or if a cluster member index is outside the
        /// range of the rows of the given data matrix.</exception>
        /// <remarks>
        /// Instances of class ClusterSet are created by ClusterAnalysis, KMeanClustering, and NMFClustering objects
        /// and cannot be constructed independently.
        /// <br/>
        /// Objects are shown plotted in the specified x,y plane, and colored according to their cluster assignment.
        /// <br/>
        /// Titles are added only if chart does not currently contain any titles.
        /// <br/>
        /// The first clusters.NumberOfClusters data series are replaced, or added if necessary.
        /// </remarks>
        public static void Update( ref ChartControl chart, ClusterSet clusters, DoubleMatrix data, int xColIndex, int yColIndex )
        {
            if( xColIndex < 0 || xColIndex > data.Cols - 1 )
            {
                throw new Core.IndexOutOfRangeException( xColIndex );
            }

            if( yColIndex < 0 || yColIndex > data.Cols - 1 )
            {
                throw new Core.IndexOutOfRangeException( yColIndex );
            }

            List<string> titles = new List<string>()
            {
                "ClusterSet",
            };
            string xTitle = String.Format( "Col {0}", xColIndex );
            string yTitle = String.Format( "Col {0}", yColIndex );

            // one scatter series per cluster, holding the (x, y) values of its member rows
            List<ChartSeries> series = new List<ChartSeries>();
            for( int i = 0; i < clusters.NumberOfClusters; i++ )
            {
                int[] members = clusters.Cluster( i );
                ChartSeries s = new ChartSeries()
                {
                    Text = "Cluster " + i,
                    Type = ChartSeriesType.Scatter,
                };
                s.Style.Symbol.Shape = DefaultMarker;
                for( int j = 0; j < members.Length; j++ )
                {
                    int row = members[j];

                    // valid rows are 0 .. data.Rows - 1, so an index equal to data.Rows is out of range too
                    if( row < 0 || row >= data.Rows )
                    {
                        throw new Core.IndexOutOfRangeException( row );
                    }
                    s.Points.Add( data[row, xColIndex], data[row, yColIndex] );
                }
                series.Add( s );
            }

            Update( ref chart, series, titles, xTitle, yTitle );
        }
 /// <inheritdoc />
 public double Evaluate(ClusterSet <TInstance> clusterSet)
 {
     // adds up every criterion's score multiplied by the value paired with it in _criteria
     var total = 0d;
     foreach (var entry in this._criteria)
     {
         total += entry.Key.Evaluate(clusterSet) * entry.Value;
     }

     return total;
 }
 /// <summary>
 /// Creates a new point chart that plots the given clusters.
 /// </summary>
 /// <param name="clusters">The cluster assignments.</param>
 /// <param name="data">A matrix of data; each row represents one clustered object.</param>
 /// <param name="xColIndex">Index of the matrix column supplying the x values.</param>
 /// <param name="yColIndex">Index of the matrix column supplying the y values.</param>
 /// <returns>The newly created chart.</returns>
 /// <exception cref="Core.IndexOutOfRangeException">Thrown if either column index lies outside the
 /// columns of the given data matrix.</exception>
 /// <remarks>
 /// ClusterSet instances are produced by ClusterAnalysis, KMeanClustering, and NMFClustering objects
 /// and cannot be constructed independently.
 /// <br/>
 /// Objects are plotted in the chosen x,y plane and colored by cluster assignment.
 /// </remarks>
 public static ChartControl ToChart( ClusterSet clusters, DoubleMatrix data, int xColIndex, int yColIndex )
 {
     // start from the default chart and let Update fill in the cluster series
     ChartControl clusterChart = GetDefaultChart();
     Update( ref clusterChart, clusters, data, xColIndex, yColIndex );

     return clusterChart;
 }
예제 #32
0
        /// <summary>
        /// Demo entry point: spawns point clouds, shows them colored and uncolored, clusters them with
        /// <c>KMeansClusterizer</c> and with centroid-linkage agglomerative clustering, shows each result
        /// in a modal form, writes a dendrogram file and finally waits for a line on the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            // 1) spawn one batch of points per cluster and plot each batch in its own color
            var sourcePlot = new Plot();
            var generator  = new Spawner(STD_DEV);
            var samples    = new List<PointF>();

            for (int batch = 0; batch < CLUSTER_COUNT; ++batch)
            {
                generator.ResetCenter(MIN_CENTER_DISTANCE, MAX_CENTER_DISTANCE);

                PointF[] batchPoints = generator.Spawn(POINT_COUNT);
                samples.AddRange(batchPoints);

                Color batchColor = sourcePlot.GetNextColor();

                sourcePlot.AddScatterPoints(batchPoints, batchColor, label: $"Points {batch + 1}");
                // the spawner's center is drawn in the same color with the larger size argument 25
                sourcePlot.AddPoint(generator.Center.X, generator.Center.Y, batchColor, 25);
            }

            sourcePlot.Legend();
            new PlotForm(sourcePlot, "source_data").ShowDialog();

            // 2) the same points again as one series, without per-batch colors
            var unlabeledPlot = new Plot();
            unlabeledPlot.AddScatterPoints(samples.ToArray(), label: "Gray points");
            unlabeledPlot.Legend();
            new PlotForm(unlabeledPlot, "gray_data").ShowDialog();

            // 3) k-means: show the last entry of the returned history, then every entry
            var kMeans = new KMeansClusterizer();
            List<Dictionary<PointF, List<PointF>>> kMeansHistory = kMeans.Clusterize(samples, CLUSTER_COUNT);

            new PlotForm(CreateClusterizingPlot(kMeansHistory.Last()), "crusterized").ShowDialog();

            var historyPlots = kMeansHistory.Select(c => CreateClusterizingPlot(c)).ToList();
            new PlotForm(historyPlots, "history_").ShowDialog();

            // 4) agglomerative clustering; a cluster is represented by the average X / Y of its members
            var centroidLinkage = new CentroidLinkage<DataPoint>(
                new DissimilarityMetric(),
                members => new DataPoint(members.Average(p => p.X), members.Average(p => p.Y)));
            var agglomerative = new AgglomerativeClusteringAlgorithm<DataPoint>(centroidLinkage);

            HashSet<DataPoint>          inputPoints = samples.Select(p => new DataPoint(p)).ToHashSet();
            ClusteringResult<DataPoint> hierarchy   = agglomerative.GetClustering(inputPoints);

            // third entry from the end of the result
            // NOTE(review): presumably the level that contains three clusters - confirm against the library
            ClusterSet<DataPoint> selectedLevel = hierarchy[hierarchy.Count - 3];

            var hierarchyPlot = new Plot();

            foreach (Cluster<DataPoint> currentCluster in selectedLevel)
            {
                Color clusterColor = hierarchyPlot.GetNextColor();

                double[] xs = currentCluster.Select(p => (double)p.X).ToArray();
                double[] ys = currentCluster.Select(p => (double)p.Y).ToArray();
                hierarchyPlot.AddScatterPoints(xs, ys, clusterColor);

                // mark the average position of the cluster's members
                hierarchyPlot.AddPoint(
                    currentCluster.Select(p => p.X).Average(),
                    currentCluster.Select(p => p.Y).Average(),
                    clusterColor, 25);
            }

            new PlotForm(hierarchyPlot, "aglomera").ShowDialog();

            hierarchy.SaveD3DendrogramFile(Environment.CurrentDirectory + "/dendro.json");

            // keep the console window open until the user presses Enter
            Console.ReadLine();
        }