public static HdbscanResult Run <T>(HdbscanParameters <T> parameters)
        {
            var numPoints = parameters.DataSet?.Length ?? parameters.Distances.Length;

            PrecomputeNormalMatrixDistancesIfApplicable(parameters, numPoints);
            var sparseDistance       = PrecomputeSparseMatrixDistancesIfApplicable(parameters, numPoints);
            var internalDistanceFunc = DetermineInternalDistanceFunc(parameters, sparseDistance, numPoints);

            // Compute core distances
            var coreDistances = HdbscanAlgorithm.CalculateCoreDistances(
                internalDistanceFunc,
                numPoints,
                parameters.MinPoints);

            // Calculate minimum spanning tree
            var mst = HdbscanAlgorithm.ConstructMst(
                internalDistanceFunc,
                numPoints,
                coreDistances,
                true);

            mst.QuicksortByEdgeWeight();

            var pointNoiseLevels  = new double[numPoints];
            var pointLastClusters = new int[numPoints];
            var hierarchy         = new List <int[]>();

            // Compute hierarchy and cluster tree
            var clusters = HdbscanAlgorithm.ComputeHierarchyAndClusterTree(
                mst,
                parameters.MinClusterSize,
                parameters.Constraints,
                hierarchy,
                pointNoiseLevels,
                pointLastClusters);

            // Propagate clusters
            var infiniteStability = HdbscanAlgorithm.PropagateTree(clusters);

            // Compute final flat partitioning
            var prominentClusters = HdbscanAlgorithm.FindProminentClusters(
                clusters,
                hierarchy,
                numPoints);

            // Compute outlier scores for each point
            var scores = HdbscanAlgorithm.CalculateOutlierScores(
                clusters,
                pointNoiseLevels,
                pointLastClusters,
                coreDistances);

            return(new HdbscanResult
            {
                Labels = prominentClusters,
                OutliersScore = scores,
                HasInfiniteStability = infiniteStability
            });
        }
Exemple #2
0
        public static HdbscanResult Run(HdbscanParameters parameters)
        {
            var numPoints = parameters.DataSet != null
                ? parameters.DataSet.Length
                : parameters.Distances.Length;

            if (parameters.Distances == null)
            {
                // Precompute distances.
                var distances = new double[numPoints][];
                for (var i = 0; i < distances.Length; i++)
                {
                    distances[i] = new double[numPoints];
                }

                if (parameters.UseMultipleThread)
                {
                    var size = numPoints * numPoints;

                    var maxDegreeOfParallelism = parameters.MaxDegreeOfParallelism;
                    if (maxDegreeOfParallelism == 0)
                    {
                        // Not specified. Use all threads.
                        maxDegreeOfParallelism = Environment.ProcessorCount;
                    }
                    var option = new ParallelOptions {
                        MaxDegreeOfParallelism = Math.Max(1, maxDegreeOfParallelism)
                    };

                    Parallel.For(0, size, option, index =>
                    {
                        var i = index % numPoints;
                        var j = index / numPoints;
                        if (i < j)
                        {
                            var distance = parameters.DistanceFunction.ComputeDistance(
                                parameters.DataSet[i],
                                parameters.DataSet[j]);
                            distances[i][j] = distance;
                            distances[j][i] = distance;
                        }
                    });
                }
                else
                {
                    for (var i = 0; i < numPoints; i++)
                    {
                        for (var j = 0; j < i; j++)
                        {
                            var distance = parameters.DistanceFunction.ComputeDistance(
                                parameters.DataSet[i],
                                parameters.DataSet[j]);
                            distances[i][j] = distance;
                            distances[j][i] = distance;
                        }
                    }
                }

                parameters.Distances = distances;
            }

            // Compute core distances
            var coreDistances = HdbscanAlgorithm.CalculateCoreDistances(
                parameters.Distances,
                parameters.MinPoints);

            // Calculate minimum spanning tree
            var mst = HdbscanAlgorithm.ConstructMst(
                parameters.Distances,
                coreDistances,
                true);

            mst.QuicksortByEdgeWeight();

            var pointNoiseLevels  = new double[numPoints];
            var pointLastClusters = new int[numPoints];

            var hierarchy = new List <int[]>();

            // Compute hierarchy and cluster tree
            var clusters = HdbscanAlgorithm.ComputeHierarchyAndClusterTree(
                mst,
                parameters.MinClusterSize,
                parameters.Constraints,
                hierarchy,
                pointNoiseLevels,
                pointLastClusters);

            // Propagate clusters
            var infiniteStability = HdbscanAlgorithm.PropagateTree(clusters);

            // Compute final flat partitioning
            var prominentClusters = HdbscanAlgorithm.FindProminentClusters(
                clusters,
                hierarchy,
                numPoints);

            // Compute outlier scores for each point
            var scores = HdbscanAlgorithm.CalculateOutlierScores(
                clusters,
                pointNoiseLevels,
                pointLastClusters,
                coreDistances);

            return(new HdbscanResult
            {
                Labels = prominentClusters,
                OutliersScore = scores,
                HasInfiniteStability = infiniteStability
            });
        }