/// <summary>
/// Runs the clustering pipeline for the given parameters: core distances,
/// minimum spanning tree, cluster hierarchy, flat partitioning and outlier scores.
/// </summary>
/// <typeparam name="T">Type argument of <see cref="HdbscanParameters{T}"/>.</typeparam>
/// <param name="parameters">
/// Clustering settings. The number of points is taken from <c>DataSet</c> when it is
/// set and from <c>Distances</c> otherwise.
/// </param>
/// <returns>
/// A result whose <c>Labels</c> come from <c>FindProminentClusters</c>, whose
/// <c>OutliersScore</c> comes from <c>CalculateOutlierScores</c>, and whose
/// <c>HasInfiniteStability</c> is the value returned by <c>PropagateTree</c>.
/// </returns>
public static HdbscanResult Run<T>(HdbscanParameters<T> parameters)
{
    // Point count: prefer the data set, fall back to the distance matrix.
    var dataSet = parameters.DataSet;
    int pointCount;
    if (dataSet != null)
    {
        pointCount = dataSet.Length;
    }
    else
    {
        pointCount = parameters.Distances.Length;
    }

    // Optional distance precomputation (dense first, then sparse), followed by
    // selection of the distance lookup used by the rest of the pipeline.
    PrecomputeNormalMatrixDistancesIfApplicable(parameters, pointCount);
    var sparseDistances = PrecomputeSparseMatrixDistancesIfApplicable(parameters, pointCount);
    var distanceLookup = DetermineInternalDistanceFunc(parameters, sparseDistances, pointCount);

    // Core distances.
    var coreDistances = HdbscanAlgorithm.CalculateCoreDistances(
        distanceLookup,
        pointCount,
        parameters.MinPoints);

    // Minimum spanning tree, sorted by edge weight before the hierarchy is built.
    var spanningTree = HdbscanAlgorithm.ConstructMst(
        distanceLookup,
        pointCount,
        coreDistances,
        true);
    spanningTree.QuicksortByEdgeWeight();

    // Buffers handed to ComputeHierarchyAndClusterTree and read back afterwards.
    var noiseLevels = new double[pointCount];
    var lastClusters = new int[pointCount];
    var hierarchyLevels = new List<int[]>();

    // Hierarchy and cluster tree.
    var clusterTree = HdbscanAlgorithm.ComputeHierarchyAndClusterTree(
        spanningTree,
        parameters.MinClusterSize,
        parameters.Constraints,
        hierarchyLevels,
        noiseLevels,
        lastClusters);

    // Propagate clusters.
    var hasInfiniteStability = HdbscanAlgorithm.PropagateTree(clusterTree);

    // Final flat partitioning.
    var labels = HdbscanAlgorithm.FindProminentClusters(
        clusterTree,
        hierarchyLevels,
        pointCount);

    // Outlier score for each point.
    var outlierScores = HdbscanAlgorithm.CalculateOutlierScores(
        clusterTree,
        noiseLevels,
        lastClusters,
        coreDistances);

    var result = new HdbscanResult
    {
        Labels = labels,
        OutliersScore = outlierScores,
        HasInfiniteStability = hasInfiniteStability
    };
    return result;
}
/// <summary>
/// Runs the clustering pipeline for the given parameters: (optional) pairwise
/// distance precomputation, core distances, minimum spanning tree, cluster
/// hierarchy, flat partitioning and outlier scores.
/// </summary>
/// <param name="parameters">
/// Clustering settings. When <c>Distances</c> is null, a full pairwise matrix is
/// computed from <c>DataSet</c> with <c>DistanceFunction</c> and stored back on
/// <c>parameters.Distances</c>. When <c>UseMultipleThread</c> is set, rows are
/// computed in parallel; a <c>MaxDegreeOfParallelism</c> of 0 means
/// <see cref="Environment.ProcessorCount"/>, and the value is clamped to at least 1.
/// </param>
/// <returns>
/// A result whose <c>Labels</c> come from <c>FindProminentClusters</c>, whose
/// <c>OutliersScore</c> comes from <c>CalculateOutlierScores</c>, and whose
/// <c>HasInfiniteStability</c> is the value returned by <c>PropagateTree</c>.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="parameters"/> is null.</exception>
/// <exception cref="ArgumentException">
/// Neither <c>DataSet</c> nor <c>Distances</c> is set on <paramref name="parameters"/>.
/// </exception>
public static HdbscanResult Run(HdbscanParameters parameters)
{
    if (parameters == null)
    {
        throw new ArgumentNullException(nameof(parameters));
    }

    if (parameters.DataSet == null && parameters.Distances == null)
    {
        throw new ArgumentException(
            "Either DataSet or Distances must be provided.",
            nameof(parameters));
    }

    var numPoints = parameters.DataSet != null
        ? parameters.DataSet.Length
        : parameters.Distances.Length;

    if (parameters.Distances == null)
    {
        // Precompute the full symmetric distance matrix.
        var dataSet = parameters.DataSet;
        var distanceFunction = parameters.DistanceFunction;

        var distances = new double[numPoints][];
        for (var i = 0; i < distances.Length; i++)
        {
            distances[i] = new double[numPoints];
        }

        if (parameters.UseMultipleThread)
        {
            var maxDegreeOfParallelism = parameters.MaxDegreeOfParallelism;
            if (maxDegreeOfParallelism == 0)
            {
                // Not specified. Use all threads.
                maxDegreeOfParallelism = Environment.ProcessorCount;
            }

            var option = new ParallelOptions
            {
                MaxDegreeOfParallelism = Math.Max(1, maxDegreeOfParallelism)
            };

            // Parallelise over rows rather than over a flattened numPoints * numPoints
            // index: that product overflows int for large inputs (which silently
            // skipped the whole computation), and more than half of the flattened
            // iterations did no work. Each (i, j) pair with i < j is handled by exactly
            // one iteration, so no two iterations write the same cell.
            Parallel.For(0, numPoints, option, i =>
            {
                var row = distances[i];
                for (var j = i + 1; j < numPoints; j++)
                {
                    // Argument order (lower index first) is kept from the previous
                    // parallel implementation.
                    var distance = distanceFunction.ComputeDistance(dataSet[i], dataSet[j]);
                    row[j] = distance;
                    distances[j][i] = distance;
                }
            });
        }
        else
        {
            for (var i = 0; i < numPoints; i++)
            {
                var row = distances[i];
                for (var j = 0; j < i; j++)
                {
                    var distance = distanceFunction.ComputeDistance(dataSet[i], dataSet[j]);
                    row[j] = distance;
                    distances[j][i] = distance;
                }
            }
        }

        // The computed matrix is published on the parameters object, as before.
        parameters.Distances = distances;
    }

    // Compute core distances
    var coreDistances = HdbscanAlgorithm.CalculateCoreDistances(
        parameters.Distances,
        parameters.MinPoints);

    // Calculate minimum spanning tree
    var mst = HdbscanAlgorithm.ConstructMst(
        parameters.Distances,
        coreDistances,
        true);
    mst.QuicksortByEdgeWeight();

    // Buffers handed to ComputeHierarchyAndClusterTree and read back afterwards.
    var pointNoiseLevels = new double[numPoints];
    var pointLastClusters = new int[numPoints];
    var hierarchy = new List<int[]>();

    // Compute hierarchy and cluster tree
    var clusters = HdbscanAlgorithm.ComputeHierarchyAndClusterTree(
        mst,
        parameters.MinClusterSize,
        parameters.Constraints,
        hierarchy,
        pointNoiseLevels,
        pointLastClusters);

    // Propagate clusters
    var infiniteStability = HdbscanAlgorithm.PropagateTree(clusters);

    // Compute final flat partitioning
    var prominentClusters = HdbscanAlgorithm.FindProminentClusters(
        clusters,
        hierarchy,
        numPoints);

    // Compute outlier scores for each point
    var scores = HdbscanAlgorithm.CalculateOutlierScores(
        clusters,
        pointNoiseLevels,
        pointLastClusters,
        coreDistances);

    return new HdbscanResult
    {
        Labels = prominentClusters,
        OutliersScore = scores,
        HasInfiniteStability = infiniteStability
    };
}