/// <summary>
        /// Performs hierarchical clustering based on a matrix of distances.
        /// </summary>
        /// <param name="distMatrix">The matrix of distances. It is lower triangular, excluding the diagonal.</param>
        /// <param name="linkage">Specifies the linkage for the clustering.</param>
        /// <param name="preserveOrder">If true, the "Linear" variant of the selected linkage is used; otherwise the general variant is used.</param>
        /// <param name="periodic">Forwarded to the "Linear" variant; not used when <paramref name="preserveOrder"/> is false.</param>
        /// <param name="nthreads">Forwarded to the general variant; not used when <paramref name="preserveOrder"/> is true.</param>
        /// <param name="progress">Progress callback. It is accepted for interface compatibility but not used by this method.</param>
        /// <returns>An array of cluster nodes defining the resulting tree.</returns>
        /// <exception cref="ArgumentException">Thrown when <paramref name="linkage"/> is not one of Average, Maximum or Single.</exception>
        public HierarchicalClusterNode[] TreeCluster(MatrixIndexer distMatrix, HierarchicalClusterLinkage linkage,
                                                     bool preserveOrder, bool periodic, int nthreads, Action <int> progress)
        {
            // The average distance is only consumed by the general (non order-preserving) variants,
            // so it is computed lazily inside those branches instead of on every call.
            switch (linkage)
            {
            case HierarchicalClusterLinkage.Average:
                return preserveOrder
                    ? AverageLinkageClusterLinear(distMatrix, periodic)
                    : AverageLinkageCluster(distMatrix, nthreads, CalcAverageDistance(distMatrix));

            case HierarchicalClusterLinkage.Maximum:
                return preserveOrder
                    ? MaximumLinkageClusterLinear(distMatrix, periodic)
                    : MaximumLinkageCluster(distMatrix, nthreads, CalcAverageDistance(distMatrix));

            case HierarchicalClusterLinkage.Single:
                return preserveOrder
                    ? SingleLinkageClusterLinear(distMatrix, periodic)
                    : SingleLinkageCluster(distMatrix, nthreads, CalcAverageDistance(distMatrix));

            default:
                throw new ArgumentException($"Unsupported linkage: {linkage}", nameof(linkage));
            }
        }
        /// <summary>
        /// Agglomerative clustering driven by <c>FindClosestPairLinear</c>, which only accepts pairs whose
        /// entries in the <c>position</c> array pass <c>ValidPositions</c>. The distance matrix is updated in place.
        /// </summary>
        /// <param name="matrix">Distance matrix; only entries [row, col] with col &lt; row are read and written. It is modified by this method.</param>
        /// <param name="periodic">Forwarded unchanged to <c>FindClosestPairLinear</c>.</param>
        /// <param name="linkage">Merge rule, called as linkage(distance to cluster i1, size of i1, distance to cluster j1, size of j1); its result becomes the distance to the merged cluster.</param>
        /// <returns>An array of <c>matrix.RowCount - 1</c> nodes, one per merge step, in merge order.</returns>
        private static HierarchicalClusterNode[] GenericLinkageClusterLinear(MatrixIndexer matrix, bool periodic, Func <double, int, double, int, double> linkage)
        {
            int nelements = matrix.RowCount;

            // clusterid[s]: id of the cluster currently stored in matrix slot s (original index, or a negative id once merged).
            // number[s]:    number of original elements in that cluster.
            // position[s]:  value handed to FindClosestPairLinear/ValidPositions for that slot.
            int[] clusterid = new int[nelements];
            int[] number    = new int[nelements];
            int[] position  = new int[nelements];
            HierarchicalClusterNode[] result = ArrayUtils.FillArray(i => new HierarchicalClusterNode(), nelements - 1);
            for (int j = 0; j < nelements; j++)
            {
                number[j]    = 1;
                clusterid[j] = j;
                position[j]  = j;
            }
            // n is the number of active slots; each pass merges two of them and records node number (nelements - n).
            for (int n = nelements; n > 1; n--)
            {
                // FindClosestPairLinear reports j1 < i1 whenever it finds a pair.
                // NOTE(review): if no pair passes ValidPositions, i1 and j1 stay at -1 and the indexing below would throw — confirm this cannot happen.
                result[nelements - n].distance = FindClosestPairLinear(n, matrix, out int i1, out int j1, position, periodic,
                                                                       out bool reverse,
                                                                       out bool carryOver);
                // 'reverse' swaps which of the two merged clusters becomes the left and which the right child.
                result[nelements - n].left  = reverse ? clusterid[j1] : clusterid[i1];
                result[nelements - n].right = reverse ? clusterid[i1] : clusterid[j1];
                // Slot j1 becomes the merged cluster: recompute its distance to every other active slot.
                // The three loops cover j < j1, j1 < j < i1 and j > i1, picking the lower-triangle cell in each case.
                for (int j = 0; j < j1; j++)
                {
                    matrix[j1, j] = linkage(matrix[i1, j], number[i1], matrix[j1, j], number[j1]);
                }
                for (int j = j1 + 1; j < i1; j++)
                {
                    matrix[j, j1] = linkage(matrix[i1, j], number[i1], matrix[j, j1], number[j1]);
                }
                for (int j = i1 + 1; j < n; j++)
                {
                    matrix[j, j1] = linkage(matrix[j, i1], number[i1], matrix[j, j1], number[j1]);
                }
                // Slot i1 is now free: move the last active slot (n - 1) into it so the active slots stay contiguous.
                for (int j = 0; j < i1; j++)
                {
                    matrix[i1, j] = matrix[n - 1, j];
                }
                for (int j = i1 + 1; j < n - 1; j++)
                {
                    matrix[j, i1] = matrix[n - 1, j];
                }
                number[j1]    = number[i1] + number[j1];
                number[i1]    = number[n - 1];
                // Merged clusters get negative ids: -1 for the first merge, -2 for the second, and so on.
                clusterid[j1] = n - nelements - 1;
                clusterid[i1] = clusterid[n - 1];
                position[j1]  = Math.Min(position[j1], position[i1]);
                position[i1]  = position[n - 1];
                // Unless the pair was flagged as carryOver, every position above the merged one is decremented.
                // NOTE(review): presumably this closes the gap left by the merge in the linear order — confirm against ValidPositions.
                if (!carryOver)
                {
                    for (int i = 0; i < position.Length; i++)
                    {
                        if (position[i] > position[j1])
                        {
                            position[i]--;
                        }
                    }
                }
            }
            return(result);
        }
 /// <summary>
 /// Runs k-means clustering on the rows of <paramref name="data"/>. Rows are first reduced through
 /// <c>ExtractUniqueRows</c>, the reduced set is clustered, and the outcome is expanded back to one
 /// entry per original row through <c>RestoreRowIndices</c>.
 /// </summary>
 /// <param name="data">Matrix whose rows are clustered.</param>
 /// <param name="k">number of clusters</param>
 /// <param name="maxIter">maximal number of iterations, if not converging</param>
 /// <param name="restarts">Forwarded unchanged to <c>GenerateClustersImpl</c>.</param>
 /// <param name="progress">Forwarded unchanged to <c>GenerateClustersImpl</c>.</param>
 /// <param name="clusterCenters">Receives the cluster centers produced by <c>GenerateClustersImpl</c>.</param>
 /// <param name="clusterIndices">Receives an array with one entry per row of <paramref name="data"/>.</param>
 public static void GenerateClusters(MatrixIndexer data, int k, int maxIter, int restarts, Action <int> progress,
                                     out float[,] clusterCenters, out int[] clusterIndices)
 {
     // Step 1: collapse the input to its distinct rows, remembering which original rows share each one.
     ExtractUniqueRows(data, out var rowsByContent, out var distinctRows);

     // Step 2: cluster only the distinct rows.
     GenerateClustersImpl(distinctRows, k, maxIter, restarts, progress, out clusterCenters, out var distinctAssignments);

     // Step 3: expand the assignment so that it is indexed by original row again.
     int[] expanded = new int[data.RowCount];
     RestoreRowIndices(rowsByContent, distinctAssignments, expanded);
     clusterIndices = expanded;
 }
        /// <summary>
        /// Agglomerative clustering over a lower-triangular distance matrix. In every step the closest pair of
        /// active clusters is merged and the matrix is updated in place using <paramref name="linkage"/>.
        /// </summary>
        /// <param name="distMatrix">Distance matrix; only entries [row, col] with col &lt; row are used. It is modified by this method.</param>
        /// <param name="nthreads">Forwarded to <c>FindClosestPairDistance</c>.</param>
        /// <param name="defaultDist">Forwarded to <c>FindClosestPairDistance</c>.</param>
        /// <param name="linkage">Merge rule, called as linkage(distance to first cluster, size of first, distance to second cluster, size of second).</param>
        /// <returns>An array of <c>distMatrix.RowCount - 1</c> nodes, one per merge step, in merge order.</returns>
        private static HierarchicalClusterNode[] GenericLinkageCluster(MatrixIndexer distMatrix, int nthreads,
                                                                       double defaultDist, Func <double, int, double, int, double> linkage)
        {
            int nelements = distMatrix.RowCount;

            // Per-slot bookkeeping: id of the cluster held in the slot and how many elements it contains.
            int[] clusterid = new int[nelements];
            int[] number = new int[nelements];
            HierarchicalClusterNode[] result = ArrayUtils.FillArray(i => new HierarchicalClusterNode(), nelements - 1);
            for (int slot = 0; slot < nelements; slot++)
            {
                clusterid[slot] = slot;
                number[slot] = 1;
            }

            for (int active = nelements; active > 1; active--)
            {
                int step = nelements - active; // index of the node created in this pass
                int last = active - 1;         // last active slot, relocated at the end of the pass

                double mergeDist = FindClosestPairDistance(active, distMatrix, out int hi, out int lo, nthreads, defaultDist);
                if (hi == -1)
                {
                    // No usable distance was found: merge slots 1 and 0 and place the node slightly
                    // above the previous one (or at 1 when this is the very first merge).
                    hi = 1;
                    lo = 0;
                    mergeDist = step > 0 ? result[step - 1].distance * 1.01 : 1;
                }
                result[step].distance = mergeDist;
                result[step].left = clusterid[hi];
                result[step].right = clusterid[lo];

                // Slot 'lo' becomes the merged cluster; refresh its distance to every other active slot.
                for (int other = 0; other < lo; other++)
                {
                    distMatrix[lo, other] = linkage(distMatrix[hi, other], number[hi], distMatrix[lo, other], number[lo]);
                }
                for (int other = lo + 1; other < hi; other++)
                {
                    distMatrix[other, lo] = linkage(distMatrix[hi, other], number[hi], distMatrix[other, lo], number[lo]);
                }
                for (int other = hi + 1; other < active; other++)
                {
                    distMatrix[other, lo] = linkage(distMatrix[other, hi], number[hi], distMatrix[other, lo], number[lo]);
                }

                // Slot 'hi' is free now; move the last active slot into it.
                for (int other = 0; other < hi; other++)
                {
                    distMatrix[hi, other] = distMatrix[last, other];
                }
                for (int other = hi + 1; other < last; other++)
                {
                    distMatrix[other, hi] = distMatrix[last, other];
                }

                number[lo] += number[hi];
                number[hi] = number[last];
                // Merged clusters are identified by negative ids: -1, -2, ... in merge order.
                clusterid[lo] = -step - 1;
                clusterid[hi] = clusterid[last];
            }
            return result;
        }
Exemple #5
0
 /// <summary>
 /// Checks the columns of <paramref name="m"/> one by one with <c>IsNanOrInfColumn</c>.
 /// </summary>
 /// <param name="m">Matrix to inspect.</param>
 /// <returns><c>true</c> as soon as <c>m.IsNanOrInfColumn</c> reports true for a column index; <c>false</c> if it does so for none.</returns>
 public static bool HasNaNOrInfColumns(MatrixIndexer m)
 {
     int column = 0;
     while (column < m.ColumnCount)
     {
         if (m.IsNanOrInfColumn(column))
         {
             return true;
         }
         column++;
     }
     return false;
 }
        /// <summary>
        /// Performs a hierarchical clustering on the given data matrix by first building the pairwise
        /// distance matrix and then delegating to the distance-matrix overload of <c>TreeCluster</c>.
        /// </summary>
        /// <param name="data">Data matrix that is going to be clustered.</param>
        /// <param name="access">Specifies whether rows or columns are to be clustered</param>
        /// <param name="distance">Defines the distance between two elements</param>
        /// <param name="linkage">Specifies the linkage for the clustering.</param>
        /// <param name="preserveOrder">Forwarded unchanged to the distance-matrix overload.</param>
        /// <param name="periodic">Forwarded unchanged to the distance-matrix overload.</param>
        /// <param name="nthreads">Forwarded unchanged to the distance-matrix overload.</param>
        /// <param name="progress">Forwarded unchanged to the distance-matrix overload.</param>
        /// <returns>An array of cluster nodes defining the resulting tree; an empty array when fewer than two elements are available.</returns>
        public HierarchicalClusterNode[] TreeCluster(MatrixIndexer data, MatrixAccess access, IDistance distance,
                                                     HierarchicalClusterLinkage linkage, bool preserveOrder, bool periodic, int nthreads, Action <int> progress)
        {
            int nelements;
            if (access == MatrixAccess.Rows)
            {
                nelements = data.RowCount;
            }
            else
            {
                nelements = data.ColumnCount;
            }

            if (nelements >= 2)
            {
                float[,] pairwise = DistanceMatrix(data, distance, access);
                return TreeCluster(pairwise, linkage, preserveOrder, periodic, nthreads, progress);
            }

            // With zero or one element there is nothing to merge.
            return new HierarchicalClusterNode[0];
        }
Exemple #7
0
        /// <summary>
        /// Runs k-means clustering on the rows of <paramref name="data"/>. Only the distinct rows are
        /// clustered; the result is then expanded back to one entry per original row.
        /// </summary>
        /// <param name="data">Matrix whose rows are clustered.</param>
        /// <param name="k">number of clusters</param>
        /// <param name="maxIter">maximal number of iterations, if not converging</param>
        /// <param name="restarts">Forwarded unchanged to <c>GenerateClustersImpl</c>.</param>
        /// <param name="progress">Forwarded unchanged to <c>GenerateClustersImpl</c>.</param>
        /// <param name="clusterCenters">Receives the cluster centers produced by <c>GenerateClustersImpl</c>.</param>
        /// <param name="clusterIndices">Receives an array with one entry per row of <paramref name="data"/>.</param>
        public static void GenerateClusters(MatrixIndexer data, int k, int maxIter, int restarts, Action <int> progress,
                                            out float[,] clusterCenters, out int[] clusterIndices)
        {
            // Collapse the input to its distinct rows and remember which original rows map to each of them.
            double[][] distinctRows;
            Dictionary <EquatableArray <double>, List <int> > rowsByContent;
            ExtractUniqueRows(data, out rowsByContent, out distinctRows);

            // Cluster the distinct rows only.
            int[] distinctAssignments;
            GenerateClustersImpl(distinctRows, k, maxIter, restarts, progress, out clusterCenters, out distinctAssignments);

            // Expand the assignment so that it is indexed by original row again.
            int[] expanded = new int[data.RowCount];
            RestoreRowIndices(rowsByContent, distinctAssignments, expanded);
            clusterIndices = expanded;
        }
Exemple #8
0
        /// <summary>
        /// Dispatches to <c>HasNaNOrInfColumns</c> or <c>HasNaNOrInfRows</c> depending on <paramref name="access"/>.
        /// </summary>
        /// <param name="m">Matrix to inspect.</param>
        /// <param name="access">Selects whether columns or rows are checked.</param>
        /// <returns>The result of the selected check.</returns>
        /// <exception cref="NotImplementedException">Thrown for any access value other than Columns or Rows.</exception>
        public static bool HasNanOrInf(MatrixIndexer m, MatrixAccess access)
        {
            if (access == MatrixAccess.Columns)
            {
                return HasNaNOrInfColumns(m);
            }
            if (access == MatrixAccess.Rows)
            {
                return HasNaNOrInfRows(m);
            }
            throw new NotImplementedException($"Not implemented for access {access}");
        }
        /// <summary>
        /// Finds the smallest entry in the lower triangle of the first <paramref name="n"/> rows of
        /// <paramref name="distMatrix"/>. For more than one thread and more than 1000 rows, the row range
        /// is split into <paramref name="nthreads"/> partitions that are scanned on separate threads.
        /// </summary>
        /// <param name="n">Number of leading rows to scan.</param>
        /// <param name="distMatrix">Distance matrix; only entries [row, col] with col &lt; row are read.</param>
        /// <param name="ip">Receives the row index of the smallest entry, or -1 if no entry qualified.</param>
        /// <param name="jp">Receives the column index of the smallest entry, or -1 if no entry qualified.</param>
        /// <param name="nthreads">Number of threads to use.</param>
        /// <param name="defaultDist">Value returned when no entry qualified.</param>
        /// <returns>The smallest entry found, or <paramref name="defaultDist"/> when there is none.</returns>
        private static double FindClosestPairDistance(int n, MatrixIndexer distMatrix, out int ip, out int jp, int nthreads, double defaultDist)
        {
            if (nthreads <= 1 || n <= 1000)
            {
                return FindClosestPairDistance(0, n, distMatrix, out ip, out jp, defaultDist);
            }

            // Rows [0, r) contain r(r-1)/2 lower-triangle entries, so the boundary r of partition k solves
            // r(r-1) = n(n-1) * k / nthreads, giving every thread about the same number of entries.
            // The product is formed in double precision: in int arithmetic n * (n - 1) * k silently
            // overflows for large n and yields invalid partition boundaries.
            double pairProduct = (double)n * (n - 1);
            int[] nk = new int[nthreads + 1];
            for (int k = 0; k <= nthreads; k++)
            {
                nk[k] = (int)Math.Round(0.5 + Math.Sqrt(0.25 + pairProduct * k / nthreads));
            }

            int[] ips = new int[nthreads];
            int[] jps = new int[nthreads];
            double[] mins = new double[nthreads];
            Thread[] t = new Thread[nthreads];
            for (int i = 0; i < nthreads; i++)
            {
                // Copy the loop variable so that each thread captures its own partition index.
                int index0 = i;
                t[i] = new Thread(() =>
                {
                    mins[index0] = FindClosestPairDistance(nk[index0], nk[index0 + 1], distMatrix, out ips[index0], out jps[index0],
                                                           defaultDist);
                });
                t[i].Start();
            }
            for (int i = 0; i < nthreads; i++)
            {
                t[i].Join();
            }

            ip = -1;
            jp = -1;
            double distance = double.MaxValue;
            for (int i = 0; i < nthreads; i++)
            {
                // A partition without any qualifying entry reports index -1 together with defaultDist.
                // That placeholder must not take part in the comparison, otherwise it can win against
                // real minima of other partitions and make the whole search report "nothing found".
                if (ips[i] != -1 && mins[i] < distance)
                {
                    distance = mins[i];
                    ip = ips[i];
                    jp = jps[i];
                }
            }
            return distance == double.MaxValue ? defaultDist : distance;
        }
        /// <summary>
        /// Create distance matrix from <see cref="IDistance"/>. The distance of every pair of rows (i, j) with
        /// i &lt; j is computed once and stored consecutively, ordered by i and then by j.
        /// </summary>
        /// <param name="data">Matrix whose rows are compared.</param>
        /// <param name="distance">Distance measure applied to each pair of rows.</param>
        public GenericDistanceMatrix(MatrixIndexer data, IDistance distance)
        {
            N = data.RowCount;
            // One slot per unordered pair of distinct rows.
            distances = new double[N * (N - 1) / 2];

            int slot = 0;
            int first = 0;
            while (first < N)
            {
                BaseVector firstRow = data.GetRow(first);
                for (int second = first + 1; second < N; second++)
                {
                    distances[slot] = distance.Get(firstRow, data.GetRow(second));
                    slot++;
                }
                first++;
            }
        }
        /// <summary>
        /// Computes the pairwise distances between the rows or the columns of <paramref name="data"/>.
        /// </summary>
        /// <param name="data">Source matrix.</param>
        /// <param name="distance">Distance measure applied to each pair of vectors.</param>
        /// <param name="access">Selects whether rows or columns are compared.</param>
        /// <returns>
        /// A square array with one row and column per element. Only entries [i, j] with j &lt; i are filled;
        /// the diagonal and the upper triangle keep their initial value of 0.
        /// </returns>
        private static float[,] DistanceMatrix(MatrixIndexer data, IDistance distance, MatrixAccess access)
        {
            int nelements = (access == MatrixAccess.Rows) ? data.RowCount : data.ColumnCount;

            float[,] result = new float[nelements, nelements];
            // Row 0 has no entries below the diagonal, so the scan starts at 1.
            for (int i = 1; i < nelements; i++)
            {
                // The i-th vector does not change in the inner loop; fetch it once per row instead of once per pair.
                BaseVector vi = GetVector(data, i, access);
                for (int j = 0; j < i; j++)
                {
                    result[i, j] = (float)distance.Get(vi, GetVector(data, j, access));
                }
            }
            return result;
        }
Exemple #12
0
        /// <summary>
        /// Collects the distinct rows of <paramref name="data"/> and records, for each distinct row, the
        /// indices of all original rows that have the same content.
        /// </summary>
        /// <param name="data">Matrix whose rows are scanned.</param>
        /// <param name="rowIndexMap">Receives a map from row content to the list of original row indices with that content, in ascending order.</param>
        /// <param name="reducedData">Receives the distinct rows in order of first occurrence.</param>
        private static void ExtractUniqueRows(MatrixIndexer data,
                                              out Dictionary <EquatableArray <double>, List <int> > rowIndexMap, out double[][] reducedData)
        {
            var uniqueRows = new List <double[]>();

            rowIndexMap = new Dictionary <EquatableArray <double>, List <int> >();
            for (int row = 0; row < data.RowCount; row++)
            {
                var rowArray   = data.GetRow(row).ToArray();
                var rowEqArray = new EquatableArray <double>(rowArray);
                // Single lookup per row: the key is a whole row, so repeated ContainsKey/indexer
                // calls would hash and compare the row content several times.
                List <int> indices;
                if (!rowIndexMap.TryGetValue(rowEqArray, out indices))
                {
                    indices = new List <int>();
                    rowIndexMap.Add(rowEqArray, indices);
                    uniqueRows.Add(rowArray);
                }
                indices.Add(row);
            }
            reducedData = uniqueRows.ToArray();
        }
        /// <summary>
        /// Averages all entries below the diagonal of <paramref name="distMatrix"/> that are neither NaN nor infinite.
        /// </summary>
        /// <param name="distMatrix">Distance matrix; only entries [row, col] with col &lt; row are read.</param>
        /// <returns>The mean of the finite entries. When there is no finite entry the division is 0/0 and the result is NaN.</returns>
        private static double CalcAverageDistance(MatrixIndexer distMatrix)
        {
            double sum = 0;
            double finiteCount = 0;

            for (int row = 0; row < distMatrix.RowCount; row++)
            {
                for (int col = 0; col < row; col++)
                {
                    double value = distMatrix[row, col];
                    if (double.IsNaN(value) || double.IsInfinity(value))
                    {
                        // Skip entries that would poison the sum.
                        continue;
                    }
                    sum += value;
                    finiteCount += 1;
                }
            }
            return sum / finiteCount;
        }
        /// <summary>
        /// Run K-medoid clustering. Medoid selection and cluster assignment are repeated until the cost
        /// changes by less than 1E-10 between two consecutive rounds.
        /// </summary>
        /// <param name="data">data matrix with n rows</param>
        /// <param name="distance">Pairwise distances handed to the medoid selection, assignment and cost helpers.</param>
        /// <param name="k">number of clusters k &lt; n</param>
        /// <returns>Array of length n. <code>assignment[i]</code> returns the index of the cluster medoid in the data matrix.</returns>
        public static int[] GenerateClusters(MatrixIndexer data, IDistanceMatrix distance, int k)
        {
            var n = data.RowCount;

            // Initial configuration.
            var medoids = SelectInitialMedoids(distance, k, n);
            var assignments = AssignClusters(distance, medoids, n);
            var cost = CalculateCost(distance, assignments);

            // Refine until the cost stops changing.
            bool converged;
            do
            {
                medoids = SelectMedoids(distance, assignments);
                assignments = AssignClusters(distance, medoids, n);
                var newCost = CalculateCost(distance, assignments);
                converged = Math.Abs(newCost - cost) < 1E-10;
                cost = newCost;
            }
            while (!converged);

            return assignments;
        }
        /// <summary>
        /// Searches the lower triangle of the first <paramref name="n"/> rows of <paramref name="matrix"/> for the
        /// smallest entry whose pair of positions is accepted by <c>ValidPositions</c>. Afterwards every entry
        /// in that region that is smaller than the returned distance is raised to it (stored via a float cast).
        /// </summary>
        /// <param name="n">Number of leading rows to scan.</param>
        /// <param name="matrix">Distance matrix; it is modified by the second pass.</param>
        /// <param name="ip">Receives the row index of the selected entry, or -1 if no pair was accepted.</param>
        /// <param name="jp">Receives the column index of the selected entry, or -1 if no pair was accepted.</param>
        /// <param name="position">Position of each slot, handed to <c>ValidPositions</c>.</param>
        /// <param name="periodic">Forwarded unchanged to <c>ValidPositions</c>.</param>
        /// <param name="reverse">Receives the first flag reported by <c>ValidPositions</c> for the selected pair; false if none was selected.</param>
        /// <param name="carryOver">Receives the second flag reported by <c>ValidPositions</c> for the selected pair; false if none was selected.</param>
        /// <returns>The selected distance, or <c>double.MaxValue</c> if no pair was accepted.</returns>
        private static double FindClosestPairLinear(int n, MatrixIndexer matrix, out int ip, out int jp, IList <int> position,
                                                    bool periodic, out bool reverse, out bool carryOver)
        {
            double best = double.MaxValue;
            ip = -1;
            jp = -1;
            reverse = false;
            carryOver = false;

            // Pass 1: smallest entry among the pairs that ValidPositions accepts.
            for (int row = 0; row < n; row++)
            {
                for (int col = 0; col < row; col++)
                {
                    if (!ValidPositions(position[row], position[col], n, periodic, out bool pairReverse, out bool pairCarryOver))
                    {
                        continue;
                    }
                    double candidate = matrix[row, col];
                    if (candidate < best)
                    {
                        best = candidate;
                        ip = row;
                        jp = col;
                        reverse = pairReverse;
                        carryOver = pairCarryOver;
                    }
                }
            }

            // Pass 2: lift every smaller entry (these belong to pairs that were not accepted) up to the selected distance.
            for (int row = 0; row < n; row++)
            {
                for (int col = 0; col < row; col++)
                {
                    if (matrix[row, col] < best)
                    {
                        matrix[row, col] = (float)best;
                    }
                }
            }
            return best;
        }
        /// <summary>
        /// Scans rows <paramref name="nmin"/> (inclusive) to <paramref name="nmax"/> (exclusive) of the lower
        /// triangle of <paramref name="matrix"/> for the smallest entry below <c>double.MaxValue</c>.
        /// </summary>
        /// <param name="nmin">First row to scan.</param>
        /// <param name="nmax">Row after the last one to scan.</param>
        /// <param name="matrix">Distance matrix; only entries [row, col] with col &lt; row are read.</param>
        /// <param name="ip">Receives the row index of the smallest entry, or -1 if no entry qualified.</param>
        /// <param name="jp">Receives the column index of the smallest entry, or -1 if no entry qualified.</param>
        /// <param name="defaultDist">Value returned when no entry qualified.</param>
        /// <returns>The smallest entry found, or <paramref name="defaultDist"/> when there is none.</returns>
        private static double FindClosestPairDistance(int nmin, int nmax, MatrixIndexer matrix, out int ip, out int jp, double defaultDist)
        {
            double smallest = double.MaxValue;
            int bestRow = -1;
            int bestCol = -1;

            for (int row = nmin; row < nmax; row++)
            {
                for (int col = 0; col < row; col++)
                {
                    double candidate = matrix[row, col];
                    // NaN and infinite entries never satisfy this comparison and are therefore ignored.
                    if (candidate < smallest)
                    {
                        smallest = candidate;
                        bestRow = row;
                        bestCol = col;
                    }
                }
            }

            ip = bestRow;
            jp = bestCol;
            return smallest == double.MaxValue ? defaultDist : smallest;
        }
    /// <summary>
    /// Locates the first cell of the matrix whose value is 0, scanning row by row and, within a row,
    /// column by column.
    /// </summary>
    /// <returns>
    /// The coordinates of the first zero cell. When the matrix contains no zero cell, the coordinates
    /// (0, 0) are returned.
    /// </returns>
    private MatrixIndexer FindFreeCell()
    {
        for (int row = 0; row < this.DimensionLength; row++)
        {
            for (int col = 0; col < this.DimensionLength; col++)
            {
                if (this.matrix[row, col] == 0)
                {
                    // Return right away: a plain 'break' would only leave the inner loop, so the scan
                    // would continue and later rows would overwrite the cell that was found first.
                    return new MatrixIndexer(row, col);
                }
            }
        }

        // No zero cell found; keep the (0, 0) fallback callers have always received.
        return new MatrixIndexer(0, 0);
    }
 /// <summary>
 /// K-medoid clustering entry point taking an <c>IDistance</c>. This overload is not implemented and always throws.
 /// </summary>
 /// <param name="data">data matrix with n rows</param>
 /// <param name="distance">Distance measure; not used because the method throws.</param>
 /// <param name="k">number of clusters k &lt; n</param>
 /// <returns>Never returns.</returns>
 /// <exception cref="NotImplementedException">Always thrown.</exception>
 public static int[] GenerateClusters(MatrixIndexer data, IDistance distance, int k)
 {
     // TODO: delegate to GenerateClusters(data, new GenericDistanceMatrix(data, distance), k)
     // once GenericDistanceMatrix can be referenced without a circular dependency.
     const string message = "Use GenericDistanceMatrix to convert IDistance to IDistanceMatrix";
     throw new NotImplementedException(message);
 }
        /// <summary>
        /// Hierarchical clustering with a k-means pre-clustering step. When there are more elements than
        /// <paramref name="nmeans"/>, the elements are first grouped by k-means, the tree is built on the
        /// k-means centers, and the members of each k-means cluster are attached below the tree.
        /// Otherwise the call is forwarded to <c>TreeCluster</c>.
        /// </summary>
        /// <param name="data">Data matrix that is going to be clustered.</param>
        /// <param name="access">Specifies whether rows or columns are to be clustered.</param>
        /// <param name="distance">Distance used for the direct <c>TreeCluster</c> call and between k-means centers.</param>
        /// <param name="linkage">Forwarded to <c>TreeCluster</c>.</param>
        /// <param name="preserveOrder">Forwarded to <c>TreeCluster</c>.</param>
        /// <param name="periodic">Forwarded to <c>TreeCluster</c>.</param>
        /// <param name="nthreads">Forwarded to <c>TreeCluster</c>.</param>
        /// <param name="nmeans">Number of clusters requested from k-means.</param>
        /// <param name="restarts">Forwarded to <c>KmeansClustering.GenerateClusters</c>.</param>
        /// <param name="maxIter">Forwarded to <c>KmeansClustering.GenerateClusters</c>.</param>
        /// <param name="progress">Forwarded to the k-means and <c>TreeCluster</c> calls.</param>
        /// <returns>An array of nelements - 1 nodes, where nelements is the number of clustered rows or columns.</returns>
        public HierarchicalClusterNode[] TreeClusterKmeans(MatrixIndexer data, MatrixAccess access, IDistance distance,
                                                           HierarchicalClusterLinkage linkage, bool preserveOrder, bool periodic, int nthreads, int nmeans, int restarts,
                                                           int maxIter, Action <int> progress)
        {
            int nelements = (access == MatrixAccess.Rows) ? data.RowCount : data.ColumnCount;

            // Not more elements than requested means: cluster the elements directly.
            if (nelements <= nmeans)
            {
                return(TreeCluster(data, access, distance, linkage, preserveOrder, periodic, nthreads, progress));
            }
            // c: k-means centers, inds: k-means output handed to RearrangeClusters below.
            // k-means works on rows, so columns are clustered through the transposed matrix.
            float[,] c;
            int[] inds;
            if (access == MatrixAccess.Rows)
            {
                KmeansClustering.GenerateClusters(data, nmeans, maxIter, restarts, progress, out c, out inds);
            }
            else
            {
                KmeansClustering.GenerateClusters(data.Transpose(), nmeans, maxIter, restarts, progress, out c, out inds);
            }
            // Build the tree on the centers only.
            float[,] distMatrix = DistanceMatrix(new FloatMatrixIndexer(c), distance, MatrixAccess.Rows);
            HierarchicalClusterNode[] nodes = TreeCluster(distMatrix, linkage, preserveOrder, periodic, nthreads, progress);
            Dictionary <int, int[]>   clusters;
            Dictionary <int, int>     singletons;

            // NOTE(review): presumably 'clusters' maps a center index to the indices of its members and
            // 'singletons' maps a center index to its only member — confirm against RearrangeClusters.
            RearrangeClusters(inds, c.GetLength(0), out clusters, out singletons);
            HierarchicalClusterNode[] newNodes = new HierarchicalClusterNode[nelements - 1];
            // The center tree is placed at the end of the result; the first 'fill' slots are reserved
            // for the branches created per k-means cluster.
            // NOTE(review): assumes k-means returns exactly nmeans centers, so that nodes.Length == nmeans - 1 — TODO confirm.
            int fill = nelements - nmeans;

            Array.Copy(nodes, 0, newNodes, fill, nodes.Length);
            // Next free slot in the reserved front part of newNodes.
            int pos = 0;

            for (int i = fill; i < newNodes.Length; i++)
            {
                HierarchicalClusterNode node = newNodes[i];
                // Left child. Negative values refer to other nodes of the center tree, which moved by 'fill' slots.
                if (node.left < 0)
                {
                    node.left -= fill;
                }
                else if (singletons.ContainsKey(node.left))
                {
                    // Center with a single member: point directly at that member.
                    node.left = singletons[node.left];
                }
                else
                {
                    if (clusters.ContainsKey(node.left))
                    {
                        // Center with several members: insert the branch built by FillTerminalBranch at 'pos'
                        // and reference it with the negative value of the advanced 'pos'.
                        HierarchicalClusterNode[] branch = FillTerminalBranch(clusters[node.left], pos);
                        Array.Copy(branch, 0, newNodes, pos, branch.Length);
                        pos      += branch.Length;
                        node.left = -pos;
                    }
                }
                // Right child: same treatment as the left child.
                if (node.right < 0)
                {
                    node.right -= fill;
                }
                else if (singletons.ContainsKey(node.right))
                {
                    node.right = singletons[node.right];
                }
                else
                {
                    if (clusters.ContainsKey(node.right))
                    {
                        HierarchicalClusterNode[] branch = FillTerminalBranch(clusters[node.right], pos);
                        Array.Copy(branch, 0, newNodes, pos, branch.Length);
                        pos       += branch.Length;
                        node.right = -pos;
                    }
                }
            }
            return(newNodes);
        }
 /// <summary>
 /// Fetches one row or one column of <paramref name="data"/>.
 /// </summary>
 /// <param name="data">Source matrix.</param>
 /// <param name="index">Index of the row or column to fetch.</param>
 /// <param name="access">Rows selects <c>GetRow</c>; any other value selects <c>GetColumn</c>.</param>
 /// <returns>The requested vector.</returns>
 private static BaseVector GetVector(MatrixIndexer data, int index, MatrixAccess access)
 {
     if (access == MatrixAccess.Rows)
     {
         return data.GetRow(index);
     }
     return data.GetColumn(index);
 }
 /// <summary>
 /// Runs <c>GenericLinkageCluster</c> with <c>AverageLinkage</c> as the merge rule.
 /// </summary>
 /// <param name="distMatrix">Distance matrix, forwarded unchanged.</param>
 /// <param name="nthreads">Number of threads, forwarded unchanged.</param>
 /// <param name="defaultDist">Default distance, forwarded unchanged.</param>
 /// <returns>The nodes produced by <c>GenericLinkageCluster</c>.</returns>
 private static HierarchicalClusterNode[] AverageLinkageCluster(MatrixIndexer distMatrix, int nthreads, double defaultDist)
     => GenericLinkageCluster(distMatrix, nthreads, defaultDist, AverageLinkage);
 /// <summary>
 /// Runs <c>GenericLinkageClusterLinear</c> with <c>MaximumLinkage</c> as the merge rule.
 /// </summary>
 /// <param name="matrix">Distance matrix, forwarded unchanged.</param>
 /// <param name="periodic">Forwarded unchanged.</param>
 /// <returns>The nodes produced by <c>GenericLinkageClusterLinear</c>.</returns>
 private static HierarchicalClusterNode[] MaximumLinkageClusterLinear(MatrixIndexer matrix, bool periodic)
     => GenericLinkageClusterLinear(matrix, periodic, MaximumLinkage);