/// <summary>
/// Merges the two clusters at <paramref name="idx1"/> and <paramref name="idx2"/> into a freshly
/// created parent cluster and updates the cluster list and both matrices accordingly.
/// </summary>
/// <param name="simMtx">Similarity matrix; the rows and columns at both indices are purged and one new column is appended.</param>
/// <param name="clustMtxTr">Matrix from which the columns at both indices are purged and into which the merged centroid is written as a new column (presumably the transposed cluster-centroid matrix; confirm against the caller).</param>
/// <param name="numClusters">Cluster count as seen by the caller; <c>numClusters - 2</c> is used as the column index of the merged cluster (presumably equal to <c>clusters.Count</c> before the merge; confirm against the caller).</param>
/// <param name="idx1">Index of the first cluster to merge; asserted (debug builds) to be smaller than <paramref name="idx2"/>.</param>
/// <param name="idx2">Index of the second cluster to merge.</param>
/// <param name="clusters">Current cluster list; both merged clusters are removed and the parent is appended.</param>
/// <param name="dataset">Dataset handed to the centroid computation for the merged items.</param>
/// <param name="damping">Base of the power by which each new similarity value is multiplied.</param>
private void Update(SparseMatrix<double> simMtx, SparseMatrix<double> clustMtxTr, int numClusters, int idx1, int idx2, ArrayList<Cluster> clusters, IUnlabeledExampleCollection<SparseVector<double>> dataset, double damping)
{
    Debug.Assert(idx1 < idx2);

    // Build the parent that takes over the items of both merged clusters.
    Cluster first = clusters[idx1];
    Cluster second = clusters[idx2];
    Cluster merged = new Cluster();
    merged.Items.AddRange(first.Items);
    merged.Items.AddRange(second.Items);
    // ClusterInfo carries an integer level here: one more than the larger child level.
    int mergedLevel = Math.Max((int)first.ClusterInfo, (int)second.ClusterInfo) + 1;
    merged.ClusterInfo = mergedLevel;
    first.Parent = merged;
    second.Parent = merged;
    merged.AddChild(first);
    merged.AddChild(second);

    // Centroid of the merged items, passed through Trim (the meaning of 1000 and 0.8 is defined by Trim).
    SparseVector<double> mergedCentroid = Trim(ModelUtils.ComputeCentroid(merged.Items, dataset, CentroidType.NrmL2), 1000, 0.8);

    // Drop the higher index first so that idx1 still addresses the intended entry afterwards.
    clusters.RemoveAt(idx2);
    clusters.RemoveAt(idx1);
    clusters.Add(merged);

    // Same ordering for the matrix rows and columns of the two merged clusters.
    simMtx.PurgeRowAt(idx2);
    simMtx.PurgeRowAt(idx1);
    simMtx.PurgeColAt(idx2);
    simMtx.PurgeColAt(idx1);
    clustMtxTr.PurgeColAt(idx2);
    clustMtxTr.PurgeColAt(idx1);

    // Column index under which the merged cluster is stored in both matrices.
    int mergedCol = numClusters - 2;

    // Write the merged centroid into clustMtxTr as column mergedCol.
    foreach (IdxDat<double> component in mergedCentroid)
    {
        SparseVector<double> row = clustMtxTr[component.Idx];
        if (row != null)
        {
            row.InnerIdx.Add(mergedCol);
            row.InnerDat.Add(component.Dat);
        }
        else
        {
            clustMtxTr[component.Idx] = new SparseVector<double>(new IdxDat<double>[] { new IdxDat<double>(mergedCol, component.Dat) });
        }
    }

    // Similarities of the merged centroid, each damped by the mean of the two cluster levels.
    double[] similarities = ModelUtils.GetDotProductSimilarity(clustMtxTr, mergedCol + 1, mergedCentroid);
    for (int i = 0; i < similarities.Length; i++)
    {
        double exponent = (mergedLevel + (int)clusters[i].ClusterInfo) / 2.0;
        similarities[i] *= Math.Pow(damping, exponent);
    }

    // Wrap the vector as a one-row matrix, transpose it, and append it at column mergedCol.
    SparseMatrix<double> rowMtx = new SparseMatrix<double>();
    rowMtx[0] = new SparseVector<double>(similarities);
    simMtx.AppendCols(rowMtx.GetTransposedCopy(), mergedCol);
}
/// <summary>
/// Builds a cluster hierarchy top-down: starts with one root holding every example index and
/// keeps splitting clusters whose quality is below <c>mMinQuality</c>.
/// </summary>
/// <param name="dataset">Examples to cluster; must be non-null, contain at least two examples, and contain no empty vector.</param>
/// <returns>A clustering result whose single root is the cluster of all example indices.</returns>
/// <remarks>
/// Argument checks go through <c>Utils.ThrowException</c>, which receives an
/// <c>ArgumentNullException</c> or <c>ArgumentValueException</c> when a check fails and <c>null</c> otherwise.
/// </remarks>
public ClusteringResult Cluster(IUnlabeledExampleCollection<SparseVector<double>> dataset)
{
    Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
    Utils.ThrowException(dataset.Count < 2 ? new ArgumentValueException("dataset") : null);

    // The root cluster owns every example index; empty vectors are rejected on the way.
    Cluster top = new Cluster();
    for (int exampleIdx = 0; exampleIdx < dataset.Count; exampleIdx++)
    {
        Utils.ThrowException(dataset[exampleIdx].Count == 0 ? new ArgumentValueException("dataset") : null);
        top.Items.Add(exampleIdx);
    }

    ClusteringResult result = new ClusteringResult();
    result.AddRoot(top);

    // Breadth-first work list of clusters that still have to be evaluated.
    Queue<Cluster> pending = new Queue<Cluster>();
    pending.Enqueue(top);

    while (pending.Count > 0)
    {
        Cluster current = pending.Dequeue();

        // Evaluate the cluster on its own subset and store (centroid, quality) as its info.
        UnlabeledDataset<SparseVector<double>> subset = GetDatasetSubset(current.Items, dataset);
        SparseVector<double> centroid;
        double quality = GetClusterQuality(subset, out centroid);
        current.ClusterInfo = new Pair<SparseVector<double>, double>(centroid, quality);

        // Only clusters strictly below the quality threshold are split further.
        if (!(quality < mMinQuality))
        {
            continue;
        }

        // Split and attach exactly the first two roots of the local result
        // (presumably mKMeansClustering is configured for two clusters; confirm at its setup).
        ClusteringResult split = mKMeansClustering.Cluster(subset);
        for (int childIdx = 0; childIdx < 2; childIdx++)
        {
            Cluster child = split.Roots[childIdx];
            current.AddChild(child);
            child.Parent = current;
            pending.Enqueue(child);
        }
    }

    return result;
}