private void Update(SparseMatrix <double> simMtx, SparseMatrix <double> clustMtxTr, int numClusters, int idx1, int idx2, ArrayList <Cluster> clusters,
                            IUnlabeledExampleCollection <SparseVector <double> > dataset, double damping)
        {
            Debug.Assert(idx1 < idx2);
            // create new parent
            Cluster c1     = clusters[idx1];
            Cluster c2     = clusters[idx2];
            Cluster parent = new Cluster();

            parent.Items.AddRange(c1.Items);
            parent.Items.AddRange(c2.Items);
            parent.ClusterInfo = Math.Max((int)c1.ClusterInfo, (int)c2.ClusterInfo) + 1;
            c1.Parent          = parent;
            c2.Parent          = parent;
            parent.AddChild(c1);
            parent.AddChild(c2);
            SparseVector <double> centroid = ModelUtils.ComputeCentroid(parent.Items, dataset, CentroidType.NrmL2);

            centroid = Trim(centroid, 1000, 0.8);
            // remove clusters
            clusters.RemoveAt(idx2);
            clusters.RemoveAt(idx1);
            // add new parent
            clusters.Add(parent);
            // remove rows at idx1 and idx2
            simMtx.PurgeRowAt(idx2);
            simMtx.PurgeRowAt(idx1);
            // remove cols at idx1 and idx2
            simMtx.PurgeColAt(idx2);
            simMtx.PurgeColAt(idx1);
            clustMtxTr.PurgeColAt(idx2);
            clustMtxTr.PurgeColAt(idx1);
            // update matrices
            numClusters -= 2;
            foreach (IdxDat <double> item in centroid)
            {
                if (clustMtxTr[item.Idx] == null)
                {
                    clustMtxTr[item.Idx] = new SparseVector <double>(new IdxDat <double>[] { new IdxDat <double>(numClusters, item.Dat) });
                }
                else
                {
                    clustMtxTr[item.Idx].InnerIdx.Add(numClusters);
                    clustMtxTr[item.Idx].InnerDat.Add(item.Dat);
                }
            }
            double[] simVec = ModelUtils.GetDotProductSimilarity(clustMtxTr, numClusters + 1, centroid);
            for (int i = 0; i < simVec.Length; i++)
            {
                simVec[i] *= Math.Pow(damping, (double)((int)parent.ClusterInfo + (int)clusters[i].ClusterInfo) / 2.0);
            }
            SparseMatrix <double> col = new SparseMatrix <double>();

            col[0] = new SparseVector <double>(simVec);
            simMtx.AppendCols(col.GetTransposedCopy(), numClusters);
        }
예제 #2
0
        public ClusteringResult Cluster(IUnlabeledExampleCollection <SparseVector <double> > dataset)
        {
            Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
            Utils.ThrowException(dataset.Count < 2 ? new ArgumentValueException("dataset") : null);
            ClusteringResult clusteringResult = new ClusteringResult();
            Queue <Cluster>  queue            = new Queue <Cluster>();
            // create root
            Cluster root = new Cluster();

            for (int i = 0; i < dataset.Count; i++)
            {
                Utils.ThrowException(dataset[i].Count == 0 ? new ArgumentValueException("dataset") : null);
                root.Items.Add(i);
            }
            clusteringResult.AddRoot(root);
            // add root to queue
            queue.Enqueue(root);
            while (queue.Count > 0)
            {
                // get next cluster
                Cluster cluster = queue.Dequeue();
                // compute cluster quality
                UnlabeledDataset <SparseVector <double> > localDataset = GetDatasetSubset(cluster.Items, dataset);
                SparseVector <double> centroid;
                double quality = GetClusterQuality(localDataset, out centroid);
                cluster.ClusterInfo = new Pair <SparseVector <double>, double>(centroid, quality);
                if (quality < mMinQuality)
                {
                    // split cluster, add children to queue
                    ClusteringResult localResult = mKMeansClustering.Cluster(localDataset);
                    for (int i = 0; i < 2; i++)
                    {
                        cluster.AddChild(localResult.Roots[i]);
                        localResult.Roots[i].Parent = cluster;
                        queue.Enqueue(localResult.Roots[i]);
                    }
                }
            }
            return(clusteringResult);
        }