/// <summary>
/// Runs <c>mKMeansClustering</c> on the dataset, computes one centroid
/// (<c>CentroidType.NrmL2</c>) per resulting root cluster, builds the
/// dot-product similarity matrix of those centroids, and then repeatedly calls
/// <c>FindMaxSim</c> and <c>Update</c> until at most one root cluster remains.
/// Progress and the similarity matrix are written to the console on every
/// iteration.
/// </summary>
/// <param name="dataset">Examples to cluster; validated to be non-null and to
/// contain at least <c>NumLeaves</c> items.</param>
/// <returns>The <c>ClusteringResult</c> obtained from the initial k-means run
/// (presumably restructured in place by <c>Update</c> - confirm against that
/// helper).</returns>
/// <exception cref="ArgumentNullException">Raised through
/// <c>Utils.ThrowException</c> when <paramref name="dataset"/> is null.</exception>
/// <exception cref="ArgumentValueException">Raised through
/// <c>Utils.ThrowException</c> when the dataset holds fewer than
/// <c>NumLeaves</c> items.</exception>
public ClusteringResult Cluster(IUnlabeledExampleCollection<SparseVector<double>> dataset)
{
    Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
    Utils.ThrowException(dataset.Count < NumLeaves ? new ArgumentValueException("dataset") : null);

    // Initial flat partition; its root clusters are the starting points of the loop below.
    ClusteringResult result = mKMeansClustering.Cluster(dataset);

    UnlabeledDataset<SparseVector<double>> leafCentroids = new UnlabeledDataset<SparseVector<double>>();
    foreach (Cluster leaf in result.Roots)
    {
        SparseVector<double> vec = ModelUtils.ComputeCentroid(leaf.Items, dataset, CentroidType.NrmL2);
        leafCentroids.Add(vec);
        // NOTE(review): the value returned by Trim is stored only in this
        // loop-local and never read afterwards; the reference added above is the
        // one obtained before the call. Confirm whether Trim was meant to run
        // before Add (or whether it modifies its argument in place).
        vec = Trim(vec, 1000, 0.8);
        leaf.ClusterInfo = 1; // cluster level
    }

    SparseMatrix<double> similarities = ModelUtils.GetDotProductSimilarity(leafCentroids, /*thresh=*/ 0, /*fullMatrix=*/ false);
    SparseMatrix<double> centroidsTransposed = ModelUtils.GetTransposedMatrix(leafCentroids);

    // Keep going until a single root is left; Update is expected to reduce
    // the number of roots on each pass (otherwise this loop would not terminate).
    for (int round = 1; result.Roots.Count > 1; round++)
    {
        Console.WriteLine("Iteration {0} ...", round);
        int first, second;
        FindMaxSim(similarities, out first, out second);
        Update(similarities, centroidsTransposed, result.Roots.Count, first, second, result.Roots.Inner, dataset, /*damping=*/ 0.9);
        Console.WriteLine(similarities.ToString("E0.00"));
        Console.WriteLine();
    }

    return result;
}
/// <summary>
/// Builds a new <c>UnlabeledDataset</c> containing the examples of
/// <paramref name="dataset"/> found at the given indices, added in the order
/// in which <paramref name="items"/> yields them.
/// </summary>
/// <param name="items">Indices into <paramref name="dataset"/>; an index that
/// is yielded more than once is added more than once.</param>
/// <param name="dataset">Collection the examples are read from via its indexer.</param>
/// <returns>The newly created dataset holding the selected examples.</returns>
private UnlabeledDataset<SparseVector<double>> GetDatasetSubset(IEnumerable<int> items, IUnlabeledExampleCollection<SparseVector<double>> dataset)
{
    UnlabeledDataset<SparseVector<double>> picked = new UnlabeledDataset<SparseVector<double>>();
    foreach (int index in items)
    {
        SparseVector<double> example = dataset[index];
        picked.Add(example);
    }
    return picked;
}
/// <summary>
/// Copies the example part of every labeled example in
/// <paramref name="dataset"/> into a new <c>UnlabeledDataset</c>, dropping the
/// labels and keeping the enumeration order.
/// </summary>
/// <typeparam name="LblT">Label type of the source collection.</typeparam>
/// <typeparam name="ExT">Example type shared by source and result.</typeparam>
/// <param name="dataset">Labeled collection to strip the labels from; must not
/// be null.</param>
/// <returns>A new dataset containing one entry per labeled example.</returns>
/// <exception cref="ArgumentNullException"><paramref name="dataset"/> is null.</exception>
public static IUnlabeledExampleCollection<ExT> ConvertToUnlabeledDataset<LblT, ExT>(ILabeledExampleCollection<LblT, ExT> dataset)
{
    // Fail with a descriptive argument error instead of the NullReferenceException
    // that enumerating a null collection would otherwise produce.
    if (dataset == null)
    {
        throw new ArgumentNullException("dataset");
    }

    UnlabeledDataset<ExT> unlabeledDataset = new UnlabeledDataset<ExT>();
    foreach (LabeledExample<LblT, ExT> labeledExample in dataset)
    {
        unlabeledDataset.Add(labeledExample.Example);
    }
    return unlabeledDataset;
}
/// <summary>
/// Produces an <c>UnlabeledDataset</c> from a labeled collection by taking the
/// <c>Example</c> of each labeled entry, in enumeration order, and discarding
/// its label.
/// </summary>
/// <typeparam name="LblT">Label type of the source collection.</typeparam>
/// <typeparam name="ExT">Example type shared by source and result.</typeparam>
/// <param name="dataset">Labeled collection to convert; checked for null via
/// <c>Utils.ThrowException</c>.</param>
/// <returns>A new dataset with one entry per labeled example.</returns>
/// <exception cref="ArgumentNullException">Raised through
/// <c>Utils.ThrowException</c> when <paramref name="dataset"/> is null.</exception>
public static UnlabeledDataset<ExT> ConvertToUnlabeledDataset<LblT, ExT>(ILabeledExampleCollection<LblT, ExT> dataset)
{
    Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);

    UnlabeledDataset<ExT> stripped = new UnlabeledDataset<ExT>();
    foreach (LabeledExample<LblT, ExT> entry in dataset)
    {
        ExT example = entry.Example;
        stripped.Add(example);
    }
    return stripped;
}