/// <summary>
/// Creates a new <c>VIC</c> instance from a dataset, a set of supervised
/// classifiers and a clustering algorithm.
/// </summary>
/// <param name="dataset">Dataset stored as <c>OriginalDataset</c>.</param>
/// <param name="supervisedClassifiers">Classifiers stored as <c>SupervisedClassifiers</c>.</param>
/// <param name="clusteringAlgorithm">Algorithm stored as <c>ClusteringAlgorithm</c>.</param>
/// <remarks>
/// <c>ClusteredDataset</c> is explicitly reset to <c>null</c>; the constructor
/// performs no validation of its arguments.
/// </remarks>
public VIC(
    Dataset dataset,
    IEnumerable<ISupervisedClassifier> supervisedClassifiers,
    IUnsupervisedClassifier clusteringAlgorithm)
{
    this.OriginalDataset = dataset;
    this.ClusteredDataset = null;
    this.SupervisedClassifiers = supervisedClassifiers;
    this.ClusteringAlgorithm = clusteringAlgorithm;
}
/// <summary>
/// Builds a dataset named "Clustered {relation name}" whose class values are
/// the cluster indices assigned by <paramref name="clusteringAlgorithm"/>.
/// </summary>
/// <param name="dataset">Dataset whose header features and instances are used.</param>
/// <param name="clusteringAlgorithm">
/// Algorithm that is initialized with <c>k</c> equal to the number of values of
/// the nominal class feature, trained on the non-class columns and then asked to
/// classify those same columns.
/// </param>
/// <returns>The new dataset with each instance's class value set to its cluster result.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="dataset"/> or <paramref name="clusteringAlgorithm"/> is <c>null</c>.
/// </exception>
/// <exception cref="InvalidCastException">
/// The class feature's <c>Type</c> cannot be cast to <c>NominalFeature</c>.
/// </exception>
public Dataset GetClusteredDataset(Dataset dataset, IUnsupervisedClassifier clusteringAlgorithm)
{
    // The single-string ArgumentNullException constructor takes the parameter
    // name, so the name and the message are passed separately.
    if (dataset == null)
    {
        throw new ArgumentNullException(nameof(dataset), "An initial dataset is required");
    }

    // Fail before any instance is modified rather than with a
    // NullReferenceException half-way through.
    if (clusteringAlgorithm == null)
    {
        throw new ArgumentNullException(nameof(clusteringAlgorithm), "A clustering algorithm is required");
    }

    Header header = new Header($"Clustered {dataset.Header.RelationName}", dataset.Header.Features);

    // Blank out the existing class value of every instance before clustering.
    // NOTE(review): these are the instances of the input dataset; if Instance is
    // a reference type and Dataset does not copy them, the caller's dataset loses
    // its class values as well - confirm this is intended.
    IEnumerable<Instance> instances = dataset.Instances;
    foreach (Instance instance in instances)
    {
        instance[dataset.Header.ClassFeature] = null;
    }

    Dataset clusteredDataset = new Dataset(header, instances.ToArray());
    Feature classFeature = clusteredDataset.Header.ClassFeature;

    // Every feature except the class feature is used as clustering input.
    List<Feature> features = clusteredDataset.Header.Features.ToList();
    features.Remove(classFeature);

    // Request as many clusters as the class feature has nominal values.
    clusteringAlgorithm.Initialize(new Dictionary<string, object>
    {
        { "k", ((NominalFeature)classFeature.Type).Values.Count }
    });
    clusteringAlgorithm.Train(clusteredDataset.GetColumns(features), clusteredDataset.GetClasses());

    int[] clusteringResults = clusteringAlgorithm.Classify(clusteredDataset.GetColumns(features));
    for (int i = 0; i < clusteringResults.Length; i++)
    {
        clusteredDataset.Instances[i][classFeature] = clusteringResults[i];
    }

    return clusteredDataset;
}