public ClusteringService()
{
    // Wire the clustering pipeline in one expression:
    // record dissimilarity -> average linkage -> agglomerative algorithm.
    var dissimilarity = new RecordDissimilarityMetric();
    algorithm = new AgglomerativeClusteringAlgorithm<Record>(
        new AverageLinkage<Record>(dissimilarity));
}
// Performs hierarchical (agglomerative) clustering of the shared test
// data-points using average linkage over the shared Metric.
private static ClusteringResult<DataPoint> GetClustering()
{
    var algorithm = new AgglomerativeClusteringAlgorithm<DataPoint>(
        new AverageLinkage<DataPoint>(Metric));
    return algorithm.GetClustering(ClusteringTests.DataPoints);
}
// Clusters Bitcoin-style addresses by features extracted from a transaction
// graph, then writes one report file per cluster-set under ./report.
static void Main(string[] args)
{
    // Connection to the local OrientDB "txgraph" graph database.
    // NOTE(review): credentials are hard-coded here (masked in source); move them
    // to configuration or environment variables before shipping.
    var txgraphOptions = new ConnectionOptions()
    {
        DatabaseName = "txgraph",
        DatabaseType = ODatabaseType.Graph,
        HostName = "localhost",
        Password = "******",
        Port = 2424,
        UserName = "******"
    };

    //var data = new DataSourceProvider("dbaf14e1c476e76ea05a8b71921a46d6b06f0a950f17c5f9f1a03b8fae467f10", LimitType.DATE, 1);
    //var addresses = data.GetAddresses(txgraphOptions);
    //if(addresses.Count == 0)
    //{
    //    return;
    //}

    // DEBUG: use the gold-standard address list instead of querying the database.
    var addresses = ReadGold();

    // DEBUG
    Console.WriteLine($"{addresses.Count} addresses of interest will be processed...");

    // Feature-extraction pipeline: each stage computes one family of features
    // per address; Process runs them all against the graph database.
    var algoPipe = new Pipeline();
    algoPipe.Add(new TotalAmounts());
    algoPipe.Add(new Amounts());
    algoPipe.Add(new SocialNetwork());
    algoPipe.Add(new TimeSlots());
    algoPipe.Add(new Core.Clustering.FeatureExtractors.DayOfWeek());
    algoPipe.Add(new TransactionShape());
    algoPipe.Add(new CommonTimes());
    algoPipe.Add(new Heuristic1());
    algoPipe.Add(new Heuristic2());
    algoPipe.Process(txgraphOptions, addresses);

    // DEBUG
    Console.WriteLine("Processing of addresses done.");

    // Agglomerative clustering of the addresses by their extracted features.
    var metric = new AddressDissimilarityMetric(algoPipe, addresses);
    var linkage = new AverageLinkage<string>(metric);
    var algorithm = new AgglomerativeClusteringAlgorithm<string>(linkage);
    var clusteringResult = algorithm.GetClustering(new HashSet<string>(addresses));

    // Write one report file per cluster-set, ordered by increasing dissimilarity.
    // The output directory is loop-invariant, so create it once up front
    // (the original created it on every iteration).
    Directory.CreateDirectory("report");
    var index = 0;
    foreach (var clusterSet in clusteringResult.OrderBy(x => x.Dissimilarity))
    {
        var sb = new StringBuilder();
        sb.AppendLine($"{clusterSet.Dissimilarity}");
        foreach (var cluster in clusterSet)
        {
            sb.AppendLine(string.Join("\t", cluster));
        }

        // NOTE(review): Replace(",", "#") is a no-op for an integer index — it looks
        // vestigial from a dissimilarity-based file name; confirm before removing.
        File.WriteAllText(Path.Combine("report", $"{index++}.txt".Replace(",", "#")), sb.ToString());
    }
}
// BackgroundWorker entry point: picks a linkage criterion from the index in
// e.Argument, clusters the loaded data-points, and stores the result in
// this._clusteringResult.
private void ClusteringWorkerDoWork(object sender, DoWorkEventArgs e)
{
    // No data-points loaded -> nothing to cluster.
    if (this._dataPoints == null || this._dataPoints.Count == 0)
    {
        return;
    }

    // e.Argument carries the selected index; any unrecognised value falls back
    // to average linkage (same constant-pattern semantics as the original switch).
    var choice = e.Argument;
    ILinkageCriterion<DataPoint> criterion;
    if (choice is 1)
    {
        criterion = new CompleteLinkage<DataPoint>(this._dissimilarityMetric);
    }
    else if (choice is 2)
    {
        criterion = new SingleLinkage<DataPoint>(this._dissimilarityMetric);
    }
    else if (choice is 3)
    {
        criterion = new MinimumEnergyLinkage<DataPoint>(this._dissimilarityMetric);
    }
    else if (choice is 4)
    {
        criterion = new CentroidLinkage<DataPoint>(this._dissimilarityMetric, DataPoint.GetMedoid);
    }
    else if (choice is 5)
    {
        criterion = new WardsMinimumVarianceLinkage<DataPoint>(
            this._dissimilarityMetric, DataPoint.GetMedoid);
    }
    else
    {
        criterion = new AverageLinkage<DataPoint>(this._dissimilarityMetric);
    }

    // Cluster the data-points and keep the result in the backing field.
    var clusteringAlg = new AgglomerativeClusteringAlgorithm<DataPoint>(criterion);
    this._clusteringResult = clusteringAlg.GetClustering(this._dataPoints);
}
// Repeatedly clusters the remaining deliveries ("entregas") and carves off one
// "best" cluster per iteration (at most MAX_ITEMS members, preferring the
// largest), removing its members from dataPoints until none remain.
// NOTE(review): if no cluster ever satisfies the size check, bestCluster stays
// null and the RemoveWhere call below throws a NullReferenceException — verify
// against real input before relying on this.
// (Original comments were in Portuguese; translated to English below.)
private static void MelhorCluster(HashSet <DataPoint <int> > dataPoints)
{
    // initialise the algorithm's classes
    var metric = new DissimilarityMetric <int>();
    var linkage = new AverageLinkage <DataPoint <int> >(metric);
    var algorithm = new AgglomerativeClusteringAlgorithm <DataPoint <int> >(linkage);

    // while there are still deliveries to cluster
    while (dataPoints.Any())
    {
        // cluster
        var clusteringResult = algorithm.GetClustering(dataPoints);

        // print the clustering (Select is used for its side effect; ToArray forces evaluation)
        clusteringResult.Select(x => { Console.WriteLine(x); return(0); }).ToArray();

        // maximum number of items per cluster
        var MAX_ITEMS = 7;

        // brute-force over the minimum item count, starting at MAX_ITEMS and
        // decrementing down to 1 (a single delivery)
        var minItems = MAX_ITEMS;

        // result of this operation
        Cluster <DataPoint <int> > bestCluster = null;

        // brute force
        while (minItems > 0)
        {
            // walk the list of clusters from last to first
            for (var i = clusteringResult.Count() - 1; i >= 0; i--)
            {
                // ordering descending by delivery count, take the first cluster
                // with the same count as, or fewer than, MAX_ITEMS
                // NOTE(review): the filter compares against MAX_ITEMS (not minItems);
                // only the break condition below uses minItems — confirm intended.
                bestCluster = clusteringResult[i].
                              OrderByDescending(x => x.Count()).
                              FirstOrDefault(y => y.Count() <= MAX_ITEMS);

                // if this cluster's delivery count equals the minimum currently
                // being tried, finish the search
                if (bestCluster?.Count() == minItems)
                {
                    break;
                }
            }

            // two nested loops, so re-check the condition and break again
            if (bestCluster?.Count() == minItems)
            {
                break;
            }

            // decrease minItems and try again
            minItems--;
        }

        // result of this iteration
        Console.WriteLine(bestCluster);

        // remove the deliveries that have just been clustered
        dataPoints.RemoveWhere(x => bestCluster.Any(y => y.id == x.id));
        Console.WriteLine();
        Console.WriteLine();

        // pause for a key press, then repeat until no deliveries remain
        Console.ReadLine();
    }
}
// Clusters result rows agglomeratively, flattens the resulting dendrogram into
// one synthetic ResultsRow per accepted cluster, and renders the rows as HTML.
// NOTE(review): output paths are hard-coded to a local G:\ drive — parameterise
// before reuse outside the original machine.
public static void AgglomerativeClustering(AllResults loaded)
{
    var imgName = "241666.jpg"; //"159161.jpg";
    var imgId = loaded.ImageEncoding[imgName];
    // NOTE(review): the image filter below is commented out, so imgId is currently
    // unused and ALL rows are clustered, not just those for imgName.
    var relevantRows = loaded.Rows
        //.Where(r => r.Query.ImageId == imgId)
        .ToList();
    // cluster all of them together? Include query into dissimilarity function then
    // Or product by product, filter down to big elements, and offer to transitively load more and more?
    var metric = new ResultsRowSetBasedDistance();
    var linkage = new AverageLinkage <ResultsRow>(metric);
    var algorithm = new AgglomerativeClusteringAlgorithm <ResultsRow>(linkage);
    var clusters = algorithm.GetClustering(new HashSet <ResultsRow>(relevantRows));
    clusters.SaveToCsv(@"G:\siret\zoot\protobuf\clustertest.csv");
    //RenderData();

    // Synthetic result set that will receive one row per accepted cluster.
    var dummyResults = new AllResults
    {
        ImageEncoding = loaded.ImageEncoding,
        PatchEncoding = loaded.PatchEncoding
    };

    // Walk the dendrogram top-down from the single root cluster: accept a cluster
    // when it is tight (dissimilarity <= 0.70) and small (< 50 members);
    // otherwise split it into its two parent sub-clusters and keep descending.
    var clusterQueue = new Queue <Cluster <ResultsRow> >(new[] { clusters.SingleCluster });
    while (clusterQueue.Count > 0)
    {
        var item = clusterQueue.Dequeue();
        if (item.Dissimilarity <= 0.70 && item.Count < 50)
        {
            // Merge all hits of the cluster's rows, keeping the minimum distance
            // per distinct hit, then append each member's own query as a sentinel
            // hit with Distance = -1.
            dummyResults.Rows.Add(new ResultsRow
            {
                Query = item.First().Query,
                Hits = item.SelectMany(x => x.Hits)
                    .GroupBy(x => x.Hit)
                    .Select(x => new SearchHit { Hit = x.Key, Distance = x.Min(y => y.Distance) })
                    .Concat(item.Select(i => new SearchHit { Hit = i.Query, Distance = -1 }))
                    .ToArray()
            });
        }
        else
        {
            clusterQueue.Enqueue(item.Parent1);
            clusterQueue.Enqueue(item.Parent2);
        }
    }

    loaded.RefreshReferenceMap();
    // Make every key map to itself in the static reference map.
    foreach (var k in AllResults.ReferenceMap.Keys)
    {
        AllResults.ReferenceMap[k][k] = 1;
    }

    using (var sw = new StreamWriter(@"G:\siret\zoot\protobuf\clusteringTestMega.html", append: false))
    {
        dummyResults.Render(sw);
    }
}
// End-to-end demo: load data-points from a CSV file, run agglomerative
// clustering, save dendrogram/CSV outputs, then score the clustering with
// several internal evaluation criteria and report the best partition for each.
private static void Main(string[] args)
{
    var globalPerf = new PerformanceMeasure();
    globalPerf.Start();

    // loads points from csv file (path from args[0], falling back to DATASET_FILE)
    var dataSetFile = args.Length > 0 ? args[0] : DATASET_FILE;
    var filePath = Path.GetFullPath(dataSetFile);
    Console.WriteLine($"Loading data-points from {filePath}...");
    var parser = new CsvParser();
    var dataPoints = parser.Load(filePath);

    Console.WriteLine($"Clustering {dataPoints.Count} data-points...");

    // performs hierarchical clustering
    var clusterPerf = new PerformanceMeasure();
    clusterPerf.Start();
    // NOTE(review): DataPoint itself appears to double as the dissimilarity
    // metric here — confirm it implements the metric interface as intended.
    var metric = new DataPoint(); // Euclidean distance
    var linkage = new AverageLinkage <DataPoint>(metric);
    var clusteringAlg = new AgglomerativeClusteringAlgorithm <DataPoint>(linkage);
    var clustering = clusteringAlg.GetClustering(dataPoints);
    clusterPerf.Stop();

    // saves the dendrogram (D3 JSON) and the per-step clustering (CSV)
    Directory.CreateDirectory(Path.GetFullPath(RESULTS_PATH));
    clustering.SaveD3DendrogramFile(Path.GetFullPath(Path.Combine(RESULTS_PATH, "dendrogram.json")));
    clustering.SaveToCsv(Path.GetFullPath(Path.Combine(RESULTS_PATH, "clustering.csv")));
    Console.WriteLine($"Finished clustering: {clusterPerf}");

    // evaluates the clustering according to several criteria
    //CentroidFunction<DataPoint> centroidFunc = DataPoint.GetMedoid;
    CentroidFunction <DataPoint> centroidFunc = DataPoint.GetCentroid;
    var criteria = new Dictionary <string, IInternalEvaluationCriterion <DataPoint> >
    {
        { "Silhouette coefficient", new SilhouetteCoefficient <DataPoint>(metric) },
        { "Dunn index", new DunnIndex <DataPoint>(metric) },
        { "Davies-Bouldin index", new DaviesBouldinIndex <DataPoint>(metric, centroidFunc) },
        { "Calinski-Harabasz index", new CalinskiHarabaszIndex <DataPoint>(metric, centroidFunc) },
        { "Modified Gamma statistic", new ModifiedGammaStatistic <DataPoint>(metric, centroidFunc) },
        { "Xie-Beni index", new XieBeniIndex <DataPoint>(metric, centroidFunc) },
        { "Within-Between ratio", new WithinBetweenRatio <DataPoint>(metric, centroidFunc) },
        { "I-index", new IIndex <DataPoint>(metric, centroidFunc) },
        { "Xu index", new XuIndex <DataPoint>(metric, centroidFunc) }
        //{"RMSSD", new RootMeanSquareStdDev<DataPoint>(metric, centroidFunc)},
        //{"R-squared", new RSquared<DataPoint>(metric, centroidFunc)},
    };

    // for each criterion, find and report the best-scoring partition
    foreach (var criterion in criteria)
    {
        GetBestPartition(clustering, criterion.Value, criterion.Key);
    }

    globalPerf.Stop();
    Console.WriteLine($"\nFinished: {globalPerf}");
    Console.ReadKey();
}