Example #1
0
        public ClusteringService()
        {
            // Builds the clustering pipeline: a record-level dissimilarity
            // metric fed into an average-linkage criterion, wrapped by the
            // agglomerative clustering algorithm stored on this service.
            var dissimilarity = new RecordDissimilarityMetric();

            algorithm = new AgglomerativeClusteringAlgorithm<Record>(
                new AverageLinkage<Record>(dissimilarity));
        }
Example #2
0
        private static ClusteringResult <DataPoint> GetClustering()
        {
            // Runs hierarchical (agglomerative) clustering over the shared test
            // data points, using average linkage on the shared metric.
            var algorithm = new AgglomerativeClusteringAlgorithm<DataPoint>(
                new AverageLinkage<DataPoint>(Metric));

            return algorithm.GetClustering(ClusteringTests.DataPoints);
        }
Example #3
0
        static void Main(string[] args)
        {
            // Connection settings for the local OrientDB transaction-graph database.
            // NOTE(review): credentials are hard-coded (masked) here; move them to
            // configuration or environment variables before production use.
            var txgraphOptions = new ConnectionOptions()
            {
                DatabaseName = "txgraph", DatabaseType = ODatabaseType.Graph, HostName = "localhost", Password = "******", Port = 2424, UserName = "******"
            };

            // DEBUG: reads the gold-standard address list instead of querying the
            // graph database through DataSourceProvider.
            var addresses = ReadGold();
            Console.WriteLine($"{addresses.Count} addresses of interest will be processed...");

            // Feature-extraction pipeline: each stage computes one family of
            // per-address features consumed by the dissimilarity metric below.
            var algoPipe = new Pipeline();

            algoPipe.Add(new TotalAmounts());
            algoPipe.Add(new Amounts());
            algoPipe.Add(new SocialNetwork());
            algoPipe.Add(new TimeSlots());
            algoPipe.Add(new Core.Clustering.FeatureExtractors.DayOfWeek());
            algoPipe.Add(new TransactionShape());
            algoPipe.Add(new CommonTimes());
            algoPipe.Add(new Heuristic1());
            algoPipe.Add(new Heuristic2());

            algoPipe.Process(txgraphOptions, addresses);
            Console.WriteLine("Processing of addresses done.");

            // Hierarchical clustering of the addresses with average linkage over
            // the pipeline-derived dissimilarity.
            var metric    = new AddressDissimilarityMetric(algoPipe, addresses);
            var linkage   = new AverageLinkage <string>(metric);
            var algorithm = new AgglomerativeClusteringAlgorithm <string>(linkage);

            var clusteringResult = algorithm.GetClustering(new HashSet <string>(addresses));

            // Writes one report file per cluster set, ordered by increasing
            // dissimilarity. The output directory is created once, outside the
            // loop (the original re-created it on every iteration), and the
            // dead Replace(",", "#") is dropped: an int never formats with a comma.
            Directory.CreateDirectory("report");

            var index = 0;

            foreach (var clusterSet in clusteringResult.OrderBy(x => x.Dissimilarity))
            {
                var sb = new StringBuilder();
                sb.AppendLine($"{clusterSet.Dissimilarity}");
                foreach (var cluster in clusterSet)
                {
                    sb.AppendLine(string.Join("\t", cluster));
                }
                File.WriteAllText(Path.Combine("report", $"{index++}.txt"), sb.ToString());
            }
        }
Example #4
0
        private void ClusteringWorkerDoWork(object sender, DoWorkEventArgs e)
        {
            // Background-worker entry point: clusters the currently loaded data
            // points using the linkage criterion selected in the UI, whose index
            // is passed through e.Argument.

            // Nothing to cluster without data points.
            if (this._dataPoints == null || this._dataPoints.Count == 0)
            {
                return;
            }

            // Maps the selected index to a linkage criterion; any unrecognized
            // value falls back to average linkage.
            var metric   = this._dissimilarityMetric;
            var selected = e.Argument;
            ILinkageCriterion <DataPoint> linkage;

            if (selected is 1)
            {
                linkage = new CompleteLinkage <DataPoint>(metric);
            }
            else if (selected is 2)
            {
                linkage = new SingleLinkage <DataPoint>(metric);
            }
            else if (selected is 3)
            {
                linkage = new MinimumEnergyLinkage <DataPoint>(metric);
            }
            else if (selected is 4)
            {
                linkage = new CentroidLinkage <DataPoint>(metric, DataPoint.GetMedoid);
            }
            else if (selected is 5)
            {
                linkage = new WardsMinimumVarianceLinkage <DataPoint>(metric, DataPoint.GetMedoid);
            }
            else
            {
                linkage = new AverageLinkage <DataPoint>(metric);
            }

            // Clusters the data points and stores the result for the UI thread.
            var clusteringAlg = new AgglomerativeClusteringAlgorithm <DataPoint>(linkage);

            this._clusteringResult = clusteringAlg.GetClustering(this._dataPoints);
        }
Example #5
0
        private static void MelhorCluster(HashSet <DataPoint <int> > dataPoints)
        {
            // Repeatedly clusters the remaining deliveries and extracts the
            // "best" cluster of each round — a cluster whose size is as large as
            // possible without exceeding MAX_ITEMS — removing its points from
            // the working set until no deliveries remain.

            // Maximum number of deliveries allowed in a single cluster.
            const int MAX_ITEMS = 7;

            // Algorithm components: generic dissimilarity metric + average linkage.
            var metric    = new DissimilarityMetric <int>();
            var linkage   = new AverageLinkage <DataPoint <int> >(metric);
            var algorithm = new AgglomerativeClusteringAlgorithm <DataPoint <int> >(linkage);

            // While there are still deliveries to cluster...
            while (dataPoints.Any())
            {
                var clusteringResult = algorithm.GetClustering(dataPoints);

                // Prints every cluster set of the hierarchy. (The original abused
                // Select(...).ToArray() for this side effect; a foreach is clearer.)
                foreach (var clusterSet in clusteringResult)
                {
                    Console.WriteLine(clusterSet);
                }

                // Brute force on the minimum item count: start at MAX_ITEMS and
                // decrement down to 1, looking for a cluster of exactly that size.
                Cluster <DataPoint <int> > bestCluster = null;
                var found = false;

                for (var minItems = MAX_ITEMS; minItems > 0 && !found; minItems--)
                {
                    // Walks the cluster sets from last to first.
                    for (var i = clusteringResult.Count() - 1; i >= 0; i--)
                    {
                        // Largest cluster in this set that still fits MAX_ITEMS.
                        bestCluster = clusteringResult[i]
                                      .OrderByDescending(x => x.Count())
                                      .FirstOrDefault(y => y.Count() <= MAX_ITEMS);

                        // Done as soon as a cluster of exactly minItems is found.
                        if (bestCluster?.Count() == minItems)
                        {
                            found = true;
                            break;
                        }
                    }
                }

                // Result of this iteration.
                Console.WriteLine(bestCluster);

                // Fix: the original dereferenced bestCluster unconditionally and
                // would throw (or spin forever) when no suitable cluster existed.
                if (bestCluster == null)
                {
                    break;
                }

                // Removes the deliveries just clustered; if nothing was removed
                // the loop could never terminate, so bail out in that case too.
                if (dataPoints.RemoveWhere(x => bestCluster.Any(y => y.id == x.id)) == 0)
                {
                    break;
                }

                Console.WriteLine();
                Console.WriteLine();

                // Pauses so the user can inspect this round before the next one.
                Console.ReadLine();
            }
        }
Example #6
0
        public static void AgglomerativeClustering(AllResults loaded)
        {
            // Hierarchically clusters all result rows, then flattens the
            // resulting dendrogram into pseudo result rows (one per sufficiently
            // tight cluster) and renders them to an HTML report.
            // NOTE(review): output paths, the 0.70 dissimilarity threshold and
            // the 50-row cluster cap are hard-coded below.
            var imgName = "241666.jpg"; //"159161.jpg";
            var imgId   = loaded.ImageEncoding[imgName];

            // imgId is currently unused: the per-image filter below is commented
            // out, so ALL rows are clustered rather than just those for imgName.
            var relevantRows = loaded.Rows
                               //.Where(r => r.Query.ImageId == imgId)
                               .ToList();

            // cluster all of them together? Include query into dissimilarity function then

            // Or product by product, filter down to big elements, and offer to transitively load more and more?


            // Average-linkage agglomerative clustering over the result rows.
            var metric    = new ResultsRowSetBasedDistance();
            var linkage   = new AverageLinkage <ResultsRow>(metric);
            var algorithm = new AgglomerativeClusteringAlgorithm <ResultsRow>(linkage);

            var clusters = algorithm.GetClustering(new HashSet <ResultsRow>(relevantRows));

            clusters.SaveToCsv(@"G:\siret\zoot\protobuf\clustertest.csv");
            //RenderData();

            // Container for the flattened clusters; reuses the input encodings.
            var dummyResults = new AllResults {
                ImageEncoding = loaded.ImageEncoding, PatchEncoding = loaded.PatchEncoding
            };

            // Walks the dendrogram top-down, starting at the single root cluster.
            var clusterQueue = new Queue <Cluster <ResultsRow> >(new[] { clusters.SingleCluster });

            while (clusterQueue.Count > 0)
            {
                var item = clusterQueue.Dequeue();
                // Accepts a cluster once it is tight (dissimilarity <= 0.70) and
                // small (< 50 rows); otherwise splits it into its two sub-clusters
                // (named Parent1/Parent2 by the library) and keeps descending.
                if (item.Dissimilarity <= 0.70 && item.Count < 50)
                {
                    // Merges the cluster into one pseudo row: all hits grouped by
                    // target keeping the minimum distance per target, plus each
                    // member's own query added as a sentinel hit with Distance = -1.
                    dummyResults.Rows.Add(new ResultsRow
                    {
                        Query = item.First().Query,
                        Hits  = item.SelectMany(x => x.Hits)
                                .GroupBy(x => x.Hit)
                                .Select(x => new SearchHit {
                            Hit = x.Key, Distance = x.Min(y => y.Distance)
                        })
                                .Concat(item.Select(i => new SearchHit {
                            Hit = i.Query, Distance = -1
                        }))
                                .ToArray()
                    });
                }
                else
                {
                    clusterQueue.Enqueue(item.Parent1);
                    clusterQueue.Enqueue(item.Parent2);
                }
            }

            // Makes every key self-referencing in the reference map — presumably
            // so each item counts as related to itself when rendered; confirm
            // against Render's use of ReferenceMap.
            loaded.RefreshReferenceMap();
            foreach (var k in AllResults.ReferenceMap.Keys)
            {
                AllResults.ReferenceMap[k][k] = 1;
            }

            // Renders the flattened clustering as HTML (overwrites any old file).
            using (var sw = new StreamWriter(@"G:\siret\zoot\protobuf\clusteringTestMega.html", append: false))
            {
                dummyResults.Render(sw);
            }
        }
Example #7
0
        private static void Main(string[] args)
        {
            // Times the whole run.
            var globalPerf = new PerformanceMeasure();

            globalPerf.Start();

            // Resolves the data-set path: first CLI argument, or the default.
            var dataSetFile = args.Length > 0 ? args[0] : DATASET_FILE;
            var filePath    = Path.GetFullPath(dataSetFile);

            Console.WriteLine($"Loading data-points from {filePath}...");
            var dataPoints = new CsvParser().Load(filePath);

            Console.WriteLine($"Clustering {dataPoints.Count} data-points...");

            // Performs hierarchical clustering, timing just this phase.
            var clusterPerf = new PerformanceMeasure();

            clusterPerf.Start();
            var metric = new DataPoint(); // Euclidean distance
            var clusteringAlg = new AgglomerativeClusteringAlgorithm <DataPoint>(
                new AverageLinkage <DataPoint>(metric));
            var clustering = clusteringAlg.GetClustering(dataPoints);

            clusterPerf.Stop();

            // Saves the dendrogram (D3 JSON) and the clustering (CSV).
            Directory.CreateDirectory(Path.GetFullPath(RESULTS_PATH));
            clustering.SaveD3DendrogramFile(Path.GetFullPath(Path.Combine(RESULTS_PATH, "dendrogram.json")));
            clustering.SaveToCsv(Path.GetFullPath(Path.Combine(RESULTS_PATH, "clustering.csv")));
            Console.WriteLine($"Finished clustering: {clusterPerf}");

            // Evaluates the clustering according to several internal criteria.
            //CentroidFunction<DataPoint> centroidFunc = DataPoint.GetMedoid;
            CentroidFunction <DataPoint> centroidFunc = DataPoint.GetCentroid;

            var criteria = new Dictionary <string, IInternalEvaluationCriterion <DataPoint> >();

            criteria["Silhouette coefficient"]   = new SilhouetteCoefficient <DataPoint>(metric);
            criteria["Dunn index"]               = new DunnIndex <DataPoint>(metric);
            criteria["Davies-Bouldin index"]     = new DaviesBouldinIndex <DataPoint>(metric, centroidFunc);
            criteria["Calinski-Harabasz index"]  = new CalinskiHarabaszIndex <DataPoint>(metric, centroidFunc);
            criteria["Modified Gamma statistic"] = new ModifiedGammaStatistic <DataPoint>(metric, centroidFunc);
            criteria["Xie-Beni index"]           = new XieBeniIndex <DataPoint>(metric, centroidFunc);
            criteria["Within-Between ratio"]     = new WithinBetweenRatio <DataPoint>(metric, centroidFunc);
            criteria["I-index"]                  = new IIndex <DataPoint>(metric, centroidFunc);
            criteria["Xu index"]                 = new XuIndex <DataPoint>(metric, centroidFunc);

            //criteria["RMSSD"] = new RootMeanSquareStdDev<DataPoint>(metric, centroidFunc);
            //criteria["R-squared"] = new RSquared<DataPoint>(metric, centroidFunc);

            // Reports the best partition for each criterion.
            foreach (var criterion in criteria)
            {
                GetBestPartition(clustering, criterion.Value, criterion.Key);
            }

            globalPerf.Stop();
            Console.WriteLine($"\nFinished: {globalPerf}");
            Console.ReadKey();
        }