Exemplo n.º 1
0
        /// <summary>
        /// Extracts cluster centroids for every domain/rule pair from occurrence-weighted coordinates.
        /// For each rule, clusters are pulled one at a time from a <c>QTClustering</c> instance; extraction
        /// for that rule stops at the first cluster that misses either threshold, or as soon as the
        /// coordinates still unassigned can no longer reach the minimum support ratio.
        /// </summary>
        /// <param name="domainCounts">Not read by this overload.</param>
        /// <param name="ruleCoordOccsForDomains">
        /// Per domain and rule, the occurrence total used as the denominator of every support ratio.
        /// Looked up with the indexer, so a missing domain or rule key throws.
        /// </param>
        /// <param name="domainsToRulesToCoordinateCounts">Per domain and rule, each coordinate with its occurrence count.</param>
        /// <param name="clusterThresholdKm">Value handed to <c>QTClustering</c> as its cluster diameter.</param>
        /// <param name="minItemsPerCluster">Minimum summed occurrences a cluster needs to be accepted.</param>
        /// <param name="minSupportRatioPerCluster">Minimum (cluster occurrences / rule occurrence total) a cluster needs to be accepted.</param>
        /// <returns>
        /// The centroid lists collected per domain and rule; each centroid's <c>Confidence</c> is its support ratio.
        /// Entries are obtained through <c>RetrieveRulesToCentroidsList</c> / <c>RetrieveCentroidsList</c>
        /// (presumably get-or-create helpers - confirm against their definitions), and only once a cluster is accepted.
        /// </returns>
        private Dictionary<string, Dictionary<PatternRule, List<PatternMiningCoordinates>>> FindClusterCentroids(
            Dictionary<string, int> domainCounts,
            Dictionary<string, Dictionary<PatternRule, int>> ruleCoordOccsForDomains,
            Dictionary<string, Dictionary<PatternRule, Dictionary<Coord, int>>> domainsToRulesToCoordinateCounts,
            double clusterThresholdKm,
            int minItemsPerCluster,
            double minSupportRatioPerCluster)
        {
            var centroidsByDomain = new Dictionary<string, Dictionary<PatternRule, List<PatternMiningCoordinates>>>();

            int domainTotal = domainsToRulesToCoordinateCounts.Count;
            int domainIndex = 0;

            foreach (var domainEntry in domainsToRulesToCoordinateCounts)
            {
                domainIndex++;

                string domain = domainEntry.Key;
                var countsByRule = domainEntry.Value;
                var occurrenceTotalsByRule = ruleCoordOccsForDomains[domain];
                int ruleTotal = countsByRule.Count();

                // Fetched lazily so that the helper is only invoked once a cluster has been accepted.
                Dictionary<PatternRule, List<PatternMiningCoordinates>> centroidsByRule = null;
                int ruleIndex = 0;

                foreach (var ruleEntry in countsByRule)
                {
                    ruleIndex++;

                    var rule = ruleEntry.Key;
                    List<PatternMiningCoordinates> ruleCentroids = null;

                    Console.WriteLine($"{domainIndex}/{domainTotal} - {ruleIndex}/{ruleTotal} - Finding clusters for domain {domain} and rule {rule}");

                    var countsByCoordinate = ruleEntry.Value;

                    // Held as a double so that the ratio computations below use floating-point division.
                    double occurrenceTotal = occurrenceTotalsByRule[rule];

                    // Working set: shared with the clustering instance and shrunk after every accepted cluster.
                    var remaining = new HashSet<CoordWithOcc>(
                        countsByCoordinate.Select(pair => new CoordWithOcc(pair.Key, pair.Value)));

                    var clustering = new QTClustering<CoordWithOcc>(
                        distanceHelper: new CoordWithOccDistanceHelper(),
                        clusterDiameter: clusterThresholdKm,
                        itemsSet: remaining);

                    // NOTE(review): stopping at the first rejected cluster presumably relies on NextCluster
                    // returning clusters in non-increasing order of size - confirm against QTClustering.
                    for (Cluster<CoordWithOcc> cluster = clustering.NextCluster();
                         cluster != null;
                         cluster = clustering.NextCluster())
                    {
                        var clusterOccurrences = cluster.Members.Sum(member => member.Occurrences);

                        if (clusterOccurrences < minItemsPerCluster)
                        {
                            break;
                        }

                        var support = clusterOccurrences / occurrenceTotal;

                        if (support < minSupportRatioPerCluster)
                        {
                            break;
                        }

                        centroidsByRule = centroidsByRule ?? this.RetrieveRulesToCentroidsList(centroidsByDomain, domain);
                        ruleCentroids = ruleCentroids ?? this.RetrieveCentroidsList(centroidsByRule, rule);

                        var centroid = new PatternMiningCoordinates()
                        {
                            Latitude = cluster.Centroid.Coord.Latitude,
                            Longitude = cluster.Centroid.Coord.Longitude,
                            Confidence = support
                        };

                        ruleCentroids.Add(centroid);

                        Console.WriteLine($"                                                Found centroid: {centroid}");

                        remaining.ExceptWith(cluster.Members);

                        // Upper bound on the support any later cluster could reach: everything left over, combined.
                        var remainingSupport = remaining.Sum(item => item.Occurrences) / occurrenceTotal;

                        if (remainingSupport < minSupportRatioPerCluster)
                        {
                            break;
                        }
                    }
                }
            }

            return centroidsByDomain;
        }
        /// <summary>
        /// Extracts cluster centroids for every domain/rule pair from a set of distinct coordinates.
        /// For each rule, clusters are pulled one at a time from a <c>QTClustering</c> instance; extraction
        /// for that rule stops at the first cluster that misses either threshold, or as soon as the
        /// coordinates still unassigned can no longer reach the minimum support ratio.
        /// </summary>
        /// <remarks>
        /// Clustering runs on a private copy of each coordinate set, so the sets inside
        /// <paramref name="domainsToRulesToCoordinates"/> are left unmodified.
        /// </remarks>
        /// <param name="domainCounts">Not read by this overload.</param>
        /// <param name="ruleCountsForDomains">Not read by this overload.</param>
        /// <param name="domainsToRulesToCoordinates">Per domain and rule, the coordinates to cluster.</param>
        /// <param name="clusterThresholdKm">Value handed to <c>QTClustering</c> as its cluster diameter.</param>
        /// <param name="minItemsPerCluster">Minimum number of members a cluster needs to be accepted.</param>
        /// <param name="minSupportRatioPerCluster">Minimum (cluster members / original coordinate count) a cluster needs to be accepted.</param>
        /// <returns>
        /// The centroid lists collected per domain and rule; each centroid's <c>Confidence</c> is its support ratio.
        /// Entries are obtained through <c>RetrieveRulesToCentroidsList</c> / <c>RetrieveCentroidsList</c>
        /// (presumably get-or-create helpers - confirm against their definitions), and only once a cluster is accepted.
        /// </returns>
        private Dictionary<string, Dictionary<PatternRule, List<PatternMiningCoordinates>>> FindClusterCentroids(
            Dictionary<string, int> domainCounts,
            Dictionary<string, Dictionary<PatternRule, int>> ruleCountsForDomains,
            Dictionary<string, Dictionary<PatternRule, HashSet<PatternMiningCoordinates>>> domainsToRulesToCoordinates,
            double clusterThresholdKm,
            int minItemsPerCluster,
            double minSupportRatioPerCluster)
        {
            var domainsToRulesToCentroids = new Dictionary<string, Dictionary<PatternRule, List<PatternMiningCoordinates>>>();

            foreach (var domainsToRulesToCoordinatesEntry in domainsToRulesToCoordinates)
            {
                var domain = domainsToRulesToCoordinatesEntry.Key;
                var rulesToCoordinates = domainsToRulesToCoordinatesEntry.Value;

                // Fetched lazily so that the helper is only invoked once a cluster has been accepted.
                Dictionary<PatternRule, List<PatternMiningCoordinates>> rulesToCentroids = null;

                foreach (var rulesToCoordinatesEntry in rulesToCoordinates)
                {
                    var rule = rulesToCoordinatesEntry.Key;
                    List<PatternMiningCoordinates> centroidsForRule = null;

                    var coordinates = rulesToCoordinatesEntry.Value;

                    // Held as a double so that the ratio computations below use floating-point division.
                    double originalCoordinatesCount = coordinates.Count;

                    // Work on a copy (same comparer, hence same membership) so that removing clustered
                    // members below does not destroy the caller's input data.
                    var remainingCoordinates = new HashSet<PatternMiningCoordinates>(coordinates, coordinates.Comparer);

                    var clustering = new QTClustering<PatternMiningCoordinates>(
                        distanceHelper: new PatternMiningCoordinatesDistanceHelper(),
                        clusterDiameter: clusterThresholdKm,
                        itemsSet: remainingCoordinates);

                    // NOTE(review): stopping at the first rejected cluster presumably relies on NextCluster
                    // returning clusters in non-increasing order of size - confirm against QTClustering.
                    for (Cluster<PatternMiningCoordinates> cluster = clustering.NextCluster();
                         cluster != null;
                         cluster = clustering.NextCluster())
                    {
                        if (cluster.Members.Count < minItemsPerCluster)
                        {
                            break;
                        }

                        var supportRatio = cluster.Members.Count / originalCoordinatesCount;

                        if (supportRatio < minSupportRatioPerCluster)
                        {
                            break;
                        }

                        if (rulesToCentroids == null)
                        {
                            rulesToCentroids = this.RetrieveRulesToCentroidsList(domainsToRulesToCentroids, domain);
                        }

                        if (centroidsForRule == null)
                        {
                            centroidsForRule = this.RetrieveCentroidsList(rulesToCentroids, rule);
                        }

                        centroidsForRule.Add(new PatternMiningCoordinates()
                        {
                            Latitude = cluster.Centroid.Latitude,
                            Longitude = cluster.Centroid.Longitude,
                            Confidence = supportRatio
                        });

                        remainingCoordinates.ExceptWith(cluster.Members);

                        // Upper bound on the support any later cluster could reach: everything left over, combined.
                        var maxRemainingSupportRatio = remainingCoordinates.Count / originalCoordinatesCount;

                        if (maxRemainingSupportRatio < minSupportRatioPerCluster)
                        {
                            break;
                        }
                    }
                }
            }

            return domainsToRulesToCentroids;
        }