/// <summary>
/// For every (domain, rule) pair, repeatedly pulls clusters out of the rule's
/// occurrence-weighted coordinates and records the centroid of each cluster that
/// satisfies both the minimum-size and the minimum-support thresholds.
/// </summary>
/// <param name="domainCounts">Not read by this method.</param>
/// <param name="ruleCoordOccsForDomains">
/// Per domain, the total coordinate occurrence count for each rule. It is used as the
/// denominator of every support ratio and is accessed through the dictionary indexer,
/// so it must contain each domain and rule found in
/// <paramref name="domainsToRulesToCoordinateCounts"/>.
/// </param>
/// <param name="domainsToRulesToCoordinateCounts">
/// Domain -> rule -> coordinate -> occurrence count. These dictionaries are only read;
/// clustering operates on a set built from them.
/// </param>
/// <param name="clusterThresholdKm">Value passed to the clustering as its cluster diameter.</param>
/// <param name="minItemsPerCluster">Minimum summed occurrences a cluster needs to be accepted.</param>
/// <param name="minSupportRatioPerCluster">
/// Minimum ratio of a cluster's summed occurrences to the rule's total occurrences.
/// </param>
/// <returns>
/// The dictionary handed to <c>RetrieveRulesToCentroidsList</c>; presumably that helper and
/// <c>RetrieveCentroidsList</c> create the per-domain / per-rule entries on demand (confirm
/// against their implementation). Each recorded centroid stores the support ratio as its
/// <c>Confidence</c>.
/// </returns>
private Dictionary<string, Dictionary<PatternRule, List<PatternMiningCoordinates>>> FindClusterCentroids(
    Dictionary<string, int> domainCounts,
    Dictionary<string, Dictionary<PatternRule, int>> ruleCoordOccsForDomains,
    Dictionary<string, Dictionary<PatternRule, Dictionary<Coord, int>>> domainsToRulesToCoordinateCounts,
    double clusterThresholdKm,
    int minItemsPerCluster,
    double minSupportRatioPerCluster)
{
    var result = new Dictionary<string, Dictionary<PatternRule, List<PatternMiningCoordinates>>>();

    var domainTotal = domainsToRulesToCoordinateCounts.Count;
    var domainIndex = 0;

    foreach (var domainEntry in domainsToRulesToCoordinateCounts)
    {
        domainIndex++;

        var domain = domainEntry.Key;
        var countsByRule = domainEntry.Value;
        var occurrenceTotalsByRule = ruleCoordOccsForDomains[domain];
        var ruleTotal = countsByRule.Count();

        // Fetched lazily so the helper is only invoked once a cluster is actually accepted.
        Dictionary<PatternRule, List<PatternMiningCoordinates>> centroidsByRule = null;
        var ruleIndex = 0;

        foreach (var ruleEntry in countsByRule)
        {
            ruleIndex++;

            var rule = ruleEntry.Key;
            List<PatternMiningCoordinates> ruleCentroids = null;

            Console.WriteLine($"{domainIndex}/{domainTotal} - {ruleIndex}/{ruleTotal} - Finding clusters for domain {domain} and rule {rule}");

            var occurrenceTotal = occurrenceTotalsByRule[rule];

            // Working set of not-yet-clustered coordinates; shared with the clustering instance.
            var remaining = new HashSet<CoordWithOcc>(
                ruleEntry.Value.Select(pair => new CoordWithOcc(pair.Key, pair.Value)));

            var clustering = new QTClustering<CoordWithOcc>(
                distanceHelper: new CoordWithOccDistanceHelper(),
                clusterDiameter: clusterThresholdKm,
                itemsSet: remaining);

            for (Cluster<CoordWithOcc> cluster = clustering.NextCluster();
                 cluster != null;
                 cluster = clustering.NextCluster())
            {
                var clusterOccurrences = cluster.Members.Sum(member => member.Occurrences);
                if (clusterOccurrences < minItemsPerCluster)
                {
                    break;
                }

                var supportRatio = clusterOccurrences / (1.0d * occurrenceTotal);
                if (supportRatio < minSupportRatioPerCluster)
                {
                    break;
                }

                centroidsByRule = centroidsByRule ?? this.RetrieveRulesToCentroidsList(result, domain);
                ruleCentroids = ruleCentroids ?? this.RetrieveCentroidsList(centroidsByRule, rule);

                var centroid = new PatternMiningCoordinates()
                {
                    Latitude = cluster.Centroid.Coord.Latitude,
                    Longitude = cluster.Centroid.Coord.Longitude,
                    Confidence = supportRatio
                };

                ruleCentroids.Add(centroid);
                Console.WriteLine($" Found centroid: {centroid}");

                remaining.ExceptWith(cluster.Members);

                // Stop early when even all leftover occurrences together could not reach the threshold.
                var remainingRatio = remaining.Sum(member => member.Occurrences) / (1.0d * occurrenceTotal);
                if (remainingRatio < minSupportRatioPerCluster)
                {
                    break;
                }
            }
        }
    }

    return result;
}
/// <summary>
/// For every (domain, rule) pair, repeatedly pulls clusters out of the rule's coordinate
/// set and records the centroid of each cluster that satisfies both the minimum-size and
/// the minimum-support thresholds.
/// </summary>
/// <remarks>
/// Clustering consumes a private copy of each coordinate set, so the sets inside
/// <paramref name="domainsToRulesToCoordinates"/> are left unmodified.
/// </remarks>
/// <param name="domainCounts">Not read by this method.</param>
/// <param name="ruleCountsForDomains">Not read by this method.</param>
/// <param name="domainsToRulesToCoordinates">Domain -> rule -> set of coordinates to cluster.</param>
/// <param name="clusterThresholdKm">Value passed to the clustering as its cluster diameter.</param>
/// <param name="minItemsPerCluster">Minimum number of members a cluster needs to be accepted.</param>
/// <param name="minSupportRatioPerCluster">
/// Minimum ratio of a cluster's member count to the rule's original coordinate count.
/// </param>
/// <returns>
/// The dictionary handed to <c>RetrieveRulesToCentroidsList</c>; presumably that helper and
/// <c>RetrieveCentroidsList</c> create the per-domain / per-rule entries on demand (confirm
/// against their implementation). Each recorded centroid stores the support ratio as its
/// <c>Confidence</c>.
/// </returns>
private Dictionary<string, Dictionary<PatternRule, List<PatternMiningCoordinates>>> FindClusterCentroids(
    Dictionary<string, int> domainCounts,
    Dictionary<string, Dictionary<PatternRule, int>> ruleCountsForDomains,
    Dictionary<string, Dictionary<PatternRule, HashSet<PatternMiningCoordinates>>> domainsToRulesToCoordinates,
    double clusterThresholdKm,
    int minItemsPerCluster,
    double minSupportRatioPerCluster)
{
    var domainsToRulesToCentroids = new Dictionary<string, Dictionary<PatternRule, List<PatternMiningCoordinates>>>();

    foreach (var domainEntry in domainsToRulesToCoordinates)
    {
        var domain = domainEntry.Key;

        // Fetched lazily so the helper is only invoked once a cluster is actually accepted.
        Dictionary<PatternRule, List<PatternMiningCoordinates>> rulesToCentroids = null;

        foreach (var ruleEntry in domainEntry.Value)
        {
            var rule = ruleEntry.Key;
            List<PatternMiningCoordinates> centroidsForRule = null;

            // Work on a copy (same comparer) so removing clustered members below does not
            // destroy the caller's input data.
            var remaining = new HashSet<PatternMiningCoordinates>(ruleEntry.Value, ruleEntry.Value.Comparer);
            var originalCoordinatesCount = remaining.Count;

            var clustering = new QTClustering<PatternMiningCoordinates>(
                distanceHelper: new PatternMiningCoordinatesDistanceHelper(),
                clusterDiameter: clusterThresholdKm,
                itemsSet: remaining);

            while (true)
            {
                Cluster<PatternMiningCoordinates> cluster = clustering.NextCluster();
                if (cluster == null)
                {
                    break;
                }

                if (cluster.Members.Count < minItemsPerCluster)
                {
                    break;
                }

                var supportRatio = cluster.Members.Count / (1.0d * originalCoordinatesCount);
                if (supportRatio < minSupportRatioPerCluster)
                {
                    break;
                }

                if (rulesToCentroids == null)
                {
                    rulesToCentroids = this.RetrieveRulesToCentroidsList(domainsToRulesToCentroids, domain);
                }

                if (centroidsForRule == null)
                {
                    centroidsForRule = this.RetrieveCentroidsList(rulesToCentroids, rule);
                }

                centroidsForRule.Add(new PatternMiningCoordinates()
                {
                    Latitude = cluster.Centroid.Latitude,
                    Longitude = cluster.Centroid.Longitude,
                    Confidence = supportRatio
                });

                remaining.ExceptWith(cluster.Members);

                // Stop early when even all leftover coordinates together could not reach the threshold.
                var maxRemainingSupportRatio = remaining.Count / (1.0d * originalCoordinatesCount);
                if (maxRemainingSupportRatio < minSupportRatioPerCluster)
                {
                    break;
                }
            }
        }
    }

    return domainsToRulesToCentroids;
}