/// <summary>
/// Streams the ground-truth dataset at <paramref name="inPath"/>, derives candidate pattern rules from
/// the subdomain parts of every usable hostname, filters the collected rules and returns the result of
/// <c>FindClusterCentroids</c> (domain -> rule -> coordinates).
/// </summary>
/// <param name="datasetParser">Parser used to enumerate the ground-truth items.</param>
/// <param name="inPath">Dataset location handed to <paramref name="datasetParser"/>.</param>
/// <param name="minRuleOcc">Occurrence threshold forwarded to <c>DeleteRulesBelowOccThreshold</c>.</param>
/// <param name="clusterThresholdKm">Cluster distance threshold forwarded to <c>FindClusterCentroids</c>.</param>
/// <param name="minItemsPerCluster">Minimum cluster size forwarded to <c>FindClusterCentroids</c>.</param>
/// <param name="minSupportRatioPerCluster">Minimum support ratio forwarded to <c>FindClusterCentroids</c>.</param>
/// <param name="pruneIntervalCount">Pruning interval forwarded to <c>AddRulesCoordinatesToDomain</c>.</param>
/// <param name="pruneMinKeepThreshold">Pruning keep-threshold forwarded to <c>AddRulesCoordinatesToDomain</c>.</param>
/// <returns>The per-domain, per-rule coordinate lists produced by <c>FindClusterCentroids</c>.</returns>
public Dictionary <string, Dictionary <PatternRule, List <PatternMiningCoordinates> > > MinePatternsFromGT(
            GroundTruthParser datasetParser,
            string inPath,
            int minRuleOcc,
            double clusterThresholdKm,
            int minItemsPerCluster,
            double minSupportRatioPerCluster,
            int pruneIntervalCount    = 10000,
            int pruneMinKeepThreshold = 10)
        {
            // domain -> occurrences, e.g. frontiernet.net -> 435463
            var occurrencesPerDomain = new Dictionary <string, int>();

            // domain -> rule -> occurrences, e.g. frontiernet.net -> wlfr|rtl1 -> 79
            var ruleOccurrencesPerDomain = new Dictionary <string, Dictionary <PatternRule, int> >();

            // domain -> rule -> coordinates, e.g. frontiernet.net -> wlfr|rtl1 -> X,Y
            var coordinatesPerDomainRule = new Dictionary <string, Dictionary <PatternRule, HashSet <PatternMiningCoordinates> > >();

            var acceptedItems = 0;

            foreach (var groundTruthEntry in datasetParser.Parse(inPath, populateTextualLocationInfo: true))
            {
                var parsedHost = HostnameSplitter.Split(groundTruthEntry.Hostname);

                // Skip hostnames that could not be split or have no registrable domain.
                var registrableDomain = parsedHost?.DomainInfo?.RegistrableDomain;

                if (registrableDomain == null)
                {
                    continue;
                }

                // Skip hostnames without any subdomain part: there is nothing to build rules from.
                var labels = parsedHost.SubdomainParts;

                if (labels == null || labels.Count == 0)
                {
                    continue;
                }

                acceptedItems++;

                // Progress output every 100000 accepted items.
                if (acceptedItems % 100000 == 0)
                {
                    Console.WriteLine(acceptedItems);
                }

                var candidateRules = this.GeneratePossibleRules(this.CreateRuleAtoms(labels));

                this.AddRulesCoordinatesToDomain(
                    occurrencesPerDomain,
                    ruleOccurrencesPerDomain,
                    coordinatesPerDomainRule,
                    registrableDomain,
                    candidateRules,
                    groundTruthEntry,
                    pruneIntervalCount,
                    pruneMinKeepThreshold);
            }

            // Post-processing: drop rare rules, then equivalent rules, before clustering.
            this.DeleteRulesBelowOccThreshold(occurrencesPerDomain, ruleOccurrencesPerDomain, coordinatesPerDomainRule, minRuleOcc);
            this.DeleteEquivalentRules(ruleOccurrencesPerDomain, coordinatesPerDomainRule);

            return this.FindClusterCentroids(
                occurrencesPerDomain,
                ruleOccurrencesPerDomain,
                coordinatesPerDomainRule,
                clusterThresholdKm,
                minItemsPerCluster,
                minSupportRatioPerCluster);
        }
        /// <summary>
        /// Streams the ground-truth dataset at <paramref name="inPath"/> and counts, per registrable domain
        /// and per pattern rule derived from the hostname's subdomain parts, how often each geohash
        /// (4 characters, roughly +/- 20 km) was observed.
        /// </summary>
        /// <remarks>
        /// Conceptual example for: static-32-213-114-101.wlfr.ct.frontiernet.net
        /// <code>
        /// key: frontiernet.net
        /// value:
        ///     key: wlfr|rtl1   (the string "wlfr", located at right-to-left index 1)
        ///     value:
        ///         key: drkh7   (geohash; the example uses precision 5 -> +/- 2.4 km)
        ///         value: 100   (found 100 times in the dataset for this key)
        /// </code>
        /// </remarks>
        /// <param name="datasetParser">Parser used to enumerate the ground-truth items.</param>
        /// <param name="inPath">Dataset location handed to <paramref name="datasetParser"/>.</param>
        /// <param name="pruneIntervalCount">
        /// Pruning runs for a domain whenever the value returned by <c>IncrementOccurrences</c> for that
        /// domain is a multiple of this interval. Must be greater than zero.
        /// </param>
        /// <param name="pruneMinKeepThreshold">Passed to <c>PruneCounts</c> as its <c>minKeepThreshold</c>.</param>
        /// <returns>
        /// A <c>PatternMiningResult</c> carrying the geohash counts and the rule counts. The per-domain
        /// occurrence counts are used only to schedule pruning and are not part of the result.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="datasetParser"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="pruneIntervalCount"/> is zero or negative.</exception>
        public PatternMiningResult MineCommonStringGeohashesFromGT(
            GroundTruthParser datasetParser,
            string inPath,
            int pruneIntervalCount    = 10000,
            int pruneMinKeepThreshold = 10)
        {
            if (datasetParser == null)
            {
                throw new ArgumentNullException(nameof(datasetParser));
            }

            // Fail fast: the interval is used as the divisor of a modulo below, so a value of zero would
            // otherwise raise DivideByZeroException in the middle of processing the dataset.
            if (pruneIntervalCount <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(pruneIntervalCount), pruneIntervalCount, "The prune interval must be greater than zero.");
            }

            // Keys example:             frontiernet.net       wlfr|rtl1           drkh7        15
            var rulesGeohashCounts = new Dictionary <string, Dictionary <PatternRule, Dictionary <string, int> > >();

            // Keys example:                     frontiernet.net         wlfr|rtl1   79
            var rulesCounts = new Dictionary <string, Dictionary <PatternRule, int> >();

            // Example:               frontiernet.net  435463
            // Only used to decide when to prune; it is not returned to the caller.
            var domainCounts = new Dictionary <string, int>();

            var processCount = 0;

            foreach (var item in datasetParser.Parse(inPath, populateTextualLocationInfo: true))
            {
                var hostname     = item.Hostname;
                var splitResults = HostnameSplitter.Split(hostname);

                // Skip hostnames that could not be split, have no registrable domain,
                // or have no subdomain parts to derive rules from.
                if (splitResults == null || splitResults.DomainInfo?.RegistrableDomain == null || splitResults.SubdomainParts == null || splitResults.SubdomainParts.Count == 0)
                {
                    continue;
                }

                processCount++;

                // Progress output every 100000 accepted items.
                if (processCount % 100000 == 0)
                {
                    Console.WriteLine(processCount);
                }

                var domain         = splitResults.DomainInfo.RegistrableDomain;
                var subdomainParts = splitResults.SubdomainParts;

                var rulesToGeohashCounts = this.AddRetrieveDomainToRulesGeohashCounts(rulesGeohashCounts, domain);
                var rulesToCounts        = this.AddRetrieveDomainToRulesCounts(rulesCounts, domain);

                /*
                 * Geohash precision (number of characters -> approximate error in km):
                 *  1   ±2500
                 *  2   ±630
                 *  3   ±78
                 *  4   ±20
                 *  5   ±2.4
                 *  6   ±0.61
                 *  7   ±0.076
                 *  8   ±0.019
                 */

                // A set is used so that further precisions (e.g. 2 or 3 characters) can be added alongside;
                // currently only the 4-character geohash (±20 km) is recorded.
                var geohashes = new HashSet <string>();
                geohashes.Add(GeoHash.Encode(item.Latitude, item.Longitude, numberOfChars: 4));

                var ruleAtoms = this.CreateRuleAtoms(subdomainParts);
                var rules     = this.GeneratePossibleRules(ruleAtoms);

                this.IncrementGeohashCounts(rulesToGeohashCounts, rulesToCounts, geohashes, rules);

                var domainOcc = this.IncrementOccurrences(domainCounts, domain);

                // Periodically prune this domain's counters, once per pruneIntervalCount occurrences.
                if (domainOcc % pruneIntervalCount == 0)
                {
                    this.PruneCounts(rulesToGeohashCounts, rulesToCounts, minKeepThreshold: pruneMinKeepThreshold);
                }
            }

            return new PatternMiningResult()
            {
                RulesGeohashCounts = rulesGeohashCounts,
                RulesCounts = rulesCounts
            };
        }