예제 #1
0
        /// <summary>
        /// Looks up the centroid list stored for <paramref name="rule"/>, registering a new
        /// empty list in the map first when the rule has no entry yet.
        /// </summary>
        /// <param name="rulesToCentroids">Map from rule to its centroid list; a missing entry is added in place.</param>
        /// <param name="rule">Rule whose centroid list is requested.</param>
        /// <returns>The list held in the map for the rule (the freshly added one when none existed).</returns>
        public List <PatternMiningCoordinates> RetrieveCentroidsList(
            Dictionary <PatternRule, List <PatternMiningCoordinates> > rulesToCentroids,
            PatternRule rule)
        {
            List<PatternMiningCoordinates> existing;

            // Fast path: the rule is already registered, hand back whatever is stored.
            if (rulesToCentroids.TryGetValue(rule, out existing))
            {
                return existing;
            }

            // First time this rule is seen: store a fresh list so later calls share it.
            var created = new List<PatternMiningCoordinates>();
            rulesToCentroids[rule] = created;
            return created;
        }
예제 #2
0
        /// <summary>
        /// Chooses which of two rules should be deleted, using three tie-breakers in order:
        /// atom count, total atom substring length, and number of RTL-indexed atoms.
        /// </summary>
        /// <param name="rule1">First candidate rule.</param>
        /// <param name="rule2">Second candidate rule.</param>
        /// <returns>
        /// The rule with fewer atoms; when atom counts match, the rule with the smaller total
        /// substring length; when those match too, <paramref name="rule1"/> only if it has
        /// strictly fewer RTL atoms, otherwise <paramref name="rule2"/>.
        /// </returns>
        private PatternRule DetermineRuleToDelete(PatternRule rule1, PatternRule rule2)
        {
            // 1) Atom count decides first.
            int firstAtomCount  = rule1.Atoms.Count;
            int secondAtomCount = rule2.Atoms.Count;

            if (firstAtomCount != secondAtomCount)
            {
                return firstAtomCount > secondAtomCount ? rule2 : rule1;
            }

            // 2) Same number of atoms: compare the summed substring lengths.
            int firstTotalLength = 0;
            foreach (var atom in rule1.Atoms)
            {
                firstTotalLength += atom.Substring.Length;
            }

            int secondTotalLength = 0;
            foreach (var atom in rule2.Atoms)
            {
                secondTotalLength += atom.Substring.Length;
            }

            if (firstTotalLength != secondTotalLength)
            {
                return firstTotalLength > secondTotalLength ? rule2 : rule1;
            }

            // 3) Still tied: count RTL-indexed atoms; a tie here resolves to rule2.
            int firstRtlCount = 0;
            foreach (var atom in rule1.Atoms)
            {
                if (atom.IndexType == IndexType.RTL)
                {
                    firstRtlCount++;
                }
            }

            int secondRtlCount = 0;
            foreach (var atom in rule2.Atoms)
            {
                if (atom.IndexType == IndexType.RTL)
                {
                    secondRtlCount++;
                }
            }

            return firstRtlCount >= secondRtlCount ? rule2 : rule1;
        }
        /// <summary>
        /// Picks, among <paramref name="results"/>, the result whose city is geographically
        /// closest to any coordinates stored for a pattern rule that can be generated from the
        /// subdomain parts of <paramref name="hostname"/>.
        /// </summary>
        /// <param name="hostname">Hostname that is split into a registrable domain and subdomain parts.</param>
        /// <param name="results">Candidate classification results; null entries and entries without a city are skipped.</param>
        /// <returns>
        /// The closest result when its distance is at most <c>distanceThresholdKm</c>; otherwise
        /// <c>null</c>. Also <c>null</c> when there are no candidates, the hostname cannot be split
        /// into a registrable domain plus subdomain parts, the domain has no reduced rules, or
        /// none of the generated rules has stored coordinates.
        /// </returns>
        private ClassificationResult PickBestByPattern(string hostname, List <ClassificationResult> results)
        {
            // Nothing to choose from; this also prevents a NullReferenceException in the scan below.
            if (results == null || results.Count == 0)
            {
                return null;
            }

            var splitResults = HostnameSplitter.Split(hostname);

            if (splitResults == null
                || splitResults.DomainInfo?.RegistrableDomain == null
                || splitResults.SubdomainParts == null
                || splitResults.SubdomainParts.Count == 0)
            {
                return null;
            }

            Dictionary<PatternRule, PatternMiningCoordinates> rulesToCoordinates;

            if (!this.reducedRules.TryGetValue(splitResults.DomainInfo.RegistrableDomain, out rulesToCoordinates)
                || rulesToCoordinates == null)
            {
                return null;
            }

            var ruleAtoms = this.miner.CreateRuleAtoms(splitResults.SubdomainParts);

            if (ruleAtoms == null || ruleAtoms.Count == 0)
            {
                return null;
            }

            var rules = this.miner.GeneratePossibleRules(ruleAtoms);

            if (rules == null || rules.Count == 0)
            {
                return null;
            }

            // Only the coordinates matter for the distance scan, so collect just those
            // for the generated rules that are known for this domain.
            var candidateCoordinates = new List<PatternMiningCoordinates>();

            foreach (var rule in rules)
            {
                PatternMiningCoordinates coordinates;

                if (rulesToCoordinates.TryGetValue(rule, out coordinates))
                {
                    candidateCoordinates.Add(coordinates);
                }
            }

            if (candidateCoordinates.Count == 0)
            {
                return null;
            }

            ClassificationResult closestResult = null;

            // Infinity is the natural "no distance seen yet" sentinel for a double.
            double smallestDistanceKm = double.PositiveInfinity;

            foreach (var result in results)
            {
                if (result == null || result.City == null)
                {
                    continue;
                }

                foreach (var coordinates in candidateCoordinates)
                {
                    var distance = DistanceHelper.Distance(
                        result.City.Latitude,
                        result.City.Longitude,
                        coordinates.Latitude,
                        coordinates.Longitude,
                        DistanceUnit.Kilometer);

                    // Strict comparison: on equal distances the earlier result wins.
                    if (distance < smallestDistanceKm)
                    {
                        closestResult      = result;
                        smallestDistanceKm = distance;
                    }
                }
            }

            if (closestResult != null && smallestDistanceKm <= this.distanceThresholdKm)
            {
                return closestResult;
            }

            return null;
        }