/// <summary>
/// Looks up the centroid list stored for <paramref name="rule"/>, registering a new
/// empty list under that rule when the map has no entry for it yet.
/// </summary>
/// <param name="rulesToCentroids">Map from rule to its centroid list; gains an entry when the rule is missing.</param>
/// <param name="rule">Rule used as the lookup key.</param>
/// <returns>
/// The list held in <paramref name="rulesToCentroids"/> for <paramref name="rule"/>. This is the
/// stored instance itself, not a copy.
/// </returns>
public List<PatternMiningCoordinates> RetrieveCentroidsList(
    Dictionary<PatternRule, List<PatternMiningCoordinates>> rulesToCentroids,
    PatternRule rule)
{
    List<PatternMiningCoordinates> existing;
    if (rulesToCentroids.TryGetValue(rule, out existing))
    {
        return existing;
    }

    // First time this rule is seen: store a fresh list so later calls return the same instance.
    var created = new List<PatternMiningCoordinates>();
    rulesToCentroids[rule] = created;
    return created;
}
/// <summary>
/// Chooses which of two rules to return for deletion by comparing, in order: the number of
/// atoms, the summed length of the atom substrings, and the number of atoms whose index
/// type is <c>IndexType.RTL</c>.
/// </summary>
/// <param name="rule1">First rule to compare.</param>
/// <param name="rule2">Second rule to compare.</param>
/// <returns>
/// At the first criterion on which the rules differ, the rule with the smaller value.
/// When all three criteria are equal, <paramref name="rule2"/>.
/// </returns>
private PatternRule DetermineRuleToDelete(PatternRule rule1, PatternRule rule2)
{
    // Criterion 1: number of atoms.
    if (rule1.Atoms.Count > rule2.Atoms.Count)
    {
        return rule2;
    }

    if (rule1.Atoms.Count != rule2.Atoms.Count)
    {
        return rule1;
    }

    // Criterion 2: total number of characters across all atom substrings.
    var firstTotalLength = 0;
    foreach (var atom in rule1.Atoms)
    {
        firstTotalLength += atom.Substring.Length;
    }

    var secondTotalLength = 0;
    foreach (var atom in rule2.Atoms)
    {
        secondTotalLength += atom.Substring.Length;
    }

    if (firstTotalLength > secondTotalLength)
    {
        return rule2;
    }

    if (firstTotalLength != secondTotalLength)
    {
        return rule1;
    }

    // Criterion 3: number of RTL-indexed atoms; a tie resolves to rule2.
    var firstRtlAtoms = 0;
    foreach (var atom in rule1.Atoms)
    {
        if (atom.IndexType == IndexType.RTL)
        {
            firstRtlAtoms++;
        }
    }

    var secondRtlAtoms = 0;
    foreach (var atom in rule2.Atoms)
    {
        if (atom.IndexType == IndexType.RTL)
        {
            secondRtlAtoms++;
        }
    }

    return firstRtlAtoms >= secondRtlAtoms ? rule2 : rule1;
}
/// <summary>
/// Selects the candidate from <paramref name="results"/> whose city is nearest to the
/// coordinates of any reduced rule matching <paramref name="hostname"/>, and returns it
/// only if that distance does not exceed <c>distanceThresholdKm</c>.
/// </summary>
/// <param name="hostname">Hostname that is split into a registrable domain and subdomain parts.</param>
/// <param name="results">
/// Candidate results. Null entries and entries without a <c>City</c> are skipped; a null
/// list yields null.
/// </param>
/// <returns>
/// The nearest candidate within the threshold, or null when the hostname cannot be split,
/// the domain has no reduced rules, no generated rule matches a reduced rule, or no
/// candidate is close enough.
/// </returns>
private ClassificationResult PickBestByPattern(string hostname, List<ClassificationResult> results)
{
    var splitResults = HostnameSplitter.Split(hostname);
    if (splitResults == null
        || splitResults.DomainInfo?.RegistrableDomain == null
        || splitResults.SubdomainParts == null
        || splitResults.SubdomainParts.Count == 0)
    {
        return null;
    }

    Dictionary<PatternRule, PatternMiningCoordinates> rulesToCoordinates;
    if (!this.reducedRules.TryGetValue(splitResults.DomainInfo.RegistrableDomain, out rulesToCoordinates))
    {
        return null;
    }

    var ruleAtoms = this.miner.CreateRuleAtoms(splitResults.SubdomainParts);
    if (ruleAtoms == null || ruleAtoms.Count == 0)
    {
        return null;
    }

    var rules = this.miner.GeneratePossibleRules(ruleAtoms);
    if (rules == null || rules.Count == 0)
    {
        return null;
    }

    // Only the coordinates are needed from here on; the matching rule itself is never
    // reported, so a plain list replaces the former rule-keyed dictionary.
    var matchedCoordinates = new List<PatternMiningCoordinates>();
    foreach (var rule in rules)
    {
        PatternMiningCoordinates coordinates;
        if (rulesToCoordinates.TryGetValue(rule, out coordinates))
        {
            matchedCoordinates.Add(coordinates);
        }
    }

    // Nothing to compare against: treat a missing candidate list like an empty one
    // instead of failing with a NullReferenceException in the loop below.
    if (results == null || matchedCoordinates.Count == 0)
    {
        return null;
    }

    ClassificationResult closestResult = null;
    double smallestDistanceKm = double.MaxValue;

    foreach (var result in results)
    {
        if (result?.City == null)
        {
            continue;
        }

        foreach (var coordinates in matchedCoordinates)
        {
            var distance = DistanceHelper.Distance(
                result.City.Latitude,
                result.City.Longitude,
                coordinates.Latitude,
                coordinates.Longitude,
                DistanceUnit.Kilometer);

            // Strict comparison: on a tie the earlier candidate in results is kept.
            if (distance < smallestDistanceKm)
            {
                closestResult = result;
                smallestDistanceKm = distance;
            }
        }
    }

    if (closestResult != null && smallestDistanceKm <= this.distanceThresholdKm)
    {
        return closestResult;
    }

    return null;
}