/// <summary>
/// Build the delimited text record for a point: its id from InputDataIds, its class label
/// from FinalClassification, and then every coordinate, in that order.
/// No terminating newline is added.
/// </summary>
/// <returns>The record.</returns>
/// <param name="point">Point to format.</param>
/// <param name="fieldDelimiter">Text placed between adjacent fields.</param>
string PointToRecord(UnsignedPoint point, string fieldDelimiter = ",")
{
    // The class label is looked up before the id.
    var classLabel = FinalClassification.GetClassLabel(point);
    var pointId = InputDataIds[point];

    var record = new StringBuilder();
    record.Append(pointId);
    record.Append(fieldDelimiter);
    record.Append(classLabel);

    // Every coordinate is preceded by the delimiter, so the record never ends with one.
    foreach (var value in point.LazyCoordinates())
    {
        record.Append(fieldDelimiter);
        record.Append(value);
    }

    return record.ToString();
}
/// <summary>
/// Apply Density-based reclassification to the FinalClassification.
/// This may cause some clusters to be split into smaller clusters.
/// It will not cause any existing clusters to be merged.
/// Nothing is done when the configuration says to skip density classification.
/// </summary>
void ReclassifyByDensity()
{
    // Honor the configuration switch that turns this step off entirely.
    if (Configuration.DensityClassifier.SkipDensityClassification)
    {
        return;
    }

    const string timerName = "Reclassify by density";
    Timer.Start(timerName);
    var splitCount = 0;

    // Points are moved between classes inside the loop, so walk a snapshot of the labels
    // instead of the label sequence that FinalClassification hands out.
    var labelSnapshot = FinalClassification.ClassLabels().ToList();
    foreach (var clusterId in labelSnapshot)
    {
        if (!NeedsReclustering(clusterId))
        {
            continue;
        }

        // Collect the members of the cluster and remember each by its UniqueId, so the
        // density results can be mapped back to these points afterwards.
        var members = FinalClassification.PointsInClass(clusterId);
        var memberById = new Dictionary<int, UnsignedPoint>();
        foreach (var member in members)
        {
            memberById[member.UniqueId] = member;
        }

        // The labeling function hands out "1", "2", ... on successive calls; the resulting
        // classification is what the Hilbert index is built over.
        int nextLabel = 1;
        var startingClassification = new Classification<UnsignedPoint, string>(members, p => (nextLabel++).ToString());
        var hilbertIndex = new HilbertIndex(startingClassification, Configuration.Index.BitsPerDimension);

        // The unmergeable size is the configured fraction of the member count, truncated to an int.
        var unmergeableSize = (int)(members.Count * Configuration.DensityClassifier.UnmergeableSizeFraction);
        var classifier = new DensityClassifier(hilbertIndex, MergeSquareDistance, unmergeableSize)
        {
            NeighborhoodRadiusMultiplier = Configuration.DensityClassifier.NeighborhoodRadiusMultiplier,
            OutlierSize = Configuration.DensityClassifier.OutlierSize,
            MergeableShrinkage = Configuration.DensityClassifier.MergeableShrinkage
        };

        // The result is expressed in terms of HilbertPoints; they are matched to the original
        // UnsignedPoints through the UniqueId lookup built above.
        var densityResult = classifier.Classify();

        // Only a result with more than one partition counts as a split.
        if (densityResult.NumPartitions > 1)
        {
            splitCount++;
            foreach (var hilbertPoint in densityResult.Points())
            {
                var original = memberById[hilbertPoint.UniqueId];

                // The new label is "<previous label>-<density label>".
                var oldLabel = FinalClassification.GetClassLabel(original);
                var densityLabel = densityResult.GetClassLabel(hilbertPoint);
                var combinedLabel = $"{oldLabel}-{densityLabel}";

                // Take the point out of its current class and file it under the new label.
                FinalClassification.Remove(original);
                FinalClassification.Add(original, combinedLabel);
            }
        }
    }

    Timer.Stop(timerName);
    Logger.Info($"Clusters split due to density-based reclassification: {splitCount}");
}