/// <summary>
/// Creates a leaf node for every match that has a row in <paramref name="matrix"/>, computes each leaf node's
/// neighbors ordered by distance, and returns the leaf nodes that ended up with at least one neighbor.
/// </summary>
/// <param name="clusterableMatches">The matches to convert into leaf nodes. An empty list yields an empty result.</param>
/// <param name="matrix">Coordinate rows keyed by match index. Matches without an entry are skipped.</param>
/// <param name="maxIndex">Not read by this method; kept so the signature stays compatible with existing callers.</param>
/// <param name="distanceMetric">Provides the significant coordinates used for bucketing and is handed to every leaf node.</param>
/// <param name="progressData">Receives the status messages and one increment per processed leaf node.</param>
/// <returns>The leaf nodes whose <c>NeighborsByDistance</c> list is non-empty.</returns>
private async Task<List<Node>> GetLeafNodesAsync(List<IClusterableMatch> clusterableMatches, ConcurrentDictionary<int, float[]> matrix, int maxIndex, IDistanceMetric distanceMetric, ProgressData progressData)
{
    // Enumerable.Average throws InvalidOperationException on an empty sequence,
    // so report an average of 0 when there are no matches at all.
    var average = clusterableMatches.Count > 0
        ? clusterableMatches.Average(match => match.Coords.Count())
        : 0;

    progressData.Reset($"Calculating coordinates for {clusterableMatches.Count} matches (average {average:N0} shared matches per match)...", clusterableMatches.Count);

    var leafNodes = await Task.Run(() =>
    {
        var nodes = new List<LeafNode>(clusterableMatches.Count);
        foreach (var match in clusterableMatches)
        {
            // A single TryGetValue avoids the ContainsKey + indexer double lookup, which on a
            // ConcurrentDictionary could also throw if the key were removed between the two calls.
            if (matrix.TryGetValue(match.Index, out var coords))
            {
                nodes.Add(new LeafNode(match.Index, coords, distanceMetric));
            }
        }
        return nodes;
    });

    // NOTE(review): Increment() below runs once per leaf node, while the count passed here is the number of
    // matches. If the second argument is the progress maximum, leafNodes.Count may be the better value - confirm.
    progressData.Reset($"Finding closest pairwise distances for {clusterableMatches.Count} matches (average {average:N0} shared matches per match)...", clusterableMatches.Count);

    // Index the leaf nodes by each of their significant coordinates so that candidate
    // neighbors can be found by bucket lookup.
    var buckets = leafNodes
        .SelectMany(leafNode => distanceMetric.SignficantCoordinates(leafNode.Coords).Select(coord => new { Coord = coord, LeafNode = leafNode }))
        .GroupBy(pair => pair.Coord, pair => pair.LeafNode)
        .ToDictionary(g => g.Key, g => g.ToList());

    var calculateNeighborsByDistanceTasks = leafNodes.Select(async leafNode =>
    {
        leafNode.NeighborsByDistance = await Task.Run(() => GetNeighborsByDistance(leafNode, buckets, distanceMetric));
        progressData.Increment();
    });

    await Task.WhenAll(calculateNeighborsByDistanceTasks);

    // Leaf nodes without any neighbor are left out of the result.
    var result = leafNodes.Where(leafNode => leafNode.NeighborsByDistance.Count > 0).ToList<Node>();

    progressData.Reset();

    return result;
}
/// <summary>
/// Collects the distinct leaf nodes that share at least one significant coordinate with
/// <paramref name="leafNode"/> and have a higher index, wraps each in a <c>Neighbor</c>,
/// and returns them ordered by ascending <c>DistanceSquared</c>.
/// </summary>
/// <param name="leafNode">The node whose neighbors are being collected.</param>
/// <param name="buckets">Leaf nodes grouped by coordinate; coordinates without a bucket contribute nothing.</param>
/// <param name="distanceMetric">Supplies the significant coordinates of <paramref name="leafNode"/>.</param>
/// <returns>The neighbors of <paramref name="leafNode"/>, closest first.</returns>
private List<Neighbor> GetNeighborsByDistance(LeafNode leafNode, IDictionary<int, List<LeafNode>> buckets, IDistanceMetric distanceMetric)
{
    // A node can sit in several buckets when it shares more than one coordinate;
    // this set makes sure it is only turned into a neighbor once.
    var alreadyAdded = new HashSet<LeafNode>();
    var candidates = new List<Neighbor>();

    foreach (var coord in distanceMetric.SignficantCoordinates(leafNode.Coords))
    {
        // Every node in this bucket has at least one shared match in common with leafNode.
        if (!buckets.TryGetValue(coord, out var bucket))
        {
            continue;
        }

        foreach (var other in bucket)
        {
            // Only the direction A -> B is needed (not also B -> A), because the
            // caller ultimately looks at the smallest distances.
            if (other.Index > leafNode.Index && alreadyAdded.Add(other))
            {
                candidates.Add(new Neighbor(other, leafNode));
            }
        }
    }

    return candidates
        .OrderBy(candidate => candidate.DistanceSquared)
        .ToList();
}