Beispiel #1
0
        /// <summary>
        /// Creates a leaf node for every match that has a row in <paramref name="matrix"/>, fills in each
        /// leaf node's <c>NeighborsByDistance</c> via <c>GetNeighborsByDistance</c>, and returns the leaf
        /// nodes that ended up with at least one neighbor.
        /// </summary>
        /// <param name="clusterableMatches">Matches to turn into leaf nodes.</param>
        /// <param name="matrix">Coordinate rows keyed by match index; matches without a row are skipped.</param>
        /// <param name="maxIndex">Not used by this method; kept so existing callers continue to compile.</param>
        /// <param name="distanceMetric">Metric handed to each leaf node and used to pick significant coordinates.</param>
        /// <param name="progressData">Progress reporter; reset at the start of each phase and at the end, incremented once per leaf node.</param>
        /// <returns>The leaf nodes whose neighbor list is non-empty.</returns>
        private async Task<List<Node>> GetLeafNodesAsync(List<IClusterableMatch> clusterableMatches, ConcurrentDictionary<int, float[]> matrix, int maxIndex, IDistanceMetric distanceMetric, ProgressData progressData)
        {
            // Enumerable.Average throws InvalidOperationException on an empty sequence,
            // so report an average of 0 when there are no matches instead of crashing.
            var average = clusterableMatches.Count > 0
                ? clusterableMatches.Average(match => match.Coords.Count())
                : 0.0;

            progressData.Reset($"Calculating coordinates for {clusterableMatches.Count} matches (average {average:N0} shared matches per match)...", clusterableMatches.Count);

            var leafNodes = await Task.Run(() =>
            {
                var nodes = new List<LeafNode>(clusterableMatches.Count);
                foreach (var match in clusterableMatches)
                {
                    // A single TryGetValue replaces ContainsKey + indexer: one lookup instead of two,
                    // and no KeyNotFoundException if the key is removed between the check and the read.
                    if (matrix.TryGetValue(match.Index, out var coords))
                    {
                        nodes.Add(new LeafNode(match.Index, coords, distanceMetric));
                    }
                }
                return nodes;
            });

            progressData.Reset($"Finding closest pairwise distances for {clusterableMatches.Count} matches (average {average:N0} shared matches per match)...", clusterableMatches.Count);

            // Index the leaf nodes by each of their significant coordinates so that candidate
            // neighbors can be found by bucket lookup.
            var buckets = leafNodes
                          .SelectMany(leafNode => distanceMetric.SignficantCoordinates(leafNode.Coords).Select(coord => new { Coord = coord, LeafNode = leafNode }))
                          .GroupBy(pair => pair.Coord, pair => pair.LeafNode)
                          .ToDictionary(g => g.Key, g => g.ToList());

            // One task per leaf node; each computes that node's neighbor list and then bumps the progress counter.
            var calculateNeighborsByDistanceTasks = leafNodes.Select(async leafNode =>
            {
                leafNode.NeighborsByDistance = await Task.Run(() => GetNeighborsByDistance(leafNode, buckets, distanceMetric));
                progressData.Increment();
            });

            await Task.WhenAll(calculateNeighborsByDistanceTasks);

            // Leaf nodes without any neighbor are dropped from the result.
            var result = leafNodes.Where(leafNode => leafNode.NeighborsByDistance.Count > 0).ToList<Node>();

            progressData.Reset();
            return result;
        }
Beispiel #2
0
        /// <summary>
        /// Collects the leaf nodes that share at least one significant coordinate bucket with
        /// <paramref name="leafNode"/> and have a larger index, and returns them as neighbors
        /// ordered by ascending <c>DistanceSquared</c>.
        /// </summary>
        /// <param name="leafNode">The node whose neighbors are being collected.</param>
        /// <param name="buckets">Leaf nodes grouped by coordinate; coordinates without a bucket are skipped.</param>
        /// <param name="distanceMetric">Metric used to pick the significant coordinates of <paramref name="leafNode"/>.</param>
        /// <returns>One neighbor per distinct candidate node, ordered by ascending <c>DistanceSquared</c>.</returns>
        private List<Neighbor> GetNeighborsByDistance(LeafNode leafNode, IDictionary<int, List<LeafNode>> buckets, IDistanceMetric distanceMetric)
        {
            // Tracks candidates already accepted; a node can sit in several buckets
            // when it has more than one shared match in common.
            var alreadySeen = new HashSet<LeafNode>();
            var unsorted = new List<Neighbor>();

            foreach (var coord in distanceMetric.SignficantCoordinates(leafNode.Coords))
            {
                // Every node in this bucket has at least one shared match in common.
                if (!buckets.TryGetValue(coord, out var bucket))
                {
                    continue;
                }

                foreach (var candidate in bucket)
                {
                    // Only the A -> B direction is needed (not also B -> A), since the
                    // smallest distances are what ultimately get examined.
                    if (candidate.Index <= leafNode.Index)
                    {
                        continue;
                    }

                    if (alreadySeen.Add(candidate))
                    {
                        unsorted.Add(new Neighbor(candidate, leafNode));
                    }
                }
            }

            // OrderBy is a stable sort, so ties keep their first-seen order.
            return unsorted.OrderBy(neighbor => neighbor.DistanceSquared).ToList();
        }