/// <summary>
/// Inserts the <see cref="MNodeEntry{TValue}"/> into the subtree rooted at <paramref name="node"/>.
/// Internal nodes route the entry down to a child; leaf nodes either store it or, when full, are split.
/// </summary>
/// <param name="node">The node at which the insertion starts.</param>
/// <param name="newNodeEntry">The entry being inserted.</param>
private void Add(MNode<int> node, MNodeEntry<int> newNodeEntry)
{
    /*
     * NOTE: Insertion, split, partition and promotion are intricate. The original paper is the best guide:
     * P. Ciaccia, M. Patella, and P. Zezula. M-tree: an efficient access method for similarity search in metric spaces.
     */

    if (!node.IsInternalNode)
    {
        // Leaf node: a full leaf has to be split, otherwise the entry is stored directly.
        if (node.IsFull)
        {
            this.Split(node, newNodeEntry);
            return;
        }

        // Entries of a non-root leaf record their distance to the leaf's parent entry;
        // a root leaf has no parent entry, so nothing is recorded in that case.
        var nodeIsRoot = node == this.Root;
        if (!nodeIsRoot)
        {
            newNodeEntry.DistanceFromParent = this.Metric(
                this.internalArray[node.ParentEntry.Value],
                this.internalArray[newNodeEntry.Value]);
        }

        node.Add(newNodeEntry);
        return;
    }

    // Internal node: pair every routing entry with its distance to the new entry.
    var candidates = node.Entries
        .Select(entry => new
        {
            Entry = entry,
            Distance = this.Metric(this[entry.Value], this[newNodeEntry.Value])
        })
        .ToArray();

    // Routing entries whose covering ball strictly contains the new entry.
    var enclosing = candidates
        .Where(candidate => candidate.Distance < candidate.Entry.CoveringRadius)
        .ToArray();

    MNodeEntry<int> target;
    if (enclosing.Length > 0)
    {
        // At least one ball already contains the new entry: descend into the closest one.
        var nearestIndex = enclosing.Select(candidate => candidate.Distance).MinIndex();
        target = enclosing[nearestIndex].Entry;
    }
    else
    {
        // No ball contains the new entry: choose the one whose radius has to grow the least
        // and enlarge it so that it reaches the new entry.
        var cheapestIndex = candidates
            .Select(candidate => candidate.Distance - candidate.Entry.CoveringRadius)
            .MinIndex();
        target = candidates[cheapestIndex].Entry;
        target.CoveringRadius = candidates[cheapestIndex].Distance;
    }

    // Continue the insertion in the chosen entry's subtree.
    this.Add(target.ChildNode, newNodeEntry);
}
// TODO: If we are willing to take a performance hit, we could abstract both the promote and partition methods
// TODO: Some promotion policies actually DEPEND on the partition method (mM_RAD below runs a balanced partition for every candidate pair).

/// <summary>
/// Chooses two <see cref="MNodeEntry{T}"/>s to be promoted up the tree. The two entries are chosen
/// according to the mM_RAD split policy with balanced partitions defined in reference [1] pg. 431.
/// </summary>
/// <param name="entries">The entries from which the two entries to promote are chosen.</param>
/// <param name="isInternalNode">Specifies if the <paramref name="entries"/> come from an internal node.</param>
/// <returns>
/// A <see cref="PromotionResult{T}"/> holding the two newly created promoted entries (value and covering radius set)
/// and the two partitions of <paramref name="entries"/>, whose members have their <c>DistanceFromParent</c> updated.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown when no candidate pair could be selected, e.g. when fewer than two entries are supplied.
/// </exception>
private PromotionResult<int> Promote(MNodeEntry<int>[] entries, bool isInternalNode)
{
    // Pairs and partitions are expressed as indexes into "entries"; the distance matrix is indexed the same way.
    var uniquePairs = Utilities.UniquePairs(entries.Length);
    var distanceMatrix = new DistanceMatrix<T>(entries.Select(e => this.internalArray[e.Value]).ToArray(), this.Metric);

    /*
     * 2. mM_RAD Promotion and Balanced Partitioning
     *      Part of performing the mM_RAD promotion algorithm is
     *      implicitly calculating all possible partitions.
     *      For each pair of objects we calculate a balanced partition.
     *      The pair for which the maximum of the two covering radii is the smallest
     *      is the pair of objects we promote.
     *      In the iterations below, everything is index-based to keep it as fast as possible.
     */

    // The minimum values which are tracked throughout the mM_RAD algorithm
    var minPair = new Tuple<int, int>(-1, -1);
    var minMaxRadius = double.MaxValue;
    var minFirstPartition = new List<int>();
    var minSecondPartition = new List<int>();
    var minFirstPromotedObject = new MNodeEntry<int>();
    var minSecondPromotedObject = new MNodeEntry<int>();

    // We iterate through each pair performing a balanced partition of the remaining points.
    foreach (var pair in uniquePairs)
    {
        // Get the indexes of the points not in the current pair
        var pointsNotInPair = Enumerable.Range(0, entries.Length).Except(new[] { pair.Item1, pair.Item2 }).ToList(); // TODO: Optimize

        var partitions = this.BalancedPartition(pair, pointsNotInPair, distanceMatrix);
        var localFirstPartition = partitions.Item1;
        var localSecondPartition = partitions.Item2;

        /*
         * As specified in reference [1] pg. 430. If we are splitting a leaf node,
         * then the covering radius of promoted object O_1 with partition P_1 is
         *      coveringRadius_O_1 = max{ distance(O_1, O_i) | where O_i in P_1 }
         * If we are splitting an internal node then the covering radius
         * of promoted object O_1 with partition P_1 is
         *      coveringRadius_O_1 = max{ distance(O_1, O_i) + CoveringRadius of O_i | where O_i in P_1 }
         */
        var firstPromotedObjectCoveringRadius = localFirstPartition.MaxDistanceFromFirst(distanceMatrix);
        var secondPromotedObjectCoveringRadius = localSecondPartition.MaxDistanceFromFirst(distanceMatrix);

        if (isInternalNode)
        {
            firstPromotedObjectCoveringRadius = this.CalculateCoveringRadius(
                pair.Item1,
                localFirstPartition,
                distanceMatrix,
                entries);

            secondPromotedObjectCoveringRadius = this.CalculateCoveringRadius(
                pair.Item2,
                localSecondPartition,
                distanceMatrix,
                entries);
        }

        // The ranking value must be taken AFTER the internal-node adjustment above; otherwise internal
        // splits would be ranked by the leaf formula instead of the covering radii that are actually stored.
        var localMinMaxRadius = Math.Max(
            firstPromotedObjectCoveringRadius,
            secondPromotedObjectCoveringRadius);

        if (localMinMaxRadius < minMaxRadius)
        {
            minMaxRadius = localMinMaxRadius;
            minPair = pair;

            minFirstPromotedObject.CoveringRadius = firstPromotedObjectCoveringRadius;
            minFirstPartition = localFirstPartition;
            minSecondPromotedObject.CoveringRadius = secondPromotedObjectCoveringRadius;
            minSecondPartition = localSecondPartition;
        }
    }

    // Without a selected pair the index lookups below would use -1; fail with a clear message instead.
    if (minPair.Item1 < 0 || minPair.Item2 < 0)
    {
        throw new InvalidOperationException(
            "Unable to choose two entries to promote: no candidate pair produced a covering radius below double.MaxValue.");
    }

    /*
     * 3. Creating the MNodeEntry Objects
     *      Now that we have identified the objects to be promoted and each partition
     *      we start setting and/or calculating some of the properties on the node entries
     */

    // set values of promoted objects
    var firstPartition = new List<MNodeEntry<int>>();
    var secondPartition = new List<MNodeEntry<int>>();
    minFirstPromotedObject.Value = entries[minPair.Item1].Value;
    minSecondPromotedObject.Value = entries[minPair.Item2].Value;

    // Each partition member stores its distance to the partition's element at index 0.
    // NOTE(review): this assumes BalancedPartition lists the promoted object first in each partition - confirm.
    firstPartition.AddRange(entries.WithIndexes(minFirstPartition));
    for (int i = 0; i < firstPartition.Count; i++)
    {
        firstPartition[i].DistanceFromParent = distanceMatrix[minFirstPartition[0], minFirstPartition[i]];
    }

    secondPartition.AddRange(entries.WithIndexes(minSecondPartition));
    for (int i = 0; i < secondPartition.Count; i++)
    {
        secondPartition[i].DistanceFromParent = distanceMatrix[minSecondPartition[0], minSecondPartition[i]];
    }

    var promotionResult = new PromotionResult<int>
    {
        FirstPromotionObject = minFirstPromotedObject,
        SecondPromotionObject = minSecondPromotedObject,
        FirstPartition = firstPartition,
        SecondPartition = secondPartition
    };

    // TODO: This method is called from the split method. In the split method we call both promote and partition in one method
    return promotionResult;
}
/// <summary>
/// Splits <paramref name="node"/> to make room for <paramref name="newEntry"/>. The node's entries plus the new
/// entry are handed to <c>Promote</c>; the first partition stays in <paramref name="node"/> and the second
/// partition goes into a newly created node. If <paramref name="node"/> is the root, a new root holding the two
/// promoted entries is created; otherwise the first promoted entry replaces the node's former parent entry and
/// the second one is added to the parent, which is split in turn when it is already full.
/// </summary>
/// <param name="node">The node to split; the method also calls itself with the parent node.</param>
/// <param name="newEntry">The entry that is included in the split together with the node's entries.</param>
private void Split(MNode<int> node, MNodeEntry<int> newEntry)
{
    var nodeIsRoot = node == this.Root;

    // Capture the parent and the slot of the parent entry before the node is rewired below.
    MNode<int> parent = nodeIsRoot ? null : node.ParentEntry.EnclosingNode;
    var parentEntryIndex = nodeIsRoot ? -1 : parent.Entries.IndexOf(node.ParentEntry);

    // Work on a local copy of the entries that also contains the new entry.
    var candidates = node.Entries.ToList();
    candidates.Add(newEntry);

    var sibling = new MNode<int> { Capacity = this.Capacity };
    var promoted = this.Promote(candidates.ToArray(), node.IsInternalNode); // TODO: Does not need to be an array

    // Distribute the partitions over the existing node and its new sibling.
    node.Entries = promoted.FirstPartition;
    sibling.Entries = promoted.SecondPartition;

    // Each promoted entry routes to the node that holds its partition.
    var firstPromoted = promoted.FirstPromotionObject;
    var secondPromoted = promoted.SecondPromotionObject;
    firstPromoted.ChildNode = node;
    secondPromoted.ChildNode = sibling;

    if (nodeIsRoot)
    {
        // Splitting the root: the two promoted entries form a brand new root.
        var newRoot = new MNode<int>();
        newRoot.ParentEntry = null;
        newRoot.Capacity = this.Capacity;

        var rootEntries = new List<MNodeEntry<int>> { firstPromoted, secondPromoted };
        newRoot.AddRange(rootEntries);
        this.Root = newRoot;
        return;
    }

    // Entries placed directly in the root get -1 as distance from parent; all others store the
    // distance to the parent's own parent entry.
    firstPromoted.DistanceFromParent = parent == this.Root
        ? -1
        : this.Metric(this.internalArray[firstPromoted.Value], this.internalArray[parent.ParentEntry.Value]);

    // The first promoted entry takes over the slot of the node's former parent entry.
    parent.SetEntryAtIndex(parentEntryIndex, firstPromoted);

    if (parent.IsFull)
    {
        // No room for the second promoted entry: the split propagates upwards.
        this.Split(parent, secondPromoted);
        return;
    }

    secondPromoted.DistanceFromParent = parent == this.Root
        ? -1
        : this.Metric(this.internalArray[secondPromoted.Value], this.internalArray[parent.ParentEntry.Value]);

    parent.Add(secondPromoted);
}