Exemple #1
0
        /// <summary>
        /// Inserts the <see cref="MNodeEntry{TValue}"/> into the subtree that starts at <paramref name="node"/>.
        /// An internal node forwards the entry to exactly one of its children; a leaf stores the entry directly or,
        /// when it is already full, hands the entry over to <c>Split</c>.
        /// </summary>
        /// <param name="node">The node at which the insertion starts.</param>
        /// <param name="newNodeEntry">The entry being inserted.</param>
        private void Add(MNode <int> node, MNodeEntry <int> newNodeEntry)
        {
            // Background for the routing rule used below:
            // P. Ciaccia, M. Patella, and P. Zezula. M-tree: an efficient access method for similarity search in metric spaces.

            if (!node.IsInternalNode)
            {
                // Leaf reached: a full leaf has to be split, otherwise the entry is stored right here.
                if (node.IsFull)
                {
                    this.Split(node, newNodeEntry);
                    return;
                }

                // A leaf that is also the root has no parent entry to measure the distance against.
                var leafIsRoot = node == this.Root;
                if (!leafIsRoot)
                {
                    newNodeEntry.DistanceFromParent = this.Metric(this.internalArray[node.ParentEntry.Value], this.internalArray[newNodeEntry.Value]);
                }

                node.Add(newNodeEntry);
                return;
            }

            // Internal node: measure the new object against every entry of this node exactly once.
            var candidates = node.Entries
                             .Select(entry => new { Entry = entry, Distance = this.Metric(this[entry.Value], this[newNodeEntry.Value]) })
                             .ToArray();

            // Entries whose ball (covering radius) already contains the new object.
            var enclosing = candidates.Where(c => c.Distance < c.Entry.CoveringRadius).ToArray();

            MNodeEntry <int> target;

            if (enclosing.Length > 0)
            {
                // At least one ball already contains the object: descend into the one with the closest centre.
                var nearestIndex = enclosing.Select(c => c.Distance).MinIndex();
                target = enclosing[nearestIndex].Entry;
            }
            else
            {
                // No ball contains the object: pick the entry whose radius has to grow the least
                // and enlarge that radius so that it reaches the new object.
                var cheapestIndex = candidates.Select(c => c.Distance - c.Entry.CoveringRadius).MinIndex();
                var chosen        = candidates[cheapestIndex];

                target = chosen.Entry;
                target.CoveringRadius = chosen.Distance;
            }

            // Continue the insertion in the chosen entry's subtree.
            this.Add(target.ChildNode, newNodeEntry);
        }
Exemple #2
0
        // TODO: If we are willing to take a performance hit, we could abstract both the promote and partition methods
        // TODO: Some promotion methods (e.g. mM_RAD) actually DEPEND on the partition method.

        /// <summary>
        /// Chooses two <see cref="MNodeEntry{T}"/>s to be promoted up the tree and partitions the remaining
        /// entries between them. The two entries are chosen according to the mM_RAD split policy with balanced
        /// partitions defined in reference [1] pg. 431: every unique pair is tried, and the pair whose larger
        /// covering radius is the smallest wins.
        /// </summary>
        /// <param name="entries">The entries from which the two promoted entries are chosen. At least two are required.</param>
        /// <param name="isInternalNode">
        /// Specifies if the <paramref name="entries"/> come from an internal node. For an internal node the
        /// covering radii are obtained from <c>CalculateCoveringRadius</c> instead of the plain maximum distance.
        /// </param>
        /// <returns>
        /// The two promoted entries (value and covering radius set) together with the partition of
        /// <paramref name="entries"/> assigned to each of them.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="entries"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException"><paramref name="entries"/> holds fewer than two entries.</exception>
        /// <exception cref="InvalidOperationException">No pair produced a covering radius below <see cref="double.MaxValue"/>.</exception>
        private PromotionResult <int> Promote(MNodeEntry <int>[] entries, bool isInternalNode)
        {
            if (entries == null)
            {
                throw new ArgumentNullException("entries");
            }

            if (entries.Length < 2)
            {
                throw new ArgumentException("At least two entries are required to choose a promotion pair.", "entries");
            }

            // Everything below works on indexes into 'entries'; the distance matrix is addressed with the same indexes.
            var uniquePairs    = Utilities.UniquePairs(entries.Length);
            var distanceMatrix = new DistanceMatrix <T>(entries.Select(e => this.internalArray[e.Value]).ToArray(), this.Metric);

            /*
             *  mM_RAD Promotion and Balanced Partitioning
             *  Part of performing the mM_RAD promotion algorithm is
             *  implicitly calculating all possible partitions.
             *  For each pair of objects we calculate a balanced partition.
             *  The pair for which the maximum of the two covering radii is the smallest
             *  is the pair we promote.
             *  In the iterations below, everything is index-based to keep it as fast as possible.
             */

            // The best candidate found so far while iterating over the pairs
            var minPair                 = new Tuple <int, int>(-1, -1);
            var minMaxRadius            = double.MaxValue;
            var minFirstPartition       = new List <int>();
            var minSecondPartition      = new List <int>();
            var minFirstPromotedObject  = new MNodeEntry <int>();
            var minSecondPromotedObject = new MNodeEntry <int>();

            // We iterate through each pair performing a balanced partition of the remaining points.
            foreach (var pair in uniquePairs)
            {
                // Get the indexes of the points not in the current pair
                // TODO: Optimize
                var pointsNotInPair =
                    Enumerable.Range(0, entries.Length).Except(new[] { pair.Item1, pair.Item2 }).ToList();

                var partitions           = this.BalancedPartition(pair, pointsNotInPair, distanceMatrix);
                var localFirstPartition  = partitions.Item1;
                var localSecondPartition = partitions.Item2;

                /*
                 *  As specified in reference [1] pg. 430. If we are splitting a leaf node,
                 *  then the covering radius of promoted object O_1 with partition P_1 is
                 *  coveringRadius_O_1 = max{ distance(O_1, O_i) | where O_i in P_1 }
                 *  If we are splitting an internal node then the covering radius
                 *  of promoted object O_1 with partition P_1 is
                 *  coveringRadius_O_1 = max{ distance(O_1, O_i) + CoveringRadius of O_i | where O_i in P_1 }
                 */
                var firstPromotedObjectCoveringRadius = isInternalNode
                    ? this.CalculateCoveringRadius(pair.Item1, localFirstPartition, distanceMatrix, entries)
                    : localFirstPartition.MaxDistanceFromFirst(distanceMatrix);

                var secondPromotedObjectCoveringRadius = isInternalNode
                    ? this.CalculateCoveringRadius(pair.Item2, localSecondPartition, distanceMatrix, entries)
                    : localSecondPartition.MaxDistanceFromFirst(distanceMatrix);

                // The pairs are ranked on the radii that will actually be stored. The maximum therefore has to be
                // taken AFTER the internal-node radii are known; ranking internal-node pairs on the leaf-style
                // radii would ignore the covering radii of the children.
                var localMinMaxRadius = Math.Max(
                    firstPromotedObjectCoveringRadius,
                    secondPromotedObjectCoveringRadius);

                if (localMinMaxRadius < minMaxRadius)
                {
                    minMaxRadius = localMinMaxRadius;
                    minPair      = pair;

                    minFirstPromotedObject.CoveringRadius = firstPromotedObjectCoveringRadius;
                    minFirstPartition = localFirstPartition;

                    minSecondPromotedObject.CoveringRadius = secondPromotedObjectCoveringRadius;
                    minSecondPartition = localSecondPartition;
                }
            }

            if (minPair.Item1 < 0 || minPair.Item2 < 0)
            {
                // Happens when no pair produced a radius below double.MaxValue (for example a metric returning NaN
                // or infinity); fail with a clear message instead of indexing 'entries' with -1.
                throw new InvalidOperationException("No promotion pair could be selected from the supplied entries.");
            }

            /*
             *  Creating the MNodeEntry Objects
             *  Now that we have identified the objects to be promoted and each partition
             *  we set the remaining properties on the node entries.
             */
            var firstPartition  = new List <MNodeEntry <int> >();
            var secondPartition = new List <MNodeEntry <int> >();

            minFirstPromotedObject.Value  = entries[minPair.Item1].Value;
            minSecondPromotedObject.Value = entries[minPair.Item2].Value;

            // Every partitioned entry records its distance to the first index of its partition.
            // NOTE(review): this presumes BalancedPartition puts the promoted object's index at position 0 of
            // each list (MaxDistanceFromFirst appears to rely on the same layout) - confirm.
            firstPartition.AddRange(entries.WithIndexes(minFirstPartition));
            for (int i = 0; i < firstPartition.Count; i++)
            {
                firstPartition[i].DistanceFromParent = distanceMatrix[minFirstPartition[0], minFirstPartition[i]];
            }

            secondPartition.AddRange(entries.WithIndexes(minSecondPartition));
            for (int i = 0; i < secondPartition.Count; i++)
            {
                secondPartition[i].DistanceFromParent = distanceMatrix[minSecondPartition[0], minSecondPartition[i]];
            }

            var promotionResult = new PromotionResult <int>
            {
                FirstPromotionObject  = minFirstPromotedObject,
                SecondPromotionObject = minSecondPromotedObject,
                FirstPartition        = firstPartition,
                SecondPartition       = secondPartition
            };

            // TODO: This method is called from the split method. In the split method we call both promote and partition in one method

            return(promotionResult);
        }
Exemple #3
0
        /// <summary>
        /// Splits <paramref name="node"/> in two. The node's entries plus <paramref name="newEntry"/> are passed to
        /// <c>Promote</c>; the first partition stays in <paramref name="node"/>, the second one moves into a newly
        /// created node, and the two promoted entries are placed in the parent. When <paramref name="node"/> is the
        /// root, a new root holding the two promoted entries is created instead. When the parent is full, the
        /// parent is split in turn.
        /// </summary>
        /// <param name="node">The node to split.</param>
        /// <param name="newEntry">The entry to be added to <paramref name="node"/> as part of the split.</param>
        private void Split(MNode <int> node, MNodeEntry <int> newEntry)
        {
            var splittingRoot = node == this.Root;

            // Look up the parent node and the position of our parent entry in it before anything is rewired below.
            MNode <int> parentNode   = null;
            var         slotInParent = -1;

            if (!splittingRoot)
            {
                parentNode   = node.ParentEntry.EnclosingNode;
                slotInParent = parentNode.Entries.IndexOf(node.ParentEntry);
            }

            // Work on a copy of the node's entries that also contains the new entry.
            var candidates = new List <MNodeEntry <int> >(node.Entries) { newEntry };

            var sibling = new MNode <int> {
                Capacity = this.Capacity
            };
            var promoted       = this.Promote(candidates.ToArray(), node.IsInternalNode);
            var firstPromoted  = promoted.FirstPromotionObject;
            var secondPromoted = promoted.SecondPromotionObject;

            // The existing node keeps the first partition, the new sibling receives the second.
            node.Entries    = promoted.FirstPartition;
            sibling.Entries = promoted.SecondPartition;

            // Each promoted entry points at the node that holds its partition.
            firstPromoted.ChildNode  = node;
            secondPromoted.ChildNode = sibling;

            if (splittingRoot)
            {
                // The tree grows by one level: a fresh root takes both promoted entries.
                var grownRoot = new MNode <int> {
                    ParentEntry = null, Capacity = this.Capacity
                };
                grownRoot.AddRange(new List <MNodeEntry <int> > { firstPromoted, secondPromoted });

                this.Root = grownRoot;
                return;
            }

            // Entries of the root get -1 as distance from parent; all others are measured against the parent's own parent entry.
            firstPromoted.DistanceFromParent = parentNode == this.Root
                ? -1
                : this.Metric(this.internalArray[firstPromoted.Value], this.internalArray[parentNode.ParentEntry.Value]);

            // The first promoted entry takes over the slot of the entry that used to reference this node.
            parentNode.SetEntryAtIndex(slotInParent, firstPromoted);

            if (parentNode.IsFull)
            {
                // No room left for the second promoted entry: split the parent as well.
                this.Split(parentNode, secondPromoted);
                return;
            }

            secondPromoted.DistanceFromParent = parentNode == this.Root
                ? -1
                : this.Metric(this.internalArray[secondPromoted.Value], this.internalArray[parentNode.ParentEntry.Value]);

            parentNode.Add(secondPromoted);
        }