internal BPlusTreeNode <TKey, TValue> SplitChild(BPlusTreeNode <TKey, TValue> originalChild, out int keywordIndex)
        {
            // Split the current node into two nodes:
            //   a. Left node contains all pointers/keywords to the left of the median
            //   b. Right node contains all pointers/keywords to the right of the median
            //   c. Parent node gets median keyword pushed into it
            //
            // Disk-wise: the originalChild is always written back to its original position (since we're splitting it, it's guaranteed to get smaller)
            //            the newChild is placed into either an available abandoned node or tacked on the end of the file.
            //            the parent node is written back to its original position IF IT FITS.  Since we're coyping a keyword
            //               up into the parent node, it may cause it to outgrow its allocated size in the file.  If this occurs,
            //               the call to this.Write() implicitly calls relocate().  relocate() will then either write the parent to
            //               an abandoned node or tack it onto the end of the file.



            // first, determine the offset of the current originalchild in this (the parent)
            keywordIndex = 0;

            try {
                BPlusTree <TKey, TValue> .LogInfo("Begin splitting node=" + this.ToString() + ", child=" + originalChild.ToString());

                if (ChildLocations.Count > 0)
                {
                    // TODO: troubleshoot this -- after several thousand splits, originalChild.Location is no longer found in the ChildLocations array!!!
                    //if (LastChildLocation == originalChild.Location) {
                    //    // rightmost child, on a full node.
                    //} else {
                    while (keywordIndex < ChildLocations.Count && ChildLocations[keywordIndex] != originalChild.Location)
                    {
                        keywordIndex++;
                    }
                    //}
                }

                // assign the median keyword to the parent's Keywords list (inserts just before the current one)
                //if (keywordIndex > this.Keywords.Count) {
                //    // rightmost keyword, add to the end of the list
                //    this.Keywords.Add(originalChild.MedianKeyword);
                //} else {
                // not rightmost keyword, insert it at the proper position
                this.Keywords.Insert(keywordIndex, originalChild.MedianKeyword);
                //}

                if (originalChild.IsLeaf)
                {
                    // since this is a leaf, we will copy up the median keyword to the parent (as opposed to push up for index nodes)

                    // create a leaf node
                    BPlusTreeLeafNode <TKey, TValue> origNode = (BPlusTreeLeafNode <TKey, TValue>)originalChild;
                    BPlusTreeLeafNode <TKey, TValue> newNode  = new BPlusTreeLeafNode <TKey, TValue>(this.Tree, (BPlusTreeIndexNode <TKey, TValue>) this, null);

                    // copy right half of keywords + values from originalChild to newChild
                    newNode.Keywords.AddRange(origNode.Keywords.Skip(this.Tree.Median - 1));
                    newNode.Values.AddRange(origNode.Values.Skip(this.Tree.Median - 1));

                    // remove right half of keywords + values from originalChild
                    origNode.Keywords = origNode.Keywords.Take(this.Tree.Median - 1).ToList();
                    origNode.Values   = origNode.Values.Take(this.Tree.Median - 1).ToList();

                    var nextKeywordIndex = keywordIndex + 1;
                    Tree.Lock(delegate() {
                        // write out nodes so sibling locations can be set properly

                        // note: orig node should never relocate on write since we're making it smaller (moving 1/2 of its keywords to a new node)
                        if (origNode.Write())
                        {
                            BPlusTree <TKey, TValue> .LogInfo("orig leaf node relocated during split child: " + origNode.ToString());
                        }
                        if (newNode.Write())
                        {
                            BPlusTree <TKey, TValue> .LogInfo("new leaf node relocated during split child: " + newNode.ToString());
                        }

                        // adjust sibling locations (we always add the new child to the right of the original one)
                        newNode.RightSiblingLocation = origNode.RightSiblingLocation;
                        //				newNode.LeftSiblingLocation = origNode.Location;

                        origNode.RightSiblingLocation = newNode.Location;

                        ChildLocations.Insert(nextKeywordIndex, newNode.Location);

                        // write them out again so we save sibling locations
                        if (origNode.Write())
                        {
                            BPlusTree <TKey, TValue> .LogInfo("orig leaf node relocated on 2nd write during split child: " + origNode.ToString());
                        }
                        if (newNode.Write())
                        {
                            BPlusTree <TKey, TValue> .LogInfo("new leaf node relocated on 2nd write during split child: " + newNode.ToString());
                        }

                        // and write out the parent since it changed as well
                        if (Write())
                        {
                            BPlusTree <TKey, TValue> .LogInfo("parent of leaf node relocated during split child: " + this.ToString());
                        }
                    });

                    return(newNode);
                }
                else
                {
                    // since this is an index node, we push up the keyword to the parent (as opposed to copy up for leaves)
                    // this means the median keyword will be removed from the original child but not moved to the new child

                    // create an index node
                    BPlusTreeIndexNode <TKey, TValue> origNode = (BPlusTreeIndexNode <TKey, TValue>)originalChild;
                    BPlusTreeIndexNode <TKey, TValue> newNode  = new BPlusTreeIndexNode <TKey, TValue>(this.Tree, (BPlusTreeIndexNode <TKey, TValue>) this, null);

                    // copy right half of keywords + childlocations from originalChild to newChild
                    // (since this is an index node, skip the median keyword
                    newNode.Keywords.AddRange(origNode.Keywords.Skip(this.Tree.Median));
                    newNode.ChildLocations.AddRange(origNode.ChildLocations.Skip(this.Tree.Median));

                    // remove right half of keywords + childlocations from originalChild
                    // (note we stop one shy of the median keyword -- this is because the median keyword needs to be "pushed up"
                    //  on an index node split.  However, we don't drop the corresponding ChildLocation)
                    origNode.Keywords       = origNode.Keywords.Take(this.Tree.Median - 1).ToList();
                    origNode.ChildLocations = origNode.ChildLocations.Take(this.Tree.Median).ToList();

                    var ki = keywordIndex;
                    Tree.Lock(delegate() {
                        if (origNode.Write())
                        {
                            BPlusTree <TKey, TValue> .LogInfo("orig index node relocated during split child: " + origNode.ToString());
                        }
                        if (newNode.Write())
                        {
                            BPlusTree <TKey, TValue> .LogInfo("new index node relocated during split child: " + newNode.ToString());
                        }

                        // original node may have relocated on write...
                        ChildLocations[ki] = origNode.Location;

                        ChildLocations.Insert(ki + 1, newNode.Location);

                        // and write out the parent since it changed as well
                        if (Write())
                        {
                            BPlusTree <TKey, TValue> .LogInfo("parent of index node relocated during split child: " + this.ToString());
                        }
                    });

                    return(newNode);
                }
            } finally {
                BPlusTree <TKey, TValue> .LogInfo("End splitting node=" + this.ToString() + ", child=" + originalChild.ToString());
            }
        }
Пример #2
0
        internal BPlusTreeNode SplitChild(int childOffset, BPlusTreeNode originalChild, BPlusTreeNode grandParent)
        {
            // Split the current node into two nodes:
            //   a. Left node contains all pointers/keywords to the left of the median
            //   b. Right node contains all pointers/keywords to the right of the median
            //   c. Parent node gets median keyword pushed into it
            //
            // Disk-wise: the originalChild is always written back to its original position.
            //            the newChild is placed into either an available abandoned node or tacked on the end of the file.
            //            the parent node is written back to its original position IF IT FITS.  Since we're coyping a keyword
            //               up into the parent node, it may cause it to outgrow its allocated size in the file.  If this occurs,
            //               the call to this.Write() implicitly calls relocate().  relocate() will then either write the parent to
            //               an abandoned node or tack it onto the end of the file.

            _tree.Log("------------ BEGIN Splitting node ------------");
            _tree.Log("BEGIN Splitting node parent -> ".PadRight(50) + ToString());
            _tree.Log("BEGIN Splitting node child -> ".PadRight(50) + originalChild.ToString());

            BPlusTreeNode newChild = BPlusTreeNode.CreateNode(_tree);

            newChild.IsLeaf = originalChild.IsLeaf;

            newChild.KeywordCount = _tree.Fanout - _tree.MinimumChildren;

            // assign items left of median
            for (int i = 0; i < newChild.KeywordCount; i++)
            {
                // Left half of original child already has appropriate values
                // However, left half of new child needs data from right half of original child
                newChild.ChildrenByteOffsets[i] = originalChild.ChildrenByteOffsets[i + _tree.Median + 1];
                newChild.Keywords[i]            = originalChild.Keywords[i + _tree.Median + 1];
            }
            newChild.ChildrenByteOffsets[newChild.KeywordCount] = originalChild.ChildrenByteOffsets[_tree.Fanout];

            // adjust the new child's relative info as needed (we always add to the right)
            if (originalChild.IsLeaf)
            {
                // only leaves should track siblings
                newChild.LeftSiblingFileOffset  = originalChild.FileOffset;
                newChild.RightSiblingFileOffset = originalChild.RightSiblingFileOffset;
            }
            else
            {
                newChild.LeftSiblingFileOffset  = -1;
                newChild.RightSiblingFileOffset = -1;
            }


            // make room in parent for the median values
            // (copy all those above the childOffset over one)

            for (int i = KeywordCount; i > childOffset; i--)
            {
                ChildrenByteOffsets[i + 1] = ChildrenByteOffsets[i];
                Keywords[i] = Keywords[i - 1];
            }
            KeywordCount++;

            Keywords[childOffset] = originalChild.Keywords[_tree.Median];
            // just set byterange that points at our new child to the default for now.  we'll assign it the proper value later (once we know the true file offsets)
            ChildrenByteOffsets[childOffset + 1] = 0;

            // if we're splitting a leaf node, we must copy up the median value.
            // if we're splitting an index node, we must simply push it up (i.e. do not leave a copy in the original child)
            int startErasingAt = _tree.MinimumChildren;

            if (!originalChild.IsLeaf)
            {
                // index node, erase the keyword (as we essentially want to push it up)
                startErasingAt--;
            }

            // assign items right of median (note this effectively means we copy up the median value, not move it up)
            for (int i = startErasingAt; i < _tree.Fanout; i++)
            {
                // Left half of newChild already has proper values (nulls/defaults)
                // However, right half of originalChild still has the values we just copied to the left half of newChild.
                // null/default those values.
                if (originalChild.IsLeaf || i > startErasingAt)
                {
                    originalChild.ChildrenByteOffsets[i] = 0;
                }
                originalChild.Keywords[i] = String.Empty;
            }
            // there's always one more child than keyword...
            originalChild.ChildrenByteOffsets[_tree.Fanout] = 0;
            originalChild.KeywordCount = startErasingAt;

            // determine where to put this new child (in an abandoned node or tack onto end of file)
            BPlusAbandonedNode abandonedNode = _tree.GetNextAvailableNodeLocation(newChild);

            //newChild._chunkSize = abandonedNode.ByteCount;
            newChild.FileOffset = abandonedNode.FileOffset;

            newChild.Write(this);


            // the original child's right sibling must be updated to point at the new child instead
            if (originalChild.RightSiblingFileOffset > 0 && originalChild.IsLeaf)
            {
                BPlusTreeNode originalRightSibling = BPlusTreeNode.Read(_tree, originalChild.RightSiblingFileOffset, true);
                originalRightSibling.LeftSiblingFileOffset = newChild.FileOffset;
                originalRightSibling.Write(null);
            }



            // now that we've written the new child out,
            // we know what to set the originalChild's right sibling to...
            if (originalChild.IsLeaf)
            {
                originalChild.RightSiblingFileOffset = newChild.FileOffset;
            }
            originalChild.Write(this);


            // we now know all the offsets we need to update in the parent...
            ChildrenByteOffsets[childOffset]     = originalChild.FileOffset;
            ChildrenByteOffsets[childOffset + 1] = newChild.FileOffset;

            // this node may relocate because we're adding a keyword to it.
            // if it does, we have to be sure our direct parent knows about it (so it can update its child file offset that points to us)
            // hence the "grandParentNode".  Write() handles updating things correctly if we give it our parent (which is our children's grandparent) :)
            if (Write(grandParent))
            {
                //Debug.WriteLine("would have missed this before");
            }

            _tree.Log("END Splitting node parent -> ".PadRight(50) + ToString());
            _tree.Log("END Splitting node. original child -> ".PadRight(50) + originalChild.ToString());
            _tree.Log("END Splitting node. new child -> ".PadRight(50) + newChild.ToString());
            _tree.Log("------------ END Splitting node ------------");

            return(newChild);



            /*
             * newChild <- Allocate-Node()
             * leaf[newChild] <- leaf[parent]
             * n[newChild] <- t - 1
             * for j <- 1 to t - 1
             *       do keyj[newChild] <- keyj+t[parent]
             * if not leaf[parent]
             *       then for j <- 1 to t
             *                do cj[newChild] <- cj+t[parent]
             * n[parent] <- t - 1
             * for j <- n[originalChild] + 1 downto i + 1
             *       do cj+1[originalChild] <- cj[originalChild]
             * ci+1 <- newChild
             * for j <- n[originalChild] downto i
             *       do keyj+1[originalChild] <- keyj[originalChild]
             * keyi[originalChild] <- keyt[parent]
             * n[originalChild] <- n[originalChild] + 1
             * Disk-Write(parent)
             * Disk-Write(newChild)
             * Disk-Write(originalChild)
             *
             */
        }
Пример #3
0
        private void relocate(BPlusTreeNode parentNode)
        {
            // this node has outgrown its allotted space.
            // we need to move it to the end of the file and update everybody who points to it

            // so have to update the following:
            // 1. Parent node's pointer
            // 2. Left sibling's pointer
            // 3. Right sibling's pointer
            // 4. All children's parent pointer (which points at this node)

            _tree.Log("------------ BEGIN Relocating node ------------");
            _tree.Log("BEGIN Relocating node -> ".PadRight(50) + ToString());

            // get an abandoned node (or the end of the file, whichever comes first)
            BPlusAbandonedNode abandonedNode = _tree.GetNextAvailableNodeLocation(this);

            // mark this node as abandoned in the file (and remove from cache)
            _tree.AbandonNode(this);

            //if (this.IsRoot) {
            //    // we're relocating the root.
            //    _tree.Log("TODO: what to do here, if anything?");
            //}

            if (!IsLeaf)
            {
                // only leaf nodes should track left/right siblings.
                LeftSiblingFileOffset  = -1;
                RightSiblingFileOffset = -1;
            }

            if (LeftSiblingFileOffset > -1)
            {
                BPlusTreeNode originalLeftSibling = BPlusTreeNode.Read(_tree, this.LeftSiblingFileOffset, true);
                originalLeftSibling.RightSiblingFileOffset = originalLeftSibling.IsLeaf ? abandonedNode.FileOffset : -1;
                _tree.Log(("Updating left sibling node @ " + originalLeftSibling.FileOffset + " -> ").PadRight(50) + originalLeftSibling.ToString());
                originalLeftSibling.Write(null);
            }
            if (RightSiblingFileOffset > -1)
            {
                BPlusTreeNode originalRightSibling = BPlusTreeNode.Read(_tree, this.RightSiblingFileOffset, true);
                originalRightSibling.LeftSiblingFileOffset = originalRightSibling.IsLeaf ? abandonedNode.FileOffset : -1;
                _tree.Log(("Updating right sibling node @ " + originalRightSibling.FileOffset + " -> ").PadRight(50) + originalRightSibling.ToString());
                originalRightSibling.Write(null);
            }

            if (this.IsRoot)
            {
                // special case!  no parent means this is the root node.
                // update the int at the very beginning of the file to point at the new root node
                _tree.WriteRootNodeOffset(abandonedNode.FileOffset);
            }
            else
            {
                if (parentNode != null)
                {
                    for (int i = 0; i < parentNode.KeywordCount + 1; i++)
                    {
                        if (parentNode.ChildrenByteOffsets[i] == FileOffset)
                        {
                            parentNode.ChildrenByteOffsets[i] = abandonedNode.FileOffset;
                            _tree.Log(("Updating parent node @ " + parentNode.FileOffset + " -> ").PadRight(50) + parentNode.ToString());
                            parentNode.Write(null);
                            // the Root node is cached in the tree object -- udpate it if need be
                            if (parentNode.IsRoot)
                            {
                                _tree.Root = parentNode;
                            }
                            break;
                        }
                    }
                }
            }


            _tree.Writer.BaseStream.Position = abandonedNode.FileOffset;
            FileOffset = abandonedNode.FileOffset;
            //_chunkSize = abandonedNode.ByteCount;

            _tree.Log("END Relocating node (not written yet) -> ".PadRight(50) + ToString());
            _tree.Log("------------ END Relocating node ------------");
        }
Пример #4
0
        internal bool Insert(string keyword, long dataByteOffset, BPlusTreeNode parentNode)
        {
            //i <- n[x]
            //if leaf[x]
            //     then while i >= 1 and k < keyi[x]
            //            do keyi+1[x] <- keyi[x]
            //               i <- i - 1
            //          keyi+1[x] <- k
            //          n[x] <- n[x] + 1
            //          Disk-Write(x)
            //     else while i >= and k < keyi[x]
            //            do i <- i - 1
            //          i <- i + 1
            //          Disk-Read(ci[x])
            //          if n[ci[x]] = 2t - 1
            //               then B-Tree-Split-Child(x, i, ci[x])
            //                    if k > keyi[x]
            //                       then i <- i + 1
            //          B-Tree-Insert-Nonfull(ci[x], k)

            int i = KeywordCount - 1;

            if (IsLeaf)
            {
                while (i > -1 && String.Compare(keyword, Keywords[i], false) < 0)
                {
                    Keywords[i + 1]            = Keywords[i];
                    ChildrenByteOffsets[i + 1] = ChildrenByteOffsets[i];
                    i--;
                }
                Keywords[i + 1]            = keyword;
                ChildrenByteOffsets[i + 1] = dataByteOffset;
                KeywordCount++;
                return(Write(parentNode));
            }
            else
            {
                while (i > -1 && String.Compare(keyword, Keywords[i], false) < 0)
                {
                    i--;
                }
                i++;
                BPlusTreeNode child = BPlusTreeNode.Read(_tree, ChildrenByteOffsets[i], true);

                _tree.Log(("Inspecting node @ " + child.FileOffset + " -> ").PadRight(50) + child.ToString());
                if (!child.IsFull)
                {
                    return(child.Insert(keyword, dataByteOffset, this));
                }
                else
                {
                    // must first split the child
                    BPlusTreeNode newChild = SplitChild(i, child, parentNode);
                    if (String.Compare(keyword, newChild.Keywords[0], false) < 0)
                    {
                        // new keyword belongs somewhere below the original child
                        return(child.Insert(keyword, dataByteOffset, this));
                    }
                    else
                    {
                        // new keyword belongs somewhere below the new child
                        return(newChild.Insert(keyword, dataByteOffset, this));
                    }
                }
            }
        }
Пример #5
0
        // here's the vernacular:
        // The _keywords array represents all the keywords in this node.
        // The _byteRanges array represents all the file offsets in this node for either (a) a keyword or (b) the data itself.
        //   a. If it is located in an index node (i.e. non-Leaf), the byte range represents the start/end offsets in the index file (used for traversing to other nodes)
        //   b. If it is located in a Leaf node, the byte range represents the start/end offsets in the data file (for looking up associated hit(s))


        // Think ByteRange = pointer to next node or data, Keyword = item we're searching by
        // So we always have 1 more pointer (or byterange, BR) than search item (or keyword, K):
        //
        // |-------- Node ---------|
        // |      go       pi      |
        // |  /   |    |   |    \  |
        // | BR1  K1  BR2  K2  BR3 |
        // |-----------------------|


        /// <summary>
        /// Creates a new root node for the tree and makes the original root the first child of the new root.
        /// </summary>
        /// <param name="tree"></param>
        /// <returns></returns>
        internal static BPlusTreeNode ReplaceRoot(BPlusTree tree, BPlusTreeNode originalRoot, string keyword, long dataByteOffset)
        {
            // NOTE: This is a very special case.
            // replacing the root involves creating 2 new nodes:
            //  1. the new root node
            //  2. splitting the existing root node into 2 nodes, one of which is a new one

            BPlusTreeNode newRoot = null;

            // create a new root node, mark it as not a leaf, put it in exact same spot as original root
            newRoot = new BPlusTreeNode {
                IsLeaf                 = false,
                _tree                  = tree,
                Keywords               = new string[tree.Fanout],
                ChildrenByteOffsets    = new long[tree.Fanout + 1],
                KeywordCount           = 0,
                FileOffset             = -1,
                LeftSiblingFileOffset  = -1,
                RightSiblingFileOffset = -1,
            };

            tree.Log("------------ BEGIN Replacing root ------------");
            tree.Log("BEGIN Replacing root. original node -> ".PadRight(50) + originalRoot.ToString());
            tree.Log("BEGIN Replacing root. new node -> ".PadRight(50) + newRoot.ToString());


            // add our new root to file, update the root node pointer
            BPlusAbandonedNode abandoned = tree.GetNextAvailableNodeLocation(originalRoot);

            newRoot.FileOffset = abandoned.FileOffset;
            newRoot.Write(null);
            tree.WriteRootNodeOffset(newRoot.FileOffset);


            // trickle-down insert the new keyword/data
            if (!String.IsNullOrEmpty(keyword))
            {
                // split the original
                BPlusTreeNode newChild = newRoot.SplitChild(0, originalRoot, null);

                tree.Log("CONTINUE Replacing root. new child node -> ".PadRight(50) + originalRoot.ToString());


                if (newRoot.Insert(keyword, dataByteOffset, null))
                {
                    // the new root was relocated during insertion (outgrew its allotted space)
                    // the insert should take care of it

                    // re-read it in just in case
                    newRoot = BPlusTreeNode.Read(tree, newRoot.FileOffset, true);
                }
            }
            else
            {
                newRoot = BPlusTreeNode.Read(tree, newRoot.FileOffset, true);
            }


            tree.Log("END Replacing root. original node -> ".PadRight(50) + originalRoot.ToString());
            //			_tree.Log("END Replacing root. new child node -> ".PadRight(50) + newChild.ToString());
            tree.Log("END Replacing root. new root -> ".PadRight(50) + newRoot.ToString());
            tree.Log("------------ END Replacing root ------------");

            return(newRoot);
        }