/// <summary>
/// Stores the offset carried in <paramref name="additionalData"/> into one child-offset slot of
/// <paramref name="node"/> and then writes the node back out.
/// </summary>
/// <param name="node">Node whose child-offset slot is overwritten.</param>
/// <param name="keywordOffset">Index into <c>node.ChildrenByteOffsets</c> that receives the new value.</param>
/// <param name="additionalData">The new offset. It is unboxed with a direct cast, so it must be a boxed <c>long</c>.</param>
private void foundNodeToUpdate(BPlusTreeNode node, int keywordOffset, object additionalData) {
    var offsets = node.ChildrenByteOffsets;
    offsets[keywordOffset] = (long)additionalData;

    // Replacing an existing offset is not expected to make the node split or relocate,
    // so no parent is handed to Write().
    node.Write(null);
}
/// <summary>
/// Moves this node to a new location in the file because it has outgrown its allotted space.
/// </summary>
/// <remarks>
/// The node itself is NOT written here; only its <c>FileOffset</c> (and the writer's stream position)
/// are changed. Everything that points at the old location is patched first:
///   1. the right-sibling pointer of the left sibling (leaf nodes only),
///   2. the left-sibling pointer of the right sibling (leaf nodes only),
///   3. either the root-offset pointer of the file (when this node is the root) or the matching
///      child slot in <paramref name="parentNode"/>.
/// Child nodes are not touched by this method.
/// </remarks>
/// <param name="parentNode">
/// The node whose child slot points at this node. May be null, in which case no parent slot is updated.
/// </param>
private void relocate(BPlusTreeNode parentNode) {
    _tree.Log("------------ BEGIN Relocating node ------------");
    _tree.Log("BEGIN Relocating node -> ".PadRight(50) + ToString());

    // Reserve the destination first (a reusable abandoned slot or the end of the file) ...
    BPlusAbandonedNode target = _tree.GetNextAvailableNodeLocation(this);

    // ... and only then give up the current slot (this also drops the node from the cache).
    _tree.AbandonNode(this);

    var destination = target.FileOffset;

    // Sibling links are only meaningful on leaves; scrub them on index nodes.
    if (!IsLeaf) {
        LeftSiblingFileOffset = -1;
        RightSiblingFileOffset = -1;
    }

    // Re-point the left neighbour at our new home.
    if (LeftSiblingFileOffset > -1) {
        BPlusTreeNode leftNeighbor = BPlusTreeNode.Read(_tree, this.LeftSiblingFileOffset, true);
        if (leftNeighbor.IsLeaf) {
            leftNeighbor.RightSiblingFileOffset = destination;
        } else {
            leftNeighbor.RightSiblingFileOffset = -1;
        }
        _tree.Log(("Updating left sibling node @ " + leftNeighbor.FileOffset + " -> ").PadRight(50) + leftNeighbor.ToString());
        leftNeighbor.Write(null);
    }

    // Re-point the right neighbour at our new home.
    if (RightSiblingFileOffset > -1) {
        BPlusTreeNode rightNeighbor = BPlusTreeNode.Read(_tree, this.RightSiblingFileOffset, true);
        if (rightNeighbor.IsLeaf) {
            rightNeighbor.LeftSiblingFileOffset = destination;
        } else {
            rightNeighbor.LeftSiblingFileOffset = -1;
        }
        _tree.Log(("Updating right sibling node @ " + rightNeighbor.FileOffset + " -> ").PadRight(50) + rightNeighbor.ToString());
        rightNeighbor.Write(null);
    }

    if (this.IsRoot) {
        // Special case: the root has no parent. Update the root-offset value stored
        // at the very beginning of the file instead.
        _tree.WriteRootNodeOffset(destination);
    } else if (parentNode != null) {
        // Find the parent's child slot that still holds our old offset.
        int slot = 0;
        while (slot <= parentNode.KeywordCount && parentNode.ChildrenByteOffsets[slot] != FileOffset) {
            slot++;
        }

        if (slot <= parentNode.KeywordCount) {
            parentNode.ChildrenByteOffsets[slot] = destination;
            _tree.Log(("Updating parent node @ " + parentNode.FileOffset + " -> ").PadRight(50) + parentNode.ToString());
            parentNode.Write(null);

            // The tree object caches the root node -- refresh it if the parent is the root.
            if (parentNode.IsRoot) {
                _tree.Root = parentNode;
            }
        }
    }

    // Adopt the new location; the caller is responsible for the actual write.
    _tree.Writer.BaseStream.Position = destination;
    FileOffset = destination;

    _tree.Log("END Relocating node (not written yet) -> ".PadRight(50) + ToString());
    _tree.Log("------------ END Relocating node ------------");
}
/// <summary>
/// Splits <paramref name="originalChild"/> (a child of this node) into two nodes and inserts the
/// median keyword into this node.
/// </summary>
/// <remarks>
/// After the split:
///   a. <paramref name="originalChild"/> keeps the pointers/keywords left of the median,
///   b. the newly created node receives the pointers/keywords right of the median,
///   c. this node (the parent) gains the median keyword plus a pointer to the new node.
/// When a leaf is split the median keyword is copied up (it also stays in the original child);
/// when an index node is split the median keyword is moved up (it is erased from the original child).
///
/// Disk-wise: the new child is placed at the location handed out by
/// <c>GetNextAvailableNodeLocation</c>; the original child and this node are written via <c>Write</c>.
/// Per the original author's notes, <c>Write</c> relocates a node that has outgrown its allotted
/// space -- which can happen to this node because it gains a keyword -- and that is why the
/// grandparent is passed along to the final write.
/// </remarks>
/// <param name="childOffset">Index, within this node's child slots, of the child being split.</param>
/// <param name="originalChild">The child node to split; it keeps the left half.</param>
/// <param name="grandParent">This node's own parent (may be null); passed to this node's <c>Write</c>.</param>
/// <returns>The newly created right-hand node.</returns>
internal BPlusTreeNode SplitChild(int childOffset, BPlusTreeNode originalChild, BPlusTreeNode grandParent) {
    _tree.Log("------------ BEGIN Splitting node ------------");
    _tree.Log("BEGIN Splitting node parent -> ".PadRight(50) + ToString());
    _tree.Log("BEGIN Splitting node child -> ".PadRight(50) + originalChild.ToString());

    var fanout = _tree.Fanout;
    var rightHalfStart = _tree.Median + 1;
    bool splittingLeaf = originalChild.IsLeaf;

    // --- 1. Build the new right-hand node from the right half of the original child ---
    BPlusTreeNode newChild = BPlusTreeNode.CreateNode(_tree);
    newChild.IsLeaf = splittingLeaf;
    newChild.KeywordCount = fanout - _tree.MinimumChildren;

    for (int i = 0; i < newChild.KeywordCount; i++) {
        newChild.ChildrenByteOffsets[i] = originalChild.ChildrenByteOffsets[rightHalfStart + i];
        newChild.Keywords[i] = originalChild.Keywords[rightHalfStart + i];
    }
    // there is always one more pointer than keyword
    newChild.ChildrenByteOffsets[newChild.KeywordCount] = originalChild.ChildrenByteOffsets[fanout];

    // The new node always goes to the right of the original. Only leaves track siblings.
    if (splittingLeaf) {
        newChild.LeftSiblingFileOffset = originalChild.FileOffset;
        newChild.RightSiblingFileOffset = originalChild.RightSiblingFileOffset;
    } else {
        newChild.LeftSiblingFileOffset = -1;
        newChild.RightSiblingFileOffset = -1;
    }

    // --- 2. Open a gap in this (parent) node and drop the median keyword into it ---
    int slot = KeywordCount;
    while (slot > childOffset) {
        ChildrenByteOffsets[slot + 1] = ChildrenByteOffsets[slot];
        Keywords[slot] = Keywords[slot - 1];
        slot--;
    }
    KeywordCount++;
    Keywords[childOffset] = originalChild.Keywords[_tree.Median];
    // Placeholder; the real offset of the new child is filled in once it is known (step 5).
    ChildrenByteOffsets[childOffset + 1] = 0;

    // --- 3. Blank out the right half of the original child ---
    // Leaf split: the median is copied up, so it stays in the original child.
    // Index split: the median is pushed up, so it is erased from the original child as well.
    int keptKeywords = splittingLeaf ? _tree.MinimumChildren : _tree.MinimumChildren - 1;
    for (int i = keptKeywords; i < fanout; i++) {
        // On an index node the pointer in the first erased position is still in use
        // (it sits to the right of the last surviving keyword), so it is left alone.
        bool pointerStillUsed = !splittingLeaf && i == keptKeywords;
        if (!pointerStillUsed) {
            originalChild.ChildrenByteOffsets[i] = 0;
        }
        originalChild.Keywords[i] = String.Empty;
    }
    originalChild.ChildrenByteOffsets[fanout] = 0;
    originalChild.KeywordCount = keptKeywords;

    // --- 4. Persist the new child, then fix up the leaf sibling chain ---
    BPlusAbandonedNode target = _tree.GetNextAvailableNodeLocation(newChild);
    newChild.FileOffset = target.FileOffset;
    newChild.Write(this);

    if (splittingLeaf) {
        // The original child's old right sibling must now point back at the new child.
        if (originalChild.RightSiblingFileOffset > 0) {
            BPlusTreeNode formerRightSibling = BPlusTreeNode.Read(_tree, originalChild.RightSiblingFileOffset, true);
            formerRightSibling.LeftSiblingFileOffset = newChild.FileOffset;
            formerRightSibling.Write(null);
        }
        // Now that the new child has been written, link the original child to it.
        originalChild.RightSiblingFileOffset = newChild.FileOffset;
    }
    originalChild.Write(this);

    // --- 5. Record both children's offsets in this node and persist it ---
    ChildrenByteOffsets[childOffset] = originalChild.FileOffset;
    ChildrenByteOffsets[childOffset + 1] = newChild.FileOffset;

    // This node gained a keyword; the grandparent is supplied so that Write() has the node
    // that points at us available. The returned flag is intentionally not used.
    Write(grandParent);

    _tree.Log("END Splitting node parent -> ".PadRight(50) + ToString());
    _tree.Log("END Splitting node. original child -> ".PadRight(50) + originalChild.ToString());
    _tree.Log("END Splitting node. new child -> ".PadRight(50) + newChild.ToString());
    _tree.Log("------------ END Splitting node ------------");

    return newChild;

    /*
     * Reference pseudocode kept from the original source:
     *
     * newChild <- Allocate-Node()
     * leaf[newChild] <- leaf[parent]
     * n[newChild] <- t - 1
     * for j <- 1 to t - 1
     *     do keyj[newChild] <- keyj+t[parent]
     * if not leaf[parent]
     *     then for j <- 1 to t
     *         do cj[newChild] <- cj+t[parent]
     * n[parent] <- t - 1
     * for j <- n[originalChild] + 1 downto i + 1
     *     do cj+1[originalChild] <- cj[originalChild]
     * ci+1 <- newChild
     * for j <- n[originalChild] downto i
     *     do keyj+1[originalChild] <- keyj[originalChild]
     * keyi[originalChild] <- keyt[parent]
     * n[originalChild] <- n[originalChild] + 1
     * Disk-Write(parent)
     * Disk-Write(newChild)
     * Disk-Write(originalChild)
     */
}
// Here's the vernacular:
// The _keywords array represents all the keywords in this node.
// The _byteRanges array represents all the file offsets in this node for either (a) a keyword or (b) the data itself.
//    a. If it is located in an index node (i.e. non-Leaf), the byte range represents the start/end offsets
//       in the index file (used for traversing to other nodes)
//    b. If it is located in a Leaf node, the byte range represents the start/end offsets in the data file
//       (for looking up associated hit(s))
// NOTE(review): these names presumably correspond to the Keywords and ChildrenByteOffsets members used
// by the code in this class -- confirm and rename here if so.
// Think ByteRange = pointer to next node or data, Keyword = item we're searching by.
// So we always have 1 more pointer (or byterange, BR) than search item (or keyword, K):
//
//   |--------- Node ---------|
//   |      go        pi      |
//   |   /  |    |    |   \   |
//   |  BR1 K1  BR2   K2  BR3 |
//   |------------------------|

/// <summary>
/// Creates a new, non-leaf root node for the tree, writes it to the file and points the file's
/// root-offset pointer at it. When <paramref name="keyword"/> is non-empty, the original root is
/// then split underneath the new root (becoming its first child) and the keyword is inserted.
/// </summary>
/// <param name="tree">The tree that owns the nodes; used for allocation, logging and file access.</param>
/// <param name="originalRoot">The current root node, which is to be replaced.</param>
/// <param name="keyword">
/// Keyword to insert once the new root is in place. When null or empty, no split and no insert
/// are performed.
/// </param>
/// <param name="dataByteOffset">Offset passed to <c>Insert</c> together with <paramref name="keyword"/>.</param>
/// <returns>The new root node (re-read from the file when no insert happened or when the insert reported a relocation).</returns>
internal static BPlusTreeNode ReplaceRoot(BPlusTree tree, BPlusTreeNode originalRoot, string keyword, long dataByteOffset) {
    // NOTE: This is a very special case.
    // Replacing the root involves creating 2 new nodes:
    //   1. the new root node
    //   2. the node produced by splitting the existing root in two

    // Create the new root as an empty index (non-leaf) node. It has no file location yet
    // (FileOffset = -1); one is assigned below.
    BPlusTreeNode newRoot = new BPlusTreeNode {
        IsLeaf = false,
        _tree = tree,
        Keywords = new string[tree.Fanout],
        ChildrenByteOffsets = new long[tree.Fanout + 1],
        KeywordCount = 0,
        FileOffset = -1,
        LeftSiblingFileOffset = -1,
        RightSiblingFileOffset = -1,
    };

    tree.Log("------------ BEGIN Replacing root ------------");
    tree.Log("BEGIN Replacing root. original node -> ".PadRight(50) + originalRoot.ToString());
    tree.Log("BEGIN Replacing root. new node -> ".PadRight(50) + newRoot.ToString());

    // Add the new root to the file and update the root node pointer.
    // NOTE(review): the location is requested for originalRoot rather than newRoot -- presumably so the
    // (still empty) new root gets a slot sized like a populated node; confirm against GetNextAvailableNodeLocation.
    BPlusAbandonedNode abandoned = tree.GetNextAvailableNodeLocation(originalRoot);
    newRoot.FileOffset = abandoned.FileOffset;
    newRoot.Write(null);
    tree.WriteRootNodeOffset(newRoot.FileOffset);

    if (!String.IsNullOrEmpty(keyword)) {
        // Split the original root underneath the new root; it becomes child 0 and the
        // node returned by the split becomes child 1.
        BPlusTreeNode newChild = newRoot.SplitChild(0, originalRoot, null);
        // Log the node the message actually names (this used to print originalRoot).
        tree.Log("CONTINUE Replacing root. new child node -> ".PadRight(50) + newChild.ToString());

        // Trickle-down insert of the new keyword/data.
        if (newRoot.Insert(keyword, dataByteOffset, null)) {
            // The new root was relocated during insertion (outgrew its allotted space).
            // The insert should have taken care of it; re-read it just in case.
            newRoot = BPlusTreeNode.Read(tree, newRoot.FileOffset, true);
        }
    } else {
        // NOTE(review): on this path the original root is never linked under the new root
        // (no split is performed) -- confirm that callers never pass an empty keyword, or that
        // this is intended.
        newRoot = BPlusTreeNode.Read(tree, newRoot.FileOffset, true);
    }

    tree.Log("END Replacing root. original node -> ".PadRight(50) + originalRoot.ToString());
    tree.Log("END Replacing root. new root -> ".PadRight(50) + newRoot.ToString());
    tree.Log("------------ END Replacing root ------------");

    return newRoot;
}