// was:fetchPayload
// Copies the locally stored bytes of the cell under the cursor out of Info.Cells into a newly
// allocated buffer and returns that buffer.
//   size    - receives Info.nLocal, reduced by the key length when skipKey is set.
//   offset  - receives the index inside Info.Cells at which the copy started (set to -1 on entry).
//   skipKey - when true the copy starts after the key bytes; on int-key pages the key length
//             used here is zero, so the flag only changes which branch computes the values.
private byte[] FetchPayload(ref int size, out int offset, bool skipKey)
{
    Debug.Assert(this != null && PageID >= 0 && Pages[PageID] != null);
    Debug.Assert(State == CursorState.VALID);
    Debug.Assert(HoldsMutex());
    offset = -1;

    var page = Pages[PageID];
    Debug.Assert(PagesIndexs[PageID] < page.Cells);

    // The cell is expected to have been parsed already; re-parse defensively if it was not.
    if (Check.NEVER(Info.nSize == 0))
    {
        Pages[PageID].ParseCell(PagesIndexs[PageID], ref Info);
    }

    var buffer = MallocEx.sqlite3Malloc(Info.nSize - Info.nHeader);
    uint keyBytes = page.HasIntKey ? 0 : (uint)Info.nKey;

    uint localBytes;
    if (!skipKey)
    {
        // Start right after the cell header and copy everything that follows it.
        offset = (int)(Info.CellID + Info.nHeader);
        Buffer.BlockCopy(Info.Cells, offset, buffer, 0, Info.nSize - Info.nHeader);
        localBytes = Info.nLocal;
        Debug.Assert(localBytes <= keyBytes);
    }
    else
    {
        // Start after header + key and copy only what follows the key.
        offset = (int)(Info.CellID + Info.nHeader + keyBytes);
        Buffer.BlockCopy(Info.Cells, offset, buffer, 0, (int)(Info.nSize - Info.nHeader - keyBytes));
        localBytes = Info.nLocal - keyBytes;
    }

    size = (int)localBytes;
    return buffer;
}
// Ensures pTmpSpace exists: allocates a PageSize-byte buffer the first time it is needed and
// leaves an existing buffer untouched.
internal void allocateTempSpace()
{
    if (this.pTmpSpace != null)
    {
        return;
    }

    this.pTmpSpace = MallocEx.sqlite3Malloc((int)this.PageSize);
}
// was:saveCursorPosition
// Records the key the cursor currently points at, releases every page the cursor holds and puts
// the cursor into the REQUIRESEEK state. Returns RC.OK on success; if reading the key fails the
// error is returned and the cursor keeps its pages. The overflow cache is invalidated either way.
internal RC SavePosition()
{
    Debug.Assert(State == CursorState.VALID);
    Debug.Assert(Key == null);
    Debug.Assert(HoldsMutex());

    // For an int-key table the key size obtained here is all that is stored (see the original
    // note: the call leaves the integer key in NKey). Otherwise NKey bytes of key data are
    // fetched into a freshly allocated buffer.
    NKey = GetKeySize();

    var status = RC.OK;
    if (!Pages[0].HasIntKey)
    {
        var keyBuffer = MallocEx.sqlite3Malloc((int)NKey);
        status = GetKey(0, (uint)NKey, keyBuffer);
        if (status == RC.OK)
        {
            Key = keyBuffer;
        }
    }
    Debug.Assert(!Pages[0].HasIntKey || Key == null);

    if (status == RC.OK)
    {
        // Drop every page from the root down to the current depth.
        var depth = 0;
        while (depth <= PageID)
        {
            Pages[depth].releasePage();
            Pages[depth] = null;
            depth++;
        }
        PageID = -1;
        State = CursorState.REQUIRESEEK;
    }

    Btree.invalidateOverflowCache(this);
    return status;
}
// was:sqlite3PcacheTruncate
// Cleans every dirty page whose ID is greater than pgno and then asks the underlying cache to
// truncate at pgno + 1. When pgno is 0 and page 1 is held, page 1's data is replaced by a fresh
// szPage-byte buffer and the truncation point becomes 2. Does nothing when pCache is null.
internal void TruncatePage(Pgno pgno)
{
    if (pCache == null)
    {
        return;
    }

    PgHdr current = pDirty;
    while (current != null)
    {
        // Remember the successor before the current page is (possibly) made clean.
        PgHdr following = current.DirtyNext;

        // This routine is never called with a positive pgno except right after
        // sqlite3PcacheCleanAll(). So if there are dirty pages, it must be that pgno==0.
        Debug.Assert(current.ID > 0);
        if (Check.ALWAYS(current.ID > pgno))
        {
            Debug.Assert((current.Flags & PgHdr.PGHDR.DIRTY) != 0);
            MakePageClean(current);
        }

        current = following;
    }

    if (pgno == 0 && pPage1 != null)
    {
        pPage1.Data = MallocEx.sqlite3Malloc(szPage);
        pgno = 1;
    }

    pCache.xTruncate(pgno + 1);
}
// Returns the nSize that btreeParseCellPtr reports for the cell beginning at pCell[offset].
// The bytes from offset to the end of pCell are first copied to the front of a scratch buffer
// of pCell.Length bytes, because the parser is handed a buffer and reads it from index 0.
internal ushort cellSizePtr(byte[] pCell, int offset)
{
    var scratch = MallocEx.sqlite3Malloc(pCell.Length);
    Buffer.BlockCopy(pCell, offset, scratch, 0, pCell.Length - offset);

    var parsed = new CellInfo { Cells = scratch };
    btreeParseCellPtr(parsed.Cells, ref parsed);
    return parsed.nSize;
}
public ushort Index; // Insert this cell before idx-th non-overflow cell

// Returns a copy of this entry. Index is copied by value; Cell, when present, is duplicated
// into a newly allocated array of the same length so the copy does not share bytes with the
// original. A null Cell stays unset on the copy.
public Overflow Clone()
{
    var copy = new Overflow();
    copy.Index = Index;

    if (Cell != null)
    {
        var bytes = MallocEx.sqlite3Malloc(Cell.Length);
        Buffer.BlockCopy(Cell, 0, bytes, 0, Cell.Length);
        copy.Cell = bytes;
    }

    return copy;
}
public Pgno ID; // Page number for this page

// Returns a copy of this page. All members are first copied with MemberwiseClone; the Overflows
// array (each element cloned individually) and the Data buffer are then duplicated so that the
// copy does not share those arrays with the original. Null arrays are left as they are.
public MemPage Clone()
{
    var copy = (MemPage)MemberwiseClone();

    if (Overflows != null)
    {
        copy.Overflows = Array.ConvertAll(Overflows, entry => entry.Clone());
    }

    if (Data != null)
    {
        var bytes = MallocEx.sqlite3Malloc(Data.Length);
        Buffer.BlockCopy(Data, 0, bytes, 0, Data.Length);
        copy.Data = bytes;
    }

    return copy;
}
// was:sqlite3PagerSetPagesize
// Changes the pager's page size and reserved-byte count.
//   pPageSize - in: requested page size (0 leaves the size alone); out: the page size in effect
//               when the call returns.
//   nReserve  - new reserved-byte count; a negative value keeps the current one.
// The size is only changed when the database is not an in-memory database with content, the page
// cache has no outstanding references, and the requested size is non-zero and differs from the
// current one. Returns RC.OK, or the error from querying the file size.
public RC SetPageSize(ref uint pPageSize, int nReserve)
{
    // It is not possible to do a full assert_pager_state() here, as this function may be called
    // from within PagerOpen(), before the state of the Pager object is internally consistent.
    //
    // At one point this function returned an error if the pager was in PAGER_ERROR state. But
    // since PAGER_ERROR state guarantees that there is at least one outstanding page reference,
    // this function is a no-op for that case anyhow.
    var requested = pPageSize;
    Debug.Assert(requested == 0 || (requested >= 512 && requested <= SQLITE_MAX_PAGE_SIZE));

    var status = RC.OK;
    var cacheIdle = (this.memDb == 0 || this.dbSize == 0) && this.pPCache.sqlite3PcacheRefCount() == 0;
    if (cacheIdle && requested != 0 && requested != (uint)this.pageSize)
    {
        long fileBytes = 0;
        if (this.eState > PAGER.OPEN && this.fd.IsOpen)
        {
            status = this.fd.FileSize(ref fileBytes);
        }

        if (status == RC.OK)
        {
            pager_reset();
            this.dbSize = (Pgno)(fileBytes / requested);
            this.pageSize = (int)requested;

            // The temp buffer is page-sized, so it has to be replaced along with the page size.
            IPCache.sqlite3PageFree(ref this.pTmpSpace);
            this.pTmpSpace = MallocEx.sqlite3Malloc((int)requested);
            this.pPCache.SetPageSize((int)requested);
        }
    }

    pPageSize = (uint)this.pageSize;
    if (status != RC.OK)
    {
        return status;
    }

    if (nReserve < 0)
    {
        nReserve = this.nReserve;
    }
    Debug.Assert(nReserve >= 0 && nReserve < 1000);
    this.nReserve = (short)nReserve;
    pagerReportSize();
    return status;
}
// Rebalances a child of pParent together with up to two of its siblings: the cells of the old
// sibling pages (plus the divider cells taken from the parent) are gathered into a local array,
// redistributed over a new set of sibling pages, and new divider cells are inserted into the parent.
//   pParent    - parent page of the siblings being balanced.
//   iParentIdx - index within pParent of the child that triggered the balance.
//   aOvflSpace - not referenced by the code in this port (only by the commented-out
//                secure-delete handling below).
//   isRoot     - non-zero enables the "balance-shallower" step that copies a sole remaining
//                child back into an empty parent.
// Returns RC.OK on success, otherwise the first error encountered. All pages obtained here are
// released before returning.
internal static RC balance_nonroot(MemPage pParent, int iParentIdx, byte[] aOvflSpace, int isRoot)
{
    var apOld = new MemPage[NB]; // pPage and up to two siblings
    var apCopy = new MemPage[NB]; // Private copies of apOld[] pages
    var apNew = new MemPage[NB + 2]; // pPage and up to NB siblings after balancing
    var apDiv = new int[NB - 1]; // Divider cells in pParent
    var cntNew = new int[NB + 2]; // Index in aCell[] of cell after i-th page
    var szNew = new int[NB + 2]; // Combined size of cells placed on i-th page
    var szCell = new ushort[1]; // Local size of all cells in apCell[]
    BtShared pBt; // The whole database
    int nCell = 0; // Number of cells in apCell[]
    int nMaxCells = 0; // Allocated size of apCell, szCell, aFrom.
    int nNew = 0; // Number of pages in apNew[]
    ushort leafCorrection; // 4 if pPage is a leaf. 0 if not
    int leafData; // True if pPage is a leaf of a LEAFDATA tree
    int usableSpace; // Bytes in pPage beyond the header
    int pageFlags; // Value of pPage.aData[0]
    int subtotal; // Subtotal of bytes in cells on one page
    int iOvflSpace = 0; // First unused byte of aOvflSpace[]
    //int szScratch; // Size of scratch memory requested
    byte[][] apCell = null; // All cells being balanced
    //
    pBt = pParent.Shared;
    Debug.Assert(MutexEx.Held(pBt.Mutex));
    Debug.Assert(Pager.IsPageWriteable(pParent.DbPage));
#if false
    Btree.TRACE("BALANCE: begin page %d child of %d\n", pPage.pgno, pParent.pgno);
#endif
    // At this point pParent may have at most one overflow cell. And if this overflow cell is present, it must be the cell with
    // index iParentIdx. This scenario comes about when this function is called (indirectly) from sqlite3BtreeDelete().
    Debug.Assert(pParent.NOverflows == 0 || pParent.NOverflows == 1);
    Debug.Assert(pParent.NOverflows == 0 || pParent.Overflows[0].Index == iParentIdx);

    // Find the sibling pages to balance. Also locate the cells in pParent that divide the siblings. An attempt is made to find NN siblings on
    // either side of pPage. More siblings are taken from one side, however, if there are fewer than NN siblings on the other side. If pParent
    // has NB or fewer children then all children of pParent are taken.
    // This loop also drops the divider cells from the parent page. This way, the remainder of the function does not have to deal with any
    // overflow cells in the parent page, since if any existed they will have already been removed.
    int nOld; // Number of pages in apOld[]
    int nxDiv; // Next divider slot in pParent.aCell[]
    var i = pParent.NOverflows + pParent.Cells;
    if (i < 2)
    {
        nxDiv = 0;
        nOld = i + 1;
    }
    else
    {
        nOld = 3;
        if (iParentIdx == 0)
        {
            nxDiv = 0;
        }
        else if (iParentIdx == i)
        {
            nxDiv = i - 2;
        }
        else
        {
            nxDiv = iParentIdx - 1;
        }
        i = 2;
    }
    var pRight = ((i + nxDiv - pParent.NOverflows) == pParent.Cells ? pParent.HeaderOffset + 8 : pParent.FindCell(i + nxDiv - pParent.NOverflows)); // Location in parent of right-sibling pointer
    var pgno = (Pgno)ConvertEx.Get4(pParent.Data, pRight);
    var rc = RC.OK;
    while (true)
    {
        rc = pBt.getAndInitPage(pgno, ref apOld[i]);
        if (rc != RC.OK)
        {
            goto balance_cleanup;
        }
        nMaxCells += 1 + apOld[i].Cells + apOld[i].NOverflows;
        if (i-- == 0)
        {
            break;
        }
        // Check the overflow count first so that the overflow slot is only inspected when the
        // parent really has an overflow cell.
        if (pParent.NOverflows != 0 && i + nxDiv == pParent.Overflows[0].Index)
        {
            apDiv[i] = 0;
            pgno = ConvertEx.Get4(pParent.Overflows[0].Cell, apDiv[i]);
            szNew[i] = pParent.cellSizePtr(apDiv[i]);
            pParent.NOverflows = 0;
        }
        else
        {
            apDiv[i] = pParent.FindCell(i + nxDiv - pParent.NOverflows);
            pgno = ConvertEx.Get4(pParent.Data, apDiv[i]);
            szNew[i] = pParent.cellSizePtr(apDiv[i]);
            // Drop the cell from the parent page. apDiv[i] still points to the cell within the parent, even though it has been dropped.
            // This is safe because dropping a cell only overwrites the first four bytes of it, and this function does not need the first
            // four bytes of the divider cell. So the pointer is safe to use later on.
            //
            // Unless SQLite is compiled in secure-delete mode. In this case, the dropCell() routine will overwrite the entire cell with zeroes.
            // In this case, temporarily copy the cell into the aOvflSpace[] buffer. It will be copied out again as soon as the aSpace[] buffer
            // is allocated.
            //if (pBt.secureDelete)
            //{
            //  int iOff = (int)(apDiv[i]) - (int)(pParent.aData); //SQLITE_PTR_TO_INT(apDiv[i]) - SQLITE_PTR_TO_INT(pParent.aData);
            //  if( (iOff+szNew[i])>(int)pBt->usableSize )
            //  {
            //    rc = SQLITE_CORRUPT_BKPT();
            //    Array.Clear(apOld[0].aData,0,apOld[0].aData.Length); //memset(apOld, 0, (i + 1) * sizeof(MemPage*));
            //    goto balance_cleanup;
            //  }
            //  else
            //  {
            //    memcpy(&aOvflSpace[iOff], apDiv[i], szNew[i]);
            //    apDiv[i] = &aOvflSpace[apDiv[i] - pParent.aData];
            //  }
            //}
            pParent.dropCell(i + nxDiv - pParent.NOverflows, szNew[i], ref rc);
        }
    }

    // Make nMaxCells a multiple of 4 in order to preserve 8-byte alignment
    nMaxCells = (nMaxCells + 3) & ~3;

    // Allocate space for memory structures
    apCell = MallocEx.sqlite3ScratchMalloc(apCell, nMaxCells);
    if (szCell.Length < nMaxCells)
    {
        Array.Resize(ref szCell, nMaxCells);
    }

    // Load pointers to all cells on sibling pages and the divider cells into the local apCell[] array. Make copies of the divider cells
    // into space obtained from aSpace1[] and remove the divider cells from pParent.
    // If the siblings are on leaf pages, then the child pointers of the divider cells are stripped from the cells before they are copied
    // into aSpace1[]. In this way, all cells in apCell[] are without child pointers. If siblings are not leaves, then all cells in
    // apCell[] include child pointers. Either way, all cells in apCell[] are alike.
    // leafCorrection: 4 if pPage is a leaf. 0 if pPage is not a leaf.
    // leafData: 1 if pPage holds key+data and pParent holds only keys.
    leafCorrection = (ushort)(apOld[0].Leaf * 4);
    leafData = apOld[0].HasData;
    int j;
    for (i = 0; i < nOld; i++)
    {
        // Before doing anything else, take a copy of the i'th original sibling. The rest of this function will use data from the copies rather
        // than the original pages since the original pages will be in the process of being overwritten.
        var pOld = apCopy[i] = apOld[i].Clone();
        var limit = pOld.Cells + pOld.NOverflows;
        // Every cell is copied into its own apCell[] slot. (The alternative path that stored
        // references into the page buffer was unreachable and has been removed.)
        for (j = 0; j < limit; j++)
        {
            Debug.Assert(nCell < nMaxCells);
            var iFOFC = pOld.FindOverflowCell(j);
            szCell[nCell] = pOld.cellSizePtr(iFOFC);
            // Copy the data locally
            if (apCell[nCell] == null)
            {
                apCell[nCell] = new byte[szCell[nCell]];
            }
            else if (apCell[nCell].Length < szCell[nCell])
            {
                Array.Resize(ref apCell[nCell], szCell[nCell]);
            }
            if (iFOFC < 0) // Overflow Cell
            {
                Buffer.BlockCopy(pOld.Overflows[-(iFOFC + 1)].Cell, 0, apCell[nCell], 0, szCell[nCell]);
            }
            else
            {
                Buffer.BlockCopy(pOld.Data, iFOFC, apCell[nCell], 0, szCell[nCell]);
            }
            nCell++;
        }
        if (i < nOld - 1 && 0 == leafData)
        {
            var sz = (ushort)szNew[i];
            var pTemp = MallocEx.sqlite3Malloc(sz + leafCorrection);
            Debug.Assert(nCell < nMaxCells);
            szCell[nCell] = sz;
            Debug.Assert(sz <= pBt.MaxLocal + 23);
            Buffer.BlockCopy(pParent.Data, apDiv[i], pTemp, 0, sz);
            if (apCell[nCell] == null || apCell[nCell].Length < sz)
            {
                Array.Resize(ref apCell[nCell], sz);
            }
            Buffer.BlockCopy(pTemp, leafCorrection, apCell[nCell], 0, sz);
            Debug.Assert(leafCorrection == 0 || leafCorrection == 4);
            szCell[nCell] = (ushort)(szCell[nCell] - leafCorrection);
            if (0 == pOld.Leaf)
            {
                Debug.Assert(leafCorrection == 0);
                Debug.Assert(pOld.HeaderOffset == 0);
                // The right pointer of the child page pOld becomes the left pointer of the divider cell
                Buffer.BlockCopy(pOld.Data, 8, apCell[nCell], 0, 4);//memcpy( apCell[nCell], ref pOld.aData[8], 4 );
            }
            else
            {
                Debug.Assert(leafCorrection == 4);
                if (szCell[nCell] < 4)
                {
                    // Do not allow any cells smaller than 4 bytes.
                    szCell[nCell] = 4;
                }
            }
            nCell++;
        }
    }

    // Figure out the number of pages needed to hold all nCell cells. Store this number in "k". Also compute szNew[] which is the total
    // size of all cells on the i-th page and cntNew[] which is the index in apCell[] of the cell that divides page i from page i+1.
    // cntNew[k] should equal nCell.
    // Values computed by this block:
    //    k: The total number of sibling pages
    //    szNew[i]: Space used on the i-th sibling page.
    //    cntNew[i]: Index in apCell[] and szCell[] for the first cell to
    //               the right of the i-th sibling page.
    //    usableSpace: Number of bytes of space available on each sibling.
    usableSpace = (int)pBt.UsableSize - 12 + leafCorrection;
    int k;
    for (subtotal = k = i = 0; i < nCell; i++)
    {
        Debug.Assert(i < nMaxCells);
        subtotal += szCell[i] + 2;
        if (subtotal > usableSpace)
        {
            szNew[k] = subtotal - szCell[i];
            cntNew[k] = i;
            if (leafData != 0)
            {
                i--;
            }
            subtotal = 0;
            k++;
            if (k > NB + 1)
            {
                rc = SysEx.SQLITE_CORRUPT_BKPT();
                goto balance_cleanup;
            }
        }
    }
    szNew[k] = subtotal;
    cntNew[k] = nCell;
    k++;

    // The packing computed by the previous block is biased toward the siblings on the left side. The left siblings are always nearly full, while the
    // right-most sibling might be nearly empty. This block of code attempts to adjust the packing of siblings to get a better balance.
    //
    // This adjustment is more than an optimization. The packing above might be so out of balance as to be illegal. For example, the right-most
    // sibling might be completely empty. This adjustment is not optional.
    for (i = k - 1; i > 0; i--)
    {
        var szRight = szNew[i]; // Size of sibling on the right
        var szLeft = szNew[i - 1]; // Size of sibling on the left
        var r = cntNew[i - 1] - 1; // Index of right-most cell in left sibling
        var d = r + 1 - leafData; // Index of first cell to the left of right sibling
        Debug.Assert(d < nMaxCells);
        Debug.Assert(r < nMaxCells);
        while (szRight == 0 || szRight + szCell[d] + 2 <= szLeft - (szCell[r] + 2))
        {
            szRight += szCell[d] + 2;
            szLeft -= szCell[r] + 2;
            cntNew[i - 1]--;
            r = cntNew[i - 1] - 1;
            d = r + 1 - leafData;
        }
        szNew[i] = szRight;
        szNew[i - 1] = szLeft;
    }

    // Either we found one or more cells (cntNew[0] > 0) or pPage is a virtual root page. A virtual root page is when the real root
    // page is page 1 and we are the only child of that page.
    Debug.Assert(cntNew[0] > 0 || (pParent.ID == 1 && pParent.Cells == 0));
    Btree.TRACE("BALANCE: old: %d %d %d ", apOld[0].ID, (nOld >= 2 ? apOld[1].ID : 0), (nOld >= 3 ? apOld[2].ID : 0));

    // Allocate k new pages. Reuse old pages where possible.
    if (apOld[0].ID <= 1)
    {
        rc = SysEx.SQLITE_CORRUPT_BKPT();
        goto balance_cleanup;
    }
    pageFlags = apOld[0].Data[0];
    for (i = 0; i < k; i++)
    {
        var pNew = new MemPage();
        if (i < nOld)
        {
            pNew = apNew[i] = apOld[i];
            apOld[i] = null;
            rc = Pager.Write(pNew.DbPage);
            nNew++;
            if (rc != RC.OK)
            {
                goto balance_cleanup;
            }
        }
        else
        {
            Debug.Assert(i > 0);
            rc = pBt.allocateBtreePage(ref pNew, ref pgno, pgno, 0);
            if (rc != 0)
            {
                goto balance_cleanup;
            }
            apNew[i] = pNew;
            nNew++;
            // Set the pointer-map entry for the new sibling page.
#if !SQLITE_OMIT_AUTOVACUUM
            if (pBt.AutoVacuum)
#else
            if (false)
#endif
            {
                pBt.ptrmapPut(pNew.ID, PTRMAP.BTREE, pParent.ID, ref rc);
                if (rc != RC.OK)
                {
                    goto balance_cleanup;
                }
            }
        }
    }

    // Free any old pages that were not reused as new pages.
    while (i < nOld)
    {
        apOld[i].freePage(ref rc);
        if (rc != RC.OK)
        {
            goto balance_cleanup;
        }
        apOld[i].releasePage();
        apOld[i] = null;
        i++;
    }

    // Put the new pages in ascending order. This helps to keep entries in the disk file in order so that a scan
    // of the table is a linear scan through the file. That in turn helps the operating system to deliver pages
    // from the disk more rapidly.
    // An O(n^2) insertion sort algorithm is used, but since n is never more than NB (a small constant), that should
    // not be a problem.
    // When NB==3, this one optimization makes the database about 25% faster for large insertions and deletions.
    for (i = 0; i < k - 1; i++)
    {
        var minV = (int)apNew[i].ID;
        var minI = i;
        for (j = i + 1; j < k; j++)
        {
            if (apNew[j].ID < (uint)minV)
            {
                minI = j;
                minV = (int)apNew[j].ID;
            }
        }
        if (minI > i)
        {
            var pT = apNew[i];
            apNew[i] = apNew[minI];
            apNew[minI] = pT;
        }
    }
    Btree.TRACE("new: %d(%d) %d(%d) %d(%d) %d(%d) %d(%d)\n", apNew[0].ID, szNew[0], (nNew >= 2 ? apNew[1].ID : 0), (nNew >= 2 ? szNew[1] : 0), (nNew >= 3 ? apNew[2].ID : 0), (nNew >= 3 ? szNew[2] : 0), (nNew >= 4 ? apNew[3].ID : 0), (nNew >= 4 ? szNew[3] : 0), (nNew >= 5 ? apNew[4].ID : 0), (nNew >= 5 ? szNew[4] : 0));
    Debug.Assert(Pager.IsPageWriteable(pParent.DbPage));
    ConvertEx.Put4L(pParent.Data, pRight, apNew[nNew - 1].ID);

    // Evenly distribute the data in apCell[] across the new pages. Insert divider cells into pParent as necessary.
    j = 0;
    for (i = 0; i < nNew; i++)
    {
        // Assemble the new sibling page.
        MemPage pNew = apNew[i];
        Debug.Assert(j < nMaxCells);
        pNew.zeroPage(pageFlags);
        pNew.assemblePage(cntNew[i] - j, apCell, szCell, j);
        Debug.Assert(pNew.Cells > 0 || (nNew == 1 && cntNew[0] == 0));
        Debug.Assert(pNew.NOverflows == 0);
        j = cntNew[i];
        // If the sibling page assembled above was not the right-most sibling, insert a divider cell into the parent page.
        Debug.Assert(i < nNew - 1 || j == nCell);
        if (j < nCell)
        {
            Debug.Assert(j < nMaxCells);
            var pCell = apCell[j];
            var sz = szCell[j] + leafCorrection;
            var pTemp = MallocEx.sqlite3Malloc(sz);
            if (pNew.Leaf == 0)
            {
                Buffer.BlockCopy(pCell, 0, pNew.Data, 8, 4);
            }
            else if (leafData != 0)
            {
                // If the tree is a leaf-data tree, and the siblings are leaves, then there is no divider cell in apCell[]. Instead, the divider
                // cell consists of the integer key for the right-most cell of the sibling-page assembled above only.
                var info = new CellInfo();
                j--;
                pNew.btreeParseCellPtr(apCell[j], ref info);
                pCell = pTemp;
                sz = 4 + ConvertEx.PutVarint9L(pCell, 4, (ulong)info.nKey);
                pTemp = null;
            }
            else
            {
                //------------ pCell -= 4;
                var _pCell_4 = MallocEx.sqlite3Malloc(pCell.Length + 4);
                Buffer.BlockCopy(pCell, 0, _pCell_4, 4, pCell.Length);
                pCell = _pCell_4;
                // Obscure case for non-leaf-data trees: If the cell at pCell was previously stored on a leaf node, and its reported size was 4
                // bytes, then it may actually be smaller than this (see btreeParseCellPtr(), 4 bytes is the minimum size of
                // any cell). But it is important to pass the correct size to insertCell(), so reparse the cell now.
                // Note that this can never happen in an SQLite data file, as all cells are at least 4 bytes. It only happens in b-trees used
                // to evaluate "IN (SELECT ...)" and similar clauses.
                if (szCell[j] == 4)
                {
                    Debug.Assert(leafCorrection == 4);
                    sz = pParent.cellSizePtr(pCell);
                }
            }
            iOvflSpace += sz;
            Debug.Assert(sz <= pBt.MaxLocal + 23);
            Debug.Assert(iOvflSpace <= (int)pBt.PageSize);
            pParent.insertCell(nxDiv, pCell, sz, pTemp, pNew.ID, ref rc);
            if (rc != RC.OK)
            {
                goto balance_cleanup;
            }
            Debug.Assert(Pager.IsPageWriteable(pParent.DbPage));
            j++;
            nxDiv++;
        }
    }
    Debug.Assert(j == nCell);
    Debug.Assert(nOld > 0);
    Debug.Assert(nNew > 0);
    if ((pageFlags & Btree.PTF_LEAF) == 0)
    {
        Buffer.BlockCopy(apCopy[nOld - 1].Data, 8, apNew[nNew - 1].Data, 8, 4);
    }
    if (isRoot != 0 && pParent.Cells == 0 && pParent.HeaderOffset <= apNew[0].FreeBytes)
    {
        // The root page of the b-tree now contains no cells. The only sibling page is the right-child of the parent. Copy the contents of the
        // child page into the parent, decreasing the overall height of the b-tree structure by one. This is described as the "balance-shallower"
        // sub-algorithm in some documentation.
        // If this is an auto-vacuum database, the call to copyNodeContent() sets all pointer-map entries corresponding to database image pages
        // for which the pointer is stored within the content being copied.
        // The second Debug.Assert below verifies that the child page is defragmented (it must be, as it was just reconstructed using assemblePage()). This
        // is important if the parent page happens to be page 1 of the database image.
        Debug.Assert(nNew == 1);
        Debug.Assert(apNew[0].FreeBytes == (ConvertEx.Get2(apNew[0].Data, 5) - apNew[0].CellOffset - apNew[0].Cells * 2));
        copyNodeContent(apNew[0], pParent, ref rc);
        apNew[0].freePage(ref rc);
    }
    else
#if !SQLITE_OMIT_AUTOVACUUM
    if (pBt.AutoVacuum)
#else
    if (false)
#endif
    {
        // Fix the pointer-map entries for all the cells that were shifted around. There are several different types of pointer-map entries that need to
        // be dealt with by this routine. Some of these have been set already, but many have not. The following is a summary:
        //   1) The entries associated with new sibling pages that were not siblings when this function was called. These have already
        //      been set. We don't need to worry about old siblings that were moved to the free-list - the freePage() code has taken care
        //      of those.
        //   2) The pointer-map entries associated with the first overflow page in any overflow chains used by new divider cells. These
        //      have also already been taken care of by the insertCell() code.
        //   3) If the sibling pages are not leaves, then the child pages of cells stored on the sibling pages may need to be updated.
        //   4) If the sibling pages are not internal intkey nodes, then any overflow pages used by these cells may need to be updated
        //      (internal intkey nodes never contain pointers to overflow pages).
        //   5) If the sibling pages are not leaves, then the pointer-map entries for the right-child pages of each sibling may need
        //      to be updated.
        // Cases 1 and 2 are dealt with above by other code. The next block deals with cases 3 and 4 and the one after that, case 5. Since
        // setting a pointer map entry is a relatively expensive operation, this code only sets pointer map entries for child or overflow pages that have
        // actually moved between pages.
        var pNew = apNew[0];
        var pOld = apCopy[0];
        var nOverflow = pOld.NOverflows;
        var iNextOld = pOld.Cells + nOverflow;
        var iOverflow = (nOverflow != 0 ? pOld.Overflows[0].Index : -1);
        j = 0; // Current 'old' sibling page
        k = 0; // Current 'new' sibling page
        for (i = 0; i < nCell; i++)
        {
            var isDivider = 0;
            while (i == iNextOld)
            {
                // Cell i is the cell immediately following the last cell on old sibling page j. If the siblings are not leaf pages of an
                // intkey b-tree, then cell i was a divider cell.
                pOld = apCopy[++j];
                iNextOld = i + (0 == leafData ? 1 : 0) + pOld.Cells + pOld.NOverflows;
                if (pOld.NOverflows != 0)
                {
                    nOverflow = pOld.NOverflows;
                    iOverflow = i + (0 == leafData ? 1 : 0) + pOld.Overflows[0].Index;
                }
                isDivider = 0 == leafData ? 1 : 0;
            }
            Debug.Assert(nOverflow > 0 || iOverflow < i);
            Debug.Assert(nOverflow < 2 || pOld.Overflows[0].Index == pOld.Overflows[1].Index - 1);
            Debug.Assert(nOverflow < 3 || pOld.Overflows[1].Index == pOld.Overflows[2].Index - 1);
            if (i == iOverflow)
            {
                isDivider = 1;
                if (--nOverflow > 0)
                {
                    iOverflow++;
                }
            }
            if (i == cntNew[k])
            {
                // Cell i is the cell immediately following the last cell on new sibling page k. If the siblings are not leaf pages of an
                // intkey b-tree, then cell i is a divider cell.
                pNew = apNew[++k];
                if (leafData == 0)
                {
                    continue;
                }
            }
            Debug.Assert(j < nOld);
            Debug.Assert(k < nNew);
            // If the cell was originally divider cell (and is not now) or an overflow cell, or if the cell was located on a different sibling
            // page before the balancing, then the pointer map entries associated with any child or overflow pages need to be updated.
            if (isDivider != 0 || pOld.ID != pNew.ID)
            {
                if (leafCorrection == 0)
                {
                    pBt.ptrmapPut(ConvertEx.Get4(apCell[i]), PTRMAP.BTREE, pNew.ID, ref rc);
                }
                if (szCell[i] > pNew.MinLocal)
                {
                    pNew.ptrmapPutOvflPtr(apCell[i], ref rc);
                }
            }
        }
        if (leafCorrection == 0)
        {
            for (i = 0; i < nNew; i++)
            {
                var key = ConvertEx.Get4(apNew[i].Data, 8);
                pBt.ptrmapPut(key, PTRMAP.BTREE, apNew[i].ID, ref rc);
            }
        }
#if false
        // The ptrmapCheckPages() contains Debug.Assert() statements that verify that all pointer map pages are set correctly. This is helpful while
        // debugging. This is usually disabled because a corrupt database may cause a Debug.Assert() statement to fail.
        ptrmapCheckPages(apNew, nNew);
        ptrmapCheckPages(pParent, 1);
#endif
    }
    Debug.Assert(pParent.HasInit);
    Btree.TRACE("BALANCE: finished: old=%d new=%d cells=%d\n", nOld, nNew, nCell);

    // Cleanup before returning.
balance_cleanup:
    MallocEx.sqlite3ScratchFree(apCell);
    for (i = 0; i < nOld; i++)
    {
        apOld[i].releasePage();
    }
    for (i = 0; i < nNew; i++)
    {
        apNew[i].releasePage();
    }
    return(rc);
}
// Builds a complete cell for this page in pCell: writes the cell header, then copies the payload
// (key bytes for index pages, data bytes for int-key pages, followed by nZero zero bytes). When
// the payload does not fit in the local space reported by btreeParseCellPtr, overflow pages are
// allocated and chained, and the remaining payload is written to them.
//   pCell        - buffer that receives the cell.
//   pKey, nKey   - key bytes and length; on int-key pages nKey is the integer key and pKey is unused.
//   pData, nData - data bytes and length (ignored when the page has no data).
//   nZero        - number of zero bytes appended after pData (ignored when the page has no data).
//   pnSize       - receives the size of the cell as parsed from the header just written.
// Returns RC.OK on success, the corruption code when a non-int-key page is given a null or
// oversized key, or the error from allocating an overflow page / updating the pointer map.
internal RC fillInCell(byte[] pCell, byte[] pKey, long nKey, byte[] pData, int nData, int nZero, ref int pnSize)
{
    Debug.Assert(MutexEx.Held(this.Shared.Mutex));
    // pPage is not necessarily writeable since pCell might be auxiliary buffer space that is separate from the pPage buffer area
    // TODO -- Determine if the following Assert is needed under c#
    //Debug.Assert( pCell < pPage.aData || pCell >= &pPage.aData[pBt.pageSize] || sqlite3PagerIswriteable(pPage.pDbPage) );

    // Fill in the header.
    var nHeader = 0;
    if (this.Leaf == 0)
    {
        nHeader += 4;
    }
    if (this.HasData != 0)
    {
        nHeader += (int)ConvertEx.PutVariant9(pCell, (uint)nHeader, (int)(nData + nZero));
    }
    else
    {
        nData = nZero = 0;
    }
    nHeader += ConvertEx.PutVariant9L(pCell, (uint)nHeader, (ulong)nKey);
    var info = new CellInfo();
    btreeParseCellPtr(pCell, ref info);
    Debug.Assert(info.nHeader == nHeader);
    Debug.Assert(info.nKey == nKey);
    Debug.Assert(info.nData == (uint)(nData + nZero));

    // Fill in the payload
    var nPayload = nData + nZero;
    byte[] pSrc;
    int nSrc;
    if (this.HasIntKey)
    {
        pSrc = pData;
        nSrc = nData;
        nData = 0;
    }
    else
    {
        if (Check.NEVER(nKey > 0x7fffffff || pKey == null))
        {
            return(SysEx.SQLITE_CORRUPT_BKPT());
        }
        nPayload += (int)nKey;
        pSrc = pKey;
        nSrc = (int)nKey;
    }
    pnSize = info.nSize;
    var spaceLeft = (int)info.nLocal;
    var pPayload = pCell;
    var pPayloadIndex = nHeader;
    var pPrior = pCell;
    var pPriorIndex = (int)info.iOverflow;
    var pBt = this.Shared;
    Pgno pgnoOvfl = 0;
    MemPage pToRelease = null;

    // Read position within pSrc. It must survive across loop iterations: a source that is larger
    // than the space left on the current page is consumed in several chunks, and each chunk has to
    // continue where the previous one stopped.
    var pSrcIndex = 0;
    while (nPayload > 0)
    {
        if (spaceLeft == 0)
        {
#if !SQLITE_OMIT_AUTOVACUUM
            var pgnoPtrmap = pgnoOvfl; // Overflow page pointer-map entry page
            if (pBt.AutoVacuum)
            {
                do
                {
                    pgnoOvfl++;
                } while (PTRMAP_ISPAGE(pBt, pgnoOvfl) || pgnoOvfl == PENDING_BYTE_PAGE(pBt));
            }
#endif
            MemPage pOvfl = null;
            var rc = pBt.allocateBtreePage(ref pOvfl, ref pgnoOvfl, pgnoOvfl, 0);
#if !SQLITE_OMIT_AUTOVACUUM
            // If the database supports auto-vacuum, and the second or subsequent overflow page is being allocated, add an entry to the pointer-map for
            // that page now.
            // If this is the first overflow page, then write a partial entry to the pointer-map. If we write nothing to this pointer-map slot,
            // then the optimistic overflow chain processing in clearCell() may misinterpret the uninitialised values and delete the
            // wrong pages from the database.
            if (pBt.AutoVacuum && rc == RC.OK)
            {
                var eType = (pgnoPtrmap != 0 ? PTRMAP.OVERFLOW2 : PTRMAP.OVERFLOW1);
                pBt.ptrmapPut(pgnoOvfl, eType, pgnoPtrmap, ref rc);
                if (rc != RC.OK)
                {
                    pOvfl.releasePage();
                }
            }
#endif
            if (rc != RC.OK)
            {
                pToRelease.releasePage();
                return(rc);
            }
            // If pToRelease is not null then pPrior points into the data area of pToRelease. Make sure pToRelease is still writeable.
            Debug.Assert(pToRelease == null || Pager.IsPageWriteable(pToRelease.DbPage));
            // If pPrior is part of the data area of pPage, then make sure pPage is still writeable
            // TODO -- Determine if the following Assert is needed under c#
            //Debug.Assert( pPrior < pPage.aData || pPrior >= &pPage.aData[pBt.pageSize] || sqlite3PagerIswriteable(pPage.pDbPage) );

            // Link the new overflow page into the chain and make it the current write target.
            ConvertEx.Put4L(pPrior, (uint)pPriorIndex, pgnoOvfl);
            pToRelease.releasePage();
            pToRelease = pOvfl;
            pPrior = pOvfl.Data;
            pPriorIndex = 0;
            ConvertEx.Put4(pPrior, 0);
            pPayload = pOvfl.Data;
            pPayloadIndex = 4;
            spaceLeft = (int)pBt.UsableSize - 4;
        }
        var n = nPayload;
        if (n > spaceLeft)
        {
            n = spaceLeft;
        }
        // If pToRelease is not null then pPayload points into the data area of pToRelease. Make sure pToRelease is still writeable.
        Debug.Assert(pToRelease == null || Pager.IsPageWriteable(pToRelease.DbPage));
        // If pPayload is part of the data area of pPage, then make sure pPage is still writeable
        // TODO -- Determine if the following Assert is needed under c#
        //Debug.Assert( pPayload < pPage.aData || pPayload >= &pPage.aData[pBt.pageSize] || sqlite3PagerIswriteable(pPage.pDbPage) );
        if (nSrc > 0)
        {
            if (n > nSrc)
            {
                n = nSrc;
            }
            Debug.Assert(pSrc != null);
            Buffer.BlockCopy(pSrc, pSrcIndex, pPayload, pPayloadIndex, n);
        }
        else
        {
            // Source exhausted: the rest of the payload is the zero-filled tail.
            Array.Clear(pPayload, pPayloadIndex, n);
        }
        nPayload -= n;
        pPayloadIndex += n;
        pSrcIndex += n;
        nSrc -= n;
        spaceLeft -= n;
        if (nSrc == 0)
        {
            // Switch from the key to the data buffer; reading restarts at its first byte.
            nSrc = nData;
            pSrc = pData;
            pSrcIndex = 0;
        }
    }
    pToRelease.releasePage();
    return(RC.OK);
}
// was:sqlite3BtreeMovetoUnpacked
// Positions the cursor at, or next to, the entry matching the given key by descending from the
// root and binary-searching each page on the way.
//   idxKey    - unpacked record to search for on index b-trees (null for int-key tables).
//   intKey    - integer key to search for on int-key tables.
//   biasRight - when true the first probe on every page is the last cell instead of the middle one.
//   pRes      - receives 0 on an exact match; otherwise the result of the last comparison of the
//               entry under the cursor against the search key (negative: entry is smaller,
//               positive: entry is larger). It is set to -1 when the tree is empty.
// Returns RC.OK, or the error reported by MoveToRoot, MoveToChild or AccessPayload.
public RC MoveToUnpacked(Btree.UnpackedRecord idxKey, long intKey, bool biasRight, ref int pRes)
{
    Debug.Assert(HoldsMutex());
    Debug.Assert(MutexEx.Held(Tree.DB.Mutex));
    Debug.Assert((idxKey == null) == (KeyInfo == null));

    // If the cursor is already positioned at the point we are trying to move to, then just return
    // without doing any work.
    if (State == CursorState.VALID && ValidNKey && Pages[0].HasIntKey)
    {
        if (Info.nKey == intKey)
        {
            pRes = 0;
            return RC.OK;
        }
        if (AtLast && Info.nKey < intKey)
        {
            pRes = -1;
            return RC.OK;
        }
    }

    var rc = MoveToRoot();
    if (rc != RC.OK)
    {
        return rc;
    }
    Debug.Assert(Pages[PageID] != null);
    Debug.Assert(Pages[PageID].HasInit);
    Debug.Assert(Pages[PageID].Cells > 0 || State == CursorState.INVALID);
    if (State == CursorState.INVALID)
    {
        // Empty tree: nothing to compare against.
        pRes = -1;
        Debug.Assert(Pages[PageID].Cells == 0);
        return RC.OK;
    }
    Debug.Assert(Pages[0].HasIntKey || idxKey != null);

    while (true)
    {
        var node = Pages[PageID];
        // The page must hold at least one cell. If this is the root page the cursor would have been INVALID above and this loop
        // not run. If this is not the root page, then the moveToChild() routine would have already detected db corruption. Similarly, the page must
        // be the right kind (index or table) of b-tree page. Otherwise a moveToChild() or moveToRoot() call would have detected corruption.
        Debug.Assert(node.Cells > 0);
        Debug.Assert(node.HasIntKey == (idxKey == null));

        var low = 0;
        var high = node.Cells - 1;
        var mid = biasRight ? high : (high + low) / 2;
        PagesIndexs[PageID] = (ushort)mid;

        int cmp;
        while (true)
        {
            Debug.Assert(mid == PagesIndexs[PageID]);
            Info.nSize = 0;
            var cellPos = node.FindCell(mid) + node.ChildPtrSize; // Offset of the current cell's content within the page
            if (node.HasIntKey)
            {
                if (node.HasData != 0)
                {
                    // Skip the data-size varint that precedes the key.
                    uint ignored;
                    cellPos += ConvertEx.GetVariant4(node.Data, (uint)cellPos, out ignored);
                }
                long cellKey = 0L;
                ConvertEx.GetVariant9L(node.Data, (uint)cellPos, out cellKey);
                if (cellKey < intKey)
                {
                    cmp = -1;
                }
                else if (cellKey == intKey)
                {
                    cmp = 0;
                }
                else
                {
                    Debug.Assert(cellKey > intKey);
                    cmp = 1;
                }
                ValidNKey = true;
                Info.nKey = cellKey;
            }
            else
            {
                // The maximum supported page-size is 65536 bytes. This means that the maximum number of record bytes stored on an index B-Tree
                // page is less than 16384 bytes and may be stored as a 2-byte varint. This information is used to attempt to avoid parsing
                // the entire cell by checking for the cases where the record is stored entirely within the b-tree page by inspecting the first
                // 2 bytes of the cell.
                var recordSize = (int)node.Data[cellPos + 0];
                if ((recordSize & 0x80) == 0 && recordSize <= node.MaxLocal)
                {
                    // The record-size field is a single byte varint and the record fits entirely on the main b-tree page.
                    cmp = Btree._vdbe.sqlite3VdbeRecordCompare(recordSize, node.Data, cellPos + 1, idxKey);
                }
                else
                {
                    var fitsWithTwoByteSize = false;
                    if ((node.Data[cellPos + 1] & 0x80) == 0)
                    {
                        recordSize = ((recordSize & 0x7f) << 7) + node.Data[cellPos + 1];
                        fitsWithTwoByteSize = recordSize <= node.MaxLocal;
                    }

                    if (fitsWithTwoByteSize)
                    {
                        // The record-size field is a 2 byte varint and the record fits entirely on the main b-tree page.
                        cmp = Btree._vdbe.sqlite3VdbeRecordCompare(recordSize, node.Data, cellPos + 2, idxKey);
                    }
                    else
                    {
                        // The record flows over onto one or more overflow pages. In this case the whole cell needs to be parsed, a buffer allocated
                        // and AccessPayload() used to retrieve the record into the buffer before the record comparison can be called.
                        var cellBytes = new byte[node.Data.Length - cellPos + node.ChildPtrSize];
                        Buffer.BlockCopy(node.Data, cellPos - node.ChildPtrSize, cellBytes, 0, cellBytes.Length);
                        node.btreeParseCellPtr(cellBytes, ref Info);
                        recordSize = (int)Info.nKey;
                        var keyBytes = MallocEx.sqlite3Malloc(recordSize);
                        rc = AccessPayload(0, (uint)recordSize, keyBytes, false);
                        if (rc != RC.OK)
                        {
                            return rc;
                        }
                        cmp = Btree._vdbe.sqlite3VdbeRecordCompare(recordSize, keyBytes, idxKey);
                    }
                }
            }

            if (cmp == 0)
            {
                if (node.HasIntKey && 0 == node.Leaf)
                {
                    // Match on an interior int-key page: continue the descent through this cell's child.
                    low = mid;
                    high = low - 1;
                    break;
                }
                pRes = 0;
                return RC.OK;
            }

            if (cmp < 0)
            {
                low = mid + 1;
            }
            else
            {
                high = mid - 1;
            }
            if (low > high)
            {
                break;
            }
            mid = (low + high) / 2;
            PagesIndexs[PageID] = (ushort)mid;
        }
        Debug.Assert(low == high + 1);
        Debug.Assert(node.HasInit);

        // Pick the child to descend into: none on a leaf, the right-most pointer when the search
        // ran past the last cell, otherwise the left child of cell 'low'.
        Pgno childPage;
        if (node.Leaf != 0)
        {
            childPage = 0;
        }
        else if (low >= node.Cells)
        {
            childPage = ConvertEx.Get4(node.Data, node.HeaderOffset + 8);
        }
        else
        {
            childPage = ConvertEx.Get4(node.Data, node.FindCell(low));
        }

        if (childPage == 0)
        {
            Debug.Assert(PagesIndexs[PageID] < Pages[PageID].Cells);
            pRes = cmp;
            return RC.OK;
        }

        PagesIndexs[PageID] = (ushort)low;
        Info.nSize = 0;
        ValidNKey = false;
        rc = MoveToChild(childPage);
        if (rc != RC.OK)
        {
            return rc;
        }
    }
}
// was:sqlite3BtreeDelete
// Deletes the entry the cursor currently points to and rebalances the tree.
// The cursor must be VALID and positioned on an existing cell, otherwise RC.ERROR is returned. On success the cursor
// is moved back to the root page (MoveToRoot). Any non-OK code from the intermediate steps is returned as is.
public RC Delete()
{
    var tree = Tree;
    var shared = tree.Shared;
    Debug.Assert(HoldsMutex());
    Debug.Assert(shared.InTransaction == TRANS.WRITE);
    Debug.Assert(!shared.ReadOnly);
    Debug.Assert(Writeable);
    Debug.Assert(tree.hasSharedCacheTableLock(RootID, (KeyInfo != null), LOCK.WRITE));
    Debug.Assert(!tree.hasReadConflicts(RootID));

    if (Check.NEVER(PagesIndexs[PageID] >= Pages[PageID].Cells) || Check.NEVER(State != CursorState.VALID))
    {
        return RC.ERROR;
    }

    // Deleting a row of a table b-tree (no KeyInfo): invalidate any incrblob cursors open on the row being deleted.
    if (KeyInfo == null)
    {
        Btree.invalidateIncrblobCursors(tree, Info.nKey, false);
    }

    var targetDepth = PageID;                         // Depth of the node containing the cell to delete
    int targetIndex = PagesIndexs[targetDepth];       // Index of the cell to delete
    var targetPage = Pages[targetDepth];              // Page to delete the cell from
    var cellOffset = targetPage.FindCell(targetIndex); // Offset of the cell to delete

    // If the page containing the entry is not a leaf, move the cursor to the largest entry in the tree that is smaller
    // than the entry being deleted. That cell will replace the deleted one in the internal node. The 'previous' entry
    // is used rather than the 'next' one because it always belongs to the sub-tree headed by the child page of the
    // cell being deleted, which makes balancing after the delete easier.
    RC rc;
    if (targetPage.Leaf == 0)
    {
        var unused = 0;
        rc = MovePrevious(ref unused);
        if (rc != RC.OK)
        {
            return rc;
        }
    }

    // Save the positions of the other cursors open on this table before modifying anything, make the page writable,
    // free any overflow pages of the entry, and finally remove the cell itself from the page.
    rc = shared.saveAllCursors(RootID, this);
    if (rc != RC.OK)
    {
        return rc;
    }
    rc = Pager.Write(targetPage.DbPage);
    if (rc != RC.OK)
    {
        return rc;
    }
    rc = targetPage.clearCell(cellOffset);
    targetPage.dropCell(targetIndex, targetPage.cellSizePtr(cellOffset), ref rc);
    if (rc != RC.OK)
    {
        return rc;
    }

    // If the deleted cell was on an internal node, the cursor now points at the largest entry of the sub-tree headed by
    // that cell's child page. Move that leaf cell up into the internal node to take the deleted cell's place.
    if (targetPage.Leaf == 0)
    {
        var leafPage = Pages[PageID];
        var childPgno = Pages[targetDepth + 1].ID;
        cellOffset = leafPage.FindCell(leafPage.Cells - 1);
        int leafCellSize = leafPage.cellSizePtr(cellOffset);
        Debug.Assert(Btree.MX_CELL_SIZE(shared) >= leafCellSize);
        rc = Pager.Write(leafPage.DbPage);
        // Copy the leaf cell together with the 4 bytes in front of it; the copy is what gets inserted.
        var cellCopy = MallocEx.sqlite3Malloc(leafCellSize + 4);
        Buffer.BlockCopy(leafPage.Data, cellOffset - 4, cellCopy, 0, leafCellSize + 4);
        targetPage.insertCell(targetIndex, cellCopy, leafCellSize + 4, null, childPgno, ref rc);
        leafPage.dropCell(leafPage.Cells - 1, leafCellSize, ref rc);
        if (rc != RC.OK)
        {
            return rc;
        }
    }

    // Balance the tree. If the deleted entry was on a leaf page, the cursor still points to that page, the first
    // Balance() call repairs the tree and the condition below is never true.
    //
    // Otherwise the cursor points to the leaf page a cell was taken from. The leaf may be underfull and the internal
    // node may be under- or overfull, so the leaf is balanced first. If that balance did not already climb up to the
    // internal node, walk the cursor up to it and balance there as well.
    rc = Balance();
    if (rc == RC.OK && PageID > targetDepth)
    {
        do
        {
            var released = Pages[PageID--];
            released.releasePage();
        }
        while (PageID > targetDepth);
        rc = Balance();
    }

    if (rc == RC.OK)
    {
        MoveToRoot();
    }
    return rc;
}
// was:sqlite3PcacheFetch
// Looks up page pgno in this cache, optionally creating it, and takes a reference on it.
//   pgno       - page number to fetch; asserted to be greater than zero.
//   createFlag - 0 to only look the page up, 1 to create it when it is missing.
//   ppPage     - receives the page header, or null when no page was obtained. It is left untouched when the method
//                returns early with an error code.
// Returns RC.OK when a page was obtained or when createFlag is 0; RC.NOMEM when the pluggable cache could not be
// created or a requested page could not be produced; or the error reported by xStress (anything other than OK/BUSY).
internal RC FetchPage(Pgno pgno, int createFlag, ref PgHdr ppPage)
{
    Debug.Assert(createFlag == 1 || createFlag == 0);
    Debug.Assert(pgno > 0);

    // If the pluggable cache (sqlite3_pcache*) has not been allocated, allocate it now.
    if (pCache == null && createFlag != 0)
    {
        var nByte = szPage + szExtra + 0;
        var p = IPCache.xCreate(nByte, bPurgeable);
        if (p == null)
        {
            // Report the failed allocation instead of dereferencing a null cache below.
            return RC.NOMEM;
        }
        p.xCachesize(nMax);
        pCache = p;
    }

    // eCreate is 0 when createFlag is 0; otherwise 2 when the cache is not purgeable or has no dirty pages, else 1.
    // With eCreate == 1 a failed fetch is followed by an attempt to spill a dirty page (below).
    var eCreate = createFlag * (1 + ((!bPurgeable || null == pDirty) ? 1 : 0));
    PgHdr pPage = null;
    if (pCache != null)
    {
        pPage = pCache.xFetch(pgno, eCreate);
    }

    if (pPage == null && eCreate == 1)
    {
        PgHdr pPg;
        // Find a dirty page to write-out and recycle. First try to find a page that does not require a journal-sync
        // (one with PGHDR_NEED_SYNC cleared), but if that is not possible settle for any other unreferenced dirty page.
#if SQLITE_ENABLE_EXPENSIVE_ASSERT
        expensive_assert(pcacheCheckSynced(pCache));
#endif
        for (pPg = pSynced; pPg != null && (pPg.Refs != 0 || (pPg.Flags & PgHdr.PGHDR.NEED_SYNC) != 0); pPg = pPg.DirtyPrev)
        {
        }
        pSynced = pPg;
        if (pPg == null)
        {
            for (pPg = pDirtyTail; pPg != null && pPg.Refs != 0; pPg = pPg.DirtyPrev)
            {
            }
        }
        if (pPg != null)
        {
#if SQLITE_LOG_CACHE_SPILL
            sqlite3_log(SQLITE_FULL, "spill page %d making room for %d - cache used: %d/%d", pPg.pgno, pgno, sqlite3GlobalConfig.pcache.xPagecount(pCache.pCache), pCache.nMax);
#endif
            var rc = xStress(pStress, pPg);
            if (rc != RC.OK && rc != RC.BUSY)
            {
                return rc;
            }
        }
        // Retry the fetch, this time with create mode 2.
        pPage = pCache.xFetch(pgno, 2);
    }

    if (pPage != null)
    {
        if (pPage.Data == null)
        {
            // First use of this header: give it a buffer of this cache's page size (the same size TruncatePage uses
            // for page 1) and bind it to this cache. NOTE(review): the original sized the buffer from the pluggable
            // cache's szPage member, which was created with szPage + szExtra; confirm no caller relied on that.
            pPage.Data = MallocEx.sqlite3Malloc(szPage);
            pPage.Cache = this;
            pPage.ID = pgno;
        }
        Debug.Assert(pPage.Cache == this);
        Debug.Assert(pPage.ID == pgno);
        // nRef is bumped only when the page goes from unreferenced to referenced.
        if (pPage.Refs == 0)
        {
            nRef++;
        }
        pPage.Refs++;
        if (pgno == 1)
        {
            pPage1 = pPage;
        }
    }

    ppPage = pPage;
    return (pPage == null && eCreate != 0) ? RC.NOMEM : RC.OK;
}