/// <summary>
/// Reads the hybrid-log checkpoint snapshot file page by page into the log's page buffers,
/// runs <c>RecoverFromPage</c> over the portion of each page that lies in the
/// [index start address, final address) window, and flushes every page back through
/// <c>hlog.AsyncFlushPages</c>. Blocks until all pages have been flushed, then closes
/// the snapshot devices.
/// </summary>
/// <param name="indexRecoveryInfo">Supplies <c>startLogicalAddress</c>, the address from which records are re-applied.</param>
/// <param name="recoveryInfo">Supplies the snapshot's <c>flushedLogicalAddress</c> (first address in the file),
/// <c>finalLogicalAddress</c> (exclusive end), <c>guid</c> (used to locate the file) and <c>version</c>.</param>
private void RecoverHybridLogFromSnapshotFile(
    IndexRecoveryInfo indexRecoveryInfo,
    HybridLogRecoveryInfo recoveryInfo)
{
    var fileStartAddress = recoveryInfo.flushedLogicalAddress;
    var fromAddress = indexRecoveryInfo.startLogicalAddress;
    var untilAddress = recoveryInfo.finalLogicalAddress;

    // Compute the page range [startPage, endPage) covered by the snapshot file.
    // endPage is bumped when untilAddress falls strictly inside its page so that
    // the partially filled last page is included.
    var startPage = hlog.GetPage(fileStartAddress);
    var endPage = hlog.GetPage(untilAddress);
    if (untilAddress > hlog.GetStartLogicalAddress(endPage))
    {
        endPage++;
    }

    // Number of pages the log reports as its capacity; bounds the initial read batch.
    var capacity = hlog.GetCapacityNumPages();

    // Both devices are opened from the same checkpoint file name for this guid.
    var recoveryDevice = Devices.CreateLogDevice(
        directoryConfiguration.GetHybridLogCheckpointFileName(recoveryInfo.guid), false);
    var objectLogRecoveryDevice = Devices.CreateObjectLogDevice(
        directoryConfiguration.GetHybridLogCheckpointFileName(recoveryInfo.guid), false);
    recoveryDevice.Initialize(hlog.GetSegmentSize());
    objectLogRecoveryDevice.Initialize(hlog.GetSegmentSize());

    var recoveryStatus = new RecoveryStatus(capacity, startPage, endPage)
    {
        recoveryDevice = recoveryDevice,
        objectLogRecoveryDevice = objectLogRecoveryDevice,
        recoveryDevicePageOffset = startPage
    };

    // Initially issue read requests for as many pages as fit within capacity.
    int totalPagesToRead = (int)(endPage - startPage);
    int numPagesToReadFirst = Math.Min(capacity, totalPagesToRead);

    hlog.AsyncReadPagesFromDevice(
        startPage,
        numPagesToReadFirst,
        AsyncReadPagesCallbackForRecovery,
        recoveryStatus,
        recoveryStatus.recoveryDevicePageOffset,
        recoveryStatus.recoveryDevice,
        recoveryStatus.objectLogRecoveryDevice);

    for (long page = startPage; page < endPage; page++)
    {
        // Ensure the page has been read from the file before touching it.
        int pageIndex = hlog.GetPageIndexForPage(page);
        while (recoveryStatus.readStatus[pageIndex] == ReadStatus.Pending)
        {
            Thread.Sleep(10);
        }

        // Logical address range [startLogicalAddress, endLogicalAddress) of the page at hand.
        var startLogicalAddress = hlog.GetStartLogicalAddress(page);
        var endLogicalAddress = hlog.GetStartLogicalAddress(page + 1);

        // Perform recovery if the page is in the fuzzy portion of the log.
        if ((fromAddress < endLogicalAddress) && (fromAddress < untilAddress))
        {
            /*
             * Handling corner-cases:
             * ----------------------
             * When fromAddress is in the middle of the page,
             * then start recovery only from the corresponding offset
             * in the page. Similarly, if untilAddress falls in the
             * middle of the page, perform recovery only until that
             * offset. Otherwise, scan the entire page [0, PageSize).
             */
            var pageFromAddress = 0L;
            if (fromAddress > startLogicalAddress && fromAddress < endLogicalAddress)
            {
                pageFromAddress = hlog.GetOffsetInPage(fromAddress);
            }

            var pageUntilAddress = hlog.GetPageSize();
            if (endLogicalAddress > untilAddress)
            {
                pageUntilAddress = hlog.GetOffsetInPage(untilAddress);
            }

            var physicalAddress = hlog.GetPhysicalAddress(startLogicalAddress);
            RecoverFromPage(
                fromAddress,
                pageFromAddress,
                pageUntilAddress,
                startLogicalAddress,
                physicalAddress,
                recoveryInfo.version);
        }

        // Mark the slot as busy before handing the page to the flush path.
        // NOTE(review): the flush callback (not visible here) is presumably what clears
        // flushStatus and issues the next read into this slot - confirm.
        recoveryStatus.readStatus[pageIndex] = ReadStatus.Pending;
        recoveryStatus.flushStatus[pageIndex] = FlushStatus.Pending;

        // Write back records from the snapshot to the main hybrid log.
        hlog.AsyncFlushPages(page, 1, AsyncFlushPageCallbackForRecovery, recoveryStatus);
    }

    // Wait until all pages have been flushed. Back off between polls, like the
    // read-wait loop above, instead of spinning a core at 100% while waiting on I/O.
    var done = false;
    while (!done)
    {
        done = true;
        for (long page = startPage; page < endPage; page++)
        {
            int pageIndex = hlog.GetPageIndexForPage(page);
            if (recoveryStatus.flushStatus[pageIndex] == FlushStatus.Pending)
            {
                done = false;
                break;
            }
        }

        if (!done)
        {
            Thread.Sleep(10);
        }
    }

    recoveryStatus.recoveryDevice.Close();
    recoveryStatus.objectLogRecoveryDevice.Close();
}
/// <summary>
/// Attempts to move the global system state from <paramref name="currentState"/> to
/// <paramref name="nextState"/>, performing the work associated with entering the target phase.
/// </summary>
/// <remarks>
/// The state is first moved to an INTERMEDIATE phase carrying <paramref name="currentState"/>'s
/// version; only the caller whose <c>MakeTransition</c> succeeds executes the phase-specific
/// work and then publishes <paramref name="nextState"/>.
/// NOTE(review): the phase switch has no default case, so a <paramref name="nextState"/> whose
/// phase is not listed leaves the state at INTERMEDIATE while this method still returns true -
/// confirm that callers never pass such a phase. The same applies when one of the
/// <see cref="InvalidOperationException"/> paths below is taken.
/// </remarks>
/// <param name="currentState">State the system is expected to be in.</param>
/// <param name="nextState">State to move to; its phase selects the work performed.</param>
/// <param name="context">Phase-dependent payload. Cast to <c>CheckpointType</c> when entering
/// PREP_INDEX_CHECKPOINT, or PREPARE from REST; passed to <c>hlog.ShiftBeginAddress</c> when
/// entering GC. It is not read for other phases and is never written by this method.</param>
/// <returns><c>true</c> if this call acquired the transition out of <paramref name="currentState"/>;
/// <c>false</c> if the initial transition to INTERMEDIATE failed.</returns>
/// <exception cref="InvalidOperationException">The checkpoint type or the current phase is not
/// valid for the requested target phase.</exception>
private bool GlobalMoveToNextState(SystemState currentState, SystemState nextState, ref long context)
{
    var intermediateState = SystemState.Make(Phase.INTERMEDIATE, currentState.version);

    // Move from S1 to I. Failing here means this caller did not acquire ownership.
    if (!MakeTransition(currentState, intermediateState))
    {
        return false;
    }

    // Acquired ownership to make the transition from S1 to S2.
    switch (nextState.phase)
    {
        case Phase.PREP_INDEX_CHECKPOINT:
            {
                _checkpointType = (CheckpointType)context;
                switch (_checkpointType)
                {
                    case CheckpointType.INDEX_ONLY:
                        {
                            _indexCheckpointToken = Guid.NewGuid();
                            InitializeIndexCheckpoint(_indexCheckpointToken);
                            break;
                        }
                    case CheckpointType.FULL:
                        {
                            // A full checkpoint shares one token between index and hybrid log.
                            var fullCheckpointToken = Guid.NewGuid();
                            _indexCheckpointToken = fullCheckpointToken;
                            _hybridLogCheckpointToken = fullCheckpointToken;
                            InitializeIndexCheckpoint(_indexCheckpointToken);
                            InitializeHybridLogCheckpoint(_hybridLogCheckpointToken, currentState.version);
                            break;
                        }
                    default:
                        throw new InvalidOperationException(
                            $"Checkpoint type {_checkpointType} is not valid when entering {nextState.phase}.");
                }

                ObtainCurrentTailAddress(ref _indexCheckpoint.info.startLogicalAddress);

                MakeTransition(intermediateState, nextState);
                break;
            }
        case Phase.INDEX_CHECKPOINT:
            {
                TakeIndexFuzzyCheckpoint();

                MakeTransition(intermediateState, nextState);
                break;
            }
        case Phase.PREPARE:
            {
                switch (currentState.phase)
                {
                    case Phase.REST:
                        {
                            _checkpointType = (CheckpointType)context;
                            Debug.Assert(_checkpointType == CheckpointType.HYBRID_LOG_ONLY);
                            _hybridLogCheckpointToken = Guid.NewGuid();
                            InitializeHybridLogCheckpoint(_hybridLogCheckpointToken, currentState.version);
                            break;
                        }
                    case Phase.PREP_INDEX_CHECKPOINT:
                        {
                            TakeIndexFuzzyCheckpoint();
                            break;
                        }
                    default:
                        throw new InvalidOperationException(
                            $"Cannot enter {nextState.phase} from phase {currentState.phase}.");
                }

                ObtainCurrentTailAddress(ref _hybridLogCheckpoint.info.startLogicalAddress);

                if (!FoldOverSnapshot)
                {
                    // Snapshot mode: remember how far the main log was already flushed.
                    // The WAIT_FLUSH case uses this address as the first page of the snapshot file.
                    _hybridLogCheckpoint.info.flushedLogicalAddress = hlog.FlushedUntilAddress;
                    _hybridLogCheckpoint.info.useSnapshotFile = 1;
                }

                MakeTransition(intermediateState, nextState);
                break;
            }
        case Phase.IN_PROGRESS:
            {
                MakeTransition(intermediateState, nextState);
                break;
            }
        case Phase.WAIT_PENDING:
            {
                // Keep a private copy of the object-log segment offsets, when there are any.
                var seg = hlog.GetSegmentOffsets();
                if (seg != null)
                {
                    _hybridLogCheckpoint.info.objectLogSegmentOffsets = new long[seg.Length];
                    Array.Copy(seg, _hybridLogCheckpoint.info.objectLogSegmentOffsets, seg.Length);
                }

                MakeTransition(intermediateState, nextState);
                break;
            }
        case Phase.WAIT_FLUSH:
            {
                if (_checkpointType == CheckpointType.FULL)
                {
                    _indexCheckpoint.info.num_buckets = overflowBucketsAllocator.GetMaxValidAddress();
                    ObtainCurrentTailAddress(ref _indexCheckpoint.info.finalLogicalAddress);
                    WriteIndexMetaFile();
                }

                if (FoldOverSnapshot)
                {
                    hlog.ShiftReadOnlyToTail(out long tailAddress);

                    _hybridLogCheckpoint.info.finalLogicalAddress = tailAddress;
                }
                else
                {
                    ObtainCurrentTailAddress(ref _hybridLogCheckpoint.info.finalLogicalAddress);

                    _hybridLogCheckpoint.snapshotFileDevice = Devices.CreateLogDevice(
                        directoryConfiguration.GetHybridLogCheckpointFileName(_hybridLogCheckpointToken), false);
                    _hybridLogCheckpoint.snapshotFileObjectLogDevice = Devices.CreateObjectLogDevice(
                        directoryConfiguration.GetHybridLogCheckpointFileName(_hybridLogCheckpointToken), false);
                    _hybridLogCheckpoint.snapshotFileDevice.Initialize(hlog.GetSegmentSize());
                    _hybridLogCheckpoint.snapshotFileObjectLogDevice.Initialize(hlog.GetSegmentSize());

                    // Page range [startPage, endPage) to write to the snapshot file; the last
                    // page is included when the final address falls strictly inside it.
                    long startPage = hlog.GetPage(_hybridLogCheckpoint.info.flushedLogicalAddress);
                    long endPage = hlog.GetPage(_hybridLogCheckpoint.info.finalLogicalAddress);
                    if (_hybridLogCheckpoint.info.finalLogicalAddress > hlog.GetStartLogicalAddress(endPage))
                    {
                        endPage++;
                    }

                    // This can be run on a new thread if we want to immediately parallelize
                    // the rest of the log flush.
                    hlog.AsyncFlushPagesToDevice(
                        startPage,
                        endPage,
                        _hybridLogCheckpoint.info.finalLogicalAddress,
                        _hybridLogCheckpoint.snapshotFileDevice,
                        _hybridLogCheckpoint.snapshotFileObjectLogDevice,
                        out _hybridLogCheckpoint.flushed);
                }

                WriteHybridLogMetaInfo();

                MakeTransition(intermediateState, nextState);
                break;
            }
        case Phase.PERSISTENCE_CALLBACK:
            {
                MakeTransition(intermediateState, nextState);
                break;
            }
        case Phase.GC:
            {
                hlog.ShiftBeginAddress(context);

                int numChunks = (int)(state[resizeInfo.version].size / Constants.kSizeofChunk);
                if (numChunks == 0)
                {
                    numChunks = 1; // at least one chunk
                }

                numPendingChunksToBeGCed = numChunks;
                gcStatus = new long[numChunks];

                MakeTransition(intermediateState, nextState);
                break;
            }
        case Phase.PREPARE_GROW:
            {
                // Note that the transition must be done before bumping epoch here!
                MakeTransition(intermediateState, nextState);

                // A ref parameter cannot be captured by the lambda, so the follow-up
                // transition is given its own zero-valued context.
                epoch.BumpCurrentEpoch(() =>
                {
                    long _context = 0;
                    GlobalMoveToNextState(
                        nextState,
                        SystemState.Make(Phase.IN_PROGRESS_GROW, nextState.version),
                        ref _context);
                });
                break;
            }
        case Phase.IN_PROGRESS_GROW:
            {
                // Set up the transition to the new version of the hash table.
                int numChunks = (int)(state[resizeInfo.version].size / Constants.kSizeofChunk);
                if (numChunks == 0)
                {
                    numChunks = 1; // at least one chunk
                }

                numPendingChunksToBeSplit = numChunks;
                splitStatus = new long[numChunks];

                // Initialize the other table version at double the size, then switch to it.
                Initialize(1 - resizeInfo.version, state[resizeInfo.version].size * 2, sectorSize);

                resizeInfo.version = 1 - resizeInfo.version;

                MakeTransition(intermediateState, nextState);
                break;
            }
        case Phase.REST:
            {
                switch (_checkpointType)
                {
                    case CheckpointType.INDEX_ONLY:
                        {
                            _indexCheckpoint.info.num_buckets = overflowBucketsAllocator.GetMaxValidAddress();
                            ObtainCurrentTailAddress(ref _indexCheckpoint.info.finalLogicalAddress);
                            WriteIndexMetaFile();
                            _indexCheckpoint.Reset();
                            break;
                        }
                    case CheckpointType.FULL:
                        {
                            _indexCheckpoint.Reset();
                            _hybridLogCheckpoint.Reset();
                            break;
                        }
                    case CheckpointType.HYBRID_LOG_ONLY:
                        {
                            _hybridLogCheckpoint.Reset();
                            break;
                        }
                    case CheckpointType.NONE:
                        break;
                    default:
                        throw new InvalidOperationException(
                            $"Checkpoint type {_checkpointType} is not valid when entering {nextState.phase}.");
                }

                _checkpointType = CheckpointType.NONE;

                MakeTransition(intermediateState, nextState);
                break;
            }
    }

    return true;
}