/// <summary>
/// Gets the list of commits from a given blobEntry, starting from a given date
/// until the end date.
/// </summary>
/// <param name="bucketId">The blobEntry id to pull commits from.</param>
/// <param name="start">The starting date for commits.</param>
/// <param name="end">The ending date for commits.</param>
/// <returns>The list of commits from the given blobEntry that fall on or after the start date and on or before the end date.</returns>
public IEnumerable<ICommit> GetFromTo(string bucketId, DateTime start, DateTime end)
{
    var pageBlobs = WrappedPageBlob.GetAllMatchinPrefix(_primaryContainer, GetContainerName() + "/" + bucketId);

    // this could be a tremendous amount of data. Depending on the system used,
    // this may not be performant enough and may require some sort of index to be built.
    var allCommitDefinitions = new List<Tuple<WrappedPageBlob, PageBlobCommitDefinition>>();
    foreach (var pageBlob in pageBlobs)
    {
        HeaderDefinitionMetadata headerDefinitionMetadata = null;
        var header = GetHeaderWithRetry(pageBlob, out headerDefinitionMetadata);
        foreach (var definition in header.PageBlobCommitDefinitions)
        {
            if (definition.CommitStampUtc >= start && definition.CommitStampUtc <= end)
            {
                allCommitDefinitions.Add(new Tuple<WrappedPageBlob, PageBlobCommitDefinition>(pageBlob, definition));
            }
        }
    }

    // now sort the definitions so we can return them sorted
    var orderedCommitDefinitions = allCommitDefinitions.OrderBy(x => x.Item2.CommitStampUtc);
    foreach (var orderedCommitDefinition in orderedCommitDefinitions)
    {
        yield return CreateCommitFromDefinition(orderedCommitDefinition.Item1, orderedCommitDefinition.Item2);
    }
}
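// NOTE: CreateCommitFromDefinition is called throughout this listing but is not part of it.
// The sketch below is an inferred illustration only, based on how the commit definitions are
// built and consumed elsewhere in this file (StartPage / TotalPagesUsed / DataSizeBytes);
// the actual implementation may differ.
private ICommit CreateCommitFromDefinition(WrappedPageBlob blob, PageBlobCommitDefinition definition)
{
    // the definition records the page the commit starts on and how many pages it spans
    var startIndexBytes = definition.StartPage * BlobPageSize;
    var endIndexBytes = startIndexBytes + (definition.TotalPagesUsed * BlobPageSize);
    var downloadedData = blob.DownloadBytes(startIndexBytes, endIndexBytes);

    // only DataSizeBytes of the downloaded pages is real payload; the rest is page padding
    using (var ms = new MemoryStream(downloadedData, 0, definition.DataSizeBytes, false))
    {
        var azureBlobCommit = _serializer.Deserialize<AzureBlobCommit>(ms);
        return CreateCommitFromAzureBlobCommit(azureBlobCommit);
    }
}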
/// <summary>
/// Safely retrieves the header.
/// </summary>
/// <param name="blob">blob</param>
/// <param name="headerDefinition">header definition</param>
/// <param name="exception">exception that occurred, if any</param>
/// <returns>the header, otherwise null if it could not be fetched</returns>
private StreamBlobHeader SafeGetHeader(WrappedPageBlob blob, HeaderDefinitionMetadata headerDefinition, out Exception exception)
{
    StreamBlobHeader header = null;
    exception = null;

    try
    {
        if (headerDefinition == null || headerDefinition.HeaderSizeInBytes == 0)
        {
            throw new InvalidHeaderDataException(
                $"Attempted to download a header, but the size specified is zero. This aggregate with id [{blob.Name}] may be corrupt.");
        }

        var downloadedData = blob.DownloadBytes(headerDefinition.HeaderStartLocationOffsetBytes,
            headerDefinition.HeaderStartLocationOffsetBytes + headerDefinition.HeaderSizeInBytes);

        // the downloaded buffer contains exactly the header bytes, so deserialize it directly
        header = _serializer.Deserialize<StreamBlobHeader>(downloadedData);
    }
    catch (Exception ex)
    {
        exception = ex;
    }

    return header;
}
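// NOTE: InvalidHeaderDataException is thrown above and caught in GetUndispatchedCommits,
// but its definition is not included in this listing. A minimal sketch, assuming it is a
// plain exception type used to flag unreadable header metadata:
public class InvalidHeaderDataException : Exception
{
    public InvalidHeaderDataException(string message)
        : base(message)
    { }
}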
/// <summary>
/// Gets the header definition metadata for the given fallback index.
/// </summary>
/// <param name="blob">The blob whose metadata holds the header definition.</param>
/// <param name="index">0, 1, or 2 for the primary, secondary, or tertiary definition respectively.</param>
/// <returns>The header definition, or null if the metadata key is absent.</returns>
private HeaderDefinitionMetadata GetHeaderDefinitionMetadata(WrappedPageBlob blob, int index)
{
    string keyToUse = null;
    if (index == 0)
    {
        keyToUse = PrimaryHeaderDefinitionKey;
    }
    else if (index == 1)
    {
        keyToUse = SecondaryHeaderDefinitionKey;
    }
    else if (index == 2)
    {
        keyToUse = TertiaryHeaderDefintionKey;
    }
    else
    {
        throw new ArgumentException("value must be 0, 1, or 2 for primary, secondary, or tertiary respectively.", nameof(index));
    }

    HeaderDefinitionMetadata headerDefinition = null;
    string serializedHeaderDefinition;
    if (blob.Metadata.TryGetValue(keyToUse, out serializedHeaderDefinition))
    {
        headerDefinition = HeaderDefinitionMetadata.FromRaw(Convert.FromBase64String(serializedHeaderDefinition));
    }

    return headerDefinition;
}
/// <summary>
/// Ordered fetch by checkpoint. Note that this will get very slow as the number of
/// aggregates increases, since every blob in every matching container must be scanned.
/// </summary>
/// <param name="checkpointToken">The checkpoint token. Note that it is not used to filter the scan below.</param>
/// <returns>All commits, ordered by checkpoint.</returns>
public IEnumerable<ICommit> GetFrom(string checkpointToken = null)
{
    var containers = _blobClient.ListContainers(EventSourcePrefix);
    var allCommitDefinitions = new List<Tuple<WrappedPageBlob, PageBlobCommitDefinition>>();

    foreach (var container in containers)
    {
        var blobs = WrappedPageBlob.GetAllMatchinPrefix(container, GetContainerName());

        // this could be a tremendous amount of data. Depending on the system used,
        // this may not be performant enough and may require some sort of index to be built.
        foreach (var pageBlob in blobs)
        {
            HeaderDefinitionMetadata headerDefinitionMetadata = null;
            var header = GetHeaderWithRetry(pageBlob, out headerDefinitionMetadata);
            foreach (var definition in header.PageBlobCommitDefinitions)
            {
                allCommitDefinitions.Add(new Tuple<WrappedPageBlob, PageBlobCommitDefinition>(pageBlob, definition));
            }
        }
    }

    // now sort the definitions so we can return them sorted
    var orderedCommitDefinitions = allCommitDefinitions.OrderBy(x => x.Item2.Checkpoint);
    foreach (var orderedCommitDefinition in orderedCommitDefinitions)
    {
        yield return CreateCommitFromDefinition(orderedCommitDefinition.Item1, orderedCommitDefinition.Item2);
    }
}
/// <summary>
/// Gets the deserialized header from the blob. Falls back through the primary, secondary,
/// and tertiary header definitions until a valid header can be read.
/// </summary>
/// <param name="blob">The blob.</param>
/// <param name="validHeaderDefinition">outputs the header definition that proved valid, or null if there is not currently one</param>
/// <returns>A populated StreamBlobHeader.</returns>
private StreamBlobHeader GetHeaderWithRetry(WrappedPageBlob blob, out HeaderDefinitionMetadata validHeaderDefinition)
{
    HeaderDefinitionMetadata assumedValidHeaderDefinition = null;
    StreamBlobHeader header = null;
    Exception lastException = null;

    // first, if the primary header metadata key does not exist then we default
    if (!blob.Metadata.ContainsKey(PrimaryHeaderDefinitionKey))
    {
        assumedValidHeaderDefinition = new HeaderDefinitionMetadata();
        header = new StreamBlobHeader();
    }

    // do the fallback logic to try and find a valid header
    if (header == null)
    {
        for (var i = 0; i != 3; ++i)
        {
            assumedValidHeaderDefinition = GetHeaderDefinitionMetadata(blob, i);
            header = SafeGetHeader(blob, assumedValidHeaderDefinition, out lastException);

            if (header != null)
            {
                break;
            }
        }
    }

    // It is possible we will still have no header here and still be in an ok state. This is the case where the
    // aggregate's first commit set some metadata (specifically the PrimaryHeaderDefinitionKey) but then failed to
    // write the header. Such a blob will have a PrimaryHeaderDefinitionKey but no secondary or tertiary key, and
    // its first-write-completed key will be set to false. If it has no first-write key at all, it is a "legacy"
    // blob and will get the key set upon a future successful write. Legacy blobs with this issue will continue to
    // fail and currently require manual intervention.
    if (header == null)
    {
        string firstWriteCompleted;
        if (blob.Metadata.TryGetValue(FirstWriteCompletedKey, out firstWriteCompleted))
        {
            if (firstWriteCompleted == "f")
            {
                header = new StreamBlobHeader();
                assumedValidHeaderDefinition = new HeaderDefinitionMetadata();
            }
        }
    }

    if (header != null)
    {
        validHeaderDefinition = assumedValidHeaderDefinition;
        return header;
    }

    throw lastException ?? new Exception("No header could be created");
}
/// <summary>
/// Gets commits from a given blobEntry and stream id that fall within min and max revisions.
/// </summary>
/// <param name="bucketId">The blobEntry id to pull from.</param>
/// <param name="streamId">The stream id.</param>
/// <param name="minRevision">The minimum revision.</param>
/// <param name="maxRevision">The maximum revision.</param>
/// <returns>The commits whose revisions fall within the given range.</returns>
public IEnumerable<ICommit> GetFrom(string bucketId, string streamId, int minRevision, int maxRevision)
{
    var pageBlob = WrappedPageBlob.CreateNewIfNotExists(_primaryContainer, bucketId + "/" + streamId, _options.BlobNumPages);

    HeaderDefinitionMetadata headerDefinitionMetadata = null;
    var header = GetHeaderWithRetry(pageBlob, out headerDefinitionMetadata);

    // find out which pages we are reading
    var startPage = 0;
    var endPage = startPage + 1;
    var startIndex = 0;
    var numberOfCommits = 0;
    foreach (var commitDefinition in header.PageBlobCommitDefinitions)
    {
        if (minRevision > commitDefinition.Revision)
        {
            ++startIndex;
            startPage += commitDefinition.TotalPagesUsed;
        }
        else if (maxRevision < commitDefinition.Revision)
        {
            break;
        }
        else
        {
            ++numberOfCommits;
        }

        endPage += commitDefinition.TotalPagesUsed;
    }

    // download all the data
    var downloadedData = pageBlob.DownloadBytes(startPage * BlobPageSize, endPage * BlobPageSize);

    // process the downloaded data
    var commits = new List<ICommit>();
    for (var i = startIndex; i != startIndex + numberOfCommits; i++)
    {
        var commitStartIndex = (header.PageBlobCommitDefinitions[i].StartPage - startPage) * BlobPageSize;
        var commitSize = header.PageBlobCommitDefinitions[i].DataSizeBytes;
        using (var ms = new MemoryStream(downloadedData, commitStartIndex, commitSize, false))
        {
            var commit = _serializer.Deserialize<AzureBlobCommit>(ms);
            commits.Add(CreateCommitFromAzureBlobCommit(commit));
        }
    }

    return commits;
}
/// <summary>
/// Adds a commit to a stream.
/// </summary>
/// <param name="attempt">The commit attempt to be added.</param>
/// <returns>A Commit if successful.</returns>
public ICommit Commit(CommitAttempt attempt)
{
    var pageBlob = WrappedPageBlob.CreateNewIfNotExists(_primaryContainer, attempt.BucketId + "/" + attempt.StreamId, _options.BlobNumPages);

    HeaderDefinitionMetadata headerDefinitionMetadata = null;
    var header = GetHeaderWithRetry(pageBlob, out headerDefinitionMetadata);

    // we must commit at a page offset, so we track how many pages in we must start writing at
    var startPage = 0;
    foreach (var commit in header.PageBlobCommitDefinitions)
    {
        if (commit.CommitId == attempt.CommitId)
        {
            throw new DuplicateCommitException("Duplicate Commit Attempt");
        }

        startPage += commit.TotalPagesUsed;
    }

    if (attempt.CommitSequence <= header.LastCommitSequence)
    {
        throw new ConcurrencyException("Concurrency exception in Commit");
    }

    var blobCommit = new AzureBlobCommit
    {
        BucketId = attempt.BucketId,
        CommitId = attempt.CommitId,
        CommitSequence = attempt.CommitSequence,
        CommitStampUtc = attempt.CommitStamp,
        Events = attempt.Events.ToList(),
        Headers = attempt.Headers,
        StreamId = attempt.StreamId,
        StreamRevision = attempt.StreamRevision,
        Checkpoint = GetNextCheckpoint()
    };
    var serializedBlobCommit = _serializer.Serialize(blobCommit);

    header.AppendPageBlobCommitDefinition(new PageBlobCommitDefinition(serializedBlobCommit.Length, attempt.CommitId, attempt.StreamRevision,
        attempt.CommitStamp, header.PageBlobCommitDefinitions.Count, startPage, blobCommit.Checkpoint));
    ++header.UndispatchedCommitCount;
    header.LastCommitSequence = attempt.CommitSequence;

    CommitNewMessage(pageBlob, serializedBlobCommit, header, headerDefinitionMetadata, startPage * BlobPageSize);
    return CreateCommitFromAzureBlobCommit(blobCommit);
}
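// NOTE: GetNextCheckpoint is referenced above but not shown. The sketch below uses an
// in-process interlocked counter purely to illustrate the contract (monotonically increasing
// checkpoint values); a real store would need a durable, store-wide sequence, e.g. a counter
// persisted in blob or table storage and incremented under an ETag condition.
// _lastCheckpoint is a hypothetical field, not part of the original listing.
private long _lastCheckpoint;

private long GetNextCheckpoint()
{
    return System.Threading.Interlocked.Increment(ref _lastCheckpoint);
}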
/// <summary>
/// Marks a commit in the stream as dispatched.
/// </summary>
/// <param name="commit">The commit object to mark as dispatched.</param>
public void MarkCommitAsDispatched(ICommit commit)
{
    AddCheckpointTableEntry(commit);
    var pageBlob = WrappedPageBlob.GetAssumingExists(_primaryContainer, commit.BucketId + "/" + commit.StreamId);

    HeaderDefinitionMetadata headerDefinition = null;
    var header = GetHeaderWithRetry(pageBlob, out headerDefinition);

    // find the commit in the header definitions, mark it dispatched, and decrement the undispatched count
    foreach (var commitDefinition in header.PageBlobCommitDefinitions)
    {
        if (commit.CommitId == commitDefinition.CommitId)
        {
            commitDefinition.IsDispatched = true;
            --header.UndispatchedCommitCount;
        }
    }

    CommitNewMessage(pageBlob, null, header, headerDefinition, headerDefinition.HeaderStartLocationOffsetBytes);
}
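// NOTE: AddCheckpointTableEntry is called above but not defined in this listing. A minimal
// sketch, assuming a WindowsAzure.Storage table (_checkpointTable, a hypothetical field)
// that records dispatched checkpoints; the partition/row key scheme here is an assumption,
// not the store's actual one.
private CloudTable _checkpointTable;

private void AddCheckpointTableEntry(ICommit commit)
{
    // key the entry by bucket and checkpoint so dispatched commits can be looked up later
    var entity = new DynamicTableEntity(commit.BucketId, commit.CheckpointToken);
    entity.Properties["StreamId"] = new EntityProperty(commit.StreamId);
    _checkpointTable.Execute(TableOperation.InsertOrReplace(entity));
}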
/// <summary>
/// Writes to the page blob.
/// </summary>
/// <param name="pageDataWithHeaderAligned">data to write, aligned, with the header appended to it.</param>
/// <param name="startOffsetAligned">where writing will start (aligned)</param>
/// <param name="newHeaderOffsetBytesNonAligned">start index for where the new header will be written (not aligned)</param>
/// <param name="currentHeaderDefinition">definition of the header currently on the blob, before this write</param>
internal void Write(Stream pageDataWithHeaderAligned, int startOffsetAligned, int newHeaderOffsetBytesNonAligned, HeaderDefinitionMetadata currentHeaderDefinition)
{
    try
    {
        Logger.Verbose("Writing [{0}] bytes for blob [{1}], etag [{2}]", pageDataWithHeaderAligned.Length, _pageBlob.Uri, _pageBlob.Properties.ETag);

        // If our entire payload is less than four megabytes we can write this operation in a single commit.
        // Otherwise we must chunk, requiring some more complex management of our header data.
        const int maxSingleWriteSizeBytes = 1024 * 1024 * 4;
        if (pageDataWithHeaderAligned.Length <= maxSingleWriteSizeBytes)
        {
            _pageBlob.WritePages(pageDataWithHeaderAligned, startOffsetAligned, null, AccessCondition.GenerateIfMatchCondition(_pageBlob.Properties.ETag));
        }
        else
        {
            // if there is no header yet, then we have nothing to do around saving off the old header.
            if (currentHeaderDefinition.HeaderSizeInBytes != 0)
            {
                // the first thing we must do is copy the old header to the new assumed location.
                var serializedHeader = this.DownloadBytes(currentHeaderDefinition.HeaderStartLocationOffsetBytes,
                    currentHeaderDefinition.HeaderStartLocationOffsetBytes + currentHeaderDefinition.HeaderSizeInBytes);

                // get the start location where we will write the header. it must be page aligned.
                var emptyFirstBytesCount = newHeaderOffsetBytesNonAligned % 512;
                var headerAlignedStartOffsetBytes = newHeaderOffsetBytesNonAligned - emptyFirstBytesCount;
                var alignedBytesRequired = GetPageAlignedSize(emptyFirstBytesCount + currentHeaderDefinition.HeaderSizeInBytes);
                var alignedSerializedHeader = new byte[alignedBytesRequired];
                Array.Copy(serializedHeader, 0, alignedSerializedHeader, emptyFirstBytesCount, serializedHeader.Length);

                using (var temp = new MemoryStream(alignedSerializedHeader, false))
                {
                    _pageBlob.WritePages(temp, headerAlignedStartOffsetBytes, null, AccessCondition.GenerateIfMatchCondition(_pageBlob.Properties.ETag));
                }
            }

            var allocatedFourMegs = new byte[maxSingleWriteSizeBytes];
            var lastAmountRead = 0;
            int currentOffset = startOffsetAligned;

            // our last write must be at least newHeaderSize in size, otherwise we run the risk of leaving a partial header
            var newHeaderSize = startOffsetAligned + pageDataWithHeaderAligned.Length - newHeaderOffsetBytesNonAligned;
            var remainingBytesToWrite = pageDataWithHeaderAligned.Length;
            while (remainingBytesToWrite != 0)
            {
                var amountToWrite = maxSingleWriteSizeBytes;
                var potentialRemaining = remainingBytesToWrite - amountToWrite;
                if (potentialRemaining < 0)
                {
                    potentialRemaining = remainingBytesToWrite;
                }

                if (potentialRemaining < newHeaderSize)
                {
                    int howMuchLessWeNeedToWriteAligned = (int)(newHeaderSize - potentialRemaining);
                    howMuchLessWeNeedToWriteAligned = GetPageAlignedSize(howMuchLessWeNeedToWriteAligned);
                    amountToWrite -= howMuchLessWeNeedToWriteAligned;
                }

                lastAmountRead = pageDataWithHeaderAligned.Read(allocatedFourMegs, 0, amountToWrite);
                remainingBytesToWrite -= lastAmountRead;

                using (var tempStream = new MemoryStream(allocatedFourMegs, 0, lastAmountRead, false, false))
                {
                    _pageBlob.WritePages(tempStream, currentOffset, null, AccessCondition.GenerateIfMatchCondition(_pageBlob.Properties.ETag));
                }

                currentOffset += lastAmountRead;
            }
        }

        Logger.Verbose("Wrote [{0}] bytes for blob [{1}], etag [{2}]", pageDataWithHeaderAligned.Length, _pageBlob.Uri, _pageBlob.Properties.ETag);
    }
    catch (AzureStorageException ex)
    {
        throw HandleAndRemapCommonExceptions(ex);
    }
}
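// NOTE: GetPageAlignedSize is used above but not part of this listing. A minimal sketch,
// assuming the standard 512-byte Azure page blob page size (consistent with the modulo-512
// arithmetic in Write); the real implementation may use a named constant instead.
private int GetPageAlignedSize(int nonAligned)
{
    // round up to the next whole page, since page blobs are written in 512-byte pages
    var remainder = nonAligned % 512;
    return remainder == 0 ? nonAligned : nonAligned + (512 - remainder);
}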
/// <summary>
/// Commits the header information, which essentially commits any transactions that occurred
/// related to that header.
/// </summary>
/// <param name="blob">blob the header applies to</param>
/// <param name="newCommit">the new commit to write</param>
/// <param name="updatedHeader">the new header to be serialized out</param>
/// <param name="currentGoodHeaderDefinition">the definition for the current header, before this change is committed</param>
/// <param name="nonAlignedBytesUsedAlready">non aligned offset of the index where the last commit data is stored (not inclusive of header)</param>
private void CommitNewMessage(WrappedPageBlob blob, byte[] newCommit, StreamBlobHeader updatedHeader,
    HeaderDefinitionMetadata currentGoodHeaderDefinition, int nonAlignedBytesUsedAlready)
{
    newCommit = newCommit ?? new byte[0];
    var serializedHeader = _serializer.Serialize(updatedHeader);
    var writeStartLocationAligned = GetPageAlignedSize(nonAlignedBytesUsedAlready);
    var amountToWriteAligned = GetPageAlignedSize(serializedHeader.Length + newCommit.Length);
    var totalSpaceNeeded = writeStartLocationAligned + amountToWriteAligned;
    var newHeaderStartLocationNonAligned = writeStartLocationAligned + newCommit.Length;

    var totalBlobLength = blob.Properties.Length;
    if (totalBlobLength < totalSpaceNeeded)
    {
        blob.Resize(totalSpaceNeeded);
        totalBlobLength = blob.Properties.Length;
    }

    // set the header definition to make it all official
    var isFirstWrite = currentGoodHeaderDefinition.HeaderSizeInBytes == 0;
    var headerDefinitionMetadata = new HeaderDefinitionMetadata();
    headerDefinitionMetadata.HeaderSizeInBytes = serializedHeader.Length;
    headerDefinitionMetadata.HeaderStartLocationOffsetBytes = newHeaderStartLocationNonAligned;

    blob.Metadata[IsEventStreamAggregateKey] = "yes";
    blob.Metadata[HasUndispatchedCommitsKey] = updatedHeader.PageBlobCommitDefinitions.Any(x => !x.IsDispatched).ToString();

    if (!isFirstWrite)
    {
        blob.Metadata[SecondaryHeaderDefinitionKey] = Convert.ToBase64String(currentGoodHeaderDefinition.GetRaw());

        // this is a third-layer backup in case we have an issue in the middle of this upcoming write operation.
        var tempHeaderDefinition = currentGoodHeaderDefinition.Clone();
        tempHeaderDefinition.HeaderStartLocationOffsetBytes = newHeaderStartLocationNonAligned;
        blob.Metadata[TertiaryHeaderDefintionKey] = Convert.ToBase64String(tempHeaderDefinition.GetRaw());
        blob.Metadata[FirstWriteCompletedKey] = "t";
    }
    else
    {
        blob.Metadata[FirstWriteCompletedKey] = "f";
    }

    blob.Metadata[PrimaryHeaderDefinitionKey] = Convert.ToBase64String(headerDefinitionMetadata.GetRaw());
    blob.SetMetadata();

    using (var ms = CreateAndFillStreamAligned(amountToWriteAligned, newCommit, serializedHeader))
    {
        blob.Write(ms, writeStartLocationAligned, newHeaderStartLocationNonAligned, currentGoodHeaderDefinition);
    }

    // We pay the cost of an extra call for our first ever write (this is effectively creation of the aggregate).
    // We do this because we actually host our header in the blob, but keep the reference to that header in our
    // metadata. We set the metadata with the potential states prior to actually writing the new header. If this
    // was the first ever write and we set the metadata but then failed to write the header, we could end up in a
    // state where the aggregate becomes unusable, because it believes there should be a header according to the
    // metadata. For that reason we must record when our first write completes.
    if (isFirstWrite)
    {
        blob.Metadata[FirstWriteCompletedKey] = "t";
        blob.SetMetadata();
    }
}
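// NOTE: CreateAndFillStreamAligned is used by CommitNewMessage above but not shown. Given
// how the offsets are computed there (the new header starts at writeStartLocationAligned +
// newCommit.Length), the region being written must look like
//   [ commit bytes ][ serialized header ][ zero padding to the page boundary ]
// The sketch below is an inferred illustration of that layout, not the actual implementation.
private MemoryStream CreateAndFillStreamAligned(int amountToWriteAligned, byte[] newCommit, byte[] serializedHeader)
{
    // the buffer is already sized to a page-aligned length; unused tail bytes stay zeroed
    var buffer = new byte[amountToWriteAligned];
    Array.Copy(newCommit, 0, buffer, 0, newCommit.Length);
    Array.Copy(serializedHeader, 0, buffer, newCommit.Length, serializedHeader.Length);
    return new MemoryStream(buffer, false);
}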
/// <summary>
/// Gets all undispatched commits across all buckets.
/// </summary>
/// <returns>A list of all undispatched commits.</returns>
public IEnumerable<ICommit> GetUndispatchedCommits()
{
    Logger.Info("Getting undispatched commits. This is only done during initialization. This may take a while...");
    var allCommitDefinitions = new List<Tuple<WrappedPageBlob, PageBlobCommitDefinition>>();

    // this container is fetched lazily, so filtering down at this level will improve our performance,
    // assuming the options dictate a date range that filters down our set.
    var pageBlobs = WrappedPageBlob.GetAllMatchinPrefix(_primaryContainer, null);
    Logger.Info("Checking [{0}] blobs for undispatched commits... this may take a while", pageBlobs.Count());

    // this could be a tremendous amount of data. Depending on the system used,
    // this may not be performant enough and may require some sort of index to be built.
    foreach (var pageBlob in pageBlobs)
    {
        var temp = pageBlob;
        if (temp.Metadata.ContainsKey(IsEventStreamAggregateKey))
        {
            // we only care about streams that may be dirty
            var isDirty = false;
            string isDirtyString;
            if (temp.Metadata.TryGetValue(HasUndispatchedCommitsKey, out isDirtyString))
            {
                isDirty = bool.Parse(isDirtyString);
            }

            if (isDirty)
            {
                Logger.Info("undispatched commit possibly found with aggregate [{0}]", temp.Name);

                // Because fetching the header for a specific blob is a two-phase operation, it may take a couple
                // of tries if another writer is working with the blob. This is just a quality of life improvement
                // for the user of the store, so that loading the store does not hit frequent optimistic concurrency
                // conflicts that force the store to re-initialize.
                var maxTries = 0;
                while (true)
                {
                    try
                    {
                        HeaderDefinitionMetadata headerDefinitionMetadata = null;
                        var header = GetHeaderWithRetry(temp, out headerDefinitionMetadata);
                        var wasActuallyDirty = false;

                        if (header.UndispatchedCommitCount > 0)
                        {
                            foreach (var definition in header.PageBlobCommitDefinitions)
                            {
                                if (!definition.IsDispatched)
                                {
                                    Logger.Warn("Found undispatched commit for stream [{0}] revision [{1}]", temp.Name, definition.Revision);
                                    wasActuallyDirty = true;
                                    allCommitDefinitions.Add(new Tuple<WrappedPageBlob, PageBlobCommitDefinition>(temp, definition));
                                }
                            }
                        }

                        if (!wasActuallyDirty)
                        {
                            temp.Metadata[HasUndispatchedCommitsKey] = false.ToString();
                            temp.SetMetadata();
                        }

                        break;
                    }
                    catch (ConcurrencyException)
                    {
                        if (maxTries++ > 20)
                        {
                            Logger.Error("Reached the maximum number of tries for getting undispatched commits while still receiving concurrency exceptions. Throwing.");
                            throw;
                        }
                        else
                        {
                            Logger.Info("Concurrency issue detected while processing undispatched commits. Going to retry loading the blob.");
                            try
                            {
                                temp = WrappedPageBlob
                                    .GetAllMatchinPrefix(_primaryContainer, pageBlob.Name)
                                    .Single();
                            }
                            catch (Exception ex)
                            {
                                Logger.Warn("Attempted to reload during concurrency and failed... will retry. [{0}]", ex.Message);
                            }
                        }
                    }
                    catch (CryptographicException ex)
                    {
                        Logger.Fatal("Received a CryptographicException while processing aggregate with id [{0}]. The header is possibly corrupt. Error is [{1}]", pageBlob.Name, ex.ToString());
                        break;
                    }
                    catch (InvalidHeaderDataException ex)
                    {
                        Logger.Fatal("Received an InvalidHeaderDataException while processing aggregate with id [{0}]. The header is possibly corrupt. Error is [{1}]", pageBlob.Name, ex.ToString());
                        break;
                    }
                }
            }
        }
    }

    // now sort the definitions so we can return them sorted
    Logger.Info("Found [{0}] undispatched commits", allCommitDefinitions.Count);
    var orderedCommitDefinitions = allCommitDefinitions.OrderBy(x => x.Item2.Checkpoint);
    foreach (var orderedCommitDefinition in orderedCommitDefinitions)
    {
        yield return CreateCommitFromDefinition(orderedCommitDefinition.Item1, orderedCommitDefinition.Item2);
    }
}