/// <summary>
/// Parses one Bitcoin block.
/// </summary>
/// <param name="blockchainFileName">
/// The name of the blockchain file that contains the block being parsed.
/// </param>
/// <param name="binaryReader">
/// Provides access to a Bitcoin blockchain file.
/// </param>
/// <returns>
/// The block that was parsed, or null when the end of the file was reached
/// and there is no block left to be parsed.
/// </returns>
/// <exception cref="InvalidBlockchainContentException">
/// Thrown when the block does not start with the expected block Id or when the
/// block length stored in the file does not fit in a 32-bit signed integer.
/// </exception>
private ParserBlock ParseBlockchainFile(string blockchainFileName, BinaryReader binaryReader)
{
    // There are some rare situations where a block is preceded by a section containing zero bytes.
    if (binaryReader.SkipZeroBytes() == false)
    {
        // We reached the end of the file. There is no block to be parsed.
        return null;
    }

    UInt32 blockId = binaryReader.ReadUInt32();
    if (blockId != this.blockMagicId)
    {
        throw new InvalidBlockchainContentException(string.Format(CultureInfo.InvariantCulture, "Invalid block Id: {0:X}. Expected: {1:X}", blockId, this.blockMagicId));
    }

    // The length is stored as an unsigned 32-bit value. Casting it blindly to int would turn
    // any value above int.MaxValue into a negative number, which would then fail inside
    // ReadBytes with an unrelated ArgumentOutOfRangeException. Report it as invalid content instead.
    UInt32 storedBlockLength = binaryReader.ReadUInt32();
    if (storedBlockLength > int.MaxValue)
    {
        throw new InvalidBlockchainContentException(string.Format(CultureInfo.InvariantCulture, "Invalid block length: {0}. The maximum supported block length is {1}.", storedBlockLength, int.MaxValue));
    }

    int blockLength = (int)storedBlockLength;

    // Load the entire block in memory. The rest of the parsing works on this buffer.
    byte[] blockBuffer = binaryReader.ReadBytes(blockLength);

    using (BlockMemoryStreamReader blockMemoryStreamReader = new BlockMemoryStreamReader(blockBuffer))
    {
        return BlockchainParser.InternalParseBlockchainFile(blockBuffer, blockchainFileName, blockMemoryStreamReader);
    }
}
/// <summary>
/// Parses the header and the transactions of one Bitcoin block.
/// The few fields that precede the actual block header are not handled here.
/// </summary>
/// <param name="blockchainFileName">
/// The name of the blockchain file that contains the block being parsed.
/// </param>
/// <param name="blockMemoryStreamReader">
/// Provides access to a section of the Bitcoin blockchain file.
/// </param>
/// <returns>
/// The block that was parsed, holding its header and all the transactions that were read.
/// </returns>
private static Block InternalParseBlockchainFile(string blockchainFileName, BlockMemoryStreamReader blockMemoryStreamReader)
{
    // The header is read first; the block instance is created around it.
    Block parsedBlock = new Block(blockchainFileName, BlockchainParser.ParseBlockHeader(blockMemoryStreamReader));

    // The number of transactions follows the header as a variable length integer.
    int transactionsLeft = (int)blockMemoryStreamReader.ReadVariableLengthInteger();
    while (transactionsLeft > 0)
    {
        parsedBlock.AddTransaction(BlockchainParser.ParseTransaction(blockMemoryStreamReader));
        transactionsLeft--;
    }

    return parsedBlock;
}
/// <summary>
/// Reads one Bitcoin transaction (version, inputs, outputs and lock time)
/// and computes its hash.
/// </summary>
/// <param name="blockMemoryStreamReader">
/// Provides access to a section of the Bitcoin blockchain file.
/// </param>
/// <returns>
/// The Bitcoin transaction that was parsed.
/// </returns>
private static Transaction ParseTransaction(BlockMemoryStreamReader blockMemoryStreamReader)
{
    Transaction parsedTransaction = new Transaction();

    // Remember where the transaction starts so that its raw bytes can be hashed later.
    int startOffset = (int)blockMemoryStreamReader.BaseStream.Position;

    parsedTransaction.TransactionVersion = blockMemoryStreamReader.ReadUInt32();

    for (int inputsLeft = (int)blockMemoryStreamReader.ReadVariableLengthInteger(); inputsLeft > 0; inputsLeft--)
    {
        parsedTransaction.AddInput(BlockchainParser.ParseTransactionInput(blockMemoryStreamReader));
    }

    for (int outputsLeft = (int)blockMemoryStreamReader.ReadVariableLengthInteger(); outputsLeft > 0; outputsLeft--)
    {
        parsedTransaction.AddOutput(BlockchainParser.ParseTransactionOutput(blockMemoryStreamReader));
    }

    // TODO: Need to find out more details about the semantic of TransactionLockTime.
    parsedTransaction.TransactionLockTime = blockMemoryStreamReader.ReadUInt32();

    int endOffset = (int)blockMemoryStreamReader.BaseStream.Position;

    using (SHA256Managed hasher = new SHA256Managed())
    {
        // The transaction hash is the double SHA256 of the bytes that were just read through.
        // The entire block was loaded as an in-memory buffer and that buffer is what backs
        // blockMemoryStreamReader, so the transaction bytes can be hashed in place.
        byte[] blockBytes = blockMemoryStreamReader.GetBuffer();
        byte[] firstPass = hasher.ComputeHash(blockBytes, startOffset, endOffset - startOffset);
        byte[] secondPass = hasher.ComputeHash(firstPass);

        // The hash is stored with its bytes in reverse order.
        parsedTransaction.TransactionHash = new ByteArray(secondPass.ReverseByteArray());
    }

    return parsedTransaction;
}
/// <summary>
/// Enumerates all the blocks found at the given blockchain location and reports
/// block, transaction, input and output counts per blockchain file and overall.
/// </summary>
/// <param name="pathToBlockchain">
/// The path to the blockchain files; passed to the BlockchainParser constructor.
/// </param>
private static void ParseBlockchainFiles(string pathToBlockchain)
{
    BlockchainStatistics totals = new BlockchainStatistics();
    BlockchainStatistics currentFileTotals = new BlockchainStatistics();

    string currentBlockchainFile = null;

    // Create the parser, handing the path to the blockchain files to its constructor.
    // TIP: the parser provides several constructors that are useful in different scenarios.
    IBlockchainParser blockchainParser = new BlockchainParser(pathToBlockchain);

    // Parsing starts when blockchainParser.ParseBlockchain() is enumerated. The parser exposes
    // the blocks it parses via an "IEnumerable<Block>" and each block is processed as it arrives.
    // TIPS:
    // 1. An instance of type BitcoinBlockchain.Data.Block holds information about all its
    //    transactions, inputs and outputs and it can use a lot of memory. Once a block was
    //    processed do not keep it around in memory. For example, do not simply collect all
    //    instances of type BitcoinBlockchain.Data.Block in a list; that would consume huge
    //    amounts of memory.
    //
    // 2. To improve the performance of your application you may want to dispatch the processing
    //    of a block on a background thread. If you do that, multiple blocks will be processed
    //    concurrently and you have to be prepared to deal with various multi-threading aspects.
    //    For example, a transaction input may end up being processed before the output it links
    //    to. Consider a hybrid approach where some of the processing for a block is done on the
    //    main thread and some of it is dispatched on a background thread.
    //
    // 3. If during processing you need to store so much information that you expect to exceed
    //    2 GB of memory, build your tool for the x64 configuration.
    //
    // 4. Be aware of the concept of stale blocks. Depending on what your processing does, not
    //    accounting for stale blocks could lead to incorrect results. The parser has no way to
    //    know that a block is stale when it encounters it. It will enumerate it to you and you
    //    will have the chance to detect the stale blocks once the parsing of all blocks is complete.
    //    See:
    //        https://bitcoin.org/en/developer-guide#orphan-blocks
    //        https://bitcoin.org/en/glossary/stale-block
    //        https://bitcoin.org/en/glossary/orphan-block
    //        http://bitcoin.stackexchange.com/questions/5859/what-are-orphaned-and-stale-blocks
    foreach (Block block in blockchainParser.ParseBlockchain())
    {
        bool enteredNewFile = currentBlockchainFile != block.BlockchainFileName;
        if (enteredNewFile)
        {
            // Close out the statistics of the previous file, if there was one.
            bool hadPreviousFile = currentBlockchainFile != null;
            if (hadPreviousFile)
            {
                ReportBlockChainStatistics(currentFileTotals);
                currentFileTotals.Reset();
            }

            currentBlockchainFile = block.BlockchainFileName;
            Console.WriteLine("Parsing file: {0}", currentBlockchainFile);
        }

        int transactionsCount = block.Transactions.Count;
        int inputsCount = block.TransactionInputsCount;
        int outputsCount = block.TransactionOutputsCount;

        currentFileTotals.AddStatistics(1, transactionsCount, inputsCount, outputsCount);
        totals.AddStatistics(1, transactionsCount, inputsCount, outputsCount);
    }

    // Report the statistics accumulated for the last file that was enumerated.
    ReportBlockChainStatistics(currentFileTotals);

    Console.WriteLine("=================================================");
    Console.WriteLine("Overall statistics:");
    ReportBlockChainStatistics(totals);
}
/// <summary>
/// Reads one Bitcoin transaction, in either the original or the SegWit serialization,
/// and computes its hash (txid).
/// </summary>
/// <param name="blockMemoryStreamReader">
/// Provides access to a section of the Bitcoin blockchain file.
/// </param>
/// <returns>
/// The Bitcoin transaction that was parsed.
/// </returns>
/// <exception cref="InvalidBlockchainContentException">
/// Thrown when the input count reads as zero but the byte that follows is not the SegWit flag value 1.
/// </exception>
private static Transaction ParseTransaction(BlockMemoryStreamReader blockMemoryStreamReader)
{
    // Sizes, in bytes, of the fixed-width pieces that are addressed directly when hashing.
    const int VersionSize = 4;
    const int MarkerAndFlagSize = 2;
    const int LockTimeSize = 4;

    Transaction parsedTransaction = new Transaction();

    int startOffset = (int)blockMemoryStreamReader.BaseStream.Position;

    parsedTransaction.TransactionVersion = blockMemoryStreamReader.ReadUInt32();

    // An input count of zero is treated as the SegWit marker; it must be followed by
    // the flag byte 1 and only then by the real input count.
    int inputsCount = (int)blockMemoryStreamReader.ReadVariableLengthInteger();
    bool hasWitnessSection = inputsCount == 0;
    if (hasWitnessSection)
    {
        byte flag = blockMemoryStreamReader.ReadByte();
        if (flag != 0x01)
        {
            throw new InvalidBlockchainContentException(string.Format(CultureInfo.InvariantCulture, "Unknown transaction serialization. No input transactions, but SegWit flag was {0} instead of 1.", flag));
        }

        inputsCount = (int)blockMemoryStreamReader.ReadVariableLengthInteger();
    }

    for (int inputsLeft = inputsCount; inputsLeft > 0; inputsLeft--)
    {
        parsedTransaction.AddInput(BlockchainParser.ParseTransactionInput(blockMemoryStreamReader));
    }

    for (int outputsLeft = (int)blockMemoryStreamReader.ReadVariableLengthInteger(); outputsLeft > 0; outputsLeft--)
    {
        parsedTransaction.AddOutput(BlockchainParser.ParseTransactionOutput(blockMemoryStreamReader));
    }

    int offsetAfterOutputs = (int)blockMemoryStreamReader.BaseStream.Position;

    if (hasWitnessSection)
    {
        // One witness is read per transaction input.
        for (int witnessesLeft = inputsCount; witnessesLeft > 0; witnessesLeft--)
        {
            parsedTransaction.AddWitness(BlockchainParser.ParseWitness(blockMemoryStreamReader));
        }
    }

    // TODO: Need to find out more details about the semantic of TransactionLockTime.
    parsedTransaction.TransactionLockTime = blockMemoryStreamReader.ReadUInt32();

    int endOffset = (int)blockMemoryStreamReader.BaseStream.Position;

    using (SHA256Managed outerHasher = new SHA256Managed())
    {
        // The transaction hash is a double SHA256. The entire block was loaded as an in-memory
        // buffer and that buffer backs blockMemoryStreamReader, so the bytes are hashed in place.
        byte[] blockBytes = blockMemoryStreamReader.GetBuffer();
        byte[] firstPass;

        if (!hasWitnessSection)
        {
            // Original serialization: hash the transaction bytes exactly as they were read.
            firstPass = outerHasher.ComputeHash(blockBytes, startOffset, endOffset - startOffset);
        }
        else
        {
            // SegWit transactions are still identified by their txid, which is the double SHA256
            // of the old serialization format (no marker, no flag, no witness data). That format
            // is recreated here by feeding the hash only the relevant slices of the buffer.
            using (SHA256Managed legacyFormatHasher = new SHA256Managed())
            {
                // Slice 1: the version number.
                legacyFormatHasher.TransformBlock(blockBytes, startOffset, VersionSize, blockBytes, startOffset);

                // Slice 2: skip the marker and the flag, then take the inputs and the outputs
                // (starting with the input count).
                int inputsStart = startOffset + VersionSize + MarkerAndFlagSize;
                legacyFormatHasher.TransformBlock(blockBytes, inputsStart, offsetAfterOutputs - inputsStart, blockBytes, inputsStart);

                // Slice 3: the witness data that follows the outputs is left out; only the
                // lock time, located in the last four bytes of the transaction, remains.
                legacyFormatHasher.TransformFinalBlock(blockBytes, endOffset - LockTimeSize, LockTimeSize);

                firstPass = legacyFormatHasher.Hash;
            }
        }

        // Second SHA256 pass; the result is stored with its bytes in reverse order.
        parsedTransaction.TransactionHash = new ByteArray(outerHasher.ComputeHash(firstPass).ReverseByteArray());
    }

    return parsedTransaction;
}
/// <summary>
/// Enumerates the blocks produced by the blockchain parser, dispatches the work of filling the
/// source data pipeline for each block to background tasks and transfers the data that becomes
/// available, then flushes and transfers whatever remains once all blocks were enumerated.
/// </summary>
/// <param name="lastKnownBlockchainFileName">
/// Passed to the BlockchainParser constructor when no parser factory is configured.
/// </param>
/// <param name="newDatabase">
/// Not used by this method.
/// </param>
/// <returns>
/// A task that completes when all dispatched work has finished.
/// </returns>
private async Task TransferBlockchainDataAsync(string lastKnownBlockchainFileName, bool newDatabase)
{
    DatabaseIdManager databaseIdManager = this.GetDatabaseIdManager();
    TaskDispatcher taskDispatcher = new TaskDispatcher(this.parameters.Threads); // What if we use 1 thread now that we use bulk copy?

    // A configured factory takes precedence over the default parser.
    IBlockchainParser blockchainParser;
    if (this.blockchainParserFactory != null)
    {
        blockchainParser = this.blockchainParserFactory();
    }
    else
    {
        blockchainParser = new BlockchainParser(this.parameters.BlockchainPath, lastKnownBlockchainFileName);
    }

    if (this.parameters.BlockId != null)
    {
        blockchainParser.SetBlockId(this.parameters.BlockId.Value);
    }

    this.processingStatistics.ProcessingBlockchainStarting();

    Stopwatch fileStopwatch = Stopwatch.StartNew();

    SourceDataPipeline sourceDataPipeline = new SourceDataPipeline();

    int blockFileId = -1;

    foreach (ParserData.Block block in blockchainParser.ParseBlockchain())
    {
        bool enteredNewFile = this.currentBlockchainFile != block.BlockchainFileName;
        if (enteredNewFile)
        {
            // Close out the previous blockchain file, if there was one, and time the next one.
            if (this.currentBlockchainFile != null)
            {
                this.FinalizeBlockchainFileProcessing(fileStopwatch);
                fileStopwatch.Restart();
            }

            this.lastReportedPercentage = -1;

            blockFileId = databaseIdManager.GetNextBlockchainFileId(1);
            this.ProcessBlockchainFile(blockFileId, block.BlockchainFileName);
            this.currentBlockchainFile = block.BlockchainFileName;
        }

        this.ReportProgressReport(block.BlockchainFileName, block.PercentageOfCurrentBlockchainFile);

        int transactionsCount = block.Transactions.Count;
        int inputsCount = block.TransactionInputsCount;
        int outputsCount = block.TransactionOutputsCount;

        // databaseIdSegmentManager is instantiated on the main thread. Doing so guarantees that the
        // database primary keys are generated in a certain order: the primary keys in our tables will
        // be in the same order as the corresponding entities appear in the blockchain. For example,
        // with the current implementation, the block ID will be the block depth as reported by
        // http://blockchain.info/.
        DatabaseIdSegmentManager databaseIdSegmentManager = new DatabaseIdSegmentManager(databaseIdManager, 1, transactionsCount, inputsCount, outputsCount);

        this.processingStatistics.AddBlocksCount(1);
        this.processingStatistics.AddTransactionsCount(transactionsCount);
        this.processingStatistics.AddTransactionInputsCount(inputsCount);
        this.processingStatistics.AddTransactionOutputsCount(outputsCount);

        // Copies for the lambda below: blockFileId is reassigned by later iterations, so the
        // background work must capture the values that belong to this iteration.
        int capturedBlockFileId = blockFileId;
        ParserData.Block capturedBlock = block;

        // Dispatch the work of "filling the source pipeline" to an available background thread.
        // Note: The await awaits only until the work is dispatched and not until the work is completed.
        // Dispatching the work itself may take a while if all available background threads are busy.
        await taskDispatcher.DispatchWorkAsync(() => sourceDataPipeline.FillBlockchainPipeline(capturedBlockFileId, capturedBlock, databaseIdSegmentManager));

        await this.TransferAvailableData(taskDispatcher, sourceDataPipeline);
    }

    // Wait for the last remaining background tasks, if any, that are still executing
    // sourceDataPipeline.FillBlockchainPipeline or the SQL bulk copy to finish.
    await taskDispatcher.WaitForAllWorkToComplete();

    // Instruct sourceDataPipeline to transfer all remaining data to the available data queue.
    // IMPORTANT: do not call this while there could still be threads executing sourceDataPipeline.FillBlockchainPipeline.
    sourceDataPipeline.Flush();

    // Now trigger the SQL bulk copy for the data that remains.
    await this.TransferAvailableData(taskDispatcher, sourceDataPipeline);

    // Wait for the last remaining background tasks, if any, that are still executing
    // the SQL bulk copy to finish.
    await taskDispatcher.WaitForAllWorkToComplete();

    this.FinalizeBlockchainFileProcessing(fileStopwatch);
}