private void MapOne(Dictionary<string, object> obj)
{
    // [run_id] [smallint] NOT NULL
    BulkWriter.WriteSmallInt(RunID);
    // [tweet_id] [bigint] NOT NULL
    BulkWriter.WriteBigInt(JsonUtil.GetInt64(obj, "id"));
    // [user_id] [bigint] NOT NULL
    BulkWriter.WriteBigInt(JsonUtil.GetInt64(obj, "user.id"));
    // [retweeted_tweet_id] [bigint] NOT NULL
    BulkWriter.WriteBigInt(JsonUtil.GetInt64(obj, "retweeted_status.id"));
    // [retweeted_user_id] [bigint] NOT NULL
    BulkWriter.WriteBigInt(JsonUtil.GetInt64(obj, "retweeted_status.user.id"));
    // [created_at] [datetime] NOT NULL
    BulkWriter.WriteDateTime(JsonUtil.GetDateTime(obj, "created_at"));
    // [retweeted_at] [datetime] NOT NULL
    BulkWriter.WriteDateTime(JsonUtil.GetDateTime(obj, "retweeted_status.created_at"));
    BulkWriter.EndLine();
}
public async Task CanWriteSyncWithExistingConnectionAndTransaction()
{
    string tableName = DropCreate(nameof(BulkWriterTestsMyTestClass));

    using (var connection = new SqlConnection(_connectionString))
    {
        await connection.OpenAsync();

        using (var transaction = connection.BeginTransaction())
        {
            var writer = new BulkWriter<BulkWriterTestsMyTestClass>(connection, transaction);

            var items = Enumerable.Range(1, 1000)
                .Select(i => new BulkWriterTestsMyTestClass { Id = i, Name = "Bob" });

            writer.WriteToDatabase(items);

            var count = (int)await TestHelpers.ExecuteScalar(connection, $"SELECT COUNT(1) FROM {tableName}", transaction);
            Assert.Equal(1000, count);

            transaction.Rollback();

            count = (int)await TestHelpers.ExecuteScalar(connection, $"SELECT COUNT(1) FROM {tableName}");
            Assert.Equal(0, count);
        }
    }
}
public async Task Should_Handle_Column_VarBinary_Large()
{
    string tableName = nameof(MyTestClassForVarBinary);
    TestHelpers.ExecuteNonQuery(_connectionString, $"DROP TABLE IF EXISTS [dbo].[{tableName}]");
    TestHelpers.ExecuteNonQuery(_connectionString,
        "CREATE TABLE [dbo].[" + tableName + "](" +
        "[Id] [int] IDENTITY(1,1) NOT NULL," +
        "[Data] [varbinary](MAX) NULL," +
        "CONSTRAINT [PK_" + tableName + "] PRIMARY KEY CLUSTERED ([Id] ASC)" +
        ")");

    var writer = new BulkWriter<MyTestClassForVarBinary>(_connectionString);
    var items = new[] { new MyTestClassForVarBinary { Id = 1, Data = new byte[1024 * 1024 * 1] } }; // 1 MB payload
    new Random().NextBytes(items.First().Data);
    writer.WriteToDatabase(items);

    var count = (int)await TestHelpers.ExecuteScalar(_connectionString, $"SELECT COUNT(1) FROM {tableName}");
    var data = (byte[])await TestHelpers.ExecuteScalar(_connectionString, $"SELECT TOP 1 Data FROM {tableName}");
    Assert.Equal(items.First().Data, data);
    Assert.Equal(1, count);
}
public async Task PartiallyCompletesWhenAStepThrows()
{
    var tableName = TestHelpers.DropCreate(nameof(PipelineTestsMyTestClass));

    using (var writer = new BulkWriter<PipelineTestsMyTestClass>(_connectionString))
    {
        var items = Enumerable.Range(1, 1000).Select(i => new PipelineTestsMyTestClass { Id = i, Name = "Bob" });
        var pipeline = EtlPipeline
            .StartWith(items)
            .TransformInPlace(i =>
            {
                if (i.Id > 500)
                {
                    throw new Exception("Transform exception");
                }
                i.Id -= 1;
                i.Name = $"Alice {i.Id}";
            })
            .WriteTo(writer);

        var pipelineTask = pipeline.ExecuteAsync();
        await Assert.ThrowsAsync<Exception>(() => pipelineTask);

        var count = (int)await TestHelpers.ExecuteScalar(_connectionString, $"SELECT COUNT(1) FROM {tableName}");
        Assert.Equal(500, count);
    }
}
public async Task CanWriteSyncWithOptions()
{
    var tableName = TestHelpers.DropCreate(nameof(BulkWriterTestsMyTestClass));
    var tableNameWithKey = TestHelpers.DropCreate(nameof(BulkWriterTestsMyTestClassWithKey));

    var writer = new BulkWriter<BulkWriterTestsMyTestClass>(_connectionString);
    var writerWithOptions = new BulkWriter<BulkWriterTestsMyTestClassWithKey>(_connectionString, SqlBulkCopyOptions.KeepIdentity);

    var items = Enumerable.Range(11, 20).Select(i => new BulkWriterTestsMyTestClass { Id = i, Name = "Bob" });
    var itemsWithKey = Enumerable.Range(11, 20).Select(i => new BulkWriterTestsMyTestClassWithKey { Id = i, Name = "Bob" });

    writer.WriteToDatabase(items);
    writerWithOptions.WriteToDatabase(itemsWithKey);

    var minId = (int)await TestHelpers.ExecuteScalar(_connectionString, $"SELECT MIN(Id) FROM {tableName}");
    var minIdWithKey = (int)await TestHelpers.ExecuteScalar(_connectionString, $"SELECT MIN(Id) FROM {tableNameWithKey}");

    // Without KeepIdentity the identity column regenerates Ids starting at 1;
    // with KeepIdentity the supplied Ids (11..30) are preserved.
    Assert.Equal(1, minId);
    Assert.Equal(11, minIdWithKey);
}
public async Task CanSetBulkCopyParameters()
{
    const int timeout = 10;
    const int batchSize = 1000;

    var writer = new BulkWriter<BulkWriterInitializationTestsMyTestClass>(_connectionString)
    {
        BulkCopyTimeout = timeout,
        BatchSize = batchSize,
        BulkCopySetup = bcp =>
        {
            Assert.Equal(timeout, bcp.BulkCopyTimeout);
            Assert.Equal(batchSize, bcp.BatchSize);
        }
    };

    var items = Enumerable.Range(1, 10)
        .Select(i => new BulkWriterInitializationTestsMyTestClass { Id = i, Name = "Bob" });

    writer.WriteToDatabase(items);

    var count = (int)await TestHelpers.ExecuteScalar(_connectionString, $"SELECT COUNT(1) FROM {_tableName}");
    Assert.Equal(10, count);
}
public async Task CanSetBulkCopyParameters_Respects_Table_Annotation()
{
    string tableName = DropCreate("TestClass2");
    const int timeout = 10;
    const int batchSize = 1000;
    bool setupCallbackInvoked = false;

    var writer = new BulkWriter<BulkWriterInitializationTestsMyTestClassAnnotation>(_connectionString)
    {
        BulkCopyTimeout = timeout,
        BatchSize = batchSize,
        BulkCopySetup = bcp =>
        {
            setupCallbackInvoked = true;
            Assert.Equal("TestClass2", bcp.DestinationTableName);
            Assert.Equal(timeout, bcp.BulkCopyTimeout);
            Assert.Equal(batchSize, bcp.BatchSize);
        }
    };

    var items = Enumerable.Range(1, 10)
        .Select(i => new BulkWriterInitializationTestsMyTestClassAnnotation { Id = i, Name = "Bob" });

    writer.WriteToDatabase(items);

    var count = (int)await TestHelpers.ExecuteScalar(_connectionString, $"SELECT COUNT(1) FROM {tableName}");
    Assert.Equal(10, count);
    Assert.True(setupCallbackInvoked);
}
private void MapOne(Dictionary <string, object> obj) { ArrayList um = (ArrayList)JsonUtil.GetValue(obj, "entities.hashtags"); for (int i = 0; i < um.Count; i++) { // [run_id] [smallint] NOT NULL BulkWriter.WriteSmallInt(RunID); // [tweet_id] [bigint] NOT NULL BulkWriter.WriteBigInt(JsonUtil.GetInt64(obj, "id")); // [user_id] [bigint] NOT NULL BulkWriter.WriteBigInt(JsonUtil.GetInt64(obj, "user.id")); // [tag] [nvarchar](50) NOT NULL //string tag = (string)((Dictionary<string, object>)um[i])["text"]; //WriteTinyInt(tag.Substring(0, Math.Min(tag.Length, 50))); BulkWriter.WriteVarChar(JsonUtil.GetString(um[i], "text"), 50); // [created_at] [datetime] NOT NULL BulkWriter.WriteDateTime(JsonUtil.GetDateTime(obj, "created_at")); BulkWriter.EndLine(); } }
public override void RunAsync(CancellationToken cancellationToken)
{
    var enumerable = _inCollection.GetConsumingEnumerable(cancellationToken);
    using (var bulkWriter = new BulkWriter<T>(_context.ConnectionString))
    {
        bulkWriter.WriteToDatabase(enumerable);
    }
}
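The `GetConsumingEnumerable` call suggests `_inCollection` is a `BlockingCollection<T>`: the enumerable blocks until producers add items and completes once adding is marked finished, so the writer streams rows as they arrive. A minimal sketch of that producer/consumer shape, with hypothetical names (`MyEntity`, `LoadItems`) standing in for whatever the real pipeline uses:

// Hypothetical sketch; MyEntity and LoadItems are illustrative only.
public async Task StreamToDatabaseAsync(string connectionString)
{
    var queue = new BlockingCollection<MyEntity>(boundedCapacity: 10000);

    // Producer: add items, then mark adding complete so the consuming
    // enumerable below terminates and WriteToDatabase can return.
    var producer = Task.Run(() =>
    {
        foreach (var item in LoadItems())
        {
            queue.Add(item);
        }
        queue.CompleteAdding();
    });

    // Consumer: the blocking enumerable feeds rows to the writer as they arrive.
    using (var writer = new BulkWriter<MyEntity>(connectionString))
    {
        writer.WriteToDatabase(queue.GetConsumingEnumerable());
    }

    await producer; // observe any producer-side exception
}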
public async Task BulkWriterAsyncEnumerable()
{
    await using var sqlConnection = DbHelpers.OpenSqlConnection();

    using var bulkWriter = new BulkWriter<DomainEntity>(sqlConnection)
    {
        BulkCopyTimeout = 0,
        BatchSize = 10000
    };

    // ToAsyncEnumerable() comes from the System.Linq.Async package.
    var items = GetTestRecords().ToAsyncEnumerable();

    await bulkWriter.WriteToDatabaseAsync(items);
}
private void MapOne(Dictionary<string, object> obj)
{
    // [run_id] [smallint] NOT NULL
    BulkWriter.WriteSmallInt(RunID);
    // [user_id] [bigint] NOT NULL
    BulkWriter.WriteBigInt(JsonUtil.GetInt64(obj, "scrub_geo.user_id"));
    // [up_to_status_id] [bigint] NOT NULL
    BulkWriter.WriteBigInt(JsonUtil.GetInt64(obj, "scrub_geo.up_to_status_id"));
    BulkWriter.EndLine();
}
private void MapOne(Dictionary<string, object> obj)
{
    // [run_id] [smallint] NOT NULL
    BulkWriter.WriteSmallInt(RunID);
    // [tweet_id] [bigint] NOT NULL
    BulkWriter.WriteBigInt(JsonUtil.GetInt64(obj, "delete.status.id"));
    // [user_id] [bigint] NOT NULL
    BulkWriter.WriteBigInt(JsonUtil.GetInt64(obj, "delete.status.user_id"));
    BulkWriter.EndLine();
}
public async Task CanWriteSync()
{
    var writer = new BulkWriter<BulkWriterTestsMyTestClass>(_connectionString);
    var items = Enumerable.Range(1, 1000).Select(i => new BulkWriterTestsMyTestClass { Id = i, Name = "Bob" });
    writer.WriteToDatabase(items);

    var count = (int)await TestHelpers.ExecuteScalar(_connectionString, $"SELECT COUNT(1) FROM {_tableName}");
    Assert.Equal(1000, count);
}
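The test entities here are plain POCOs whose public properties line up with the destination table's columns by name. A minimal sketch of what `BulkWriterTestsMyTestClass` presumably looks like (the real definition lives in the test project):

// Hypothetical reconstruction; the actual class is defined in the test suite.
// A table-name annotation is also supported when the destination differs
// (see the annotation test above).
public class BulkWriterTestsMyTestClass
{
    public int Id { get; set; }
    public string Name { get; set; }
}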
private void WriteUrl(Dictionary<string, object> obj, ArrayList um)
{
    if (um != null)
    {
        for (int i = 0; i < um.Count; i++)
        {
            string url = (string)((Dictionary<string, object>)um[i])["url"];
            // strip the t.co prefix, keeping only the 8-character short-url id
            if (url.StartsWith("http://t.co/"))
            {
                url = url.Substring(12);
            }
            else if (url.StartsWith("https://t.co/"))
            {
                url = url.Substring(13);
            }
            else
            {
                continue;
            }
            // [run_id] [smallint] NOT NULL
            BulkWriter.WriteSmallInt(RunID);
            // [tweet_id] [bigint] NOT NULL
            BulkWriter.WriteBigInt(JsonUtil.GetInt64(obj, "id"));
            // [user_id] [bigint] NOT NULL
            BulkWriter.WriteBigInt(JsonUtil.GetInt64(obj, "user.id"));
            // [url_id] [char](8) NOT NULL
            BulkWriter.WriteChar(url, 8);
            // [created_at] [datetime] NOT NULL
            BulkWriter.WriteDateTime(JsonUtil.GetDateTime(obj, "created_at"));
            // [expanded_url] [varchar](8000)
            if (((Dictionary<string, object>)um[i]).ContainsKey("expanded_url"))
            {
                BulkWriter.WriteVarChar((string)((Dictionary<string, object>)um[i])["expanded_url"], 8000);
            }
            else
            {
                BulkWriter.WriteVarChar(null, 8000);
            }
            BulkWriter.EndLine();
        }
    }
}
public async Task Should_Handle_Both_Ordinal_And_ColumnName_For_Destination_Mapping()
{
    string tableName = DropCreate(nameof(OrdinalAndColumnNameExampleType));

    var writer = new BulkWriter<OrdinalAndColumnNameExampleType>(_connectionString);
    var items = new[] { new OrdinalAndColumnNameExampleType { Id = 1, Name2 = "Bob" } };
    writer.WriteToDatabase(items);

    var count = (int)await TestHelpers.ExecuteScalar(_connectionString, $"SELECT COUNT(1) FROM {tableName}");
    Assert.Equal(1, count);
}
public async Task RaisesExceptionsForAllStepsThatThrow()
{
    var tableName = TestHelpers.DropCreate(nameof(PipelineTestsMyTestClass));

    using (var writer = new BulkWriter<PipelineTestsMyTestClass>(_connectionString))
    {
        var items = Enumerable
            .Range(1, 1000)
            .Select(i => new PipelineTestsMyTestClass { Id = i, Name = "Bob" })
            .ToAsyncEnumerable();

        var pipeline = EtlPipeline
            .StartWith(items)
            .Project(i =>
            {
                // pump a few values through to ensure the next pipeline step actually gets run
                if (i.Id >= 400)
                {
                    throw new Exception("Projection exception 1");
                }
                return i;
            })
            .Project(i =>
            {
                if (i.Id >= 200)
                {
                    throw new Exception("Projection exception 2");
                }
                return i;
            })
            .WriteTo(writer);

        var pipelineTask = pipeline.ExecuteAsync();
        await Assert.ThrowsAsync<Exception>(() => pipelineTask);

        Assert.Equal(2, pipelineTask.Exception.InnerExceptions.Count);
        Assert.Equal(1, pipelineTask.Exception.InnerExceptions.Count(e => e.Message == "Projection exception 1"));
        Assert.Equal(1, pipelineTask.Exception.InnerExceptions.Count(e => e.Message == "Projection exception 2"));
    }
}
private void WriteProjectDependencies(IEnumerable<Dependency> dependencies, IDictionary<string, int> projectMap)
{
    var projectDependencyIntCounter = new IntCounter(-1);
    var projectDependencies = dependencies
        .Select(x => new ProjectDependency
        {
            Id = projectDependencyIntCounter.Next(),
            ProjectFromId = projectMap[x.ProjectName],
            ProjectToId = projectMap[x.Id],
            Version = x.Version,
            TargetFramework = x.Framework,
            Type = x.Type.ToString()
        });

    using (var bulkWriter = new BulkWriter<ProjectDependency>(_connectionString))
    {
        bulkWriter.WriteToDatabase(projectDependencies);
    }
}
public async Task CanWriteSyncWithExistingConnectionAndTransactionAndOptions()
{
    var tableName = TestHelpers.DropCreate(nameof(BulkWriterTestsMyTestClass));
    var tableNameWithKey = TestHelpers.DropCreate(nameof(BulkWriterTestsMyTestClassWithKey));

    using (var connection = new SqlConnection(_connectionString))
    {
        await connection.OpenAsync();

        using (var transaction = connection.BeginTransaction())
        {
            var writer = new BulkWriter<BulkWriterTestsMyTestClass>(connection, transaction);
            var writerWithOptions = new BulkWriter<BulkWriterTestsMyTestClassWithKey>(connection, SqlBulkCopyOptions.KeepIdentity, transaction);

            var items = Enumerable.Range(11, 20).Select(i => new BulkWriterTestsMyTestClass { Id = i, Name = "Bob" });
            var itemsWithKey = Enumerable.Range(11, 20).Select(i => new BulkWriterTestsMyTestClassWithKey { Id = i, Name = "Bob" });

            writer.WriteToDatabase(items);
            writerWithOptions.WriteToDatabase(itemsWithKey);

            var minId = (int?)await TestHelpers.ExecuteScalar(connection, $"SELECT MIN(Id) FROM {tableName}", transaction);
            var minIdWithKey = (int?)await TestHelpers.ExecuteScalar(connection, $"SELECT MIN(Id) FROM {tableNameWithKey}", transaction);
            Assert.Equal(1, minId);
            Assert.Equal(11, minIdWithKey);

            transaction.Rollback();

            var emptyMinId = await TestHelpers.ExecuteScalar(connection, $"SELECT MIN(Id) FROM {tableName}");
            var emptyMinIdWithKey = await TestHelpers.ExecuteScalar(connection, $"SELECT MIN(Id) FROM {tableNameWithKey}");
            Assert.Equal(emptyMinId, System.DBNull.Value);
            Assert.Equal(emptyMinIdWithKey, System.DBNull.Value);
        }
    }
}
public async Task ThrowsWhenAStepThrows()
{
    var tableName = TestHelpers.DropCreate(nameof(PipelineTestsMyTestClass));

    using (var writer = new BulkWriter<PipelineTestsMyTestClass>(_connectionString))
    {
        var items = Enumerable.Range(1, 1000).Select(i => new PipelineTestsMyTestClass { Id = i, Name = "Bob" });
        var pipeline = EtlPipeline
            .StartWith(items)
            .Project<PipelineTestsMyTestClass>(i => throw new Exception("Projection exception"))
            .WriteTo(writer);

        var pipelineTask = pipeline.ExecuteAsync();
        var exception = await Assert.ThrowsAsync<Exception>(() => pipelineTask);
        Assert.Equal("Projection exception", exception.Message);
    }
}
public async Task WritesToBulkWriter()
{
    var tableName = TestHelpers.DropCreate(nameof(PipelineTestsMyTestClass));

    using (var writer = new BulkWriter<PipelineTestsMyTestClass>(_connectionString))
    {
        var items = Enumerable.Range(1, 1000).Select(i => new PipelineTestsMyTestClass { Id = i, Name = "Bob" });
        var pipeline = EtlPipeline
            .StartWith(items)
            .WriteTo(writer);

        await pipeline.ExecuteAsync();

        var count = (int)await TestHelpers.ExecuteScalar(_connectionString, $"SELECT COUNT(1) FROM {tableName}");
        Assert.Equal(1000, count);
    }
}
private IDictionary<string, int> WriteProjects(IEnumerable<Dependency> dependencies)
{
    var projectIntCounter = new IntCounter(-1);
    var projects = dependencies
        .SelectMany(x => new List<string> { x.ProjectName, x.Id })
        .Distinct()
        .Select(x => new Project { Id = projectIntCounter.Next(), Name = x })
        .ToList();

    using (var bulkWriter = new BulkWriter<Project>(_connectionString))
    {
        bulkWriter.WriteToDatabase(projects);
    }

    return projects.ToDictionary(x => x.Name, x => x.Id);
}
private static async Task Main(string[] args)
{
    SetupDb();

    var timer = new Stopwatch();

    using (var bulkWriter = new BulkWriter<MyDomainEntity>(@"Data Source=.\sqlexpress;Database=BulkWriter.Demo;Trusted_Connection=True;Connection Timeout=300")
    {
        BulkCopyTimeout = 0,
        BatchSize = 10000
    })
    {
        var items = GetDomainEntities();

        timer.Start();
        await bulkWriter.WriteToDatabaseAsync(items);
        timer.Stop();
    }

    Console.WriteLine(timer.ElapsedMilliseconds);
    Console.ReadKey();
}
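The demo references two members not shown here. A plausible sketch, assuming `MyDomainEntity` is a flat POCO and `GetDomainEntities()` yields rows lazily so `WriteToDatabaseAsync` can stream them in 10,000-row batches instead of materializing the whole set (the names come from the demo, but these bodies are guesses):

// Hypothetical sketch; the real definitions live in the demo project.
public class MyDomainEntity
{
    public int Id { get; set; }
    public string FirstName { get; set; }
    public string LastName { get; set; }
}

private static IEnumerable<MyDomainEntity> GetDomainEntities()
{
    // Yielding lazily means the writer never holds the full set in memory.
    for (var i = 1; i <= 1_000_000; i++)
    {
        yield return new MyDomainEntity { Id = i, FirstName = "First", LastName = "Last" };
    }
}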
public async Task Should_Handle_Column_Nvarchar_With_Length_Max()
{
    string tableName = nameof(MyTestClassForNvarCharMax);
    TestHelpers.ExecuteNonQuery(_connectionString, $"DROP TABLE IF EXISTS [dbo].[{tableName}]");
    TestHelpers.ExecuteNonQuery(_connectionString,
        "CREATE TABLE [dbo].[" + tableName + "](" +
        "[Id] [int] IDENTITY(1,1) NOT NULL," +
        "[Name] [nvarchar](MAX) NULL," +
        "CONSTRAINT [PK_" + tableName + "] PRIMARY KEY CLUSTERED ([Id] ASC)" +
        ")");

    var writer = new BulkWriter<MyTestClassForNvarCharMax>(_connectionString);
    var items = new[] { new MyTestClassForNvarCharMax { Id = 1, Name = "Bob" } };
    writer.WriteToDatabase(items);

    var count = (int)await TestHelpers.ExecuteScalar(_connectionString, $"SELECT COUNT(1) FROM {tableName}");
    Assert.Equal(1, count);
}
private void MapOne(Dictionary <string, object> obj) { ArrayList um = (ArrayList)JsonUtil.GetValue(obj, "entities.user_mentions"); for (int i = 0; i < um.Count; i++) { if (!String.IsNullOrWhiteSpace((string)((Dictionary <string, object>)um[i])["id"])) { // [run_id] [smallint] NOT NULL BulkWriter.WriteSmallInt(RunID); // [tweet_id] [bigint] NOT NULL BulkWriter.WriteBigInt(JsonUtil.GetInt64(obj, "id")); // [user_id] [bigint] NOT NULL BulkWriter.WriteBigInt(JsonUtil.GetInt64(obj, "user.id")); // [mentioned_user_id] [bigint] NOT NULL BulkWriter.WriteBigInt(JsonUtil.GetInt64(um[i], "id")); BulkWriter.EndLine(); } } }
private void WriteBlocksToDisk(ConcurrentIOPoolManager readPool, ConcurrentIOPoolManager writePool, Algorithm.Collection.ICollectionOnDisk parent, IDictionary<long, Sop.DataBlock> blocks)
{
    // todo: do this Async way, when time permits. :)
    if (BinaryWriter == null)
    {
        if (BufferStream == null)
        {
            BufferStream = new MemoryStream();
        }
        BinaryWriter = new BinaryWriter(BufferStream, parent.File.Server.Encoding);
    }
    else
    {
        BinaryWriter.Seek(0, SeekOrigin.Begin);
    }

    const int sizeOfNumerics = Sop.DataBlock.OverheadSize;
    var writeBuffer = _writeBuffer;
    int bufferIndex = 0, currentTargetBufferIndex = 0;
    long runningAddress = -1, startBlockAddress = -1;
    var bulkWriter = new BulkWriter();
    var dataChunks = new List<BulkWriter.DataChunk>(4);

    #region resize data file before appending to it...
    if (readPool == null || writePool == null)
    {
        long currentAddress = -1;
        if (((Collections.Generic.ISortedDictionary<long, Sop.DataBlock>)blocks).MoveLast())
        {
            currentAddress = ((Collections.Generic.ISortedDictionary<long, Sop.DataBlock>)blocks).CurrentKey;
        }
        if (currentAddress > -1)
        {
            // thread safe increase File Size to accommodate data to be appended...
            var FileSize = currentAddress + (int)parent.DataBlockSize;
            if (parent.FileStream.Length < FileSize)
            {
                if (parent.Transaction != null)
                {
                    lock (parent.Transaction)
                    {
                        if (parent.FileStream.Length < FileSize)
                        {
                            parent.FileStream.SetLength(FileSize);
                        }
                    }
                }
                else
                {
                    parent.FileStream.SetLength(FileSize);
                }
            }
        }
    }
    #endregion

    Sop.DataBlock[] blocksCopy = new Sop.DataBlock[blocks.Count];
    blocks.Values.CopyTo(blocksCopy, 0);
    foreach (Sop.DataBlock block in blocksCopy)
    {
        SetIsDirty(block, false);
        if (block.DataAddress >= 0)
        {
            #region Process special states, e.g. - buffer is full, current block Address is fragmented from previous block's
            if (startBlockAddress == -1)
            {
                startBlockAddress = runningAddress = block.DataAddress;
            }
            else
            {
                bool bufferIsFull = bufferIndex + sizeOfNumerics + block.Data.Length + currentTargetBufferIndex > writeBuffer.Length - block.Length;
                if (block.DataAddress != runningAddress || bufferIsFull)
                {
                    dataChunks.Add(new BulkWriter.DataChunk
                    {
                        TargetDataAddress = startBlockAddress,
                        Index = currentTargetBufferIndex,   // index in the buffer of 1st byte of this segment
                        Size = bufferIndex                  // size of the segment
                    });
                    if (bufferIsFull)
                    {
                        //** write to disk
                        if (readPool != null && writePool != null)
                        {
                            bulkWriter.Backup(readPool, writePool, parent, writeBuffer, dataChunks);
                            if (writePool.AsyncThreadException != null)
                            {
                                throw writePool.AsyncThreadException;
                            }
                            else if (readPool.AsyncThreadException != null)
                            {
                                throw readPool.AsyncThreadException;
                            }
                        }
                        else if (writePool != null)
                        {
                            bulkWriter.Write(writePool, parent, writeBuffer, dataChunks);
                            if (writePool.AsyncThreadException != null)
                            {
                                throw writePool.AsyncThreadException;
                            }
                        }
                        else
                        {
                            throw new SopException("WriteBlocksToDisk has a bug!");
                        }
                        // create new buffer for succeeding chunks...
                        dataChunks = new List<BulkWriter.DataChunk>(4);
                        writeBuffer = new byte[writeBuffer.Length];
                        currentTargetBufferIndex = 0;
                    }
                    else
                    {
                        currentTargetBufferIndex += bufferIndex;
                    }
                    bufferIndex = 0;
                    runningAddress = startBlockAddress = block.DataAddress;
                }
            }
            #endregion
        }
        else
        {
            throw new InvalidOperationException("Invalid (-) Block.DataAddress detected.");
        }

        //**** write Block Header and Data to disk
        BinaryWriter.Seek(0, SeekOrigin.Begin);
        // Byte 0 to 7: Next Item Address (64 bit long int) = 0 (no next item)
        BinaryWriter.Write(block.NextItemAddress);
        // Byte 8 to 11: Size Occupied
        BinaryWriter.Write(block.SizeOccupied);
        // Byte 12 to 19: Low-level next datablock address
        BinaryWriter.Write(block.InternalNextBlockAddress);
        // Byte 20: count of member blocks, max is 65535.
        ushort memberCount = 0;
        if (block.IsHead)
        {
            int cm = block.CountMembers(true);
            memberCount = cm > Sop.DataBlock.MaxChainMemberCount ? Sop.DataBlock.MaxChainMemberCount : (ushort)cm;
        }
        BinaryWriter.Write(memberCount);

        byte[] b2 = BufferStream.GetBuffer();
        Array.Copy(b2, 0, writeBuffer, currentTargetBufferIndex + bufferIndex, sizeOfNumerics);
        bufferIndex += sizeOfNumerics;

        //** Byte 20 to 20 + Data Length: USER DATA
        int cs = block.Data.Length;
        //if (currentTargetBufferIndex + cs + bufferIndex > writeBuffer.Length - block.Length)
        //    cs = writeBuffer.Length - (currentTargetBufferIndex + bufferIndex);
        Array.Copy(block.Data, 0, writeBuffer, currentTargetBufferIndex + bufferIndex, cs);
        bufferIndex += block.Data.Length;
        runningAddress += block.Length;
    }

    // write the last chunk set to disk...
    if (startBlockAddress != -1)
    {
        //** write to disk
        dataChunks.Add(new BulkWriter.DataChunk
        {
            TargetDataAddress = startBlockAddress,
            Index = currentTargetBufferIndex,
            Size = bufferIndex
        });
    }
    if (dataChunks.Count > 0)
    {
        if (readPool != null && writePool != null)
        {
            bulkWriter.Backup(readPool, writePool, parent, writeBuffer, dataChunks);
        }
        else if (writePool != null)
        {
            bulkWriter.Write(writePool, parent, writeBuffer, dataChunks);
        }
        else
        {
            throw new SopException("WriteBlocksToDisk has a bug!");
        }
    }
}
/// <summary>
/// Write a group of Blocks into Disk. NOTE: it will be more optimal if Blocks
/// are sorted by their Data Address so this function can write contiguous blocks
/// in one async write.
/// </summary>
public int WriteBlocksToDisk(Algorithm.Collection.ICollectionOnDisk parent, IDictionary<long, Sop.DataBlock> blocks, bool clear)
{
    if (!parent.IsOpen)
    {
        return 0;
    }

    var blockSize = (int)parent.DataBlockSize;
    int r = blocks.Count;
    if (BinaryWriter == null)
    {
        if (BufferStream == null)
        {
            BufferStream = new MemoryStream();
        }
        BinaryWriter = new BinaryWriter(BufferStream, parent.File.Server.Encoding);
    }
    else
    {
        BinaryWriter.Seek(0, SeekOrigin.Begin);
    }

    const int sizeOfNumerics = Sop.DataBlock.OverheadSize;
    int chunkSize = (int)DataBlockSize.FiveTwentyFourTwoEightyEight * 4;
    if (chunkSize > blocks.Count * blockSize)
    {
        chunkSize = blocks.Count * blockSize;
    }
    if (_writeBuffer == null || _writeBuffer.Length < chunkSize)
    {
        _writeBuffer = new byte[chunkSize];
    }

    int bufferIndex = 0, startIndex = 0, currentTargetBufferIndex = 0;
    long runningAddress = -1, startBlockAddress = -1;
    var bulkWriter = new BulkWriter();
    var dataChunks = new List<BulkWriter.DataChunk>(4);

    foreach (Sop.DataBlock block in blocks.Values)
    {
        SetIsDirty(block, false);
        if (block.DataAddress >= 0)
        {
            #region Process special states, e.g. - buffer is full
            if (startBlockAddress == -1)
            {
                startBlockAddress = runningAddress = block.DataAddress;
            }
            else
            {
                bool bufferIsFull = (bufferIndex - startIndex) + sizeOfNumerics + block.Data.Length + currentTargetBufferIndex > _writeBuffer.Length - block.Length;
                if (block.DataAddress != runningAddress || bufferIsFull)
                {
                    dataChunks.Add(new BulkWriter.DataChunk
                    {
                        TargetDataAddress = startBlockAddress == -1 ? block.DataAddress : startBlockAddress,
                        Index = currentTargetBufferIndex + startIndex,
                        Size = bufferIndex - startIndex
                    });
                    if (bufferIsFull)
                    {
                        //** write to disk
                        bulkWriter.Write(parent, _writeBuffer, dataChunks);
                        //** reset buffer
                        dataChunks.Clear();
                        currentTargetBufferIndex = 0;
                    }
                    else
                    {
                        currentTargetBufferIndex += (bufferIndex - startIndex);
                    }
                    startIndex = bufferIndex = 0;
                    runningAddress = startBlockAddress = block.DataAddress;
                }
            }
            #endregion
        }
        else
        {
            throw new InvalidOperationException("Invalid (-) Block.DataAddress detected.");
        }

        //**** write Block Header and Data to disk
        BinaryWriter.Seek(0, SeekOrigin.Begin);
        // Byte 0 to 7: Next Item Address (64 bit long int) = 0 (no next item)
        BinaryWriter.Write(block.NextItemAddress);
        // Byte 8 to 11: Size Occupied
        BinaryWriter.Write(block.SizeOccupied);
        // Byte 12 to 19: Low-level next datablock address
        BinaryWriter.Write(block.InternalNextBlockAddress);
        // Byte 20: count of member blocks, max is 255.
        byte memberCount = 0;
        if (block.IsHead)
        {
            int cm = block.CountMembers(true);
            memberCount = cm > byte.MaxValue ? byte.MaxValue : (byte)cm;
        }
        BinaryWriter.Write(memberCount);

        byte[] b2 = BufferStream.GetBuffer();
        Array.Copy(b2, 0, _writeBuffer, currentTargetBufferIndex + bufferIndex, sizeOfNumerics);
        bufferIndex += sizeOfNumerics;

        //** Byte 20 to 20 + Data Length: USER DATA
        int cs = block.Data.Length;
        if (currentTargetBufferIndex + cs + bufferIndex > _writeBuffer.Length - block.Length)
        {
            cs = _writeBuffer.Length - (currentTargetBufferIndex + bufferIndex);
        }
        Array.Copy(block.Data, 0, _writeBuffer, currentTargetBufferIndex + bufferIndex, cs);
        bufferIndex += block.Data.Length;
        runningAddress += block.Length;
    }

    if (startBlockAddress != -1)
    {
        //** write to disk
        dataChunks.Add(new BulkWriter.DataChunk
        {
            TargetDataAddress = startBlockAddress,
            Index = currentTargetBufferIndex + startIndex,
            Size = bufferIndex - startIndex
        });
    }
    if (dataChunks.Count > 0)
    {
        bulkWriter.Write(parent, _writeBuffer, dataChunks);
    }
    return r;
}
private void MapOne(Dictionary<string, object> obj)
{
    var user = (Dictionary<string, object>)obj["user"];
    // [run_id] [smallint] NOT NULL
    BulkWriter.WriteSmallInt(RunID);
    // [user_id] [bigint] NOT NULL
    BulkWriter.WriteBigInt(JsonUtil.GetInt64(user, "id"));
    // [created_at] [datetime] NOT NULL
    BulkWriter.WriteDateTime(JsonUtil.GetDateTime(user, "created_at"));
    // [tweeted_at] [datetime] NOT NULL
    BulkWriter.WriteDateTime(JsonUtil.GetDateTime(obj, "created_at"));
    // [screen_name] [nvarchar](50) NOT NULL
    BulkWriter.WriteVarChar(JsonUtil.GetString(user, "screen_name") ?? "", 50);
    // [description] [nvarchar](160) NOT NULL
    BulkWriter.WriteVarChar(Util.UnescapeText(JsonUtil.GetString(user, "description")) ?? "", 160);
    // [favourites_count] [int] NOT NULL
    BulkWriter.WriteInt(JsonUtil.GetNullableInt32(user, "favourites_count") ?? 0);
    // [followers_count] [int] NOT NULL
    BulkWriter.WriteInt(JsonUtil.GetNullableInt32(user, "followers_count") ?? 0);
    // [friends_count] [int] NOT NULL
    BulkWriter.WriteInt(JsonUtil.GetNullableInt32(user, "friends_count") ?? 0);
    // [statuses_count] [int] NOT NULL
    BulkWriter.WriteInt(JsonUtil.GetNullableInt32(user, "statuses_count") ?? 0);
    // [geo_enabled] [bit] NOT NULL
    BulkWriter.WriteBit(JsonUtil.GetNullableBoolean(user, "geo_enabled") ?? false);
    // [lang] [char](5) NOT NULL
    BulkWriter.WriteChar(JsonUtil.GetString(user, "lang") ?? "", 5);
    // [location] [nvarchar](100) NULL
    BulkWriter.WriteVarChar(Util.UnescapeText(JsonUtil.GetString(user, "location")), 100);
    // [name] [nvarchar](30) NOT NULL
    BulkWriter.WriteVarChar(Util.UnescapeText(JsonUtil.GetString(user, "name")) ?? "", 30);
    // [profile_background_color] [char](6) NOT NULL
    BulkWriter.WriteChar(JsonUtil.GetString(user, "profile_background_color") ?? "000000", 6);
    // [profile_text_color] [char](6) NOT NULL
    BulkWriter.WriteChar(JsonUtil.GetString(user, "profile_text_color") ?? "000000", 6);
    // [protected] [bit] NOT NULL
    BulkWriter.WriteBit(JsonUtil.GetNullableBoolean(user, "protected") ?? false);
    // [show_all_inline_media] [bit] NOT NULL
    BulkWriter.WriteBit(JsonUtil.GetNullableBoolean(user, "show_all_inline_media") ?? false);
    // [utc_offset] [int] NULL
    BulkWriter.WriteNullableInt(JsonUtil.GetNullableInt32(user, "utc_offset"));
    // [verified] [bit] NOT NULL
    BulkWriter.WriteBit(JsonUtil.GetNullableBoolean(user, "verified") ?? false);
    BulkWriter.EndLine();
}
private void MapOne(Dictionary<string, object> obj)
{
    // [run_id] [smallint] NOT NULL
    BulkWriter.WriteSmallInt(RunID);
    // [tweet_id] [bigint] NOT NULL
    BulkWriter.WriteBigInt(JsonUtil.GetInt64(obj, "id"));
    // [created_at] [datetime] NOT NULL
    BulkWriter.WriteDateTime(JsonUtil.GetDateTime(obj, "created_at"));
    // [utc_offset] [int] NULL
    BulkWriter.WriteNullableInt(JsonUtil.GetNullableInt32(obj, "user.utc_offset"));
    // [user_id] [bigint] NOT NULL
    BulkWriter.WriteBigInt(JsonUtil.GetInt64(obj, "user.id"));
    // [place_id] [char](16) NULL
    BulkWriter.WriteNullableChar(JsonUtil.GetString(obj, "place.id"), 16);

    // [lon] [float] NULL
    // [lat] [float] NULL
    // [cx] [float] NOT NULL
    // [cy] [float] NOT NULL
    // [cz] [float] NOT NULL
    // [htm_id] [bigint] NOT NULL
    if (obj.ContainsKey("coordinates") && obj["coordinates"] != null)
    {
        string[] coords = (string[])((ArrayList)JsonUtil.GetValue(obj, "coordinates.coordinates")).ToArray(typeof(string));
        var lon = double.Parse(coords[0], System.Globalization.CultureInfo.InvariantCulture);
        var lat = double.Parse(coords[1], System.Globalization.CultureInfo.InvariantCulture);
        var c = new Cartesian(lon, lat);
        BulkWriter.WriteNullableFloat(lon);
        BulkWriter.WriteNullableFloat(lat);
        BulkWriter.WriteFloat(c.X);
        BulkWriter.WriteFloat(c.Y);
        BulkWriter.WriteFloat(c.Z);
        BulkWriter.WriteBigInt(Trixel.CartesianToHid20(c));
    }
    else
    {
        BulkWriter.WriteNullableFloat(null);
        BulkWriter.WriteNullableFloat(null);
        BulkWriter.WriteFloat(0);
        BulkWriter.WriteFloat(0);
        BulkWriter.WriteFloat(0);
        BulkWriter.WriteBigInt(0);
    }

    // [in_reply_to_tweet_id] [bigint] NULL
    BulkWriter.WriteNullableBigInt(JsonUtil.GetNullableInt64(obj, "in_reply_to_status_id"));
    // [in_reply_to_user_id] [bigint] NULL
    BulkWriter.WriteNullableBigInt(JsonUtil.GetNullableInt64(obj, "in_reply_to_user_id"));

    // [possibly_sensitive] [bit] NULL
    // [possibly_sensitive_editable] [bit] NULL
    if (obj.ContainsKey("possibly_sensitive") && obj.ContainsKey("possibly_sensitive_editable"))
    {
        BulkWriter.WriteNullableBit(JsonUtil.GetNullableBoolean(obj, "possibly_sensitive"));
        BulkWriter.WriteNullableBit(JsonUtil.GetNullableBoolean(obj, "possibly_sensitive_editable"));
    }
    else
    {
        BulkWriter.WriteNullableBit(null);
        BulkWriter.WriteNullableBit(null);
    }

    // [retweet_count] [int] NOT NULL
    BulkWriter.WriteInt(JsonUtil.GetNullableInt32(obj, "retweet_count") ?? 0);
    // [text] [nvarchar](150) NOT NULL
    var text = Util.UnescapeText(System.Web.HttpUtility.HtmlDecode(JsonUtil.GetString(obj, "text")));
    BulkWriter.WriteVarChar(text, 150);
    // [truncated] [bit] NOT NULL
    BulkWriter.WriteBit(JsonUtil.GetNullableBoolean(obj, "truncated") ?? false);
    // [lang] [char](5) NOT NULL
    BulkWriter.WriteChar(JsonUtil.GetString(obj, "user.lang") ?? "??", 5);

    // [lang_word_count] [tinyint] NOT NULL
    // [lang_guess1] [char](2) NOT NULL
    // [lang_guess2] [char](2) NOT NULL
    int words;
    string lang1, lang2;
    if (LanguageUtil.DetectLanguage(text, out words, out lang1, out lang2))
    {
        BulkWriter.WriteTinyInt((sbyte)words); // [lang_word_count]
        BulkWriter.WriteChar(lang1, 2);        // [lang_guess1]
        BulkWriter.WriteChar(lang2, 2);        // [lang_guess2]
    }
    else
    {
        BulkWriter.WriteTinyInt(0);
        BulkWriter.WriteChar("??", 2);
        BulkWriter.WriteChar("??", 2);
    }

    BulkWriter.EndLine();
}