/// <summary>
/// Runs the same CSV file through the non-generic, generic (POCO) and dynamic
/// (ExpandoObject) in-memory flows and asserts that all three load every row and
/// that their runtimes stay within the allowed relative deviation of each other.
/// </summary>
/// <param name="numberOfRows">Rows expected in (and generated into) the test CSV file.</param>
/// <param name="deviation">Maximum allowed relative runtime spread, e.g. 0.5 = slowest may be at most 50% slower than fastest.</param>
public void CompareFlowWithBulkInsert(int numberOfRows, double deviation)
{
    //Arrange
    BigDataCsvSource.CreateCSVFileIfNeeded(numberOfRows);
    var sourceNonGeneric = new CSVSource(BigDataCsvSource.GetCompleteFilePath(numberOfRows));
    var destNonGeneric = new MemoryDestination();
    var sourceGeneric = new CSVSource<CSVData>(BigDataCsvSource.GetCompleteFilePath(numberOfRows));
    var destGeneric = new MemoryDestination<CSVData>();
    var sourceDynamic = new CSVSource<ExpandoObject>(BigDataCsvSource.GetCompleteFilePath(numberOfRows));
    var destDynamic = new MemoryDestination<ExpandoObject>();

    //Act
    var teNonGeneric = GetETLBoxTime(numberOfRows, sourceNonGeneric, destNonGeneric);
    var teGeneric = GetETLBoxTime(numberOfRows, sourceGeneric, destGeneric);
    var teDynamic = GetETLBoxTime(numberOfRows, sourceDynamic, destDynamic);

    //Assert
    Assert.Equal(numberOfRows, destNonGeneric.Data.Count);
    Assert.Equal(numberOfRows, destGeneric.Data.Count);
    Assert.Equal(numberOfRows, destDynamic.Data.Count);
    // BUG FIX: the original asserted Max() < Max() * (deviation + 1), comparing the
    // same maximum against itself — trivially true for any positive deviation.
    // The intended check is: the slowest flow must not exceed the fastest flow
    // by more than the allowed deviation.
    var times = new[] { teGeneric.TotalMilliseconds, teNonGeneric.TotalMilliseconds, teDynamic.TotalMilliseconds };
    Assert.True(times.Max() < times.Min() * (deviation + 1));
}
/// <summary>
/// Loads the same CSV file into the database three ways — raw BULK INSERT (SQL Server only),
/// the non-generic ETLBox flow and the generic (POCO) ETLBox flow — then checks row counts
/// and that the runtimes stay within the configured deviations.
/// </summary>
/// <param name="connection">Target database connection.</param>
/// <param name="numberOfRows">Rows expected in (and generated into) the test CSV file.</param>
/// <param name="batchSize">Batch size for the ETLBox DbDestination inserts.</param>
/// <param name="deviationGeneric">Allowed relative runtime difference between generic and non-generic flow.</param>
/// <param name="deviationBulk">Allowed relative slowdown of the ETLBox flow versus raw BULK INSERT.</param>
public void CompareFlowWithBulkInsert(IConnectionManager connection, int numberOfRows, int batchSize, double deviationGeneric, double deviationBulk)
{
    //Arrange
    BigDataCsvSource.CreateCSVFileIfNeeded(numberOfRows);
    ReCreateDestinationTable(connection, "CsvDestinationNonGenericETLBox");
    ReCreateDestinationTable(connection, "CsvDestinationBulkInsert");
    ReCreateDestinationTable(connection, "CsvDestinationGenericETLBox");

    var nonGenericSource = new CsvSource(BigDataCsvSource.GetCompleteFilePath(numberOfRows));
    var nonGenericDest = new DbDestination(connection, "CsvDestinationNonGenericETLBox", batchSize);
    var genericSource = new CsvSource<CSVData>(BigDataCsvSource.GetCompleteFilePath(numberOfRows));
    var genericDest = new DbDestination<CSVData>(connection, "CsvDestinationGenericETLBox", batchSize);

    //Act
    var bulkInsertTime = GetBulkInsertTime(connection, numberOfRows);
    var nonGenericTime = GetETLBoxTime(numberOfRows, nonGenericSource, nonGenericDest);
    var genericTime = GetETLBoxTime(numberOfRows, genericSource, genericDest);

    //Assert
    Assert.Equal(numberOfRows, RowCountTask.Count(connection, "CsvDestinationNonGenericETLBox"));
    Assert.Equal(numberOfRows, RowCountTask.Count(connection, "CsvDestinationGenericETLBox"));

    // Generic and non-generic flows must perform within deviationGeneric of each other.
    double flowGapMs = Math.Abs(genericTime.TotalMilliseconds - nonGenericTime.TotalMilliseconds);
    double fasterFlowMs = Math.Min(genericTime.TotalMilliseconds, nonGenericTime.TotalMilliseconds);
    Assert.True(flowGapMs < fasterFlowMs * deviationGeneric);

    // Bulk-insert timing is only available on SQL Server (zero otherwise);
    // ETLBox must be slower than raw BULK INSERT, but not by more than deviationBulk.
    if (bulkInsertTime.TotalMilliseconds > 0)
    {
        Assert.True(bulkInsertTime < nonGenericTime);
        Assert.True(bulkInsertTime.TotalMilliseconds * (deviationBulk + 1) > nonGenericTime.TotalMilliseconds);
    }
}
/// <summary>
/// Measures how long a raw T-SQL BULK INSERT of the test CSV file takes.
/// Only runs against SQL Server (<see cref="SqlConnectionManager"/>); for any
/// other connection type it returns a zero TimeSpan, which callers use as the
/// "not measured" sentinel.
/// </summary>
/// <param name="connection">Target database connection.</param>
/// <param name="numberOfRows">Rows expected in the CSV file / destination table.</param>
/// <returns>Elapsed time of the BULK INSERT, or TimeSpan zero when skipped.</returns>
private TimeSpan GetBulkInsertTime(IConnectionManager connection, int numberOfRows)
{
    TimeSpan result = TimeSpan.FromMilliseconds(0);
    // BULK INSERT is a SQL Server feature — skip on every other connection manager.
    if (connection.GetType() == typeof(SqlConnectionManager))
    {
        // FIX: the log message was interpolated ($"...") but the {numberOfRows}
        // placeholder was missing ("with rows of data") — restored for consistency
        // with the other timing messages in this file.
        result = BigDataHelper.LogExecutionTime($"Copying Csv into DB (non generic) with {numberOfRows} rows of data using BulkInsert",
            () =>
            {
                SqlTask.ExecuteNonQuery(connection, "Insert with BulkInsert",
                    $@"BULK INSERT [dbo].[CsvDestinationBulkInsert]
                       FROM '{BigDataCsvSource.GetCompleteFilePath(numberOfRows)}'
                       WITH ( FIRSTROW = 2, FIELDTERMINATOR = ',', ROWTERMINATOR = '\n' );
                    ");
            });
        Assert.Equal(numberOfRows, RowCountTask.Count(connection, "CsvDestinationBulkInsert"));
        output.WriteLine("Elapsed " + result.TotalSeconds + " seconds for bulk insert.");
    }
    return result;
}
/// <summary>
/// Streams the test CSV through an ExpandoObject-to-POCO transformation into the
/// database while periodically sampling the process working set: every 50th batch
/// the current working set must stay below the first sampled value plus the
/// allowed relative deviation, proving the pipeline does not accumulate memory.
/// </summary>
/// <param name="connection">Target database connection.</param>
/// <param name="numberOfRows">Rows expected in (and generated into) the test CSV file.</param>
/// <param name="batchSize">Batch size for the DbDestination inserts.</param>
/// <param name="deviation">Allowed relative working-set growth over the baseline sample.</param>
public void CheckMemoryUsage(IConnectionManager connection, int numberOfRows, int batchSize, double deviation)
{
    //Arrange
    BigDataCsvSource.CreateCSVFileIfNeeded(numberOfRows);
    ReCreateDestinationTable(connection, "CsvDestinationWithTransformation");
    var sourceExpando = new CsvSource(BigDataCsvSource.GetCompleteFilePath(numberOfRows));
    var trans = new RowTransformation<ExpandoObject, CSVData>(
        row =>
        {
            // FIX: 'row' is already statically typed as ExpandoObject by the
            // transformation's generic parameter — the redundant 'as ExpandoObject'
            // cast was removed; 'dynamic' alone enables the member access below.
            dynamic r = row;
            return new CSVData()
            {
                Col1 = r.Col1,
                Col2 = r.Col2,
                Col3 = r.Col3,
                Col4 = r.Col4
            };
        });
    var destGeneric = new DbDestination<CSVData>(connection, "CsvDestinationWithTransformation", batchSize);
    sourceExpando.LinkTo(trans);
    trans.LinkTo(destGeneric);

    //Act
    // Closure state shared with the AfterBatchWrite callback: the first sampled
    // working set becomes the baseline (memBefore); later samples must not exceed it
    // by more than the allowed deviation.
    long memAfter = 0;
    long memBefore = 0;
    bool startCheck = true;
    int count = 1;
    destGeneric.AfterBatchWrite = data =>
    {
        // Only sample every 50th batch to keep the Process query overhead low.
        if (count++ % 50 == 0)
        {
            using (Process proc = Process.GetCurrentProcess())
            {
                memAfter = proc.WorkingSet64;
                if (startCheck)
                {
                    // First sample establishes the baseline.
                    memBefore = memAfter;
                    startCheck = false;
                }
                Assert.True(memAfter < (memBefore + (memBefore * deviation)));
            }
        }
    };

    var timeElapsedETLBox = BigDataHelper.LogExecutionTime($"Copying Csv into DB (non generic) with {numberOfRows} rows of data using ETLBox",
        () =>
        {
            sourceExpando.Execute();
            destGeneric.Wait();
        }
    );
    output.WriteLine("Elapsed " + timeElapsedETLBox.TotalSeconds + " seconds for ETLBox (Expando to object transformation).");

    //Assert
    Assert.Equal(numberOfRows, RowCountTask.Count(connection, "CsvDestinationWithTransformation"));

    //10.000.000 rows, batch size 10.000: ~8 min
    //10.000.000 rows, batch size 1.000: ~10 min 10 sec
}