Example #1
        public void CompareFlowWithBulkInsert(int numberOfRows, double deviation)
        {
            //Arrange
            BigDataCsvSource.CreateCSVFileIfNeeded(numberOfRows);

            var sourceNonGeneric = new CSVSource(BigDataCsvSource.GetCompleteFilePath(numberOfRows));
            var destNonGeneric   = new MemoryDestination();
            var sourceGeneric    = new CSVSource<CSVData>(BigDataCsvSource.GetCompleteFilePath(numberOfRows));
            var destGeneric      = new MemoryDestination<CSVData>();
            var sourceDynamic    = new CSVSource<ExpandoObject>(BigDataCsvSource.GetCompleteFilePath(numberOfRows));
            var destDynamic      = new MemoryDestination<ExpandoObject>();

            //Act
            var teNonGeneric = GetETLBoxTime(numberOfRows, sourceNonGeneric, destNonGeneric);
            var teGeneric    = GetETLBoxTime(numberOfRows, sourceGeneric, destGeneric);
            var teDynamic    = GetETLBoxTime(numberOfRows, sourceDynamic, destDynamic);

            //Assert
            Assert.Equal(numberOfRows, destNonGeneric.Data.Count);
            Assert.Equal(numberOfRows, destGeneric.Data.Count);
            Assert.Equal(numberOfRows, destDynamic.Data.Count);
            //The slowest of the three flows must finish within the allowed deviation of the fastest one
            Assert.True(new[] { teGeneric.TotalMilliseconds, teNonGeneric.TotalMilliseconds, teDynamic.TotalMilliseconds }.Max() <
                        new[] { teGeneric.TotalMilliseconds, teNonGeneric.TotalMilliseconds, teDynamic.TotalMilliseconds }.Min() * (deviation + 1));
        }
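
The `GetETLBoxTime` helper these tests call is not part of the snippet. Below is a minimal sketch of what it could look like for Example #1's in-memory flow, reusing the `BigDataHelper.LogExecutionTime` wrapper seen in Example #3; Example #2 would need an equivalent overload taking a `DbDestination`, and the real helper may differ.

        //Hedged sketch - the actual GetETLBoxTime used by these tests is not shown.
        //It links the source to the destination, runs the flow, and returns the elapsed time.
        private TimeSpan GetETLBoxTime<T>(int numberOfRows, CSVSource<T> source, MemoryDestination<T> dest)
        {
            source.LinkTo(dest);
            return BigDataHelper.LogExecutionTime(
                $"Copying Csv into memory with {numberOfRows} rows of data using ETLBox",
                () =>
                {
                    source.Execute();
                    dest.Wait();
                });
        }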
Example #2
        public void CompareFlowWithBulkInsert(IConnectionManager connection, int numberOfRows, int batchSize, double deviationGeneric, double deviationBulk)
        {
            //Arrange
            BigDataCsvSource.CreateCSVFileIfNeeded(numberOfRows);
            ReCreateDestinationTable(connection, "CsvDestinationNonGenericETLBox");
            ReCreateDestinationTable(connection, "CsvDestinationBulkInsert");
            ReCreateDestinationTable(connection, "CsvDestinationGenericETLBox");

            var sourceNonGeneric = new CsvSource(BigDataCsvSource.GetCompleteFilePath(numberOfRows));
            var destNonGeneric   = new DbDestination(connection, "CsvDestinationNonGenericETLBox", batchSize);
            var sourceGeneric    = new CsvSource<CSVData>(BigDataCsvSource.GetCompleteFilePath(numberOfRows));
            var destGeneric      = new DbDestination<CSVData>(connection, "CsvDestinationGenericETLBox", batchSize);

            //Act
            var timeElapsedBulkInsert       = GetBulkInsertTime(connection, numberOfRows);
            var timeElapsedETLBoxNonGeneric = GetETLBoxTime(numberOfRows, sourceNonGeneric, destNonGeneric);
            var timeElapsedETLBoxGeneric    = GetETLBoxTime(numberOfRows, sourceGeneric, destGeneric);

            //Assert
            Assert.Equal(numberOfRows, RowCountTask.Count(connection, "CsvDestinationNonGenericETLBox"));
            Assert.Equal(numberOfRows, RowCountTask.Count(connection, "CsvDestinationGenericETLBox"));
            //Generic and non-generic flows should run within the allowed deviation of each other
            Assert.True(Math.Abs(timeElapsedETLBoxGeneric.TotalMilliseconds - timeElapsedETLBoxNonGeneric.TotalMilliseconds) <
                        Math.Min(timeElapsedETLBoxGeneric.TotalMilliseconds, timeElapsedETLBoxNonGeneric.TotalMilliseconds) * deviationGeneric);
            //A native bulk insert is expected to be faster than ETLBox, but not by more than the allowed deviation
            if (timeElapsedBulkInsert.TotalMilliseconds > 0)
            {
                Assert.True(timeElapsedBulkInsert < timeElapsedETLBoxNonGeneric);
                Assert.True(timeElapsedBulkInsert.TotalMilliseconds * (deviationBulk + 1) > timeElapsedETLBoxNonGeneric.TotalMilliseconds);
            }
        }
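
Example #2 also relies on a `ReCreateDestinationTable` helper that is not shown. A plausible sketch, assuming ETLBox's `DropTableTask`/`CreateTableTask` and a four-column layout mirroring the `Col1`–`Col4` properties used in Example #3; the column types are assumptions.

        //Hedged sketch - drops and recreates the target table so every run starts empty.
        //Column names follow the Col1..Col4 properties used in these tests; the types are assumptions.
        private void ReCreateDestinationTable(IConnectionManager connection, string tableName)
        {
            DropTableTask.DropIfExists(connection, tableName);
            CreateTableTask.Create(connection, tableName, new List<TableColumn>()
            {
                new TableColumn("Col1", "VARCHAR(255)"),
                new TableColumn("Col2", "VARCHAR(255)"),
                new TableColumn("Col3", "VARCHAR(255)"),
                new TableColumn("Col4", "VARCHAR(255)")
            });
        }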
Example #3
        public void CheckMemoryUsage(IConnectionManager connection, int numberOfRows, int batchSize, double deviation)
        {
            //Arrange
            BigDataCsvSource.CreateCSVFileIfNeeded(numberOfRows);
            ReCreateDestinationTable(connection, "CsvDestinationWithTransformation");

            var sourceExpando = new CsvSource(BigDataCsvSource.GetCompleteFilePath(numberOfRows));
            //Convert each dynamic ExpandoObject row into a strongly typed CSVData object
            var trans = new RowTransformation<ExpandoObject, CSVData>(row =>
            {
                dynamic r = row;
                return new CSVData()
                {
                    Col1 = r.Col1,
                    Col2 = r.Col2,
                    Col3 = r.Col3,
                    Col4 = r.Col4
                };
            });
            var destGeneric = new DbDestination<CSVData>(connection, "CsvDestinationWithTransformation", batchSize);

            sourceExpando.LinkTo(trans);
            trans.LinkTo(destGeneric);

            //Act
            long memAfter   = 0;
            long memBefore  = 0;
            bool startCheck = true;
            int  count      = 1;

            destGeneric.AfterBatchWrite = data =>
            {
                //Sample the process working set only every 50th batch to keep the overhead low
                if (count++ % 50 == 0)
                {
                    using (Process proc = Process.GetCurrentProcess())
                    {
                        memAfter = proc.WorkingSet64;
                        //The first sample becomes the baseline for all later comparisons
                        if (startCheck)
                        {
                            memBefore  = memAfter;
                            startCheck = false;
                        }
                        //Memory usage must stay within the allowed deviation of the baseline
                        Assert.True(memAfter < (memBefore + (memBefore * deviation)));
                    }
                }
            };

            var timeElapsedETLBox = BigDataHelper.LogExecutionTime(
                $"Copying Csv into DB (Expando to object transformation) with {numberOfRows} rows of data using ETLBox",
                () =>
                {
                    sourceExpando.Execute();
                    destGeneric.Wait();
                });

            output.WriteLine("Elapsed " + timeElapsedETLBox.TotalSeconds + " seconds for ETLBox (Expando to object transformation).");

            //Assert
            Assert.Equal(numberOfRows, RowCountTask.Count(connection, "CsvDestinationWithTransformation"));
            //10.000.000 rows, batch size 10.000: ~8 min
            //10.000.000 rows, batch size  1.000: ~10 min 10 sec
        }
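
Finally, all three examples reference a `CSVData` POCO that is not included in the snippet. A minimal sketch, assuming four string properties named after the `Col1`–`Col4` accesses in the `RowTransformation` above; the real property types could differ.

        //Hedged sketch - the actual CSVData class is not shown in the snippet.
        //String types are an assumption; only the property names are grounded in the examples above.
        public class CSVData
        {
            public string Col1 { get; set; }
            public string Col2 { get; set; }
            public string Col3 { get; set; }
            public string Col4 { get; set; }
        }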