Example #1
0
        // Reads the first chunk of the requested test workbook with AddFilenameColumnNamed set to "Path"
        // and verifies the column layout, the leading values of the first row and that the extra
        // trailing column carries the full path of the workbook that was loaded.
        public void NormalBook_FirstRowCorrect_AddFilenameColumnNamed(string versionOfTestFile)
        {
            var workbook = _fileLocations[versionOfTestFile];
            var source = new ExcelDataFlowSource { AddFilenameColumnNamed = "Path" };

            source.PreInitialize(new FlatFileToLoad(workbook), new ThrowImmediatelyDataLoadEventListener());
            var table = source.GetChunk(new ThrowImmediatelyDataLoadEventListener(), new GracefulCancellationToken());

            // the six spreadsheet columns followed by the filename column requested above
            string[] expectedColumns =
            {
                "Participant", "Score", "IsEvil", "DateField", "DoubleField", "MixedField", "Path"
            };

            Assert.AreEqual(expectedColumns.Length, table.Columns.Count);
            for (int i = 0; i < expectedColumns.Length; i++)
            {
                Assert.AreEqual(expectedColumns[i], table.Columns[i].ColumnName);
            }

            var firstRow = table.Rows[0];
            Assert.AreEqual("Bob", firstRow[0]);
            Assert.AreEqual("3", firstRow[1]);
            Assert.AreEqual("yes", firstRow[2]);

            Assert.AreEqual(workbook.FullName, firstRow[6]);
        }
Example #2
0
        // Loads the "MySheet" worksheet of the odd-formats workbook and verifies that every cell is
        // surfaced as the text asserted below (currency, percentage and a hidden column included).
        public void TestOddFormats()
        {
            var listener = new ToMemoryDataLoadEventListener(true);

            ExcelDataFlowSource source = new ExcelDataFlowSource();

            source.WorkSheetName = "MySheet";

            source.PreInitialize(new FlatFileToLoad(_fileLocations[OddFormatsFile]), listener);
            DataTable dt = source.GetChunk(listener, new GracefulCancellationToken());

            Assert.AreEqual(2, dt.Rows.Count);
            Assert.AreEqual(5, dt.Columns.Count);

            Assert.AreEqual("Name", dt.Columns[0].ColumnName);
            Assert.AreEqual("Category", dt.Columns[1].ColumnName);
            Assert.AreEqual("Age", dt.Columns[2].ColumnName);
            Assert.AreEqual("Wage", dt.Columns[3].ColumnName);
            Assert.AreEqual("Invisibre", dt.Columns[4].ColumnName); // this column is hidden in the spreadsheet but we still load it

            Assert.AreEqual("Frank", dt.Rows[0][0]);
            Assert.AreEqual("Upper, Left", dt.Rows[0][1]);
            Assert.AreEqual("30", dt.Rows[0][2]);

            // The value starts with a pound symbol, but since there are two encodings for that symbol
            // the leading character is matched loosely (\W = any single non-word character).
            // The decimal point is escaped so that only a literal '.' is accepted between "11" and "00".
            StringAssert.IsMatch(@"^\W11\.00$", dt.Rows[0][3].ToString());

            Assert.AreEqual("0.1", dt.Rows[0][4]);

            Assert.AreEqual("Castello", dt.Rows[1][0]);
            Assert.AreEqual("Lower, Back", dt.Rows[1][1]);
            Assert.AreEqual("31", dt.Rows[1][2]);
            Assert.AreEqual("50.00%", dt.Rows[1][3]);
            Assert.AreEqual("0.2", dt.Rows[1][4]);
        }
Example #3
0
        // Loads the "MySheet" worksheet of the odd-formats workbook and compares the resulting
        // table, header by header and cell by cell, against the expected text values.
        public void TestOddFormats()
        {
            var listener = new ToMemoryDataLoadEventListener(true);
            var source = new ExcelDataFlowSource { WorkSheetName = "MySheet" };

            source.PreInitialize(new FlatFileToLoad(_fileLocations[OddFormatsFile]), listener);
            var table = source.GetChunk(listener, new GracefulCancellationToken());

            // "Invisibre" is hidden in the spreadsheet but we still load it
            string[] expectedHeaders = { "Name", "Category", "Age", "Wage", "Invisibre" };
            string[][] expectedRows =
            {
                new[] { "Frank", "Upper, Left", "30", "£11.00", "0.1" },
                new[] { "Castello", "Lower, Back", "31", "50.00%", "0.2" }
            };

            Assert.AreEqual(expectedRows.Length, table.Rows.Count);
            Assert.AreEqual(expectedHeaders.Length, table.Columns.Count);

            for (int col = 0; col < expectedHeaders.Length; col++)
            {
                Assert.AreEqual(expectedHeaders[col], table.Columns[col].ColumnName);
            }

            for (int row = 0; row < expectedRows.Length; row++)
            {
                for (int col = 0; col < expectedRows[row].Length; col++)
                {
                    Assert.AreEqual(expectedRows[row][col], table.Rows[row][col]);
                }
            }
        }
Example #4
0
        // Opens the given Excel workbook and, for every worksheet whose name passes
        // IsWorksheetNameMatch, writes the worksheet's data out as a CSV file in the job's
        // ForLoading directory. Worksheets that do not match are reported and skipped.
        //
        // fileInfo: the workbook to read; a ".xls" extension (any casing) is opened with
        //           HSSFWorkbook, anything else with XSSFWorkbook.
        // job:      receives progress notifications and supplies the output directory.
        private void ProcessFile(FileInfo fileInfo, IDataLoadJob job)
        {
            // The workbook is only ever read here, so request read access only. The two-argument
            // FileStream constructor asks for read/write access, which fails on read-only files.
            using (var fs = new FileStream(fileInfo.FullName, FileMode.Open, FileAccess.Read))
            {
                // Compare the extension case-insensitively so that e.g. "BOOK.XLS" is still
                // treated as a legacy binary workbook rather than being handed to the xlsx parser.
                bool isLegacyFormat = string.Equals(fileInfo.Extension, ".xls", System.StringComparison.OrdinalIgnoreCase);

                IWorkbook wb;
                if (isLegacyFormat)
                {
                    wb = new HSSFWorkbook(fs);
                }
                else
                {
                    wb = new XSSFWorkbook(fs);
                }

                try
                {
                    var source = new ExcelDataFlowSource();
                    source.PreInitialize(new FlatFileToLoad(fileInfo), job);

                    for (int i = 0; i < wb.NumberOfSheets; i++)
                    {
                        var sheet = wb.GetSheetAt(i);

                        if (IsWorksheetNameMatch(sheet.SheetName))
                        {
                            job.OnNotify(this,
                                         new NotifyEventArgs(ProgressEventType.Information,
                                                             "Started processing worksheet:" + sheet.SheetName));

                            // optionally disambiguate sheets from different workbooks by prefixing the workbook name
                            string newName = PrefixWithWorkbookName
                                ? Path.GetFileNameWithoutExtension(fileInfo.FullName) + "_" + sheet.SheetName
                                : sheet.SheetName;

                            //make it sensible
                            newName = new MicrosoftQuerySyntaxHelper().GetSensibleTableNameFromString(newName) + ".csv";

                            string savePath = Path.Combine(job.LoadDirectory.ForLoading.FullName, newName);
                            var    dt       = source.GetAllData(sheet, job);
                            dt.SaveAsCsv(savePath);

                            job.OnNotify(this,
                                         new NotifyEventArgs(ProgressEventType.Information, "Saved worksheet as " + newName));
                        }
                        else
                        {
                            job.OnNotify(this,
                                         new NotifyEventArgs(ProgressEventType.Information, "Ignoring worksheet:" + sheet.SheetName));
                        }
                    }
                }
                finally
                {
                    // always release the workbook, even if a worksheet failed to convert
                    wb.Close();
                }
            }
        }
Example #5
0
        // Running Check() against a source initialised with an .xlsx file is expected to complete
        // without throwing, even though the notifier is configured with ThrowOnWarning enabled.
        public void Checks_ValidFileExtension_Pass()
        {
            var source = new ExcelDataFlowSource();
            var file = new FlatFileToLoad(new FileInfo("bob.xlsx"));

            source.PreInitialize(file, new ThrowImmediatelyDataLoadEventListener());

            var notifier = new ThrowImmediatelyCheckNotifier { ThrowOnWarning = true };
            source.Check(notifier);
        }
Example #6
0
        // Reads the first chunk of the standard test workbook and expects exactly 5 rows
        // (per the test name, presumably confirming that empty rows are not included).
        public void NormalBook_NoEmptyRowsRead()
        {
            var source = new ExcelDataFlowSource();
            var listener = new ToMemoryDataLoadEventListener(true);
            var workbook = new FlatFileToLoad(_fileLocations[TestFile]);

            source.PreInitialize(workbook, listener);
            var table = source.GetChunk(listener, new GracefulCancellationToken());

            int rowCount = table.Rows.Count;
            Assert.AreEqual(5, rowCount);
        }
Example #7
0
        // Running Check() against a source initialised with a .csv file must throw (the notifier
        // has ThrowOnWarning enabled) and the message must name the rejected extension and the
        // extensions the class accepts.
        public void Checks_ValidFileExtension_InvalidExtensionPass()
        {
            var source = new ExcelDataFlowSource();
            var file = new FlatFileToLoad(new FileInfo("bob.csv"));

            source.PreInitialize(file, new ThrowImmediatelyDataLoadEventListener());

            var thrown = Assert.Throws<Exception>(() =>
            {
                var notifier = new ThrowImmediatelyCheckNotifier { ThrowOnWarning = true };
                source.Check(notifier);
            });

            const string expectedMessage = "File extension bob.csv has an invalid extension:.csv (this class only accepts:.xlsx,.xls)";
            Assert.AreEqual(expectedMessage, thrown.Message);
        }
Example #8
0
        // Reads the "freaky" test workbook while recording events in memory, then checks that the
        // source raised a Warning reporting data discarded from unnamed columns.
        public void FreakyTestFile_WarningsCorrect()
        {
            var recorder = new ToMemoryDataLoadEventListener(true);
            var source = new ExcelDataFlowSource();

            // initialisation must not notify anything that the throwing listener would reject
            source.PreInitialize(new FlatFileToLoad(_fileLocations[FreakyTestFile]), new ThrowImmediatelyDataLoadEventListener());

            // only the recorded events matter here; the returned table itself is not inspected
            source.GetChunk(recorder, new GracefulCancellationToken());

            var eventsFromSource = recorder.EventsReceivedBySender[source];

            Console.Write(recorder.ToString());

            const string expectedFragment = "Discarded the following data (that was found in unamed columns):RowCount:5";
            bool warningRaised = eventsFromSource.Any(
                e => e.Message.Contains(expectedFragment) && e.ProgressEventType == ProgressEventType.Warning);

            Assert.IsTrue(warningRaised);
        }
Example #9
0
        // Reading a chunk from BlankBook.xlsx must fail with a FlatFileLoadException whose message
        // names the empty sheet and the workbook.
        public void BlankWorkbook()
        {
            string resourcesDirectory = Path.Combine(TestContext.CurrentContext.TestDirectory, "DataLoad", "Engine", "Resources");
            var blankBook = new FileInfo(Path.Combine(resourcesDirectory, "BlankBook.xlsx"));

            var source = new ExcelDataFlowSource();

            // guard against a missing test resource before exercising the source
            Assert.IsTrue(blankBook.Exists);

            source.PreInitialize(new FlatFileToLoad(blankBook), new ThrowImmediatelyDataLoadEventListener());

            var thrown = Assert.Throws<FlatFileLoadException>(
                () => source.GetChunk(new ThrowImmediatelyDataLoadEventListener(), new GracefulCancellationToken()));

            Assert.AreEqual("The Excel sheet 'Sheet1' in workbook 'BlankBook.xlsx' is empty", thrown.Message);
        }
Example #10
0
        // Reads the given Excel file through a freshly created hosted ExcelDataFlowSource and stores
        // the single resulting chunk in _dataTable. When ForceReplacementHeaders is set, the column
        // names are replaced with the comma separated names it contains (an Error is notified instead
        // if the number of names differs from the number of columns). Throws if the hosted source
        // serves a second chunk, since all data is expected to arrive in one go.
        protected override void OpenFile(FileInfo fileToLoad, IDataLoadEventListener listener)
        {
            _haveServedData = false;
            _fileToLoad = fileToLoad;

            _hostedSource = new ExcelDataFlowSource
            {
                WorkSheetName = WorkSheetName,
                AddFilenameColumnNamed = AddFilenameColumnNamed
            };

            _hostedSource.PreInitialize(new FlatFileToLoad(fileToLoad), listener);

            var startMessage = new NotifyEventArgs(ProgressEventType.Information, "About to start processing " + fileToLoad.FullName);
            listener.OnNotify(this, startMessage);

            _dataTable = _hostedSource.GetChunk(listener, new GracefulCancellationToken());

            if (!string.IsNullOrEmpty(ForceReplacementHeaders))
            {
                // split headers by , (and trim leading/trailing whitespace)
                string[] forcedHeaders = ForceReplacementHeaders
                    .Split(',')
                    .Select(header => string.IsNullOrWhiteSpace(header) ? header : header.Trim())
                    .ToArray();

                var columns = _dataTable.Columns;

                // the planned substitutions are reported before validating that the counts line up
                string substitutions = GenerateASCIIArtOfSubstitutions(forcedHeaders, columns);
                listener.OnNotify(this,
                    new NotifyEventArgs(ProgressEventType.Information,
                        "Force headers will make the following header changes:" + substitutions));

                if (forcedHeaders.Length == columns.Count)
                {
                    // rename the columns to match the forced replacements
                    int position = 0;
                    foreach (string header in forcedHeaders)
                    {
                        columns[position].ColumnName = header;
                        position++;
                    }
                }
                else
                {
                    string mismatch =
                        "ForceReplacementHeaders was set but it had " + forcedHeaders.Length +
                        " column header names while the file had " + columns.Count +
                        " (there must be the same number of replacement headers as headers in the excel file)";

                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, mismatch));
                }
            }

            // all data should now be exhausted
            var unexpectedChunk = _hostedSource.GetChunk(listener, new GracefulCancellationToken());
            if (unexpectedChunk != null)
            {
                throw new Exception("Hosted source served more than 1 chunk, expected all the data to be read from the Excel file in one go");
            }
        }
Example #11
0
        // Reads BlankLineBook.xlsx (per the test name, a workbook whose first line is blank) and
        // verifies that the source still yields 3 rows under the two headers "Name" and "Age".
        public void BlankFirstLineFile()
        {
            ExcelDataFlowSource source = new ExcelDataFlowSource();

            // Build the path segment by segment rather than embedding '\' separators in a literal,
            // so the resource is also found on platforms whose directory separator is '/'.
            var fi = new FileInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, "DataLoad", "Engine", "Resources", "BlankLineBook.xlsx"));

            // guard against a missing test resource before exercising the source
            Assert.IsTrue(fi.Exists);

            source.PreInitialize(new FlatFileToLoad(fi), new ThrowImmediatelyDataLoadEventListener());


            DataTable dt = source.GetChunk(new ThrowImmediatelyDataLoadEventListener(), new GracefulCancellationToken());


            Assert.AreEqual(3, dt.Rows.Count);
            Assert.AreEqual(2, dt.Columns.Count);
            Assert.AreEqual("Name", dt.Columns[0].ColumnName);
            Assert.AreEqual("Age", dt.Columns[1].ColumnName);
        }
Example #12
0
        // Reads the requested test workbook and checks how the date/time/number cells in columns
        // 3, 4 and 5 of each of the 5 rows are rendered as text.
        public void ExcelDateTimeDeciphering(string versionOfTestFile)
        {
            /*
             * Cell contents as recorded by the original author of this test:
             *
             * 01/01/2001	0.1	01/01/2001
             * 01/01/2001 10:30	0.51	01/01/2001 10:30
             * 01/01/2002 11:30	0.22	0.1
             * 01/01/2003 01:30	0.10	0.51
             *
             * NOTE(review): the listing above shows four rows while five rows are asserted below,
             * and its last column does not obviously line up with the expected values - confirm
             * against the actual workbook.
             */
            var listener = new ToMemoryDataLoadEventListener(true);
            var source = new ExcelDataFlowSource();

            source.PreInitialize(new FlatFileToLoad(_fileLocations[versionOfTestFile]), listener);
            var table = source.GetChunk(listener, new GracefulCancellationToken());

            // expected text for columns 3, 4 and 5 of each row, in row order
            string[][] expectedByRow =
            {
                new[] { "2001-01-01", "0.1", "10:30:00" },
                new[] { "2001-01-01 10:30:00", "0.51", "11:30:00" },
                new[] { "2002-01-01 11:30:00", "0.22", "0.1" },
                new[] { "2003-01-01 01:30:00", "0.10", "0.51" },
                new[] { "2015-09-18", "15:09:00", "00:03:56" }
            };

            Assert.AreEqual(expectedByRow.Length, table.Rows.Count);

            const int firstCheckedColumn = 3;
            for (int row = 0; row < expectedByRow.Length; row++)
            {
                for (int offset = 0; offset < expectedByRow[row].Length; offset++)
                {
                    Assert.AreEqual(expectedByRow[row][offset], table.Rows[row][firstCheckedColumn + offset]);
                }
            }
        }