// Reads the first chunk of the requested test workbook with AddFilenameColumnNamed set to "Path",
// then checks the seven column names, the first three cells of row 0 and that the appended
// "Path" column of row 0 holds the full name of the file that was loaded.
public void NormalBook_FirstRowCorrect_AddFilenameColumnNamed(string versionOfTestFile)
{
    var source = new ExcelDataFlowSource { AddFilenameColumnNamed = "Path" };
    var loadListener = new ThrowImmediatelyDataLoadEventListener();
    source.PreInitialize(new FlatFileToLoad(_fileLocations[versionOfTestFile]), loadListener);

    DataTable dt = source.GetChunk(new ThrowImmediatelyDataLoadEventListener(), new GracefulCancellationToken());

    // Column headers, in the order they are expected to appear (the last one is the added filename column).
    string[] expectedColumns =
    {
        "Participant", "Score", "IsEvil", "DateField", "DoubleField", "MixedField", "Path"
    };

    Assert.AreEqual(7, dt.Columns.Count);
    for (int columnIndex = 0; columnIndex < expectedColumns.Length; columnIndex++)
    {
        Assert.AreEqual(expectedColumns[columnIndex], dt.Columns[columnIndex].ColumnName);
    }

    DataRow firstRow = dt.Rows[0];
    Assert.AreEqual("Bob", firstRow[0]);
    Assert.AreEqual("3", firstRow[1]);
    Assert.AreEqual("yes", firstRow[2]);

    // The filename column carries the full path of the source workbook.
    Assert.AreEqual(_fileLocations[versionOfTestFile].FullName, firstRow[6]);
}
// Loads worksheet "MySheet" of the odd-formats workbook and checks that its two rows and five
// columns (including a column that is hidden in the spreadsheet) come through with the expected
// text for each cell.
public void TestOddFormats()
{
    var listener = new ToMemoryDataLoadEventListener(true);

    var source = new ExcelDataFlowSource { WorkSheetName = "MySheet" };
    source.PreInitialize(new FlatFileToLoad(_fileLocations[OddFormatsFile]), listener);

    DataTable dt = source.GetChunk(listener, new GracefulCancellationToken());

    Assert.AreEqual(2, dt.Rows.Count);
    Assert.AreEqual(5, dt.Columns.Count);

    Assert.AreEqual("Name", dt.Columns[0].ColumnName);
    Assert.AreEqual("Category", dt.Columns[1].ColumnName);
    Assert.AreEqual("Age", dt.Columns[2].ColumnName);
    Assert.AreEqual("Wage", dt.Columns[3].ColumnName);
    Assert.AreEqual("Invisibre", dt.Columns[4].ColumnName); // this column is hidden in the spreadsheet but we still load it

    Assert.AreEqual("Frank", dt.Rows[0][0]);
    Assert.AreEqual("Upper, Left", dt.Rows[0][1]);
    Assert.AreEqual("30", dt.Rows[0][2]);

    // The wage is prefixed with a pound symbol. Because the symbol has more than one encoding,
    // match it as "any single non-word character" instead of embedding it in this file.
    // The decimal point is escaped so that it only matches a literal '.'.
    StringAssert.IsMatch(@"^\W11\.00$", dt.Rows[0][3].ToString());

    Assert.AreEqual("0.1", dt.Rows[0][4]);

    Assert.AreEqual("Castello", dt.Rows[1][0]);
    Assert.AreEqual("Lower, Back", dt.Rows[1][1]);
    Assert.AreEqual("31", dt.Rows[1][2]);
    Assert.AreEqual("50.00%", dt.Rows[1][3]);
    Assert.AreEqual("0.2", dt.Rows[1][4]);
}
// Loads worksheet "MySheet" of the odd-formats workbook and checks that its two rows and five
// columns (including a column that is hidden in the spreadsheet) come through with the expected
// text for each cell.
public void TestOddFormats()
{
    var listener = new ToMemoryDataLoadEventListener(true);

    var source = new ExcelDataFlowSource { WorkSheetName = "MySheet" };
    source.PreInitialize(new FlatFileToLoad(_fileLocations[OddFormatsFile]), listener);

    DataTable dt = source.GetChunk(listener, new GracefulCancellationToken());

    Assert.AreEqual(2, dt.Rows.Count);
    Assert.AreEqual(5, dt.Columns.Count);

    Assert.AreEqual("Name", dt.Columns[0].ColumnName);
    Assert.AreEqual("Category", dt.Columns[1].ColumnName);
    Assert.AreEqual("Age", dt.Columns[2].ColumnName);
    Assert.AreEqual("Wage", dt.Columns[3].ColumnName);
    Assert.AreEqual("Invisibre", dt.Columns[4].ColumnName); // this column is hidden in the spreadsheet but we still load it

    Assert.AreEqual("Frank", dt.Rows[0][0]);
    Assert.AreEqual("Upper, Left", dt.Rows[0][1]);
    Assert.AreEqual("30", dt.Rows[0][2]);

    // The wage cell is "<pound symbol>11.00". Embedding the pound symbol as a literal makes the
    // assertion depend on how this source file is encoded, so match the currency symbol as
    // "any single non-word character" and pin the numeric part exactly.
    StringAssert.IsMatch(@"^\W11\.00$", dt.Rows[0][3].ToString());

    Assert.AreEqual("0.1", dt.Rows[0][4]);

    Assert.AreEqual("Castello", dt.Rows[1][0]);
    Assert.AreEqual("Lower, Back", dt.Rows[1][1]);
    Assert.AreEqual("31", dt.Rows[1][2]);
    Assert.AreEqual("50.00%", dt.Rows[1][3]);
    Assert.AreEqual("0.2", dt.Rows[1][4]);
}
/// <summary>
/// Opens the Excel workbook <paramref name="fileInfo"/> and writes every worksheet whose name is
/// accepted by <c>IsWorksheetNameMatch</c> to a CSV file in the job's ForLoading directory.
/// Worksheets that do not match are reported to the job and skipped.
/// </summary>
/// <param name="fileInfo">The workbook to convert; ".xls" files are opened with <c>HSSFWorkbook</c>, anything else with <c>XSSFWorkbook</c>.</param>
/// <param name="job">Receives progress notifications and supplies the directory the CSV files are saved into.</param>
private void ProcessFile(FileInfo fileInfo, IDataLoadJob job)
{
    // The workbook is only read, so request read access explicitly: the two-argument FileStream
    // constructor asks for read/write access, which fails on read-only files.
    using (var fs = new FileStream(fileInfo.FullName, FileMode.Open, FileAccess.Read))
    {
        // Compare the extension case-insensitively so that e.g. ".XLS" is not handed to XSSFWorkbook.
        bool isXls = string.Equals(fileInfo.Extension, ".xls", System.StringComparison.OrdinalIgnoreCase);

        IWorkbook wb;
        if (isXls)
        {
            wb = new HSSFWorkbook(fs);
        }
        else
        {
            wb = new XSSFWorkbook(fs);
        }

        try
        {
            var source = new ExcelDataFlowSource();
            source.PreInitialize(new FlatFileToLoad(fileInfo), job);

            for (int i = 0; i < wb.NumberOfSheets; i++)
            {
                var sheet = wb.GetSheetAt(i);

                if (!IsWorksheetNameMatch(sheet.SheetName))
                {
                    job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Ignoring worksheet:" + sheet.SheetName));
                    continue;
                }

                job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Started processing worksheet:" + sheet.SheetName));

                // Optionally prefix the sheet name with the workbook's file name (without extension).
                string newName = PrefixWithWorkbookName
                    ? Path.GetFileNameWithoutExtension(fileInfo.FullName) + "_" + sheet.SheetName
                    : sheet.SheetName;

                // make it sensible
                newName = new MicrosoftQuerySyntaxHelper().GetSensibleTableNameFromString(newName) + ".csv";

                string savePath = Path.Combine(job.LoadDirectory.ForLoading.FullName, newName);

                var dt = source.GetAllData(sheet, job);
                dt.SaveAsCsv(savePath);

                job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Saved worksheet as " + newName));
            }
        }
        finally
        {
            // Always release the workbook, even when a worksheet fails to convert.
            wb.Close();
        }
    }
}
// Runs Check on a source initialized with a ".xlsx" file name, using a notifier that is
// configured to throw on warnings; the test passes when Check completes without throwing.
public void Checks_ValidFileExtension_Pass()
{
    var source = new ExcelDataFlowSource();
    var excelFile = new FlatFileToLoad(new FileInfo("bob.xlsx"));
    source.PreInitialize(excelFile, new ThrowImmediatelyDataLoadEventListener());

    var strictNotifier = new ThrowImmediatelyCheckNotifier { ThrowOnWarning = true };
    source.Check(strictNotifier);
}
// Reads the first chunk of the standard test workbook and checks that exactly five rows are returned.
public void NormalBook_NoEmptyRowsRead()
{
    var source = new ExcelDataFlowSource();
    var listener = new ToMemoryDataLoadEventListener(true);

    var workbook = new FlatFileToLoad(_fileLocations[TestFile]);
    source.PreInitialize(workbook, listener);

    DataTable chunk = source.GetChunk(listener, new GracefulCancellationToken());

    Assert.AreEqual(5, chunk.Rows.Count);
}
// Runs Check on a source initialized with a ".csv" file name, using a notifier that throws on
// warnings, and verifies the exact message of the exception that is raised.
public void Checks_ValidFileExtension_InvalidExtensionPass()
{
    const string expectedMessage =
        "File extension bob.csv has an invalid extension:.csv (this class only accepts:.xlsx,.xls)";

    var source = new ExcelDataFlowSource();
    var csvFile = new FlatFileToLoad(new FileInfo("bob.csv"));
    source.PreInitialize(csvFile, new ThrowImmediatelyDataLoadEventListener());

    var ex = Assert.Throws<Exception>(() =>
    {
        var strictNotifier = new ThrowImmediatelyCheckNotifier { ThrowOnWarning = true };
        source.Check(strictNotifier);
    });

    Assert.AreEqual(expectedMessage, ex.Message);
}
// Reads the "freaky" test workbook while collecting events in memory, prints everything that was
// collected, and checks that the source raised a Warning whose message contains the expected
// "Discarded the following data ..." text.
public void FreakyTestFile_WarningsCorrect()
{
    const string expectedFragment =
        "Discarded the following data (that was found in unamed columns):RowCount:5";

    var messages = new ToMemoryDataLoadEventListener(true);
    var source = new ExcelDataFlowSource();

    // Initialization uses a throwing listener; only the read itself is recorded in 'messages'.
    source.PreInitialize(new FlatFileToLoad(_fileLocations[FreakyTestFile]), new ThrowImmediatelyDataLoadEventListener());

    // The returned table is not inspected; the test only cares about the events raised while reading.
    source.GetChunk(messages, new GracefulCancellationToken());

    var eventsFromSource = messages.EventsReceivedBySender[source];
    Console.Write(messages.ToString());

    bool warningRaised = eventsFromSource.Any(
        e => e.Message.Contains(expectedFragment) && e.ProgressEventType == ProgressEventType.Warning);

    Assert.IsTrue(warningRaised);
}
// Points the source at BlankBook.xlsx in the test resources and verifies that reading it throws a
// FlatFileLoadException saying that sheet 'Sheet1' of the workbook is empty.
public void BlankWorkbook()
{
    var source = new ExcelDataFlowSource();

    string blankBookPath = Path.Combine(
        TestContext.CurrentContext.TestDirectory,
        "DataLoad",
        "Engine",
        "Resources",
        "BlankBook.xlsx");
    var fi = new FileInfo(blankBookPath);

    // Fail early if the resource file was not deployed next to the tests.
    Assert.IsTrue(fi.Exists);

    source.PreInitialize(new FlatFileToLoad(fi), new ThrowImmediatelyDataLoadEventListener());

    var ex = Assert.Throws<FlatFileLoadException>(() =>
    {
        source.GetChunk(new ThrowImmediatelyDataLoadEventListener(), new GracefulCancellationToken());
    });

    Assert.AreEqual("The Excel sheet 'Sheet1' in workbook 'BlankBook.xlsx' is empty", ex.Message);
}
/// <summary>
/// Loads <paramref name="fileToLoad"/> into <c>_dataTable</c> using a newly created hosted
/// <c>ExcelDataFlowSource</c> configured with this instance's <c>WorkSheetName</c> and
/// <c>AddFilenameColumnNamed</c>. When <c>ForceReplacementHeaders</c> is set, the comma separated
/// names in it replace the column names of the loaded table (or an Error is notified when the
/// number of names does not match the number of columns).
/// </summary>
/// <param name="fileToLoad">The Excel file to read.</param>
/// <param name="listener">Receives progress, information and error notifications.</param>
/// <exception cref="Exception">Thrown when the hosted source returns a second, non-null chunk.</exception>
protected override void OpenFile(FileInfo fileToLoad, IDataLoadEventListener listener)
{
    _haveServedData = false;
    _fileToLoad = fileToLoad;

    _hostedSource = new ExcelDataFlowSource();
    _hostedSource.WorkSheetName = WorkSheetName;
    _hostedSource.AddFilenameColumnNamed = AddFilenameColumnNamed;
    _hostedSource.PreInitialize(new FlatFileToLoad(fileToLoad), listener);

    var startingMessage = "About to start processing " + fileToLoad.FullName;
    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, startingMessage));

    _dataTable = _hostedSource.GetChunk(listener, new GracefulCancellationToken());

    bool headersAreForced = !string.IsNullOrEmpty(ForceReplacementHeaders);
    if (headersAreForced)
    {
        // Split on commas; entries are trimmed unless they are null/whitespace-only, which are kept as they are.
        string[] forcedNames = ForceReplacementHeaders
            .Split(',')
            .Select(name => string.IsNullOrWhiteSpace(name) ? name : name.Trim())
            .ToArray();

        var substitutionSummary = GenerateASCIIArtOfSubstitutions(forcedNames, _dataTable.Columns);
        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information,
            "Force headers will make the following header changes:" + substitutionSummary));

        if (forcedNames.Length == _dataTable.Columns.Count)
        {
            // Rename the columns, in order, to the forced replacements.
            for (int columnIndex = 0; columnIndex < forcedNames.Length; columnIndex++)
            {
                _dataTable.Columns[columnIndex].ColumnName = forcedNames[columnIndex];
            }
        }
        else
        {
            var mismatchMessage =
                "ForceReplacementHeaders was set but it had " + forcedNames.Length +
                " column header names while the file had " + _dataTable.Columns.Count +
                " (there must be the same number of replacement headers as headers in the excel file)";
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, mismatchMessage));
        }
    }

    // All data should now be exhausted: a second chunk means the file was not read in one go.
    var unexpectedChunk = _hostedSource.GetChunk(listener, new GracefulCancellationToken());
    if (unexpectedChunk != null)
    {
        throw new Exception("Hosted source served more than 1 chunk, expected all the data to be read from the Excel file in one go");
    }
}
// Reads BlankLineBook.xlsx from the test resources and checks that three rows and the two
// columns "Name" and "Age" are returned.
public void BlankFirstLineFile()
{
    ExcelDataFlowSource source = new ExcelDataFlowSource();

    // Build the path from separate segments so the platform's directory separator is used;
    // a hard-coded backslash path does not resolve on non-Windows platforms.
    var fi = new FileInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, "DataLoad", "Engine", "Resources", "BlankLineBook.xlsx"));

    // Fail early if the resource file was not deployed next to the tests.
    Assert.IsTrue(fi.Exists);

    source.PreInitialize(new FlatFileToLoad(fi), new ThrowImmediatelyDataLoadEventListener());

    DataTable dt = source.GetChunk(new ThrowImmediatelyDataLoadEventListener(), new GracefulCancellationToken());

    Assert.AreEqual(3, dt.Rows.Count);
    Assert.AreEqual(2, dt.Columns.Count);
    Assert.AreEqual("Name", dt.Columns[0].ColumnName);
    Assert.AreEqual("Age", dt.Columns[1].ColumnName);
}
// Reads the requested test workbook and checks the text produced for columns 3, 4 and 5 of its
// five rows, which contain a mixture of dates, date-times, times and plain decimals.
public void ExcelDateTimeDeciphering(string versionOfTestFile)
{
    /*
     * Cell contents as noted by the original author (first four rows, columns 3-5):
     *
     * 01/01/2001          0.1     01/01/2001
     * 01/01/2001 10:30    0.51    01/01/2001 10:30
     * 01/01/2002 11:30    0.22    0.1
     * 01/01/2003 01:30    0.10    0.51
     */
    const int firstCheckedColumn = 3;

    // Expected text per row for columns 3, 4 and 5 (in that order).
    string[,] expectedCells =
    {
        { "2001-01-01",          "0.1",      "10:30:00" },
        { "2001-01-01 10:30:00", "0.51",     "11:30:00" },
        { "2002-01-01 11:30:00", "0.22",     "0.1"      },
        { "2003-01-01 01:30:00", "0.10",     "0.51"     },
        { "2015-09-18",          "15:09:00", "00:03:56" }
    };

    var listener = new ToMemoryDataLoadEventListener(true);
    var source = new ExcelDataFlowSource();
    source.PreInitialize(new FlatFileToLoad(_fileLocations[versionOfTestFile]), listener);

    DataTable dt = source.GetChunk(listener, new GracefulCancellationToken());

    Assert.AreEqual(5, dt.Rows.Count);

    for (int row = 0; row < expectedCells.GetLength(0); row++)
    {
        for (int offset = 0; offset < expectedCells.GetLength(1); offset++)
        {
            Assert.AreEqual(expectedCells[row, offset], dt.Rows[row][firstCheckedColumn + offset]);
        }
    }
}