// Writes the table returned by GetTable() through CSVWriter into an in-memory stream,
// then checks the first output line against EXPECTED_CSV_WRITER_OUTPUT and the whole
// output (with "\r\n" normalised to "\n") against the reference CSV file.
public void TestCSVWriter()
{
    string expectedCsv = UtilsForTesting.LoadCsv("Resources/csv/argentina_diputados_voting_record.csv");
    Table table = this.GetTable();

    using (var stream = new MemoryStream())
    using (var sb = new StreamWriter(stream) { AutoFlush = true })
    {
        (new CSVWriter()).Write(sb, table);

        // AutoFlush is on, so the bytes are already in the stream: rewind and read them back.
        var reader = new StreamReader(stream);
        stream.Position = 0;
        var s = reader.ReadToEnd().Trim(); // trim to remove last new line

        // Split on "\r\n" as well as a bare "\n" (i.e. the "\\r?\\n" pattern) so that the
        // first-line check does not depend on which line terminator the writer emitted.
        string[] lines = s.Split(new[] { "\r\n", "\n" }, System.StringSplitOptions.None);

        Assert.Equal(EXPECTED_CSV_WRITER_OUTPUT, lines[0]);
        Assert.Equal(expectedCsv, s.Replace("\r\n", "\n"));
    }
}
// Extracts the first table found by SpreadsheetExtractionAlgorithm on the first page of
// frx_2012_disclosure.pdf, serialises it with CSVWriter into a StringBuilder and compares
// the raw output with the reference CSV (no new-line normalisation, no trimming).
public void TestCSVMultilineRow()
{
    string reference = UtilsForTesting.LoadCsv("Resources/csv/frx_2012_disclosure.csv");

    // The area is all-NaN here; the values left in the trailing comment of the original
    // call were 53.0f, 49.0f, 735.0f, 550.0f.
    PageArea area = UtilsForTesting.GetAreaFromFirstPage(
        "Resources/frx_2012_disclosure.pdf",
        new PdfRectangle(double.NaN, double.NaN, double.NaN, double.NaN));

    var extractor = new SpreadsheetExtractionAlgorithm();
    Table firstTable = extractor.Extract(area)[0];

    var output = new StringBuilder();
    var csvWriter = new CSVWriter();
    csvWriter.Write(output, firstTable);

    Assert.Equal(reference, output.ToString());
}
// Serialises the list of tables returned by GetTables() with CSVWriter into an in-memory
// stream and compares the trimmed text with the reference CSV.
public void TestCSVSerializeTwoTables()
{
    string reference = UtilsForTesting.LoadCsv("Resources/csv/twotables.csv");
    List<Table> allTables = this.GetTables();

    using (var buffer = new MemoryStream())
    using (var writer = new StreamWriter(buffer) { AutoFlush = true })
    {
        var csvWriter = new CSVWriter();
        csvWriter.Write(writer, allTables);

        // Rewind before reading back what was just written.
        var reader = new StreamReader(buffer);
        buffer.Position = 0;
        string written = reader.ReadToEnd();

        // Trim to remove the last new line.
        Assert.Equal(reference, written.Trim());
    }
}
// Extracts the first table from an area of the first page of schools.pdf with
// SpreadsheetExtractionAlgorithm, writes it as CSV and compares it (with "\r\n" turned
// into "\n") against the trimmed reference CSV.
public void TestCSVSerializeInfinity()
{
    string reference = UtilsForTesting.LoadCsv("Resources/csv/schools.csv");

    // Original area as top, left, bottom, right: 53.74f, 16.97f, 548.74f, 762.3f
    // page height = 612
    const double pageHeight = 612;
    var area = new PdfRectangle(
        16.97,
        pageHeight - 548.74,
        762.3,
        pageHeight - 53.74 - 1); // remove 1 because add an empty line at the top if not
    PageArea page = UtilsForTesting.GetAreaFromFirstPage("Resources/schools.pdf", area);

    var extractor = new SpreadsheetExtractionAlgorithm();
    Table firstTable = extractor.Extract(page)[0];

    var output = new StringBuilder();
    new CSVWriter().Write(output, firstTable);

    string actual = output.ToString().Replace("\r\n", "\n");
    Assert.Equal(reference.Trim(), actual);
}
// Runs SpreadsheetExtractionAlgorithm on page 1 of spanning_cells.pdf, expects exactly two
// tables, and compares their combined CSV output (new lines normalised to "\n", trimmed)
// with the reference CSV.
public void TestSpanningCellsToCsv()
{
    PageArea firstPage = UtilsForTesting.GetPage("Resources/spanning_cells.pdf", 1);
    string reference = UtilsForTesting.LoadCsv("Resources/csv/spanning_cells.csv");

    var extractor = new SpreadsheetExtractionAlgorithm();
    List<Table> found = extractor.Extract(firstPage);
    Assert.Equal(2, found.Count);

    var output = new StringBuilder();
    new CSVWriter().Write(output, found);

    string actual = output.ToString().Replace("\r\n", "\n").Trim();
    Assert.Equal(reference, actual);
}
// Runs BasicExtractionAlgorithm on page 1 of data.pdf and compares the CSV of the first
// extracted table (new lines normalised to "\n") with the reference file.
public void StreamNoGuess1()
{
    PageArea firstPage = UtilsForTesting.GetPage("Resources/data.pdf", 1);

    // data_stream_noguess.csv was modified for decimal precision
    string reference = UtilsForTesting.LoadCsv("Resources/csv/data_stream_noguess.csv");

    var extractor = new BasicExtractionAlgorithm();
    List<Table> found = extractor.Extract(firstPage);

    var output = new StringBuilder();
    new CSVWriter().Write(output, found[0]);

    string actual = output.ToString().Replace("\r\n", "\n");
    Assert.Equal(reference, actual);
}
// Checks that SpreadsheetExtractionAlgorithm reports an area of page 1 of
// spreadsheet_no_bounding_frame.pdf as tabular, then compares the raw CSV of the first
// extracted table with the reference file (no new-line normalisation).
public void TestSpreadsheetWithNoBoundingFrameShouldBeSpreadsheet()
{
    // Values left in the trailing comment of the original call: 150.56f, 58.9f, 654.7f, 536.12f
    var area = new PdfRectangle(58.9, 842 - 654.7, 536.12, 842 - 150.56);
    PageArea page = UtilsForTesting.GetAreaFromPage("Resources/spreadsheet_no_bounding_frame.pdf", 1, area);

    string reference = UtilsForTesting.LoadCsv("Resources/csv/spreadsheet_no_bounding_frame.csv");

    var extractor = new SpreadsheetExtractionAlgorithm();
    Assert.True(extractor.IsTabular(page));

    List<Table> found = extractor.Extract(page);

    var output = new StringBuilder();
    new CSVWriter().Write(output, found[0]);

    Assert.Equal(reference, output.ToString());
}
// Runs BasicExtractionAlgorithm on page 1 of data.pdf and compares the CSV of the first
// extracted table (new lines normalised to "\n") with data_stream_noguess.csv.
// Counterpart call noted in the original: tabula.read_pdf(pdf_path, stream=True, guess=False)
public void StreamNoGuess1()
{
    PageArea firstPage = UtilsForTesting.GetPage("Resources/data.pdf", 1);
    string reference = UtilsForTesting.LoadCsv("Resources/csv/data_stream_noguess.csv");

    var extractor = new BasicExtractionAlgorithm();
    List<Table> found = extractor.Extract(firstPage);

    var output = new StringBuilder();
    var csvWriter = new CSVWriter();
    csvWriter.Write(output, found[0]);

    string actual = output.ToString().Replace("\r\n", "\n");
    Assert.Equal(reference, actual);
}
// Runs SpreadsheetExtractionAlgorithm on page 1 of data.pdf, expects a single table, and
// compares its CSV (new lines normalised to "\n") with the reference file.
public void Latice1()
{
    PageArea firstPage = UtilsForTesting.GetPage("Resources/data.pdf", 1);

    // data_lattice.csv was modified to add the last row, missing in tabula_py
    string reference = UtilsForTesting.LoadCsv("Resources/csv/data_lattice.csv");

    var extractor = new SpreadsheetExtractionAlgorithm();
    List<Table> found = extractor.Extract(firstPage);
    Assert.Single(found);

    var output = new StringBuilder();
    new CSVWriter().Write(output, found[0]);

    string actual = output.ToString().Replace("\r\n", "\n");
    Assert.Equal(reference, actual);
}
// Extracts the first table from an area of page 2 of us-020.pdf with
// BasicExtractionAlgorithm, writes it as CSV to an in-memory stream and compares the
// text (new lines normalised to "\n", trimmed) with the reference CSV.
public void TestTableWithMultilineHeader()
{
    string reference = UtilsForTesting.LoadCsv("Resources/csv/us-020.csv");

    var area = new PdfRectangle(35.0, 151, 560, 688.5);
    PageArea page = UtilsForTesting.GetAreaFromPage("Resources/us-020.pdf", 2, area);

    var extractor = new BasicExtractionAlgorithm();
    Table firstTable = extractor.Extract(page)[0];

    using (var buffer = new MemoryStream())
    using (var writer = new StreamWriter(buffer) { AutoFlush = true })
    {
        new CSVWriter().Write(writer, firstTable);

        // Rewind before reading back what was just written.
        var reader = new StreamReader(buffer);
        buffer.Position = 0;
        string written = reader.ReadToEnd();

        // Trim to remove the last new line.
        string actual = written.Replace("\r\n", "\n").Trim();
        Assert.Equal(reference, actual);
    }
}
// Extracts the first table from an area of page 1 of indictb1h_14.pdf with
// BasicExtractionAlgorithm, writes it as CSV to an in-memory stream and compares the
// text (new lines normalised to "\n", trimmed) with the reference CSV.
public void TestRealLifeRTL2()
{
    string reference = UtilsForTesting.LoadCsv("Resources/csv/indictb1h_14.csv");

    var area = new PdfRectangle(120.0, 842 - 622.82, 459.9, 842 - 120.0);
    PageArea page = UtilsForTesting.GetAreaFromPage("Resources/indictb1h_14.pdf", 1, area);

    var extractor = new BasicExtractionAlgorithm();
    Table firstTable = extractor.Extract(page)[0];

    using (var buffer = new MemoryStream())
    using (var writer = new StreamWriter(buffer) { AutoFlush = true })
    {
        new CSVWriter().Write(writer, firstTable);

        // Rewind before reading back what was just written.
        var reader = new StreamReader(buffer);
        buffer.Position = 0;
        string written = reader.ReadToEnd();

        // Trim to remove the last new line.
        string actual = written.Replace("\r\n", "\n").Trim();
        Assert.Equal(reference, actual);
    }
}
// Runs SpreadsheetExtractionAlgorithm on an area of the first page of the
// "Publication of award of Bids" PDF, expects a single table, and compares its CSV with
// the reference file after mapping every line break on both sides to "\r".
public void TestSpreadsheetExtractionIssue656()
{
    // Notes kept from the original:
    //   page height = 482, width 762.3 // 612
    //   top, left, bottom, right: 56.925f, 24.255f, 549.945f, 786.555f
    var area = new PdfRectangle(24.255, 71, 786.555, 553);
    PageArea page = UtilsForTesting.GetAreaFromFirstPage(
        "Resources/Publication_of_award_of_Bids_for_Transport_Sector__August_2016.pdf",
        area);

    string reference = UtilsForTesting.LoadCsv("Resources/csv/Publication_of_award_of_Bids_for_Transport_Sector__August_2016.csv");

    var extractor = new SpreadsheetExtractionAlgorithm();
    List<Table> found = extractor.Extract(page);
    Assert.Single(found);

    var output = new StringBuilder();
    new CSVWriter().Write(output, found[0]);

    // is there an issue with \r and \n?
    // Both sides are reduced to "\r"-only line breaks before being compared.
    string expected = reference.Replace("\n", "\r");
    string actual = output.ToString().Replace("\r\n", "\n").Replace("\n", "\r").Trim();
    Assert.Equal(expected, actual);
}