private List<Table> GetTables()
{
    PageArea page = UtilsForTesting.GetPage("Resources/twotables.pdf", 1);
    SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm();
    return sea.Extract(page);
}
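// A minimal sketch of how the GetTables() helper above might be exercised. The assertion that
// "twotables.pdf" yields exactly two tables is an assumption based on the file name, not taken
// from the original test suite; the xUnit Assert calls match those used elsewhere in this file.
public void TestTwoTablesAreDetected()
{
    List<Table> tables = GetTables();

    // Assumed expectation: the fixture contains two tables, each with at least one row.
    Assert.Equal(2, tables.Count);
    Assert.All(tables, t => Assert.NotEmpty(t.Rows));
}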
public void TestCSVMultilineRow()
{
    string expectedCsv = UtilsForTesting.LoadCsv("Resources/csv/frx_2012_disclosure.csv");
    PageArea page = UtilsForTesting.GetAreaFromFirstPage("Resources/frx_2012_disclosure.pdf",
        new PdfRectangle(double.NaN, double.NaN, double.NaN, double.NaN)); // 53.0f, 49.0f, 735.0f, 550.0f);
    SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm();
    Table table = sea.Extract(page)[0];

    StringBuilder sb = new StringBuilder();
    (new CSVWriter()).Write(sb, table);
    string s = sb.ToString();
    Assert.Equal(expectedCsv, s);

    /*
     * using (var stream = new MemoryStream())
     * using (var sb = new StreamWriter(stream) { AutoFlush = true })
     * {
     *     (new CSVWriter()).write(sb, table);
     *     var reader = new StreamReader(stream);
     *     stream.Position = 0;
     *     var s = reader.ReadToEnd().Trim(); // trim to remove last new line
     *     Assert.Equal(expectedCsv, s);
     * }
     */
}
IRegionDefinition IRegionDefinition.AddTemplate(string templatePath, PageArea pageArea)
{
    var templateComponent = (_region.Content as TemplateComponent) ?? new TemplateComponent(_componentDependenciesFactory);

    switch (pageArea)
    {
        case PageArea.Head:
            templateComponent.HeadTemplate(templatePath);
            break;
        case PageArea.Scripts:
            templateComponent.ScriptTemplate(templatePath);
            break;
        case PageArea.Styles:
            templateComponent.StyleTemplate(templatePath);
            break;
        case PageArea.Body:
            templateComponent.BodyTemplate(templatePath);
            break;
        case PageArea.Initialization:
            templateComponent.InitializationTemplate(templatePath);
            break;
    }

    _region.Content = templateComponent;
    return this;
}
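// A hedged usage sketch for the fluent AddTemplate() method above. The helper name and the
// template paths are illustrative assumptions; only the PageArea values come from the switch above.
private static IRegionDefinition AddStandardTemplates(IRegionDefinition regionDefinition)
{
    return regionDefinition
        .AddTemplate("/templates/header", PageArea.Head)    // written into the page <head>
        .AddTemplate("/templates/article", PageArea.Body)   // written into the page body
        .AddTemplate("/templates/article-init", PageArea.Initialization); // written with the initialization scripts
}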
public void TestCSVSerializeInfinity()
{
    string expectedCsv = UtilsForTesting.LoadCsv("Resources/csv/schools.csv");
    // top, left, bottom, right
    // page height = 612
    // 53.74f, 16.97f, 548.74f, 762.3f)
    PageArea page = UtilsForTesting.GetAreaFromFirstPage("Resources/schools.pdf",
        new PdfRectangle(16.97, 612 - 548.74, 762.3, 612 - 53.74 - 1)); // subtract 1, otherwise an empty line is added at the top
    SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm();
    Table table = sea.Extract(page)[0];

    StringBuilder sb = new StringBuilder();
    (new CSVWriter()).Write(sb, table);
    string s = sb.ToString();
    Assert.Equal(expectedCsv.Trim(), s.Replace("\r\n", "\n"));

    /*
     * using (var stream = new MemoryStream())
     * using (var sb = new StreamWriter(stream) { AutoFlush = true })
     * {
     *     (new CSVWriter()).write(sb, table);
     *     var reader = new StreamReader(stream);
     *     stream.Position = 0;
     *     var s = reader.ReadToEnd().Trim(); // trim to remove last new line
     *     Assert.Equal(expectedCsv, s);
     * }
     */
}
public void TestExtractTableWithExternallyDefinedRulings()
{
    PageArea page = UtilsForTesting.GetPage("Resources/us-007.pdf", 1);
    SpreadsheetExtractionAlgorithm bea = new SpreadsheetExtractionAlgorithm();
    List<Table> tables = bea.Extract(page, EXTERNALLY_DEFINED_RULINGS.ToList());
    Assert.Single(tables);

    Table table = tables[0];
    Assert.Equal(18, table.Cells.Count);

    var rows = table.Rows;
    Assert.Equal("Payroll Period", rows[0][0].GetText());
    Assert.Equal("One Withholding\rAllowance", rows[0][1].GetText());
    Assert.Equal("Weekly", rows[1][0].GetText());
    Assert.Equal("$71.15", rows[1][1].GetText());
    Assert.Equal("Biweekly", rows[2][0].GetText());
    Assert.Equal("142.31", rows[2][1].GetText());
    Assert.Equal("Semimonthly", rows[3][0].GetText());
    Assert.Equal("154.17", rows[3][1].GetText());
    Assert.Equal("Monthly", rows[4][0].GetText());
    Assert.Equal("308.33", rows[4][1].GetText());
    Assert.Equal("Quarterly", rows[5][0].GetText());
    Assert.Equal("925.00", rows[5][1].GetText());
    Assert.Equal("Semiannually", rows[6][0].GetText());
    Assert.Equal("1,850.00", rows[6][1].GetText());
    Assert.Equal("Annually", rows[7][0].GetText());
    Assert.Equal("3,700.00", rows[7][1].GetText());
    Assert.Equal("Daily or Miscellaneous\r(each day of the payroll period)", rows[8][0].GetText());
    Assert.Equal("14.23", rows[8][1].GetText());
}
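// The EXTERNALLY_DEFINED_RULINGS field used above is declared elsewhere in the fixture. As a
// hedged sketch of how such vertical rulings could be built from x positions, the helper below
// mirrors the Extract(page, verticalRulingPositions) overload further down in this file, which
// constructs zero-width rulings spanning the page height; the helper itself is illustrative.
private static IReadOnlyList<Ruling> BuildVerticalRulings(PageArea page, IEnumerable<double> xPositions)
{
    // One zero-width vertical ruling per x position, spanning the full page height.
    return xPositions.Select(x => new Ruling(page.Height, x, 0.0, page.Height)).ToList();
}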
public void TestExtractSpreadsheetWithinAnArea() { PageArea page = UtilsForTesting.GetAreaFromPage("Resources/puertos1.pdf", 1, new PdfRectangle(30.32142857142857, 793 - 554.8821428571429, 546.7964285714286, 793 - 273.9035714285714)); // 273.9035714285714f, 30.32142857142857f, 554.8821428571429f, 546.7964285714286f); SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm(); List <Table> tables = se.Extract(page); Table table = tables[0]; Assert.Equal(15, table.Rows.Count); const string expected = "\"\",TM,M.U$S,TM,M.U$S,TM,M.U$S,TM,M.U$S,TM,M.U$S,TM,M.U$S,TM\n" + "Peces vivos,1,25,1,23,2,38,1,37,2,67,2,89,1\n" + "\"Pescado fresco\n" + "o refrigerado.\n" + "exc. filetes\",7.704,7.175,8.931,6.892,12.635,10.255,16.742,13.688,14.357,11.674,13.035,13.429,9.727\n" + "\"Pescado congelado\n" + "exc. filetes\",90.560,105.950,112.645,108.416,132.895,115.874,152.767,133.765,148.882,134.847,156.619,165.134,137.179\n" + "\"Filetes y demás car-\n" + "nes de pescado\",105.434,200.563,151.142,218.389,152.174,227.780,178.123,291.863,169.422,313.735,176.427,381.640,144.814\n" + "\"Pescado sec./sal./\n" + "en salm. har./pol./\n" + "pell. aptos\n" + "p/c humano\",6.837,14.493,6.660,9.167,14.630,17.579,18.150,21.302,18.197,25.739,13.460,23.549,11.709\n" + "Crustáceos,61.691,375.798,52.488,251.043,47.635,387.783,27.815,217.443,7.123,86.019,39.488,373.583,45.191\n" + "Moluscos,162.027,174.507,109.436,111.443,90.834,104.741,57.695,109.141,98.182,206.304,187.023,251.352,157.531\n" + "\"Prod. no exp. en\n" + "otros capítulos.\n" + "No apto p/c humano\",203,328,7,35,521,343,\"1,710\",\"1,568\",125,246,124,263,131\n" + "\"Grasas y aceites de\n" + "pescado y mamíferos\n" + "marinos\",913,297,\"1,250\",476,\"1,031\",521,\"1,019\",642,690,483,489,710,959\n" + "\"Extractos y jugos de\n" + "pescado y mariscos\",5,25,1,3,4,4,31,93,39,117,77,230,80\n" + "\"Preparaciones y con-\n" + "servas de pescado\",846,\"3,737\",\"1,688\",\"4,411\",\"1,556\",\"3,681\",\"2,292\",\"5,474\",\"2,167\",\"7,494\",\"2,591\",\"8,833\",\"2,795\"\n" + "\"Preparaciones y con-\n" + "servas de mariscos\",348,\"3,667\",345,\"1,771\",738,\"3,627\",561,\"2,620\",607,\"3,928\",314,\"2,819\",250\n" + "\"Harina, polvo y pe-\n" + "llets de pescado.No\n" + "aptos p/c humano\",\"16,947\",\"8,547\",\"11,867\",\"6,315\",\"32,528\",\"13,985\",\"37,313\",\"18,989\",\"35,787\",\"19,914\",\"37,821\",\"27,174\",\"30,000\"\n" + "TOTAL,\"453,515\",\"895,111\",\"456,431\",\"718,382\",\"487,183\",\"886,211\",\"494,220\",\"816,623\",\"495,580\",\"810,565\",\"627,469\",\"1,248,804\",\"540,367\"\n"; // TODO add better assertions StringBuilder sb = new StringBuilder(); (new CSVWriter()).Write(sb, tables[0]); string result = sb.ToString(); //List<CSVRecord> parsedExpected = org.apache.commons.csv.CSVParser.parse(expected, CSVFormat.EXCEL).getRecords(); //List<CSVRecord> parsedResult = org.apache.commons.csv.CSVParser.parse(result, CSVFormat.EXCEL).getRecords(); using (var csv = new CsvReader(new StreamReader(new MemoryStream(Encoding.ASCII.GetBytes(result))), CultureInfo.InvariantCulture)) { /* * Assert.Equal(parsedResult.Count, parsedExpected.Count); * for (int i = 0; i < parsedResult.Count; i++) * { * Assert.Equal(parsedResult[i].size(), parsedExpected[i].size()); * } */ } }
public void TestExtractColumnsCorrectly2()
{
    if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) // || RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
    {
        PageArea page = UtilsForTesting.GetPage(EU_017_PDF, 3);
        BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(page.VerticalRulings);
        Table table = bea.Extract(page.GetArea(new PdfRectangle(148.44, 543 - (711.875 - 299.625), 452.32, 543)))[0];

        var result = UtilsForTesting.TableToArrayOfRows(table);
        Assert.Equal(EU_017_EXPECTED.Length, result.Length);

        for (int i = 0; i < EU_017_EXPECTED.Length; i++)
        {
            var expecteds = EU_017_EXPECTED[i];
            var actuals = result[i];
            Assert.Equal(expecteds.Length, actuals.Length);
            for (int j = 0; j < expecteds.Length; j++)
            {
                var e = expecteds[j];
                var a = actuals[j];
                Assert.Equal(e, a);
            }
        }
    }
    else
    {
        // Fails on Linux and macOS; linked to PdfPig not finding the correct font.
        // The fonts can be installed with `apt-get -y install ttf-mscorefonts-installer`,
        // but this still hits the "mscorefonts-eula license could not be presented" error.
    }
}
public void TestExtractColumnsCorrectly()
{
    if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) // || RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
    {
        PageArea page = UtilsForTesting.GetAreaFromPage(EU_002_PDF, 1, new PdfRectangle(70.0, 725 - (233 - 115), 510.0, 725));
        BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm();
        Table table = bea.Extract(page)[0];

        var actualArray = UtilsForTesting.TableToArrayOfRows(table);
        Assert.Equal(EU_002_EXPECTED.Length, actualArray.Length);

        for (int i = 0; i < EU_002_EXPECTED.Length; i++)
        {
            var expecteds = EU_002_EXPECTED[i];
            var actuals = actualArray[i];
            Assert.Equal(expecteds.Length, actuals.Length);
            for (int j = 0; j < expecteds.Length; j++)
            {
                var e = expecteds[j];
                var a = actuals[j];
                Assert.Equal(e, a);
            }
        }
    }
    else
    {
        // Fails on Linux and macOS; linked to PdfPig not finding the correct font.
        // The fonts can be installed with `apt-get -y install ttf-mscorefonts-installer`,
        // but this still hits the "mscorefonts-eula license could not be presented" error.
    }
}
public void TestRealLifeRTL()
{
    PageArea page = UtilsForTesting.GetPage("Resources/mednine.pdf", 1);
    SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm();
    List<Table> tables = sea.Extract(page);
    Assert.Single(tables);

    Table table = tables[0];
    var rows = table.Rows;

    Assert.Equal("الانتخابات التشريعية 2014", rows[0][0].GetText()); // the doubled spaces might be a bug in my implementation.
    // bobld: missing space or wrong word order
    Assert.Equal("ورقة كشف نتائج دائرة مدنين", rows[1][0].GetText());
    Assert.Equal("426", rows[4][0].GetText());
    Assert.Equal("63", rows[4][1].GetText());
    Assert.Equal("43", rows[4][2].GetText());
    Assert.Equal("56", rows[4][3].GetText());
    Assert.Equal("58", rows[4][4].GetText());
    Assert.Equal("49", rows[4][5].GetText());
    Assert.Equal("55", rows[4][6].GetText());
    Assert.Equal("33", rows[4][7].GetText());
    Assert.Equal("32", rows[4][8].GetText());
    Assert.Equal("37", rows[4][9].GetText());
    Assert.Equal("قائمة من أجل تحقيق سلطة الشعب", rows[4][10].GetText());

    // There is one remaining problem that is not yet addressed:
    // - diacritics (e.g. Arabic's tanwinً and probably Hebrew nekudot) are put in the wrong place.
    // This should get fixed, but this is a good first stab at the problem.

    // These (commented-out) tests reflect the theoretically correct answer,
    // which is not currently possible because of the problem listed above.
    //Assert.Equal("مرحباً", rows[0][0].getText()); // really ought to be ً, but this is forgivable for now
}
public void TestRTL()
{
    PageArea page = UtilsForTesting.GetPage("Resources/arabic.pdf", 1);
    SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm();
    List<Table> tables = sea.Extract(page);
    // Assert.Equal(1, tables.size());

    Table table = tables[0];
    var rows = table.Rows;

    Assert.Equal("اسمي سلطان", rows[1][1].GetText());
    Assert.Equal("من اين انت؟", rows[2][1].GetText());
    Assert.Equal("1234", rows[3][0].GetText());
    Assert.Equal("هل انت شباك؟", rows[4][0].GetText());
    Assert.Equal("انا من ولاية كارولينا الشمال", rows[2][0].GetText()); // conjoined lam-alif gets missed
    Assert.Equal("اسمي Jeremy في الانجليزية", rows[4][1].GetText());    // conjoined lam-alif gets missed
    Assert.Equal("عندي 47 قطط", rows[3][1].GetText()); // the real right answer is 47.
    Assert.Equal("Jeremy is جرمي in Arabic", rows[5][0].GetText());
    Assert.Equal("مرحباً", rows[1][0].GetText()); // really ought to be ً, but this is forgivable for now

    // There is one remaining problem that is not yet addressed:
    // - diacritics (e.g. Arabic's tanwinً and probably Hebrew nekudot) are put in the wrong place.
    // This should get fixed, but this is a good first stab at the problem.

    // These (commented-out) tests reflect the theoretically correct answer,
    // which is not currently possible because of the problem listed above.
    // Assert.Equal("مرحباً", table.getRows()[0][0].getText()); // really ought to be ً, but this is forgivable for now
}
public void Eu004()
{
    using (PdfDocument document = PdfDocument.Open("Resources/icdar2013-dataset/competition-dataset-eu/eu-004.pdf", new ParsingOptions() { ClipPaths = true }))
    {
        ObjectExtractor oe = new ObjectExtractor(document);
        PageArea page = oe.Extract(3);

        var detector = new SimpleNurminenDetectionAlgorithm();
        var regions = detector.Detect(page);

        var newArea = page.GetArea(regions[0].BoundingBox);
        var sea = new SpreadsheetExtractionAlgorithm();
        var tables = sea.Extract(newArea);

        /*
         * var detector = new SimpleNurminenDetectionAlgorithm();
         * var regions = detector.Detect(page);
         *
         * foreach (var a in regions)
         * {
         *     IExtractionAlgorithm ea = new BasicExtractionAlgorithm();
         *     var newArea = page.GetArea(a.BoundingBox);
         *     List<Table> tables = ea.Extract(newArea);
         * }
         */
    }
}
public override IWriteResult WritePageArea(
    IRenderContext context,
    PageArea pageArea)
{
    if (pageArea == PageArea.Body)
    {
        var data = context.Data.Get<VerticalText>();

        context.Html.WriteOpenTag("svg", "width", data.Width.ToString(), "height", data.Height.ToString());
        context.Html.WriteLine();
        context.Html.WriteElementLine("style", "text { " + data.TextStyle + " }");

        if (!string.IsNullOrEmpty(data.Background))
        {
            context.Html.WriteOpenTag("rect", true, "width", "100%", "height", "100%", "fill", data.Background);
        }

        context.Html.WriteOpenTag("g", "transform", "translate(" + data.X + "," + data.Y + ") rotate(-90)");
        context.Html.WriteLine();
        context.Html.WriteElementLine("text", data.Caption, "textLength", data.TextHeight.ToString(), "lengthAdjust", data.LengthAdjust);
        context.Html.WriteCloseTag("g");
        context.Html.WriteLine();

        context.Html.WriteCloseTag("svg");
        context.Html.WriteLine();
    }

    return WriteResult.Continue();
}
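// A hedged reconstruction of the VerticalText view model the renderer above reads from
// context.Data. Property names come from their usage above; the types are inferred and the
// real class may differ.
public class VerticalText
{
    public double X { get; set; }            // translate() x offset inside the svg
    public double Y { get; set; }            // translate() y offset inside the svg
    public double Width { get; set; }        // svg width attribute
    public double Height { get; set; }       // svg height attribute
    public double TextHeight { get; set; }   // used as the textLength of the rotated text
    public string Caption { get; set; }      // the text drawn vertically
    public string TextStyle { get; set; }    // css rules applied to the text element
    public string Background { get; set; }   // optional fill colour for the background rect
    public string LengthAdjust { get; set; } // svg lengthAdjust value, e.g. "spacingAndGlyphs"
}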
private Table GetTable()
{
    PageArea page = UtilsForTesting.GetAreaFromFirstPage("Resources/argentina_diputados_voting_record.pdf",
        new PdfRectangle(12.75, 55.0, 561, 567)); // 269.875f, 12.75f, 790.5f, 561f);
    BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm();
    return bea.Extract(page)[0];
}
public async Task<ActionResult> CreateArea(PageArea pageArea)
{
    try
    {
        string redirectURI = "CreateArea";
        if (ModelState.IsValid)
        {
            PageActionManager pageAction = new PageActionManager();
            OperationStatus response = await pageAction.AddUpdatePageArea(pageArea, GetUsername);
            if (Convert.ToBoolean(response.StatusCode))
            {
                ActionMessage(response.Message, MessageType.Success);
                redirectURI = nameof(PageArea);
                return RedirectToAction(redirectURI);
            }
            else
            {
                ActionMessage(response.Message, MessageType.Error);
                return View("CreateArea", pageArea);
            }
        }
        else
        {
            ActionMessage("All the fields are required!", MessageType.Error);
            return View("CreateArea", pageArea);
        }
    }
    catch (Exception ex)
    {
        ProcessException(ex);
        ActionMessage(ex.Message, MessageType.Error);
        return View("CreateArea", pageArea);
    }
}
public void TestSpanningCells() { PageArea page = UtilsForTesting.GetPage("Resources/spanning_cells.pdf", 1); string expectedJson = UtilsForTesting.LoadJson("Resources/json/spanning_cells.json"); SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm(); List <Table> tables = se.Extract(page); Assert.Equal(2, tables.Count); var expectedJObject = (JArray)JsonConvert.DeserializeObject(expectedJson); StringBuilder sb = new StringBuilder(); (new JSONWriter()).Write(sb, tables); var actualJObject = (JArray)JsonConvert.DeserializeObject(sb.ToString()); double pageHeight = 842; double precision = 2; for (int i = 0; i < 2; i++) { Assert.Equal(expectedJObject[i]["extraction_method"], actualJObject[i]["extraction_method"]); Assert.True(Math.Abs(Math.Floor(pageHeight - expectedJObject[i]["top"].Value <double>()) - Math.Floor(actualJObject[i]["top"].Value <double>())) < precision); Assert.True(Math.Abs(Math.Floor(expectedJObject[i]["left"].Value <double>()) - Math.Floor(actualJObject[i]["left"].Value <double>())) < precision); Assert.True(Math.Abs(Math.Floor(expectedJObject[i]["width"].Value <double>()) - Math.Floor(actualJObject[i]["width"].Value <double>())) < precision); Assert.True(Math.Abs(Math.Floor(expectedJObject[i]["height"].Value <double>()) - Math.Floor(actualJObject[i]["height"].Value <double>())) < precision); Assert.True(Math.Abs(Math.Floor(expectedJObject[i]["right"].Value <double>()) - Math.Floor(actualJObject[i]["right"].Value <double>())) < precision); Assert.True(Math.Abs(Math.Floor(pageHeight - expectedJObject[i]["bottom"].Value <double>()) - Math.Floor(actualJObject[i]["bottom"].Value <double>())) < precision); var expectedData = (JArray)expectedJObject[i]["data"]; var actualData = (JArray)actualJObject[i]["data"]; Assert.Equal(expectedData.Count, actualData.Count); for (int r = 0; r < expectedData.Count; r++) { var rowExpected = (JArray)expectedData[r]; var rowActual = (JArray)actualData[r]; Assert.Equal(rowExpected.Count, rowActual.Count); for (int c = 0; c < rowExpected.Count; c++) { var cellExpected = (JObject)rowExpected[c]; var cellActual = (JObject)rowActual[c]; if (string.IsNullOrEmpty(cellExpected["text"].Value <string>())) { continue; // empty cell have no coordinate data??? } Assert.True(Math.Abs(Math.Floor(pageHeight - cellExpected["top"].Value <double>()) - Math.Floor(cellActual["top"].Value <double>())) < precision); Assert.True(Math.Abs(Math.Floor(cellExpected["left"].Value <double>()) - Math.Floor(cellActual["left"].Value <double>())) < precision); Assert.True(Math.Abs(Math.Floor(cellExpected["width"].Value <double>()) - Math.Floor(cellActual["width"].Value <double>())) < precision); Assert.True(Math.Abs(Math.Floor(cellExpected["height"].Value <double>()) - Math.Floor(cellActual["height"].Value <double>())) < precision); Assert.Equal(cellExpected["text"].Value <string>(), cellActual["text"].Value <string>()); } } } //Assert.Equal(expectedJson, sb.ToString()); }
public void TestIncompleteGrid()
{
    PageArea page = UtilsForTesting.GetPage("Resources/china.pdf", 1);
    SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm();
    List<Table> tables = se.Extract(page);
    Assert.Equal(2, tables.Count);
}
public void TestEmptyRegion()
{
    PageArea page = UtilsForTesting.GetAreaFromPage("Resources/indictb1h_14.pdf", 1, new PdfRectangle(0, 700, 100.9, 800));
    BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm();
    Table table = bea.Extract(page)[0];
    Assert.Equal(EXPECTED_EMPTY_TABLE, UtilsForTesting.TableToArrayOfRows(table));
}
public void TestShouldDetectASingleSpreadsheet()
{
    PageArea page = UtilsForTesting.GetAreaFromPage("Resources/offense.pdf", 1,
        new PdfRectangle(16.44, 792 - 680.85, 597.84, 792 - 16.44)); // 68.08f, 16.44f, 680.85f, 597.84f);
    SpreadsheetExtractionAlgorithm bea = new SpreadsheetExtractionAlgorithm();
    List<Table> tables = bea.Extract(page);
    Assert.Single(tables);
}
public IWriteResult WritePageArea(IRenderContext context, PageArea pageArea)
{
    if (pageArea == PageArea.Body)
    {
        context.Html.WriteElementLine("p", "Dummy template");
    }

    return new WriteResult();
}
IWriteResult IRegion.WritePageArea(
    IRenderContext context,
    PageArea pageArea,
    Action<IRenderContext, object> onListItem,
    Func<IRenderContext, PageArea, IWriteResult> contentWriter)
{
    // TODO: Add code here to write html onto the page
    return WriteResult.Continue();
}
public override IWriteResult WritePageArea(IRenderContext context, PageArea pageArea) { var requestPath = context.OwinContext.Request.Path; var relativePath = requestPath; var html = context.Html; if (_rootPath.HasValue && !requestPath.StartsWithSegments(_rootPath, out relativePath)) { return(base.WritePageArea(context, pageArea)); } if (pageArea == PageArea.Head) { html.WriteUnclosedElement( "link", "rel", "canonical", "href", "http://owinframework.net" + requestPath.ToString().ToLower()); context.Html.WriteLine(); } else if (pageArea == PageArea.Body) { if (!_rootPath.HasValue || requestPath.StartsWithSegments(_rootPath, out relativePath)) { var template = Dependencies.NameManager.ResolveTemplate(relativePath.Value); if (template != null) { //_titleRegex.Matches(template.); html.WriteOpenTag("div", "class", _className); html.WriteLine(); template.WritePageArea(context, pageArea); html.WriteCloseTag("div"); html.WriteLine(); //if (!string.IsNullOrEmpty(title)) //{ // html.WriteScriptOpen(); // html.WriteLine("document.title='" + title + "';"); // html.WriteScriptClose(); // html.WriteLine(); //} } } } else if (pageArea == PageArea.Initialization) { html.WriteScriptOpen(); html.WriteLine("var contentDiv=document.getElementsByClassName('" + _className + "')[0];"); html.WriteLine("var pageTitles=contentDiv.getElementsByTagName('title');"); html.WriteLine("if (pageTitles.length>0)document.title=pageTitles[0].text;"); html.WriteLine(); html.WriteScriptClose(); } return(base.WritePageArea(context, pageArea)); }
private void AddPageArea(PageArea pageArea)
{
    if (_pageAreas.Contains(pageArea))
    {
        return;
    }

    _pageAreas = _pageAreas.Concat(Enumerable.Repeat(pageArea, 1)).ToArray();
}
/// <summary>
/// Detects the tables in the page.
/// </summary>
/// <param name="page">The page in which to detect the tables.</param>
public List<TableRectangle> Detect(PageArea page)
{
    List<Cell> cells = SpreadsheetExtractionAlgorithm.FindCells(page.HorizontalRulings, page.VerticalRulings);
    List<TableRectangle> tables = SpreadsheetExtractionAlgorithm.FindSpreadsheetsFromCells(cells.Cast<TableRectangle>().ToList());

    // we want tables to be returned from top to bottom on the page
    Utils.Sort(tables, new TableRectangle.ILL_DEFINED_ORDER());
    return tables;
}
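// A minimal sketch of how a Detect() implementation like the one above is typically combined
// with a spreadsheet extraction pass; the Eu004 test earlier in this file follows the same
// pattern using SimpleNurminenDetectionAlgorithm. The helper method itself is illustrative.
private static List<Table> DetectAndExtract(PageArea page)
{
    var detector = new SimpleNurminenDetectionAlgorithm();
    var sea = new SpreadsheetExtractionAlgorithm();

    var tables = new List<Table>();
    foreach (var region in detector.Detect(page))
    {
        // Restrict the page to the detected bounding box before extracting cells.
        tables.AddRange(sea.Extract(page.GetArea(region.BoundingBox)));
    }

    return tables;
}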
public override IWriteResult WritePageArea(IRenderContext context, PageArea pageArea)
{
    if (pageArea == PageArea.Body)
    {
        var rootElement = DrawDiagram(context);
        Write(rootElement, context.Html);
    }

    return base.WritePageArea(context, pageArea);
}
public void TestRemoveSequentialSpaces()
{
    PageArea page = UtilsForTesting.GetAreaFromFirstPage("Resources/m27.pdf", new PdfRectangle(28.28, 532 - (103.04 - 79.2), 732.6, 532));
    BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm();
    Table table = bea.Extract(page)[0];
    var firstRow = table.Rows[0];

    Assert.Equal("ALLEGIANT AIR", firstRow[1].GetText());
    Assert.Equal("ALLEGIANT AIR LLC", firstRow[2].GetText());
}
public override IWriteResult WritePageArea(IRenderContext context, PageArea pageArea)
{
    if (pageArea == PageArea.Body)
    {
        var person = context.Data.Get<Person>();
        context.Html.WriteElementLine("p", person.Name);
        context.Html.WriteOpenTag("hr", true);
        context.Html.WriteLine();
    }

    return WriteResult.Continue();
}
public override IWriteResult WritePageArea(
    IRenderContext context,
    PageArea pageArea)
{
    if (pageArea == PageArea.Body)
    {
        var menuItem = context.Data.Get<MenuItem>();
        context.Html.WriteElementLine("li", menuItem.Name, "class", Package.NamespaceName + "_mb_heading");
    }

    return WriteResult.Continue();
}
/// <summary>
/// Extracts the tables in the page.
/// </summary>
/// <param name="page">The page from which to extract the tables.</param>
/// <param name="verticalRulingPositions">List of vertical rulings, indicated by their x positions.</param>
public List<Table> Extract(PageArea page, IReadOnlyList<float> verticalRulingPositions)
{
    List<Ruling> verticalRulings = new List<Ruling>(verticalRulingPositions.Count);
    foreach (float p in verticalRulingPositions)
    {
        verticalRulings.Add(new Ruling(page.Height, p, 0.0f, page.Height)); // wrong here???
    }

    this.verticalRulings = verticalRulings;
    return this.Extract(page);
}
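// A minimal usage sketch for the overload above. Attributing it to BasicExtractionAlgorithm is
// an inference from the verticalRulings field and the column-extraction tests earlier in this
// file, not stated in the source; the x positions are illustrative placeholders only.
private static List<Table> ExtractWithColumnGuides(PageArea page)
{
    var bea = new BasicExtractionAlgorithm();

    // Each value becomes a zero-width vertical ruling spanning the page height (see above).
    return bea.Extract(page, new float[] { 72.0f, 180.0f, 320.0f, 480.0f });
}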
public void WritePageArea( IRenderContext context, PageArea pageArea) { if (_element.AssetDeployment != AssetDeployment.InPage) { return; } if (pageArea == PageArea.Styles) { if (CssRules != null && CssRules.Length > 0) { var writer = _cssWriterFactory.Create(context); for (var i = 0; i < CssRules.Length; i++) { CssRules[i](writer); } if (context.IncludeComments) { context.Html.WriteComment("css rules for " + _commentNameFunc()); } context.Html.WriteOpenTag("style"); context.Html.WriteLine(); writer.ToHtml(context.Html); context.Html.WriteCloseTag("style"); context.Html.WriteLine(); } } else if (pageArea == PageArea.Scripts) { if (JavascriptFunctions != null && JavascriptFunctions.Length > 0) { var writer = _javascriptWriterFactory.Create(context); for (var i = 0; i < JavascriptFunctions.Length; i++) { JavascriptFunctions[i](writer); } if (context.IncludeComments) { context.Html.WriteComment("javascript functions for " + _commentNameFunc()); } context.Html.WriteScriptOpen(); writer.ToHtml(context.Html); context.Html.WriteScriptClose(); } } }
private void EnsurePageArea(PageArea pageArea, bool delete)
{
    if (delete)
    {
        PageAreas = PageAreas.Where(a => a != pageArea).ToArray();
    }
    else if (!PageAreas.Contains(pageArea))
    {
        PageAreas = PageAreas.Concat(Enumerable.Repeat(pageArea, 1)).ToArray();
    }
}
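// A hedged usage sketch for EnsurePageArea above: a component that only writes style or script
// assets when it has some could keep its declared PageAreas in sync like this. The CssRules and
// JavascriptFunctions members are borrowed from the asset-writing code earlier in this file;
// whether they live on the same class as EnsurePageArea is an assumption, and the method below
// is illustrative.
private void UpdateDeclaredPageAreas()
{
    // Declare the Styles area only if there are css rules to write, otherwise remove it.
    EnsurePageArea(PageArea.Styles, delete: CssRules == null || CssRules.Length == 0);

    // Declare the Scripts area only if there are javascript functions to write.
    EnsurePageArea(PageArea.Scripts, delete: JavascriptFunctions == null || JavascriptFunctions.Length == 0);
}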