Esempio n. 1
0
        /// <summary>
        /// Runs the spreadsheet extraction algorithm over page 1 of the "twotables" sample PDF.
        /// </summary>
        /// <returns>Whatever tables the algorithm reports for that page.</returns>
        private List <Table> GetTables()
        {
            PageArea firstPage = UtilsForTesting.GetPage("Resources/twotables.pdf", 1);
            var extractor = new SpreadsheetExtractionAlgorithm();
            List <Table> found = extractor.Extract(firstPage);

            return found;
        }
Esempio n. 2
0
        /// <summary>
        /// Checks that the first table extracted from the frx_2012_disclosure sample
        /// serializes to exactly the CSV stored in the reference file.
        /// </summary>
        public void TestCSVMultilineRow()
        {
            string reference = UtilsForTesting.LoadCsv("Resources/csv/frx_2012_disclosure.csv");

            // An all-NaN rectangle is passed as the area; the explicit bounds noted by the
            // original author were 53.0f, 49.0f, 735.0f, 550.0f.
            var area = new PdfRectangle(double.NaN, double.NaN, double.NaN, double.NaN);
            PageArea page = UtilsForTesting.GetAreaFromFirstPage("Resources/frx_2012_disclosure.pdf", area);

            var extractor = new SpreadsheetExtractionAlgorithm();
            Table firstTable = extractor.Extract(page)[0];

            var buffer = new StringBuilder();
            new CSVWriter().Write(buffer, firstTable);
            string produced = buffer.ToString();

            Assert.Equal(reference, produced);

            // A commented-out alternative wrote through a StreamWriter over a MemoryStream,
            // read the stream back and trimmed the trailing new line before comparing.
        }
Esempio n. 3
0
        /// <summary>
        /// Registers a template path for one page area on the region's template component.
        /// </summary>
        /// <param name="templatePath">Path of the template to attach.</param>
        /// <param name="pageArea">Page area the template is attached to; unlisted areas attach nothing.</param>
        /// <returns>This definition, so calls can be chained.</returns>
        IRegionDefinition IRegionDefinition.AddTemplate(string templatePath, PageArea pageArea)
        {
            // Reuse the region's current content when it already is a template component.
            var existing  = _region.Content as TemplateComponent;
            var component = existing ?? new TemplateComponent(_componentDependenciesFactory);

            if (pageArea == PageArea.Head)
            {
                component.HeadTemplate(templatePath);
            }
            else if (pageArea == PageArea.Scripts)
            {
                component.ScriptTemplate(templatePath);
            }
            else if (pageArea == PageArea.Styles)
            {
                component.StyleTemplate(templatePath);
            }
            else if (pageArea == PageArea.Body)
            {
                component.BodyTemplate(templatePath);
            }
            else if (pageArea == PageArea.Initialization)
            {
                component.InitializationTemplate(templatePath);
            }

            _region.Content = component;

            return this;
        }
Esempio n. 4
0
        /// <summary>
        /// Checks that the first table extracted from the schools sample serializes to the
        /// reference CSV (reference trimmed, produced text normalized to "\n" line endings).
        /// </summary>
        public void TestCSVSerializeInfinity()
        {
            string reference = UtilsForTesting.LoadCsv("Resources/csv/schools.csv");

            // Source coordinates were top 53.74, left 16.97, bottom 548.74, right 762.3
            // with a page height of 612. One unit is removed from the upper bound because
            // otherwise an empty line is added at the top.
            var area = new PdfRectangle(16.97, 612 - 548.74, 762.3, 612 - 53.74 - 1);
            PageArea page = UtilsForTesting.GetAreaFromFirstPage("Resources/schools.pdf", area);

            var extractor = new SpreadsheetExtractionAlgorithm();
            Table firstTable = extractor.Extract(page)[0];

            var buffer = new StringBuilder();
            new CSVWriter().Write(buffer, firstTable);
            string produced = buffer.ToString();

            Assert.Equal(reference.Trim(), produced.Replace("\r\n", "\n"));

            // A commented-out alternative wrote through a StreamWriter over a MemoryStream,
            // read the stream back and trimmed the trailing new line before comparing.
        }
Esempio n. 5
0
        /// <summary>
        /// Extracts page 1 of us-007.pdf using the externally supplied rulings and checks
        /// the table count, the cell count and the text of the first two columns of nine rows.
        /// </summary>
        public void TestExtractTableWithExternallyDefinedRulings()
        {
            PageArea page = UtilsForTesting.GetPage("Resources/us-007.pdf", 1);
            var extractor = new SpreadsheetExtractionAlgorithm();
            List <Table> found = extractor.Extract(page, EXTERNALLY_DEFINED_RULINGS.ToList());

            Assert.Single(found);
            Table table = found[0];

            Assert.Equal(18, table.Cells.Count);

            var rows = table.Rows;

            // Expected text per row: { column 0, column 1 }.
            string[][] expectedText =
            {
                new[] { "Payroll Period", "One Withholding\rAllowance" },
                new[] { "Weekly", "$71.15" },
                new[] { "Biweekly", "142.31" },
                new[] { "Semimonthly", "154.17" },
                new[] { "Monthly", "308.33" },
                new[] { "Quarterly", "925.00" },
                new[] { "Semiannually", "1,850.00" },
                new[] { "Annually", "3,700.00" },
                new[] { "Daily or Miscellaneous\r(each day of the payroll period)", "14.23" },
            };

            for (int r = 0; r < expectedText.Length; r++)
            {
                for (int c = 0; c < expectedText[r].Length; c++)
                {
                    Assert.Equal(expectedText[r][c], rows[r][c].GetText());
                }
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Extracts the spreadsheet found inside a fixed area of page 1 of puertos1.pdf,
        /// checks that it has 15 rows, and feeds its CSV serialization to a CSV reader.
        /// </summary>
        /// <remarks>
        /// The <c>expected</c> CSV is not compared against the result yet; the record-by-record
        /// comparison only exists as commented-out code (see the TODO below).
        /// </remarks>
        public void TestExtractSpreadsheetWithinAnArea()
        {
            PageArea page = UtilsForTesting.GetAreaFromPage("Resources/puertos1.pdf", 1, new PdfRectangle(30.32142857142857, 793 - 554.8821428571429, 546.7964285714286, 793 - 273.9035714285714)); // 273.9035714285714f, 30.32142857142857f, 554.8821428571429f, 546.7964285714286f);
            SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm();
            List <Table> tables = se.Extract(page);
            Table        table  = tables[0];

            Assert.Equal(15, table.Rows.Count);

            const string expected = "\"\",TM,M.U$S,TM,M.U$S,TM,M.U$S,TM,M.U$S,TM,M.U$S,TM,M.U$S,TM\n" +
                                    "Peces vivos,1,25,1,23,2,38,1,37,2,67,2,89,1\n" +
                                    "\"Pescado fresco\n" +
                                    "o refrigerado.\n" +
                                    "exc. filetes\",7.704,7.175,8.931,6.892,12.635,10.255,16.742,13.688,14.357,11.674,13.035,13.429,9.727\n" +
                                    "\"Pescado congelado\n" +
                                    "exc. filetes\",90.560,105.950,112.645,108.416,132.895,115.874,152.767,133.765,148.882,134.847,156.619,165.134,137.179\n" +
                                    "\"Filetes y demás car-\n" +
                                    "nes de pescado\",105.434,200.563,151.142,218.389,152.174,227.780,178.123,291.863,169.422,313.735,176.427,381.640,144.814\n" +
                                    "\"Pescado sec./sal./\n" +
                                    "en salm. har./pol./\n" +
                                    "pell. aptos\n" +
                                    "p/c humano\",6.837,14.493,6.660,9.167,14.630,17.579,18.150,21.302,18.197,25.739,13.460,23.549,11.709\n" +
                                    "Crustáceos,61.691,375.798,52.488,251.043,47.635,387.783,27.815,217.443,7.123,86.019,39.488,373.583,45.191\n" +
                                    "Moluscos,162.027,174.507,109.436,111.443,90.834,104.741,57.695,109.141,98.182,206.304,187.023,251.352,157.531\n" +
                                    "\"Prod. no exp. en\n" +
                                    "otros capítulos.\n" +
                                    "No apto p/c humano\",203,328,7,35,521,343,\"1,710\",\"1,568\",125,246,124,263,131\n" +
                                    "\"Grasas y aceites de\n" +
                                    "pescado y mamíferos\n" +
                                    "marinos\",913,297,\"1,250\",476,\"1,031\",521,\"1,019\",642,690,483,489,710,959\n" +
                                    "\"Extractos y jugos de\n" +
                                    "pescado y mariscos\",5,25,1,3,4,4,31,93,39,117,77,230,80\n" +
                                    "\"Preparaciones y con-\n" +
                                    "servas de pescado\",846,\"3,737\",\"1,688\",\"4,411\",\"1,556\",\"3,681\",\"2,292\",\"5,474\",\"2,167\",\"7,494\",\"2,591\",\"8,833\",\"2,795\"\n" +
                                    "\"Preparaciones y con-\n" +
                                    "servas de mariscos\",348,\"3,667\",345,\"1,771\",738,\"3,627\",561,\"2,620\",607,\"3,928\",314,\"2,819\",250\n" +
                                    "\"Harina, polvo y pe-\n" +
                                    "llets de pescado.No\n" +
                                    "aptos p/c humano\",\"16,947\",\"8,547\",\"11,867\",\"6,315\",\"32,528\",\"13,985\",\"37,313\",\"18,989\",\"35,787\",\"19,914\",\"37,821\",\"27,174\",\"30,000\"\n" +
                                    "TOTAL,\"453,515\",\"895,111\",\"456,431\",\"718,382\",\"487,183\",\"886,211\",\"494,220\",\"816,623\",\"495,580\",\"810,565\",\"627,469\",\"1,248,804\",\"540,367\"\n";

            // TODO add better assertions
            StringBuilder sb = new StringBuilder();

            (new CSVWriter()).Write(sb, tables[0]);
            string result = sb.ToString();

            //List<CSVRecord> parsedExpected = org.apache.commons.csv.CSVParser.parse(expected, CSVFormat.EXCEL).getRecords();
            //List<CSVRecord> parsedResult = org.apache.commons.csv.CSVParser.parse(result, CSVFormat.EXCEL).getRecords();

            // The table text contains accented characters (see the expected CSV above), which an
            // ASCII encoding would replace with '?'; UTF-8 keeps them intact for the reader.
            using (var csv = new CsvReader(new StreamReader(new MemoryStream(Encoding.UTF8.GetBytes(result))), CultureInfo.InvariantCulture))
            {
                /*
                 * Assert.Equal(parsedResult.Count, parsedExpected.Count);
                 * for (int i = 0; i < parsedResult.Count; i++)
                 * {
                 *  Assert.Equal(parsedResult[i].size(), parsedExpected[i].size());
                 * }
                 */
            }
        }
Esempio n. 7
0
        /// <summary>
        /// On Windows only: extracts a fixed area of page 3 of the EU-017 sample with the basic
        /// algorithm (seeded with the page's vertical rulings) and compares every cell with
        /// <c>EU_017_EXPECTED</c>. On other platforms the test does nothing.
        /// </summary>
        public void TestExtractColumnsCorrectly2()
        {
            // Fails on linux and mac os. Linked to PdfPig not finding the correct font.
            // Need to use apt-get -y install ttf-mscorefonts-installer;
            // still have mscorefonts - eula license could not be presented.
            if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) // && !RuntimeInformation.IsOSPlatform(OSPlatform.Linux)
            {
                return;
            }

            PageArea page = UtilsForTesting.GetPage(EU_017_PDF, 3);
            var extractor = new BasicExtractionAlgorithm(page.VerticalRulings);
            PageArea area = page.GetArea(new PdfRectangle(148.44, 543 - (711.875 - 299.625), 452.32, 543));
            Table table = extractor.Extract(area)[0];

            var actualRows = UtilsForTesting.TableToArrayOfRows(table);

            Assert.Equal(EU_017_EXPECTED.Length, actualRows.Length);
            for (int row = 0; row < EU_017_EXPECTED.Length; row++)
            {
                var expectedRow = EU_017_EXPECTED[row];
                var actualRow   = actualRows[row];
                Assert.Equal(expectedRow.Length, actualRow.Length);

                for (int col = 0; col < expectedRow.Length; col++)
                {
                    Assert.Equal(expectedRow[col], actualRow[col]);
                }
            }
        }
Esempio n. 8
0
        /// <summary>
        /// On Windows only: extracts a fixed area of page 1 of the EU-002 sample with the basic
        /// algorithm and compares every cell with <c>EU_002_EXPECTED</c>. On other platforms
        /// the test does nothing.
        /// </summary>
        public void TestExtractColumnsCorrectly()
        {
            // Fails on linux and mac os. Linked to PdfPig not finding the correct font.
            // Need to use apt-get -y install ttf-mscorefonts-installer;
            // still have mscorefonts - eula license could not be presented.
            if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) // && !RuntimeInformation.IsOSPlatform(OSPlatform.Linux)
            {
                return;
            }

            var bounds = new PdfRectangle(70.0, 725 - (233 - 115), 510.0, 725);
            PageArea page = UtilsForTesting.GetAreaFromPage(EU_002_PDF, 1, bounds);
            var extractor = new BasicExtractionAlgorithm();
            Table table = extractor.Extract(page)[0];

            var actualRows = UtilsForTesting.TableToArrayOfRows(table);
            Assert.Equal(EU_002_EXPECTED.Length, actualRows.Length);

            for (int row = 0; row < EU_002_EXPECTED.Length; row++)
            {
                var expectedRow = EU_002_EXPECTED[row];
                var actualRow   = actualRows[row];
                Assert.Equal(expectedRow.Length, actualRow.Length);

                for (int col = 0; col < expectedRow.Length; col++)
                {
                    Assert.Equal(expectedRow[col], actualRow[col]);
                }
            }
        }
Esempio n. 9
0
        /// <summary>
        /// Extracts the single table on page 1 of mednine.pdf and checks selected
        /// right-to-left (Arabic) and numeric cell texts.
        /// </summary>
        public void TestRealLifeRTL()
        {
            PageArea page = UtilsForTesting.GetPage("Resources/mednine.pdf", 1);
            var extractor = new SpreadsheetExtractionAlgorithm();
            List <Table> found = extractor.Extract(page);

            Assert.Single(found);
            var rows = found[0].Rows;

            // The doubled space may be an extraction bug (missing space or wrong word order).
            Assert.Equal("الانتخابات التشريعية  2014", rows[0][0].GetText());
            Assert.Equal("ورقة كشف نتائج دائرة مدنين", rows[1][0].GetText());

            // Row 4, columns 0 to 9 hold plain numbers.
            string[] numericCells = { "426", "63", "43", "56", "58", "49", "55", "33", "32", "37" };
            for (int col = 0; col < numericCells.Length; col++)
            {
                Assert.Equal(numericCells[col], rows[4][col].GetText());
            }

            Assert.Equal("قائمة من أجل تحقيق سلطة الشعب", rows[4][10].GetText());

            // Known remaining problem: diacritics (e.g. Arabic's tanwin and probably Hebrew
            // nekudot) are put in the wrong place. This should get fixed, but the current
            // behaviour is a good first stab at the problem. An assertion on the theoretically
            // correct text is therefore left out for now.
        }
Esempio n. 10
0
        /// <summary>
        /// Extracts the first table on page 1 of arabic.pdf and checks the text of cells
        /// mixing right-to-left Arabic, digits and Latin script.
        /// </summary>
        public void TestRTL()
        {
            PageArea page = UtilsForTesting.GetPage("Resources/arabic.pdf", 1);
            var extractor = new SpreadsheetExtractionAlgorithm();
            List <Table> found = extractor.Extract(page);

            // The table count is deliberately not asserted here (the check was commented out).
            var rows = found[0].Rows;

            Assert.Equal("اسمي سلطان", rows[1][1].GetText());
            Assert.Equal("من اين انت؟", rows[2][1].GetText());
            Assert.Equal("1234", rows[3][0].GetText());
            Assert.Equal("هل انت شباك؟", rows[4][0].GetText());

            // conjoined lam-alif gets missed in the next two cells
            Assert.Equal("انا من ولاية كارولينا الشمال", rows[2][0].GetText());
            Assert.Equal("اسمي Jeremy في الانجليزية", rows[4][1].GetText());

            // the real right answer is 47.
            Assert.Equal("عندي 47 قطط", rows[3][1].GetText());
            Assert.Equal("Jeremy is جرمي in Arabic", rows[5][0].GetText());

            // The tanwin really ought to be placed differently, but this is forgivable for now.
            Assert.Equal("مرحباً", rows[1][0].GetText());

            // Known remaining problem: diacritics (e.g. Arabic's tanwin and probably Hebrew
            // nekudot) are put in the wrong place. This should get fixed, but the current
            // behaviour is a good first stab at the problem.
        }
Esempio n. 11
0
        /// <summary>
        /// Smoke test: opens eu-004.pdf with path clipping enabled, detects table regions on
        /// page 3 and runs the spreadsheet extraction on the first detected region.
        /// Nothing is asserted; the test only checks that the pipeline runs without throwing.
        /// </summary>
        public void Eu004()
        {
            var options = new ParsingOptions()
            {
                ClipPaths = true
            };

            using (PdfDocument document = PdfDocument.Open("Resources/icdar2013-dataset/competition-dataset-eu/eu-004.pdf", options))
            {
                var objectExtractor = new ObjectExtractor(document);
                PageArea page = objectExtractor.Extract(3);

                var regionDetector  = new SimpleNurminenDetectionAlgorithm();
                var detectedRegions = regionDetector.Detect(page);

                // Only the first detected region is extracted.
                var firstRegion = page.GetArea(detectedRegions[0].BoundingBox);

                var extractor = new SpreadsheetExtractionAlgorithm();
                var tables    = extractor.Extract(firstRegion);

                // A commented-out variant looped over every detected region and extracted
                // each one with BasicExtractionAlgorithm instead.
            }
        }
Esempio n. 12
0
            /// <summary>
            /// Writes the SVG markup for a vertical (rotated by -90 degrees) caption into the
            /// body of the page, using the <c>VerticalText</c> found in the render context data.
            /// Other page areas produce no output.
            /// </summary>
            /// <param name="context">Render context supplying the data and the HTML writer.</param>
            /// <param name="pageArea">The page area currently being written.</param>
            /// <returns>A "continue" write result in every case.</returns>
            public override IWriteResult WritePageArea(
                IRenderContext context,
                PageArea pageArea)
            {
                if (pageArea != PageArea.Body)
                {
                    return WriteResult.Continue();
                }

                var text = context.Data.Get <VerticalText>();

                // <svg width=".." height="..">
                context.Html.WriteOpenTag("svg", "width", text.Width.ToString(), "height", text.Height.ToString());
                context.Html.WriteLine();

                context.Html.WriteElementLine("style", "text { " + text.TextStyle + " }");

                // Optional background rectangle covering the whole drawing.
                if (!string.IsNullOrEmpty(text.Background))
                {
                    context.Html.WriteOpenTag("rect", true, "width", "100%", "height", "100%", "fill", text.Background);
                }

                // Group that positions and rotates the caption.
                context.Html.WriteOpenTag("g", "transform", "translate(" + text.X + "," + text.Y + ") rotate(-90)");
                context.Html.WriteLine();

                context.Html.WriteElementLine("text", text.Caption, "textLength", text.TextHeight.ToString(), "lengthAdjust", text.LengthAdjust);

                context.Html.WriteCloseTag("g");
                context.Html.WriteLine();

                context.Html.WriteCloseTag("svg");
                context.Html.WriteLine();

                return WriteResult.Continue();
            }
Esempio n. 13
0
        /// <summary>
        /// Extracts, with the basic algorithm, the first table found in a fixed area of the
        /// first page of the Argentinian voting-record sample PDF.
        /// </summary>
        /// <returns>The first table reported for that area.</returns>
        private Table GetTable()
        {
            // Source coordinates noted by the original author: 269.875f, 12.75f, 790.5f, 561f.
            var bounds = new PdfRectangle(12.75, 55.0, 561, 567);
            PageArea page = UtilsForTesting.GetAreaFromFirstPage("Resources/argentina_diputados_voting_record.pdf", bounds);
            var extractor = new BasicExtractionAlgorithm();

            return extractor.Extract(page)[0];
        }
Esempio n. 14
0
        /// <summary>
        /// Handles submission of the "create area" form: validates the model, saves the page
        /// area and either redirects to the "PageArea" action or redisplays the form with a message.
        /// </summary>
        /// <param name="pageArea">The posted page area.</param>
        /// <returns>
        /// A redirect to the "PageArea" action on success; otherwise the "CreateArea" view
        /// populated with the posted model.
        /// </returns>
        public async Task <ActionResult> CreateArea(PageArea pageArea)
        {
            try
            {
                if (!ModelState.IsValid)
                {
                    ActionMessage("All the fields are required!", MessageType.Error);
                    return View("CreateArea", pageArea);
                }

                var manager = new PageActionManager();
                OperationStatus status = await manager.AddUpdatePageArea(pageArea, GetUsername);

                // The status code is interpreted as a boolean success flag.
                if (!Convert.ToBoolean(status.StatusCode))
                {
                    ActionMessage(status.Message, MessageType.Error);
                    return View("CreateArea", pageArea);
                }

                ActionMessage(status.Message, MessageType.Success);
                return RedirectToAction(nameof(PageArea));
            }
            catch (Exception ex)
            {
                ProcessException(ex);
                ActionMessage(ex.Message, MessageType.Error);
                return View("CreateArea", pageArea);
            }
        }
Esempio n. 15
0
        /// <summary>
        /// Extracts the two tables of spanning_cells.pdf, serializes them to JSON and compares
        /// the result with the reference JSON: table and cell geometry within a tolerance
        /// (after flooring), cell text exactly.
        /// </summary>
        public void TestSpanningCells()
        {
            PageArea page      = UtilsForTesting.GetPage("Resources/spanning_cells.pdf", 1);
            string   reference = UtilsForTesting.LoadJson("Resources/json/spanning_cells.json");
            var extractor      = new SpreadsheetExtractionAlgorithm();
            List <Table> found = extractor.Extract(page);

            Assert.Equal(2, found.Count);

            var expectedTables = (JArray)JsonConvert.DeserializeObject(reference);

            var json = new StringBuilder();
            new JSONWriter().Write(json, found);
            var actualTables = (JArray)JsonConvert.DeserializeObject(json.ToString());

            double pageHeight = 842;
            double precision  = 2;

            // True when the floored values differ by less than the tolerance.
            Func <double, double, bool> close = (wanted, got) => Math.Abs(Math.Floor(wanted) - Math.Floor(got)) < precision;

            for (int t = 0; t < 2; t++)
            {
                var wantedTable = expectedTables[t];
                var gotTable    = actualTables[t];

                Assert.Equal(wantedTable["extraction_method"], gotTable["extraction_method"]);

                // The reference "top" and "bottom" values are subtracted from the page height
                // before being compared with the actual values.
                Assert.True(close(pageHeight - wantedTable["top"].Value <double>(), gotTable["top"].Value <double>()));
                Assert.True(close(wantedTable["left"].Value <double>(), gotTable["left"].Value <double>()));
                Assert.True(close(wantedTable["width"].Value <double>(), gotTable["width"].Value <double>()));
                Assert.True(close(wantedTable["height"].Value <double>(), gotTable["height"].Value <double>()));
                Assert.True(close(wantedTable["right"].Value <double>(), gotTable["right"].Value <double>()));
                Assert.True(close(pageHeight - wantedTable["bottom"].Value <double>(), gotTable["bottom"].Value <double>()));

                var wantedData = (JArray)wantedTable["data"];
                var gotData    = (JArray)gotTable["data"];
                Assert.Equal(wantedData.Count, gotData.Count);

                for (int r = 0; r < wantedData.Count; r++)
                {
                    var wantedRow = (JArray)wantedData[r];
                    var gotRow    = (JArray)gotData[r];
                    Assert.Equal(wantedRow.Count, gotRow.Count);

                    for (int c = 0; c < wantedRow.Count; c++)
                    {
                        var wantedCell = (JObject)wantedRow[c];
                        var gotCell    = (JObject)gotRow[c];

                        // Cells whose reference text is empty are skipped
                        // (empty cells have no coordinate data???).
                        if (string.IsNullOrEmpty(wantedCell["text"].Value <string>()))
                        {
                            continue;
                        }

                        Assert.True(close(pageHeight - wantedCell["top"].Value <double>(), gotCell["top"].Value <double>()));
                        Assert.True(close(wantedCell["left"].Value <double>(), gotCell["left"].Value <double>()));
                        Assert.True(close(wantedCell["width"].Value <double>(), gotCell["width"].Value <double>()));
                        Assert.True(close(wantedCell["height"].Value <double>(), gotCell["height"].Value <double>()));
                        Assert.Equal(wantedCell["text"].Value <string>(), gotCell["text"].Value <string>());
                    }
                }
            }

            // A direct string comparison of the two JSON documents is intentionally not made.
        }
Esempio n. 16
0
        /// <summary>
        /// Checks that the spreadsheet extraction finds two tables on page 1 of china.pdf.
        /// </summary>
        public void TestIncompleteGrid()
        {
            PageArea page = UtilsForTesting.GetPage("Resources/china.pdf", 1);
            var extractor = new SpreadsheetExtractionAlgorithm();
            List <Table> found = extractor.Extract(page);

            Assert.Equal(2, found.Count);
        }
Esempio n. 17
0
        /// <summary>
        /// Checks that the basic extraction of a fixed area of page 1 of indictb1h_14.pdf
        /// yields a first table equal to <c>EXPECTED_EMPTY_TABLE</c>.
        /// </summary>
        public void TestEmptyRegion()
        {
            var bounds = new PdfRectangle(0, 700, 100.9, 800);
            PageArea page = UtilsForTesting.GetAreaFromPage("Resources/indictb1h_14.pdf", 1, bounds);
            var extractor = new BasicExtractionAlgorithm();
            Table firstTable = extractor.Extract(page)[0];

            Assert.Equal(EXPECTED_EMPTY_TABLE, UtilsForTesting.TableToArrayOfRows(firstTable));
        }
Esempio n. 18
0
        /// <summary>
        /// Checks that exactly one table is extracted from a fixed area of page 1 of offense.pdf.
        /// </summary>
        public void TestShouldDetectASingleSpreadsheet()
        {
            // Source coordinates noted by the original author: 68.08f, 16.44f, 680.85f, 597.84f.
            var bounds = new PdfRectangle(16.44, 792 - 680.85, 597.84, 792 - 16.44);
            PageArea page = UtilsForTesting.GetAreaFromPage("Resources/offense.pdf", 1, bounds);
            var extractor = new SpreadsheetExtractionAlgorithm();
            List <Table> found = extractor.Extract(page);

            Assert.Single(found);
        }
Esempio n. 19
0
            /// <summary>
            /// Writes a placeholder paragraph ("Dummy template") when the body is being rendered;
            /// writes nothing for any other page area.
            /// </summary>
            /// <param name="context">Render context supplying the HTML writer.</param>
            /// <param name="pageArea">The page area currently being written.</param>
            /// <returns>A newly constructed <c>WriteResult</c>.</returns>
            public IWriteResult WritePageArea(IRenderContext context, PageArea pageArea)
            {
                bool writingBody = pageArea == PageArea.Body;
                if (writingBody)
                {
                    context.Html.WriteElementLine("p", "Dummy template");
                }

                var outcome = new WriteResult();
                return outcome;
            }
Esempio n. 20
0
 /// <summary>
 /// Region rendering entry point. Currently a stub: none of the arguments are used and
 /// nothing is written to the page.
 /// </summary>
 /// <returns>A "continue" write result.</returns>
 IWriteResult IRegion.WritePageArea(
     IRenderContext context,
     PageArea pageArea,
     Action <IRenderContext, object> onListItem,
     Func <IRenderContext, PageArea, IWriteResult> contentWriter)
 {
     // TODO: Add code here to write html onto the page
     var outcome = WriteResult.Continue();
     return outcome;
 }
        /// <summary>
        /// Writes this component's contribution to a page area for requests under the
        /// configured root path: a canonical link in the head, the resolved template wrapped
        /// in a div in the body, and a script in the initialization area that copies the
        /// first title element found in the content div to the document title.
        /// </summary>
        /// <param name="context">Render context supplying the OWIN request and the HTML writer.</param>
        /// <param name="pageArea">The page area currently being written.</param>
        /// <returns>The result of the base implementation, which is always invoked last.</returns>
        public override IWriteResult WritePageArea(IRenderContext context, PageArea pageArea)
        {
            var requestPath  = context.OwinContext.Request.Path;
            var relativePath = requestPath;
            var html         = context.Html;

            // Requests outside the root path get only the base output. When a root path is set
            // and matches, relativePath receives the remainder of the request path; when no
            // root path is set, relativePath stays equal to the full request path.
            if (_rootPath.HasValue && !requestPath.StartsWithSegments(_rootPath, out relativePath))
            {
                return base.WritePageArea(context, pageArea);
            }

            if (pageArea == PageArea.Head)
            {
                // URLs are not linguistic text, so lower-case them culture-invariantly;
                // a culture-sensitive ToLower() can change characters under e.g. Turkish casing rules.
                html.WriteUnclosedElement(
                    "link", "rel", "canonical", "href",
                    "http://owinframework.net" + requestPath.ToString().ToLowerInvariant());
                html.WriteLine();
            }
            else if (pageArea == PageArea.Body)
            {
                // The root-path match was already established by the guard above, so the
                // template can be resolved directly from relativePath.
                var template = Dependencies.NameManager.ResolveTemplate(relativePath.Value);
                if (template != null)
                {
                    //_titleRegex.Matches(template.);

                    html.WriteOpenTag("div", "class", _className);
                    html.WriteLine();
                    template.WritePageArea(context, pageArea);
                    html.WriteCloseTag("div");
                    html.WriteLine();

                    //if (!string.IsNullOrEmpty(title))
                    //{
                    //    html.WriteScriptOpen();
                    //    html.WriteLine("document.title='" + title + "';");
                    //    html.WriteScriptClose();
                    //    html.WriteLine();
                    //}
                }
            }
            else if (pageArea == PageArea.Initialization)
            {
                html.WriteScriptOpen();
                html.WriteLine("var contentDiv=document.getElementsByClassName('" + _className + "')[0];");
                html.WriteLine("var pageTitles=contentDiv.getElementsByTagName('title');");
                html.WriteLine("if (pageTitles.length>0)document.title=pageTitles[0].text;");
                html.WriteLine();
                html.WriteScriptClose();
            }

            return base.WritePageArea(context, pageArea);
        }
Esempio n. 22
0
        /// <summary>
        /// Appends a page area to <c>_pageAreas</c> unless it is already present.
        /// A new array is built and assigned rather than mutating the existing one.
        /// </summary>
        /// <param name="pageArea">The page area to add.</param>
        private void AddPageArea(PageArea pageArea)
        {
            bool alreadyListed = _pageAreas.Contains(pageArea);
            if (!alreadyListed)
            {
                _pageAreas = _pageAreas.Concat(new[] { pageArea }).ToArray();
            }
        }
Esempio n. 23
0
        /// <summary>
        /// Detects the tables in the page from the cells formed by its rulings.
        /// </summary>
        /// <param name="page">The page where to detect the tables.</param>
        /// <returns>The detected table rectangles, sorted with <c>TableRectangle.ILL_DEFINED_ORDER</c>.</returns>
        public List<TableRectangle> Detect(PageArea page)
        {
            // Cells come from the page's horizontal and vertical rulings.
            List<Cell> ruledCells = SpreadsheetExtractionAlgorithm.FindCells(page.HorizontalRulings, page.VerticalRulings);
            List<TableRectangle> cellRectangles = ruledCells.Cast<TableRectangle>().ToList();

            List<TableRectangle> detected = SpreadsheetExtractionAlgorithm.FindSpreadsheetsFromCells(cellRectangles);

            // we want tables to be returned from top to bottom on the page
            var ordering = new TableRectangle.ILL_DEFINED_ORDER();
            Utils.Sort(detected, ordering);

            return detected;
        }
Esempio n. 24
0
        /// <summary>
        /// Draws the diagram and writes it to the HTML output when the body is being rendered,
        /// then defers to the base implementation for every page area.
        /// </summary>
        /// <param name="context">Render context supplying the HTML writer.</param>
        /// <param name="pageArea">The page area currently being written.</param>
        /// <returns>The result of the base implementation.</returns>
        public override IWriteResult WritePageArea(IRenderContext context, PageArea pageArea)
        {
            if (pageArea != PageArea.Body)
            {
                return base.WritePageArea(context, pageArea);
            }

            Write(DrawDiagram(context), context.Html);

            return base.WritePageArea(context, pageArea);
        }
Esempio n. 25
0
        /// <summary>
        /// Checks the text of the second and third cells in the first row extracted from a
        /// fixed area of the first page of m27.pdf.
        /// </summary>
        public void TestRemoveSequentialSpaces()
        {
            var bounds = new PdfRectangle(28.28, 532 - (103.04 - 79.2), 732.6, 532);
            PageArea page = UtilsForTesting.GetAreaFromFirstPage("Resources/m27.pdf", bounds);
            var extractor = new BasicExtractionAlgorithm();
            var headerRow = extractor.Extract(page)[0].Rows[0];

            Assert.Equal("ALLEGIANT AIR", headerRow[1].GetText());
            Assert.Equal("ALLEGIANT AIR LLC", headerRow[2].GetText());
        }
Esempio n. 26
0
 /// <summary>
 /// When the body is being rendered, writes the name of the <c>Person</c> found in the
 /// render context data as a paragraph followed by an "hr" tag. Other areas write nothing.
 /// </summary>
 /// <param name="context">Render context supplying the data and the HTML writer.</param>
 /// <param name="pageArea">The page area currently being written.</param>
 /// <returns>A "continue" write result in every case.</returns>
 public override IWriteResult WritePageArea(IRenderContext context, PageArea pageArea)
 {
     if (pageArea != PageArea.Body)
     {
         return WriteResult.Continue();
     }

     var currentPerson = context.Data.Get <Person>();
     context.Html.WriteElementLine("p", currentPerson.Name);
     context.Html.WriteOpenTag("hr", true);
     context.Html.WriteLine();

     return WriteResult.Continue();
 }
 /// <summary>
 /// When the body is being rendered, writes the name of the <c>MenuItem</c> found in the
 /// render context data as a list item whose class is the package namespace name
 /// followed by "_mb_heading". Other areas write nothing.
 /// </summary>
 /// <param name="context">Render context supplying the data and the HTML writer.</param>
 /// <param name="pageArea">The page area currently being written.</param>
 /// <returns>A "continue" write result in every case.</returns>
 public override IWriteResult WritePageArea(
     IRenderContext context,
     PageArea pageArea)
 {
     if (pageArea != PageArea.Body)
     {
         return WriteResult.Continue();
     }

     var item = context.Data.Get <MenuItem>();
     context.Html.WriteElementLine("li", item.Name, "class", Package.NamespaceName + "_mb_heading");

     return WriteResult.Continue();
 }
Esempio n. 28
0
        /// <summary>
        /// Extracts the tables in the page, using the supplied vertical ruling positions.
        /// </summary>
        /// <param name="page">The page where to extract the tables.</param>
        /// <param name="verticalRulingPositions">List of vertical rulings, indicated by their x position.</param>
        /// <returns>The tables produced by the single-argument <c>Extract</c> overload.</returns>
        public List <Table> Extract(PageArea page, IReadOnlyList <float> verticalRulingPositions)
        {
            int positionCount = verticalRulingPositions.Count;
            var rulings = new List <Ruling>(positionCount);

            for (int i = 0; i < positionCount; i++)
            {
                float x = verticalRulingPositions[i];

                // NOTE(review): the original author flagged these constructor arguments
                // as possibly wrong ("wrong here???") - confirm against the Ruling constructor.
                rulings.Add(new Ruling(page.Height, x, 0.0f, page.Height));
            }

            // Store the rulings on the instance before delegating to the page-only overload.
            this.verticalRulings = rulings;

            return this.Extract(page);
        }
Esempio n. 29
0
        /// <summary>
        /// Writes this element's CSS rules (styles area) or JavaScript functions (scripts area)
        /// directly into the page. Nothing is written unless the element's asset deployment
        /// is <c>AssetDeployment.InPage</c>.
        /// </summary>
        /// <param name="context">Render context supplying the HTML writer and comment setting.</param>
        /// <param name="pageArea">The page area currently being written.</param>
        public void WritePageArea(
            IRenderContext context,
            PageArea pageArea)
        {
            if (_element.AssetDeployment != AssetDeployment.InPage)
            {
                return;
            }

            switch (pageArea)
            {
                case PageArea.Styles:
                {
                    if (CssRules != null && CssRules.Length > 0)
                    {
                        // Collect every rule into one writer, then emit a single style element.
                        var cssWriter = _cssWriterFactory.Create(context);

                        for (var rule = 0; rule < CssRules.Length; rule++)
                        {
                            CssRules[rule](cssWriter);
                        }

                        if (context.IncludeComments)
                        {
                            context.Html.WriteComment("css rules for " + _commentNameFunc());
                        }

                        context.Html.WriteOpenTag("style");
                        context.Html.WriteLine();
                        cssWriter.ToHtml(context.Html);
                        context.Html.WriteCloseTag("style");
                        context.Html.WriteLine();
                    }
                    break;
                }

                case PageArea.Scripts:
                {
                    if (JavascriptFunctions != null && JavascriptFunctions.Length > 0)
                    {
                        // Collect every function into one writer, then emit a single script element.
                        var scriptWriter = _javascriptWriterFactory.Create(context);

                        for (var function = 0; function < JavascriptFunctions.Length; function++)
                        {
                            JavascriptFunctions[function](scriptWriter);
                        }

                        if (context.IncludeComments)
                        {
                            context.Html.WriteComment("javascript functions for " + _commentNameFunc());
                        }

                        context.Html.WriteScriptOpen();
                        scriptWriter.ToHtml(context.Html);
                        context.Html.WriteScriptClose();
                    }
                    break;
                }
            }
        }
Esempio n. 30
0
 /// <summary>
 /// Makes sure a page area is absent from (<paramref name="delete"/> is true) or present in
 /// (<paramref name="delete"/> is false) <c>PageAreas</c>, assigning a new array when a
 /// change is needed.
 /// </summary>
 /// <param name="pageArea">The page area to remove or add.</param>
 /// <param name="delete">True to remove every occurrence; false to add it if missing.</param>
 private void EnsurePageArea(PageArea pageArea, bool delete)
 {
     if (delete)
     {
         PageAreas = PageAreas.Where(area => area != pageArea).ToArray();
         return;
     }

     if (PageAreas.Contains(pageArea))
     {
         return;
     }

     PageAreas = PageAreas.Concat(new[] { pageArea }).ToArray();
 }