Esempio n. 1
0
 /// <summary>
 /// Creates an action provider that keeps a reference to the supplied command manager
 /// and builds its own runner, extractor and (initially empty) action registry.
 /// </summary>
 /// <param name="commandManager">Command manager stored on the provider.</param>
 public ActionProvider(ICommandManager commandManager)
 {
     runner = new TestRunner();
     this.commandManager = commandManager;
     extractor = new ObjectExtractor();

     // Two-level registry: string key -> (int key -> ActionModel). Starts empty.
     actionsHub = new Dictionary<string, Dictionary<int, ActionModel>>();
 }
Esempio n. 2
0
        //public bool Evaluate(int t, string h)
        //{
        //    throw new NotImplementedException();

        /// <summary>
        /// Extracts a <c>TestEntities.Customer</c> from <paramref name="objects"/> and sets its name to "Child".
        /// </summary>
        /// <param name="objects">Objects handed to <c>ObjectExtractor.Extract</c>.</param>
        /// <returns><c>ActionResultEnum.Successful</c>.</returns>
        public ActionResultEnum Execute(object[] objects)
        {
            var target = ObjectExtractor.Extract<TestEntities.Customer>(objects);
            target.Name = "Child";

            return ActionResultEnum.Successful;
        }
        /// <summary>
        /// Extracts a <c>Table</c> from <paramref name="objects"/> and flags it as a reference data table.
        /// </summary>
        /// <param name="objects">Objects handed to <c>ObjectExtractor.Extract</c>.</param>
        /// <returns><c>ActionResultEnum.Successful</c>.</returns>
        public ActionResultEnum Execute(object[] objects)
        {
            var target = ObjectExtractor.Extract<Table>(objects);
            target.IsReferenceDataTable = true;

            return ActionResultEnum.Successful;
        }
Esempio n. 4
0
        /// <summary>
        /// Extracts page 3 of the "eu-004" sample PDF, detects table regions on it and runs the
        /// spreadsheet extraction algorithm on the first detected region.
        /// </summary>
        public void Eu004()
        {
            var options = new ParsingOptions() { ClipPaths = true };

            using (PdfDocument pdf = PdfDocument.Open("Resources/icdar2013-dataset/competition-dataset-eu/eu-004.pdf", options))
            {
                PageArea thirdPage = new ObjectExtractor(pdf).Extract(3);

                var detectedRegions = new SimpleNurminenDetectionAlgorithm().Detect(thirdPage);

                // Only the first detected region is extracted.
                var firstRegionArea = thirdPage.GetArea(detectedRegions[0].BoundingBox);

                // The result is not asserted on; the test passes as long as nothing throws.
                var tables = new SpreadsheetExtractionAlgorithm().Extract(firstRegionArea);
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Extracts page 1 of "npe_issue_206.pdf" and checks that a page object is returned.
        /// </summary>
        public void TestDoNotNPEInPointComparator()
        {
            var options = new ParsingOptions() { ClipPaths = true };

            using (PdfDocument pdf = PdfDocument.Open("Resources/npe_issue_206.pdf", options))
            {
                PageArea firstPage = new ObjectExtractor(pdf).ExtractPage(1);

                Assert.NotNull(firstPage);
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Checks that asking for page 3 of a two-page document throws
        /// <see cref="IndexOutOfRangeException"/>.
        /// </summary>
        public void TestExtractWrongPageNumber()
        {
            var options = new ParsingOptions() { ClipPaths = true };

            using (PdfDocument pdf = PdfDocument.Open("Resources/S2MNCEbirdisland.pdf", options))
            {
                // Precondition: the sample has exactly two pages, so page 3 is out of range.
                Assert.Equal(2, pdf.NumberOfPages);

                var extractor = new ObjectExtractor(pdf);
                Assert.Throws<IndexOutOfRangeException>(() => extractor.Extract(3));
            }
        }
        /// <summary>
        /// Sets <c>IndependentDataEntry</c> to true on the extracted entity when that flag is
        /// still null and the entity is flagged neither as a structure reference nor as associative.
        /// </summary>
        /// <param name="objects">Objects handed to <c>ObjectExtractor.Extract</c>.</param>
        /// <returns><c>ActionResultEnum.Successful</c>.</returns>
        public ActionResultEnum Execute(object[] objects)
        {
            var target = ObjectExtractor.Extract<TableDrivedEntity>(objects);

            // A null flag counts the same as false for the two exclusion checks.
            bool shouldMark = target.IndependentDataEntry == null
                              && target.IsStructurReferencee != true
                              && target.IsAssociative != true;

            if (shouldMark)
            {
                target.IndependentDataEntry = true;
            }

            return ActionResultEnum.Successful;
        }
Esempio n. 8
0
        /// <summary>
        /// Marks the extracted entity as associative when its <c>IsAssociative</c> flag is still
        /// null and more than one of its relationships qualifies (see the predicate below).
        /// </summary>
        /// <param name="objects">Objects handed to <c>ObjectExtractor.Extract</c>.</param>
        /// <returns><c>ActionResultEnum.Successful</c>.</returns>
        public ActionResultEnum Execute(object[] objects)
        {
            var target = ObjectExtractor.Extract<TableDrivedEntity>(objects);

            // Already decided: leave the existing value alone.
            if (target.IsAssociative != null)
            {
                return ActionResultEnum.Successful;
            }

            // Qualifying relationship: many-to-one, not self-referencing, and the entity on
            // side 1 is flagged neither as a data reference nor as a structure reference.
            int qualifying = target.Relationship.Count(rel =>
                rel.RelationshipType != null
                && rel.RelationshipType.ManyToOneRelationshipType != null
                && rel.TableDrivedEntityID1 != rel.TableDrivedEntityID2
                && rel.TableDrivedEntity1.IsDataReference != true
                && rel.TableDrivedEntity1.IsStructurReferencee != true);

            if (qualifying > 1)
            {
                target.IsAssociative = true;
            }

            return ActionResultEnum.Successful;
        }
Esempio n. 9
0
 /// <summary>
 /// Opens "S2MNCEbirdisland.pdf" without parsing options, walks the page iterator to the end
 /// and expects exactly two pages.
 /// </summary>
 public void TestCanReadPDFWithOwnerEncryption()
 {
     using (PdfDocument pdf = PdfDocument.Open("Resources/S2MNCEbirdisland.pdf"))
     {
         PageIterator pages = new ObjectExtractor(pdf).Extract();

         int pageCount = 0;
         while (pages.MoveNext())
         {
             pageCount += 1;
         }

         Assert.Equal(2, pageCount);
     }
 }
        /// <summary>
        /// Builds the graph text for an extractor wired to a source and a mask, then checks how
        /// many times each node id occurs in it (source: 2, mask: 2, extractor: 3).
        /// </summary>
        public void CreatesExtractorGraph()
        {
            var bitmap = new Bitmap(new Size(10, 10), ColorMode.Argb8888);

            using (var source = new BitmapImageSource(bitmap))
            {
                using (var mask = new BitmapImageSource(bitmap))
                {
                    using (var extractor = new ObjectExtractor(source, mask))
                    {
                        string graph = CreateGraph(extractor);

                        // Node ids are escaped so they are matched literally.
                        Assert.AreEqual(2, Regex.Matches(graph, Regex.Escape(NodeId(source))).Count);
                        Assert.AreEqual(2, Regex.Matches(graph, Regex.Escape(NodeId(mask))).Count);
                        Assert.AreEqual(3, Regex.Matches(graph, Regex.Escape(NodeId(extractor))).Count);
                    }
                }
            }
        }
Esempio n. 11
0
        /// <summary>
        /// Opens "labor.pdf", advances the page iterator once and checks that the current page
        /// is not null.
        /// </summary>
        public void TestDontThrowNPEInShfill()
        {
            var options = new ParsingOptions() { ClipPaths = true };

            using (PdfDocument pdf = PdfDocument.Open("Resources/labor.pdf", options))
            {
                PageIterator pages = new ObjectExtractor(pdf).Extract();

                bool hasFirstPage = pages.MoveNext();
                Assert.True(hasFirstPage);

                PageArea firstPage = pages.Current;
                Assert.NotNull(firstPage);
            }
        }
Esempio n. 12
0
        /// <summary>
        /// Opens "rotated_page.pdf" and checks that the page iterator yields exactly one
        /// non-null page.
        /// </summary>
        public void TestTextExtractionDoesNotRaise()
        {
            var options = new ParsingOptions() { ClipPaths = true };

            using (PdfDocument pdf = PdfDocument.Open("Resources/rotated_page.pdf", options))
            {
                PageIterator pages = new ObjectExtractor(pdf).Extract();

                // First advance succeeds and exposes a page...
                Assert.True(pages.MoveNext());
                Assert.NotNull(pages.Current);

                // ...and there is nothing after it.
                Assert.False(pages.MoveNext());
            }
        }
Esempio n. 13
0
        /// <summary>
        /// Checks that page 2 of the two-page "S2MNCEbirdisland.pdf" can be extracted on its own.
        /// </summary>
        public void TestExtractOnePage()
        {
            var options = new ParsingOptions() { ClipPaths = true };

            using (PdfDocument pdf = PdfDocument.Open("Resources/S2MNCEbirdisland.pdf", options))
            {
                Assert.Equal(2, pdf.NumberOfPages);

                PageArea secondPage = new ObjectExtractor(pdf).Extract(2);

                Assert.NotNull(secondPage);
            }
        }
Esempio n. 14
0
        /// <summary>
        /// Checks that the bounding box of every text element on page 1 of "cs-en-us-pbms.pdf"
        /// is contained in the page's bounding box.
        /// </summary>
        public void TestTextElementsContainedInPage()
        {
            var options = new ParsingOptions() { ClipPaths = true };

            using (PdfDocument pdf = PdfDocument.Open("Resources/cs-en-us-pbms.pdf", options))
            {
                PageArea firstPage = new ObjectExtractor(pdf).ExtractPage(1);

                foreach (TextElement element in firstPage.GetText())
                {
                    Assert.True(firstPage.BoundingBox.Contains(element.BoundingBox));
                }
            }
        }
Esempio n. 15
0
 /// <summary>
 /// Opens "encrypted.pdf" with a password supplied through the parsing options, collects
 /// every page from the iterator and expects exactly one.
 /// </summary>
 public void TestGoodPassword()
 {
     var options = new ParsingOptions() { Password = "******" };

     using (PdfDocument pdf = PdfDocument.Open("Resources/encrypted.pdf", options))
     {
         var extractor = new ObjectExtractor(pdf);
         var collected = new List<PageArea>();

         PageIterator iterator = extractor.Extract();
         while (iterator.MoveNext())
         {
             collected.Add(iterator.Current);
         }

         Assert.Single(collected);
     }
 }
Esempio n. 16
0
        /// <summary>
        /// Parses a PDF from an already opened PDF document object.
        /// </summary>
        /// <param name="pdfDocument">The PDF document.</param>
        /// <param name="tableContainType">Table containment style; passed on to <c>PdfTextFormater</c>.</param>
        /// <returns>
        /// The result of <c>PdfTextFormater</c> applied to the populated model, or <c>null</c>
        /// when splitting the document text yields no segments.
        /// </returns>
        public static PDFModel Parser(PDDocument pdfDocument, TableContainType tableContainType)
        {
            ObjectExtractor objectExtractor = new ObjectExtractor(pdfDocument);
            PageIterator pages = objectExtractor.extract();
            SpreadsheetExtractionAlgorithm spreadsheetAlgorithm = new SpreadsheetExtractionAlgorithm();

            PDFModel model = new PDFModel();

            // The stripper appends pageEndMark after each page; the full text is then split
            // on that mark to obtain per-page text.
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setPageEnd(pageEndMark);
            string[] pageTexts = Regex.Split(stripper.getText(pdfDocument), pageEndMark, RegexOptions.IgnoreCase);

            if (pageTexts == null || pageTexts.Length == 0)
            {
                return null;
            }

            model.Pages = new List<PdfPageModel>();

            // pageIndex addresses the text segment matching the page the iterator yields next.
            for (int pageIndex = 0; pages.hasNext(); pageIndex++)
            {
                PdfPageModel pageModel = new PdfPageModel();
                pageModel.CurrentPage = pageIndex + 1; // page numbers are 1-based
                pageModel.Text = pageTexts[pageIndex];

                List<Table> pageTables = new List<Table>();
                Page currentPage = pages.next();
                var extracted = spreadsheetAlgorithm.extract(currentPage).toArray();
                if (extracted != null)
                {
                    foreach (var item in extracted)
                    {
                        // Items that are not a Table are added as null, via the 'as' cast.
                        pageTables.Add(item as Table);
                    }
                }

                pageModel.Tables = pageTables;
                model.Pages.Add(pageModel);
            }

            model.PageNumber = model.Pages.Count;

            return PdfTextFormater(model, tableContainType);
        }
Esempio n. 17
0
        /// <summary>
        /// Classifies a still-undecided entity as a data reference or as a structure reference,
        /// based on the kinds of relationships it takes part in.
        /// </summary>
        /// <remarks>
        /// Each rule runs only while both <c>IsDataReference</c> and <c>IsStructurReferencee</c>
        /// are null, so once the first rule sets a flag the second rule is skipped.
        /// </remarks>
        /// <param name="objects">Objects handed to <c>ObjectExtractor.Extract</c>.</param>
        /// <returns><c>ActionResultEnum.Successful</c>.</returns>
        public ActionResultEnum Execute(object[] objects)
        {
            var entity = ObjectExtractor.Extract<TableDrivedEntity>(objects);

            // Rule 1 - data reference: the entity has at least one one-to-many relationship,
            // none of the excluded kinds, and at most 4 columns of which at least one is a string.
            if (entity.IsDataReference == null && entity.IsStructurReferencee == null
                && entity.Relationship.Any(x => x.RelationshipType != null && x.RelationshipType.OneToManyRelationshipType != null)
                && !entity.Relationship.Any(x => x.RelationshipType != null &&
                        (x.RelationshipType.ImplicitOneToOneRelationshipType != null ||
                         x.RelationshipType.SuperToSubRelationshipType != null ||
                         (x.RelationshipType.UnionToSubUnionRelationshipType != null && x.RelationshipType.UnionToSubUnionRelationshipType.UnionRelationshipType.UnionHoldsKeys == true) ||
                         (x.RelationshipType.SubUnionToUnionRelationshipType != null && x.RelationshipType.SubUnionToUnionRelationshipType.UnionRelationshipType.UnionHoldsKeys == false))))
            {
                // Columns are fetched only after the relationship checks have passed.
                var columns = new BizColumn().GetAllColumns(entity.ID, true);

                // NOTE(review): 4 is a hard-coded column limit - confirm whether it should be configurable.
                if (columns.Count <= 4 && columns.Any(x => x.ColumnType == Enum_ColumnType.String))
                {
                    entity.IsDataReference = true;
                }
            }

            // Rule 2 - structure reference: the entity has more than one implicit one-to-one
            // relationship and none of the excluded kinds.
            if (entity.IsDataReference == null && entity.IsStructurReferencee == null
                && entity.Relationship.Count(x => x.RelationshipType != null && x.RelationshipType.ImplicitOneToOneRelationshipType != null) > 1
                && !entity.Relationship.Any(x => x.RelationshipType != null &&
                        (x.RelationshipType.OneToManyRelationshipType != null ||
                         x.RelationshipType.SuperToSubRelationshipType != null ||
                         (x.RelationshipType.UnionToSubUnionRelationshipType != null && x.RelationshipType.UnionToSubUnionRelationshipType.UnionRelationshipType.UnionHoldsKeys == true) ||
                         (x.RelationshipType.SubUnionToUnionRelationshipType != null && x.RelationshipType.SubUnionToUnionRelationshipType.UnionRelationshipType.UnionHoldsKeys == false))))
            {
                entity.IsStructurReferencee = true;
            }

            return ActionResultEnum.Successful;
        }
Esempio n. 18
0
        /// <summary>
        /// Checks that the bounding rectangle of every ruling on the first page of
        /// "should_detect_rulings.pdf" is contained in the page's bounding box.
        /// </summary>
        public void TestShouldDetectRulings()
        {
            var options = new ParsingOptions() { ClipPaths = true };

            using (PdfDocument pdf = PdfDocument.Open("Resources/should_detect_rulings.pdf", options))
            {
                PageArea firstPage = new ObjectExtractor(pdf).Extract().Next();

                foreach (Ruling ruling in firstPage.GetRulings())
                {
                    Assert.True(firstPage.BoundingBox.Contains(ruling.Line.GetBoundingRectangle(), true));
                }
            }
        }
Esempio n. 19
0
        /// <summary>
        /// Detects table regions on page 1 of "test3.pdf" and runs the basic extraction
        /// algorithm on each of them.
        /// </summary>
        public void TestLinesToCells()
        {
            var options = new ParsingOptions() { ClipPaths = true };

            using (PdfDocument pdf = PdfDocument.Open("test3.pdf", options))
            {
                PageArea firstPage = new ObjectExtractor(pdf).Extract(1);

                var detectedRegions = new SimpleNurminenDetectionAlgorithm().Detect(firstPage);

                foreach (var region in detectedRegions)
                {
                    // A fresh algorithm instance is used for every region.
                    IExtractionAlgorithm algorithm = new BasicExtractionAlgorithm();
                    var regionArea = firstPage.GetArea(region.BoundingBox);

                    // The result is not asserted on; the test passes as long as nothing throws.
                    List<Table> tables = algorithm.Extract(regionArea);
                }
            }
        }
Esempio n. 20
0
        /// <summary>
        /// Opens the PDF at <paramref name="path"/> and returns the requested page.
        /// </summary>
        /// <param name="path">Path of the PDF file to open.</param>
        /// <param name="pageNumber">Page number passed to <c>ObjectExtractor.Extract</c>.</param>
        /// <returns>The extracted page area.</returns>
        public static PageArea GetPage(string path, int pageNumber)
        {
            ObjectExtractor extractor = null;

            try
            {
                var options = new ParsingOptions() { ClipPaths = true };
                PageArea extracted;

                using (PdfDocument pdf = PdfDocument.Open(path, options))
                {
                    extractor = new ObjectExtractor(pdf);
                    extracted = extractor.Extract(pageNumber);
                }

                return extracted;
            }
            finally
            {
                // Runs after the document's using block has ended; the extractor stays null
                // when opening the document or constructing the extractor threw.
                if (extractor != null)
                {
                    extractor.Close();
                }
            }
        }
        /// <summary>
        /// Tells whether the extracted table's column count is at most the value stored under
        /// the "Vocabluray_ReferenceTableColumnsCount" vocabulary key.
        /// </summary>
        /// <param name="objects">Objects handed to the extractor and to the vocabulary lookup.</param>
        /// <returns>True when the column count does not exceed the vocabulary value.</returns>
        public bool Evaluate(params object[] objects)
        {
            var table = ObjectExtractor.Extract<Table>(objects);

            var columnCount = table.Column.Count;
            var limit = Biz_Vocabulary.GetVocabulary<int>("Vocabluray_ReferenceTableColumnsCount", objects);

            return columnCount <= limit;
        }
        /// <summary>
        /// Builds the graph text for an extractor wired to a source and a mask, then checks how
        /// many times each node id occurs in it (source: 2, mask: 2, extractor: 3).
        /// </summary>
        public void CreatesExtractorGraph()
        {
            var bitmap = new Bitmap(new Size(10, 10), ColorMode.Argb8888);

            using (var source = new BitmapImageSource(bitmap))
            {
                using (var mask = new BitmapImageSource(bitmap))
                {
                    using (var extractor = new ObjectExtractor(source, mask))
                    {
                        string graph = CreateGraph(extractor);

                        // Each node id is escaped so it is matched literally.
                        string sourcePattern = Regex.Escape(NodeId(source));
                        Assert.AreEqual(2, Regex.Matches(graph, sourcePattern).Count);

                        string maskPattern = Regex.Escape(NodeId(mask));
                        Assert.AreEqual(2, Regex.Matches(graph, maskPattern).Count);

                        string extractorPattern = Regex.Escape(NodeId(extractor));
                        Assert.AreEqual(3, Regex.Matches(graph, extractorPattern).Count);
                    }
                }
            }
        }
Esempio n. 23
0
        //public bool Evaluate(int t, string h)
        //{
        //    throw new NotImplementedException();

        /// <summary>
        /// Tells whether the extracted customer's age is below the value stored under the
        /// "Age" vocabulary key.
        /// </summary>
        /// <param name="objects">Objects handed to the extractor and to the vocabulary lookup.</param>
        /// <returns>True when the customer's age is strictly less than the vocabulary value.</returns>
        public bool Evaluate(params object[] objects)
        {
            var customer = ObjectExtractor.Extract<TestEntities.Customer>(objects);

            var age = customer.Age;
            var threshold = Biz_Vocabulary.GetVocabulary<int>("Age", objects);

            return age < threshold;
        }