/// <summary>
/// Builds the provider around the supplied command manager and creates its
/// own collaborators: a test runner, an object extractor and an empty
/// two-level action registry (string key, then int key, then action model).
/// </summary>
/// <param name="commandManager">Command manager stored on the instance; it is not validated here.</param>
public ActionProvider(ICommandManager commandManager)
{
    // Only the parameter assignment needs "this." to disambiguate.
    this.commandManager = commandManager;

    runner = new TestRunner();
    extractor = new ObjectExtractor();
    actionsHub = new Dictionary<string, Dictionary<int, ActionModel>>();
}
// Disabled stub kept from the original source:
//public bool Evaluate(int t, string h)
//{
//    throw new NotImplementedException();

/// <summary>
/// Pulls the <c>TestEntities.Customer</c> out of the supplied objects and
/// sets its name to "Child".
/// </summary>
/// <param name="objects">Objects handed to <c>ObjectExtractor.Extract</c> to locate the customer.</param>
/// <returns>Always <c>ActionResultEnum.Successful</c>.</returns>
public ActionResultEnum Execute(object[] objects)
{
    var target = ObjectExtractor.Extract<TestEntities.Customer>(objects);

    target.Name = "Child";

    return ActionResultEnum.Successful;
}
/// <summary>
/// Pulls the <c>Table</c> out of the supplied objects and flags it as a
/// reference-data table.
/// </summary>
/// <param name="objects">Objects handed to <c>ObjectExtractor.Extract</c> to locate the table.</param>
/// <returns>Always <c>ActionResultEnum.Successful</c>.</returns>
public ActionResultEnum Execute(object[] objects)
{
    var target = ObjectExtractor.Extract<Table>(objects);

    target.IsReferenceDataTable = true;

    return ActionResultEnum.Successful;
}
/// <summary>
/// Smoke test on page 3 of the ICDAR 2013 "eu-004" document: detects table
/// regions, crops the page to the first detected region and runs the
/// spreadsheet extraction algorithm on it. There are no assertions; the
/// test passes as long as nothing throws.
/// </summary>
public void Eu004()
{
    var options = new ParsingOptions() { ClipPaths = true };

    using (PdfDocument pdf = PdfDocument.Open("Resources/icdar2013-dataset/competition-dataset-eu/eu-004.pdf", options))
    {
        ObjectExtractor extractor = new ObjectExtractor(pdf);
        PageArea thirdPage = extractor.Extract(3);

        // Only the first detected region is processed.
        var detected = new SimpleNurminenDetectionAlgorithm().Detect(thirdPage);
        var firstRegion = thirdPage.GetArea(detected[0].BoundingBox);

        var tables = new SpreadsheetExtractionAlgorithm().Extract(firstRegion);

        // A variant that ran BasicExtractionAlgorithm over every detected
        // region used to be sketched here as commented-out code.
    }
}
/// <summary>
/// Extracts page 1 of "npe_issue_206.pdf" and checks that a page object is
/// returned.
/// </summary>
public void TestDoNotNPEInPointComparator()
{
    var options = new ParsingOptions() { ClipPaths = true };

    using (PdfDocument pdf = PdfDocument.Open("Resources/npe_issue_206.pdf", options))
    {
        ObjectExtractor extractor = new ObjectExtractor(pdf);
        PageArea firstPage = extractor.ExtractPage(1);

        Assert.NotNull(firstPage);
    }
}
/// <summary>
/// Confirms the sample document has two pages and that asking the extractor
/// for page 3 throws <see cref="IndexOutOfRangeException"/>.
/// </summary>
public void TestExtractWrongPageNumber() // throws IOException
{
    var options = new ParsingOptions() { ClipPaths = true };

    using (PdfDocument pdf = PdfDocument.Open("Resources/S2MNCEbirdisland.pdf", options))
    {
        Assert.Equal(2, pdf.NumberOfPages);

        ObjectExtractor extractor = new ObjectExtractor(pdf);

        // Page 3 is one past the last page.
        Assert.Throws<IndexOutOfRangeException>(() => extractor.Extract(3));
    }
}
/// <summary>
/// Defaults <c>IndependentDataEntry</c> to true for an entity on which it is
/// still unset, provided the entity is flagged neither as a structure
/// reference nor as associative (unset counts as "not flagged").
/// </summary>
/// <param name="objects">Objects handed to <c>ObjectExtractor.Extract</c> to locate the entity.</param>
/// <returns>Always <c>ActionResultEnum.Successful</c>.</returns>
public ActionResultEnum Execute(object[] objects)
{
    var entity = ObjectExtractor.Extract<TableDrivedEntity>(objects);

    // "!= true" covers both the null and the false case of the nullable flags.
    bool undecided = entity.IndependentDataEntry == null;
    if (undecided && entity.IsStructurReferencee != true && entity.IsAssociative != true)
    {
        entity.IndependentDataEntry = true;
    }

    return ActionResultEnum.Successful;
}
/// <summary>
/// Marks an entity as associative when that flag is still unset and the
/// entity has more than one many-to-one relationship that is not
/// self-referencing and whose <c>TableDrivedEntity1</c> is neither a data
/// reference nor a structure reference.
/// </summary>
/// <param name="objects">Objects handed to <c>ObjectExtractor.Extract</c> to locate the entity.</param>
/// <returns>Always <c>ActionResultEnum.Successful</c>.</returns>
public ActionResultEnum Execute(object[] objects)
{
    var entity = ObjectExtractor.Extract<TableDrivedEntity>(objects);

    // A value that has already been decided is left untouched.
    if (entity.IsAssociative != null)
    {
        return ActionResultEnum.Successful;
    }

    int qualifying = entity.Relationship.Count(rel =>
        rel.RelationshipType != null
        && rel.RelationshipType.ManyToOneRelationshipType != null
        && rel.TableDrivedEntityID1 != rel.TableDrivedEntityID2
        && rel.TableDrivedEntity1.IsDataReference != true
        && rel.TableDrivedEntity1.IsStructurReferencee != true);

    if (qualifying > 1)
    {
        entity.IsAssociative = true;
    }

    return ActionResultEnum.Successful;
}
/// <summary>
/// Opens "S2MNCEbirdisland.pdf" with default parsing options, walks the
/// page iterator to the end and checks that exactly two pages were visited.
/// </summary>
public void TestCanReadPDFWithOwnerEncryption()
{
    using (PdfDocument pdf = PdfDocument.Open("Resources/S2MNCEbirdisland.pdf"))
    {
        ObjectExtractor extractor = new ObjectExtractor(pdf);
        PageIterator pages = extractor.Extract();

        int visited = 0;
        while (pages.MoveNext())
        {
            visited += 1;
        }

        Assert.Equal(2, visited);
    }
}
/// <summary>
/// Builds an <c>ObjectExtractor</c> over two bitmap image sources (source
/// and mask) that share one 10x10 bitmap, renders its graph via
/// <c>CreateGraph</c>, and checks how often each node id occurs in the
/// output: twice for source, twice for mask, three times for the extractor.
/// </summary>
public void CreatesExtractorGraph()
{
    var bitmap = new Bitmap(new Size(10, 10), ColorMode.Argb8888);

    using (var source = new BitmapImageSource(bitmap))
    using (var mask = new BitmapImageSource(bitmap))
    using (var extractor = new ObjectExtractor(source, mask))
    {
        string graph = CreateGraph(extractor);

        // Node ids are escaped so they are matched as literal text.
        int sourceHits = Regex.Matches(graph, Regex.Escape(NodeId(source))).Count;
        Assert.AreEqual(2, sourceHits);

        int maskHits = Regex.Matches(graph, Regex.Escape(NodeId(mask))).Count;
        Assert.AreEqual(2, maskHits);

        int extractorHits = Regex.Matches(graph, Regex.Escape(NodeId(extractor))).Count;
        Assert.AreEqual(3, extractorHits);
    }
}
/// <summary>
/// Opens "labor.pdf", advances the page iterator once and checks that the
/// first page is available and not null.
/// </summary>
public void TestDontThrowNPEInShfill()
{
    var options = new ParsingOptions() { ClipPaths = true };

    using (PdfDocument pdf = PdfDocument.Open("Resources/labor.pdf", options))
    {
        ObjectExtractor extractor = new ObjectExtractor(pdf);
        PageIterator pages = extractor.Extract();

        Assert.True(pages.MoveNext());

        PageArea firstPage = pages.Current;
        Assert.NotNull(firstPage);
    }
}
/// <summary>
/// Opens "rotated_page.pdf" and checks that the page iterator yields
/// exactly one non-null page.
/// </summary>
public void TestTextExtractionDoesNotRaise()
{
    var options = new ParsingOptions() { ClipPaths = true };

    using (PdfDocument pdf = PdfDocument.Open("Resources/rotated_page.pdf", options))
    {
        ObjectExtractor extractor = new ObjectExtractor(pdf);
        PageIterator pages = extractor.Extract();

        // First advance succeeds and exposes a page...
        Assert.True(pages.MoveNext());
        Assert.NotNull(pages.Current);

        // ...and the second advance reports the end.
        Assert.False(pages.MoveNext());
    }
}
/// <summary>
/// Confirms the sample document has two pages and that extracting page 2
/// returns a non-null page.
/// </summary>
public void TestExtractOnePage()
{
    var options = new ParsingOptions() { ClipPaths = true };

    using (PdfDocument pdf = PdfDocument.Open("Resources/S2MNCEbirdisland.pdf", options))
    {
        Assert.Equal(2, pdf.NumberOfPages);

        ObjectExtractor extractor = new ObjectExtractor(pdf);
        PageArea secondPage = extractor.Extract(2);

        Assert.NotNull(secondPage);
    }
}
/// <summary>
/// Extracts page 1 of "cs-en-us-pbms.pdf" and checks that the bounding box
/// of every text element lies inside the page's bounding box.
/// </summary>
public void TestTextElementsContainedInPage()
{
    var options = new ParsingOptions() { ClipPaths = true };

    using (PdfDocument pdf = PdfDocument.Open("Resources/cs-en-us-pbms.pdf", options))
    {
        ObjectExtractor extractor = new ObjectExtractor(pdf);
        PageArea firstPage = extractor.ExtractPage(1);

        foreach (TextElement element in firstPage.GetText())
        {
            bool insidePage = firstPage.BoundingBox.Contains(element.BoundingBox);
            Assert.True(insidePage);
        }
    }
}
/// <summary>
/// Opens "encrypted.pdf" with a password supplied through the parsing
/// options, collects every page from the iterator and checks that exactly
/// one page was produced.
/// </summary>
public void TestGoodPassword()
{
    var options = new ParsingOptions() { Password = "******" };

    using (PdfDocument pdf = PdfDocument.Open("Resources/encrypted.pdf", options))
    {
        ObjectExtractor extractor = new ObjectExtractor(pdf);
        List<PageArea> collected = new List<PageArea>();

        PageIterator iterator = extractor.Extract();
        while (iterator.MoveNext())
        {
            collected.Add(iterator.Current);
        }

        Assert.Single(collected);
    }
}
/// <summary>
/// Parses a PDF from an already opened PDF document object.
/// </summary>
/// <param name="pdfDocument">The PDF document.</param>
/// <param name="tableContainType">Table containment style, forwarded to <c>PdfTextFormater</c>.</param>
/// <returns>
/// The result of <c>PdfTextFormater</c> applied to the populated model, or
/// <c>null</c> when splitting the stripped text yields no segments.
/// </returns>
public static PDFModel Parser(PDDocument pdfDocument, TableContainType tableContainType)
{
    ObjectExtractor extractor = new ObjectExtractor(pdfDocument);
    PageIterator pageIterator = extractor.extract();
    SpreadsheetExtractionAlgorithm tableExtractor = new SpreadsheetExtractionAlgorithm();
    PDFModel model = new PDFModel();

    // The stripper is told to emit pageEndMark at each page end, and the
    // full text is then split on that same mark to get per-page segments.
    PDFTextStripper stripper = new PDFTextStripper();
    stripper.setPageEnd(pageEndMark);
    //stripper.setParagraphEnd(paragraphEndMark);
    string[] pageTexts = Regex.Split(stripper.getText(pdfDocument), pageEndMark, RegexOptions.IgnoreCase);

    if (pageTexts == null || pageTexts.Length == 0)
    {
        return null;
    }

    model.Pages = new List<PdfPageModel>();

    for (int pageIndex = 0; pageIterator.hasNext(); pageIndex++)
    {
        // CurrentPage is 1-based; the text segment is looked up by the 0-based index.
        PdfPageModel pageModel = new PdfPageModel
        {
            CurrentPage = pageIndex + 1,
            Text = pageTexts[pageIndex]
        };

        List<Table> tables = new List<Table>();
        Page page = pageIterator.next();
        var extracted = tableExtractor.extract(page).toArray();
        if (extracted != null)
        {
            foreach (var item in extracted)
            {
                tables.Add(item as Table);
            }
        }

        pageModel.Tables = tables;
        model.Pages.Add(pageModel);
    }

    model.PageNumber = model.Pages.Count;
    return PdfTextFormater(model, tableContainType);
}
/// <summary>
/// Classifies an entity whose <c>IsDataReference</c> and
/// <c>IsStructurReferencee</c> flags are both still unset, applying two
/// rules in order:
/// <list type="number">
/// <item><description>
/// Data reference: the entity has at least one one-to-many relationship,
/// has no implicit one-to-one, super-to-sub or key-side union relationship,
/// and has at most 4 columns of which at least one is a string column.
/// </description></item>
/// <item><description>
/// Structure reference: the entity has more than one implicit one-to-one
/// relationship and has no one-to-many, super-to-sub or key-side union
/// relationship.
/// </description></item>
/// </list>
/// The second rule is skipped when the first one has just set
/// <c>IsDataReference</c>.
/// </summary>
/// <param name="objects">Objects handed to <c>ObjectExtractor.Extract</c> to locate the entity.</param>
/// <returns>Always <c>ActionResultEnum.Successful</c>.</returns>
public ActionResultEnum Execute(object[] objects)
{
    var entity = ObjectExtractor.Extract<TableDrivedEntity>(objects);

    // Rule 1: data reference.
    if (entity.IsDataReference == null && entity.IsStructurReferencee == null)
    {
        // Any() stops at the first match instead of counting every relationship.
        bool hasOneToMany = entity.Relationship.Any(x =>
            x.RelationshipType != null
            && x.RelationshipType.OneToManyRelationshipType != null);

        if (hasOneToMany
            && !entity.Relationship.Any(x =>
                x.RelationshipType != null
                && (x.RelationshipType.ImplicitOneToOneRelationshipType != null
                    || x.RelationshipType.SuperToSubRelationshipType != null
                    || (x.RelationshipType.UnionToSubUnionRelationshipType != null
                        && x.RelationshipType.UnionToSubUnionRelationshipType.UnionRelationshipType.UnionHoldsKeys == true)
                    || (x.RelationshipType.SubUnionToUnionRelationshipType != null
                        && x.RelationshipType.SubUnionToUnionRelationshipType.UnionRelationshipType.UnionHoldsKeys == false))))
        {
            BizColumn biz = new BizColumn();
            var columns = biz.GetAllColumns(entity.ID, true);
            if (columns.Count <= 4 && columns.Any(x => x.ColumnType == Enum_ColumnType.String))
            {
                entity.IsDataReference = true;
            }
        }
    }

    // Rule 2: structure reference. The flags are re-read because rule 1 may
    // have set IsDataReference.
    if (entity.IsDataReference == null && entity.IsStructurReferencee == null)
    {
        // "More than one" needs a real count, so Count(predicate) stays.
        bool hasSeveralImplicitOneToOne = entity.Relationship.Count(x =>
            x.RelationshipType != null
            && x.RelationshipType.ImplicitOneToOneRelationshipType != null) > 1;

        if (hasSeveralImplicitOneToOne
            && !entity.Relationship.Any(x =>
                x.RelationshipType != null
                && (x.RelationshipType.OneToManyRelationshipType != null
                    || x.RelationshipType.SuperToSubRelationshipType != null
                    || (x.RelationshipType.UnionToSubUnionRelationshipType != null
                        && x.RelationshipType.UnionToSubUnionRelationshipType.UnionRelationshipType.UnionHoldsKeys == true)
                    || (x.RelationshipType.SubUnionToUnionRelationshipType != null
                        && x.RelationshipType.SubUnionToUnionRelationshipType.UnionRelationshipType.UnionHoldsKeys == false))))
        {
            // IsStructurReferencee is known to be null here (checked by the
            // enclosing condition), so no further null check is needed.
            entity.IsStructurReferencee = true;
        }
    }

    return ActionResultEnum.Successful;
}
/// <summary>
/// Takes the next page from the iterator over "should_detect_rulings.pdf"
/// and checks that the bounding rectangle of every ruling line is contained
/// in the page's bounding box.
/// </summary>
public void TestShouldDetectRulings()
{
    var options = new ParsingOptions() { ClipPaths = true };

    using (PdfDocument pdf = PdfDocument.Open("Resources/should_detect_rulings.pdf", options))
    {
        ObjectExtractor extractor = new ObjectExtractor(pdf);
        PageIterator pages = extractor.Extract();
        PageArea page = pages.Next();

        foreach (Ruling ruling in page.GetRulings())
        {
            var lineBounds = ruling.Line.GetBoundingRectangle();
            Assert.True(page.BoundingBox.Contains(lineBounds, true));
        }
    }
}
/// <summary>
/// Smoke test on page 1 of "test3.pdf": detects table regions and runs the
/// basic extraction algorithm on each region in turn. There are no
/// assertions; the test passes as long as nothing throws.
/// </summary>
public void TestLinesToCells()
{
    var options = new ParsingOptions() { ClipPaths = true };

    using (PdfDocument pdf = PdfDocument.Open("test3.pdf", options))
    {
        ObjectExtractor extractor = new ObjectExtractor(pdf);
        PageArea firstPage = extractor.Extract(1);

        SimpleNurminenDetectionAlgorithm detector = new SimpleNurminenDetectionAlgorithm();
        var detected = detector.Detect(firstPage);

        foreach (var region in detected)
        {
            // A fresh algorithm instance is used for every region.
            IExtractionAlgorithm algorithm = new BasicExtractionAlgorithm();
            var cropped = firstPage.GetArea(region.BoundingBox);
            List<Table> extracted = algorithm.Extract(cropped);
        }
    }
}
/// <summary>
/// Opens the PDF at <paramref name="path"/> with path clipping enabled and
/// extracts a single page from it.
/// </summary>
/// <param name="path">Path passed to <c>PdfDocument.Open</c>.</param>
/// <param name="pageNumber">Page number passed to <c>ObjectExtractor.Extract</c>.</param>
/// <returns>The extracted page area.</returns>
public static PageArea GetPage(string path, int pageNumber)
{
    ObjectExtractor extractor = null;
    try
    {
        using (PdfDocument pdf = PdfDocument.Open(path, new ParsingOptions() { ClipPaths = true }))
        {
            extractor = new ObjectExtractor(pdf);

            // The document is disposed on leaving the using block; the
            // extractor is closed afterwards by the finally clause.
            return extractor.Extract(pageNumber);
        }
    }
    finally
    {
        // The extractor is still null if opening the document failed.
        extractor?.Close();
    }
}
/// <summary>
/// Checks whether the table found in the supplied objects has no more
/// columns than the integer vocabulary value
/// "Vocabluray_ReferenceTableColumnsCount".
/// </summary>
/// <param name="objects">Objects used both to locate the table and to resolve the vocabulary value.</param>
/// <returns><c>true</c> when the column count is less than or equal to the vocabulary value.</returns>
public bool Evaluate(params object[] objects)
{
    var table = ObjectExtractor.Extract<Table>(objects);

    var columnCount = table.Column.Count;
    var limit = Biz_Vocabulary.GetVocabulary<int>("Vocabluray_ReferenceTableColumnsCount", objects);

    return columnCount <= limit;
}
// Disabled stub kept from the original source:
//public bool Evaluate(int t, string h)
//{
//    throw new NotImplementedException();

/// <summary>
/// Checks whether the age of the customer found in the supplied objects is
/// strictly below the integer vocabulary value "Age".
/// </summary>
/// <param name="objects">Objects used both to locate the customer and to resolve the vocabulary value.</param>
/// <returns><c>true</c> when the customer's age is less than the vocabulary value.</returns>
public bool Evaluate(params object[] objects)
{
    var customer = ObjectExtractor.Extract<TestEntities.Customer>(objects);

    var age = customer.Age;
    var threshold = Biz_Vocabulary.GetVocabulary<int>("Age", objects);

    return age < threshold;
}