/// <summary>
/// Handles the form's Load event: registers the process-exit handler, empties the
/// object database and, when a command-line argument was supplied, passes it to
/// <c>AutoprocessDocs</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void MainForm_Load(object sender, EventArgs e)
{
    AppDomain.CurrentDomain.ProcessExit += OnProcessExit;

    // Delete any data that might be left in the database from a previous run.
    DataBase.DB docObjects = new DataBase.DB();
    docObjects.TruncateObjects();

    // Element 0 of the command line is the executable itself; element 1 is the
    // first real argument.
    string[] args = Environment.GetCommandLineArgs();
    if (args.Length > 1)
    {
        AutoprocessDocs(args[1]);
    }

    // SECURITY: service subscription keys (text / LUIS) must never be kept in
    // source, not even inside comments. Load them from protected configuration
    // at runtime. Keys that were previously committed here should be treated as
    // compromised and rotated.
}
/// <summary>
/// Reads the first page of a PDF file, extracts its visual objects (images, text
/// fragments and paths) and stores them as rows in the object database.
/// </summary>
/// <param name="PDFFileName">Path of the PDF file to open (read-only, shared read).</param>
/// <param name="PDFPageWidth">Receives the width of the first page.</param>
/// <param name="PDFPageHeight">Receives the height of the first page.</param>
public void getObjectsFromPDF(string PDFFileName, out double PDFPageWidth, out double PDFPageHeight)
{
    // The stream stays open for the whole extraction and is released
    // deterministically afterwards, including when an exception is thrown.
    using (FileStream pageObjectsInput = new FileStream(PDFFileName, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        PdfFixedDocument document = new PdfFixedDocument(pageObjectsInput);
        PdfPage displayPage = document.Pages[0];
        PDFPageWidth = displayPage.Width;
        PDFPageHeight = displayPage.Height;

        PdfContentExtractor extractor = new PdfContentExtractor(displayPage);
        PdfVisualObjectCollection visualObjects = extractor.ExtractVisualObjects(false);

        // Delete any data that might be in the database before inserting new rows.
        DataBase.DB docObjects = new DataBase.DB();
        docObjects.TruncateObjects();

        for (int i = 0; i < visualObjects.Count; i++)
        {
            switch (visualObjects[i].Type)
            {
                case PdfVisualObjectType.Image:
                {
                    PdfImageVisualObject imageObject = (PdfImageVisualObject)visualObjects[i];
                    var corners = imageObject.Image.ImageCorners;
                    // TODO: add image OCR here.
                    docObjects.InsertObjectRow("Image", imageObject.Image.ImageID, "", "",
                        corners[0].X, corners[0].Y,
                        corners[1].X, corners[1].Y,
                        corners[2].X, corners[2].Y,
                        corners[3].X, corners[3].Y);
                    break;
                }

                case PdfVisualObjectType.Text:
                {
                    PdfTextVisualObject textObject = (PdfTextVisualObject)visualObjects[i];
                    var corners = textObject.TextFragment.FragmentCorners;
                    // The translated-text column is stored empty; no per-glyph
                    // detail rows are written.
                    docObjects.InsertObjectRow("Text", textObject.TextFragment.Text, "", "",
                        corners[0].X, corners[0].Y,
                        corners[1].X, corners[1].Y,
                        corners[2].X, corners[2].Y,
                        corners[3].X, corners[3].Y);
                    break;
                }

                case PdfVisualObjectType.Path:
                {
                    PdfPathVisualObject pathObject = (PdfPathVisualObject)visualObjects[i];

                    // Examine all the path points and determine the minimum
                    // rectangle that bounds them. The sentinels are the values
                    // stored when no point has been seen yet.
                    double minX = 999999, minY = 999999, maxX = -999999, maxY = -999999;

                    for (int j = 0; j < pathObject.PathItems.Count; j++)
                    {
                        PdfPathItem item = pathObject.PathItems[j];
                        if (item.Points != null)
                        {
                            foreach (var point in item.Points)
                            {
                                if (point.X < minX) { minX = point.X; }
                                if (point.Y < minY) { minY = point.Y; }
                                if (point.X > maxX) { maxX = point.X; }
                                if (point.Y > maxY) { maxY = point.Y; }
                            }
                        }

                        // NOTE(review): a row is inserted once per path item, using
                        // the bounds accumulated so far, rather than once per path.
                        // Kept as-is because consumers of these rows are not visible
                        // here; confirm whether one row per path was intended.
                        docObjects.InsertObjectRow("Path", pathObject.ToString(), "", "",
                            minX, minY, maxX, minY, minX, maxY, maxX, maxY);
                    }
                    break;
                }
            }
        }

        //DocObjects.FormTextObjects();
        //DocObjects.FormLineObjects();
        //DocObjects.FormCenteredObjects();
    }
}