/// <summary>
/// Click handler for the Save button. Maps the checked radio button to a numeric
/// vector type, saves the current field data under that type, and disposes this
/// control/form. If no radio button is checked, the user is prompted instead.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void bSave_Click(object sender, EventArgs e)
{
    // Type codes: rbAddress = 1, rbInfoBlock = 2, rbRGLine = 3, nothing checked = 0.
    // The radio buttons are tested in the same order as before; the first checked one wins.
    int vectorType = rbAddress.Checked ? 1
        : rbInfoBlock.Checked ? 2
        : rbRGLine.Checked ? 3
        : 0;

    if (vectorType <= 0)
    {
        MessageBox.Show("Please indicate a type for this object.");
        return;
    }

    DataBase.DB database = new DataBase.DB();
    database.SaveVectorSet(vectorType, FieldDataDetails);
    this.Dispose();

    // TODO (original author): start here with fixing up a data model to house the raw vector data.
}
/// <summary>
/// Reads every "Extract" document-object row from the database and passes each one,
/// together with the current <c>PDFFileName</c>, to <c>ArchiveObjectRow</c>.
/// </summary>
/// <remarks>
/// Columns 4..11 of each row are parsed with <see cref="Convert.ToDouble(string)"/>,
/// which uses the current culture and throws <see cref="FormatException"/> on
/// non-numeric text.
/// </remarks>
private void ArchiveTrainingData()
{
    DataBase.DB DocObjects = new DataBase.DB();
    List<string[]> FieldDataDetails = DocObjects.GetDocObjects("Extract");

    foreach (string[] FieldDataDetail in FieldDataDetails)
    {
        DocObjects.ArchiveObjectRow(
            FieldDataDetail[0],                       // Object_Type
            FieldDataDetail[1],                       // Native_Value
            FieldDataDetail[2],                       // OCR_Value
            FieldDataDetail[3],                       // XLTD_Value
            Convert.ToDouble(FieldDataDetail[4]),     // X1
            Convert.ToDouble(FieldDataDetail[5]),     // Y1
            Convert.ToDouble(FieldDataDetail[6]),     // X2
            Convert.ToDouble(FieldDataDetail[7]),     // Y2
            Convert.ToDouble(FieldDataDetail[8]),     // X3
            Convert.ToDouble(FieldDataDetail[9]),     // Y3
            Convert.ToDouble(FieldDataDetail[10]),    // X4
            Convert.ToDouble(FieldDataDetail[11]),    // Y4
            PDFFileName);
    }
}
/// <summary>
/// Form load handler. Registers the process-exit handler, clears the object table,
/// and, when a command-line argument is supplied, auto-processes that file.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void MainForm_Load(object sender, EventArgs e)
{
    AppDomain.CurrentDomain.ProcessExit += OnProcessExit;

    DataBase.DB DocObjects = new DataBase.DB();
    DocObjects.TruncateObjects(); // delete any data that might be in database

    // args[0] is the executable path; args[1], when present, is the document to process.
    string[] args = Environment.GetCommandLineArgs();
    if (args.Length > 1)
    {
        AutoprocessDocs(args[1]);
    }

    // SECURITY: service subscription keys must never be written in source or comments.
    // Load them from secure configuration (environment variable, user secrets, key vault).
    // NOTE(review): keys formerly listed in this method remain in version-control
    // history and should be rotated.
}
/// <summary>
/// Click handler for segment correction: forms line objects in the database,
/// refreshes the grid with the "LinedText" rows, then queries the mean spacing.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void bSegCorrection_Click(object sender, EventArgs e)
{
    var database = new DataBase.DB();

    database.FormLineObjects();
    PopulateGridView("LinedText");

    // The returned value is not consumed here; the call itself is retained as in the original.
    int meanSpacing = database.FindMeanSpacing();
}
/// <summary>
/// Click handler that runs blob detection on the rendered page image
/// (<c>PDFFileName + ".png"</c>), extracts text from the detected rectangles,
/// and refreshes the grid with the "Extract" rows.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void bBlackBlob_Click(object sender, EventArgs e)
{
    var imageProcessor = new ImageProcessor();
    var database = new DataBase.DB();

    string pngPath = PDFFileName + ".png";
    int pngWidth = 0;
    int pngHeight = 0;
    List<Rectangle> blobRectangles = imageProcessor.BlackBlob(pngPath, out pngWidth, out pngHeight);

    // Ratio of PDF page width to image pixel width, handed to ExtractTextFromRect as the scale.
    double pageToImageScale = PDFPageWidth / pngWidth;

    database.ExtractTextFromRect(blobRectangles, pageToImageScale);
    PopulateGridView("Extract");
}
/// <summary>
/// Clears <c>dgvDocObjects</c> and refills it with the document-object rows
/// returned by <c>GetDocObjects</c> for the given process field.
/// </summary>
/// <param name="ProcessField">Selector passed to <c>GetDocObjects</c> (callers in this file use "Text", "LinedText" and "Extract").</param>
/// <remarks>
/// Each row array must hold at least 16 values; they are copied to grid cells 0..15 in order:
/// Object_Type, Native_Value, OCR_Value, XLTD_Value, X1, Y1, X2, Y2, X3, Y3, X4, Y4,
/// Horizontal_Axis, Vertical_Axis, Span, Object_ID.
/// </remarks>
private void PopulateGridView(string ProcessField)
{
    // Number of leading values copied from every data row into the grid.
    const int ColumnCount = 16;

    dgvDocObjects.Rows.Clear();

    DataBase.DB DocObjects = new DataBase.DB();
    List<string[]> FieldDataDetails = DocObjects.GetDocObjects(ProcessField);

    // Size the data grid to the number of rows in the data. Rows.Count is re-read after
    // Clear() because the grid may still report rows at that point.
    int MissingRows = FieldDataDetails.Count - dgvDocObjects.Rows.Count;
    if (MissingRows > 0)
    {
        dgvDocObjects.Rows.Add(MissingRows);
    }

    int GridRow = 0;
    foreach (string[] FieldDataDetail in FieldDataDetails)
    {
        DataGridViewCellCollection Cells = dgvDocObjects.Rows[GridRow].Cells;
        for (int Column = 0; Column < ColumnCount; Column++)
        {
            Cells[Column].Value = FieldDataDetail[Column];
        }

        GridRow++;
    }
}
/// <summary>
/// Mouse-up handler for the page image. Finishes the rubber-band selection, converts
/// the selected rectangle from picture-box coordinates to document coordinates,
/// highlights the lined-text objects returned for that box, and opens the
/// <c>Classifier</c> dialog modally for manual classification.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Mouse event arguments (unused).</param>
private void pbPDFImage_MouseUp(object sender, MouseEventArgs e)
{
    if (!drawing)
    {
        return;
    }

    drawing = false;

    var rc = getRectangle();
    rectangles.Clear();
    if (rc.Width > 0 && rc.Height > 0)
    {
        rectangles.Add(rc);
    }

    pbPDFImage.Invalidate();

    // Convert the image box coordinates to the document coordinates.
    Rectangle display = pbPDFImage.DisplayRectangle;
    double X1Doc = rc.X * Convert.ToDouble(PDFPageWidth) / Convert.ToDouble(display.Width);
    double Y1Doc = rc.Y * Convert.ToDouble(PDFPageHeight) / Convert.ToDouble(display.Height);
    double WidthDoc = rc.Width * Convert.ToDouble(PDFPageWidth) / Convert.ToDouble(display.Width);
    double HeightDoc = rc.Height * Convert.ToDouble(PDFPageHeight) / Convert.ToDouble(display.Height);

    // Get all Lined Text Objects that fall within this box.
    DataBase.DB DocObjects = new DataBase.DB();
    List<string[]> FieldDataDetails = DocObjects.GetDocLTObjectsInBox(X1Doc, Y1Doc, WidthDoc, HeightDoc);

    // Highlight the fields on the screen.
    foreach (string[] FieldData in FieldDataDetails)
    {
        // Passes columns 4, 9, 5, 6 in that order.
        // NOTE(review): the original comment labelled these "X1, Y1, Y3, X2", but if these rows
        // use the same layout as the other document-object rows (4=X1, 5=Y1, 6=X2, 9=Y3) the
        // actual order is X1, Y3, Y1, X2 — confirm against DrawRect's parameter order.
        DrawRect(
            Convert.ToDouble(FieldData[4]),
            Convert.ToDouble(FieldData[9]),
            Convert.ToDouble(FieldData[5]),
            Convert.ToDouble(FieldData[6]));
    }

    // A dialog shown with ShowDialog() is not disposed when it closes, so dispose it
    // explicitly to release its window handle and child controls.
    using (Classifier UserAction = new Classifier(FieldDataDetails))
    {
        UserAction.ShowDialog(); // modal dialog box for manual classification
    }
}
/// <summary>
/// Click handler for object analysis. Extracts the objects of the current PDF into the
/// database, runs keyword location, renders page 0 to <c>PDFFileName + ".png"</c>,
/// then reloads the image box and shows the "Text" rows in the grid.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void btnObjectAnalysis_Click(object sender, EventArgs e)
{
    // Use the PDF library to break out all of the PDF objects.
    Readers.PDFReader Rdr = new Readers.PDFReader();
    Rdr.getObjectsFromPDF(PDFFileName, out PDFPageWidth, out PDFPageHeight);

    // Break up text objects with multiple words into a single word per object.
    DataBase.DB DocObjects = new DataBase.DB();
    DocObjects.LocateKeywords();

    PdfFixedDocument doc = new PdfFixedDocument(PDFFileName);
    PdfPageRenderer renderer = new PdfPageRenderer(doc.Pages[0]);

    // File.Create truncates an existing file. File.OpenWrite would keep the old length, leaving
    // stale bytes after the new image whenever the re-rendered PNG is smaller than the old one.
    // The using block also guarantees the handle is released if rendering throws.
    using (FileStream PNGStream = File.Create(PDFFileName + ".png"))
    {
        renderer.ConvertPageToImage(dpi, PNGStream, PdfPageImageFormat.Png);
    }

    LoadImageBox();
    PopulateGridView("Text");
}
/// <summary>
/// Runs the whole processing pipeline for one document without user interaction:
/// PDF object extraction, keyword location, box detection and blob detection on the
/// page image (each followed by text extraction from the detected rectangles),
/// translation, archiving, and finally a grid refresh with the "Extract" rows.
/// </summary>
/// <param name="Filename">Path of the PDF to process; also stored in <c>PDFFileName</c>.</param>
private void AutoprocessDocs(string Filename)
{
    PDFFileName = Filename;

    // Use the PDF library to break out all of the PDF objects.
    Readers.PDFReader Rdr = new Readers.PDFReader();
    Rdr.getObjectsFromPDF(Filename, out PDFPageWidth, out PDFPageHeight);

    // Break up text objects with multiple words into a single word per object.
    DataBase.DB DocObjects = new DataBase.DB();
    DocObjects.LocateKeywords();

    // NOTE(review): nothing in this method renders Filename + ".png"; presumably the
    // image must already exist (or the image processor creates it) — confirm.
    ImageProcessor EvalImage = new ImageProcessor();
    string ImagePath = Filename + ".png";
    int ImageWidth = 0;
    int ImageHeight = 0;

    // Pass 1: rectangles found by box detection.
    List<Rectangle> BlackBoxes = EvalImage.BlackBox(ImagePath, out ImageWidth, out ImageHeight, true);
    double ImageScale = PDFPageWidth / ImageWidth;
    DocObjects.ExtractTextFromRect(BlackBoxes, ImageScale);

    // Pass 2: rectangles found by blob detection. The previous code submitted BlackBoxes
    // a second time here, so the blob results were computed and then never used.
    List<Rectangle> BlackBlobs = EvalImage.BlackBlob(ImagePath, out ImageWidth, out ImageHeight, true);
    ImageScale = PDFPageWidth / ImageWidth;
    DocObjects.ExtractTextFromRect(BlackBlobs, ImageScale);

    DocObjects.TranslateText();
    ArchiveTrainingData();
    PopulateGridView("Extract");
}
/// <summary>
/// Click handler that asks the database layer to process intents.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void bIdentifyData_Click(object sender, EventArgs e)
{
    new DataBase.DB().ProcessIntents();
}
/// <summary>
/// Click handler that runs text translation in the database layer and then
/// refreshes the grid with the "Extract" rows.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void btranslate_click(object sender, EventArgs e)
{
    var database = new DataBase.DB();
    database.TranslateText();

    PopulateGridView("Extract");
}
/// <summary>
/// Opens the PDF, reports the size of its first page, clears the object table, and
/// inserts one database row for each image, text fragment and path found on that page.
/// </summary>
/// <param name="PDFFileName">Path of the PDF file to read.</param>
/// <param name="PDFPageWidth">Receives the width of page 0.</param>
/// <param name="PDFPageHeight">Receives the height of page 0.</param>
/// <remarks>
/// Only page 0 is examined. Per-glyph detail rows are not recorded. A path contributes a
/// single row holding the bounding rectangle of all its points; a path with no points
/// contributes no row.
/// </remarks>
public void getObjectsFromPDF(string PDFFileName, out double PDFPageWidth, out double PDFPageHeight)
{
    // Keep the stream open for as long as the document is in use and release the file
    // handle when extraction finishes, even if it fails part-way.
    using (FileStream pageObjectsInput = new FileStream(PDFFileName, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        PdfFixedDocument document = new PdfFixedDocument(pageObjectsInput);
        PdfPage DisplayPage = document.Pages[0];
        PDFPageWidth = DisplayPage.Width;
        PDFPageHeight = DisplayPage.Height;

        PdfContentExtractor ce = new PdfContentExtractor(DisplayPage);
        PdfVisualObjectCollection voc = ce.ExtractVisualObjects(false);

        DataBase.DB DocObjects = new DataBase.DB();
        DocObjects.TruncateObjects(); // delete any data that might be in database

        for (int i = 0; i < voc.Count; i++)
        {
            switch (voc[i].Type)
            {
                case PdfVisualObjectType.Image:
                {
                    // Direct cast: a type mismatch fails here with a clear InvalidCastException
                    // instead of a NullReferenceException on the first member access.
                    PdfImageVisualObject ivo = (PdfImageVisualObject)voc[i];

                    // TODO (original author): add image OCR here.
                    DocObjects.InsertObjectRow(
                        "Image", ivo.Image.ImageID, "", "",
                        ivo.Image.ImageCorners[0].X, ivo.Image.ImageCorners[0].Y,
                        ivo.Image.ImageCorners[1].X, ivo.Image.ImageCorners[1].Y,
                        ivo.Image.ImageCorners[2].X, ivo.Image.ImageCorners[2].Y,
                        ivo.Image.ImageCorners[3].X, ivo.Image.ImageCorners[3].Y);
                    break;
                }

                case PdfVisualObjectType.Text:
                {
                    PdfTextVisualObject tvo = (PdfTextVisualObject)voc[i];

                    // The OCR and translated values are left empty at this stage.
                    DocObjects.InsertObjectRow(
                        "Text", tvo.TextFragment.Text, "", "",
                        tvo.TextFragment.FragmentCorners[0].X, tvo.TextFragment.FragmentCorners[0].Y,
                        tvo.TextFragment.FragmentCorners[1].X, tvo.TextFragment.FragmentCorners[1].Y,
                        tvo.TextFragment.FragmentCorners[2].X, tvo.TextFragment.FragmentCorners[2].Y,
                        tvo.TextFragment.FragmentCorners[3].X, tvo.TextFragment.FragmentCorners[3].Y);
                    break;
                }

                case PdfVisualObjectType.Path:
                {
                    PdfPathVisualObject pvo = (PdfPathVisualObject)voc[i];

                    // Examine all the path points and determine the minimum rectangle
                    // that bounds the path.
                    bool hasPoints = false;
                    double minX = double.PositiveInfinity;
                    double minY = double.PositiveInfinity;
                    double maxX = double.NegativeInfinity;
                    double maxY = double.NegativeInfinity;

                    for (int j = 0; j < pvo.PathItems.Count; j++)
                    {
                        PdfPathItem pi = pvo.PathItems[j];
                        if (pi.Points == null)
                        {
                            continue;
                        }

                        for (int k = 0; k < pi.Points.Length; k++)
                        {
                            hasPoints = true;

                            if (pi.Points[k].X < minX) { minX = pi.Points[k].X; }
                            if (pi.Points[k].Y < minY) { minY = pi.Points[k].Y; }
                            if (pi.Points[k].X > maxX) { maxX = pi.Points[k].X; }
                            if (pi.Points[k].Y > maxY) { maxY = pi.Points[k].Y; }
                        }
                    }

                    // Insert once per path, after every item has been scanned. Inserting inside
                    // the item loop stored partial, still-growing rectangles (and sentinel-valued
                    // rows for items without points) instead of the path's bounding rectangle.
                    if (hasPoints)
                    {
                        DocObjects.InsertObjectRow(
                            "Path", pvo.ToString(), "", "",
                            minX, minY, maxX, minY, minX, maxY, maxX, maxY);
                    }

                    break;
                }
            }
        }
    }
}