Beispiel #1
0
        /// <summary>
        /// Save button handler: translates the checked radio button into a numeric vector type,
        /// stores <c>FieldDataDetails</c> under that type via <c>DataBase.DB.SaveVectorSet</c>,
        /// and then disposes this form. If no radio button is checked, the user is prompted instead.
        /// </summary>
        /// <param name="sender">Control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void bSave_Click(object sender, EventArgs e)
        {
            // Type codes: 1 = address, 2 = info block, 3 = RG line, 0 = nothing selected.
            // The radio buttons are probed in the same order as before.
            int selectedType = rbAddress.Checked ? 1
                             : rbInfoBlock.Checked ? 2
                             : rbRGLine.Checked ? 3
                             : 0;

            if (selectedType <= 0)
            {
                MessageBox.Show("Please indicate a type for this object.");
                return;
            }

            DataBase.DB vectorStore = new DataBase.DB();
            vectorStore.SaveVectorSet(selectedType, FieldDataDetails);
            this.Dispose();

            // TODO (original author): start here with fixing up a data model to house my raw vector data
        }
Beispiel #2
0
        /// <summary>
        /// Reads every row returned by <c>GetDocObjects("Extract")</c> and hands it to
        /// <c>ArchiveObjectRow</c>, tagged with the current <c>PDFFileName</c>.
        /// </summary>
        /// <remarks>
        /// Columns 0-3 are passed through as strings; columns 4-11 (the four corner coordinates)
        /// are converted with <see cref="Convert.ToDouble(string)"/>.
        /// NOTE(review): that overload parses with the current culture - confirm the database
        /// layer formats the coordinates with the same culture.
        /// </remarks>
        private void ArchiveTrainingData()
        {
            DataBase.DB DocObjects = new DataBase.DB();

            // Assign the query result directly; the previously allocated empty list was discarded.
            List <string[]> FieldDataDetails = DocObjects.GetDocObjects("Extract");

            foreach (string[] FieldDataDetail in FieldDataDetails)
            {
                DocObjects.ArchiveObjectRow(
                    FieldDataDetail[0],                    // Object_Type
                    FieldDataDetail[1],                    //Native_Value
                    FieldDataDetail[2],                    //OCR_Value
                    FieldDataDetail[3],                    //XLTD_Value
                    Convert.ToDouble(FieldDataDetail[4]),  //X1
                    Convert.ToDouble(FieldDataDetail[5]),  //Y1
                    Convert.ToDouble(FieldDataDetail[6]),  //X2
                    Convert.ToDouble(FieldDataDetail[7]),  //Y2
                    Convert.ToDouble(FieldDataDetail[8]),  //X3
                    Convert.ToDouble(FieldDataDetail[9]),  //Y3
                    Convert.ToDouble(FieldDataDetail[10]), //X4
                    Convert.ToDouble(FieldDataDetail[11]), //Y4
                    PDFFileName);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Form load handler: hooks the process-exit callback, empties the object tables, and,
        /// when a command-line argument is present, auto-processes it as a document path.
        /// </summary>
        /// <param name="sender">Control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void MainForm_Load(object sender, EventArgs e)
        {
            AppDomain.CurrentDomain.ProcessExit += new EventHandler(OnProcessExit);

            DataBase.DB DocObjects = new DataBase.DB();
            DocObjects.TruncateObjects(); // delete any data that might be in database

            // args[0] is the program itself; args[1], when present, is passed on as the file to process.
            string[] args = Environment.GetCommandLineArgs();
            if (args.Length > 1)
            {
                AutoprocessDocs(args[1]);
            }

            // SECURITY: the subscription keys for the C3PO_Text and C3PO_LUIS services, and a
            // commented-out LUIS client sample embedding another key, used to be pasted here.
            // Secrets must not live in source; load them from protected configuration instead.
            // The removed keys should be treated as exposed and rotated.
        }
Beispiel #4
0
 /// <summary>
 /// Segment-correction button handler: runs <c>FormLineObjects</c>, shows the "LinedText"
 /// rows in the grid, then asks the database layer for the mean spacing.
 /// </summary>
 /// <param name="sender">Control that raised the event (not used).</param>
 /// <param name="e">Event data (not used).</param>
 private void bSegCorrection_Click(object sender, EventArgs e)
 {
     DataBase.DB objectStore = new DataBase.DB();

     objectStore.FormLineObjects();
     PopulateGridView("LinedText");

     // The returned spacing is not consumed anywhere in this handler.
     int averageSpacing = objectStore.FindMeanSpacing();
 }
Beispiel #5
0
        /// <summary>
        /// Black-blob button handler: runs <c>ImageProcessor.BlackBlob</c> on the page image
        /// (<c>PDFFileName + ".png"</c>), extracts text from the returned rectangles, and
        /// refreshes the grid with the "Extract" rows.
        /// </summary>
        /// <param name="sender">Control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void bBlackBlob_Click(object sender, EventArgs e)
        {
            ImageProcessor analyzer    = new ImageProcessor();
            DataBase.DB    objectStore = new DataBase.DB();

            int pixelWidth  = 0;
            int pixelHeight = 0;
            List <Rectangle> blobRegions = analyzer.BlackBlob(PDFFileName + ".png", out pixelWidth, out pixelHeight);

            // Ratio of the PDF page width to the image width reported by BlackBlob.
            double pageToImageScale = PDFPageWidth / pixelWidth;
            objectStore.ExtractTextFromRect(blobRegions, pageToImageScale);

            PopulateGridView("Extract");
        }
Beispiel #6
0
        /// <summary>
        /// Clears <c>dgvDocObjects</c> and refills it with the rows returned by
        /// <c>GetDocObjects(ProcessField)</c>, one grid row per data row.
        /// </summary>
        /// <param name="ProcessField">Selector passed straight through to <c>GetDocObjects</c>.</param>
        private void PopulateGridView(string ProcessField)
        {
            // Number of leading values copied from each data row, in this column order:
            // Object_Type, Native_Value, OCR_Value, XLTD_Value, X1, Y1, X2, Y2, X3, Y3, X4, Y4,
            // Horizontal_Axis, Vertical_Axis, Span, Object_ID.
            const int ColumnCount = 16;

            dgvDocObjects.Rows.Clear();

            DataBase.DB DocObjects = new DataBase.DB();

            // Assign the query result directly; the previously allocated empty list was discarded.
            List <string[]> FieldDataDetails = DocObjects.GetDocObjects(ProcessField);

            //size the data grid to the number of rows in the data
            int RowNum = FieldDataDetails.Count;

            if (dgvDocObjects.Rows.Count < RowNum)
            {
                int NewRows = RowNum - dgvDocObjects.Rows.Count;
                dgvDocObjects.Rows.Add(NewRows);
            }

            int GridRow = 0;

            foreach (string[] FieldDataDetail in FieldDataDetails)
            {
                // Cell N receives value N of the data row, exactly as the former 16 assignments did.
                for (int Column = 0; Column < ColumnCount; Column++)
                {
                    dgvDocObjects.Rows[GridRow].Cells[Column].Value = FieldDataDetail[Column];
                }

                GridRow++;
            }
        }
Beispiel #7
0
        /// <summary>
        /// Mouse-up handler for the page image: finishes the rubber-band selection, converts the
        /// selected rectangle from picture-box coordinates to document coordinates, highlights the
        /// objects returned by <c>GetDocLTObjectsInBox</c>, and opens the modal <c>Classifier</c>
        /// dialog on them. Does nothing unless a drag is in progress.
        /// </summary>
        /// <param name="sender">Control that raised the event (not used).</param>
        /// <param name="e">Mouse event data (not used).</param>
        private void pbPDFImage_MouseUp(object sender, MouseEventArgs e)
        {
            if (!drawing)
            {
                return;
            }

            drawing = false;
            var rc = getRectangle();
            rectangles.Clear();
            if (rc.Width > 0 && rc.Height > 0)
            {
                rectangles.Add(rc);
            }
            pbPDFImage.Invalidate();

            //convert the image box coordinates to the document coordinates
            Rectangle display = pbPDFImage.DisplayRectangle;
            double    X1Doc   = rc.X * Convert.ToDouble(PDFPageWidth) / Convert.ToDouble(display.Width);
            double    Y1Doc   = rc.Y * Convert.ToDouble(PDFPageHeight) / Convert.ToDouble(display.Height);

            double WidthDoc  = rc.Width * Convert.ToDouble(PDFPageWidth) / Convert.ToDouble(display.Width);
            double HeightDoc = rc.Height * Convert.ToDouble(PDFPageHeight) / Convert.ToDouble(display.Height);

            //get all Lined Text Objects that fall within this box
            DataBase.DB DocObjects = new DataBase.DB();

            // Assign the query result directly; the previously allocated empty list was discarded.
            List <string[]> FieldDataDetails = DocObjects.GetDocLTObjectsInBox(X1Doc, Y1Doc, WidthDoc, HeightDoc);

            //highlight the fields on the screen
            foreach (string[] FieldData in FieldDataDetails)
            {
                // Argument order is columns 4, 9, 5, 6. Assuming these rows share the layout shown in
                // the grid (4 = X1, 5 = Y1, 6 = X2, 9 = Y3), that is X1, Y3, Y1, X2 - TODO confirm
                // against DrawRect's parameter list.
                DrawRect(Convert.ToDouble(FieldData[4]), Convert.ToDouble(FieldData[9]), Convert.ToDouble(FieldData[5]), Convert.ToDouble(FieldData[6]));
            }

            // A form shown with ShowDialog is not disposed when it closes, so release it explicitly.
            // Disposing again is harmless if the dialog already disposed itself.
            using (Classifier UserAction = new Classifier(FieldDataDetails))
            {
                UserAction.ShowDialog(); //show a modal dialog box here for manual classification
            }
        }
Beispiel #8
0
        /// <summary>
        /// Object-analysis button handler: extracts the PDF objects of <c>PDFFileName</c>, runs
        /// <c>LocateKeywords</c>, renders the first page to <c>PDFFileName + ".png"</c>, reloads
        /// the image box, and shows the "Text" rows in the grid.
        /// </summary>
        /// <param name="sender">Control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void btnObjectAnalysis_Click(object sender, EventArgs e)
        {
            //use the PDF library to break out all of the PDF Objects
            Readers.PDFReader Rdr = new Readers.PDFReader();
            Rdr.getObjectsFromPDF(PDFFileName, out PDFPageWidth, out PDFPageHeight);
            //break up text objects with multiple words into a single word per object
            DataBase.DB DocObjects = new DataBase.DB();

            DocObjects.LocateKeywords();

            PdfFixedDocument doc      = new PdfFixedDocument(PDFFileName);
            PdfPageRenderer  renderer = new PdfPageRenderer(doc.Pages[0]);

            // FileMode.Create truncates an existing file. File.OpenWrite did not, so rendering a
            // smaller image over an older, larger PNG left stale bytes at the end of the file.
            // The using block also closes the stream if rendering throws.
            using (FileStream PNGStream = new FileStream(PDFFileName + ".png", FileMode.Create, FileAccess.Write, FileShare.None))
            {
                renderer.ConvertPageToImage(dpi, PNGStream, PdfPageImageFormat.Png);
                PNGStream.Flush();
            }

            // The stream is closed before the image box reads the file back.
            LoadImageBox();

            PopulateGridView("Text");
        }
Beispiel #9
0
        /// <summary>
        /// Runs the whole pipeline on one document without user interaction: extracts the PDF
        /// objects, locates keywords, extracts text from the rectangles found by <c>BlackBox</c>
        /// and then by <c>BlackBlob</c>, translates the text, archives the result, and shows the
        /// "Extract" rows in the grid.
        /// </summary>
        /// <param name="Filename">Path of the PDF to process; also stored in <c>PDFFileName</c>.</param>
        /// <remarks>
        /// NOTE(review): the image is read from <c>Filename + ".png"</c>, but nothing in this method
        /// renders that file - presumably it must already exist; confirm against the callers.
        /// </remarks>
        private void AutoprocessDocs(string Filename)
        {
            //load the imagebox
            PDFFileName = Filename;

            //Autoprocess
            //use the PDF library to break out all of the PDF Objects
            Readers.PDFReader Rdr = new Readers.PDFReader();
            Rdr.getObjectsFromPDF(Filename, out PDFPageWidth, out PDFPageHeight);
            //break up text objects with multiple words into a single word per object
            DataBase.DB DocObjects = new DataBase.DB();

            DocObjects.LocateKeywords();

            ImageProcessor EvalImage   = new ImageProcessor();
            int            ImageWidth  = 0;
            int            ImageHeight = 0;

            // Pass 1: rectangles found by BlackBox.
            List <Rectangle> BlackBoxes = EvalImage.BlackBox(Filename + ".png", out ImageWidth, out ImageHeight, true);
            double           ImageScale = PDFPageWidth / ImageWidth;

            DocObjects.ExtractTextFromRect(BlackBoxes, ImageScale);

            // Pass 2: rectangles found by BlackBlob. This pass previously re-submitted BlackBoxes,
            // so the blob result was computed but never used and the boxes were extracted twice.
            List <Rectangle> BlackBlobs = EvalImage.BlackBlob(Filename + ".png", out ImageWidth, out ImageHeight, true);

            ImageScale = PDFPageWidth / ImageWidth;
            DocObjects.ExtractTextFromRect(BlackBlobs, ImageScale);

            DocObjects.TranslateText();

            ArchiveTrainingData();

            PopulateGridView("Extract");
        }
Beispiel #10
0
 /// <summary>
 /// Identify-data button handler: runs <c>ProcessIntents</c> on a fresh <c>DataBase.DB</c>.
 /// </summary>
 /// <param name="sender">Control that raised the event (not used).</param>
 /// <param name="e">Event data (not used).</param>
 private void bIdentifyData_Click(object sender, EventArgs e)
 {
     new DataBase.DB().ProcessIntents();
 }
Beispiel #11
0
 /// <summary>
 /// Translate button handler: runs <c>TranslateText</c> on a fresh <c>DataBase.DB</c>,
 /// then shows the "Extract" rows in the grid.
 /// </summary>
 /// <param name="sender">Control that raised the event (not used).</param>
 /// <param name="e">Event data (not used).</param>
 private void btranslate_click(object sender, EventArgs e)
 {
     new DataBase.DB().TranslateText();

     PopulateGridView("Extract");
 }
Beispiel #12
0
        /// <summary>
        /// Extracts the visual objects (images, text fragments, paths) from the first page of a
        /// PDF and stores them through <c>DataBase.DB.InsertObjectRow</c>. Existing object rows are
        /// removed with <c>TruncateObjects</c> before anything is inserted.
        /// </summary>
        /// <param name="PDFFileName">Path of the PDF file; opened read-only with shared read access.</param>
        /// <param name="PDFPageWidth">Receives the <c>Width</c> of the first page.</param>
        /// <param name="PDFPageHeight">Receives the <c>Height</c> of the first page.</param>
        public void getObjectsFromPDF(string PDFFileName, out double PDFPageWidth, out double PDFPageHeight)
        {
            // The input stream was previously never closed, which kept the file handle open.
            // It stays open for the whole extraction in case the PDF library reads lazily.
            using (FileStream pageObjectsInput = new FileStream(PDFFileName, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                PdfFixedDocument document    = new PdfFixedDocument(pageObjectsInput);
                PdfPage          DisplayPage = document.Pages[0];

                PDFPageWidth  = DisplayPage.Width;
                PDFPageHeight = DisplayPage.Height;

                PdfContentExtractor       ce  = new PdfContentExtractor(DisplayPage);
                PdfVisualObjectCollection voc = ce.ExtractVisualObjects(false);

                DataBase.DB DocObjects = new DataBase.DB();

                DocObjects.TruncateObjects(); // delete any data that might be in database

                for (int i = 0; i < voc.Count; i++)
                {
                    switch (voc[i].Type)
                    {
                        case PdfVisualObjectType.Image:
                        {
                            // Direct cast: a type mismatch fails here instead of as a later null dereference.
                            PdfImageVisualObject ivo = (PdfImageVisualObject)voc[i];
                            //add image ocr here
                            DocObjects.InsertObjectRow("Image", ivo.Image.ImageID, "", "",
                                                       ivo.Image.ImageCorners[0].X, ivo.Image.ImageCorners[0].Y,
                                                       ivo.Image.ImageCorners[1].X, ivo.Image.ImageCorners[1].Y,
                                                       ivo.Image.ImageCorners[2].X, ivo.Image.ImageCorners[2].Y,
                                                       ivo.Image.ImageCorners[3].X, ivo.Image.ImageCorners[3].Y);
                            break;
                        }

                        case PdfVisualObjectType.Text:
                        {
                            PdfTextVisualObject tvo = (PdfTextVisualObject)voc[i];

                            // The OCR and translated values are inserted empty. The id returned by
                            // InsertObjectRow is not needed while per-glyph detail insertion is disabled.
                            DocObjects.InsertObjectRow("Text", tvo.TextFragment.Text, "", "",
                                                       tvo.TextFragment.FragmentCorners[0].X, tvo.TextFragment.FragmentCorners[0].Y,
                                                       tvo.TextFragment.FragmentCorners[1].X, tvo.TextFragment.FragmentCorners[1].Y,
                                                       tvo.TextFragment.FragmentCorners[2].X, tvo.TextFragment.FragmentCorners[2].Y,
                                                       tvo.TextFragment.FragmentCorners[3].X, tvo.TextFragment.FragmentCorners[3].Y);
                            break;
                        }

                        case PdfVisualObjectType.Path:
                        {
                            PdfPathVisualObject pvo = (PdfPathVisualObject)voc[i];

                            // Running bounding rectangle over all points seen so far in this path.
                            // The sentinels survive unchanged when no point has been seen yet.
                            double minX = 999999, minY = 999999, maxX = -999999, maxY = -999999;
                            for (int j = 0; j < pvo.PathItems.Count; j++)
                            {
                                PdfPathItem pi = pvo.PathItems[j];
                                if (pi.Points != null)
                                {
                                    for (int k = 0; k < pi.Points.Length; k++)
                                    {
                                        if (minX >= pi.Points[k].X)
                                        {
                                            minX = pi.Points[k].X;
                                        }
                                        if (minY >= pi.Points[k].Y)
                                        {
                                            minY = pi.Points[k].Y;
                                        }
                                        if (maxX <= pi.Points[k].X)
                                        {
                                            maxX = pi.Points[k].X;
                                        }
                                        if (maxY <= pi.Points[k].Y)
                                        {
                                            maxY = pi.Points[k].Y;
                                        }
                                    }
                                }

                                // NOTE(review): a row is inserted for every path item, carrying the
                                // cumulative bounds so far. One row per path, after this loop, looks like
                                // the intent, but the behavior is left unchanged pending confirmation.
                                DocObjects.InsertObjectRow("Path", pvo.ToString(), "", "", minX, minY, maxX, minY, minX, maxY, maxX, maxY);
                            }

                            break;
                        }
                    }
                }
            }
            //DocObjects.FormTextObjects();
            //DocObjects.FormLineObjects();
            //DocObjects.FormCenteredObjects();
        }