Beispiel #1
0
        /// <summary>
        /// Handles the form's Load event: registers the process-exit handler, empties the
        /// object table, and, when a command-line argument was supplied, hands it to
        /// <c>AutoprocessDocs</c>.
        /// </summary>
        /// <param name="sender">The object that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void MainForm_Load(object sender, EventArgs e)
        {
            AppDomain.CurrentDomain.ProcessExit += OnProcessExit;

            DataBase.DB DocObjects = new DataBase.DB();
            DocObjects.TruncateObjects(); // delete any data that might be in database

            // Element 0 of GetCommandLineArgs() is the executable itself, so the first
            // user-supplied argument (if any) is at index 1.
            string[] args = Environment.GetCommandLineArgs();
            if (args.Length > 1)
            {
                AutoprocessDocs(args[1]);
            }

            // SECURITY: service subscription keys (text and LUIS services) used to be pasted
            // here as comments, together with a disabled LUIS client sample that embedded a
            // key. They have been removed from source. Treat those keys as compromised
            // (rotate them) and load replacements from configuration or a secret store.
        }
Beispiel #2
0
        /// <summary>
        /// Extracts the visual objects (images, text fragments and paths) from the first page
        /// of a PDF file and records one row per object via <c>DataBase.DB.InsertObjectRow</c>.
        /// Any rows already stored are removed first via <c>TruncateObjects</c>.
        /// </summary>
        /// <param name="PDFFileName">Path of the PDF file; it is opened read-only with shared read access.</param>
        /// <param name="PDFPageWidth">Receives the width of the first page.</param>
        /// <param name="PDFPageHeight">Receives the height of the first page.</param>
        public void getObjectsFromPDF(string PDFFileName, out double PDFPageWidth, out double PDFPageHeight)
        {
            // The stream is kept open for the whole extraction (the PDF library may read from
            // it lazily) and is released deterministically when the block exits, even on error.
            using (FileStream pageObjectsInput = new FileStream(PDFFileName, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                PdfFixedDocument document = new PdfFixedDocument(pageObjectsInput);
                PdfPage DisplayPage = document.Pages[0];

                PDFPageWidth = DisplayPage.Width;
                PDFPageHeight = DisplayPage.Height;

                PdfContentExtractor ce = new PdfContentExtractor(DisplayPage);
                PdfVisualObjectCollection voc = ce.ExtractVisualObjects(false);

                DataBase.DB DocObjects = new DataBase.DB();
                DocObjects.TruncateObjects(); // delete any data that might be in database

                for (int i = 0; i < voc.Count; i++)
                {
                    switch (voc[i].Type)
                    {
                        case PdfVisualObjectType.Image:
                            // Direct cast: a type mismatch fails loudly here instead of as a
                            // NullReferenceException further down.
                            PdfImageVisualObject ivo = (PdfImageVisualObject)voc[i];
                            // TODO: add image OCR here
                            DocObjects.InsertObjectRow("Image", ivo.Image.ImageID, "", "",
                                                       ivo.Image.ImageCorners[0].X, ivo.Image.ImageCorners[0].Y,
                                                       ivo.Image.ImageCorners[1].X, ivo.Image.ImageCorners[1].Y,
                                                       ivo.Image.ImageCorners[2].X, ivo.Image.ImageCorners[2].Y,
                                                       ivo.Image.ImageCorners[3].X, ivo.Image.ImageCorners[3].Y);
                            break;

                        case PdfVisualObjectType.Text:
                            PdfTextVisualObject tvo = (PdfTextVisualObject)voc[i];
                            // The translated-text column is stored empty at extraction time.
                            // (Per-glyph detail rows were previously sketched here but are disabled.)
                            DocObjects.InsertObjectRow("Text", tvo.TextFragment.Text, "", "",
                                                       tvo.TextFragment.FragmentCorners[0].X, tvo.TextFragment.FragmentCorners[0].Y,
                                                       tvo.TextFragment.FragmentCorners[1].X, tvo.TextFragment.FragmentCorners[1].Y,
                                                       tvo.TextFragment.FragmentCorners[2].X, tvo.TextFragment.FragmentCorners[2].Y,
                                                       tvo.TextFragment.FragmentCorners[3].X, tvo.TextFragment.FragmentCorners[3].Y);
                            break;

                        case PdfVisualObjectType.Path:
                            PdfPathVisualObject pvo = (PdfPathVisualObject)voc[i];

                            // Examine all the path points and determine the minimum rectangle
                            // that bounds the whole path.
                            double minX = double.PositiveInfinity, minY = double.PositiveInfinity;
                            double maxX = double.NegativeInfinity, maxY = double.NegativeInfinity;
                            bool hasPoints = false;

                            for (int j = 0; j < pvo.PathItems.Count; j++)
                            {
                                PdfPathItem pi = pvo.PathItems[j];
                                if (pi.Points == null)
                                {
                                    continue;
                                }

                                for (int k = 0; k < pi.Points.Length; k++)
                                {
                                    double x = pi.Points[k].X;
                                    double y = pi.Points[k].Y;

                                    if (x < minX)
                                    {
                                        minX = x;
                                    }
                                    if (y < minY)
                                    {
                                        minY = y;
                                    }
                                    if (x > maxX)
                                    {
                                        maxX = x;
                                    }
                                    if (y > maxY)
                                    {
                                        maxY = y;
                                    }

                                    hasPoints = true;
                                }
                            }

                            // Insert exactly one row per path, after every item has been examined.
                            // Inserting inside the item loop stored a partial, still-growing
                            // rectangle for each item. A path without any points has no bounds
                            // and is skipped rather than stored with sentinel coordinates.
                            if (hasPoints)
                            {
                                DocObjects.InsertObjectRow("Path", pvo.ToString(), "", "", minX, minY, maxX, minY, minX, maxY, maxX, maxY);
                            }

                            break;
                    }
                }

                // Post-processing passes, currently disabled:
                //DocObjects.FormTextObjects();
                //DocObjects.FormLineObjects();
                //DocObjects.FormCenteredObjects();
            }
        }