示例#1
0
    /// <summary>
    /// Runs Tesseract OCR on a single page image that was previously stored in Redis
    /// and sends the result back through <c>App.Reply</c>.
    /// </summary>
    /// <param name="requestId">Request identifier, passed through unchanged to <c>App.Reply</c>.</param>
    /// <param name="cmd">
    /// <c>OCR_TEXT_PAGE</c> adds the recognized text ("ocr_text");
    /// <c>OCR_BOX_PAGE</c> adds the segmented region boxes ("box_format", "box_text", "box_count").
    /// Any other value replies with only "id" and "page".
    /// </param>
    /// <param name="input">Either "docId" or "docId.page". Unparsable parts fall back to 0 (docId) and -1 (page).</param>
    /// <param name="data">
    /// Optional settings: "ocr_lang" (default "vie"), "ocr_mode" (default <c>EngineMode.Default</c>)
    /// and "ocr_level" (default <c>PageIteratorLevel.Word</c>).
    /// </param>
    /// <remarks>
    /// Nothing is replied when the page is not present in Redis or the stored bitmap is null.
    /// Failures are recorded best-effort in the Redis hash "_ERROR:PDF:{cmd}" under the document id.
    /// </remarks>
    public static void convertImage2Text_OneOrAllPage(string requestId, COMMANDS cmd, string input, Dictionary <string, object> data)
    {
        // NOTE(review): the connection is declared ONLY_WRITE but uses the READ port and is
        // used for reads below — confirm this combination is intentional.
        var redis = new RedisBase(new RedisSetting(REDIS_TYPE.ONLY_WRITE, __CONFIG.REDIS_PORT_READ));

        long docId = 0;
        int  page  = -1;

        // A null input is treated like an empty one instead of throwing before the try block.
        string[] parts = (input ?? string.Empty).Split('.');
        long.TryParse(parts[0], out docId);
        if (parts.Length > 1 && !int.TryParse(parts[1], out page))
        {
            // TryParse zeroes its out argument on failure; restore the "no page" sentinel
            // so a malformed page number is not silently treated as page 0.
            page = -1;
        }

        var ocr_lang  = data.Get <string>("ocr_lang", "vie");
        var ocr_mode  = data.Get <EngineMode>("ocr_mode", EngineMode.Default);
        var ocr_level = data.Get <PageIteratorLevel>("ocr_level", PageIteratorLevel.Word);

        try
        {
            if (!redis.HEXISTS(docId.ToString(), page.ToString()))
            {
                return;
            }

            // The bitmap is a disposable resource; release it once OCR and the reply are done.
            using (var bitmap = redis.HGET_BITMAP(docId, page))
            {
                if (bitmap == null)
                {
                    return;
                }

                var dic = new Dictionary <string, object>()
                {
                    { "id", docId },
                    { "page", page },
                };

                using (var engine = new TesseractEngine("tessdata", ocr_lang, ocr_mode))
                using (var pix = new BitmapToPixConverter().Convert(bitmap))
                using (var tes = engine.Process(pix))
                {
                    switch (cmd)
                    {
                    case COMMANDS.OCR_TEXT_PAGE:
                        string s = tes.GetText().Trim();
                        dic.Add("ocr_text", s);
                        break;

                    case COMMANDS.OCR_BOX_PAGE:
                        // Each region is serialized as "x_y_width_height"; regions are joined with '|'.
                        string[] boxes = tes.GetSegmentedRegions(ocr_level)
                                         .Select(x => string.Format("{0}_{1}_{2}_{3}", x.X, x.Y, x.Width, x.Height))
                                         .ToArray();
                        dic.Add("box_format", "x_y_width_height");
                        dic.Add("box_text", string.Join("|", boxes));
                        dic.Add("box_count", boxes.Length);
                        break;
                    }
                }

                App.Reply(cmd, requestId, input, dic);
            }
        }
        catch (Exception exInfo)
        {
            string errInfo = cmd.ToString() + " -> " + input + Environment.NewLine + exInfo.Message + Environment.NewLine + exInfo.StackTrace;
            try
            {
                redis.HSET("_ERROR:PDF:" + cmd.ToString(), docId.ToString(), errInfo);
            }
            catch (Exception)
            {
                // Error logging is best-effort: a failing Redis write must not escape to the caller,
                // which previously never saw exceptions from this method's OCR path.
            }
        }
    }
示例#2
0
    /// <summary>
    /// Renders every page of a PDF file to image bytes, stores each page in Redis under the
    /// document id, and reports progress through <c>App.Reply</c>.
    /// </summary>
    /// <param name="requestId">Request identifier, passed through unchanged to <c>App.Reply</c>.</param>
    /// <param name="cmd">Command being executed; echoed in replies and used in the error-hash key.</param>
    /// <param name="input">Path of the PDF file. Nothing happens when the file does not exist.</param>
    /// <param name="data">When it contains the key "png", pages use <c>DOC_TYPE.PNG_OGRINAL</c>; otherwise <c>DOC_TYPE.JPG_OGRINAL</c>.</param>
    /// <remarks>
    /// Replies sent: one per page ("page" = page index, "size" = byte count, 0 if the page failed),
    /// followed by a final reply with "page" == "page_total" and "size" 0.
    /// If page "0" already exists in Redis for the computed document id, a single reply with
    /// "page" 0 is sent and no rendering is done.
    /// Failures are written to the Redis hash "_ERROR:PDF:{cmd}": whole-document failures under the
    /// document id, per-page failures under "{docId}:{page}".
    /// </remarks>
    public static void SplitAllJpeg(string requestId, COMMANDS cmd, string input, Dictionary <string, object> data)
    {
        string file = input;

        if (!File.Exists(file))
        {
            return;
        }

        var  redis = new RedisBase(new RedisSetting(REDIS_TYPE.ONLY_WRITE, __CONFIG.REDIS_PORT_WRITE));
        long docId = 0;
        try
        {
            using (var doc = PdfDocument.Load(file))
            {
                int      pageTotal = doc.PageCount;
                DOC_TYPE docType   = DOC_TYPE.JPG_OGRINAL;
                if (data.ContainsKey("png"))
                {
                    docType = DOC_TYPE.PNG_OGRINAL;
                }
                docId = StaticDocument.BuildId(docType, pageTotal, new FileInfo(file).Length);

                // Page "0" already stored => the document is treated as already split.
                if (redis.HEXISTS(docId.ToString(), "0"))
                {
                    App.Reply(cmd, requestId, input, new Dictionary <string, object>()
                    {
                        { "id", docId },
                        { "type", docType.ToString() },
                        { "size", 0 },
                        { "page", 0 },
                        { "page_total", pageTotal },
                    });
                    return;
                }

                var sizes = new Dictionary <string, string>();
                for (int i = 0; i < pageTotal; i++)
                {
                    int len = 0;
                    try
                    {
                        byte[] buf = _pageAsBitmapBytes(doc, i, docType);
                        len = buf.Length;
                        // The boolean result was never consulted by the original code and its
                        // meaning is not visible here, so it is deliberately ignored.
                        redis.HSET(docId, i, buf);
                    }
                    catch (Exception ex)
                    {
                        // A failing page must not stop the remaining pages, but the failure is
                        // recorded instead of being dropped.
                        string err = cmd.ToString() + " -> " + file + " (page " + i + ")" + Environment.NewLine + ex.Message + Environment.NewLine + ex.StackTrace;
                        try
                        {
                            redis.HSET("_ERROR:PDF:" + cmd.ToString(), string.Format("{0}:{1}", docId, i), err);
                        }
                        catch (Exception)
                        {
                            // Best-effort logging: keep processing the remaining pages.
                        }
                    }
                    App.Reply(cmd, requestId, input, new Dictionary <string, object>()
                    {
                        { "id", docId },
                        { "type", docType.ToString() },
                        { "size", len },
                        { "page", i },
                        { "page_total", pageTotal },
                    });
                    sizes.Add(string.Format("{0}:{1}", docId, i), len.ToString());
                }

                // Skip the write for a zero-page document: there is nothing to store.
                if (sizes.Count > 0)
                {
                    redis.HMSET("_IMG_SIZE", sizes);
                }

                // Completion marker: "page" equals "page_total".
                App.Reply(cmd, requestId, input, new Dictionary <string, object>()
                {
                    { "id", docId },
                    { "type", docType.ToString() },
                    { "size", 0 },
                    { "page", pageTotal },
                    { "page_total", pageTotal },
                });
            }
        }
        catch (Exception exInfo)
        {
            string errInfo = cmd.ToString() + " -> " + file + Environment.NewLine + exInfo.Message + Environment.NewLine + exInfo.StackTrace;
            redis.HSET("_ERROR:PDF:" + cmd.ToString(), docId.ToString(), errInfo);
        }
    }