/// <summary>
/// Runs Tesseract OCR on a single page image stored in Redis and replies with either the
/// recognized text or the segmented region boxes, depending on <paramref name="cmd"/>.
/// </summary>
/// <param name="requestId">Request identifier, passed through unchanged to <c>App.Reply</c>.</param>
/// <param name="cmd">
/// <c>COMMANDS.OCR_TEXT_PAGE</c> adds "ocr_text" to the reply;
/// <c>COMMANDS.OCR_BOX_PAGE</c> adds "box_format", "box_text" and "box_count".
/// Any other value still produces a reply, containing only "id" and "page".
/// </param>
/// <param name="input">
/// "docId" or "docId.page". When the page part is absent, page stays -1. A part that is
/// present but not numeric is set to 0 by <c>TryParse</c>.
/// </param>
/// <param name="data">
/// Options read by key: "ocr_lang" (default "vie"), "ocr_mode" (default
/// <c>EngineMode.Default</c>) and "ocr_level" (default <c>PageIteratorLevel.Word</c>).
/// </param>
/// <remarks>
/// No reply is sent when the Redis hash field for (docId, page) does not exist, when the stored
/// bitmap is null, or when an exception is thrown; exceptions are reported through
/// <c>System.Diagnostics.Trace</c> instead of being dropped.
/// NOTE(review): despite the method name, only one (docId, page) lookup is performed here;
/// no "all pages" loop is visible in this method.
/// </remarks>
public static void convertImage2Text_OneOrAllPage(string requestId, COMMANDS cmd, string input, Dictionary<string, object> data)
{
    // A null input used to fail with a NullReferenceException before the try block was entered.
    if (input == null)
    {
        System.Diagnostics.Trace.TraceError("{0} -> input is null (request {1})", cmd, requestId);
        return;
    }

    // NOTE(review): the connection is declared ONLY_WRITE but uses REDIS_PORT_READ and is only
    // read from below; confirm that this combination is intended.
    var redis = new RedisBase(new RedisSetting(REDIS_TYPE.ONLY_WRITE, __CONFIG.REDIS_PORT_READ));

    long docId = 0;
    int page = -1;
    string[] parts = input.Split('.');
    long.TryParse(parts[0], out docId);
    if (parts.Length > 1)
    {
        int.TryParse(parts[1], out page);
    }

    var ocr_lang = data.Get<string>("ocr_lang", "vie");
    var ocr_mode = data.Get<EngineMode>("ocr_mode", EngineMode.Default);
    var ocr_level = data.Get<PageIteratorLevel>("ocr_level", PageIteratorLevel.Word);

    try
    {
        if (!redis.HEXISTS(docId.ToString(), page.ToString()))
        {
            return;
        }

        // The bitmap is owned by this method: dispose it once OCR is done so the underlying
        // image resources are not left to the finalizer.
        using (var bitmap = redis.HGET_BITMAP(docId, page))
        {
            if (bitmap == null)
            {
                return;
            }

            var dic = new Dictionary<string, object>()
            {
                { "id", docId },
                { "page", page },
            };

            using (var engine = new TesseractEngine("tessdata", ocr_lang, ocr_mode))
            using (var pix = new BitmapToPixConverter().Convert(bitmap))
            using (var tes = engine.Process(pix))
            {
                switch (cmd)
                {
                    case COMMANDS.OCR_TEXT_PAGE:
                        dic.Add("ocr_text", tes.GetText().Trim());
                        break;

                    case COMMANDS.OCR_BOX_PAGE:
                        // Each region is serialized as "x_y_width_height"; regions are joined with '|'.
                        string[] boxes = tes.GetSegmentedRegions(ocr_level)
                            .Select(x => string.Format("{0}_{1}_{2}_{3}", x.X, x.Y, x.Width, x.Height))
                            .ToArray();
                        dic.Add("box_format", "x_y_width_height");
                        dic.Add("box_text", string.Join("|", boxes));
                        dic.Add("box_count", boxes.Length);
                        break;
                }
            }

            App.Reply(cmd, requestId, input, dic);
        }
    }
    catch (Exception exInfo)
    {
        // Still best-effort (no rethrow, no reply), but the failure is now observable.
        System.Diagnostics.Trace.TraceError(
            "{0} -> {1}{2}{3}{2}{4}",
            cmd, input, Environment.NewLine, exInfo.Message, exInfo.StackTrace);
    }
}
/// <summary>
/// Renders every page of the PDF at <paramref name="input"/> to image bytes, stores each page in
/// Redis under the computed document id, and sends a progress reply after every page.
/// </summary>
/// <param name="requestId">Request identifier, passed through unchanged to <c>App.Reply</c>.</param>
/// <param name="cmd">Command being served; echoed in replies and used in the error hash name.</param>
/// <param name="input">Path of the PDF file. Nothing happens when the file does not exist.</param>
/// <param name="data">
/// When it contains the key "png" the pages are produced as <c>DOC_TYPE.PNG_OGRINAL</c>,
/// otherwise as <c>DOC_TYPE.JPG_OGRINAL</c>.
/// </param>
/// <remarks>
/// If field "0" already exists for the document id, a single reply (size 0, page 0) is sent and
/// no page is rendered. Otherwise one reply per page is sent, page sizes are written to the
/// "_IMG_SIZE" hash as "docId:page" -> length, and a final reply with page == page_total is sent.
/// Failures are written to the "_ERROR:PDF:&lt;cmd&gt;" hash: per-page failures under the field
/// "docId:page", any other failure under the field "docId".
/// </remarks>
public static void SplitAllJpeg(string requestId, COMMANDS cmd, string input, Dictionary<string, object> data)
{
    string file = input;
    if (!File.Exists(file))
    {
        return;
    }

    var redis = new RedisBase(new RedisSetting(REDIS_TYPE.ONLY_WRITE, __CONFIG.REDIS_PORT_WRITE));
    long docId = 0;

    try
    {
        using (var doc = PdfDocument.Load(file))
        {
            int pageTotal = doc.PageCount;
            DOC_TYPE docType = data.ContainsKey("png") ? DOC_TYPE.PNG_OGRINAL : DOC_TYPE.JPG_OGRINAL;
            docId = StaticDocument.BuildId(docType, pageTotal, new FileInfo(file).Length);

            // Field "0" present means the document was already stored: report and stop.
            if (redis.HEXISTS(docId.ToString(), "0"))
            {
                _replySplitProgress(cmd, requestId, input, docId, docType, 0, 0, pageTotal);
                return;
            }

            var sizes = new Dictionary<string, string>();
            for (int i = 0; i < pageTotal; i++)
            {
                int len = 0;
                string err = null;
                try
                {
                    byte[] buf = _pageAsBitmapBytes(doc, i, docType);
                    len = buf.Length;
                    redis.HSET(docId, i, buf);
                }
                catch (Exception ex)
                {
                    err = ex.Message + Environment.NewLine + ex.StackTrace;
                }

                // A failing page does not abort the remaining pages, but the failure is now
                // recorded instead of being discarded.
                if (err != null)
                {
                    redis.HSET("_ERROR:PDF:" + cmd.ToString(), string.Format("{0}:{1}", docId, i), err);
                }

                _replySplitProgress(cmd, requestId, input, docId, docType, len, i, pageTotal);
                sizes.Add(string.Format("{0}:{1}", docId, i), len.ToString());
            }

            // HMSET needs at least one field/value pair; skip it for a document without pages.
            if (sizes.Count > 0)
            {
                redis.HMSET("_IMG_SIZE", sizes);
            }

            // Completion marker: page == page_total.
            _replySplitProgress(cmd, requestId, input, docId, docType, 0, pageTotal, pageTotal);
        }
    }
    catch (Exception exInfo)
    {
        string errInfo = cmd.ToString() + " -> " + file + Environment.NewLine + exInfo.Message + Environment.NewLine + exInfo.StackTrace;
        redis.HSET("_ERROR:PDF:" + cmd.ToString(), docId.ToString(), errInfo);
    }
}

// Sends one SplitAllJpeg progress reply with the keys "id", "type", "size", "page", "page_total".
private static void _replySplitProgress(COMMANDS cmd, string requestId, string input, long docId, DOC_TYPE docType, int size, int page, int pageTotal)
{
    App.Reply(cmd, requestId, input, new Dictionary<string, object>()
    {
        { "id", docId },
        { "type", docType.ToString() },
        { "size", size },
        { "page", page },
        { "page_total", pageTotal },
    });
}