/// <summary>
/// Adds empty <see cref="FeSmall"/> placeholders to the mark dictionary, using keys
/// from the current entry count up to (but not including) <paramref name="newIdx"/>.
/// </summary>
/// <param name="newIdx">Exclusive upper bound of the keys to add.</param>
private void CapacityMark(int newIdx)
{
    // The entry count is sampled once; nothing is added when it is already >= newIdx.
    int next = _smallMarkList.Count;
    while (next < newIdx)
    {
        _smallMarkList.Add(next, new FeSmall { Idx = next });
        next++;
    }
}
/// <summary>
/// Adds empty <see cref="FeSmall"/> placeholders to the box dictionary, using keys
/// from the current entry count up to (but not including) <paramref name="newSize"/>.
/// </summary>
/// <param name="newSize">Exclusive upper bound of the keys to add.</param>
private void CapacityBox(int newSize)
{
    // The entry count is sampled once; nothing is added when it is already >= newSize.
    int next = _smallBoxList.Count;
    while (next < newSize)
    {
        _smallBoxList.Add(next, new FeSmall { Idx = next });
        next++;
    }
}
/// <summary>
/// Handles a double-click on a box: copies the box into the mark stored under the
/// same index and saves that mark's image into the temporary image directory.
/// Does nothing when no mark exists for the box index.
/// </summary>
/// <param name="box">The box that was double-clicked.</param>
private void ProcBoxDoubleClick(FeSmall box)
{
    FeSmall target;
    if (!_smallMarkList.TryGetValue(box.Idx, out target))
    {
        return;
    }

    // File name pattern: <source name>_<small index><source extension>.
    string imageName = string.Format("{0}_{1}{2}", _fiName, box.Idx, _fiExt);
    string imagePath = Path.Combine(_fiPath, FeConst.DIR_TEMP_IMG, imageName);

    target.GetFromSmall(box);
    target.SaveImage(imagePath);
}
/// <summary>
/// Copies the bitmap, text, value and bounding-box coordinates of
/// <paramref name="small"/> into this instance and marks this instance as non-empty.
/// </summary>
/// <param name="small">The instance to copy from.</param>
public void GetFromSmall(FeSmall small)
{
    Rectangle wholeImage = new Rectangle(0, 0, small._picBmp.Width, small._picBmp.Height);
    _picBmp = small._picBmp.Clone(wholeImage, PixelFormat.DontCare);

    using (MemoryStream ms = new MemoryStream())
    {
        _picBmp.Save(ms, ImageFormat.Tiff);

        // Rewind so decoding starts at the first byte written above.
        ms.Position = 0;

        // An Image created by Image.FromStream needs its stream to stay open for the
        // image's whole lifetime. The stream is disposed at the end of this block, so
        // hand the picture box an independent copy instead of the stream-backed image.
        using (Image decoded = Image.FromStream(ms))
        {
            _pictureBox.Image = new Bitmap(decoded);
        }
    }

    _textBox.Text = small._textBox.Text;
    _isEmpty = false;
    _value = small._value;

    // All four coordinates are read from the backing fields so they are copied the same way.
    _left = small._left;
    _bottom = small._bottom;
    _right = small._right;
    _top = small._top;
}
/// <summary>
/// Stores a box at <paramref name="smallIdx"/>, growing the box dictionary first when
/// the index is not below its current entry count, and increments the box counter.
/// </summary>
/// <param name="smallIdx">Key of the box entry to fill.</param>
/// <param name="value">Value assigned to the box.</param>
/// <param name="left">Left coordinate assigned to the box.</param>
/// <param name="bottom">Bottom coordinate assigned to the box.</param>
/// <param name="right">Right coordinate assigned to the box.</param>
/// <param name="top">Top coordinate assigned to the box.</param>
public void PutBox(int smallIdx, string value, int left, int bottom, int right, int top)
{
    bool needsGrowth = smallIdx >= _smallBoxList.Count;
    if (needsGrowth)
    {
        CapacityBox(smallIdx + 1);
    }

    FeSmall target = _smallBoxList[smallIdx];

    // Setter order is kept as-is in case the property setters have side effects.
    target.IsEmpty = false;
    target.Idx = smallIdx;
    target.Value = value;
    target.Left = left;
    target.Bottom = bottom;
    target.Right = right;
    target.Top = top;

    _boxCount += 1;
}
/// <summary>
/// Click handler for the "save mark" button: collects the mark line of every mark in
/// every train item, skipping empty lines, and passes the result to <c>SaveFile</c>
/// together with <c>FeConst.FILE_MARK</c>.
/// </summary>
private void btnSaveMark_Click(object sender, EventArgs e)
{
    StringBuilder markLines = new StringBuilder();

    foreach (var train in _trainList)
    {
        foreach (var entry in train.SmallMarkList)
        {
            FeSmall current = entry.Value;
            string markLine = current.GetMarkLine(train.FileName);

            // An empty string means the mark has nothing to write.
            if (markLine != "")
            {
                markLines.AppendLine(markLine);
            }
        }
    }

    SaveFile(markLines, FeConst.FILE_MARK);
}
/// <summary>
/// Stores a mark at <paramref name="smallIdx"/>, growing the mark dictionary first when
/// the index is not below its current entry count, and increments the mark counter.
/// When a box exists under the same index and has the same value, both the mark and
/// the box are flagged as "same" and the same-counter is incremented.
/// </summary>
/// <param name="smallIdx">Key of the mark entry to fill.</param>
/// <param name="value">Value assigned to the mark.</param>
/// <param name="left">Left coordinate assigned to the mark.</param>
/// <param name="bottom">Bottom coordinate assigned to the mark.</param>
/// <param name="right">Right coordinate assigned to the mark.</param>
/// <param name="top">Top coordinate assigned to the mark.</param>
public void PutMark(int smallIdx, string value, int left, int bottom, int right, int top)
{
    if (smallIdx >= _smallMarkList.Count)
    {
        CapacityMark(smallIdx + 1);
    }

    FeSmall small = _smallMarkList[smallIdx];
    small.IsEmpty = false;
    small.Idx = smallIdx;
    small.Value = value;
    small.Left = left;
    small.Bottom = bottom;
    small.Right = right;
    small.Top = top;
    _markCount++;

    // A mark may arrive for an index that has no box entry. Look the box up with
    // TryGetValue so that case is simply "not the same" instead of a
    // KeyNotFoundException thrown after the mark has already been updated.
    FeSmall box;
    if (_smallBoxList.TryGetValue(smallIdx, out box) && box.Value == value)
    {
        small.IsSame = true;
        _sameCount++;
        box.IsSame = true;
    }
}
/// <summary>
/// Handles a double-click on a mark by calling <c>FullEmpty</c> on it.
/// </summary>
/// <param name="mark">The mark that was double-clicked.</param>
private void ProcMarkDoubleClick(FeSmall mark)
{
    mark.FullEmpty();
}
/// <summary>
/// Click handler for the "train" button: runs the Tesseract training tool chain over
/// the marked character images and copies the resulting traineddata file into the
/// Tesseract <c>tessdata</c> directory.
/// </summary>
/// <remarks>
/// Steps, in order (all tools run with the temporary training directory as working directory):
/// 1. mark file + temporary images are merged into {lang}.{font}.exp0.tif
/// 2. tesseract ... batch.nochop makebox
/// 3. the generated box file is corrected with the values from the mark file
/// 4. tesseract ... nobatch box.train
/// 5. unicharset_extractor {lang}.{font}.exp0.box
/// 6. font_properties is written with the line "{font} 0 0 0 0 0"
/// 7. shapeclustering -F font_properties -U unicharset {lang}.{font}.exp0.tr
/// 8. mftraining -F font_properties -U unicharset -O {lang}.unicharset {lang}.{font}.exp0.tr
/// 9. cntraining {lang}.{font}.exp0.tr
/// 10. normproto, inttemp, pffmtable and shapetable are renamed to {lang}.&lt;name&gt;
/// 11. combine_tessdata {lang}
/// 12. {lang}.traineddata is copied into the tessdata directory
/// </remarks>
private void btnTrain_Click(object sender, EventArgs e)
{
    string imagePath = tbTrainImagePath.Text;
    string lang = tbLang.Text.Trim();
    string font = tbFont.Text.Trim();
    string tmpImgPath = Path.Combine(imagePath, FeConst.DIR_TEMP_IMG);
    string tmpTrainPath = Path.Combine(imagePath, FeConst.DIR_TEMP_TRAIN);

    // Read the mark file: one FeSmall per valid line, plus the image path of each.
    List<string> fileList = new List<string>();
    string markFile = Path.Combine(imagePath, FeConst.FILE_MARK);
    Dictionary<int, FeSmall> smallList = ReadMarkedSmalls(markFile, tmpImgPath, fileList);

    // Merge the single-character images into one TIFF.
    string mergeFile = string.Format("{0}.{1}.exp0.tif", lang, font);
    MergeTiff(fileList, Path.Combine(tmpTrainPath, mergeFile));

    // Generate the box file.
    string trainFile = Path.Combine(_tessacertPath, "tessdata", lang + ".traineddata");
    string args = BuildTesseractArgs(lang, font, trainFile, "batch.nochop makebox");
    ProcessBat(_tessacertExe, tmpTrainPath, args);

    // Correct the box file with the values from the mark file.
    string boxFile = Path.Combine(tmpTrainPath, string.Format("{0}.{1}.exp0.box", lang, font));
    CorrectBoxFile(boxFile, smallList);

    // Produce the .tr training file from the corrected box file.
    args = BuildTesseractArgs(lang, font, trainFile, "nobatch box.train");
    ProcessBat(_tessacertExe, tmpTrainPath, args);

    // unicharset_extractor {lang}.{font}.exp0.box
    string cmdExe = Path.Combine(_tessacertPath, "unicharset_extractor.exe");
    args = string.Format("{0}.{1}.exp0.box", lang, font);
    ProcessBat(cmdExe, tmpTrainPath, args);

    // echo {font} 0 0 0 0 0 >> font_properties
    StringBuilder sb = new StringBuilder();
    sb.AppendLine(string.Format("{0} 0 0 0 0 0", font));
    SaveFile(sb, Path.Combine(tmpTrainPath, "font_properties"));

    // shapeclustering -F font_properties -U unicharset {lang}.{font}.exp0.tr
    cmdExe = Path.Combine(_tessacertPath, "shapeclustering.exe");
    args = string.Format("-F font_properties -U unicharset {0}.{1}.exp0.tr", lang, font);
    ProcessBat(cmdExe, tmpTrainPath, args);

    // mftraining -F font_properties -U unicharset -O {lang}.unicharset {lang}.{font}.exp0.tr
    cmdExe = Path.Combine(_tessacertPath, "mftraining.exe");
    args = string.Format("-F font_properties -U unicharset -O {0}.unicharset {0}.{1}.exp0.tr", lang, font);
    ProcessBat(cmdExe, tmpTrainPath, args);

    // cntraining {lang}.{font}.exp0.tr
    cmdExe = Path.Combine(_tessacertPath, "cntraining.exe");
    args = string.Format("{0}.{1}.exp0.tr", lang, font);
    ProcessBat(cmdExe, tmpTrainPath, args);

    // Rename normproto / inttemp / pffmtable / shapetable to {lang}.<name>.
    PrefixTrainOutputs(tmpTrainPath, lang);

    // combine_tessdata {lang}
    cmdExe = Path.Combine(_tessacertPath, "combine_tessdata.exe");
    args = string.Format("{0}", lang);
    ProcessBat(cmdExe, tmpTrainPath, args);

    // Copy {lang}.traineddata into the tessdata directory, overwriting any existing file.
    string tmpTrainDataFile = string.Format("{0}.traineddata", lang);
    FileInfo trained = new FileInfo(Path.Combine(tmpTrainPath, tmpTrainDataFile));
    trained.CopyTo(Path.Combine(_tessacertPath, "tessdata", tmpTrainDataFile), true);
}

/// <summary>
/// Reads the mark file and creates one <see cref="FeSmall"/> per valid line, keyed by a
/// running index that starts at 0. The image path of each entry is appended to
/// <paramref name="fileList"/> in the same order.
/// </summary>
/// <param name="markFile">Full path of the mark file.</param>
/// <param name="tmpImgPath">Directory that holds the single-character images.</param>
/// <param name="fileList">Receives the full image path of every entry.</param>
/// <returns>The entries keyed by their running index.</returns>
private static Dictionary<int, FeSmall> ReadMarkedSmalls(string markFile, string tmpImgPath, List<string> fileList)
{
    Dictionary<int, FeSmall> smallList = new Dictionary<int, FeSmall>();
    int idx = 0;

    // OpenOrCreate: a missing mark file is created empty and simply yields no entries.
    using (FileStream fs = new FileStream(markFile, FileMode.OpenOrCreate))
    using (StreamReader sr = new StreamReader(fs, Encoding.Default))
    {
        string lineStr;
        while ((lineStr = sr.ReadLine()) != null)
        {
            // Line layout: char,left,bottom,right,top,filename,smallidx
            string[] arr = lineStr.Split(',');
            if (arr.Length != 7)
            {
                continue;
            }

            // A non-numeric small index is treated like any other malformed line:
            // skip it instead of aborting the whole run with a FormatException.
            int smallIdx;
            if (!int.TryParse(arr[6], out smallIdx))
            {
                continue;
            }

            string fileName = arr[5];
            string fiName = Path.GetFileNameWithoutExtension(fileName);
            string fiExt = Path.GetExtension(fileName);

            FeSmall small = new FeSmall();
            small.Idx = idx;
            small.FileName = string.Format("{0}_{1}{2}", fiName, smallIdx, fiExt);
            small.Value = arr[0];

            smallList.Add(idx, small);
            fileList.Add(Path.Combine(tmpImgPath, small.FileName));
            idx++;
        }
    }

    return smallList;
}

/// <summary>
/// Builds the tesseract command line for the merged TIFF. The "-l {lang}" option is
/// included only when <paramref name="trainFile"/> already exists.
/// </summary>
/// <param name="lang">Language name used in the file names and the -l option.</param>
/// <param name="font">Font name used in the file names.</param>
/// <param name="trainFile">Path of the existing traineddata file to probe for.</param>
/// <param name="mode">Trailing config arguments, e.g. "batch.nochop makebox".</param>
private static string BuildTesseractArgs(string lang, string font, string trainFile, string mode)
{
    string format = File.Exists(trainFile)
        ? "{0}.{1}.exp0.tif {0}.{1}.exp0 -l {0} -psm 10 {2}"
        : "{0}.{1}.exp0.tif {0}.{1}.exp0 -psm 10 {2}";
    return string.Format(format, lang, font, mode);
}

/// <summary>
/// Rewrites the box file so that the first field of every six-field line matches the
/// value of the mark entry whose key equals the line's last field. All other lines
/// are written back trimmed but otherwise unchanged.
/// </summary>
/// <param name="boxFile">Full path of the box file to correct in place.</param>
/// <param name="smallList">Mark entries keyed by their running index.</param>
private void CorrectBoxFile(string boxFile, Dictionary<int, FeSmall> smallList)
{
    StringBuilder sb = new StringBuilder();

    using (FileStream fs = new FileStream(boxFile, FileMode.Open))
    using (StreamReader sr = new StreamReader(fs, Encoding.Default))
    {
        string lineStr;
        while ((lineStr = sr.ReadLine()) != null)
        {
            lineStr = lineStr.Trim();
            string[] arr = lineStr.Split(' ');

            // The last field is matched against the mark entry keys
            // (presumably the page number within the merged TIFF; verify against tesseract output).
            int tmpIdx;
            FeSmall small;
            if (arr.Length == 6
                && int.TryParse(arr[5], out tmpIdx)
                && smallList.TryGetValue(tmpIdx, out small)
                && arr[0] != small.Value)
            {
                // Correct the recognised character with the marked value.
                arr[0] = small.Value;
                lineStr = string.Format("{0} {1} {2} {3} {4} {5}", arr[0], arr[1], arr[2], arr[3], arr[4], arr[5]);
            }

            sb.AppendLine(lineStr);
        }
    }

    // The reader is closed before the same file is written back.
    SaveFile(sb, boxFile);
}

/// <summary>
/// Renames the files normproto, inttemp, pffmtable and shapetable in
/// <paramref name="trainDir"/> to "{lang}.&lt;name&gt;", deleting any existing targets first.
/// </summary>
/// <param name="trainDir">Directory that holds the training output files.</param>
/// <param name="lang">Prefix to put in front of each file name.</param>
private static void PrefixTrainOutputs(string trainDir, string lang)
{
    string[] outputs = { "normproto", "inttemp", "pffmtable", "shapetable" };

    // Remove results of a previous run so the moves below cannot collide.
    foreach (string name in outputs)
    {
        string target = Path.Combine(trainDir, string.Format("{0}.{1}", lang, name));
        if (File.Exists(target))
        {
            File.Delete(target);
        }
    }

    foreach (string name in outputs)
    {
        FileInfo source = new FileInfo(Path.Combine(trainDir, name));
        source.MoveTo(Path.Combine(trainDir, string.Format("{0}.{1}", lang, name)));
    }
}