Пример #1
0
        /// <summary>
        /// Adds placeholder <see cref="FeSmall"/> entries to the mark list until it
        /// contains an entry for every key below <paramref name="newIdx"/>.
        /// </summary>
        /// <param name="newIdx">Exclusive upper bound of the keys to create.</param>
        private void CapacityMark(int newIdx)
        {
            // New keys continue from the current entry count, which is sampled once.
            int next = _smallMarkList.Count;

            while (next < newIdx)
            {
                FeSmall placeholder = new FeSmall { Idx = next };
                _smallMarkList.Add(next, placeholder);
                next++;
            }
        }
Пример #2
0
        /// <summary>
        /// Adds placeholder <see cref="FeSmall"/> entries to the box list until it
        /// contains an entry for every key below <paramref name="newSize"/>.
        /// </summary>
        /// <param name="newSize">Exclusive upper bound of the keys to create.</param>
        private void CapacityBox(int newSize)
        {
            // The first new key is the current entry count; each placeholder records its own key.
            for (int key = _smallBoxList.Count; key < newSize; ++key)
            {
                _smallBoxList.Add(key, new FeSmall { Idx = key });
            }
        }
Пример #3
0
        /// <summary>
        /// Copies the given box cell into the mark cell that has the same index and
        /// saves the mark's image into the temporary image directory.
        /// Does nothing when no mark cell exists for that index.
        /// </summary>
        /// <param name="box">The box cell to copy from.</param>
        private void ProcBoxDoubleClick(FeSmall box)
        {
            FeSmall target;

            if (!_smallMarkList.TryGetValue(box.Idx, out target))
            {
                return;
            }

            // Image file name pattern: <name>_<cell index><extension>.
            string imageName = string.Format("{0}_{1}{2}", _fiName, box.Idx, _fiExt);
            string imagePath = Path.Combine(_fiPath, FeConst.DIR_TEMP_IMG, imageName);

            target.GetFromSmall(box);
            target.SaveImage(imagePath);
        }
Пример #4
0
 /// <summary>
 /// Copies the bitmap, text, value and bounding box of <paramref name="small"/>
 /// into this instance and marks this instance as non-empty.
 /// </summary>
 /// <param name="small">The cell to copy from.</param>
 /// <exception cref="ArgumentNullException"><paramref name="small"/> is null.</exception>
 public void GetFromSmall(FeSmall small)
 {
     if (small == null)
     {
         throw new ArgumentNullException("small");
     }

     _picBmp = small._picBmp.Clone(new Rectangle(0, 0, small._picBmp.Width, small._picBmp.Height), PixelFormat.DontCare);

     // An Image created by Image.FromStream needs its stream to stay open for as long
     // as the image is alive. The stream is disposed at the end of this block, so the
     // picture box is given an independent in-memory copy instead of the stream-backed image.
     using (MemoryStream ms = new MemoryStream())
     {
         _picBmp.Save(ms, ImageFormat.Tiff);
         ms.Position = 0;
         using (Image decoded = Image.FromStream(ms))
         {
             _pictureBox.Image = new Bitmap(decoded);
         }
     }
     // NOTE(review): the previous _picBmp / _pictureBox.Image are not disposed here because
     // their ownership cannot be determined from this method alone — confirm and dispose if owned.

     _textBox.Text = small._textBox.Text;
     _isEmpty      = false;
     _value        = small._value;
     _left         = small.Left;
     _bottom       = small._bottom;
     _right        = small._right;
     _top          = small._top;
 }
Пример #5
0
        /// <summary>
        /// Stores a recognised box (value and bounding rectangle) in the box cell at
        /// <paramref name="smallIdx"/>, creating missing cells first, and increments the box counter.
        /// </summary>
        /// <param name="smallIdx">Index of the cell to fill.</param>
        /// <param name="value">Value assigned to the cell.</param>
        /// <param name="left">Left coordinate of the box.</param>
        /// <param name="bottom">Bottom coordinate of the box.</param>
        /// <param name="right">Right coordinate of the box.</param>
        /// <param name="top">Top coordinate of the box.</param>
        public void PutBox(int smallIdx, string value, int left, int bottom, int right, int top)
        {
            // Make sure a cell exists for the requested index before looking it up.
            if (_smallBoxList.Count <= smallIdx)
            {
                CapacityBox(smallIdx + 1);
            }

            FeSmall boxCell = _smallBoxList[smallIdx];

            boxCell.IsEmpty = false;
            boxCell.Idx     = smallIdx;
            boxCell.Value   = value;
            boxCell.Left    = left;
            boxCell.Bottom  = bottom;
            boxCell.Right   = right;
            boxCell.Top     = top;

            _boxCount++;
        }
Пример #6
0
        /// <summary>
        /// Collects the mark line of every mark cell of every loaded train item and
        /// writes the non-empty lines to the mark file.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void btnSaveMark_Click(object sender, EventArgs e)
        {
            StringBuilder output = new StringBuilder();

            foreach (var train in _trainList)
            {
                foreach (var entry in train.SmallMarkList)
                {
                    string markLine = entry.Value.GetMarkLine(train.FileName);

                    // An empty string means the cell produced no line and is skipped.
                    if (markLine != "")
                    {
                        output.AppendLine(markLine);
                    }
                }
            }

            SaveFile(output, FeConst.FILE_MARK);
        }
Пример #7
0
        /// <summary>
        /// Stores a mark (value and bounding rectangle) in the mark cell at
        /// <paramref name="smallIdx"/>, creating missing cells first, and increments the mark counter.
        /// When a box cell exists at the same index and carries the same value, both cells
        /// are flagged as identical and the same-value counter is incremented.
        /// </summary>
        /// <param name="smallIdx">Index of the cell to fill.</param>
        /// <param name="value">Value assigned to the mark.</param>
        /// <param name="left">Left coordinate of the mark.</param>
        /// <param name="bottom">Bottom coordinate of the mark.</param>
        /// <param name="right">Right coordinate of the mark.</param>
        /// <param name="top">Top coordinate of the mark.</param>
        public void PutMark(int smallIdx, string value, int left, int bottom, int right, int top)
        {
            if (smallIdx >= _smallMarkList.Count)
            {
                CapacityMark(smallIdx + 1);
            }
            FeSmall small = _smallMarkList[smallIdx];

            small.IsEmpty = false;
            small.Idx     = smallIdx;
            small.Value   = value;
            small.Left    = left;
            small.Bottom  = bottom;
            small.Right   = right;
            small.Top     = top;
            _markCount++;

            // The box list is filled independently of the mark list, so a box for this index
            // may not exist. Look it up once and skip the comparison when it is missing,
            // instead of throwing after the mark has already been stored.
            FeSmall box;
            if (_smallBoxList.TryGetValue(smallIdx, out box) && box.Value == value)
            {
                small.IsSame = true;
                _sameCount++;
                box.IsSame = true;
            }
        }
Пример #8
0
 /// <summary>
 /// Calls <c>FullEmpty</c> on the given mark cell.
 /// NOTE(review): presumably invoked when a mark cell is double-clicked, and
 /// <c>FullEmpty</c> presumably clears the cell — confirm against the caller and FeSmall.
 /// </summary>
 /// <param name="mark">The mark cell to reset.</param>
 private void ProcMarkDoubleClick(FeSmall mark)
 {
     mark.FullEmpty();
 }
Пример #9
0
        /// <summary>
        /// Runs the Tesseract training pipeline for the language and font entered in the form:
        /// merges the marked cell images into one multi-page TIFF, generates a box file,
        /// corrects its characters from the mark file, runs the training tools, combines the
        /// result and copies the resulting traineddata file into the tessdata directory.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void btnTrain_Click(object sender, EventArgs e)
        {
            /*
             * Steps (example names: lang "ts", font "yzm"):
             *   mark.txt + temp images ==> ts.yzm.exp0.tif (merge)
             *   generate the box file
             *   correct the box file from mark.txt
             *   unicharset_extractor ts.yzm.exp0.box
             *   shapeclustering -F font_properties -U unicharset ts.yzm.exp0.tr
             *   mftraining -F font_properties -U unicharset -O ts.unicharset ts.yzm.exp0.tr
             *   cntraining ts.yzm.exp0.tr
             *   rename normproto / inttemp / pffmtable / shapetable to ts.<name>
             *   combine_tessdata ts
             *   copy ts.traineddata into the tessdata directory
             */
            string imagePath = tbTrainImagePath.Text;
            string lang      = tbLang.Text.Trim();
            string font      = tbFont.Text.Trim();

            string tmpImgPath   = Path.Combine(imagePath, FeConst.DIR_TEMP_IMG);
            string tmpTrainPath = Path.Combine(imagePath, FeConst.DIR_TEMP_TRAIN);

            // Common "<lang>.<font>.exp0" prefix shared by the tif, box and tr files.
            string baseName = string.Format("{0}.{1}.exp0", lang, font);

            // Key = position of the cell image in the merged TIFF (0-based page number).
            Dictionary <int, FeSmall> smallList = new Dictionary <int, FeSmall>();
            List <string>             fileList  = new List <string>();
            int    idx      = 0;
            string markFile = Path.Combine(imagePath, FeConst.FILE_MARK);

            using (FileStream fs = new FileStream(markFile, FileMode.OpenOrCreate))
            using (StreamReader sr = new StreamReader(fs, Encoding.Default))
            {
                string lineStr;
                while ((lineStr = sr.ReadLine()) != null)
                {
                    // Fields: char, left, bottom, right, top, filename, smallidx
                    string[] arr = lineStr.Split(',');
                    int      smallIdx;
                    // Skip lines that are malformed, including a non-numeric cell index,
                    // instead of aborting the whole run.
                    if (arr.Length != 7 || !int.TryParse(arr[6], out smallIdx))
                    {
                        continue;
                    }
                    string  fileName = arr[5];
                    string  fiName   = Path.GetFileNameWithoutExtension(fileName);
                    string  fiExt    = Path.GetExtension(fileName);
                    FeSmall small    = new FeSmall();
                    small.Idx      = idx;
                    small.FileName = string.Format("{0}_{1}{2}", fiName, smallIdx, fiExt);
                    small.Value    = arr[0];
                    smallList.Add(idx, small);
                    fileList.Add(Path.Combine(tmpImgPath, small.FileName));
                    idx++;
                }
            }

            // Merge the cell images into one TIFF.
            MergeTiff(fileList, Path.Combine(tmpTrainPath, baseName + ".tif"));

            // Generate the box file. An already installed traineddata for this language
            // is passed via -l; otherwise the option is left out.
            string args;
            string trainFile  = Path.Combine(_tessacertPath, "tessdata", lang + ".traineddata");
            string langOption = File.Exists(trainFile) ? string.Format(" -l {0}", lang) : string.Empty;

            args = string.Format("{0}.tif {0}{1} -psm 10 batch.nochop makebox", baseName, langOption);
            ProcessBat(_tessacertExe, tmpTrainPath, args);

            // Correct the recognised characters in the box file with the values from mark.txt.
            StringBuilder sb      = new StringBuilder();
            string        boxFile = Path.Combine(tmpTrainPath, baseName + ".box");

            using (FileStream fs = new FileStream(boxFile, FileMode.Open))
            using (StreamReader sr = new StreamReader(fs, Encoding.Default))
            {
                string lineStr;
                while ((lineStr = sr.ReadLine()) != null)
                {
                    lineStr = lineStr.Trim();
                    string[] arr = lineStr.Split(' ');
                    int      pageIdx;
                    FeSmall  small;
                    // The sixth field is parsed as the key into smallList. Lines that do not
                    // match the expected shape are written back unchanged.
                    if (arr.Length == 6
                        && int.TryParse(arr[5], out pageIdx)
                        && smallList.TryGetValue(pageIdx, out small)
                        && arr[0] != small.Value)
                    {
                        arr[0]  = small.Value;
                        lineStr = string.Join(" ", arr);
                    }
                    sb.AppendLine(lineStr);
                }
            }
            SaveFile(sb, boxFile);

            // Produce the .tr training file from the corrected box file.
            args = string.Format("{0}.tif {0}{1} -psm 10 nobatch box.train", baseName, langOption);
            ProcessBat(_tessacertExe, tmpTrainPath, args);

            // unicharset_extractor <lang>.<font>.exp0.box
            string cmdExe = Path.Combine(_tessacertPath, "unicharset_extractor.exe");

            args = baseName + ".box";
            ProcessBat(cmdExe, tmpTrainPath, args);

            // font_properties: "<font> 0 0 0 0 0"
            sb = new StringBuilder();
            sb.AppendLine(string.Format("{0} 0 0 0 0 0", font));
            SaveFile(sb, Path.Combine(tmpTrainPath, "font_properties"));

            // shapeclustering -F font_properties -U unicharset <lang>.<font>.exp0.tr
            cmdExe = Path.Combine(_tessacertPath, "shapeclustering.exe");
            args   = string.Format("-F font_properties -U unicharset {0}.tr", baseName);
            ProcessBat(cmdExe, tmpTrainPath, args);

            // mftraining -F font_properties -U unicharset -O <lang>.unicharset <lang>.<font>.exp0.tr
            cmdExe = Path.Combine(_tessacertPath, "mftraining.exe");
            args   = string.Format("-F font_properties -U unicharset -O {0}.unicharset {1}.tr", lang, baseName);
            ProcessBat(cmdExe, tmpTrainPath, args);

            // cntraining <lang>.<font>.exp0.tr
            cmdExe = Path.Combine(_tessacertPath, "cntraining.exe");
            args   = baseName + ".tr";
            ProcessBat(cmdExe, tmpTrainPath, args);

            // Prefix the training outputs with the language name, replacing any file
            // left over from a previous run.
            string[] artifactNames = { "normproto", "inttemp", "pffmtable", "shapetable" };

            foreach (string artifact in artifactNames)
            {
                string source = Path.Combine(tmpTrainPath, artifact);
                string target = Path.Combine(tmpTrainPath, string.Format("{0}.{1}", lang, artifact));

                if (File.Exists(target))
                {
                    File.Delete(target);
                }
                File.Move(source, target);
            }

            // combine_tessdata <lang>
            cmdExe = Path.Combine(_tessacertPath, "combine_tessdata.exe");
            args   = string.Format("{0}", lang);
            ProcessBat(cmdExe, tmpTrainPath, args);

            // Install the result into the tessdata directory, overwriting an existing file.
            string tmpTrainDataFile = string.Format("{0}.traineddata", lang);

            File.Copy(Path.Combine(tmpTrainPath, tmpTrainDataFile),
                      Path.Combine(_tessacertPath, "tessdata", tmpTrainDataFile),
                      true);
        }