Ejemplo n.º 1
0
        /// <summary>
        /// Runs <c>cntraining.exe</c> from the Tesseract directory, passing every file listed in
        /// <c>trainModel.Train</c> as a command-line argument, and blocks until the tool exits.
        /// </summary>
        /// <param name="trainModel">Model whose <c>Train</c> collection lists the files handed to the tool.</param>
        private void createCnTrainingFile(TrainModel trainModel)
        {
            logger?.Log("Generating cntraining file...");

            string cnTrainingExePath = Path.Combine(tesseractDirectory.FullName, "cntraining.exe");

            // Quote each path so that a directory containing spaces is still passed as a single argument.
            var arguments = new StringBuilder();
            foreach (var train in trainModel.Train)
            {
                arguments.Append('"').Append(train.FullName).Append("\" ");
            }

            // Process wraps an OS handle; release it deterministically once the tool has exited.
            using (var cnTrainingProcess = new Process())
            {
                cnTrainingProcess.StartInfo.FileName = cnTrainingExePath;
                cnTrainingProcess.StartInfo.CreateNoWindow = !verbose;
                cnTrainingProcess.StartInfo.Arguments = arguments.ToString();

                cnTrainingProcess.Start();
                cnTrainingProcess.WaitForExit();
            }

            logger?.Log("Generation of cntraining file completed");
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs <c>shapeclustering.exe</c> from the Tesseract directory with the font properties file
        /// (<c>-F</c>), the unicharset file (<c>-U</c>) and every file listed in <c>trainModel.Train</c>,
        /// and blocks until the tool exits.
        /// </summary>
        /// <param name="trainModel">
        /// Model providing <c>FontProperties</c>, <c>Unicharset</c> and the <c>Train</c> file list.
        /// </param>
        private void createClusteringFile(TrainModel trainModel)
        {
            logger?.Log("Generating clustering file...");

            string clusteringExePath = Path.Combine(tesseractDirectory.FullName, "shapeclustering.exe");

            // Use FullName explicitly instead of the implicit ToString() of the file objects, and quote
            // every path so that locations containing spaces are passed as a single argument.
            var arguments = new StringBuilder();
            arguments.Append("-F \"").Append(trainModel.FontProperties?.FullName).Append("\" ");
            arguments.Append("-U \"").Append(trainModel.Unicharset?.FullName).Append("\" ");
            foreach (var train in trainModel.Train)
            {
                arguments.Append('"').Append(train.FullName).Append("\" ");
            }

            // Process wraps an OS handle; release it deterministically once the tool has exited.
            using (var shapeClusteringProcess = new Process())
            {
                shapeClusteringProcess.StartInfo.FileName = clusteringExePath;
                shapeClusteringProcess.StartInfo.CreateNoWindow = !verbose;
                shapeClusteringProcess.StartInfo.Arguments = arguments.ToString();

                shapeClusteringProcess.Start();
                shapeClusteringProcess.WaitForExit();
            }

            logger?.Log("Generation of clustering file completed");
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Runs <c>unicharset_extractor.exe</c> from the Tesseract directory over every file in
        /// <c>trainModel.Boxes</c>, asking it to write <c>unicharset</c> into the current directory,
        /// then stores the resulting file in <c>trainModel.Unicharset</c>.
        /// </summary>
        /// <param name="trainModel">Model providing the box files; receives the generated unicharset file.</param>
        /// <param name="localTempDirectory">Directory searched for the generated <c>unicharset</c> file.</param>
        /// <exception cref="FileNotFoundException">
        /// No <c>unicharset</c> file exists in <paramref name="localTempDirectory"/> after the tool has run.
        /// </exception>
        private void createUnicharset(TrainModel trainModel, DirectoryInfo localTempDirectory)
        {
            logger?.Log("Generating unicharset file...");

            string extractorExePath = Path.Combine(tesseractDirectory.FullName, "unicharset_extractor.exe");

            // The output name is fixed; the original wrapped it in a string.Format call with no placeholder.
            var arguments = new StringBuilder("--output_unicharset ");
            arguments.Append(Path.Combine(".", "unicharset")).Append(' ');

            // Quote each box path so that a directory containing spaces is still one argument.
            foreach (var box in trainModel.Boxes)
            {
                arguments.Append('"').Append(box.FullName).Append("\" ");
            }

            // Process wraps an OS handle; release it deterministically once the tool has exited.
            using (var unicharsetExtractorProcess = new Process())
            {
                unicharsetExtractorProcess.StartInfo.FileName = extractorExePath;
                unicharsetExtractorProcess.StartInfo.CreateNoWindow = !verbose;
                unicharsetExtractorProcess.StartInfo.Arguments = arguments.ToString();

                unicharsetExtractorProcess.Start();
                unicharsetExtractorProcess.WaitForExit();
            }

            // Fail with a descriptive error instead of an index-out-of-range when nothing was produced.
            FileInfo[] generatedFiles = localTempDirectory.GetFiles("unicharset");
            if (generatedFiles.Length == 0)
            {
                throw new FileNotFoundException(
                    "unicharset_extractor.exe did not produce a unicharset file in " + localTempDirectory.FullName);
            }

            trainModel.Unicharset = new FileInfo(generatedFiles[0].FullName);

            logger?.Log("Generation of unicharset file completed");
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Runs the Tesseract executable once per image in <c>trainModel.Images</c> with the
        /// <c>box.train</c> configuration, using the image path without its extension as the output
        /// base name, and waits for each run to finish before starting the next.
        /// </summary>
        /// <param name="trainModel">Model whose <c>Images</c> collection lists the images to process.</param>
        /// <param name="tesseractExe">The Tesseract executable to launch.</param>
        private void createTrainFile(TrainModel trainModel, FileInfo tesseractExe)
        {
            logger?.Log("Generating tr file...");

            foreach (var image in trainModel.Images)
            {
                // Strip whatever extension the image has (".tif", ".tiff", none) rather than
                // blindly cutting the last four characters.
                string outputBase = Path.ChangeExtension(image.FullName, null);

                // A fresh Process per run, disposed afterwards, avoids holding on to handles of
                // earlier runs; paths are quoted so that spaces do not split an argument.
                using (var tesseractProcess = new Process())
                {
                    tesseractProcess.StartInfo.FileName = tesseractExe.FullName;
                    tesseractProcess.StartInfo.CreateNoWindow = !verbose;
                    tesseractProcess.StartInfo.Arguments = $"\"{image.FullName}\" \"{outputBase}\" box.train";

                    tesseractProcess.Start();
                    tesseractProcess.WaitForExit();
                }
            }

            logger?.Log("Generation of tr file completed");
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Prompts on the console for the font properties line, falling back to
        /// "<c>fontName</c> 0 0 1 0 0" when the input is empty or whitespace, writes it to
        /// <c>./&lt;languageName&gt;.font_properties</c> and records that file in
        /// <c>trainModel.FontProperties</c>.
        /// </summary>
        /// <param name="trainModel">Model that receives the generated font properties file.</param>
        private void createFontPropertiesFile(TrainModel trainModel)
        {
            logger?.Log("Generating font properties file...");

            logger?.Log(string.Format("Insert font properties parameters [{0} 0 0 1 0 0]", fontName), true);

            // ReadLine returns null when no more input is available; IsNullOrWhiteSpace covers that too.
            string fontProperties = Console.ReadLine();
            if (string.IsNullOrWhiteSpace(fontProperties))
            {
                fontProperties = string.Format("{0} 0 0 1 0 0", fontName);
            }

            string fontPropertiesPath = Path.Combine(".", string.Format("{0}.font_properties", languageName));

            // The using block closes the file even if the write throws.
            using (var writer = new StreamWriter(fontPropertiesPath))
            {
                writer.WriteLine(fontProperties);
            }

            trainModel.FontProperties = new FileInfo(fontPropertiesPath);

            logger?.Log("Generation of font properties file completed");
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Entry point of the workflow. Prepares the working directory, verifies that
        /// <c>tesseract.exe</c> exists in the Tesseract directory and then runs either the
        /// box-creation sequence or the full training sequence, depending on the configured mode.
        /// </summary>
        /// <exception cref="FileNotFoundException">
        /// <c>tesseract.exe</c> is not present in the configured Tesseract directory.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// The configured mode is neither box creation nor training.
        /// </exception>
        internal void Train()
        {
            // CreateDirectory does nothing when the folder already exists, so no Exists check is needed.
            Directory.CreateDirectory(LOCAL_FOLDER_TEMP);

            // Later steps resolve "." against the current directory: the temp folder when images
            // are copied locally, otherwise the images folder itself.
            Directory.SetCurrentDirectory(noCopy ? imagesDirectory.FullName : LOCAL_FOLDER_TEMP);

            // Check the presence of the Tesseract executable inside the configured folder.
            FileInfo tesseractExe = new FileInfo(Path.Combine(tesseractDirectory.FullName, "tesseract.exe"));
            if (!tesseractExe.Exists)
            {
                throw new FileNotFoundException("Tesseract.exe has not been found inside the provided path. Please provide a valid Tesseract data folder");
            }

            if (emode == EMode.BOX_CREATE)
            {
                logger?.Log("Starting process of box creation...", true);

                // Retrieve TIF images for training.
                var images = retrieveTifImages(imagesDirectory);

                // Work on local copies unless copying has been disabled.
                List<FileInfo> localImages;
                if (noCopy)
                {
                    localImages = images;
                }
                else
                {
                    localImages = copyImagesLocally(images);
                }

                createBoxes(localImages, tesseractExe);

                logger?.Log("Box creation completed. Please check they are correct and start training with mode 'train'", true);
            }
            else if (emode == EMode.TRAIN)
            {
                logger?.Log("Starting process of training...", true);

                var trainModel = new TrainModel();
                var localDirectoryInfo = new DirectoryInfo(".");

                trainModel.Images = retrieveTifImages(localDirectoryInfo);
                trainModel.Boxes = retrieveBoxesFile(localDirectoryInfo);

                // The steps below run in a fixed order; several read what an earlier one set on trainModel.

                // Creation of train files, then collect them.
                createTrainFile(trainModel, tesseractExe);
                trainModel.Train = retrieveTrainFiles(localDirectoryInfo);

                // Creation of unicharset file.
                createUnicharset(trainModel, localDirectoryInfo);

                // Creation of font properties file.
                createFontPropertiesFile(trainModel);

                // Creation of clustering file.
                createClusteringFile(trainModel);

                // Creation of mftraining file.
                createMfTrainingFile(trainModel);

                // Creation of cntraining file.
                createCnTrainingFile(trainModel);

                // Creation of unicharambigs file.
                createUnicharambigs(tesseractExe);

                // Rename all generated files.
                renameFile(localDirectoryInfo);

                // Combine all data in order to generate the final training file.
                combineData();
            }
            else
            {
                throw new InvalidOperationException(string.Format("Invalid mode: {0}", emode));
            }
        }