/// <summary>
/// Runs <c>cntraining.exe</c> from the Tesseract directory, passing every file in
/// <c>trainModel.Train</c> as an argument, and blocks until the tool exits.
/// </summary>
/// <param name="trainModel">Model whose <c>Train</c> collection supplies the input files.</param>
private void createCnTrainingFile(TrainModel trainModel)
{
    logger?.Log("Generating cntraining file...");

    var cnTrainingExe = new FileInfo(Path.Combine(tesseractDirectory.FullName, "cntraining.exe"));

    // Quote each path so that a directory or file name containing spaces
    // reaches the tool as a single argument instead of being split.
    var arguments = new StringBuilder();
    foreach (var train in trainModel.Train)
    {
        arguments.Append('"').Append(train.FullName).Append("\" ");
    }

    // Process wraps an OS handle; dispose it deterministically once the tool has exited.
    using (var cnTrainingProcess = new Process())
    {
        cnTrainingProcess.StartInfo.FileName = cnTrainingExe.FullName;
        cnTrainingProcess.StartInfo.CreateNoWindow = !verbose;
        cnTrainingProcess.StartInfo.Arguments = arguments.ToString();

        cnTrainingProcess.Start();
        cnTrainingProcess.WaitForExit();
    }

    logger?.Log("Generation of cntraining file completed");
}
/// <summary>
/// Runs <c>shapeclustering.exe</c> from the Tesseract directory with the font properties
/// file (<c>-F</c>), the unicharset file (<c>-U</c>) and every file in
/// <c>trainModel.Train</c>, and blocks until the tool exits.
/// </summary>
/// <param name="trainModel">
/// Model supplying <c>FontProperties</c>, <c>Unicharset</c> and the <c>Train</c> files.
/// </param>
/// <exception cref="InvalidOperationException">
/// <c>FontProperties</c> or <c>Unicharset</c> has not been set on the model.
/// </exception>
private void createClusteringFile(TrainModel trainModel)
{
    logger?.Log("Generating clustering file...");

    // Fail loudly instead of launching the tool with empty -F / -U values.
    if (trainModel.FontProperties == null || trainModel.Unicharset == null)
    {
        throw new InvalidOperationException(
            "Font properties and unicharset files must be generated before shape clustering.");
    }

    var clusteringExe = new FileInfo(Path.Combine(tesseractDirectory.FullName, "shapeclustering.exe"));

    // Use FullName explicitly rather than the implicit ToString() of the file objects,
    // and quote every path so names containing spaces stay a single argument.
    var arguments = new StringBuilder();
    arguments.Append("-F \"").Append(trainModel.FontProperties.FullName).Append("\" ");
    arguments.Append("-U \"").Append(trainModel.Unicharset.FullName).Append("\" ");
    foreach (var train in trainModel.Train)
    {
        arguments.Append('"').Append(train.FullName).Append("\" ");
    }

    // Process wraps an OS handle; dispose it deterministically once the tool has exited.
    using (var shapeClusteringProcess = new Process())
    {
        shapeClusteringProcess.StartInfo.FileName = clusteringExe.FullName;
        shapeClusteringProcess.StartInfo.CreateNoWindow = !verbose;
        shapeClusteringProcess.StartInfo.Arguments = arguments.ToString();

        shapeClusteringProcess.Start();
        shapeClusteringProcess.WaitForExit();
    }

    logger?.Log("Generation of clustering file completed");
}
/// <summary>
/// Runs <c>unicharset_extractor.exe</c> from the Tesseract directory over every box file in
/// <c>trainModel.Boxes</c>, writing to <c>.\unicharset</c> relative to the current directory,
/// then stores the generated file in <c>trainModel.Unicharset</c>.
/// </summary>
/// <param name="trainModel">Model supplying the box files and receiving the unicharset file.</param>
/// <param name="localTempDirectory">Directory that is searched for the generated <c>unicharset</c> file.</param>
/// <exception cref="FileNotFoundException">
/// No file named <c>unicharset</c> exists in <paramref name="localTempDirectory"/> after the tool ran.
/// </exception>
private void createUnicharset(TrainModel trainModel, DirectoryInfo localTempDirectory)
{
    logger?.Log("Generating unicharset file...");

    var unicharsetExtractorExe = new FileInfo(Path.Combine(tesseractDirectory.FullName, "unicharset_extractor.exe"));

    // The output name is the fixed literal "unicharset"; the former string.Format call
    // had no placeholder, so it never contributed anything to the name.
    var arguments = new StringBuilder("--output_unicharset ");
    arguments.Append(Path.Combine(".", "unicharset")).Append(" ");

    // Quote each box path so names containing spaces stay a single argument.
    foreach (var box in trainModel.Boxes)
    {
        arguments.Append('"').Append(box.FullName).Append("\" ");
    }

    // Process wraps an OS handle; dispose it deterministically once the tool has exited.
    using (var unicharsetExtractorProcess = new Process())
    {
        unicharsetExtractorProcess.StartInfo.FileName = unicharsetExtractorExe.FullName;
        unicharsetExtractorProcess.StartInfo.CreateNoWindow = !verbose;
        unicharsetExtractorProcess.StartInfo.Arguments = arguments.ToString();

        unicharsetExtractorProcess.Start();
        unicharsetExtractorProcess.WaitForExit();
    }

    // Report a missing output file explicitly instead of failing with an index error.
    FileInfo[] generated = localTempDirectory.GetFiles("unicharset");
    if (generated.Length == 0)
    {
        throw new FileNotFoundException(
            "unicharset_extractor did not produce a 'unicharset' file in " + localTempDirectory.FullName);
    }

    trainModel.Unicharset = new FileInfo(generated[0].FullName);

    logger?.Log("Generation of unicharset file completed");
}
/// <summary>
/// Runs the Tesseract executable once per image in <c>trainModel.Images</c> with the
/// <c>box.train</c> configuration, using the image path without its extension as the
/// output base, and waits for each run to finish before starting the next.
/// </summary>
/// <param name="trainModel">Model whose <c>Images</c> collection supplies the input images.</param>
/// <param name="tesseractExe">The Tesseract executable to launch.</param>
private void createTrainFile(TrainModel trainModel, FileInfo tesseractExe)
{
    logger?.Log("Generating tr file...");

    foreach (var image in trainModel.Images)
    {
        // Strip the extension whatever its length; cutting a fixed 4 characters
        // left a trailing dot for ".tiff" files.
        string outputBase = Path.ChangeExtension(image.FullName, null);

        // Quote both paths so names containing spaces stay a single argument.
        var arguments = new StringBuilder();
        arguments.Append('"').Append(image.FullName).Append("\" ");
        arguments.Append('"').Append(outputBase).Append('"');
        arguments.Append(" box.train");

        // One Process per run, disposed as soon as that run has exited.
        using (var tesseractProcess = new Process())
        {
            tesseractProcess.StartInfo.FileName = tesseractExe.FullName;
            tesseractProcess.StartInfo.CreateNoWindow = !verbose;
            tesseractProcess.StartInfo.Arguments = arguments.ToString();

            tesseractProcess.Start();
            tesseractProcess.WaitForExit();
        }
    }

    logger?.Log("Generation of tr file completed");
}
/// <summary>
/// Prompts on the console for a font properties line, falls back to
/// "<c>fontName</c> 0 0 1 0 0" when the input is empty, writes that line to
/// <c>.\{languageName}.font_properties</c> and stores the file in
/// <c>trainModel.FontProperties</c>.
/// </summary>
/// <param name="trainModel">Model that receives the generated font properties file.</param>
private void createFontPropertiesFile(TrainModel trainModel)
{
    logger?.Log("Generating font properties file...");

    string defaultProperties = $"{fontName} 0 0 1 0 0";
    logger?.Log($"Insert font properties parameters [{defaultProperties}]", true);

    // ReadLine returns null when no input is available; treat that like an empty answer.
    var fontProperties = Console.ReadLine();
    if (string.IsNullOrWhiteSpace(fontProperties))
    {
        fontProperties = defaultProperties;
    }

    string fontPropertiesPath = Path.Combine(".", $"{languageName}.font_properties");

    // The using block closes the file even if the write throws.
    using (var writer = new StreamWriter(fontPropertiesPath))
    {
        writer.WriteLine(fontProperties);
    }

    trainModel.FontProperties = new FileInfo(fontPropertiesPath);

    logger?.Log("Generation of font properties file completed");
}
/// <summary>
/// Entry point of the training workflow. Ensures the local temp folder exists, switches the
/// current directory (temp folder, or the images directory when <c>noCopy</c> is set),
/// verifies that <c>tesseract.exe</c> is present, then either creates box files
/// (<c>EMode.BOX_CREATE</c>) or runs the full training pipeline (<c>EMode.TRAIN</c>).
/// </summary>
/// <exception cref="FileNotFoundException">
/// <c>tesseract.exe</c> does not exist inside the configured Tesseract directory.
/// </exception>
/// <exception cref="InvalidOperationException">
/// The configured mode is neither <c>BOX_CREATE</c> nor <c>TRAIN</c>.
/// </exception>
internal void Train()
{
    // CreateDirectory does nothing when the folder already exists, so no prior check is needed.
    Directory.CreateDirectory(LOCAL_FOLDER_TEMP);

    // All later steps use paths relative to the current directory.
    if (noCopy)
    {
        Directory.SetCurrentDirectory(imagesDirectory.FullName);
    }
    else
    {
        Directory.SetCurrentDirectory(LOCAL_FOLDER_TEMP);
    }

    // Check the presence of the Tesseract executable inside the provided folder.
    FileInfo tesseractExe = new FileInfo(Path.Combine(tesseractDirectory.FullName, "tesseract.exe"));
    if (!tesseractExe.Exists)
    {
        throw new FileNotFoundException(
            "Tesseract.exe has not been found inside the provided path. Please provide a valid Tesseract data folder",
            tesseractExe.FullName);
    }

    if (emode == EMode.BOX_CREATE)
    {
        logger?.Log("Starting process of box creation...", true);

        // Retrieve TIF images for training.
        var images = retrieveTifImages(imagesDirectory);

        // Work on local copies unless copying has been disabled.
        List<FileInfo> localImages;
        if (noCopy)
        {
            localImages = images;
        }
        else
        {
            localImages = copyImagesLocally(images);
        }

        createBoxes(localImages, tesseractExe);

        logger?.Log("Box creation completed. \nPlease check they are correct and start training with mode 'train'", true);
    }
    else if (emode == EMode.TRAIN)
    {
        logger?.Log("Starting process of training...", true);

        var trainModel = new TrainModel();
        var localDirectoryInfo = new DirectoryInfo(".");

        trainModel.Images = retrieveTifImages(localDirectoryInfo);
        trainModel.Boxes = retrieveBoxesFile(localDirectoryInfo);

        // The steps below run in a fixed order: each one consumes files or model
        // properties produced by an earlier step.
        createTrainFile(trainModel, tesseractExe);
        trainModel.Train = retrieveTrainFiles(localDirectoryInfo);

        createUnicharset(trainModel, localDirectoryInfo);
        createFontPropertiesFile(trainModel);
        createClusteringFile(trainModel);
        createMfTrainingFile(trainModel);
        createCnTrainingFile(trainModel);
        createUnicharambigs(tesseractExe);

        // Rename all generated files.
        renameFile(localDirectoryInfo);

        // Combine all data in order to generate the final training file.
        combineData();
    }
    else
    {
        throw new InvalidOperationException($"Invalid mode: {emode}");
    }
}