/// <summary>
/// Copies the default customization config file that ships next to the executable to the
/// path returned by <c>HelperFunctions.GetLocalAppDataPath</c>. The shipped file is kept as
/// the default you can revert to; an existing copy is only replaced when the shipped file
/// has a newer last-write time.
/// </summary>
private void CopyConfigs()
{
    var configName = OpusCatMTEngineSettings.Default.CustomizationBaseConfig;

    // Resolve the shipped default against the directory of the executing assembly. A bare
    // relative path would be resolved against the current working directory, which is not
    // guaranteed to be the directory the application was installed to.
    var processDir = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
    FileInfo defaultCustomizeYml = new FileInfo(Path.Combine(processDir, configName));

    FileInfo baseCustomizeYml = new FileInfo(HelperFunctions.GetLocalAppDataPath(configName));

    //There might be a previous customize.yml file present, don't overwrite it unless it's older
    if (!baseCustomizeYml.Exists ||
        defaultCustomizeYml.LastWriteTime > baseCustomizeYml.LastWriteTime)
    {
        File.Copy(defaultCustomizeYml.FullName, baseCustomizeYml.FullName, true);
    }
}
/// <summary>
/// Prepares a fine-tuning run in the customization directory and starts training.
/// In order: copies the model directory and saves the model config, writes the optional
/// post-customization batch to a file, copies and preprocesses the training files,
/// optionally generates guided-alignment files, runs and waits for an initial evaluation
/// of the validation set, writes the training config, and finally calls
/// <c>StartTraining</c>. Progress is reported through <c>OnProgressChanged</c> along the way.
/// </summary>
/// <returns>
/// The process returned by <c>StartTraining</c>, or <c>null</c> when copying the model
/// directory or saving the model config threw an exception (the failure is logged).
/// </returns>
public Process Customize()
{
    this.OnProgressChanged(new ProgressChangedEventArgs(
        1, new MarianCustomizationStatus(CustomizationStep.Copying_model, null)));

    //First copy the model to new dir
    try
    {
        this.CopyModelDir(this.modelDir, this.customLabel);
        //Save model config as soon as the model dir exists
        this.customModel.SaveModelConfig();
    }
    catch (Exception ex)
    {
        Log.Information($"Customization failed: {ex.Message}");
        return null;
    }

    //Save the batch to translate after customization to a file (to be batch translated after successful exit)
    if (this.postCustomizationBatch != null && this.postCustomizationBatch.Count > 0)
    {
        FileInfo postCustomizationBatchFile = new FileInfo(
            Path.Combine(this.customDir.FullName, OpusCatMTEngineSettings.Default.PostFinetuneBatchName));
        using (var writer = postCustomizationBatchFile.CreateText())
        {
            foreach (var sourceString in this.postCustomizationBatch)
            {
                writer.WriteLine(sourceString);
            }
        }
    }

    this.OnProgressChanged(new ProgressChangedEventArgs(
        2, new MarianCustomizationStatus(CustomizationStep.Copying_training_files, null)));

    //Copy raw files to model dir
    this.customSource = this.customSource.CopyTo(Path.Combine(this.customDir.FullName, "custom.source"));
    this.customTarget = this.customTarget.CopyTo(Path.Combine(this.customDir.FullName, "custom.target"));

    this.OnProgressChanged(new ProgressChangedEventArgs(
        3, new MarianCustomizationStatus(CustomizationStep.Preprocessing_training_files, null)));

    //Preprocess input files
    this.PreprocessInput();

    var decoderYaml = this.customDir.GetFiles("decoder.yml").Single();
    var deserializer = new Deserializer();

    // Dispose the reader once the config is parsed so the handle on decoder.yml is released.
    MarianDecoderConfig decoderSettings;
    using (var decoderReader = decoderYaml.OpenText())
    {
        decoderSettings = deserializer.Deserialize<MarianDecoderConfig>(decoderReader);
    }

    if (this.guidedAlignment)
    {
        //Generate alignments for fine-tuning corpus
        this.alignmentFile = new FileInfo(Path.Combine(this.customDir.FullName, "custom.alignments"));
        MarianHelper.GenerateAlignments(
            this.spSource, this.spTarget, this.alignmentFile, this.model.AlignmentPriorsFile);

        //Generate alignments for validation set (for evaluating fine-tuning effect on alignment)
        this.validAlignmentFile = new FileInfo(Path.Combine(this.customDir.FullName, "combined.alignments"));
        MarianHelper.GenerateAlignments(
            this.spValidSource, this.spValidTarget, this.validAlignmentFile, this.model.AlignmentPriorsFile);
    }

    this.OnProgressChanged(new ProgressChangedEventArgs(
        4, new MarianCustomizationStatus(CustomizationStep.Initial_evaluation, null)));

    //Do the initial evaluation
    var initialValidProcess = this.model.TranslateAndEvaluate(
        this.spValidSource,
        new FileInfo(Path.Combine(this.customDir.FullName, "valid.0.txt")),
        this.spValidTarget,
        OpusCatMTEngineSettings.Default.OODValidSetSize,
        this.sourceLanguage,
        this.targetLanguage,
        true);

    //Wait for the initial valid to finish before starting customization
    //(TODO: make sure this is not done on UI thread)
    initialValidProcess.WaitForExit();

    this.OnProgressChanged(new ProgressChangedEventArgs(
        6, new MarianCustomizationStatus(CustomizationStep.Finetuning, null)));

    //Use the initial translation time as basis for estimating the duration of validation file
    //translation
    this.trainingLog.EstimatedTranslationDuration =
        Convert.ToInt32((initialValidProcess.ExitTime - initialValidProcess.StartTime).TotalSeconds);

    MarianTrainerConfig trainingConfig;

    var baseCustomizeYmlPath = HelperFunctions.GetLocalAppDataPath(
        OpusCatMTEngineSettings.Default.CustomizationBaseConfig);

    var processDir = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);

    //Make sure there's a customization file.
    if (!File.Exists(baseCustomizeYmlPath))
    {
        File.Copy(
            Path.Combine(processDir, OpusCatMTEngineSettings.Default.CustomizationBaseConfig),
            baseCustomizeYmlPath);
    }

    //deserialize yaml file
    using (var reader = new StreamReader(baseCustomizeYmlPath))
    {
        trainingConfig = deserializer.Deserialize<MarianTrainerConfig>(reader);
    }

    trainingConfig.trainSets = new List<string>
    {
        this.spSource.FullName,
        this.spTarget.FullName
    };

    trainingConfig.ValidSets = new List<string>
    {
        this.spValidSource.FullName,
        this.spValidTarget.FullName
    };

    // The training config takes one vocabulary per side. Use the decoder's second entry for
    // the target side when it has one; fall back to the first entry otherwise.
    // NOTE(review): assumes decoder.yml lists vocabularies in source, target order — confirm.
    var sourceVocab = decoderSettings.vocabs[0];
    var targetVocab = decoderSettings.vocabs.ElementAtOrDefault(1) ?? sourceVocab;
    trainingConfig.vocabs = new List<string>
    {
        Path.Combine(this.customDir.FullName, sourceVocab),
        Path.Combine(this.customDir.FullName, targetVocab)
    };

    // Pick the validation script matching the segmentation method; other methods get none.
    string validScriptName;
    switch (this.segmentationMethod)
    {
        case ".bpe":
            validScriptName = "ValidateBpe.bat";
            break;
        case ".spm":
            validScriptName = "ValidateSp.bat";
            break;
        default:
            validScriptName = null;
            break;
    }

    if (validScriptName != null)
    {
        string validScriptPath = Path.Combine(this.customDir.FullName, validScriptName);
        // The path is wrapped in double quotes before being stored in the training config.
        trainingConfig.validScriptPath = $"\"{validScriptPath}\"";
        File.Copy(Path.Combine(processDir, validScriptName), validScriptPath);
    }

    trainingConfig.validScriptArgs = new List<string>
    {
        this.spValidTarget.FullName,
        $"OOD{OpusCatMTEngineSettings.Default.OODValidSetSize}"
    };
    trainingConfig.validTranslationOutput = Path.Combine(this.customDir.FullName, "valid.{U}.txt");

    if (this.guidedAlignment)
    {
        trainingConfig.guidedAlignment = this.alignmentFile.FullName;
    }

    trainingConfig.validLog = Path.Combine(this.customDir.FullName, "valid.log");
    trainingConfig.log = Path.Combine(this.customDir.FullName, "train.log");

    trainingConfig.model = Path.Combine(this.customDir.FullName, decoderSettings.models.Single());

    // Null-valued settings are left out of the serialized training config.
    var builder = new SerializerBuilder();
    builder.ConfigureDefaultValuesHandling(DefaultValuesHandling.OmitNull);
    var serializer = builder.Build();

    var configPath = Path.Combine(
        this.customDir.FullName, OpusCatMTEngineSettings.Default.CustomizationBaseConfig);
    using (var writer = File.CreateText(configPath))
    {
        serializer.Serialize(writer, trainingConfig, typeof(MarianTrainerConfig));
    }

    Process trainProcess = this.StartTraining();
    return trainProcess;
}
/// <summary>
/// Click handler that launches notepad.exe on the customization base config, using the path
/// returned by <c>HelperFunctions.GetLocalAppDataPath</c>.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void OpenCustomSettingsInEditor_Click(object sender, RoutedEventArgs e)
{
    const string editorExecutable = "notepad.exe";

    var configName = OpusCatMTEngineSettings.Default.CustomizationBaseConfig;
    var localConfigPath = HelperFunctions.GetLocalAppDataPath(configName);

    Process.Start(editorExecutable, localConfigPath);
}