/// <summary>
/// Preprocesses a text file line by line and then encodes the result with
/// <c>spm_encode.exe</c>.
/// </summary>
/// <param name="languageFile">Raw input file; read one line at a time.</param>
/// <param name="directory">Directory that receives both generated files.</param>
/// <param name="languageCode">Language code forwarded to <c>MarianHelper.PreprocessLine</c>.</param>
/// <param name="spmModel">Model file passed to <c>spm_encode.exe</c> via <c>--model</c>.</param>
/// <param name="includePlaceholderTags">Forwarded unchanged to <c>MarianHelper.PreprocessLine</c>.</param>
/// <param name="includeTagPairs">Forwarded unchanged to <c>MarianHelper.PreprocessLine</c>.</param>
/// <returns>
/// A <see cref="FileInfo"/> for <c>sp_&lt;input file name&gt;</c> inside
/// <paramref name="directory"/>. The encoder's exit code is not inspected here, so the
/// returned file is not guaranteed to exist.
/// </returns>
internal static FileInfo PreprocessLanguage(
    FileInfo languageFile,
    DirectoryInfo directory,
    string languageCode,
    FileInfo spmModel,
    bool includePlaceholderTags,
    bool includeTagPairs)
{
    // Both outputs live in the target directory and are named after the input file.
    string cleanedPath = Path.Combine(directory.FullName, $"preprocessed_{languageFile.Name}");
    var cleanedFile = new FileInfo(cleanedPath);
    string encodedPath = Path.Combine(directory.FullName, $"sp_{languageFile.Name}");
    var encodedFile = new FileInfo(encodedPath);

    // Step 1: write the preprocessed version of every input line.
    using (var reader = languageFile.OpenText())
    {
        using (var writer = new StreamWriter(cleanedFile.FullName))
        {
            for (string rawLine = reader.ReadLine(); rawLine != null; rawLine = reader.ReadLine())
            {
                writer.WriteLine(
                    MarianHelper.PreprocessLine(rawLine, languageCode, includePlaceholderTags, includeTagPairs));
            }
        }
    }

    // Step 2: run the encoder over the preprocessed file and block until it finishes.
    // NOTE(review): the paths are not quoted, so a path containing spaces may be split
    // into several arguments - confirm how the helper passes the argument string on.
    string encoderArgs = $"{cleanedFile.FullName} --model {spmModel.FullName} --output {encodedFile.FullName}";
    MarianHelper.StartProcessInBackgroundWithRedirects("spm_encode.exe", encoderArgs).WaitForExit();

    return encodedFile;
}
/// <summary>
/// Preprocesses <paramref name="input"/> and launches
/// <c>TranslateBatchSentencePiece.bat</c> on the result without waiting for it to finish.
/// </summary>
/// <param name="input">Sentences to translate; also handed to the exit callback.</param>
/// <returns>
/// The process object returned by the helper. <c>BatchProcess_Exited</c> is subscribed to
/// its <see cref="Process.Exited"/> event with the input list and the expected output file.
/// </returns>
internal Process BatchTranslate(List<string> input)
{
    Log.Information($"Starting batch translator for model {this.SystemName}.");

    var cmd = "TranslateBatchSentencePiece.bat";

    FileInfo spInput = this.PreprocessInput(input);

    // Derive the output file by swapping ".<source>" for ".<target>" in the FILE NAME only.
    // Running the replacement over the full path would also rewrite any directory
    // component that happens to contain ".<source>" (for example a user profile folder),
    // which would point the output at a different directory.
    string outputName = spInput.Name.Replace($".{SourceCode}", $".{TargetCode}");
    FileInfo spOutput = new FileInfo(Path.Combine(spInput.DirectoryName, outputName));

    var args = $"{this.modelDir.FullName} {spInput.FullName} {spOutput.FullName} --log-level=info --quiet";

    var batchProcess = MarianHelper.StartProcessInBackgroundWithRedirects(cmd, args);

    // NOTE(review): if the helper has already started the process, an exit that happens
    // before this subscription would not invoke the callback - confirm in the helper.
    batchProcess.Exited += (x, y) => BatchProcess_Exited(input, spOutput, x, y);

    return batchProcess;
}
/// <summary>
/// Prepares a customized copy of the model and starts <c>marian.exe</c> training on it.
/// The method returns as soon as the training process has been launched.
/// </summary>
/// <param name="exitHandler">
/// Optional handler subscribed to the training process's <c>Exited</c> event;
/// ignored when <c>null</c>.
/// </param>
/// <remarks>
/// If copying the model directory throws, the failure is logged and the method returns
/// without starting training. NOTE(review): <paramref name="exitHandler"/> is never invoked
/// on that path - confirm callers do not wait on it.
/// </remarks>
public void Customize(EventHandler exitHandler)
{
    // First copy the model to a new directory.
    try
    {
        this.CopyModelDir(this.modelDir, this.customLabel);
    }
    catch (Exception ex)
    {
        Log.Information($"Customization failed: {ex.Message}");
        return;
    }

    // Preprocess input files.
    // NOTE(review): presumably this populates spSource/spTarget/spValidSource/spValidTarget
    // used below - confirm.
    this.PreprocessInput();

    var decoderYaml = this.customDir.GetFiles("decoder.yml").Single();
    var deserializer = new Deserializer();

    // Dispose the reader so the handle on decoder.yml is released as soon as parsing ends.
    MarianDecoderConfig decoderSettings;
    using (var decoderReader = decoderYaml.OpenText())
    {
        decoderSettings = deserializer.Deserialize<MarianDecoderConfig>(decoderReader);
    }

    MarianTrainerConfig trainingConfig;
    using (var reader = new StreamReader(FiskmoMTEngineSettings.Default.CustomizationBaseConfig))
    {
        trainingConfig = deserializer.Deserialize<MarianTrainerConfig>(reader);
    }

    // Overlay the base training config with paths specific to this customization.
    trainingConfig.TrainSets = new List<string>
    {
        spSource.FullName,
        spTarget.FullName
    };

    trainingConfig.ValidSets = new List<string>
    {
        spValidSource.FullName,
        spValidTarget.FullName
    };

    // NOTE(review): both entries use vocabs[0]; presumably source and target share one
    // vocabulary file - confirm.
    trainingConfig.vocabs = new List<string>
    {
        Path.Combine(this.customDir.FullName, decoderSettings.vocabs[0]),
        Path.Combine(this.customDir.FullName, decoderSettings.vocabs[0])
    };

    trainingConfig.validLog = Path.Combine(this.customDir.FullName, "valid.log");
    trainingConfig.log = Path.Combine(this.customDir.FullName, "train.log");
    trainingConfig.model = Path.Combine(this.customDir.FullName, decoderSettings.models.Single());

    // Write the merged configuration next to the copied model.
    var serializer = new Serializer();
    var configPath = Path.Combine(this.customDir.FullName, "customize.yml");
    using (var writer = File.CreateText(configPath))
    {
        serializer.Serialize(writer, trainingConfig, typeof(MarianTrainerConfig));
    }

    var trainingArgs = $"--config {configPath} --log-level=info --quiet";
    var trainProcess = MarianHelper.StartProcessInBackgroundWithRedirects("marian.exe", trainingArgs);

    if (exitHandler != null)
    {
        trainProcess.Exited += exitHandler;
    }
}