Ejemplo n.º 1
0
        /// <summary>
        /// Passes every line of <paramref name="languageFile"/> through
        /// <c>MarianHelper.PreprocessLine</c>, writes the result to a
        /// <c>preprocessed_&lt;file name&gt;</c> file in <paramref name="directory"/>, and then
        /// runs <c>spm_encode.exe</c> on that file, blocking until the process has exited.
        /// </summary>
        /// <param name="languageFile">Text file to preprocess; read line by line.</param>
        /// <param name="directory">Directory in which both output files are created.</param>
        /// <param name="languageCode">Language code forwarded to <c>MarianHelper.PreprocessLine</c>.</param>
        /// <param name="spmModel">Model file passed to <c>spm_encode.exe</c> via <c>--model</c>.</param>
        /// <param name="includePlaceholderTags">Forwarded to <c>MarianHelper.PreprocessLine</c>.</param>
        /// <param name="includeTagPairs">Forwarded to <c>MarianHelper.PreprocessLine</c>.</param>
        /// <returns>
        /// The <c>sp_&lt;file name&gt;</c> file in <paramref name="directory"/> that was given to
        /// <c>spm_encode.exe</c> as its <c>--output</c>. The exit code of the process is not
        /// inspected here, so the caller should not assume the file exists if encoding failed.
        /// </returns>
        internal static FileInfo PreprocessLanguage(
            FileInfo languageFile,
            DirectoryInfo directory,
            string languageCode,
            FileInfo spmModel,
            bool includePlaceholderTags,
            bool includeTagPairs)
        {
            var preprocessedFile = new FileInfo(Path.Combine(directory.FullName, $"preprocessed_{languageFile.Name}"));
            var spFile           = new FileInfo(Path.Combine(directory.FullName, $"sp_{languageFile.Name}"));

            using (var rawFile = languageFile.OpenText())
            using (var preprocessedWriter = new StreamWriter(preprocessedFile.FullName))
            {
                string line;
                while ((line = rawFile.ReadLine()) != null)
                {
                    var preprocessedLine = MarianHelper.PreprocessLine(line, languageCode, includePlaceholderTags, includeTagPairs);
                    preprocessedWriter.WriteLine(preprocessedLine);
                }
            }

            // Every path is quoted: callers pass locations such as the temp directory, which
            // may contain spaces, and an unquoted path would be split into several arguments.
            var spArgs     = $"\"{preprocessedFile.FullName}\" --model \"{spmModel.FullName}\" --output \"{spFile.FullName}\"";
            var spmProcess = MarianHelper.StartProcessInBackgroundWithRedirects("spm_encode.exe", spArgs);

            // The encoded file is needed by the caller right away, so wait for the encoder.
            spmProcess.WaitForExit();
            return spFile;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs <c>MarianHelper.PreprocessLanguage</c> on the customization source/target files
        /// and on the validation source/target files, storing the four resulting files in
        /// <c>spSource</c>, <c>spTarget</c>, <c>spValidSource</c> and <c>spValidTarget</c>.
        /// The <c>source.spm</c> and <c>target.spm</c> models are looked up in <c>customDir</c>;
        /// <c>Single()</c> throws if either lookup does not yield exactly one file.
        /// </summary>
        private void PreprocessInput()
        {
            var sourceModel = customDir.GetFiles("source.spm").Single();
            var targetModel = customDir.GetFiles("target.spm").Single();

            // Training pair.
            spSource = MarianHelper.PreprocessLanguage(
                customSource,
                customDir,
                sourceCode,
                sourceModel,
                includePlaceholderTags,
                includeTagPairs);
            spTarget = MarianHelper.PreprocessLanguage(
                customTarget,
                customDir,
                targetCode,
                targetModel,
                includePlaceholderTags,
                includeTagPairs);

            // Validation pair.
            spValidSource = MarianHelper.PreprocessLanguage(
                validationSource,
                customDir,
                sourceCode,
                sourceModel,
                includePlaceholderTags,
                includeTagPairs);
            spValidTarget = MarianHelper.PreprocessLanguage(
                validationTarget,
                customDir,
                targetCode,
                targetModel,
                includePlaceholderTags,
                includeTagPairs);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Preprocesses <paramref name="input"/> into a file and starts
        /// <c>TranslateBatchSentencePiece.bat</c> on it in the background.
        /// </summary>
        /// <param name="input">Source lines to translate.</param>
        /// <returns>
        /// The started batch process. <c>BatchProcess_Exited</c> is subscribed to its
        /// <c>Exited</c> event and receives <paramref name="input"/> and the output file.
        /// </returns>
        internal Process BatchTranslate(List <string> input)
        {
            Log.Information($"Starting batch translator for model {this.SystemName}.");

            var cmd = "TranslateBatchSentencePiece.bat";

            FileInfo spInput = this.PreprocessInput(input);

            // Swap only the file extension. A plain string Replace over the full path would
            // also rewrite any directory component that happens to contain ".<SourceCode>"
            // (for example a user folder named "anna.fi"), yielding a non-existent location.
            FileInfo spOutput = new FileInfo(
                Path.ChangeExtension(spInput.FullName, $".{TargetCode}"));

            var args         = $"{this.modelDir.FullName} {spInput.FullName} {spOutput.FullName} --log-level=info --quiet";
            var batchProcess = MarianHelper.StartProcessInBackgroundWithRedirects(cmd, args);

            batchProcess.Exited += (x, y) => BatchProcess_Exited(input, spOutput, x, y);
            return batchProcess;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Writes <paramref name="input"/> line by line, UTF-8 encoded, to a new
        /// <c>&lt;guid&gt;.&lt;SourceCode&gt;</c> file in the temp directory and passes that file
        /// to <c>MarianHelper.PreprocessLanguage</c> together with the <c>source.spm</c> model
        /// found in <c>modelDir</c>.
        /// </summary>
        /// <param name="input">Lines to write and preprocess.</param>
        /// <returns>The file returned by <c>MarianHelper.PreprocessLanguage</c>.</returns>
        private FileInfo PreprocessInput(List <string> input)
        {
            string tempDir = Path.GetTempPath();
            string rawName = $"{Guid.NewGuid()}.{SourceCode}";
            var rawFile    = new FileInfo(Path.Combine(tempDir, rawName));

            using (var writer = new StreamWriter(rawFile.FullName, append: true, encoding: Encoding.UTF8))
            {
                foreach (var sentence in input)
                {
                    writer.WriteLine(sentence);
                }
            }

            var sentencePieceModel = modelDir.GetFiles("source.spm").Single();

            return MarianHelper.PreprocessLanguage(
                rawFile,
                new DirectoryInfo(tempDir),
                SourceCode,
                sentencePieceModel,
                includePlaceholderTags,
                includeTagPairs);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Copies the model into a new directory, preprocesses the customization input,
        /// writes a <c>customize.yml</c> training configuration into <c>customDir</c>, and
        /// starts <c>marian.exe</c> with that configuration in the background.
        /// </summary>
        /// <param name="exitHandler">
        /// Optional handler subscribed to the training process's <c>Exited</c> event;
        /// ignored when <c>null</c>.
        /// </param>
        /// <remarks>
        /// If copying the model directory throws, the failure is logged and the method
        /// returns without starting training. Exceptions from the later steps are not caught.
        /// </remarks>
        public void Customize(EventHandler exitHandler)
        {
            // First copy the model to a new directory.
            try
            {
                this.CopyModelDir(this.modelDir, this.customLabel);
            }
            catch (Exception ex)
            {
                Log.Information($"Customization failed: {ex.Message}");
                return;
            }

            // Preprocess input files.
            this.PreprocessInput();

            var decoderYaml  = this.customDir.GetFiles("decoder.yml").Single();
            var deserializer = new Deserializer();

            // Dispose the reader so the handle on decoder.yml is released immediately.
            MarianDecoderConfig decoderSettings;
            using (var decoderReader = decoderYaml.OpenText())
            {
                decoderSettings = deserializer.Deserialize <MarianDecoderConfig>(decoderReader);
            }

            MarianTrainerConfig trainingConfig;

            using (var reader = new StreamReader(FiskmoMTEngineSettings.Default.CustomizationBaseConfig))
            {
                trainingConfig = deserializer.Deserialize <MarianTrainerConfig>(reader);
            }

            trainingConfig.TrainSets = new List <string>
            {
                spSource.FullName,
                spTarget.FullName
            };

            trainingConfig.ValidSets = new List <string>
            {
                spValidSource.FullName,
                spValidTarget.FullName
            };

            // NOTE(review): both entries use vocabs[0]; presumably source and target share one
            // vocabulary. Confirm that vocabs[1] is not intended for the second entry.
            trainingConfig.vocabs = new List <string>
            {
                Path.Combine(this.customDir.FullName, decoderSettings.vocabs[0]),
                Path.Combine(this.customDir.FullName, decoderSettings.vocabs[0])
            };

            trainingConfig.validLog = Path.Combine(this.customDir.FullName, "valid.log");
            trainingConfig.log      = Path.Combine(this.customDir.FullName, "train.log");

            trainingConfig.model = Path.Combine(this.customDir.FullName, decoderSettings.models.Single());

            var serializer = new Serializer();
            var configPath = Path.Combine(this.customDir.FullName, "customize.yml");

            using (var writer = File.CreateText(configPath))
            {
                serializer.Serialize(writer, trainingConfig, typeof(MarianTrainerConfig));
            }

            // Quote the path so a customization directory containing spaces is still passed
            // to marian.exe as a single argument.
            var trainingArgs = $"--config \"{configPath}\" --log-level=info --quiet";

            var trainProcess = MarianHelper.StartProcessInBackgroundWithRedirects("marian.exe", trainingArgs);

            if (exitHandler != null)
            {
                trainProcess.Exited += exitHandler;
            }
        }