/// <summary>
/// Creates a serializer from a fresh <c>SerializerBuilder</c> that preserves default values,
/// wraps the type inspector in a <c>SortedTypeInspector</c> and disables aliases.
/// No tag mappings are registered on the builder.
/// </summary>
/// <returns>The serializer produced by the configured builder.</returns>
public static ISerializer CreateUntaggedSerializer()
        {
            // Step 1: keep default-valued members in the output.
            var withDefaults = new SerializerBuilder()
                               .ConfigureDefaultValuesHandling(DefaultValuesHandling.Preserve);

            // Step 2: route type inspection through SortedTypeInspector.
            var withSortedInspector = withDefaults.WithTypeInspector(inner => new SortedTypeInspector(inner));

            // Step 3: turn aliases off and materialize the serializer.
            var withoutAliases = withSortedInspector.DisableAliases();

            return withoutAliases.Build();
        }
Exemple #2
0
        /// <summary>
        /// Creates a serializer that applies <c>namingConvention</c> (defined outside this method)
        /// and is configured with <c>DefaultValuesHandling.OmitDefaults</c>.
        /// </summary>
        /// <returns>The serializer produced by the configured builder.</returns>
        public static ISerializer CreateSerializer()
        {
            // Same two configuration calls as before, in the same order, expressed as one fluent chain.
            return new SerializerBuilder()
                   .WithNamingConvention(namingConvention)
                   .ConfigureDefaultValuesHandling(DefaultValuesHandling.OmitDefaults)
                   .Build();
        }
Exemple #3
0
        /// <summary>
        /// Serializes <paramref name="datatable"/> to <paramref name="writer"/> with default values preserved.
        /// When <paramref name="yamlColumnNames"/> is supplied, the rows are first passed through
        /// <c>ConvertDataTableWithYamlColumns</c> and the converted result is serialized instead.
        /// </summary>
        /// <param name="writer">Destination for the serialized output.</param>
        /// <param name="datatable">Rows to serialize, each a column-name to value dictionary.</param>
        /// <param name="yamlColumnNames">Optional column description; <c>null</c> serializes the rows as-is.</param>
        public static void DataToYaml(TextWriter writer, IEnumerable <Dictionary <string, object> > datatable, YamlColumnNamesType yamlColumnNames = null)
        {
            // The serializer is built before the (optional) conversion runs, as in the original flow.
            var yamlSerializer = new SerializerBuilder()
                                 .ConfigureDefaultValuesHandling(DefaultValuesHandling.Preserve)
                                 .Build();

            yamlSerializer.Serialize(
                writer,
                yamlColumnNames == null
                    ? datatable
                    : ConvertDataTableWithYamlColumns(datatable, yamlColumnNames));
        }
        /// <summary>
        /// Creates a serializer that registers every tag mapping found in
        /// <c>nodeTypeResolver.Value.tagMappings</c>, preserves default values, wraps the type
        /// inspector in a <c>SortedTypeInspector</c> and disables aliases.
        /// </summary>
        /// <returns>The serializer produced by the configured builder.</returns>
        public static ISerializer CreateDefaultSerializer()
        {
            var serializerBuilder = new SerializerBuilder();

            // Register each (tag, type) pair before the remaining options are applied.
            foreach ((string mappedTag, var mappedType) in nodeTypeResolver.Value.tagMappings)
            {
                serializerBuilder.WithTagMapping(mappedTag, mappedType);
            }

            var withDefaults        = serializerBuilder.ConfigureDefaultValuesHandling(DefaultValuesHandling.Preserve);
            var withSortedInspector = withDefaults.WithTypeInspector(inner => new SortedTypeInspector(inner));
            var withoutAliases      = withSortedInspector.DisableAliases();

            return withoutAliases.Build();
        }
Exemple #5
0
        /// <summary>
        /// Serializes <paramref name="value"/> to <paramref name="filePath"/> as JSON or YAML,
        /// creating the containing directory first when the path has one.
        /// The file is created (or truncated) and written as UTF-8 without a byte-order mark.
        /// </summary>
        /// <typeparam name="T">Type of the value being serialized.</typeparam>
        /// <param name="filePath">Destination file path; may be a bare file name.</param>
        /// <param name="value">Object to serialize.</param>
        /// <param name="format">Output format. A value other than <c>Format.Json</c> or
        /// <c>Format.Yaml</c> matches no case, so nothing is written to the created file.</param>
        public static void WriteFile <T>(string filePath, T value, Format format)
        {
            var directory = Path.GetDirectoryName(filePath);

            // GetDirectoryName yields "" for a bare file name and null for a root path;
            // CreateDirectory throws on both, so only create a directory when there is one.
            // CreateDirectory is a no-op for an existing directory, so no Exists check is needed.
            if (!string.IsNullOrEmpty(directory))
            {
                Directory.CreateDirectory(directory);
            }

            using (var file = new FileStream(filePath, FileMode.Create, FileAccess.Write, FileShare.ReadWrite))
            {
                // UTF8Encoding(false): do not emit a BOM.
                using (var writer = new StreamWriter(file, new UTF8Encoding(false)))
                {
                    switch (format)
                    {
                    case Format.Json:
                    {
                        using (var jsonTextWriter = new JsonTextWriter(writer))
                        {
                            jsonTextWriter.Formatting = Formatting.Indented;

                            // Indented output, null-valued members skipped.
                            var jsonSerializer = new JsonSerializer()
                            {
                                Formatting        = Formatting.Indented,
                                NullValueHandling = NullValueHandling.Ignore,
                            };

                            jsonSerializer.Serialize(jsonTextWriter, value);
                        }
                    }
                    break;

                    case Format.Yaml:
                    {
                        var builder = new SerializerBuilder();

                        // Mirror the JSON branch: leave null-valued members out.
                        builder.ConfigureDefaultValuesHandling(DefaultValuesHandling.OmitNull);

                        var yamlSerializer = builder.Build();

                        yamlSerializer.Serialize(writer, value);
                    }
                    break;
                    }
                }
            }
        }
Exemple #6
0
        /// <summary>
        /// Groups the drawables of <paramref name="graphData"/> by their exact runtime type and
        /// serializes the grouped lists to the file at <paramref name="path"/>, using a
        /// serializer configured with <c>DefaultValuesHandling.OmitDefaults</c>.
        /// </summary>
        /// <param name="path">Path of the file to write.</param>
        /// <param name="graphData">Source of the drawables to save.</param>
        /// <remarks>
        /// Only <c>Rectangle</c>, <c>Ellipse</c>, <c>Pie</c> and <c>PolyLine</c> have buckets; a drawable
        /// whose runtime type is anything else makes the dictionary lookup throw.
        /// </remarks>
        public void SaveFile(string path, GraphData graphData)
        {
            // One bucket per supported drawable type, declared in output order.
            var buckets = new Dictionary <Type, LinkedList <DrawableElement> >
            {
                [typeof(Rectangle)] = new LinkedList <DrawableElement>(),
                [typeof(Ellipse)]   = new LinkedList <DrawableElement>(),
                [typeof(Pie)]       = new LinkedList <DrawableElement>(),
                [typeof(PolyLine)]  = new LinkedList <DrawableElement>()
            };

            foreach (var shape in graphData.drawables)
            {
                var bucket = buckets[shape.GetType()];
                bucket.AddLast(shape);
            }

            var yamlBuilder = new SerializerBuilder();
            yamlBuilder.ConfigureDefaultValuesHandling(DefaultValuesHandling.OmitDefaults);

            using (var output = new StreamWriter(path))
            {
                // The serializer is built only after the output file has been opened.
                var yamlSerializer = yamlBuilder.Build();
                yamlSerializer.Serialize(output, buckets);
            }
        }
        /// <summary>
        /// Runs the model customization (fine-tuning) pipeline: copies the base model, stores the
        /// optional post-customization batch, copies and preprocesses the training files,
        /// optionally generates alignments, runs an initial evaluation, writes the trainer
        /// configuration file and finally starts training.
        /// Progress is reported through <c>OnProgressChanged</c> between stages.
        /// </summary>
        /// <returns>
        /// The process returned by <c>StartTraining</c>, or <c>null</c> when copying the model
        /// directory or saving the model config throws (the failure is logged).
        /// </returns>
        public Process Customize()
        {
            this.OnProgressChanged(new ProgressChangedEventArgs(1, new MarianCustomizationStatus(CustomizationStep.Copying_model, null)));
            //First copy the model to new dir
            try
            {
                this.CopyModelDir(this.modelDir, this.customLabel);
                //Save model config as soon as the model dir exists
                this.customModel.SaveModelConfig();
            }
            catch (Exception ex)
            {
                Log.Information($"Customization failed: {ex.Message}");
                return(null);
            }

            //Save the batch to translate after customization to a file (to be batch translated after successful exit)
            if (this.postCustomizationBatch != null && this.postCustomizationBatch.Count > 0)
            {
                FileInfo postCustomizationBatchFile = new FileInfo(Path.Combine(this.customDir.FullName, OpusCatMTEngineSettings.Default.PostFinetuneBatchName));
                using (var writer = postCustomizationBatchFile.CreateText())
                {
                    foreach (var sourceString in this.postCustomizationBatch)
                    {
                        writer.WriteLine(sourceString);
                    }
                }
            }

            this.OnProgressChanged(new ProgressChangedEventArgs(2, new MarianCustomizationStatus(CustomizationStep.Copying_training_files, null)));
            //Copy raw files to model dir
            this.customSource = this.customSource.CopyTo(Path.Combine(this.customDir.FullName, "custom.source"));
            this.customTarget = this.customTarget.CopyTo(Path.Combine(this.customDir.FullName, "custom.target"));

            this.OnProgressChanged(new ProgressChangedEventArgs(3, new MarianCustomizationStatus(CustomizationStep.Preprocessing_training_files, null)));
            //Preprocess input files
            this.PreprocessInput();

            //Single() throws unless the custom dir contains exactly one decoder.yml
            var decoderYaml  = this.customDir.GetFiles("decoder.yml").Single();
            var deserializer = new Deserializer();

            //Dispose the reader once the decoder config has been read, so the
            //handle on decoder.yml is not left open
            MarianDecoderConfig decoderSettings;
            using (var decoderReader = decoderYaml.OpenText())
            {
                decoderSettings = deserializer.Deserialize <MarianDecoderConfig>(decoderReader);
            }

            if (this.guidedAlignment)
            {
                //Generate alignments for fine-tuning corpus
                this.alignmentFile = new FileInfo(Path.Combine(this.customDir.FullName, "custom.alignments"));
                MarianHelper.GenerateAlignments(this.spSource, this.spTarget, this.alignmentFile, this.model.AlignmentPriorsFile);

                //Generate alignments for validation set (for evaluating fine-tuning effect on alignment)
                this.validAlignmentFile = new FileInfo(Path.Combine(this.customDir.FullName, "combined.alignments"));
                MarianHelper.GenerateAlignments(this.spValidSource, this.spValidTarget, this.validAlignmentFile, this.model.AlignmentPriorsFile);
            }

            this.OnProgressChanged(new ProgressChangedEventArgs(4, new MarianCustomizationStatus(CustomizationStep.Initial_evaluation, null)));
            //Do the initial evaluation
            var initialValidProcess = this.model.TranslateAndEvaluate(
                this.spValidSource,
                new FileInfo(Path.Combine(this.customDir.FullName, "valid.0.txt")),
                this.spValidTarget,
                OpusCatMTEngineSettings.Default.OODValidSetSize,
                this.sourceLanguage,
                this.targetLanguage,
                true
                );

            //Wait for the initial valid to finish before starting customization
            //(TODO: make sure this is not done on UI thread)
            initialValidProcess.WaitForExit();

            this.OnProgressChanged(new ProgressChangedEventArgs(6, new MarianCustomizationStatus(CustomizationStep.Finetuning, null)));

            //Use the initial translation time as basis for estimating the duration of validation file
            //translation
            this.trainingLog.EstimatedTranslationDuration = Convert.ToInt32((initialValidProcess.ExitTime - initialValidProcess.StartTime).TotalSeconds);

            MarianTrainerConfig trainingConfig;

            var baseCustomizeYmlPath =
                HelperFunctions.GetLocalAppDataPath(
                    OpusCatMTEngineSettings.Default.CustomizationBaseConfig);

            var processDir = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);

            //Make sure there's a customization file: if it is missing, copy the one
            //that sits next to the executing assembly
            if (!File.Exists(baseCustomizeYmlPath))
            {
                File.Copy(
                    Path.Combine(processDir, OpusCatMTEngineSettings.Default.CustomizationBaseConfig),
                    baseCustomizeYmlPath);
            }

            //deserialize yaml file
            using (var reader = new StreamReader(baseCustomizeYmlPath))
            {
                trainingConfig = deserializer.Deserialize <MarianTrainerConfig>(reader);
            }

            trainingConfig.trainSets = new List <string>
            {
                this.spSource.FullName,
                this.spTarget.FullName
            };

            trainingConfig.ValidSets = new List <string>
            {
                this.spValidSource.FullName,
                this.spValidTarget.FullName
            };

            //NOTE(review): both entries use decoderSettings.vocabs[0] - presumably the
            //source and target sides share one vocabulary; confirm this is intended
            trainingConfig.vocabs = new List <string>
            {
                Path.Combine(this.customDir.FullName, decoderSettings.vocabs[0]),
                Path.Combine(this.customDir.FullName, decoderSettings.vocabs[0])
            };

            //Pick the validation script matching the segmentation method and copy it into the
            //custom dir; the path is wrapped in double quotes in the config.
            //NOTE(review): File.Copy without overwrite throws if the script already exists
            //in the custom dir - confirm the dir never contains it at this point
            switch (this.segmentationMethod)
            {
            case ".bpe":
                string validScriptPath = Path.Combine(this.customDir.FullName, "ValidateBpe.bat");
                trainingConfig.validScriptPath =
                    $"\"{validScriptPath}\"";
                File.Copy(
                    Path.Combine(processDir, "ValidateBpe.bat"), validScriptPath);
                break;

            case ".spm":
                validScriptPath = Path.Combine(this.customDir.FullName, "ValidateSp.bat");
                trainingConfig.validScriptPath =
                    $"\"{validScriptPath}\"";
                File.Copy(
                    Path.Combine(processDir, "ValidateSp.bat"), validScriptPath);
                break;

            default:
                //Any other segmentation method: no validation script is configured
                break;
            }

            trainingConfig.validScriptArgs =
                new List <string> {
                $"{spValidTarget.FullName}",
                $"OOD{OpusCatMTEngineSettings.Default.OODValidSetSize.ToString()}"
            };
            trainingConfig.validTranslationOutput = Path.Combine(this.customDir.FullName, "valid.{U}.txt");

            if (this.guidedAlignment)
            {
                trainingConfig.guidedAlignment = this.alignmentFile.FullName;
            }

            trainingConfig.validLog = Path.Combine(this.customDir.FullName, "valid.log");
            trainingConfig.log      = Path.Combine(this.customDir.FullName, "train.log");

            //Single() throws unless the decoder config lists exactly one model
            trainingConfig.model = Path.Combine(this.customDir.FullName, decoderSettings.models.Single());

            var builder = new SerializerBuilder();

            //Leave null-valued settings out of the written config
            builder.ConfigureDefaultValuesHandling(DefaultValuesHandling.OmitNull);
            var serializer = builder.Build();

            //Write the completed trainer config into the custom dir
            var configPath = Path.Combine(this.customDir.FullName, OpusCatMTEngineSettings.Default.CustomizationBaseConfig);

            using (var writer = File.CreateText(configPath))
            {
                serializer.Serialize(writer, trainingConfig, typeof(MarianTrainerConfig));
            }

            Process trainProcess = this.StartTraining();

            return(trainProcess);
        }