Beispiel #1
0
        /// <summary>
        /// Prepares the "Tag", "Clean", "Random" and "Chaos" experiment folders under
        /// <paramref name="expRootFolder"/> and yields the four folder paths.
        /// </summary>
        /// <remarks>
        /// This is an iterator method: none of the preparation steps run until the returned
        /// sequence is first enumerated, and each new enumeration runs all of them again.
        /// </remarks>
        /// <param name="beforeTagPath">Forwarded as the first argument of <c>PrepareData.CreateAllFiles</c>.</param>
        /// <param name="afterTagPath">Forwarded as the second argument of <c>PrepareData.CreateAllFiles</c>.</param>
        /// <param name="enuPath">Forwarded as the third argument of <c>PrepareData.CreateAllFiles</c>.</param>
        /// <param name="allFolder">Forwarded as the fourth argument of <c>PrepareData.CreateAllFiles</c>.</param>
        /// <param name="expRootFolder">Root directory under which the four experiment sub-folder paths are built.</param>
        /// <param name="tag">Value handed to <c>PrepareData.SetTag</c> before the random/chaos sets are produced.</param>
        /// <returns>The chaos, clean, random and tag folder paths, in that order.</returns>
        private IEnumerable<string> PrepareExpSetFromRawTags(string beforeTagPath, string afterTagPath, string enuPath, string allFolder, string expRootFolder, string tag)
        {
            // Precondition carried over from the original note: the tagged files already exist.
            string chaosFolder  = Path.Combine(expRootFolder, "Chaos");
            string randomFolder = Path.Combine(expRootFolder, "Random");
            string cleanFolder  = Path.Combine(expRootFolder, "Clean");
            string tagFolder    = Path.Combine(expRootFolder, "Tag");

            // Produce the combined "zh" / "en" files, then load them back as pairs.
            var allFiles = PrepareData.CreateAllFiles(beforeTagPath, afterTagPath, enuPath, allFolder, "zh", "en");
            var pairList = Common.ReadPairs(allFiles.Item1, allFiles.Item2);

            // Tag set first; the clean set is derived from it.
            PrepareData.SplitPairData(
                pairList,
                tagFolder,
                Cfg.SrcLocale,
                Cfg.TgtLocale,
                Cfg.SrcVocabSize,
                Cfg.TgtVocabSize,
                5000,
                5000,
                true);
            PrepareData.FromTagToClean(tagFolder, cleanFolder, Cfg.SrcLocale, Cfg.TgtLocale);

            // Tag ratio and tag are set on PrepareData before the random and chaos variants are generated.
            PrepareData.SetTagRatio(allFiles.Item1);
            PrepareData.SetTag(tag);
            PrepareData.FromCleanToRandomTag(cleanFolder, randomFolder, Cfg.SrcLocale, Cfg.TgtLocale);
            PrepareData.FromCleanToChaosTag(cleanFolder, chaosFolder, Cfg.SrcLocale, Cfg.TgtLocale);

            // Batch command generation (PrepareData.CreateBatchCommand) for the tag, clean and
            // random folders is currently disabled.

            foreach (string folder in new[] { chaosFolder, cleanFolder, randomFolder, tagFolder })
            {
                yield return folder;
            }
        }
Beispiel #2
0
        /// <summary>
        /// Gathers the results of <c>MatchPairFilesByCorpus</c> for every entry in
        /// <c>Cfg.UsedCorpora</c> and hands the combined sequence to
        /// <c>PrepareData.SplitPairData</c>, targeting <c>Cfg.WorkFolder</c>.
        /// </summary>
        /// <remarks>
        /// The combined sequence is passed on without being materialized first.
        /// </remarks>
        public static void MatchPairFiles()
        {
            var matchedPairs = Cfg.UsedCorpora.SelectMany(corpus => MatchPairFilesByCorpus(corpus));

            PrepareData.SplitPairData(
                matchedPairs,
                Cfg.WorkFolder,
                Cfg.SrcLocale,
                Cfg.TgtLocale,
                Cfg.SrcVocabSize,
                Cfg.TgtVocabSize,
                5000,
                5000,
                true);
        }