/// <summary>
/// Builds the "Tag", "Clean", "Random" and "Chaos" experiment folders under
/// <paramref name="expRootFolder"/> from already-tagged input files.
/// </summary>
/// <param name="beforeTagPath">Forwarded to <c>PrepareData.CreateAllFiles</c> as its first argument.</param>
/// <param name="afterTagPath">Forwarded to <c>PrepareData.CreateAllFiles</c> as its second argument.</param>
/// <param name="enuPath">Forwarded to <c>PrepareData.CreateAllFiles</c> as its third argument.</param>
/// <param name="allFolder">Folder passed to <c>PrepareData.CreateAllFiles</c> for the combined "all" files.</param>
/// <param name="expRootFolder">Root folder under which the four experiment sub-folders are placed.</param>
/// <param name="tag">Value handed to <c>PrepareData.SetTag</c> before the random/chaos sets are generated.</param>
/// <returns>
/// The Chaos, Clean, Random and Tag folder paths, in that order.
/// </returns>
/// <remarks>
/// The preparation runs eagerly when the method is called. It was previously an
/// iterator, so nothing ran until the result was enumerated and everything ran
/// again on each re-enumeration. The returned sequence is now a plain array that
/// can be enumerated any number of times without side effects.
/// </remarks>
private IEnumerable<string> PrepareExpSetFromRawTags(
    string beforeTagPath,
    string afterTagPath,
    string enuPath,
    string allFolder,
    string expRootFolder,
    string tag)
{
    // Suppose we already have the tagged files.
    string tagFolder = Path.Combine(expRootFolder, "Tag");
    string cleanFolder = Path.Combine(expRootFolder, "Clean");
    string randomFolder = Path.Combine(expRootFolder, "Random");
    string chaosFolder = Path.Combine(expRootFolder, "Chaos");

    // Create all.zh and all.en, which contain only the valid files.
    // NOTE(review): the locales are hard-coded here while the rest of the method
    // uses Cfg.SrcLocale / Cfg.TgtLocale - confirm they are meant to differ.
    var pairs = PrepareData.CreateAllFiles(beforeTagPath, afterTagPath, enuPath, allFolder, "zh", "en");
    var list = Common.ReadPairs(pairs.Item1, pairs.Item2);

    // NOTE(review): the meaning of "5000, 5000, true" is defined by
    // PrepareData.SplitPairData, which is not visible here - verify before changing.
    PrepareData.SplitPairData(list, tagFolder, Cfg.SrcLocale, Cfg.TgtLocale, Cfg.SrcVocabSize, Cfg.TgtVocabSize, 5000, 5000, true);
    PrepareData.FromTagToClean(tagFolder, cleanFolder, Cfg.SrcLocale, Cfg.TgtLocale);

    // SetTagRatio and SetTag are called before the random/chaos generation;
    // presumably those steps read the values set here - keep this order.
    PrepareData.SetTagRatio(pairs.Item1);
    PrepareData.SetTag(tag);
    PrepareData.FromCleanToRandomTag(cleanFolder, randomFolder, Cfg.SrcLocale, Cfg.TgtLocale);
    PrepareData.FromCleanToChaosTag(cleanFolder, chaosFolder, Cfg.SrcLocale, Cfg.TgtLocale);

    // Batch-command generation is currently commented out.
    //PrepareData.CreateBatchCommand(Cfg.SrcLocale, Cfg.TgtLocale, tagFolder, Cfg.TrainSteps);
    //PrepareData.CreateBatchCommand(Cfg.SrcLocale, Cfg.TgtLocale, cleanFolder, Cfg.TrainSteps);
    //PrepareData.CreateBatchCommand(Cfg.SrcLocale, Cfg.TgtLocale, randomFolder, Cfg.TrainSteps);

    // Same items and order as the former yield-return sequence.
    return new[] { chaosFolder, cleanFolder, randomFolder, tagFolder };
}
/// <summary>
/// Runs <c>MatchPairFilesByCorpus</c> over every entry of <c>Cfg.UsedCorpora</c>,
/// flattens the results into one sequence and passes it to
/// <c>PrepareData.SplitPairData</c> with <c>Cfg.WorkFolder</c> as the target folder.
/// </summary>
public static void MatchPairFiles()
{
    var corpora = Cfg.UsedCorpora;

    // Flatten the per-corpus results into a single sequence.
    var matchedPairs = corpora.SelectMany(corpus => MatchPairFilesByCorpus(corpus));

    PrepareData.SplitPairData(
        matchedPairs,
        Cfg.WorkFolder,
        Cfg.SrcLocale,
        Cfg.TgtLocale,
        Cfg.SrcVocabSize,
        Cfg.TgtVocabSize,
        5000,
        5000,
        true);
}