/// <summary>
/// Gathers the lines of every file found in <c>Cfg.NonlabeledFolder</c> into a single
/// raw temp file and pushes that file through the pre-process, word-break and
/// post-process stages of <c>DataProcessing</c>.
/// The final output is written to <c>Cfg.NonSupTextFolder</c> as
/// <c>{Cfg.BatchName}.{Cfg.Locale}.txt</c>; the intermediate files live in <c>Cfg.TmpFolder</c>.
/// </summary>
private void Cleanup()
{
    // Lazy sequence: files are only opened once the sequence is enumerated
    // by File.WriteAllLines below.
    var allLines =
        from file in Directory.EnumerateFiles(Cfg.NonlabeledFolder)
        from line in File.ReadLines(file)
        select line;

    // All intermediate files share the same "<TmpName>.<Locale>" stem and differ
    // only by extension.
    string stem = $"{TmpName}.{Cfg.Locale}";
    string rawFile = Path.Combine(Cfg.TmpFolder, stem + ".raw");
    string preFile = Path.Combine(Cfg.TmpFolder, stem + ".pre");
    string wordBreakFile = Path.Combine(Cfg.TmpFolder, stem + ".wbr");
    string outputFile = Path.Combine(Cfg.NonSupTextFolder, $"{Cfg.BatchName}.{Cfg.Locale}.txt");

    var processor = new DataProcessing(Cfg);

    // Stage 0: materialize the concatenated input.
    File.WriteAllLines(rawFile, allLines);

    // Stages 1-3: each stage consumes the previous stage's output file.
    processor.PreProcessFile(rawFile, preFile);
    processor.WordBreakFile(preFile, wordBreakFile);
    processor.PostProcessFile(wordBreakFile, outputFile);
}
/// <summary>
/// Gathers the lines of every file in <paramref name="inputFolder"/> into a single raw
/// temp file and runs it through the pre-process, word-break and post-process stages
/// of <c>DataProcessing</c>. All files produced are placed in <c>Cfg.TmpFolder</c>.
/// </summary>
/// <param name="inputFolder">Folder whose files are read line by line and concatenated.</param>
/// <param name="type">Tag embedded in the temp file names (<c>{TmpName}.{type}.{Cfg.Locale}.*</c>).</param>
/// <returns>The path of the post-processed (<c>.post</c>) file.</returns>
private string Cleanup(string inputFolder, string type)
{
    // Lazy sequence: files are only opened once File.WriteAllLines enumerates it.
    var allLines =
        from file in Directory.EnumerateFiles(inputFolder)
        from line in File.ReadLines(file)
        select line;

    // Every temp file shares the "<TmpName>.<type>.<Locale>" stem; only the extension varies.
    string stem = $"{TmpName}.{type}.{Cfg.Locale}";
    string rawFile = Path.Combine(Cfg.TmpFolder, stem + ".raw");
    string preFile = Path.Combine(Cfg.TmpFolder, stem + ".pre");
    string wordBreakFile = Path.Combine(Cfg.TmpFolder, stem + ".wbr");
    string postFile = Path.Combine(Cfg.TmpFolder, stem + ".post");

    var processor = new DataProcessing(Cfg);

    // Stage 0: materialize the concatenated input.
    File.WriteAllLines(rawFile, allLines);

    // Stages 1-3: each stage reads the file written by the stage before it.
    processor.PreProcessFile(rawFile, preFile);
    processor.WordBreakFile(preFile, wordBreakFile);
    processor.PostProcessFile(wordBreakFile, postFile);

    return postFile;
}
/// <summary>
/// Runs the similarity job: the file at <c>Cfg.SimilarityTestPath</c> is pre-processed,
/// word-broken and post-processed into temp files under <c>Cfg.TmpFolder</c>; then, after
/// <c>PrepareSimilarity()</c>, every post-processed line is passed to
/// <c>ProcessSingleLine</c> and the flattened results are written to
/// <c>Cfg.SimDetailPath</c>. Finally <c>BriefList</c> is written to <c>Cfg.SimBriefPath</c>.
/// </summary>
public void Run()
{
    var processor = new DataProcessing(Cfg);

    // Temp files share the "<TmpName>.<Locale>" stem and differ only by extension.
    string stem = $"{TmpName}.{Cfg.Locale}";
    string preFile = Path.Combine(Cfg.TmpFolder, stem + ".pre");
    string wordBreakFile = Path.Combine(Cfg.TmpFolder, stem + ".wbr");
    string postFile = Path.Combine(Cfg.TmpFolder, stem + ".post");

    // Text pipeline: each stage consumes the previous stage's output file.
    processor.PreProcessFile(Cfg.SimilarityTestPath, preFile);
    processor.WordBreakFile(preFile, wordBreakFile);
    processor.PostProcessFile(wordBreakFile, postFile);

    PrepareSimilarity();

    // Lazy: ProcessSingleLine is only invoked while the detail file is being written.
    var detailLines =
        from line in File.ReadLines(postFile)
        from detail in ProcessSingleLine(line)
        select detail;

    // NOTE(review): the detail file must be written before the brief file. BriefList is
    // presumably populated by ProcessSingleLine during the enumeration above - confirm
    // before reordering these two writes.
    File.WriteAllLines(Cfg.SimDetailPath, detailLines);
    File.WriteAllLines(Cfg.SimBriefPath, BriefList);
}
/// <summary>
/// Runs the prediction job: the file at <c>Cfg.PredictTestPath</c> is pre-processed,
/// word-broken and post-processed, converted by <c>Digitalize.Run</c> into a data file,
/// and handed to <c>RunPredict</c>. Each line of the prediction result is then paired
/// with the corresponding post-processed line (tab-separated, result first) and the
/// pairs are written to <c>Cfg.PredictResultPath</c>.
/// All intermediate files are placed in <c>Cfg.TmpFolder</c>.
/// </summary>
public void Run()
{
    var processor = new DataProcessing(Cfg);
    var digitizer = new Digitalize(Cfg);

    // Temp files share the "<TmpName>.<Locale>" stem and differ only by extension.
    string stem = $"{TmpName}.{Cfg.Locale}";
    string preFile = Path.Combine(Cfg.TmpFolder, stem + ".pre");
    string wordBreakFile = Path.Combine(Cfg.TmpFolder, stem + ".wbr");
    string postFile = Path.Combine(Cfg.TmpFolder, stem + ".post");
    string dataFile = Path.Combine(Cfg.TmpFolder, stem + ".data");
    string resultFile = Path.Combine(Cfg.TmpFolder, stem + ".result");

    // Text pipeline: each stage consumes the previous stage's output file.
    processor.PreProcessFile(Cfg.PredictTestPath, preFile);
    processor.WordBreakFile(preFile, wordBreakFile);
    processor.PostProcessFile(wordBreakFile, postFile);

    // Convert the cleaned text to the predictor's input and run the prediction.
    digitizer.Run(postFile, dataFile);
    RunPredict(dataFile, resultFile);

    // Pair result line i with text line i. Zip stops at the shorter of the two files,
    // so surplus lines in either file are dropped.
    var predictions = File.ReadLines(resultFile);
    var texts = File.ReadLines(postFile);
    var merged = predictions.Zip(texts, (prediction, text) => prediction + "\t" + text);

    File.WriteAllLines(Cfg.PredictResultPath, merged);
}