Example #1
0
        /// <summary>
        /// Merges the lines of every file found in <c>Cfg.NonlabeledFolder</c> into one raw file
        /// under <c>Cfg.TmpFolder</c>, then passes that file through
        /// <c>DataProcessing.PreProcessFile</c>, <c>WordBreakFile</c> and <c>PostProcessFile</c>
        /// in sequence. The last stage writes to
        /// <c>Cfg.NonSupTextFolder</c> as <c>{Cfg.BatchName}.{Cfg.Locale}.txt</c>.
        /// </summary>
        private void Cleanup()
        {
            // Deferred query: the input files are only read while the raw file is being written.
            var mergedLines = Directory.EnumerateFiles(Cfg.NonlabeledFolder)
                                       .SelectMany(file => File.ReadLines(file));

            // Intermediate artifacts live in the temp folder and differ only by extension.
            var rawFile = Path.Combine(Cfg.TmpFolder, $"{TmpName}.{Cfg.Locale}.raw");
            var preFile = Path.Combine(Cfg.TmpFolder, $"{TmpName}.{Cfg.Locale}.pre");
            var wordBrokenFile = Path.Combine(Cfg.TmpFolder, $"{TmpName}.{Cfg.Locale}.wbr");

            // The final output is the only file that leaves the temp folder.
            var outputFile = Path.Combine(Cfg.NonSupTextFolder, $"{Cfg.BatchName}.{Cfg.Locale}.txt");
            var processor = new DataProcessing(Cfg);

            // Each stage consumes the file produced by the previous one.
            File.WriteAllLines(rawFile, mergedLines);
            processor.PreProcessFile(rawFile, preFile);
            processor.WordBreakFile(preFile, wordBrokenFile);
            processor.PostProcessFile(wordBrokenFile, outputFile);
        }
Example #2
0
        /// <summary>
        /// Merges the lines of every file in <paramref name="inputFolder"/> into one raw file under
        /// <c>Cfg.TmpFolder</c>, then passes it through <c>DataProcessing.PreProcessFile</c>,
        /// <c>WordBreakFile</c> and <c>PostProcessFile</c> in sequence.
        /// </summary>
        /// <param name="inputFolder">Folder whose files are concatenated line by line.</param>
        /// <param name="type">Tag embedded in every intermediate file name
        /// (<c>{TmpName}.{type}.{Cfg.Locale}.*</c>).</param>
        /// <returns>Path of the <c>.post</c> file written by the final stage.</returns>
        private string Cleanup(string inputFolder, string type)
        {
            // Deferred query: the input files are only read while the raw file is being written.
            var mergedLines = Directory.EnumerateFiles(inputFolder)
                                       .SelectMany(file => File.ReadLines(file));

            // All artifacts live in the temp folder and differ only by extension.
            var rawFile = Path.Combine(Cfg.TmpFolder, $"{TmpName}.{type}.{Cfg.Locale}.raw");
            var preFile = Path.Combine(Cfg.TmpFolder, $"{TmpName}.{type}.{Cfg.Locale}.pre");
            var wordBrokenFile = Path.Combine(Cfg.TmpFolder, $"{TmpName}.{type}.{Cfg.Locale}.wbr");
            var postFile = Path.Combine(Cfg.TmpFolder, $"{TmpName}.{type}.{Cfg.Locale}.post");
            var processor = new DataProcessing(Cfg);

            // Each stage consumes the file produced by the previous one.
            File.WriteAllLines(rawFile, mergedLines);
            processor.PreProcessFile(rawFile, preFile);
            processor.WordBreakFile(preFile, wordBrokenFile);
            processor.PostProcessFile(wordBrokenFile, postFile);

            return postFile;
        }
Example #3
0
        /// <summary>
        /// Passes <c>Cfg.SimilarityTestPath</c> through <c>DataProcessing.PreProcessFile</c>,
        /// <c>WordBreakFile</c> and <c>PostProcessFile</c>, calls <c>PrepareSimilarity</c>, then
        /// feeds every post-processed line to <c>ProcessSingleLine</c>. The flattened results are
        /// written to <c>Cfg.SimDetailPath</c> and <c>BriefList</c> to <c>Cfg.SimBriefPath</c>.
        /// </summary>
        public void Run()
        {
            var processor = new DataProcessing(Cfg);

            // Intermediate artifacts live in the temp folder and differ only by extension.
            var preFile = Path.Combine(Cfg.TmpFolder, $"{TmpName}.{Cfg.Locale}.pre");
            var wordBrokenFile = Path.Combine(Cfg.TmpFolder, $"{TmpName}.{Cfg.Locale}.wbr");
            var postFile = Path.Combine(Cfg.TmpFolder, $"{TmpName}.{Cfg.Locale}.post");

            // Each stage consumes the file produced by the previous one.
            processor.PreProcessFile(Cfg.SimilarityTestPath, preFile);
            processor.WordBreakFile(preFile, wordBrokenFile);
            processor.PostProcessFile(wordBrokenFile, postFile);

            PrepareSimilarity();

            // Deferred query: ProcessSingleLine runs only while the detail file is being written.
            var detailLines = File.ReadLines(postFile)
                                  .SelectMany(line => ProcessSingleLine(line));

            // NOTE(review): BriefList is presumably populated by ProcessSingleLine, so the detail
            // write must stay ahead of the brief write - confirm before reordering.
            File.WriteAllLines(Cfg.SimDetailPath, detailLines);
            File.WriteAllLines(Cfg.SimBriefPath, BriefList);
        }
        /// <summary>
        /// Passes <c>Cfg.PredictTestPath</c> through <c>DataProcessing.PreProcessFile</c>,
        /// <c>WordBreakFile</c> and <c>PostProcessFile</c>, hands the result to
        /// <c>Digitalize.Run</c> and <c>RunPredict</c>, and finally writes each result line joined
        /// by a tab with the matching post-processed line to <c>Cfg.PredictResultPath</c>.
        /// </summary>
        public void Run()
        {
            var processor = new DataProcessing(Cfg);
            var digitizer = new Digitalize(Cfg);

            // Intermediate artifacts live in the temp folder and differ only by extension.
            var preFile = Path.Combine(Cfg.TmpFolder, $"{TmpName}.{Cfg.Locale}.pre");
            var wordBrokenFile = Path.Combine(Cfg.TmpFolder, $"{TmpName}.{Cfg.Locale}.wbr");
            var postFile = Path.Combine(Cfg.TmpFolder, $"{TmpName}.{Cfg.Locale}.post");
            var dataFile = Path.Combine(Cfg.TmpFolder, $"{TmpName}.{Cfg.Locale}.data");
            var resultFile = Path.Combine(Cfg.TmpFolder, $"{TmpName}.{Cfg.Locale}.result");

            // Each stage consumes the file produced by the previous one.
            processor.PreProcessFile(Cfg.PredictTestPath, preFile);
            processor.WordBreakFile(preFile, wordBrokenFile);
            processor.PostProcessFile(wordBrokenFile, postFile);
            digitizer.Run(postFile, dataFile);

            RunPredict(dataFile, resultFile);

            // Pair result line i with post-processed line i; Zip stops at the shorter file.
            var pairedLines = File.ReadLines(resultFile)
                                  .Zip(File.ReadLines(postFile),
                                       (resultLine, postLine) => $"{resultLine}\t{postLine}");

            File.WriteAllLines(Cfg.PredictResultPath, pairedLines);
        }