Ejemplo n.º 1
0
        /// <summary>
        /// Reads the given source files, concatenates them in groups of
        /// <paramref name="mergeOption"/> consecutive files, and builds one name per group.
        /// </summary>
        /// <param name="sourceFiles">
        /// Paths of the files to merge. The charset guessed for the first file is used to read all of them.
        /// </param>
        /// <param name="mergeOption">Number of consecutive files per merged group; must be at least 1.</param>
        /// <param name="mergedFileNames">
        /// Receives one name per group: the output file name of the group's first file, followed by
        /// " - " and the output file name of the group's last file when the group holds more than one file.
        /// </param>
        /// <returns>
        /// The merged text of each group, in the same order as <paramref name="mergedFileNames"/>.
        /// Both arrays are empty when <paramref name="sourceFiles"/> is null or empty.
        /// </returns>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// <paramref name="mergeOption"/> is less than 1.
        /// </exception>
        private string[] mergeSourceFiles(string[] sourceFiles, int mergeOption, out string[] mergedFileNames)
        {
            if (mergeOption < 1)
            {
                // A non-positive group size would never advance the grouping loop below.
                throw new System.ArgumentOutOfRangeException(nameof(mergeOption), mergeOption, "mergeOption must be at least 1.");
            }

            if (sourceFiles == null || sourceFiles.Length == 0)
            {
                // Nothing to merge; avoid indexing sourceFiles[0] for charset detection.
                mergedFileNames = new string[0];
                return(new string[0]);
            }

            // Resolve the encoding once; every file is read with the charset guessed from the first one.
            Encoding sourceEncoding = Encoding.GetEncoding(CharsetDetector.GuessCharsetOfFile(sourceFiles[0]));
            bool     renameOutputs  = changeFileNameCheckBox.Checked;
            int      fileCount      = sourceFiles.Length;
            int      outputType     = outputTypeComboBox.SelectedIndex;
            bool     needMarkChapterHeaders = outputType == 2 || outputType == 4 || outputType == 6;

            List <string> mergedContents = new List <string>();
            List <string> mergedNames    = new List <string>();
            StringBuilder contentBuilder = new StringBuilder();
            StringBuilder nameBuilder    = new StringBuilder();

            int groupStart = 0;
            while (groupStart < fileCount)
            {
                // Exclusive end of this group, clamped to the array and computed without
                // adding mergeOption to groupStart (which could overflow for huge values).
                int groupEnd = fileCount - groupStart <= mergeOption ? fileCount : groupStart + mergeOption;

                contentBuilder.Length = 0;
                nameBuilder.Length    = 0;

                for (int index = groupStart; index < groupEnd; index++)
                {
                    contentBuilder.Append(Util.NormalizeTextAndRemoveIgnoredChinesePhrases(readFile(sourceFiles[index], sourceEncoding, needMarkChapterHeaders))).Append("\n\n----------oOo----------\n\n");

                    if (index == groupStart)
                    {
                        nameBuilder.Append(getOutputFileName(sourceFiles[index], index, fileCount, renameOutputs));
                    }
                    else if (index == groupEnd - 1)
                    {
                        // Only the first and last file of a multi-file group contribute to its name.
                        nameBuilder.Append(" - ").Append(getOutputFileName(sourceFiles[index], index, fileCount, renameOutputs));
                    }
                }

                mergedContents.Add(contentBuilder.ToString());
                mergedNames.Add(nameBuilder.ToString());
                groupStart = groupEnd;
            }

            mergedFileNames = mergedNames.ToArray();
            return(mergedContents.ToArray());
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads all lines of the rule file at <paramref name="rulePath"/>, decoding it with the
        /// charset guessed for that file.
        /// </summary>
        /// <param name="rulePath">Path of the rule file to read.</param>
        /// <returns>The lines of the file.</returns>
        private string[] LoadFilterRules(string rulePath)
        {
            var detectedCharset = CharsetDetector.GuessCharsetOfFile(rulePath);

            // A "GB2312" guess is replaced by UTF-8 before decoding.
            // NOTE(review): the reason for this override is not documented here - confirm.
            var effectiveCharset = detectedCharset == "GB2312" ? "UTF-8" : detectedCharset;

            Encoding ruleEncoding = Encoding.GetEncoding(effectiveCharset);
            return(File.ReadAllLines(rulePath, ruleEncoding));
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Loads "key=value" lines from the dictionary file and keeps, for each key, only the part
        /// of the value that precedes the first '/' or '|'.
        /// </summary>
        /// <param name="dictPath">Path of the dictionary file; decoded with the charset guessed for it.</param>
        /// <returns>
        /// A map from key to first meaning. Lines that do not split into exactly two parts on '='
        /// are skipped, and only the first occurrence of a key is kept.
        /// </returns>
        private Dictionary <string, string> LoadDictionaryAndGetFirstMeaningOfEach(string dictPath)
        {
            var dictEncoding  = Encoding.GetEncoding(CharsetDetector.GuessCharsetOfFile(dictPath));
            var firstMeanings = new Dictionary <string, string>();

            using var reader = new StreamReader(dictPath, dictEncoding);
            foreach (var entry in reader.Lines())
            {
                var parts = entry.Split('=');
                if (parts.Length != 2)
                {
                    continue;
                }

                var key = parts[0];
                if (firstMeanings.ContainsKey(key))
                {
                    // First occurrence of a key wins.
                    continue;
                }

                var meanings = parts[1].Split('/', '|');
                firstMeanings.Add(key, meanings[0]);
            }
            return(firstMeanings);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Reads every file in <paramref name="files"/> and joins their contents, one line break
        /// appended after each file's text.
        /// </summary>
        /// <param name="files">Paths to read; the charset guessed for the first file is used for all of them.</param>
        /// <returns>The concatenated text, or an empty string when no files are given.</returns>
        string ReadAllFiles(string[] files)
        {
            if (files.Length == 0)
            {
                return(string.Empty);
            }

            // One encoding, taken from the first file, is shared by every read below.
            Encoding sharedEncoding = Encoding.GetEncoding(CharsetDetector.GuessCharsetOfFile(files[0]));
            var      combined       = new StringBuilder();

            for (int index = 0; index < files.Length; index++)
            {
                combined.AppendLine(ReadFile(files[index], sharedEncoding, false));
            }
            return(combined.ToString());
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Loads "key=value" lines from the dictionary file into a map.
        /// </summary>
        /// <param name="dictPath">Path of the dictionary file; decoded with the charset guessed for it.</param>
        /// <returns>
        /// A map from key to value. Lines that do not split into exactly two parts on '=' are
        /// skipped, and only the first occurrence of a key is kept.
        /// </returns>
        private Dictionary <string, string> LoadDictionary(string dictPath)
        {
            var dictEncoding = Encoding.GetEncoding(CharsetDetector.GuessCharsetOfFile(dictPath));
            var entries      = new Dictionary <string, string>();

            using (var reader = new StreamReader(dictPath, dictEncoding))
            {
                for (string entry = reader.ReadLine(); entry != null; entry = reader.ReadLine())
                {
                    string[] parts = entry.Split('=');

                    // Skip malformed lines and keys that were already seen (first occurrence wins).
                    if (parts.Length != 2 || entries.ContainsKey(parts[0]))
                    {
                        continue;
                    }
                    entries.Add(parts[0], parts[1]);
                }
            }
            return(entries);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Loads "key=value" lines from the dictionary file into a map.
        /// </summary>
        /// <param name="dictPath">Path of the dictionary file.</param>
        /// <returns>
        /// A map from key to value. Lines that do not split into exactly two parts on '=' are
        /// skipped, and only the first occurrence of a key is kept.
        /// </returns>
        private Dictionary <string, string> LoadDictionary(string dictPath)
        {
            var detectedCharset = CharsetDetector.GuessCharsetOfFile(dictPath);

            // A "GB2312" guess is replaced by UTF-8 before decoding.
            // TODO: the reason for this override is not documented - explain or confirm it.
            var effectiveCharset = detectedCharset == "GB2312" ? "UTF-8" : detectedCharset;

            var entries = new Dictionary <string, string>();

            using var reader = new StreamReader(dictPath, Encoding.GetEncoding(effectiveCharset));
            foreach (var entry in reader.Lines())
            {
                var parts = entry.Split('=');

                // Skip malformed lines and keys that were already seen (first occurrence wins).
                if (parts.Length != 2 || entries.ContainsKey(parts[0]))
                {
                    continue;
                }
                entries.Add(parts[0], parts[1]);
            }
            return(entries);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Handles the Run button: validates the input file and output directory fields, creates
        /// the output directory, then splits the input file either into byte chunks or into text
        /// parts, and finally reports completion in a message box.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void RunButton_Clicked(object sender, EventArgs e)
        {
            if (!File.Exists(txtInputFilePath.Text))
            {
                MessageBox.Show("Đường dẫn đến file nguồn không đúng!", "Error",
                                MessageBoxButtons.OK, MessageBoxIcon.Hand);
                btnSelectInputFilePath.Focus();
                return;
            }

            if (string.IsNullOrEmpty(txtOutputDirPath.Text))
            {
                MessageBox.Show("Nhập thư mục chứa kết quả!", "Error",
                                MessageBoxButtons.OK, MessageBoxIcon.Hand);
                btnSelectOutputDirPath.Focus();
                return;
            }

            Directory.CreateDirectory(txtOutputDirPath.Text);

            if (radSplitIntoChunks.Checked)
            {
                SplitInputFileIntoChunks();
            }
            else
            {
                SplitInputFileByPattern();
            }
            MessageBox.Show("Xong!!!", Text, MessageBoxButtons.OK, MessageBoxIcon.Asterisk);
        }

        /// <summary>
        /// Builds the path of a numbered ".txt" part inside the output directory, with the index
        /// left-padded with zeros to <paramref name="width"/> characters.
        /// </summary>
        private string BuildPartFilePath(int index, int width)
        {
            return(Path.Combine(txtOutputDirPath.Text, index.ToString().PadLeft(width, '0') + ".txt"));
        }

        /// <summary>
        /// Splits the input file's raw bytes into at most <c>numChunks.Value</c> consecutive
        /// parts of equal size (the last part may be shorter).
        /// </summary>
        private void SplitInputFileIntoChunks()
        {
            // using guarantees the streams are closed even when a read or write throws.
            using (var inputFile = new FileStream(txtInputFilePath.Text, FileMode.Open, FileAccess.Read))
            {
                var inputFileSize = inputFile.Length;

                var nChunks   = numChunks.Value;
                int chunkSize = (int)Math.Ceiling(inputFileSize / nChunks);

                int amountStrWidth = nChunks.ToString().Length;

                // One buffer is reused for every chunk; only the bytes read are written out.
                var chunk = new byte[chunkSize];

                for (int i = 0; i < nChunks; i++)
                {
                    // Stream.Read may return fewer bytes than requested, so keep reading until
                    // the chunk is full or the end of the file is reached.
                    int nByteRead = 0;
                    int nByteJustRead;
                    while (nByteRead < chunkSize &&
                           (nByteJustRead = inputFile.Read(chunk, nByteRead, chunkSize - nByteRead)) > 0)
                    {
                        nByteRead += nByteJustRead;
                    }

                    if (nByteRead == 0)
                    {
                        // End of input: no further chunk can contain data.
                        break;
                    }

                    var outputFilePath = BuildPartFilePath(i, amountStrWidth);

                    // FileMode.Create truncates an existing file, so a longer file left over from
                    // a previous run cannot leave stale bytes after the new chunk.
                    using (var outputFile = new FileStream(outputFilePath, FileMode.Create, FileAccess.Write))
                    {
                        outputFile.Write(chunk, 0, nByteRead);
                    }
                }
            }
        }

        /// <summary>
        /// Splits the input file's lines into numbered text parts, starting a new part at each
        /// line that matches the active pattern (the default chapter pattern, the user's regex,
        /// or the user's separator text taken literally). Parts are written with the charset
        /// guessed for the input file.
        /// </summary>
        private void SplitInputFileByPattern()
        {
            var charset  = CharsetDetector.GuessCharsetOfFile(txtInputFilePath.Text);
            var encoding = Encoding.GetEncoding(charset);

            var inputLines = File.ReadAllLines(txtInputFilePath.Text, encoding);

            int amountStrWidth = 4;

            var stringBuilder = new StringBuilder();

            var pattern =
                radSplitIntoChapters.Checked ? DefaultPattern :
                chkUseRegex.Checked ? txtSeparatorToken.Text : Regex.Escape(txtSeparatorToken.Text);
            var regex = new Regex(pattern, RegexOptions.Compiled);

            // In separator-token mode the matching line closes the current part; otherwise it
            // opens the next one.
            bool separatorEndsPart = radSplitBySeparatorToken.Checked;

            int i = 0;
            foreach (var line in inputLines)
            {
                // Normal line (a match while nothing is buffered is also treated as normal,
                // so no empty part is written).
                if (stringBuilder.Length == 0 || !regex.IsMatch(line))
                {
                    stringBuilder.AppendLine(line);
                    continue;
                }

                // Separator line.
                if (separatorEndsPart)
                {
                    stringBuilder.AppendLine(line);
                }

                File.WriteAllText(BuildPartFilePath(i, amountStrWidth), stringBuilder.ToString(), encoding);

                stringBuilder.Length = 0;

                if (!separatorEndsPart)
                {
                    stringBuilder.AppendLine(line);
                }

                i++;
            }

            // Flush whatever is left after the last separator.
            if (stringBuilder.Length > 0)
            {
                File.WriteAllText(BuildPartFilePath(i, amountStrWidth), stringBuilder.ToString(), encoding);
            }
        }