/// <summary>
/// Reads the source files in consecutive groups of <paramref name="mergeOption"/> files and
/// concatenates each group into a single text.
/// </summary>
/// <param name="sourceFiles">Paths of the files to merge, in merge order.</param>
/// <param name="mergeOption">Number of files per merged group; must be at least 1.</param>
/// <param name="mergedFileNames">
/// Receives one name per group: the output name of the group's first file, followed by
/// " - " and the output name of its last file when the group holds more than one file.
/// </param>
/// <returns>One merged text per group, in the same order as <paramref name="mergedFileNames"/>.</returns>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="mergeOption"/> is less than 1.
/// </exception>
private string[] mergeSourceFiles(string[] sourceFiles, int mergeOption, out string[] mergedFileNames)
{
    // Nothing to merge: previously this crashed while indexing sourceFiles[0].
    if (sourceFiles == null || sourceFiles.Length == 0)
    {
        mergedFileNames = new string[0];
        return new string[0];
    }

    // A step below 1 would never advance the outer loop and would grow the result lists forever.
    if (mergeOption < 1)
    {
        throw new System.ArgumentOutOfRangeException(nameof(mergeOption), mergeOption, "mergeOption must be at least 1.");
    }

    // The charset is guessed from the first file only and then applied to every file.
    Encoding encoding = Encoding.GetEncoding(CharsetDetector.GuessCharsetOfFile(sourceFiles[0]));

    bool useChangedFileNames = changeFileNameCheckBox.Checked;
    int selectedOutputType = outputTypeComboBox.SelectedIndex;
    // NOTE(review): indices 2, 4 and 6 presumably are the output types that need chapter markers — confirm.
    bool needMarkChapterHeaders = selectedOutputType == 2 || selectedOutputType == 4 || selectedOutputType == 6;

    int fileCount = sourceFiles.Length;
    // Clamping the step keeps "groupStart + groupSize" from overflowing for huge mergeOption values.
    int groupSize = mergeOption < fileCount ? mergeOption : fileCount;

    var mergedContents = new List<string>();
    var mergedNames = new List<string>();
    var content = new StringBuilder();
    var groupName = new StringBuilder();

    for (int groupStart = 0; groupStart < fileCount; groupStart += groupSize)
    {
        content.Length = 0;
        groupName.Length = 0;

        // The last group may contain fewer than groupSize files.
        int groupEnd = fileCount - groupStart > groupSize ? groupStart + groupSize : fileCount;

        for (int index = groupStart; index < groupEnd; index++)
        {
            content
                .Append(Util.NormalizeTextAndRemoveIgnoredChinesePhrases(readFile(sourceFiles[index], encoding, needMarkChapterHeaders)))
                .Append("\n\n----------oOo----------\n\n");

            if (index == groupStart)
            {
                groupName.Append(getOutputFileName(sourceFiles[index], index, fileCount, useChangedFileNames));
            }
            else if (index == groupEnd - 1)
            {
                // Only the first and the last file of a group contribute to its name.
                groupName.Append(" - ").Append(getOutputFileName(sourceFiles[index], index, fileCount, useChangedFileNames));
            }
        }

        mergedContents.Add(content.ToString());
        mergedNames.Add(groupName.ToString());
    }

    mergedFileNames = mergedNames.ToArray();
    return mergedContents.ToArray();
}
/// <summary>
/// Reads every line of the rule file at <paramref name="rulePath"/>.
/// </summary>
/// <param name="rulePath">Path of the rule file to read.</param>
/// <returns>The lines of the file, decoded with the detected (or overridden) charset.</returns>
private string[] LoadFilterRules(string rulePath)
{
    var detectedCharset = CharsetDetector.GuessCharsetOfFile(rulePath);

    // A "GB2312" guess is replaced by "UTF-8"; the reason is not visible in this code.
    var effectiveCharset = detectedCharset == "GB2312" ? "UTF-8" : detectedCharset;

    var encoding = Encoding.GetEncoding(effectiveCharset);
    return File.ReadAllLines(rulePath, encoding);
}
/// <summary>
/// Loads "key=value" lines from <paramref name="dictPath"/> and keeps, for each key,
/// only the part of the value before the first '/' or '|'.
/// </summary>
/// <param name="dictPath">Path of the dictionary file; its charset is guessed from the file.</param>
/// <returns>
/// A map from key to first meaning. Lines that do not split into exactly two parts on '='
/// are skipped, and only the first occurrence of a key is kept.
/// </returns>
private Dictionary<string, string> LoadDictionaryAndGetFirstMeaningOfEach(string dictPath)
{
    var firstMeanings = new Dictionary<string, string>();
    var encoding = Encoding.GetEncoding(CharsetDetector.GuessCharsetOfFile(dictPath));

    using (var reader = new StreamReader(dictPath, encoding))
    {
        foreach (var entry in reader.Lines())
        {
            var parts = entry.Split('=');
            if (parts.Length != 2)
            {
                continue;
            }

            // The first definition of a key wins; later duplicates are ignored.
            if (firstMeanings.ContainsKey(parts[0]))
            {
                continue;
            }

            var meanings = parts[1].Split('/', '|');
            firstMeanings.Add(parts[0], meanings[0]);
        }
    }

    return firstMeanings;
}
/// <summary>
/// Reads all given files and joins their contents, each followed by a line terminator.
/// </summary>
/// <param name="files">Paths of the files to read, in order.</param>
/// <returns>
/// The concatenated contents, or an empty string when <paramref name="files"/> is empty.
/// </returns>
string ReadAllFiles(string[] files)
{
    if (files.Length == 0)
    {
        return string.Empty;
    }

    // The charset is guessed from the first file only and reused for all of them.
    var encoding = Encoding.GetEncoding(CharsetDetector.GuessCharsetOfFile(files[0]));
    var combined = new StringBuilder();

    for (int index = 0; index < files.Length; index++)
    {
        combined.AppendLine(ReadFile(files[index], encoding, false));
    }

    return combined.ToString();
}
/// <summary>
/// Loads "key=value" lines from the dictionary file at <paramref name="dictPath"/>.
/// </summary>
/// <param name="dictPath">Path of the dictionary file; its charset is guessed from the file.</param>
/// <returns>
/// A map from key to value. Lines that do not split into exactly two parts on '='
/// are skipped, and only the first occurrence of a key is kept.
/// </returns>
private Dictionary<string, string> LoadDictionary(string dictPath)
{
    var entries = new Dictionary<string, string>();
    var detectedCharset = CharsetDetector.GuessCharsetOfFile(dictPath);

    using (var reader = new StreamReader(dictPath, Encoding.GetEncoding(detectedCharset)))
    {
        for (var current = reader.ReadLine(); current != null; current = reader.ReadLine())
        {
            var parts = current.Split('=');
            if (parts.Length != 2)
            {
                continue;
            }

            // The first definition of a key wins; later duplicates are ignored.
            if (!entries.ContainsKey(parts[0]))
            {
                entries.Add(parts[0], parts[1]);
            }
        }
    }

    return entries;
}
/// <summary>
/// Loads "key=value" lines from the dictionary file at <paramref name="dictPath"/>.
/// </summary>
/// <param name="dictPath">Path of the dictionary file; its charset is guessed from the file.</param>
/// <returns>
/// A map from key to value. Lines that do not split into exactly two parts on '='
/// are skipped, and only the first occurrence of a key is kept.
/// </returns>
private Dictionary<string, string> LoadDictionary(string dictPath)
{
    var dict = new Dictionary<string, string>();
    var charset = CharsetDetector.GuessCharsetOfFile(dictPath);

    // TODO: explain this
    // NOTE(review): presumably the detector reports "GB2312" for files that are really UTF-8 — confirm.
    if (charset == "GB2312")
    {
        charset = "UTF-8";
    }

    using var textReader = new StreamReader(dictPath, Encoding.GetEncoding(charset));
    foreach (var line in textReader.Lines())
    {
        var tuple = line.Split('=');

        // The first definition of a key wins; later duplicates are ignored.
        if (tuple.Length == 2 && !dict.ContainsKey(tuple[0]))
        {
            dict.Add(tuple[0], tuple[1]);
        }
    }

    return dict;
}
/// <summary>
/// Handles the Run button: validates the input file and output directory, then splits the
/// input file either into byte chunks or at lines matching a separator pattern, writing the
/// numbered parts as ".txt" files into the output directory.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void RunButton_Clicked(object sender, EventArgs e)
{
    if (!File.Exists(txtInputFilePath.Text))
    {
        MessageBox.Show("Đường dẫn đến file nguồn không đúng!", "Error", MessageBoxButtons.OK, MessageBoxIcon.Hand);
        btnSelectInputFilePath.Focus();
        return;
    }

    if (string.IsNullOrEmpty(txtOutputDirPath.Text))
    {
        MessageBox.Show("Nhập thư mục chứa kết quả!", "Error", MessageBoxButtons.OK, MessageBoxIcon.Hand);
        btnSelectOutputDirPath.Focus();
        return;
    }

    Directory.CreateDirectory(txtOutputDirPath.Text);

    if (radSplitIntoChunks.Checked)
    {
        // NOTE(review): assumes numChunks never yields a value below 1 — confirm the control's Minimum.
        SplitInputIntoByteChunks(txtInputFilePath.Text, txtOutputDirPath.Text, numChunks.Value);
    }
    else
    {
        // Chapter mode uses the built-in pattern; token mode uses the user's text,
        // escaped unless the user asked for it to be treated as a regex.
        var pattern = radSplitIntoChapters.Checked
            ? DefaultPattern
            : chkUseRegex.Checked
                ? txtSeparatorToken.Text
                : Regex.Escape(txtSeparatorToken.Text);
        var regex = new Regex(pattern, RegexOptions.Compiled);

        SplitInputAtSeparatorLines(txtInputFilePath.Text, txtOutputDirPath.Text, regex, radSplitBySeparatorToken.Checked);
    }

    MessageBox.Show("Xong!!!", Text, MessageBoxButtons.OK, MessageBoxIcon.Asterisk);
}

// Builds "<outputDir>/<index zero-padded to width>.txt".
private static string BuildSplitOutputFilePath(string outputDir, int index, int width)
{
    return Path.Combine(outputDir, index.ToString().PadLeft(width, '0') + ".txt");
}

// Splits the raw bytes of inputPath into at most chunkCount files of equal size (the last may be shorter).
private void SplitInputIntoByteChunks(string inputPath, string outputDir, decimal chunkCount)
{
    // "using" guarantees the streams are closed even when a read or write throws.
    using (var inputFile = new FileStream(inputPath, FileMode.Open, FileAccess.Read))
    {
        int chunkSize = (int)Math.Ceiling(inputFile.Length / chunkCount);
        int amountStrWidth = chunkCount.ToString().Length;
        var chunk = new byte[chunkSize];

        for (int i = 0; i < chunkCount; i++)
        {
            // Stream.Read may return fewer bytes than requested, so keep reading
            // until the chunk is full or the end of the file is reached.
            int filled = 0;
            int nByteRead;
            while (filled < chunkSize && (nByteRead = inputFile.Read(chunk, filled, chunkSize - filled)) > 0)
            {
                filled += nByteRead;
            }

            if (filled == 0)
            {
                // End of input: no further chunk can contain data.
                break;
            }

            var outputFilePath = BuildSplitOutputFilePath(outputDir, i, amountStrWidth);

            // FileMode.Create truncates an existing file; OpenOrCreate would leave stale
            // trailing bytes behind when a longer file from an earlier run already exists.
            using (var outputFile = new FileStream(outputFilePath, FileMode.Create, FileAccess.Write))
            {
                outputFile.Write(chunk, 0, filled);
            }
        }
    }
}

// Splits the text of inputPath at every line matching separator. When separatorClosesPart is true the
// matching line ends the current part; otherwise it becomes the first line of the next part.
private void SplitInputAtSeparatorLines(string inputPath, string outputDir, Regex separator, bool separatorClosesPart)
{
    const int amountStrWidth = 4;

    var charset = CharsetDetector.GuessCharsetOfFile(inputPath);
    var encoding = Encoding.GetEncoding(charset);
    var inputLines = File.ReadAllLines(inputPath, encoding);

    var stringBuilder = new StringBuilder();
    int i = 0;

    foreach (var line in inputLines)
    {
        // Normal line. A line that starts a part is never treated as a separator,
        // which avoids emitting empty parts.
        if (stringBuilder.Length == 0 || !separator.IsMatch(line))
        {
            stringBuilder.AppendLine(line);
            continue;
        }

        // Separator line.
        if (separatorClosesPart)
        {
            stringBuilder.AppendLine(line);
        }

        File.WriteAllText(BuildSplitOutputFilePath(outputDir, i, amountStrWidth), stringBuilder.ToString(), encoding);
        stringBuilder.Length = 0;

        if (!separatorClosesPart)
        {
            stringBuilder.AppendLine(line);
        }

        i++;
    }

    // Flush whatever follows the last separator.
    if (stringBuilder.Length > 0)
    {
        File.WriteAllText(BuildSplitOutputFilePath(outputDir, i, amountStrWidth), stringBuilder.ToString(), encoding);
    }
}