/// <summary>
/// Generates codes of the requested <paramref name="codeType"/> for the entries of
/// <paramref name="wordLibraryList"/>, updating <c>countWord</c>, <c>currentStatus</c>
/// and <c>processMessage</c> as it goes.
/// </summary>
/// <remarks>
/// Entries whose code type already equals the target are left untouched. Entries of
/// type <c>CodeType.English</c> get their lower-cased word as English code. For every
/// other entry the word is filtered according to <c>FilterConfig</c>; if the filtered
/// word still contains English letters, digits or punctuation that are configured to be
/// kept, it is split into fragments of one character class each and the code is
/// assembled fragment by fragment. Otherwise the generator is run on the filtered word.
/// A failure on one entry is written to the debug output and does not stop the loop;
/// the entry's <c>Word</c> is restored to its original text even when the generator throws.
/// </remarks>
/// <param name="wordLibraryList">Entries to generate codes for. An empty list is a no-op.</param>
/// <param name="codeType">
/// Target code type. <c>UserDefinePhrase</c> is replaced by <c>Pinyin</c> when the first
/// entry has <c>CodeType.NoCode</c>. If no generator exists for the type, nothing is done.
/// </param>
private void GenerateDestinationCode(WordLibraryList wordLibraryList, CodeType codeType)
{
    if (wordLibraryList.Count == 0)
    {
        return;
    }

    if (wordLibraryList[0].CodeType == CodeType.NoCode && codeType == CodeType.UserDefinePhrase)
    {
        codeType = CodeType.Pinyin;
    }

    IWordCodeGenerater generater = CodeTypeHelper.GetGenerater(codeType);
    if (generater == null) // Unknown code type: do not generate any code.
    {
        return;
    }

    countWord = wordLibraryList.Count;
    currentStatus = 0;

    // NOTE(review): the look-ahead is always satisfied wherever \s+ can match (whitespace is
    // never a letter), so this pattern behaves like plain \s+; a look-behind may have been
    // intended - confirm.
    Regex spaceRegex = new Regex("(?=[^a-zA-Z])\\s+");
    // NOTE(review): the ranges in the next two patterns are listed twice; the second copy may
    // originally have been the full-width forms - confirm against the repository file.
    Regex numberRegex = new Regex("[0-90-9]+");
    Regex englishRegex = new Regex("[a-zA-Za-zA-Z]+");
    Regex fullWidthRegex = new Regex("[\uff00-\uff5e]+");
    Regex punctuationRegex = new Regex("[\u0021-\u002f\u003a-\u0040\u005b-\u0060\u007b-\u008f\u00a0-\u00bf\u00d7\u00f7\u2000-\u2bff\u3000-\u303f\u30a0\u30fb\uff01-\uff0f\uff1a-\uff20\uff5b-\uff65]");

    foreach (WordLibrary wordLibrary in wordLibraryList)
    {
        currentStatus++;
        processMessage = "生成目标编码:" + currentStatus + "/" + countWord;

        if (wordLibrary.CodeType == codeType)
        {
            continue;
        }

        if (wordLibrary.CodeType == CodeType.English)
        {
            wordLibrary.SetCode(CodeType.English, wordLibrary.Word.ToLower());
            continue;
        }

        try
        {
            string originalWord = wordLibrary.Word;
            string word = originalWord;

            // Map characters in U+FF00..U+FF5E to their half-width counterparts
            // (code point minus 0xFEE0 = 65248).
            if (FilterConfig.FullWidth && fullWidthRegex.IsMatch(word))
            {
                char[] chars = word.ToCharArray();
                for (int i = 0; i < chars.Length; i++)
                {
                    if (chars[i] <= 0xff5e && chars[i] >= 0xff00)
                    {
                        chars[i] = (char)(chars[i] - 65248);
                    }
                }
                word = new string(chars);
            }

            // Each underscore-suffixed flag strips the corresponding character class
            // from the text that is handed to the generator.
            if (FilterConfig.KeepNumber_)
            {
                word = numberRegex.Replace(word, "");
            }
            if (FilterConfig.KeepEnglish_)
            {
                word = englishRegex.Replace(word, "");
            }
            if (FilterConfig.KeepSpace_)
            {
                if (FilterConfig.KeepSpace == false)
                {
                    word = word.Replace(" ", "");
                }
                else
                {
                    word = spaceRegex.Replace(word, "");
                }
            }
            if (FilterConfig.KeepPunctuation_)
            {
                word = punctuationRegex.Replace(word, "");
            }
            if (FilterConfig.ChsNumber)
            {
                word = TranslateChineseNumber(word);
            }

            bool needsFragmentCoding =
                (englishRegex.IsMatch(word) && FilterConfig.KeepEnglish)
                || (numberRegex.IsMatch(word) && FilterConfig.KeepNumber)
                || (punctuationRegex.IsMatch(word) && FilterConfig.KeepPunctuation);

            if (needsFragmentCoding)
            {
                StringBuilder fragment = new StringBuilder();
                List<IList<string>> output = new List<IList<string>>();
                int fragmentType = -1; // -1: no fragment started yet.

                foreach (char c in word)
                {
                    int type = ClassifyFragmentChar(c, fragmentType, punctuationRegex);

                    // A change of character class closes the running fragment.
                    if (fragment.Length > 0 && type != fragmentType)
                    {
                        AppendFragmentCodes(wordLibrary, generater, fragment.ToString(), fragmentType, output);
                        fragment.Clear();
                    }

                    fragmentType = type;
                    fragment.Append(c);
                }

                if (fragment.Length > 0)
                {
                    AppendFragmentCodes(wordLibrary, generater, fragment.ToString(), fragmentType, output);
                }

                wordLibrary.Codes = new Code(output);
            }
            else if (word.Equals(originalWord))
            {
                generater.GetCodeOfWordLibrary(wordLibrary);
            }
            else
            {
                GenerateCodeForFragment(wordLibrary, generater, word);
            }
        }
        catch (Exception ex)
        {
            // Best effort: a failure on one entry must not abort the whole conversion.
            Debug.WriteLine("生成编码失败" + ex.Message);
        }

        if (codeType != CodeType.Unknown)
        {
            wordLibrary.CodeType = codeType;
        }
    }
}

/// <summary>
/// Classifies a character for fragment splitting: 1 = ASCII digit, 2 = ASCII letter
/// (or a space continuing a letter fragment when <c>FilterConfig.KeepSpace</c> is set),
/// 3 = punctuation, 0 = anything else.
/// </summary>
private int ClassifyFragmentChar(char c, int currentFragmentType, Regex punctuationRegex)
{
    if (c >= 0x30 && c <= 0x39)
    {
        return 1;
    }
    if ((c >= 0x41 && c <= 0x5a) || (c >= 0x61 && c <= 0x7a))
    {
        return 2;
    }
    if (c == 0x20 && FilterConfig.KeepSpace && currentFragmentType == 2)
    {
        return 2;
    }
    if ("-・&%'".Contains(c))
    {
        return 3;
    }
    if (punctuationRegex.IsMatch(c.ToString()))
    {
        return 3;
    }
    return 0;
}

/// <summary>
/// Appends the code for one fragment to <paramref name="output"/>: kept English, digit and
/// punctuation fragments are emitted verbatim (English optionally prefixed with '_'),
/// every other fragment is passed through the generator.
/// </summary>
private void AppendFragmentCodes(WordLibrary wordLibrary, IWordCodeGenerater generater, string fragment, int fragmentType, List<IList<string>> output)
{
    if (fragmentType == 2 && FilterConfig.KeepEnglish)
    {
        if (FilterConfig.needEnglishTag())
        {
            output.Add(new List<string> { '_' + fragment });
        }
        else
        {
            output.Add(new List<string> { fragment });
        }
    }
    else if ((fragmentType == 1 && FilterConfig.KeepNumber) || (fragmentType == 3 && FilterConfig.KeepPunctuation))
    {
        output.Add(new List<string> { fragment });
    }
    else
    {
        wordLibrary.CodeType = CodeType.NoCode;
        GenerateCodeForFragment(wordLibrary, generater, fragment);
        output.AddRange(wordLibrary.Codes);
    }
}

/// <summary>
/// Runs the generator with the entry's <c>Word</c> temporarily replaced by
/// <paramref name="text"/>. The previous <c>Word</c> is restored even if the generator
/// throws, so a failed entry never keeps a fragment as its word.
/// </summary>
private void GenerateCodeForFragment(WordLibrary wordLibrary, IWordCodeGenerater generater, string text)
{
    string savedWord = wordLibrary.Word;
    wordLibrary.Word = text;
    try
    {
        generater.GetCodeOfWordLibrary(wordLibrary);
    }
    finally
    {
        wordLibrary.Word = savedWord;
    }
}