/// <summary>
/// Fills in the code of type <paramref name="codeType"/> for every entry of
/// <paramref name="wordLibraryList"/> that is not already of that code type,
/// updating the progress fields while it goes.
/// </summary>
/// <param name="wordLibraryList">Entries to encode; modified in place.</param>
/// <param name="codeType">Code type to generate.</param>
private void GenerateDestinationCode(WordLibraryList wordLibraryList, CodeType codeType)
{
    var generator = CodeTypeHelper.GetGenerater(codeType);
    if (generator != null)
    {
        countWord = wordLibraryList.Count;
        currentStatus = 0;

        foreach (WordLibrary entry in wordLibraryList)
        {
            currentStatus++;
            processMessage = "生成目标编码:" + currentStatus + "/" + countWord;

            // Entries that already carry the requested code type are left untouched.
            if (entry.CodeType != codeType)
            {
                if (!generator.IsBaseOnOldCode)
                {
                    // The generator works from the word text only.
                    entry.SetCode(codeType, generator.GetCodeOfString(entry.Word));
                }
                else
                {
                    // The generator needs the whole entry (it builds on the existing code).
                    entry.SetCode(codeType, generator.GetCodeOfWordLibrary(entry));
                }
            }
        }
    }

    // A null generator means the code type is unknown, so nothing is encoded.
}
/// <summary>
/// Asks the generator for <paramref name="codeType"/> to encode every entry of
/// <paramref name="wordLibraryList"/> that is not already of that code type, then
/// marks the entry with the new code type. Progress fields are updated per entry.
/// </summary>
/// <param name="wordLibraryList">Entries to encode; modified in place.</param>
/// <param name="codeType">Code type to generate.</param>
private void GenerateDestinationCode(WordLibraryList wordLibraryList, CodeType codeType)
{
    IWordCodeGenerater generater = CodeTypeHelper.GetGenerater(codeType);
    if (generater == null)
    {
        // Unknown code type: no encoding is performed.
        return;
    }

    // The target type never changes inside the loop, so decide once whether to stamp it.
    bool stampCodeType = codeType != CodeType.Unknown;

    countWord = wordLibraryList.Count;
    currentStatus = 0;

    foreach (WordLibrary entry in wordLibraryList)
    {
        currentStatus++;
        processMessage = "生成目标编码:" + currentStatus + "/" + countWord;

        if (entry.CodeType != codeType)
        {
            try
            {
                generater.GetCodeOfWordLibrary(entry);
            }
            catch (Exception error)
            {
                // Best effort: a failing entry is logged and still stamped below.
                Debug.WriteLine("生成编码失败" + error.Message);
            }

            if (stampCodeType)
            {
                entry.CodeType = codeType;
            }
        }
    }
}
/// <summary>
/// Formats one entry as tab-separated text: word, code, count.
/// When the generator reports multiple codes per character, one such line is
/// produced per code and the lines are joined with "\r\n".
/// </summary>
/// <param name="wl">The entry to export.</param>
/// <returns>The exported text for the entry.</returns>
public string ExportLine(WordLibrary wl)
{
    // Lazily resolve the generator for this exporter's code type.
    if (codeGenerater == null)
    {
        codeGenerater = CodeTypeHelper.GetGenerater(CodeType);
    }

    var text = new StringBuilder();

    if (!codeGenerater.Is1CharMutiCode)
    {
        text.Append(wl.Word).Append("\t");

        if (CodeType == CodeType.Pinyin || CodeType == CodeType.TerraPinyin)
        {
            text.Append(wl.GetPinYinString(" ", BuildType.None));
        }
        else if (CodeType == wl.CodeType)
        {
            // The entry already carries the wanted code type: reuse its first code.
            text.Append(wl.Codes[0][0]);
        }
        else if (codeGenerater.Is1Char1Code)
        {
            text.Append(CollectionHelper.ListToString(codeGenerater.GetCodeOfString(wl.Word), " "));
        }
        else
        {
            text.Append(CollectionHelper.ListToString(codeGenerater.GetCodeOfString(wl.Word)));
        }

        text.Append("\t").Append(wl.Count);
        return text.ToString();
    }

    IList<string> codes = codeGenerater.GetCodeOfString(wl.Word);
    int written = 0;
    foreach (string code in codes)
    {
        // Separate consecutive records; no trailing separator after the last one.
        if (written > 0)
        {
            text.Append("\r\n");
        }

        text.Append(wl.Word);
        text.Append("\t");
        text.Append(code);
        text.Append("\t");
        text.Append(wl.Count);
        written++;
    }

    return text.ToString();
}
/// <summary>
/// Exports every entry of <paramref name="wlList"/> via <c>ExportLine</c>,
/// appending the line separator after each entry.
/// </summary>
/// <param name="wlList">Entries to export.</param>
/// <returns>The concatenated export text.</returns>
public string Export(WordLibraryList wlList)
{
    // Refresh the generator for the current code type before exporting.
    codeGenerater = CodeTypeHelper.GetGenerater(CodeType);

    var output = new StringBuilder();
    foreach (WordLibrary entry in wlList)
    {
        output.Append(ExportLine(entry)).Append(lineSplitString);
    }

    return output.ToString();
}
/// <summary>
/// Generates codes of type <paramref name="codeType"/> for the entries of
/// <paramref name="wordLibraryList"/>. Entries already of that type are skipped,
/// English entries get their lower-cased word as code, and all others are handed
/// to the code generator and then stamped with the target code type.
/// </summary>
/// <param name="wordLibraryList">Entries to encode; modified in place.</param>
/// <param name="codeType">Code type to generate.</param>
private void GenerateDestinationCode(WordLibraryList wordLibraryList, CodeType codeType)
{
    if (wordLibraryList.Count == 0)
    {
        return;
    }

    // A code-less source combined with a user-defined-phrase target is encoded as Pinyin.
    if (wordLibraryList[0].CodeType == CodeType.NoCode && codeType == CodeType.UserDefinePhrase)
    {
        codeType = CodeType.Pinyin;
    }

    IWordCodeGenerater generater = CodeTypeHelper.GetGenerater(codeType);
    if (generater == null)
    {
        // Unknown code type: no encoding is performed.
        return;
    }

    // codeType is final from here on, so decide once whether entries get stamped.
    bool stampCodeType = codeType != CodeType.Unknown;

    countWord = wordLibraryList.Count;
    currentStatus = 0;

    foreach (WordLibrary entry in wordLibraryList)
    {
        currentStatus++;
        processMessage = "生成目标编码:" + currentStatus + "/" + countWord;

        if (entry.CodeType == codeType)
        {
            continue;
        }

        if (entry.CodeType == CodeType.English)
        {
            entry.SetCode(CodeType.English, entry.Word.ToLower());
            continue;
        }

        try
        {
            generater.GetCodeOfWordLibrary(entry);
        }
        catch (Exception error)
        {
            // Best effort: a failing entry is logged and still stamped below.
            Debug.WriteLine("生成编码失败" + error.Message);
        }

        if (stampCodeType)
        {
            entry.CodeType = codeType;
        }
    }
}
/// <summary>
/// Exports every entry of <paramref name="wlList"/> via <c>ExportLine</c>, skipping
/// entries whose exported line is null or empty, and returns the whole text as
/// the single element of a list.
/// </summary>
/// <param name="wlList">Entries to export.</param>
/// <returns>A one-element list holding the concatenated export text.</returns>
public IList<string> Export(WordLibraryList wlList)
{
    // Refresh the generator for the current code type before exporting.
    codeGenerater = CodeTypeHelper.GetGenerater(CodeType);

    var output = new StringBuilder();
    foreach (WordLibrary entry in wlList)
    {
        var exported = ExportLine(entry);
        if (string.IsNullOrEmpty(exported))
        {
            continue;
        }

        output.Append(exported).Append(lineSplitString);
    }

    var result = new List<string>();
    result.Add(output.ToString());
    return result;
}
/// <summary>
/// Resolves the code generator for the pattern's code type and, for the
/// user-defined code type, configures it from the pattern's mapping table before
/// building the line format.
/// </summary>
/// <exception cref="Exception">
/// The code type is <c>CodeType.UserDefine</c> but no mapping table path is set.
/// </exception>
/// <exception cref="InvalidOperationException">
/// The code type is <c>CodeType.UserDefine</c> but the resolved generator is not a
/// <c>SelfDefiningCodeGenerater</c>.
/// </exception>
public void Prepare()
{
    codeGenerater = CodeTypeHelper.GetGenerater(this.UserDefiningPattern.CodeType);

    if (UserDefiningPattern.CodeType == CodeType.UserDefine)
    {
        if (string.IsNullOrEmpty(UserDefiningPattern.MappingTablePath))
        {
            throw new Exception("未指定字符编码映射文件,无法对词库进行自定义编码的生成");
        }

        IDictionary<char, IList<string>> dict = UserCodingHelper.GetCodingDict(
            UserDefiningPattern.MappingTablePath,
            UserDefiningPattern.TextEncoding);

        // `as` yields null on a type mismatch (or a null generator); fail with a
        // descriptive error instead of a NullReferenceException on the next line.
        var g = codeGenerater as SelfDefiningCodeGenerater;
        if (g == null)
        {
            throw new InvalidOperationException(
                "The code generator resolved for CodeType.UserDefine is not a SelfDefiningCodeGenerater.");
        }

        g.MappingDictionary = dict;
        g.Is1Char1Code = UserDefiningPattern.IsPinyinFormat;
        g.MutiWordCodeFormat = UserDefiningPattern.MutiWordCodeFormat;
    }

    BuildLineFormat();
}
//private RimeConfigForm form;

/// <summary>
/// Formats one entry as tab-separated text: word, code, rank.
/// An entry that already has this exporter's (non-Pinyin) code type is written
/// straight from its first stored code. Otherwise the generator encodes the entry
/// first; if that throws, the failure is reported through <c>Debug.Fail</c> and
/// null is returned.
/// </summary>
/// <param name="wl">The entry to export; may be re-encoded in place.</param>
/// <returns>The exported text, or null when code generation failed.</returns>
public string ExportLine(WordLibrary wl)
{
    // Fast path: the stored code is already of the wanted (non-Pinyin) type.
    if (this.CodeType == wl.CodeType && this.CodeType != CodeType.Pinyin && CodeType != CodeType.TerraPinyin)
    {
        return wl.Word + "\t" + wl.Codes[0][0] + "\t" + wl.Rank;
    }

    // Lazily resolve the generator for this exporter's code type.
    if (codeGenerater == null)
    {
        codeGenerater = CodeTypeHelper.GetGenerater(CodeType);
    }

    try
    {
        codeGenerater.GetCodeOfWordLibrary(wl);
    }
    catch (Exception error)
    {
        Debug.Fail(error.Message);
        return null;
    }

    var text = new StringBuilder();

    if (!codeGenerater.Is1CharMutiCode)
    {
        text.Append(wl.Word).Append("\t");

        if (CodeType == CodeType.Pinyin || CodeType == CodeType.TerraPinyin)
        {
            text.Append(wl.GetPinYinString(" ", BuildType.None));
        }
        else if (CodeType == wl.CodeType)
        {
            text.Append(wl.Codes[0][0]);
        }
        else
        {
            text.Append(wl.Codes.ToCodeString(" ")[0]);
        }

        text.Append("\t").Append(wl.Rank);
        return text.ToString();
    }

    // One record per alternative code, joined by the line separator.
    IList<string> codes = codeGenerater.GetCodeOfString(wl.Word).ToCodeString(" ");
    int written = 0;
    foreach (string code in codes)
    {
        if (written > 0)
        {
            text.Append(lineSplitString);
        }

        text.Append(wl.Word);
        text.Append("\t");
        text.Append(code);
        text.Append("\t");
        text.Append(wl.Rank);
        written++;
    }

    return text.ToString();
}
/// <summary>
/// Generates codes of type <paramref name="codeType"/> for the entries of
/// <paramref name="wordLibraryList"/>.
/// Entries already of that type are skipped and English entries get their
/// lower-cased word as code. For every other entry the word is first filtered
/// according to <c>FilterConfig</c>; if kept English, digits or punctuation
/// remain, the word is split into runs and each run is either copied verbatim
/// or encoded by the generator, otherwise the whole (filtered) word is encoded.
/// The entry's original word is always put back afterwards, including when
/// code generation throws.
/// </summary>
/// <param name="wordLibraryList">Entries to encode; modified in place.</param>
/// <param name="codeType">Code type to generate.</param>
private void GenerateDestinationCode(WordLibraryList wordLibraryList, CodeType codeType)
{
    if (wordLibraryList.Count == 0)
    {
        return;
    }

    // A code-less source combined with a user-defined-phrase target is encoded as Pinyin.
    if (wordLibraryList[0].CodeType == CodeType.NoCode && codeType == CodeType.UserDefinePhrase)
    {
        codeType = CodeType.Pinyin;
    }

    IWordCodeGenerater generater = CodeTypeHelper.GetGenerater(codeType);
    if (generater == null)
    {
        // Unknown code type: no encoding is performed.
        return;
    }

    countWord = wordLibraryList.Count;
    currentStatus = 0;

    // NOTE(review): the duplicated ranges in the number/English patterns look like
    // full-width ranges that were normalised to ASCII at some point - confirm
    // against the upstream source before changing them.
    Regex spaceRegex = new Regex("(?=[^a-zA-Z])\\s+");
    Regex numberRegex = new Regex("[0-90-9]+");
    Regex englishRegex = new Regex("[a-zA-Za-zA-Z]+");
    Regex fullWidthRegex = new Regex("[\uff00-\uff5e]+");
    Regex punctuationRegex = new Regex("[\u0021-\u002f\u003a-\u0040\u005b-\u0060\u007b-\u008f\u00a0-\u00bf\u00d7\u00f7\u2000-\u2bff\u3000-\u303f\u30a0\u30fb\uff01-\uff0f\uff1a-\uff20\uff5b-\uff65]");

    foreach (WordLibrary wordLibrary in wordLibraryList)
    {
        currentStatus++;
        processMessage = "生成目标编码:" + currentStatus + "/" + countWord;

        if (wordLibrary.CodeType == codeType)
        {
            continue;
        }

        if (wordLibrary.CodeType == CodeType.English)
        {
            wordLibrary.SetCode(CodeType.English, wordLibrary.Word.ToLower());
            continue;
        }

        // Remembered outside the try so the catch can undo temporary Word changes.
        string originalWord = wordLibrary.Word;
        try
        {
            string word = originalWord;

            if (FilterConfig.FullWidth && fullWidthRegex.IsMatch(word))
            {
                // Shift U+FF00..U+FF5E down by 65248 (0xFEE0) onto the ASCII range.
                char[] chars = word.ToCharArray();
                for (int i = 0; i < chars.Length; i++)
                {
                    if (chars[i] <= 0xff5e && chars[i] >= 0xff00)
                    {
                        chars[i] = (char)(chars[i] - 65248);
                    }
                }

                word = new String(chars);
            }

            if (FilterConfig.KeepNumber_)
            {
                word = numberRegex.Replace(word, "");
            }

            if (FilterConfig.KeepEnglish_)
            {
                word = englishRegex.Replace(word, "");
            }

            if (FilterConfig.KeepSpace_)
            {
                if (FilterConfig.KeepSpace == false)
                {
                    word = word.Replace(" ", "");
                }
                else
                {
                    word = spaceRegex.Replace(word, "");
                }
            }

            if (FilterConfig.KeepPunctuation_)
            {
                word = punctuationRegex.Replace(word, "");
            }

            if (FilterConfig.ChsNumber)
            {
                word = TranslateChineseNumber(word);
            }

            bool hasKeptLiterals =
                (englishRegex.IsMatch(word) && FilterConfig.KeepEnglish)
                || (numberRegex.IsMatch(word) && FilterConfig.KeepNumber)
                || (punctuationRegex.IsMatch(word) && FilterConfig.KeepPunctuation);

            if (hasKeptLiterals)
            {
                // Split the word into runs of one character class and encode run by run.
                StringBuilder segment = new StringBuilder();
                List<IList<string>> output = new List<IList<string>>();
                int segmentType = -1;

                foreach (char c in word)
                {
                    int type = ClassifySegmentChar(c, segmentType, punctuationRegex);

                    if (segment.Length < 1)
                    {
                        segmentType = type;
                        segment.Append(c);
                    }
                    else if (type == segmentType)
                    {
                        segment.Append(c);
                    }
                    else
                    {
                        // Class changed: emit the finished run and start a new one.
                        AppendSegmentCodes(wordLibrary, generater, segment.ToString(), segmentType, output);
                        segment.Clear();
                        segment.Append(c);
                        segmentType = type;
                    }
                }

                if (segment.Length > 0)
                {
                    AppendSegmentCodes(wordLibrary, generater, segment.ToString(), segmentType, output);
                }

                wordLibrary.Word = originalWord;
                wordLibrary.Codes = new Code(output);
            }
            else
            {
                if (word.Equals(originalWord))
                {
                    generater.GetCodeOfWordLibrary(wordLibrary);
                }
                else
                {
                    // Encode the filtered text, then put the original word back.
                    wordLibrary.Word = word;
                    generater.GetCodeOfWordLibrary(wordLibrary);
                    wordLibrary.Word = originalWord;
                }
            }
        }
        catch (Exception ex)
        {
            // The generator may have thrown while Word held a filtered word or a
            // single segment; restore it so the entry is not left corrupted.
            wordLibrary.Word = originalWord;
            Debug.WriteLine("生成编码失败" + ex.Message);
        }

        if (codeType != CodeType.Unknown)
        {
            wordLibrary.CodeType = codeType;
        }
    }
}

/// <summary>
/// Classifies a character for run splitting: 1 = ASCII digit, 2 = ASCII letter
/// (or a space continuing an English run when spaces are kept), 3 = punctuation,
/// 0 = anything else.
/// </summary>
private int ClassifySegmentChar(char c, int currentSegmentType, Regex punctuationRegex)
{
    if (c >= 0x30 && c <= 0x39)
    {
        return 1;
    }

    if ((c >= 0x41 && c <= 0x5a) || (c >= 0x61 && c <= 0x7a))
    {
        return 2;
    }

    if (c == 0x20 && FilterConfig.KeepSpace && currentSegmentType == 2)
    {
        return 2;
    }

    if ("-・&%'".Contains(c) || punctuationRegex.IsMatch(c.ToString()))
    {
        return 3;
    }

    return 0;
}

/// <summary>
/// Appends the code for one finished run to <paramref name="output"/>: kept
/// English/number/punctuation runs are copied verbatim (English optionally with a
/// leading '_' tag); any other run is encoded by the generator through the entry.
/// </summary>
private void AppendSegmentCodes(WordLibrary wordLibrary, IWordCodeGenerater generater, string segment, int segmentType, List<IList<string>> output)
{
    if (segmentType == 2 && FilterConfig.KeepEnglish)
    {
        if (FilterConfig.needEnglishTag())
        {
            output.Add(new List<string> { '_' + segment });
        }
        else
        {
            output.Add(new List<string> { segment });
        }
    }
    else if ((segmentType == 1 && FilterConfig.KeepNumber) || (segmentType == 3 && FilterConfig.KeepPunctuation))
    {
        output.Add(new List<string> { segment });
    }
    else
    {
        // The generator works on the entry, so lend it the run as a code-less word.
        wordLibrary.Word = segment;
        wordLibrary.CodeType = CodeType.NoCode;
        generater.GetCodeOfWordLibrary(wordLibrary);
        output.AddRange(wordLibrary.Codes);
    }
}