// Original author's note (zengyi, 2010-11-14): written without a clear plan;
// the code is messy, but it appears to be correct.
/// <summary>
/// Formats a word entry as a single text line made of its pinyin, word and count fields.
/// </summary>
/// <remarks>
/// Sort[0], Sort[1] and Sort[2] are the ordering keys of the pinyin, word and count
/// fields respectively; fields are emitted in ascending key order, joined by
/// SplitString. A field whose text is the empty string is left out.
/// When ContainPinyin is set, <paramref name="wl"/>.PinYin is overwritten via CodingString.
/// </remarks>
/// <param name="wl">The entry to format.</param>
/// <returns>The joined fields, or an empty string when every field is empty.</returns>
public string BuildWLString(WordLibrary wl)
{
    string py = "", cp = "";
    if (ContainPinyin)
    {
        // Regenerates wl.PinYin from wl.Word using Factory before formatting it.
        CodingString(wl, Factory);
        py = wl.GetPinYinString(PinyinSplitString, PinyinSplitType);
    }
    if (ContainCipin)
    {
        cp = wl.Count.ToString();
    }

    var dic = new Dictionary<int, string>();
    dic.Add(Sort[0], py);
    dic.Add(Sort[1], wl.Word);
    dic.Add(Sort[2], cp);

    var newSort = new List<int>(Sort);
    newSort.Sort();

    var parts = new List<string>();
    foreach (int x in newSort)
    {
        if (dic[x] != "")
        {
            parts.Add(dic[x]);
        }
    }

    // Join instead of "append separator, then cut at LastIndexOf(separator)":
    // the old trimming threw when no field was emitted (LastIndexOf == -1) and
    // was unreliable for an empty SplitString.
    return string.Join(SplitString, parts.ToArray());
}
/// <summary>
/// Creates a pattern with the default field order (1, 2, 3), a built-in sample
/// entry, and a <see cref="WordPinyinGenerater"/> as the code factory.
/// </summary>
public ParsePattern()
{
    Sort = new List<int> { 1, 2, 3 };

    // Built-in sample entry: a six-character word with one pinyin syllable per character.
    string[] samplePinyin = { "shen", "lan", "ci", "ku", "zhuan", "huan" };
    sample = new WordLibrary
    {
        Count = 1234,
        Word = "深蓝词库转换",
        PinYin = samplePinyin
    };

    Factory = new WordPinyinGenerater();
}
/// <summary>
/// Creates a pattern with the default field order (1, 2, 3), a built-in sample
/// entry, and pinyin formatting enabled.
/// </summary>
public ParsePattern()
{
    Sort = new List<int> { 1, 2, 3 };

    // Built-in sample entry: a six-character word with one pinyin syllable per character.
    string[] samplePinyin = { "shen", "lan", "ci", "ku", "zhuan", "huan" };
    sample = new WordLibrary
    {
        Count = 1234,
        Word = "深蓝词库转换",
        PinYin = samplePinyin
    };

    IsPinyinFormat = true;
}
/// <summary>
/// Click handler: converts every non-empty line of <c>rtbFrom</c> into a word entry
/// (fixed count 1234), generates its code, formats it with the selected pattern and
/// writes all formatted lines to <c>rtbTo</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnTest_Click(object sender, EventArgs e)
{
    // Without a selected pattern both GenerateCode and BuildWlString would
    // dereference null; ask the user to pick one instead of crashing.
    if (SelectedParsePattern == null)
    {
        MessageBox.Show("请点击右上角按钮选择匹配规则");
        return;
    }

    var lines = rtbFrom.Text.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
    StringBuilder sb = new StringBuilder();
    foreach (var line in lines)
    {
        WordLibrary wl = new WordLibrary() { Word = line.Trim(), Count = 1234 };
        GenerateCode(wl);
        sb.Append(SelectedParsePattern.BuildWlString(wl)).Append("\r\n");
    }
    rtbTo.Text = sb.ToString();
}
/// <summary>
/// Parses tab-separated text into a word list. Each line is expected to hold
/// <c>word TAB count TAB pinyin</c>, where the pinyin syllables are separated by spaces.
/// </summary>
/// <param name="str">The text to parse; null or empty yields an empty list.</param>
/// <returns>One <see cref="WordLibrary"/> per non-empty input line.</returns>
/// <remarks>
/// A line with fewer than three columns or a non-numeric count is not handled
/// here; the resulting exception propagates to the caller.
/// </remarks>
public WordLibraryList Import(string str)
{
    WordLibraryList wlList = new WordLibraryList();
    if (string.IsNullOrEmpty(str))
    {
        return wlList;
    }

    // Split on either line-break character so LF-only (or CR-only) text is not
    // treated as one giant line; empty fragments between "\r" and "\n" are dropped.
    var lines = str.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
    foreach (string line in lines)
    {
        var c = line.Split('\t');
        WordLibrary wl = new WordLibrary();
        wl.Word = c[0];
        wl.Count = Convert.ToInt32(c[1]);
        wl.PinYin = c[2].Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
        wlList.Add(wl);
    }
    return wlList;
}
/// <summary>
/// Parses tab-separated text into a word list. Each line is expected to hold
/// <c>word TAB count TAB pinyin</c>, where the pinyin syllables are separated by spaces.
/// </summary>
/// <param name="str">The text to parse; null or empty yields an empty list.</param>
/// <returns>One <see cref="WordLibrary"/> per non-empty input line.</returns>
/// <remarks>
/// A line with fewer than three columns or a non-numeric count is not handled
/// here; the resulting exception propagates to the caller.
/// </remarks>
public WordLibraryList Import(string str)
{
    WordLibraryList wlList = new WordLibraryList();
    if (string.IsNullOrEmpty(str))
    {
        return wlList;
    }

    // Split on either line-break character so LF-only (or CR-only) text is not
    // treated as one giant line; empty fragments between "\r" and "\n" are dropped.
    char[] lineBreaks = { '\r', '\n' };
    char[] syllableBreaks = { ' ' };
    string[] lines = str.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);
    for (int i = 0; i < lines.Length; i++)
    {
        var c = lines[i].Split('\t');
        WordLibrary wl = new WordLibrary();
        wl.Word = c[0];
        wl.Count = Convert.ToInt32(c[1]);
        wl.PinYin = c[2].Split(syllableBreaks, StringSplitOptions.RemoveEmptyEntries);
        wlList.Add(wl);
    }
    return wlList;
}
/// <summary>
/// Fills in the code of <paramref name="wl"/> according to the selected pattern:
/// pinyin from <c>pyFactory</c> when the pattern is both pinyin and pinyin-formatted,
/// otherwise a user-defined code from <c>selfFactory</c>.
/// </summary>
/// <param name="wl">The entry whose Word is encoded; its code is updated in place.</param>
private void GenerateCode(WordLibrary wl)
{
    var text = wl.Word;

    bool usePinyin = SelectedParsePattern.IsPinyin && SelectedParsePattern.IsPinyinFormat;
    if (usePinyin)
    {
        var pinyinCodes = pyFactory.GetCodeOfString(text, SelectedParsePattern.CodeSplitString);
        wl.PinYin = CollectionHelper.ToArray(pinyinCodes);
        return;
    }

    // User-defined coding: (re)load the mapping table whenever a table path is configured.
    var tablePath = SelectedParsePattern.MappingTablePath;
    if (!string.IsNullOrEmpty(tablePath))
    {
        SelectedParsePattern.MappingTable = UserCodingHelper.GetCodingDict(tablePath);
    }

    // Push the pattern's settings into the shared generator before encoding.
    selfFactory.MappingDictionary = SelectedParsePattern.MappingTable;
    selfFactory.Is1Char1Code = SelectedParsePattern.IsPinyinFormat;
    selfFactory.MutiWordCodeFormat = SelectedParsePattern.MutiWordCodeFormat;

    var customCode = selfFactory.GetCodeOfString(text, SelectedParsePattern.CodeSplitString);
    wl.SetCode(CodeType.UserDefine, customCode);
}
/// <summary>
/// Click handler: parses every non-empty line of <c>rtbFrom</c> with the selected
/// pattern and appends each entry's display string to <c>rtbTo</c>.
/// </summary>
/// <remarks>
/// Any exception raised while parsing stops the run and shows a message box;
/// lines appended before the failure remain in <c>rtbTo</c>.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnParse_Click(object sender, EventArgs e)
{
    if (SelectedParsePattern == null)
    {
        MessageBox.Show("请点击右上角按钮选择匹配规则");
        return;
    }

    rtbTo.Clear();
    try
    {
        char[] lineBreaks = { '\r', '\n' };
        string[] sourceLines = rtbFrom.Text.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);
        for (int i = 0; i < sourceLines.Length; i++)
        {
            string trimmed = sourceLines[i].Trim();
            WordLibrary parsed = SelectedParsePattern.BuildWordLibrary(trimmed);
            rtbTo.AppendText(parsed.ToDisplayString() + "\r\n");
        }
    }
    catch
    {
        MessageBox.Show("无法识别源内容,请确认源内容与自定义规则匹配!");
    }
}
/// <summary>
/// Click handler: picks a code generator (pinyin when no mapping file path is entered,
/// self-defined otherwise), installs it on the selected pattern, then formats every
/// non-empty line of <c>rtbFrom</c> as a word and appends the result to <c>rtbTo</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnConvertTest_Click(object sender, EventArgs e)
{
    if (SelectedParsePattern == null)
    {
        MessageBox.Show("请点击右上角按钮选择匹配规则");
        return;
    }

    IWordCodeGenerater factory;
    string mappingFile = txbFilePath.Text;
    if (!string.IsNullOrEmpty(mappingFile))
    {
        // A mapping file was supplied: use the self-defined generator and
        // publish the path through UserCodingHelper.
        factory = new SelfDefiningCodeGenerater();
        UserCodingHelper.FilePath = mappingFile;
    }
    else
    {
        factory = new WordPinyinGenerater();
    }
    SelectedParsePattern.Factory = factory;

    rtbTo.Clear();
    char[] lineBreaks = { '\r', '\n' };
    string[] words = rtbFrom.Text.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);
    for (int i = 0; i < words.Length; i++)
    {
        var entry = new WordLibrary();
        entry.Word = words[i].Trim();
        rtbTo.AppendText(SelectedParsePattern.BuildWLString(entry) + "\r\n");
    }
}
/// <summary>
/// Encodes <paramref name="wl"/>.Word one character at a time with
/// <paramref name="factory"/> and stores the resulting codes in <paramref name="wl"/>.PinYin.
/// </summary>
/// <param name="wl">The entry to encode; its PinYin array is replaced.</param>
/// <param name="factory">Generator that supplies the code for each character.</param>
public void CodingString(WordLibrary wl, IWordCodeGenerater factory)
{
    string word = wl.Word;
    var perCharCodes = new string[word.Length];
    for (int i = 0; i < word.Length; i++)
    {
        perCharCodes[i] = factory.GetCodeOfChar(word[i]);
    }

    // Assigned only after every character was encoded successfully.
    wl.PinYin = perCharCodes;
}
/// <summary>
/// Parses one text line into a <see cref="WordLibrary"/> using this pattern's
/// separator and field order.
/// </summary>
/// <remarks>
/// The line is split on SplitString (empty tokens removed). The token at position
/// k belongs to the field whose Sort value is the k-th smallest: Sort index 0 is
/// pinyin, 1 is the word, 2 is the count. A pinyin or count token is stored only
/// when ContainPinyin / ContainCipin is set; when the flag is off the token at
/// that position is skipped, not handed to another field. Finally PinYin is
/// derived by splitting PinYinString on PinyinSplitString.
/// </remarks>
/// <param name="line">The text line to parse.</param>
/// <returns>The populated entry.</returns>
public WordLibrary BuildWordLibrary(string line)
{
    var wl = new WordLibrary();
    string[] tokens = line.Split(new[] {SplitString}, StringSplitOptions.RemoveEmptyEntries);
    var ranks = new List<int>(Sort);
    ranks.Sort();

    for (int position = 0; position < 3; position++)
    {
        // The first position is always processed; the second and third only
        // when the line actually has that many tokens.
        if (position > 0 && tokens.Length <= position)
        {
            break;
        }

        // Which field (0 = pinyin, 1 = word, 2 = count) has the rank at this position?
        int slot = position;
        switch (Sort.FindIndex(rank => rank == ranks[slot]))
        {
            case 0: // pinyin
                if (ContainPinyin)
                {
                    wl.PinYinString = tokens[position];
                }
                break;
            case 1: // word
                wl.Word = tokens[position];
                break;
            case 2: // count
                if (ContainCipin)
                {
                    wl.Count = Convert.ToInt32(tokens[position]);
                }
                break;
        }
    }

    wl.PinYin = wl.PinYinString.Split(new[] {PinyinSplitString}, StringSplitOptions.RemoveEmptyEntries);
    return wl;
}
/// <summary>
/// Builds a multi-line preview of this pattern's output using the built-in sample
/// entry: one formatted line for each prefix of the sample word (1 character,
/// 2 characters, ...), each line terminated by "\r\n".
/// </summary>
/// <remarks>
/// While the preview is generated, <c>selfFactory</c> is temporarily replaced by a
/// generator that knows only the sample characters; the previous generator is
/// restored afterwards, even when formatting throws.
/// </remarks>
/// <returns>The concatenated preview lines.</returns>
public string BuildWLStringSample()
{
    var samFactory = new SelfDefiningCodeGenerater();
    samFactory.MappingDictionary = new Dictionary<char, string>()
    {
        {'深', "shen"},
        {'蓝', "lan"},
        {'词', "ci"},
        {'库', "ku"},
        {'转', "zhuan"},
        {'换', "huan"}
    };

    var temp = selfFactory;
    selfFactory = samFactory;
    try
    {
        string word = "";
        var result = new StringBuilder();
        List<string> codes = new List<string>();
        foreach (var c in sample.Word)
        {
            // Grow the word by one character and take the matching pinyin syllable.
            word += c;
            codes.Add(sample.PinYin[word.Length - 1]);

            var s = new WordLibrary();
            s.Count = 1234;
            s.Word = word;
            s.PinYin = codes.ToArray();
            result.Append(BuildWLString(s)).Append("\r\n");
        }
        return result.ToString();
    }
    finally
    {
        // Always put the real generator back so a failed preview cannot leave
        // the pattern stuck on the sample-only mapping.
        selfFactory = temp;
    }
}
// Original author's note (zengyi, 2010-11-14): written without a clear plan;
// the code is messy, but it appears to be correct.
// If wl already carries a pinyin array and the custom format is a pinyin format
// as well, only the formatting needs to be converted.
/// <summary>
/// Formats a word entry as a single text line made of its code, word and rank fields.
/// </summary>
/// <remarks>
/// Sort[0], Sort[1] and Sort[2] are the ordering keys of the code, word and rank
/// fields respectively; fields are emitted in ascending key order, joined by
/// SplitString. A field whose text is the empty string is left out.
/// The code is taken from the entry's pinyin when IsPinyinFormat is set; otherwise
/// it is the first result of <c>selfFactory.GetCodeOfString</c> for the word.
/// </remarks>
/// <param name="wl">The entry to format.</param>
/// <returns>The joined fields, or an empty string when every field is empty.</returns>
public string BuildWLString(WordLibrary wl)
{
    string py = "", cp = "";
    if (ContainCode)
    {
        if (IsPinyinFormat)
        {
            py = wl.GetPinYinString(CodeSplitString, CodeSplitType);
        }
        else
        {
            selfFactory.MutiWordCodeFormat = MutiWordCodeFormat;
            py = selfFactory.GetCodeOfString(wl.Word)[0];
        }
    }
    if (ContainRank)
    {
        cp = wl.Count.ToString();
    }

    var dic = new Dictionary<int, string>();
    dic.Add(Sort[0], py);
    dic.Add(Sort[1], wl.Word);
    dic.Add(Sort[2], cp);

    var newSort = new List<int>(Sort);
    newSort.Sort();

    var parts = new List<string>();
    foreach (int x in newSort)
    {
        if (dic[x] != "")
        {
            parts.Add(dic[x]);
        }
    }

    // Join instead of "append separator, then cut at LastIndexOf(separator)":
    // the old trimming threw when no field was emitted (LastIndexOf == -1) and
    // was unreliable for an empty SplitString.
    return string.Join(SplitString, parts.ToArray());
}
/// <summary>
/// Decides whether an entry passes every filter in <c>Filters</c>.
/// </summary>
/// <param name="wordLibrary">The entry to check.</param>
/// <returns>
/// <c>false</c> as soon as one filter rejects the entry (later filters are not
/// consulted); <c>true</c> when all filters accept it or there are none.
/// </returns>
private bool IsKeep(WordLibrary wordLibrary)
{
    bool keep = true;
    foreach (ISingleFilter rule in Filters)
    {
        if (rule.IsKeep(wordLibrary))
        {
            continue;
        }

        keep = false;
        break;
    }
    return keep;
}