/// <summary>
/// Takes a set of characters with their codes, plus a word frequency, and
/// builds one entry string in the user-configured format.
/// </summary>
/// <param name="charCodes">
/// Maps each character to its code. When <paramref name="word"/> is not
/// supplied, the keys are concatenated in enumeration order to form the word.
/// </param>
/// <param name="rank">Word frequency, passed through unchanged to the entry builder.</param>
/// <param name="word">
/// The word to encode. Null or empty means "derive the word from the keys of
/// <paramref name="charCodes"/>".
/// </param>
/// <returns>The result of the <c>BuildWlString(word, code, rank)</c> overload.</returns>
public string BuildWlString(IDictionary<char, string> charCodes, int rank, string word = "")
{
    // Treat null like the empty default so a null word falls back to the keys
    // instead of being handed to the code generators as null.
    if (string.IsNullOrEmpty(word))
    {
        word = string.Concat(charCodes.Keys);
    }

    string code = "";
    if (ContainCode)
    {
        if (IsPinyinFormat)
        {
            code = CollectionHelper.GetString(GetSelectWordCodes(word, charCodes), CodeSplitString, CodeSplitType);
        }
        else
        {
            // One code for the whole multi-character word: generate it from
            // the user-configured coding rule.
            selfFactory.MutiWordCodeFormat = MutiWordCodeFormat;
            selfFactory.MappingDictionary = charCodes;
            code = selfFactory.GetCodeOfString(word)[0];
        }
    }

    return BuildWlString(word, code, rank);
}
/// <summary>
/// Checks the top code that <c>SelfDefiningCodeGenerater</c> produces for
/// two-, three- and four-character words when <c>Is1Char1Code</c> is off and
/// a multi-word code format is configured.
/// </summary>
public void TestGenerateCode()
{
    SelfDefiningCodeGenerater generater = new SelfDefiningCodeGenerater();
    generater.MappingDictionary = new Dictionary<char, IList<string>>();
    generater.MappingDictionary.Add('深', new[] { "shen" });
    generater.MappingDictionary.Add('蓝', new[] { "lan" });
    generater.Is1Char1Code = false;
    // NOTE(review): judging by the expected values below, "pXY" presumably picks
    // the Y-th letter of the X-th character's code - confirm against the generator.
    generater.MutiWordCodeFormat = @"code_e2=p11+p12+p21+p22 code_e3=p11+p21+p31+p32 code_a4=p11+p21+p31+n11";

    // Expected value goes first so a failing assertion reports
    // expected/actual the right way round.
    var result = generater.GetCodeOfString("深蓝").GetTop1Code();
    Assert.AreEqual("shla", result);

    result = generater.GetCodeOfString("深深蓝").GetTop1Code();
    Assert.AreEqual("ssla", result);

    result = generater.GetCodeOfString("深蓝深蓝").GetTop1Code();
    Assert.AreEqual("slsl", result);
}
/// <summary>
/// Checks the first code that <c>SelfDefiningCodeGenerater</c> returns for a
/// two-character word when a multi-word code format is configured.
/// </summary>
public void TestGenerateCode()
{
    SelfDefiningCodeGenerater generater = new SelfDefiningCodeGenerater();
    generater.MappingDictionary = new Dictionary<char, string>();
    generater.MappingDictionary.Add('深', "shen");
    generater.MappingDictionary.Add('蓝', "lan");
    generater.MutiWordCodeFormat = @"code_e2=p11+p12+p21+p22 code_e3=p11+p21+p31+p32 code_a4=p11+p21+p31+n11";

    var result = generater.GetCodeOfString("深蓝");

    // Expected value goes first so a failing assertion reports
    // expected/actual the right way round.
    Assert.AreEqual("shla", result[0]);
}
/// <summary>
/// With <c>Is1Char1Code</c> enabled and a character that has several codes,
/// checks that the comma-joined code strings for "深蓝" contain "ipws,ajtl".
/// </summary>
public void TestGenerateMutiPinyinFormatCode()
{
    var codeGenerator = new SelfDefiningCodeGenerater();
    codeGenerator.MappingDictionary = new Dictionary<char, IList<string>>();

    // '深' has two candidate codes, '蓝' has one.
    IList<string> codesForShen = new[] { "ipws", "ebcd" };
    IList<string> codesForLan = new[] { "ajtl" };
    codeGenerator.MappingDictionary.Add('深', codesForShen);
    codeGenerator.MappingDictionary.Add('蓝', codesForLan);
    codeGenerator.Is1Char1Code = true;

    var wordCodes = codeGenerator.GetCodeOfString("深蓝");
    var joinedCodes = wordCodes.ToCodeString(",");

    Assert.Contains("ipws,ajtl", joinedCodes.ToArray());

    // Disabled check for the reversed word order:
    //var codes = codeGenerator.GetCodeOfString("蓝深", ",");
    //Assert.AreEqual(codes[0], "ajtl,ipws");
}
/// <summary>
/// With <c>Is1Char1Code</c> enabled, checks that <c>GetCodeOfString</c>
/// returns one code per character of "深蓝", in word order.
/// </summary>
public void TestGeneratePinyinFormatCode()
{
    SelfDefiningCodeGenerater generater = new SelfDefiningCodeGenerater();
    generater.MappingDictionary = new Dictionary<char, string>();
    generater.MappingDictionary.Add('深', "ipws");
    generater.MappingDictionary.Add('蓝', "ajtl");
    generater.MutiWordCodeFormat = @"code_e2=p11+p12+p21+p22 code_e3=p11+p21+p31+p32 code_a4=p11+p21+p31+n11";
    generater.Is1Char1Code = true;

    var result = generater.GetCodeOfString("深蓝");

    // Expected value goes first so a failing assertion reports
    // expected/actual the right way round.
    Assert.AreEqual("ipws", result[0]);
    Assert.AreEqual("ajtl", result[1]);

    // Disabled check for the reversed word order:
    //var codes = generater.GetCodeOfString("蓝深", ",");
    //Assert.AreEqual("ajtl,ipws", codes[0]);
}
/// <summary>
/// Click handler for the convert-test button. Rebuilds the user pattern,
/// then encodes every non-blank line of <c>rtbFrom</c> with a
/// <c>SelfDefiningCodeGenerater</c> and appends the formatted entry to
/// <c>rtbTo</c>, one entry per line.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnConvertTest_Click(object sender, EventArgs e)
{
    // Nothing to do when the user pattern could not be rebuilt.
    if (!ReBuildUserPattern())
    {
        return;
    }

    rtbTo.Clear();
    string[] fromList = rtbFrom.Text.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

    SelfDefiningCodeGenerater generater = new SelfDefiningCodeGenerater();
    generater.MappingDictionary = UserCodingHelper.GetCodingDict(txbFilePath.Text);
    generater.MutiWordCodeFormat = SelectedParsePattern.MutiWordCodeFormat;

    foreach (string str in fromList)
    {
        string s = str.Trim();
        if (s.Length == 0)
        {
            // RemoveEmptyEntries only drops truly empty lines; a whitespace-only
            // line trims to "" and has no word to encode, so skip it.
            continue;
        }

        var code = generater.GetCodeOfString(s);
        // Only the first generated code is used for the preview.
        string result = SelectedParsePattern.BuildWlString(s, code[0], 1);
        rtbTo.AppendText(result + "\r\n");
    }
}
/// <summary>
/// Generates the code for <paramref name="wl"/> according to the selected
/// parse pattern: pinyin via <c>pyFactory</c> when the pattern is both pinyin
/// and pinyin-formatted, otherwise a user-defined code via <c>selfFactory</c>.
/// </summary>
/// <param name="wl">The word-library entry whose code is filled in.</param>
private void GenerateCode(WordLibrary wl)
{
    var word = wl.Word;

    bool usePinyin = SelectedParsePattern.IsPinyin && SelectedParsePattern.IsPinyinFormat;
    if (usePinyin)
    {
        wl.PinYin = CollectionHelper.ToArray(
            pyFactory.GetCodeOfString(word, SelectedParsePattern.CodeSplitString));
        return;
    }

    // User-defined coding: load the mapping table from its file when a path is configured.
    bool hasTablePath = !string.IsNullOrEmpty(SelectedParsePattern.MappingTablePath);
    if (hasTablePath)
    {
        SelectedParsePattern.MappingTable =
            UserCodingHelper.GetCodingDict(SelectedParsePattern.MappingTablePath);
    }

    selfFactory.MappingDictionary = SelectedParsePattern.MappingTable;
    selfFactory.Is1Char1Code = SelectedParsePattern.IsPinyinFormat;
    selfFactory.MutiWordCodeFormat = SelectedParsePattern.MutiWordCodeFormat;

    var userCode = selfFactory.GetCodeOfString(word, SelectedParsePattern.CodeSplitString);
    wl.SetCode(CodeType.UserDefine, userCode);
}
/// <summary>
/// Click handler for the convert-test button. Requires a selected parse
/// pattern and a coding-file path, then encodes every non-blank line of
/// <c>rtbFrom</c> with a <c>SelfDefiningCodeGenerater</c> and appends the
/// formatted entry to <c>rtbTo</c>, one entry per line.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnConvertTest_Click(object sender, EventArgs e)
{
    if (SelectedParsePattern == null)
    {
        MessageBox.Show("请点击右上角按钮选择匹配规则");
        return;
    }

    if (string.IsNullOrEmpty(txbFilePath.Text))
    {
        // NOTE(review): the message says a pinyin source may omit the coding file,
        // yet this handler returns unconditionally; the earlier "only reject when
        // the pattern is not pinyin-formatted" check was disabled. Confirm which
        // behavior is intended.
        MessageBox.Show("请点击右上角按钮选择编码文件!如果源词库是拼音词库,那么可以不选择编码文件,直接以每个字的拼音作为其编码");
        return;
    }

    SelectedParsePattern.MappingTablePath = txbFilePath.Text;

    rtbTo.Clear();
    string[] fromList = rtbFrom.Text.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

    SelfDefiningCodeGenerater generater = new SelfDefiningCodeGenerater();
    generater.MappingDictionary = UserCodingHelper.GetCodingDict(txbFilePath.Text);
    generater.MutiWordCodeFormat = SelectedParsePattern.MutiWordCodeFormat;

    foreach (string str in fromList)
    {
        string s = str.Trim();
        if (s.Length == 0)
        {
            // RemoveEmptyEntries only drops truly empty lines; a whitespace-only
            // line trims to "" and has no word to encode, so skip it.
            continue;
        }

        var code = generater.GetCodeOfString(s);
        // Only the first generated code is used for the preview.
        string result = SelectedParsePattern.BuildWLString(s, code[0], 1);
        rtbTo.AppendText(result + "\r\n");
    }
}