/// <summary>
/// Converts Chinese text to pinyin. The text is segmented left-to-right with
/// <c>Helper.Segmentation.SegMMLeftToRight</c> against the words of the dictionary
/// loaded from <c>filename</c>, and every segment is then mapped to its pinyin.
/// </summary>
/// <param name="str">The text to convert.</param>
/// <returns>
/// The lower-cased pinyin of all segments separated by single spaces and trimmed,
/// or the message "正向分词模块执行失败" when the segmenter returns <c>null</c>.
/// </returns>
/// <exception cref="KeyNotFoundException">
/// A segment that is not exactly one character long is missing from the dictionary.
/// </exception>
private string ConvertCnToPinyin(string str)
{
    // The all-Chinese input check is currently disabled:
    //if (!CheckInputText(str))
    //{
    //    return "要处理的内容必须是全中文";
    //}

    // Load the dictionary; only its keys (the words) are handed to the segmenter.
    Entity.PinyinDictionary dict = new Entity.PinyinDictionary(@filename);
    List<string> wordList = dict.Dictionary.Keys.ToList<string>();

    // Forward segmentation; a null result signals failure.
    List<string> wordsLeft = Helper.Segmentation.SegMMLeftToRight(str, ref wordList);
    if (wordsLeft == null)
    {
        return "正向分词模块执行失败";
    }

    // Read the checkbox once instead of once per segment.
    bool keepTone = checkBoxWithTone.Checked;

    List<string> syllables = new List<string>(wordsLeft.Count);
    foreach (string word in wordsLeft)
    {
        string pinyin;
        if (dict.Dictionary.TryGetValue(word, out pinyin))
        {
            // Invariant casing: culture-sensitive ToLower() would turn "I" into
            // a dotless "ı" under Turkish/Azeri cultures and corrupt the pinyin.
            pinyin = pinyin.ToLowerInvariant();
        }
        else if (word.Length == 1)
        {
            // A single character the dictionary does not know: fall back to the
            // per-character converter (per the original note, backed by Microsoft's library).
            pinyin = Helper.PinyinConvert.GetFirstPinYinCount(word[0]).ToLowerInvariant();
        }
        else
        {
            // Same exception type the dictionary indexer raised before, now naming the segment.
            throw new KeyNotFoundException(
                "No pinyin entry in the dictionary for segment '" + word + "'.");
        }

        // Without tones requested, strip every digit from the pinyin.
        if (!keepTone)
        {
            pinyin = Regex.Replace(pinyin, @"\d", "");
        }

        syllables.Add(pinyin);
    }

    // Join with single spaces; Trim() keeps the original handling of empty edge items.
    return string.Join(" ", syllables.ToArray()).Trim();
}
/// <summary>
/// Dictionary read test: loads the dictionary from <c>filename</c> and shows its
/// entry count plus at most the first 50 entries in a message box.
/// </summary>
private void ReadDict()
{
    // Upper bound on listed entries; per the original note, showing more makes the program hang.
    const int maxShown = 50;
    const string newLine = "\r\n";

    Entity.PinyinDictionary dict = new Entity.PinyinDictionary(@filename);

    System.Text.StringBuilder report = new System.Text.StringBuilder();

    // First line: how many entries were read.
    report.Append("词典读取成功,获得词条:")
          .Append(dict.Dictionary.Count)
          .Append("条。")
          .Append(newLine);

    // Second line: tell the user whether the listing is truncated.
    string heading = dict.Dictionary.Count > maxShown
        ? "只显示词典的前50条,太多程序会卡死:"
        : "词典所有内容如下:";
    report.Append(heading).Append(newLine);

    // One "word, pinyin" line per entry, stopping after the limit.
    int shown = 0;
    foreach (KeyValuePair<string, string> entry in dict.Dictionary)
    {
        report.Append(entry.Key)
              .Append(", ")
              .Append(entry.Value)
              .Append(newLine);

        shown++;
        if (shown >= maxShown)
        {
            break;
        }
    }

    MessageBox.Show(text: report.ToString(), caption: "词库字典", buttons: MessageBoxButtons.OK);
}
/// <summary>
/// Segmentation test: runs one of the segmentation modules on the text and
/// returns the segments as a labelled, comma-terminated list.
/// </summary>
/// <param name="str">The text to segment.</param>
/// <param name="seglb">
/// Selects the module: 1 = <c>SegMMLeftToRight</c> (forward), 2 = <c>SegMMRightToLeft</c>
/// (reverse), any other value = <c>SegMMDouble</c> (bidirectional).
/// </param>
/// <returns>
/// The module's heading followed by every segment with a trailing ",",
/// or the module's failure message when it returns <c>null</c>.
/// </returns>
private string SegWords(string str, int seglb)
{
    // Load the dictionary; the segmenters only need its words, not the pinyin.
    Entity.PinyinDictionary dict = new Entity.PinyinDictionary(@filename);
    List<string> vocabulary = dict.Dictionary.Keys.ToList<string>();

    List<string> segments;
    string heading;
    string failureMessage;

    // Choose the segmenter together with its heading and failure text.
    switch (seglb)
    {
        case 1:
            segments = Helper.Segmentation.SegMMLeftToRight(str, ref vocabulary);
            heading = "正向分词:";
            failureMessage = "正向分词模块执行失败";
            break;

        case 2:
            segments = Helper.Segmentation.SegMMRightToLeft(str, ref vocabulary);
            heading = "逆向分词:";
            failureMessage = "逆向分词模块执行失败";
            break;

        default:
            segments = Helper.Segmentation.SegMMDouble(str, ref vocabulary);
            heading = "双向分词:";
            failureMessage = "双向分词模块执行失败";
            break;
    }

    // A null result means the chosen module failed.
    if (segments == null)
    {
        return failureMessage;
    }

    // Heading, then each segment followed by a comma (the last one included).
    System.Text.StringBuilder output = new System.Text.StringBuilder(heading);
    foreach (string segment in segments)
    {
        output.Append(segment).Append(",");
    }

    return output.ToString();
}