/// <summary>
/// Assigns a <c>CodeType.UserDefinePhrase</c> code to the given entry.
/// </summary>
/// <remarks>
/// When the entry is pinyin-coded, its pinyin string (built with an empty
/// separator and <c>BuildType.None</c>) is stored first. Afterwards the entry's
/// codes are expanded through <c>CollectionHelper.Descartes</c> and the first
/// resulting item becomes the final code.
/// </remarks>
/// <param name="wl">The entry to update in place.</param>
public override void GetCodeOfWordLibrary(WordLibrary wl)
{
    bool isPinyinEntry = wl.CodeType == CodeType.Pinyin;
    if (isPinyinEntry)
    {
        var pinyinText = wl.GetPinYinString("", BuildType.None);
        wl.SetCode(CodeType.UserDefinePhrase, pinyinText);
    }

    // Presumably a Cartesian product of the per-character code candidates;
    // only the first combination is kept. TODO confirm against CollectionHelper.
    var combinations = CollectionHelper.Descartes(wl.Codes);
    var firstCombination = combinations[0];
    wl.SetCode(CodeType.UserDefinePhrase, firstCombination);
}
/// <summary>
/// Assigns a <c>CodeType.Chaoyin</c> code to the given entry.
/// </summary>
/// <remarks>
/// Pinyin-coded entries are converted from their pinyin via
/// <c>ChaoyinHelper.GetChaoyin</c>; every other entry gets its code from
/// <c>GetCodeOfString</c> applied to the word text.
/// </remarks>
/// <param name="wl">The entry to update in place.</param>
public void GetCodeOfWordLibrary(WordLibrary wl)
{
    if (wl.CodeType != CodeType.Pinyin)
    {
        // No pinyin available: derive the code from the word itself.
        wl.SetCode(CodeType.Chaoyin, GetCodeOfString(wl.Word));
        return;
    }

    var chaoyin = ChaoyinHelper.GetChaoyin(wl.PinYin);
    wl.SetCode(CodeType.Chaoyin, chaoyin);
}
/// <summary>
/// Parses one space-separated line whose first field is the code and whose
/// second field is the word, and returns a list containing that single entry.
/// </summary>
/// <param name="line">The raw line to parse.</param>
/// <returns>A list holding exactly one entry.</returns>
/// <remarks>
/// Indexing the second field throws when the line contains no space.
/// </remarks>
public override WordLibraryList ImportLine(string line)
{
    string[] fields = line.Split(' ');
    string code = fields[0];

    var entry = new WordLibrary();
    entry.Word = fields[1];
    entry.Rank = DefaultRank;

    // A generated code is stored under Cangjie first, then the code read
    // from the line is stored under this importer's own code type.
    var generated = pyGenerater.GetCodeOfString(entry.Word);
    entry.SetCode(CodeType.Cangjie, generated);
    entry.SetCode(CodeType, code);

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
//private IWordCodeGenerater pyGenerater=new PinyinGenerater();

/// <summary>
/// Parses one tab-separated line with the columns word, code and rank, and
/// returns a list containing that single entry.
/// </summary>
/// <param name="line">The raw line to parse.</param>
/// <returns>A list holding exactly one entry.</returns>
/// <remarks>
/// Throws when fewer than three columns are present or when the rank column
/// cannot be converted by <c>Convert.ToInt32</c>.
/// </remarks>
public override WordLibraryList ImportLine(string line)
{
    string[] columns = line.Split('\t');
    string wordText = columns[0];
    string codeText = columns[1];

    var entry = new WordLibrary();
    entry.Word = wordText;
    entry.Rank = Convert.ToInt32(columns[2]);

    if (CodeType == CodeType.Pinyin)
    {
        // Pinyin codes are stored as one syllable per array element.
        char[] syllableSeparators = new[] { ' ' };
        entry.PinYin = codeText.Split(syllableSeparators, StringSplitOptions.RemoveEmptyEntries);
    }
    else
    {
        // Any other code type keeps the code column verbatim; no pinyin is
        // generated for the word here.
        entry.SetCode(CodeType, codeText);
    }

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Parses one space-separated line whose first field is a code and whose
/// remaining fields are words sharing that code, producing one entry per word.
/// </summary>
/// <param name="line">The raw line to parse.</param>
/// <returns>
/// One entry for every field after the first; empty when the line has only
/// a code.
/// </returns>
public override WordLibraryList ImportLine(string line)
{
    var wlList = new WordLibraryList();
    string[] strs = line.Split(' ');

    // Split always yields at least one element, so reading the code up front
    // is safe even for a line without any words.
    string code = strs[0];

    for (int i = 1; i < strs.Length; i++)
    {
        var wl = new WordLibrary();

        // Words are stored exactly as they appear in the line, commas included.
        // NOTE(review): an earlier comment suggested commas should be stripped
        // from words - confirm which behaviour is intended.
        wl.Word = strs[i];
        wl.SetCode(CodeType, code);
        wlList.Add(wl);
    }

    return wlList;
}
/// <summary>
/// Reads one phrase record from the current stream position and returns it
/// as a Wubi98-coded entry.
/// </summary>
/// <param name="fs">Stream positioned at the start of the record.</param>
/// <param name="nextStartPosition">
/// Absolute stream position at which the following record starts; used to
/// work out how many bytes the word occupies.
/// </param>
/// <returns>
/// The decoded entry, or <c>null</c> when <c>SetCode</c> rejects the code.
/// </returns>
private WordLibrary ReadOnePhrase(FileStream fs, int nextStartPosition)
{
    var entry = new WordLibrary();

    // Record header. The reads must stay in this order because each one
    // advances the stream.
    BinFileHelper.ReadInt32(fs);                      // leading value, not used
    var wordOffset = BinFileHelper.ReadInt16(fs);     // used below to size the code bytes
    entry.Rank = fs.ReadByte();
    fs.ReadByte();                                    // purpose unknown
    BinFileHelper.ReadInt64(fs);                      // newer field, meaning unknown

    // Code bytes (UTF-16); their length is derived from the offset field.
    var codeBytes = BinFileHelper.ReadArray(fs, wordOffset - 18);
    var codeText = Encoding.Unicode.GetString(codeBytes);

    // 00 00 separator between the code and the word.
    BinFileHelper.ReadInt16(fs);

    // The word runs up to the next record, minus its trailing 00 00.
    var wordByteCount = nextStartPosition - (int)fs.Position - 2;
    var wordBytes = BinFileHelper.ReadArray(fs, wordByteCount);

    // Trailing 00 00 terminator.
    BinFileHelper.ReadInt16(fs);

    entry.Word = Encoding.Unicode.GetString(wordBytes);

    try
    {
        entry.SetCode(CodeType.Wubi98, codeText);
    }
    catch
    {
        // Any failure while storing the code drops the record.
        return null;
    }

    entry.CodeType = CodeType.Wubi98;
    return entry;
}
/// <summary>
/// Parses one space-separated line whose first field is the code and whose
/// second field is the word, and returns a list containing that single entry.
/// </summary>
/// <param name="line">The raw line to parse.</param>
/// <returns>A list holding exactly one entry.</returns>
/// <remarks>
/// Indexing the second field throws when the line contains no space.
/// </remarks>
public WordLibraryList ImportLine(string line)
{
    string[] fields = line.Split(' ');
    string code = fields[0];

    var entry = new WordLibrary();
    entry.Word = fields[1];
    entry.Count = DefaultRank;

    // Pinyin is generated from the word, then the code read from the line
    // is stored under this importer's code type.
    var generatedPinyin = pyGenerater.GetCodeOfString(entry.Word);
    entry.PinYin = CollectionHelper.ToArray(generatedPinyin);
    entry.SetCode(CodeType, code);

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Parses one tab-separated line, taking the second column as the word, and
/// returns a list containing that single entry.
/// </summary>
/// <param name="line">The raw line to parse.</param>
/// <returns>A list holding exactly one entry.</returns>
/// <remarks>
/// A code is stored only for words that <c>IsEnglish</c> accepts; in that
/// case the word itself is used as the <c>CodeType.English</c> code.
/// Indexing the second column throws when the line contains no tab.
/// </remarks>
public WordLibraryList ImportLine(string line)
{
    var entry = new WordLibrary();

    string[] columns = line.Split('\t');
    entry.Word = columns[1];
    entry.CodeType = CodeType;
    entry.IsEnglish = IsEnglish(entry.Word);

    if (entry.IsEnglish)
    {
        entry.SetCode(CodeType.English, entry.Word);
    }

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
//private IWordCodeGenerater pyGenerater = new PinyinGenerater();

/// <summary>
/// Parses one space-separated line whose first field is a code and whose
/// remaining fields are words sharing that code, producing one entry per word.
/// </summary>
/// <param name="str">The raw line to parse.</param>
/// <returns>
/// One entry for every field after the first; empty when the line has only
/// a code.
/// </returns>
public WordLibraryList ImportLine(string str)
{
    var result = new WordLibraryList();
    string[] fields = str.Split(' ');

    // Index 0 holds the shared code, so the words start at index 1.
    int position = 1;
    while (position < fields.Length)
    {
        var entry = new WordLibrary();
        entry.Word = fields[position];
        entry.Count = DefaultRank;
        entry.SetCode(CodeType, fields[0]);
        // No pinyin is generated for the word here.
        result.Add(entry);
        position++;
    }

    return result;
}
/// <summary>
/// Parses one space-separated line whose first field is a Wubi code and
/// whose second field is the word.
/// </summary>
/// <param name="line">The raw line to parse.</param>
/// <returns>
/// A list containing the parsed entry when its <c>PinYin</c> array is
/// non-empty; otherwise an empty list.
/// </returns>
/// <remarks>
/// Indexing the second field throws when the line contains no space.
/// </remarks>
public override WordLibraryList ImportLine(string line)
{
    // Split once and reuse the parts instead of re-splitting for every field.
    string[] parts = line.Split(' ');
    string code = parts[0];
    string word = parts[1];

    var wl = new WordLibrary();
    wl.Word = word;
    wl.Rank = DefaultRank;
    wl.SetCode(CodeType.Wubi, code);

    var wll = new WordLibraryList();

    // NOTE(review): PinYin is never assigned in this method, so whether this
    // filter ever passes depends on how WordLibrary derives PinYin - confirm
    // that entries are not being dropped unintentionally.
    if (wl.PinYin.Length > 0)
    {
        wll.Add(wl);
    }

    return wll;
}
/// <summary>
/// Reads one phrase record from the current stream position: two length
/// bytes, one marker byte, the ASCII code and the UTF-16 word.
/// </summary>
/// <param name="fs">Stream positioned at the start of the record.</param>
/// <param name="type">Selects how the code string is stored on the entry.</param>
/// <returns>
/// The decoded entry. When storing the code fails, the entry is returned with
/// <c>CodeType.NoCode</c> and <c>ImportLineErrorNotice</c> is raised.
/// </returns>
private WordLibrary ReadOnePhrase(FileStream fs, DictCodeType type)
{
    var entry = new WordLibrary();

    // Three single-byte header fields, in stream order.
    int codeLength = fs.ReadByte();
    int wordLength = fs.ReadByte();
    int marker = fs.ReadByte();

    // Known marker values. 0x64 is an ordinary phrase (mixed Chinese/Latin
    // such as "阿Q" included); 0x67 has been seen on entries like
    // "$X[计算器]calc".
    Debug.Assert(marker == 0x64 || marker == 0x32 || marker == 0x10 || marker == 0x66 || marker == 0x67);

    var rawCode = BinFileHelper.ReadArray(fs, codeLength);
    var codeText = Encoding.ASCII.GetString(rawCode);

    var rawWord = BinFileHelper.ReadArray(fs, wordLength);
    var text = Encoding.Unicode.GetString(rawWord);

    if (marker == 0x32)
    {
        // Entries such as "醃(腌)" - the parenthesised part is presumably the
        // simplified form. For now only the first character is kept.
        text = text.Substring(0, 1);
    }

    Debug.Assert(text.IndexOf("(") < 0);
    entry.Word = text;

    try
    {
        if (type == DictCodeType.Pinyin)
        {
            entry.CodeType = CodeType.Pinyin;
            entry.SetPinyinString(codeText);
        }
        else if (type == DictCodeType.Wubi98)
        {
            entry.CodeType = CodeType.Wubi98;
            entry.SetCode(CodeType.Wubi98, codeText);
        }
    }
    catch
    {
        // Keep the word but mark it as having no code, and notify listeners.
        entry.CodeType = CodeType.NoCode;
        ImportLineErrorNotice?.Invoke($"{entry.Word} 的编码缺失");
    }

    return entry;
}
/// <summary>
/// Converts one input line, laid out according to the configured
/// <c>UserDefiningPattern</c>, into a <c>WordLibrary</c> entry.
/// </summary>
/// <param name="line">The raw line to parse.</param>
/// <returns>The entry built from the line's code, word and rank fields.</returns>
public WordLibrary BuildWordLibrary(string line)
{
    var wl = new WordLibrary();
    wl.CodeType = UserDefiningPattern.CodeType;

    string[] fields = line.Split(new[] { UserDefiningPattern.SplitString }, StringSplitOptions.RemoveEmptyEntries);

    // Sorted copy of the configured order values: element k is the value
    // belonging to the k-th field on the line.
    var ordered = new List<int>(UserDefiningPattern.Sort);
    ordered.Sort();

    string code = "";
    string word = "";
    int rank = 0;

    for (int position = 0; position < 3; position++)
    {
        // The first field is always examined; the second and third only
        // when the line actually contains them.
        if (position > 0 && fields.Length <= position)
        {
            break;
        }

        int slot = position;

        // Index within Sort identifies the field kind: 0 = code, 1 = word, 2 = rank.
        int kind = UserDefiningPattern.Sort.FindIndex(i => i == ordered[slot]);
        switch (kind)
        {
            case 0:
                if (UserDefiningPattern.ContainCode)
                {
                    code = fields[position];
                }
                break;
            case 1:
                word = fields[position];
                break;
            case 2:
                if (UserDefiningPattern.ContainRank)
                {
                    rank = Convert.ToInt32(fields[position]);
                }
                break;
        }
    }

    wl.Word = word;
    wl.Rank = rank;

    if (code == "")
    {
        // No code on the line: leave the entry without one.
        return wl;
    }

    if (UserDefiningPattern.IsPinyinFormat)
    {
        // Pinyin-style codes are split into their individual parts first.
        string[] codeParts = code.Split(new[] { UserDefiningPattern.CodeSplitString }, StringSplitOptions.RemoveEmptyEntries);
        wl.SetCode(UserDefiningPattern.CodeType, new List<string>(codeParts), UserDefiningPattern.IsPinyinFormat);
    }
    else
    {
        wl.SetCode(UserDefiningPattern.CodeType, code);
    }

    return wl;
}