/// <summary>
/// Formats one entry as its pinyin syllables joined by apostrophes, a comma, then the word.
/// </summary>
/// <param name="wl">The entry to serialize.</param>
/// <returns>The export line for <paramref name="wl"/>.</returns>
public string ExportLine(WordLibrary wl)
{
    string pinyin = wl.GetPinYinString("'", BuildType.None);
    return string.Concat(pinyin, ",", wl.Word);
}
/// <summary>
/// Formats one entry as its pinyin (no separator between syllables), a single space, then the word.
/// </summary>
/// <param name="wl">The entry to serialize.</param>
/// <returns>The export line for <paramref name="wl"/>.</returns>
public string ExportLine(WordLibrary wl)
{
    string pinyin = wl.GetPinYinString("", BuildType.None);
    return pinyin + " " + wl.Word;
}
/// <summary>
/// Converts one line of plain text into a word entry.
/// </summary>
/// <param name="line">The whole line; it is used verbatim as the word.</param>
/// <returns>A list holding one entry whose pinyin is produced by pinyinFactory for the line.</returns>
public virtual WordLibraryList ImportLine(string line)
{
    var codes = pinyinFactory.GetCodeOfString(line);
    var entry = new WordLibrary { Word = line, PinYin = ToArray(codes) };

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Formats one entry as the first code wubiFactory returns for the word, a space, then the word.
/// </summary>
/// <param name="wl">The entry to serialize.</param>
/// <returns>The export line for <paramref name="wl"/>.</returns>
public string ExportLine(WordLibrary wl)
{
    var firstCode = wubiFactory.GetCodeOfString(wl.Word)[0];
    return new StringBuilder()
        .Append(firstCode)
        .Append(" ")
        .Append(wl.Word)
        .ToString();
}
/// <summary>
/// Formats one entry as the value of WubiHelper.GetStringWubi86Code for the word, a space, then the word.
/// </summary>
/// <param name="wl">The entry to serialize.</param>
/// <returns>The export line for <paramref name="wl"/>.</returns>
public string ExportLine(WordLibrary wl)
{
    var wubiCode = WubiHelper.GetStringWubi86Code(wl.Word);

    var line = new StringBuilder();
    line.Append(wubiCode).Append(" ").Append(wl.Word);
    return line.ToString();
}
/// <summary>
/// Parses a tab-separated line of the form word, count, space-separated pinyin.
/// </summary>
/// <param name="line">One line of the source file.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    string[] fields = line.Split('\t');

    var entry = new WordLibrary
    {
        Word = fields[0],
        Count = Convert.ToInt32(fields[1]),
        PinYin = fields[2].Split(new[] {' '}, StringSplitOptions.RemoveEmptyEntries)
    };

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Parses a comma-separated line of the form pinyin,word where the pinyin syllables
/// are separated by apostrophes. The count is always set to 1.
/// </summary>
/// <param name="line">One line of the source file.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    // Split once and reuse the pieces instead of splitting the same line twice.
    string[] fields = line.Split(',');
    string py = fields[0];
    string word = fields[1];

    var wl = new WordLibrary();
    wl.Word = word;
    wl.Count = 1;
    wl.PinYin = py.Split(new[] {'\''}, StringSplitOptions.RemoveEmptyEntries);

    var wll = new WordLibraryList();
    wll.Add(wl);
    return wll;
}
/// <summary>
/// Runs the generator over a fixed pinyin entry and writes every resulting code to debug output.
/// Makes no assertions.
/// </summary>
public void TestPinyin2TerraPinyin()
{
    var entry = new WordLibrary();
    entry.Word = "深蓝";
    entry.Rank = 123;
    entry.PinYin = new [] { "shen", "lan" };
    entry.CodeType = CodeType.Pinyin;

    generater.GetCodeOfWordLibrary(entry);

    foreach (var code in entry.Codes)
    {
        Debug.WriteLine(code);
    }
}
/// <summary>
/// Runs the generator over a word that has no code yet and writes each space-joined
/// code string to debug output. Makes no assertions.
/// </summary>
/// <param name="word">The word to encode.</param>
/// <param name="pinyin">Not read by this method.</param>
public void TestChar2TerraPinyin(string word, string pinyin)
{
    var entry = new WordLibrary();
    entry.Word = word;
    entry.Rank = 123;
    entry.CodeType = CodeType.NoCode;

    generater.GetCodeOfWordLibrary(entry);

    var codeStrings = entry.Codes.ToCodeString(" ");
    foreach (var codeString in codeStrings)
    {
        Debug.WriteLine(codeString);
    }
}
/// <summary>
/// Parses a tab-separated line of the form word, space-separated pinyin, count.
/// </summary>
/// <param name="line">One line of the source file.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    string[] fields = line.Split('\t');
    string pinyinText = fields[1];
    string text = fields[0];

    var entry = new WordLibrary
    {
        Word = text,
        Count = Convert.ToInt32(fields[2]),
        PinYin = pinyinText.Split(new[] {' '}, StringSplitOptions.RemoveEmptyEntries)
    };

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Converts one line of plain text into a word entry.
/// </summary>
/// <param name="line">The whole line; it is used verbatim as the word.</param>
/// <returns>A list holding one entry tagged with this instance's CodeType; no pinyin is assigned.</returns>
public virtual WordLibraryList ImportLine(string line)
{
    var entry = new WordLibrary { Word = line, CodeType = CodeType };

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Parses a tab-separated line of the form word, count. The entry gets an empty pinyin array.
/// </summary>
/// <param name="line">One line of the source file.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    string[] parts = line.Split('\t');

    var entry = new WordLibrary();
    entry.Word = parts[0];
    entry.Count = Convert.ToInt32(parts[1]);
    entry.PinYin = new string[0];

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Formats one entry as word, rank and space-separated pinyin, joined by tabs.
/// </summary>
/// <param name="wl">The entry to serialize.</param>
/// <returns>The export line for <paramref name="wl"/>.</returns>
public string ExportLine(WordLibrary wl)
{
    return new StringBuilder()
        .Append(wl.Word)
        .Append("\t")
        .Append(wl.Rank)
        .Append("\t")
        .Append(wl.GetPinYinString(" ", BuildType.None))
        .ToString();
}
/// <summary>
/// Parses a tab-separated line of the form word, rank, space-separated pinyin.
/// </summary>
/// <param name="line">One line of the source file.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    string[] columns = line.Split('\t');

    var entry = new WordLibrary
    {
        Word = columns[0],
        Rank = Convert.ToInt32(columns[1]),
        PinYin = columns[2].Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
    };

    var list = new WordLibraryList();
    list.Add(entry);
    return list;
}
/// <summary>
/// Returns the code strings for an entry: the unseparated pinyin when the entry is
/// pinyin-coded, otherwise whatever CollectionHelper.Descartes yields for its codes.
/// </summary>
/// <param name="str">The entry whose codes are requested.</param>
/// <param name="charCodeSplit">Not read by this implementation.</param>
/// <returns>The list of code strings.</returns>
public IList<string> GetCodeOfWordLibrary(WordLibrary str, string charCodeSplit = "")
{
    if (str.CodeType != CodeType.Pinyin)
    {
        return CollectionHelper.Descartes(str.Codes);
    }

    var single = new List<string>();
    single.Add(str.GetPinYinString("", BuildType.None));
    return single;
}
/// <summary>
/// Parses a tab-separated line of the form word, pinyin where the pinyin syllables
/// are separated by apostrophes. The count is always set to 1.
/// </summary>
/// <param name="line">One line of the source file.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    // Split once and reuse the pieces instead of splitting the same line twice.
    string[] fields = line.Split('\t');
    string py = fields[1];
    string word = fields[0];

    var wl = new WordLibrary();
    wl.Word = word;
    wl.Count = 1;
    wl.PinYin = py.Split(new[] { '\'' }, StringSplitOptions.RemoveEmptyEntries);

    var wll = new WordLibraryList();
    wll.Add(wl);
    return(wll);
}
/// <summary>
/// Stores a user-defined-phrase code on the entry. English entries first get the word
/// itself and pinyin entries their unseparated pinyin; in every case the first element
/// of CollectionHelper.Descartes over the entry's codes is then stored as the code.
/// </summary>
/// <param name="wl">The entry to update in place.</param>
public override void GetCodeOfWordLibrary(WordLibrary wl)
{
    switch (wl.CodeType)
    {
        case CodeType.English:
            wl.SetCode(CodeType.UserDefinePhrase, wl.Word);
            break;
        case CodeType.Pinyin:
            wl.SetCode(CodeType.UserDefinePhrase, wl.GetPinYinString("", BuildType.None));
            break;
    }

    var combinations = CollectionHelper.Descartes(wl.Codes);
    wl.SetCode(CodeType.UserDefinePhrase, combinations[0]);
}
/// <summary>
/// Formats one entry as word, apostrophe-separated pinyin and count, joined by tabs.
/// The pinyin column is left out for English entries.
/// </summary>
/// <param name="wl">The entry to serialize.</param>
/// <returns>The export line for <paramref name="wl"/>.</returns>
public string ExportLine(WordLibrary wl)
{
    var line = new StringBuilder();
    line.Append(wl.Word).Append("\t");

    if (!wl.IsEnglish)
    {
        line.Append(wl.GetPinYinString("'", BuildType.RightContain)).Append("\t");
    }

    line.Append(wl.Count);
    return line.ToString();
}
/// <summary>
/// Lets the base class generate the codes, then replaces every code in place with the
/// value ZhuyinHelper.GetZhuyin returns for it.
/// </summary>
/// <param name="wl">The entry to update in place.</param>
public override void GetCodeOfWordLibrary(WordLibrary wl)
{
    base.GetCodeOfWordLibrary(wl);

    for (int charIndex = 0; charIndex < wl.Codes.Count; charIndex++)
    {
        var candidates = wl.Codes[charIndex];
        for (int candidateIndex = 0; candidateIndex < candidates.Count; candidateIndex++)
        {
            string original = candidates[candidateIndex];
            wl.Codes[charIndex][candidateIndex] = ZhuyinHelper.GetZhuyin(original);
        }
    }
}
/// <summary>
/// Formats one entry as word, apostrophe-separated pinyin and count, joined by tabs.
/// The pinyin column is left out for English entries.
/// </summary>
/// <param name="wl">The entry to serialize.</param>
/// <returns>The export line for <paramref name="wl"/>.</returns>
public string ExportLine(WordLibrary wl)
{
    string word = wl.Word;

    // Empty for English entries; otherwise the pinyin followed by its trailing tab.
    string pinyinColumn = wl.IsEnglish
        ? ""
        : wl.GetPinYinString("'", BuildType.RightContain) + "\t";

    return word + "\t" + pinyinColumn + wl.Count;
}
/// <summary>
/// Formats one entry as word, code and count, joined by tabs.
/// When the generator reports several codes per character, one such line is written per
/// code, separated by CR LF. Otherwise the code column is the space-separated pinyin
/// (Pinyin / TerraPinyin targets), the entry's own first code (when its code type already
/// matches the target), or a freshly generated code.
/// </summary>
/// <param name="wl">The entry to serialize.</param>
/// <returns>The export text for <paramref name="wl"/>; may span several lines.</returns>
public string ExportLine(WordLibrary wl)
{
    var sb = new StringBuilder();

    if (codeGenerater.Is1CharMutiCode)
    {
        IList<string> codes = codeGenerater.GetCodeOfString(wl.Word);
        for (int index = 0; index < codes.Count; index++)
        {
            // A line break goes between records, never after the last one.
            if (index > 0)
            {
                sb.Append("\r\n");
            }
            sb.Append(wl.Word).Append("\t").Append(codes[index]).Append("\t").Append(wl.Count);
        }
        return sb.ToString();
    }

    sb.Append(wl.Word).Append("\t");

    bool pinyinTarget = CodeType == CodeType.Pinyin || CodeType == CodeType.TerraPinyin;
    if (pinyinTarget)
    {
        sb.Append(wl.GetPinYinString(" ", BuildType.None));
    }
    else if (CodeType == wl.CodeType)
    {
        sb.Append(wl.Codes[0][0]);
    }
    else if (codeGenerater.Is1Char1Code)
    {
        sb.Append(CollectionHelper.ListToString(codeGenerater.GetCodeOfString(wl.Word), " "));
    }
    else
    {
        sb.Append(CollectionHelper.ListToString(codeGenerater.GetCodeOfString(wl.Word)));
    }

    sb.Append("\t").Append(wl.Count);
    return sb.ToString();
}
/// <summary>
/// Parses a space-separated line of the form code word. The code column is ignored;
/// the pinyin is regenerated from the word via pinyinFactory and the count is DefaultRank.
/// </summary>
/// <param name="line">One line of the source file.</param>
/// <returns>
/// A list holding the parsed entry, or an empty list when no pinyin could be produced.
/// </returns>
public WordLibraryList ImportLine(string line)
{
    // Only the word column is needed; split once and drop the unused code column.
    string word = line.Split(' ')[1];

    var wl = new WordLibrary();
    wl.Word = word;
    wl.Count = DefaultRank;
    wl.PinYin = ToArray(pinyinFactory.GetCodeOfString(word));

    var wll = new WordLibraryList();
    if (wl.PinYin.Length > 0)
    {
        wll.Add(wl);
    }
    return wll;
}
/// <summary>
/// Parses a space-separated line of the form code word. The entry gets DefaultRank as
/// count, pinyin generated by pyGenerater, and the given code under this instance's CodeType.
/// </summary>
/// <param name="line">One line of the source file.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    string[] parts = line.Split(' ');
    string rawCode = parts[0];

    var entry = new WordLibrary();
    entry.Word = parts[1];
    entry.Count = DefaultRank;
    entry.PinYin = CollectionHelper.ToArray(pyGenerater.GetCodeOfString(entry.Word));
    entry.SetCode(CodeType, rawCode);

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Parses a tab-separated line of the form word, space-separated pinyin, count.
/// </summary>
/// <param name="line">One line of the source file.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    string[] columns = line.Split('\t');
    string pinyinColumn = columns[1];
    string wordColumn = columns[0];

    var entry = new WordLibrary();
    entry.Word = wordColumn;
    entry.Count = Convert.ToInt32(columns[2]);
    entry.PinYin = pinyinColumn.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

    var list = new WordLibraryList();
    list.Add(entry);
    return list;
}
/// <summary>
/// Parses a space-separated line of the form pinyin word count, where the pinyin
/// syllables are separated by apostrophes. Anything from the first comma on is discarded.
/// </summary>
/// <param name="line">One line of the source file.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    // If the line contains commas, keep only the part before the first one.
    string head = line.Split(',')[0];
    string[] fields = head.Split(' ');

    string pinyinText = fields[0];
    string text = fields[1];
    int frequency = Convert.ToInt32(fields[2]);

    var entry = new WordLibrary
    {
        Word = text,
        Count = frequency,
        PinYin = pinyinText.Split(new[] {'\''}, StringSplitOptions.RemoveEmptyEntries)
    };

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Parses a space-separated line of the form code word. The entry gets DefaultRank as rank;
/// the output of pyGenerater is stored under CodeType.Cangjie, then the code read from the
/// line is stored under this instance's CodeType.
/// </summary>
/// <param name="line">One line of the source file.</param>
/// <returns>A list holding the single parsed entry.</returns>
public override WordLibraryList ImportLine(string line)
{
    string[] parts = line.Split(' ');
    string rawCode = parts[0];

    var entry = new WordLibrary();
    entry.Word = parts[1];
    entry.Rank = DefaultRank;
    entry.SetCode(CodeType.Cangjie, pyGenerater.GetCodeOfString(entry.Word));
    entry.SetCode(CodeType, rawCode);

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Parses a tab-separated line of the form word, rank. The entry gets an empty pinyin array.
/// </summary>
/// <param name="line">One line of the source file.</param>
/// <returns>A list holding the single parsed entry.</returns>
public override WordLibraryList ImportLine(string line)
{
    string[] parts = line.Split('\t');

    var entry = new WordLibrary();
    entry.Word = parts[0];
    entry.Rank = Convert.ToInt32(parts[1]);
    entry.PinYin = new string[0];

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Reads one word record from the stream. Record layout:
/// 4 bytes: length len;
/// len*2 bytes: pinyin (for each character, one byte initial and one byte final);
/// len*2 bytes: the characters, Unicode encoded.
/// </summary>
/// <param name="fs">Stream positioned at the start of a record.</param>
/// <returns>The parsed entry, or null when the length field is zero.</returns>
private WordLibrary ImportWord(FileStream fs)
{
    int failedInitial = 0;
    var entry = new WordLibrary();

    var lengthBytes = new byte[4];
    fs.Read(lengthBytes, 0, 4);
    int len = BitConverter.ToInt32(lengthBytes, 0);
    if (len == 0)
    {
        Debug.WriteLine(fs.Position);
        return null;
    }

    var syllables = new List<string>();
    for (int n = 0; n < len; n++)
    {
        var pair = new byte[2];
        fs.Read(pair, 0, 2);
        try
        {
            string initial = Shengmu[pair[0]];
            string final = Yunmu[pair[1]];
            syllables.Add(initial + final);
        }
        catch (Exception e)
        {
            // A failed lookup is logged and the syllable skipped; the initial byte is kept for the report below.
            Debug.WriteLine(e.Message);
            failedInitial = pair[0];
        }
    }
    entry.PinYin = syllables.ToArray();

    var wordBytes = new byte[2 * len];
    fs.Read(wordBytes, 0, 2 * len);
    entry.Word = Encoding.Unicode.GetString(wordBytes);

    if (failedInitial > 0)
    {
        Debug.WriteLine(failedInitial + " " + entry.Word + "----" + entry.PinYinString);
    }
    return entry;
}
/// <summary>
/// Formats one entry as word, code and rank, joined by tabs.
/// The code generater is created on first use and asked to fill in the entry's codes.
/// When it reports several codes per character, one such line is written per code string,
/// separated by lineSplitString. Otherwise the code column is the space-separated pinyin
/// (Pinyin / TerraPinyin targets), the entry's own first code (when its code type already
/// matches the target), or the first space-joined code string.
/// </summary>
/// <param name="wl">The entry to serialize; its codes are updated by the generater.</param>
/// <returns>The export text for <paramref name="wl"/>; may span several lines.</returns>
public string ExportLine(WordLibrary wl)
{
    var sb = new StringBuilder();

    if (codeGenerater == null)
    {
        codeGenerater = CodeTypeHelper.GetGenerater(CodeType);
    }
    codeGenerater.GetCodeOfWordLibrary(wl);

    if (codeGenerater.Is1CharMutiCode)
    {
        IList<string> codes = codeGenerater.GetCodeOfString(wl.Word).ToCodeString(" ");
        for (int index = 0; index < codes.Count; index++)
        {
            // The separator goes between records, never after the last one.
            if (index > 0)
            {
                sb.Append(lineSplitString);
            }
            sb.Append(wl.Word).Append("\t").Append(codes[index]).Append("\t").Append(wl.Rank);
        }
        return sb.ToString();
    }

    sb.Append(wl.Word).Append("\t");

    bool pinyinTarget = CodeType == CodeType.Pinyin || CodeType == CodeType.TerraPinyin;
    if (pinyinTarget)
    {
        sb.Append(wl.GetPinYinString(" ", BuildType.None));
    }
    else if (CodeType == wl.CodeType)
    {
        sb.Append(wl.Codes[0][0]);
    }
    else
    {
        sb.Append(wl.Codes.ToCodeString(" ")[0]);
    }

    sb.Append("\t").Append(wl.Rank);
    return sb.ToString();
}
/// <summary>
/// Reads every word record from a binary lexicon file, starting at offset 0x350.
/// Records that fail to parse are logged to debug output and skipped.
/// </summary>
/// <param name="path">Path of the file to read.</param>
/// <returns>All entries that have a non-empty word and at least one pinyin syllable.</returns>
public WordLibraryList Import(string path)
{
    var wordLibraryList = new WordLibraryList();

    // using guarantees the handle is released even if opening or seeking throws.
    using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
    {
        fs.Position = 0x350;

        // "<" instead of "!=": for a file shorter than 0x350 bytes the position starts
        // beyond the end and never moves, so the old do/while loop never terminated.
        while (fs.Position < fs.Length)
        {
            try
            {
                WordLibrary wl = ImportWord(fs);

                // ImportWord returns null for a zero-length record; skip it explicitly
                // instead of relying on a caught NullReferenceException.
                if (wl != null && wl.Word != "" && wl.PinYin.Length > 0)
                {
                    wordLibraryList.Add(wl);
                }
            }
            catch (Exception ex)
            {
                Debug.WriteLine(ex.Message);
            }
        }
    }

    return wordLibraryList;
}
/// <summary>
/// Parses a tab-separated line and takes its second column as the word. The entry is
/// tagged with this instance's CodeType; when IsEnglish reports the word as English,
/// the word itself is stored as its English code.
/// </summary>
/// <param name="line">One line of the source file.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    var entry = new WordLibrary { Word = line.Split('\t')[1], CodeType = CodeType };

    entry.IsEnglish = IsEnglish(entry.Word);
    if (entry.IsEnglish)
    {
        entry.SetCode(CodeType.English, entry.Word);
    }

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Parses a space-separated line of the form 'py'py word, where each pinyin syllable is
/// preceded by an apostrophe. The rank is always set to 1.
/// </summary>
/// <param name="line">One line of the source file.</param>
/// <returns>
/// A list holding the single parsed entry, or null when the line does not start with
/// an apostrophe.
/// </returns>
public override WordLibraryList ImportLine(string line)
{
    if (line.IndexOf("'") != 0)
    {
        return(null);
    }

    // Split once and reuse the pieces instead of splitting the same line twice.
    string[] fields = line.Split(' ');
    string py = fields[0];
    string word = fields[1];

    var wl = new WordLibrary();
    wl.Word = word;
    wl.Rank = 1;
    wl.PinYin = py.Split(new[] { '\'' }, StringSplitOptions.RemoveEmptyEntries);

    var wll = new WordLibraryList();
    wll.Add(wl);
    return(wll);
}
/// <summary>
/// Formats one entry as "pinyin word number Z, pinyin number", where the pinyin syllables
/// are separated by apostrophes and number is this instance's number member.
/// </summary>
/// <param name="wl">The entry to serialize.</param>
/// <returns>The export line for <paramref name="wl"/>.</returns>
public string ExportLine(WordLibrary wl)
{
    string pinyin = wl.GetPinYinString("'", BuildType.None);

    return new StringBuilder()
        .Append(pinyin).Append(" ")
        .Append(wl.Word).Append(" ")
        .Append(number).Append(" Z, ")
        .Append(pinyin).Append(" ")
        .Append(number)
        .ToString();
}
/// <summary>
/// Parses a comma-separated line of the form word,count. The entry gets an empty pinyin array.
/// </summary>
/// <param name="line">One line of the source file.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    string[] fields = line.Split(',');
    string text = fields[0];
    int frequency = Convert.ToInt32(fields[1]);

    var entry = new WordLibrary { Word = text, Count = frequency, PinYin = new string[0] };

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Exports a single four-character pinyin entry through Win10MsPinyinSelfStudy and writes
/// the first element of the export result to debug output. Makes no assertions.
/// </summary>
public void TestExport1()
{
    var entries = new WordLibraryList();

    var entry = new WordLibrary();
    entry.Word = "曾毅曾诚";
    entry.PinYin = new string[] { "zeng", "yi", "zeng", "cheng" };
    entry.CodeType = CodeType.Pinyin;
    entries.Add(entry);

    var exporter = new Win10MsPinyinSelfStudy();
    var exported = exporter.Export(entries);
    Debug.WriteLine(exported[0]);
}
/// <summary>
/// Parses a space-separated line of the form code word. The code column is ignored;
/// the pinyin is regenerated from the word via pinyinFactory and the count is DefaultRank.
/// </summary>
/// <param name="line">One line of the source file.</param>
/// <returns>
/// A list holding the parsed entry, or an empty list when no pinyin could be produced.
/// </returns>
public WordLibraryList ImportLine(string line)
{
    // Only the word column is needed; split once and drop the unused code column.
    string word = line.Split(' ')[1];

    var wl = new WordLibrary();
    wl.Word = word;
    wl.Count = DefaultRank;
    wl.PinYin = ToArray(pinyinFactory.GetCodeOfString(word));

    var wll = new WordLibraryList();
    if (wl.PinYin.Length > 0)
    {
        wll.Add(wl);
    }
    return(wll);
}
// Original author's format note: 4 bytes = number x of entries sharing one pinyin, 2 bytes = pinyin
// length n, n bytes = pinyin indices, then x times (2 bytes word length y, y*2 bytes word content in
// Unicode, 2 bytes frequency, 2 bytes unknown, 4 bytes unknown).

#region IWordLibraryImport Members

/// <summary>
/// Reads a binary lexicon file. The total word count is read at offset 0x18 and the
/// records start at offset 0x30; CountWord and CurrentStatus are updated while reading.
/// </summary>
/// <param name="path">Path of the file to read.</param>
/// <returns>One entry per word; words in the same group share one pinyin array.</returns>
public WordLibraryList Import(string path)
{
    var pyAndWord = new WordLibraryList();

    // using releases the file handle on every exit path; the stream used to be left open.
    using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
    {
        fs.Position = 0x18;
        CountWord = BinFileHelper.ReadInt32(fs);
        CurrentStatus = 0;
        fs.Position = 0x30;

        while (CurrentStatus < CountWord)
        {
            int samePyCount = BinFileHelper.ReadInt16(fs);
            BinFileHelper.ReadInt16(fs); // value unused; read only to advance the stream
            short pyLength = BinFileHelper.ReadInt16(fs);

            var pyArray = new string[pyLength / 2];
            for (int i = 0; i < pyLength / 2; i++)
            {
                short idx = BinFileHelper.ReadInt16(fs);
                try
                {
                    pyArray[i] = PinYinDic[idx];
                }
                catch
                {
                    // Failed lookups become a placeholder syllable.
                    pyArray[i] = "--";
                }
            }

            for (int i = 0; i < samePyCount; i++)
            {
                short wordByteLength = BinFileHelper.ReadInt16(fs);
                var wordArray = new byte[wordByteLength];
                fs.Read(wordArray, 0, wordByteLength);
                string word = Encoding.Unicode.GetString(wordArray);

                short count = BinFileHelper.ReadInt16(fs);
                BinFileHelper.ReadInt16(fs); // value unused; read only to advance the stream
                BinFileHelper.ReadInt32(fs); // purpose unknown; read only to advance the stream

                var wl = new WordLibrary { Count = count, Word = word, PinYin = pyArray };
                pyAndWord.Add(wl);
                CurrentStatus++;
            }
        }
    }

    return(pyAndWord);
}
/// <summary>
/// Parses a space-separated line of the form code word1 word2 ... and produces one entry
/// per word. Every entry gets DefaultRank as count and the shared code under this
/// instance's CodeType.
/// </summary>
/// <param name="str">One line of the source file.</param>
/// <returns>A list with one entry for each word after the code; empty if there are none.</returns>
public WordLibraryList ImportLine(string str)
{
    var result = new WordLibraryList();
    string[] tokens = str.Split(' ');

    for (int position = 1; position < tokens.Length; position++)
    {
        var entry = new WordLibrary { Word = tokens[position], Count = DefaultRank };
        entry.SetCode(CodeType, tokens[0]);
        result.Add(entry);
    }

    return result;
}
/// <summary>
/// Formats one entry through PhraseFormat with the word, the first element of
/// CollectionHelper.Descartes over its codes, and its rank (DefaultRank when the rank is 0).
/// With IsShortCode set, each character's codes are first reduced to the first character
/// of its first code.
/// </summary>
/// <param name="wl">The entry to serialize.</param>
/// <returns>The export line for <paramref name="wl"/>.</returns>
public string ExportLine(WordLibrary wl)
{
    var codes = wl.Codes;

    if (IsShortCode)
    {
        var initials = new Code();
        foreach (var charCodes in wl.Codes)
        {
            var single = new List<string>();
            single.Add(charCodes[0][0].ToString());
            initials.Add(single);
        }
        codes = initials;
    }

    var word = wl.Word;
    var code = CollectionHelper.Descartes(codes)[0];
    var rank = wl.Rank == 0 ? DefaultRank : wl.Rank;
    return string.Format(PhraseFormat, word, code, rank);
}
/// <summary>
/// Formats one entry as its Wubi code, a space, then the word. The entry's own WubiCode is
/// used when present; otherwise the first code wubiFactory returns for the word.
/// </summary>
/// <param name="wl">The entry to serialize.</param>
/// <returns>The export line for <paramref name="wl"/>.</returns>
public virtual string ExportLine(WordLibrary wl)
{
    var line = new StringBuilder();

    if (!string.IsNullOrEmpty(wl.WubiCode))
    {
        line.Append(wl.WubiCode);
    }
    else
    {
        line.Append(wubiFactory.GetCodeOfString(wl.Word)[0]);
    }

    line.Append(" ").Append(wl.Word);
    return line.ToString();
}
/// <summary>
/// Parses a space-separated line of the form pinyin word count, where the pinyin
/// syllables are separated by apostrophes. Anything from the first comma on is discarded.
/// </summary>
/// <param name="line">One line of the source file.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    // If the line contains commas, keep only the part before the first one.
    string[] columns = line.Split(',')[0].Split(' ');

    string pinyinColumn = columns[0];
    string wordColumn = columns[1];
    int frequency = Convert.ToInt32(columns[2]);

    var entry = new WordLibrary();
    entry.Word = wordColumn;
    entry.Count = frequency;
    entry.PinYin = pinyinColumn.Split(new[] { '\'' }, StringSplitOptions.RemoveEmptyEntries);

    var list = new WordLibraryList();
    list.Add(entry);
    return list;
}
/// <summary>
/// Parses a space-separated line of the form code word1 word2 ... and produces one entry
/// per word, with pinyin generated by pinyinFactory. The code column is ignored.
/// </summary>
/// <param name="line">One line of the source file.</param>
/// <returns>
/// One entry for each word that pinyinFactory yields at least one code for.
/// </returns>
public virtual WordLibraryList ImportLine(string line)
{
    var wlList = new WordLibraryList();
    string[] strs = line.Split(' ');

    for (int i = 1; i < strs.Length; i++)
    {
        // Strip commas embedded in the word.
        string word = strs[i].Replace(",", "");
        var list = pinyinFactory.GetCodeOfString(word);

        // The previous inner loop added the same entry once per element of list,
        // so an N-syllable word was imported N times. Add it once; words with no
        // codes are still skipped, as before.
        if (list.Count > 0)
        {
            var wl = new WordLibrary();
            wl.Word = word;
            wl.PinYin = ToArray(list);
            wlList.Add(wl);
        }
    }

    return wlList;
}
/// <summary>
/// Parses a line of the form word(py|py|...): the text before the first '(' is the word and
/// the text between that '(' and the following ')' is the pipe-separated pinyin.
/// The rank is always set to 1.
/// </summary>
/// <param name="line">One line of the source file.</param>
/// <returns>A list holding the single parsed entry.</returns>
public override WordLibraryList ImportLine(string line)
{
    var result = new WordLibraryList();

    string[] halves = line.Split('(');
    string text = halves[0];
    string pinyinText = halves[1].Split(')')[0];

    var entry = new WordLibrary
    {
        Word = text,
        Rank = 1,
        PinYin = pinyinText.Split(new[] { '|' }, StringSplitOptions.RemoveEmptyEntries)
    };

    result.Add(entry);
    return result;
}
/// <summary>
/// Formats one entry by writing each character of the word immediately followed by the
/// pinyin at the same index. Best effort: any exception raised while building the line
/// makes the method return an empty string.
/// </summary>
/// <param name="wl">The entry to serialize.</param>
/// <returns>The interleaved text, or "" on any failure.</returns>
public string ExportLine(WordLibrary wl)
{
    try
    {
        var line = new StringBuilder();
        string text = wl.Word;

        int index = 0;
        while (index < text.Length)
        {
            // Read the pinyin before appending so a missing syllable still fails the whole line.
            string syllable = wl.PinYin[index];
            line.Append(text[index]).Append(syllable);
            index++;
        }

        return line.ToString();
    }
    catch
    {
        return "";
    }
}
/// <summary>
/// Parses a tab-separated line. Two columns (word, count) mark an English entry; otherwise
/// the columns are word, apostrophe-separated pinyin, count.
/// </summary>
/// <param name="line">One line of the source file.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    var wl = new WordLibrary();
    string[] array = line.Split('\t');
    wl.Word = array[0];

    if (array.Length == 2) // English
    {
        wl.IsEnglish = true;
        wl.Count = Convert.ToInt32(array[1]);
    }
    else
    {
        // Reuse the array from the split above rather than splitting the line again.
        string py = array[1];
        wl.PinYin = py.Split(new[] {'\''}, StringSplitOptions.RemoveEmptyEntries);
        wl.Count = Convert.ToInt32(array[2]);
    }

    var wll = new WordLibraryList();
    wll.Add(wl);
    return wll;
}
/// <summary>
/// Imports every word that Parse extracts from the file. Words that IsChinese accepts get
/// pinyin from a WordPinyinGenerater; all others are flagged as English. Every entry gets
/// DefaultRank as count.
/// </summary>
/// <param name="path">Path of the file to read.</param>
/// <returns>One entry per parsed word.</returns>
public WordLibraryList Import(string path)
{
    IWordCodeGenerater generator = new WordPinyinGenerater();
    IList<string> parsedWords = Parse(path);
    var result = new WordLibraryList();

    foreach (string text in parsedWords)
    {
        var entry = new WordLibrary();

        if (!IsChinese(text))
        {
            entry.IsEnglish = true;
        }
        else
        {
            // Chinese words need phonetic annotation.
            entry.PinYin = ToArray(generator.GetCodeOfString(text));
        }

        entry.Word = text;
        entry.Count = DefaultRank;
        result.Add(entry);
    }

    return result;
}
/// <summary>
/// Reads one word record from the stream: a 2-byte length len, 2 bytes that are skipped,
/// len pairs of (initial, final) index bytes, then len*2 bytes of Unicode text.
/// </summary>
/// <param name="fs">Stream positioned at the start of a record.</param>
/// <returns>The parsed entry.</returns>
private WordLibrary ImportWord(FileStream fs)
{
    var entry = new WordLibrary();

    var header = new byte[2];
    fs.Read(header, 0, 2);
    short len = BitConverter.ToInt16(header, 0);
    fs.Read(header, 0, 2); // meaning of these two bytes is unknown; they are discarded

    var syllables = new List<string>();
    for (int n = 0; n < len; n++)
    {
        var pair = new byte[2];
        fs.Read(pair, 0, 2);
        syllables.Add(Shengmu[pair[0]] + Yunmu[pair[1]]);
    }
    entry.PinYin = syllables.ToArray();

    var wordBytes = new byte[2*len];
    fs.Read(wordBytes, 0, 2*len);
    entry.Word = Encoding.Unicode.GetString(wordBytes);

    return entry;
}
/// <summary>
/// Parses text in which a character may be followed by its pinyin. Characters with a code
/// above 'z' are treated as word characters; a run of characters at or below 'z' following
/// a word character is taken as that character's pinyin. Word characters without such a
/// run get their pinyin from single.GetCodeOfChar.
/// </summary>
/// <param name="word">The annotated text of one entry.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string word)
{
    string characters = "";
    var syllables = new List<string>();

    int pos = 0;
    while (pos < word.Length - 1)
    {
        char current = word[pos];
        characters += current;

        if (word[pos + 1] > 'z')
        {
            // Next character is not pinyin: generate the pinyin for this one.
            syllables.Add(single.GetCodeOfChar(current));
            pos++;
        }
        else
        {
            // Consume the pinyin run that follows this character.
            int end = pos + 1;
            while (end != word.Length && word[end] <= 'z')
            {
                end++;
            }
            syllables.Add(word.Substring(pos + 1, end - pos - 1));
            pos = end;
        }
    }

    if (pos == word.Length - 1)
    {
        // The final character has no pinyin after it.
        characters += word[pos];
        syllables.Add(single.GetCodeOfChar(word[pos]));
    }

    var entry = new WordLibrary();
    entry.PinYin = syllables.ToArray();
    entry.Word = characters;

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Formats one entry as the word, a tab, then the entry's count plus 54999.
/// </summary>
/// <param name="wl">The entry to serialize.</param>
/// <returns>The export line for <paramref name="wl"/>.</returns>
public string ExportLine(WordLibrary wl)
{
    string word = wl.Word;
    int shifted = 54999 + wl.Count;
    return word + "\t" + shifted;
}
/// <summary>
/// Decides whether an entry passes this filter: it is kept unless its word matches englishRegex.
/// </summary>
/// <param name="wl">The entry to test.</param>
/// <returns>false when the word matches englishRegex; true otherwise.</returns>
public bool IsKeep(WordLibrary wl)
{
    bool matchesEnglish = englishRegex.IsMatch(wl.Word);
    return !matchesEnglish;
}
/// <summary>
/// Formats one entry by delegating to UserDefiningPattern.BuildWLString.
/// </summary>
/// <param name="wl">The entry to serialize.</param>
/// <returns>The string produced by the user-defined pattern.</returns>
public string ExportLine(WordLibrary wl)
{
    return UserDefiningPattern.BuildWLString(wl);
}
/// <summary>
/// Runs the entry through Filters in order and stops at the first filter that rejects it.
/// </summary>
/// <param name="wordLibrary">The entry to test.</param>
/// <returns>true when every filter keeps the entry (or there are no filters); false otherwise.</returns>
private bool IsKeep(WordLibrary wordLibrary)
{
    bool keep = true;

    foreach (ISingleFilter candidate in Filters)
    {
        if (candidate.IsKeep(wordLibrary))
        {
            continue;
        }
        keep = false;
        break;
    }

    return keep;
}
/// <summary>
/// Fills in the code of an entry according to SelectedParsePattern. When the pattern is
/// both pinyin and pinyin-format, pyFactory provides the pinyin. Otherwise selfFactory is
/// configured from the pattern (loading the mapping table from MappingTablePath when one
/// is set) and its result is stored as a user-defined code.
/// </summary>
/// <param name="wl">The entry to update in place.</param>
private void GenerateCode(WordLibrary wl)
{
    var text = wl.Word;

    if (SelectedParsePattern.IsPinyin && SelectedParsePattern.IsPinyinFormat)
    {
        var pinyin = pyFactory.GetCodeOfString(text, SelectedParsePattern.CodeSplitString);
        wl.PinYin = CollectionHelper.ToArray(pinyin);
        return;
    }

    bool hasMappingFile = !string.IsNullOrEmpty(SelectedParsePattern.MappingTablePath);
    if (hasMappingFile)
    {
        SelectedParsePattern.MappingTable =
            UserCodingHelper.GetCodingDict(SelectedParsePattern.MappingTablePath);
    }

    selfFactory.MappingDictionary = SelectedParsePattern.MappingTable;
    selfFactory.Is1Char1Code = SelectedParsePattern.IsPinyinFormat;
    selfFactory.MutiWordCodeFormat = SelectedParsePattern.MutiWordCodeFormat;

    wl.SetCode(CodeType.UserDefine,
               selfFactory.GetCodeOfString(text, SelectedParsePattern.CodeSplitString));
}
/// <summary>
/// Reads a lexicon file by clearing the GlobalCache collections, parsing the header,
/// loading the root character and walking the tree, then converting every word collected
/// in GlobalCache.WordList into an entry.
/// </summary>
/// <param name="str">Path of the file to read.</param>
/// <returns>One entry per word found in GlobalCache.WordList after loading.</returns>
public WordLibraryList Import(string str)
{
    GlobalCache.CharList.Clear();
    GlobalCache.Stackes.Clear();
    GlobalCache.WordList.Clear();

    // using closes the file even when header or tree parsing throws;
    // the explicit Close() call was skipped on exceptions.
    using (var fs = new FileStream(str, FileMode.Open, FileAccess.Read))
    {
        ParseHeader(fs);
        TouchPalChar rootChar = TouchPalChar.Load(fs); // load the first character
        LoadTree(fs, rootChar);
    }

    var wwl = new WordLibraryList();
    foreach (int i in GlobalCache.WordList.Keys)
    {
        TouchPalWord w = GlobalCache.WordList[i];
        var wl = new WordLibrary();
        wl.Count = w.Count;
        wl.PinYin = w.PinYin.ToArray();
        wl.Word = w.ChineseWord;
        wwl.Add(wl);
    }
    return wwl;
}
/// <summary>
/// Appends a word onto the last node: creates character nodes for the word's characters
/// from index <paramref name="begin"/> on, links each to the previous node with
/// AddChar2Next, and attaches the new word to the final node.
/// </summary>
/// <param name="rootChar">Node whose Word supplies the first <paramref name="begin"/> characters.</param>
/// <param name="wl">The entry to add.</param>
/// <param name="begin">Index of the first character that needs a new node.</param>
/// <returns>The node of the word's last character, or the starting node when no new node was created.</returns>
private TouchPalChar AddWordLink2Char(TouchPalChar rootChar, WordLibrary wl, int begin)
{
    var newWord = new TouchPalWord();
    newWord.ChineseWord = wl.Word;
    newWord.Count = wl.Count;

    // Start linking from the node just before "begin" when a prefix is shared.
    TouchPalChar tail = begin > 0 ? rootChar.Word.Chars[begin - 1] : rootChar;

    var nodes = new TouchPalChar[wl.Word.Length];
    newWord.Chars = nodes;

    // Reuse the nodes of the shared prefix.
    for (int shared = 0; shared < begin; shared++)
    {
        nodes[shared] = rootChar.Word.Chars[shared];
    }

    for (int index = begin; index < wl.Word.Length; index++)
    {
        char character = wl.Word[index];
        string syllable = wl.PinYin[index];

        var node = new TouchPalChar();
        node.Char = character;
        node.PinyinCode = GlobalCache.PinyinIndexMapping[syllable];
        node.WordIndex = index + 1;
        short indexAndPinYin = node.IndexAndPinYin; // value unused; getter still evaluated as before

        AddChar2Next(tail, node);
        tail = node;
        nodes[index] = tail;
    }

    tail.Word = newWord;
    return tail;
}
/// <summary>
/// Click handler: takes every non-empty line of rtbFrom as a word, generates its code,
/// formats it through SelectedParsePattern and shows the results in rtbTo, one per line.
/// </summary>
/// <param name="sender">Event source; not read.</param>
/// <param name="e">Event data; not read.</param>
private void btnTest_Click(object sender, EventArgs e)
{
    var separators = new char[] {'\r', '\n'};
    var inputLines = rtbFrom.Text.Split(separators, StringSplitOptions.RemoveEmptyEntries);

    var output = new StringBuilder();
    foreach (var inputLine in inputLines)
    {
        var entry = new WordLibrary();
        entry.Word = inputLine.Trim();
        entry.Count = 1234;

        GenerateCode(entry);
        output.Append(SelectedParsePattern.BuildWlString(entry) + "\r\n");
    }

    rtbTo.Text = output.ToString();
}
/// <summary>
/// Reads a binary lexicon file. The total word count is read at offset 0x18 and the
/// records start at offset 0x30; CountWord and CurrentStatus are updated while reading.
/// Each group holds a pinyin index list followed by the words that share it.
/// </summary>
/// <param name="path">Path of the file to read.</param>
/// <returns>One entry per word; words in the same group share one pinyin array.</returns>
public WordLibraryList Import(string path)
{
    var pyAndWord = new WordLibraryList();

    // using releases the file handle on every exit path; the stream used to be left open.
    using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
    {
        fs.Position = 0x18;
        CountWord = BinFileHelper.ReadInt32(fs);
        CurrentStatus = 0;
        fs.Position = 0x30;

        while (CurrentStatus < CountWord)
        {
            int samePyCount = BinFileHelper.ReadInt16(fs);
            BinFileHelper.ReadInt16(fs); // value unused; read only to advance the stream
            short pyLength = BinFileHelper.ReadInt16(fs);

            var pyArray = new string[pyLength/2];
            for (int i = 0; i < pyLength/2; i++)
            {
                short idx = BinFileHelper.ReadInt16(fs);
                try
                {
                    pyArray[i] = PinYinDic[idx];
                }
                catch
                {
                    // Failed lookups become a placeholder syllable.
                    pyArray[i] = "--";
                }
            }

            for (int i = 0; i < samePyCount; i++)
            {
                short wordByteLength = BinFileHelper.ReadInt16(fs);
                var wordArray = new byte[wordByteLength];
                fs.Read(wordArray, 0, wordByteLength);
                string word = Encoding.Unicode.GetString(wordArray);

                short count = BinFileHelper.ReadInt16(fs);
                BinFileHelper.ReadInt16(fs); // value unused; read only to advance the stream
                BinFileHelper.ReadInt32(fs); // purpose unknown; read only to advance the stream

                var wl = new WordLibrary {Count = count, Word = word, PinYin = pyArray};
                pyAndWord.Add(wl);
                CurrentStatus++;
            }
        }
    }

    return pyAndWord;
}
/// <summary>
/// Line export is not implemented here; calling this method always throws.
/// </summary>
/// <param name="wl">Not read.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public string ExportLine(WordLibrary wl) { throw new NotImplementedException(); }
/// <summary>
/// Writes one entry into the lexicon file character by character. For every character
/// from the index returned by FindBeginPosition on, a 2-byte code and six 4-byte fields
/// are written and the node is pushed onto GlobalCache.ExportStackes; a 4-byte constant 96,
/// a zero byte and the Unicode bytes of the word follow.
/// </summary>
/// <param name="fs">Output stream, positioned where the entry should start.</param>
/// <param name="wl">The entry to write.</param>
/// <param name="isLastWord">When false, the first character's jump field is filled in.</param>
/// <returns>The stream position at which writing began.</returns>
public int WriteWord(FileStream fs, WordLibrary wl, bool isLastWord)
{
    var startOffset = (int) fs.Position;
    int length = wl.Word.Length;
    int firstNewIndex;
    TouchPalChar sharedPrefixNode = FindBeginPosition(wl.Word, out firstNewIndex);
    bool startsFromRoot = firstNewIndex == 0;

    for (int i = firstNewIndex; i < length; i++)
    {
        bool isFirst = i == 0;
        bool isFinal = i == length - 1;

        var node = new TouchPalChar();
        node.Char = wl.Word[i];
        node.BeginPosition = (int) fs.Position;

        string syllable = wl.PinYin[i];
        int syllableIndex = GlobalCache.PinyinIndexMapping[syllable];
        var packed = (short) (((i + 1) << 11) + syllableIndex);
        fs.Write(BitConverter.GetBytes(packed), 0, 2);

        // Frequency position: only set on the final character.
        int frequencyPos = isFinal ? startOffset + length*26 : 0;
        fs.Write(BitConverter.GetBytes(frequencyPos), 0, 4);

        // Next-character position: set on every character but the final one.
        int nextPos = isFinal ? 0 : startOffset + (i + 1)*26;
        fs.Write(BitConverter.GetBytes(nextPos), 0, 4);

        // Jump position: only on the first character of a word that is not the last word.
        int jumpPos = (!isLastWord && isFirst) ? startOffset + length*28 + 5 : 0;
        fs.Write(BitConverter.GetBytes(jumpPos), 0, 4);

        // Previous-character position.
        int previousPos;
        if (!startsFromRoot)
        {
            previousPos = sharedPrefixNode.BeginPosition;
        }
        else if (isFirst)
        {
            previousPos = GlobalCache.JumpChar.BeginPosition;
            GlobalCache.JumpChar = node;
        }
        else
        {
            previousPos = startOffset + (i - 1)*26;
        }
        fs.Write(BitConverter.GetBytes(previousPos), 0, 4);

        int prevValidPos;
        if (!startsFromRoot)
        {
            prevValidPos = sharedPrefixNode.PrevValidCharPosition;
        }
        else
        {
            prevValidPos = isFirst ? 4 : previousPos;
        }
        node.PrevValidCharPosition = prevValidPos;
        fs.Write(BitConverter.GetBytes(prevValidPos), 0, 4);

        int zeroField = 0;
        fs.Write(BitConverter.GetBytes(zeroField), 0, 4);

        GlobalCache.ExportStackes.Push(node);
    }

    int frequency = 96; // fixed value; the entry's own count is not written
    fs.Write(BitConverter.GetBytes(frequency), 0, 4);
    fs.WriteByte(0); // purpose of this byte is unknown

    byte[] wordBytes = Encoding.Unicode.GetBytes(wl.Word);
    fs.Write(wordBytes, 0, wordBytes.Length);
    return startOffset;
}
/// <summary>
/// Formats one entry as just its word; derived classes may override this.
/// </summary>
/// <param name="wl">The entry to serialize.</param>
/// <returns>The entry's Word value.</returns>
public virtual string ExportLine(WordLibrary wl) { return wl.Word; }