/// <summary>
/// Reads the word records stored in the binary dictionary file at <paramref name="path"/>.
/// </summary>
/// <param name="path">Path of the binary dictionary file.</param>
/// <returns>One entry per record, carrying the decoded word and this importer's code type.</returns>
public WordLibraryList Import(string path)
{
    const int userWordBase = 0x2400; // file offset of the first word record
    const int recordSize = 60;       // distance between consecutive records

    var result = new WordLibraryList();
    var buffer = new byte[50];

    // The using block releases the file handle even when a read or decode throws.
    using (FileStream fp = File.OpenRead(path))
    {
        // The record count is the 4-byte value at offset 12.
        fp.Seek(12, SeekOrigin.Begin);
        fp.Read(buffer, 0, 4);
        int count = bytesToIntLittle(buffer, 0, 4);

        for (int i = 0; i < count; i++)
        {
            int recordStart = userWordBase + i * recordSize;

            // One byte at record offset 10 gives the word length in UTF-16 code units.
            fp.Seek(recordStart + 10, SeekOrigin.Begin);
            fp.Read(buffer, 0, 1);
            int wordLen = bytesToIntLittle(buffer, 0, 1);

            // The word text starts at record offset 12.
            fp.Seek(recordStart + 12, SeekOrigin.Begin);
            fp.Read(buffer, 0, wordLen * 2);
            string word = Encoding.Unicode.GetString(buffer, 0, wordLen * 2);

            result.Add(new WordLibrary { Word = word, CodeType = this.CodeType });
        }
    }

    return result;
}
/// <summary>
/// Parses a line of the form "code word1 word2 ..." into one entry per word.
/// </summary>
/// <param name="line">Space-separated line whose first token is the code.</param>
/// <returns>One entry per word, each carrying the shared code under <c>CodeType</c>.</returns>
public override WordLibraryList ImportLine(string line)
{
    var wlList = new WordLibraryList();
    string[] strs = line.Split(' ');
    string code = strs[0];

    // NOTE(review): an earlier revision computed a comma-stripped copy of each word
    // but never used it; the word is stored exactly as it appears in the line.
    for (int i = 1; i < strs.Length; i++)
    {
        var wl = new WordLibrary();
        wl.Word = strs[i];
        wl.SetCode(CodeType, code);
        wlList.Add(wl);
    }

    return wlList;
}
/// <summary>
/// Converts every word returned by <c>Parse</c> into an entry with the default count.
/// No phonetic annotation is performed here.
/// </summary>
/// <param name="path">Path handed to <c>Parse</c>.</param>
/// <returns>The list of imported entries.</returns>
public WordLibraryList Import(string path)
{
    IList<string> parsedWords = Parse(path);
    var result = new WordLibraryList();

    foreach (string parsed in parsedWords)
    {
        var entry = new WordLibrary();
        entry.Word = parsed;
        entry.Count = DefaultRank;
        result.Add(entry);
    }

    return result;
}
/// <summary>
/// Parses a line of the form "code word1 word2 ..." into one entry per word.
/// </summary>
/// <param name="line">Space-separated line whose first token is the code.</param>
/// <returns>One entry per word, each carrying the shared code under this importer's code type.</returns>
public virtual WordLibraryList ImportLine(string line)
{
    var wlList = new WordLibraryList();
    string[] strs = line.Split(' ');
    string code = strs[0];

    // NOTE(review): the comma-stripped copy of each word was computed but never
    // used, so it has been dropped; words are stored verbatim.
    for (int i = 1; i < strs.Length; i++)
    {
        var wl = new WordLibrary();
        wl.Word = strs[i];
        wl.SetCode(this.CodeType, code);
        wlList.Add(wl);
    }

    return wlList;
}
/// <summary>
/// Parses plist-style XML and builds one entry per <c>//plist/array/dict</c> node.
/// The first <c>string</c> child is the word, the second its pinyin string.
/// </summary>
/// <param name="str">XML document text.</param>
/// <returns>The list of imported entries, each with rank 1.</returns>
public WordLibraryList ImportText(string str)
{
    var document = new XmlDocument();
    document.LoadXml(str);

    var result = new WordLibraryList();
    XmlNodeList entries = document.SelectNodes("//plist/array/dict");
    CountWord = entries.Count;

    int index = 0;
    foreach (XmlNode entry in entries)
    {
        XmlNodeList strings = entry.SelectNodes("string");
        var wl = new WordLibrary();
        wl.Word = strings[0].InnerText;
        wl.Rank = 1;
        wl.SetPinyinString(strings[1].InnerText);

        // Progress indicator: index of the node just processed.
        CurrentStatus = index;
        index++;
        result.Add(wl);
    }

    return result;
}
/// <summary>
/// Parses a "word&lt;TAB&gt;zhuyin,zhuyin,..." line, converting each zhuyin syllable to pinyin.
/// Syllables whose conversion throws are logged and skipped.
/// </summary>
/// <param name="line">Tab-separated line: word, then comma-separated zhuyin.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    string[] fields = line.Split('\t');

    var entry = new WordLibrary();
    entry.Word = fields[0];
    entry.Count = DefaultRank;

    string[] syllables = fields[1].Split(',');
    var converted = new List<string>();
    for (int i = 0; i < syllables.Length; i++)
    {
        try
        {
            converted.Add(ZhuyinHelper.GetPinyin(syllables[i]));
        }
        catch (Exception ex)
        {
            // Best effort: an unconvertible syllable is reported and left out.
            Debug.WriteLine(ex.Message);
        }
    }

    entry.PinYin = converted.ToArray();

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Exports a single entry through a user-defined pattern (fields separated by "|",
/// codes wrapped in "~") and checks the exact text produced.
/// </summary>
public void TestGeneratePinyinThen2String()
{
    ParsePattern parser = new ParsePattern()
    {
        IsPinyinFormat = true,
        CodeSplitType = BuildType.FullContain,
        CodeSplitString = "~",
        ContainCode = true,
        ContainRank = true,
        SplitString = "|",
        CodeType = CodeType.Pinyin,
        LineSplitString = "\r",
        Sort = new List<int>() { 2, 1, 3 }
    };

    WordLibraryList wll = new WordLibraryList();
    WordLibrary wl = new WordLibrary() { Word = "深蓝", Rank = 123, CodeType = CodeType.UserDefine };
    wl.Codes = new Code();
    wl.Codes.Add(new[] { "sn" });
    wl.Codes.Add(new[] { "ln" });
    wll.Add(wl);

    selfDefining.UserDefiningPattern = parser;
    var str = selfDefining.Export(wll);

    // Expected value goes first so a failure message labels the two values correctly.
    Assert.AreEqual("深蓝|~shen~lan~|123\r", str);
}
/// <summary>
/// Parses a "word py1 py2 ..." line, taking one pinyin token per character of the word.
/// </summary>
/// <param name="line">Space-separated line; a line starting with ';' is ignored.</param>
/// <returns>A list with the single parsed entry, or <c>null</c> when the line starts with ';'.</returns>
public WordLibraryList ImportLine(string line)
{
    bool startsWithSemicolon = line.Length > 0 && line[0] == ';';
    if (startsWithSemicolon)
    {
        return null;
    }

    string[] tokens = line.Split(' ');
    string text = tokens[0];

    // Token k+1 is the reading of character k.
    var readings = new string[text.Length];
    for (int k = 0; k < readings.Length; k++)
    {
        readings[k] = tokens[k + 1];
    }

    var entry = new WordLibrary();
    entry.Word = text;
    entry.Count = 1;
    entry.PinYin = readings;

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Loads a dictionary file into <c>GlobalCache</c> and converts every cached word into an entry.
/// </summary>
/// <param name="str">Path of the dictionary file.</param>
/// <returns>One entry per word found in <c>GlobalCache.WordList</c> after loading.</returns>
public WordLibraryList Import(string str)
{
    // Start from a clean cache so earlier imports do not leak into this one.
    GlobalCache.CharList.Clear();
    GlobalCache.Stackes.Clear();
    GlobalCache.WordList.Clear();

    // The using block closes the stream even if header or tree parsing throws.
    using (var fs = new FileStream(str, FileMode.Open, FileAccess.Read))
    {
        ParseHeader(fs);
        TouchPalChar rootChar = TouchPalChar.Load(fs); // load the first character
        LoadTree(fs, rootChar);
    }

    var wwl = new WordLibraryList();
    foreach (int i in GlobalCache.WordList.Keys)
    {
        TouchPalWord w = GlobalCache.WordList[i];
        var wl = new WordLibrary();
        wl.Count = w.Count;
        wl.PinYin = w.PinYin.ToArray();
        wl.Word = w.ChineseWord;
        wwl.Add(wl);
    }

    return wwl;
}
/// <summary>
/// Reads all phrases from a binary dictionary whose header (at 0x10) holds the
/// offset-table position, phrase-area start, phrase-area end and phrase count.
/// </summary>
/// <param name="path">Path of the binary dictionary file.</param>
/// <returns>Every phrase for which <c>ReadOnePhrase</c> returned a non-null entry.</returns>
public WordLibraryList Import(string path)
{
    var pyAndWord = new WordLibraryList();

    // The stream was previously never closed; the using block releases it on every path.
    using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
    {
        fs.Position = 0x10;
        var phrase_offset_start = BinFileHelper.ReadInt32(fs);
        var phrase_start = BinFileHelper.ReadInt32(fs);
        var phrase_end = BinFileHelper.ReadInt32(fs);
        var phrase_count = BinFileHelper.ReadInt32(fs);

        fs.Position = phrase_offset_start;
        var offsets = ReadOffsets(fs, phrase_count);
        // Sentinel: the end of the phrase area closes the last phrase.
        offsets.Add(phrase_end - phrase_start);

        fs.Position = phrase_start;
        for (var i = 0; i < phrase_count; i++)
        {
            // offsets[i + 1] is passed as the position where phrase i ends.
            var wl = ReadOnePhrase(fs, phrase_start + offsets[i + 1]);
            if (wl != null)
            {
                pyAndWord.Add(wl);
            }
        }
    }

    return pyAndWord;
}
/// <summary>
/// Applies the shuangpin replacement (unless full pinyin is selected) and drops
/// entries whose pinyin length exceeds 32 or whose word exceeds 64 characters.
/// </summary>
/// <param name="wlList">Entries to filter; each may be modified by the replacer.</param>
/// <returns>A new list with the entries that passed both limits.</returns>
private WordLibraryList Filter(WordLibraryList wlList)
{
    IReplaceFilter shuangpin = null;
    bool needsReplacement = PinyinType != PinyinType.FullPinyin;
    if (needsReplacement)
    {
        shuangpin = new ShuangpinReplacer(PinyinType);
    }

    var kept = new WordLibraryList();
    foreach (var entry in wlList)
    {
        if (shuangpin != null)
        {
            shuangpin.Replace(entry);
        }

        // Short-circuit keeps the original evaluation order of the two checks.
        bool withinLimits = entry.GetPinYinLength() <= 32 && entry.Word.Length <= 64;
        if (withinLimits)
        {
            kept.Add(entry);
        }
    }

    return kept;
}
/// <summary>
/// Builds one entry from <paramref name="line"/> using the user-defined pattern.
/// </summary>
/// <param name="line">A single line of input text.</param>
/// <returns>A list holding the entry produced by <c>UserDefiningPattern.BuildWordLibrary</c>.</returns>
public WordLibraryList ImportLine(string line)
{
    var result = new WordLibraryList();
    result.Add(UserDefiningPattern.BuildWordLibrary(line));
    return result;
}
/// <summary>
/// Parses a "word&lt;TAB&gt;code&lt;TAB&gt;rank" line.
/// For the pinyin code type the code is split on spaces; otherwise it is stored as-is.
/// </summary>
/// <param name="line">Tab-separated line with three fields.</param>
/// <returns>A list holding the single parsed entry.</returns>
public override WordLibraryList ImportLine(string line)
{
    string[] fields = line.Split('\t');
    string text = fields[0];
    string rawCode = fields[1];

    var entry = new WordLibrary();
    entry.Word = text;
    entry.Rank = Convert.ToInt32(fields[2]);

    if (CodeType != CodeType.Pinyin)
    {
        entry.SetCode(CodeType, rawCode);
    }
    else
    {
        entry.PinYin = rawCode.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    }

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Parses dictionary XML in the <c>http://www.microsoft.com/ime/dctx</c> namespace and
/// builds one entry per <c>DictionaryEntry</c> node.
/// </summary>
/// <param name="str">XML document text.</param>
/// <returns>The imported entries, each with rank 1.</returns>
public WordLibraryList ImportText(string str)
{
    var document = new XmlDocument();
    document.LoadXml(str);

    var ns = new XmlNamespaceManager(document.NameTable);
    ns.AddNamespace("ns1", "http://www.microsoft.com/ime/dctx");

    var result = new WordLibraryList();
    XmlNodeList entries = document.SelectNodes("//ns1:Dictionary/ns1:DictionaryEntry", ns);
    CountWord = entries.Count;

    // Spaces and the digits 1-4 act as separators between syllables.
    char[] separators = new[] { ' ', '1', '2', '3', '4' };

    int index = 0;
    foreach (XmlNode entry in entries)
    {
        string input = entry.SelectSingleNode("ns1:InputString", ns).InnerText;
        string output = entry.SelectSingleNode("ns1:OutputString", ns).InnerText;

        var wl = new WordLibrary();
        wl.Word = output;
        wl.Rank = 1;
        wl.PinYin = input.Split(separators, StringSplitOptions.RemoveEmptyEntries);

        CurrentStatus = index;
        index++;
        result.Add(wl);
    }

    return result;
}
/// <summary>
/// Wraps the entry built from <paramref name="line"/> by <c>BuildWordLibrary</c> in a list.
/// </summary>
/// <param name="line">A single line of input text.</param>
/// <returns>A list holding exactly one entry.</returns>
public WordLibraryList ImportLine(string line)
{
    var result = new WordLibraryList();
    WordLibrary entry = BuildWordLibrary(line);
    result.Add(entry);
    return result;
}
/// <summary>
/// Reads word records from a binary dictionary, starting at 0x350 and stopping at the
/// end position stored at 0x60.
/// </summary>
/// <param name="path">Path of the binary dictionary file.</param>
/// <returns>Every record that has a non-empty word and at least one pinyin syllable.</returns>
public WordLibraryList Import(string path)
{
    var wordLibraryList = new WordLibraryList();

    // The using block closes the stream even if reading the header throws.
    using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
    {
        fs.Position = 0x60;
        int endPosition = BinFileHelper.ReadInt32(fs);
        fs.Position = 0x350;
        CurrentStatus = 0;

        do
        {
            try
            {
                WordLibrary wl = ImportWord(fs);
                if (wl == null)
                {
                    break;
                }

                if (wl.Word != "" && wl.PinYin.Length > 0)
                {
                    wordLibraryList.Add(wl);
                }
            }
            catch (Exception ex)
            {
                // Best effort: a record that fails to parse is logged and skipped.
                Debug.WriteLine(ex.Message);
            }
            // Also stop at end of file: without this guard a truncated file (or a
            // record that overshoots endPosition) would keep the loop spinning forever.
        } while (fs.Position != endPosition && fs.Position < fs.Length);
    }

    return wordLibraryList;
}
/// <summary>
/// Converts one line of plain text into an entry whose pinyin is generated from the text.
/// </summary>
/// <param name="line">The word text.</param>
/// <returns>A list holding the single generated entry.</returns>
public virtual WordLibraryList ImportLine(string line)
{
    var generated = pinyinFactory.GetCodeOfString(line);

    var entry = new WordLibrary();
    entry.Word = line;
    entry.PinYin = ToArray(generated);

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Converts one line of plain text into an entry tagged with this importer's code type.
/// No code is generated for the word here.
/// </summary>
/// <param name="line">The word text.</param>
/// <returns>A list holding the single entry.</returns>
public virtual WordLibraryList ImportLine(string line)
{
    var entry = new WordLibrary();
    entry.Word = line;
    entry.CodeType = CodeType;

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Parses a "word&lt;TAB&gt;py'py'...&lt;TAB&gt;rank" line.
/// </summary>
/// <param name="line">Tab-separated line with three fields.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    string[] fields = line.Split('\t');

    var entry = new WordLibrary();
    entry.Word = fields[0];
    entry.Rank = Convert.ToInt32(fields[2]);
    // Syllables are separated by apostrophes; empty pieces are discarded.
    entry.PinYin = fields[1].Split(new[] { '\'' }, StringSplitOptions.RemoveEmptyEntries);

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Parses a "py'py'... word" line into a single entry with rank 1.
/// </summary>
/// <param name="line">Space-separated line: apostrophe-joined pinyin, then the word.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    // Split once and reuse the pieces instead of splitting the line per field.
    string[] parts = line.Split(' ');
    string py = parts[0];
    string word = parts[1];

    var wl = new WordLibrary();
    wl.Word = word;
    wl.Rank = 1;
    wl.PinYin = py.Split(new[] { '\'' }, StringSplitOptions.RemoveEmptyEntries);

    var wll = new WordLibraryList();
    wll.Add(wl);
    return wll;
}
// Format note from the original author: 4 bytes = number x of entries sharing one pinyin,
// 2 bytes = pinyin length n, n bytes = pinyin indices, then x times
// (2 bytes word length, word text in Unicode, 2 bytes frequency, 2 bytes unknown, 4 bytes unknown).
/// <summary>
/// Reads all pinyin groups and their words from a binary dictionary.
/// The word count is read from 0x18 and the records start at 0x30.
/// </summary>
/// <param name="path">Path of the binary dictionary file.</param>
/// <returns>Every word whose character count matches its pinyin syllable count.</returns>
public WordLibraryList Import(string path)
{
    var pyAndWord = new WordLibraryList();

    // The stream was previously never closed; the using block releases it on every path.
    using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
    {
        fs.Position = 0x18;
        CountWord = BinFileHelper.ReadInt32(fs);
        CurrentStatus = 0;
        fs.Position = 0x30;

        while (CurrentStatus < CountWord)
        {
            int samePyCount = BinFileHelper.ReadInt16(fs);
            BinFileHelper.ReadInt16(fs); // unknown field, read only to advance the stream
            short pyLength = BinFileHelper.ReadInt16(fs);

            // Each pinyin index occupies two bytes.
            var pyArray = new string[pyLength / 2];
            for (int i = 0; i < pyLength / 2; i++)
            {
                short idx = BinFileHelper.ReadInt16(fs);
                try
                {
                    pyArray[i] = PinYinDic[idx];
                }
                catch
                {
                    // An index that cannot be looked up is kept as a placeholder.
                    pyArray[i] = "--";
                }
            }

            for (int i = 0; i < samePyCount; i++)
            {
                short wordByteLength = BinFileHelper.ReadInt16(fs);
                var wordArray = new byte[wordByteLength];
                fs.Read(wordArray, 0, wordByteLength);
                string word = Encoding.Unicode.GetString(wordArray);

                short count = BinFileHelper.ReadInt16(fs);
                BinFileHelper.ReadInt16(fs); // unknown field
                BinFileHelper.ReadInt32(fs); // unknown field

                if (pyArray.Length == word.Length)
                {
                    var wl = new WordLibrary { Rank = count, Word = word, PinYin = pyArray };
                    pyAndWord.Add(wl);
                }
                else
                {
                    Debug.WriteLine("Error data: word:[" + word + "] pinyin:[" + string.Join(",", pyArray) + "]");
                }

                CurrentStatus++;
            }
        }
    }

    return pyAndWord;
}
/// <summary>
/// Turns a single line of plain text into an entry, generating its pinyin via <c>pinyinFactory</c>.
/// </summary>
/// <param name="line">The word text.</param>
/// <returns>A list holding the single generated entry.</returns>
public virtual WordLibraryList ImportLine(string line)
{
    var codes = pinyinFactory.GetCodeOfString(line);

    var item = new WordLibrary();
    item.Word = line;
    item.PinYin = ToArray(codes);

    var items = new WordLibraryList();
    items.Add(item);
    return items;
}
/// <summary>
/// Parses a "word&lt;TAB&gt;py py ...&lt;TAB&gt;count" line.
/// </summary>
/// <param name="line">Tab-separated line with three fields.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    string[] fields = line.Split('\t');
    string pinyinText = fields[1];
    string text = fields[0];

    var entry = new WordLibrary();
    entry.Word = text;
    entry.Count = Convert.ToInt32(fields[2]);
    entry.PinYin = pinyinText.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Parses a "word&lt;TAB&gt;count" line into an entry with an empty pinyin array.
/// </summary>
/// <param name="line">Tab-separated line: word, then count.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    string[] fields = line.Split('\t');
    string text = fields[0];

    var entry = new WordLibrary();
    entry.Word = text;
    entry.Count = Convert.ToInt32(fields[1]);
    entry.PinYin = new string[0];

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Turns a single line of plain text into an entry carrying this importer's code type.
/// The entry gets no generated code.
/// </summary>
/// <param name="line">The word text.</param>
/// <returns>A list holding the single entry.</returns>
public virtual WordLibraryList ImportLine(string line)
{
    var item = new WordLibrary();
    item.Word = line;
    item.CodeType = CodeType;

    var items = new WordLibraryList();
    items.Add(item);
    return items;
}
/// <summary>
/// Parses a "code word" line. A code generated by <c>pyGenerater</c> is stored under
/// <c>CodeType.Cangjie</c>, then the code read from the line under this importer's code type.
/// </summary>
/// <param name="line">Space-separated line: code, then word.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    string[] tokens = line.Split(' ');
    string lineCode = tokens[0];

    var entry = new WordLibrary();
    entry.Word = tokens[1];
    entry.Rank = DefaultRank;
    entry.SetCode(CodeType.Cangjie, pyGenerater.GetCodeOfString(entry.Word));
    entry.SetCode(CodeType, lineCode);

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Parses a tab-separated line of word, apostrophe-joined pinyin and rank.
/// </summary>
/// <param name="line">Tab-separated line with three fields.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    string[] columns = line.Split('\t');
    char[] apostrophe = new[] { '\'' };

    var item = new WordLibrary();
    item.Word = columns[0];
    item.Rank = Convert.ToInt32(columns[2]);
    item.PinYin = columns[1].Split(apostrophe, StringSplitOptions.RemoveEmptyEntries);

    var items = new WordLibraryList();
    items.Add(item);
    return items;
}
/// <summary>
/// Parses a "code word" line: pinyin is generated from the word, and the code read from
/// the line is added under this importer's code type.
/// </summary>
/// <param name="line">Space-separated line: code, then word.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    string[] tokens = line.Split(' ');
    string lineCode = tokens[0];

    var entry = new WordLibrary();
    entry.Word = tokens[1];
    entry.Count = DefaultRank;

    var generated = pyGenerater.GetCodeOfString(entry.Word);
    entry.PinYin = CollectionHelper.ToArray(generated);
    entry.AddCode(CodeType, lineCode);

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Parses a "word,rank" line into an entry with an empty pinyin array.
/// </summary>
/// <param name="line">Comma-separated line: word, then rank.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    string[] fields = line.Split(',');
    string text = fields[0];
    int rank = Convert.ToInt32(fields[1]);

    var entry = new WordLibrary();
    entry.Word = text;
    entry.Rank = rank;
    entry.PinYin = new string[0];

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Parses a "word py'py'..." line into a pinyin entry with the default rank.
/// </summary>
/// <param name="line">Space-separated line: word, then apostrophe-joined pinyin.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    string[] tokens = line.Split(' ');
    string pinyinText = tokens[1];
    string text = tokens[0];

    var entry = new WordLibrary { CodeType = CodeType.Pinyin };
    entry.Word = text;
    entry.Rank = DefaultRank;
    entry.PinYin = pinyinText.Split(new[] { '\'' }, StringSplitOptions.RemoveEmptyEntries);

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Parses a "word&lt;TAB&gt;py'py'..." line into a single entry with count 1.
/// </summary>
/// <param name="line">Tab-separated line: word, then apostrophe-joined pinyin.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    // Split once and reuse the pieces instead of splitting the line per field.
    string[] parts = line.Split('\t');
    string py = parts[1];
    string word = parts[0];

    var wl = new WordLibrary();
    wl.Word = word;
    wl.Count = 1;
    wl.PinYin = py.Split(new[] { '\'' }, StringSplitOptions.RemoveEmptyEntries);

    var wll = new WordLibraryList();
    wll.Add(wl);
    return wll;
}
/// <summary>
/// Reads every phrase from a "Freeime Dictionary" file. The two characters at 0x23
/// select the code type (pinyin or Wubi 98); phrases are read from 0x1B620 to end of file.
/// </summary>
/// <param name="path">Path of the dictionary file.</param>
/// <returns>Every phrase for which <c>ReadOnePhrase</c> returned a non-null entry.</returns>
/// <exception cref="NotImplementedException">The type marker is neither pinyin nor Wubi.</exception>
public WordLibraryList Import(string path)
{
    var pyAndWord = new WordLibraryList();

    // The using block closes the stream on every path, including the
    // unknown-dictionary exception below, which previously leaked the handle.
    using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
    {
        fs.Position = 0x00;
        var headerstr = "Freeime Dictionary";
        var header = Encoding.ASCII.GetString(BinFileHelper.ReadArray(fs, headerstr.Length));
        Debug.Assert(header.Equals(headerstr));

        DictCodeType curType;
        fs.Position = 0x23;
        var headerTypeBytes = BinFileHelper.ReadArray(fs, 4);
        var headerTypeStr = Encoding.Unicode.GetString(headerTypeBytes);
        if (headerTypeStr.Equals("拼音"))
        {
            curType = DictCodeType.Pinyin;
        }
        else if (headerTypeStr.Equals("五笔"))
        {
            curType = DictCodeType.Wubi98;
        }
        else
        {
            throw new NotImplementedException("未知词库,请在反馈中提交文件");
        }

        var phrase_start = 0x1B620; // where the 'a' entries are located
        fs.Position = phrase_start;

        // "<" instead of an equality test: if the start offset lies beyond the end of
        // a short file, the position never equals the length and the loop would not end.
        do
        {
            var wl = ReadOnePhrase(fs, curType);
            if (wl != null)
            {
                pyAndWord.Add(wl);
            }
        } while (fs.Position < fs.Length);
    }

    return pyAndWord;
}
/// <summary>
/// Parses a "word&lt;TAB&gt;rank" line into an entry with an empty pinyin array.
/// </summary>
/// <param name="line">Tab-separated line: word, then rank.</param>
/// <returns>A list holding the single parsed entry.</returns>
public override WordLibraryList ImportLine(string line)
{
    string[] columns = line.Split('\t');
    string text = columns[0];

    var item = new WordLibrary();
    item.Word = text;
    item.Rank = Convert.ToInt32(columns[1]);
    item.PinYin = new string[0];

    var items = new WordLibraryList();
    items.Add(item);
    return items;
}
/// <summary>
/// Parses a tab-separated line of word, space-separated pinyin and count.
/// </summary>
/// <param name="line">Tab-separated line with three fields.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    string[] columns = line.Split('\t');
    string syllables = columns[1];
    string text = columns[0];
    char[] space = new[] { ' ' };

    var item = new WordLibrary();
    item.Word = text;
    item.Count = Convert.ToInt32(columns[2]);
    item.PinYin = syllables.Split(space, StringSplitOptions.RemoveEmptyEntries);

    var items = new WordLibraryList();
    items.Add(item);
    return items;
}
/// <summary>
/// Parses a "code word" line. The code from the line is ignored; pinyin is generated
/// from the word, and the entry is returned only when pinyin was produced.
/// </summary>
/// <param name="line">Space-separated line: code, then word.</param>
/// <returns>A list holding the entry, or an empty list when no pinyin was generated.</returns>
public WordLibraryList ImportLine(string line)
{
    // Split once; the first token (the code) is not used by this importer.
    string word = line.Split(' ')[1];

    var wl = new WordLibrary();
    wl.Word = word;
    wl.Count = DefaultRank;
    wl.PinYin = ToArray(pinyinFactory.GetCodeOfString(word));

    var wll = new WordLibraryList();
    if (wl.PinYin.Length > 0)
    {
        wll.Add(wl);
    }

    return wll;
}
/// <summary>
/// Keeps the first <c>Percentage</c> percent of the entries after sorting them by ascending rank.
/// </summary>
/// <param name="list">Entries to filter. Unless <c>Percentage</c> is 100, this list is sorted in place.</param>
/// <returns>
/// The same list instance when <c>Percentage</c> is 100; otherwise a new list with the
/// first <c>list.Count * Percentage / 100</c> entries of the sorted input.
/// </returns>
public WordLibraryList Filter(WordLibraryList list)
{
    if (Percentage == 100)
    {
        return list;
    }

    int count = list.Count * Percentage / 100;

    // CompareTo avoids the integer overflow that "a.Rank - b.Rank" can produce when
    // the ranks have opposite signs and large magnitudes.
    // NOTE(review): ascending order keeps the lowest ranks — confirm that is intended.
    list.Sort((a, b) => a.Rank.CompareTo(b.Rank));

    var result = new WordLibraryList();
    for (int i = 0; i < count; i++)
    {
        result.Add(list[i]);
    }

    return result;
}
/// <summary>
/// Parses a "py'py'... word rank" line. Anything after the first comma is discarded.
/// </summary>
/// <param name="line">Space-separated line: pinyin, word, rank.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    // When the line contains commas, only the part before the first one is used.
    string head = line.Split(',')[0];
    string[] tokens = head.Split(' ');

    string pinyinText = tokens[0];
    string text = tokens[1];
    int rank = Convert.ToInt32(tokens[2]);

    var entry = new WordLibrary();
    entry.Word = text;
    entry.Rank = rank;
    entry.PinYin = pinyinText.Split(new[] { '\'' }, StringSplitOptions.RemoveEmptyEntries);

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Parses a "code word" line, storing a generated code under <c>CodeType.Cangjie</c>
/// and then the line's own code under this importer's code type.
/// </summary>
/// <param name="line">Space-separated line: code, then word.</param>
/// <returns>A list holding the single parsed entry.</returns>
public override WordLibraryList ImportLine(string line)
{
    string[] parts = line.Split(' ');
    string codeFromLine = parts[0];

    var item = new WordLibrary();
    item.Word = parts[1];
    item.Rank = DefaultRank;

    var generated = pyGenerater.GetCodeOfString(item.Word);
    item.SetCode(CodeType.Cangjie, generated);
    item.SetCode(CodeType, codeFromLine);

    var items = new WordLibraryList();
    items.Add(item);
    return items;
}
/// <summary>
/// Keeps only the words that are between 2 and 12 characters long,
/// or that are empty (length 0 is not excluded).
/// </summary>
/// <param name="wlList">Entries to filter.</param>
/// <returns>A new list without single-character words and words longer than 12 characters.</returns>
private WordLibraryList Filter(WordLibraryList wlList)
{
    var kept = new WordLibraryList();
    foreach (var entry in wlList)
    {
        // At most 12 characters are supported; single characters are dropped too.
        bool accepted = entry.Word.Length <= 12 && entry.Word.Length != 1;
        if (accepted)
        {
            kept.Add(entry);
        }
    }

    return kept;
}
/// <summary>
/// Parses a "code word" line: pinyin is generated from the word, and the code read from
/// the line is set under this importer's code type.
/// </summary>
/// <param name="line">Space-separated line: code, then word.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    string[] parts = line.Split(' ');
    string codeFromLine = parts[0];

    var item = new WordLibrary();
    item.Word = parts[1];
    item.Count = DefaultRank;

    var generated = pyGenerater.GetCodeOfString(item.Word);
    item.PinYin = CollectionHelper.ToArray(generated);
    item.SetCode(CodeType, codeFromLine);

    var items = new WordLibraryList();
    items.Add(item);
    return items;
}
/// <summary>
/// Parses a "word,count" line into an entry with an empty pinyin array.
/// </summary>
/// <param name="line">Comma-separated line: word, then count.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    string[] columns = line.Split(',');
    string text = columns[0];
    int occurrences = Convert.ToInt32(columns[1]);

    var item = new WordLibrary();
    item.Word = text;
    item.Count = occurrences;
    item.PinYin = new string[0];

    var items = new WordLibraryList();
    items.Add(item);
    return items;
}
/// <summary>
/// Parses a "'py'py... word" line. Only lines that begin with an apostrophe are treated as entries.
/// </summary>
/// <param name="line">Space-separated line: apostrophe-prefixed pinyin, then the word.</param>
/// <returns>A list holding the parsed entry, or <c>null</c> when the line does not start with an apostrophe.</returns>
public override WordLibraryList ImportLine(string line)
{
    if (line.IndexOf("'") != 0)
    {
        return null;
    }

    // Split once and reuse the pieces instead of splitting the line per field.
    string[] parts = line.Split(' ');
    string py = parts[0];
    string word = parts[1];

    var wl = new WordLibrary();
    wl.Word = word;
    wl.Rank = 1;
    wl.PinYin = py.Split(new[] { '\'' }, StringSplitOptions.RemoveEmptyEntries);

    var wll = new WordLibraryList();
    wll.Add(wl);
    return wll;
}
/// <summary>
/// Takes the second tab-separated field of <paramref name="line"/> as the word.
/// When <c>IsEnglish</c> reports the word as English, the word itself is stored as its English code.
/// </summary>
/// <param name="line">Tab-separated line whose second field is the word.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    var entry = new WordLibrary();
    string[] fields = line.Split('\t');
    entry.Word = fields[1];
    entry.CodeType = CodeType;

    bool english = IsEnglish(entry.Word);
    entry.IsEnglish = english;
    if (entry.IsEnglish)
    {
        entry.SetCode(CodeType.English, entry.Word);
    }

    var result = new WordLibraryList();
    result.Add(entry);
    return result;
}
/// <summary>
/// Exports a single four-character pinyin entry with <c>Win10MsPinyinSelfStudy</c>
/// and writes the first element of the result to the debug output.
/// </summary>
public void TestExport1()
{
    var entries = new WordLibraryList();
    var entry = new WordLibrary
    {
        Word = "曾毅曾诚",
        PinYin = new[] { "zeng", "yi", "zeng", "cheng" },
        CodeType = CodeType.Pinyin
    };
    entries.Add(entry);

    var exporter = new Win10MsPinyinSelfStudy();
    var exported = exporter.Export(entries);

    Debug.WriteLine(exported[0]);
}
/// <summary>
/// Parses a "code word" line and stores the code under <c>CodeType.Wubi</c>.
/// The entry is returned only when its <c>PinYin</c> array is non-empty.
/// </summary>
/// <param name="line">Space-separated line: code, then word.</param>
/// <returns>A list holding the entry, or an empty list when the entry has no pinyin.</returns>
public WordLibraryList ImportLine(string line)
{
    // Split once and reuse the pieces instead of splitting the line per field.
    string[] parts = line.Split(' ');
    string code = parts[0];
    string word = parts[1];

    var wl = new WordLibrary();
    wl.Word = word;
    wl.Rank = DefaultRank;
    wl.SetCode(CodeType.Wubi, code);

    var wll = new WordLibraryList();
    // NOTE(review): pinyin is no longer generated in this method, so this check
    // depends entirely on what the PinYin getter returns for a Wubi entry — confirm
    // that entries are not being silently dropped here.
    if (wl.PinYin.Length > 0)
    {
        wll.Add(wl);
    }

    return wll;
}
/// <summary>
/// Parses a "code word1 word2 ..." line into one entry per word, each with the default
/// count and the shared code set under this importer's code type.
/// </summary>
/// <param name="str">Space-separated line whose first token is the code.</param>
/// <returns>One entry per word after the code; empty when the line has no words.</returns>
public WordLibraryList ImportLine(string str)
{
    var result = new WordLibraryList();
    string[] tokens = str.Split(' ');

    // Token 0 is the code; every following token is a word sharing it.
    int position = 1;
    while (position < tokens.Length)
    {
        var entry = new WordLibrary();
        entry.Word = tokens[position];
        entry.Count = DefaultRank;
        entry.SetCode(CodeType, tokens[0]);
        result.Add(entry);
        position++;
    }

    return result;
}
/// <summary>
/// Parses text made of CRLF-separated lines of the form "word&lt;TAB&gt;count&lt;TAB&gt;py py ...".
/// Empty lines are skipped.
/// </summary>
/// <param name="str">The full text to parse.</param>
/// <returns>One entry per non-empty line.</returns>
public WordLibraryList Import(string str)
{
    var result = new WordLibraryList();
    string[] rows = str.Split(new string[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);

    foreach (string row in rows)
    {
        string[] fields = row.Split('\t');

        var entry = new WordLibrary();
        entry.Word = fields[0];
        entry.Count = Convert.ToInt32(fields[1]);
        entry.PinYin = fields[2].Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
        result.Add(entry);
    }

    return result;
}
/// <summary>
/// Exports every prefix of a ten-character test string using the mapping table
/// "Test\array30.txt" and writes the export result to the debug output.
/// </summary>
public void TestExportExtCodeLots()
{
    string str = "深蓝词库转换测试代码";
    var list = new WordLibraryList();

    // Build the prefixes "深", "深蓝", ... up to the whole string.
    for (int length = 1; length <= str.Length; length++)
    {
        string prefix = str.Substring(0, length);
        list.Add(new WordLibrary() { Count = 10, IsEnglish = false, Word = prefix });
    }

    export.UserDefiningPattern = InitPattern();
    export.UserDefiningPattern.MappingTablePath = "Test\\array30.txt";
    var x = export.Export(list);
    Debug.WriteLine(x);

    // NOTE(review): this asserts on the input constant, not on the export result x,
    // so it can never fail — confirm whether x was meant to be checked.
    Assert.IsNotNullOrEmpty(str);
}
/// <summary>
/// Parses a "word&lt;TAB&gt;py'py'..." line into an entry with rank 1.
/// Lines whose pinyin field is empty yield no entry.
/// </summary>
/// <param name="line">Tab-separated line: word, then apostrophe-joined pinyin.</param>
/// <returns>A list holding the entry, or an empty list when the pinyin field is empty.</returns>
public WordLibraryList ImportLine(string line)
{
    string[] fields = line.Split('\t');
    string text = fields[0];
    string pinyinText = fields[1];

    var entry = new WordLibrary();
    entry.Word = text;
    entry.Rank = 1;
    entry.PinYin = pinyinText.Split(new[] { '\'' }, StringSplitOptions.RemoveEmptyEntries);

    var result = new WordLibraryList();
    bool hasPinyin = !string.IsNullOrEmpty(pinyinText);
    if (hasPinyin)
    {
        result.Add(entry);
    }

    return result;
}
/// <summary>
/// Parses a "code word1 word2 ..." line. Commas are removed from each word and its
/// pinyin is generated by <c>pinyinFactory</c>.
/// </summary>
/// <param name="line">Space-separated line; the first token is skipped.</param>
/// <returns>
/// The generated entries. Each word is added once per element of its generated code list.
/// </returns>
public virtual WordLibraryList ImportLine(string line)
{
    var result = new WordLibraryList();
    string[] tokens = line.Split(' ');

    for (int t = 1; t < tokens.Length; t++)
    {
        // Strip the commas embedded in the word.
        string cleaned = tokens[t].Replace(",", "");
        var codes = pinyinFactory.GetCodeOfString(cleaned);

        // NOTE(review): this adds an identical entry codes.Count times; it looks like a
        // leftover from per-code handling — confirm whether duplicates are intended.
        int added = 0;
        while (added < codes.Count)
        {
            var entry = new WordLibrary();
            entry.Word = cleaned;
            entry.PinYin = ToArray(codes);
            result.Add(entry);
            added++;
        }
    }

    return result;
}
/// <summary>
/// Parses a "word py1 py2 ..." line into an entry with rank 1, reading one pinyin
/// token for each character of the word.
/// </summary>
/// <param name="line">Space-separated line; a line starting with ';' is ignored.</param>
/// <returns>A list with the single parsed entry, or <c>null</c> when the line starts with ';'.</returns>
public WordLibraryList ImportLine(string line)
{
    if (line.Length == 0 || line[0] != ';')
    {
        string[] pieces = line.Split(' ');
        string text = pieces[0];

        // Piece n+1 holds the reading of character n.
        var readings = new string[text.Length];
        for (int n = 0; n < text.Length; n++)
        {
            readings[n] = pieces[n + 1];
        }

        var item = new WordLibrary();
        item.Word = text;
        item.Rank = 1;
        item.PinYin = readings;

        var items = new WordLibraryList();
        items.Add(item);
        return items;
    }

    return null;
}
/// <summary>
/// Parses a "word py|py|..." line into an entry with rank 1.
/// A line that cannot be parsed is logged and produces an empty list.
/// </summary>
/// <param name="line">Space-separated line: word, then pipe-joined pinyin.</param>
/// <returns>A list holding the parsed entry, or an empty list when parsing failed.</returns>
public WordLibraryList ImportLine(string line)
{
    var wll = new WordLibraryList();
    try
    {
        // Split once and reuse the pieces instead of splitting the line per field.
        string[] parts = line.Split(' ');
        string py = parts[1];
        string word = parts[0];

        var wl = new WordLibrary();
        wl.Word = word;
        wl.Rank = 1;
        wl.PinYin = py.Split(new[] { '|' }, StringSplitOptions.RemoveEmptyEntries);
        wll.Add(wl);
    }
    catch (Exception ex)
    {
        // Best effort: report the offending line and carry on with an empty result.
        Debug.WriteLine(line + "\t" + ex.Message);
    }

    return wll;
}
/// <summary>
/// Parses a tab-separated line. Two fields mean an English word ("word, count");
/// otherwise the line is read as "word, py'py'..., count".
/// </summary>
/// <param name="line">Tab-separated line with two or three fields.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    var wl = new WordLibrary();
    string[] array = line.Split('\t');
    wl.Word = array[0];

    if (array.Length == 2) // English
    {
        wl.IsEnglish = true;
        wl.Count = Convert.ToInt32(array[1]);
    }
    else
    {
        // Reuse the fields already split above instead of splitting the line again.
        string py = array[1];
        wl.PinYin = py.Split(new[] { '\'' }, StringSplitOptions.RemoveEmptyEntries);
        wl.Count = Convert.ToInt32(array[2]);
    }

    var wll = new WordLibraryList();
    wll.Add(wl);
    return wll;
}
/// <summary>
/// Converts every word returned by <c>Parse</c> into an entry. Words reported as Chinese
/// by <c>IsChinese</c> get generated pinyin; all others are flagged as English.
/// </summary>
/// <param name="path">Path handed to <c>Parse</c>.</param>
/// <returns>The imported entries, each with the default count.</returns>
public WordLibraryList Import(string path)
{
    IWordCodeGenerater generator = new WordPinyinGenerater();
    IList<string> parsedWords = Parse(path);
    var result = new WordLibraryList();

    foreach (string parsed in parsedWords)
    {
        var entry = new WordLibrary();
        if (!IsChinese(parsed))
        {
            entry.IsEnglish = true;
        }
        else
        {
            // Chinese words need a phonetic annotation.
            entry.PinYin = ToArray(generator.GetCodeOfString(parsed));
        }

        entry.Word = parsed;
        entry.Count = DefaultRank;
        result.Add(entry);
    }

    return result;
}
/// <summary>
/// Converts every word returned by <c>Parse</c> into an entry with the default rank.
/// No phonetic annotation is performed here.
/// </summary>
/// <param name="path">Path handed to <c>Parse</c>.</param>
/// <returns>The list of imported entries.</returns>
public WordLibraryList Import(string path)
{
    IList<string> parsedWords = Parse(path);
    var result = new WordLibraryList();

    foreach (string parsed in parsedWords)
    {
        var entry = new WordLibrary();
        entry.Word = parsed;
        entry.Rank = DefaultRank;
        result.Add(entry);
    }

    return result;
}
/// <summary>
/// Exports one entry through a user-defined pattern that uses "|" between fields and
/// "~" around codes, then verifies the exact output line.
/// </summary>
public void TestGeneratePinyinThen2String()
{
    ParsePattern parser = new ParsePattern()
    {
        IsPinyinFormat = true,
        CodeSplitType = BuildType.FullContain,
        CodeSplitString = "~",
        ContainCode = true,
        ContainRank = true,
        SplitString = "|",
        CodeType = CodeType.Pinyin,
        LineSplitString = "\r",
        Sort = new List<int>() { 2, 1, 3 }
    };

    WordLibraryList wll = new WordLibraryList();
    WordLibrary wl = new WordLibrary() { Word = "深蓝", Rank = 123, CodeType = CodeType.UserDefine };
    wl.Codes = new Code();
    wl.Codes.Add(new[] { "sn" });
    wl.Codes.Add(new[] { "ln" });
    wll.Add(wl);

    selfDefining.UserDefiningPattern = parser;
    var str = selfDefining.Export(wll);

    // Expected value first, actual second, so failure output is labelled correctly.
    Assert.AreEqual("深蓝|~shen~lan~|123\r", str);
}
/// <summary>
/// Parses a "code word1 word2 ..." line into one entry per word, each with the default
/// count and the shared code added under this importer's code type.
/// </summary>
/// <param name="str">Space-separated line whose first token is the code.</param>
/// <returns>One entry per word after the code; empty when the line has no words.</returns>
public WordLibraryList ImportLine(string str)
{
    var result = new WordLibraryList();
    string[] tokens = str.Split(' ');

    // Token 0 is the code; every following token is a word sharing it.
    int position = 1;
    while (position < tokens.Length)
    {
        var entry = new WordLibrary();
        entry.Word = tokens[position];
        entry.Count = DefaultRank;
        entry.AddCode(CodeType, tokens[0]);
        result.Add(entry);
        position++;
    }

    return result;
}
/// <summary>
/// Parses a "word&lt;TAB&gt;zhuyin,zhuyin,..." line into an entry with the default rank,
/// converting each zhuyin syllable to pinyin. Syllables that fail to convert are logged and skipped.
/// </summary>
/// <param name="line">Tab-separated line: word, then comma-separated zhuyin.</param>
/// <returns>A list holding the single parsed entry.</returns>
public WordLibraryList ImportLine(string line)
{
    string[] columns = line.Split('\t');

    var item = new WordLibrary();
    item.Word = columns[0];
    item.Rank = DefaultRank;

    string[] zhuyinParts = columns[1].Split(',');
    var pinyinParts = new List<string>();
    for (int n = 0; n < zhuyinParts.Length; n++)
    {
        try
        {
            pinyinParts.Add(ZhuyinHelper.GetPinyin(zhuyinParts[n]));
        }
        catch (Exception ex)
        {
            // Best effort: report the failure and leave this syllable out.
            Debug.WriteLine(ex.Message);
        }
    }

    item.PinYin = pinyinParts.ToArray();

    var items = new WordLibraryList();
    items.Add(item);
    return items;
}
/// <summary>
/// Reads word records from a binary dictionary, starting at 0x350 and continuing to end of file.
/// </summary>
/// <param name="path">Path of the binary dictionary file.</param>
/// <returns>Every record that has a non-empty word and at least one pinyin syllable.</returns>
public WordLibraryList Import(string path)
{
    var wordLibraryList = new WordLibraryList();

    // The using block closes the stream on every path.
    using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
    {
        fs.Position = 0x350;
        do
        {
            try
            {
                WordLibrary wl = ImportWord(fs);
                if (wl.Word != "" && wl.PinYin.Length > 0)
                {
                    wordLibraryList.Add(wl);
                }
            }
            catch (Exception ex)
            {
                // Best effort: a record that fails to parse is logged and skipped.
                Debug.WriteLine(ex.Message);
            }
            // "<" rather than "!=": for a file shorter than 0x350 bytes the position
            // starts beyond the length and an inequality test would never end the loop.
        } while (fs.Position < fs.Length);
    }

    return wordLibraryList;
}
/// <summary>
/// Returns the entries of <paramref name="list"/> for which <c>IsKeep</c> is true.
/// </summary>
/// <param name="list">Entries to filter.</param>
/// <returns>A new list holding the kept entries in their original order.</returns>
private WordLibraryList Filter(WordLibraryList list)
{
    var kept = new WordLibraryList();
    foreach (WordLibrary candidate in list)
    {
        if (!IsKeep(candidate))
        {
            continue;
        }

        kept.Add(candidate);
    }

    return kept;
}