/// <summary>
/// Imports a TouchPal dictionary file and converts every word collected while
/// walking it into a <see cref="WordLibrary"/> entry.
/// </summary>
/// <param name="str">Path of the dictionary file to open for reading.</param>
/// <returns>
/// A list holding one entry (count, pinyin, word text) for every word present in
/// <c>GlobalCache.WordList</c> after the file has been parsed.
/// </returns>
public WordLibraryList Import(string str)
{
    // Clear the shared caches before parsing; LoadTree fills them again.
    GlobalCache.CharList.Clear();
    GlobalCache.Stackes.Clear();
    GlobalCache.WordList.Clear();

    // The using block releases the file handle even when parsing throws part-way through.
    using (var fs = new FileStream(str, FileMode.Open, FileAccess.Read))
    {
        ParseHeader(fs);
        TouchPalChar rootChar = TouchPalChar.Load(fs); // load the first character
        LoadTree(fs, rootChar);
    }

    var wwl = new WordLibraryList();
    // Iterate the values directly instead of looking each key up a second time.
    foreach (TouchPalWord w in GlobalCache.WordList.Values)
    {
        var wl = new WordLibrary();
        wl.Count = w.Count;
        wl.PinYin = w.PinYin.ToArray();
        wl.Word = w.ChineseWord;
        wwl.Add(wl);
    }

    return wwl;
}
/// <summary>
/// Appends a word to the character chain: nodes before <paramref name="begin"/> are
/// reused from <paramref name="rootChar"/>, a new node is created for every remaining
/// character, and the word is attached to the last node.
/// </summary>
/// <param name="rootChar">Node whose <c>Word.Chars</c> supplies the first <paramref name="begin"/> nodes.</param>
/// <param name="wl">Word to add; its <c>Word</c>, <c>PinYin</c> and <c>Count</c> are read.</param>
/// <param name="begin">Index of the first character that needs a newly created node.</param>
/// <returns>The node the new word was attached to.</returns>
private TouchPalChar AddWordLink2Char(TouchPalChar rootChar, WordLibrary wl, int begin)
{
    var newWord = new TouchPalWord();
    newWord.ChineseWord = wl.Word;
    newWord.Count = wl.Count;

    // Start linking from the last node that is shared with the existing word, if any.
    TouchPalChar tail = begin > 0 ? rootChar.Word.Chars[begin - 1] : rootChar;

    var nodes = new TouchPalChar[wl.Word.Length];
    newWord.Chars = nodes;

    // Reuse the nodes of the shared prefix.
    int index = 0;
    while (index < begin)
    {
        nodes[index] = rootChar.Word.Chars[index];
        index++;
    }

    // Create and link one node for every remaining character.
    for (index = begin; index < wl.Word.Length; index++)
    {
        char ch = wl.Word[index];
        string pinyin = wl.PinYin[index];

        var node = new TouchPalChar
        {
            Char = ch,
            PinyinCode = GlobalCache.PinyinIndexMapping[pinyin],
            WordIndex = index + 1
        };

        // The value is unused; the property read is kept exactly as in the original code.
        short packed = node.IndexAndPinYin;

        AddChar2Next(tail, node);
        tail = node;
        nodes[index] = tail;
    }

    tail.Word = newWord;
    return tail;
}
/// <summary>
/// Loads the frequency count and the Chinese text of a word record.
/// </summary>
/// <param name="wordLength">Number of UTF-16 code units in the word text.</param>
/// <param name="fs">Stream over the dictionary file.</param>
/// <param name="position">
/// Offset of the word record, also used as the cache key in <c>GlobalCache.WordList</c>.
/// The stream is only repositioned when this value is greater than zero.
/// </param>
/// <returns>
/// The cached word for <paramref name="position"/> when one exists; otherwise a newly
/// read word, which is added to the cache before it is returned.
/// </returns>
public static TouchPalWord LoadCountAndWord(int wordLength, FileStream fs, int position)
{
    if (position > 0)
    {
        fs.Position = position;
    }

    // Single lookup instead of ContainsKey followed by the indexer.
    TouchPalWord cached;
    if (GlobalCache.WordList.TryGetValue(position, out cached))
    {
        return cached;
    }

    // Record layout: 4-byte count, 1 byte of unknown purpose, then the word text.
    const int countSize = 4;
    const int unknownSize = 1;
    int textSize = wordLength * 2;
    var record = new byte[countSize + unknownSize + textSize];

    // Stream.Read may deliver fewer bytes than requested, so keep reading until the
    // record is complete. At end of file the remaining bytes simply stay zero.
    int filled = 0;
    while (filled < record.Length)
    {
        int received = fs.Read(record, filled, record.Length - filled);
        if (received <= 0)
        {
            break;
        }
        filled += received;
    }

    var w = new TouchPalWord();
    w.Position = position;
    w.Count = BitConverter.ToInt32(record, 0);
    w.ChineseWord = Encoding.Unicode.GetString(record, countSize + unknownSize, textSize);
    GlobalCache.WordList.Add(position, w);
    return w;
}
/// <summary>
/// Recursively loads the character tree below <paramref name="root"/>: the word attached
/// to the node (when <c>CountPosition</c> is set), then the subtree reached through
/// <c>NextCharPosition</c>, then the one reached through <c>JumpToPosition</c>.
/// <c>GlobalCache.Stackes</c> holds the nodes on the path to the node being loaded.
/// </summary>
/// <param name="fs">Stream over the dictionary file.</param>
/// <param name="root">Node whose word and linked nodes are loaded.</param>
private void LoadTree(FileStream fs, TouchPalChar root)
{
    if (root.CountPosition > 0)
    {
        // Put this node on the path only while its word is read, so that the stack
        // depth passed as the word length includes the node itself.
        // NOTE(review): if Stackes is a System.Collections.Generic.Stack, ToArray()
        // returns the newest element first - confirm the expected order of Chars.
        GlobalCache.Stackes.Push(root);
        root.Word = TouchPalWord.LoadCountAndWord(GlobalCache.Stackes.Count, fs, root.CountPosition);
        root.Word.Chars = GlobalCache.Stackes.ToArray();
        GlobalCache.Stackes.Pop();
    }

#if DEBUG
    DebugPrintData(root);
#endif

    if (root.NextCharPosition > 0)
    {
        // The node stays on the path while the subtree behind NextChar is loaded.
        GlobalCache.Stackes.Push(root);
        root.NextChar = TouchPalChar.Load(fs, root.NextCharPosition);
        LoadTree(fs, root.NextChar);
    }

    if (root.JumpToPosition > 0)
    {
        // Take this node off the path before following the jump link.
        if (root.NextCharPosition > 0)
        {
            GlobalCache.Stackes.Pop();
        }

        root.JumpToChar = TouchPalChar.Load(fs, root.JumpToPosition);
        LoadTree(fs, root.JumpToChar);
    }
    else if (root.JumpToPosition == 0 && root.NextCharPosition > 0)
    {
        // No jump link: just undo the push made for the NextChar subtree.
        GlobalCache.Stackes.Pop();
    }

    // PrevCharPosition and PrevValidCharPosition links are not followed by this loader.
}
/// <summary>
/// Loads the frequency count and the Chinese text of a word record.
/// </summary>
/// <param name="wordLength">Number of UTF-16 code units in the word text.</param>
/// <param name="fs">Stream over the dictionary file.</param>
/// <param name="position">
/// Offset of the word record, also used as the cache key in <c>GlobalCache.WordList</c>.
/// The stream is only repositioned when this value is greater than zero.
/// </param>
/// <returns>
/// The cached word for <paramref name="position"/> when one exists; otherwise a newly
/// read word, which is added to the cache before it is returned.
/// </returns>
public static TouchPalWord LoadCountAndWord(int wordLength, FileStream fs, int position)
{
    if (position > 0)
    {
        fs.Position = position;
    }

    // Single lookup instead of ContainsKey followed by the indexer.
    TouchPalWord existing;
    if (GlobalCache.WordList.TryGetValue(position, out existing))
    {
        return existing;
    }

    // Record layout: 4-byte count, 1 byte of unknown purpose, then the word text.
    const int countBytes = 4;
    const int unknownBytes = 1;
    int textBytes = wordLength * 2;
    var buffer = new byte[countBytes + unknownBytes + textBytes];

    // Stream.Read may deliver fewer bytes than requested, so loop until the whole
    // record has arrived. At end of file the remaining bytes simply stay zero.
    int offset = 0;
    while (offset < buffer.Length)
    {
        int read = fs.Read(buffer, offset, buffer.Length - offset);
        if (read <= 0)
        {
            break;
        }
        offset += read;
    }

    var w = new TouchPalWord();
    w.Position = position;
    w.Count = BitConverter.ToInt32(buffer, 0);
    w.ChineseWord = Encoding.Unicode.GetString(buffer, countBytes + unknownBytes, textBytes);
    GlobalCache.WordList.Add(position, w);
    return w;
}
/// <summary>
/// Adds a word to the character chain and attaches it to the last node of that chain.
/// Nodes for the first <paramref name="begin"/> characters are taken from
/// <paramref name="rootChar"/>; a new node is created for each character after that.
/// </summary>
/// <param name="rootChar">Node whose <c>Word.Chars</c> provides the already existing prefix nodes.</param>
/// <param name="wl">Word being added; supplies the text, the per-character pinyin and the count.</param>
/// <param name="begin">Index of the first character for which a node must be created.</param>
/// <returns>The final node of the chain, which now carries the new word.</returns>
private TouchPalChar AddWordLink2Char(TouchPalChar rootChar, WordLibrary wl, int begin)
{
    var word = new TouchPalWord();
    word.ChineseWord = wl.Word;
    word.Count = wl.Count;

    // Continue from the last shared node when a prefix exists, else from the root.
    TouchPalChar current = rootChar;
    if (begin > 0)
    {
        current = rootChar.Word.Chars[begin - 1];
    }

    var chain = new TouchPalChar[wl.Word.Length];
    word.Chars = chain;

    // Copy the shared prefix nodes.
    for (int k = 0; k < begin; k++)
    {
        chain[k] = rootChar.Word.Chars[k];
    }

    // Build a new node for each remaining character and hook it behind the previous one.
    for (int k = begin; k < wl.Word.Length; k++)
    {
        char symbol = wl.Word[k];
        string syllable = wl.PinYin[k];

        var created = new TouchPalChar();
        created.Char = symbol;
        created.PinyinCode = GlobalCache.PinyinIndexMapping[syllable];
        created.WordIndex = k + 1;

        // Result is unused; the property read is preserved from the original code.
        short combined = created.IndexAndPinYin;

        AddChar2Next(current, created);
        current = created;
        chain[k] = current;
    }

    current.Word = word;
    return current;
}