Example #1
0
        /// <summary>
        /// Reads every phrase record from the binary dictionary at <paramref name="path"/>.
        /// </summary>
        /// <param name="path">Path of the file to import.</param>
        /// <returns>The entries for which <c>ReadOnePhrase</c> returned a non-null result.</returns>
        public WordLibraryList Import(string path)
        {
            var pyAndWord = new WordLibraryList();

            // The using block releases the file handle even if a read throws part-way through.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                // Four consecutive 32-bit header fields are read starting at 0x10.
                fs.Position = 0x10;
                var phrase_offset_start = BinFileHelper.ReadInt32(fs);
                var phrase_start        = BinFileHelper.ReadInt32(fs);
                var phrase_end          = BinFileHelper.ReadInt32(fs);
                var phrase_count        = BinFileHelper.ReadInt32(fs);

                fs.Position = phrase_offset_start;
                var offsets = ReadOffsets(fs, phrase_count);

                // Append the size of the phrase area so that offsets[i + 1] also exists for the last phrase.
                offsets.Add(phrase_end - phrase_start);

                fs.Position = phrase_start;
                for (var i = 0; i < phrase_count; i++)
                {
                    // Each record is bounded by the start position of the record that follows it.
                    var wl = ReadOnePhrase(fs, phrase_start + offsets[i + 1]);
                    if (wl != null)
                    {
                        pyAndWord.Add(wl);
                    }
                }
            }

            return(pyAndWord);
        }
Example #2
0
        /// <summary>
        /// Reads one phrase record (pinyin followed by the word) from the current stream position.
        /// </summary>
        /// <param name="fs">Stream positioned at the first byte of the record.</param>
        /// <param name="nextStartPosition">Absolute position at which the following record starts.</param>
        /// <returns>
        /// The parsed entry. When the pinyin string is rejected the entry is still returned,
        /// marked as <c>CodeType.NoCode</c>.
        /// </returns>
        private WordLibrary ReadOnePhrase(FileStream fs, int nextStartPosition)
        {
            var entry = new WordLibrary();

            BinFileHelper.ReadInt32(fs);                   // leading 4-byte field; value not used
            int wordOffset = BinFileHelper.ReadInt16(fs);
            entry.Rank = fs.ReadByte();
            fs.ReadByte();                                 // purpose unknown
            BinFileHelper.ReadInt64(fs);                   // newer 8-byte field; meaning unknown

            // The pinyin byte count is the offset field minus 18
            // (presumably the 16 header bytes read above plus the 2-byte separator - TODO confirm).
            byte[] pinyinBytes = BinFileHelper.ReadArray(fs, wordOffset - 18);
            string pinyinText  = Encoding.Unicode.GetString(pinyinBytes);
            BinFileHelper.ReadInt16(fs);                   // 00 00 separating pinyin from the word

            // The word runs up to the next record, minus its own trailing 00 00.
            int    wordByteCount = nextStartPosition - (int)fs.Position - 2;
            byte[] wordBytes     = BinFileHelper.ReadArray(fs, wordByteCount);
            BinFileHelper.ReadInt16(fs);                   // trailing 00 00

            entry.Word = Encoding.Unicode.GetString(wordBytes);

            try
            {
                entry.SetPinyinString(pinyinText);
                entry.CodeType = CodeType.Pinyin;
            }
            catch
            {
                // Keep the word but flag it as having no usable code, and notify any listener.
                entry.CodeType = CodeType.NoCode;
                ImportLineErrorNotice?.Invoke(entry.Word + " 的编码缺失");
            }

            return(entry);
        }
Example #3
0
        /// <summary>
        /// Reads one phrase record (Wubi code followed by the word) from the current stream position.
        /// </summary>
        /// <param name="fs">Stream positioned at the first byte of the record.</param>
        /// <param name="nextStartPosition">Absolute position at which the following record starts.</param>
        /// <returns>The parsed entry, or <c>null</c> when <c>SetCode</c> throws for the code string.</returns>
        private WordLibrary ReadOnePhrase(FileStream fs, int nextStartPosition)
        {
            var entry = new WordLibrary();

            BinFileHelper.ReadInt32(fs);                   // leading 4-byte field; value not used
            int wordOffset = BinFileHelper.ReadInt16(fs);
            entry.Rank = fs.ReadByte();
            fs.ReadByte();                                 // purpose unknown
            BinFileHelper.ReadInt64(fs);                   // newer 8-byte field; meaning unknown

            // The code byte count is the offset field minus 18
            // (presumably the 16 header bytes read above plus the 2-byte separator - TODO confirm).
            byte[] codeBytes = BinFileHelper.ReadArray(fs, wordOffset - 18);
            string wubiCode  = Encoding.Unicode.GetString(codeBytes);
            BinFileHelper.ReadInt16(fs);                   // 00 00 separating the code from the word

            // The word runs up to the next record, minus its own trailing 00 00.
            int    wordByteCount = nextStartPosition - (int)fs.Position - 2;
            byte[] wordBytes     = BinFileHelper.ReadArray(fs, wordByteCount);
            BinFileHelper.ReadInt16(fs);                   // trailing 00 00

            entry.Word = Encoding.Unicode.GetString(wordBytes);

            try
            {
                entry.SetCode(CodeType.Wubi98, wubiCode);
            }
            catch
            {
                // A record whose code cannot be set is dropped by the caller.
                return(null);
            }

            entry.CodeType = CodeType.Wubi98;
            return(entry);
        }
        //{0x05 2word

        //4字节使用同一个拼音的词条数x,2字节拼音长度n,n字节拼音的编号,(2字节汉字的长度y,y*2字节汉字的内容Unicode编码,2字节词频,2字节未知,4字节未知)*x

        #region IWordLibraryImport Members

        /// <summary>
        /// Imports all words from a segmented dictionary file.
        /// </summary>
        /// <remarks>
        /// The word count and the segment count are read as two 32-bit values at 0x44.
        /// Segment <c>i</c> is parsed from position <c>0xC00 + 1024 * i</c>. A segment that fails
        /// to parse is logged to the debug output and skipped; the remaining segments are still read.
        /// </remarks>
        /// <param name="path">Path of the file to import.</param>
        /// <returns>The words collected from every segment that parsed successfully.</returns>
        public WordLibraryList Import(string path)
        {
            var pyAndWord = new WordLibraryList();

            // The using block releases the file handle even if the header reads throw.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position = 0x44;
                CountWord   = BinFileHelper.ReadInt32(fs);
                var segmentCount = BinFileHelper.ReadInt32(fs); // number of segments

                CurrentStatus = 0;
                for (int i = 0; i < segmentCount; i++)
                {
                    try
                    {
                        fs.Position = 0xC00 + 1024 * i;
                        Segment segment = new Segment(fs);
                        pyAndWord.AddWordLibraryList(segment.WordLibraryList);
                        CurrentStatus += segment.WordLibraryList.Count;
                    }
                    catch (Exception e)
                    {
                        // Best effort: one unreadable segment must not abort the whole import.
                        Debug.WriteLine(e.Message);
                    }
                }
            }

            return(pyAndWord);
        }
        /// <summary>
        /// Reads one pinyin group: a pinyin index sequence followed by all words sharing that pinyin.
        /// </summary>
        /// <remarks>
        /// Layout as read here: 2 bytes word count, 2 bytes pinyin byte length, the pinyin indexes
        /// (2 bytes each, little-endian), then per word: 2 bytes word byte length, the UTF-16 word,
        /// and 12 further bytes whose values are not used.
        /// </remarks>
        /// <param name="fs">Stream positioned at the start of the group.</param>
        /// <returns>One entry per word in the group, all carrying the same pinyin and the default rank.</returns>
        private IList <WordLibrary> ReadAPinyinWord(FileStream fs)
        {
            var num = new byte[4];

            fs.Read(num, 0, 4);
            int samePYcount = num[0] + num[1] * 256;
            int pinyinLen   = num[2] + num[3] * 256;

            // Size the buffer from the length field. The length is a 16-bit value, so a fixed
            // 256-byte buffer would be overrun by any group whose pinyin data exceeds 256 bytes.
            var pinyinBytes = new byte[pinyinLen];

            for (int i = 0; i < pinyinLen; i++)
            {
                pinyinBytes[i] = (byte)fs.ReadByte();
            }

            var wordPY = new List <string>();

            for (int i = 0; i < pinyinLen / 2; i++)
            {
                int key = pinyinBytes[i * 2] + pinyinBytes[i * 2 + 1] * 256;
                if (key < pyDic.Count)
                {
                    wordPY.Add(pyDic[key]);
                }
                else
                {
                    // Indexes past the pinyin table are looked up in a2zchar instead.
                    wordPY.Add(a2zchar[key - pyDic.Count].ToString());
                }
            }

            // Next come the words; homophones all reuse the pinyin read above.
            var pyAndWord = new List <WordLibrary>();

            for (int s = 0; s < samePYcount; s++)
            {
                num = new byte[2];
                fs.Read(num, 0, 2);
                int hzBytecount = num[0] + num[1] * 256;
                var wordBytes   = new byte[hzBytecount];
                fs.Read(wordBytes, 0, hzBytecount);
                string word = Encoding.Unicode.GetString(wordBytes);

                BinFileHelper.ReadInt16(fs); // always 10 in observed files; not a frequency, meaning unknown
                BinFileHelper.ReadInt32(fs); // differs per word; possibly a frequency, unconfirmed

                pyAndWord.Add(new WordLibrary {
                    Word = word, PinYin = wordPY.ToArray(), Rank = DefaultRank
                });
                CurrentStatus++;

                // Skip the 6 remaining bytes of the record (meaning unknown). They are consumed with
                // ReadByte rather than Seek so the position can never move past the end of the file.
                for (int i = 0; i < 6; i++)
                {
                    fs.ReadByte();
                }
            }
            return(pyAndWord);
        }
        /// <summary>
        /// Imports words from a dictionary whose data area starts at 0x350 and ends at the
        /// position stored as a 32-bit value at 0x60.
        /// </summary>
        /// <remarks>
        /// Entries with an empty word or no pinyin are skipped. Reading stops when
        /// <c>ImportWord</c> returns <c>null</c>, when the stored end position is reached, or
        /// when the end of the file is reached. An exception raised while reading a single
        /// entry is logged to the debug output and reading continues.
        /// </remarks>
        /// <param name="path">Path of the file to import.</param>
        /// <returns>The imported words.</returns>
        public WordLibraryList Import(string path)
        {
            var wordLibraryList = new WordLibraryList();

            // The using block closes the file even if the header read throws.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position = 0x60;
                int endPosition = BinFileHelper.ReadInt32(fs);

                fs.Position   = 0x350;
                CurrentStatus = 0;
                do
                {
                    try
                    {
                        WordLibrary wl = ImportWord(fs);
                        if (wl == null)
                        {
                            break;
                        }
                        if (wl.Word != "" && wl.PinYin.Length > 0)
                        {
                            wordLibraryList.Add(wl);
                        }
                    }
                    catch (Exception ex)
                    {
                        Debug.WriteLine(ex.Message);
                    }

                    // The end-of-file guard stops the loop on truncated or corrupt files, where the
                    // position would otherwise never become equal to the stored end position.
                } while (fs.Position != endPosition && fs.Position < fs.Length);
            }

            return(wordLibraryList);
        }
        /// <summary>
        /// Reads the dictionary section that starts at <paramref name="offsetWithIndex"/>: parses its
        /// header, collects the compressed-stream offsets, inflates the data and extracts the entries.
        /// </summary>
        /// <param name="fs">Open stream over the dictionary file; its position is changed.</param>
        /// <param name="offsetWithIndex">Absolute position of the section header.</param>
        /// <returns>The result of <c>Extract</c> applied to the inflated data.</returns>
        private IDictionary <string, string> ReadDictionary(FileStream fs, int offsetWithIndex)
        {
            fs.Position = offsetWithIndex;
            int type = BinFileHelper.ReadInt32(fs);

            // Print the type in hexadecimal so the value matches its "0x" prefix.
            Debug.WriteLine("词典类型:0x" + type.ToString("x"));
            int limit       = BinFileHelper.ReadInt32(fs) + offsetWithIndex + 8;        // end address of the section
            int offsetIndex = offsetWithIndex + 0x1C;                                   // address where the index starts
            int offsetCompressedDataHeader = BinFileHelper.ReadInt32(fs) + offsetIndex; // end of index / compressed-data header
            int inflatedWordsIndexLength   = BinFileHelper.ReadInt32(fs);
            int inflatedWordsLength        = BinFileHelper.ReadInt32(fs);
            int inflatedXmlLength          = BinFileHelper.ReadInt32(fs);
            int definitions    = (offsetCompressedDataHeader - offsetIndex) / 4;        // index holds 4-byte entries
            var deflateStreams = new List <int>();

            fs.Position = offsetCompressedDataHeader + 8;
            int offset = BinFileHelper.ReadInt32(fs);

            // Collect stream offsets until the last offset read, added to the current position,
            // reaches the section end. The first offset only seeds the loop and is not stored.
            while (offset + fs.Position < limit)
            {
                offset = BinFileHelper.ReadInt32(fs);
                deflateStreams.Add(offset);
            }
            long offsetCompressedData = fs.Position;

            Debug.WriteLine("索引词组数目:" + definitions);

            Debug.WriteLine("索引地址/大小:0x" + offsetIndex.ToString("x") + " / "
                            + (offsetCompressedDataHeader - offsetIndex).ToString("x") + " B");
            Debug.WriteLine("压缩数据地址/大小:0x" + (offsetCompressedData).ToString("x") + " / "
                            + (limit - offsetCompressedData).ToString("x") + " B");
            Debug.WriteLine("词组索引地址/大小(解压缩后):0x0 / " + inflatedWordsIndexLength.ToString("x") + " B");
            Debug.WriteLine("词组地址/大小(解压缩后):0x" + (inflatedWordsIndexLength).ToString("x") + " / "
                            + inflatedWordsLength.ToString("x") + " B");
            Debug.WriteLine("XML地址/大小(解压缩后):0x" + (inflatedWordsIndexLength + inflatedWordsLength).ToString("x")
                            + " / " + inflatedXmlLength.ToString("x") + " B");
            Debug.WriteLine("文件大小(解压缩后):" + (inflatedWordsIndexLength + inflatedWordsLength + inflatedXmlLength) / 1024
                            + " KB");

            byte[] inflatedFile = Inflate(fs, offsetCompressedData, deflateStreams);

            // Inflated layout, per the debug output above: word index, then words, then XML.
            return(Extract(inflatedFile, inflatedWordsIndexLength,
                           inflatedWordsIndexLength + inflatedWordsLength));
        }
Example #8
0
        /// <summary>
        /// Reads <paramref name="count"/> consecutive 32-bit values from the current stream position.
        /// </summary>
        /// <param name="fs">Stream positioned at the first value.</param>
        /// <param name="count">Number of values to read; zero or less yields an empty list.</param>
        /// <returns>The values in the order they were read.</returns>
        private IList <int> ReadOffsets(FileStream fs, int count)
        {
            var offsets = new List <int>();

            for (int remaining = count; remaining > 0; remaining--)
            {
                offsets.Add(BinFileHelper.ReadInt32(fs));
            }

            return(offsets);
        }
Example #9
0
        //4字节使用同一个拼音的词条数x,2字节拼音长度n,n字节拼音的编号,(2字节汉字的长度y,y*2字节汉字的内容Unicode编码,2字节词频,2字节未知,4字节未知)*x

        /// <summary>
        /// Imports words from a dictionary whose word count is stored at 0x18 and whose
        /// pinyin groups start at 0x30.
        /// </summary>
        /// <remarks>
        /// Each group holds a shared pinyin index list followed by the words using it. A word is
        /// kept only when its character count equals the number of pinyin syllables; other words
        /// are reported on the debug output. Reading stops once <c>CountWord</c> words have been
        /// processed or the end of the file is reached.
        /// </remarks>
        /// <param name="path">Path of the file to import.</param>
        /// <returns>The imported words.</returns>
        public WordLibraryList Import(string path)
        {
            var pyAndWord = new WordLibraryList();

            // The using block releases the file handle on every exit path.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position   = 0x18;
                CountWord     = BinFileHelper.ReadInt32(fs);
                CurrentStatus = 0;
                fs.Position   = 0x30;

                // The end-of-file guard prevents an endless loop on a truncated file, where every
                // group would read as empty and CurrentStatus would never reach CountWord.
                while (CurrentStatus < CountWord && fs.Position < fs.Length)
                {
                    int samePyCount = BinFileHelper.ReadInt16(fs);
                    BinFileHelper.ReadInt16(fs); // meaning unknown
                    short pyLength = BinFileHelper.ReadInt16(fs);
                    var   pyArray  = new string[pyLength / 2];
                    for (int i = 0; i < pyLength / 2; i++)
                    {
                        short idx = BinFileHelper.ReadInt16(fs);
                        try
                        {
                            pyArray[i] = PinYinDic[idx];
                        }
                        catch
                        {
                            // Unknown pinyin index: keep a placeholder so the syllable count stays right.
                            pyArray[i] = "--";
                        }
                    }
                    for (int i = 0; i < samePyCount; i++)
                    {
                        short wordByteLength = BinFileHelper.ReadInt16(fs);
                        var   wordArray      = new byte[wordByteLength];
                        fs.Read(wordArray, 0, wordByteLength);
                        string word  = Encoding.Unicode.GetString(wordArray);
                        short  count = BinFileHelper.ReadInt16(fs); // stored as the word's rank
                        BinFileHelper.ReadInt16(fs);                // second 16-bit field, unused
                        BinFileHelper.ReadInt32(fs);                // meaning unknown
                        if (pyArray.Length == word.Length)
                        {
                            var wl = new WordLibrary {
                                Rank = count, Word = word, PinYin = pyArray
                            };
                            pyAndWord.Add(wl);
                        }
                        else
                        {
                            Debug.WriteLine("Error data: word:[" + word + "] pinyin:[" + string.Join(",", pyArray) + "]");
                        }
                        CurrentStatus++;
                    }
                }
            }

            return(pyAndWord);
        }
            /// <summary>
            /// Fills this record from the stream: six consecutive fields are read in order,
            /// then four further bytes are skipped.
            /// </summary>
            /// <param name="fs">Stream positioned at the start of the record.</param>
            public void Parse(FileStream fs)
            {
                Offset    = BinFileHelper.ReadInt32(fs);
                Frequency = BinFileHelper.ReadUInt16(fs);
                AFlag     = BinFileHelper.ReadUInt16(fs);
                I8        = BinFileHelper.ReadUInt32(fs);
                P1        = BinFileHelper.ReadUInt16(fs);
                IE        = BinFileHelper.ReadInt32(fs);

                // Step over the 4 bytes that follow the fields read above.
                fs.Position += 4;
            }
Example #11
0
        /// <summary>
        /// Opens an LD2 file, logs its header details to the debug output, and reads the
        /// dictionary section when the file contains one.
        /// </summary>
        /// <param name="ld2File">Path of the LD2 file.</param>
        /// <returns>
        /// The result of <c>ReadDictionary</c>, or <c>null</c> when the file holds no local
        /// dictionary data.
        /// </returns>
        public IList <InternalWord> Parse(string ld2File)
        {
            using (var fs = new FileStream(ld2File, FileMode.Open, FileAccess.Read))
            {
                Debug.WriteLine("文件:" + ld2File);
                byte[] signatureBytes = BinFileHelper.ReadArray(fs, 4);
                string signature      = Encoding.ASCII.GetString(signatureBytes);
                Debug.WriteLine("类型:" + signature);

                fs.Position = 0x18;
                Debug.WriteLine("版本:" + BinFileHelper.ReadInt16(fs) + "." + BinFileHelper.ReadInt16(fs));
                Debug.WriteLine("ID: 0x" + (BinFileHelper.ReadInt64(fs).ToString("x")));

                fs.Position = 0x5c;
                int dataStart = BinFileHelper.ReadInt32(fs) + 0x60;

                // Guard: the data offset points at or beyond the end of the file.
                if (fs.Length <= dataStart)
                {
                    Debug.WriteLine("文件不包含字典数据。网上字典?");
                    return(null);
                }

                Debug.WriteLine("简介地址:0x" + (dataStart).ToString("x"));
                fs.Position = dataStart;
                int sectionType = BinFileHelper.ReadInt32(fs);
                Debug.WriteLine("简介类型:0x" + (sectionType).ToString("x"));

                fs.Position = dataStart + 4;
                int dictionaryStart = BinFileHelper.ReadInt32(fs) + dataStart + 12;

                if (sectionType == 3)
                {
                    // Type 3: no additional information block, the dictionary starts right here.
                    return(ReadDictionary(fs, dataStart));
                }

                if (fs.Length > dictionaryStart - 0x1C)
                {
                    return(ReadDictionary(fs, dictionaryStart));
                }

                Debug.WriteLine("文件不包含字典数据。网上字典?");
                return(null);
            }
        }
Example #12
0
        /// <summary>
        /// Reads the descriptive header fields of a scel dictionary file.
        /// </summary>
        /// <param name="path">Path of the scel file.</param>
        /// <returns>
        /// A dictionary with the keys <c>CountWord</c>, <c>Name</c>, <c>Type</c>, <c>Info</c>
        /// and <c>Sample</c>.
        /// </returns>
        public static Dictionary <string, string> ReadScelInfo(string path)
        {
            var info = new Dictionary <string, string>();

            // The using block closes the file even when one of the field reads throws.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position = 0x124;
                int countWord = BinFileHelper.ReadInt32(fs);

                info.Add("CountWord", countWord.ToString());

                // Each text field is read from a fixed position; the last two pass an explicit 1024.
                info.Add("Name", readScelFieldText(fs, 0x130));
                info.Add("Type", readScelFieldText(fs, 0x338));
                info.Add("Info", readScelFieldText(fs, 0x540, 1024));
                info.Add("Sample", readScelFieldText(fs, 0xd40, 1024));
            }

            return(info);
        }
        /// <summary>
        /// Parses one segment: four 32-bit header fields followed by word records, which are
        /// read until the accumulated record length reaches <c>WordByteLen</c>.
        /// </summary>
        /// <param name="stream">Stream positioned at the start of the segment.</param>
        public Segment(Stream stream)
        {
            IndexNumber = BinFileHelper.ReadInt32(stream);
            BinFileHelper.ReadInt32(stream); // second header field; value not used

            WordLenEnums = BinFileHelper.ReadInt32(stream);
            WordByteLen  = BinFileHelper.ReadInt32(stream);

            // Value is unused; the property read is kept so behavior on the stream is unchanged.
            long bodyStart = stream.Position;
            int  consumed  = 0;

            WordLibraryList = new WordLibraryList();

            // At least one record is always attempted, even when WordByteLen is zero.
            do
            {
                int recordLength;
                var parsed = Parse(stream, out recordLength);
                consumed += recordLength;
                if (parsed == null)
                {
                    continue;
                }
                WordLibraryList.Add(parsed);
            } while (consumed < WordByteLen);
        }
Example #14
0
        /// <summary>
        /// Reads one phrase record (pinyin followed by the word) from the current stream position.
        /// </summary>
        /// <param name="fs">Stream positioned at the first byte of the record.</param>
        /// <param name="nextStartPosition">Absolute position at which the following record starts.</param>
        /// <returns>The parsed entry with its code type set to pinyin.</returns>
        private WordLibrary ReadOnePhrase(FileStream fs, int nextStartPosition)
        {
            var entry = new WordLibrary();

            BinFileHelper.ReadInt32(fs);                   // leading 4-byte field; value not used
            int wordOffset = BinFileHelper.ReadInt16(fs);
            entry.Rank = fs.ReadByte();
            fs.ReadByte();                                 // purpose unknown

            // The pinyin byte count is the offset field minus 10
            // (presumably the 8 header bytes read above plus the 2-byte separator - TODO confirm).
            byte[] pinyinBytes = BinFileHelper.ReadArray(fs, wordOffset - 10);
            string pinyinText  = Encoding.Unicode.GetString(pinyinBytes);
            BinFileHelper.ReadInt16(fs);                   // 00 00 separating pinyin from the word

            // The word runs up to the next record, minus its own trailing 00 00.
            int    wordByteCount = nextStartPosition - (int)fs.Position - 2;
            byte[] wordBytes     = BinFileHelper.ReadArray(fs, wordByteCount);
            BinFileHelper.ReadInt16(fs);                   // trailing 00 00

            entry.Word = Encoding.Unicode.GetString(wordBytes);
            entry.SetPinyinString(pinyinText);
            entry.CodeType = CodeType.Pinyin;
            return(entry);
        }
 /// <summary>
 /// Reads two consecutive 32-bit values into <c>Offset</c> and <c>Count</c>, then records
 /// the stream position immediately after them in <c>EndPosition</c>.
 /// </summary>
 /// <param name="fs">Stream positioned at the first of the two values.</param>
 public void Parse(FileStream fs)
 {
     Offset      = BinFileHelper.ReadInt32(fs);
     Count       = BinFileHelper.ReadInt32(fs);
     // Captured after both reads, so it marks where this record ends in the stream.
     EndPosition = fs.Position;
 }
Example #16
0
        /// <summary>
        /// Reads a qcel dictionary: the word count at 0x124, the pinyin table starting at 0x1540,
        /// and the pinyin/word groups starting at 0x2628.
        /// </summary>
        /// <remarks>
        /// The pinyin table is read until the entry "zuo" is found. Word groups are then read until
        /// the end of the file; an exception raised while reading a group is logged to the debug
        /// output and reading continues with the next group.
        /// </remarks>
        /// <param name="path">Path of the file to read.</param>
        /// <returns>All words read from the file.</returns>
        private WordLibraryList ReadQcel(string path)
        {
            pyDic = new Dictionary <int, string>();
            var pyAndWord = new WordLibraryList();

            // The using block closes the file on every exit path, including exceptions thrown
            // while reading the header or the pinyin table.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position   = 0x124;
                CountWord     = BinFileHelper.ReadInt32(fs);
                CurrentStatus = 0;

                fs.Position = 0x1540;
                var tableHeader = new byte[4];
                fs.Read(tableHeader, 0, 4); // \x9D\x01\x00\x00 - skipped, value not used

                while (true)
                {
                    // Entry header: 2-byte index, then the byte length of the pinyin text in byte 2.
                    var num = new byte[4];
                    fs.Read(num, 0, 4);
                    int mark = num[0] + num[1] * 256;
                    var str  = new byte[num[2]];
                    fs.Read(str, 0, (num[2]));
                    string py = Encoding.Unicode.GetString(str);
                    pyDic.Add(mark, py);
                    if (py == "zuo") // last pinyin in the table
                    {
                        break;
                    }
                }

                // Dump the pinyin table to the debug output.
                var s = new StringBuilder();

                foreach (string value in pyDic.Values)
                {
                    s.Append(value + "\",\"");
                }
                Debug.WriteLine(s.ToString());

                fs.Position = 0x2628;

                // Read groups until the position reaches the end of the file.
                do
                {
                    try
                    {
                        pyAndWord.AddRange(ReadAPinyinWord(fs));
                    }
                    catch (Exception ex)
                    {
                        Debug.WriteLine(ex.Message);
                    }
                } while (fs.Length != fs.Position);
            }

            return(pyAndWord);
        }
 /// <summary>
 /// Reads three consecutive 32-bit values from the stream into <c>Offset</c>,
 /// <c>DataSize</c> and <c>UsedDataSize</c>, in that order.
 /// </summary>
 /// <param name="fs">Stream positioned at the first of the three values.</param>
 public void Parse(FileStream fs)
 {
     // Offset is read as unsigned; the two sizes are read as signed values.
     Offset       = BinFileHelper.ReadUInt32(fs);
     DataSize     = BinFileHelper.ReadInt32(fs);
     UsedDataSize = BinFileHelper.ReadInt32(fs);
 }
Example #18
0
        /// <summary>
        /// Reads a scel dictionary: the word count at 0x124, the pinyin table starting at 0x1540,
        /// and the pinyin/word groups starting at a position chosen from byte 4 of the header.
        /// </summary>
        /// <remarks>
        /// The pinyin table is read until the entry "zuo" is found. Word groups are then read until
        /// the end of the file; an exception raised while reading a group is logged to the debug
        /// output and reading continues with the next group.
        /// </remarks>
        /// <param name="path">Path of the file to read.</param>
        /// <returns>All words read from the file.</returns>
        private WordLibraryList ReadScel(string path)
        {
            pyDic = new Dictionary <int, string>();
            var pyAndWord = new WordLibraryList();

            // The using block closes the file on every exit path, including exceptions thrown
            // while reading the header or the pinyin table.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                var header = new byte[128];
                fs.Read(header, 0, 128); // \x40\x15\x00\x00\x44\x43\x53\x01

                // Byte 4 of the header selects where the word area starts.
                // NOTE(review): any other value leaves the start at 0 - confirm whether that is intended.
                int hzPosition = 0;
                if (header[4] == 0x44)
                {
                    hzPosition = 0x2628;
                }

                if (header[4] == 0x45)
                {
                    hzPosition = 0x26C4;
                }

                fs.Position   = 0x124;
                CountWord     = BinFileHelper.ReadInt32(fs);
                CurrentStatus = 0;

                fs.Position = 0x1540;
                var tableHeader = new byte[4];
                fs.Read(tableHeader, 0, 4); // \x9D\x01\x00\x00 - skipped, value not used

                while (true)
                {
                    // Entry header: 2-byte index, then the byte length of the pinyin text in byte 2.
                    var num = new byte[4];
                    fs.Read(num, 0, 4);
                    int mark      = num[0] + num[1] * 256;
                    int pyByteLen = num[2];

                    // Keep the zero-padded 128-byte buffer for normal entries, but grow it when the
                    // length byte is larger so the read cannot overrun the buffer.
                    var str = new byte[pyByteLen > 128 ? pyByteLen : 128];
                    fs.Read(str, 0, pyByteLen);
                    string py = Encoding.Unicode.GetString(str);

                    // Cut at the first NUL from the padding. An entry that fills the whole buffer
                    // has no NUL, so it is kept as-is instead of making Substring throw.
                    int terminator = py.IndexOf('\0');
                    if (terminator >= 0)
                    {
                        py = py.Substring(0, terminator);
                    }

                    pyDic.Add(mark, py);
                    if (py == "zuo") // last pinyin in the table
                    {
                        break;
                    }
                }

                // Dump the pinyin table to the debug output.
                var s = new StringBuilder();

                foreach (string value in pyDic.Values)
                {
                    s.Append(value + "\",\"");
                }

                Debug.WriteLine(s.ToString());

                fs.Position = hzPosition;

                // Read groups until the position reaches the end of the file.
                do
                {
                    try
                    {
                        pyAndWord.AddRange(ReadAPinyinWord(fs));
                    }
                    catch (System.Exception ex)
                    {
                        Debug.WriteLine(ex.Message);
                    }
                } while (fs.Length != fs.Position);
            }

            return(pyAndWord);
        }
Example #19
0
        /// <summary>
        /// Decompresses a qpyd dictionary and returns its entries as text, one
        /// "word TAB pinyin" line per entry.
        /// </summary>
        /// <remarks>
        /// The start of the compressed data is read at 0x38 and the word count at 0x44. The inflated
        /// data begins with 10-byte index records (pinyin length in byte 0, word length in byte 1,
        /// 32-bit pinyin address at byte 6). The index ends where the first record's pinyin begins.
        /// </remarks>
        /// <param name="qqydFile">Path of the qpyd file.</param>
        /// <returns>All entries as newline-separated text.</returns>
        private string ParseQpyd(string qqydFile)
        {
            byte[] byteArray;

            // The using blocks release the file and the inflater even if decompression throws.
            using (var fs = new FileStream(qqydFile, FileMode.Open, FileAccess.Read))
            {
                fs.Position = 0x38;
                var startAddressByte = new byte[4];

                fs.Read(startAddressByte, 0, 4);
                int startAddress = BitConverter.ToInt32(startAddressByte, 0);

                fs.Position = 0x44;
                int wordCount = BinFileHelper.ReadInt32(fs);

                CountWord     = wordCount;
                CurrentStatus = 0;

                fs.Position = startAddress;
                using (var zipStream = new InflaterInputStream(fs))
                using (var inflated = new MemoryStream())
                {
                    // Copy in chunks; a MemoryStream avoids growing a List<byte> one byte at a time.
                    var buffer = new byte[2048];
                    int readCount;
                    while ((readCount = zipStream.Read(buffer, 0, buffer.Length)) > 0)
                    {
                        inflated.Write(buffer, 0, readCount);
                    }
                    byteArray = inflated.ToArray();
                }
            }

            int unzippedDictStartAddr = -1;
            int idx = 0;
            var sb  = new StringBuilder();

            while (unzippedDictStartAddr == -1 || idx < unzippedDictStartAddr)
            {
                // Read one index record; only the low byte of each length field is used.
                int pinyinStartAddr = BitConverter.ToInt32(byteArray, idx + 0x6);
                int pinyinLength    = BitConverter.ToInt32(byteArray, idx + 0x0) & 0xff;
                int wordStartAddr   = pinyinStartAddr + pinyinLength;
                int wordLength      = BitConverter.ToInt32(byteArray, idx + 0x1) & 0xff;
                if (unzippedDictStartAddr == -1)
                {
                    // The first record's pinyin address marks the end of the index area.
                    unzippedDictStartAddr = pinyinStartAddr;
                    Debug.WriteLine("词库地址(解压后):0x" + unzippedDictStartAddr.ToString("x") + "\n");
                }

                string pinyin = Encoding.UTF8.GetString(byteArray, pinyinStartAddr, pinyinLength);
                string word   = Encoding.Unicode.GetString(byteArray, wordStartAddr, wordLength);
                sb.Append(word + "\t" + pinyin + "\n");
                Debug.WriteLine(word + "\t" + pinyin);
                CurrentStatus++;

                // Advance to the next 10-byte index record.
                idx += 0xa;
            }
            return(sb.ToString());
        }