Exemplo n.º 1
0
        /// <summary>
        /// Reads one phrase record (Wubi98 code followed by the word text) from the current
        /// stream position.
        /// </summary>
        /// <param name="fs">Stream positioned at the first byte of the record.</param>
        /// <param name="nextStartPosition">Absolute position at which the following record begins.</param>
        /// <returns>The parsed entry, or <c>null</c> when <c>SetCode</c> rejects the code string.</returns>
        private WordLibrary ReadOnePhrase(FileStream fs, int nextStartPosition)
        {
            var entry = new WordLibrary();

            // Record header: 4-byte value (unused), 2-byte offset of the word text,
            // 1-byte rank, 1 byte of unknown meaning, 8 bytes of unknown meaning.
            BinFileHelper.ReadInt32(fs);
            var wordOffset = BinFileHelper.ReadInt16(fs);

            entry.Rank = fs.ReadByte();
            fs.ReadByte();
            BinFileHelper.ReadInt64(fs);

            // The code string fills the space up to wordOffset; the constant 18 presumably
            // covers the 16 header bytes plus the 2-byte separator - TODO confirm.
            var code = Encoding.Unicode.GetString(BinFileHelper.ReadArray(fs, wordOffset - 18));

            // 00 00 separates the code from the word text.
            BinFileHelper.ReadInt16(fs);

            // The word runs up to the next record, minus the trailing 00 00.
            var wordLength = nextStartPosition - (int)fs.Position - 2;
            var rawWord    = BinFileHelper.ReadArray(fs, wordLength);

            // Consume the trailing 00 00.
            BinFileHelper.ReadInt16(fs);
            entry.Word = Encoding.Unicode.GetString(rawWord);

            try
            {
                entry.SetCode(CodeType.Wubi98, code);
            }
            catch
            {
                // An entry whose code cannot be set is dropped by the caller.
                return(null);
            }

            entry.CodeType = CodeType.Wubi98;
            return(entry);
        }
Exemplo n.º 2
0
        //{0x05 2word

        //4字节使用同一个拼音的词条数x,2字节拼音长度n,n字节拼音的编号,(2字节汉字的长度y,y*2字节汉字的内容Unicode编码,2字节词频,2字节未知,4字节未知)*x

        #region IWordLibraryImport Members

        /// <summary>
        /// Imports every word from a segmented dictionary file.
        /// </summary>
        /// <remarks>
        /// The total word count and the segment count are read starting at offset 0x44. Segment
        /// <c>i</c> is then read from offset <c>0xC00 + 1024 * i</c>. A segment that fails to parse
        /// is logged through <see cref="Debug"/> and skipped.
        /// </remarks>
        /// <param name="path">Path of the dictionary file to read.</param>
        /// <returns>All entries collected from the segments that parsed successfully.</returns>
        public WordLibraryList Import(string path)
        {
            var pyAndWord = new WordLibraryList();

            // The using block guarantees the file handle is released; the original never closed it.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position = 0x44;
                CountWord   = BinFileHelper.ReadInt32(fs);
                var segmentCount = BinFileHelper.ReadInt32(fs); // number of segments

                CurrentStatus = 0;
                for (int i = 0; i < segmentCount; i++)
                {
                    try
                    {
                        fs.Position = 0xC00 + 1024 * i;
                        Segment segment = new Segment(fs);
                        pyAndWord.AddWordLibraryList(segment.WordLibraryList);
                        CurrentStatus += segment.WordLibraryList.Count;
                    }
                    catch (Exception e)
                    {
                        // Best effort: a broken segment must not abort the whole import.
                        Debug.WriteLine(e.Message);
                    }
                }
            }

            return(pyAndWord);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Imports all phrases from a dictionary file that stores an offset table followed by
        /// the phrase records.
        /// </summary>
        /// <remarks>
        /// Four 32-bit values are read at offset 0x10: the position of the offset table, the start
        /// and end of the phrase area, and the phrase count. Each phrase is delimited by the offset
        /// of the next one; the end of the phrase area is appended as the final delimiter.
        /// </remarks>
        /// <param name="path">Path of the dictionary file to read.</param>
        /// <returns>Every phrase for which <c>ReadOnePhrase</c> returned a non-null entry.</returns>
        public WordLibraryList Import(string path)
        {
            var pyAndWord = new WordLibraryList();

            // The using block guarantees the file handle is released; the original never closed it.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position = 0x10;
                var phrase_offset_start = BinFileHelper.ReadInt32(fs);
                var phrase_start        = BinFileHelper.ReadInt32(fs);
                var phrase_end          = BinFileHelper.ReadInt32(fs);
                var phrase_count        = BinFileHelper.ReadInt32(fs);

                fs.Position = phrase_offset_start;
                var offsets = ReadOffsets(fs, phrase_count);

                // Sentinel so that the last phrase also has a "next start" to measure against.
                offsets.Add(phrase_end - phrase_start);

                fs.Position = phrase_start;
                for (var i = 0; i < phrase_count; i++)
                {
                    var wl = ReadOnePhrase(fs, phrase_start + offsets[i + 1]);
                    if (wl != null)
                    {
                        pyAndWord.Add(wl);
                    }
                }
            }

            return(pyAndWord);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Reads one phrase record (pinyin string followed by the word text) from the current
        /// stream position.
        /// </summary>
        /// <param name="fs">Stream positioned at the first byte of the record.</param>
        /// <param name="nextStartPosition">Absolute position at which the following record begins.</param>
        /// <returns>
        /// The parsed entry. When the pinyin cannot be applied the entry is still returned, marked
        /// <c>CodeType.NoCode</c>, and <c>ImportLineErrorNotice</c> is raised.
        /// </returns>
        private WordLibrary ReadOnePhrase(FileStream fs, int nextStartPosition)
        {
            var entry = new WordLibrary();

            // Record header: 4-byte value (unused), 2-byte offset of the word text,
            // 1-byte rank, 1 byte of unknown meaning, 8 bytes of unknown meaning.
            BinFileHelper.ReadInt32(fs);
            var wordOffset = BinFileHelper.ReadInt16(fs);

            entry.Rank = fs.ReadByte();
            fs.ReadByte();
            BinFileHelper.ReadInt64(fs);

            // The pinyin fills the space up to wordOffset; the constant 18 presumably
            // covers the 16 header bytes plus the 2-byte separator - TODO confirm.
            var pinyin = Encoding.Unicode.GetString(BinFileHelper.ReadArray(fs, wordOffset - 18));

            // 00 00 separates the pinyin from the word text.
            BinFileHelper.ReadInt16(fs);

            // The word runs up to the next record, minus the trailing 00 00.
            var wordLength = nextStartPosition - (int)fs.Position - 2;
            var rawWord    = BinFileHelper.ReadArray(fs, wordLength);

            // Consume the trailing 00 00.
            BinFileHelper.ReadInt16(fs);
            entry.Word = Encoding.Unicode.GetString(rawWord);

            try
            {
                entry.SetPinyinString(pinyin);
                entry.CodeType = CodeType.Pinyin;
            }
            catch
            {
                // Keep the word but flag that it carries no usable code.
                entry.CodeType = CodeType.NoCode;
                ImportLineErrorNotice?.Invoke(entry.Word + " 的编码缺失");
            }

            return(entry);
        }
 /// <summary>
 /// Skips 56 bytes from the current position and reads the two following unsigned
 /// 32-bit values into <c>P2</c> and <c>P3</c>.
 /// </summary>
 /// <param name="fs">Stream positioned at the start of this structure.</param>
 public void Parse(FileStream fs)
 {
     // Relative seek; lands on the same position as seeking to Position + 56 from the start.
     fs.Seek(56, SeekOrigin.Current);

     P2 = BinFileHelper.ReadUInt32(fs);
     P3 = BinFileHelper.ReadUInt32(fs);
 }
Exemplo n.º 6
0
 /// <summary>
 /// Configures <c>BinFileHelper</c> from the current UI values (selected file, read length,
 /// prefix/suffix bytes), prepares the first chunk, and refreshes the package preview.
 /// </summary>
 private void PrepareSendPackage()
 {
     string selectedFile = TxtSelectedFile.Text;
     BinFileHelper.PrepareFile(selectedFile);

     // Parsed after PrepareFile so a malformed length still fails at the same point as before.
     int readLength = int.Parse(TxtReadLength.Text);
     BinFileHelper.SetReadLength(readLength);

     var prefix = GetPrefixBytes();
     var suffix = GetTailfixBytes();
     BinFileHelper.SetFixBytes(prefix, suffix);

     BinFileHelper.PrepareNextBytes();
     DisplayPreparedPackageString();
 }
Exemplo n.º 7
0
        /// <summary>
        /// Reads one pinyin group: a shared pinyin sequence followed by every word that uses it.
        /// </summary>
        /// <remarks>
        /// Layout as read here: 2-byte word count, 2-byte pinyin byte length, the pinyin indices
        /// (2 bytes each), then per word a 2-byte byte length, the UTF-16 text, and 12 further
        /// bytes whose meaning is unknown. Indices at or beyond <c>pyDic.Count</c> are mapped
        /// through <c>a2zchar</c>.
        /// </remarks>
        /// <param name="fs">Stream positioned at the start of the group.</param>
        /// <returns>One entry per word in the group, all sharing the same pinyin.</returns>
        private IList <WordLibrary> ReadAPinyinWord(FileStream fs)
        {
            var header = new byte[4];

            fs.Read(header, 0, 4);
            int samePYcount = header[0] + header[1] * 256;
            int pinyinLen   = header[2] + header[3] * 256;

            // Size the buffer from the declared length. The previous fixed 256-byte buffer threw
            // IndexOutOfRangeException whenever the 16-bit length field exceeded 256.
            var pinyinBytes = new byte[pinyinLen];

            for (int i = 0; i < pinyinLen; i++)
            {
                pinyinBytes[i] = (byte)fs.ReadByte();
            }

            var wordPY = new List <string>();

            for (int i = 0; i < pinyinLen / 2; i++)
            {
                int key = pinyinBytes[i * 2] + pinyinBytes[i * 2 + 1] * 256;
                if (key < pyDic.Count)
                {
                    wordPY.Add(pyDic[key]);
                }
                else
                {
                    // Keys past the pinyin table refer to single characters in a2zchar.
                    wordPY.Add(a2zchar[key - pyDic.Count].ToString());
                }
            }

            var pyAndWord = new List <WordLibrary>();

            for (int s = 0; s < samePYcount; s++) // homophones sharing the pinyin read above
            {
                var lengthBytes = new byte[2];
                fs.Read(lengthBytes, 0, 2);
                int hzBytecount = lengthBytes[0] + lengthBytes[1] * 256;

                var wordBytes = new byte[hzBytecount];
                fs.Read(wordBytes, 0, hzBytecount);
                string word = Encoding.Unicode.GetString(wordBytes);

                // Two fields of unknown meaning; the original author noted that the first is
                // always 10 and that the second varies per word (possibly a frequency).
                BinFileHelper.ReadInt16(fs);
                BinFileHelper.ReadInt32(fs);

                pyAndWord.Add(new WordLibrary {
                    Word = word, PinYin = wordPY.ToArray(), Rank = DefaultRank
                });
                CurrentStatus++;

                // Skip 6 more bytes of unknown meaning. ReadByte is used rather than Seek so the
                // position never moves past the end of the file.
                for (int i = 0; i < 6; i++)
                {
                    fs.ReadByte();
                }
            }
            return(pyAndWord);
        }
        /// <summary>
        /// Imports words from a dictionary file whose word area starts at offset 0x350 and ends
        /// at the position stored at offset 0x60.
        /// </summary>
        /// <remarks>
        /// Entries with an empty word or no pinyin are skipped. An exception thrown while reading
        /// one entry is logged through <see cref="Debug"/> and reading continues.
        /// </remarks>
        /// <param name="path">Path of the dictionary file to read.</param>
        /// <returns>All usable entries found in the word area.</returns>
        public WordLibraryList Import(string path)
        {
            var wordLibraryList = new WordLibraryList();

            // The using block closes the file even when a read outside the inner try throws;
            // the original only closed it on the normal path.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position = 0x60;
                int endPosition = BinFileHelper.ReadInt32(fs);

                fs.Position   = 0x350;
                CurrentStatus = 0;
                do
                {
                    try
                    {
                        WordLibrary wl = ImportWord(fs);
                        if (wl == null)
                        {
                            break;
                        }
                        if (wl.Word != "" && wl.PinYin.Length > 0)
                        {
                            wordLibraryList.Add(wl);
                        }
                    }
                    catch (Exception ex)
                    {
                        Debug.WriteLine(ex.Message);
                    }

                    // The end-of-file check stops a corrupt file (one that never lands exactly
                    // on endPosition) from looping forever once the stream is exhausted.
                } while (fs.Position != endPosition && fs.Position < fs.Length);
            }

            return(wordLibraryList);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Reads the dictionary section whose header starts at <paramref name="offsetWithIndex"/>:
        /// parses the header fields, collects the deflate-stream offsets, inflates the compressed
        /// data and passes the result to <c>Extract</c>.
        /// </summary>
        /// <param name="fs">Open dictionary file; its position is moved by this method.</param>
        /// <param name="offsetWithIndex">Absolute file offset of the section header.</param>
        /// <returns>Whatever <c>Extract</c> produces from the inflated data.</returns>
        private IDictionary <string, string> ReadDictionary(FileStream fs, int offsetWithIndex)
        {
            fs.Position = offsetWithIndex;
            int type = BinFileHelper.ReadInt32(fs);

            // NOTE(review): the value is appended in decimal although the text carries a "0x" prefix.
            Debug.WriteLine("词典类型:0x" + type);
            int limit       = BinFileHelper.ReadInt32(fs) + offsetWithIndex + 8;        // end address of the file
            int offsetIndex = offsetWithIndex + 0x1C;                                   // address where the index starts
            int offsetCompressedDataHeader = BinFileHelper.ReadInt32(fs) + offsetIndex; // end of the index / address of the data header
            int inflatedWordsIndexLength   = BinFileHelper.ReadInt32(fs);
            int inflatedWordsLength        = BinFileHelper.ReadInt32(fs);
            int inflatedXmlLength          = BinFileHelper.ReadInt32(fs);
            // The index is treated as a table of 4-byte entries, one per definition.
            int definitions    = (offsetCompressedDataHeader - offsetIndex) / 4;
            var deflateStreams = new List <int>();

            fs.Position = offsetCompressedDataHeader + 8;
            // The first offset read here only seeds the loop condition; it is not added to the list.
            int offset = BinFileHelper.ReadInt32(fs);

            // Keep reading offsets while the last one, measured from the current position,
            // still lies before the end address.
            while (offset + fs.Position < limit)
            {
                offset = BinFileHelper.ReadInt32(fs);
                deflateStreams.Add(offset);
            }
            // The compressed data starts right after the offset table.
            long offsetCompressedData = fs.Position;

            Debug.WriteLine("索引词组数目:" + definitions);

            //CountWord = definitions;

            Debug.WriteLine("索引地址/大小:0x" + offsetIndex.ToString("x") + " / "
                            + (offsetCompressedDataHeader - offsetIndex).ToString("x") + " B");
            Debug.WriteLine("压缩数据地址/大小:0x" + (offsetCompressedData).ToString("x") + " / "
                            + (limit - offsetCompressedData).ToString("x") + " B");
            Debug.WriteLine("词组索引地址/大小(解压缩后):0x0 / " + inflatedWordsIndexLength.ToString("x") + " B");
            Debug.WriteLine("词组地址/大小(解压缩后):0x" + (inflatedWordsIndexLength).ToString("x") + " / "
                            + inflatedWordsLength.ToString("x") + " B");
            Debug.WriteLine("XML地址/大小(解压缩后):0x" + (inflatedWordsIndexLength + inflatedWordsLength).ToString("x")
                            + " / " + inflatedXmlLength.ToString("x") + " B");
            Debug.WriteLine("文件大小(解压缩后):" + (inflatedWordsIndexLength + inflatedWordsLength + inflatedXmlLength) / 1024
                            + " KB");

            byte[] inflatedFile = Inflate(fs, offsetCompressedData, deflateStreams);



            //fs.Position = offsetIndex;
            //var idxArray = new int[definitions];
            //for (int i = 0; i < definitions; i++)
            //{
            //    idxArray[i] = BinFileHelper.ReadInt32(fs);
            //}


            // In the inflated buffer the word index comes first, then the words, then the XML;
            // the two arguments are the offsets at which the words and the XML begin.
            return(Extract(inflatedFile, inflatedWordsIndexLength,
                           inflatedWordsIndexLength + inflatedWordsLength));
        }
Exemplo n.º 10
0
        /// <summary>
        /// Reads <paramref name="count"/> consecutive 32-bit offsets from the current stream position.
        /// </summary>
        /// <param name="fs">Stream positioned at the first offset.</param>
        /// <param name="count">Number of offsets to read; zero or a negative value yields an empty list.</param>
        /// <returns>The offsets in file order.</returns>
        private IList <int> ReadOffsets(FileStream fs, int count)
        {
            var offsets   = new List <int>();
            int remaining = count;

            while (remaining-- > 0)
            {
                offsets.Add(BinFileHelper.ReadInt32(fs));
            }

            return(offsets);
        }
Exemplo n.º 11
0
        /// <summary>
        /// Reads the word and the explanation(s) of one dictionary entry from the inflated data.
        /// </summary>
        /// <param name="inflatedBytes">The inflated dictionary data.</param>
        /// <param name="offsetWords">Offset in <paramref name="inflatedBytes"/> at which the word data starts.</param>
        /// <param name="offsetXml">Offset in <paramref name="inflatedBytes"/> at which the XML data starts.</param>
        /// <param name="dataLen">Multiplied by an entry number to locate that entry's index record.</param>
        /// <param name="wordStringDecoder">Encoding used to decode the word bytes.</param>
        /// <param name="xmlStringDecoder">Encoding used to decode the XML bytes.</param>
        /// <param name="i">Number of the entry to read.</param>
        /// <returns>
        /// The entry's word together with its descriptions, each stored under the XML end offset
        /// it was read up to.
        /// </returns>
        private InternalWord ReadDefinitionData(byte[] inflatedBytes, int offsetWords,
                                                int offsetXml, int dataLen, Encoding wordStringDecoder,
                                                Encoding xmlStringDecoder, int i)
        {
            var idxData = new int[6];

            // Presumably fills idxData with the six index values of entry i - confirm against GetIdxData.
            GetIdxData(inflatedBytes, dataLen * i, idxData);
            int lastWordPos       = idxData[0];
            int lastXmlPos        = idxData[1];
            int flags             = idxData[2];
            int refs              = idxData[3]; // how many explanations this word has
            int currentWordOffset = idxData[4]; // offset of the word
            int currenXmlOffset   = idxData[5]; // offset of the explanation XML

            InternalWord word = new InternalWord();

            // The entry's own XML spans from lastXmlPos to currenXmlOffset within the XML area.
            string xml = xmlStringDecoder.GetString(inflatedBytes, offsetXml + lastXmlPos, currenXmlOffset - lastXmlPos);

            if (!string.IsNullOrEmpty(xml))
            {
                word.Descriptions.Add(currenXmlOffset, xml);
            }
            // Each reference is a 4-byte entry number stored in front of the word text; the XML
            // of the referenced entry is added as a further description.
            while (refs-- > 0)
            {
                int position = (offsetWords + lastWordPos);
                int ref1     = BitConverter.ToInt32(inflatedBytes, position);
                GetIdxData(inflatedBytes, dataLen * ref1, idxData);
                lastXmlPos      = idxData[1];
                currenXmlOffset = idxData[5];
                //if (string.IsNullOrEmpty(xml))
                //{
                xml = xmlStringDecoder.GetString(inflatedBytes, offsetXml + lastXmlPos, currenXmlOffset - lastXmlPos);
                word.Descriptions.Add(currenXmlOffset, xml);
                //}
                //else
                //{
                //    xml = xmlStringDecoder.GetString(inflatedBytes, offsetXml + lastXmlPos, currenXmlOffset - lastXmlPos);
                //}
                lastWordPos += 4;
            }
            //defData[1] = xml;

            // lastWordPos now points past the reference numbers, at the word text itself.
            int position1 = offsetWords + lastWordPos;

            byte[] w = BinFileHelper.ReadArray(inflatedBytes, position1, currentWordOffset - lastWordPos);
            word.Word = wordStringDecoder.GetString(w);
            //if (word == "buy" || word == "bought")
            //{
            //    Debug.Write("Refs:" + currenXmlOffset);
            //}
            //defData[0] = word;
            //return new KeyValuePair<string, string>(word, xml);
            return(word);
        }
            /// <summary>
            /// Populates this record from the stream: six fields are read in file order and the
            /// four bytes that follow them are skipped.
            /// </summary>
            /// <param name="fs">Stream positioned at the first byte of the record.</param>
            public void Parse(FileStream fs)
            {
                Offset    = BinFileHelper.ReadInt32(fs);
                Frequency = BinFileHelper.ReadUInt16(fs);
                AFlag     = BinFileHelper.ReadUInt16(fs);
                I8        = BinFileHelper.ReadUInt32(fs);
                P1        = BinFileHelper.ReadUInt16(fs);
                IE        = BinFileHelper.ReadInt32(fs);

                // Step over the 4 bytes after the fields.
                fs.Position += 4;
            }
Exemplo n.º 13
0
        //4字节使用同一个拼音的词条数x,2字节拼音长度n,n字节拼音的编号,(2字节汉字的长度y,y*2字节汉字的内容Unicode编码,2字节词频,2字节未知,4字节未知)*x

        /// <summary>
        /// Imports words from a dictionary file whose word count is stored at offset 0x18 and
        /// whose pinyin groups start at offset 0x30.
        /// </summary>
        /// <remarks>
        /// Each group as read here: 2-byte word count, 2 bytes of unknown meaning, 2-byte pinyin
        /// byte length, the pinyin indices (2 bytes each), then per word a 2-byte byte length,
        /// the UTF-16 text, a 2-byte value used as rank, and 6 further bytes that are ignored.
        /// A word whose character count differs from its pinyin count is logged and skipped.
        /// </remarks>
        /// <param name="path">Path of the dictionary file to read.</param>
        /// <returns>All entries whose pinyin count matches their word length.</returns>
        public WordLibraryList Import(string path)
        {
            var pyAndWord = new WordLibraryList();

            // The using block guarantees the file handle is released; the original never closed it.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position   = 0x18;
                CountWord     = BinFileHelper.ReadInt32(fs);
                CurrentStatus = 0;
                fs.Position   = 0x30;

                // The end-of-file check stops a truncated file, or one overstating its word
                // count, from looping forever.
                while (CurrentStatus < CountWord && fs.Position < fs.Length)
                {
                    int samePyCount = BinFileHelper.ReadInt16(fs);
                    BinFileHelper.ReadInt16(fs); // unknown
                    short pyLength  = BinFileHelper.ReadInt16(fs);
                    var   pyArray   = new string[pyLength / 2];
                    for (int i = 0; i < pyLength / 2; i++)
                    {
                        short idx = BinFileHelper.ReadInt16(fs);
                        try
                        {
                            pyArray[i] = PinYinDic[idx];
                        }
                        catch
                        {
                            // Unknown pinyin index: keep a placeholder so the array length is preserved.
                            pyArray[i] = "--";
                        }
                    }
                    for (int i = 0; i < samePyCount; i++)
                    {
                        short wordByteLength = BinFileHelper.ReadInt16(fs);
                        var   wordArray      = new byte[wordByteLength];
                        fs.Read(wordArray, 0, wordByteLength);
                        string word  = Encoding.Unicode.GetString(wordArray);
                        short  count = BinFileHelper.ReadInt16(fs);
                        BinFileHelper.ReadInt16(fs); // second 2-byte value, unused
                        BinFileHelper.ReadInt32(fs); // unknown
                        if (pyArray.Length == word.Length)
                        {
                            var wl = new WordLibrary {
                                Rank = count, Word = word, PinYin = pyArray
                            };
                            pyAndWord.Add(wl);
                        }
                        else
                        {
                            Debug.WriteLine("Error data: word:[" + word + "] pinyin:[" + string.Join(",", pyArray) + "]");
                        }
                        CurrentStatus++;
                    }
                }
            }

            return(pyAndWord);
        }
Exemplo n.º 14
0
        /// <summary>
        /// Imports phrases from a "Freeime Dictionary" file.
        /// </summary>
        /// <remarks>
        /// The code type is taken from the two UTF-16 characters at offset 0x23 ("拼音" for pinyin,
        /// "五笔" for Wubi98). Phrase records are read from offset 0x1B620 to the end of the file.
        /// </remarks>
        /// <param name="path">Path of the dictionary file to read.</param>
        /// <returns>All phrases for which <c>ReadOnePhrase</c> returned a non-null entry.</returns>
        /// <exception cref="NotImplementedException">The header names an unsupported code type.</exception>
        public WordLibraryList Import(string path)
        {
            var pyAndWord = new WordLibraryList();

            // The using block closes the file on every path, including the exception thrown
            // below for unknown dictionaries; the original leaked the handle there.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position = 0x00;
                var headerstr = "Freeime Dictionary";
                var header    = Encoding.ASCII.GetString(BinFileHelper.ReadArray(fs, headerstr.Length));

                Debug.Assert(header.Equals(headerstr));

                DictCodeType curType;

                fs.Position = 0x23;
                var headerTypeBytes = BinFileHelper.ReadArray(fs, 4);
                var headerTypeStr   = Encoding.Unicode.GetString(headerTypeBytes);

                if (headerTypeStr.Equals("拼音"))
                {
                    curType = DictCodeType.Pinyin;
                }
                else if (headerTypeStr.Equals("五笔"))
                {
                    curType = DictCodeType.Wubi98;
                }
                else
                {
                    throw new NotImplementedException("未知词库,请在反馈中提交文件");
                }

                var phrase_start = 0x1B620; // where the entry for 'a' is located

                fs.Position = phrase_start;
                do
                {
                    var wl = ReadOnePhrase(fs, curType);
                    if (wl != null)
                    {
                        pyAndWord.Add(wl);
                    }

                    // ">=" rather than "==": a position beyond the end must also terminate the loop.
                } while (fs.Position < fs.Length);
            }

            return(pyAndWord);
        }
Exemplo n.º 15
0
        /// <summary>
        /// Reads one pinyin group: a shared pinyin sequence followed by every word that uses it.
        /// </summary>
        /// <remarks>
        /// Layout as read here: 2-byte word count, 2-byte pinyin byte length, the pinyin indices
        /// (2 bytes each, looked up in <c>pyDic</c>), then per word a 2-byte byte length, the
        /// UTF-16 text, a 2-byte count, and 10 further bytes whose meaning is unknown.
        /// </remarks>
        /// <param name="fs">Stream positioned at the start of the group.</param>
        /// <returns>One entry per word in the group, all sharing the same pinyin.</returns>
        private IList <WordLibrary> ReadAPinyinWord(FileStream fs)
        {
            var header = new byte[4];

            fs.Read(header, 0, 4);
            int samePYcount = header[0] + header[1] * 256;
            int count       = header[2] + header[3] * 256;

            // Size the buffer from the declared length. The previous fixed 256-byte buffer threw
            // IndexOutOfRangeException whenever the 16-bit length field exceeded 256.
            var pinyinBytes = new byte[count];

            for (int i = 0; i < count; i++)
            {
                pinyinBytes[i] = (byte)fs.ReadByte();
            }

            var wordPY = new List <string>();

            for (int i = 0; i < count / 2; i++)
            {
                int key = pinyinBytes[i * 2] + pinyinBytes[i * 2 + 1] * 256;
                wordPY.Add(pyDic[key]);
            }

            var pyAndWord = new List <WordLibrary>();

            for (int s = 0; s < samePYcount; s++) // homophones sharing the pinyin read above
            {
                var lengthBytes = new byte[2];
                fs.Read(lengthBytes, 0, 2);
                int hzBytecount = lengthBytes[0] + lengthBytes[1] * 256;

                var wordBytes = new byte[hzBytecount];
                fs.Read(wordBytes, 0, hzBytecount);
                string word    = Encoding.Unicode.GetString(wordBytes);
                short  wlcount = BinFileHelper.ReadInt16(fs);

                pyAndWord.Add(new WordLibrary {
                    Word = word, PinYin = wordPY.ToArray(), Count = wlcount
                });
                CurrentStatus++;

                // Skip the next 10 bytes, whose meaning is unknown. ReadByte is used rather than
                // Seek so the position never moves past the end of the file.
                for (int i = 0; i < 10; i++)
                {
                    fs.ReadByte();
                }
            }
            return(pyAndWord);
        }
Exemplo n.º 16
0
        /// <summary>
        /// Reads the word and the explanation of one dictionary entry from the inflated data.
        /// Explanations of referenced entries are joined in front of the entry's own, separated by ", ".
        /// </summary>
        /// <param name="inflatedBytes">The inflated dictionary data.</param>
        /// <param name="offsetWords">Offset in <paramref name="inflatedBytes"/> at which the word data starts.</param>
        /// <param name="offsetXml">Offset in <paramref name="inflatedBytes"/> at which the XML data starts.</param>
        /// <param name="dataLen">Multiplied by an entry number to locate that entry's index record.</param>
        /// <param name="wordStringDecoder">Encoding used to decode the word bytes.</param>
        /// <param name="xmlStringDecoder">Encoding used to decode the XML bytes.</param>
        /// <param name="i">Number of the entry to read.</param>
        /// <returns>A pair whose Key is the word and whose Value is the explanation.</returns>
        private KeyValuePair <string, string> ReadDefinitionData(byte[] inflatedBytes, int offsetWords,
                                                                 int offsetXml, int dataLen, Encoding wordStringDecoder,
                                                                 Encoding xmlStringDecoder, int i)
        {
            var idxData = new int[6];

            // Presumably fills idxData with the six index values of entry i - confirm against GetIdxData.
            GetIdxData(inflatedBytes, dataLen * i, idxData);
            int lastWordPos       = idxData[0];
            int lastXmlPos        = idxData[1];
            int flags             = idxData[2];
            int refs              = idxData[3];
            int currentWordOffset = idxData[4];
            int currenXmlOffset   = idxData[5];


            // The entry's own XML spans from lastXmlPos to currenXmlOffset within the XML area.
            string xml = xmlStringDecoder.GetString(inflatedBytes, offsetXml + lastXmlPos, currenXmlOffset - lastXmlPos);

            // Each reference is a 4-byte entry number stored in front of the word text; the XML
            // of the referenced entry is prepended to what has been collected so far.
            while (refs-- > 0)
            {
                int position = (offsetWords + lastWordPos);
                int ref1     = BitConverter.ToInt32(inflatedBytes, position);
                GetIdxData(inflatedBytes, dataLen * ref1, idxData);
                lastXmlPos      = idxData[1];
                currenXmlOffset = idxData[5];
                if (string.IsNullOrEmpty(xml))
                {
                    xml = xmlStringDecoder.GetString(inflatedBytes, offsetXml + lastXmlPos, currenXmlOffset - lastXmlPos);
                }
                else
                {
                    xml =
                        xmlStringDecoder.GetString(inflatedBytes, offsetXml + lastXmlPos, currenXmlOffset - lastXmlPos) +
                        ", " + xml;
                }
                lastWordPos += 4;
            }
            //defData[1] = xml;

            // lastWordPos now points past the reference numbers, at the word text itself.
            int position1 = offsetWords + lastWordPos;

            byte[] w    = BinFileHelper.ReadArray(inflatedBytes, position1, currentWordOffset - lastWordPos);
            string word = wordStringDecoder.GetString(w);

            //defData[0] = word;
            return(new KeyValuePair <string, string>(word, xml));
        }
Exemplo n.º 17
0
        /// <summary>
        /// Opens an LD2 file, logs its header information, and reads the dictionary section
        /// if the file contains one.
        /// </summary>
        /// <param name="ld2File">Path of the LD2 file to parse.</param>
        /// <returns>
        /// The result of <c>ReadDictionary</c>, or <c>null</c> when the file holds no dictionary data.
        /// </returns>
        public IList <InternalWord> Parse(string ld2File)
        {
            const string noDictionaryData = "文件不包含字典数据。网上字典?";

            using (var fs = new FileStream(ld2File, FileMode.Open, FileAccess.Read))
            {
                Debug.WriteLine("文件:" + ld2File);
                string fileType = Encoding.ASCII.GetString(BinFileHelper.ReadArray(fs, 4));
                Debug.WriteLine("类型:" + fileType);

                // The reads stay inside the Debug.WriteLine arguments on purpose: those calls,
                // arguments included, are compiled out of non-DEBUG builds.
                fs.Position = 0x18;
                Debug.WriteLine("版本:" + BinFileHelper.ReadInt16(fs) + "." + BinFileHelper.ReadInt16(fs));
                Debug.WriteLine("ID: 0x" + (BinFileHelper.ReadInt64(fs).ToString("x")));

                fs.Position = 0x5c;
                int offsetData = BinFileHelper.ReadInt32(fs) + 0x60;

                // The data offset lies at or beyond the end of the file: nothing to read.
                if (fs.Length <= offsetData)
                {
                    Debug.WriteLine(noDictionaryData);
                    return(null);
                }

                Debug.WriteLine("简介地址:0x" + (offsetData).ToString("x"));
                fs.Position = offsetData;
                int type = BinFileHelper.ReadInt32(fs);
                Debug.WriteLine("简介类型:0x" + (type).ToString("x"));

                fs.Position = offsetData + 4;
                int offsetWithInfo = BinFileHelper.ReadInt32(fs) + offsetData + 12;

                if (type == 3)
                {
                    // Type 3: no additional information block, the dictionary starts right here.
                    return(ReadDictionary(fs, offsetData));
                }

                if (fs.Length > offsetWithInfo - 0x1C)
                {
                    return(ReadDictionary(fs, offsetWithInfo));
                }

                Debug.WriteLine(noDictionaryData);
                return(null);
            }
        }
Exemplo n.º 18
0
        /// <summary>
        /// Reads the descriptive header fields of a scel dictionary file.
        /// </summary>
        /// <param name="path">Path of the scel file to read.</param>
        /// <returns>
        /// A dictionary with the keys "CountWord" (32-bit value at offset 0x124), "Name" (0x130),
        /// "Type" (0x338), "Info" (0x540) and "Sample" (0xd40).
        /// </returns>
        public static Dictionary <string, string> ReadScelInfo(string path)
        {
            Dictionary <string, string> info = new Dictionary <string, string>();

            // The using block closes the file even when one of the reads throws; the original
            // only closed it on the normal path.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position = 0x124;
                var countWord = BinFileHelper.ReadInt32(fs);

                info.Add("CountWord", countWord.ToString());

                info.Add("Name", readScelFieldText(fs, 0x130));
                info.Add("Type", readScelFieldText(fs, 0x338));
                info.Add("Info", readScelFieldText(fs, 0x540, 1024));
                info.Add("Sample", readScelFieldText(fs, 0xd40, 1024));
            }

            return(info);
        }
Exemplo n.º 19
0
        /// <summary>
        /// Reads one phrase record: a 3-byte header (code length, word length, kind marker),
        /// the ASCII code and the UTF-16 word.
        /// </summary>
        /// <param name="fs">Stream positioned at the first byte of the record.</param>
        /// <param name="type">Code type of the dictionary; decides how the code string is applied.</param>
        /// <returns>
        /// The parsed entry. When the code cannot be applied the entry is marked
        /// <c>CodeType.NoCode</c> and <c>ImportLineErrorNotice</c> is raised.
        /// </returns>
        private WordLibrary ReadOnePhrase(FileStream fs, DictCodeType type)
        {
            var entry = new WordLibrary();

            int codeLength = fs.ReadByte();
            int wordLength = fs.ReadByte();
            int kind       = fs.ReadByte();

            // 0x64 marks ordinary phrases (including mixed Chinese/Latin ones such as "阿Q");
            // 0x67 has been seen on entries like "$X[计算器]calc".
            Debug.Assert(kind == 0x64 || kind == 0x32 ||
                         kind == 0x10 || kind == 0x66 ||
                         kind == 0x67);

            var code = Encoding.ASCII.GetString(BinFileHelper.ReadArray(fs, codeLength));
            var text = Encoding.Unicode.GetString(BinFileHelper.ReadArray(fs, wordLength));

            if (kind == 0x32)
            {
                // Entries such as "醃(腌)" - the bracketed part is presumably the simplified
                // form. Provisionally only the first character is kept.
                text = text.Substring(0, 1);
            }
            Debug.Assert(text.IndexOf("(") < 0);
            entry.Word = text;

            try
            {
                if (type == DictCodeType.Pinyin)
                {
                    entry.CodeType = CodeType.Pinyin;
                    entry.SetPinyinString(code);
                }
                else if (type == DictCodeType.Wubi98)
                {
                    entry.CodeType = CodeType.Wubi98;
                    entry.SetCode(CodeType.Wubi98, code);
                }
            }
            catch
            {
                // Keep the word but flag that it carries no usable code.
                entry.CodeType = CodeType.NoCode;
                ImportLineErrorNotice?.Invoke(entry.Word + " 的编码缺失");
            }

            return(entry);
        }
Exemplo n.º 20
0
 /// <summary>
 /// Sends the chunk currently prepared by <c>BinFileHelper</c> over the serial port, appends
 /// it to the sent-bytes log, updates the counters, and prepares the next chunk.
 /// Shows a message and returns early when nothing is prepared or when sending fails.
 /// </summary>
 private void SendPreparedBytes()
 {
     bool ready = BinFileHelper.SendBytesPrepared;
     if (!ready)
     {
         LblMessages.Content = $"小姐姐们还没准备好!事情是这样的:{BinFileHelper.GetOperateMessage()}。时间:{DateTime.Now:HH:mm:ss fff}";
         return;
     }

     bool sent = SerialPortHelper.SendBytes(BinFileHelper.GetPreparedBytes());
     if (!sent)
     {
         LblMessages.Content = $"发送失败了,你猜是为什么?消息是这样的:{SerialPortHelper.LastException()}。时间:{DateTime.Now:HH:mm:ss fff}";
         return;
     }

     // Log what was just sent and keep the newest entry visible.
     string sentText = BinFileHelper.GetPreparedString() + "\r\n\r\n";
     TxtSendBytes.AppendText(sentText);
     TxtSendBytes.ScrollToEnd();

     // Refresh the "already sent" and "still waiting" counters.
     LblSendedBytesCount.Content = BinFileHelper.CountAlreadySend().ToString();
     LblWaitForSend.Content      = BinFileHelper.CountExisted().ToString();

     // Queue the next chunk and show it.
     BinFileHelper.PrepareNextBytes();
     DisplayPreparedPackageString();
 }
Exemplo n.º 21
0
        /// <summary>
        /// Reads one segment from the stream: a four-field header followed by word entries,
        /// parsed until the byte lengths they report add up to at least <c>WordByteLen</c>.
        /// </summary>
        /// <param name="stream">Stream positioned at the first byte of the segment.</param>
        public Segment(Stream stream)
        {
            IndexNumber = BinFileHelper.ReadInt32(stream);
            BinFileHelper.ReadInt32(stream); // second header field, not used

            WordLenEnums = BinFileHelper.ReadInt32(stream);
            WordByteLen  = BinFileHelper.ReadInt32(stream);

            WordLibraryList = new WordLibraryList();

            // Never used afterwards; retained so the stream's Position is still queried here.
            long segmentStart = stream.Position;
            int  consumed     = 0;

            // At least one entry is always parsed, even when WordByteLen is zero.
            while (true)
            {
                int entryLength;
                var entry = Parse(stream, out entryLength);
                consumed += entryLength;

                if (entry != null)
                {
                    WordLibraryList.Add(entry);
                }

                if (consumed >= WordByteLen)
                {
                    break;
                }
            }
        }
Exemplo n.º 22
0
        /// <summary>
        /// Reads one phrase record (pinyin string followed by the word text) from the current
        /// stream position.
        /// </summary>
        /// <param name="fs">Stream positioned at the first byte of the record.</param>
        /// <param name="nextStartPosition">Absolute position at which the following record begins.</param>
        /// <returns>The parsed entry with its pinyin applied and code type set to pinyin.</returns>
        private WordLibrary ReadOnePhrase(FileStream fs, int nextStartPosition)
        {
            var entry = new WordLibrary();

            // Record header: 4-byte value (unused), 2-byte offset of the word text,
            // 1-byte rank, 1 byte of unknown meaning.
            BinFileHelper.ReadInt32(fs);
            var wordOffset = BinFileHelper.ReadInt16(fs);

            entry.Rank = fs.ReadByte();
            fs.ReadByte();

            // The pinyin fills the space up to wordOffset; the constant 10 presumably
            // covers the 8 header bytes plus the 2-byte separator - TODO confirm.
            var pinyin = Encoding.Unicode.GetString(BinFileHelper.ReadArray(fs, wordOffset - 10));

            // 00 00 separates the pinyin from the word text.
            BinFileHelper.ReadInt16(fs);

            // The word runs up to the next record, minus the trailing 00 00.
            var wordLength = nextStartPosition - (int)fs.Position - 2;
            var rawWord    = BinFileHelper.ReadArray(fs, wordLength);

            // Consume the trailing 00 00.
            BinFileHelper.ReadInt16(fs);

            entry.Word = Encoding.Unicode.GetString(rawWord);
            entry.SetPinyinString(pinyin);
            entry.CodeType = CodeType.Pinyin;
            return(entry);
        }
Exemplo n.º 23
0
 /// <summary>
 /// Populates this record from three consecutive 32-bit values at the current stream
 /// position: <c>Offset</c> (unsigned), then <c>DataSize</c> and <c>UsedDataSize</c> (signed).
 /// </summary>
 /// <param name="fs">Stream positioned at the first byte of the record.</param>
 public void Parse(FileStream fs)
 {
     // The read order defines the on-disk field order; do not reorder.
     Offset       = BinFileHelper.ReadUInt32(fs);
     DataSize     = BinFileHelper.ReadInt32(fs);
     UsedDataSize = BinFileHelper.ReadInt32(fs);
 }
Exemplo n.º 24
0
        /// <summary>
        /// Reads a qcel dictionary file: rebuilds the pinyin table in <c>pyDic</c>, then collects
        /// the entries returned by <c>ReadAPinyinWord</c> until the end of the file is reached.
        /// </summary>
        /// <param name="path">Path of the dictionary file, opened read-only.</param>
        /// <returns>
        /// Every entry that could be read. A record whose parsing throws is logged through
        /// <c>Debug.WriteLine</c> and skipped.
        /// </returns>
        private WordLibraryList ReadQcel(string path)
        {
            pyDic = new Dictionary<int, string>();
            var pyAndWord = new WordLibraryList();

            // The using block closes the stream on every exit path, including an exception
            // thrown while the pinyin table is being read.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                // Layout notes kept from earlier debugging code: the file starts with
                // \x40\x15\x00\x00\x44\x43\x53\x01, and text was dumped from 0x130 (dictionary name),
                // 0x338 (category), 0x540 (information) and 0xd40 (sample words).
                fs.Position   = 0x124;
                CountWord     = BinFileHelper.ReadInt32(fs);
                CurrentStatus = 0;

                // Pinyin table: skip four bytes at 0x1540 (\x9D\x01\x00\x00 in the sample file), then
                // read entries made of a 4-byte header followed by the UTF-16 pinyin text.
                fs.Position = 0x1540;
                var tableHeader = new byte[4];
                fs.Read(tableHeader, 0, 4);
                while (true)
                {
                    var entryHeader = new byte[4];
                    fs.Read(entryHeader, 0, 4);
                    int mark    = entryHeader[0] + entryHeader[1] * 256; // little-endian index of the syllable
                    var pyBytes = new byte[entryHeader[2]];              // third header byte = text length in bytes
                    fs.Read(pyBytes, 0, pyBytes.Length);
                    string py = Encoding.Unicode.GetString(pyBytes);
                    pyDic.Add(mark, py);
                    if (py == "zuo") // last pinyin in the table
                    {
                        break;
                    }
                }

                // Debug aid: dump the table as a quoted, comma-separated list.
                var s = new StringBuilder();
                foreach (string value in pyDic.Values)
                {
                    s.Append(value).Append("\",\"");
                }
                Debug.WriteLine(s.ToString());

                fs.Position = 0x2628;

                // Checking the position before each read (instead of testing for equality with the
                // length afterwards) also terminates when the start offset lies beyond the end of a
                // short file.
                while (fs.Position < fs.Length)
                {
                    long positionBefore = fs.Position;
                    try
                    {
                        pyAndWord.AddRange(ReadAPinyinWord(fs));
                    }
                    catch (Exception ex)
                    {
                        Debug.WriteLine(ex.Message);
                    }

                    // A pass that consumed nothing would repeat forever, so stop here.
                    if (fs.Position <= positionBefore)
                    {
                        break;
                    }
                }
            }
            return pyAndWord;
        }
Exemplo n.º 25
0
        /// <summary>
        /// Reads a scel dictionary file: rebuilds the pinyin table in <c>pyDic</c>, then collects
        /// the entries returned by <c>ReadAPinyinWord</c> until the end of the file is reached.
        /// </summary>
        /// <param name="path">Path of the dictionary file, opened read-only.</param>
        /// <returns>
        /// Every entry that could be read. A record whose parsing throws is logged through
        /// <c>Debug.WriteLine</c> and skipped.
        /// </returns>
        private WordLibraryList ReadScel(string path)
        {
            pyDic = new Dictionary<int, string>();
            var pyAndWord = new WordLibraryList();

            // The using block closes the stream on every exit path, including an exception
            // thrown while the pinyin table is being read.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                var header = new byte[128];
                fs.Read(header, 0, 128); // sample header: \x40\x15\x00\x00\x44\x43\x53\x01

                // The fifth header byte selects where the word records start.
                // NOTE(review): any other value leaves the start offset at 0, so parsing begins at
                // the file header — confirm whether further variants exist.
                int hzPosition = 0;
                if (header[4] == 0x44)
                {
                    hzPosition = 0x2628;
                }

                if (header[4] == 0x45)
                {
                    hzPosition = 0x26C4;
                }

                fs.Position   = 0x124;
                CountWord     = BinFileHelper.ReadInt32(fs);
                CurrentStatus = 0;

                // Pinyin table: skip four bytes at 0x1540 (\x9D\x01\x00\x00 in the sample file), then
                // read entries made of a 4-byte header followed by the UTF-16 pinyin text.
                fs.Position = 0x1540;
                var tableHeader = new byte[4];
                fs.Read(tableHeader, 0, 4);
                while (true)
                {
                    var entryHeader = new byte[4];
                    fs.Read(entryHeader, 0, 4);
                    int mark  = entryHeader[0] + entryHeader[1] * 256; // little-endian index of the syllable
                    int pyLen = entryHeader[2];                        // third header byte = text length in bytes

                    // Keep the zero-padded 128-byte buffer for ordinary entries, but grow it when the
                    // length byte is larger so the read can no longer overrun the buffer.
                    var pyBytes = new byte[pyLen > 128 ? pyLen : 128];
                    fs.Read(pyBytes, 0, pyLen);
                    string py = Encoding.Unicode.GetString(pyBytes);

                    // Cut at the first NUL only when there is one; an entry that fills the whole
                    // buffer has no padding to strip.
                    int terminator = py.IndexOf('\0');
                    if (terminator >= 0)
                    {
                        py = py.Substring(0, terminator);
                    }

                    pyDic.Add(mark, py);
                    if (py == "zuo") // last pinyin in the table
                    {
                        break;
                    }
                }

                // Debug aid: dump the table as a quoted, comma-separated list.
                var s = new StringBuilder();
                foreach (string value in pyDic.Values)
                {
                    s.Append(value).Append("\",\"");
                }

                Debug.WriteLine(s.ToString());

                fs.Position = hzPosition;

                // Checking the position before each read (instead of testing for equality with the
                // length afterwards) also terminates when the start offset lies beyond the end of a
                // short file.
                while (fs.Position < fs.Length)
                {
                    long positionBefore = fs.Position;
                    try
                    {
                        pyAndWord.AddRange(ReadAPinyinWord(fs));
                    }
                    catch (System.Exception ex)
                    {
                        Debug.WriteLine(ex.Message);
                    }

                    // A pass that consumed nothing would repeat forever, so stop here.
                    if (fs.Position <= positionBefore)
                    {
                        break;
                    }
                }
            }

            return pyAndWord;
        }
Exemplo n.º 26
0
        /// <summary>
        /// Extracts the entries of a qpyd dictionary file as text, one
        /// "word&lt;TAB&gt;pinyin" line per entry.
        /// </summary>
        /// <remarks>
        /// Sets <c>CountWord</c> from the value stored at 0x44, resets <c>CurrentStatus</c> to 0 and
        /// increments it once per extracted entry.
        /// </remarks>
        /// <param name="qqydFile">Path of the dictionary file, opened read-only.</param>
        /// <returns>All entries, each terminated by "\n".</returns>
        private string ParseQpyd(string qqydFile)
        {
            byte[] byteArray;

            // The using blocks release both streams even when reading or inflating throws.
            using (var fs = new FileStream(qqydFile, FileMode.Open, FileAccess.Read))
            {
                // Offset of the compressed payload, stored at 0x38.
                fs.Position = 0x38;
                var startAddressByte = new byte[4];
                fs.Read(startAddressByte, 0, 4);
                int startAddress = BitConverter.ToInt32(startAddressByte, 0);

                fs.Position = 0x44;
                int wordCount = BinFileHelper.ReadInt32(fs);

                CountWord     = wordCount;
                CurrentStatus = 0;

                fs.Position = startAddress;
                using (var zipStream = new InflaterInputStream(fs))
                using (var unzipped = new MemoryStream())
                {
                    // Copy the inflated data in 2 KB chunks instead of appending byte by byte.
                    var buffer = new byte[2048];
                    int readCount;
                    while ((readCount = zipStream.Read(buffer, 0, buffer.Length)) > 0)
                    {
                        unzipped.Write(buffer, 0, readCount);
                    }
                    byteArray = unzipped.ToArray();
                }
            }

            // The inflated data starts with 10-byte index records. The first record's pinyin
            // address marks where the records end and the text data begins.
            int unzippedDictStartAddr = -1;
            int idx = 0;
            var sb  = new StringBuilder();

            while (unzippedDictStartAddr == -1 || idx < unzippedDictStartAddr)
            {
                // Record layout used here: byte 0 = pinyin length, byte 1 = word length (both in
                // bytes), bytes 6-9 = address of the pinyin text; the word follows the pinyin.
                int pinyinStartAddr = BitConverter.ToInt32(byteArray, idx + 0x6);
                int pinyinLength    = byteArray[idx];
                int wordStartAddr   = pinyinStartAddr + pinyinLength;
                int wordLength      = byteArray[idx + 0x1];
                if (unzippedDictStartAddr == -1)
                {
                    unzippedDictStartAddr = pinyinStartAddr;
                    // "x" is the hexadecimal format; the former "0x" was a custom format that
                    // printed the decimal digits followed by a literal 'x'.
                    Debug.WriteLine("词库地址(解压后):0x" + unzippedDictStartAddr.ToString("x") + "\n");
                }

                string pinyin = Encoding.UTF8.GetString(byteArray, pinyinStartAddr, pinyinLength);
                string word   = Encoding.Unicode.GetString(byteArray, wordStartAddr, wordLength);
                sb.Append(word).Append("\t").Append(pinyin).Append("\n");
                Debug.WriteLine(word + "\t" + pinyin);
                CurrentStatus++;

                // Advance to the next index record.
                idx += 0xa;
            }
            return sb.ToString();
        }
Exemplo n.º 27
0
 /// <summary>
 /// Shows the string returned by <c>BinFileHelper.GetPreparedString</c> in
 /// <c>TxtPreNextPackage</c>.
 /// </summary>
 private void DisplayPreparedPackageString()
 {
     var preparedText = BinFileHelper.GetPreparedString();

     TxtPreNextPackage.Text = preparedText;
 }
 /// <summary>
 /// Fills this instance from two consecutive 32-bit values read from <paramref name="fs"/>
 /// and records the stream position reached after them.
 /// </summary>
 /// <param name="fs">Stream positioned at the first value to read.</param>
 public void Parse(FileStream fs)
 {
     // The reads are order-dependent: each helper call consumes the next value in the stream.
     this.Offset = BinFileHelper.ReadInt32(fs);
     this.Count  = BinFileHelper.ReadInt32(fs);

     // Captured only after both reads.
     this.EndPosition = fs.Position;
 }