Пример #1
0
        /// <summary>
        /// Reads one phrase record starting at the stream's current position and
        /// turns it into a <see cref="WordLibrary"/> entry carrying a pinyin code.
        /// </summary>
        /// <param name="fs">Stream positioned on the first byte of the record.</param>
        /// <param name="nextStartPosition">
        /// Absolute stream offset used to derive the length of the word text
        /// (presumably where the following record begins; confirm against the caller).
        /// </param>
        /// <returns>
        /// The populated entry. When the pinyin string is rejected, the entry is still
        /// returned with <c>CodeType.NoCode</c> and <c>ImportLineErrorNotice</c> is raised.
        /// </returns>
        private WordLibrary ReadOnePhrase(FileStream fs, int nextStartPosition)
        {
            var entry = new WordLibrary();

            // Fixed-size header. Fields whose values are not needed are still read
            // so that the stream advances past them.
            BinFileHelper.ReadInt32(fs);                    // leading 4-byte value (named "magic" originally)
            var hanziOffset = BinFileHelper.ReadInt16(fs);
            entry.Rank = fs.ReadByte();
            fs.ReadByte();                                  // purpose unknown
            BinFileHelper.ReadInt64(fs);                    // newer 8-byte field, meaning unknown

            // Pinyin text, UTF-16. The constant 18 presumably covers the 16 header
            // bytes read above plus 2 more; TODO confirm against the format.
            var pinyinBytes = BinFileHelper.ReadArray(fs, hanziOffset - 18);
            var pinyin      = Encoding.Unicode.GetString(pinyinBytes);

            // 00 00 separates the pinyin from the Chinese characters.
            BinFileHelper.ReadInt16(fs);

            // The word runs up to the trailing 00 00 that sits before nextStartPosition.
            var wordLength = nextStartPosition - (int)fs.Position - 2;
            var wordBytes  = BinFileHelper.ReadArray(fs, wordLength);

            // Trailing 00 00 separator.
            BinFileHelper.ReadInt16(fs);

            entry.Word = Encoding.Unicode.GetString(wordBytes);

            try
            {
                entry.SetPinyinString(pinyin);
                entry.CodeType = CodeType.Pinyin;
            }
            catch
            {
                // Keep the word but flag it as having no code, and notify listeners.
                entry.CodeType = CodeType.NoCode;
                ImportLineErrorNotice?.Invoke(entry.Word + " 的编码缺失");
            }

            return entry;
        }
Пример #2
0
        /// <summary>
        /// Reads one phrase record starting at the stream's current position and
        /// turns it into a <see cref="WordLibrary"/> entry carrying a Wubi98 code.
        /// </summary>
        /// <param name="fs">Stream positioned on the first byte of the record.</param>
        /// <param name="nextStartPosition">
        /// Absolute stream offset used to derive the length of the word text
        /// (presumably where the following record begins; confirm against the caller).
        /// </param>
        /// <returns>
        /// The populated entry, or <c>null</c> when <c>SetCode</c> throws for the code string.
        /// </returns>
        private WordLibrary ReadOnePhrase(FileStream fs, int nextStartPosition)
        {
            var entry = new WordLibrary();

            // Fixed-size header. Fields whose values are not needed are still read
            // so that the stream advances past them.
            BinFileHelper.ReadInt32(fs);                    // leading 4-byte value (named "magic" originally)
            var hanziOffset = BinFileHelper.ReadInt16(fs);
            entry.Rank = fs.ReadByte();
            fs.ReadByte();                                  // purpose unknown
            BinFileHelper.ReadInt64(fs);                    // newer 8-byte field, meaning unknown

            // Code text, UTF-16; its byte length is derived from the header offset.
            var codeBytes = BinFileHelper.ReadArray(fs, hanziOffset - 18);
            var wubiCode  = Encoding.Unicode.GetString(codeBytes);

            // 00 00 separates the code from the Chinese characters.
            BinFileHelper.ReadInt16(fs);

            // The word runs up to the trailing 00 00 that sits before nextStartPosition.
            var wordLength = nextStartPosition - (int)fs.Position - 2;
            var wordBytes  = BinFileHelper.ReadArray(fs, wordLength);

            // Trailing 00 00 separator.
            BinFileHelper.ReadInt16(fs);

            entry.Word = Encoding.Unicode.GetString(wordBytes);

            try
            {
                entry.SetCode(CodeType.Wubi98, wubiCode);
            }
            catch
            {
                // A code that cannot be applied drops the whole entry.
                return null;
            }

            entry.CodeType = CodeType.Wubi98;
            return entry;
        }
Пример #3
0
        /// <summary>
        /// Reads the word text and the XML definitions of a single dictionary entry.
        /// </summary>
        /// <param name="inflatedBytes">Buffer holding the index, the word data and the XML data.</param>
        /// <param name="offsetWords">Base offset of the word data inside <paramref name="inflatedBytes"/>.</param>
        /// <param name="offsetXml">Base offset of the XML data inside <paramref name="inflatedBytes"/>.</param>
        /// <param name="dataLen">Multiplier applied to an entry number to locate its index record.</param>
        /// <param name="wordStringDecoder">Encoding used to decode the word text.</param>
        /// <param name="xmlStringDecoder">Encoding used to decode the XML definitions.</param>
        /// <param name="i">Number of the entry to read.</param>
        /// <returns>
        /// An <see cref="InternalWord"/> whose <c>Word</c> is the decoded text and whose
        /// <c>Descriptions</c> holds each decoded definition keyed by its XML end offset.
        /// </returns>
        private InternalWord ReadDefinitionData(byte[] inflatedBytes, int offsetWords,
                                                int offsetXml, int dataLen, Encoding wordStringDecoder,
                                                Encoding xmlStringDecoder, int i)
        {
            var index = new int[6];
            GetIdxData(inflatedBytes, dataLen * i, index);

            int wordCursor     = index[0]; // where this entry's word data starts
            int xmlStart       = index[1]; // where this entry's XML starts
            int referenceCount = index[3]; // how many definitions this word has
            int wordEnd        = index[4]; // offset position of the word
            int xmlEnd         = index[5]; // offset position of the definition XML

            var result = new InternalWord();

            // The entry's own definition is only recorded when it is non-empty.
            string definition = xmlStringDecoder.GetString(inflatedBytes, offsetXml + xmlStart, xmlEnd - xmlStart);
            if (!string.IsNullOrEmpty(definition))
            {
                result.Descriptions.Add(xmlEnd, definition);
            }

            // The word data begins with one 4-byte entry number per reference;
            // each referenced entry contributes its definition unconditionally.
            for (int remaining = referenceCount; remaining > 0; remaining--)
            {
                int referencedEntry = BitConverter.ToInt32(inflatedBytes, offsetWords + wordCursor);
                GetIdxData(inflatedBytes, dataLen * referencedEntry, index);
                xmlStart = index[1];
                xmlEnd   = index[5];

                definition = xmlStringDecoder.GetString(inflatedBytes, offsetXml + xmlStart, xmlEnd - xmlStart);
                result.Descriptions.Add(xmlEnd, definition);

                wordCursor += 4;
            }

            // Whatever follows the reference numbers, up to wordEnd, is the word text.
            byte[] wordBytes = BinFileHelper.ReadArray(inflatedBytes, offsetWords + wordCursor, wordEnd - wordCursor);
            result.Word = wordStringDecoder.GetString(wordBytes);

            return result;
        }
Пример #4
0
        /// <summary>
        /// Imports every phrase from a "Freeime Dictionary" binary file.
        /// </summary>
        /// <param name="path">Path of the dictionary file to read.</param>
        /// <returns>All entries for which <c>ReadOnePhrase</c> returned a non-null result.</returns>
        /// <exception cref="NotImplementedException">
        /// The type marker at offset 0x23 is neither "拼音" nor "五笔".
        /// </exception>
        /// <remarks>
        /// The stream is disposed on every exit path, including when a read or a
        /// phrase parse throws. A file that ends at or before the phrase table
        /// yields an empty list instead of attempting to read past the end.
        /// </remarks>
        public WordLibraryList Import(string path)
        {
            const string expectedHeader = "Freeime Dictionary";
            const int    codeTypeOffset = 0x23;
            const int    phraseStart    = 0x1B620; // where the 'a' entries are located

            var pyAndWord = new WordLibraryList();

            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                // Sanity-check the ASCII signature at the very start of the file (debug builds only).
                fs.Position = 0x00;
                var header = Encoding.ASCII.GetString(BinFileHelper.ReadArray(fs, expectedHeader.Length));
                Debug.Assert(header.Equals(expectedHeader));

                // Two UTF-16 characters at 0x23 name the code scheme of the dictionary.
                fs.Position = codeTypeOffset;
                var headerTypeStr = Encoding.Unicode.GetString(BinFileHelper.ReadArray(fs, 4));

                DictCodeType curType;
                if (headerTypeStr.Equals("拼音"))
                {
                    curType = DictCodeType.Pinyin;
                }
                else if (headerTypeStr.Equals("五笔"))
                {
                    curType = DictCodeType.Wubi98;
                }
                else
                {
                    throw new NotImplementedException("未知词库,请在反馈中提交文件");
                }

                // Phrases are stored back to back from phraseStart to the end of the file.
                // Comparing with '<' (rather than testing for equality after each read)
                // also terminates when the position is already beyond the end of a short file.
                fs.Position = phraseStart;
                while (fs.Position < fs.Length)
                {
                    var wl = ReadOnePhrase(fs, curType);
                    if (wl != null)
                    {
                        pyAndWord.Add(wl);
                    }
                }
            }

            return pyAndWord;
        }
Пример #5
0
        /// <summary>
        /// Reads the word text and the combined definition text of a single dictionary entry.
        /// </summary>
        /// <param name="inflatedBytes">Buffer holding the index, the word data and the XML data.</param>
        /// <param name="offsetWords">Base offset of the word data inside <paramref name="inflatedBytes"/>.</param>
        /// <param name="offsetXml">Base offset of the XML data inside <paramref name="inflatedBytes"/>.</param>
        /// <param name="dataLen">Multiplier applied to an entry number to locate its index record.</param>
        /// <param name="wordStringDecoder">Encoding used to decode the word text.</param>
        /// <param name="xmlStringDecoder">Encoding used to decode the XML definitions.</param>
        /// <param name="i">Number of the entry to read.</param>
        /// <returns>A pair whose key is the word and whose value is its definition text.</returns>
        private KeyValuePair<string, string> ReadDefinitionData(byte[] inflatedBytes, int offsetWords,
                                                                int offsetXml, int dataLen, Encoding wordStringDecoder,
                                                                Encoding xmlStringDecoder, int i)
        {
            var index = new int[6];
            GetIdxData(inflatedBytes, dataLen * i, index);

            int wordCursor     = index[0];
            int xmlStart       = index[1];
            int referenceCount = index[3];
            int wordEnd        = index[4];
            int xmlEnd         = index[5];

            // Start with the entry's own definition (may be empty).
            string definition = xmlStringDecoder.GetString(inflatedBytes, offsetXml + xmlStart, xmlEnd - xmlStart);

            // The word data begins with one 4-byte entry number per reference.
            for (int remaining = referenceCount; remaining > 0; remaining--)
            {
                int referencedEntry = BitConverter.ToInt32(inflatedBytes, offsetWords + wordCursor);
                GetIdxData(inflatedBytes, dataLen * referencedEntry, index);
                xmlStart = index[1];
                xmlEnd   = index[5];

                string referenced = xmlStringDecoder.GetString(inflatedBytes, offsetXml + xmlStart, xmlEnd - xmlStart);

                // Referenced text is placed in front of what has been gathered so far;
                // the ", " separator is only used when there is something to separate.
                definition = string.IsNullOrEmpty(definition)
                    ? referenced
                    : referenced + ", " + definition;

                wordCursor += 4;
            }

            // Whatever follows the reference numbers, up to wordEnd, is the word text.
            byte[] wordBytes = BinFileHelper.ReadArray(inflatedBytes, offsetWords + wordCursor, wordEnd - wordCursor);
            string word      = wordStringDecoder.GetString(wordBytes);

            return new KeyValuePair<string, string>(word, definition);
        }
Пример #6
0
        /// <summary>
        /// Opens an LD2 file, locates its dictionary section and reads the words from it.
        /// </summary>
        /// <param name="ld2File">Path of the file to parse.</param>
        /// <returns>
        /// The result of <c>ReadDictionary</c>, or <c>null</c> when the header offsets
        /// point outside the file (no dictionary data present).
        /// </returns>
        public IList<InternalWord> Parse(string ld2File)
        {
            using (var stream = new FileStream(ld2File, FileMode.Open, FileAccess.Read))
            {
                // Diagnostic dump of the header: file name, type tag, version and ID.
                Debug.WriteLine("文件:" + ld2File);
                byte[] typeTag  = BinFileHelper.ReadArray(stream, 4);
                string fileType = Encoding.ASCII.GetString(typeTag);
                Debug.WriteLine("类型:" + fileType);
                stream.Position = 0x18;
                Debug.WriteLine("版本:" + BinFileHelper.ReadInt16(stream) + "." + BinFileHelper.ReadInt16(stream));
                Debug.WriteLine("ID: 0x" + (BinFileHelper.ReadInt64(stream).ToString("x")));

                // The 32-bit value at 0x5c, plus 0x60, is the offset of the info section.
                stream.Position = 0x5c;
                int offsetData = BinFileHelper.ReadInt32(stream) + 0x60;
                if (stream.Length <= offsetData)
                {
                    Debug.WriteLine("文件不包含字典数据。网上字典?");
                    return null;
                }

                Debug.WriteLine("简介地址:0x" + (offsetData).ToString("x"));
                stream.Position = offsetData;
                int type = BinFileHelper.ReadInt32(stream);
                Debug.WriteLine("简介类型:0x" + (type).ToString("x"));
                stream.Position = offsetData + 4;
                int offsetWithInfo = BinFileHelper.ReadInt32(stream) + offsetData + 12;

                if (type == 3)
                {
                    // without additional information
                    return ReadDictionary(stream, offsetData);
                }

                if (stream.Length > offsetWithInfo - 0x1C)
                {
                    return ReadDictionary(stream, offsetWithInfo);
                }

                Debug.WriteLine("文件不包含字典数据。网上字典?");
                return null;
            }
        }
Пример #7
0
        /// <summary>
        /// Reads one phrase record from the stream's current position: two length
        /// bytes, a marker byte, the ASCII code and the UTF-16 word.
        /// </summary>
        /// <param name="fs">Stream positioned on the first byte of the record.</param>
        /// <param name="type">Code scheme of the dictionary; selects how the code string is applied.</param>
        /// <returns>
        /// The populated entry. When applying the code throws, the entry is still
        /// returned with <c>CodeType.NoCode</c> and <c>ImportLineErrorNotice</c> is raised.
        /// </returns>
        private WordLibrary ReadOnePhrase(FileStream fs, DictCodeType type)
        {
            var entry = new WordLibrary();

            int codeLength = fs.ReadByte();
            int wordLength = fs.ReadByte();
            int marker     = fs.ReadByte();

            // 0x64 marks an ordinary phrase (including mixed Chinese/Latin ones such as "阿Q").
            // 0x67 has been seen on entries like "$X[计算器]calc".
            bool knownMarker = marker == 0x64 || marker == 0x32 ||
                               marker == 0x10 || marker == 0x66 ||
                               marker == 0x67;
            Debug.Assert(knownMarker);

            string code = Encoding.ASCII.GetString(BinFileHelper.ReadArray(fs, codeLength));
            string text = Encoding.Unicode.GetString(BinFileHelper.ReadArray(fs, wordLength));

            if (marker == 0x32)
            {
                // Entries such as "醃(腌)": the parenthesised part is possibly the
                // simplified form. For now only the first character is kept.
                text = text.Substring(0, 1);
            }

            Debug.Assert(text.IndexOf("(") < 0);
            entry.Word = text;

            try
            {
                if (type == DictCodeType.Pinyin)
                {
                    entry.CodeType = CodeType.Pinyin;
                    entry.SetPinyinString(code);
                }
                else if (type == DictCodeType.Wubi98)
                {
                    entry.CodeType = CodeType.Wubi98;
                    entry.SetCode(CodeType.Wubi98, code);
                }
            }
            catch
            {
                // Keep the word but flag it as having no code, and notify listeners.
                entry.CodeType = CodeType.NoCode;
                ImportLineErrorNotice?.Invoke(entry.Word + " 的编码缺失");
            }

            return entry;
        }
Пример #8
0
        /// <summary>
        /// Reads one phrase record starting at the stream's current position and
        /// turns it into a <see cref="WordLibrary"/> entry carrying a pinyin code.
        /// </summary>
        /// <param name="fs">Stream positioned on the first byte of the record.</param>
        /// <param name="nextStartPosition">
        /// Absolute stream offset used to derive the length of the word text
        /// (presumably where the following record begins; confirm against the caller).
        /// </param>
        /// <returns>The populated entry. Any exception from <c>SetPinyinString</c> propagates to the caller.</returns>
        private WordLibrary ReadOnePhrase(FileStream fs, int nextStartPosition)
        {
            var entry = new WordLibrary();

            // Fixed-size header. Fields whose values are not needed are still read
            // so that the stream advances past them.
            BinFileHelper.ReadInt32(fs);                    // leading 4-byte value (named "magic" originally)
            var hanziOffset = BinFileHelper.ReadInt16(fs);
            entry.Rank = fs.ReadByte();
            fs.ReadByte();                                  // purpose unknown

            // Pinyin text, UTF-16; its byte length is derived from the header offset.
            var pinyinBytes = BinFileHelper.ReadArray(fs, hanziOffset - 10);
            var pinyin      = Encoding.Unicode.GetString(pinyinBytes);

            // 00 00 separates the pinyin from the Chinese characters.
            BinFileHelper.ReadInt16(fs);

            // The word runs up to the trailing 00 00 that sits before nextStartPosition.
            var wordLength = nextStartPosition - (int)fs.Position - 2;
            var wordBytes  = BinFileHelper.ReadArray(fs, wordLength);

            // Trailing 00 00 separator.
            BinFileHelper.ReadInt16(fs);

            entry.Word = Encoding.Unicode.GetString(wordBytes);
            entry.SetPinyinString(pinyin);
            entry.CodeType = CodeType.Pinyin;

            return entry;
        }