Пример #1
0
        /// <summary>
        /// Reads one phrase record from <paramref name="fs"/>, starting at the stream's
        /// current position, and converts it into a <see cref="WordLibrary"/> entry.
        /// </summary>
        /// <param name="fs">Stream positioned at the first byte of the record.</param>
        /// <param name="nextStartPosition">
        /// Stream position where the following record starts; the length of the word
        /// field is derived from it.
        /// </param>
        /// <returns>
        /// The entry with rank and word filled in. Its code type is Pinyin when the pinyin
        /// string was applied, or NoCode when applying it threw.
        /// </returns>
        private WordLibrary ReadOnePhrase(FileStream fs, int nextStartPosition)
        {
            // Header fields. Only the offset and the rank are used; the remaining reads
            // exist solely to advance the stream past fields whose meaning is unknown.
            BinFileHelper.ReadInt32(fs);                   // leading 4-byte field, value unused
            var hanziOffset = BinFileHelper.ReadInt16(fs);
            var rank        = fs.ReadByte();
            fs.ReadByte();                                 // purpose unknown
            BinFileHelper.ReadInt64(fs);                   // newer 8-byte field, meaning unknown

            // Pinyin field. The constant 18 is presumably the 16 header bytes read above
            // plus the 2-byte separator that follows the pinyin (TODO confirm).
            var pinyinBytes = BinFileHelper.ReadArray(fs, hanziOffset - 18);
            var pinyin      = Encoding.Unicode.GetString(pinyinBytes);
            BinFileHelper.ReadInt16(fs);                   // 00 00 separating pinyin from the word

            // The word runs up to the next record, minus the trailing 00 00.
            var wordLength = nextStartPosition - (int)fs.Position - 2;
            var wordBytes  = BinFileHelper.ReadArray(fs, wordLength);
            BinFileHelper.ReadInt16(fs);                   // trailing 00 00

            var entry = new WordLibrary
            {
                Rank = rank,
                Word = Encoding.Unicode.GetString(wordBytes)
            };

            try
            {
                entry.SetPinyinString(pinyin);
                entry.CodeType = CodeType.Pinyin;
            }
            catch
            {
                // Best effort: keep the word, mark it as having no code, and notify listeners.
                entry.CodeType = CodeType.NoCode;
                ImportLineErrorNotice?.Invoke(entry.Word + " 的编码缺失");
            }

            return entry;
        }
Пример #2
0
        /// <summary>
        /// Parses <paramref name="str"/> as XML and builds one word entry for every node
        /// matched by the XPath <c>//plist/array/dict</c>.
        /// </summary>
        /// <param name="str">XML text to import.</param>
        /// <returns>A list with one entry per matched node, in document order.</returns>
        /// <remarks>
        /// For each matched node, the first <c>string</c> child supplies the word and the
        /// second supplies the pinyin; every entry gets rank 1. <c>CountWord</c> is set to
        /// the number of matched nodes and <c>CurrentStatus</c> to the index of the node
        /// being processed.
        /// </remarks>
        public WordLibraryList ImportText(string str)
        {
            var document = new XmlDocument();
            document.LoadXml(str);

            var         result    = new WordLibraryList();
            XmlNodeList dictNodes = document.SelectNodes("//plist/array/dict");

            CountWord = dictNodes.Count;

            var index = 0;
            foreach (XmlNode dictNode in dictNodes)
            {
                var strings = dictNode.SelectNodes("string");

                var entry = new WordLibrary
                {
                    Word = strings[0].InnerText,
                    Rank = 1
                };
                entry.SetPinyinString(strings[1].InnerText);

                // Progress is reported as the zero-based index of the node just handled.
                CurrentStatus = index;
                result.Add(entry);
                index++;
            }

            return result;
        }
Пример #3
0
        /// <summary>
        /// Reads one phrase record from <paramref name="fs"/> at the stream's current
        /// position and converts it into a <see cref="WordLibrary"/> entry.
        /// </summary>
        /// <param name="fs">Stream positioned at the first byte of the record.</param>
        /// <param name="type">
        /// Code scheme of the dictionary. Pinyin and Wubi98 are handled here; for any other
        /// value neither the code type nor the code is assigned by this method.
        /// </param>
        /// <returns>
        /// The entry with its word set. If assigning the code throws, the entry's code type
        /// is set to NoCode and <c>ImportLineErrorNotice</c> is raised.
        /// </returns>
        private WordLibrary ReadOnePhrase(FileStream fs, DictCodeType type)
        {
            WordLibrary wl = new WordLibrary();

            // Three single-byte header fields: code length and word length (both in bytes),
            // followed by a marker byte describing the record kind.
            var codeBytesLen = fs.ReadByte();
            var wordBytesLen = fs.ReadByte();
            var split        = fs.ReadByte();

            // 0x64 marks a normal phrase (including mixed Chinese/English ones such as "阿Q").
            Debug.Assert(split == 0x64 || split == 0x32 ||
                         split == 0x10 || split == 0x66 ||
                         split == 0x67); // 0x67: e.g. "$X[计算器]calc"

            var codeBytes = BinFileHelper.ReadArray(fs, codeBytesLen);
            var codeStr   = Encoding.ASCII.GetString(codeBytes);

            var wordBytes = BinFileHelper.ReadArray(fs, wordBytesLen);
            var word      = Encoding.Unicode.GetString(wordBytes);

            // 0x32 records look like "醃(腌)" (the part in parentheses is possibly the
            // simplified form); for now only the first character is kept.
            if (split == 0x32 && word.Length > 0)
            {
                // Keep both UTF-16 units when the first character is a surrogate pair, so a
                // supplementary-plane Han character is not cut in half. An empty word is
                // left untouched instead of making Substring throw.
                var firstCharLength = char.IsSurrogatePair(word, 0) ? 2 : 1;
                word = word.Substring(0, firstCharLength);
            }
            Debug.Assert(word.IndexOf('(') < 0);
            wl.Word = word;

            try
            {
                if (type == DictCodeType.Pinyin)
                {
                    wl.CodeType = CodeType.Pinyin;
                    wl.SetPinyinString(codeStr);
                }
                else if (type == DictCodeType.Wubi98)
                {
                    wl.CodeType = CodeType.Wubi98;
                    wl.SetCode(CodeType.Wubi98, codeStr);
                }
            }
            catch
            {
                // Best effort: keep the word, mark it as having no code, and notify listeners.
                wl.CodeType = CodeType.NoCode;
                ImportLineErrorNotice?.Invoke(wl.Word + " 的编码缺失");
            }

            return wl;
        }
Пример #4
0
        /// <summary>
        /// Reads one phrase record from <paramref name="fs"/>, starting at the stream's
        /// current position, and converts it into a pinyin <see cref="WordLibrary"/> entry.
        /// </summary>
        /// <param name="fs">Stream positioned at the first byte of the record.</param>
        /// <param name="nextStartPosition">
        /// Stream position where the following record starts; the length of the word
        /// field is derived from it.
        /// </param>
        /// <returns>The entry with rank, word, pinyin and code type (Pinyin) filled in.</returns>
        /// <remarks>Any exception thrown while applying the pinyin string propagates to the caller.</remarks>
        private WordLibrary ReadOnePhrase(FileStream fs, int nextStartPosition)
        {
            // Header fields. Only the offset and the rank are used; the other reads exist
            // solely to advance the stream.
            BinFileHelper.ReadInt32(fs);                   // leading 4-byte field, value unused
            var hanziOffset = BinFileHelper.ReadInt16(fs);
            var rank        = fs.ReadByte();
            fs.ReadByte();                                 // purpose unknown

            // Pinyin field. The constant 10 is presumably the 8 header bytes read above
            // plus the 2-byte separator that follows the pinyin (TODO confirm).
            var pinyinBytes = BinFileHelper.ReadArray(fs, hanziOffset - 10);
            var pinyin      = Encoding.Unicode.GetString(pinyinBytes);
            BinFileHelper.ReadInt16(fs);                   // 00 00 separating pinyin from the word

            // The word runs up to the next record, minus the trailing 00 00.
            var wordLength = nextStartPosition - (int)fs.Position - 2;
            var wordBytes  = BinFileHelper.ReadArray(fs, wordLength);
            BinFileHelper.ReadInt16(fs);                   // trailing 00 00

            var entry = new WordLibrary
            {
                Rank = rank,
                Word = Encoding.Unicode.GetString(wordBytes)
            };

            entry.SetPinyinString(pinyin);
            entry.CodeType = CodeType.Pinyin;
            return entry;
        }