/// <summary>
/// Reads one phrase record from the current position of <paramref name="fs"/> and
/// turns it into a <see cref="WordLibrary"/> entry carrying a pinyin code.
/// </summary>
/// <param name="fs">Stream positioned on the first byte of the record.</param>
/// <param name="nextStartPosition">
/// Absolute stream offset where the following record starts; the length of the word
/// text is derived from it.
/// </param>
/// <returns>
/// The populated entry. If the pinyin string is rejected, the entry is still returned,
/// marked as <see cref="CodeType.NoCode"/>, and <c>ImportLineErrorNotice</c> is raised.
/// </returns>
private WordLibrary ReadOnePhrase(FileStream fs, int nextStartPosition)
{
    var entry = new WordLibrary();

    // Fixed-size header: 4-byte value (unused), 2-byte offset of the word text,
    // 1-byte rank, 1 byte of unknown purpose, 8 bytes of unknown meaning.
    BinFileHelper.ReadInt32(fs);
    var wordTextOffset = BinFileHelper.ReadInt16(fs);
    entry.Rank = fs.ReadByte();
    fs.ReadByte();
    BinFileHelper.ReadInt64(fs);

    // 18 presumably covers the 16 header bytes plus the 2-byte separator that
    // follows the pinyin text, leaving only the pinyin bytes.
    var pinyinText = Encoding.Unicode.GetString(BinFileHelper.ReadArray(fs, wordTextOffset - 18));

    // 00 00 separator between the pinyin and the word text.
    BinFileHelper.ReadInt16(fs);

    // The word runs up to the trailing 00 00 that closes the record.
    var wordByteCount = nextStartPosition - (int)fs.Position - 2;
    var rawWord = BinFileHelper.ReadArray(fs, wordByteCount);
    BinFileHelper.ReadInt16(fs); // closing 00 00
    entry.Word = Encoding.Unicode.GetString(rawWord);

    try
    {
        entry.SetPinyinString(pinyinText);
        entry.CodeType = CodeType.Pinyin;
    }
    catch
    {
        // Best effort: keep the word but flag it as having no usable code.
        entry.CodeType = CodeType.NoCode;
        ImportLineErrorNotice?.Invoke(entry.Word + " 的编码缺失");
    }

    return entry;
}
/// <summary>
/// Reads one phrase record from the current position of <paramref name="fs"/> and
/// turns it into a <see cref="WordLibrary"/> entry carrying a Wubi98 code.
/// </summary>
/// <param name="fs">Stream positioned on the first byte of the record.</param>
/// <param name="nextStartPosition">
/// Absolute stream offset where the following record starts; the length of the word
/// text is derived from it.
/// </param>
/// <returns>
/// The populated entry, or <c>null</c> when <c>SetCode</c> throws for the decoded code string.
/// </returns>
private WordLibrary ReadOnePhrase(FileStream fs, int nextStartPosition)
{
    var entry = new WordLibrary();

    // Fixed-size header: 4-byte value (unused), 2-byte offset of the word text,
    // 1-byte rank, 1 byte of unknown purpose, 8 bytes of unknown meaning.
    BinFileHelper.ReadInt32(fs);
    var wordTextOffset = BinFileHelper.ReadInt16(fs);
    entry.Rank = fs.ReadByte();
    fs.ReadByte();
    BinFileHelper.ReadInt64(fs);

    // 18 presumably covers the 16 header bytes plus the 2-byte separator that
    // follows the code text, leaving only the code bytes.
    var codeText = Encoding.Unicode.GetString(BinFileHelper.ReadArray(fs, wordTextOffset - 18));

    // 00 00 separator between the code and the word text.
    BinFileHelper.ReadInt16(fs);

    // The word runs up to the trailing 00 00 that closes the record.
    var wordByteCount = nextStartPosition - (int)fs.Position - 2;
    var rawWord = BinFileHelper.ReadArray(fs, wordByteCount);
    BinFileHelper.ReadInt16(fs); // closing 00 00
    entry.Word = Encoding.Unicode.GetString(rawWord);

    try
    {
        entry.SetCode(CodeType.Wubi98, codeText);
    }
    catch
    {
        // The record has been consumed in full; the caller just gets no entry for it.
        return null;
    }

    entry.CodeType = CodeType.Wubi98;
    return entry;
}
/// <summary>
/// Reads the word and the explanation(s) of a single dictionary entry.
/// </summary>
/// <param name="inflatedBytes">Buffer holding the index, word and XML data.</param>
/// <param name="offsetWords">Offset within <paramref name="inflatedBytes"/> at which word data starts.</param>
/// <param name="offsetXml">Offset within <paramref name="inflatedBytes"/> at which XML data starts.</param>
/// <param name="dataLen">Size of one index record; multiplied by an entry index to locate its record.</param>
/// <param name="wordStringDecoder">Encoding used to decode the word bytes.</param>
/// <param name="xmlStringDecoder">Encoding used to decode the XML bytes.</param>
/// <param name="i">Index of the entry to read.</param>
/// <returns>
/// An <see cref="InternalWord"/> whose <c>Descriptions</c> holds the entry's own XML
/// (when non-empty) plus the XML of every referenced entry, each keyed by its XML end offset.
/// </returns>
private InternalWord ReadDefinitionData(byte[] inflatedBytes, int offsetWords, int offsetXml, int dataLen, Encoding wordStringDecoder, Encoding xmlStringDecoder, int i)
{
    var idx = new int[6];
    GetIdxData(inflatedBytes, dataLen * i, idx);

    // Capture everything needed from this entry's index record now, because the
    // buffer is overwritten for each reference below. idx[2] (flags) is not used.
    int wordCursor = idx[0];
    int xmlBegin = idx[1];
    int referenceCount = idx[3]; // number of explanations this word has
    int wordEnd = idx[4];        // offset of the word
    int xmlEnd = idx[5];         // offset of the explanation XML

    var result = new InternalWord();

    string ownXml = xmlStringDecoder.GetString(inflatedBytes, offsetXml + xmlBegin, xmlEnd - xmlBegin);
    if (!string.IsNullOrEmpty(ownXml))
    {
        result.Descriptions.Add(xmlEnd, ownXml);
    }

    // Each reference is a 4-byte entry index stored ahead of the word text.
    for (int remaining = referenceCount; remaining > 0; remaining--)
    {
        int referencedIndex = BitConverter.ToInt32(inflatedBytes, offsetWords + wordCursor);
        GetIdxData(inflatedBytes, dataLen * referencedIndex, idx);

        int refXmlBegin = idx[1];
        int refXmlEnd = idx[5];
        string refXml = xmlStringDecoder.GetString(inflatedBytes, offsetXml + refXmlBegin, refXmlEnd - refXmlBegin);
        result.Descriptions.Add(refXmlEnd, refXml);

        wordCursor += 4;
    }

    // What is left between the cursor and the word end offset is the word itself.
    byte[] rawWord = BinFileHelper.ReadArray(inflatedBytes, offsetWords + wordCursor, wordEnd - wordCursor);
    result.Word = wordStringDecoder.GetString(rawWord);
    return result;
}
/// <summary>
/// Imports every phrase from a "Freeime Dictionary" file.
/// </summary>
/// <param name="path">Path of the dictionary file to read.</param>
/// <returns>The list of entries read from the phrase section of the file.</returns>
/// <exception cref="NotImplementedException">
/// The type marker at offset 0x23 is neither "拼音" (pinyin) nor "五笔" (Wubi).
/// </exception>
public WordLibraryList Import(string path)
{
    var pyAndWord = new WordLibraryList();

    // 'using' guarantees the file handle is released even when the type marker is
    // unknown or a record fails to parse; previously the stream was only closed on
    // the normal end-of-file path.
    using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
    {
        const string headerstr = "Freeime Dictionary";
        fs.Position = 0x00;
        var header = Encoding.ASCII.GetString(BinFileHelper.ReadArray(fs, headerstr.Length));
        Debug.Assert(header.Equals(headerstr));

        // Two UTF-16 characters at 0x23 name the code type of the dictionary.
        fs.Position = 0x23;
        var headerTypeStr = Encoding.Unicode.GetString(BinFileHelper.ReadArray(fs, 4));

        DictCodeType curType;
        if (headerTypeStr.Equals("拼音"))
        {
            curType = DictCodeType.Pinyin;
        }
        else if (headerTypeStr.Equals("五笔"))
        {
            curType = DictCodeType.Wubi98;
        }
        else
        {
            throw new NotImplementedException("未知词库,请在反馈中提交文件");
        }

        // Offset of the first phrase record (the 'a' entries).
        var phrase_start = 0x1B620;
        fs.Position = phrase_start;

        // Check for end of file BEFORE reading, so a file with no phrase section
        // (shorter than phrase_start) yields an empty list instead of parsing
        // end-of-file markers as a record.
        while (fs.Position < fs.Length)
        {
            var wl = ReadOnePhrase(fs, curType);
            if (wl != null)
            {
                pyAndWord.Add(wl);
            }
        }
    }

    return pyAndWord;
}
/// <summary>
/// Reads the word and the combined explanation text of a single dictionary entry.
/// </summary>
/// <param name="inflatedBytes">Buffer holding the index, word and XML data.</param>
/// <param name="offsetWords">Offset within <paramref name="inflatedBytes"/> at which word data starts.</param>
/// <param name="offsetXml">Offset within <paramref name="inflatedBytes"/> at which XML data starts.</param>
/// <param name="dataLen">Size of one index record; multiplied by an entry index to locate its record.</param>
/// <param name="wordStringDecoder">Encoding used to decode the word bytes.</param>
/// <param name="xmlStringDecoder">Encoding used to decode the XML bytes.</param>
/// <param name="i">Index of the entry to read.</param>
/// <returns>
/// A pair whose key is the word and whose value is the explanation. XML pulled in
/// from referenced entries is placed in front of the text gathered so far, joined by ", ".
/// </returns>
private KeyValuePair<string, string> ReadDefinitionData(byte[] inflatedBytes, int offsetWords, int offsetXml, int dataLen, Encoding wordStringDecoder, Encoding xmlStringDecoder, int i)
{
    var idx = new int[6];
    GetIdxData(inflatedBytes, dataLen * i, idx);

    // Capture this entry's values up front: the buffer is reused for every
    // referenced entry below. idx[2] (flags) is not used.
    int wordCursor = idx[0];
    int xmlBegin = idx[1];
    int pendingRefs = idx[3];
    int wordEnd = idx[4];
    int xmlEnd = idx[5];

    string xml = xmlStringDecoder.GetString(inflatedBytes, offsetXml + xmlBegin, xmlEnd - xmlBegin);

    // Each reference is a 4-byte entry index stored ahead of the word text.
    for (; pendingRefs > 0; pendingRefs--)
    {
        int referencedIndex = BitConverter.ToInt32(inflatedBytes, offsetWords + wordCursor);
        GetIdxData(inflatedBytes, dataLen * referencedIndex, idx);
        xmlBegin = idx[1];
        xmlEnd = idx[5];

        string referencedXml = xmlStringDecoder.GetString(inflatedBytes, offsetXml + xmlBegin, xmlEnd - xmlBegin);
        xml = string.IsNullOrEmpty(xml)
            ? referencedXml
            : referencedXml + ", " + xml;

        wordCursor += 4;
    }

    // What is left between the cursor and the word end offset is the word itself.
    byte[] rawWord = BinFileHelper.ReadArray(inflatedBytes, offsetWords + wordCursor, wordEnd - wordCursor);
    return new KeyValuePair<string, string>(wordStringDecoder.GetString(rawWord), xml);
}
/// <summary>
/// Opens an LD2 file, writes its header details to the debug output and reads the
/// dictionary it contains.
/// </summary>
/// <param name="ld2File">Path of the file to parse.</param>
/// <returns>
/// The words produced by <c>ReadDictionary</c>, or <c>null</c> when the header
/// offsets point outside the file (no dictionary data present).
/// </returns>
public IList<InternalWord> Parse(string ld2File)
{
    using (var stream = new FileStream(ld2File, FileMode.Open, FileAccess.Read))
    {
        Debug.WriteLine("文件:" + ld2File);

        // First four bytes: ASCII type tag.
        string fileType = Encoding.ASCII.GetString(BinFileHelper.ReadArray(stream, 4));
        Debug.WriteLine("类型:" + fileType);

        // Version and ID are only read for diagnostics; the reads stay inside the
        // Debug calls so they are skipped together with them in non-debug builds.
        stream.Position = 0x18;
        Debug.WriteLine("版本:" + BinFileHelper.ReadInt16(stream) + "." + BinFileHelper.ReadInt16(stream));
        Debug.WriteLine("ID: 0x" + (BinFileHelper.ReadInt64(stream).ToString("x")));

        stream.Position = 0x5c;
        int offsetData = BinFileHelper.ReadInt32(stream) + 0x60;
        if (stream.Length <= offsetData)
        {
            Debug.WriteLine("文件不包含字典数据。网上字典?");
            return null;
        }

        Debug.WriteLine("简介地址:0x" + (offsetData).ToString("x"));
        stream.Position = offsetData;
        int type = BinFileHelper.ReadInt32(stream);
        Debug.WriteLine("简介类型:0x" + (type).ToString("x"));

        stream.Position = offsetData + 4;
        int offsetWithInfo = BinFileHelper.ReadInt32(stream) + offsetData + 12;

        if (type == 3)
        {
            // No additional information block: the dictionary starts at offsetData.
            return ReadDictionary(stream, offsetData);
        }

        if (stream.Length > offsetWithInfo - 0x1C)
        {
            return ReadDictionary(stream, offsetWithInfo);
        }

        Debug.WriteLine("文件不包含字典数据。网上字典?");
        return null;
    }
}
/// <summary>
/// Reads one length-prefixed phrase record from <paramref name="fs"/> and converts
/// it into a <see cref="WordLibrary"/> entry.
/// </summary>
/// <param name="fs">Stream positioned on the first byte of the record.</param>
/// <param name="type">Code type of the dictionary; selects how the code string is applied.</param>
/// <returns>
/// The populated entry. If applying the code throws, the entry is still returned,
/// marked as <see cref="CodeType.NoCode"/>, and <c>ImportLineErrorNotice</c> is raised.
/// </returns>
private WordLibrary ReadOnePhrase(FileStream fs, DictCodeType type)
{
    var entry = new WordLibrary();

    // Three-byte header: code length, word length, record kind.
    var codeLength = fs.ReadByte();
    var wordLength = fs.ReadByte();
    var kind = fs.ReadByte();

    // 0x64 is an ordinary phrase (including mixed Chinese/English such as "阿Q");
    // 0x67 is seen on entries like "$X[计算器]calc".
    Debug.Assert(kind == 0x64 || kind == 0x32 || kind == 0x10 || kind == 0x66 || kind == 0x67);

    var code = Encoding.ASCII.GetString(BinFileHelper.ReadArray(fs, codeLength));
    var text = Encoding.Unicode.GetString(BinFileHelper.ReadArray(fs, wordLength));

    if (kind == 0x32)
    {
        // Entries like "醃(腌)" — the part in parentheses is possibly the simplified
        // form. For now only the first character is kept.
        text = text.Substring(0, 1);
    }

    Debug.Assert(word_HasNoParenthesis(text));
    entry.Word = text;

    try
    {
        switch (type)
        {
            case DictCodeType.Pinyin:
                entry.CodeType = CodeType.Pinyin;
                entry.SetPinyinString(code);
                break;
            case DictCodeType.Wubi98:
                entry.CodeType = CodeType.Wubi98;
                entry.SetCode(CodeType.Wubi98, code);
                break;
        }
    }
    catch
    {
        // Best effort: keep the word but flag it as having no usable code.
        entry.CodeType = CodeType.NoCode;
        ImportLineErrorNotice?.Invoke(entry.Word + " 的编码缺失");
    }

    return entry;

    // Same check the assertion has always made: no "(" may remain in the word.
    bool word_HasNoParenthesis(string candidate)
    {
        return candidate.IndexOf("(") < 0;
    }
}
/// <summary>
/// Reads one phrase record from the current position of <paramref name="fs"/> and
/// turns it into a <see cref="WordLibrary"/> entry carrying a pinyin code.
/// </summary>
/// <param name="fs">Stream positioned on the first byte of the record.</param>
/// <param name="nextStartPosition">
/// Absolute stream offset where the following record starts; the length of the word
/// text is derived from it.
/// </param>
/// <returns>The populated entry, with its code type set to <see cref="CodeType.Pinyin"/>.</returns>
private WordLibrary ReadOnePhrase(FileStream fs, int nextStartPosition)
{
    var entry = new WordLibrary();

    // Fixed-size header: 4-byte value (unused), 2-byte offset of the word text,
    // 1-byte rank, 1 byte of unknown purpose.
    BinFileHelper.ReadInt32(fs);
    var wordTextOffset = BinFileHelper.ReadInt16(fs);
    entry.Rank = fs.ReadByte();
    fs.ReadByte();

    // 10 presumably covers the 8 header bytes plus the 2-byte separator that
    // follows the pinyin text, leaving only the pinyin bytes.
    var pinyinText = Encoding.Unicode.GetString(BinFileHelper.ReadArray(fs, wordTextOffset - 10));

    // 00 00 separator between the pinyin and the word text.
    BinFileHelper.ReadInt16(fs);

    // The word runs up to the trailing 00 00 that closes the record.
    var wordByteCount = nextStartPosition - (int)fs.Position - 2;
    var rawWord = BinFileHelper.ReadArray(fs, wordByteCount);
    BinFileHelper.ReadInt16(fs); // closing 00 00
    entry.Word = Encoding.Unicode.GetString(rawWord);

    // No error handling here: a rejected pinyin string propagates to the caller.
    entry.SetPinyinString(pinyinText);
    entry.CodeType = CodeType.Pinyin;
    return entry;
}