/// <summary>
/// Looks up <paramref name="word"/> in the dictionary and returns its content text.
/// </summary>
/// <remarks>
/// The lookup goes through three stages: <c>findInLevel1</c> picks the level-1 block,
/// <c>readLevel2Block</c> loads that block's keyword entries, and the first entry whose
/// keyword matches is resolved against <c>DictLargeContentIndexTable.Indexes</c> to find
/// the content block holding the text. Diagnostic HTML comments are written to the console
/// along the way.
/// </remarks>
/// <param name="word">The keyword to look up.</param>
/// <returns>
/// The decoded content passed through <c>Normalize</c>, or <see cref="string.Empty"/> when
/// the word is not found or no content block covers its offset.
/// </returns>
public override string Query(string word) {
    // Log($"-- Query : '{word}'");
    var idx = findInLevel1(word);
    if (idx < 0) {
        return string.Empty;
    }

    const int bufSize = 32768;
    using var fs = new FileStream(this.DictFileName, FileMode.Open, FileAccess.Read, FileShare.Read, bufSize);

    // Honour the dictionary's case-sensitivity flag on BOTH sides of the comparison.
    // Upper-casing only the stored keyword would make case-sensitive dictionaries
    // unable to match any word that contains lower-case letters.
    var comparison = DictHeader.KeyCaseSensitiveBool
        ? StringComparison.Ordinal
        : StringComparison.OrdinalIgnoreCase;

    var kw1 = _seg1.IndexList[idx];
    var l2 = readLevel2Block(idx, kw1, fs);

    KwIndex2 result = null;
    var size = -1;
    for (var x = 0; x < l2.Count; x++) {
        if (!string.Equals(l2[x].Keyword, word, comparison)) {
            continue;
        }

        result = l2[x];
        // The entry length is the distance to the next entry's offset. For the last
        // entry of the level-2 block that distance is not available here, so size
        // stays -1 and everything up to the end of the content block is returned.
        if (x < l2.Count - 1) {
            var nextOne = l2[x + 1];
            size = (int)(nextOne.RelOffsetUL - result.RelOffsetUL);
        }

        break;
    }

    if (result == null) {
        return string.Empty;
    }

    Console.WriteLine($"<!-- kw1 block #{idx}: {kw1} -->");
    Console.WriteLine($"<!-- kw2 found: {result} @ kw1 block #{idx} -->");
    Console.WriteLine($"<!-- unzipped offset: 0x{result.RelOffsetUL:X8}|{result.RelOffsetUL} -->");

    foreach (var ci in DictLargeContentIndexTable.Indexes) {
        // A content block covers [OffsetUncomp, OffsetUncomp + UncompressedSize).
        // An offset equal to the end belongs to the next block, hence ">=".
        if (result.RelOffsetUL >= ci.OffsetUncomp + ci.UncompressedSize) {
            continue;
        }

        Console.WriteLine(
            $"<!-- ctt: block found ({ci}) | Seg2ContentBlockOffset = 0x{_seg2.Seg2ContentBlockOffset:X8}, ContentIndexTableEndPos = 0x{ContentIndexTableEndPos:X8} -->");

        var block = seekAndLoadCttBlock(fs, ci);
        var ofs = (int)(result.RelOffsetUL - ci.OffsetUncomp);
        var data = new byte[size > 0 ? size : block.Length - ofs];
        Array.Copy(block, ofs, data, 0, data.Length);

        Console.WriteLine(
            $"<!-- ctt entry '{word}': ofs = 0x{ofs:X8}/{ofs}, len = 0x{data.Length:X8}/{data.Length} -->");

        var html = DictHeader.LanguageMode.GetString(data, 0, data.Length);
        return Normalize(html);
    }

    return string.Empty;
}
/// <summary>
/// Extension point for loading content as a string for a level-2 keyword entry.
/// This base implementation is a stub and always throws.
/// </summary>
/// <param name="kwi2">The level-2 keyword entry (unused by this stub).</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public virtual string LoadContentByKeyword(KwIndex2 kwi2) =>
    throw new NotImplementedException();
/// <summary>
/// Reads the level-2 keyword index block belonging to level-1 entry <paramref name="idx"/>
/// and parses it into a list of <see cref="KwIndex2"/> entries.
/// </summary>
/// <remarks>
/// The block starts with two 32-bit words; the first selects how the payload is stored:
/// <c>0x02000000</c> is inflated via <c>Zipper.InflateBufferWithPureZlib</c> and holds 64-bit
/// offsets, <c>0x01000000</c> is LZO (or stored raw when its first payload byte is zero) and
/// holds 32-bit offsets, and <c>0</c> is parsed directly with 32-bit offsets.
/// Each entry is an offset followed by a NUL-terminated keyword in
/// <c>DictHeader.LanguageMode</c> encoding.
/// </remarks>
/// <param name="idx">Position of the level-1 entry within <c>_seg1.IndexList</c>.</param>
/// <param name="kw1">The level-1 entry; supplies the compressed and uncompressed block sizes.</param>
/// <param name="fs">Seekable stream over the dictionary file.</param>
/// <returns>The parsed level-2 entries, in the order they appear in the block.</returns>
/// <exception cref="InvalidDataException">The block is smaller than its 8-byte header.</exception>
/// <exception cref="EndOfStreamException">The stream ends before the whole block was read.</exception>
/// <exception cref="Exception">The leading marker is not one of the recognised values.</exception>
private KwIndex2List readLevel2Block(int idx, KwIndex1 kw1, Stream fs) {
    // The block starts at Seg2BeginPos plus the compressed sizes of all preceding
    // level-1 entries.
    var pos = Seg2BeginPos;
    for (var kwiIdx = 0; kwiIdx < idx; kwiIdx++) {
        pos += _seg1.IndexList[kwiIdx].CompressedSize;
    }

    var list2 = new KwIndex2List();
    fs.Seek((long)pos, SeekOrigin.Begin);

    var magicNum = readUInt32(fs);
    readUInt32(fs); // second header word; its value is not used here

    if (kw1.CompressedSize < 8) {
        // Without this check the payload size below would underflow or go negative.
        throw new InvalidDataException(
            $"Level-2 block #{idx} is smaller than its 8-byte header (CompressedSize = {kw1.CompressedSize}).");
    }

    var rawData = new byte[kw1.CompressedSize - 8];
    readFully(rawData);

    if (magicNum == 0x02000000) {
        try {
            // TODO need review and debug
            var txt = Zipper.InflateBufferWithPureZlib(rawData, rawData.Length);
            parseEntries(txt, txt.Length, true, false);
        } catch (Exception ex) {
            ErrorLog(ex.ToString());
            throw;
        }
    } else if (magicNum == 0x01000000) {
        // LZO payload (V2.0 & V1.2). If the first byte is 0 the block is actually
        // stored uncompressed.
        byte[] decompressedData;
        if (rawData[0] == (byte)0) {
            decompressedData = rawData;
        } else {
            decompressedData = new byte[kw1.UncompressedSize];
            var ok = false;
            try {
                // NOTE(review): the length passed is the full CompressedSize although rawData
                // holds CompressedSize - 8 bytes — confirm against MiniLzoDecompress's contract.
                Zipper.MiniLzoDecompress(rawData, 0, (int)kw1.CompressedSize, decompressedData);
                ok = decompressedData.Length == (int)kw1.UncompressedSize;
            } catch (Exception ex) {
                Debug.WriteLine(ex.ToString());
            }

            // NOTE(review): a failed decompression is only logged; parsing still runs over
            // whatever the buffer contains. Kept as-is to preserve existing behaviour.
            if (!ok) {
                ErrorLog("BAD, 1.1");
            }
        }

        parseEntries(decompressedData, (int)kw1.UncompressedSize, false, false);
    } else if (magicNum == 0) {
        parseEntries(rawData, rawData.Length, false, true);
    } else {
        throw new Exception(
            $"提取KWIndex2[]时,期望正确的算法标志0x2000000/0x1000000,然而遇到了{magicNum}/0x{magicNum:X}");
    }

    return list2;

    // Fills the whole buffer from fs. Stream.Read may return fewer bytes than requested,
    // so a single call is not enough to guarantee a complete block.
    void readFully(byte[] buffer) {
        var filled = 0;
        while (filled < buffer.Length) {
            var got = fs.Read(buffer, filled, buffer.Length - filled);
            if (got <= 0) {
                throw new EndOfStreamException(
                    $"Unexpected end of stream while reading level-2 block #{idx}: got {filled} of {buffer.Length} bytes.");
            }

            filled += got;
        }
    }

    // Parses entries from data[0..limit) and appends them to list2. Offsets are 8 bytes
    // wide when wideOffsets is true, otherwise 4 bytes wide.
    void parseEntries(byte[] data, int limit, bool wideOffsets, bool logEach) {
        var ofs = 0;
        while (ofs < limit) {
            KwIndex2 kwi2;
            if (wideOffsets) {
                kwi2 = new KwIndex2 { RelOffsetUL = readUInt64(data, ofs) };
                ofs += 8;
            } else {
                kwi2 = new KwIndex2 { RelOffsetUL = readUInt32(data, ofs) };
                ofs += 4;
            }

            // Advance past the keyword's terminator: a zero 16-bit unit for
            // Encoding.Unicode, a single zero byte for every other encoding.
            var ofs0 = ofs;
            if (Equals(DictHeader.LanguageMode, Encoding.Unicode)) {
                uint unit;
                do {
                    unit = readUInt16(data, ofs);
                    ofs += 2;
                } while (unit != 0);
            } else {
                while (data[ofs] != 0) {
                    ofs++;
                }

                ofs++;
            }

            kwi2.Keyword = DictHeader.LanguageMode.GetString(data, ofs0, ofs - ofs0).TrimEnd('\0');
            list2.Add(kwi2);
            if (logEach) {
                Log($" > {kwi2}");
            }
        }
    }
}
/// <summary>
/// Extension point for loading content as raw bytes for a level-2 keyword entry.
/// This base implementation is a stub and always throws.
/// </summary>
/// <param name="kwi2">The level-2 keyword entry (unused by this stub).</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public virtual byte[] LoadContentBytesByKeyword(KwIndex2 kwi2) =>
    throw new NotImplementedException();