예제 #1
0
        /// <summary>
        /// Looks up <paramref name="word"/> in the dictionary and returns its normalized content.
        /// </summary>
        /// <param name="word">The keyword to search for.</param>
        /// <returns>
        /// The decoded entry text after <c>Normalize</c>, or <see cref="string.Empty"/> when
        /// <c>findInLevel1</c> reports a negative index, no level-2 keyword matches, or no content
        /// block covers the entry's offset.
        /// </returns>
        public override string Query(string word)
        {
            // Log($"-- Query : '{word}'");

            var idx = findInLevel1(word);
            if (idx < 0)
            {
                return(string.Empty);
            }

            const int bufSize = 32768;

            using var fs = new FileStream(this.DictFileName, FileMode.Open, FileAccess.Read, FileShare.Read, bufSize);

            // Honor the dictionary's case-sensitivity flag on BOTH sides of the comparison.
            // Previously the stored keyword was always upper-cased while the query was only
            // upper-cased for case-insensitive dictionaries, so case-sensitive lookups of any
            // word containing a lowercase letter could never match.
            var comparison = DictHeader.KeyCaseSensitiveBool
                                 ? StringComparison.Ordinal
                                 : StringComparison.OrdinalIgnoreCase;

            var      kw1    = _seg1.IndexList[idx];
            var      l2     = readLevel2Block(idx, kw1, fs);
            KwIndex2 result = null;
            var      size   = -1; // -1 = length unknown (match is the last entry of its level-2 block)

            for (var x = 0; x < l2.Count; x++)
            {
                if (!string.Equals(l2[x].Keyword, word, comparison))
                {
                    continue;
                }

                result = l2[x];
                if (x < l2.Count - 1)
                {
                    // The entry ends where the following keyword's content begins.
                    size = (int)(l2[x + 1].RelOffsetUL - result.RelOffsetUL);
                }

                break;
            }

            if (result == null)
            {
                return(string.Empty);
            }

            Console.WriteLine($"<!-- kw1 block #{idx}: {kw1} -->");
            Console.WriteLine($"<!-- kw2 found: {result} @ kw1 block #{idx} -->");

            Console.WriteLine($"<!-- unzipped offset: 0x{result.RelOffsetUL:X8}|{result.RelOffsetUL} -->");
            foreach (var ci in DictLargeContentIndexTable.Indexes)
            {
                // A block covers the half-open range [OffsetUncomp, OffsetUncomp + UncompressedSize).
                // Using ">=" keeps an entry that starts exactly at a block's end from being looked up
                // in that block (its offset inside the block would equal block.Length).
                if (result.RelOffsetUL >= ci.OffsetUncomp + ci.UncompressedSize)
                {
                    continue;
                }

                Console.WriteLine(
                    $"<!-- ctt: block found ({ci}) | Seg2ContentBlockOffset = 0x{_seg2.Seg2ContentBlockOffset:X8}, ContentIndexTableEndPos = 0x{ContentIndexTableEndPos:X8} -->");

                var block = seekAndLoadCttBlock(fs, ci);
                var ofs   = (int)(result.RelOffsetUL - ci.OffsetUncomp);

                // With an unknown length, everything up to the end of the block is returned.
                // NOTE(review): that may include entries belonging to the next level-2 block - confirm.
                var data = new byte[size > 0 ? size : block.Length - ofs];
                Array.Copy(block, ofs, data, 0, data.Length);
                Console.WriteLine(
                    $"<!-- ctt entry '{word}': ofs = 0x{ofs:X8}/{ofs}, len = 0x{data.Length:X8}/{data.Length} -->");

                var html = DictHeader.LanguageMode.GetString(data, 0, data.Length);
                return(Normalize(html));
            }

            return(string.Empty);
        }
예제 #2
0
 /// <summary>
 /// Placeholder for loading the content that belongs to a level-2 keyword index entry.
 /// This base implementation is not provided; being virtual, it can be overridden.
 /// </summary>
 /// <param name="kwi2">The level-2 keyword index entry (unused by this implementation).</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="NotImplementedException">Always thrown.</exception>
 public virtual string LoadContentByKeyword(KwIndex2 kwi2)
     => throw new NotImplementedException();
예제 #3
0
        /// <summary>
        /// Reads the level-2 keyword index block that belongs to level-1 entry <paramref name="idx"/>
        /// and parses it into a list of <see cref="KwIndex2"/> records.
        /// </summary>
        /// <param name="idx">Position of the block within <c>_seg1.IndexList</c>.</param>
        /// <param name="kw1">Level-1 entry supplying the block's compressed and uncompressed sizes.</param>
        /// <param name="fs">Seekable stream over the dictionary file; its position is changed.</param>
        /// <returns>
        /// The parsed entries. The list is empty when LZO decompression of the block fails
        /// (the failure is reported through <c>ErrorLog</c>).
        /// </returns>
        /// <exception cref="EndOfStreamException">The stream ends before the whole block was read.</exception>
        /// <exception cref="Exception">The block starts with an unknown compression marker.</exception>
        private KwIndex2List readLevel2Block(int idx, KwIndex1 kw1, Stream fs)
        {
            // Values of the first header field that select how the block payload is stored.
            const int zlibMarker = 0x02000000;
            const int lzoMarker  = 0x01000000;

            // The block starts after all preceding level-2 blocks, whose compressed sizes are summed here.
            var pos = Seg2BeginPos;
            for (var i = 0; i < idx; i++)
            {
                pos += _seg1.IndexList[i].CompressedSize;
            }

            fs.Seek((long)pos, SeekOrigin.Begin);
            var magicNum = readUInt32(fs);

            // Second 4-byte header field: consumed to advance the stream, value not used.
            // NOTE(review): presumably a checksum of the block - confirm.
            _ = readUInt32(fs);

            // The payload is what remains of the block after the 8 header bytes.
            var rawData = new byte[kw1.CompressedSize - 8];

            // Stream.Read may return fewer bytes than requested, so keep reading until the
            // buffer is full instead of trusting a single call.
            var filled = 0;
            while (filled < rawData.Length)
            {
                var got = fs.Read(rawData, filled, rawData.Length - filled);
                if (got <= 0)
                {
                    throw new EndOfStreamException(
                              $"Level-2 index block #{idx} is truncated: expected {rawData.Length} bytes, got {filled}.");
                }

                filled += got;
            }

            var list2 = new KwIndex2List();

            if (magicNum == zlibMarker)
            {
                try
                {
                    // TODO need review and debug
                    var txt = Zipper.InflateBufferWithPureZlib(rawData, rawData.Length);

                    // Inflated blocks carry 8-byte content offsets.
                    parseLevel2Entries(txt, txt.Length, true, false, list2);
                }
                catch (Exception ex)
                {
                    ErrorLog(ex.ToString());
                    throw;
                }
            }
            else if (magicNum == lzoMarker)
            {
                // LZO (V2.0 & V1.2). Peek at the first payload byte: when it is 0 the block
                // is actually stored uncompressed.
                byte[] decompressedData;
                if (rawData[0] == 0)
                {
                    decompressedData = rawData;
                }
                else
                {
                    decompressedData = new byte[kw1.UncompressedSize];
                    try
                    {
                        // NOTE(review): the length passed is kw1.CompressedSize although rawData
                        // holds 8 bytes fewer - confirm against MiniLzoDecompress's contract.
                        Zipper.MiniLzoDecompress(rawData, 0, (int)kw1.CompressedSize, decompressedData);
                    }
                    catch (Exception ex)
                    {
                        Debug.WriteLine(ex.ToString());
                        ErrorLog("BAD, 1.1");

                        // Do not parse a buffer that was not (fully) decompressed: it would only
                        // yield junk entries or run past the end of the array.
                        return(list2);
                    }
                }

                parseLevel2Entries(decompressedData, (int)kw1.UncompressedSize, false, false, list2);
            }
            else if (magicNum == 0)
            {
                // Uncompressed payload; each parsed entry is also written to the log.
                parseLevel2Entries(rawData, rawData.Length, false, true, list2);
            }
            else
            {
                throw new Exception(
                          $"提取KWIndex2[]时,期望正确的算法标志0x2000000/0x1000000,然而遇到了{magicNum}/0x{magicNum:X}");
            }

            return(list2);
        }

        /// <summary>
        /// Parses consecutive records of the form "content offset + zero-terminated keyword"
        /// from <paramref name="data"/> and appends them to <paramref name="list2"/>.
        /// </summary>
        /// <param name="data">Buffer holding the records.</param>
        /// <param name="limit">Parsing continues while the read position is below this value.</param>
        /// <param name="wideOffsets">True for 8-byte offsets (readUInt64), false for 4-byte offsets (readUInt32).</param>
        /// <param name="logEntries">True to pass every parsed entry to <c>Log</c>.</param>
        /// <param name="list2">Destination list.</param>
        private void parseLevel2Entries(byte[] data, int limit, bool wideOffsets, bool logEntries, KwIndex2List list2)
        {
            // With Encoding.Unicode the terminator is a 16-bit zero; otherwise a single zero byte.
            var twoByteUnits = Equals(DictHeader.LanguageMode, Encoding.Unicode);

            var ofs = 0;
            while (ofs < limit)
            {
                KwIndex2 kwi2;
                if (wideOffsets)
                {
                    kwi2 = new KwIndex2 {
                        RelOffsetUL = readUInt64(data, ofs)
                    };
                    ofs += 8;
                }
                else
                {
                    kwi2 = new KwIndex2 {
                        RelOffsetUL = readUInt32(data, ofs)
                    };
                    ofs += 4;
                }

                // Advance past the keyword including its terminator.
                var keywordStart = ofs;
                if (twoByteUnits)
                {
                    uint unit;
                    do
                    {
                        unit = readUInt16(data, ofs);
                        ofs += 2;
                    }
                    while (unit != 0);
                }
                else
                {
                    while (data[ofs] != 0)
                    {
                        ofs++;
                    }

                    ofs++;
                }

                // The decoded range includes the terminator, hence the TrimEnd.
                kwi2.Keyword = DictHeader.LanguageMode.GetString(data, keywordStart, ofs - keywordStart)
                               .TrimEnd('\0');
                list2.Add(kwi2);

                if (logEntries)
                {
                    Log($"    > {kwi2}");
                }
            }
        }
예제 #4
0
 /// <summary>
 /// Placeholder for loading the raw content bytes that belong to a level-2 keyword index entry.
 /// This base implementation is not provided; being virtual, it can be overridden.
 /// </summary>
 /// <param name="kwi2">The level-2 keyword index entry (unused by this implementation).</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="NotImplementedException">Always thrown.</exception>
 public virtual byte[] LoadContentBytesByKeyword(KwIndex2 kwi2)
     => throw new NotImplementedException();