Пример #1
0
 /// <summary>
 /// Ctor: loads the lookup tables from a compiled binary data file.
 /// Reads the per-character file positions into <c>chrPoss</c> and the
 /// list of hash chain pointers into <c>hashPtrs</c>; the file name is
 /// remembered so entries can be read from the file later.
 /// </summary>
 /// <param name="dataFileName">Name of the compiled binary file to read.</param>
 public HeadwordInfo(string dataFileName)
 {
     this.dataFileName = dataFileName;
     using (BinReader reader = new BinReader(dataFileName))
     {
         // Header: position where the dictionary index starts, then the character count.
         int indexStart = reader.ReadInt();
         int charCount = reader.ReadInt();

         // One record per character: 16-bit character value, then the file position of its info.
         for (int n = 0; n != charCount; n++)
         {
             // The short is reinterpreted as a char so the resulting index is the UTF-16 code unit.
             char ch = (char)reader.ReadShort();
             int infoPos = reader.ReadInt();
             chrPoss[(int)ch] = infoPos;
         }

         // Jump to the dictionary index and load the sorted list of hash chain pointers.
         reader.Position = indexStart;
         int pointerCount = reader.ReadInt();
         hashPtrs = new HashChainPointer[pointerCount];
         for (int n = 0; n != pointerCount; n++)
         {
             // Each record: hash, then two chain positions (passed on in file order).
             int hashValue = reader.ReadInt();
             int cedictChainPos = reader.ReadInt();
             int hanDeDictChainPos = reader.ReadInt();
             hashPtrs[n] = new HashChainPointer(hashValue, cedictChainPos, hanDeDictChainPos);
         }
     }
 }
Пример #2
0
        /// <summary>
        /// See <see cref="IHeadwordInfo.GetEntries"/>.
        /// Looks up the hash of <paramref name="simp"/> in the sorted hash pointer table and,
        /// if present, walks the CEDICT and HanDeDict chains in the data file, keeping only
        /// entries whose simplified headword is identical to <paramref name="simp"/>.
        /// </summary>
        /// <param name="simp">Simplified headword to look up.</param>
        /// <param name="ced">Receives the matching CEDICT entries; empty if there are none.</param>
        /// <param name="hdd">Receives the matching HanDeDict entries; empty if there are none.</param>
        public void GetEntries(string simp, out CedictEntry[] ced, out CedictEntry[] hdd)
        {
            List<CedictEntry> cedList = new List<CedictEntry>();
            List<CedictEntry> hddList = new List<CedictEntry>();

            // Do we have this hash?
            int hash = CedictEntry.Hash(simp);
            HashChainPointer hcp = new HashChainPointer(hash);
            int pos = Array.BinarySearch(hashPtrs, hcp, new HashComp());

            if (pos >= 0)
            {
                int cedictPos = hashPtrs[pos].CedictPos;
                int hanDeDictPos = hashPtrs[pos].HanDeDictPos;
                // Only touch the data file when there is at least one chain to walk;
                // a miss must not cost a file open.
                if (cedictPos != 0 || hanDeDictPos != 0)
                {
                    using (BinReader br = new BinReader(dataFileName))
                    {
                        // Same order as before: CEDICT chain first, then HanDeDict chain.
                        collectChainEntries(br, cedictPos, simp, cedList);
                        collectChainEntries(br, hanDeDictPos, simp, hddList);
                    }
                }
            }

            // Our results
            ced = cedList.ToArray();
            hdd = hddList.ToArray();
        }

        /// <summary>
        /// Walks one hash chain in the data file, starting at <paramref name="binPos"/>, and
        /// appends every entry whose simplified headword equals <paramref name="simp"/>.
        /// A position of 0 means "no (further) record", so a chain starting at 0 adds nothing.
        /// </summary>
        /// <param name="br">Open reader over the data file.</param>
        /// <param name="binPos">File position of the first record in the chain, or 0.</param>
        /// <param name="simp">Simplified headword that entries must match exactly.</param>
        /// <param name="matches">List that matching entries are appended to.</param>
        private static void collectChainEntries(BinReader br, int binPos, string simp, List<CedictEntry> matches)
        {
            while (binPos != 0)
            {
                br.Position = binPos;
                // Each record starts with the position of the next record in the chain
                binPos = br.ReadInt();
                // The entry itself follows the pointer
                CedictEntry entry = new CedictEntry(br);
                // Only keep if simplified really is identical; could be a hash collision
                if (entry.ChSimpl == simp)
                {
                    matches.Add(entry);
                }
            }
        }
Пример #3
0
        /// <summary>
        /// See <see cref="ZD.Common.IHeadwordInfo.GetPossibleHeadwords"/>.
        /// Collects headwords from the CEDICT chain of entries whose simplified form
        /// is exactly <paramref name="simp"/>.
        /// </summary>
        /// <param name="simp">Simplified headword to look up.</param>
        /// <param name="unihanFilter">Passed through unchanged to <c>addHeadIfNew</c>.</param>
        /// <returns>
        /// The collected headwords; an empty array if the hash is unknown, has no CEDICT
        /// chain, or no entry in the chain yields a headword.
        /// </returns>
        public HeadwordSyll[][] GetPossibleHeadwords(string simp, bool unihanFilter)
        {
            // Find this headword's hash in the sorted pointer table.
            HashChainPointer key = new HashChainPointer(CedictEntry.Hash(simp));
            int idx = Array.BinarySearch(hashPtrs, key, new HashComp());

            // Unknown hash, or known hash without a CEDICT chain: nothing to read.
            bool hasCedictChain = idx >= 0 && hashPtrs[idx].CedictPos != 0;
            if (!hasCedictChain)
            {
                return new HeadwordSyll[0][];
            }

            List<HeadwordSyll[]> heads = new List<HeadwordSyll[]>();
            using (BinReader reader = new BinReader(dataFileName))
            {
                // Follow the chain; a next-pointer of 0 terminates it.
                for (int next = hashPtrs[idx].CedictPos; next != 0; )
                {
                    reader.Position = next;
                    // Record layout: pointer to next record, then the entry itself.
                    next = reader.ReadInt();
                    CedictEntry candidate = new CedictEntry(reader);
                    // Same hash but different simplified form: a collision, skip it.
                    if (candidate.ChSimpl != simp)
                    {
                        continue;
                    }
                    addHeadIfNew(heads, candidate, unihanFilter);
                }
            }

            return heads.Count == 0 ? new HeadwordSyll[0][] : heads.ToArray();
        }