示例#1
0
            /// <summary>
            /// Inserts one pinyin instance row for every distinct syllable of an entry.
            /// </summary>
            /// <param name="entry">Entry whose <c>Pinyin</c> syllables are indexed.</param>
            /// <param name="entryId">Value written to the <c>@blob_id</c> parameter of each row.</param>
            private void indexPinyin(CedictEntry entry, int entryId)
            {
                // Distinct syllables, kept in order of first appearance
                List <PinyinSyllable> distinct = new List <PinyinSyllable>();

                foreach (PinyinSyllable syll in entry.Pinyin)
                {
                    // Syllables differing only in letter case count as the same
                    PinyinSyllable lowered = new PinyinSyllable(syll.Text.ToLowerInvariant(), syll.Tone);
                    // Scan for an earlier syllable with identical text and tone
                    int idx = 0;
                    while (idx < distinct.Count &&
                           (distinct[idx].Text != lowered.Text || distinct[idx].Tone != lowered.Tone))
                    {
                        ++idx;
                    }
                    // Ran off the end of the list: this syllable has not been seen yet
                    if (idx == distinct.Count)
                    {
                        distinct.Add(lowered);
                    }
                }
                // Syllable count and blob ID are identical for every row we insert
                var prms = cmdInsPinyinInstance.Parameters;
                prms["@syll_count"].Value = distinct.Count;
                prms["@blob_id"].Value    = entryId;
                // One row per distinct syllable
                foreach (PinyinSyllable syll in distinct)
                {
                    prms["@pinyin_hash"].Value = CedictEntry.Hash(syll.Text);
                    prms["@tone"].Value        = syll.Tone;
                    cmdInsPinyinInstance.ExecuteNonQuery();
                }
            }
示例#2
0
 /// <summary>
 /// Fills the parameters of the entry insert command, executes it, and returns
 /// the ID the command reports for the inserted row.
 /// </summary>
 /// <param name="simp">Simplified headword; only its hash is stored (<c>@simp_hash</c>).</param>
 /// <param name="head">Value for the <c>@hw</c> parameter.</param>
 /// <param name="trg">Value for the <c>@trg</c> parameter.</param>
 /// <param name="binId">Value for the <c>@bin_id</c> parameter.</param>
 /// <returns>The command's <c>LastInsertedId</c>, cast to <c>int</c>.</returns>
 protected int storeEntry(string simp, string head, string trg, int binId)
 {
     var prms = cmdInsEntry.Parameters;

     prms["@hw"].Value        = head;
     prms["@trg"].Value       = trg;
     prms["@simp_hash"].Value = CedictEntry.Hash(simp);
     // New entries always start with status 0 and not deleted
     prms["@status"].Value    = 0;
     prms["@deleted"].Value   = 0;
     prms["@bin_id"].Value    = binId;
     cmdInsEntry.ExecuteNonQuery();

     // NOTE(review): plain cast to int, as before; assumes IDs stay within int range
     int newId = (int)cmdInsEntry.LastInsertedId;
     return(newId);
 }
示例#3
0
        /// <summary>
        /// See <see cref="IHeadwordInfo.GetEntries"/>.
        /// </summary>
        /// <param name="simp">Simplified headword to look up.</param>
        /// <param name="ced">Receives the matching entries from the CEDICT chain; empty if there are none.</param>
        /// <param name="hdd">Receives the matching entries from the HanDeDict chain; empty if there are none.</param>
        public void GetEntries(string simp, out CedictEntry[] ced, out CedictEntry[] hdd)
        {
            List <CedictEntry> cedList = new List <CedictEntry>();
            List <CedictEntry> hddList = new List <CedictEntry>();
            int hash = CedictEntry.Hash(simp);
            // Do we have this hash?
            HashChainPointer hcp = new HashChainPointer(hash);
            int pos = Array.BinarySearch(hashPtrs, hcp, new HashComp());

            if (pos >= 0)
            {
                int cedictStart = hashPtrs[pos].CedictPos;
                int hddStart    = hashPtrs[pos].HanDeDictPos;
                // Only open the data file if at least one chain actually exists for this hash
                if (cedictStart != 0 || hddStart != 0)
                {
                    using (BinReader br = new BinReader(dataFileName))
                    {
                        // CEDICT entries
                        readMatchingEntries(br, cedictStart, simp, cedList);
                        // HanDeDict entries
                        readMatchingEntries(br, hddStart, simp, hddList);
                    }
                }
            }
            // Our results
            ced = cedList.ToArray();
            hdd = hddList.ToArray();
        }

        /// <summary>
        /// Walks one hash chain in the data file and collects the entries whose
        /// simplified headword equals <paramref name="simp"/>.
        /// </summary>
        /// <param name="br">Reader over the data file.</param>
        /// <param name="binPos">File position of the first chain item; 0 means the chain is empty.</param>
        /// <param name="simp">Simplified headword that entries must match exactly.</param>
        /// <param name="matches">List that matching entries are appended to.</param>
        private static void readMatchingEntries(BinReader br, int binPos, string simp, List <CedictEntry> matches)
        {
            while (binPos != 0)
            {
                br.Position = binPos;
                // Each chain item starts with the position of the next item; 0 ends the chain
                binPos = br.ReadInt();
                // Entry itself follows
                CedictEntry entry = new CedictEntry(br);
                // Only keep if simplified really is identical: could be a hash collision
                if (entry.ChSimpl == simp)
                {
                    matches.Add(entry);
                }
            }
        }
示例#4
0
        /// <summary>
        /// Processes one line of a dictionary file: parses it, and if it yields a valid entry,
        /// serializes the entry and remembers its file position under the hash of its simplified headword.
        /// Lines that fail to parse or validate are only counted as dropped.
        /// </summary>
        /// <param name="line">The line to process; empty lines and lines starting with "#" are ignored.</param>
        /// <param name="cedict">True to use the CEDICT counter and position map; false for the HanDeDict ones.</param>
        /// <param name="bw">Writer that receives the chain placeholder and the serialized entry.</param>
        private void dictLine(string line, bool cedict, BinWriter bw)
        {
            // Skip empty lines and comments. "#" is a literal marker, so compare ordinally
            // rather than by the current culture's linguistic rules.
            if (line.Length == 0 || line.StartsWith("#", System.StringComparison.Ordinal))
            {
                return;
            }
            // Parse entry
            CedictEntry entry = parser.ParseEntry(line, 0, null);

            // Verify that simp, trad and pinyin are equal length
            if (entry != null &&
                (entry.ChSimpl.Length != entry.ChTrad.Length || entry.ChSimpl.Length != entry.PinyinCount))
            {
                entry = null;
            }
            // Just count if failed to parse or validate
            if (entry == null)
            {
                if (cedict)
                {
                    ++cedictDropped;
                }
                else
                {
                    ++hddDropped;
                }
                return;
            }
            // Serialize; remember where this record starts
            int fpos = bw.Position;

            // First: hash chain: next entry in file with same hash. Will fill later.
            bw.WriteInt(0);
            // Then, entry itself
            entry.Serialize(bw);
            // Hash simplified and remember file position
            int hash = CedictEntry.Hash(entry.ChSimpl);
            Dictionary <int, List <int> > hashPoss = cedict ? cedictHashPoss : hddHashPoss;
            List <int> poss;

            // Single lookup; create the position list on first use of this hash
            if (!hashPoss.TryGetValue(hash, out poss))
            {
                poss           = new List <int>();
                hashPoss[hash] = poss;
            }
            poss.Add(fpos);
        }
示例#5
0
        /// <summary>
        /// See <see cref="ZD.Common.IHeadwordInfo.GetPossibleHeadwords"/>.
        /// </summary>
        /// <param name="simp">Simplified headword to look up.</param>
        /// <param name="unihanFilter">Passed through to <c>addHeadIfNew</c> for every matching entry.</param>
        /// <returns>The collected headwords; an empty array if the CEDICT chain holds no match.</returns>
        public HeadwordSyll[][] GetPossibleHeadwords(string simp, bool unihanFilter)
        {
            // Find the chain pointer for this headword's hash
            HashChainPointer needle = new HashChainPointer(CedictEntry.Hash(simp));
            int pos = Array.BinarySearch(hashPtrs, needle, new HashComp());

            // Start of the CEDICT chain; 0 if the hash is unknown or has no CEDICT entries
            int chainStart = pos < 0 ? 0 : hashPtrs[pos].CedictPos;

            if (chainStart == 0)
            {
                return(new HeadwordSyll[0][]);
            }
            // Walk the whole chain; hash collisions are weeded out by comparing the simplified form
            List <HeadwordSyll[]> heads = new List <HeadwordSyll[]>();

            using (BinReader br = new BinReader(dataFileName))
            {
                int next = chainStart;
                do
                {
                    br.Position = next;
                    // Chain link comes first; 0 marks the last item
                    next = br.ReadInt();
                    // Then the entry itself
                    CedictEntry entry = new CedictEntry(br);
                    if (entry.ChSimpl == simp)
                    {
                        addHeadIfNew(heads, entry, unihanFilter);
                    }
                }
                while (next != 0);
            }
            return(heads.Count == 0 ? new HeadwordSyll[0][] : heads.ToArray());
        }
示例#6
0
            /// <summary>
            /// Retrieves, for each requested syllable, the IDs of the blobs that have a pinyin instance of it.
            /// All syllables are fetched with a single query against <c>pinyin_instances</c>.
            /// </summary>
            /// <param name="sylls">
            /// Syllables to look up. A syllable whose tone is -1 is matched on text hash only;
            /// otherwise the tone must match too.
            /// </param>
            /// <returns>
            /// One set of blob IDs per requested syllable. The key is the syllable's text when its tone is -1,
            /// and its <c>GetDisplayString(false)</c> otherwise. Empty if <paramref name="sylls"/> is empty.
            /// </returns>
            private Dictionary <string, HashSet <int> > getPinyinCandidates(List <PinyinSyllable> sylls)
            {
                // Prepare
                Dictionary <string, HashSet <int> > res        = new Dictionary <string, HashSet <int> >();
                Dictionary <int, string>            hashToText = new Dictionary <int, string>();

                // Nothing requested: without conditions the query would end in a bare "WHERE;",
                // which is not valid SQL. No rows could match anyway.
                if (sylls.Count == 0)
                {
                    return(res);
                }
                // Numbers embedded in SQL text must not depend on the current culture:
                // some cultures format the sign of a negative value with a non-ASCII character.
                System.Globalization.CultureInfo inv = System.Globalization.CultureInfo.InvariantCulture;

                // Build custom, single query to get *all* instances that
                // are relevant for any requested syllable
                // Also initialize result dictionary with pinyin keys
                StringBuilder sb = new StringBuilder();

                sb.Append("SELECT pinyin_hash, tone, syll_count, blob_id FROM pinyin_instances WHERE");
                bool first = true;

                foreach (PinyinSyllable syll in sylls)
                {
                    // Init dictionary
                    string key = syll.Tone == -1 ? syll.Text : syll.GetDisplayString(false);
                    res[key] = new HashSet <int>();
                    int hash = CedictEntry.Hash(syll.Text);
                    hashToText[hash] = syll.Text;
                    // Build our custom query: conditions are OR-ed together
                    if (!first)
                    {
                        sb.Append(" OR");
                    }
                    first = false;
                    sb.Append(" (pinyin_hash=");
                    sb.Append(hash.ToString(inv));
                    if (syll.Tone != -1)
                    {
                        sb.Append(" AND tone=");
                        sb.Append(syll.Tone.ToString(inv));
                    }
                    sb.Append(")");
                }
                sb.Append(";");
                // Compile and execute SQL command
                using (MySqlCommand cmd = new MySqlCommand(sb.ToString(), conn))
                    using (MySqlDataReader rdr = cmd.ExecuteReader())
                    {
                        while (rdr.Read())
                        {
                            // Which query syllable is this for?
                            // With or without tone mark.
                            string        text = hashToText[rdr.GetInt32(0)];
                            HashSet <int> cands;
                            if (!res.TryGetValue(text, out cands))
                            {
                                // Not requested without tone, so the key must be text plus tone number.
                                // NOTE(review): relies on GetDisplayString(false) producing exactly that - confirm.
                                cands = res[text + rdr.GetInt32(1).ToString(inv)];
                            }
                            // Store blob ID
                            cands.Add(rdr.GetInt32(3));
                        }
                    }

                // Done
                return(res);
            }