Beispiel #1
0
        /// <summary>
        /// Ctor: reconstructs the index from its serialized binary form.
        /// </summary>
        /// <param name="br">Binary reader that supplies the serialized data.</param>
        public Index(BinReader br)
        {
            // The word holder deserializes itself from the reader first.
            WordHolder = new CedictEngine.WordHolder(br);

            // Sense index: a key count, followed by that many (token ID, item) pairs.
            SenseIndex = new Dictionary<int, SenseIndexItem>();
            int senseKeys = br.ReadInt();
            for (int n = 0; n != senseKeys; ++n)
            {
                int key = br.ReadInt();
                SenseIndex[key] = new SenseIndexItem(br);
            }

            // Hanzi index: a key count, followed by that many (character, item) pairs.
            IdeoIndex = new Dictionary<char, IdeoIndexItem>();
            int ideoKeys = br.ReadInt();
            for (int n = 0; n != ideoKeys; ++n)
            {
                char key = br.ReadChar();
                IdeoIndex[key] = new IdeoIndexItem(br);
            }

            // Pinyin index: a key count, followed by that many (string, item) pairs.
            PinyinIndex = new Dictionary<string, PinyinIndexItem>();
            int pinyinKeys = br.ReadInt();
            for (int n = 0; n != pinyinKeys; ++n)
            {
                string key = br.ReadString();
                PinyinIndex[key] = new PinyinIndexItem(br);
            }
        }
Beispiel #2
0
 /// <summary>
 /// Indexes one parsed Cedict entry (hanzi, pinyin and target-language indexes).
 /// </summary>
 /// <param name="entry">The parsed entry whose headwords, pinyin syllables and senses are indexed.</param>
 /// <param name="id">Value stored in the index lists to identify this entry.</param>
 /// <exception cref="ArgumentException">
 /// A pinyin syllable of the entry has a tone other than -1, 0, 1, 2, 3 or 4.
 /// </exception>
 private void indexEntry(CedictEntry entry, int id)
 {
     // Index characters of simplified headword
     foreach (char c in entry.ChSimpl)
     {
         IdeoIndexItem ii = getOrAddIdeoItem(c);
         // Avoid indexing same entry twice if a char occurs multiple times.
         // Checking only the last element suffices: all additions for this id
         // happen back-to-back within this loop.
         if (ii.EntriesHeadwordSimp.Count == 0 ||
             ii.EntriesHeadwordSimp[ii.EntriesHeadwordSimp.Count - 1] != id)
             ii.EntriesHeadwordSimp.Add(id);
     }
     // Index characters of traditional headword
     foreach (char c in entry.ChTrad)
     {
         IdeoIndexItem ii = getOrAddIdeoItem(c);
         // Avoid indexing same entry twice if a char occurs multiple times
         if (ii.EntriesHeadwordTrad.Count == 0 ||
             ii.EntriesHeadwordTrad[ii.EntriesHeadwordTrad.Count - 1] != id)
             ii.EntriesHeadwordTrad.Add(id);
     }
     // Index pinyin syllables
     foreach (PinyinSyllable pys in entry.Pinyin)
     {
         // Index contains lower-case syllables
         string textLo = pys.Text.ToLowerInvariant();
         // Single lookup; create and register the item on first sight of the syllable
         PinyinIndexItem pi;
         if (!index.PinyinIndex.TryGetValue(textLo, out pi))
         {
             pi = new PinyinIndexItem();
             index.PinyinIndex[textLo] = pi;
         }
         // Figure out which list in index item - by tone
         List<int> entryList;
         switch (pys.Tone)
         {
             case -1: entryList = pi.EntriesNT; break;
             case 0: entryList = pi.Entries0; break;
             case 1: entryList = pi.Entries1; break;
             case 2: entryList = pi.Entries2; break;
             case 3: entryList = pi.Entries3; break;
             case 4: entryList = pi.Entries4; break;
             default: throw new ArgumentException("Invalid tone: " + pys.Tone.ToString());
         }
         // Avoid indexing same entry twice if a syllable occurs multiple times
         if (entryList.Count == 0 || entryList[entryList.Count - 1] != id)
             entryList.Add(id);
     }
     // Index equiv of each sense
     int senseIx = -1;
     foreach (CedictSense sense in entry.Senses)
     {
         // Incremented before the empty check, so skipped senses still consume an index
         ++senseIx;
         // Empty equiv: nothing to index
         if (sense.Equiv.IsEmpty) continue;
         // Tokenize
         ReadOnlyCollection<EquivToken> tokens = tokenizer.Tokenize(sense.Equiv);
         // Index sense
         indexSense(tokens, id, senseIx);
     }
 }

 /// <summary>
 /// Returns the hanzi index item for a character, creating an empty item and
 /// registering it in the index if the character has none yet.
 /// </summary>
 /// <param name="c">The character to look up.</param>
 /// <returns>The existing or newly created index item for the character.</returns>
 private IdeoIndexItem getOrAddIdeoItem(char c)
 {
     IdeoIndexItem ii;
     if (!index.IdeoIndex.TryGetValue(c, out ii))
     {
         ii = new IdeoIndexItem();
         index.IdeoIndex[c] = ii;
     }
     return ii;
 }