/// <summary>
/// Ctor: rebuilds the index from its serialized binary form.
/// </summary>
/// <param name="br">Binary reader the index data is read from.</param>
/// <remarks>
/// Data is consumed in a fixed order: the word holder, then the sense index,
/// the ideograph index and the pinyin index. Each index is read as a key count
/// followed by that many key / index item pairs.
/// </remarks>
public Index(BinReader br)
{
    // Word holder is read first
    WordHolder = new CedictEngine.WordHolder(br);

    // Sense index: integer token ID -> sense index item
    SenseIndex = new Dictionary<int, SenseIndexItem>();
    int senseKeys = br.ReadInt();
    for (int n = 0; n != senseKeys; ++n)
    {
        // Key is read before the item that follows it in the stream
        int key = br.ReadInt();
        SenseIndex[key] = new SenseIndexItem(br);
    }

    // Ideograph index: character -> ideograph index item
    IdeoIndex = new Dictionary<char, IdeoIndexItem>();
    int ideoKeys = br.ReadInt();
    for (int n = 0; n != ideoKeys; ++n)
    {
        char key = br.ReadChar();
        IdeoIndex[key] = new IdeoIndexItem(br);
    }

    // Pinyin index: string key -> pinyin index item
    PinyinIndex = new Dictionary<string, PinyinIndexItem>();
    int pinyinKeys = br.ReadInt();
    for (int n = 0; n != pinyinKeys; ++n)
    {
        string key = br.ReadString();
        PinyinIndex[key] = new PinyinIndexItem(br);
    }
}
/// <summary>
/// Indexes one parsed Cedict entry (hanzi, pinyin and target-language indexes).
/// </summary>
/// <param name="entry">The parsed entry whose headwords, pinyin syllables and senses are indexed.</param>
/// <param name="id">ID of the entry; this is the value stored in the index lists.</param>
/// <exception cref="Exception">A pinyin syllable has a tone other than -1, 0, 1, 2, 3 or 4.</exception>
private void indexEntry(CedictEntry entry, int id)
{
    // Index characters of simplified headword
    foreach (char c in entry.ChSimpl)
    {
        // Single lookup; the index item is created the first time a character is seen
        IdeoIndexItem ii;
        if (!index.IdeoIndex.TryGetValue(c, out ii))
        {
            ii = new IdeoIndexItem();
            index.IdeoIndex[c] = ii;
        }
        // Avoid indexing same entry twice if a char occurs multiple times.
        // Only the last element of the list is compared against the ID.
        if (ii.EntriesHeadwordSimp.Count == 0 ||
            ii.EntriesHeadwordSimp[ii.EntriesHeadwordSimp.Count - 1] != id)
        {
            ii.EntriesHeadwordSimp.Add(id);
        }
    }

    // Index characters of traditional headword
    foreach (char c in entry.ChTrad)
    {
        IdeoIndexItem ii;
        if (!index.IdeoIndex.TryGetValue(c, out ii))
        {
            ii = new IdeoIndexItem();
            index.IdeoIndex[c] = ii;
        }
        // Avoid indexing same entry twice if a char occurs multiple times
        if (ii.EntriesHeadwordTrad.Count == 0 ||
            ii.EntriesHeadwordTrad[ii.EntriesHeadwordTrad.Count - 1] != id)
        {
            ii.EntriesHeadwordTrad.Add(id);
        }
    }

    // Index pinyin syllables
    foreach (PinyinSyllable pys in entry.Pinyin)
    {
        // Index contains lower-case syllables
        string textLo = pys.Text.ToLowerInvariant();
        PinyinIndexItem pi;
        if (!index.PinyinIndex.TryGetValue(textLo, out pi))
        {
            pi = new PinyinIndexItem();
            index.PinyinIndex[textLo] = pi;
        }

        // Figure out which list in index item - by tone
        List<int> entryList;
        switch (pys.Tone)
        {
            case -1: entryList = pi.EntriesNT; break;
            case 0: entryList = pi.Entries0; break;
            case 1: entryList = pi.Entries1; break;
            case 2: entryList = pi.Entries2; break;
            case 3: entryList = pi.Entries3; break;
            case 4: entryList = pi.Entries4; break;
            default: throw new Exception("Invalid tone: " + pys.Tone.ToString());
        }

        // Avoid indexing same entry twice if a syllable occurs multiple times
        if (entryList.Count == 0 || entryList[entryList.Count - 1] != id)
        {
            entryList.Add(id);
        }
    }

    // Index equiv of each sense
    int senseIx = -1;
    foreach (CedictSense sense in entry.Senses)
    {
        // Incremented before the empty check, so skipped senses still consume an index
        ++senseIx;
        // Empty equiv: nothing to index
        if (sense.Equiv.IsEmpty) continue;
        // Tokenize
        ReadOnlyCollection<EquivToken> tokens = tokenizer.Tokenize(sense.Equiv);
        // Index sense
        indexSense(tokens, id, senseIx);
    }
}