/// <summary>
/// Ctor: rebuilds the index from its binary form.
/// Reads, in this order: the word holder, the sense index, the ideograph index
/// and the pinyin index. Each index is stored as a key count followed by that
/// many (key, item) pairs.
/// </summary>
/// <param name="br">Reader positioned at the start of the serialized index.</param>
public Index(BinReader br)
{
    WordHolder = new CedictEngine.WordHolder(br);

    // Sense index: int token ID -> SenseIndexItem.
    SenseIndex = new Dictionary<int, SenseIndexItem>();
    int senseKeys = br.ReadInt();
    for (int n = 0; n != senseKeys; ++n)
    {
        // Indexer argument is evaluated before the right-hand side,
        // so the key is read from the stream before the item.
        SenseIndex[br.ReadInt()] = new SenseIndexItem(br);
    }

    IdeoIndex = new Dictionary<char, IdeoIndexItem>();
    PinyinIndex = new Dictionary<string, PinyinIndexItem>();

    // Ideograph index: char -> IdeoIndexItem.
    int ideoKeys = br.ReadInt();
    for (int n = 0; n != ideoKeys; ++n)
    {
        IdeoIndex[br.ReadChar()] = new IdeoIndexItem(br);
    }

    // Pinyin index: string -> PinyinIndexItem.
    int pinyinKeys = br.ReadInt();
    for (int n = 0; n != pinyinKeys; ++n)
    {
        PinyinIndex[br.ReadString()] = new PinyinIndexItem(br);
    }
}
/// <summary>
/// Ctor: deserializes the index from binary data.
/// The stream is consumed sequentially: word holder first, then three
/// count-prefixed tables (sense, ideograph, pinyin), each a run of
/// key/item pairs.
/// </summary>
/// <param name="br">Binary reader supplying the serialized data.</param>
public Index(BinReader br)
{
    WordHolder = new CedictEngine.WordHolder(br);

    // Table 1: token ID -> sense index item.
    SenseIndex = new Dictionary<int, SenseIndexItem>();
    int sensePairCount = br.ReadInt();
    int pairsRead = 0;
    while (pairsRead != sensePairCount)
    {
        int key = br.ReadInt();
        SenseIndexItem item = new SenseIndexItem(br);
        SenseIndex[key] = item;
        ++pairsRead;
    }

    IdeoIndex = new Dictionary<char, IdeoIndexItem>();
    PinyinIndex = new Dictionary<string, PinyinIndexItem>();

    // Table 2: character -> ideograph index item.
    int ideoPairCount = br.ReadInt();
    pairsRead = 0;
    while (pairsRead != ideoPairCount)
    {
        char key = br.ReadChar();
        IdeoIndexItem item = new IdeoIndexItem(br);
        IdeoIndex[key] = item;
        ++pairsRead;
    }

    // Table 3: string key -> pinyin index item.
    int pinyinPairCount = br.ReadInt();
    pairsRead = 0;
    while (pairsRead != pinyinPairCount)
    {
        string key = br.ReadString();
        PinyinIndexItem item = new PinyinIndexItem(br);
        PinyinIndex[key] = item;
        ++pairsRead;
    }
}
/// <summary>
/// Stores one tokenized sense and registers it in the sense index under every
/// distinct token ID it contains.
/// </summary>
/// <param name="tokens">Tokens of the sense to index.</param>
/// <param name="entryId">Entry ID, passed through to the stored <c>TokenizedSense</c>.</param>
/// <param name="senseIx">Sense index, passed through to the stored <c>TokenizedSense</c>.</param>
/// <exception cref="Exception">
/// The sense has more than <see cref="byte.MaxValue"/> distinct token IDs.
/// Thrown before anything is stored, so neither the sense list nor the index is modified.
/// </exception>
private void indexSense(ReadOnlyCollection<EquivToken> tokens, int entryId, int senseIx)
{
    // If there are no non-Chinese, non-number tokens: nothing to save, nothing to index
    bool relevant = false;
    foreach (EquivToken eqt in tokens)
    {
        if (eqt.TokenId != index.WordHolder.IdZho && eqt.TokenId != index.WordHolder.IdNum)
        {
            relevant = true;
            break;
        }
    }
    if (!relevant)
    {
        return;
    }

    // Collect the distinct token IDs - we don't index dupes
    HashSet<int> tokenIdSet = new HashSet<int>();
    foreach (EquivToken eqt in tokens)
    {
        tokenIdSet.Add(eqt.TokenId);
    }

    // Validate once, up front: the count is stored as a byte, and failing here
    // (rather than mid-loop) keeps the sense list and index untouched on error.
    if (tokenIdSet.Count > byte.MaxValue)
    {
        throw new Exception("Sense's token count out of byte range: " + tokenIdSet.Count.ToString());
    }
    byte tokensInSense = (byte)tokenIdSet.Count;

    // Keep tokenized sense in memory; its position in the list serves as its ID
    int senseId = tsenses.Count;
    TokenizedSense ts = new TokenizedSense(entryId, senseIx, tokens);
    tsenses.Add(ts);

    // Now, index each distinct ID
    foreach (int tokenId in tokenIdSet)
    {
        // Single lookup: fetch the existing index item, or create and register one
        SenseIndexItem sii;
        if (!index.SenseIndex.TryGetValue(tokenId, out sii))
        {
            sii = new SenseIndexItem();
            index.SenseIndex[tokenId] = sii;
        }

        SenseInfo senseInfo = new SenseInfo
        {
            TokenizedSenseId = senseId,
            TokensInSense = tokensInSense,
        };
        sii.Instances.Add(senseInfo);
    }
}
/// <summary>
/// Records a tokenized sense in memory and adds it to the instance list of each
/// distinct token ID occurring in it.
/// </summary>
/// <param name="tokens">Tokens making up the sense.</param>
/// <param name="entryId">Entry ID handed to the <c>TokenizedSense</c> constructor.</param>
/// <param name="senseIx">Sense index handed to the <c>TokenizedSense</c> constructor.</param>
/// <exception cref="Exception">
/// The number of distinct token IDs exceeds <see cref="byte.MaxValue"/>.
/// The check runs before any state is changed.
/// </exception>
private void indexSense(ReadOnlyCollection<EquivToken> tokens, int entryId, int senseIx)
{
    // If there are no non-Chinese, non-number tokens: nothing to save, nothing to index
    bool relevant = false;
    foreach (EquivToken eqt in tokens)
    {
        if (eqt.TokenId != index.WordHolder.IdZho && eqt.TokenId != index.WordHolder.IdNum)
        {
            relevant = true;
            break;
        }
    }
    if (!relevant) return;

    // First get set of different token IDs - we don't index dupes
    HashSet<int> tokenIdSet = new HashSet<int>();
    foreach (EquivToken eqt in tokens) tokenIdSet.Add(eqt.TokenId);

    // The distinct-token count must fit in a byte. Checking it here, before
    // tsenses and the index are modified, avoids leaving them half-updated.
    int distinctCount = tokenIdSet.Count;
    if (distinctCount > byte.MaxValue)
        throw new Exception("Sense's token count out of byte range: " + distinctCount.ToString());

    // Keep tokenized sense in memory
    int senseId = tsenses.Count;
    tsenses.Add(new TokenizedSense(entryId, senseIx, tokens));

    // Add to instance list of each distinct token ID
    foreach (int tokenId in tokenIdSet)
    {
        // TryGetValue avoids the ContainsKey + indexer double lookup
        SenseIndexItem sii;
        if (!index.SenseIndex.TryGetValue(tokenId, out sii))
        {
            sii = new SenseIndexItem();
            index.SenseIndex[tokenId] = sii;
        }

        sii.Instances.Add(new SenseInfo
        {
            TokenizedSenseId = senseId,
            TokensInSense = (byte)distinctCount,
        });
    }
}