예제 #1
0
파일: Index.cs 프로젝트: smartree/Zydeo
        /// <summary>
        /// Ctor: deserializes binary data.
        /// </summary>
        /// <remarks>
        /// Reads, in this order: the word holder, the sense index, the ideograph index and the
        /// pinyin index. Each index is stored as an entry count followed by that many key/item pairs.
        /// </remarks>
        /// <param name="br">Binary reader the index is read from.</param>
        /// <exception cref="System.IO.InvalidDataException">A stored entry count is negative.</exception>
        public Index(BinReader br)
        {
            WordHolder = new CedictEngine.WordHolder(br);

            // Sense index: token ID -> item
            SenseIndex = new Dictionary<int, SenseIndexItem>();
            int senseIndexKeyCount = readCount(br, "sense index");
            for (int i = 0; i < senseIndexKeyCount; ++i)
            {
                // Key must be read before the item: both come from the same sequential stream
                int tokenId = br.ReadInt();
                SenseIndex[tokenId] = new SenseIndexItem(br);
            }

            IdeoIndex = new Dictionary<char, IdeoIndexItem>();
            PinyinIndex = new Dictionary<string, PinyinIndexItem>();

            // Ideograph index: character -> item
            int ideoIndexKeyCount = readCount(br, "ideo index");
            for (int i = 0; i < ideoIndexKeyCount; ++i)
            {
                char c = br.ReadChar();
                IdeoIndex[c] = new IdeoIndexItem(br);
            }

            // Pinyin index: string -> item
            int pinyinIndexKeyCount = readCount(br, "pinyin index");
            for (int i = 0; i < pinyinIndexKeyCount; ++i)
            {
                string str = br.ReadString();
                PinyinIndex[str] = new PinyinIndexItem(br);
            }
        }

        /// <summary>
        /// Reads an entry count from the reader and rejects negative values, which would
        /// otherwise keep a count-driven read loop running far past the real data.
        /// </summary>
        /// <param name="br">Binary reader to read the count from.</param>
        /// <param name="what">Name of the section being read; used in the error message only.</param>
        /// <returns>The non-negative count.</returns>
        /// <exception cref="System.IO.InvalidDataException">The count read is negative.</exception>
        private static int readCount(BinReader br, string what)
        {
            int count = br.ReadInt();
            if (count < 0)
            {
                throw new System.IO.InvalidDataException("Negative entry count in " + what + ": " + count.ToString());
            }
            return count;
        }
예제 #2
0
파일: Index.cs 프로젝트: sheeeng/Zydeo
        /// <summary>
        /// Ctor: deserializes binary data.
        /// </summary>
        /// <remarks>
        /// Reads, in this order: the word holder, the sense index, the ideograph index and the
        /// pinyin index. Each index is stored as an entry count followed by that many key/item pairs.
        /// </remarks>
        /// <param name="br">Binary reader the index is read from.</param>
        /// <exception cref="System.IO.InvalidDataException">A stored entry count is negative.</exception>
        public Index(BinReader br)
        {
            WordHolder = new CedictEngine.WordHolder(br);

            // Sense index: token ID -> item
            SenseIndex = new Dictionary<int, SenseIndexItem>();
            int senseIndexKeyCount = readCount(br, "sense index");
            for (int i = 0; i < senseIndexKeyCount; ++i)
            {
                // Key must be read before the item: both come from the same sequential stream
                int tokenId = br.ReadInt();
                SenseIndex[tokenId] = new SenseIndexItem(br);
            }

            IdeoIndex = new Dictionary<char, IdeoIndexItem>();
            PinyinIndex = new Dictionary<string, PinyinIndexItem>();

            // Ideograph index: character -> item
            int ideoIndexKeyCount = readCount(br, "ideo index");
            for (int i = 0; i < ideoIndexKeyCount; ++i)
            {
                char c = br.ReadChar();
                IdeoIndex[c] = new IdeoIndexItem(br);
            }

            // Pinyin index: string -> item
            int pinyinIndexKeyCount = readCount(br, "pinyin index");
            for (int i = 0; i < pinyinIndexKeyCount; ++i)
            {
                string str = br.ReadString();
                PinyinIndex[str] = new PinyinIndexItem(br);
            }
        }

        /// <summary>
        /// Reads an entry count from the reader and rejects negative values, which would
        /// otherwise keep a count-driven read loop running far past the real data.
        /// </summary>
        /// <param name="br">Binary reader to read the count from.</param>
        /// <param name="what">Name of the section being read; used in the error message only.</param>
        /// <returns>The non-negative count.</returns>
        /// <exception cref="System.IO.InvalidDataException">The count read is negative.</exception>
        private static int readCount(BinReader br, string what)
        {
            int count = br.ReadInt();
            if (count < 0)
            {
                throw new System.IO.InvalidDataException("Negative entry count in " + what + ": " + count.ToString());
            }
            return count;
        }
예제 #3
0
        /// <summary>
        /// Stores a tokenized sense and registers it in the sense index under each of its
        /// distinct token IDs. Does nothing if every token is a Chinese or number token.
        /// </summary>
        /// <param name="tokens">Tokens of the sense.</param>
        /// <param name="entryId">Entry ID, passed on to the stored <see cref="TokenizedSense"/>.</param>
        /// <param name="senseIx">Sense index, passed on to the stored <see cref="TokenizedSense"/>.</param>
        /// <exception cref="ArgumentException">
        /// The sense has more distinct token IDs than fit in a byte. Nothing is stored in that case.
        /// </exception>
        private void indexSense(ReadOnlyCollection<EquivToken> tokens, int entryId, int senseIx)
        {
            // If there are no non-Chinese, non-number tokens: nothing to save, nothing to index
            bool relevant = false;
            foreach (EquivToken eqt in tokens)
            {
                if (eqt.TokenId != index.WordHolder.IdZho && eqt.TokenId != index.WordHolder.IdNum)
                {
                    relevant = true;
                    break;
                }
            }
            if (!relevant)
            {
                return;
            }

            // Get set of different token IDs - we don't index dupes
            HashSet<int> tokenIdSet = new HashSet<int>();
            foreach (EquivToken eqt in tokens)
            {
                tokenIdSet.Add(eqt.TokenId);
            }

            // Validate before touching any state, so a failure leaves tsenses and the index untouched
            if (tokenIdSet.Count > byte.MaxValue)
            {
                throw new ArgumentException("Sense's token count out of byte range: " + tokenIdSet.Count.ToString());
            }
            byte tokensInSense = (byte)tokenIdSet.Count;

            // Keep tokenized sense in memory; its ID is the number of senses stored before it
            int senseId = tsenses.Count;
            tsenses.Add(new TokenizedSense(entryId, senseIx, tokens));

            // Add to instance list of each distinct token ID
            foreach (int tokenId in tokenIdSet)
            {
                SenseIndexItem sii;
                if (!index.SenseIndex.TryGetValue(tokenId, out sii))
                {
                    sii = new SenseIndexItem();
                    index.SenseIndex[tokenId] = sii;
                }
                SenseInfo senseInfo = new SenseInfo
                {
                    TokenizedSenseId = senseId,
                    TokensInSense = tokensInSense,
                };
                sii.Instances.Add(senseInfo);
            }
        }
예제 #4
0
        /// <summary>
        /// Stores a tokenized sense and registers it in the sense index under each of its
        /// distinct token IDs. Does nothing if every token is a Chinese or number token.
        /// </summary>
        /// <param name="tokens">Tokens of the sense.</param>
        /// <param name="entryId">Entry ID, passed on to the stored <see cref="TokenizedSense"/>.</param>
        /// <param name="senseIx">Sense index, passed on to the stored <see cref="TokenizedSense"/>.</param>
        /// <exception cref="ArgumentException">
        /// The sense has more distinct token IDs than fit in a byte. Nothing is stored in that case.
        /// </exception>
        private void indexSense(ReadOnlyCollection<EquivToken> tokens, int entryId, int senseIx)
        {
            // If there are no non-Chinese, non-number tokens: nothing to save, nothing to index
            bool relevant = false;
            foreach (EquivToken eqt in tokens)
            {
                if (eqt.TokenId != index.WordHolder.IdZho && eqt.TokenId != index.WordHolder.IdNum)
                { relevant = true; break; }
            }
            if (!relevant) return;

            // Get set of different token IDs - we don't index dupes
            HashSet<int> tokenIdSet = new HashSet<int>();
            foreach (EquivToken eqt in tokens) tokenIdSet.Add(eqt.TokenId);

            // Validate before touching any state, so a failure leaves tsenses and the index untouched
            if (tokenIdSet.Count > byte.MaxValue)
                throw new ArgumentException("Sense's token count out of byte range: " + tokenIdSet.Count.ToString());
            byte tokensInSense = (byte)tokenIdSet.Count;

            // Keep tokenized sense in memory; its ID is the number of senses stored before it
            int senseId = tsenses.Count;
            tsenses.Add(new TokenizedSense(entryId, senseIx, tokens));

            // Add to instance list of each distinct token ID
            foreach (int tokenId in tokenIdSet)
            {
                SenseIndexItem sii;
                if (!index.SenseIndex.TryGetValue(tokenId, out sii))
                {
                    sii = new SenseIndexItem();
                    index.SenseIndex[tokenId] = sii;
                }
                SenseInfo senseInfo = new SenseInfo
                {
                    TokenizedSenseId = senseId,
                    TokensInSense = tokensInSense,
                };
                sii.Instances.Add(senseInfo);
            }
        }