Exemple #1
0
        /// <summary>
        /// Adds the words of a document to the index.
        /// </summary>
        /// <remarks>
        /// The text is lower-cased with the invariant culture before it is broken into words.
        /// In document mode the words are the keys of the dictionary returned by
        /// <c>_tokenizer.GenerateWordFreq</c>; otherwise the text is split on single spaces.
        /// Null or empty text is ignored and leaves the index untouched.
        /// </remarks>
        /// <param name="recnum">Unique number of the document.</param>
        /// <param name="text">Content of the document.</param>
        private void AddtoIndex(int recnum, string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return;
            }

            // The index is kept in lower case.
            string lowered = text.ToLowerInvariant();

            // Break the text into words.
            string[] terms;
            if (!_docMode)
            {
                terms = lowered.Split(' ');
            }
            else
            {
                // Only the distinct words are needed here; the frequency values are not read.
                Dictionary<string, int> frequencies = _tokenizer.GenerateWordFreq(lowered);
                terms = new string[frequencies.Count];
                frequencies.Keys.CopyTo(terms, 0);
            }

            // For every word: a word that is already known yields its bitmap index,
            // an unknown word gets a newly created index number.
            foreach (string term in terms)
            {
                // Splitting on single spaces yields empty entries for consecutive spaces.
                if (term.Length == 0)
                {
                    continue;
                }

                int bitmapId;
                if (!_words.TryGetValue(term, out bitmapId))
                {
                    // Unknown word: reserve an index number and register the word under it.
                    // NOTE(review): SetDuplicate presumably records recnum for the new bitmap - confirm.
                    bitmapId = _bitmaps.GetFreeRecordNumber();
                    _bitmaps.SetDuplicate(bitmapId, recnum);
                    _words.Add(term, bitmapId);
                    continue;
                }

                // Known word: flag this document in the word's bitmap.
                _bitmaps.GetBitmap(bitmapId).Set(recnum, true);
            }

            _wordschanged = true;
        }
Exemple #2
0
        /// <summary>
        /// Adds the words of a text to the index under the given record number.
        /// </summary>
        /// <remarks>
        /// The text is lower-cased with the invariant culture first. When
        /// <c>HootConfOptions.DocMode</c> is set, the words are the keys of the dictionary
        /// returned by <c>_tokenizer.GenerateWordFreq</c>; otherwise the text is split on
        /// single spaces. Null or empty text is ignored and leaves the index untouched.
        /// </remarks>
        /// <param name="recnum">Record number that is set in the bitmap of every word found.</param>
        /// <param name="text">Text whose words are indexed.</param>
        private void AddtoIndex(int recnum, string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return;
            }

            // The index is kept in lower case.
            string lowered = text.ToLowerInvariant();

            string[] terms;
            if (!HootConfOptions.DocMode)
            {
                terms = lowered.Split(' ');
            }
            else
            {
                // Only the distinct words are needed here; the frequency values are not read.
                Dictionary<string, int> frequencies = _tokenizer.GenerateWordFreq(lowered, HootConfOptions);
                terms = new string[frequencies.Count];
                frequencies.Keys.CopyTo(terms, 0);
            }

            foreach (string term in terms)
            {
                // Splitting on single spaces yields empty entries for consecutive spaces.
                if (term.Length == 0)
                {
                    continue;
                }

                int bitmapId;
                if (!_words.TryGetValue(term, out bitmapId))
                {
                    // First time this word is seen: reserve a bitmap number and register the word.
                    // NOTE(review): SetDuplicate presumably records recnum for the new bitmap - confirm.
                    bitmapId = _bitmaps.GetFreeRecordNumber();
                    _bitmaps.SetDuplicate(bitmapId, recnum);
                    _words.Add(term, bitmapId);
                    continue;
                }

                // Word already indexed: flag this record in its bitmap.
                _bitmaps.GetBitmap(bitmapId).Set(recnum, true);
            }

            _wordschanged = true;
        }