/// <summary>
/// Adds the words of a document to the index.
/// The text is lowercased, broken into words, and each non-empty word is
/// associated with <paramref name="recnum"/> through its bitmap.
/// </summary>
/// <param name="recnum">Unique number of the document being indexed.</param>
/// <param name="text">Document content; null or empty text is ignored.</param>
private void AddtoIndex(int recnum, string text)
{
    // Nothing to index for a missing or empty document.
    if (string.IsNullOrEmpty(text))
    {
        return;
    }

    // The index is kept in lowercase.
    text = text.ToLowerInvariant();

    string[] terms;
    if (_docMode)
    {
        // Document mode: the tokenizer produces a word -> frequency map;
        // only the distinct words (the map's keys) are indexed.
        Dictionary<string, int> frequencies = _tokenizer.GenerateWordFreq(text);
        var distinctWords = frequencies.Keys;
        terms = new string[distinctWords.Count];
        distinctWords.CopyTo(terms, 0);
    }
    else
    {
        // Otherwise the text is simply split on single spaces.
        terms = text.Split(' ');
    }

    // For every word: reuse the bitmap number already registered for it,
    // or obtain a fresh bitmap number and register the word under it.
    for (int i = 0; i < terms.Length; i++)
    {
        string term = terms[i];
        if (term == "")
        {
            // Consecutive spaces yield empty entries when splitting.
            continue;
        }

        int bitmapNumber;
        if (!_words.TryGetValue(term, out bitmapNumber))
        {
            // Unknown word: take a free bitmap number, record the document
            // against it, then remember the word -> bitmap number mapping.
            bitmapNumber = _bitmaps.GetFreeRecordNumber();
            _bitmaps.SetDuplicate(bitmapNumber, recnum);
            _words.Add(term, bitmapNumber);
        }
        else
        {
            // Known word: flag this document in the word's existing bitmap.
            _bitmaps.GetBitmap(bitmapNumber).Set(recnum, true);
        }
    }

    _wordschanged = true;
}
/// <summary>
/// Adds text to the index under the given record number.
/// The text is lowercased and broken into words; every non-empty word is
/// linked to <paramref name="recnum"/> through that word's bitmap.
/// </summary>
/// <param name="recnum">Record number to set in the bitmap of each word.</param>
/// <param name="text">Text to index; null or empty text is ignored.</param>
private void AddtoIndex(int recnum, string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return;
    }

    // The index stores lowercase words only.
    string lowered = text.ToLowerInvariant();

    string[] tokens;
    if (!HootConfOptions.DocMode)
    {
        // Plain mode: split on single spaces (may produce empty entries).
        tokens = lowered.Split(' ');
    }
    else
    {
        // Document mode: the tokenizer returns a word -> frequency map and
        // only its keys (the distinct words) are indexed.
        Dictionary<string, int> wordCounts = _tokenizer.GenerateWordFreq(lowered, HootConfOptions);
        var uniqueWords = wordCounts.Keys;
        tokens = new string[uniqueWords.Count];
        uniqueWords.CopyTo(tokens, 0);
    }

    foreach (string token in tokens)
    {
        if (token == "")
        {
            continue;
        }

        int slot;
        bool alreadyIndexed = _words.TryGetValue(token, out slot);
        if (alreadyIndexed)
        {
            // Word already has a bitmap: mark this record in it.
            _bitmaps.GetBitmap(slot).Set(recnum, true);
            continue;
        }

        // First occurrence of the word: obtain a free bitmap number, record
        // the document against it, and register the word under that number.
        slot = _bitmaps.GetFreeRecordNumber();
        _bitmaps.SetDuplicate(slot, recnum);
        _words.Add(token, slot);
    }

    _wordschanged = true;
}