예제 #1
0
        /// <summary>
        /// Rebuilds the filter's lookup structures from a sensitive-word dictionary.
        /// </summary>
        /// <remarks>
        /// Keys that start with a double quote are stored in <c>_regexWords</c> with the surrounding
        /// quotes trimmed. Every other key is passed through <c>IgnoreCharacterProcess</c>, lower-cased
        /// with <c>FastToLower</c>, de-duplicated, and appended (minus its first character) to the
        /// <c>_wordTable</c> group indexed by that first character. Keys that are empty after
        /// normalization are skipped.
        /// </remarks>
        /// <param name="wordDict">
        /// Sensitive words (key: the word, value: its replacement text). A null or empty value falls
        /// back to "*"; the value "{Banned}" marks the word as banned instead of replaceable.
        /// </param>
        private void Initialization(Dictionary <string, string> wordDict)
        {
            Array.Clear(_wordTable, 0, _wordTable.Length);

            // NOTE(review): _regexWords is appended to below but never cleared here, so calling this
            // method a second time would accumulate duplicate entries - confirm whether that is intended.

            List <WordEntity> wordList = new List <WordEntity>();

            foreach (var pair in wordDict)
            {
                string replacement = string.IsNullOrEmpty(pair.Value) ? "*" : pair.Value;

                WordFilterStatus status = replacement == "{Banned}"
                    ? WordFilterStatus.Banned
                    : WordFilterStatus.Replace;

                // A leading double quote marks the key as an entry for the regex-based list.
                // Ordinal comparison: this is a syntactic marker, not linguistic text.
                if (pair.Key.StartsWith("\"", StringComparison.Ordinal))
                {
                    WordEntity regexWord = new WordEntity()
                    {
                        Word             = pair.Key.Trim('\"'),
                        Replacement      = replacement,
                        WordFilterStatus = status
                    };

                    _regexWords.Add(regexWord);

                    continue;
                }

                string tempWord = IgnoreCharacterProcess(pair.Key);

                // A key that is empty (or becomes empty once ignored characters are stripped) has no
                // first character to index the table with; skip it instead of failing further down.
                if (string.IsNullOrEmpty(tempWord))
                {
                    continue;
                }

                // Normalize every character to lower case.
                StringBuilder wordBuilder = new StringBuilder(tempWord.Length);
                for (int i = 0; i < tempWord.Length; i++)
                {
                    wordBuilder.Append((char)FastToLower(tempWord[i]));
                }

                WordEntity simpleWord = new WordEntity()
                {
                    Word             = wordBuilder.ToString(),
                    Replacement      = replacement,
                    WordFilterStatus = status
                };

                wordList.Add(simpleWord);
            }

            _regexWords = _regexWords.OrderByDescending(n => n.WordFilterStatus).ToList();

            // Remove duplicates: sort ordinally so that identical words end up adjacent, then drop
            // repeats while walking backwards. The sort must use the same (ordinal) notion of
            // equality as the == check below; a culture-sensitive sort can treat different strings
            // as equal and leave identical ones non-adjacent.
            wordList.Sort(delegate(WordEntity a, WordEntity b)
            {
                return(string.CompareOrdinal(a.Word, b.Word));
            });

            for (int idx = wordList.Count - 1; idx > 0; idx--)
            {
                if (wordList[idx].Word == wordList[idx - 1].Word)
                {
                    wordList.RemoveAt(idx);
                }
            }

            // Populate the table: the first character selects the group, the remainder of the word
            // is stored inside that group.
            foreach (WordEntity entity in wordList)
            {
                int       slot  = (int)entity.Word[0];
                WordGroup group = _wordTable[slot];

                if (group == null)
                {
                    group            = new WordGroup();
                    _wordTable[slot] = group;
                }

                group.AppendWord(entity.Word.Substring(1), entity.WordFilterStatus, entity.Replacement);
            }
        }
예제 #2
0
        /// <summary>
        /// Scans a string for sensitive words, replacing replaceable words and reporting banned ones.
        /// </summary>
        /// <remarks>
        /// For each position, the word group indexed by the lower-cased character is consulted and
        /// every entry of that group is tested against the text at that same position. A banned
        /// match stops processing immediately. Otherwise the first replaceable match (in group
        /// order) is substituted and the matched characters are skipped. When no banned word is
        /// found, the substituted text is additionally passed through <c>RegexFilter</c>.
        /// </remarks>
        /// <param name="source">The text to process.</param>
        /// <param name="status">
        /// Receives <c>WordFilterStatus.Banned</c> when a banned word is matched; otherwise the status
        /// reported by <c>RegexFilter</c>. It is <c>WordFilterStatus.Replace</c> for null or empty input.
        /// </param>
        /// <returns>
        /// The input unchanged when it is null/empty or when a banned word is matched; otherwise the
        /// result of <c>RegexFilter</c> applied to the text with replacements substituted.
        /// </returns>
        public string Filter(string source, out WordFilterStatus status)
        {
            status = WordFilterStatus.Replace;

            if (string.IsNullOrEmpty(source))
            {
                return(source);
            }

            StringBuilder sb = new StringBuilder(source.Length);

            for (int start = 0; start < source.Length; start++)
            {
                WordGroup wordGroup = _wordTable[FastToLower(source[start])];

                bool   found           = false;
                string replacementText = null;
                int    consumed        = 0;

                if (wordGroup != null)
                {
                    // Every entry is tested at the SAME position. 'start' is only advanced after
                    // the scan, so later entries are not compared against shifted text and at most
                    // one replacement is emitted per position.
                    for (int idx = 0; idx < wordGroup.Count; idx++)
                    {
                        WordEntity we = wordGroup.GetItem(idx);

                        // An empty remainder means the single character at 'start' is the whole word.
                        int  matchLength = 0;
                        bool matched     = we.Word.Length == 0 || CheckString(source, we.Word, start + 1, out matchLength);

                        if (!matched)
                        {
                            continue;
                        }

                        if (we.WordFilterStatus == WordFilterStatus.Banned)
                        {
                            status = we.WordFilterStatus;
                            break;
                        }

                        // Remember only the first replaceable match; keep scanning so that a banned
                        // entry later in the group can still be detected.
                        if (!found)
                        {
                            found           = true;
                            replacementText = we.Replacement;
                            consumed        = matchLength;
                        }
                    }

                    if (status == WordFilterStatus.Banned)
                    {
                        break;
                    }
                }

                if (found)
                {
                    sb.Append(replacementText);
                    start += consumed; // skip the rest of the matched word; the loop's ++ skips its first character
                }
                else
                {
                    sb.Append(source[start]);
                }
            }

            if (status == WordFilterStatus.Replace)
            {
                source = sb.ToString();
                source = RegexFilter(source, out status);
            }

            return(source);
        }