示例#1
0
        /// <summary>
        /// Checks that, with keywords "ash", "sha" and "bcd", the text "his is sha ash"
        /// is flagged, that "sha" is the first hit, and that two hits are found in total.
        /// </summary>
        public void IssuesTest_57_3()
        {
            const string input = "his is sha ash";
            var keywords = new List<string> { "ash", "sha", "bcd" };

            var searcher = new IllegalWordsSearch();
            searcher.SetKeywords(keywords);

            // The text contains at least one of the keywords.
            var containsAny = searcher.ContainsAny(input);
            Assert.AreEqual(true, containsAny);

            // "sha" appears before "ash" in the text, so it is the first hit.
            var firstHit = searcher.FindFirst(input);
            Assert.AreEqual("sha", firstHit.Keyword);

            // Two hits are expected overall.
            var hits = searcher.FindAll(input);
            Assert.AreEqual(2, hits.Count);
        }
        /// <summary>
        /// Builds an <see cref="IllegalWordsSearch"/> from the lines of the keyword file and the
        /// URL file (both read as UTF-8, each line trimmed), saves it to <c>BitPath</c>, and
        /// writes the two source files' last-write timestamps, separated by '|', to <c>InfoPath</c>.
        /// </summary>
        /// <returns>The searcher loaded with the lines of both files.</returns>
        private static IllegalWordsSearch CreateIllegalWordsSearch()
        {
            string keywordsFile = Path.GetFullPath(KeywordsPath);
            string urlsFile     = Path.GetFullPath(UrlsPath);

            string[] keywordLines = File.ReadAllLines(keywordsFile, Encoding.UTF8);
            string[] urlLines     = File.ReadAllLines(urlsFile, Encoding.UTF8);

            // Keyword lines first, then URL lines; every line is kept (trimmed), blank ones included.
            var merged = new List<string>();
            foreach (string[] lines in new[] { keywordLines, urlLines })
            {
                foreach (string line in lines)
                {
                    merged.Add(line.Trim());
                }
            }

            var result = new IllegalWordsSearch();
            result.SetKeywords(merged);
            result.Save(Path.GetFullPath(BitPath));

            // Record when each source file was last written, as "<keywords stamp>|<urls stamp>".
            const string stampFormat = "yyyy-MM-dd HH:mm:ss";
            string keywordsStamp = new FileInfo(keywordsFile).LastWriteTime.ToString(stampFormat);
            string urlsStamp     = new FileInfo(urlsFile).LastWriteTime.ToString(stampFormat);

            File.WriteAllText(Path.GetFullPath(InfoPath), keywordsStamp + "|" + urlsStamp);

            return result;
        }
示例#3
0
        /// <summary>
        /// Runs the same single-character keywords against <c>IllegalWordsSearch</c>,
        /// <c>StringSearch</c> and <c>StringSearchEx2</c> and pins the differing results:
        /// one hit ("吃") for the former, nine hits starting with "A" for the latter two.
        /// </summary>
        public void IssuesTest_20()
        {
            const string input = "A10021003吃饭";
            string[] singleCharKeywords = { "1", "A", "2", "0", "吃" };

            // --- IllegalWordsSearch ---
            var illegalSearch = new IllegalWordsSearch();
            illegalSearch.SetKeywords(singleCharKeywords);
            illegalSearch.UseIgnoreCase       = true;
            illegalSearch.UseDBCcaseConverter = true;

            var illegalFirst = illegalSearch.FindFirst(input);
            Assert.AreEqual("吃", illegalFirst.Keyword);

            // "1", "A", "2" and "0" have English letters or digits on both sides,
            // so they are not recognised; only "吃" is reported.
            var illegalAll = illegalSearch.FindAll(input);
            Assert.AreEqual(1, illegalAll.Count);

            // --- StringSearch ---
            var plainSearch = new StringSearch();
            plainSearch.SetKeywords(singleCharKeywords);

            var plainFirst = plainSearch.FindFirst(input);
            Assert.AreEqual("A", plainFirst);

            var plainAll = plainSearch.FindAll(input);
            Assert.AreEqual(9, plainAll.Count);

            // --- StringSearchEx2 ---
            var exSearch = new StringSearchEx2();
            exSearch.SetKeywords(singleCharKeywords);

            var exFirst = exSearch.FindFirst(input);
            Assert.AreEqual("A", exFirst);

            var exAll = exSearch.FindAll(input);
            Assert.AreEqual(9, exAll.Count);
        }
示例#4
0
        /// <summary>
        /// Checks that keywords of length one to six, each occurring once in a comma-separated
        /// text, are all found and reported in the order they appear in the text.
        /// </summary>
        public void IssuesTest_57()
        {
            const string input = "一,二二,三三三,四四四四,五五五五五,六六六六六六";
            string[] expected = { "一", "二二", "三三三", "四四四四", "五五五五五", "六六六六六六" };

            var searcher = new IllegalWordsSearch();
            searcher.SetKeywords(new List<string>(expected));

            bool containsAny = searcher.ContainsAny(input);
            Assert.AreEqual(true, containsAny);

            IllegalWordsSearchResult firstHit = searcher.FindFirst(input);
            Assert.AreEqual("一", firstHit.Keyword);

            // The first six hits must be the keywords in text order.
            List<IllegalWordsSearchResult> hits = searcher.FindAll(input);
            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], hits[i].Keyword);
            }
        }
        /// <summary>
        /// Checks that <c>Replace</c> masks the keywords "中国" and "abc" with '*' in the sample
        /// sentence while leaving the rest of the text untouched.
        /// </summary>
        public void IssuesTest_17()
        {
            const string keywordList = "中国|zg人|abc";
            const string input       = "我是中美国人厉害中国完美abcddb好的";

            var searcher = new IllegalWordsSearch();
            searcher.SetKeywords(keywordList.Split('|'));

            var masked = searcher.Replace(input, '*');

            Assert.Equal("我是中美国人厉害**完美***ddb好的", masked);
        }
示例#6
0
        /// <summary>
        /// Checks that <c>Replace</c> masks "中国" with '*' but leaves "abc" alone in the sample
        /// sentence, where "abc" is directly followed by another English letter.
        /// </summary>
        public void IssuesTest_17()
        {
            const string keywordList = "中国|zg人|abc";
            const string input       = "我是中美国人厉害中国完美abcddb好的";

            var searcher = new IllegalWordsSearch();
            searcher.SetKeywords(keywordList.Split('|'));

            var masked = searcher.Replace(input, '*');

            // An earlier expectation, kept for reference, was "我是中美国人厉害**完美***ddb好的".
            // Note: "abc" is first converted to "abc" (NOTE(review): the original text was presumably
            // full-width letters - confirm), then its neighbours are checked for English letters or
            // digits. It is followed by "d", an English letter, so it must not be filtered.
            Assert.AreEqual("我是中美国人厉害**完美abcddb好的", masked);
        }
示例#7
0
        /// <summary>
        /// Checks that <c>Replace</c> turns "fFuck" into five asterisks when the keywords
        /// "f**k" and "ffx" are loaded.
        /// </summary>
        public void IssuesTest_65()
        {
            // NOTE(review): "f**k" contains literal asterisks as written; it may have been
            // censored before reaching this file - confirm against the upstream test.
            var keywords = new List<string> { "f**k", "ffx" };

            var searcher = new IllegalWordsSearch();
            searcher.SetKeywords(keywords);

            var masked = searcher.Replace("fFuck");

            Assert.AreEqual("*****", masked);
        }
示例#8
0
        /// <summary>
        /// Checks matching with "]" registered as a skip word: three hits are expected in the
        /// sample paragraph, and the first hit's keyword is reported as "中]国".
        /// </summary>
        public void IssuesTest_56()
        {
            string[] keywords = { "我爱中国", "中国" };
            const string paragraph = "新型财富密码就是大喊“我[爱中]国”么?伏拉夫,轻松拥有千万粉丝的新晋网红,快手粉丝465万,抖音粉丝704万。他是靠“爱中国”火起来的。伏拉夫在短视频平台上的简介是:爱中国!爱火锅!";

            var searcher = new IllegalWordsSearch();
            searcher.SetKeywords(keywords);
            searcher.SetSkipWords("]");

            var hits = searcher.FindAll(paragraph);

            Assert.AreEqual(3, hits.Count);
            Assert.AreEqual("中]国", hits[0].Keyword);
        }
 /// <summary>
 /// Registers the given keys with <c>IllegalWordsSearch</c>, adding for every key the key
 /// itself, its Traditional Chinese form and its pinyin. Does nothing when
 /// <paramref name="keys"/> is null or empty.
 /// </summary>
 /// <param name="keys">Keys to register; may be null.</param>
 public void SetKeys(List <string> keys)
 {
     // Nothing to register: leave the current keyword set as it is.
     if (keys == null || keys.Count == 0)
     {
         return;
     }

     var expanded = new List<string>();
     foreach (var key in keys)
     {
         expanded.AddRange(new string[]
         {
             key,                                    // the word itself
             WordsHelper.ToTraditionalChinese(key),  // Traditional Chinese variant
             WordsHelper.GetPinyin(key),             // pinyin variant
         });
     }

     IllegalWordsSearch.SetKeywords(expanded);
 }
示例#10
0
        /// <summary>
        /// Checks that a keyword mixing English letters and Chinese characters ("jameson吃饭")
        /// is found when the text equals the keyword, with the reversed form also loaded.
        /// </summary>
        public void IssuesTest_57_2()
        {
            const string input = "jameson吃饭";
            var keywords = new List<string> { "jameson吃饭", "吃饭jameson" };

            var searcher = new IllegalWordsSearch();
            searcher.SetKeywords(keywords);

            var containsAny = searcher.ContainsAny(input);
            Assert.AreEqual(true, containsAny);

            var firstHit = searcher.FindFirst(input);
            Assert.AreEqual("jameson吃饭", firstHit.Keyword);
        }
示例#11
0
        /// <summary>
        /// Reads the non-empty lines of "BadWord.txt", feeds them to every matcher held in the
        /// static fields (<c>tf1</c>, <c>ff</c>, the string/words searchers and
        /// <c>illegalWordsSearch</c>), and builds the two regexes <c>re</c> and <c>re2</c>.
        /// </summary>
        /// <returns>The keywords that were read, sorted with the default string ordering.</returns>
        static List <string> ReadBadWord()
        {
            List <string> list = new List <string>();

            using (StreamReader reader = new StreamReader(File.OpenRead("BadWord.txt"))) {
                string key;
                while ((key = reader.ReadLine()) != null)
                {
                    // Blank lines carry no keyword.
                    if (key == string.Empty)
                    {
                        continue;
                    }

                    tf1.AddKey(key);
                    ff.AddKey(key);
                    list.Add(key);
                }
            }

            stringSearch.SetKeywords(list);
            stringSearchEx.SetKeywords(list);
            stringSearchEx2.SetKeywords(list);
            stringSearchEx3.SetKeywords(list);
            wordsSearch.SetKeywords(list);
            wordsSearchEx.SetKeywords(list);
            wordsSearchEx2.SetKeywords(list);
            wordsSearchEx3.SetKeywords(list);
            illegalWordsSearch.SetKeywords(list);

            list = list.OrderBy(q => q).ToList();

            // Escape each keyword on its own before joining with '|'. The previous hand-written
            // character class left '?', '^', '$', '|' and '#' unescaped, so a keyword containing
            // one of them either changed the meaning of the pattern or made it invalid.
            var str = string.Join("|", list.Select(q => Regex.Escape(q)));

            re = new Regex(str, RegexOptions.IgnoreCase);

            // tf1.ToString() is used as a regex pattern as-is (no escaping applied here).
            var str2 = tf1.ToString();

            re2 = new Regex(str2);

            return(list);
        }
示例#12
0
        /// <summary>
        /// Reads the non-empty lines of "BadWord.txt", feeds them to the matchers held in the
        /// static fields (<c>tf1</c>, <c>ff</c>, <c>word</c>, <c>search</c>, <c>iword1</c>,
        /// <c>iword2</c>), and builds the two regexes <c>re</c> and <c>re2</c>.
        /// </summary>
        static void ReadBadWord()
        {
            List <string> list = new List <string>();

            using (StreamReader reader = new StreamReader(File.OpenRead("BadWord.txt"))) {
                string key;
                while ((key = reader.ReadLine()) != null)
                {
                    // Blank lines carry no keyword.
                    if (key == string.Empty)
                    {
                        continue;
                    }

                    tf1.AddKey(key);
                    ff.AddKey(key);
                    list.Add(key);
                }
            }

            //search = new TextSearch();
            //search.Keywords = list.ToArray();
            word.SetKeywords(list);
            search.SetKeywords(list);
            iword1.SetKeywords(list);
            iword2.SetKeywords(list);
            //iword3 = new IllegalWordsSearch2(list);

            list = list.OrderBy(q => q).ToList();

            // Escape each keyword on its own before joining with '|'. The previous hand-written
            // character class left '?', '^', '$', '|' and '#' unescaped, so a keyword containing
            // one of them either changed the meaning of the pattern or made it invalid.
            var str = string.Join("|", list.Select(q => Regex.Escape(q)));

            re = new Regex(str);

            // tf1.ToString() is used as a regex pattern as-is (no escaping applied here).
            var str2 = tf1.ToString();

            re2 = new Regex(str2);
        }
示例#13
0
        /// <summary>
        /// End-to-end test of <c>IllegalWordsSearch</c>: ContainsAny, FindFirst, FindAll and Replace,
        /// first with default settings, then with the skip-word filter, the duplicate-word filter
        /// and finally the blacklist filter switched on.
        /// The filters are enabled cumulatively on one instance, so the order of the sections matters.
        /// </summary>
        public void IllegalWordsSearchTest()
        {
            string s = "中国|国人|zg人|f**k|all|as|19|http://|ToolGood|assert|zgasser|共产党";

            // One blacklist value per keyword above (12 entries); used by SetBlacklist near the end.
            // NOTE(review): values look like bit flags matched against FindAll's second argument - confirm.
            int[]  bl   = new int[] { 7, 4, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 };
            string test = "我是中国人";


            var iwords = new IllegalWordsSearch();

            iwords.SetKeywords(s.Split('|'));


            // The text contains at least one keyword.
            var b = iwords.ContainsAny(test);

            Assert.AreEqual(true, b);


            // First hit is "中国", reported with Start = 2 and End = 3.
            var f = iwords.FindFirst(test);

            Assert.AreEqual(true, f.Success);
            Assert.AreEqual("中国", f.Keyword);
            Assert.AreEqual(2, f.Start);
            Assert.AreEqual(3, f.End);



            // Overlapping keywords "中国" and "国人" are both reported.
            var all = iwords.FindAll(test);

            Assert.AreEqual("中国", all[0].SrcString);
            Assert.AreEqual("国人", all[1].SrcString);

            // A text that is exactly one keyword.
            test = "共产党";
            all  = iwords.FindAll(test);
            Assert.AreEqual("共产党", all[0].SrcString);


            // Keywords mixing Latin letters and Chinese characters.
            test = "我是中国zg人";
            all  = iwords.FindAll(test);
            Assert.AreEqual("中国", all[0].SrcString);
            Assert.AreEqual("zg人", all[1].SrcString);

            test = "中间国zg人";
            all  = iwords.FindAll(test);
            Assert.AreEqual("zg人", all[0].SrcString);

            test = "f**k al[]l"; // skip-word filter not enabled yet
            all  = iwords.FindAll(test);
            Assert.AreEqual("f**k", all[0].SrcString);
            Assert.AreEqual(1, all.Count);


            test = "f**k al[]l";
            iwords.UseSkipWordFilter = true; // enable the skip-word filter (stays on for the rest of the test)
            all = iwords.FindAll(test);
            Assert.AreEqual("f**k", all[0].SrcString);
            Assert.AreEqual("al[]l", all[1].SrcString);
            Assert.AreEqual(2, all.Count);

            test = "http://ToolGood.com";
            all  = iwords.FindAll(test);
            Assert.AreEqual("toolgood", all[0].Keyword); // the keyword "ToolGood" is lower-cased by default
            Assert.AreEqual("ToolGood", all[0].SrcString);
            Assert.AreEqual(1, all.Count);

            test = "asssert all";
            all  = iwords.FindAll(test); // duplicate-word filter not enabled yet
            Assert.AreEqual("all", all[0].SrcString);
            Assert.AreEqual(1, all.Count);

            test = "asssert all";
            iwords.UseDuplicateWordFilter = true; // enable the duplicate-word filter (stays on for the rest of the test)
            all = iwords.FindAll(test);
            Assert.AreEqual("asssert", all[0].SrcString);
            Assert.AreEqual("assert", all[0].Keyword);
            Assert.AreEqual("all", all[1].SrcString);
            Assert.AreEqual(2, all.Count);

            test = "asssert allll"; // repeated characters are matched up to the end of the text
            all  = iwords.FindAll(test);
            Assert.AreEqual("asssert", all[0].SrcString);
            Assert.AreEqual("assert", all[0].Keyword);
            Assert.AreEqual("allll", all[1].SrcString);
            Assert.AreEqual(2, all.Count);

            test = "zgasssert aallll"; // must not match "zgasser" or "assert"
            all  = iwords.FindAll(test);
            Assert.AreEqual("aallll", all[0].SrcString);
            Assert.AreEqual("all", all[0].Keyword);
            Assert.AreEqual(1, all.Count);

            // Bracket characters between keyword characters are skipped and included in SrcString.
            test = "我是【中]国【人";
            all  = iwords.FindAll(test);
            Assert.AreEqual("中]国", all[0].SrcString);
            Assert.AreEqual("国【人", all[1].SrcString);

            test = "我是【中国【人";
            all  = iwords.FindAll(test);
            Assert.AreEqual("中国", all[0].SrcString);
            Assert.AreEqual("国【人", all[1].SrcString);
            Assert.AreEqual(2, all.Count);


            // Replace masks both hits, including the skipped bracket between them.
            var ss = iwords.Replace(test, '*');

            Assert.AreEqual("我是【****", ss);

            test = "我是中国人"; // use the blacklist
            iwords.SetBlacklist(bl);
            iwords.UseBlacklistFilter = true;
            // With blacklist argument 1 only "中国" (blacklist value 7) is returned; "国人" (value 4) is not.
            all = iwords.FindAll(test, 1);
            Assert.AreEqual("中国", all[0].SrcString);
            Assert.AreEqual(1, all.Count);
        }
示例#14
0
 /// <summary>
 /// Creates the service with a case-insensitive <c>IllegalWordsSearch</c> loaded with
 /// <c>censoredWords</c>.
 /// </summary>
 public BadWordService()
 {
     // UseIgnoreCase is switched on before the keywords are loaded.
     StringSearch = new IllegalWordsSearch
     {
         UseIgnoreCase = true
     };

     StringSearch.SetKeywords(censoredWords);
 }
示例#15
0
        /// <summary>
        /// End-to-end test of an <c>IllegalWordsSearch</c> constructed with argument 2:
        /// ContainsAny, FindFirst, FindAll over several sample texts, and Replace.
        /// </summary>
        public void IllegalWordsSearchTest()
        {
            const string keywordList = "中国|国人|zg人|f**k|all|as|19|http://|ToolGood";

            IllegalWordsSearch searcher = new IllegalWordsSearch(2);
            searcher.SetKeywords(keywordList.Split('|'));

            // --- Basic lookups on a short Chinese sentence ---
            string input = "我是中国人";

            var containsAny = searcher.ContainsAny(input);
            Assert.AreEqual(true, containsAny);

            var firstHit = searcher.FindFirst(input);
            Assert.AreEqual(true, firstHit.Success);
            Assert.AreEqual("中国", firstHit.Keyword);
            Assert.AreEqual(2, firstHit.Start);
            Assert.AreEqual(3, firstHit.End);

            var hits = searcher.FindAll(input);
            Assert.AreEqual("中国", hits[0].SrcString);
            Assert.AreEqual("国人", hits[1].SrcString);

            // --- Latin letters sitting between the characters of a Chinese keyword ---
            input = "我是中国zg人";
            hits  = searcher.FindAll(input);
            Assert.AreEqual("中国", hits[0].SrcString);
            Assert.AreEqual("zg人", hits[1].SrcString);
            Assert.AreEqual("国zg人", hits[2].SrcString);

            input = "中间国zg人";
            hits  = searcher.FindAll(input);
            Assert.AreEqual("zg人", hits[0].SrcString);
            Assert.AreEqual("国zg人", hits[1].SrcString);

            // --- Punctuation inserted inside a keyword ---
            input = "f**k al.l";
            hits  = searcher.FindAll(input);
            Assert.AreEqual("f**k", hits[0].SrcString);
            Assert.AreEqual("al.l", hits[1].SrcString);
            Assert.AreEqual(2, hits.Count);

            input = "ht@tp://ToolGood.com";
            hits  = searcher.FindAll(input);
            Assert.AreEqual("ht@tp://", hits[0].SrcString);
            Assert.AreEqual("http://", hits[0].Keyword);
            Assert.AreEqual("toolgood", hits[1].Keyword);
            Assert.AreEqual("ToolGood", hits[1].SrcString);
            Assert.AreEqual(2, hits.Count);

            // --- Keywords next to other letters or digits ---
            input = "asssert all";
            hits  = searcher.FindAll(input);
            Assert.AreEqual("all", hits[0].SrcString);
            Assert.AreEqual(1, hits.Count);

            input = "19w 1919 all";
            hits  = searcher.FindAll(input);
            Assert.AreEqual("19", hits[0].SrcString);
            Assert.AreEqual("all", hits[1].SrcString);

            // --- Brackets between keyword characters ---
            input = "我是【中]国【人";
            hits  = searcher.FindAll(input);
            Assert.AreEqual("中]国", hits[0].SrcString);
            Assert.AreEqual("国【人", hits[1].SrcString);

            input = "我是【中国【人";
            hits  = searcher.FindAll(input);
            Assert.AreEqual("中国", hits[0].SrcString);
            Assert.AreEqual("国【人", hits[1].SrcString);
            Assert.AreEqual(2, hits.Count);

            // --- Replace masks both hits of the last sample, bracket included ---
            var masked = searcher.Replace(input, '*');
            Assert.AreEqual("我是【****", masked);
        }