/// <summary>
/// Issue 57 (case 3): with the keywords "ash", "sha" and "bcd", searching
/// "his is sha ash" must report a hit, return "sha" as the first match and
/// yield two matches in total.
/// </summary>
public void IssuesTest_57_3()
{
    string input = "his is sha ash";
    var keywords = new List<string> { "ash", "sha", "bcd" };

    var searcher = new IllegalWordsSearch();
    searcher.SetKeywords(keywords);

    // Presence check.
    var hasMatch = searcher.ContainsAny(input);
    Assert.AreEqual(true, hasMatch);

    // First match is the earliest keyword occurrence in the text.
    var first = searcher.FindFirst(input);
    Assert.AreEqual("sha", first.Keyword);

    // Both "sha" and "ash" are expected to be reported.
    var matches = searcher.FindAll(input);
    Assert.AreEqual(2, matches.Count);
}
/// <summary>
/// Builds an <c>IllegalWordsSearch</c> from the keyword file and the URL file,
/// saves it to <c>BitPath</c>, and records the last-write times of both source
/// files (joined by '|') in <c>InfoPath</c>.
/// </summary>
/// <returns>The searcher loaded with the trimmed lines of both files.</returns>
private static IllegalWordsSearch CreateIllegalWordsSearch()
{
    string keywordsFile = Path.GetFullPath(KeywordsPath);
    string urlsFile = Path.GetFullPath(UrlsPath);

    // Read both sources up front (UTF-8), keyword file first.
    string[] keywordLines = File.ReadAllLines(keywordsFile, Encoding.UTF8);
    string[] urlLines = File.ReadAllLines(urlsFile, Encoding.UTF8);

    // Merge the two files into one list, trimming surrounding whitespace.
    var words = new List<string>(keywordLines.Length + urlLines.Length);
    foreach (string[] lines in new[] { keywordLines, urlLines })
    {
        foreach (string line in lines)
        {
            words.Add(line.Trim());
        }
    }

    var search = new IllegalWordsSearch();
    search.SetKeywords(words);
    search.Save(Path.GetFullPath(BitPath));

    // Write "<keywords last write>|<urls last write>" to the info file.
    const string stampFormat = "yyyy-MM-dd HH:mm:ss";
    string keywordsStamp = new FileInfo(keywordsFile).LastWriteTime.ToString(stampFormat);
    string urlsStamp = new FileInfo(urlsFile).LastWriteTime.ToString(stampFormat);
    File.WriteAllText(Path.GetFullPath(InfoPath), keywordsStamp + "|" + urlsStamp);

    return search;
}
/// <summary>
/// Issue 20: compares how <c>IllegalWordsSearch</c>, <c>StringSearch</c> and
/// <c>StringSearchEx2</c> treat single-character keywords inside the text
/// "A10021003吃饭".
/// </summary>
public void IssuesTest_20()
{
    string text = "A10021003吃饭";
    var keywords = new string[] { "1", "A", "2", "0", "吃" };

    var iws = new IllegalWordsSearch();
    var ss = new StringSearch();
    var sse = new StringSearchEx2();

    // IllegalWordsSearch with case-insensitive and DBC-case conversion enabled.
    iws.SetKeywords(keywords);
    iws.UseIgnoreCase = true;
    iws.UseDBCcaseConverter = true;
    var iwsFirst = iws.FindFirst(text);
    Assert.AreEqual("吃", iwsFirst.Keyword);
    var iwsAll = iws.FindAll(text);
    // The keywords 1, A, 2 and 0 have Latin letters or digits on either side,
    // so they are not recognized; only "吃" is reported.
    Assert.AreEqual(1, iwsAll.Count);

    // StringSearch reports every occurrence, starting with "A".
    ss.SetKeywords(keywords);
    var ssFirst = ss.FindFirst(text);
    Assert.AreEqual("A", ssFirst);
    var ssAll = ss.FindAll(text);
    Assert.AreEqual(9, ssAll.Count);

    // StringSearchEx2 is expected to agree with StringSearch.
    sse.SetKeywords(keywords);
    var sseFirst = sse.FindFirst(text);
    Assert.AreEqual("A", sseFirst);
    var sseAll = sse.FindAll(text);
    Assert.AreEqual(9, sseAll.Count);
}
/// <summary>
/// Issue 57: six keywords made of a repeated character (lengths 1 to 6) must
/// each be found, in order, in a comma-separated text that contains them all.
/// </summary>
public void IssuesTest_57()
{
    string input = "一,二二,三三三,四四四四,五五五五五,六六六六六六";
    var keywords = new List<string>
    {
        "一",
        "二二",
        "三三三",
        "四四四四",
        "五五五五五",
        "六六六六六六",
    };

    var searcher = new IllegalWordsSearch();
    searcher.SetKeywords(keywords);

    var hasMatch = searcher.ContainsAny(input);
    Assert.AreEqual(true, hasMatch);

    var first = searcher.FindFirst(input);
    Assert.AreEqual("一", first.Keyword);

    // The first six results must be the keywords in the order they were added.
    var matches = searcher.FindAll(input);
    for (int i = 0; i < keywords.Count; i++)
    {
        Assert.AreEqual(keywords[i], matches[i].Keyword);
    }
}
/// <summary>
/// Issue 17: replacing the keywords "中国", "zg人" and "abc" with '*' must mask
/// "中国" and "abc" in the sample sentence and leave everything else untouched.
/// </summary>
public void IssuesTest_17()
{
    string[] keywords = "中国|zg人|abc".Split('|');

    var searcher = new IllegalWordsSearch();
    searcher.SetKeywords(keywords);

    var masked = searcher.Replace("我是中美国人厉害中国完美abcddb好的", '*');

    Assert.Equal("我是中美国人厉害**完美***ddb好的", masked);
}
/// <summary>
/// Issue 17: replacing the keywords "中国", "zg人" and "abc" with '*' must mask
/// "中国" but leave the full-width "abc" alone when it is directly followed
/// by a Latin letter.
/// </summary>
public void IssuesTest_17()
{
    var illegalWordsSearch = new IllegalWordsSearch();
    string s = "中国|zg人|abc";
    illegalWordsSearch.SetKeywords(s.Split('|'));

    var str = illegalWordsSearch.Replace("我是中美国人厉害中国完美abcddb好的", '*');

    // Note: the full-width "abc" is first converted to "abc", and then the
    // characters to its left and right are checked for Latin letters or digits.
    // Because it is followed by the Latin letter 'd', it must not be filtered.
    // (The earlier expectation "我是中美国人厉害**完美***ddb好的" no longer applies.)
    Assert.AreEqual("我是中美国人厉害**完美abcddb好的", str);
}
/// <summary>
/// Issue 65: with the keywords "f**k" and "ffx" loaded, calling the
/// single-argument <c>Replace</c> on "fFuck" must return "*****".
/// </summary>
public void IssuesTest_65()
{
    var keywords = new List<string> { "f**k", "ffx" };

    var searcher = new IllegalWordsSearch();
    searcher.SetKeywords(keywords);

    var masked = searcher.Replace("fFuck");

    Assert.AreEqual("*****", masked);
}
/// <summary>
/// Issue 56: with "]" registered as a skip word, the keywords "我爱中国" and
/// "中国" must produce three matches in the sample text, the first of which
/// is reported with the keyword value "中]国".
/// </summary>
public void IssuesTest_56()
{
    string[] keywords = { "我爱中国", "中国" };
    string input = "新型财富密码就是大喊“我[爱中]国”么?伏拉夫,轻松拥有千万粉丝的新晋网红,快手粉丝465万,抖音粉丝704万。他是靠“爱中国”火起来的。伏拉夫在短视频平台上的简介是:爱中国!爱火锅!";

    var searcher = new IllegalWordsSearch();
    searcher.SetKeywords(keywords);
    searcher.SetSkipWords("]");

    var matches = searcher.FindAll(input);

    Assert.AreEqual(3, matches.Count);
    Assert.AreEqual("中]国", matches[0].Keyword);
}
/// <summary>
/// Expands each key into three entries (the key itself, its Traditional
/// Chinese form and its pinyin) and loads the expanded list into
/// <c>IllegalWordsSearch</c>.
/// </summary>
/// <param name="keys">Keys to register. A null or empty list is ignored.</param>
public void SetKeys(List<string> keys)
{
    // Nothing to register: leave the current keyword set untouched.
    if (keys == null || keys.Count == 0)
    {
        return;
    }

    var allKeys = new List<string>(keys.Count * 3);
    foreach (var k in keys)
    {
        allKeys.Add(k);                                   // the word itself
        allKeys.Add(WordsHelper.ToTraditionalChinese(k)); // Traditional Chinese form
        allKeys.Add(WordsHelper.GetPinyin(k));            // pinyin form
    }

    IllegalWordsSearch.SetKeywords(allKeys);
}
/// <summary>
/// Issue 57 (case 2): a text that equals a mixed Latin/Chinese keyword
/// ("jameson吃饭") must be detected and returned as the first match, even when
/// the reversed keyword "吃饭jameson" is also registered.
/// </summary>
public void IssuesTest_57_2()
{
    string input = "jameson吃饭";
    var keywords = new List<string> { "jameson吃饭", "吃饭jameson" };

    var searcher = new IllegalWordsSearch();
    searcher.SetKeywords(keywords);

    var hasMatch = searcher.ContainsAny(input);
    Assert.AreEqual(true, hasMatch);

    var first = searcher.FindFirst(input);
    Assert.AreEqual("jameson吃饭", first.Keyword);
}
/// <summary>
/// Loads the non-empty lines of "BadWord.txt", feeds them to every searcher
/// under comparison, and builds the two reference regular expressions.
/// </summary>
/// <returns>The loaded keywords, sorted with the default string ordering.</returns>
static List<string> ReadBadWord()
{
    List<string> list = new List<string>();
    using (StreamReader reader = new StreamReader(File.OpenRead("BadWord.txt")))
    {
        string key;
        while ((key = reader.ReadLine()) != null)
        {
            // Blank lines carry no keyword.
            if (key.Length == 0)
            {
                continue;
            }
            tf1.AddKey(key);
            ff.AddKey(key);
            list.Add(key);
        }
    }

    // Every searcher receives the keywords in file order.
    stringSearch.SetKeywords(list);
    stringSearchEx.SetKeywords(list);
    stringSearchEx2.SetKeywords(list);
    stringSearchEx3.SetKeywords(list);
    wordsSearch.SetKeywords(list);
    wordsSearchEx.SetKeywords(list);
    wordsSearchEx2.SetKeywords(list);
    wordsSearchEx3.SetKeywords(list);
    illegalWordsSearch.SetKeywords(list);

    list = list.OrderBy(q => q).ToList();

    // Escape each keyword on its own so that regex metacharacters inside a
    // keyword (including '?', '|', '^', '$' and '#') are matched literally and
    // only the joining '|' acts as alternation.
    var str = string.Join("|", list.Select(q => Regex.Escape(q)));
    re = new Regex(str, RegexOptions.IgnoreCase);

    // The text produced by tf1.ToString() is used as a pattern verbatim.
    var str2 = tf1.ToString();
    re2 = new Regex(str2);

    return list;
}
/// <summary>
/// Loads the non-empty lines of "BadWord.txt", feeds them to every searcher
/// under comparison, and builds the two reference regular expressions.
/// </summary>
static void ReadBadWord()
{
    List<string> list = new List<string>();
    using (StreamReader reader = new StreamReader(File.OpenRead("BadWord.txt")))
    {
        string key;
        while ((key = reader.ReadLine()) != null)
        {
            // Blank lines carry no keyword.
            if (key.Length == 0)
            {
                continue;
            }
            tf1.AddKey(key);
            ff.AddKey(key);
            list.Add(key);
        }
    }

    // Every searcher receives the keywords in file order.
    word.SetKeywords(list);
    search.SetKeywords(list);
    iword1.SetKeywords(list);
    iword2.SetKeywords(list);

    list = list.OrderBy(q => q).ToList();

    // Escape each keyword on its own so that regex metacharacters inside a
    // keyword (including '?', '|', '^', '$' and '#') are matched literally and
    // only the joining '|' acts as alternation.
    var str = string.Join("|", list.Select(q => Regex.Escape(q)));
    re = new Regex(str);

    // The text produced by tf1.ToString() is used as a pattern verbatim.
    var str2 = tf1.ToString();
    re2 = new Regex(str2);
}
/// <summary>
/// End-to-end test of <c>IllegalWordsSearch</c>: basic matching, then the
/// skip-word filter, the duplicate-word filter and the blacklist filter,
/// each enabled in turn on the same searcher instance.
/// </summary>
public void IllegalWordsSearchTest()
{
    string s = "中国|国人|zg人|f**k|all|as|19|http://|ToolGood|assert|zgasser|共产党";
    // One blacklist value per keyword, in keyword order.
    int[] bl = new int[] { 7, 4, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 };
    string test = "我是中国人";

    var iwords = new IllegalWordsSearch();
    iwords.SetKeywords(s.Split('|'));

    var b = iwords.ContainsAny(test);
    Assert.AreEqual(true, b);

    var f = iwords.FindFirst(test);
    Assert.AreEqual(true, f.Success);
    Assert.AreEqual("中国", f.Keyword);
    Assert.AreEqual(2, f.Start);
    Assert.AreEqual(3, f.End);

    var all = iwords.FindAll(test);
    Assert.AreEqual("中国", all[0].SrcString);
    Assert.AreEqual("国人", all[1].SrcString);

    test = "共产党";
    all = iwords.FindAll(test);
    Assert.AreEqual("共产党", all[0].SrcString);

    test = "我是中国zg人";
    all = iwords.FindAll(test);
    Assert.AreEqual("中国", all[0].SrcString);
    Assert.AreEqual("zg人", all[1].SrcString);

    test = "中间国zg人";
    all = iwords.FindAll(test);
    Assert.AreEqual("zg人", all[0].SrcString);

    // Skip-word filter not enabled yet.
    test = "f**k al[]l";
    all = iwords.FindAll(test);
    Assert.AreEqual("f**k", all[0].SrcString);
    Assert.AreEqual(1, all.Count);

    // Enable the skip-word filter.
    test = "f**k al[]l";
    iwords.UseSkipWordFilter = true;
    all = iwords.FindAll(test);
    Assert.AreEqual("f**k", all[0].SrcString);
    Assert.AreEqual("al[]l", all[1].SrcString);
    Assert.AreEqual(2, all.Count);

    test = "http://ToolGood.com";
    all = iwords.FindAll(test);
    // The keyword "ToolGood" is lower-cased by default.
    Assert.AreEqual("toolgood", all[0].Keyword);
    Assert.AreEqual("ToolGood", all[0].SrcString);
    Assert.AreEqual(1, all.Count);

    // Duplicate-word filter not enabled yet.
    test = "asssert all";
    all = iwords.FindAll(test);
    Assert.AreEqual("all", all[0].SrcString);
    Assert.AreEqual(1, all.Count);

    // Enable the duplicate-word filter.
    test = "asssert all";
    iwords.UseDuplicateWordFilter = true;
    all = iwords.FindAll(test);
    Assert.AreEqual("asssert", all[0].SrcString);
    Assert.AreEqual("assert", all[0].Keyword);
    Assert.AreEqual("all", all[1].SrcString);
    Assert.AreEqual(2, all.Count);

    // Repeated characters are matched through to the end of the text.
    test = "asssert allll";
    all = iwords.FindAll(test);
    Assert.AreEqual("asssert", all[0].SrcString);
    Assert.AreEqual("assert", all[0].Keyword);
    Assert.AreEqual("allll", all[1].SrcString);
    Assert.AreEqual(2, all.Count);

    // Must not match "zgasser" or "assert".
    test = "zgasssert aallll";
    all = iwords.FindAll(test);
    Assert.AreEqual("aallll", all[0].SrcString);
    Assert.AreEqual("all", all[0].Keyword);
    Assert.AreEqual(1, all.Count);

    test = "我是【中]国【人";
    all = iwords.FindAll(test);
    Assert.AreEqual("中]国", all[0].SrcString);
    Assert.AreEqual("国【人", all[1].SrcString);

    test = "我是【中国【人";
    all = iwords.FindAll(test);
    Assert.AreEqual("中国", all[0].SrcString);
    Assert.AreEqual("国【人", all[1].SrcString);
    Assert.AreEqual(2, all.Count);

    var ss = iwords.Replace(test, '*');
    Assert.AreEqual("我是【****", ss);

    // Use the blacklist.
    test = "我是中国人";
    iwords.SetBlacklist(bl);
    iwords.UseBlacklistFilter = true;
    all = iwords.FindAll(test, 1);
    Assert.AreEqual("中国", all[0].SrcString);
    Assert.AreEqual(1, all.Count);
}
/// <summary>
/// Creates the service's <c>IllegalWordsSearch</c> with <c>UseIgnoreCase</c>
/// switched on and loads it with <c>censoredWords</c>.
/// </summary>
public BadWordService()
{
    StringSearch = new IllegalWordsSearch { UseIgnoreCase = true };
    StringSearch.SetKeywords(censoredWords);
}
/// <summary>
/// End-to-end test of an <c>IllegalWordsSearch</c> constructed with the
/// argument 2: basic matching, matches that span interposed characters,
/// lower-cased keywords and masking via <c>Replace</c>.
/// </summary>
public void IllegalWordsSearchTest()
{
    string[] keywords = "中国|国人|zg人|f**k|all|as|19|http://|ToolGood".Split('|');
    string input = "我是中国人";

    var searcher = new IllegalWordsSearch(2);
    searcher.SetKeywords(keywords);

    // Presence check and first match with its position.
    var hasMatch = searcher.ContainsAny(input);
    Assert.AreEqual(true, hasMatch);

    var first = searcher.FindFirst(input);
    Assert.AreEqual(true, first.Success);
    Assert.AreEqual("中国", first.Keyword);
    Assert.AreEqual(2, first.Start);
    Assert.AreEqual(3, first.End);

    // Overlapping keywords are both reported.
    var matches = searcher.FindAll(input);
    Assert.AreEqual("中国", matches[0].SrcString);
    Assert.AreEqual("国人", matches[1].SrcString);

    input = "我是中国zg人";
    matches = searcher.FindAll(input);
    Assert.AreEqual("中国", matches[0].SrcString);
    Assert.AreEqual("zg人", matches[1].SrcString);
    Assert.AreEqual("国zg人", matches[2].SrcString);

    input = "中间国zg人";
    matches = searcher.FindAll(input);
    Assert.AreEqual("zg人", matches[0].SrcString);
    Assert.AreEqual("国zg人", matches[1].SrcString);

    // A keyword interrupted by punctuation is still found.
    input = "f**k al.l";
    matches = searcher.FindAll(input);
    Assert.AreEqual("f**k", matches[0].SrcString);
    Assert.AreEqual("al.l", matches[1].SrcString);
    Assert.AreEqual(2, matches.Count);

    // SrcString keeps the original text; Keyword holds the stored keyword.
    input = "ht@tp://ToolGood.com";
    matches = searcher.FindAll(input);
    Assert.AreEqual("ht@tp://", matches[0].SrcString);
    Assert.AreEqual("http://", matches[0].Keyword);
    Assert.AreEqual("toolgood", matches[1].Keyword);
    Assert.AreEqual("ToolGood", matches[1].SrcString);
    Assert.AreEqual(2, matches.Count);

    input = "asssert all";
    matches = searcher.FindAll(input);
    Assert.AreEqual("all", matches[0].SrcString);
    Assert.AreEqual(1, matches.Count);

    input = "19w 1919 all";
    matches = searcher.FindAll(input);
    Assert.AreEqual("19", matches[0].SrcString);
    Assert.AreEqual("all", matches[1].SrcString);

    // Bracket characters between keyword characters.
    input = "我是【中]国【人";
    matches = searcher.FindAll(input);
    Assert.AreEqual("中]国", matches[0].SrcString);
    Assert.AreEqual("国【人", matches[1].SrcString);

    input = "我是【中国【人";
    matches = searcher.FindAll(input);
    Assert.AreEqual("中国", matches[0].SrcString);
    Assert.AreEqual("国【人", matches[1].SrcString);
    Assert.AreEqual(2, matches.Count);

    // Replace masks the whole matched span, including the interposed bracket.
    var masked = searcher.Replace(input, '*');
    Assert.AreEqual("我是【****", masked);
}