// Runs the same five single-character keywords through IllegalWordsSearch, StringSearch
// and StringSearchEx2 against one mixed Latin/digit/Chinese string, and pins what each
// searcher reports from FindFirst and FindAll.
public void IssuesTest_20()
{
    const string input = "A10021003吃饭";
    var words = new[] { "1", "A", "2", "0", "吃" };

    // IllegalWordsSearch: the two options are switched on after the keywords are loaded.
    var illegalSearch = new IllegalWordsSearch();
    illegalSearch.SetKeywords(words);
    illegalSearch.UseIgnoreCase = true;
    illegalSearch.UseDBCcaseConverter = true;

    Assert.AreEqual("吃", illegalSearch.FindFirst(input).Keyword);
    // Only one hit is expected: "1", "A", "2" and "0" all have English letters or digits
    // on their left and right, so they are not recognised.
    Assert.AreEqual(1, illegalSearch.FindAll(input).Count);

    // StringSearch: the first hit is "A" and nine hits are expected in total.
    var plainSearch = new StringSearch();
    plainSearch.SetKeywords(words);

    Assert.AreEqual("A", plainSearch.FindFirst(input));
    Assert.AreEqual(9, plainSearch.FindAll(input).Count);

    // StringSearchEx2: expected to agree with StringSearch on the same input.
    var ex2Search = new StringSearchEx2();
    ex2Search.SetKeywords(words);

    Assert.AreEqual("A", ex2Search.FindFirst(input));
    Assert.AreEqual(9, ex2Search.FindAll(input).Count);
}
// Checks StringSearchEx2 on "我是中国人" with the keywords "中国", "国人" and "zg人":
// ContainsAny, FindFirst, FindAll (content and count) and Replace with '*'.
public void test3()
{
    string sentence = "我是中国人";
    var keywordList = "中国|国人|zg人".Split('|').ToList();

    var searcher = new StringSearchEx2();
    searcher.SetKeywords(keywordList);

    // At least one keyword occurs in the sentence.
    Assert.AreEqual(true, searcher.ContainsAny(sentence));

    // The first reported hit is "中国".
    Assert.AreEqual("中国", searcher.FindFirst(sentence));

    // Both overlapping keywords are reported, in this order, and nothing else.
    var hits = searcher.FindAll(sentence);
    Assert.AreEqual("中国", hits[0]);
    Assert.AreEqual("国人", hits[1]);
    Assert.AreEqual(2, hits.Count);

    // The three characters covered by the hits are masked; "我是" is left untouched.
    var masked = searcher.Replace(sentence, '*');
    Assert.AreEqual("我是***", masked);
}
// Reads "BadWord.txt" (one keyword per line, empty lines skipped), registers every
// keyword with tf1/ff and the shared searcher instances, builds the two regular
// expressions re and re2, and returns the keywords sorted with the default string ordering.
static List<string> ReadBadWord()
{
    List<string> list = new List<string>();

    using (StreamReader reader = new StreamReader(File.OpenRead("BadWord.txt")))
    {
        string key;
        while ((key = reader.ReadLine()) != null)
        {
            if (key != string.Empty)
            {
                tf1.AddKey(key);
                ff.AddKey(key);
                list.Add(key);
            }
        }
    }

    // Every searcher receives the keywords in file order (before sorting).
    stringSearch.SetKeywords(list);
    stringSearchEx.SetKeywords(list);
    stringSearchEx2.SetKeywords(list);
    stringSearchEx3.SetKeywords(list);
    wordsSearch.SetKeywords(list);
    wordsSearchEx.SetKeywords(list);
    wordsSearchEx2.SetKeywords(list);
    wordsSearchEx3.SetKeywords(list);
    // NOTE(review): illegalWordsSearch is not populated here; its SetKeywords call was
    // already commented out. Confirm whether that is intentional.

    list = list.OrderBy(q => q).ToList();

    // Escape each keyword individually before joining with '|'. A hand-written character
    // class is easy to get incomplete (e.g. '?', '^', '$', or a '|' inside a keyword),
    // which would turn a literal keyword into regex syntax or an invalid pattern.
    var str = string.Join("|", list.Select(q => Regex.Escape(q)));
    re = new Regex(str, RegexOptions.IgnoreCase);

    // tf1.ToString() is used as the pattern verbatim, with no escaping applied.
    // NOTE(review): presumably it already emits a valid regex; verify.
    var str2 = tf1.ToString();
    re2 = new Regex(str2);

    return list;
}
// Checks StringSearchEx2 on "我是中国人" with seven heavily overlapping keywords:
// FindAll is expected to report six hits and Replace to mask the whole sentence.
public void test4()
{
    string sentence = "我是中国人";
    var keywordList = "中国人|中国|国人|zg人|我是中国人|我是中国|是中国人".Split('|').ToList();

    var searcher = new StringSearchEx2();
    searcher.SetKeywords(keywordList);

    // Six hits are expected from the seven keywords.
    var hits = searcher.FindAll(sentence);
    Assert.AreEqual(6, hits.Count);

    // Every character of the sentence is replaced with '*'.
    var masked = searcher.Replace(sentence, '*');
    Assert.AreEqual("*****", masked);
}