Example #1
0
        /// <summary>
        /// Runs the same single-character keyword set through IllegalWordsSearch,
        /// StringSearch and StringSearchEx2 against one mixed ASCII/CJK input and
        /// checks the first hit and the total hit count of each engine.
        /// </summary>
        public void IssuesTest_20()
        {
            const string input = "A10021003吃饭";
            string[] words = { "1", "A", "2", "0", "吃" };

            var illegalSearch = new IllegalWordsSearch();
            var plainSearch   = new StringSearch();
            var plainSearchEx = new StringSearchEx2();

            // --- IllegalWordsSearch: keywords first, then the matching options ---
            illegalSearch.SetKeywords(words);
            illegalSearch.UseIgnoreCase       = true;
            illegalSearch.UseDBCcaseConverter = true;

            Assert.AreEqual("吃", illegalSearch.FindFirst(input).Keyword);

            // "1", "A", "2" and "0" are flanked by letters or digits on both sides,
            // so they are not recognized; only one hit is expected.
            Assert.AreEqual(1, illegalSearch.FindAll(input).Count);

            // --- StringSearch: every occurrence of every keyword is counted ---
            plainSearch.SetKeywords(words);

            Assert.AreEqual("A", plainSearch.FindFirst(input));
            Assert.AreEqual(9, plainSearch.FindAll(input).Count);

            // --- StringSearchEx2: expected to agree with StringSearch ---
            plainSearchEx.SetKeywords(words);

            Assert.AreEqual("A", plainSearchEx.FindFirst(input));
            Assert.AreEqual(9, plainSearchEx.FindAll(input).Count);
        }
Example #2
0
        /// <summary>
        /// Checks ContainsAny, FindFirst, FindAll and Replace on StringSearchEx2
        /// with overlapping keywords ("中国" and "国人") in the input "我是中国人".
        /// </summary>
        public void test3()
        {
            const string input = "我是中国人";
            var keywordList = "中国|国人|zg人".Split('|').ToList();

            var search = new StringSearchEx2();
            search.SetKeywords(keywordList);

            // At least one keyword occurs in the input.
            Assert.AreEqual(true, search.ContainsAny(input));

            // The first hit is expected to be "中国".
            Assert.AreEqual("中国", search.FindFirst(input));

            // Both overlapping keywords are expected, in this order.
            var hits = search.FindAll(input);
            Assert.AreEqual("中国", hits[0]);
            Assert.AreEqual("国人", hits[1]);
            Assert.AreEqual(2, hits.Count);

            // Every character covered by a hit is masked with '*'.
            Assert.AreEqual("我是***", search.Replace(input, '*'));
        }
Example #3
0
        /// <summary>
        /// Loads the keyword list from "BadWord.txt" (one keyword per line, empty
        /// lines skipped) and primes every matcher used by this class with it.
        /// </summary>
        /// <remarks>
        /// Side effects: each keyword is added to <c>tf1</c> and <c>ff</c>; the full
        /// list is passed to the StringSearch* and WordsSearch* instances; <c>re</c>
        /// is rebuilt as a case-insensitive alternation of the sorted keywords; and
        /// <c>re2</c> is rebuilt from <c>tf1.ToString()</c>.
        /// </remarks>
        /// <returns>The keywords read from the file, sorted with the default string ordering.</returns>
        static List <string> ReadBadWord()
        {
            List <string> list = new List <string>();

            // Disposing the reader also closes the underlying file stream.
            using (StreamReader reader = new StreamReader(File.OpenRead("BadWord.txt"))) {
                string key;
                while ((key = reader.ReadLine()) != null)
                {
                    if (key == string.Empty)
                    {
                        continue;
                    }

                    tf1.AddKey(key);
                    ff.AddKey(key);
                    list.Add(key);
                }
            }

            // The search engines receive the keywords in file order (before sorting).
            stringSearch.SetKeywords(list);
            stringSearchEx.SetKeywords(list);
            stringSearchEx2.SetKeywords(list);
            stringSearchEx3.SetKeywords(list);
            wordsSearch.SetKeywords(list);
            wordsSearchEx.SetKeywords(list);
            wordsSearchEx2.SetKeywords(list);
            wordsSearchEx3.SetKeywords(list);
            //illegalWordsSearch.SetKeywords(list);

            list = list.OrderBy(q => q).ToList();

            // Escape each keyword individually BEFORE joining so that every regex
            // metacharacter (including '?', '^', '$', '|' and '#', which a hand-rolled
            // character class easily misses) is matched literally, while the '|'
            // separators between keywords keep their alternation meaning.
            var pattern = string.Join("|", list.Select(q => Regex.Escape(q)));

            re = new Regex(pattern, RegexOptions.IgnoreCase);

            // tf1.ToString() is used verbatim as a pattern; no escaping is applied here.
            var str2 = tf1.ToString();

            re2 = new Regex(str2);

            return(list);
        }
Example #4
0
        /// <summary>
        /// Checks FindAll and Replace on StringSearchEx2 when keywords nest inside
        /// and overlap one another, up to a keyword equal to the whole input.
        /// </summary>
        public void test4()
        {
            const string input = "我是中国人";
            var keywordList = "中国人|中国|国人|zg人|我是中国人|我是中国|是中国人".Split('|').ToList();

            var search = new StringSearchEx2();
            search.SetKeywords(keywordList);

            // Six of the seven keywords occur in the input ("zg人" does not).
            Assert.AreEqual(6, search.FindAll(input).Count);

            // One keyword spans the entire input, so every character is masked.
            Assert.AreEqual("*****", search.Replace(input, '*'));
        }