Ejemplo n.º 1
0
        /// <summary>
        /// Checks ContainsAny, FindFirst, FindAll and Replace of <c>WordsSearchEx</c>
        /// against a three-keyword dictionary and one short sample sentence.
        /// </summary>
        public void test2()
        {
            const string keywordSpec = "中国|国人|zg人";
            const string sample      = "我是中国人";

            // Build the searcher from the pipe-separated keyword list.
            var searcher = new WordsSearchEx();
            var keywords = keywordSpec.Split('|').ToList();
            searcher.SetKeywords(keywords);

            // The sample contains at least one keyword.
            Assert.AreEqual(true, searcher.ContainsAny(sample));

            // The first hit reported is "中国".
            Assert.AreEqual("中国", searcher.FindFirst(sample).Keyword);

            // Both overlapping keywords are reported, "中国" first.
            var matches    = searcher.FindAll(sample);
            var firstMatch = matches[0];

            Assert.AreEqual("中国", firstMatch.Keyword);
            Assert.AreEqual(2, firstMatch.Start);
            Assert.AreEqual(3, firstMatch.End);
            // Index is the index returned for the match; it starts at 0 by default.
            Assert.AreEqual(0, firstMatch.Index);
            Assert.AreEqual("国人", matches[1].Keyword);
            Assert.AreEqual(2, matches.Count);

            // Every character covered by a match is masked with '*'.
            Assert.AreEqual("我是***", searcher.Replace(sample, '*'));
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds a <c>WordsSearchEx</c> from <paramref name="list"/>, saves it to
        /// "WordsSearchEx.dat", then runs <c>FindAll</c> over <paramref name="txt"/>
        /// 100000 times and prints the elapsed milliseconds to the console.
        /// </summary>
        /// <param name="list">Keywords passed to <c>SetKeywords</c>.</param>
        /// <param name="txt">Text scanned on every iteration.</param>
        private static void WordsSearchExSearch(List <string> list, string txt)
        {
            const int iterations = 100000;

            var searcher = new WordsSearchEx();
            searcher.SetKeywords(list);
            searcher.Save("WordsSearchEx.dat");

            // Only the FindAll loop is timed; setup and Save happen before the clock starts.
            var timer = Stopwatch.StartNew();
            for (int remaining = iterations; remaining > 0; remaining--)
            {
                searcher.FindAll(txt);
            }
            timer.Stop();

            string elapsed = timer.ElapsedMilliseconds.ToString("N0");
            Console.WriteLine("WordsSearchEx: " + elapsed + "ms");
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Benchmark driver: calls <c>ReadBadWord()</c>, reads "Talk.txt", and passes one
        /// labelled action per filter/operation to <c>Run</c>, grouped under console headers.
        /// Waits for a key press before returning.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // NOTE(review): ReadBadWord presumably populates the filter fields used below
            // (tf1, ff, stringSearch, ..., re, re2) - confirm against its definition.
            ReadBadWord();
            var talkText = File.ReadAllText("Talk.txt");

            // Not part of this run: the SetKeywords/Load timings and the
            // WordTest.ToSenseWord*, WordTest.GetDisablePostion* and
            // WordsHelper.ToSenseIllegalWords benchmarks.

            // ---- First hit / containment ----
            Console.Write("-------------------- FindFirst OR ContainsAny --------------------\r\n");
            Run("TrieFilter", () => { tf1.HasBadWord(talkText); });
            Run("FastFilter", () => { ff.HasBadWord(talkText); });
            Run("StringSearch(ContainsAny)", () => { stringSearch.ContainsAny(talkText); });
            Run("StringSearch(FindFirst)", () => { stringSearch.FindFirst(talkText); });
            Run("StringSearchEx(ContainsAny)", () => { stringSearchEx.ContainsAny(talkText); });
            Run("StringSearchEx(FindFirst)", () => { stringSearchEx.FindFirst(talkText); });
            Run("WordsSearch(ContainsAny)", () => { wordsSearch.ContainsAny(talkText); });
            Run("WordsSearch(FindFirst)", () => { wordsSearch.FindFirst(talkText); });
            Run("WordsSearchEx(ContainsAny)", () => { wordsSearchEx.ContainsAny(talkText); });
            Run("WordsSearchEx(FindFirst)", () => { wordsSearchEx.FindFirst(talkText); });
            Run("IllegalWordsSearch(FindFirst)", () => { illegalWordsSearch.FindFirst(talkText); });
            Run("IllegalWordsSearch(ContainsAny)", () => { illegalWordsSearch.ContainsAny(talkText); });

            // ---- All hits ----
            Console.Write("-------------------- Find All --------------------\r\n");
            Run("TrieFilter(FindAll)", () => { tf1.FindAll(talkText); });
            Run("FastFilter(FindAll)", () => { ff.FindAll(talkText); });
            Run("StringSearch(FindAll)", () => { stringSearch.FindAll(talkText); });
            Run("StringSearchEx(FindAll)", () => { stringSearchEx.FindAll(talkText); });
            Run("WordsSearch(FindAll)", () => { wordsSearch.FindAll(talkText); });
            Run("WordsSearchEx(FindAll)", () => { wordsSearchEx.FindAll(talkText); });
            Run("IllegalWordsSearch(FindAll)", () => { illegalWordsSearch.FindAll(talkText); });

            // ---- Replacement ----
            Console.Write("-------------------- Replace --------------------\r\n");
            Run("TrieFilter(Replace)", () => { tf1.Replace(talkText); });
            Run("FastFilter(Replace)", () => { ff.Replace(talkText); });
            Run("StringSearch(Replace)", () => { stringSearch.Replace(talkText); });
            Run("StringSearchEx(Replace)", () => { stringSearchEx.Replace(talkText); });
            Run("WordsSearch(Replace)", () => { wordsSearch.Replace(talkText); });
            Run("WordsSearchEx(Replace)", () => { wordsSearchEx.Replace(talkText); });
            Run("IllegalWordsSearch(Replace)", () => { illegalWordsSearch.Replace(talkText); });

            // ---- Regex baselines ----
            Console.Write("-------------------- Regex --------------------\r\n");
            Run("Regex.IsMatch", () => { re.IsMatch(talkText); });
            Run("Regex.Match", () => { re.Match(talkText); });
            Run("Regex.Matches", () => { re.Matches(talkText); });

            Console.Write("-------------------- Regex used Trie tree --------------------\r\n");
            Run("Regex.IsMatch", () => { re2.IsMatch(talkText); });
            Run("Regex.Match", () => { re2.Match(talkText); });
            Run("Regex.Matches", () => { re2.Matches(talkText); });

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Benchmark driver: calls <c>ReadBadWord()</c>, reads "Talk.txt", and passes one
        /// labelled action per filter/operation to <c>Run</c>, grouped under console headers.
        /// Waits for a key press before returning.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // NOTE(review): ReadBadWord presumably populates the filter fields used below - confirm.
            // NOTE(review): the headers mention 100000 and 100 repetitions; the leading
            // argument of Run(100, ...) looks like the repetition count - confirm against Run.
            ReadBadWord();
            var talkText = File.ReadAllText("Talk.txt");

            // ---- Containment (IllegalWordsSearch is not included in this run) ----
            Console.Write("-------------------- FindFirst OR ContainsAny 100000次 --------------------\r\n");
            Run("TrieFilter", () => { tf1.HasBadWord(talkText); });
            Run("FastFilter", () => { ff.HasBadWord(talkText); });
            Run("StringSearch(ContainsAny)", () => { stringSearch.ContainsAny(talkText); });
            Run("StringSearchEx(ContainsAny)--- WordsSearchEx(ContainsAny)代码相同", () => { stringSearchEx.ContainsAny(talkText); });
            Run("StringSearchEx2(ContainsAny)--- WordsSearchEx2(ContainsAny)代码相同", () => { stringSearchEx2.ContainsAny(talkText); });
            Run("StringSearchEx3(ContainsAny)--- WordsSearchEx3(ContainsAny)代码相同", () => { stringSearchEx3.ContainsAny(talkText); });

            // ---- First hit (IllegalWordsSearch is not included in this run) ----
            Run("StringSearch(FindFirst)", () => { stringSearch.FindFirst(talkText); });
            Run("StringSearchEx(FindFirst)", () => { stringSearchEx.FindFirst(talkText); });
            Run("StringSearchEx2(FindFirst)", () => { stringSearchEx2.FindFirst(talkText); });
            Run("StringSearchEx3(FindFirst)", () => { stringSearchEx3.FindFirst(talkText); });
            Run("WordsSearch(FindFirst)", () => { wordsSearch.FindFirst(talkText); });
            Run("WordsSearchEx(FindFirst)", () => { wordsSearchEx.FindFirst(talkText); });
            Run("WordsSearchEx2(FindFirst)", () => { wordsSearchEx2.FindFirst(talkText); });
            Run("WordsSearchEx3(FindFirst)", () => { wordsSearchEx3.FindFirst(talkText); });

            // ---- All hits (IllegalWordsSearch is not included in this run) ----
            Console.Write("-------------------- Find All 100000次 --------------------\r\n");
            Run("TrieFilter(FindAll)", () => { tf1.FindAll(talkText); });
            Run("FastFilter(FindAll)", () => { ff.FindAll(talkText); });
            Run("StringSearch(FindAll)", () => { stringSearch.FindAll(talkText); });
            Run("StringSearchEx(FindAll)", () => { stringSearchEx.FindAll(talkText); });
            Run("StringSearchEx2(FindAll)", () => { stringSearchEx2.FindAll(talkText); });
            Run("StringSearchEx3(FindAll)", () => { stringSearchEx3.FindAll(talkText); });
            Run("WordsSearch(FindAll)", () => { wordsSearch.FindAll(talkText); });
            Run("WordsSearchEx(FindAll)", () => { wordsSearchEx.FindAll(talkText); });
            Run("WordsSearchEx2(FindAll)", () => { wordsSearchEx2.FindAll(talkText); });
            Run("WordsSearchEx3(FindAll)", () => { wordsSearchEx3.FindAll(talkText); });

            // ---- Replacement (IllegalWordsSearch is not included in this run) ----
            Console.Write("-------------------- Replace  100000次 --------------------\r\n");
            Run("TrieFilter(Replace)", () => { tf1.Replace(talkText); });
            Run("FastFilter(Replace)", () => { ff.Replace(talkText); });
            Run("StringSearch(Replace)", () => { stringSearch.Replace(talkText); });
            Run("WordsSearch(Replace)", () => { wordsSearch.Replace(talkText); });
            Run("StringSearchEx(Replace)--- WordsSearchEx(Replace)代码相同", () => { stringSearchEx.Replace(talkText); });
            Run("StringSearchEx2(Replace)--- WordsSearchEx2(Replace)代码相同", () => { stringSearchEx2.Replace(talkText); });
            Run("StringSearchEx3(Replace)--- WordsSearchEx3(Replace)代码相同", () => { stringSearchEx3.Replace(talkText); });

            // ---- Regex baselines, passed an explicit 100 ----
            Console.Write("-------------------- Regex  100次 --------------------\r\n");
            Run(100, "Regex.IsMatch", () => { re.IsMatch(talkText); });
            Run(100, "Regex.Match", () => { re.Match(talkText); });
            Run(100, "Regex.Matches", () => { re.Matches(talkText); });

            Console.Write("-------------------- Regex used Trie tree  100次 --------------------\r\n");
            Run(100, "Regex.IsMatch", () => { re2.IsMatch(talkText); });
            Run(100, "Regex.Match", () => { re2.Match(talkText); });
            Run(100, "Regex.Matches", () => { re2.Matches(talkText); });

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Returns the pinyin of <paramref name="text"/> as one string per character.
        /// </summary>
        /// <remarks>
        /// Two passes fill one slot per UTF-16 code unit:
        /// 1. Phrase matches from <c>_search.FindAll</c> are applied left to right; a match is
        ///    used only when it starts after the end of the previously used match.
        /// 2. Every slot still empty is resolved per character: U+3400..U+9FD5 through
        ///    <c>_pyIndex</c>/<c>_pyData</c>, surrogate pairs with a high surrogate in
        ///    0xD840..0xD86E through <c>_pyIndex2</c>/<c>_pyData2</c>, anything else is passed
        ///    through unchanged.
        /// A surrogate pair always yields a single output element. A pair without a table entry
        /// is passed through as one two-char string, so the result never contains half of a
        /// valid pair. Unpaired surrogates are passed through as they are.
        /// </remarks>
        /// <param name="text">Text to convert. Must not be null; the method reads its length.</param>
        /// <param name="tone">
        /// Offset added to every index into <c>_pyShow</c>.
        /// NOTE(review): presumably selects the tone rendering - confirm against the layout of <c>_pyShow</c>.
        /// </param>
        /// <returns>The non-empty slots in text order.</returns>
        public static string[] GetPinyinList(string text, int tone = 0)
        {
            InitPyIndex();
            InitPyWords();

            // One slot per UTF-16 code unit; slots that stay null are dropped at the end.
            var slots = new string[text.Length];

            // Pass 1: phrase matches. Each char of a used keyword gets its own entry.
            var matches = _search.FindAll(text);
            var lastEnd = -1;

            foreach (var match in matches)
            {
                if (match.Start > lastEnd)
                {
                    for (int j = 0; j < match.Keyword.Length; j++)
                    {
                        slots[match.Start + j] = _pyShow[_wordPy[_wordPyIndex[match.Index] + j] + tone];
                    }
                    lastEnd = match.End;
                }
            }

            // Pass 2: resolve every slot the phrase pass left empty.
            var i = 0;

            while (i < text.Length)
            {
                if (slots[i] != null)
                {
                    i++;
                    continue;
                }

                var c = text[i];

                if (c >= 0x3400 && c <= 0x9fd5)
                {
                    var index = c - 0x3400;
                    var start = _pyIndex[index];
                    var end   = _pyIndex[index + 1];
                    if (end > start)
                    {
                        slots[i] = _pyShow[_pyData[start] + tone];
                    }
                    // NOTE(review): when the table has no data for this char the slot stays
                    // null and the char is omitted from the result, unlike the pass-through
                    // used for other unknown chars. Kept as-is; confirm this is intended.
                    i++;
                    continue;
                }

                if (char.IsHighSurrogate(c) && i + 1 < text.Length && char.IsLowSurrogate(text[i + 1]))
                {
                    var low      = text[i + 1];
                    var pairSlot = i + 1;

                    // Default: keep the pair together so no lone surrogate is emitted.
                    slots[pairSlot] = text.Substring(i, 2);

                    if (c >= 0xd840 && c <= 0xd86e)
                    {
                        var from = _pyIndex2[c - 0xd840][low - 0xdc00];
                        var to   = _pyIndex2[c - 0xd840][low - 0xdc00 + 1];
                        if (from < to)
                        {
                            slots[pairSlot] = _pyShow[_pyData2[c - 0xd840][from] + tone];
                        }
                    }

                    // The high-surrogate slot stays null, so the pair yields exactly one element.
                    i += 2;
                    continue;
                }

                // Everything else (ASCII, punctuation, unpaired surrogates) passes through.
                slots[i] = c.ToString();
                i++;
            }

            var result = new List <string>(slots.Length);

            foreach (var slot in slots)
            {
                if (slot != null)
                {
                    result.Add(slot);
                }
            }
            return(result.ToArray());
        }