Esempio n. 1
0
        /// <summary>
        /// Converts <paramref name="text"/> into a sequence of pinyin strings / pass-through characters.
        /// </summary>
        /// <remarks>
        /// Works in two passes over one slot per UTF-16 code unit:
        /// 1. Phrase matches reported by <c>_search.FindAll</c> fill their slots from the phrase tables;
        ///    a match is only applied when it starts after the end of the previously applied match.
        /// 2. Every slot still empty is resolved per character: code units in 0x3400..0x9FD5 use the
        ///    single-character tables, high surrogates in 0xD840..0xD86E followed by a low surrogate use
        ///    the secondary tables, and everything else is copied through as a one-character string.
        /// Slots that remain empty are removed from the result. As written, that covers the high-surrogate
        /// slot of a resolved pair and any 0x3400..0x9FD5 character with no table entry.
        /// </remarks>
        /// <param name="text">Input text; must not be null (its length is read immediately).</param>
        /// <param name="tone">Offset added to every <c>_pyShow</c> index (presumably selects the tone rendering — confirm against the table layout).</param>
        /// <returns>The resolved entries in input order, with empty slots removed.</returns>
        public static string[] GetPinyinList(string text, int tone = 0)
        {
            InitPyIndex();
            InitPyWords();

            // One slot per code unit, all initially null ("not resolved yet").
            var slots = new List<string>(new string[text.Length]);

            // Pass 1: phrase-level matches.
            var lastEnd = -1;
            foreach (var match in _search.FindAll(text))
            {
                if (match.Start <= lastEnd)
                {
                    // Starts inside (or at the end of) the previously applied match: skip it.
                    continue;
                }

                for (var k = 0; k < match.Keyword.Length; k++)
                {
                    slots[match.Start + k] = _pyShow[_wordPy[_wordPyIndex[match.Index] + k] + tone];
                }
                lastEnd = match.End;
            }

            // Pass 2: resolve whatever the phrase pass left empty.
            for (var cursor = 0; cursor < text.Length; cursor++)
            {
                if (slots[cursor] != null)
                {
                    continue;
                }

                var ch = text[cursor];

                if (ch >= 0x3400 && ch <= 0x9fd5)
                {
                    var offset = ch - 0x3400;
                    var first  = _pyIndex[offset];
                    var limit  = _pyIndex[offset + 1];
                    if (first < limit)
                    {
                        slots[cursor] = _pyShow[_pyData[first] + tone];
                    }
                    // No entry: the slot stays null and is removed at the end.
                    continue;
                }

                if (ch >= 0xd840 && ch <= 0xd86e && cursor + 1 < text.Length)
                {
                    var low = text[cursor + 1];
                    if (low >= 0xdc00 && low <= 0xdfff)
                    {
                        var first = _pyIndex2[ch - 0xd840][low - 0xdc00];
                        var limit = _pyIndex2[ch - 0xd840][low - 0xdc00 + 1];
                        if (first < limit)
                        {
                            // Store the reading on the low-surrogate slot; the high-surrogate
                            // slot stays null so the pair yields a single entry.
                            cursor++;
                            slots[cursor] = _pyShow[_pyData2[ch - 0xd840][first] + tone];
                            continue;
                        }
                    }
                }

                // Fallback: pass the code unit through unchanged.
                slots[cursor] = ch.ToString();
            }

            slots.RemoveAll(entry => entry == null);
            return slots.ToArray();
        }
Esempio n. 2
0
        /// <summary>
        /// Benchmark driver: loads the word list and sample text, then passes each labelled
        /// operation to <c>Run</c> (defined outside this view; presumably a timing helper).
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            ReadBadWord();
            var text = File.ReadAllText("Talk.txt");

            stringSearchEx.Save("test.ini");
            // The result is not used; the call is kept because it ran before the benchmarks
            // in the original sequence (NOTE(review): possibly a warm-up — confirm).
            stringSearchEx.FindAll(text);

            Console.Write("-------------------- SetKeywords Test --------------------\r\n");

            // Reads the non-empty lines of BadWord.txt. Shared by both SetKeywords benchmarks,
            // which previously each carried their own copy of this loop. It is invoked inside
            // the benchmark lambdas so the file read remains part of the measured work.
            Func<List<string>> readKeywords = () => {
                List<string> keywords = new List<string>();
                using (StreamReader reader = new StreamReader(File.OpenRead("BadWord.txt"))) {
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        if (line != string.Empty)
                        {
                            keywords.Add(line);
                        }
                    }
                }
                return keywords;
            };

            Run(1, "StringSearch.SetKeywords  ", () => {
                List<string> list = readKeywords();
                StringSearch s = new StringSearch();
                s.SetKeywords(list);
            });
            Run(1, "StringSearchEx.SetKeywords  ", () => {
                List<string> list = readKeywords();
                StringSearchEx s = new StringSearchEx();
                s.SetKeywords(list);
            });
            Run(1, "StringSearchEx.Load  ", () => {
                StringSearchEx s = new StringSearchEx();
                s.Load("test.ini");
            });

            Console.Write("-------------------- ToSenseWord Test --------------------\r\n");

            Run("ToSenseWord1  ", () => { WordTest.ToSenseWord1(text); });
            Run("ToSenseWord2  ", () => { WordTest.ToSenseWord2(text); });
            Run("ToSenseWord3  ", () => { WordTest.ToSenseWord3(text); });
            Run("ToSenseWord4  ", () => { WordTest.ToSenseWord4(text); });
            Run("ToSenseWord5  ", () => { WordTest.ToSenseWord5(text); });
            Run("ToSenseWord6  ", () => { WordTest.ToSenseWord6(text); });
            Run("ToSenseWord7  ", () => { WordTest.ToSenseWord7(text); });
            Run("ToSenseWord8  ", () => { WordTest.ToSenseWord8(text); });
            Run("ToSenseWord9  ", () => { WordTest.ToSenseWord9(text); });
            Run("ToSenseWord10  ", () => { WordTest.ToSenseWord10(text); });

            // Note: variant 9 is run before variant 8; the order is kept as originally written.
            Run("GetDisablePostion1  ", () => { WordTest.GetDisablePostion1(text); });
            Run("GetDisablePostion2  ", () => { WordTest.GetDisablePostion2(text); });
            Run("GetDisablePostion3  ", () => { WordTest.GetDisablePostion3(text); });
            Run("GetDisablePostion4  ", () => { WordTest.GetDisablePostion4(text); });
            Run("GetDisablePostion5  ", () => { WordTest.GetDisablePostion5(text); });
            Run("GetDisablePostion6  ", () => { WordTest.GetDisablePostion6(text); });
            Run("GetDisablePostion7  ", () => { WordTest.GetDisablePostion7(text); });
            Run("GetDisablePostion9  ", () => { WordTest.GetDisablePostion9(text); });
            Run("GetDisablePostion8  ", () => { WordTest.GetDisablePostion8(text); });

            // Disabled benchmark, kept for reference:
            //Console.Write("-------------------- ToSenseIllegalWords --------------------\r\n");
            //Run("ToSenseIllegalWords", () => { WordsHelper.ToSenseIllegalWords(text); });

            Console.Write("-------------------- FindFirst OR ContainsAny --------------------\r\n");
            Run("TrieFilter", () => { tf1.HasBadWord(text); });
            Run("FastFilter", () => { ff.HasBadWord(text); });
            Run("StringSearch(ContainsAny)", () => { stringSearch.ContainsAny(text); });
            Run("StringSearch(FindFirst)", () => { stringSearch.FindFirst(text); });
            Run("StringSearchEx(ContainsAny)", () => { stringSearchEx.ContainsAny(text); });
            Run("StringSearchEx(FindFirst)", () => { stringSearchEx.FindFirst(text); });
            Run("WordsSearch(ContainsAny)", () => { wordsSearch.ContainsAny(text); });
            Run("WordsSearch(FindFirst)", () => { wordsSearch.FindFirst(text); });
            Run("WordsSearchEx(ContainsAny)", () => { wordsSearchEx.ContainsAny(text); });
            Run("WordsSearchEx(FindFirst)", () => { wordsSearchEx.FindFirst(text); });
            Run("IllegalWordsQuickSearch(FindFirst)", () => { illegalWordsQuickSearch.FindFirst(text); });
            Run("IllegalWordsQuickSearch(ContainsAny)", () => { illegalWordsQuickSearch.ContainsAny(text); });

            Run("IllegalWordsSearch(FindFirst)", () => { illegalWordsSearch.FindFirst(text); });
            Run("IllegalWordsSearch(ContainsAny)", () => { illegalWordsSearch.ContainsAny(text); });

            Console.Write("-------------------- Find All --------------------\r\n");
            Run("TrieFilter(FindAll)", () => { tf1.FindAll(text); });
            Run("FastFilter(FindAll)", () => { ff.FindAll(text); });
            Run("StringSearch(FindAll)", () => { stringSearch.FindAll(text); });
            Run("StringSearchEx(FindAll)", () => { stringSearchEx.FindAll(text); });
            Run("WordsSearch(FindAll)", () => { wordsSearch.FindAll(text); });
            Run("WordsSearchEx(FindAll)", () => { wordsSearchEx.FindAll(text); });
            Run("IllegalWordsQuickSearch(FindAll)", () => { illegalWordsQuickSearch.FindAll(text); });
            Run("IllegalWordsSearch(FindAll)", () => { illegalWordsSearch.FindAll(text); });

            Console.Write("-------------------- Replace --------------------\r\n");
            Run("TrieFilter(Replace)", () => { tf1.Replace(text); });
            Run("FastFilter(Replace)", () => { ff.Replace(text); });
            Run("StringSearch(Replace)", () => { stringSearch.Replace(text); });
            Run("StringSearchEx(Replace)", () => { stringSearchEx.Replace(text); });

            Run("WordsSearch(Replace)", () => { wordsSearch.Replace(text); });
            Run("WordsSearchEx(Replace)", () => { wordsSearchEx.Replace(text); });
            Run("IllegalWordsQuickSearch(Replace)", () => { illegalWordsQuickSearch.Replace(text); });
            Run("IllegalWordsSearch(Replace)", () => { illegalWordsSearch.Replace(text); });

            Console.Write("-------------------- Regex --------------------\r\n");
            Run("Regex.IsMatch", () => { re.IsMatch(text); });
            Run("Regex.Match", () => { re.Match(text); });
            Run("Regex.Matches", () => { re.Matches(text); });

            Console.Write("-------------------- Regex used Trie tree --------------------\r\n");
            Run("Regex.IsMatch", () => { re2.IsMatch(text); });
            Run("Regex.Match", () => { re2.Match(text); });
            Run("Regex.Matches", () => { re2.Matches(text); });

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
Esempio n. 3
0
        /// <summary>
        /// Benchmark driver: calls <c>ReadBadWord</c>, reads Talk.txt, then hands each labelled
        /// operation to <c>Run</c> in the order listed below, printing a banner before each group.
        /// <c>Run</c> and the searcher fields (<c>tf1</c>, <c>ff</c>, <c>word</c>, <c>search</c>,
        /// <c>iword1</c>, <c>iword2</c>, <c>re</c>, <c>re2</c>) are defined outside this view;
        /// presumably <c>Run</c> times the supplied action — confirm against its definition.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            ReadBadWord();
            // Sample text that every benchmark below operates on.
            var text = File.ReadAllText("Talk.txt");

            Console.Write("-------------------- ToSenseWord Test --------------------\r\n");

            Run("ToSenseWord1  ", () => { WordTest.ToSenseWord1(text); });
            Run("ToSenseWord2  ", () => { WordTest.ToSenseWord2(text); });
            Run("ToSenseWord3  ", () => { WordTest.ToSenseWord3(text); });
            Run("ToSenseWord4  ", () => { WordTest.ToSenseWord4(text); });
            Run("ToSenseWord5  ", () => { WordTest.ToSenseWord5(text); });
            Run("ToSenseWord6  ", () => { WordTest.ToSenseWord6(text); });
            Run("ToSenseWord7  ", () => { WordTest.ToSenseWord7(text); });
            Run("ToSenseWord8  ", () => { WordTest.ToSenseWord8(text); });
            Run("ToSenseWord9  ", () => { WordTest.ToSenseWord9(text); });
            Run("ToSenseWord10  ", () => { WordTest.ToSenseWord10(text); });

            // The GetDisablePostion benchmarks are currently disabled.
            //Run("GetDisablePostion1  ", () => { WordTest.GetDisablePostion1(text); });
            //Run("GetDisablePostion2  ", () => { WordTest.GetDisablePostion2(text); });
            //Run("GetDisablePostion3  ", () => { WordTest.GetDisablePostion3(text); });
            //Run("GetDisablePostion4  ", () => { WordTest.GetDisablePostion4(text); });
            //Run("GetDisablePostion5  ", () => { WordTest.GetDisablePostion5(text); });
            //Run("GetDisablePostion6  ", () => { WordTest.GetDisablePostion6(text); });
            //Run("GetDisablePostion7  ", () => { WordTest.GetDisablePostion7(text); });
            //Run("GetDisablePostion9  ", () => { WordTest.GetDisablePostion9(text); });
            //Run("GetDisablePostion8  ", () => { WordTest.GetDisablePostion8(text); });

            Console.Write("-------------------- ToSenseIllegalWords --------------------\r\n");

            Run("ToSenseIllegalWords", () => { WordsHelper.ToSenseIllegalWords(text); });



            // Existence checks / first-match lookups.
            Console.Write("-------------------- FindFirst OR ContainsAny --------------------\r\n");
            Run("TrieFilter", () => { tf1.HasBadWord(text); });
            Run("FastFilter", () => { ff.HasBadWord(text); });
            Run("StringSearch(ContainsAny)", () => { word.ContainsAny(text); });
            Run("StringSearch(FindFirst)", () => { word.FindFirst(text); });
            Run("WordsSearch(ContainsAny)", () => { search.ContainsAny(text); });
            Run("WordsSearch(FindFirst)", () => { search.FindFirst(text); });
            Run("IllegalWordsQuickSearch(FindFirst)", () => { iword1.FindFirst(text); });
            Run("IllegalWordsQuickSearch(ContainsAny)", () => { iword1.ContainsAny(text); });

            Run("IllegalWordsSearch(FindFirst)", () => { iword2.FindFirst(text); });
            Run("IllegalWordsSearch(ContainsAny)", () => { iword2.ContainsAny(text); });

            // Full enumeration of matches.
            Console.Write("-------------------- Find All --------------------\r\n");
            Run("TrieFilter(FindAll)", () => { tf1.FindAll(text); });
            Run("FastFilter(FindAll)", () => { ff.FindAll(text); });
            Run("StringSearch(FindAll)", () => { word.FindAll(text); });
            Run("WordsSearch(FindAll)", () => { search.FindAll(text); });
            Run("IllegalWordsQuickSearch(FindAll)", () => { iword1.FindAll(text); });
            Run("IllegalWordsSearch(FindAll)", () => { iword2.FindAll(text); });
            // Replacement of matches.
            Console.Write("-------------------- Replace --------------------\r\n");
            Run("TrieFilter(Replace)", () => { tf1.Replace(text); });
            Run("FastFilter(Replace)", () => { ff.Replace(text); });
            Run("StringSearch(Replace)", () => { word.Replace(text); });
            Run("WordsSearch(Replace)", () => { search.Replace(text); });
            Run("IllegalWordsQuickSearch(Replace)", () => { iword1.Replace(text); });
            Run("IllegalWordsSearch(Replace)", () => { iword2.Replace(text); });

            // Regex baselines: `re` and `re2` are built elsewhere; per the banner text, `re2`
            // is the trie-based pattern variant (NOTE(review): confirm where it is constructed).
            Console.Write("-------------------- Regex --------------------\r\n");
            Run("Regex.IsMatch", () => { re.IsMatch(text); });
            Run("Regex.Match", () => { re.Match(text); });
            Run("Regex.Matches", () => { re.Matches(text); });

            Console.Write("-------------------- Regex used Trie tree --------------------\r\n");
            Run("Regex.IsMatch", () => { re2.IsMatch(text); });
            Run("Regex.Match", () => { re2.Match(text); });
            Run("Regex.Matches", () => { re2.Matches(text); });


            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
Esempio n. 4
0
        /// <summary>
        /// Benchmark driver: calls <c>ReadBadWord</c>, reads Talk.txt, then hands each labelled
        /// operation to <c>Run</c> in the order listed below, printing a banner before each group.
        /// <c>Run</c> and the searcher fields are defined outside this view. The banners state
        /// 100000 iterations ("次" = "times") for the search groups and 100 for the regex groups;
        /// the regex calls pass 100 explicitly, while the default count used by the two-argument
        /// <c>Run</c> is not visible here (presumably 100000 — confirm).
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            ReadBadWord();
            // Sample text that every benchmark below operates on.
            var text = File.ReadAllText("Talk.txt");


            // In the labels below, "代码相同" means "same code": per the label, the StringSearchEx*
            // and WordsSearchEx* variants share the implementation for that operation.
            Console.Write("-------------------- FindFirst OR ContainsAny 100000次 --------------------\r\n");
            Run("TrieFilter", () => { tf1.HasBadWord(text); });
            Run("FastFilter", () => { ff.HasBadWord(text); });
            Run("StringSearch(ContainsAny)", () => { stringSearch.ContainsAny(text); });
            Run("StringSearchEx(ContainsAny)--- WordsSearchEx(ContainsAny)代码相同", () => { stringSearchEx.ContainsAny(text); });
            Run("StringSearchEx2(ContainsAny)--- WordsSearchEx2(ContainsAny)代码相同", () => { stringSearchEx2.ContainsAny(text); });
            Run("StringSearchEx3(ContainsAny)--- WordsSearchEx3(ContainsAny)代码相同", () => { stringSearchEx3.ContainsAny(text); });
            Run("IllegalWordsSearch(ContainsAny)", () => { illegalWordsSearch.ContainsAny(text); });

            Run("StringSearch(FindFirst)", () => { stringSearch.FindFirst(text); });
            Run("StringSearchEx(FindFirst)", () => { stringSearchEx.FindFirst(text); });
            Run("StringSearchEx2(FindFirst)", () => { stringSearchEx2.FindFirst(text); });
            Run("StringSearchEx3(FindFirst)", () => { stringSearchEx3.FindFirst(text); });
            Run("WordsSearch(FindFirst)", () => { wordsSearch.FindFirst(text); });
            Run("WordsSearchEx(FindFirst)", () => { wordsSearchEx.FindFirst(text); });
            Run("WordsSearchEx2(FindFirst)", () => { wordsSearchEx2.FindFirst(text); });
            Run("WordsSearchEx3(FindFirst)", () => { wordsSearchEx3.FindFirst(text); });
            Run("IllegalWordsSearch(FindFirst)", () => { illegalWordsSearch.FindFirst(text); });


            // Full enumeration of matches.
            Console.Write("-------------------- Find All 100000次 --------------------\r\n");
            Run("TrieFilter(FindAll)", () => { tf1.FindAll(text); });
            Run("FastFilter(FindAll)", () => { ff.FindAll(text); });
            Run("StringSearch(FindAll)", () => { stringSearch.FindAll(text); });
            Run("StringSearchEx(FindAll)", () => { stringSearchEx.FindAll(text); });
            Run("StringSearchEx2(FindAll)", () => { stringSearchEx2.FindAll(text); });
            Run("StringSearchEx3(FindAll)", () => { stringSearchEx3.FindAll(text); });

            Run("WordsSearch(FindAll)", () => { wordsSearch.FindAll(text); });
            Run("WordsSearchEx(FindAll)", () => { wordsSearchEx.FindAll(text); });
            Run("WordsSearchEx2(FindAll)", () => { wordsSearchEx2.FindAll(text); });
            Run("WordsSearchEx3(FindAll)", () => { wordsSearchEx3.FindAll(text); });
            Run("IllegalWordsSearch(FindAll)", () => { illegalWordsSearch.FindAll(text); });

            // Replacement of matches.
            Console.Write("-------------------- Replace  100000次 --------------------\r\n");
            Run("TrieFilter(Replace)", () => { tf1.Replace(text); });
            Run("FastFilter(Replace)", () => { ff.Replace(text); });
            Run("StringSearch(Replace)", () => { stringSearch.Replace(text); });
            Run("WordsSearch(Replace)", () => { wordsSearch.Replace(text); });
            Run("StringSearchEx(Replace)--- WordsSearchEx(Replace)代码相同", () => { stringSearchEx.Replace(text); });
            Run("StringSearchEx2(Replace)--- WordsSearchEx2(Replace)代码相同", () => { stringSearchEx2.Replace(text); });
            Run("StringSearchEx3(Replace)--- WordsSearchEx3(Replace)代码相同", () => { stringSearchEx3.Replace(text); });
            Run("IllegalWordsSearch(Replace)", () => { illegalWordsSearch.Replace(text); });

            // Regex baselines use the three-argument Run overload with an explicit count of 100.
            Console.Write("-------------------- Regex  100次 --------------------\r\n");
            Run(100, "Regex.IsMatch", () => { re.IsMatch(text); });
            Run(100, "Regex.Match", () => { re.Match(text); });
            Run(100, "Regex.Matches", () => { re.Matches(text); });

            Console.Write("-------------------- Regex used Trie tree  100次 --------------------\r\n");
            Run(100, "Regex.IsMatch", () => { re2.IsMatch(text); });
            Run(100, "Regex.Match", () => { re2.Match(text); });
            Run(100, "Regex.Matches", () => { re2.Matches(text); });

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
Esempio n. 5
0
        /// <summary>
        /// Offline preprocessing pipeline for the pinyin data files. Each step caches its output
        /// in a text file in the working directory and is skipped when that file already exists
        /// (step 2 is the exception: it has no guard and produces no output yet).
        /// </summary>
        /// <remarks>
        /// Files written: scel_1.txt, pinyin_1.txt, pinyin_2_one.txt, pinyin_2_more.txt,
        /// pinyin_3_one.txt, pinyin_4_ok.txt, pinyin_4_error.txt. <c>GetWords</c>, <c>GetPinYin</c>
        /// and <c>GetLength</c> are defined outside this view.
        /// </remarks>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Preprocessing.
            // Step 1: process the Sogou lexicon and cache it as scel_1.txt.
            if (!File.Exists("scel_1.txt"))
            {
                var scel_1 = GetWords();
                File.WriteAllText("scel_1.txt", string.Join("\n", scel_1));
                scel_1.Clear();
            }

            // Step 2: slim down the lexicon.
            // NOTE(review): this step is unfinished — it searches for long words that contain
            // shorter ones but never removes anything or writes scel_2.txt.
            {
                var txt   = File.ReadAllText("scel_1.txt");
                var lines = txt.Split('\n');
                Dictionary<string, string> dict = new Dictionary<string, string>();
                foreach (var item in lines)
                {
                    var sp = item.Split(' ');
                    // Skip blank or malformed lines (no "word value" pair) instead of
                    // failing with IndexOutOfRangeException on sp[1].
                    if (sp.Length < 2)
                    {
                        continue;
                    }
                    dict[sp[0]] = sp[1];
                }
                List<string> keys = dict.Keys.ToList();

                for (int i = 3; i < 8; i++)
                {
                    // Index every word of at most i characters ...
                    var keywords    = keys.Where(q => q.Length <= i).ToList();
                    var wordsSearch = new WordsSearch();
                    wordsSearch.SetKeywords(keywords);

                    // ... and look for them inside every longer word.
                    for (int j = keys.Count - 1; j >= 0; j--)
                    {
                        var key = keys[j];
                        if (key.Length <= i)
                        {
                            continue;
                        }

                        var all = wordsSearch.FindAll(key);
                        if (all.Count > 0)
                        {
                            // TODO: run the pinyin test; delete the entry if the result is identical.
                        }
                    }
                }

                //File.WriteAllText("scel_2.txt", string.Join("\n", scel_1));
            }

            // Step 3: get all pinyin readings of the words (not implemented here).

            // Step 4: fetch the pinyin data from the web and cache it as pinyin_1.txt.
            if (!File.Exists("pinyin_1.txt"))
            {
                var pinyin_1 = GetPinYin();
                File.WriteAllText("pinyin_1.txt", string.Join("\n", pinyin_1));
                pinyin_1.Clear();
            }

            // Step 5: separate single-character entries from phrase entries.
            if (!File.Exists("pinyin_2_one.txt"))
            {
                var txt   = File.ReadAllText("pinyin_1.txt");
                var lines = txt.Split('\n');
                List<string> ones  = new List<string>();
                List<string> mores = new List<string>();
                foreach (var line in lines)
                {
                    // The first comma-separated field is the word itself.
                    var sp = line.Split(',');
                    if (GetLength(sp[0]) == 1)
                    {
                        ones.Add(line);
                    }
                    else
                    {
                        mores.Add(line);
                    }
                }
                File.WriteAllText("pinyin_2_one.txt", string.Join("\n", ones));
                File.WriteAllText("pinyin_2_more.txt", string.Join("\n", mores));
                ones.Clear();
                mores.Clear();
            }

            // Step 6: simple merge of the single-character entries, so that the common
            // reading is not overwritten by a later line.
            if (!File.Exists("pinyin_3_one.txt"))
            {
                var txt   = File.ReadAllText("pinyin_2_one.txt");
                var lines = txt.Split('\n').ToList();
                // Walk backwards so RemoveAt does not shift the indices still to be visited.
                for (int i = lines.Count - 1; i >= 1; i--)
                {
                    // Ordinal comparison: these are data lines, not linguistic text, so the
                    // culture-sensitive overload could treat distinct lines as matching.
                    if (lines[i].StartsWith(lines[i - 1], System.StringComparison.Ordinal))
                    {
                        lines.RemoveAt(i);
                    }
                }
                File.WriteAllText("pinyin_3_one.txt", string.Join("\n", lines));
            }

            // Step 7: split phrase entries by whether the number of pinyin fields
            // matches the phrase length.
            if (!File.Exists("pinyin_4_ok.txt"))
            {
                var txt   = File.ReadAllText("pinyin_2_more.txt");
                var lines = txt.Split('\n');
                List<string> oks    = new List<string>();
                List<string> errors = new List<string>();
                foreach (var line in lines)
                {
                    var sp = line.Split(',');
                    // sp[0] is the phrase; the remaining fields are its pinyin syllables.
                    if (GetLength(sp[0]) == sp.Length - 1)
                    {
                        oks.Add(line);
                    }
                    else
                    {
                        errors.Add(line);
                    }
                }
                File.WriteAllText("pinyin_4_ok.txt", string.Join("\n", oks));
                File.WriteAllText("pinyin_4_error.txt", string.Join("\n", errors));
            }
        }