// Verifies WordsSearchEx on a short Chinese sentence: keyword detection,
// first match, all matches (with their positions and keyword index) and
// masking of matched text with '*'.
public void test2()
{
    string keywordSpec = "中国|国人|zg人";
    string sentence = "我是中国人";

    WordsSearchEx searcher = new WordsSearchEx();
    searcher.SetKeywords(keywordSpec.Split('|').ToList());

    Assert.AreEqual(true, searcher.ContainsAny(sentence));

    var firstHit = searcher.FindFirst(sentence);
    Assert.AreEqual("中国", firstHit.Keyword);

    var hits = searcher.FindAll(sentence);
    var leading = hits[0];
    Assert.AreEqual("中国", leading.Keyword);
    Assert.AreEqual(2, leading.Start);
    Assert.AreEqual(3, leading.End);
    // Index is the returned keyword index; it starts at 0 by default.
    Assert.AreEqual(0, leading.Index);
    Assert.AreEqual("国人", hits[1].Keyword);
    Assert.AreEqual(2, hits.Count);

    var masked = searcher.Replace(sentence, '*');
    Assert.AreEqual("我是***", masked);
}
/// <summary>
/// Builds a <c>WordsSearchEx</c> from <paramref name="list"/>, saves it to
/// "WordsSearchEx.dat", then times 100000 calls to <c>FindAll</c> on
/// <paramref name="txt"/> and prints the elapsed milliseconds to the console.
/// </summary>
/// <param name="list">Keywords loaded into the searcher.</param>
/// <param name="txt">Text scanned on every iteration.</param>
private static void WordsSearchExSearch(List<string> list, string txt)
{
    WordsSearchEx searcher = new WordsSearchEx();
    searcher.SetKeywords(list);
    searcher.Save("WordsSearchEx.dat");

    // Only the search loop is timed; keyword setup and Save happen before the timer starts.
    Stopwatch timer = Stopwatch.StartNew();
    for (int remaining = 100000; remaining > 0; remaining--)
    {
        searcher.FindAll(txt);
    }
    timer.Stop();

    Console.WriteLine("WordsSearchEx: {0}ms", timer.ElapsedMilliseconds.ToString("N0"));
}
/// <summary>
/// Benchmark entry point: loads the bad-word data via <c>ReadBadWord</c>, reads
/// "Talk.txt" and passes each filter/search/regex operation to <c>Run</c>,
/// grouped under console section headers. Waits for a key press before exiting.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    ReadBadWord();
    var sample = File.ReadAllText("Talk.txt");

    // Experiments that are not part of this run: SetKeywords/Load timing,
    // ToSenseWord1-10, GetDisablePostion1-9 and ToSenseIllegalWords.

    Console.Write("-------------------- FindFirst OR ContainsAny --------------------\r\n");
    Run("TrieFilter", () => tf1.HasBadWord(sample));
    Run("FastFilter", () => ff.HasBadWord(sample));
    Run("StringSearch(ContainsAny)", () => stringSearch.ContainsAny(sample));
    Run("StringSearch(FindFirst)", () => stringSearch.FindFirst(sample));
    Run("StringSearchEx(ContainsAny)", () => stringSearchEx.ContainsAny(sample));
    Run("StringSearchEx(FindFirst)", () => stringSearchEx.FindFirst(sample));
    Run("WordsSearch(ContainsAny)", () => wordsSearch.ContainsAny(sample));
    Run("WordsSearch(FindFirst)", () => wordsSearch.FindFirst(sample));
    Run("WordsSearchEx(ContainsAny)", () => wordsSearchEx.ContainsAny(sample));
    Run("WordsSearchEx(FindFirst)", () => wordsSearchEx.FindFirst(sample));
    Run("IllegalWordsSearch(FindFirst)", () => illegalWordsSearch.FindFirst(sample));
    Run("IllegalWordsSearch(ContainsAny)", () => illegalWordsSearch.ContainsAny(sample));

    Console.Write("-------------------- Find All --------------------\r\n");
    Run("TrieFilter(FindAll)", () => tf1.FindAll(sample));
    Run("FastFilter(FindAll)", () => ff.FindAll(sample));
    Run("StringSearch(FindAll)", () => stringSearch.FindAll(sample));
    Run("StringSearchEx(FindAll)", () => stringSearchEx.FindAll(sample));
    Run("WordsSearch(FindAll)", () => wordsSearch.FindAll(sample));
    Run("WordsSearchEx(FindAll)", () => wordsSearchEx.FindAll(sample));
    Run("IllegalWordsSearch(FindAll)", () => illegalWordsSearch.FindAll(sample));

    Console.Write("-------------------- Replace --------------------\r\n");
    Run("TrieFilter(Replace)", () => tf1.Replace(sample));
    Run("FastFilter(Replace)", () => ff.Replace(sample));
    Run("StringSearch(Replace)", () => stringSearch.Replace(sample));
    Run("StringSearchEx(Replace)", () => stringSearchEx.Replace(sample));
    Run("WordsSearch(Replace)", () => wordsSearch.Replace(sample));
    Run("WordsSearchEx(Replace)", () => wordsSearchEx.Replace(sample));
    Run("IllegalWordsSearch(Replace)", () => illegalWordsSearch.Replace(sample));

    Console.Write("-------------------- Regex --------------------\r\n");
    Run("Regex.IsMatch", () => re.IsMatch(sample));
    Run("Regex.Match", () => re.Match(sample));
    Run("Regex.Matches", () => re.Matches(sample));

    Console.Write("-------------------- Regex used Trie tree --------------------\r\n");
    Run("Regex.IsMatch", () => re2.IsMatch(sample));
    Run("Regex.Match", () => re2.Match(sample));
    Run("Regex.Matches", () => re2.Matches(sample));

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
/// <summary>
/// Benchmark entry point: loads the bad-word data via <c>ReadBadWord</c>, reads
/// "Talk.txt" and passes each filter/search/regex operation to <c>Run</c>,
/// grouped under console section headers. Waits for a key press before exiting.
/// </summary>
/// <remarks>
/// In the printed labels, "次" means "times" (iteration count) and "代码相同"
/// means "same code", i.e. the label says the WordsSearchEx variant shares the
/// implementation being measured. The regex sections pass 100 to <c>Run</c>
/// explicitly; the other sections use the overload without a count.
/// </remarks>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    ReadBadWord();
    var sample = File.ReadAllText("Talk.txt");

    Console.Write("-------------------- FindFirst OR ContainsAny 100000次 --------------------\r\n");
    Run("TrieFilter", () => tf1.HasBadWord(sample));
    Run("FastFilter", () => ff.HasBadWord(sample));
    Run("StringSearch(ContainsAny)", () => stringSearch.ContainsAny(sample));
    Run("StringSearchEx(ContainsAny)--- WordsSearchEx(ContainsAny)代码相同", () => stringSearchEx.ContainsAny(sample));
    Run("StringSearchEx2(ContainsAny)--- WordsSearchEx2(ContainsAny)代码相同", () => stringSearchEx2.ContainsAny(sample));
    Run("StringSearchEx3(ContainsAny)--- WordsSearchEx3(ContainsAny)代码相同", () => stringSearchEx3.ContainsAny(sample));
    // Disabled benchmark:
    //Run("IllegalWordsSearch(ContainsAny)", () => { illegalWordsSearch.ContainsAny(text); });
    Run("StringSearch(FindFirst)", () => stringSearch.FindFirst(sample));
    Run("StringSearchEx(FindFirst)", () => stringSearchEx.FindFirst(sample));
    Run("StringSearchEx2(FindFirst)", () => stringSearchEx2.FindFirst(sample));
    Run("StringSearchEx3(FindFirst)", () => stringSearchEx3.FindFirst(sample));
    Run("WordsSearch(FindFirst)", () => wordsSearch.FindFirst(sample));
    Run("WordsSearchEx(FindFirst)", () => wordsSearchEx.FindFirst(sample));
    Run("WordsSearchEx2(FindFirst)", () => wordsSearchEx2.FindFirst(sample));
    Run("WordsSearchEx3(FindFirst)", () => wordsSearchEx3.FindFirst(sample));
    // Disabled benchmark:
    //Run("IllegalWordsSearch(FindFirst)", () => { illegalWordsSearch.FindFirst(text); });

    Console.Write("-------------------- Find All 100000次 --------------------\r\n");
    Run("TrieFilter(FindAll)", () => tf1.FindAll(sample));
    Run("FastFilter(FindAll)", () => ff.FindAll(sample));
    Run("StringSearch(FindAll)", () => stringSearch.FindAll(sample));
    Run("StringSearchEx(FindAll)", () => stringSearchEx.FindAll(sample));
    Run("StringSearchEx2(FindAll)", () => stringSearchEx2.FindAll(sample));
    Run("StringSearchEx3(FindAll)", () => stringSearchEx3.FindAll(sample));
    Run("WordsSearch(FindAll)", () => wordsSearch.FindAll(sample));
    Run("WordsSearchEx(FindAll)", () => wordsSearchEx.FindAll(sample));
    Run("WordsSearchEx2(FindAll)", () => wordsSearchEx2.FindAll(sample));
    Run("WordsSearchEx3(FindAll)", () => wordsSearchEx3.FindAll(sample));
    // Disabled benchmark:
    //Run("IllegalWordsSearch(FindAll)", () => { illegalWordsSearch.FindAll(text); });

    Console.Write("-------------------- Replace 100000次 --------------------\r\n");
    Run("TrieFilter(Replace)", () => tf1.Replace(sample));
    Run("FastFilter(Replace)", () => ff.Replace(sample));
    Run("StringSearch(Replace)", () => stringSearch.Replace(sample));
    Run("WordsSearch(Replace)", () => wordsSearch.Replace(sample));
    Run("StringSearchEx(Replace)--- WordsSearchEx(Replace)代码相同", () => stringSearchEx.Replace(sample));
    Run("StringSearchEx2(Replace)--- WordsSearchEx2(Replace)代码相同", () => stringSearchEx2.Replace(sample));
    Run("StringSearchEx3(Replace)--- WordsSearchEx3(Replace)代码相同", () => stringSearchEx3.Replace(sample));
    // Disabled benchmark:
    //Run("IllegalWordsSearch(Replace)", () => { illegalWordsSearch.Replace(text); });

    Console.Write("-------------------- Regex 100次 --------------------\r\n");
    Run(100, "Regex.IsMatch", () => re.IsMatch(sample));
    Run(100, "Regex.Match", () => re.Match(sample));
    Run(100, "Regex.Matches", () => re.Matches(sample));

    Console.Write("-------------------- Regex used Trie tree 100次 --------------------\r\n");
    Run(100, "Regex.IsMatch", () => re2.IsMatch(sample));
    Run(100, "Regex.Match", () => re2.Match(sample));
    Run(100, "Regex.Matches", () => re2.Matches(sample));

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
/// <summary>
/// Converts <paramref name="text"/> into an array of display strings, one per
/// resolved character: a pinyin entry from <c>_pyShow</c> where a reading is
/// found, otherwise the character itself.
/// </summary>
/// <remarks>
/// Resolution happens in two passes:
/// 1. Phrase matches reported by <c>_search.FindAll</c> fill their positions
///    from the phrase table. A match is applied only when it starts after the
///    end of the previously applied match.
/// 2. Every position still empty is resolved per character: U+3400..U+9FD5
///    through <c>_pyIndex</c>/<c>_pyData</c>; a surrogate pair whose high
///    surrogate is in U+D840..U+D86E through <c>_pyIndex2</c>/<c>_pyData2</c>;
///    anything else is copied unchanged.
/// Positions left empty after both passes are omitted from the result.
/// </remarks>
/// <param name="text">Input text to convert.</param>
/// <param name="tone">
/// Offset added to every index into <c>_pyShow</c>; defaults to 0. The meaning
/// of non-zero values is defined by the table layout, which is not visible here.
/// </param>
/// <returns>The resolved entries in text order, without empty positions.</returns>
public static string[] GetPinyinList(string text, int tone = 0)
{
    InitPyIndex();
    InitPyWords();

    // One slot per UTF-16 code unit of the input, all initially null.
    List<string> slots = new List<string>(new string[text.Length]);

    // Pass 1: phrase-level readings.
    var matches = _search.FindAll(text);
    var lastAppliedEnd = -1;
    foreach (var match in matches)
    {
        if (match.Start <= lastAppliedEnd)
        {
            // Starts inside (or at the end of) a phrase already applied.
            continue;
        }

        for (int k = 0; k < match.Keyword.Length; k++)
        {
            slots[match.Start + k] = _pyShow[_wordPy[_wordPyIndex[match.Index] + k] + tone];
        }
        lastAppliedEnd = match.End;
    }

    // Pass 2: per-character readings for everything pass 1 left empty.
    for (int pos = 0; pos < text.Length; pos++)
    {
        if (slots[pos] != null)
        {
            continue;
        }

        var current = text[pos];

        if (current >= 0x3400 && current <= 0x9fd5)
        {
            var cell = current - 0x3400;
            var first = _pyIndex[cell];
            var limit = _pyIndex[cell + 1];
            if (limit > first)
            {
                slots[pos] = _pyShow[_pyData[first] + tone];
            }
            // NOTE(review): when the table has no entry (limit <= first) the slot
            // stays null and the character is dropped from the result, unlike the
            // other branches which fall back to the character - confirm intended.
            continue;
        }

        if (current >= 0xd840 && current <= 0xd86e && pos + 1 < text.Length)
        {
            var trail = text[pos + 1];
            if (trail >= 0xdc00 && trail <= 0xdfff)
            {
                var first = _pyIndex2[current - 0xd840][trail - 0xdc00];
                var limit = _pyIndex2[current - 0xd840][trail - 0xdc00 + 1];
                if (first < limit)
                {
                    // The reading is stored at the low surrogate's position; the
                    // high surrogate's slot stays null and is filtered out below.
                    pos++;
                    slots[pos] = _pyShow[_pyData2[current - 0xd840][first] + tone];
                    continue;
                }
            }
        }

        // No reading available: keep the original code unit.
        slots[pos] = current.ToString();
    }

    // Collect the filled slots in order.
    List<string> resolved = new List<string>(slots.Count);
    foreach (var entry in slots)
    {
        if (entry != null)
        {
            resolved.Add(entry);
        }
    }
    return resolved.ToArray();
}