/// <summary>
/// Converts <paramref name="text"/> into a list of pinyin strings.
/// Multi-character keyword matches reported by <c>_search</c> are resolved first,
/// then every remaining character is looked up in the per-character tables.
/// Characters that cannot be converted are returned unchanged.
/// </summary>
/// <param name="text">The text to convert.</param>
/// <param name="tone">
/// Offset added to every index into <c>_pyShow</c>. The meaning of each value depends
/// on how <c>_pyShow</c> is laid out, which is not visible here.
/// </param>
/// <returns>
/// The pinyin (or pass-through) strings in text order. A surrogate pair that has a
/// table entry contributes a single element; a pair without an entry is emitted as
/// two single-<c>char</c> strings, one per UTF-16 code unit.
/// </returns>
public static string[] GetPinyinList(string text, int tone = 0)
{
    InitPyIndex();
    InitPyWords();

    // One slot per UTF-16 code unit. A slot that is still null at the end yields no element.
    var slots = new string[text.Length];

    // Pass 1: keyword matches. A match is applied only when it starts after the
    // end of the previously applied match, so applied matches never overlap.
    var lastEnd = -1;
    foreach (var match in _search.FindAll(text)) {
        if (match.Start > lastEnd) {
            var pyStart = _wordPyIndex[match.Index];
            for (int k = 0; k < match.Keyword.Length; k++) {
                slots[match.Start + k] = _pyShow[_wordPy[pyStart + k] + tone];
            }
            lastEnd = match.End;
        }
    }

    // Pass 2: per-character lookup for every slot pass 1 left empty.
    for (int i = 0; i < text.Length; i++) {
        if (slots[i] != null) {
            continue;
        }

        var c = text[i];
        if (c >= 0x3400 && c <= 0x9fd5) {
            var index = c - 0x3400;
            var start = _pyIndex[index];
            var end = _pyIndex[index + 1];
            if (end > start) {
                slots[i] = _pyShow[_pyData[start] + tone];
            } else {
                // No entry for this character: pass it through unchanged instead of
                // leaving the slot null, which used to drop it from the result.
                slots[i] = c.ToString();
            }
        } else if (c >= 0xd840 && c <= 0xd86e && i + 1 < text.Length) {
            var low = text[i + 1];
            if (low >= 0xdc00 && low <= 0xdfff) {
                var row = c - 0xd840;
                var first = _pyIndex2[row][low - 0xdc00];
                var next = _pyIndex2[row][low - 0xdc00 + 1];
                if (first < next) {
                    // Store the pinyin on the low surrogate's slot and skip it;
                    // the high surrogate's slot stays null and is filtered out below.
                    i++;
                    slots[i] = _pyShow[_pyData2[row][first] + tone];
                } else {
                    slots[i] = c.ToString();
                }
            } else {
                slots[i] = c.ToString();
            }
        } else {
            slots[i] = c.ToString();
        }
    }

    // Drop the empty slots while preserving order.
    var result = new List<string>(slots.Length);
    foreach (var slot in slots) {
        if (slot != null) {
            result.Add(slot);
        }
    }
    return result.ToArray();
}
/// <summary>
/// Benchmark entry point. Loads the bad-word data through <c>ReadBadWord</c>, reads
/// "Talk.txt", saves the <c>stringSearchEx</c> state to "test.ini", and then hands
/// every operation to <c>Run</c> (defined elsewhere; presumably it times the action).
/// Waits for a key press before exiting.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    ReadBadWord();
    var content = File.ReadAllText("Talk.txt");
    //var l = content.Length;

    // "test.ini" is loaded again by the "StringSearchEx.Load" run below.
    stringSearchEx.Save("test.ini");
    var allHits = stringSearchEx.FindAll(content); // result is not used afterwards
    //word2.Load("test.ini");

    Console.Write("-------------------- SetKeywords Test --------------------\r\n");
    Run(1, "StringSearch.SetKeywords ", () => {
        // Collect every non-empty line of the keyword file.
        var keywords = new List<string>();
        using (var reader = new StreamReader(File.OpenRead("BadWord.txt"))) {
            string line;
            while ((line = reader.ReadLine()) != null) {
                if (line.Length != 0) {
                    keywords.Add(line);
                }
            }
        }
        var searcher = new StringSearch();
        searcher.SetKeywords(keywords);
    });
    Run(1, "StringSearchEx.SetKeywords ", () => {
        // Collect every non-empty line of the keyword file.
        var keywords = new List<string>();
        using (var reader = new StreamReader(File.OpenRead("BadWord.txt"))) {
            string line;
            while ((line = reader.ReadLine()) != null) {
                if (line.Length != 0) {
                    keywords.Add(line);
                }
            }
        }
        var searcher = new StringSearchEx();
        searcher.SetKeywords(keywords);
    });
    Run(1, "StringSearchEx.Load ", () => {
        var searcher = new StringSearchEx();
        searcher.Load("test.ini");
    });
    //var ts1 = word.FindAll(content);
    //var ts = word2.FindAll(content);

    Console.Write("-------------------- ToSenseWord Test --------------------\r\n");
    Run("ToSenseWord1 ", () => { WordTest.ToSenseWord1(content); });
    Run("ToSenseWord2 ", () => { WordTest.ToSenseWord2(content); });
    Run("ToSenseWord3 ", () => { WordTest.ToSenseWord3(content); });
    Run("ToSenseWord4 ", () => { WordTest.ToSenseWord4(content); });
    Run("ToSenseWord5 ", () => { WordTest.ToSenseWord5(content); });
    Run("ToSenseWord6 ", () => { WordTest.ToSenseWord6(content); });
    Run("ToSenseWord7 ", () => { WordTest.ToSenseWord7(content); });
    Run("ToSenseWord8 ", () => { WordTest.ToSenseWord8(content); });
    Run("ToSenseWord9 ", () => { WordTest.ToSenseWord9(content); });
    Run("ToSenseWord10 ", () => { WordTest.ToSenseWord10(content); });

    Run("GetDisablePostion1 ", () => { WordTest.GetDisablePostion1(content); });
    Run("GetDisablePostion2 ", () => { WordTest.GetDisablePostion2(content); });
    Run("GetDisablePostion3 ", () => { WordTest.GetDisablePostion3(content); });
    Run("GetDisablePostion4 ", () => { WordTest.GetDisablePostion4(content); });
    Run("GetDisablePostion5 ", () => { WordTest.GetDisablePostion5(content); });
    Run("GetDisablePostion6 ", () => { WordTest.GetDisablePostion6(content); });
    Run("GetDisablePostion7 ", () => { WordTest.GetDisablePostion7(content); });
    Run("GetDisablePostion9 ", () => { WordTest.GetDisablePostion9(content); });
    Run("GetDisablePostion8 ", () => { WordTest.GetDisablePostion8(content); });

    //Console.Write("-------------------- ToSenseIllegalWords --------------------\r\n");
    //Run("ToSenseIllegalWords", () => { WordsHelper.ToSenseIllegalWords(content); });

    Console.Write("-------------------- FindFirst OR ContainsAny --------------------\r\n");
    Run("TrieFilter", () => { tf1.HasBadWord(content); });
    Run("FastFilter", () => { ff.HasBadWord(content); });
    Run("StringSearch(ContainsAny)", () => { stringSearch.ContainsAny(content); });
    Run("StringSearch(FindFirst)", () => { stringSearch.FindFirst(content); });
    Run("StringSearchEx(ContainsAny)", () => { stringSearchEx.ContainsAny(content); });
    Run("StringSearchEx(FindFirst)", () => { stringSearchEx.FindFirst(content); });
    Run("WordsSearch(ContainsAny)", () => { wordsSearch.ContainsAny(content); });
    Run("WordsSearch(FindFirst)", () => { wordsSearch.FindFirst(content); });
    Run("WordsSearchEx(ContainsAny)", () => { wordsSearchEx.ContainsAny(content); });
    Run("WordsSearchEx(FindFirst)", () => { wordsSearchEx.FindFirst(content); });
    Run("IllegalWordsQuickSearch(FindFirst)", () => { illegalWordsQuickSearch.FindFirst(content); });
    Run("IllegalWordsQuickSearch(ContainsAny)", () => { illegalWordsQuickSearch.ContainsAny(content); });
    Run("IllegalWordsSearch(FindFirst)", () => { illegalWordsSearch.FindFirst(content); });
    Run("IllegalWordsSearch(ContainsAny)", () => { illegalWordsSearch.ContainsAny(content); });

    Console.Write("-------------------- Find All --------------------\r\n");
    Run("TrieFilter(FindAll)", () => { tf1.FindAll(content); });
    Run("FastFilter(FindAll)", () => { ff.FindAll(content); });
    Run("StringSearch(FindAll)", () => { stringSearch.FindAll(content); });
    Run("StringSearchEx(FindAll)", () => { stringSearchEx.FindAll(content); });
    Run("WordsSearch(FindAll)", () => { wordsSearch.FindAll(content); });
    Run("WordsSearchEx(FindAll)", () => { wordsSearchEx.FindAll(content); });
    Run("IllegalWordsQuickSearch(FindAll)", () => { illegalWordsQuickSearch.FindAll(content); });
    Run("IllegalWordsSearch(FindAll)", () => { illegalWordsSearch.FindAll(content); });

    Console.Write("-------------------- Replace --------------------\r\n");
    Run("TrieFilter(Replace)", () => { tf1.Replace(content); });
    Run("FastFilter(Replace)", () => { ff.Replace(content); });
    Run("StringSearch(Replace)", () => { stringSearch.Replace(content); });
    Run("StringSearchEx(Replace)", () => { stringSearchEx.Replace(content); });
    Run("WordsSearch(Replace)", () => { wordsSearch.Replace(content); });
    Run("WordsSearchEx(Replace)", () => { wordsSearchEx.Replace(content); });
    Run("IllegalWordsQuickSearch(Replace)", () => { illegalWordsQuickSearch.Replace(content); });
    Run("IllegalWordsSearch(Replace)", () => { illegalWordsSearch.Replace(content); });

    Console.Write("-------------------- Regex --------------------\r\n");
    Run("Regex.IsMatch", () => { re.IsMatch(content); });
    Run("Regex.Match", () => { re.Match(content); });
    Run("Regex.Matches", () => { re.Matches(content); });

    Console.Write("-------------------- Regex used Trie tree --------------------\r\n");
    Run("Regex.IsMatch", () => { re2.IsMatch(content); });
    Run("Regex.Match", () => { re2.Match(content); });
    Run("Regex.Matches", () => { re2.Matches(content); });

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
/// <summary>
/// Benchmark entry point. Loads the bad-word data through <c>ReadBadWord</c>, reads
/// "Talk.txt", and hands each filter/search/replace/regex operation to <c>Run</c>
/// (defined elsewhere; presumably it times the action). Waits for a key press
/// before exiting.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    ReadBadWord();
    var content = File.ReadAllText("Talk.txt");

    Console.Write("-------------------- ToSenseWord Test --------------------\r\n");
    Run("ToSenseWord1 ", () => { WordTest.ToSenseWord1(content); });
    Run("ToSenseWord2 ", () => { WordTest.ToSenseWord2(content); });
    Run("ToSenseWord3 ", () => { WordTest.ToSenseWord3(content); });
    Run("ToSenseWord4 ", () => { WordTest.ToSenseWord4(content); });
    Run("ToSenseWord5 ", () => { WordTest.ToSenseWord5(content); });
    Run("ToSenseWord6 ", () => { WordTest.ToSenseWord6(content); });
    Run("ToSenseWord7 ", () => { WordTest.ToSenseWord7(content); });
    Run("ToSenseWord8 ", () => { WordTest.ToSenseWord8(content); });
    Run("ToSenseWord9 ", () => { WordTest.ToSenseWord9(content); });
    Run("ToSenseWord10 ", () => { WordTest.ToSenseWord10(content); });

    // The GetDisablePostion runs are disabled in this version.
    //Run("GetDisablePostion1 ", () => { WordTest.GetDisablePostion1(content); });
    //Run("GetDisablePostion2 ", () => { WordTest.GetDisablePostion2(content); });
    //Run("GetDisablePostion3 ", () => { WordTest.GetDisablePostion3(content); });
    //Run("GetDisablePostion4 ", () => { WordTest.GetDisablePostion4(content); });
    //Run("GetDisablePostion5 ", () => { WordTest.GetDisablePostion5(content); });
    //Run("GetDisablePostion6 ", () => { WordTest.GetDisablePostion6(content); });
    //Run("GetDisablePostion7 ", () => { WordTest.GetDisablePostion7(content); });
    //Run("GetDisablePostion9 ", () => { WordTest.GetDisablePostion9(content); });
    //Run("GetDisablePostion8 ", () => { WordTest.GetDisablePostion8(content); });

    Console.Write("-------------------- ToSenseIllegalWords --------------------\r\n");
    Run("ToSenseIllegalWords", () => { WordsHelper.ToSenseIllegalWords(content); });

    Console.Write("-------------------- FindFirst OR ContainsAny --------------------\r\n");
    Run("TrieFilter", () => { tf1.HasBadWord(content); });
    Run("FastFilter", () => { ff.HasBadWord(content); });
    Run("StringSearch(ContainsAny)", () => { word.ContainsAny(content); });
    Run("StringSearch(FindFirst)", () => { word.FindFirst(content); });
    Run("WordsSearch(ContainsAny)", () => { search.ContainsAny(content); });
    Run("WordsSearch(FindFirst)", () => { search.FindFirst(content); });
    Run("IllegalWordsQuickSearch(FindFirst)", () => { iword1.FindFirst(content); });
    Run("IllegalWordsQuickSearch(ContainsAny)", () => { iword1.ContainsAny(content); });
    Run("IllegalWordsSearch(FindFirst)", () => { iword2.FindFirst(content); });
    Run("IllegalWordsSearch(ContainsAny)", () => { iword2.ContainsAny(content); });

    Console.Write("-------------------- Find All --------------------\r\n");
    Run("TrieFilter(FindAll)", () => { tf1.FindAll(content); });
    Run("FastFilter(FindAll)", () => { ff.FindAll(content); });
    Run("StringSearch(FindAll)", () => { word.FindAll(content); });
    Run("WordsSearch(FindAll)", () => { search.FindAll(content); });
    Run("IllegalWordsQuickSearch(FindAll)", () => { iword1.FindAll(content); });
    Run("IllegalWordsSearch(FindAll)", () => { iword2.FindAll(content); });

    Console.Write("-------------------- Replace --------------------\r\n");
    Run("TrieFilter(Replace)", () => { tf1.Replace(content); });
    Run("FastFilter(Replace)", () => { ff.Replace(content); });
    Run("StringSearch(Replace)", () => { word.Replace(content); });
    Run("WordsSearch(Replace)", () => { search.Replace(content); });
    Run("IllegalWordsQuickSearch(Replace)", () => { iword1.Replace(content); });
    Run("IllegalWordsSearch(Replace)", () => { iword2.Replace(content); });

    Console.Write("-------------------- Regex --------------------\r\n");
    Run("Regex.IsMatch", () => { re.IsMatch(content); });
    Run("Regex.Match", () => { re.Match(content); });
    Run("Regex.Matches", () => { re.Matches(content); });

    Console.Write("-------------------- Regex used Trie tree --------------------\r\n");
    Run("Regex.IsMatch", () => { re2.IsMatch(content); });
    Run("Regex.Match", () => { re2.Match(content); });
    Run("Regex.Matches", () => { re2.Matches(content); });

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
/// <summary>
/// Benchmark entry point. Loads the bad-word data through <c>ReadBadWord</c>, reads
/// "Talk.txt", and hands each search/replace/regex operation to <c>Run</c> (defined
/// elsewhere; presumably it times the action). The regex groups pass an explicit
/// count of 100 to <c>Run</c>. Waits for a key press before exiting.
/// </summary>
/// <remarks>
/// The Chinese text in the labels is runtime output and is kept verbatim:
/// "次" means "times" and "代码相同" means "same code".
/// </remarks>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    ReadBadWord();
    var content = File.ReadAllText("Talk.txt");

    Console.Write("-------------------- FindFirst OR ContainsAny 100000次 --------------------\r\n");
    Run("TrieFilter", () => { tf1.HasBadWord(content); });
    Run("FastFilter", () => { ff.HasBadWord(content); });
    Run("StringSearch(ContainsAny)", () => { stringSearch.ContainsAny(content); });
    Run("StringSearchEx(ContainsAny)--- WordsSearchEx(ContainsAny)代码相同", () => { stringSearchEx.ContainsAny(content); });
    Run("StringSearchEx2(ContainsAny)--- WordsSearchEx2(ContainsAny)代码相同", () => { stringSearchEx2.ContainsAny(content); });
    Run("StringSearchEx3(ContainsAny)--- WordsSearchEx3(ContainsAny)代码相同", () => { stringSearchEx3.ContainsAny(content); });
    Run("IllegalWordsSearch(ContainsAny)", () => { illegalWordsSearch.ContainsAny(content); });

    Run("StringSearch(FindFirst)", () => { stringSearch.FindFirst(content); });
    Run("StringSearchEx(FindFirst)", () => { stringSearchEx.FindFirst(content); });
    Run("StringSearchEx2(FindFirst)", () => { stringSearchEx2.FindFirst(content); });
    Run("StringSearchEx3(FindFirst)", () => { stringSearchEx3.FindFirst(content); });
    Run("WordsSearch(FindFirst)", () => { wordsSearch.FindFirst(content); });
    Run("WordsSearchEx(FindFirst)", () => { wordsSearchEx.FindFirst(content); });
    Run("WordsSearchEx2(FindFirst)", () => { wordsSearchEx2.FindFirst(content); });
    Run("WordsSearchEx3(FindFirst)", () => { wordsSearchEx3.FindFirst(content); });
    Run("IllegalWordsSearch(FindFirst)", () => { illegalWordsSearch.FindFirst(content); });

    Console.Write("-------------------- Find All 100000次 --------------------\r\n");
    Run("TrieFilter(FindAll)", () => { tf1.FindAll(content); });
    Run("FastFilter(FindAll)", () => { ff.FindAll(content); });
    Run("StringSearch(FindAll)", () => { stringSearch.FindAll(content); });
    Run("StringSearchEx(FindAll)", () => { stringSearchEx.FindAll(content); });
    Run("StringSearchEx2(FindAll)", () => { stringSearchEx2.FindAll(content); });
    Run("StringSearchEx3(FindAll)", () => { stringSearchEx3.FindAll(content); });
    Run("WordsSearch(FindAll)", () => { wordsSearch.FindAll(content); });
    Run("WordsSearchEx(FindAll)", () => { wordsSearchEx.FindAll(content); });
    Run("WordsSearchEx2(FindAll)", () => { wordsSearchEx2.FindAll(content); });
    Run("WordsSearchEx3(FindAll)", () => { wordsSearchEx3.FindAll(content); });
    Run("IllegalWordsSearch(FindAll)", () => { illegalWordsSearch.FindAll(content); });

    Console.Write("-------------------- Replace 100000次 --------------------\r\n");
    Run("TrieFilter(Replace)", () => { tf1.Replace(content); });
    Run("FastFilter(Replace)", () => { ff.Replace(content); });
    Run("StringSearch(Replace)", () => { stringSearch.Replace(content); });
    Run("WordsSearch(Replace)", () => { wordsSearch.Replace(content); });
    Run("StringSearchEx(Replace)--- WordsSearchEx(Replace)代码相同", () => { stringSearchEx.Replace(content); });
    Run("StringSearchEx2(Replace)--- WordsSearchEx2(Replace)代码相同", () => { stringSearchEx2.Replace(content); });
    Run("StringSearchEx3(Replace)--- WordsSearchEx3(Replace)代码相同", () => { stringSearchEx3.Replace(content); });
    Run("IllegalWordsSearch(Replace)", () => { illegalWordsSearch.Replace(content); });

    Console.Write("-------------------- Regex 100次 --------------------\r\n");
    Run(100, "Regex.IsMatch", () => { re.IsMatch(content); });
    Run(100, "Regex.Match", () => { re.Match(content); });
    Run(100, "Regex.Matches", () => { re.Matches(content); });

    Console.Write("-------------------- Regex used Trie tree 100次 --------------------\r\n");
    Run(100, "Regex.IsMatch", () => { re2.IsMatch(content); });
    Run(100, "Regex.Match", () => { re2.Match(content); });
    Run(100, "Regex.Matches", () => { re2.Matches(content); });

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
/// <summary>
/// Preprocessing pipeline for the pinyin data files. Each stage writes its result to a
/// text file and is skipped when that file already exists, except stage 2, which always
/// runs and currently produces no output.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // Preprocessing

    // Step 1: process the Sogou lexicon.
    if (File.Exists("scel_1.txt") == false) {
        var scel_1 = GetWords();
        File.WriteAllText("scel_1.txt", string.Join("\n", scel_1));
        scel_1.Clear();
    }

    // Step 2: slim down the lexicon.
    {
        var txt = File.ReadAllText("scel_1.txt");
        var lines = txt.Split('\n');
        Dictionary<string, string> dict = new Dictionary<string, string>();
        foreach (var item in lines) {
            var sp = item.Split(' ');
            // Skip lines without a "word pinyin" pair (e.g. an empty line) instead of
            // failing with IndexOutOfRangeException on sp[1].
            if (sp.Length < 2) {
                continue;
            }
            dict[sp[0]] = sp[1];
        }
        List<string> keys = dict.Select(q => q.Key).ToList();

        WordsSearch wordsSearch;
        for (int i = 3; i < 8; i++) {
            // Index every word of at most i characters, then look for those words
            // inside each longer word.
            var keywords = keys.Where(q => q.Length <= i).ToList();
            wordsSearch = new WordsSearch();
            wordsSearch.SetKeywords(keywords);

            for (int j = keys.Count - 1; j >= 0; j--) {
                var key = keys[j];
                if (key.Length <= i) {
                    continue;
                }
                var all = wordsSearch.FindAll(key);
                if (all.Count > 0) {
                    // Run the pinyin test; delete the entry when the result is identical.
                    // (Not implemented yet.)
                }
            }
        }
        //File.WriteAllText("scel_2.txt", string.Join("\n", scel_1));
    }

    // Step 3: get all pinyin of the words.

    // Step 4: get the pinyin from the web.
    if (File.Exists("pinyin_1.txt") == false) {
        var pinyin_1 = GetPinYin();
        File.WriteAllText("pinyin_1.txt", string.Join("\n", pinyin_1));
        pinyin_1.Clear();
    }

    // Step 5: separate single-character pinyin from phrase pinyin.
    if (File.Exists("pinyin_2_one.txt") == false) {
        var txt = File.ReadAllText("pinyin_1.txt");
        var lines = txt.Split('\n');
        List<string> ones = new List<string>();
        List<string> mores = new List<string>();
        foreach (var line in lines) {
            var sp = line.Split(',');
            if (GetLength(sp[0]) == 1) {
                ones.Add(line);
            } else {
                mores.Add(line);
            }
        }
        File.WriteAllText("pinyin_2_one.txt", string.Join("\n", ones));
        File.WriteAllText("pinyin_2_more.txt", string.Join("\n", mores));
        ones.Clear();
        mores.Clear();
    }

    // Step 6: simple merge of the single-character pinyin, so that the common pinyin
    // is not overwritten.
    if (File.Exists("pinyin_3_one.txt") == false) {
        var txt = File.ReadAllText("pinyin_2_one.txt");
        var lines = txt.Split('\n').ToList();
        // Walk backwards so RemoveAt does not shift the entries still to be visited.
        for (int i = lines.Count - 1; i >= 1; i--) {
            // Ordinal comparison: these are data lines, so the prefix test must not
            // depend on the current culture's linguistic rules.
            if (lines[i].StartsWith(lines[i - 1], System.StringComparison.Ordinal)) {
                lines.RemoveAt(i);
            }
        }
        File.WriteAllText("pinyin_3_one.txt", string.Join("\n", lines));
    }

    // Step 7: find entries whose pinyin count differs from the phrase length.
    if (File.Exists("pinyin_4_ok.txt") == false) {
        var txt = File.ReadAllText("pinyin_2_more.txt");
        var lines = txt.Split('\n');
        List<string> oks = new List<string>();
        List<string> errors = new List<string>();
        foreach (var line in lines) {
            var sp = line.Split(',');
            // sp[0] is the phrase; the remaining fields are its pinyin.
            if (GetLength(sp[0]) == sp.Length - 1) {
                oks.Add(line);
            } else {
                errors.Add(line);
            }
        }
        File.WriteAllText("pinyin_4_ok.txt", string.Join("\n", oks));
        File.WriteAllText("pinyin_4_error.txt", string.Join("\n", errors));
    }
}