/// <summary>
/// Exercises WordsSearchEx against a short Chinese sentence: ContainsAny,
/// FindFirst, FindAll (keyword, start/end offsets, keyword index) and Replace.
/// </summary>
public void test2()
{
    // Arrange: three '|'-separated keywords and the text to scan.
    string keywordSpec = "中国|国人|zg人";
    string sentence = "我是中国人";

    var searcher = new WordsSearchEx();
    var keywordList = keywordSpec.Split('|').ToList();
    searcher.SetKeywords(keywordList);

    // At least one keyword occurs in the sentence.
    bool anyHit = searcher.ContainsAny(sentence);
    Assert.AreEqual(true, anyHit);

    // The first reported match is the keyword "中国".
    var firstHit = searcher.FindFirst(sentence);
    Assert.AreEqual("中国", firstHit.Keyword);

    // FindAll reports both matches found in the sentence.
    var hits = searcher.FindAll(sentence);
    Assert.AreEqual("中国", hits[0].Keyword);
    Assert.AreEqual(2, hits[0].Start);
    Assert.AreEqual(3, hits[0].End);
    // Index is the returned keyword index; it starts from 0 by default.
    Assert.AreEqual(0, hits[0].Index);
    Assert.AreEqual("国人", hits[1].Keyword);
    Assert.AreEqual(2, hits.Count);

    // Replace masks every matched character with '*'.
    var masked = searcher.Replace(sentence, '*');
    Assert.AreEqual("我是***", masked);
}
/// <summary>
/// Lazily builds the shared keyword searcher and its companion lookup arrays
/// from the compressed word list embedded in the assembly. Does nothing when
/// <c>_search</c> is already set.
/// </summary>
/// <remarks>
/// Each non-empty line of the decompressed text is split on ','. The first
/// field is registered as a keyword; every following field is parsed as a
/// hexadecimal <see cref="ushort"/> and appended to <c>_wordPy</c>.
/// <c>_wordPyIndex</c> starts with 0 and receives the running length of
/// <c>_wordPy</c> after each line, so the values of line <c>i</c> occupy
/// <c>[_wordPyIndex[i], _wordPyIndex[i + 1])</c>.
/// NOTE(review): the ushort values are presumably pinyin table indices - confirm.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// The embedded resource is missing from the assembly.
/// </exception>
/// <exception cref="EndOfStreamException">
/// The resource stream ended before its reported length was read.
/// </exception>
private static void InitPyWords()
{
    if (_search != null)
    {
        return;
    }

    lock (lockObj)
    {
        // Re-check inside the lock: another thread may have finished loading
        // while this one was waiting.
        if (_search != null)
        {
            return;
        }

        var ass = typeof(WordsHelper).Assembly;
#if NETSTANDARD2_1
        const string resourceName = "ToolGood.Words.dict.pyWords.txt.br";
#else
        const string resourceName = "ToolGood.Words.dict.pyWords.txt.z";
#endif

        byte[] bs;
        using (Stream sm = ass.GetManifestResourceStream(resourceName))
        {
            // GetManifestResourceStream returns null when the resource is absent;
            // fail with a message that names it instead of a NullReferenceException.
            if (sm == null)
            {
                throw new InvalidOperationException(
                    "Embedded resource '" + resourceName + "' was not found.");
            }

            bs = new byte[sm.Length];
            // Stream.Read may return fewer bytes than requested, so keep reading
            // until the buffer is full.
            int offset = 0;
            while (offset < bs.Length)
            {
                int read = sm.Read(bs, offset, bs.Length - offset);
                if (read == 0)
                {
                    throw new EndOfStreamException(
                        "Embedded resource '" + resourceName + "' ended unexpectedly.");
                }
                offset += read;
            }
        }

        var bytes = Decompress(bs);
        bs = null;
        var tStr = Encoding.UTF8.GetString(bytes);
        bytes = null; // drop large temporaries as soon as they are no longer needed
        var lines = tStr.Split(new char[] { '\n' }, StringSplitOptions.RemoveEmptyEntries);
        tStr = null;

        var wordPy = new List<ushort>();
        var keywords = new List<string>();
        var wordPyIndex = new List<int>();
        wordPyIndex.Add(0);

        foreach (var line in lines)
        {
            var sp = line.Split(',');
            keywords.Add(sp[0]);
            for (int i = 1; i < sp.Length; i++)
            {
                wordPy.Add(ushort.Parse(
                    sp[i],
                    System.Globalization.NumberStyles.HexNumber,
                    System.Globalization.CultureInfo.InvariantCulture));
            }
            // Record where the next keyword's values begin.
            wordPyIndex.Add(wordPy.Count);
        }

        var search = new WordsSearchEx();
        search.SetKeywords(keywords);

        _wordPyIndex = wordPyIndex.ToArray();
        _wordPy = wordPy.ToArray();
        // Assigned last: the null checks above read _search, so the arrays are
        // already in place (in program order) once it becomes non-null.
        _search = search;

        lines = null;
        wordPy = null;
        keywords = null;
        wordPyIndex = null;

        // Reclaim the temporary buffers built during loading. Runs only after an
        // actual load, not for a thread that found the data already initialized.
        GC.Collect();
    }
}
/// <summary>
/// Benchmarks WordsSearchEx.FindAll: builds a searcher from <paramref name="list"/>,
/// calls Save with "WordsSearchEx.dat", runs FindAll on <paramref name="txt"/>
/// 100000 times and prints the elapsed milliseconds to the console.
/// </summary>
/// <param name="list">Keywords passed to SetKeywords.</param>
/// <param name="txt">Text searched on every iteration.</param>
private static void WordsSearchExSearch(List<string> list, string txt)
{
    const int iterations = 100000;

    var searcher = new WordsSearchEx();
    searcher.SetKeywords(list);
    searcher.Save("WordsSearchEx.dat");

    // Only the search loop is timed; setup and Save happen before the clock starts.
    Stopwatch timer = Stopwatch.StartNew();
    int remaining = iterations;
    while (remaining-- > 0)
    {
        searcher.FindAll(txt);
    }
    timer.Stop();

    string elapsed = timer.ElapsedMilliseconds.ToString("N0");
    Console.WriteLine("WordsSearchEx: " + elapsed + "ms");
}