Esempio n. 1
0
        public void test()
        {
            // Three keywords; only the first two occur in the sample sentence.
            string[] keywords = "中国|国人|zg人".Split('|');
            string   sentence = "我是中国人";

            var searcher = new WordsSearch();
            searcher.SetKeywords(keywords);

            // The sentence contains at least one of the keywords.
            var containsAny = searcher.ContainsAny(sentence);
            Assert.AreEqual(true, containsAny);

            // The first match reported is "中国".
            var firstHit = searcher.FindFirst(sentence);
            Assert.AreEqual("中国", firstHit.Keyword);

            // FindAll reports both overlapping keywords.
            var hits = searcher.FindAll(sentence);
            var head = hits[0];
            Assert.AreEqual("中国", head.Keyword);
            Assert.AreEqual(2, head.Start);
            Assert.AreEqual(3, head.End);
            Assert.AreEqual(0, head.Index); // Index is the returned keyword index; it starts at 0 by default
            Assert.AreEqual("国人", hits[1].Keyword);
            Assert.AreEqual(2, hits.Count);

            // Every character covered by a match is masked with '*'.
            var masked = searcher.Replace(sentence, '*');
            Assert.AreEqual("我是***", masked);
        }
Esempio n. 2
0
        /// <summary>
        /// Click handler that removes Latin letters from <c>MaintextBox</c>, keeping every
        /// "m" and every "c" that is directly followed by "m" (case-insensitive), and then
        /// puts the caret back on the line it was on before the text was replaced.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void OneKeyClearEnWords_Click(object sender, EventArgs e)// Remove the letters from the title
        {
            string original = MaintextBox.Text;
            int    index    = MaintextBox.GetFirstCharIndexOfCurrentLine();// Index of the first character of the current line
            int    line     = GetTextboxLine(index);

            MaintextBox.Text            = StripLettersExceptCm(original);
            MaintextBox.SelectionStart  = getnewindex(line);
            MaintextBox.SelectionLength = 0;
            MaintextBox.ScrollToCaret();// Scroll to the target line
            MaintextBox.Focus();
        }

        /// <summary>
        /// Removes every Latin letter from <paramref name="text"/> except "m" and any "c"
        /// that is immediately followed by "m" once the other letters are gone.
        /// </summary>
        /// <param name="text">Text to clean.</param>
        /// <returns>The cleaned text; characters that are not Latin letters are left untouched.</returns>
        private static string StripLettersExceptCm(string text)
        {
            // One pass over all letters other than c and m. The class is written without
            // spaces on purpose: inside [...] a space is a literal character, so a pattern
            // such as "[a - b]" would delete spaces as well.
            string withoutOthers = Regex.Replace(text, "[abd-ln-z]", "", RegexOptions.IgnoreCase);

            // Drop each remaining c that is not directly followed by m. The negative
            // lookahead also handles a c at the very end of the text, and no placeholder
            // character is needed, so existing '*' characters are preserved.
            return Regex.Replace(withoutOthers, "c(?!m)", "", RegexOptions.IgnoreCase);
        }
Esempio n. 3
0
 /// <summary>
 /// Returns the cached searcher for the requested conversion, building it on first use.
 /// </summary>
 /// <param name="s2t">
 /// Selects the conversion direction. When true the data files "s2t.dat", "t2hk.dat" and
 /// "t2tw.dat" are loaded as-is; when false "t2s.dat" is loaded as-is and "t2hk.dat" /
 /// "t2tw.dat" are loaded reversed. (Presumably true means Simplified-to-Traditional —
 /// confirm against callers.)
 /// </param>
 /// <param name="srcType">Variant selector: 0, 1 or 2 (see the file names per case below).</param>
 /// <returns>The matching searcher, or null when <paramref name="srcType"/> is not 0, 1 or 2.</returns>
 private static WordsSearch GetWordsSearch(bool s2t, int srcType)
 {
     if (s2t)
     {
         switch (srcType)
         {
             case 0:
                 return s2tSearch ?? (s2tSearch = BuildWordsSearch("s2t.dat", false));
             case 1:
                 return t2hkSearch ?? (t2hkSearch = BuildWordsSearch("t2hk.dat", false));
             case 2:
                 return t2twSearch ?? (t2twSearch = BuildWordsSearch("t2tw.dat", false));
         }
     }
     else
     {
         switch (srcType)
         {
             case 0:
                 return t2sSearch ?? (t2sSearch = BuildWordsSearch("t2s.dat", false));
             case 1:
                 // Same data file as the forward direction, with keys and values swapped.
                 return hk2tSearch ?? (hk2tSearch = BuildWordsSearch("t2hk.dat", true));
             case 2:
                 return tw2tSearch ?? (tw2tSearch = BuildWordsSearch("t2tw.dat", true));
         }
     }

     // Unknown srcType: no searcher available.
     return null;
 }
Esempio n. 4
0
 /// <summary>
 /// Clears the Simplified/Traditional conversion caches by setting every cached
 /// searcher field and <c>Dict._Simplified</c> to null.
 /// </summary>
 public static void ClearTranslate()
 {
     // s <-> t pair
     t2sSearch = null;
     s2tSearch = null;

     // t <-> tw pair
     tw2tSearch = null;
     t2twSearch = null;

     // t <-> hk pair
     hk2tSearch = null;
     t2hkSearch = null;

     Dict._Simplified = null;
 }
Esempio n. 5
0
        /// <summary>
        /// Builds a searcher from the transformation dictionary loaded from
        /// <paramref name="fileName"/>.
        /// </summary>
        /// <param name="fileName">Name passed to <c>GetTransformationDict</c>.</param>
        /// <param name="reverse">
        /// When false the dictionary keys become the keywords and the values are stored in
        /// <c>_others</c>; when true the roles are swapped.
        /// </param>
        /// <returns>A new searcher whose keywords and <c>_others</c> are taken from the dictionary in the same enumeration order.</returns>
        private static WordsSearch BuildWordsSearch(string fileName, bool reverse)
        {
            var mapping  = GetTransformationDict(fileName);
            var searcher = new WordsSearch();

            // Keywords are always set before _others is assigned.
            if (!reverse)
            {
                var keywords = mapping.Select(pair => pair.Key).ToList();
                searcher.SetKeywords(keywords);
                searcher._others = mapping.Select(pair => pair.Value).ToArray();
                return searcher;
            }

            var reversedKeywords = mapping.Select(pair => pair.Value).ToList();
            searcher.SetKeywords(reversedKeywords);
            searcher._others = mapping.Select(pair => pair.Key).ToArray();
            return searcher;
        }
Esempio n. 6
0
        /// <summary>
        /// Micro-benchmark: runs <c>WordsSearch.FindAll</c> over <paramref name="txt"/>
        /// 100000 times and prints the elapsed milliseconds to the console.
        /// </summary>
        /// <param name="list">Keywords loaded into the searcher.</param>
        /// <param name="txt">Text searched on every iteration.</param>
        private static void WordsSearchSearch(List<string> list, string txt)
        {
            const int iterations = 100000;

            var searcher = new WordsSearch();
            searcher.SetKeywords(list);

            // Only the FindAll loop is timed; building the searcher is excluded.
            Stopwatch timer = Stopwatch.StartNew();
            for (int pass = 0; pass < iterations; pass++)
            {
                searcher.FindAll(txt);
            }
            timer.Stop();

            Console.WriteLine(string.Format("WordsSearch: {0:N0}ms", timer.ElapsedMilliseconds));
        }
Esempio n. 7
0
        /// <summary>
        /// Returns the shared word-keyword searcher, building it on the first call.
        /// </summary>
        /// <returns>
        /// The cached searcher. Its keywords are the comma-separated entries of
        /// <c>BaseWordService.DictPinYinConfig.Word</c>, each mapped to the summed length
        /// of the entries that precede it.
        /// </returns>
        private static WordsSearch GetWordsSearch()
        {
            if (_search != null)
            {
                return _search;
            }

            var offsets = new Dictionary<string, int>();
            var words   = BaseWordService.DictPinYinConfig.Word.Split(',');
            var offset  = 0;
            foreach (var word in words)
            {
                // A repeated word keeps the offset of its last occurrence.
                offsets[word] = offset;
                offset += word.Length;
            }

            _search = new WordsSearch();
            _search.SetKeywords(offsets);
            return _search;
        }
 /// <summary>
 /// Lazily builds <c>_wordsSearch</c> from every non-empty prefix of the upper-cased
 /// entries found at the odd positions of <c>PinyinDict.PyShow</c>.
 /// </summary>
 private void InitPinyinSearch()
 {
     if (_wordsSearch != null)
     {
         return;
     }

     var prefixes = new HashSet<string>();
     var entries  = PinyinDict.PyShow;

     // Only positions 1, 3, 5, ... are read.
     for (int pos = 1; pos < entries.Length; pos += 2)
     {
         var upper = entries[pos].ToUpper();
         for (int len = 1; len <= upper.Length; len++)
         {
             prefixes.Add(upper.Substring(0, len));
         }
     }

     // Publish the field only after the searcher is fully populated.
     var searcher = new WordsSearch();
     searcher.SetKeywords(prefixes.ToList());
     _wordsSearch = searcher;
 }
Esempio n. 9
0
        /// <summary>
        /// Lazily loads the embedded, compressed pinyin word list and fills
        /// <c>_search</c>, <c>_wordPy</c> and <c>_wordPyIndex</c>.
        /// </summary>
        /// <remarks>
        /// Each decompressed line has the form <c>keyword,hex,hex,...</c>. The hex values
        /// of all lines are concatenated into <c>_wordPy</c>; <c>_wordPyIndex[i]</c> is the
        /// position in <c>_wordPy</c> where the values of keyword <c>i</c> start, and the
        /// last element is the total count.
        /// </remarks>
        /// <exception cref="System.InvalidOperationException">The embedded resource is missing.</exception>
        /// <exception cref="System.IO.EndOfStreamException">The resource stream ended before its reported length was read.</exception>
        private static void InitPyWords()
        {
            if (_search == null)
            {
                var ass = typeof(WordsHelper).Assembly;
#if NETSTANDARD2_1
                var resourceName = "ToolGood.Words.dict.pyWords.txt.br";
#else
                var resourceName = "ToolGood.Words.dict.pyWords.txt.z";
#endif
                byte[] bs;
                using (Stream sm = ass.GetManifestResourceStream(resourceName))
                {
                    if (sm == null)
                    {
                        throw new System.InvalidOperationException("Embedded resource not found: " + resourceName);
                    }

                    bs = new byte[sm.Length];

                    // Stream.Read may return fewer bytes than requested, so keep reading
                    // until the buffer is full.
                    int filled = 0;
                    while (filled < bs.Length)
                    {
                        int got = sm.Read(bs, filled, bs.Length - filled);
                        if (got <= 0)
                        {
                            throw new System.IO.EndOfStreamException("Unexpected end of embedded resource: " + resourceName);
                        }
                        filled += got;
                    }
                }

                var bytes = Decompress(bs);
                var tStr  = Encoding.UTF8.GetString(bytes);

                var           lines       = tStr.Split(new char[] { '\n' }, StringSplitOptions.RemoveEmptyEntries);
                var           wordPy      = new List <ushort>();
                List <string> keywords    = new List <string>();
                List <int>    wordPyIndex = new List <int>();
                wordPyIndex.Add(0);

                foreach (var line in lines)
                {
                    // First field is the keyword, the remaining fields are hex-encoded values.
                    var sp = line.Split(',');
                    keywords.Add(sp[0]);
                    for (int i = 1; i < sp.Length; i++)
                    {
                        var idx = sp[i];
                        wordPy.Add(ushort.Parse(idx, System.Globalization.NumberStyles.HexNumber));
                    }
                    // End offset of this keyword's values, which is the start offset of the next keyword.
                    wordPyIndex.Add(wordPy.Count);
                }
                var search = new WordsSearch();
                search.SetKeywords(keywords);
                _wordPyIndex = wordPyIndex.ToArray();
                _wordPy      = wordPy.ToArray();
                // _search is assigned last because it is the "already initialised" flag checked above.
                _search      = search;
            }
        }
Esempio n. 10
0
        /// <summary>
        /// Rewrites <paramref name="text"/> by replacing each matched keyword with the
        /// corresponding entry of <c>wordsSearch._others</c>, scanning left to right and
        /// preferring the longest match at each position.
        /// </summary>
        /// <param name="text">Text to transform.</param>
        /// <param name="wordsSearch">Searcher whose <c>_others</c> array holds the replacement for each keyword index.</param>
        /// <returns>The transformed text; characters not covered by a chosen match are copied unchanged.</returns>
        private static string TransformationReplace(string text, WordsSearch wordsSearch)
        {
            // Group the matches by start position once, instead of rescanning the whole
            // match list for every character of the text.
            var           matchesByStart = wordsSearch.FindAll(text).ToLookup(q => q.Start);
            StringBuilder sb             = new StringBuilder(text.Length);
            var           index          = 0;

            while (index < text.Length)
            {
                // The lookup yields an empty sequence for positions without a match.
                // OrderByDescending is stable, so ties on End keep the order FindAll returned.
                var t = matchesByStart[index].OrderByDescending(q => q.End).FirstOrDefault();
                if (t == null)
                {
                    sb.Append(text[index]);
                    index++;
                }
                else
                {
                    sb.Append(wordsSearch._others[t.Index]);
                    // End is inclusive, so resume right after the matched keyword.
                    index = t.End + 1;
                }
            }
            return(sb.ToString());
        }
Esempio n. 11
0
        /// <summary>
        /// Offline preprocessing pipeline for the word and pinyin data files. Each step
        /// writes its result to a text file in the working directory; the steps guarded
        /// by a <c>File.Exists</c> check are skipped when their output file is already there.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Preprocessing
            // Step 1: process the Sogou word library
            if (File.Exists("scel_1.txt") == false)
            {
                var scel_1 = GetWords();
                File.WriteAllText("scel_1.txt", string.Join("\n", scel_1));
                scel_1.Clear();
            }
            // Step 2: trim the word library
            {
                var txt   = File.ReadAllText("scel_1.txt");
                var lines = txt.Split('\n');
                Dictionary <string, string> dict = new Dictionary <string, string>();
                foreach (var item in lines)
                {
                    var sp = item.Split(' ');
                    if (sp.Length < 2)
                    {
                        // Blank or malformed line (for example an empty file): nothing to index.
                        continue;
                    }
                    dict[sp[0]] = sp[1];
                }
                List <string> keys = dict.Select(q => q.Key).ToList();

                WordsSearch wordsSearch;
                for (int i = 3; i < 8; i++)
                {
                    // Search the longer words for occurrences of words of at most i characters.
                    var keywords = keys.Where(q => q.Length <= i).ToList();
                    wordsSearch = new WordsSearch();
                    wordsSearch.SetKeywords(keywords);

                    for (int j = keys.Count - 1; j >= 0; j--)
                    {
                        var key = keys[j];
                        if (key.Length <= i)
                        {
                            continue;
                        }

                        var all = wordsSearch.FindAll(key);
                        if (all.Count > 0)
                        {
                            // TODO: run the pinyin comparison and delete the word when the pinyin is identical.
                        }
                    }
                }

                // TODO: write the trimmed list to "scel_2.txt" once the step above is implemented.
            }

            // Step 3: get every pinyin of each word (not implemented yet)

            // Step 4: fetch the pinyin data from the web
            if (File.Exists("pinyin_1.txt") == false)
            {
                var pinyin_1 = GetPinYin();
                File.WriteAllText("pinyin_1.txt", string.Join("\n", pinyin_1));
                pinyin_1.Clear();
            }

            // Step 5: separate single-character pinyin from phrase pinyin
            if (File.Exists("pinyin_2_one.txt") == false)
            {
                var           txt   = File.ReadAllText("pinyin_1.txt");
                var           lines = txt.Split('\n');
                List <string> ones  = new List <string>();
                List <string> mores = new List <string>();
                foreach (var line in lines)
                {
                    var sp = line.Split(',');
                    if (GetLength(sp[0]) == 1)
                    {
                        ones.Add(line);
                    }
                    else
                    {
                        mores.Add(line);
                    }
                }
                File.WriteAllText("pinyin_2_one.txt", string.Join("\n", ones));
                File.WriteAllText("pinyin_2_more.txt", string.Join("\n", mores));
                ones.Clear();
                mores.Clear();
            }
            // Step 6: simple merge of single-character pinyin, so that common pinyin is not overwritten
            if (File.Exists("pinyin_3_one.txt") == false)
            {
                var txt   = File.ReadAllText("pinyin_2_one.txt");
                var lines = txt.Split('\n').ToList();
                // Walk backwards so that RemoveAt does not shift the entries still to be visited.
                for (int i = lines.Count - 1; i >= 1; i--)
                {
                    // Ordinal comparison: this is a data-prefix check, not a linguistic one.
                    if (lines[i].StartsWith(lines[i - 1], System.StringComparison.Ordinal))
                    {
                        lines.RemoveAt(i);
                    }
                }
                File.WriteAllText("pinyin_3_one.txt", string.Join("\n", lines));
            }
            // Step 7: find phrases whose pinyin count differs from the phrase length
            if (File.Exists("pinyin_4_ok.txt") == false)
            {
                var           txt    = File.ReadAllText("pinyin_2_more.txt");
                var           lines  = txt.Split('\n');
                List <string> oks    = new List <string>();
                List <string> errors = new List <string>();
                foreach (var line in lines)
                {
                    var sp = line.Split(',');
                    // The first field is the phrase; every further field is one pinyin entry.
                    if (GetLength(sp[0]) == sp.Length - 1)
                    {
                        oks.Add(line);
                    }
                    else
                    {
                        errors.Add(line);
                    }
                }
                File.WriteAllText("pinyin_4_ok.txt", string.Join("\n", oks));
                File.WriteAllText("pinyin_4_error.txt", string.Join("\n", errors));
            }
        }