Esempio n. 1
0
        /// <summary>
        /// Click handler for the "convert test" button. Picks a code generator based on
        /// whether a mapping file path has been entered, attaches it to the selected
        /// parse pattern, and renders every non-empty line of the source box through
        /// that pattern into the target box.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void btnConvertTest_Click(object sender, EventArgs e)
        {
            if (SelectedParsePattern == null)
            {
                MessageBox.Show("请点击右上角按钮选择匹配规则");
                return;
            }

            // A mapping file path selects the self-defining generator; otherwise fall back to pinyin.
            IWordCodeGenerater factory;
            if (!string.IsNullOrEmpty(txbFilePath.Text))
            {
                factory = new SelfDefiningCodeGenerater();
                UserCodingHelper.FilePath = txbFilePath.Text;
            }
            else
            {
                factory = new WordPinyinGenerater();
            }
            SelectedParsePattern.Factory = factory;

            rtbTo.Clear();
            char[] lineBreaks = { '\r', '\n' };
            string[] sourceLines = rtbFrom.Text.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);
            for (int index = 0; index < sourceLines.Length; index++)
            {
                var entry = new WordLibrary();
                entry.Word = sourceLines[index].Trim();
                rtbTo.AppendText(SelectedParsePattern.BuildWLString(entry) + "\r\n");
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Parses a block of text line by line into a word-library list.
        /// </summary>
        /// <param name="str">Raw text; lines are separated by CR and/or LF.</param>
        /// <returns>The entries produced by <c>ImportLine</c> for every non-blank line.</returns>
        /// <remarks>
        /// A line whose import throws is logged to the debug output and skipped.
        /// <c>CountWord</c> is set to the number of lines and <c>CurrentStatus</c>
        /// is advanced once per line, whether or not the line succeeded.
        /// </remarks>
        public virtual WordLibraryList ImportText(string str)
        {
            pinyinFactory = new PinyinGenerater();

            var imported = new WordLibraryList();
            char[] lineBreaks = { '\r', '\n' };
            string[] lines = str.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);
            CountWord = lines.Length;
            CurrentStatus = 0;
            foreach (string rawLine in lines)
            {
                try
                {
                    string line = rawLine.Trim();
                    if (line.Length > 0)
                    {
                        imported.AddWordLibraryList(ImportLine(line));
                    }
                }
                catch (Exception ex)
                {
                    Debug.WriteLine(ex.Message);
                }
                CurrentStatus++;
            }
            return imported;
        }
Esempio n. 3
0
        /// <summary>
        /// Generates codes of the requested type for every entry that is not already
        /// of that type, updating the progress fields as it goes.
        /// </summary>
        /// <param name="wordLibraryList">Entries to (re)encode in place.</param>
        /// <param name="codeType">Target code type.</param>
        private void GenerateDestinationCode(WordLibraryList wordLibraryList, CodeType codeType)
        {
            IWordCodeGenerater generater = CodeTypeHelper.GetGenerater(codeType);
            if (generater == null)
            {
                // Unknown encoding scheme: skip code generation entirely.
                return;
            }

            countWord = wordLibraryList.Count;
            currentStatus = 0;
            bool stampCodeType = codeType != CodeType.Unknown;
            foreach (WordLibrary entry in wordLibraryList)
            {
                currentStatus++;
                processMessage = "生成目标编码:" + currentStatus + "/" + countWord;
                if (entry.CodeType != codeType)
                {
                    try
                    {
                        generater.GetCodeOfWordLibrary(entry);
                    }
                    catch (Exception ex)
                    {
                        Debug.WriteLine("生成编码失败" + ex.Message);
                    }
                    // The entry is stamped with the target type even when generation failed.
                    if (stampCodeType)
                    {
                        entry.CodeType = codeType;
                    }
                }
            }
        }
Esempio n. 4
0
 /// <summary>
 /// Exports every entry of the list as one CRLF-terminated line.
 /// </summary>
 /// <param name="wlList">Entries to export.</param>
 /// <returns>The concatenated output of <c>ExportLine</c>, each followed by "\r\n".</returns>
 public string Export(WordLibraryList wlList)
 {
     // Refresh the generator for the current code type before exporting.
     codeGenerater = CodeTypeHelper.GetGenerater(CodeType);
     var output = new StringBuilder();
     int index = 0;
     while (index < wlList.Count)
     {
         output.Append(ExportLine(wlList[index]));
         output.Append("\r\n");
         index++;
     }
     return output.ToString();
 }
Esempio n. 5
0
 /// <summary>
 /// Exports every entry of the list into a single text document.
 /// </summary>
 /// <param name="wlList">Entries to export.</param>
 /// <returns>
 /// A one-element list whose only item is the output of <c>ExportLine</c> for each
 /// entry, each followed by <c>lineSplitString</c>.
 /// </returns>
 public IList<string> Export(WordLibraryList wlList)
 {
     // Refresh the generator for the current code type before exporting.
     codeGenerater = CodeTypeHelper.GetGenerater(CodeType);
     var buffer = new StringBuilder();
     int index = 0;
     while (index < wlList.Count)
     {
         buffer.Append(ExportLine(wlList[index]));
         buffer.Append(lineSplitString);
         index++;
     }
     var documents = new List<string>();
     documents.Add(buffer.ToString());
     return documents;
 }
Esempio n. 6
0
        /// <summary>
        /// Encodes a word character by character and stores the result on the entry.
        /// </summary>
        /// <param name="wl">Entry whose <c>Word</c> is read and whose <c>PinYin</c> is overwritten.</param>
        /// <param name="factory">Generator asked for the code of each character.</param>
        public void CodingString(WordLibrary wl, IWordCodeGenerater factory)
        {
            string word = wl.Word;
            var perCharCodes = new string[word.Length];
            for (int position = 0; position < word.Length; position++)
            {
                perCharCodes[position] = factory.GetCodeOfChar(word[position]);
            }
            wl.PinYin = perCharCodes;
        }
Esempio n. 7
0
        /// <summary>
        /// Formats one entry as tab-separated "word, code, count" text.
        /// </summary>
        /// <param name="wl">Entry to export.</param>
        /// <returns>
        /// One line for generators with a single code per word. For generators flagged
        /// <c>Is1CharMutiCode</c>, one line per candidate code, joined by "\r\n" with no
        /// trailing line break.
        /// </returns>
        public string ExportLine(WordLibrary wl)
        {
            // Lazily resolve the generator when Export has not set it yet.
            if (codeGenerater == null)
            {
                codeGenerater = CodeTypeHelper.GetGenerater(CodeType);
            }

            var line = new StringBuilder();

            if (codeGenerater.Is1CharMutiCode)
            {
                // One output row per candidate code of the word.
                IList<string> candidates = codeGenerater.GetCodeOfString(wl.Word);
                bool first = true;
                foreach (string candidate in candidates)
                {
                    if (!first)
                    {
                        line.Append("\r\n");
                    }
                    first = false;
                    line.Append(wl.Word);
                    line.Append("\t");
                    line.Append(candidate);
                    line.Append("\t");
                    line.Append(wl.Count);
                }
                return line.ToString();
            }

            line.Append(wl.Word);
            line.Append("\t");
            bool pinyinTarget = CodeType == CodeType.Pinyin || CodeType == CodeType.TerraPinyin;
            if (pinyinTarget)
            {
                line.Append(wl.GetPinYinString(" ", BuildType.None));
            }
            else if (CodeType == wl.CodeType)
            {
                // The entry already carries a code of the target type; reuse it.
                line.Append(wl.Codes[0][0]);
            }
            else if (codeGenerater.Is1Char1Code)
            {
                line.Append(CollectionHelper.ListToString(codeGenerater.GetCodeOfString(wl.Word), " "));
            }
            else
            {
                line.Append(CollectionHelper.ListToString(codeGenerater.GetCodeOfString(wl.Word)));
            }
            line.Append("\t");
            line.Append(wl.Count);
            return line.ToString();
        }
Esempio n. 8
0
        /// <summary>
        /// Exports every entry of the list, terminating each with <c>lineSplitString</c>.
        /// </summary>
        /// <param name="wlList">Entries to export.</param>
        /// <returns>The concatenated output of <c>ExportLine</c> for all entries.</returns>
        public string Export(WordLibraryList wlList)
        {
            // Refresh the generator for the current code type before exporting.
            codeGenerater = CodeTypeHelper.GetGenerater(CodeType);

            var text = new StringBuilder();
            int index = 0;
            while (index < wlList.Count)
            {
                text.Append(ExportLine(wlList[index]));
                text.Append(lineSplitString);
                index++;
            }
            return text.ToString();
        }
Esempio n. 9
0
 /// <summary>
 /// Formats one entry as tab-separated "word, code, count" text.
 /// </summary>
 /// <param name="wl">Entry to export.</param>
 /// <returns>
 /// A single line, or for generators flagged <c>Is1CharMutiCode</c> one line per
 /// candidate code joined by "\r\n" (no trailing line break).
 /// </returns>
 public string ExportLine(WordLibrary wl)
 {
     // Lazily resolve the generator when Export has not set it yet.
     if (codeGenerater == null)
     {
         codeGenerater = CodeTypeHelper.GetGenerater(CodeType);
     }

     var text = new StringBuilder();

     if (!codeGenerater.Is1CharMutiCode)
     {
         text.Append(wl.Word);
         text.Append("\t");
         if (CodeType == CodeType.Pinyin || CodeType == CodeType.TerraPinyin)
         {
             text.Append(wl.GetPinYinString(" ", BuildType.None));
         }
         else if (CodeType == wl.CodeType)
         {
             // The entry already carries a code of the target type; reuse it.
             text.Append(wl.Codes[0][0]);
         }
         else if (codeGenerater.Is1Char1Code)
         {
             text.Append(CollectionHelper.ListToString(codeGenerater.GetCodeOfString(wl.Word), " "));
         }
         else
         {
             text.Append(CollectionHelper.ListToString(codeGenerater.GetCodeOfString(wl.Word)));
         }
         text.Append("\t");
         text.Append(wl.Count);
         return text.ToString();
     }

     // One output row per candidate code of the word.
     IList<string> candidates = codeGenerater.GetCodeOfString(wl.Word);
     int written = 0;
     foreach (string candidate in candidates)
     {
         if (written > 0)
         {
             text.Append("\r\n");
         }
         text.Append(wl.Word);
         text.Append("\t");
         text.Append(candidate);
         text.Append("\t");
         text.Append(wl.Count);
         written++;
     }
     return text.ToString();
 }
Esempio n. 10
0
        /// <summary>
        /// Generates codes of the requested type for every entry that needs them,
        /// updating the progress fields as it goes.
        /// </summary>
        /// <param name="wordLibraryList">Entries to (re)encode in place.</param>
        /// <param name="codeType">
        /// Target code type. Replaced by <c>CodeType.Pinyin</c> when it is
        /// <c>UserDefinePhrase</c> and the first entry has no code.
        /// </param>
        private void GenerateDestinationCode(WordLibraryList wordLibraryList, CodeType codeType)
        {
            if (wordLibraryList.Count == 0)
            {
                return;
            }

            bool firstEntryUncoded = wordLibraryList[0].CodeType == CodeType.NoCode;
            if (firstEntryUncoded && codeType == CodeType.UserDefinePhrase)
            {
                codeType = CodeType.Pinyin;
            }

            IWordCodeGenerater generater = CodeTypeHelper.GetGenerater(codeType);
            if (generater == null)
            {
                // Unknown encoding scheme: skip code generation entirely.
                return;
            }

            countWord = wordLibraryList.Count;
            currentStatus = 0;
            foreach (WordLibrary entry in wordLibraryList)
            {
                currentStatus++;
                processMessage = "生成目标编码:" + currentStatus + "/" + countWord;

                CodeType existing = entry.CodeType;
                if (existing == codeType)
                {
                    continue;
                }
                if (existing == CodeType.English)
                {
                    // English entries use their own lower-cased text as the code.
                    entry.SetCode(CodeType.English, entry.Word.ToLower());
                    continue;
                }

                try
                {
                    generater.GetCodeOfWordLibrary(entry);
                }
                catch (Exception ex)
                {
                    Debug.WriteLine("生成编码失败" + ex.Message);
                }

                // The entry is stamped with the target type even when generation failed.
                if (codeType != CodeType.Unknown)
                {
                    entry.CodeType = codeType;
                }
            }
        }
Esempio n. 11
0
        /// <summary>
        /// Exports the list into a single text document, skipping entries whose
        /// exported line is null or empty.
        /// </summary>
        /// <param name="wlList">Entries to export.</param>
        /// <returns>
        /// A one-element list holding all non-empty exported lines, each followed by
        /// <c>lineSplitString</c>.
        /// </returns>
        public IList<string> Export(WordLibraryList wlList)
        {
            // Refresh the generator for the current code type before exporting.
            codeGenerater = CodeTypeHelper.GetGenerater(CodeType);

            var text = new StringBuilder();
            int index = 0;
            while (index < wlList.Count)
            {
                var exported = ExportLine(wlList[index]);
                index++;
                if (string.IsNullOrEmpty(exported))
                {
                    continue;
                }
                text.Append(exported);
                text.Append(lineSplitString);
            }

            var documents = new List<string>();
            documents.Add(text.ToString());
            return documents;
        }
Esempio n. 12
0
        /// <summary>
        /// Resolves the code generator for the user-defined pattern and builds the
        /// line format. For <c>CodeType.UserDefine</c> it also loads the character
        /// mapping table and configures the self-defining generator with it.
        /// </summary>
        /// <exception cref="Exception">
        /// The code type is <c>UserDefine</c> but no mapping table path is configured.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// The code type is <c>UserDefine</c> but the resolved generator is not a
        /// <c>SelfDefiningCodeGenerater</c>.
        /// </exception>
        public void Prepare()
        {
            codeGenerater = CodeTypeHelper.GetGenerater(this.UserDefiningPattern.CodeType);
            if (UserDefiningPattern.CodeType == CodeType.UserDefine)
            {
                if (string.IsNullOrEmpty(UserDefiningPattern.MappingTablePath))
                {
                    throw new Exception("未指定字符编码映射文件,无法对词库进行自定义编码的生成");
                }
                IDictionary <char, IList <string> > dict =
                    UserCodingHelper.GetCodingDict(UserDefiningPattern.MappingTablePath,
                                                   UserDefiningPattern.TextEncoding);
                var g = codeGenerater as SelfDefiningCodeGenerater;
                if (g == null)
                {
                    // Fail with a clear message instead of a NullReferenceException on the next line.
                    throw new InvalidOperationException(
                        "The generator resolved for CodeType.UserDefine is not a SelfDefiningCodeGenerater; cannot apply the mapping table.");
                }
                g.MappingDictionary  = dict;
                g.Is1Char1Code       = UserDefiningPattern.IsPinyinFormat;
                g.MutiWordCodeFormat = UserDefiningPattern.MutiWordCodeFormat;
            }

            BuildLineFormat();
        }
Esempio n. 13
0
        /// <summary>
        /// Resolves the code generator for the user-defined pattern and builds the
        /// line format. For <c>CodeType.UserDefine</c> it also loads the character
        /// mapping table and configures the self-defining generator with it.
        /// </summary>
        /// <exception cref="Exception">
        /// The code type is <c>UserDefine</c> but no mapping table path is configured.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// The code type is <c>UserDefine</c> but the resolved generator is not a
        /// <c>SelfDefiningCodeGenerater</c>.
        /// </exception>
        public void Prepare()
        {
            codeGenerater = CodeTypeHelper.GetGenerater(this.UserDefiningPattern.CodeType);
            if (UserDefiningPattern.CodeType == CodeType.UserDefine)
            {
                if (string.IsNullOrEmpty(UserDefiningPattern.MappingTablePath))
                {
                    throw new Exception("未指定字符编码映射文件,无法对词库进行自定义编码的生成");
                }
                IDictionary<char, IList<string>> dict =
                    UserCodingHelper.GetCodingDict(UserDefiningPattern.MappingTablePath,
                        UserDefiningPattern.TextEncoding);
                var g = codeGenerater as SelfDefiningCodeGenerater;
                if (g == null)
                {
                    // Fail with a clear message instead of a NullReferenceException on the next line.
                    throw new InvalidOperationException(
                        "The generator resolved for CodeType.UserDefine is not a SelfDefiningCodeGenerater; cannot apply the mapping table.");
                }
                g.MappingDictionary = dict;
                g.Is1Char1Code = UserDefiningPattern.IsPinyinFormat;
                g.MutiWordCodeFormat = UserDefiningPattern.MutiWordCodeFormat;
            }

            BuildLineFormat();
        }
Esempio n. 14
0
        /// <summary>
        /// Parses a block of text line by line into a word-library list.
        /// </summary>
        /// <param name="str">Raw text; lines are separated by CR and/or LF.</param>
        /// <returns>The entries produced by <c>ImportLine</c> for every non-blank line.</returns>
        /// <remarks>
        /// Import is best-effort: a line whose import throws is skipped, and the
        /// failure is written to the debug output so it is no longer invisible.
        /// </remarks>
        public virtual WordLibraryList ImportText(string str)
        {
            pinyinFactory = new WordPinyinGenerater();

            var wlList = new WordLibraryList();
            string[] words = str.Split(new[] {'\r', '\n'}, StringSplitOptions.RemoveEmptyEntries);
            for (int i = 0; i < words.Length; i++)
            {
                try
                {
                    string word = words[i].Trim();
                    if (word != string.Empty)
                    {
                        wlList.AddWordLibraryList(ImportLine(word));
                    }
                }
                catch (System.Exception ex)
                {
                    // Keep importing the remaining lines, but leave a trace of what was dropped.
                    System.Diagnostics.Debug.WriteLine("ImportText: skipped line " + (i + 1) + ": " + ex.Message);
                }
            }
            return wlList;
        }
Esempio n. 15
0
        /// <summary>
        /// Parses a block of text line by line into a word-library list.
        /// </summary>
        /// <param name="str">Raw text; lines are separated by CR and/or LF.</param>
        /// <returns>The entries produced by <c>ImportLine</c> for every non-blank line.</returns>
        /// <remarks>
        /// Import is best-effort: a line whose import throws is skipped, and the
        /// failure is written to the debug output so it is no longer invisible.
        /// </remarks>
        public virtual WordLibraryList ImportText(string str)
        {
            pinyinFactory = new WordPinyinGenerater();

            var wlList = new WordLibraryList();

            string[] words = str.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
            for (int i = 0; i < words.Length; i++)
            {
                try
                {
                    string word = words[i].Trim();
                    if (word != string.Empty)
                    {
                        wlList.AddWordLibraryList(ImportLine(word));
                    }
                }
                catch (System.Exception ex)
                {
                    // Keep importing the remaining lines, but leave a trace of what was dropped.
                    System.Diagnostics.Debug.WriteLine("ImportText: skipped line " + (i + 1) + ": " + ex.Message);
                }
            }
            return(wlList);
        }
Esempio n. 16
0
 /// <summary>
 /// Assigns a fresh <c>PinyinGenerater</c> to <c>generater</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably a per-test setup hook; the attribute that would
 /// confirm this is not visible here.
 /// </remarks>
 public void Setup()
 {
     generater = new PinyinGenerater();
 }
Esempio n. 17
0
        //private RimeConfigForm form;

        /// <summary>
        /// Formats one entry as tab-separated "word, code, rank" text.
        /// </summary>
        /// <param name="wl">Entry to export; its codes are regenerated when it is not already in the target code type.</param>
        /// <returns>
        /// The formatted line (several lines joined by <c>lineSplitString</c> for
        /// generators flagged <c>Is1CharMutiCode</c>), or <c>null</c> when code
        /// generation throws.
        /// </returns>
        public string ExportLine(WordLibrary wl)
        {
            // Fast path: a non-pinyin entry already in the target code type is written as is.
            if (this.CodeType == wl.CodeType
                && !(this.CodeType == CodeType.Pinyin || CodeType == CodeType.TerraPinyin))
            {
                return wl.Word + "\t" + wl.Codes[0][0] + "\t" + wl.Rank;
            }

            // Lazily resolve the generator when Export has not set it yet.
            if (codeGenerater == null)
            {
                codeGenerater = CodeTypeHelper.GetGenerater(CodeType);
            }

            try
            {
                codeGenerater.GetCodeOfWordLibrary(wl);
            }
            catch (Exception ex)
            {
                Debug.Fail(ex.Message);
                return null;
            }

            var text = new StringBuilder();

            if (!codeGenerater.Is1CharMutiCode)
            {
                text.Append(wl.Word);
                text.Append("\t");
                if (CodeType == CodeType.Pinyin || CodeType == CodeType.TerraPinyin)
                {
                    text.Append(wl.GetPinYinString(" ", BuildType.None));
                }
                else if (CodeType == wl.CodeType)
                {
                    text.Append(wl.Codes[0][0]);
                }
                else
                {
                    text.Append(wl.Codes.ToCodeString(" ")[0]);
                }
                text.Append("\t");
                text.Append(wl.Rank);
                return text.ToString();
            }

            // One output row per candidate code string of the word.
            IList<string> candidates = codeGenerater.GetCodeOfString(wl.Word).ToCodeString(" ");
            int written = 0;
            foreach (string candidate in candidates)
            {
                if (written > 0)
                {
                    text.Append(lineSplitString);
                }
                text.Append(wl.Word);
                text.Append("\t");
                text.Append(candidate);
                text.Append("\t");
                text.Append(wl.Rank);
                written++;
            }
            return text.ToString();
        }
Esempio n. 18
0
 /// <summary>
 /// Encodes a word character by character and stores the result on the entry.
 /// </summary>
 /// <param name="wl">Entry whose <c>Word</c> is read and whose <c>PinYin</c> is overwritten.</param>
 /// <param name="factory">Generator asked for the code of each character.</param>
 public void CodingString(WordLibrary wl, IWordCodeGenerater factory)
 {
     string text = wl.Word;
     var collected = new List<string>(text.Length);
     for (int position = 0; position < text.Length; position++)
     {
         collected.Add(factory.GetCodeOfChar(text[position]));
     }
     wl.PinYin = collected.ToArray();
 }
Esempio n. 19
0
 /// <summary>
 /// Assigns a fresh <c>PinyinGenerater</c> to <c>generater</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably a per-test setup hook; the attribute that would
 /// confirm this is not visible here.
 /// </remarks>
 public void Setup()
 {
     generater = new PinyinGenerater();
 }
Esempio n. 20
0
 /// <summary>
 /// Assigns a fresh <c>ErbiGenerater</c> to <c>generater</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably a per-test setup hook; the attribute that would
 /// confirm this is not visible here.
 /// </remarks>
 public void Setup()
 {
     generater = new ErbiGenerater();
 }
Esempio n. 21
0
 /// <summary>
 /// Assigns a fresh <c>QingsongErbiGenerater</c> to <c>generater</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably a per-test setup hook; the attribute that would
 /// confirm this is not visible here.
 /// </remarks>
 public void Setup()
 {
     generater = new QingsongErbiGenerater();
 }
Esempio n. 22
0
 /// <summary>
 /// Assigns a fresh <c>QingsongErbiGenerater</c> to <c>generater</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably a per-test setup hook; the attribute that would
 /// confirm this is not visible here.
 /// </remarks>
 public void Setup()
 {
     generater = new QingsongErbiGenerater();
 }
 /// <summary>
 /// Assigns a fresh <c>ZhuyinGenerater</c> to <c>generater</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably a per-test setup hook; the attribute that would
 /// confirm this is not visible here.
 /// </remarks>
 public void Setup()
 {
     generater = new ZhuyinGenerater();
 }
Esempio n. 24
0
 /// <summary>
 /// Assigns a fresh <c>ZhuyinGenerater</c> to <c>generater</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably a per-test setup hook; the attribute that would
 /// confirm this is not visible here.
 /// </remarks>
 public void Setup()
 {
     generater = new ZhuyinGenerater();
 }
Esempio n. 25
0
        /// <summary>
        /// Generates codes of the requested type for every entry that needs them,
        /// applying the <c>FilterConfig</c> text filters to a working copy of each
        /// word first and updating the progress fields as it goes.
        /// </summary>
        /// <param name="wordLibraryList">Entries to (re)encode in place.</param>
        /// <param name="codeType">
        /// Target code type. Replaced by <c>CodeType.Pinyin</c> when it is
        /// <c>UserDefinePhrase</c> and the first entry has no code.
        /// </param>
        /// <remarks>
        /// When the filtered word still contains kept English, digits or punctuation,
        /// it is split into clips (runs of characters of the same class). Kept clips
        /// are emitted literally and the remaining clips are sent to the generator.
        /// The entry's <c>Word</c> is temporarily overwritten while clips are encoded;
        /// it is restored to the original text afterwards, including when generation
        /// throws. Failures are logged to the debug output and do not stop the loop.
        /// </remarks>
        private void GenerateDestinationCode(WordLibraryList wordLibraryList, CodeType codeType)
        {
            if (wordLibraryList.Count == 0)
            {
                return;
            }
            if (wordLibraryList[0].CodeType == CodeType.NoCode && codeType == CodeType.UserDefinePhrase)
            {
                codeType = CodeType.Pinyin;
            }
            IWordCodeGenerater generater = CodeTypeHelper.GetGenerater(codeType);

            if (generater == null) // Unknown encoding scheme: skip code generation entirely.
            {
                return;
            }
            countWord     = wordLibraryList.Count;
            currentStatus = 0;
            Regex spaceRegex     = new Regex("(?=[^a-zA-Z])\\s+");
            Regex numberRegex    = new Regex("[0-90-9]+");
            Regex englishRegex   = new Regex("[a-zA-Za-zA-Z]+");
            Regex fullWidthRegex = new Regex("[\uff00-\uff5e]+");
            // Regex fullWidthRegex = new Regex("[a-zA-Z0-9]+");
            // Regex punctuationRegex = new Regex("[-・·&%']");
            Regex punctuationRegex = new Regex("[\u0021-\u002f\u003a-\u0040\u005b-\u0060\u007b-\u008f\u00a0-\u00bf\u00d7\u00f7\u2000-\u2bff\u3000-\u303f\u30a0\u30fb\uff01-\uff0f\uff1a-\uff20\uff5b-\uff65]");


            foreach (WordLibrary wordLibrary in wordLibraryList)
            {
                currentStatus++;
                processMessage = "生成目标编码:" + currentStatus + "/" + countWord;
                if (wordLibrary.CodeType == codeType)
                {
                    continue;
                }
                if (wordLibrary.CodeType == CodeType.English)
                {
                    // English entries use their own lower-cased text as the code.
                    wordLibrary.SetCode(CodeType.English, wordLibrary.Word.ToLower());
                    continue;
                }

                // Captured outside the try so the catch block can put it back after a failure.
                string word_0 = wordLibrary.Word;
                try
                {
                    string word = wordLibrary.Word;

                    if (FilterConfig.FullWidth && fullWidthRegex.IsMatch(word))
                    {
                        // Shift U+FF00..U+FF5E down by 65248 (0xFEE0) onto the ASCII range.
                        char[] c = word.ToCharArray();
                        for (int i = 0; i < c.Length; i++)
                        {
                            if (c[i] <= 0xff5e && c[i] >= 0xff00)
                            {
                                c[i] = (char)(c[i] - 65248);
                            }
                        }
                        word = new String(c);
                    }

                    // NOTE(review): the underscore-suffixed flags strip the matching characters
                    // from the working copy; confirm their intended meaning against FilterConfig.
                    if (FilterConfig.KeepNumber_)
                    {
                        word = numberRegex.Replace(word, "");
                    }

                    if (FilterConfig.KeepEnglish_)
                    {
                        word = englishRegex.Replace(word, "");
                    }

                    if (FilterConfig.KeepSpace_)
                    {
                        if (FilterConfig.KeepSpace == false)
                        {
                            word = word.Replace(" ", "");
                        }
                        else
                        {
                            word = spaceRegex.Replace(word, "");
                        }
                    }

                    if (FilterConfig.KeepPunctuation_)
                    {
                        word = punctuationRegex.Replace(word, "");
                    }

                    if (FilterConfig.ChsNumber)
                    {
                        word = TranslateChineseNumber(word);
                    }

                    if ((englishRegex.IsMatch(word) && FilterConfig.KeepEnglish) || (numberRegex.IsMatch(word) && FilterConfig.KeepNumber) || (punctuationRegex.IsMatch(word) && FilterConfig.KeepPunctuation))
                    {
                        StringBuilder          input  = new StringBuilder();
                        List <IList <string> > output = new List <IList <string> >();

                        // Character classes: 0 = other, 1 = digit, 2 = English letter, 3 = punctuation.
                        int clipType = -1; int type = 0;

                        foreach (char c in word)
                        {
                            if (c >= 0x30 && c <= 0x39)
                            {
                                type = 1;
                            }
                            else if (c >= 0x41 && c <= 0x5a)
                            {
                                type = 2;
                            }
                            else if (c >= 0x61 && c <= 0x7a)
                            {
                                type = 2;
                            }
                            else if (c == 0x20 && FilterConfig.KeepSpace && clipType == 2)
                            {
                                // A space inside an English clip stays part of that clip.
                                type = 2;
                            }
                            else if ("-・&%'".Contains(c))
                            {
                                type = 3;
                            }
                            else if (punctuationRegex.IsMatch(c.ToString()))
                            {
                                type = 3;
                            }
                            else
                            {
                                type = 0;
                            }
                            if (input.Length < 1)
                            {
                                clipType = type;
                                input.Append(c);
                            }
                            else if (type == clipType)
                            {
                                input.Append(c);
                            }
                            else
                            {
                                // Class changed: emit the finished clip and start a new one with this character.
                                AppendClipCode(input.ToString(), clipType, wordLibrary, generater, output);
                                input.Clear();
                                input.Append(c);
                                clipType = type;
                            }
                        }

                        if (input.Length > 0)
                        {
                            // Emit the trailing clip.
                            AppendClipCode(input.ToString(), clipType, wordLibrary, generater, output);
                        }

                        wordLibrary.Word  = word_0;
                        wordLibrary.Codes = new Code(output);
                    }
                    else
                    {
                        if (word.Equals(word_0))
                        {
                            generater.GetCodeOfWordLibrary(wordLibrary);
                        }
                        else
                        {
                            // Encode the filtered text, then restore the original word.
                            wordLibrary.Word = word;
                            generater.GetCodeOfWordLibrary(wordLibrary);
                            wordLibrary.Word = word_0;
                        }
                    }
                }
                catch (Exception ex)
                {
                    // A failure can leave Word holding a clip or the filtered text; restore the original.
                    wordLibrary.Word = word_0;
                    Debug.WriteLine("生成编码失败" + ex.Message);
                }
                if (codeType != CodeType.Unknown)
                {
                    wordLibrary.CodeType = codeType;
                }
            }
        }

        /// <summary>
        /// Appends the code for one finished clip to <paramref name="output"/>: kept
        /// English, digit and punctuation clips are added literally (English with a
        /// leading '_' when <c>FilterConfig.needEnglishTag()</c> is true), any other
        /// clip is encoded through <paramref name="generater"/>.
        /// </summary>
        /// <param name="clip">Text of the clip.</param>
        /// <param name="clipType">Character class of the clip (1 = digit, 2 = English, 3 = punctuation, otherwise other).</param>
        /// <param name="wordLibrary">Entry used as scratch space for the generator; its <c>Word</c> and <c>CodeType</c> are overwritten when the clip is encoded.</param>
        /// <param name="generater">Generator used for clips that are not kept literally.</param>
        /// <param name="output">Accumulated per-clip codes.</param>
        private void AppendClipCode(string clip, int clipType, WordLibrary wordLibrary, IWordCodeGenerater generater, List <IList <string> > output)
        {
            if (clipType == 2 && FilterConfig.KeepEnglish)
            {
                if (FilterConfig.needEnglishTag())
                {
                    output.Add(new List <string> {
                        '_' + clip
                    });
                }
                else
                {
                    output.Add(new List <string> {
                        clip
                    });
                }
            }
            else if ((clipType == 1 && FilterConfig.KeepNumber) || (clipType == 3 && FilterConfig.KeepPunctuation))
            {
                output.Add(new List <string> {
                    clip
                });
            }
            else
            {
                wordLibrary.Word     = clip;
                wordLibrary.CodeType = CodeType.NoCode;
                generater.GetCodeOfWordLibrary(wordLibrary);
                output.AddRange(wordLibrary.Codes);
            }
        }