Esempio n. 1
0
        /// <summary>
        /// Feeds a sample Pinyin-coded entry through the generater and writes
        /// each element of the resulting <c>Codes</c> collection to the debug output.
        /// The method only prints; it performs no assertions.
        /// </summary>
        public void TestPinyin2TerraPinyin()
        {
            // Build the sample entry step by step, in the same order as the properties are listed.
            var sample = new WordLibrary();
            sample.Word     = "深蓝";
            sample.Rank     = 123;
            sample.PinYin   = new string[] { "shen", "lan" };
            sample.CodeType = CodeType.Pinyin;

            // The generater is expected to fill in sample.Codes for this entry.
            generater.GetCodeOfWordLibrary(sample);

            foreach (var generated in sample.Codes)
            {
                Debug.WriteLine(generated);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Generates codes of the requested type for every entry in the list that
        /// is not already tagged with that code type, updating the progress fields
        /// (<c>countWord</c>, <c>currentStatus</c>, <c>processMessage</c>) along the way.
        /// </summary>
        /// <param name="wordLibraryList">Entries to process; they are modified in place.</param>
        /// <param name="codeType">Target code type to generate.</param>
        private void GenerateDestinationCode(WordLibraryList wordLibraryList, CodeType codeType)
        {
            IWordCodeGenerater targetGenerater = CodeTypeHelper.GetGenerater(codeType);
            if (targetGenerater == null)
            {
                // Unknown encoding type: no encoding is performed.
                return;
            }

            countWord     = wordLibraryList.Count;
            currentStatus = 0;

            foreach (WordLibrary entry in wordLibraryList)
            {
                currentStatus++;
                processMessage = "生成目标编码:" + currentStatus + "/" + countWord;

                // Entries already carrying the target code type are left untouched.
                if (entry.CodeType != codeType)
                {
                    try
                    {
                        targetGenerater.GetCodeOfWordLibrary(entry);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: a failing entry is logged and processing continues.
                        Debug.WriteLine("生成编码失败" + ex.Message);
                    }

                    // The entry is re-tagged even when generation failed, as long as the target is known.
                    if (codeType != CodeType.Unknown)
                    {
                        entry.CodeType = codeType;
                    }
                }
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Formats one entry into export text: one line per code string of the
        /// entry, joined with <c>UserDefiningPattern.LineSplitString</c>.
        /// </summary>
        /// <param name="wl">
        /// Entry to export. When its <c>CodeType</c> differs from this exporter's
        /// <c>CodeType</c>, the entry is passed to the code generater first, so the
        /// entry may be modified by this call.
        /// </param>
        /// <returns>The formatted line(s) for the entry.</returns>
        public string ExportLine(WordLibrary wl)
        {
            // Build the format lazily; also covers a lineFormat that was never initialised (null).
            if (string.IsNullOrEmpty(lineFormat))
            {
                BuildLineFormat();
            }

            // If the source entry's code type matches the export code type its existing
            // codes can be used as they are; otherwise they are ignored and the code
            // generater produces new codes that the lines are built from.
            if (wl.CodeType != CodeType)
            {
                codeGenerater.GetCodeOfWordLibrary(wl);
            }

            string word = wl.Word;
            int    rank = wl.Rank;
            var    lines = new List <string>();

            foreach (string code in wl.Codes.ToCodeString(UserDefiningPattern.CodeSplitString, UserDefiningPattern.CodeSplitType))
            {
                lines.Add(String.Format(lineFormat, code, word, rank));
            }

            return(String.Join(UserDefiningPattern.LineSplitString, lines.ToArray()));
        }
Esempio n. 4
0
        //private RimeConfigForm form;

        /// <summary>
        /// Builds the export text for one entry as tab-separated
        /// "word, code, rank" line(s).
        /// </summary>
        /// <param name="wl">Entry to export; it is passed to the code generater, which may modify it.</param>
        /// <returns>
        /// A single line, or several lines joined with <c>lineSplitString</c> when
        /// the generater reports <c>Is1CharMutiCode</c>.
        /// </returns>
        public string ExportLine(WordLibrary wl)
        {
            // Resolve the generater on first use.
            if (codeGenerater == null)
            {
                codeGenerater = CodeTypeHelper.GetGenerater(CodeType);
            }
            codeGenerater.GetCodeOfWordLibrary(wl);

            var text = new StringBuilder();

            if (!codeGenerater.Is1CharMutiCode)
            {
                text.Append(wl.Word).Append("\t");

                if (CodeType == CodeType.Pinyin || CodeType == CodeType.TerraPinyin)
                {
                    text.Append(wl.GetPinYinString(" ", BuildType.None));
                }
                else if (CodeType == wl.CodeType)
                {
                    // Same code type as the exporter: take the first stored code directly.
                    text.Append(wl.Codes[0][0]);
                }
                else
                {
                    text.Append(wl.Codes.ToCodeString(" ")[0]);
                }

                text.Append("\t").Append(wl.Rank);
                return(text.ToString());
            }

            // One output line per code string produced for the word.
            IList <string> variants = codeGenerater.GetCodeOfString(wl.Word).ToCodeString(" ");
            for (int idx = 0; idx < variants.Count; idx++)
            {
                // The separator goes between lines only, never after the last one.
                if (idx > 0)
                {
                    text.Append(lineSplitString);
                }
                text.Append(wl.Word).Append("\t").Append(variants[idx]).Append("\t").Append(wl.Rank);
            }
            return(text.ToString());
        }
Esempio n. 5
0
        /// <summary>
        /// Generates codes of the requested type for every entry in the list that
        /// is not already tagged with that code type, updating the progress fields
        /// (<c>countWord</c>, <c>currentStatus</c>, <c>processMessage</c>) along the way.
        /// </summary>
        /// <param name="wordLibraryList">Entries to process; they are modified in place.</param>
        /// <param name="codeType">
        /// Target code type. When the first entry has no code and the target is
        /// <c>UserDefinePhrase</c>, Pinyin is used instead.
        /// </param>
        private void GenerateDestinationCode(WordLibraryList wordLibraryList, CodeType codeType)
        {
            if (wordLibraryList.Count == 0)
            {
                return;
            }
            if (wordLibraryList[0].CodeType == CodeType.NoCode && codeType == CodeType.UserDefinePhrase)
            {
                codeType = CodeType.Pinyin;
            }
            IWordCodeGenerater generater = CodeTypeHelper.GetGenerater(codeType);

            if (generater == null) // Unknown encoding type: no encoding is performed.
            {
                return;
            }
            countWord     = wordLibraryList.Count;
            currentStatus = 0;
            foreach (WordLibrary wordLibrary in wordLibraryList)
            {
                currentStatus++;
                processMessage = "生成目标编码:" + currentStatus + "/" + countWord;
                if (wordLibrary.CodeType == codeType)
                {
                    continue;
                }
                if (wordLibrary.CodeType == CodeType.English)
                {
                    // English entries use their own lower-cased text as the code. Invariant
                    // casing keeps the code independent of the current culture
                    // (e.g. "I" must not become a dotless "ı" under tr-TR).
                    wordLibrary.SetCode(CodeType.English, wordLibrary.Word.ToLowerInvariant());
                    continue;
                }
                try
                {
                    generater.GetCodeOfWordLibrary(wordLibrary);
                }
                catch (Exception ex)
                {
                    // Best effort: a failing entry is logged and processing continues.
                    Debug.WriteLine("生成编码失败" + ex.Message);
                }
                // The entry is re-tagged even when generation failed, as long as the target is known.
                if (codeType != CodeType.Unknown)
                {
                    wordLibrary.CodeType = codeType;
                }
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Generates codes of the requested type for every entry in the list that
        /// is not already tagged with that code type. Before generation the word
        /// text is filtered according to <c>FilterConfig</c> (full-width folding,
        /// removal of digits / letters / spaces / punctuation, Chinese-number
        /// translation). Words that still contain letters, digits or punctuation
        /// that are configured to be kept are split into runs of the same character
        /// class, and each run is encoded separately.
        /// Progress is reported through <c>countWord</c>, <c>currentStatus</c> and
        /// <c>processMessage</c>.
        /// </summary>
        /// <param name="wordLibraryList">Entries to process; they are modified in place.</param>
        /// <param name="codeType">
        /// Target code type. When the first entry has no code and the target is
        /// <c>UserDefinePhrase</c>, Pinyin is used instead.
        /// </param>
        private void GenerateDestinationCode(WordLibraryList wordLibraryList, CodeType codeType)
        {
            if (wordLibraryList.Count == 0)
            {
                return;
            }
            if (wordLibraryList[0].CodeType == CodeType.NoCode && codeType == CodeType.UserDefinePhrase)
            {
                codeType = CodeType.Pinyin;
            }
            IWordCodeGenerater generater = CodeTypeHelper.GetGenerater(codeType);

            if (generater == null) // Unknown encoding type: no encoding is performed.
            {
                return;
            }
            countWord     = wordLibraryList.Count;
            currentStatus = 0;
            Regex spaceRegex     = new Regex("(?=[^a-zA-Z])\\s+");
            Regex numberRegex    = new Regex("[0-90-9]+");
            Regex englishRegex   = new Regex("[a-zA-Za-zA-Z]+");
            Regex fullWidthRegex = new Regex("[\uff00-\uff5e]+");
            // Earlier pattern variants, kept for reference:
            // Regex fullWidthRegex = new Regex("[a-zA-Z0-9]+");
            // Regex punctuationRegex = new Regex("[-・·&%']");
            Regex punctuationRegex = new Regex("[\u0021-\u002f\u003a-\u0040\u005b-\u0060\u007b-\u008f\u00a0-\u00bf\u00d7\u00f7\u2000-\u2bff\u3000-\u303f\u30a0\u30fb\uff01-\uff0f\uff1a-\uff20\uff5b-\uff65]");


            foreach (WordLibrary wordLibrary in wordLibraryList)
            {
                currentStatus++;
                processMessage = "生成目标编码:" + currentStatus + "/" + countWord;
                if (wordLibrary.CodeType == codeType)
                {
                    continue;
                }
                if (wordLibrary.CodeType == CodeType.English)
                {
                    // English entries use their own lower-cased text as the code. Invariant
                    // casing keeps the code independent of the current culture.
                    wordLibrary.SetCode(CodeType.English, wordLibrary.Word.ToLowerInvariant());
                    continue;
                }

                // Captured inside the try block; used to undo temporary Word replacements on failure.
                string originalWord = null;
                try
                {
                    originalWord = wordLibrary.Word;
                    string word = originalWord;

                    if (FilterConfig.FullWidth && fullWidthRegex.IsMatch(word))
                    {
                        // Shift characters in U+FF00..U+FF5E down by 65248 (0xFEE0) code points.
                        char[] chars = word.ToCharArray();
                        for (int i = 0; i < chars.Length; i++)
                        {
                            if (chars[i] <= 0xff5e && chars[i] >= 0xff00)
                            {
                                chars[i] = (char)(chars[i] - 65248);
                            }
                        }
                        word = new String(chars);
                    }

                    // NOTE(review): the underscore-suffixed flags trigger REMOVAL of the
                    // corresponding characters here; their exact meaning is defined in
                    // FilterConfig, which is not visible from this method.
                    if (FilterConfig.KeepNumber_)
                    {
                        word = numberRegex.Replace(word, "");
                    }

                    if (FilterConfig.KeepEnglish_)
                    {
                        word = englishRegex.Replace(word, "");
                    }

                    if (FilterConfig.KeepSpace_)
                    {
                        if (FilterConfig.KeepSpace == false)
                        {
                            word = word.Replace(" ", "");
                        }
                        else
                        {
                            word = spaceRegex.Replace(word, "");
                        }
                    }

                    if (FilterConfig.KeepPunctuation_)
                    {
                        word = punctuationRegex.Replace(word, "");
                    }

                    if (FilterConfig.ChsNumber)
                    {
                        word = TranslateChineseNumber(word);
                    }

                    if ((englishRegex.IsMatch(word) && FilterConfig.KeepEnglish) || (numberRegex.IsMatch(word) && FilterConfig.KeepNumber) || (punctuationRegex.IsMatch(word) && FilterConfig.KeepPunctuation))
                    {
                        // Mixed content: split the word into runs of one character class and
                        // encode each run on its own.
                        StringBuilder          input  = new StringBuilder();
                        List <IList <string> > output = new List <IList <string> >();

                        // Character classes: 0 = other, 1 = ASCII digit, 2 = ASCII letter, 3 = punctuation.
                        int clipType = -1; int type = 0;

                        foreach (char ch in word)
                        {
                            if (ch >= 0x30 && ch <= 0x39)
                            {
                                type = 1;
                            }
                            else if (ch >= 0x41 && ch <= 0x5a)
                            {
                                type = 2;
                            }
                            else if (ch >= 0x61 && ch <= 0x7a)
                            {
                                type = 2;
                            }
                            else if (ch == 0x20 && FilterConfig.KeepSpace && clipType == 2)
                            {
                                // A space inside a letter run stays part of that run.
                                type = 2;
                            }
                            else if ("-・&%'".Contains(ch))
                            {
                                type = 3;
                            }
                            else if (punctuationRegex.IsMatch(ch.ToString()))
                            {
                                type = 3;
                            }
                            else
                            {
                                type = 0;
                            }

                            if (input.Length < 1)
                            {
                                clipType = type;
                                input.Append(ch);
                            }
                            else if (type == clipType)
                            {
                                input.Append(ch);
                            }
                            else
                            {
                                // Class changed: emit the finished run and start a new one.
                                AppendSegmentCodes(wordLibrary, generater, input.ToString(), clipType, output);
                                input.Clear();
                                input.Append(ch);
                                clipType = type;
                            }
                        }

                        // Emit the trailing run.
                        if (input.Length > 0)
                        {
                            AppendSegmentCodes(wordLibrary, generater, input.ToString(), clipType, output);
                        }

                        wordLibrary.Word  = originalWord;
                        wordLibrary.Codes = new Code(output);
                    }
                    else
                    {
                        if (word.Equals(originalWord))
                        {
                            generater.GetCodeOfWordLibrary(wordLibrary);
                        }
                        else
                        {
                            // Encode the filtered text, then put the original word back.
                            wordLibrary.Word = word;
                            generater.GetCodeOfWordLibrary(wordLibrary);
                            wordLibrary.Word = originalWord;
                        }
                    }
                }
                catch (Exception ex)
                {
                    Debug.WriteLine("生成编码失败" + ex.Message);
                    // A failure may happen while Word temporarily holds a fragment or the
                    // filtered text; never leave the entry with a corrupted word.
                    if (originalWord != null)
                    {
                        wordLibrary.Word = originalWord;
                    }
                }
                // The entry is re-tagged even when generation failed, as long as the target is known.
                if (codeType != CodeType.Unknown)
                {
                    wordLibrary.CodeType = codeType;
                }
            }
        }

        /// <summary>
        /// Appends the code(s) for one run of same-class characters to <paramref name="output"/>.
        /// Kept letter runs (optionally prefixed with '_'), kept digit runs and kept
        /// punctuation runs are emitted verbatim; any other run is encoded by the
        /// generater, which temporarily overwrites <c>wordLibrary.Word</c> and
        /// <c>wordLibrary.CodeType</c> (the caller restores the word afterwards).
        /// </summary>
        private void AppendSegmentCodes(WordLibrary wordLibrary, IWordCodeGenerater generater, string segment, int segmentType, List <IList <string> > output)
        {
            if (segmentType == 2 && FilterConfig.KeepEnglish)
            {
                if (FilterConfig.needEnglishTag())
                {
                    output.Add(new List <string> {
                        '_' + segment
                    });
                }
                else
                {
                    output.Add(new List <string> {
                        segment
                    });
                }
            }
            else if ((segmentType == 1 && FilterConfig.KeepNumber) || (segmentType == 3 && FilterConfig.KeepPunctuation))
            {
                output.Add(new List <string> {
                    segment
                });
            }
            else
            {
                wordLibrary.Word     = segment;
                wordLibrary.CodeType = CodeType.NoCode;
                generater.GetCodeOfWordLibrary(wordLibrary);
                output.AddRange(wordLibrary.Codes);
            }
        }