Exemplo n.º 1
0
        /// <summary>
        /// Generates codes of type <paramref name="codeType"/> for every entry of
        /// <paramref name="wordLibraryList"/> whose code type differs from it.
        /// </summary>
        /// <remarks>
        /// Progress is published through <c>countWord</c>, <c>currentStatus</c> and
        /// <c>processMessage</c>. Entries already marked <c>CodeType.English</c> get their
        /// lower-cased word as code. For all other entries the word is filtered according to
        /// the <c>FilterConfig</c> flags; if the filtered text still contains Latin letters,
        /// digits or punctuation that are configured to be kept, it is split into runs of
        /// same-typed characters and each run is encoded separately.
        /// While the generator runs, the entry's <c>Word</c> is temporarily overwritten; it is
        /// put back afterwards, including when code generation throws. A failure for one entry
        /// is written to <see cref="Debug"/> and does not stop the loop.
        /// </remarks>
        /// <param name="wordLibraryList">Entries to encode; an empty list is a no-op.</param>
        /// <param name="codeType">
        /// Target code type. Replaced by <c>CodeType.Pinyin</c> when it is
        /// <c>CodeType.UserDefinePhrase</c> and the first entry has <c>CodeType.NoCode</c>.
        /// </param>
        private void GenerateDestinationCode(WordLibraryList wordLibraryList, CodeType codeType)
        {
            if (wordLibraryList.Count == 0)
            {
                return;
            }
            if (wordLibraryList[0].CodeType == CodeType.NoCode && codeType == CodeType.UserDefinePhrase)
            {
                codeType = CodeType.Pinyin;
            }
            IWordCodeGenerater generater = CodeTypeHelper.GetGenerater(codeType);

            if (generater == null) // Unknown code type: no codes are generated.
            {
                return;
            }

            // Distance between a full-width form (U+FF00..U+FF5E) and its ASCII counterpart.
            const int fullWidthOffset = 65248;

            countWord     = wordLibraryList.Count;
            currentStatus = 0;
            Regex spaceRegex     = new Regex("(?=[^a-zA-Z])\\s+");
            // NOTE(review): the duplicated ranges in the next two patterns look like full-width
            // ranges that were normalised to ASCII at some point - confirm against the upstream source.
            Regex numberRegex    = new Regex("[0-90-9]+");
            Regex englishRegex   = new Regex("[a-zA-Za-zA-Z]+");
            Regex fullWidthRegex = new Regex("[\uff00-\uff5e]+");
            Regex punctuationRegex = new Regex("[\u0021-\u002f\u003a-\u0040\u005b-\u0060\u007b-\u008f\u00a0-\u00bf\u00d7\u00f7\u2000-\u2bff\u3000-\u303f\u30a0\u30fb\uff01-\uff0f\uff1a-\uff20\uff5b-\uff65]");

            foreach (WordLibrary wordLibrary in wordLibraryList)
            {
                currentStatus++;
                processMessage = "生成目标编码:" + currentStatus + "/" + countWord;
                if (wordLibrary.CodeType == codeType)
                {
                    continue;
                }
                if (wordLibrary.CodeType == CodeType.English)
                {
                    wordLibrary.SetCode(CodeType.English, wordLibrary.Word.ToLower());
                    continue;
                }

                // Captured inside the try block (so a throwing getter is still handled there),
                // but declared here so the catch block can put the word back.
                string originalWord = null;
                try
                {
                    originalWord = wordLibrary.Word;
                    string word  = originalWord;

                    if (FilterConfig.FullWidth && fullWidthRegex.IsMatch(word))
                    {
                        // Map every full-width character onto its ASCII counterpart.
                        char[] chars = word.ToCharArray();
                        for (int i = 0; i < chars.Length; i++)
                        {
                            if (chars[i] <= 0xff5e && chars[i] >= 0xff00)
                            {
                                chars[i] = (char)(chars[i] - fullWidthOffset);
                            }
                        }
                        word = new String(chars);
                    }

                    if (FilterConfig.KeepNumber_)
                    {
                        word = numberRegex.Replace(word, "");
                    }

                    if (FilterConfig.KeepEnglish_)
                    {
                        word = englishRegex.Replace(word, "");
                    }

                    if (FilterConfig.KeepSpace_)
                    {
                        if (FilterConfig.KeepSpace == false)
                        {
                            word = word.Replace(" ", "");
                        }
                        else
                        {
                            word = spaceRegex.Replace(word, "");
                        }
                    }

                    if (FilterConfig.KeepPunctuation_)
                    {
                        word = punctuationRegex.Replace(word, "");
                    }

                    if (FilterConfig.ChsNumber)
                    {
                        word = TranslateChineseNumber(word);
                    }

                    bool hasKeptForeignChars =
                        (englishRegex.IsMatch(word) && FilterConfig.KeepEnglish)
                        || (numberRegex.IsMatch(word) && FilterConfig.KeepNumber)
                        || (punctuationRegex.IsMatch(word) && FilterConfig.KeepPunctuation);

                    if (hasKeptForeignChars)
                    {
                        StringBuilder         input  = new StringBuilder();
                        List<IList<string>>   output = new List<IList<string>>();

                        // Character classes: 0 = other, 1 = ASCII digit, 2 = ASCII letter, 3 = punctuation.
                        // clipType is the class of the run currently collected in 'input'.
                        int clipType = -1;
                        int type     = 0;

                        foreach (char c in word)
                        {
                            if (c >= 0x30 && c <= 0x39)
                            {
                                type = 1;
                            }
                            else if (c >= 0x41 && c <= 0x5a)
                            {
                                type = 2;
                            }
                            else if (c >= 0x61 && c <= 0x7a)
                            {
                                type = 2;
                            }
                            else if (c == 0x20 && FilterConfig.KeepSpace && clipType == 2)
                            {
                                // A space inside a run of letters stays part of that run.
                                type = 2;
                            }
                            else if ("-・&%'".Contains(c))
                            {
                                type = 3;
                            }
                            else if (punctuationRegex.IsMatch(c.ToString()))
                            {
                                type = 3;
                            }
                            else
                            {
                                type = 0;
                            }

                            if (input.Length < 1)
                            {
                                clipType = type;
                                input.Append(c);
                            }
                            else if (type == clipType)
                            {
                                input.Append(c);
                            }
                            else
                            {
                                // Class changed: emit the finished run and start a new one with c.
                                AppendSegmentCode(wordLibrary, generater, input.ToString(), clipType, output);
                                input.Clear();
                                input.Append(c);
                                clipType = type;
                            }
                        }

                        if (input.Length > 0)
                        {
                            AppendSegmentCode(wordLibrary, generater, input.ToString(), clipType, output);
                        }

                        wordLibrary.Word  = originalWord;
                        wordLibrary.Codes = new Code(output);
                    }
                    else
                    {
                        if (word.Equals(originalWord))
                        {
                            generater.GetCodeOfWordLibrary(wordLibrary);
                        }
                        else
                        {
                            // Encode the filtered text, then put the unfiltered word back.
                            wordLibrary.Word = word;
                            generater.GetCodeOfWordLibrary(wordLibrary);
                            wordLibrary.Word = originalWord;
                        }
                    }
                }
                catch (Exception ex)
                {
                    // The generator may have failed while Word held a filtered word or a single
                    // segment; restore the original so the entry is not left corrupted.
                    // originalWord is null only if Word was never overwritten.
                    if (originalWord != null)
                    {
                        wordLibrary.Word = originalWord;
                    }
                    Debug.WriteLine("生成编码失败" + ex.Message);
                }
                if (codeType != CodeType.Unknown)
                {
                    wordLibrary.CodeType = codeType;
                }
            }
        }

        /// <summary>
        /// Appends the code for one finished run of same-class characters to <paramref name="output"/>.
        /// </summary>
        /// <remarks>
        /// Letter runs (class 2) are copied verbatim when <c>FilterConfig.KeepEnglish</c> is set,
        /// prefixed with '_' if <c>FilterConfig.needEnglishTag()</c> returns true. Digit runs (class 1)
        /// and punctuation runs (class 3) are copied verbatim when the matching keep flag is set.
        /// Every other run is encoded by <paramref name="generater"/>; for that,
        /// <paramref name="wordLibrary"/>'s <c>Word</c> and <c>CodeType</c> are overwritten and the
        /// caller is responsible for restoring <c>Word</c>.
        /// </remarks>
        /// <param name="wordLibrary">Entry used as scratch input for the generator.</param>
        /// <param name="generater">Generator for runs that are not copied verbatim.</param>
        /// <param name="segment">Text of the finished run.</param>
        /// <param name="segmentType">Character class of the run (0 other, 1 digit, 2 letter, 3 punctuation).</param>
        /// <param name="output">Per-segment code list that receives the result.</param>
        private void AppendSegmentCode(WordLibrary wordLibrary, IWordCodeGenerater generater, string segment, int segmentType, List<IList<string>> output)
        {
            if (segmentType == 2 && FilterConfig.KeepEnglish)
            {
                string text = FilterConfig.needEnglishTag() ? '_' + segment : segment;
                output.Add(new List<string> {
                    text
                });
            }
            else if ((segmentType == 1 && FilterConfig.KeepNumber) || (segmentType == 3 && FilterConfig.KeepPunctuation))
            {
                output.Add(new List<string> {
                    segment
                });
            }
            else
            {
                wordLibrary.Word     = segment;
                wordLibrary.CodeType = CodeType.NoCode;
                generater.GetCodeOfWordLibrary(wordLibrary);
                output.AddRange(wordLibrary.Codes);
            }
        }