예제 #1
0
        /// <summary>
        /// Assigns a code of <paramref name="codeType"/> to every entry of the list whose
        /// <c>CodeType</c> differs from it, updating the progress members while iterating.
        /// </summary>
        /// <param name="wordLibraryList">Entries to process; each one is updated in place via <c>SetCode</c>.</param>
        /// <param name="codeType">Target code type. When no generator is available for it, nothing is done.</param>
        private void GenerateDestinationCode(WordLibraryList wordLibraryList, CodeType codeType)
        {
            var encoder = CodeTypeHelper.GetGenerater(codeType);

            // Unknown encoding scheme: there is nothing to generate.
            if (encoder != null)
            {
                countWord     = wordLibraryList.Count;
                currentStatus = 0;
                foreach (WordLibrary entry in wordLibraryList)
                {
                    currentStatus++;
                    processMessage = "生成目标编码:" + currentStatus + "/" + countWord;

                    // Entries that already carry the target code type are left untouched.
                    if (entry.CodeType != codeType)
                    {
                        if (!encoder.IsBaseOnOldCode)
                        {
                            // The generator works from the plain word text.
                            entry.SetCode(codeType, encoder.GetCodeOfString(entry.Word));
                        }
                        else
                        {
                            // The generator is handed the whole entry instead of just the word.
                            entry.SetCode(codeType, encoder.GetCodeOfWordLibrary(entry));
                        }
                    }
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Runs the generator for <paramref name="codeType"/> over every entry whose
        /// <c>CodeType</c> differs from it and then stamps the entry with the target type.
        /// </summary>
        /// <param name="wordLibraryList">Entries to process; modified in place.</param>
        /// <param name="codeType">Target code type. When no generator is available for it, nothing is done.</param>
        /// <remarks>
        /// A failure while generating the code of one entry is only written to the debug
        /// output; the entry is still stamped with the target type afterwards.
        /// </remarks>
        private void GenerateDestinationCode(WordLibraryList wordLibraryList, CodeType codeType)
        {
            IWordCodeGenerater encoder = CodeTypeHelper.GetGenerater(codeType);
            if (encoder == null)
            {
                // Unknown encoding scheme: do not encode anything.
                return;
            }

            // The parameter never changes inside the loop, so decide once whether to stamp entries.
            bool stampTargetType = codeType != CodeType.Unknown;

            countWord     = wordLibraryList.Count;
            currentStatus = 0;
            foreach (WordLibrary entry in wordLibraryList)
            {
                currentStatus++;
                processMessage = "生成目标编码:" + currentStatus + "/" + countWord;

                // Entries that already carry the target code type are skipped entirely.
                if (entry.CodeType != codeType)
                {
                    try
                    {
                        encoder.GetCodeOfWordLibrary(entry);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: report the failure and carry on with the next entry.
                        Debug.WriteLine("生成编码失败" + ex.Message);
                    }

                    if (stampTargetType)
                    {
                        entry.CodeType = codeType;
                    }
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Formats one entry as tab-separated text: word, code, count.
        /// </summary>
        /// <param name="wl">The entry to export.</param>
        /// <returns>
        /// A single "word\tcode\tcount" line, or several such lines joined with "\r\n" when the
        /// generator reports <c>Is1CharMutiCode</c> (one line per code returned for the word).
        /// </returns>
        public string ExportLine(WordLibrary wl)
        {
            // Resolve the generator on first use.
            if (codeGenerater == null)
            {
                codeGenerater = CodeTypeHelper.GetGenerater(CodeType);
            }

            var line = new StringBuilder();

            if (!codeGenerater.Is1CharMutiCode)
            {
                line.Append(wl.Word).Append("\t");

                if (CodeType == CodeType.Pinyin || CodeType == CodeType.TerraPinyin)
                {
                    line.Append(wl.GetPinYinString(" ", BuildType.None));
                }
                else if (CodeType == wl.CodeType)
                {
                    // The entry is already in the export code type: take its first stored code.
                    line.Append(wl.Codes[0][0]);
                }
                else if (codeGenerater.Is1Char1Code)
                {
                    line.Append(CollectionHelper.ListToString(codeGenerater.GetCodeOfString(wl.Word), " "));
                }
                else
                {
                    line.Append(CollectionHelper.ListToString(codeGenerater.GetCodeOfString(wl.Word)));
                }

                line.Append("\t").Append(wl.Count);
                return(line.ToString());
            }

            // Multi-code generator: emit one line per code, separated (not terminated) by CRLF.
            IList<string> codes = codeGenerater.GetCodeOfString(wl.Word);
            for (int index = 0; index < codes.Count; index++)
            {
                if (index > 0)
                {
                    line.Append("\r\n");
                }
                line.Append(wl.Word).Append("\t").Append(codes[index]).Append("\t").Append(wl.Count);
            }
            return(line.ToString());
        }
예제 #4
0
        /// <summary>
        /// Exports every entry of the list through <c>ExportLine</c> and concatenates the
        /// results, each followed by <c>lineSplitString</c>.
        /// </summary>
        /// <param name="wlList">Entries to export, in order.</param>
        /// <returns>The concatenated export text; empty when the list has no entries.</returns>
        public string Export(WordLibraryList wlList)
        {
            // Refresh the generator for the current code type before exporting.
            codeGenerater = CodeTypeHelper.GetGenerater(CodeType);

            var buffer = new StringBuilder();
            foreach (WordLibrary entry in wlList)
            {
                buffer.Append(ExportLine(entry)).Append(lineSplitString);
            }
            return(buffer.ToString());
        }
예제 #5
0
        /// <summary>
        /// Runs the generator for <paramref name="codeType"/> over every entry whose
        /// <c>CodeType</c> differs from it and then stamps the entry with the target type.
        /// </summary>
        /// <param name="wordLibraryList">Entries to process; modified in place. An empty list is a no-op.</param>
        /// <param name="codeType">
        /// Target code type. <c>UserDefinePhrase</c> is replaced by <c>Pinyin</c> when the first
        /// entry has <c>CodeType.NoCode</c>. When no generator exists for the type, nothing is done.
        /// </param>
        /// <remarks>
        /// Entries of type <c>CodeType.English</c> get their lower-cased word as code and keep
        /// their type. A generator failure is only written to the debug output; the entry is
        /// still stamped with the target type afterwards.
        /// </remarks>
        private void GenerateDestinationCode(WordLibraryList wordLibraryList, CodeType codeType)
        {
            if (wordLibraryList.Count == 0)
            {
                return;
            }

            // Only the first entry is inspected to decide on the Pinyin substitution.
            if (wordLibraryList[0].CodeType == CodeType.NoCode && codeType == CodeType.UserDefinePhrase)
            {
                codeType = CodeType.Pinyin;
            }

            IWordCodeGenerater encoder = CodeTypeHelper.GetGenerater(codeType);
            if (encoder == null)
            {
                // Unknown encoding scheme: do not encode anything.
                return;
            }

            // codeType is final from here on, so decide once whether entries get stamped.
            bool stampTargetType = codeType != CodeType.Unknown;

            countWord     = wordLibraryList.Count;
            currentStatus = 0;
            foreach (WordLibrary entry in wordLibraryList)
            {
                currentStatus++;
                processMessage = "生成目标编码:" + currentStatus + "/" + countWord;

                if (entry.CodeType == codeType)
                {
                    continue;
                }

                if (entry.CodeType == CodeType.English)
                {
                    // English entries are coded with their own lower-cased text.
                    entry.SetCode(CodeType.English, entry.Word.ToLower());
                    continue;
                }

                try
                {
                    encoder.GetCodeOfWordLibrary(entry);
                }
                catch (Exception ex)
                {
                    // Best effort: report the failure and carry on.
                    Debug.WriteLine("生成编码失败" + ex.Message);
                }

                if (stampTargetType)
                {
                    entry.CodeType = codeType;
                }
            }
        }
예제 #6
0
        /// <summary>
        /// Exports every entry of the list through <c>ExportLine</c>, skipping entries whose
        /// line is null or empty, and returns the whole text as a single list element.
        /// </summary>
        /// <param name="wlList">Entries to export, in order.</param>
        /// <returns>
        /// A list with exactly one string: all non-empty lines, each followed by
        /// <c>lineSplitString</c>.
        /// </returns>
        public IList <string> Export(WordLibraryList wlList)
        {
            // Refresh the generator for the current code type before exporting.
            codeGenerater = CodeTypeHelper.GetGenerater(CodeType);

            var buffer = new StringBuilder();
            foreach (WordLibrary entry in wlList)
            {
                string line = ExportLine(entry);
                if (string.IsNullOrEmpty(line))
                {
                    // Nothing was produced for this entry; leave it out.
                    continue;
                }
                buffer.Append(line).Append(lineSplitString);
            }

            var result = new List<string>();
            result.Add(buffer.ToString());
            return(result);
        }
예제 #7
0
        /// <summary>
        /// Resolves the code generator for the pattern's code type, configures it from the
        /// user-defined pattern when that type is <c>CodeType.UserDefine</c>, and finally
        /// builds the line format.
        /// </summary>
        /// <exception cref="Exception">
        /// The code type is <c>UserDefine</c> but no mapping table path has been set.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// The code type is <c>UserDefine</c> but the resolved generator is missing or is not a
        /// <c>SelfDefiningCodeGenerater</c>.
        /// </exception>
        public void Prepare()
        {
            codeGenerater = CodeTypeHelper.GetGenerater(this.UserDefiningPattern.CodeType);
            if (UserDefiningPattern.CodeType == CodeType.UserDefine)
            {
                if (string.IsNullOrEmpty(UserDefiningPattern.MappingTablePath))
                {
                    throw new Exception("未指定字符编码映射文件,无法对词库进行自定义编码的生成");
                }

                // Validate the generator before building the mapping dictionary, so that a
                // missing or unexpected generator fails with a clear message instead of a
                // NullReferenceException on the first property assignment below.
                var g = codeGenerater as SelfDefiningCodeGenerater;
                if (g == null)
                {
                    throw new InvalidOperationException(
                        "The code generator resolved for CodeType.UserDefine is not a SelfDefiningCodeGenerater; the mapping table cannot be applied.");
                }

                IDictionary <char, IList <string> > dict =
                    UserCodingHelper.GetCodingDict(UserDefiningPattern.MappingTablePath,
                                                   UserDefiningPattern.TextEncoding);
                g.MappingDictionary  = dict;
                g.Is1Char1Code       = UserDefiningPattern.IsPinyinFormat;
                g.MutiWordCodeFormat = UserDefiningPattern.MutiWordCodeFormat;
            }

            BuildLineFormat();
        }
예제 #8
0
        //private RimeConfigForm form;

        /// <summary>
        /// Formats one entry as tab-separated text: word, code, rank.
        /// </summary>
        /// <param name="wl">The entry to export; its codes are regenerated unless the fast path applies.</param>
        /// <returns>
        /// The formatted line (several lines joined by <c>lineSplitString</c> when the generator
        /// reports <c>Is1CharMutiCode</c>), or <c>null</c> when code generation threw.
        /// </returns>
        public string ExportLine(WordLibrary wl)
        {
            // Fast path: the entry is already in the export code type and that type is not a
            // pinyin variant, so its first stored code is written out as-is.
            if (CodeType == wl.CodeType && !(CodeType == CodeType.Pinyin || CodeType == CodeType.TerraPinyin))
            {
                return(wl.Word + "\t" + wl.Codes[0][0] + "\t" + wl.Rank);
            }

            // Resolve the generator on first use.
            if (codeGenerater == null)
            {
                codeGenerater = CodeTypeHelper.GetGenerater(CodeType);
            }

            try
            {
                codeGenerater.GetCodeOfWordLibrary(wl);
            }
            catch (Exception ex)
            {
                // Generation failed: flag it in debug builds and produce no line for this entry.
                Debug.Fail(ex.Message);
                return(null);
            }

            var line = new StringBuilder();

            if (!codeGenerater.Is1CharMutiCode)
            {
                line.Append(wl.Word).Append("\t");

                if (CodeType == CodeType.Pinyin || CodeType == CodeType.TerraPinyin)
                {
                    line.Append(wl.GetPinYinString(" ", BuildType.None));
                }
                else if (CodeType == wl.CodeType)
                {
                    line.Append(wl.Codes[0][0]);
                }
                else
                {
                    line.Append(wl.Codes.ToCodeString(" ")[0]);
                }

                line.Append("\t").Append(wl.Rank);
                return(line.ToString());
            }

            // Multi-code generator: one line per code string, separated (not terminated)
            // by the configured line separator.
            IList<string> codes = codeGenerater.GetCodeOfString(wl.Word).ToCodeString(" ");
            for (int index = 0; index < codes.Count; index++)
            {
                if (index > 0)
                {
                    line.Append(lineSplitString);
                }
                line.Append(wl.Word).Append("\t").Append(codes[index]).Append("\t").Append(wl.Rank);
            }
            return(line.ToString());
        }
예제 #9
0
        /// <summary>
        /// Generates the code of type <paramref name="codeType"/> for every entry whose
        /// <c>CodeType</c> differs from it, after filtering the word text according to the
        /// <c>FilterConfig</c> switches, and then stamps the entry with the target type.
        /// </summary>
        /// <param name="wordLibraryList">Entries to process; modified in place. An empty list is a no-op.</param>
        /// <param name="codeType">
        /// Target code type. <c>UserDefinePhrase</c> is replaced by <c>Pinyin</c> when the first
        /// entry has <c>CodeType.NoCode</c>. When no generator exists for the type, nothing is done.
        /// </param>
        /// <remarks>
        /// The word text is only rewritten temporarily while codes are generated; the original
        /// text is always put back, including when generation throws. A failure is written to
        /// the debug output and the entry is still stamped with the target type.
        /// </remarks>
        private void GenerateDestinationCode(WordLibraryList wordLibraryList, CodeType codeType)
        {
            if (wordLibraryList.Count == 0)
            {
                return;
            }
            // Only the first entry is inspected to decide on the Pinyin substitution.
            if (wordLibraryList[0].CodeType == CodeType.NoCode && codeType == CodeType.UserDefinePhrase)
            {
                codeType = CodeType.Pinyin;
            }
            IWordCodeGenerater generater = CodeTypeHelper.GetGenerater(codeType);

            if (generater == null) // Unknown encoding scheme: do not encode anything.
            {
                return;
            }
            countWord     = wordLibraryList.Count;
            currentStatus = 0;

            // NOTE(review): the number and letter classes below list the same ASCII ranges twice.
            // This looks like full-width ranges (U+FF10-U+FF19, U+FF21-U+FF3A, U+FF41-U+FF5A)
            // that were normalised to ASCII somewhere along the way - confirm against the
            // upstream source before changing the patterns.
            Regex spaceRegex       = new Regex("(?=[^a-zA-Z])\\s+");
            Regex numberRegex      = new Regex("[0-90-9]+");
            Regex englishRegex     = new Regex("[a-zA-Za-zA-Z]+");
            Regex fullWidthRegex   = new Regex("[\uff00-\uff5e]+");
            Regex punctuationRegex = new Regex("[\u0021-\u002f\u003a-\u0040\u005b-\u0060\u007b-\u008f\u00a0-\u00bf\u00d7\u00f7\u2000-\u2bff\u3000-\u303f\u30a0\u30fb\uff01-\uff0f\uff1a-\uff20\uff5b-\uff65]");

            foreach (WordLibrary wordLibrary in wordLibraryList)
            {
                currentStatus++;
                processMessage = "生成目标编码:" + currentStatus + "/" + countWord;
                if (wordLibrary.CodeType == codeType)
                {
                    continue;
                }
                if (wordLibrary.CodeType == CodeType.English)
                {
                    // English entries are coded with their own lower-cased text.
                    wordLibrary.SetCode(CodeType.English, wordLibrary.Word.ToLower());
                    continue;
                }

                // Remember the original text: the entry's Word is overwritten temporarily below.
                string word_0 = wordLibrary.Word;
                try
                {
                    string word = word_0;

                    if (FilterConfig.FullWidth && fullWidthRegex.IsMatch(word))
                    {
                        // Shift U+FF00-U+FF5E down by 0xFEE0 (65248) onto U+0020-U+007E.
                        char[] c = word.ToCharArray();
                        for (int i = 0; i < c.Length; i++)
                        {
                            if (c[i] <= 0xff5e && c[i] >= 0xff00)
                            {
                                c[i] = (char)(c[i] - 65248);
                            }
                        }
                        word = new String(c);
                    }

                    // Each of the following switches strips one category of characters
                    // from the text that is handed to the generator.
                    if (FilterConfig.KeepNumber_)
                    {
                        word = numberRegex.Replace(word, "");
                    }

                    if (FilterConfig.KeepEnglish_)
                    {
                        word = englishRegex.Replace(word, "");
                    }

                    if (FilterConfig.KeepSpace_)
                    {
                        if (FilterConfig.KeepSpace == false)
                        {
                            word = word.Replace(" ", "");
                        }
                        else
                        {
                            // Only whitespace runs matched by spaceRegex are removed here.
                            word = spaceRegex.Replace(word, "");
                        }
                    }

                    if (FilterConfig.KeepPunctuation_)
                    {
                        word = punctuationRegex.Replace(word, "");
                    }

                    if (FilterConfig.ChsNumber)
                    {
                        word = TranslateChineseNumber(word);
                    }

                    if ((englishRegex.IsMatch(word) && FilterConfig.KeepEnglish) || (numberRegex.IsMatch(word) && FilterConfig.KeepNumber) || (punctuationRegex.IsMatch(word) && FilterConfig.KeepPunctuation))
                    {
                        // Mixed content: split the word into runs of the same character class
                        // (0 = other, 1 = digit, 2 = letter, 3 = punctuation) and code each run.
                        StringBuilder          input  = new StringBuilder();
                        List <IList <string> > output = new List <IList <string> >();

                        int clipType = -1;

                        foreach (char c in word)
                        {
                            int type;
                            if (c >= '0' && c <= '9')
                            {
                                type = 1;
                            }
                            else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
                            {
                                type = 2;
                            }
                            else if (c == ' ' && FilterConfig.KeepSpace && clipType == 2)
                            {
                                // A space directly after letters stays part of the letter run.
                                type = 2;
                            }
                            else if ("-・&%'".Contains(c) || punctuationRegex.IsMatch(c.ToString()))
                            {
                                type = 3;
                            }
                            else
                            {
                                type = 0;
                            }

                            if (input.Length < 1)
                            {
                                clipType = type;
                                input.Append(c);
                            }
                            else if (type == clipType)
                            {
                                input.Append(c);
                            }
                            else
                            {
                                // The class changed: emit the finished run and start a new one.
                                AppendSegmentCode(wordLibrary, generater, input.ToString(), clipType, output);
                                input.Clear();
                                input.Append(c);
                                clipType = type;
                            }
                        }

                        if (input.Length > 0)
                        {
                            AppendSegmentCode(wordLibrary, generater, input.ToString(), clipType, output);
                        }

                        wordLibrary.Word  = word_0;
                        wordLibrary.Codes = new Code(output);
                    }
                    else
                    {
                        if (word.Equals(word_0))
                        {
                            generater.GetCodeOfWordLibrary(wordLibrary);
                        }
                        else
                        {
                            // Generate from the filtered text, then put the original text back.
                            wordLibrary.Word = word;
                            generater.GetCodeOfWordLibrary(wordLibrary);
                            wordLibrary.Word = word_0;
                        }
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: report the failure and carry on with the next entry.
                    Debug.WriteLine("生成编码失败" + ex.Message);
                }
                finally
                {
                    // If anything above threw while Word held a filtered word or a fragment,
                    // the entry must not keep that temporary text.
                    if (wordLibrary.Word != word_0)
                    {
                        wordLibrary.Word = word_0;
                    }
                }
                if (codeType != CodeType.Unknown)
                {
                    wordLibrary.CodeType = codeType;
                }
            }
        }

        /// <summary>
        /// Appends the code for one run of same-class characters to <paramref name="output"/>.
        /// Kept letter, digit and punctuation runs are added literally (letters with a leading
        /// '_' when <c>FilterConfig.needEnglishTag()</c> is true); any other run is coded by
        /// <paramref name="generater"/>, which temporarily overwrites the entry's
        /// <c>Word</c> and <c>CodeType</c>.
        /// </summary>
        private void AppendSegmentCode(WordLibrary wordLibrary, IWordCodeGenerater generater, string segment, int segmentType, List <IList <string> > output)
        {
            if (segmentType == 2 && FilterConfig.KeepEnglish)
            {
                if (FilterConfig.needEnglishTag())
                {
                    output.Add(new List <string> {
                        '_' + segment
                    });
                }
                else
                {
                    output.Add(new List <string> {
                        segment
                    });
                }
            }
            else if ((segmentType == 1 && FilterConfig.KeepNumber) || (segmentType == 3 && FilterConfig.KeepPunctuation))
            {
                output.Add(new List <string> {
                    segment
                });
            }
            else
            {
                // The caller restores the entry's original Word once all runs are processed.
                wordLibrary.Word     = segment;
                wordLibrary.CodeType = CodeType.NoCode;
                generater.GetCodeOfWordLibrary(wordLibrary);
                output.AddRange(wordLibrary.Codes);
            }
        }