/// <summary>
/// Makes sure the entry's codes are in TerraPinyin form.
/// Entries already typed as TerraPinyin are left alone; entries typed as Pinyin
/// keep their imported pinyin and each code is passed through
/// <c>PinyinHelper.AddToneToPinyin</c>; anything else is delegated to the base class.
/// </summary>
/// <param name="wl">The entry whose codes are updated in place.</param>
public override void GetCodeOfWordLibrary(WordLibrary wl)
        {
            if (wl.CodeType == CodeType.TerraPinyin)
            {
                return;
            }

            if (wl.CodeType != CodeType.Pinyin)
            {
                base.GetCodeOfWordLibrary(wl);
                return;
            }

            // The entry was imported from a pinyin source: reuse its pinyin, adding the tone.
            for (int charIndex = 0; charIndex < wl.Codes.Count; charIndex++)
            {
                var candidates = wl.Codes[charIndex];
                for (int codeIndex = 0; codeIndex < candidates.Count; codeIndex++)
                {
                    string plain = candidates[codeIndex];
                    // The character at the same index in the word is passed along with its pinyin.
                    wl.Codes[charIndex][codeIndex] = PinyinHelper.AddToneToPinyin(wl.Word[charIndex], plain);
                }
            }
        }
Esempio n. 2
0
 /// <summary>
 /// Checks that <c>SpaceFilter.IsKeep</c> returns the expected decision for a word.
 /// </summary>
 /// <param name="word">Word assigned to the entry under test.</param>
 /// <param name="isKeep">Expected result of <c>IsKeep</c>.</param>
 public void SpaceFilterTest(string word,bool isKeep)
 {
     var wl = new WordLibrary();
     wl.Word = word;
     SpaceFilter filter=new SpaceFilter();
     // Expected value goes first so a failure message reports expected/actual correctly.
     Assert.AreEqual(isKeep, filter.IsKeep(wl));
 }
Esempio n. 3
0
 /// <summary>
 /// Makes sure the entry carries plain (tone-less) pinyin codes.
 /// Pinyin entries are left untouched; TerraPinyin entries have the trailing tone
 /// digit stripped from every code; any other entry gets codes generated from its
 /// word and is marked as Pinyin.
 /// </summary>
 /// <param name="wl">The entry whose codes are updated in place.</param>
 public override void GetCodeOfWordLibrary(WordLibrary wl)
 {
     if (wl.CodeType == CodeType.Pinyin)
     {
         return;
     }
     if (wl.CodeType == CodeType.TerraPinyin) // tones have to be removed
     {
         for (int i = 0; i < wl.Codes.Count; i++)
         {
             var row = wl.Codes[i];
             for (int j = 0; j < row.Count; j++)
             {
                 string s = row[j];
                 // Only strip when there really is a trailing tone digit: an empty code
                 // used to throw, and a code without a digit used to lose its last letter.
                 if (string.IsNullOrEmpty(s))
                 {
                     continue;
                 }
                 char last = s[s.Length - 1];
                 if (last >= '0' && last <= '9')
                 {
                     wl.Codes[i][j] = s.Remove(s.Length - 1); // remove tone
                 }
             }
         }
         return;
     }
     // Not pinyin of any kind: generate pinyin from the word itself.
     var code= GetCodeOfString(wl.Word);
     wl.Codes = code;
     wl.CodeType=CodeType.Pinyin;
 }
Esempio n. 4
0
 /// <summary>
 /// Checks that <c>ChinesePunctuationFilter.IsKeep</c> returns the expected decision for a word.
 /// </summary>
 /// <param name="word">Word assigned to the entry under test.</param>
 /// <param name="isKeep">Expected result of <c>IsKeep</c>.</param>
 public void ChinesePunctuationFilterTest(string word, bool isKeep)
 {
     var wl = new WordLibrary();
     wl.Word = word;
     ChinesePunctuationFilter filter = new ChinesePunctuationFilter();
     // Expected value goes first so a failure message reports expected/actual correctly.
     Assert.AreEqual(isKeep, filter.IsKeep(wl));
 }
 /// <summary>
 /// Builds the export line for an entry using the user-defined pattern.
 /// When a mapping table path is configured, codes are generated from the word;
 /// otherwise the entry must already carry pinyin, which is mapped per character.
 /// </summary>
 /// <param name="wl">The entry to export.</param>
 /// <returns>The string produced by <c>UserDefiningPattern.BuildWLString</c>.</returns>
 /// <exception cref="Exception">
 /// No mapping table is configured and the entry is not pinyin or has no pinyin codes.
 /// </exception>
 public string ExportLine(WordLibrary wl)
 {
     // Custom code mode: a mapping table is available.
     if (!string.IsNullOrEmpty(UserDefiningPattern.MappingTablePath))
     {
         var generated = codeGenerater.GetCodeOfString(wl.Word);
         return UserDefiningPattern.BuildWLString(wl.Word, generated[0], wl.Count);
     }

     // Without a mapping table the entry has to be pinyin and actually contain pinyin.
     bool hasPinyin = wl.CodeType == CodeType.Pinyin
                      && wl.Codes.Count != 0
                      && wl.Codes[0].Count != 0;
     if (!hasPinyin)
     {
         throw new Exception("未指定字符编码映射文件,无法对词库进行自定义编码的生成");
     }

     // Custom pinyin format: the first pinyin seen for each distinct character wins.
     IDictionary<char, string> pinyinByChar = new Dictionary<char, string>();
     int position = 0;
     foreach (char ch in wl.Word)
     {
         if (!pinyinByChar.ContainsKey(ch))
         {
             pinyinByChar.Add(ch, wl.PinYin[position]);
         }
         position++;
     }
     return UserDefiningPattern.BuildWLString(pinyinByChar, wl.Count);
 }
Esempio n. 6
0
        /// <summary>
        /// Parses a space-separated line whose first token is a code and whose remaining
        /// tokens are words sharing that code, producing one entry per word.
        /// </summary>
        /// <param name="line">The raw text line.</param>
        /// <returns>A list with one entry for every token after the first.</returns>
        public virtual WordLibraryList ImportLine(string line)
        {
            var wlList = new WordLibraryList();
            string[] strs = line.Split(' ');

            // strs[0] is the code; every following token is a word carrying that code.
            for (int i = 1; i < strs.Length; i++)
            {
                var wl = new WordLibrary();
                // NOTE(review): the word is stored exactly as read. An earlier revision also
                // computed a comma-stripped copy that was never used; that dead local was removed.
                wl.Word = strs[i];
                wl.SetCode(this.CodeType,strs[0]);
                wlList.Add(wl);
            }
            return wlList;
        }
Esempio n. 7
0
 /// <summary>
 /// Returns the candidate codes for an entry.
 /// For pinyin entries this is a single string built by <c>GetPinYinString</c> with an
 /// empty separator; for all others it is the result of <c>CollectionHelper.Descartes</c>
 /// applied to the entry's codes.
 /// </summary>
 /// <param name="str">The entry to read codes from.</param>
 /// <param name="charCodeSplit">Not used by this implementation.</param>
 /// <returns>The list of candidate codes.</returns>
 public IList<string> GetCodeOfWordLibrary(WordLibrary str, string charCodeSplit = "")
 {
     if (str.CodeType != CodeType.Pinyin)
     {
         return CollectionHelper.Descartes(str.Codes);
     }

     var joinedPinyin = str.GetPinYinString("", BuildType.None);
     return new List<string> { joinedPinyin };
 }
Esempio n. 8
0
 /// <summary>
 /// Formats an entry as "<c>code word</c>", separated by a single space.
 /// </summary>
 /// <param name="wl">The entry to export.</param>
 /// <returns>The entry's <c>SingleCode</c>, a space, then its word.</returns>
 public string ExportLine(WordLibrary wl)
 {
     return new StringBuilder()
         .Append(wl.SingleCode)
         .Append(" ")
         .Append(wl.Word)
         .ToString();
 }
Esempio n. 9
0
        /// <summary>
        /// Formats an entry as its pinyin string (apostrophe separator,
        /// <c>BuildType.LeftContain</c>), a space, then the word.
        /// </summary>
        /// <param name="wl">The entry to export.</param>
        /// <returns>The formatted line.</returns>
        public string ExportLine(WordLibrary wl)
        {
            string pinyin = wl.GetPinYinString("'", BuildType.LeftContain);
            return pinyin + " " + wl.Word;
        }
Esempio n. 10
0
        /// <summary>
        /// Formats an entry as its apostrophe-separated pinyin, a tab, then the word.
        /// </summary>
        /// <param name="wl">The entry to export.</param>
        /// <returns>The formatted line.</returns>
        public string ExportLine(WordLibrary wl)
        {
            return new StringBuilder()
                .Append(wl.GetPinYinString("'", BuildType.None))
                .Append("\t")
                .Append(wl.Word)
                .ToString();
        }
Esempio n. 11
0
 /// <summary>
 /// Initializes the pattern with column order 1, 2, 3, a sample entry
 /// (count 1234 with a six-character word and matching pinyin), and
 /// <c>IsPinyinFormat</c> set to true.
 /// </summary>
 public ParsePattern()
 {
     Sort = new List<int> { 1, 2, 3 };
     sample = new WordLibrary
     {
         Count = 1234,
         Word = "深蓝词库转换",
         PinYin = new[] { "shen", "lan", "ci", "ku", "zhuan", "huan" }
     };
     IsPinyinFormat = true;
 }
        /// <summary>
        /// Returns the candidate codes for an entry. The entry's own codes are reused
        /// (via <c>CollectionHelper.DescarteIndex1</c>) only when the entry is pinyin and
        /// this generator is flagged <c>IsPinyinCode</c>; otherwise codes are generated from the word.
        /// </summary>
        /// <param name="wl">The entry to read.</param>
        /// <param name="charCodeSplit">Separator forwarded to <c>GetCodeOfString</c>.</param>
        /// <returns>The list of candidate codes.</returns>
        public IList<string> GetCodeOfWordLibrary(WordLibrary wl, string charCodeSplit = "")
        {
            if (wl.CodeType != CodeType.Pinyin || !IsPinyinCode)
            {
                return GetCodeOfString(wl.Word, charCodeSplit);
            }

            return CollectionHelper.DescarteIndex1(wl.Codes);
        }
Esempio n. 13
0
        /// <summary>
        /// Formats an entry as the first code that <c>factory</c> generates for the word,
        /// a space, then the word.
        /// </summary>
        /// <param name="wl">The entry to export.</param>
        /// <returns>The formatted line.</returns>
        public override string ExportLine(WordLibrary wl)
        {
            return new StringBuilder()
                .Append(factory.GetCodeOfString(wl.Word)[0])
                .Append(" ")
                .Append(wl.Word)
                .ToString();
        }
Esempio n. 14
0
 /// <summary>
 /// Re-codes the entry as <c>CodeType.UserDefinePhrase</c>.
 /// Pinyin entries are first re-coded with their pinyin joined without a separator;
 /// then, for every entry, the first result of <c>CollectionHelper.Descartes</c> over
 /// the current codes is stored as the code.
 /// </summary>
 /// <param name="wl">The entry whose code is replaced in place.</param>
 public override void GetCodeOfWordLibrary(WordLibrary wl)
 {
     if (wl.CodeType == CodeType.Pinyin)
     {
         string joinedPinyin = wl.GetPinYinString("", BuildType.None);
         wl.SetCode(CodeType.UserDefinePhrase, joinedPinyin);
     }

     // Runs for pinyin entries too, on top of the code that was just set above.
     wl.SetCode(CodeType.UserDefinePhrase, CollectionHelper.Descartes(wl.Codes)[0]);
 }
Esempio n. 15
0
 /// <summary>
 /// Runs the generator on a two-character pinyin entry and writes each resulting
 /// code element to the debug output. Contains no assertions.
 /// </summary>
 public void TestPinyin2TerraPinyin()
 {
     var wl = new WordLibrary();
     wl.Word = "深蓝";
     wl.Rank = 123;
     wl.PinYin = new[] { "shen", "lan" };
     wl.CodeType = CodeType.Pinyin;

     generater.GetCodeOfWordLibrary(wl);

     foreach (var code in wl.Codes)
     {
         Debug.WriteLine(code);
     }
 }
Esempio n. 16
0
        /// <summary>
        /// Formats an entry as the first code that <c>wubiGenerater</c> produces for the
        /// word, a space, then the word.
        /// </summary>
        /// <param name="wl">The entry to export.</param>
        /// <returns>The formatted line.</returns>
        public string ExportLine(WordLibrary wl)
        {
            return new StringBuilder()
                .Append(wubiGenerater.GetCodeOfString(wl.Word)[0])
                .Append(" ")
                .Append(wl.Word)
                .ToString();
        }
Esempio n. 17
0
        /// <summary>
        /// Runs the generator on an entry that has no code and writes each generated
        /// code string to the debug output. Contains no assertions.
        /// </summary>
        /// <param name="word">Word to generate codes for.</param>
        /// <param name="pinyin">Not used by the test body.</param>
        public void TestChar2TerraPinyin(string word,string pinyin)
        {
            var wl = new WordLibrary();
            wl.Word = word;
            wl.Rank = 123;
            wl.CodeType = CodeType.NoCode;

            generater.GetCodeOfWordLibrary(wl);

            foreach (var code in wl.Codes.ToCodeString(" "))
            {
                Debug.WriteLine(code);
            }
        }
Esempio n. 18
0
 /// <summary>
 /// Converts one line of plain text into an entry object.
 /// The whole line becomes the word, and its pinyin is generated by <c>pinyinFactory</c>.
 /// </summary>
 /// <param name="line">The raw text line, used as the word.</param>
 /// <returns>A list containing the single entry built from the line.</returns>
 public virtual WordLibraryList ImportLine(string line)
 {
     IList<string> generatedPinyin = pinyinFactory.GetCodeOfString(line);

     var entry = new WordLibrary
     {
         Word = line,
         PinYin = CollectionHelper.ToArray(generatedPinyin)
     };

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
Esempio n. 19
0
        /// <summary>
        /// Serializes an entry as an <c>ns1:DictionaryEntry</c> XML fragment containing
        /// the toned pinyin as the input string and the word as the output string.
        /// </summary>
        /// <param name="wl">The entry to export.</param>
        /// <returns>The XML fragment, with CRLF between elements and no trailing newline.</returns>
        public string ExportLine(WordLibrary wl)
        {
            var sb = new StringBuilder();
            sb.Append("<ns1:DictionaryEntry>\r\n");
            sb.Append("<ns1:InputString>" + GetPinyinWithTone(wl) + "</ns1:InputString>\r\n");
            // The word is arbitrary user text: escape XML metacharacters (&, <, >, quotes)
            // so that an entry such as "A&B" does not produce a malformed document.
            sb.Append("<ns1:OutputString>" + System.Security.SecurityElement.Escape(wl.Word) + "</ns1:OutputString>\r\n");
            sb.Append("<ns1:Exist>1</ns1:Exist>\r\n");
            sb.Append("</ns1:DictionaryEntry>");

            return sb.ToString();
        }
Esempio n. 20
0
 /// <summary>
 /// Parses a tab-separated line of the form <c>word TAB pinyin TAB rank</c>, where the
 /// pinyin syllables are separated by apostrophes.
 /// </summary>
 /// <param name="line">The raw text line.</param>
 /// <returns>A list containing the single parsed entry.</returns>
 public WordLibraryList ImportLine(string line)
 {
     string[] fields = line.Split('\t');

     var entry = new WordLibrary
     {
         Word = fields[0],
         Rank = Convert.ToInt32(fields[2]),
         PinYin = fields[1].Split(new[] { '\'' }, StringSplitOptions.RemoveEmptyEntries)
     };

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
Esempio n. 21
0
 /// <summary>
 /// Sets the entry's code to <c>CodeType.Chaoyin</c>. Pinyin entries are converted from
 /// their pinyin through <c>ChaoyinHelper.GetChaoyin</c>; all others get a code generated
 /// from the word.
 /// </summary>
 /// <param name="wl">The entry whose code is replaced in place.</param>
 public void GetCodeOfWordLibrary(WordLibrary wl)
 {
     if (wl.CodeType != CodeType.Pinyin)
     {
         wl.SetCode(CodeType.Chaoyin, GetCodeOfString(wl.Word));
         return;
     }

     wl.SetCode(CodeType.Chaoyin, ChaoyinHelper.GetChaoyin(wl.PinYin));
 }
Esempio n. 22
0
 /// <summary>
 /// Parses a space-separated line of the form <c>pinyin word</c>, where the pinyin
 /// syllables are separated by apostrophes. The rank is fixed at 1.
 /// </summary>
 /// <param name="line">The raw text line.</param>
 /// <returns>A list containing the single parsed entry.</returns>
 public WordLibraryList ImportLine(string line)
 {
     string[] parts = line.Split(' ');
     string pinyinText = parts[0];
     string text = parts[1];

     var entry = new WordLibrary
     {
         Word = text,
         Rank = 1,
         PinYin = pinyinText.Split(new[] { '\'' }, StringSplitOptions.RemoveEmptyEntries)
     };

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
Esempio n. 23
0
        /// <summary>
        /// Parses a tab-separated line of the form <c>word TAB count</c>.
        /// The entry is given an empty pinyin array.
        /// </summary>
        /// <param name="line">The raw text line.</param>
        /// <returns>A list containing the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] fields = line.Split('\t');
            string text = fields[0];

            var entry = new WordLibrary
            {
                Word = text,
                Count = Convert.ToInt32(fields[1]),
                PinYin = new string[0]
            };

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
Esempio n. 24
0
 /// <summary>
 /// Parses a space-separated line of the form <c>code word</c>. The entry receives the
 /// default rank as its count, pinyin generated from the word by <c>pyGenerater</c>, and
 /// the parsed code added under this importer's <c>CodeType</c>.
 /// </summary>
 /// <param name="line">The raw text line.</param>
 /// <returns>A list containing the single parsed entry.</returns>
 public WordLibraryList ImportLine(string line)
 {
     string[] parts = line.Split(' ');
     var entry = new WordLibrary();
     string parsedCode = parts[0];

     entry.Word = parts[1];
     entry.Count = DefaultRank;
     // Pinyin is derived from the word as stored on the entry.
     var generatedPinyin = pyGenerater.GetCodeOfString(entry.Word);
     entry.PinYin = CollectionHelper.ToArray(generatedPinyin);
     entry.AddCode(CodeType, parsedCode);

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
Esempio n. 25
0
 /// <summary>
 /// Parses a space-separated line of the form <c>code word</c>. The entry receives the
 /// default rank, then a Cangjie code generated from the word by <c>pyGenerater</c>, and
 /// finally the parsed code set under this importer's <c>CodeType</c>.
 /// </summary>
 /// <param name="line">The raw text line.</param>
 /// <returns>A list containing the single parsed entry.</returns>
 public WordLibraryList ImportLine(string line)
 {
     string[] parts = line.Split(' ');
     var entry = new WordLibrary();
     string parsedCode = parts[0];

     entry.Word = parts[1];
     entry.Rank = DefaultRank;
     // Both SetCode calls are kept in their original order; the second runs last.
     entry.SetCode(CodeType.Cangjie, pyGenerater.GetCodeOfString(entry.Word));
     entry.SetCode(CodeType, parsedCode);

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
Esempio n. 26
0
 /// <summary>
 /// Formats an entry as tab-separated <c>word, code, count</c>.
 /// When the generator is flagged <c>Is1CharMutiCode</c>, one such line is emitted per
 /// generated code, joined with CRLF. Otherwise a single line is produced whose code
 /// column comes from the entry's pinyin, the entry's own first code, or the generator,
 /// depending on the configured <c>CodeType</c>.
 /// </summary>
 /// <param name="wl">The entry to export.</param>
 /// <returns>The formatted line or lines, without a trailing newline.</returns>
 public string ExportLine(WordLibrary wl)
 {
     // Create the generator on first use.
     if (codeGenerater == null)
     {
         codeGenerater = CodeTypeHelper.GetGenerater(CodeType);
     }

     var output = new StringBuilder();

     if (codeGenerater.Is1CharMutiCode)
     {
         IList<string> candidates = codeGenerater.GetCodeOfString(wl.Word);
         for (int n = 0; n < candidates.Count; n++)
         {
             // The line break goes between records, never after the last one.
             if (n > 0)
             {
                 output.Append("\r\n");
             }
             output.Append(wl.Word)
                   .Append("\t")
                   .Append(candidates[n])
                   .Append("\t")
                   .Append(wl.Count);
         }
         return output.ToString();
     }

     output.Append(wl.Word).Append("\t");

     if (CodeType == CodeType.Pinyin || CodeType == CodeType.TerraPinyin)
     {
         output.Append(wl.GetPinYinString(" ", BuildType.None));
     }
     else if (CodeType == wl.CodeType)
     {
         // The entry already carries the requested code type: use its first code.
         output.Append(wl.Codes[0][0]);
     }
     else if (codeGenerater.Is1Char1Code)
     {
         output.Append(CollectionHelper.ListToString(codeGenerater.GetCodeOfString(wl.Word), " "));
     }
     else
     {
         output.Append(CollectionHelper.ListToString(codeGenerater.GetCodeOfString(wl.Word)));
     }

     output.Append("\t").Append(wl.Count);
     return output.ToString();
 }
Esempio n. 27
0
        /// <summary>
        /// Parses a space-separated line of the form <c>word pinyin</c>, where the pinyin
        /// syllables are separated by apostrophes. The entry is typed as Pinyin and given
        /// the default rank.
        /// </summary>
        /// <param name="line">The raw text line.</param>
        /// <returns>A list containing the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] fields = line.Split(' ');
            string pinyinText = fields[1];
            string text = fields[0];

            var entry = new WordLibrary();
            entry.CodeType = CodeType.Pinyin;
            entry.Word = text;
            entry.Rank = DefaultRank;
            entry.PinYin = pinyinText.Split(new[] { '\'' }, StringSplitOptions.RemoveEmptyEntries);

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
Esempio n. 28
0
        /// <summary>
        /// Parses a comma-separated line of the form <c>word,rank</c>.
        /// The entry is given an empty pinyin array.
        /// </summary>
        /// <param name="line">The raw text line.</param>
        /// <returns>A list containing the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] fields = line.Split(',');
            string text = fields[0];
            int rank = Convert.ToInt32(fields[1]);

            var entry = new WordLibrary
            {
                Word = text,
                Rank = rank,
                PinYin = new string[0]
            };

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
Esempio n. 29
0
        /// <summary>
        /// Formats an entry as tab-separated <c>word, pinyin, count</c>. The pinyin column
        /// (apostrophe separator, <c>BuildType.RightContain</c>) is omitted for entries
        /// flagged <c>IsEnglish</c>.
        /// </summary>
        /// <param name="wl">The entry to export.</param>
        /// <returns>The formatted line.</returns>
        public string ExportLine(WordLibrary wl)
        {
            var output = new StringBuilder();
            output.Append(wl.Word).Append("\t");

            if (!wl.IsEnglish)
            {
                output.Append(wl.GetPinYinString("'", BuildType.RightContain)).Append("\t");
            }

            return output.Append(wl.Count).ToString();
        }
Esempio n. 30
0
 /// <summary>
 /// Formats an entry as
 /// <c>pinyin word number Z, pinyin number</c>, using apostrophe-separated pinyin and
 /// the current value of the <c>number</c> member.
 /// </summary>
 /// <param name="wl">The entry to export.</param>
 /// <returns>The formatted line.</returns>
 public string ExportLine(WordLibrary wl)
 {
     string pinyin = wl.GetPinYinString("'", BuildType.None);

     return new StringBuilder()
         .Append(pinyin)
         .Append(" ")
         .Append(wl.Word)
         .Append(" ")
         .Append(number)
         .Append(" Z, ")
         .Append(pinyin)
         .Append(" ")
         .Append(number)
         .ToString();
 }
Esempio n. 31
0
        /// <summary>
        /// Builds the output string for an entry by choosing its code column and delegating
        /// to the <c>(word, code, count)</c> overload. The code is empty when
        /// <c>ContainCode</c> is false.
        /// </summary>
        /// <param name="wl">The entry to format.</param>
        /// <returns>The string produced by the three-argument overload.</returns>
        public string BuildWlString(WordLibrary wl)
        {
            if (!ContainCode)
            {
                return BuildWlString(wl.Word, "", wl.Count);
            }

            string code;
            if (IsPinyinFormat)
            {
                var perCharCodes = CollectionHelper.DescarteIndex1(wl.Codes);
                code = CollectionHelper.GetString(perCharCodes, CodeSplitString, CodeSplitType);
            }
            else
            {
                // One code for the whole word, generated according to the user's coding rules.
                code = wl.SingleCode;
            }

            return BuildWlString(wl.Word, code, wl.Count);
        }
Esempio n. 32
0
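        ////Had no clear plan here; the code below is messy, but it still appears to be correct. zengyi20101114
        ////If wl already provides a pinyin array and the custom format is also a pinyin format, only the format needs converting.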
        //public string BuildWLString(WordLibrary wl)
        //{
        //    string py = "", cp = "";
        //    var sb = new StringBuilder();
        //    if (ContainCode)
        //    {
        //        if (IsPinyinFormat)
        //        {
        //            py = wl.GetPinYinString(CodeSplitString, CodeSplitType);
        //        }
        //        else
        //        {
        //            selfFactory.MutiWordCodeFormat = MutiWordCodeFormat;
        //            py = selfFactory.GetCodeOfString(wl.Word)[0];
        //        }
        //    }
        //    if (ContainRank)
        //    {
        //        cp = wl.Count.ToString();
        //    }
        //    var dic = new Dictionary<int, string>();
        //    dic.Add(Sort[0], py);
        //    dic.Add(Sort[1], wl.Word);
        //    dic.Add(Sort[2], cp);
        //    var newSort = new List<int>(Sort);
        //    newSort.Sort();
        //    foreach (int x in newSort)
        //    {
        //        if (dic[x] != "")
        //        {
        //            sb.Append(dic[x] + SplitString);
        //        }
        //    }
        //    string str = sb.ToString();
        //    return str.Substring(0, str.LastIndexOf(SplitString));
        //}

        /// <summary>
        /// Parses one text line into an entry according to the configured column layout.
        /// The line is split on <c>SplitString</c> (empty parts dropped). <c>Sort</c> holds
        /// the ordering keys of the code, word and rank fields (indices 0, 1 and 2); the
        /// field with the smallest key is read from the first column, the next from the
        /// second, and the largest from the third.
        /// </summary>
        /// <param name="line">The raw text line.</param>
        /// <returns>The entry populated from up to three columns of the line.</returns>
        public WordLibrary BuildWordLibrary(string line)
        {
            var wl = new WordLibrary();

            string[] strlist = line.Split(new[] { SplitString }, StringSplitOptions.RemoveEmptyEntries);
            var      newSort = new List <int>(Sort);

            newSort.Sort();

            // The first column is always examined; the second and third only when present.
            for (int column = 0; column < 3; column++)
            {
                if (column > 0 && strlist.Length <= column)
                {
                    break;
                }

                int position = column; // stable copy for the lambda below
                // Which field (0 = code, 1 = word, 2 = rank) owns the position-th smallest key.
                int field = Sort.FindIndex(i => i == newSort[position]);
                AssignColumn(wl, field, strlist, column);
            }

            if (isPinyin)
            {
                // Split the collected pinyin string into individual syllables.
                wl.PinYin = wl.PinYinString.Split(new[] { CodeSplitString }, StringSplitOptions.RemoveEmptyEntries);
            }
            return(wl);
        }

        /// <summary>
        /// Stores one column of the split line into the entry field it belongs to.
        /// The column is only read when the field is actually enabled.
        /// </summary>
        private void AssignColumn(WordLibrary wl, int field, string[] columns, int column)
        {
            if (field == 0 && ContainCode)
            {
                if (isPinyin)
                {
                    wl.PinYinString = columns[column];
                }
                else
                {
                    // Not pinyin: keep the raw code under the Unknown code type.
                    wl.SetCode(CodeType.Unknown, columns[column]);
                }
            }
            if (field == 1)
            {
                wl.Word = columns[column];
            }
            if (field == 2 && ContainRank)
            {
                wl.Count = Convert.ToInt32(columns[column]);
            }
        }