Exemple #1
0
        /// <summary>
        /// Builds the export line for an entry: its pinyin string (requested with "'" as the
        /// separator argument), a comma, then the word.
        /// </summary>
        /// <param name="wl">Entry to export.</param>
        /// <returns>The text line for the entry.</returns>
        public string ExportLine(WordLibrary wl)
        {
            string pinyin = wl.GetPinYinString("'", BuildType.None);
            string word = wl.Word;

            return pinyin + "," + word;
        }
Exemple #2
0
 /// <summary>
 /// Builds the export line for an entry: its pinyin string (requested with an empty
 /// separator argument), one space, then the word.
 /// </summary>
 /// <param name="wl">Entry to export.</param>
 /// <returns>The text line for the entry.</returns>
 public string ExportLine(WordLibrary wl)
 {
     return new StringBuilder()
         .Append(wl.GetPinYinString("", BuildType.None))
         .Append(" ")
         .Append(wl.Word)
         .ToString();
 }
 /// <summary>
 /// Converts one line of plain text into a word-library entry.
 /// </summary>
 /// <param name="line">The text line; it is used verbatim as the word.</param>
 /// <returns>A list containing the single entry built from <paramref name="line"/>.</returns>
 public virtual WordLibraryList ImportLine(string line)
 {
     // Look up the code first, exactly as the word is given on the line.
     var codes = pinyinFactory.GetCodeOfString(line);

     var entry = new WordLibrary
     {
         Word = line,
         PinYin = ToArray(codes)
     };

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
Exemple #4
0
        /// <summary>
        /// Builds the export line for an entry: the first code returned by
        /// <c>wubiFactory.GetCodeOfString</c> for the word, one space, then the word.
        /// </summary>
        /// <param name="wl">Entry to export.</param>
        /// <returns>The text line for the entry.</returns>
        public string ExportLine(WordLibrary wl)
        {
            return new StringBuilder()
                .Append(wubiFactory.GetCodeOfString(wl.Word)[0])
                .Append(" ")
                .Append(wl.Word)
                .ToString();
        }
Exemple #5
0
        /// <summary>
        /// Builds the export line for an entry: the result of
        /// <c>WubiHelper.GetStringWubi86Code</c> for the word, one space, then the word.
        /// </summary>
        /// <param name="wl">Entry to export.</param>
        /// <returns>The text line for the entry.</returns>
        public string ExportLine(WordLibrary wl)
        {
            var line = new StringBuilder();
            line.Append(WubiHelper.GetStringWubi86Code(wl.Word)).Append(" ").Append(wl.Word);
            return line.ToString();
        }
 /// <summary>
 /// Parses a tab-separated line of the form <c>word \t count \t pinyin</c>, where the
 /// pinyin syllables are separated by spaces.
 /// </summary>
 /// <param name="line">The line to parse.</param>
 /// <returns>A list containing the single parsed entry.</returns>
 public WordLibraryList ImportLine(string line)
 {
     string[] fields = line.Split('\t');

     var entry = new WordLibrary
     {
         Word = fields[0],
         Count = Convert.ToInt32(fields[1]),
         PinYin = fields[2].Split(new[] {' '}, StringSplitOptions.RemoveEmptyEntries)
     };

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
Exemple #7
0
 /// <summary>
 /// Parses a comma-separated line of the form <c>pinyin,word</c>, where the pinyin
 /// syllables are separated by apostrophes. The count is always set to 1.
 /// </summary>
 /// <param name="line">The line to parse.</param>
 /// <returns>A list containing the single parsed entry.</returns>
 public WordLibraryList ImportLine(string line)
 {
     string[] parts = line.Split(',');
     string pinyin = parts[0];
     string text = parts[1];

     var entry = new WordLibrary
     {
         Word = text,
         Count = 1,
         PinYin = pinyin.Split(new[] {'\''}, StringSplitOptions.RemoveEmptyEntries)
     };

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
        /// <summary>
        /// Runs the generator over a pinyin-coded entry and writes each resulting code
        /// to the debug output. No assertions are made.
        /// </summary>
        public void TestPinyin2TerraPinyin()
        {
            var entry = new WordLibrary();
            entry.Word = "深蓝";
            entry.Rank = 123;
            entry.PinYin = new [] { "shen", "lan" };
            entry.CodeType = CodeType.Pinyin;

            generater.GetCodeOfWordLibrary(entry);

            foreach (var code in entry.Codes)
            {
                Debug.WriteLine(code);
            }
        }
        /// <summary>
        /// Runs the generator over an entry that has no code yet and writes each generated
        /// code string (joined with a space argument) to the debug output.
        /// No assertions are made.
        /// </summary>
        /// <param name="word">The word to generate codes for.</param>
        /// <param name="pinyin">Not read by this method.</param>
        public void TestChar2TerraPinyin(string word, string pinyin)
        {
            var entry = new WordLibrary();
            entry.Word = word;
            entry.Rank = 123;
            entry.CodeType = CodeType.NoCode;

            generater.GetCodeOfWordLibrary(entry);

            foreach (var code in entry.Codes.ToCodeString(" "))
            {
                Debug.WriteLine(code);
            }
        }
Exemple #10
0
 /// <summary>
 /// Parses a tab-separated line of the form <c>word \t pinyin \t count</c>, where the
 /// pinyin syllables are separated by spaces.
 /// </summary>
 /// <param name="line">The line to parse.</param>
 /// <returns>A list containing the single parsed entry.</returns>
 public WordLibraryList ImportLine(string line)
 {
     string[] fields = line.Split('\t');
     string pinyin = fields[1];
     string text = fields[0];

     var entry = new WordLibrary
     {
         Word = text,
         Count = Convert.ToInt32(fields[2]),
         PinYin = pinyin.Split(new[] {' '}, StringSplitOptions.RemoveEmptyEntries)
     };

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
Exemple #11
0
        /// <summary>
        ///     Converts one line of plain text into a word-library entry.
        /// </summary>
        /// <param name="line">The text line; it is used verbatim as the word.</param>
        /// <returns>A list containing the single entry; no pinyin is assigned here.</returns>
        public virtual WordLibraryList ImportLine(string line)
        {
            var entry = new WordLibrary();
            entry.Word = line;
            entry.CodeType = CodeType;

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
        /// <summary>
        /// Parses a tab-separated line of the form <c>word \t count</c>. The entry gets an
        /// empty pinyin array.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list containing the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] fields = line.Split('\t');

            var entry = new WordLibrary
            {
                Word = fields[0],
                Count = Convert.ToInt32(fields[1]),
                PinYin = new string[0]
            };

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
Exemple #13
0
        /// <summary>
        /// Builds the export line <c>word \t rank \t pinyin</c>, with the pinyin string
        /// requested using a space as the separator argument.
        /// </summary>
        /// <param name="wl">Entry to export.</param>
        /// <returns>The text line for the entry.</returns>
        public string ExportLine(WordLibrary wl)
        {
            return new StringBuilder()
                .Append(wl.Word)
                .Append("\t")
                .Append(wl.Rank)
                .Append("\t")
                .Append(wl.GetPinYinString(" ", BuildType.None))
                .ToString();
        }
Exemple #14
0
        /// <summary>
        /// Parses a tab-separated line of the form <c>word \t rank \t pinyin</c>, where the
        /// pinyin syllables are separated by spaces.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list containing the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] fields = line.Split('\t');

            var entry = new WordLibrary
            {
                Word = fields[0],
                Rank = Convert.ToInt32(fields[1]),
                PinYin = fields[2].Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
            };

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
Exemple #15
0
 /// <summary>
 /// Returns the code strings for an entry. A pinyin-coded entry yields a single string
 /// (its pinyin requested with an empty separator argument); any other entry yields the
 /// result of <c>CollectionHelper.Descartes</c> over its codes.
 /// </summary>
 /// <param name="str">The entry whose codes are wanted.</param>
 /// <param name="charCodeSplit">Not read by this method.</param>
 /// <returns>The list of code strings.</returns>
 public IList <string> GetCodeOfWordLibrary(WordLibrary str, string charCodeSplit = "")
 {
     if (str.CodeType != CodeType.Pinyin)
     {
         return CollectionHelper.Descartes(str.Codes);
     }

     var single = new List <string>();
     single.Add(str.GetPinYinString("", BuildType.None));
     return single;
 }
        /// <summary>
        /// Parses a tab-separated line of the form <c>word \t pinyin</c>, where the pinyin
        /// syllables are separated by apostrophes. The count is always set to 1.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list containing the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] fields = line.Split('\t');
            string pinyin = fields[1];
            string text = fields[0];

            var entry = new WordLibrary
            {
                Word = text,
                Count = 1,
                PinYin = pinyin.Split(new[] { '\'' }, StringSplitOptions.RemoveEmptyEntries)
            };

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
        /// <summary>
        /// Assigns a <c>UserDefinePhrase</c> code to the entry. English entries are first
        /// coded with the word itself and pinyin entries with their pinyin string; in every
        /// case the code is then replaced by the first element of
        /// <c>CollectionHelper.Descartes(wl.Codes)</c>.
        /// </summary>
        /// <param name="wl">The entry to update in place.</param>
        public override void GetCodeOfWordLibrary(WordLibrary wl)
        {
            switch (wl.CodeType)
            {
                case CodeType.English:
                    wl.SetCode(CodeType.UserDefinePhrase, wl.Word);
                    break;

                case CodeType.Pinyin:
                    wl.SetCode(CodeType.UserDefinePhrase, wl.GetPinYinString("", BuildType.None));
                    break;
            }

            // Collapse whatever codes the entry now holds into one phrase code.
            var combined = CollectionHelper.Descartes(wl.Codes);
            wl.SetCode(CodeType.UserDefinePhrase, combined[0]);
        }
        /// <summary>
        /// Builds the export line <c>word \t [pinyin \t] count</c>. The pinyin column
        /// (requested with "'" and <c>BuildType.RightContain</c>) is omitted for entries
        /// flagged as English.
        /// </summary>
        /// <param name="wl">Entry to export.</param>
        /// <returns>The text line for the entry.</returns>
        public string ExportLine(WordLibrary wl)
        {
            var line = new StringBuilder();
            line.Append(wl.Word).Append("\t");

            bool hasPinyin = !wl.IsEnglish;
            if (hasPinyin)
            {
                line.Append(wl.GetPinYinString("'", BuildType.RightContain)).Append("\t");
            }

            return line.Append(wl.Count).ToString();
        }
 /// <summary>
 /// Lets the base class generate the codes, then replaces every code in place with the
 /// value returned by <c>ZhuyinHelper.GetZhuyin</c> for it.
 /// </summary>
 /// <param name="wl">The entry whose codes are converted in place.</param>
 public override void GetCodeOfWordLibrary(WordLibrary wl)
 {
     base.GetCodeOfWordLibrary(wl);

     for (int rowIndex = 0; rowIndex < wl.Codes.Count; rowIndex++)
     {
         var codesOfChar = wl.Codes[rowIndex];
         for (int column = 0; column < codesOfChar.Count; column++)
         {
             string converted = ZhuyinHelper.GetZhuyin(codesOfChar[column]);
             wl.Codes[rowIndex][column] = converted;
         }
     }
 }
Exemple #20
0
        /// <summary>
        /// Builds the export line <c>word \t [pinyin \t] count</c>. The pinyin column
        /// (requested with "'" and <c>BuildType.RightContain</c>) is written only for
        /// entries that are not flagged as English.
        /// </summary>
        /// <param name="wl">Entry to export.</param>
        /// <returns>The text line for the entry.</returns>
        public string ExportLine(WordLibrary wl)
        {
            var buffer = new StringBuilder();

            buffer.Append(wl.Word);
            buffer.Append("\t");

            if (wl.IsEnglish == false)
            {
                buffer.Append(wl.GetPinYinString("'", BuildType.RightContain));
                buffer.Append("\t");
            }

            buffer.Append(wl.Count);

            string line = buffer.ToString();
            return line;
        }
Exemple #21
0
        /// <summary>
        /// Builds the export text for an entry as <c>word \t code \t count</c>.
        /// When the generator reports <c>Is1CharMutiCode</c>, one such line is produced per
        /// generated code, joined with "\r\n". Otherwise a single line is produced whose
        /// code column depends on the exporter's and the entry's code types.
        /// </summary>
        /// <param name="wl">Entry to export.</param>
        /// <returns>One or more lines of text for the entry.</returns>
        public string ExportLine(WordLibrary wl)
        {
            var text = new StringBuilder();

            if (codeGenerater.Is1CharMutiCode)
            {
                IList <string> codes = codeGenerater.GetCodeOfString(wl.Word);
                for (int index = 0; index < codes.Count; index++)
                {
                    // Separator goes between lines, never after the last one.
                    if (index > 0)
                    {
                        text.Append("\r\n");
                    }
                    text.Append(wl.Word).Append("\t").Append(codes[index]).Append("\t").Append(wl.Count);
                }
                return text.ToString();
            }

            text.Append(wl.Word).Append("\t");

            if (CodeType == CodeType.Pinyin || CodeType == CodeType.TerraPinyin)
            {
                text.Append(wl.GetPinYinString(" ", BuildType.None));
            }
            else if (CodeType == wl.CodeType)
            {
                // The entry already carries a code of the requested type.
                text.Append(wl.Codes[0][0]);
            }
            else if (codeGenerater.Is1Char1Code)
            {
                text.Append(CollectionHelper.ListToString(codeGenerater.GetCodeOfString(wl.Word), " "));
            }
            else
            {
                text.Append(CollectionHelper.ListToString(codeGenerater.GetCodeOfString(wl.Word)));
            }

            text.Append("\t").Append(wl.Count);
            return text.ToString();
        }
Exemple #22
0
 /// <summary>
 /// Parses a space-separated line of the form <c>code word</c>. The code on the line is
 /// ignored; pinyin is regenerated from the word through <c>pinyinFactory</c>.
 /// </summary>
 /// <param name="line">The line to parse.</param>
 /// <returns>
 /// A list containing the entry, or an empty list when no pinyin was produced.
 /// </returns>
 public WordLibraryList ImportLine(string line)
 {
     string[] parts = line.Split(' ');
     string text = parts[1];

     var entry = new WordLibrary
     {
         Word = text,
         Count = DefaultRank,
         PinYin = ToArray(pinyinFactory.GetCodeOfString(text))
     };

     var result = new WordLibraryList();
     if (entry.PinYin.Length > 0)
     {
         result.Add(entry);
     }
     return result;
 }
        /// <summary>
        /// Parses a space-separated line of the form <c>code word</c>. Pinyin is generated
        /// from the word through <c>pyGenerater</c>, and the code from the line is stored
        /// under this importer's <c>CodeType</c>.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list containing the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] parts = line.Split(' ');
            string code = parts[0];

            var entry = new WordLibrary();
            entry.Word = parts[1];
            entry.Count = DefaultRank;
            entry.PinYin = CollectionHelper.ToArray(pyGenerater.GetCodeOfString(entry.Word));
            entry.SetCode(CodeType, code);

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
Exemple #24
0
        /// <summary>
        /// Parses a tab-separated line of the form <c>word \t pinyin \t count</c>, where the
        /// pinyin syllables are separated by spaces.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list containing the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] columns = line.Split('\t');
            string pinyin = columns[1];
            string text = columns[0];

            var entry = new WordLibrary
            {
                Word = text,
                Count = Convert.ToInt32(columns[2]),
                PinYin = pinyin.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
            };

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
Exemple #25
0
 /// <summary>
 /// Parses a line of the form <c>pinyin word count</c> (space separated, pinyin
 /// syllables separated by apostrophes). Anything after the first comma is discarded.
 /// </summary>
 /// <param name="line">The line to parse.</param>
 /// <returns>A list containing the single parsed entry.</returns>
 public WordLibraryList ImportLine(string line)
 {
     // If the line contains commas, only the part before the first one is used.
     string head = line.Split(',')[0];
     string[] tokens = head.Split(' ');
     string pinyin = tokens[0];
     string text = tokens[1];
     int frequency = Convert.ToInt32(tokens[2]);

     var entry = new WordLibrary
     {
         Word = text,
         Count = frequency,
         PinYin = pinyin.Split(new[] {'\''}, StringSplitOptions.RemoveEmptyEntries)
     };

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
        /// <summary>
        /// Parses a space-separated line of the form <c>code word</c>. The entry receives a
        /// <c>CodeType.Cangjie</c> code generated by <c>pyGenerater</c> and then the code
        /// from the line under this importer's <c>CodeType</c>.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list containing the single parsed entry.</returns>
        public override WordLibraryList ImportLine(string line)
        {
            string[] parts = line.Split(' ');
            string code = parts[0];

            var entry = new WordLibrary();
            entry.Word = parts[1];
            entry.Rank = DefaultRank;
            entry.SetCode(CodeType.Cangjie, pyGenerater.GetCodeOfString(entry.Word));
            entry.SetCode(CodeType, code);

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
Exemple #27
0
        /// <summary>
        /// Parses a tab-separated line of the form <c>word \t rank</c>. The entry gets an
        /// empty pinyin array.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list containing the single parsed entry.</returns>
        public override WordLibraryList ImportLine(string line)
        {
            string[] fields = line.Split('\t');

            var entry = new WordLibrary
            {
                Word = fields[0],
                Rank = Convert.ToInt32(fields[1]),
                PinYin = new string[0]
            };

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
        //public Dictionary<char,string > CharAndPinyin=new Dictionary<char, string>();
        //private void AddWordAndPinyin(char word,string pinyin)
        //{
        //    if (!CharAndPinyin.ContainsKey(word))
        //    {
        //        CharAndPinyin.Add(word,pinyin);
        //    }
        //}
        /// <summary>
        ///     Reads one word from the stream. The record layout is:
        ///     a 4-byte length len,
        ///     len*2 bytes of pinyin (for every character, one byte for the initial and one for the final),
        ///     len*2 bytes of Chinese characters (Unicode encoded).
        /// </summary>
        /// <param name="fs">Stream positioned at the start of a word record.</param>
        /// <returns>The parsed entry, or null when the length field is 0.</returns>
        private WordLibrary ImportWord(FileStream fs)
        {
            // Holds the initial-byte of the last syllable whose lookup failed (0 = none recorded).
            int show        = 0;
            var wordLibrary = new WordLibrary();
            var temp        = new byte[4];

            // NOTE(review): the return value of fs.Read is ignored here and below, so a short
            // read leaves the untouched part of the freshly allocated buffer at zero.
            fs.Read(temp, 0, 4);
            int len = BitConverter.ToInt32(temp, 0);

            if (len == 0)
            {
                // Log where the zero-length record was found and give up on it.
                Debug.WriteLine(fs.Position);
                return(null);
                //return SpecialWord(fs);
            }
            var pinyinList = new List <string>();

            // One two-byte (initial, final) pair per character.
            for (int i = 0; i < len; i++)
            {
                temp = new byte[2];
                fs.Read(temp, 0, 2);
                try
                {
                    string sm = Shengmu[temp[0]];
                    string ym = Yunmu[temp[1]];

                    pinyinList.Add(sm + ym);
                }
                catch (Exception e)
                {
                    // A failed table lookup drops this syllable; the word is still read below.
                    Debug.WriteLine(e.Message);
                    show = temp[0];
                }
            }
            wordLibrary.PinYin = pinyinList.ToArray();
            // The characters follow the pinyin: two bytes per character.
            temp = new byte[2 * len];
            fs.Read(temp, 0, 2 * len);
            wordLibrary.Word = Encoding.Unicode.GetString(temp);
            //for (var i = 0; i < wordLibrary.Word.Length;i++ )
            //{
            //    AddWordAndPinyin(wordLibrary.Word[i], wordLibrary.PinYin[i]);
            //}
            if (show > 0)
            {
                // Report the entry whose pinyin could not be fully decoded.
                Debug.WriteLine(show + "  " + wordLibrary.Word + "----" + wordLibrary.PinYinString);
            }
            return(wordLibrary);
        }
Exemple #29
0
        //private RimeConfigForm form;

        /// <summary>
        /// Builds the export text for an entry as <c>word \t code \t rank</c>.
        /// The code generator is created on first use and asked to fill in the entry's
        /// codes. When it reports <c>Is1CharMutiCode</c>, one line is produced per
        /// generated code string, joined with <c>lineSplitString</c>; otherwise a single
        /// line is produced.
        /// </summary>
        /// <param name="wl">Entry to export; its codes are updated by the generator.</param>
        /// <returns>One or more lines of text for the entry.</returns>
        public string ExportLine(WordLibrary wl)
        {
            if (codeGenerater == null)
            {
                codeGenerater = CodeTypeHelper.GetGenerater(CodeType);
            }
            codeGenerater.GetCodeOfWordLibrary(wl);

            var text = new StringBuilder();

            if (codeGenerater.Is1CharMutiCode)
            {
                IList <string> codes = codeGenerater.GetCodeOfString(wl.Word).ToCodeString(" ");
                for (int index = 0; index < codes.Count; index++)
                {
                    // Separator goes between lines, never after the last one.
                    if (index > 0)
                    {
                        text.Append(lineSplitString);
                    }
                    text.Append(wl.Word).Append("\t").Append(codes[index]).Append("\t").Append(wl.Rank);
                }
                return text.ToString();
            }

            text.Append(wl.Word).Append("\t");

            if (CodeType == CodeType.Pinyin || CodeType == CodeType.TerraPinyin)
            {
                text.Append(wl.GetPinYinString(" ", BuildType.None));
            }
            else if (CodeType == wl.CodeType)
            {
                // The entry already carries a code of the requested type.
                text.Append(wl.Codes[0][0]);
            }
            else
            {
                text.Append(wl.Codes.ToCodeString(" ")[0]);
            }

            text.Append("\t").Append(wl.Rank);
            return text.ToString();
        }
Exemple #30
0
        /// <summary>
        /// Reads every word record from the file at <paramref name="path"/>, starting at
        /// offset 0x350, and collects the entries that have a non-empty word and at least
        /// one pinyin syllable.
        /// </summary>
        /// <param name="path">Path of the file to import.</param>
        /// <returns>The entries that could be read; records that fail to parse are skipped.</returns>
        public WordLibraryList Import(string path)
        {
            var wordLibraryList = new WordLibraryList();

            // Dispose the stream even if something unexpected escapes the loop.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position = 0x350;

                // Use '<' rather than '!=': when the file is shorter than 0x350 the position
                // starts beyond the end and never changes, which used to loop forever.
                while (fs.Position < fs.Length)
                {
                    try
                    {
                        WordLibrary wl = ImportWord(fs);

                        // ImportWord returns null for a zero-length record; skip it explicitly
                        // instead of relying on a caught NullReferenceException.
                        if (wl != null && wl.Word != "" && wl.PinYin.Length > 0)
                        {
                            wordLibraryList.Add(wl);
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: log the broken record and continue with the next one.
                        Debug.WriteLine(ex.Message);
                    }
                }
            }

            return(wordLibraryList);
        }
Exemple #31
0
        /// <summary>
        /// Parses a tab-separated line and takes its second column as the word. Words that
        /// <c>IsEnglish</c> recognises are flagged and coded with the word itself.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list containing the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] columns = line.Split('\t');

            var entry = new WordLibrary();
            entry.Word = columns[1];
            entry.CodeType = CodeType;
            entry.IsEnglish = IsEnglish(entry.Word);

            if (entry.IsEnglish)
            {
                entry.SetCode(CodeType.English, entry.Word);
            }

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
 /// <summary>
 /// Parses a line of the form <c>'py'py word</c>. Only lines that begin with an
 /// apostrophe are accepted; the rank is always set to 1.
 /// </summary>
 /// <param name="line">The line to parse.</param>
 /// <returns>
 /// A list containing the parsed entry, or null when the line does not start with "'".
 /// </returns>
 public override WordLibraryList ImportLine(string line)
 {
     if (line.IndexOf("'") != 0)
     {
         return null;
     }

     string[] parts = line.Split(' ');
     string pinyin = parts[0];
     string text = parts[1];

     var entry = new WordLibrary
     {
         Word = text,
         Rank = 1,
         PinYin = pinyin.Split(new[] { '\'' }, StringSplitOptions.RemoveEmptyEntries)
     };

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
Exemple #33
0
        /// <summary>
        /// Builds the export line <c>pinyin word number Z, pinyin number</c>, using the
        /// pinyin string requested with "'" as the separator argument and the
        /// <c>number</c> member of this exporter.
        /// </summary>
        /// <param name="wl">Entry to export.</param>
        /// <returns>The text line for the entry.</returns>
        public string ExportLine(WordLibrary wl)
        {
            string pinyin = wl.GetPinYinString("'", BuildType.None);

            return new StringBuilder()
                .Append(pinyin)
                .Append(" ")
                .Append(wl.Word)
                .Append(" ")
                .Append(number)
                .Append(" Z, ")
                .Append(pinyin)
                .Append(" ")
                .Append(number)
                .ToString();
        }
        /// <summary>
        /// Parses a comma-separated line of the form <c>word,count</c>. The entry gets an
        /// empty pinyin array.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list containing the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] fields = line.Split(',');
            string text = fields[0];
            int frequency = Convert.ToInt32(fields[1]);

            var entry = new WordLibrary
            {
                Word = text,
                Count = frequency,
                PinYin = new string[0]
            };

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
        /// <summary>
        /// Exports a one-entry library through <c>Win10MsPinyinSelfStudy</c> and writes the
        /// first element of the result to the debug output. No assertions are made.
        /// </summary>
        public void TestExport1()
        {
            var library = new WordLibraryList();

            var entry = new WordLibrary();
            entry.Word = "曾毅曾诚";
            entry.PinYin = new string[] { "zeng", "yi", "zeng", "cheng" };
            entry.CodeType = CodeType.Pinyin;
            library.Add(entry);

            var exporter = new Win10MsPinyinSelfStudy();
            var exported = exporter.Export(library);

            Debug.WriteLine(exported[0]);
        }
Exemple #36
0
        /// <summary>
        /// Parses a space-separated line of the form <c>code word</c>. The code on the line
        /// is ignored; pinyin is regenerated from the word through <c>pinyinFactory</c>.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>
        /// A list containing the entry, or an empty list when no pinyin was produced.
        /// </returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] parts = line.Split(' ');
            string text = parts[1];

            var entry = new WordLibrary();
            entry.Word = text;
            entry.Count = DefaultRank;
            entry.PinYin = ToArray(pinyinFactory.GetCodeOfString(text));

            var result = new WordLibraryList();
            bool hasPinyin = entry.PinYin.Length > 0;
            if (hasPinyin)
            {
                result.Add(entry);
            }
            return result;
        }
Exemple #37
0
        //4字节使用同一个拼音的词条数x,2字节拼音长度n,n字节拼音的编号,(2字节汉字的长度y,y*2字节汉字的内容Unicode编码,2字节词频,2字节未知,4字节未知)*x

        #region IWordLibraryImport Members

        /// <summary>
        /// Reads a binary word library. The total word count is read from offset 0x18 into
        /// <c>CountWord</c>; records start at offset 0x30. Each group consists of a 2-byte
        /// number of words sharing one pinyin, a 2-byte unknown field, a 2-byte pinyin byte
        /// length, the 2-byte pinyin indexes, and then per word: a 2-byte byte length, the
        /// Unicode-encoded word, a 2-byte count, and 2 + 4 bytes of unknown data.
        /// <c>CurrentStatus</c> is advanced once per word read.
        /// </summary>
        /// <param name="path">Path of the file to import.</param>
        /// <returns>All entries read from the file.</returns>
        public WordLibraryList Import(string path)
        {
            var pyAndWord = new WordLibraryList();

            // The stream was previously never closed; 'using' releases the file handle
            // both on normal completion and when parsing throws.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position   = 0x18;
                CountWord     = BinFileHelper.ReadInt32(fs);
                CurrentStatus = 0;
                fs.Position   = 0x30;

                while (CurrentStatus < CountWord)
                {
                    int samePyCount = BinFileHelper.ReadInt16(fs);
                    BinFileHelper.ReadInt16(fs); // unknown field; read only to advance the stream
                    short pyLength  = BinFileHelper.ReadInt16(fs);

                    // Two bytes per pinyin index.
                    int syllableCount = pyLength / 2;
                    var pyArray       = new string[syllableCount];
                    for (int i = 0; i < syllableCount; i++)
                    {
                        short idx = BinFileHelper.ReadInt16(fs);
                        try
                        {
                            pyArray[i] = PinYinDic[idx];
                        }
                        catch
                        {
                            // An index with no table entry becomes a placeholder syllable.
                            pyArray[i] = "--";
                        }
                    }

                    for (int i = 0; i < samePyCount; i++)
                    {
                        short wordByteLength = BinFileHelper.ReadInt16(fs);
                        var   wordArray      = new byte[wordByteLength];
                        fs.Read(wordArray, 0, wordByteLength);
                        string word  = Encoding.Unicode.GetString(wordArray);
                        short  count = BinFileHelper.ReadInt16(fs);
                        BinFileHelper.ReadInt16(fs); // unknown field; read only to advance the stream
                        BinFileHelper.ReadInt32(fs); // unknown field; read only to advance the stream

                        // Every word of the group shares the same pinyin array instance.
                        var wl = new WordLibrary {
                            Count = count, Word = word, PinYin = pyArray
                        };
                        pyAndWord.Add(wl);
                        CurrentStatus++;
                    }
                }
            }
            return(pyAndWord);
        }
Exemple #38
0
        //private IWordCodeGenerater pyGenerater = new PinyinGenerater();
        /// <summary>
        /// Parses a space-separated line of the form <c>code word1 word2 ...</c> and creates
        /// one entry per word, each carrying the shared code under this importer's
        /// <c>CodeType</c>.
        /// </summary>
        /// <param name="str">The line to parse.</param>
        /// <returns>The entries for every word after the first token.</returns>
        public WordLibraryList ImportLine(string str)
        {
            var result = new WordLibraryList();

            string[] tokens = str.Split(' ');
            string sharedCode = tokens[0];

            // Token 0 is the code; the words start at index 1.
            for (int index = 1; index < tokens.Length; index++)
            {
                var entry = new WordLibrary();
                entry.Word = tokens[index];
                entry.Count = DefaultRank;
                entry.SetCode(CodeType, sharedCode);
                result.Add(entry);
            }
            return result;
        }
        /// <summary>
        /// Formats an entry with <c>PhraseFormat</c>, passing the word, the first element of
        /// <c>CollectionHelper.Descartes</c> over its codes, and its rank
        /// (<c>DefaultRank</c> when the rank is 0). When <c>IsShortCode</c> is set, each
        /// code row is first reduced to the first character of its first code.
        /// </summary>
        /// <param name="wl">Entry to export.</param>
        /// <returns>The formatted line.</returns>
        public string ExportLine(WordLibrary wl)
        {
            var codes = wl.Codes;

            if (IsShortCode)
            {
                codes = new Code();
                foreach (var row in wl.Codes)
                {
                    var shortened = new List <string>();
                    shortened.Add(row[0][0].ToString());
                    codes.Add(shortened);
                }
            }

            var word = wl.Word;
            var code = CollectionHelper.Descartes(codes)[0];
            var rank = wl.Rank == 0 ? DefaultRank : wl.Rank;

            return string.Format(PhraseFormat, word, code, rank);
        }
Exemple #40
0
        /// <summary>
        /// Builds the export line <c>code word</c>. The entry's own <c>WubiCode</c> is used
        /// when it is set; otherwise the first code returned by
        /// <c>wubiFactory.GetCodeOfString</c> for the word is used.
        /// </summary>
        /// <param name="wl">Entry to export.</param>
        /// <returns>The text line for the entry.</returns>
        public virtual string ExportLine(WordLibrary wl)
        {
            var line = new StringBuilder();

            bool hasOwnCode = !string.IsNullOrEmpty(wl.WubiCode);
            if (hasOwnCode)
            {
                line.Append(wl.WubiCode);
            }
            else
            {
                line.Append(wubiFactory.GetCodeOfString(wl.Word)[0]);
            }

            return line.Append(" ").Append(wl.Word).ToString();
        }
Exemple #41
0
        /// <summary>
        /// Parses a line of the form <c>pinyin word count</c> (space separated, pinyin
        /// syllables separated by apostrophes). Anything after the first comma is discarded.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list containing the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            // If the line contains commas, only the part before the first one is used.
            string head = line.Split(',')[0];
            string[] tokens = head.Split(' ');
            string pinyin = tokens[0];
            string text = tokens[1];
            int frequency = Convert.ToInt32(tokens[2]);

            var entry = new WordLibrary();
            entry.Word = text;
            entry.Count = frequency;
            entry.PinYin = pinyin.Split(new[] { '\'' }, StringSplitOptions.RemoveEmptyEntries);

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
Exemple #42
0
 /// <summary>
 /// Parses a space-separated line of the form <c>code word1 word2 ...</c>. The leading
 /// token is ignored; commas are stripped from each word and its pinyin is generated
 /// through <c>pinyinFactory</c>.
 /// </summary>
 /// <remarks>
 /// Each word is added once. The previous implementation looped over the generated code
 /// list without using the loop index, so a word with N codes was added N times as
 /// identical entries. Words for which no code is generated are still left out.
 /// </remarks>
 /// <param name="line">The line to parse.</param>
 /// <returns>One entry per word that produced at least one code.</returns>
 public virtual WordLibraryList ImportLine(string line)
 {
     var wlList = new WordLibraryList();
     string[] strs = line.Split(' ');

     // Token 0 is the code; the words start at index 1.
     for (int i = 1; i < strs.Length; i++)
     {
         string word = strs[i].Replace(",", ""); // strip commas embedded in the word
         var list = pinyinFactory.GetCodeOfString(word);
         if (list.Count == 0)
         {
             // Matches the old behaviour: a word without any code contributes nothing.
             continue;
         }

         var wl = new WordLibrary();
         wl.Word = word;
         wl.PinYin = ToArray(list);
         wlList.Add(wl);
     }
     return wlList;
 }
Exemple #43
0
        /// <summary>
        /// Parses a line of the form <c>word(py|py|...)</c>: the text before the first "("
        /// is the word, and the text between it and the following ")" is the pinyin, split
        /// on "|". The rank is always set to 1.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list containing the single parsed entry.</returns>
        public override WordLibraryList ImportLine(string line)
        {
            var result = new WordLibraryList();

            var pieces = line.Split('(');
            string text = pieces[0];
            string pinyin = pieces[1].Split(')')[0];

            var entry = new WordLibrary
            {
                Word = text,
                Rank = 1,
                PinYin = pinyin.Split(new[] { '|' }, StringSplitOptions.RemoveEmptyEntries)
            };

            result.Add(entry);
            return result;
        }
        /// <summary>
        /// Builds the export line by writing every character of the word immediately
        /// followed by the pinyin at the same index.
        /// </summary>
        /// <param name="wl">Entry to export.</param>
        /// <returns>
        /// The interleaved text, or an empty string when anything goes wrong while building
        /// it (for example when there are fewer pinyin syllables than characters).
        /// </returns>
        public string ExportLine(WordLibrary wl)
        {
            try
            {
                var line = new StringBuilder();
                string text = wl.Word;

                int index = 0;
                while (index < text.Length)
                {
                    line.Append(text[index]).Append(wl.PinYin[index]);
                    index++;
                }

                return line.ToString();
            }
            catch
            {
                // Deliberate best effort: a malformed entry exports as an empty line.
                return "";
            }
        }
Exemple #45
0
        /// <summary>
        /// Parses a tab-separated line. A two-column line (<c>word \t count</c>) is treated
        /// as English; otherwise the line is read as <c>word \t pinyin \t count</c> with
        /// apostrophe-separated pinyin.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list containing the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            var entry = new WordLibrary();
            string[] columns = line.Split('\t');
            entry.Word = columns[0];

            bool englishLayout = columns.Length == 2;
            if (englishLayout)
            {
                entry.IsEnglish = true;
                entry.Count = Convert.ToInt32(columns[1]);
            }
            else
            {
                string pinyin = columns[1];
                entry.PinYin = pinyin.Split(new[] {'\''}, StringSplitOptions.RemoveEmptyEntries);
                entry.Count = Convert.ToInt32(columns[2]);
            }

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
Exemple #46
0
 /// <summary>
 /// Imports the words returned by <c>Parse</c> for the given path. Words that
 /// <c>IsChinese</c> accepts get pinyin from a <c>WordPinyinGenerater</c>; all others
 /// are flagged as English. Every entry gets <c>DefaultRank</c> as its count.
 /// </summary>
 /// <param name="path">Path of the file to import.</param>
 /// <returns>One entry per parsed word.</returns>
 public WordLibraryList Import(string path)
 {
     IWordCodeGenerater generator = new WordPinyinGenerater();
     IList<string> parsedWords = Parse(path);
     var result = new WordLibraryList();

     foreach (string text in parsedWords)
     {
         var entry = new WordLibrary();

         if (!IsChinese(text))
         {
             entry.IsEnglish = true;
         }
         else
         {
             // Chinese words need a phonetic annotation.
             entry.PinYin = ToArray(generator.GetCodeOfString(text));
         }

         entry.Word = text;
         entry.Count = DefaultRank;
         result.Add(entry);
     }
     return result;
 }
        /// <summary>
        /// Reads one word record from the stream: a 2-byte length, 2 bytes that are
        /// skipped, one (initial, final) byte pair per character, and then two bytes per
        /// character of Unicode-encoded text.
        /// </summary>
        /// <param name="fs">Stream positioned at the start of a word record.</param>
        /// <returns>The parsed entry.</returns>
        private WordLibrary ImportWord(FileStream fs)
        {
            var entry = new WordLibrary();

            var buffer = new byte[2];
            fs.Read(buffer, 0, 2);
            short len = BitConverter.ToInt16(buffer, 0);
            fs.Read(buffer, 0, 2); // two bytes of unknown meaning; read only to skip them

            var syllables = new List<string>();
            for (int index = 0; index < len; index++)
            {
                buffer = new byte[2];
                fs.Read(buffer, 0, 2);

                string initial = Shengmu[buffer[0]];
                string final = Yunmu[buffer[1]];
                syllables.Add(initial + final);
            }
            entry.PinYin = syllables.ToArray();

            int textByteCount = 2*len;
            buffer = new byte[textByteCount];
            fs.Read(buffer, 0, textByteCount);
            entry.Word = Encoding.Unicode.GetString(buffer);

            return entry;
        }
 /// <summary>
 /// Parses a string in which characters may be followed by an inline pinyin written with
 /// characters whose code is not above 'z'. Characters without an inline pinyin get their
 /// code from <c>single.GetCodeOfChar</c>.
 /// </summary>
 /// <param name="word">The mixed character/pinyin text to parse.</param>
 /// <returns>A list containing the single parsed entry.</returns>
 public WordLibraryList ImportLine(string word)
 {
     // hz accumulates the characters, py the pinyin of each of them.
     string hz = "";
     var py = new List<string>();
     int j;
     // The loop stops one short of the end because it always looks at word[j + 1].
     for (j = 0; j < word.Length - 1; j++)
     {
         hz += word[j];
         if (word[j + 1] > 'z') // the next character is not pinyin
         {
             py.Add(single.GetCodeOfChar(word[j]));
         }
         else // the character is followed by its pinyin
         {
             int k = 1;
             string py1 = "";
             // Collect the run of characters <= 'z' that follows word[j].
             while (j + k != word.Length && word[j + k] <= 'z')
             {
                 py1 += word[j + k];
                 k++;
             }
             py.Add(py1);
             j += k - 1; // minus 1 because the loop's j++ runs next
         }
     }
     if (j == word.Length - 1) // the final character has no pinyin after it
     {
         hz += word[j];
         py.Add(single.GetCodeOfChar(word[j]));
     }
     var wl = new WordLibrary();
     wl.PinYin = py.ToArray();
     wl.Word = hz;
     var wll = new WordLibraryList();
     wll.Add(wl);
     return wll;
 }
 /// <summary>
 /// Builds the export line <c>word \t value</c>, where the value is the entry's count
 /// plus 54999.
 /// </summary>
 /// <param name="wl">Entry to export.</param>
 /// <returns>The text line for the entry.</returns>
 public string ExportLine(WordLibrary wl)
 {
     var text = wl.Word;
     var shiftedCount = 54999 + wl.Count;
     return text + "\t" + shiftedCount;
 }
 /// <summary>
 /// Decides whether an entry is kept: it is kept only when its word does not match
 /// <c>englishRegex</c>.
 /// </summary>
 /// <param name="wl">Entry to test.</param>
 /// <returns>False when the word matches the regex, true otherwise.</returns>
 public bool IsKeep(WordLibrary wl)
 {
     bool matchesEnglish = englishRegex.IsMatch(wl.Word);
     return matchesEnglish == false;
 }
        /// <summary>
        /// Builds the export line by delegating to <c>UserDefiningPattern.BuildWLString</c>.
        /// </summary>
        /// <param name="wl">Entry to export.</param>
        /// <returns>The text line for the entry.</returns>
        public string ExportLine(WordLibrary wl)
        {
            return UserDefiningPattern.BuildWLString(wl);
        }
Exemple #52
0
 /// <summary>
 /// Runs the entry through every filter in <c>Filters</c>, stopping at the first one
 /// that rejects it.
 /// </summary>
 /// <param name="wordLibrary">Entry to test.</param>
 /// <returns>True when no filter rejects the entry.</returns>
 private bool IsKeep(WordLibrary wordLibrary)
 {
     bool keep = true;
     foreach (ISingleFilter filter in Filters)
     {
         if (filter.IsKeep(wordLibrary))
         {
             continue;
         }
         keep = false;
         break;
     }
     return keep;
 }
 /// <summary>
 /// Fills in the code of an entry according to <c>SelectedParsePattern</c>. When the
 /// pattern is pinyin in pinyin format, the pinyin is generated by <c>pyFactory</c>;
 /// otherwise <c>selfFactory</c> is configured from the pattern (loading the mapping
 /// table from its path when one is given) and produces a <c>UserDefine</c> code.
 /// </summary>
 /// <param name="wl">The entry to update in place.</param>
 private void GenerateCode( WordLibrary wl)
 {
     var text = wl.Word;

     if (SelectedParsePattern.IsPinyin&&SelectedParsePattern.IsPinyinFormat)
     {
         wl.PinYin = CollectionHelper.ToArray(pyFactory.GetCodeOfString(text, SelectedParsePattern.CodeSplitString));
         return;
     }

     bool hasMappingFile = !string.IsNullOrEmpty(SelectedParsePattern.MappingTablePath);
     if (hasMappingFile)
     {
         SelectedParsePattern.MappingTable = UserCodingHelper.GetCodingDict(SelectedParsePattern.MappingTablePath);
     }

     // Push the pattern's settings into the custom generator before using it.
     selfFactory.MappingDictionary = SelectedParsePattern.MappingTable;
     selfFactory.Is1Char1Code = SelectedParsePattern.IsPinyinFormat;
     selfFactory.MutiWordCodeFormat = SelectedParsePattern.MutiWordCodeFormat;

     wl.SetCode(CodeType.UserDefine, selfFactory.GetCodeOfString(text, SelectedParsePattern.CodeSplitString));
 }
Exemple #54
0
 /// <summary>
 /// Imports a word library file: clears the <c>GlobalCache</c> collections, parses the
 /// header, loads the first character and the tree below it, and then converts every
 /// word found in <c>GlobalCache.WordList</c> into an entry.
 /// </summary>
 /// <param name="str">Path of the file to import.</param>
 /// <returns>One entry per word in <c>GlobalCache.WordList</c>.</returns>
 public WordLibraryList Import(string str)
 {
     GlobalCache.CharList.Clear();
     GlobalCache.Stackes.Clear();
     GlobalCache.WordList.Clear();

     // 'using' closes the file even when header or tree parsing throws; the previous
     // explicit Close() was skipped in that case and leaked the handle.
     using (var fs = new FileStream(str, FileMode.Open, FileAccess.Read))
     {
         ParseHeader(fs);
         TouchPalChar rootChar = TouchPalChar.Load(fs); // load the first character
         LoadTree(fs, rootChar);
     }

     var wwl = new WordLibraryList();
     foreach (int i in GlobalCache.WordList.Keys)
     {
         TouchPalWord w = GlobalCache.WordList[i];
         var wl = new WordLibrary();
         wl.Count = w.Count;
         wl.PinYin = w.PinYin.ToArray();
         wl.Word = w.ChineseWord;
         wwl.Add(wl);
     }
     return wwl;
 }
Exemple #55
0
 /// <summary>
 /// Appends the characters of a word, starting at index <paramref name="begin"/>, as a chain
 /// of new nodes and attaches the word to the last node of that chain.
 /// </summary>
 /// <param name="rootChar">
 /// Node to extend. When <paramref name="begin"/> is greater than zero, its <c>Word.Chars</c>
 /// supplies the first <paramref name="begin"/> nodes of the new word.
 /// </param>
 /// <param name="wl">Word entry providing the text, per-character pinyin and count.</param>
 /// <param name="begin">Index of the first character for which a new node is created.</param>
 /// <returns>The node the word was attached to (the last node of the chain).</returns>
 private TouchPalChar AddWordLink2Char(TouchPalChar rootChar, WordLibrary wl, int begin)
 {
     var word = new TouchPalWord();
     word.ChineseWord = wl.Word;
     word.Count = wl.Count;

     // Start linking either from the root itself or from the last shared prefix node.
     TouchPalChar tail = begin > 0 ? rootChar.Word.Chars[begin - 1] : rootChar;

     var chain = new TouchPalChar[wl.Word.Length];
     word.Chars = chain;

     // Reuse the nodes of the shared prefix.
     int position = 0;
     while (position < begin)
     {
         chain[position] = rootChar.Word.Chars[position];
         position++;
     }

     // Create one node per remaining character and hook it behind the previous one.
     for (int idx = begin; idx < wl.Word.Length; idx++)
     {
         char letter = wl.Word[idx];
         string syllable = wl.PinYin[idx];
         var node = new TouchPalChar
         {
             Char = letter,
             PinyinCode = GlobalCache.PinyinIndexMapping[syllable],
             WordIndex = idx + 1
         };
         // The value is not used here; the property read is retained from the original code.
         short packed = node.IndexAndPinYin;
         AddChar2Next(tail, node);
         chain[idx] = node;
         tail = node;
     }

     tail.Word = word;
     return tail;
 }
 /// <summary>
 /// Click handler for the test button: encodes every non-empty line of <c>rtbFrom</c>
 /// with the selected parse pattern and shows the formatted result in <c>rtbTo</c>.
 /// </summary>
 /// <param name="sender">Event source (unused).</param>
 /// <param name="e">Event data (unused).</param>
 private void btnTest_Click(object sender, EventArgs e)
 {
     char[] lineBreaks = {'\r', '\n'};
     string[] entries = rtbFrom.Text.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);

     var output = new StringBuilder();
     for (int n = 0; n < entries.Length; n++)
     {
         // Every preview entry gets the same fixed count.
         var sample = new WordLibrary();
         sample.Word = entries[n].Trim();
         sample.Count = 1234;

         GenerateCode(sample);
         output.Append(SelectedParsePattern.BuildWlString(sample));
         output.Append("\r\n");
     }
     rtbTo.Text = output.ToString();
 }
        /// <summary>
        /// Reads a binary dictionary file and returns its entries.
        /// </summary>
        /// <param name="path">Path of the file to open for reading.</param>
        /// <returns>One <see cref="WordLibrary"/> per word record read from the file.</returns>
        /// <remarks>
        /// The word total is read as a 32-bit value at offset 0x18 and the records are read
        /// starting at offset 0x30. <c>CountWord</c> and <c>CurrentStatus</c> are updated while
        /// reading. Words that share a pinyin group also share the same pinyin array instance.
        /// </remarks>
        public WordLibraryList Import(string path)
        {
            var pyAndWord = new WordLibraryList();

            // 'using' releases the file handle on every exit path; the stream used to be
            // left open, which kept the file locked until finalization.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position = 0x18;
                CountWord = BinFileHelper.ReadInt32(fs);
                CurrentStatus = 0;
                fs.Position = 0x30;

                while (CurrentStatus < CountWord)
                {
                    int samePyCount = BinFileHelper.ReadInt16(fs);
                    BinFileHelper.ReadInt16(fs); // field of unknown meaning; read only to advance the stream
                    short pyLength = BinFileHelper.ReadInt16(fs);

                    // Each pinyin index occupies two bytes.
                    var pyArray = new string[pyLength/2];
                    for (int i = 0; i < pyLength/2; i++)
                    {
                        short idx = BinFileHelper.ReadInt16(fs);
                        try
                        {
                            pyArray[i] = PinYinDic[idx];
                        }
                        catch
                        {
                            // Best effort: an index missing from PinYinDic becomes a placeholder.
                            pyArray[i] = "--";
                        }
                    }

                    for (int i = 0; i < samePyCount; i++)
                    {
                        short wordByteLength = BinFileHelper.ReadInt16(fs);
                        var wordArray = new byte[wordByteLength];
                        fs.Read(wordArray, 0, wordByteLength);
                        string word = Encoding.Unicode.GetString(wordArray);
                        short count = BinFileHelper.ReadInt16(fs);
                        BinFileHelper.ReadInt16(fs); // second 16-bit value after the count; not used
                        BinFileHelper.ReadInt32(fs); // purpose unknown; read only to advance the stream
                        var wl = new WordLibrary {Count = count, Word = word, PinYin = pyArray};
                        pyAndWord.Add(wl);
                        CurrentStatus++;
                    }
                }
            }
            return pyAndWord;
        }
Exemple #58
0
 /// <summary>
 /// Not supported: this implementation does not export entries as text lines.
 /// </summary>
 /// <param name="wl">The entry that would be exported (ignored).</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="NotImplementedException">Always thrown.</exception>
 public string ExportLine(WordLibrary wl)
 {
     throw new NotImplementedException();
 }
Exemple #59
0
        /// <summary>
        /// Writes one word entry into the dictionary file, one character record at a time,
        /// followed by a 4-byte count, one extra byte and the UTF-16 text of the word.
        /// </summary>
        /// <param name="fs">Destination stream; records are written at its current position.</param>
        /// <param name="wl">Entry to write; <c>Word</c> and <c>PinYin</c> are indexed per character.</param>
        /// <param name="isLastWord">When false, the first character record also gets a jump position.</param>
        /// <returns>The stream position at which this call started writing.</returns>
        public int WriteWord(FileStream fs, WordLibrary wl, bool isLastWord)
        {
            var beginPosition = (int) fs.Position;
            int wordLength = wl.Word.Length;
            int charIndex;
            // charIndex is the index of the first character that still has to be written.
            TouchPalChar stackChar = FindBeginPosition(wl.Word, out charIndex);

            // Each iteration writes a 26-byte record: a 2-byte code plus six 4-byte values.
            // NOTE(review): the offsets below are computed as beginPosition + i*26, which matches
            // the record layout only when charIndex == 0 — confirm behavior for charIndex > 0.
            for (int i = charIndex; i < wordLength; i++)
            {
                var item = new TouchPalChar();
                item.Char = wl.Word[i];
                item.BeginPosition = (int) fs.Position;
                string py = wl.PinYin[i];
                int pyIndex = GlobalCache.PinyinIndexMapping[py];
                // 1-based character index shifted left by 11 bits, plus the pinyin index.
                var code = (short) (((i + 1) << 11) + pyIndex);
                fs.Write(BitConverter.GetBytes(code), 0, 2);
                int p1 = 0; // position of the word frequency
                if (i == wordLength - 1) // last character
                {
                    p1 = beginPosition + wordLength*26;
                }
                fs.Write(BitConverter.GetBytes(p1), 0, 4);
                int p2 = 0; // position of the next character
                if (i != wordLength - 1)
                {
                    p2 = beginPosition + (i + 1)*26;
                }
                fs.Write(BitConverter.GetBytes(p2), 0, 4);
                int p3 = 0; // jump position
                if (!isLastWord && i == 0)
                {
                    // 26 bytes per record + 4 (count) + 1 (extra byte) + 2 bytes per UTF-16 char
                    // = wordLength*28 + 5, i.e. the offset just past this entry.
                    p3 = beginPosition + wordLength*28 + 5;
                }
                fs.Write(BitConverter.GetBytes(p3), 0, 4);
                int p4 = 0; // position of the previous character
                if (charIndex == 0)
                {
                    if (i == 0)
                    {
                        // First character of a word written from scratch: link back to the
                        // current jump character, then make this record the new jump character.
                        p4 = GlobalCache.JumpChar.BeginPosition;
                        GlobalCache.JumpChar = item;
                    }
                    else
                    {
                        p4 = beginPosition + (i - 1)*26;
                    }
                }
                else
                {
                    p4 = stackChar.BeginPosition;
                }
                fs.Write(BitConverter.GetBytes(p4), 0, 4);
                // p5 is stored as PrevValidCharPosition; it defaults to 4 for the first character.
                // NOTE(review): the meaning of the constant 4 is not evident from this method.
                int p5 = 4;
                if (charIndex == 0)
                {
                    if (i != 0)
                    {
                        p5 = p4;
                    }
                }
                else
                {
                    p5 = stackChar.PrevValidCharPosition;
                }
                item.PrevValidCharPosition = p5;
                fs.Write(BitConverter.GetBytes(p5), 0, 4);
                int p6 = 0; // always written as zero
                fs.Write(BitConverter.GetBytes(p6), 0, 4);
                GlobalCache.ExportStackes.Push(item);
            }
            // NOTE(review): the count is hard-coded to 96; wl.Count is not written.
            int count = 96; // wl.Count;
            fs.Write(BitConverter.GetBytes(count), 0, 4);
            fs.WriteByte(0); // purpose of this byte is unknown
            byte[] wordByte = Encoding.Unicode.GetBytes(wl.Word);
            fs.Write(wordByte, 0, wordByte.Length);
            return beginPosition;
        }
 /// <summary>
 /// Produces the text line for an entry; this base implementation emits only the word itself.
 /// </summary>
 /// <param name="wl">The entry to export.</param>
 /// <returns>The value of <c>wl.Word</c>.</returns>
 public virtual string ExportLine(WordLibrary wl)
 {
     string exported = wl.Word;
     return exported;
 }