/// <summary>
/// Fills in the <c>CodeType.UserDefinePhrase</c> code of <paramref name="wl"/>.
/// </summary>
/// <param name="wl">The entry to update in place.</param>
public override void GetCodeOfWordLibrary(WordLibrary wl)
        {
            bool sourceIsPinyin = wl.CodeType == CodeType.Pinyin;

            if (sourceIsPinyin)
            {
                // Seed the phrase code from the pinyin string built with an empty first argument
                // (presumably the syllable separator - TODO confirm against GetPinYinString).
                var joinedPinyin = wl.GetPinYinString("", BuildType.None);
                wl.SetCode(CodeType.UserDefinePhrase, joinedPinyin);
            }

            // Expand the entry's current codes and keep only the first expansion.
            // NOTE(review): Descartes looks like a Cartesian product of per-character codes - confirm.
            var combinations     = CollectionHelper.Descartes(wl.Codes);
            var firstCombination = combinations[0];

            wl.SetCode(CodeType.UserDefinePhrase, firstCombination);
        }
 /// <summary>
 /// Sets the <c>CodeType.Chaoyin</c> code of <paramref name="wl"/>.
 /// Pinyin-coded entries are converted via <c>ChaoyinHelper.GetChaoyin</c>;
 /// any other entry is encoded from its word text via <c>GetCodeOfString</c>.
 /// </summary>
 /// <param name="wl">The entry to update in place.</param>
 public void GetCodeOfWordLibrary(WordLibrary wl)
 {
     if (wl.CodeType != CodeType.Pinyin)
     {
         // No pinyin available: derive the code directly from the word.
         wl.SetCode(CodeType.Chaoyin, GetCodeOfString(wl.Word));
         return;
     }

     var chaoyin = ChaoyinHelper.GetChaoyin(wl.PinYin);
     wl.SetCode(CodeType.Chaoyin, chaoyin);
 }
        /// <summary>
        /// Parses one space-separated line whose first token is the code and whose
        /// second token is the word.
        /// </summary>
        /// <param name="line">A line of the form "code word".</param>
        /// <returns>A list containing the single parsed entry.</returns>
        public override WordLibraryList ImportLine(string line)
        {
            string[] fields = line.Split(' ');
            var      entry  = new WordLibrary();
            string   fileCode = fields[0];

            entry.Word = fields[1];
            entry.Rank = DefaultRank;

            // First attach a code generated from the word, then the code read from the file
            // under this importer's own CodeType.
            entry.SetCode(CodeType.Cangjie, pyGenerater.GetCodeOfString(entry.Word));
            entry.SetCode(CodeType, fileCode);

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
示例#4
0
        //private IWordCodeGenerater pyGenerater=new PinyinGenerater();
        /// <summary>
        /// Parses one tab-separated line with the columns word, code and rank (in that order).
        /// </summary>
        /// <param name="line">A line of the form "word\tcode\trank".</param>
        /// <returns>A list containing the single parsed entry.</returns>
        public override WordLibraryList ImportLine(string line)
        {
            string[] columns = line.Split('\t');

            string wordText = columns[0];
            string codeText = columns[1];
            var    entry    = new WordLibrary();

            entry.Word = wordText;
            entry.Rank = Convert.ToInt32(columns[2]);

            if (CodeType != CodeType.Pinyin)
            {
                // Non-pinyin formats store the code column verbatim.
                entry.SetCode(CodeType, codeText);
            }
            else
            {
                // Pinyin syllables are separated by spaces inside the code column.
                entry.PinYin = codeText.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            }

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
示例#5
0
        /// <summary>
        /// Parses one space-separated line whose first token is a code shared by every
        /// following token, each of which is a word.
        /// </summary>
        /// <param name="line">A line of the form "code word1 word2 ...".</param>
        /// <returns>
        /// One entry per word, each carrying the shared code under this importer's
        /// <c>CodeType</c>; empty when the line holds only the code.
        /// </returns>
        public override WordLibraryList ImportLine(string line)
        {
            var wlList = new WordLibraryList();

            string[] strs = line.Split(' ');

            // strs[0] is the code; words start at index 1.
            for (int i = 1; i < strs.Length; i++)
            {
                var wl = new WordLibrary();

                // The word is stored exactly as it appears in the line (commas included).
                wl.Word = strs[i];
                wl.SetCode(CodeType, strs[0]);
                wlList.Add(wl);
            }
            return wlList;
        }
示例#6
0
        /// <summary>
        /// Reads one phrase record from the current position of <paramref name="fs"/>
        /// and returns it as a Wubi98-coded entry.
        /// </summary>
        /// <param name="fs">Stream positioned at the start of the record.</param>
        /// <param name="nextStartPosition">Stream offset at which the following record begins.</param>
        /// <returns>The parsed entry, or <c>null</c> when setting the code throws.</returns>
        private WordLibrary ReadOnePhrase(FileStream fs, int nextStartPosition)
        {
            var entry = new WordLibrary();

            BinFileHelper.ReadInt32(fs);                        // leading 4-byte value, not used
            var hanziOffset = BinFileHelper.ReadInt16(fs);

            entry.Rank = fs.ReadByte();
            fs.ReadByte();                                      // purpose unknown
            BinFileHelper.ReadInt64(fs);                        // newly added field, meaning unknown

            // The code bytes fill the rest of the header: hanziOffset minus 18.
            var codeBytes = BinFileHelper.ReadArray(fs, hanziOffset - 18);
            var wubiCode  = Encoding.Unicode.GetString(codeBytes);

            BinFileHelper.ReadInt16(fs);                        // 00 00 separates the code from the word

            // The word runs up to the next record, minus the trailing 00 00.
            var wordLength = nextStartPosition - (int)fs.Position - 2;
            var wordBytes  = BinFileHelper.ReadArray(fs, wordLength);

            BinFileHelper.ReadInt16(fs);                        // trailing 00 00 separator

            entry.Word = Encoding.Unicode.GetString(wordBytes);

            try
            {
                entry.SetCode(CodeType.Wubi98, wubiCode);
            }
            catch
            {
                // Any failure while setting the code drops the record.
                return null;
            }

            entry.CodeType = CodeType.Wubi98;
            return entry;
        }
示例#7
0
        /// <summary>
        /// Parses one space-separated line whose first token is the code and whose
        /// second token is the word; pinyin is generated from the word.
        /// </summary>
        /// <param name="line">A line of the form "code word".</param>
        /// <returns>A list containing the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] tokens   = line.Split(' ');
            var      entry    = new WordLibrary();
            string   fileCode = tokens[0];

            entry.Word  = tokens[1];
            entry.Count = DefaultRank;

            // Pinyin comes from the generator, then the file's code is stored under this importer's CodeType.
            var generated = pyGenerater.GetCodeOfString(entry.Word);
            entry.PinYin = CollectionHelper.ToArray(generated);
            entry.SetCode(CodeType, fileCode);

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
示例#8
0
        /// <summary>
        /// Parses one tab-separated line, taking the word from the second column.
        /// Words recognised by <c>IsEnglish</c> are coded as <c>CodeType.English</c>
        /// with the word itself as the code.
        /// </summary>
        /// <param name="line">A tab-separated line with at least two columns.</param>
        /// <returns>A list containing the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            var entry = new WordLibrary();

            string[] columns = line.Split('\t');
            entry.Word      = columns[1];
            entry.CodeType  = CodeType;
            entry.IsEnglish = IsEnglish(entry.Word);

            if (entry.IsEnglish)
            {
                entry.SetCode(CodeType.English, entry.Word);
            }

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
示例#9
0
        //private IWordCodeGenerater pyGenerater = new PinyinGenerater();
        /// <summary>
        /// Parses one space-separated line whose first token is a code shared by every
        /// following token, each of which is a word.
        /// </summary>
        /// <param name="str">A line of the form "code word1 word2 ...".</param>
        /// <returns>One entry per word; empty when the line holds only the code.</returns>
        public WordLibraryList ImportLine(string str)
        {
            var result = new WordLibraryList();

            string[] tokens = str.Split(' ');

            // tokens[0] is the shared code; words start at index 1.
            for (int index = 1; index < tokens.Length; index++)
            {
                var entry = new WordLibrary();
                entry.Word  = tokens[index];
                entry.Count = DefaultRank;
                entry.SetCode(CodeType, tokens[0]);
                result.Add(entry);
            }

            return result;
        }
示例#10
0
        /// <summary>
        /// Parses one space-separated line whose first token is a Wubi code and whose
        /// second token is the word.
        /// </summary>
        /// <param name="line">A line of the form "code word".</param>
        /// <returns>
        /// A list containing the parsed entry when its <c>PinYin</c> array is non-empty;
        /// otherwise an empty list.
        /// </returns>
        public override WordLibraryList ImportLine(string line)
        {
            // Split once instead of once per field.
            string[] parts = line.Split(' ');
            string   code  = parts[0];
            string   word  = parts[1];
            var      wl    = new WordLibrary();

            wl.Word = word;
            wl.Rank = DefaultRank;
            wl.SetCode(CodeType.Wubi, code);

            var wll = new WordLibraryList();

            // NOTE(review): PinYin is never assigned in this method, so this guard relies on
            // whatever WordLibrary.PinYin returns after SetCode - confirm it is intended.
            if (wl.PinYin.Length > 0)
            {
                wll.Add(wl);
            }
            return wll;
        }
        /// <summary>
        /// Reads one phrase record from the current position of <paramref name="fs"/>.
        /// The record is three header bytes (code length, word length, kind marker),
        /// then the ASCII code bytes, then the UTF-16 word bytes.
        /// </summary>
        /// <param name="fs">Stream positioned at the start of the record.</param>
        /// <param name="type">Whether the code is stored as pinyin or as Wubi98.</param>
        /// <returns>
        /// The parsed entry. When setting the code throws, the entry is returned with
        /// <c>CodeType.NoCode</c> and <c>ImportLineErrorNotice</c> is raised.
        /// </returns>
        private WordLibrary ReadOnePhrase(FileStream fs, DictCodeType type)
        {
            var entry      = new WordLibrary();
            int codeLength = fs.ReadByte();
            int wordLength = fs.ReadByte();
            int kindMarker = fs.ReadByte();

            // 0x64 marks a normal phrase (including mixed Chinese/English such as "阿Q").
            // 0x67 has been seen on entries like "$X[计算器]calc".
            Debug.Assert(kindMarker == 0x64 || kindMarker == 0x32 ||
                         kindMarker == 0x10 || kindMarker == 0x66 ||
                         kindMarker == 0x67);

            byte[] rawCode  = BinFileHelper.ReadArray(fs, codeLength);
            string codeText = Encoding.ASCII.GetString(rawCode);

            byte[] rawWord  = BinFileHelper.ReadArray(fs, wordLength);
            string wordText = Encoding.Unicode.GetString(rawWord);

            if (kindMarker == 0x32)
            {
                // E.g. "醃(腌)" - the bracketed part may be the simplified form.
                // For now only the first character is kept.
                wordText = wordText.Substring(0, 1);
            }
            Debug.Assert(wordText.IndexOf("(") < 0);
            entry.Word = wordText;

            try
            {
                if (type == DictCodeType.Pinyin)
                {
                    entry.CodeType = CodeType.Pinyin;
                    entry.SetPinyinString(codeText);
                }
                else if (type == DictCodeType.Wubi98)
                {
                    entry.CodeType = CodeType.Wubi98;
                    entry.SetCode(CodeType.Wubi98, codeText);
                }
            }
            catch
            {
                // Keep the word but flag it as having no usable code, and tell listeners.
                entry.CodeType = CodeType.NoCode;
                ImportLineErrorNotice?.Invoke(entry.Word + " 的编码缺失");
            }

            return entry;
        }
示例#12
0
        /// <summary>
        /// Converts one input line, laid out as described by <c>UserDefiningPattern</c>,
        /// into a <c>WordLibrary</c>.
        /// </summary>
        /// <param name="line">A line whose columns are separated by <c>UserDefiningPattern.SplitString</c>.</param>
        /// <returns>The entry built from the line's code, word and rank columns.</returns>
        public WordLibrary BuildWordLibrary(string line)
        {
            var wl = new WordLibrary();

            wl.CodeType = UserDefiningPattern.CodeType;
            string[] columns     = line.Split(new[] { UserDefiningPattern.SplitString }, StringSplitOptions.RemoveEmptyEntries);
            var      sortedOrder = new List <int>(UserDefiningPattern.Sort);

            sortedOrder.Sort();
            string code = "";
            string word = "";
            int    rank = 0;

            // Sort holds one ordering value per field (index 0 = code, 1 = word, 2 = rank).
            // The field with the n-th smallest value occupies the n-th column of the line.
            for (int column = 0; column < 3; column++)
            {
                // The first column is always consulted; the second and third only when present.
                if (column > 0 && columns.Length <= column)
                {
                    break;
                }

                int position = column; // per-iteration copy for the lambda below
                int field    = UserDefiningPattern.Sort.FindIndex(i => i == sortedOrder[position]);

                if (field == 0 && UserDefiningPattern.ContainCode)
                {
                    code = columns[column];
                }
                else if (field == 1)
                {
                    word = columns[column];
                }
                else if (field == 2 && UserDefiningPattern.ContainRank)
                {
                    rank = Convert.ToInt32(columns[column]);
                }
            }

            wl.Word = word;
            wl.Rank = rank;

            if (code == "")
            {
                return wl;
            }

            if (!UserDefiningPattern.IsPinyinFormat)
            {
                // Non-pinyin codes are stored as a single string.
                wl.SetCode(UserDefiningPattern.CodeType, code);
                return wl;
            }

            // Pinyin codes are split into syllables on the configured code separator.
            string[] syllables = code.Split(new[] { UserDefiningPattern.CodeSplitString },
                                            StringSplitOptions.RemoveEmptyEntries);
            wl.SetCode(UserDefiningPattern.CodeType, new List <string>(syllables), UserDefiningPattern.IsPinyinFormat);
            return wl;
        }