public WordLibraryList ImportLine(string line)
        {
            // A line whose first character is ';' yields no entry: null is returned instead of a list.
            bool startsWithSemicolon = line.Length > 0 && line[0] == ';';
            if (startsWithSemicolon)
            {
                return null;
            }

            // The first space-separated token is the word; the tokens after it are taken as
            // the pinyin, one token per character of the word.
            string[] tokens    = line.Split(' ');
            string   text      = tokens[0];
            var      syllables = new string[text.Length];
            int      position  = 0;

            while (position < text.Length)
            {
                syllables[position] = tokens[position + 1];
                position++;
            }

            var entry = new WordLibrary {
                Word = text, Rank = 1, PinYin = syllables
            };

            return new WordLibraryList {
                entry
            };
        }
Example #2
0
        private WordLibraryList Filter(WordLibraryList wlList)
        {
            var filtered = new WordLibraryList();

            // A replacer is only created when the configured scheme is something other than full pinyin.
            IReplaceFilter replacer = PinyinType == PinyinType.FullPinyin
                ? null
                : new ShuangpinReplacer(PinyinType);
            bool mustReplace = replacer != null;

            foreach (var entry in wlList)
            {
                if (mustReplace)
                {
                    replacer.Replace(entry);
                }

                // Length limits (pinyin longer than 32, word longer than 64) are not enforced here:
                // every entry is passed through to the result.
                filtered.Add(entry);
            }
            return filtered;
        }
Example #3
0
        /// <summary>
        /// Reads the phrases stored in the binary dictionary file at <paramref name="path"/>.
        /// </summary>
        /// <remarks>
        /// Four consecutive 32-bit values are read starting at offset 0x10: the position of the
        /// offset table, the position of the phrase data, the end of the phrase data and the phrase count.
        /// </remarks>
        /// <param name="path">Path of the file to read.</param>
        /// <returns>Every phrase for which <c>ReadOnePhrase</c> returned a non-null entry.</returns>
        public WordLibraryList Import(string path)
        {
            var pyAndWord = new WordLibraryList();

            // Dispose the stream on every exit path; previously the file handle was never released.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position = 0x10;
                var phrase_offset_start = BinFileHelper.ReadInt32(fs);
                var phrase_start        = BinFileHelper.ReadInt32(fs);
                var phrase_end          = BinFileHelper.ReadInt32(fs);
                var phrase_count        = BinFileHelper.ReadInt32(fs);

                fs.Position = phrase_offset_start;
                var offsets = ReadOffsets(fs, phrase_count);

                // Append the total length of the phrase area so that offsets[i + 1] exists for the last phrase.
                offsets.Add(phrase_end - phrase_start);

                fs.Position = phrase_start;
                for (var i = 0; i < phrase_count; i++)
                {
                    // NOTE(review): the second argument looks like the absolute end position of phrase i - confirm against ReadOnePhrase.
                    var wl = ReadOnePhrase(fs, phrase_start + offsets[i + 1]);
                    if (wl != null)
                    {
                        pyAndWord.Add(wl);
                    }
                }
            }
            return(pyAndWord);
        }
        /// <summary>
        /// Imports a line that holds only a word and checks the pinyin string of the single resulting entry.
        /// </summary>
        public void ImportNoPinyin()
        {
            WordLibraryList wl = importer.ImportLine("深蓝测试");

            // Expected value first, actual second, so a failure message labels the two values correctly.
            Assert.AreEqual(1, wl.Count);
            Assert.AreEqual("shen'lan'ce'shi", wl[0].PinYinString);
        }
        /// <summary>
        /// Reads the words stored in the fixed-size records of the binary file at <paramref name="path"/>.
        /// </summary>
        /// <remarks>
        /// The word count is read from the 4 bytes at offset 12. Records are 60 bytes apart starting at 0x2400;
        /// within a record the word length is the byte at offset 10 and the UTF-16 text starts at offset 12.
        /// </remarks>
        /// <param name="path">Path of the file to read.</param>
        /// <returns>One entry per record, carrying the word and this importer's <c>CodeType</c>.</returns>
        public WordLibraryList Import(string path)
        {
            WordLibraryList re             = new WordLibraryList();
            int             user_word_base = 0x2400;
            byte[]          bytes          = new byte[50];

            // The using block releases the file handle even when a seek or read throws;
            // previously the stream was only closed on the success path.
            using (FileStream fp = File.OpenRead(path))
            {
                // Word count.
                fp.Seek(12, SeekOrigin.Begin);
                fp.Read(bytes, 0, 4);
                int cnt = bytesToIntLittle(bytes, 0, 4);

                // One record per word.
                for (int i = 0; i < cnt; i++)
                {
                    int cur_idx = user_word_base + i * 60;

                    // Word length (a single byte).
                    fp.Seek(cur_idx + 10, SeekOrigin.Begin);
                    fp.Read(bytes, 0, 1);
                    int wordLen = bytesToIntLittle(bytes, 0, 1);

                    // Word text, two bytes per character.
                    // NOTE(review): wordLen * 2 may exceed the 50-byte buffer, in which case Read throws - confirm the maximum word length.
                    fp.Seek(cur_idx + 12, SeekOrigin.Begin);
                    fp.Read(bytes, 0, wordLen * 2);
                    string word = Encoding.Unicode.GetString(bytes, 0, wordLen * 2);
                    re.Add(new WordLibrary()
                    {
                        Word = word, CodeType = this.CodeType,
                    });
                }
            }
            return(re);
        }
Example #6
0
        /// <summary>
        /// Exports one user-defined-code entry through a pinyin-format pattern and checks the produced line.
        /// </summary>
        public void TestGeneratePinyinThen2String()
        {
            ParsePattern parser = new ParsePattern()
            {
                IsPinyinFormat  = true,
                CodeSplitType   = BuildType.FullContain,
                CodeSplitString = "~",
                ContainCode     = true,
                ContainRank     = true,
                SplitString     = "|",
                CodeType        = CodeType.Pinyin,
                LineSplitString = "\r",
                Sort            = new List <int>()
                {
                    2, 1, 3
                }
            };
            WordLibraryList wll = new WordLibraryList();
            WordLibrary     wl  = new WordLibrary()
            {
                Word = "深蓝", Rank = 123, CodeType = CodeType.UserDefine
            };

            wl.Codes = new Code();
            wl.Codes.Add(new[] { "sn" });
            wl.Codes.Add(new[] { "ln" });
            wll.Add(wl);
            var selfDefining = new SelfDefining();

            selfDefining.UserDefiningPattern = parser;
            var str = selfDefining.Export(wll);

            // Expected value first, actual second, so a failure message labels the two values correctly.
            Assert.AreEqual("深蓝|~shen~lan~|123\r", str[0]);
        }
Example #7
0
        /// <summary>
        /// Imports the given test file and checks that a non-empty list comes back.
        /// </summary>
        /// <param name="file">File name that is resolved through <c>GetFullPath</c>.</param>
        public void TestImport(string file)
        {
            var             fullPath = GetFullPath(file);
            WordLibraryList imported = importer.Import(fullPath);

            Assert.IsNotNull(imported);
            Assert.Greater(imported.Count, 0);
        }
Example #8
0
        //{0x05 2word

        // Layout: 4 bytes = number x of entries that share one pinyin, 2 bytes = pinyin length n, n bytes = pinyin indices,
        // then x times (2 bytes = word length y, y*2 bytes = word text in Unicode, 2 bytes = frequency, 2 bytes unknown, 4 bytes unknown).

        /// <summary>
        /// Reads every segment of the binary dictionary at <paramref name="path"/> and collects their entries.
        /// </summary>
        /// <param name="path">Path of the file to read.</param>
        /// <returns>The entries of all segments that could be read; a segment that throws is logged and skipped.</returns>
        public WordLibraryList Import(string path)
        {
            var pyAndWord = new WordLibraryList();

            // Dispose the stream on every exit path; previously the file handle was never released.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position = 0x44;
                CountWord   = BinFileHelper.ReadInt32(fs);
                int segmentCount = BinFileHelper.ReadInt32(fs); // number of segments

                CurrentStatus = 0;
                for (int i = 0; i < segmentCount; i++)
                {
                    try
                    {
                        // Segments are 1024 bytes apart, the first one starting at 0xC00.
                        fs.Position = 0xC00 + 1024 * i;
                        var segment = new Segment(fs);
                        pyAndWord.AddWordLibraryList(segment.WordLibraryList);
                        CurrentStatus += segment.WordLibraryList.Count;
                    }
                    catch (Exception e)
                    {
                        // Best effort: a broken segment must not abort the whole import.
                        Debug.WriteLine(e.Message);
                    }
                }
            }

            return(pyAndWord);
        }
Example #9
0
        /// <summary>
        /// Groups the entries by their <c>SingleCode</c> and emits one exported line per code.
        /// </summary>
        /// <param name="wlList">Entries to export.</param>
        /// <returns>A single-element list holding all lines, each terminated by CRLF.</returns>
        public IList <string> Export(WordLibraryList wlList)
        {
            // Bucket the entries by code; entries keep their input order inside each bucket.
            var buckets = new Dictionary <string, WordLibraryList>();

            foreach (var entry in wlList)
            {
                string          code = entry.SingleCode;
                WordLibraryList bucket;
                if (!buckets.TryGetValue(code, out bucket))
                {
                    bucket = new WordLibraryList();
                    buckets.Add(code, bucket);
                }
                bucket.Add(entry);
            }

            var output = new StringBuilder();

            foreach (var pair in buckets)
            {
                output.Append(ExportLine(pair.Key, pair.Value));
                output.Append("\r\n");
            }

            var lines = new List <string>();
            lines.Add(output.ToString());
            return lines;
        }
Example #10
0
        /// <summary>
        /// Writes the entries to "dictionary.txt" in the current folder, zips that file and reports where the zip is.
        /// </summary>
        /// <param name="wlList">Entries to export; they are passed through <c>Filter</c> first.</param>
        /// <returns>A single-element list with a message that contains the zip file path.</returns>
        public IList <string> Export(WordLibraryList wlList)
        {
            // Convert the codes of the full-pinyin scheme before exporting.
            wlList = Filter(wlList);

            string textPath = Path.Combine(FileOperationHelper.GetCurrentFolderPath(), "dictionary.txt");
            if (File.Exists(textPath))
            {
                File.Delete(textPath);
            }

            // Header line followed by one LF-terminated line per entry.
            var content = new StringBuilder();
            content.Append("# Gboard Dictionary version:1\n");
            foreach (var entry in wlList)
            {
                content.Append(ExportLine(entry));
                content.Append("\n");
            }
            // Written as UTF-8 without a byte order mark.
            FileOperationHelper.WriteFile(textPath, new UTF8Encoding(false), content.ToString());

            string archivePath = Path.Combine(FileOperationHelper.GetCurrentFolderPath(), "Gboard词库.zip");
            if (File.Exists(archivePath))
            {
                File.Delete(archivePath);
            }
            FileOperationHelper.ZipFile(textPath, archivePath);

            var messages = new List <string>();
            messages.Add("词库文件在:" + archivePath);
            return messages;
        }
Example #11
0
        //private SelfDefiningCodeGenerater codeGenerater = new SelfDefiningCodeGenerater();

        #region IWordLibraryExport Members
        /// <summary>
        /// Exports the word library in the user-defined format.
        /// When no custom code mapping file is specified and the library carries pinyin codes, the pinyin is used as the code of each character.
        /// When a custom code mapping file is specified, the existing codes are ignored and regenerated from that file.
        /// When the library carries no pinyin codes and no mapping file is specified, an exception is thrown.
        /// </summary>
        /// <param name="wlList">Entries to export.</param>
        /// <returns>One CRLF-terminated line per entry; an entry whose line cannot be built is logged and left out.</returns>
        public string Export(WordLibraryList wlList)
        {
            bool noCodeSource = string.IsNullOrEmpty(UserDefiningPattern.MappingTablePath)
                                && !UserDefiningPattern.IsPinyin;

            // Without a mapping table or a pinyin pattern, the first entry must already carry pinyin codes.
            if (noCodeSource && (wlList.Count == 0 || wlList[0].CodeType != CodeType.Pinyin))
            {
                throw new Exception("未指定字符编码映射文件,无法对词库进行自定义编码的生成");
            }

            var output = new StringBuilder();

            foreach (WordLibrary entry in wlList)
            {
                try
                {
                    output.Append(ExportLine(entry));
                    output.Append("\r\n");
                }
                catch (Exception failure)
                {
                    Debug.WriteLine(failure.Message);
                }
            }
            return output.ToString();
        }
Example #12
0
        /// <summary>
        /// Builds a tree from the word library.
        /// </summary>
        /// <param name="wlList">Entries to insert; the list is sorted in place and every entry except the first gets its Count set to 96.</param>
        /// <returns>The root node of the tree.</returns>
        private TouchPalChar BuildTree(WordLibraryList wlList)
        {
            // Sort the library by word before the tree is generated.
            wlList.Sort((x, y) => x.Word.CompareTo(y.Word));

            WordLibrary first = wlList[0];
            var root = new TouchPalChar(); // only a root node, it holds no actual character
            TouchPalChar previous = AddWordLink2Char(root, first, 0);

            int index = 1;
            while (index < wlList.Count)
            {
                WordLibrary entry = wlList[index];
                entry.Count = 96; // default frequency is 96

                // With no common prefix the word hangs off the root, otherwise off the previously added node.
                int shared = FindSameWordLen(previous.Word.ChineseWord, entry.Word);
                TouchPalChar parent = shared == 0 ? root : previous;
                previous = AddWordLink2Char(parent, entry, shared);
                index++;
            }
            return root;
        }
Example #13
0
        /// <summary>
        /// Parses a dctx XML document and creates one entry per DictionaryEntry element.
        /// </summary>
        /// <param name="str">The XML text.</param>
        /// <returns>The imported entries, each with rank 1.</returns>
        public WordLibraryList ImportText(string str)
        {
            var document = new XmlDocument();
            document.LoadXml(str);

            var namespaces = new XmlNamespaceManager(document.NameTable);
            namespaces.AddNamespace("ns1", "http://www.microsoft.com/ime/dctx");

            var         imported = new WordLibraryList();
            XmlNodeList entries  = document.SelectNodes("//ns1:Dictionary/ns1:DictionaryEntry", namespaces);
            CountWord = entries.Count;

            for (int index = 0; index < entries.Count; index++)
            {
                XmlNode node   = entries[index];
                string  input  = node.SelectSingleNode("ns1:InputString", namespaces).InnerText;
                string  output = node.SelectSingleNode("ns1:OutputString", namespaces).InnerText;

                // The input string is split on spaces and on the digits 1-4; empty pieces are dropped.
                var entry = new WordLibrary
                {
                    Word   = output,
                    Rank   = 1,
                    PinYin = input.Split(new[] { ' ', '1', '2', '3', '4' }, StringSplitOptions.RemoveEmptyEntries)
                };
                CurrentStatus = index;
                imported.Add(entry);
            }

            return imported;
        }
Example #14
0
        /// <summary>
        /// Emits one "code word word ..." line for every distinct first code element found in the entries.
        /// </summary>
        /// <param name="wlList">Entries to export.</param>
        /// <returns>A single-element list holding all lines, each terminated by LF.</returns>
        public IList <string> Export(WordLibraryList wlList)
        {
            // Maps a code to the space-separated words that carry it, in input order.
            IDictionary <string, string> wordsByCode = new Dictionary <string, string>();

            foreach (var entry in wlList)
            {
                string text = entry.Word;
                foreach (var code in entry.Codes)
                {
                    string key = code[0];
                    string collected;
                    if (wordsByCode.TryGetValue(key, out collected))
                    {
                        wordsByCode[key] = collected + " " + text;
                    }
                    else
                    {
                        wordsByCode.Add(key, text);
                    }
                }
            }

            var output = new StringBuilder();
            foreach (var pair in wordsByCode)
            {
                output.Append(pair.Key).Append(" ").Append(pair.Value).Append("\n");
            }

            var lines = new List <string>();
            lines.Add(output.ToString());
            return lines;
        }
        /// <summary>
        /// Reads every segment of the binary dictionary at <paramref name="path"/> and collects their entries.
        /// </summary>
        /// <param name="path">Path of the file to read.</param>
        /// <returns>The entries of all segments that could be read; a segment that throws is logged and skipped.</returns>
        public WordLibraryList Import(string path)
        {
            var pyAndWord = new WordLibraryList();

            // Dispose the stream on every exit path; previously the file handle was never released.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position = 0x44;
                CountWord = BinFileHelper.ReadInt32(fs);
                int segmentCount = BinFileHelper.ReadInt32(fs); // number of segments
                CurrentStatus = 0;
                for (int i = 0; i < segmentCount; i++)
                {
                    try
                    {
                        // Segments are 1024 bytes apart, the first one starting at 0xC00.
                        fs.Position = 0xC00 + 1024*i;
                        var segment = new Segment(fs);
                        pyAndWord.AddWordLibraryList(segment.WordLibraryList);
                        CurrentStatus += segment.WordLibraryList.Count;
                    }
                    catch (Exception e)
                    {
                        // Best effort: a broken segment must not abort the whole import.
                        Debug.WriteLine(e.Message);
                    }
                }
            }

            return pyAndWord;
        }
Example #16
0
        /// <summary>
        /// Imports <c>StringData</c> through the text-import interface and checks the number of entries.
        /// </summary>
        public void TestImport()
        {
            WordLibraryList list = ((IWordLibraryTextImport)importer).ImportText(StringData);

            Assert.IsNotNull(list);
            // Expected value first, actual second, so a failure message labels the two values correctly.
            Assert.AreEqual(10, list.Count);
        }
Example #17
0
        //private IWordCodeGenerater pyGenerater=new PinyinGenerater();
        /// <summary>
        /// Parses one tab-separated line: word, code, count.
        /// </summary>
        /// <param name="line">The line to parse; it must contain at least three columns.</param>
        /// <returns>A list holding the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] columns = line.Split('\t');
            string   text    = columns[0];
            string   code    = columns[1];

            var entry = new WordLibrary();
            entry.Word  = text;
            entry.Count = Convert.ToInt32(columns[2]);

            if (CodeType != CodeType.Pinyin)
            {
                // Any other code type stores the code column as-is.
                entry.SetCode(CodeType, code);
            }
            else
            {
                // Pinyin codes are space-separated syllables.
                entry.PinYin = code.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            }

            return new WordLibraryList {
                entry
            };
        }
Example #18
0
 /// <summary>
 /// Exports every entry as one CRLF-terminated line; the last entry's line additionally carries
 /// ", " + its pinyin joined by "'" + " " + its Count.
 /// </summary>
 /// <param name="wlList">Entries to export.</param>
 /// <returns>The exported text, or an empty string for an empty list.</returns>
 public string Export(WordLibraryList wlList)
 {
     int lastIndex = wlList.Count - 1;
     if (lastIndex < 0)
     {
         return "";
     }

     var output = new StringBuilder();

     // All entries except the last one; entries that export to an empty line are skipped.
     for (int index = 0; index < wlList.Count - 1; index++)
     {
         string line = ExportLine(wlList[index]);
         if (line == "")
         {
             continue;
         }
         output.Append(line).Append("\r\n");
     }

     // The last entry is always written and gets the extra pinyin and count suffix.
     WordLibrary tail = wlList[lastIndex];
     output.Append(ExportLine(tail))
           .Append(", ")
           .Append(tail.GetPinYinString("'", BuildType.None))
           .Append(" ")
           .Append(tail.Count)
           .Append("\r\n");
     return output.ToString();
 }
Example #19
0
        /// <summary>
        /// Parses one line of the form "code word1 word2 ..." (space-separated) into one entry per word.
        /// </summary>
        /// <param name="line">The line to parse; the first token is the code shared by all following words.</param>
        /// <returns>One entry per token after the first, each carrying the code under this importer's <c>CodeType</c>.</returns>
        public virtual WordLibraryList ImportLine(string line)
        {
            var wlList = new WordLibraryList();

            string[] strs = line.Split(' ');

            for (int i = 1; i < strs.Length; i++)
            {
                // NOTE(review): a comma-stripped copy of the word used to be computed here but was never used;
                // the word is stored exactly as it appears in the line - confirm that commas should be kept.
                var wl = new WordLibrary();
                wl.Word = strs[i];
                wl.SetCode(CodeType, strs[0]);
                wlList.Add(wl);
            }
            return(wlList);
        }
Example #20
0
        /// <summary>
        /// Parses one line of the form "code word1 word2 ..." (space-separated) into one entry per word.
        /// </summary>
        /// <param name="line">The line to parse; the first token is the code shared by all following words.</param>
        /// <returns>One entry per token after the first, each carrying the code under this importer's <c>CodeType</c>.</returns>
        public virtual WordLibraryList ImportLine(string line)
        {
            var wlList = new WordLibraryList();
            string[] strs = line.Split(' ');

            for (int i = 1; i < strs.Length; i++)
            {
                // NOTE(review): a comma-stripped copy of the word used to be computed here but was never used;
                // the word is stored exactly as it appears in the line - confirm that commas should be kept.
                var wl = new WordLibrary();
                wl.Word = strs[i];
                wl.SetCode(CodeType, strs[0]);
                wlList.Add(wl);
            }
            return wlList;
        }
Example #21
0
 /// <summary>
 /// Builds one entry from <paramref name="line"/> with the user-defined pattern and wraps it in a list.
 /// </summary>
 /// <param name="line">The line to parse.</param>
 /// <returns>A list whose only element is whatever <c>BuildWordLibrary</c> returned.</returns>
 public WordLibraryList ImportLine(string line)
 {
     return new WordLibraryList
     {
         UserDefiningPattern.BuildWordLibrary(line)
     };
 }
Example #22
0
        /// <summary>
        /// Writes the word library to a binary file and returns the path of that file.
        /// </summary>
        /// <param name="wlList">Entries to export; they are handed to <c>BuildTree</c>.</param>
        /// <returns>Path of the temporary file that was written.</returns>
        public string Export(WordLibraryList wlList)
        {
            TouchPalChar rootChar = BuildTree(wlList);
            int endPositon = InitTreeNodePosition(rootChar, 4);

            // Temporary file next to the executable, named after the current time (second resolution).
            string tempPath = Application.StartupPath + "\\temp" +
                              DateTime.Now.ToString("yyyyMMddHHmmss") + ".bak";

            // FileMode.Create truncates a file left over from an earlier export in the same second, and the
            // using block closes the stream exactly once, also when writing the tree throws.
            using (var fs = new FileStream(tempPath, FileMode.Create, FileAccess.Write))
            {
                // The file starts with the 4-byte end position, followed by the tree itself.
                fs.Write(BitConverter.GetBytes(endPositon), 0, 4);
                WriteBinaryTree(rootChar, fs);
            }
            return tempPath;
        }
        /// <summary>
        /// Exports every entry as one CRLF-terminated line; the last entry's line additionally carries
        /// ", " + its pinyin joined by "'" + " " + its Rank.
        /// </summary>
        /// <param name="wlList">Entries to export.</param>
        /// <returns>A single-element list with the exported text, or an empty list for an empty input.</returns>
        public IList <string> Export(WordLibraryList wlList)
        {
            var lines = new List <string>();
            int lastIndex = wlList.Count - 1;

            if (lastIndex < 0)
            {
                return lines;
            }

            var output = new StringBuilder();

            // All entries except the last one; entries that export to an empty line are skipped.
            for (int index = 0; index < wlList.Count - 1; index++)
            {
                string line = ExportLine(wlList[index]);
                if (line == "")
                {
                    continue;
                }
                output.Append(line).Append("\r\n");
            }

            // The last entry is always written and gets the extra pinyin and rank suffix.
            WordLibrary tail = wlList[lastIndex];
            output.Append(ExportLine(tail))
                  .Append(", ")
                  .Append(tail.GetPinYinString("'", BuildType.None))
                  .Append(" ")
                  .Append(tail.Rank)
                  .Append("\r\n");

            lines.Add(output.ToString());
            return lines;
        }
Example #24
0
        /// <summary>
        /// Emits one "code word word ..." line per distinct code.
        /// </summary>
        /// <remarks>
        /// The code of an entry is its pinyin (joined without separator) when exporting pinyin, its first stored
        /// code when the entry already uses the target code type, and otherwise whatever the code generator
        /// produces: every generated code separately when the generator reports multiple codes per character,
        /// or the generated codes combined by <c>CollectionHelper.ListToString</c>.
        /// </remarks>
        /// <param name="wlList">Entries to export.</param>
        /// <returns>All lines, each terminated by LF.</returns>
        public string Export(WordLibraryList wlList)
        {
            // Maps a code to the space-separated words that carry it, in input order.
            IDictionary<string, string> wordsByCode = new Dictionary<string, string>();

            foreach (var entry in wlList)
            {
                string text = entry.Word;

                // Collect the code(s) under which this word is filed.
                IList<string> keys;
                if (CodeType == CodeType.Pinyin)
                {
                    keys = new string[] { entry.GetPinYinString("", BuildType.None) };
                }
                else if (CodeType == entry.CodeType)
                {
                    keys = new string[] { entry.Codes[0][0] };
                }
                else
                {
                    IList<string> generated = CodeGenerater.GetCodeOfString(entry.Word);
                    keys = CodeGenerater.Is1CharMutiCode
                        ? generated
                        : new string[] { CollectionHelper.ListToString(generated) };
                }

                foreach (string key in keys)
                {
                    string collected;
                    if (wordsByCode.TryGetValue(key, out collected))
                    {
                        wordsByCode[key] = collected + " " + text;
                    }
                    else
                    {
                        wordsByCode.Add(key, text);
                    }
                }
            }

            var output = new StringBuilder();
            foreach (var pair in wordsByCode)
            {
                output.Append(pair.Key).Append(" ").Append(pair.Value).Append("\n");
            }

            return output.ToString();
        }
Example #25
0
 /// <summary>
 /// Exports the word library in the user-defined format.
 /// When no custom code mapping file is specified and the library carries pinyin codes, the pinyin is used as the code of each character.
 /// When a custom code mapping file is specified, the existing codes are ignored and regenerated from that file.
 /// When the library carries no pinyin codes and no mapping file is specified, an exception is thrown.
 /// </summary>
 /// <param name="wlList">Entries to export.</param>
 /// <returns>One CRLF-terminated line per entry; an entry whose line cannot be built is logged and left out.</returns>
 public string Export(WordLibraryList wlList)
 {
     bool mappingMissing = string.IsNullOrEmpty(UserDefiningPattern.MappingTablePath);

     // Without a mapping table or a pinyin pattern, the first entry must already carry pinyin codes.
     if (mappingMissing && !UserDefiningPattern.IsPinyin)
     {
         bool firstIsPinyin = wlList.Count != 0 && wlList[0].CodeType == CodeType.Pinyin;
         if (!firstIsPinyin)
         {
             throw new Exception("未指定字符编码映射文件,无法对词库进行自定义编码的生成");
         }
     }

     var output = new StringBuilder();
     foreach (WordLibrary entry in wlList)
     {
         try
         {
             output.Append(ExportLine(entry));
             output.Append("\r\n");
         }
         catch (Exception failure)
         {
             Debug.WriteLine(failure.Message);
         }
     }
     return output.ToString();
 }
Example #26
0
        /// <summary>
        /// Parses a plist XML document and creates one entry per <c>dict</c> element under <c>plist/array</c>.
        /// </summary>
        /// <param name="str">The XML text.</param>
        /// <returns>The imported entries, each with rank 1.</returns>
        public WordLibraryList ImportText(string str)
        {
            var plist = new XmlDocument();
            plist.LoadXml(str);

            var         imported = new WordLibraryList();
            XmlNodeList dicts    = plist.SelectNodes("//plist/array/dict");
            CountWord = dicts.Count;

            for (int index = 0; index < dicts.Count; index++)
            {
                // The first "string" child is stored as the word, the second is passed to SetPinyinString.
                XmlNodeList strings = dicts[index].SelectNodes("string");

                var entry = new WordLibrary
                {
                    Word = strings[0].InnerText,
                    Rank = 1
                };
                entry.SetPinyinString(strings[1].InnerText);

                CurrentStatus = index;
                imported.Add(entry);
            }

            return imported;
        }
        /// <summary>
        /// Splits the text into lines and imports each non-blank line through <c>ImportLine</c>.
        /// </summary>
        /// <param name="str">The text to import; lines are separated by CR and/or LF.</param>
        /// <returns>The entries of every line that imported without throwing; failures are logged and skipped.</returns>
        public virtual WordLibraryList ImportText(string str)
        {
            var imported = new WordLibraryList();
            char[] lineBreaks = { '\r', '\n' };
            string[] lines = str.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);

            CountWord = lines.Length;
            CurrentStatus = 0;
            foreach (string rawLine in lines)
            {
                try
                {
                    string trimmed = rawLine.Trim();
                    if (trimmed.Length > 0)
                    {
                        imported.AddWordLibraryList(ImportLine(trimmed));
                    }
                }
                catch (Exception failure)
                {
                    Debug.WriteLine(failure.Message);
                }
                // Progress advances for every line, including skipped and failed ones.
                CurrentStatus++;
            }
            return imported;
        }
        /// <summary>
        /// Emits one "code word word ..." line for every distinct <c>SingleCode</c> among the entries.
        /// </summary>
        /// <param name="wlList">Entries to export.</param>
        /// <returns>All lines, each terminated by LF.</returns>
        public string Export(WordLibraryList wlList)
        {
            // Maps a code to the space-separated words that carry it, in input order.
            IDictionary<string, string> wordsByCode = new Dictionary<string, string>();

            foreach (var entry in wlList)
            {
                string text = entry.Word;
                string code = entry.SingleCode;
                string collected;
                if (wordsByCode.TryGetValue(code, out collected))
                {
                    wordsByCode[code] = collected + " " + text;
                }
                else
                {
                    wordsByCode.Add(code, text);
                }
            }

            var output = new StringBuilder();
            foreach (var pair in wordsByCode)
            {
                output.Append(pair.Key).Append(" ").Append(pair.Value).Append("\n");
            }

            return output.ToString();
        }
Example #29
0
        /// <summary>
        /// Builds one entry from <paramref name="line"/> and wraps it in a list.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list whose only element is whatever <c>BuildWordLibrary</c> returned.</returns>
        public WordLibraryList ImportLine(string line)
        {
            return new WordLibraryList
            {
                BuildWordLibrary(line)
            };
        }
Example #30
0
        /// <summary>
        /// Imports a line in which every character is followed by its pinyin and checks the word and pinyin of the result.
        /// </summary>
        public void ImportWithPinyinFull()
        {
            WordLibraryList wl = importer.ImportLine("深shen蓝lan居ju");

            // Expected value first, actual second, so a failure message labels the two values correctly.
            Assert.AreEqual(1, wl.Count);
            Assert.AreEqual("shen'lan'ju", wl[0].PinYinString);
            Assert.AreEqual("深蓝居", wl[0].Word);
        }
Example #31
0
        /// <summary>
        /// Writes the entries to a "mschxudp" binary file next to the running executable and reports where it is.
        /// </summary>
        /// <remarks>
        /// The file consists of a 0x40-byte header, a table with one 4-byte offset per entry, and the entries
        /// themselves. The 4 bytes at 0x18 are written as 0 first and overwritten with the final file length at the end.
        /// </remarks>
        /// <param name="wlList">Entries to export; they are passed through <c>Filter</c> first.</param>
        /// <returns>A single-element list with a message that contains the file path.</returns>
        public IList <string> Export(WordLibraryList wlList)
        {
            // Original note: Win10 pinyin only supports codes of at most 32 characters.
            // NOTE(review): whether Filter enforces that limit is not visible here - confirm.
            wlList = Filter(wlList);
            string tempPath = Path.GetDirectoryName(Process.GetCurrentProcess().MainModule.FileName) + "\\Win10微软五笔词库.dat";

            if (File.Exists(tempPath))
            {
                File.Delete(tempPath);
            }

            // The using blocks close the file on every exit path; previously an exception while writing
            // left the handle open.
            using (var fs = new FileStream(tempPath, FileMode.OpenOrCreate, FileAccess.Write))
            using (BinaryWriter bw = new BinaryWriter(fs))
            {
                bw.Write(Encoding.ASCII.GetBytes("mschxudp"));            //proto8
                bw.Write(BitConverter.GetBytes(0x00600002));              //Unknown
                bw.Write(BitConverter.GetBytes(1));                       //version
                bw.Write(BitConverter.GetBytes(0x40));                    //phrase_offset_start
                bw.Write(BitConverter.GetBytes(0x40 + 4 * wlList.Count)); //phrase_start=phrase_offset_start + 4*phrase_count
                bw.Write(BitConverter.GetBytes(0));                       //phrase_end, patched after everything is written
                bw.Write(BitConverter.GetBytes(wlList.Count));            //phrase_count
                bw.Write(BitConverter.GetBytes(DateTime.Now.Ticks));      //timestamp
                bw.Write(BitConverter.GetBytes((long)0));                 //0
                bw.Write(BitConverter.GetBytes((long)0));                 //0
                bw.Write(BitConverter.GetBytes((long)0));                 //0
                int offset = 0;

                // Offset table: where each entry starts, relative to the first entry.
                for (var i = 0; i < wlList.Count; i++)
                {
                    bw.Write(BitConverter.GetBytes(offset));
                    var wl = wlList[i];
                    // 16 fixed bytes + code and word as UTF-16, each followed by a 2-byte terminator.
                    offset += 8 + 8 + wl.Word.Length * 2 + 2 + wl.GetPinYinLength() * 2 + 2;
                }
                for (var i = 0; i < wlList.Count; i++)
                {
                    bw.Write(BitConverter.GetBytes(0x00100010)); //magic
                    var wl           = wlList[i];
                    var hanzi_offset = 8 + 8 + wl.GetPinYinLength() * 2 + 2;
                    bw.Write(BitConverter.GetBytes((short)hanzi_offset));
                    bw.Write((byte)wl.Rank);                     //word frequency
                    bw.Write((byte)0x6);                         //meaning of the 6 is unknown
                    bw.Write(BitConverter.GetBytes(0x00000000)); //Unknown
                    bw.Write(BitConverter.GetBytes(0xE679CD20)); //Unknown
                    var py = wl.GetPinYinString("", BuildType.None);
                    bw.Write(Encoding.Unicode.GetBytes(py));
                    bw.Write(BitConverter.GetBytes((short)0));
                    bw.Write(Encoding.Unicode.GetBytes(wl.Word));
                    bw.Write(BitConverter.GetBytes((short)0));
                }

                // Patch the placeholder at 0x18 with the low 4 bytes of the final file length.
                bw.Flush();
                fs.Position = 0x18;
                fs.Write(BitConverter.GetBytes(fs.Length), 0, 4);
            }

            return(new List <string>()
            {
                "词库文件在:" + tempPath
            });
        }
        /// <summary>
        /// Writes the entries to <paramref name="tempPath"/> as fixed-size binary records, replacing any existing file.
        /// </summary>
        /// <remarks>
        /// The header is a fixed byte sequence, the entry count (8 bytes) and a timestamp (4 bytes), followed by
        /// 9192 zero bytes. Every entry is then padded with zeros up to 60 bytes, and the file is padded with zeros
        /// up to the next multiple of 1024 bytes. An entry that cannot be encoded is logged and left out entirely.
        /// </remarks>
        /// <param name="tempPath">Path of the file to write.</param>
        /// <param name="wlList">Entries to write.</param>
        private void ExportTo1File(string tempPath, WordLibraryList wlList)
        {
            if (File.Exists(tempPath))
            {
                File.Delete(tempPath);
            }

            // The using blocks close the file on every exit path; previously an exception outside the
            // per-entry try block left the handle open.
            using (var fs = new FileStream(tempPath, FileMode.OpenOrCreate, FileAccess.Write))
            using (BinaryWriter bw = new BinaryWriter(fs))
            {
                bw.Write(HexStringToByteArray("55AA88810200600055AA55AA")); //Unknown

                long countPosition = fs.Position;                           // remembered so the count can be corrected below
                bw.Write(BitConverter.GetBytes((long)wlList.Count));        //phrase_count
                bw.Write(BitConverter.GetBytes((int)DateTime.Now.Ticks));   //timestamp
                for (var i = 0; i < 9192; i++)
                {
                    bw.Write((byte)0);
                }

                // Entries start here (0x2400 according to the original note).
                long written = 0;
                for (var i = 0; i < wlList.Count; i++)
                {
                    var wl = wlList[i];
                    try
                    {
                        // Assemble the record in memory first: if anything throws halfway (for example a pinyin
                        // that is missing from PinyinMap), nothing of this entry reaches the file. Previously a
                        // partial record was left behind and shifted every following record off its 60-byte slot.
                        byte[] record;
                        using (var recordBuffer = new MemoryStream())
                        using (var recordWriter = new BinaryWriter(recordBuffer))
                        {
                            recordWriter.Write(BitConverter.GetBytes((Int16)(i + 0x6D1B))); //Unknown, suspected to be the word frequency
                            recordWriter.Write(new byte[] { 0x1A, 0x26 });                  //Unknown
                            recordWriter.Write(new byte[] { 0x00, 0x00, 0x00 });            //pinyin of the first 3 characters?
                            recordWriter.Write(new byte[] { 0x00, 0x00, 0x04 });
                            recordWriter.Write((byte)wl.Word.Length);
                            recordWriter.Write((byte)0x5A);
                            recordWriter.Write(Encoding.Unicode.GetBytes(wl.Word));
                            foreach (string py1 in wl.PinYin)
                            {
                                var py1Index = PinyinMap[py1];
                                recordWriter.Write(py1Index);
                            }
                            recordWriter.Flush();
                            record = recordBuffer.ToArray();
                        }
                        bw.Write(record);

                        var used = 12 + 4 * wl.Word.Length;
                        // One entry occupies 60 bytes; the remainder is filled with zeros.
                        for (var j = used; j < 60; j++)
                        {
                            bw.Write((byte)0);
                        }
                        written++;
                    }
                    catch (Exception ex)
                    {
                        Debug.WriteLine(ex.Message);
                    }
                }

                // Trailing zeros up to the next multiple of 1024 bytes.
                var k = (int)Math.Ceiling(fs.Position / 1024.0);

                while (fs.Position < k * 1024)
                {
                    bw.Write((byte)0);
                }

                // Entries that failed were skipped, so correct the count in the header to what was really written.
                if (written != wlList.Count)
                {
                    fs.Position = countPosition;
                    bw.Write(BitConverter.GetBytes(written));
                }
            }
        }
        /// <summary>
        /// Reads words from the binary file at <paramref name="path"/> until the recorded end position is reached.
        /// </summary>
        /// <remarks>
        /// The end position is the 32-bit value at offset 0x60; words are read starting at offset 0x350.
        /// Reading also stops when <c>ImportWord</c> returns null. Entries with an empty word or without pinyin are dropped.
        /// </remarks>
        /// <param name="path">Path of the file to read.</param>
        /// <returns>The entries that were read successfully.</returns>
        public WordLibraryList Import(string path)
        {
            var wordLibraryList = new WordLibraryList();

            // The using block releases the file handle on every exit path; previously an exception
            // outside the inner try block left the stream open.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position = 0x60;
                int endPosition = BinFileHelper.ReadInt32(fs);
                fs.Position   = 0x350;
                CurrentStatus = 0;
                do
                {
                    long positionBefore = fs.Position;
                    try
                    {
                        WordLibrary wl = ImportWord(fs);
                        if (wl == null)
                        {
                            break;
                        }
                        if (wl.Word != "" && wl.PinYin.Length > 0)
                        {
                            wordLibraryList.Add(wl);
                        }
                    }
                    catch (Exception ex)
                    {
                        Debug.WriteLine(ex.Message);
                        if (fs.Position == positionBefore)
                        {
                            // Nothing was consumed, so the next attempt would start from the same bytes
                            // and the loop would never reach the end position.
                            break;
                        }
                    }
                } while (fs.Position != endPosition);
            }

            return(wordLibraryList);
        }
Example #34
0
        //public Form ExportConfigForm { get { return form; } }
        /// <summary>
        /// Exports the word list as text with one line per code: the code followed by
        /// all words sharing it, separated by single spaces and terminated by "\n".
        /// </summary>
        /// <remarks>
        /// The code of an entry is its joined pinyin when <c>CodeType</c> is Pinyin, its
        /// first stored code when the entry already carries the target code type, and
        /// otherwise every code produced by <c>CodeGenerater</c> for the word.
        /// </remarks>
        /// <param name="wlList">Entries to export.</param>
        /// <returns>A single-element list holding the whole exported text.</returns>
        public IList <string> Export(WordLibraryList wlList)
        {
            IDictionary <string, string> xiaoxiaoDic = new Dictionary <string, string>();

            for (int i = 0; i < wlList.Count; i++)
            {
                WordLibrary wl    = wlList[i];
                string      value = wl.Word;

                if (CodeType == CodeType.Pinyin)
                {
                    AppendWordToCode(xiaoxiaoDic, wl.GetPinYinString("", BuildType.None), value);
                }
                else if (CodeType == wl.CodeType)
                {
                    AppendWordToCode(xiaoxiaoDic, wl.Codes[0][0], value);
                }
                else
                {
                    // Generated codes are registered here only; the word must not
                    // additionally be filed under an empty code.
                    var codes = CodeGenerater.GetCodeOfString(wl.Word);
                    foreach (var code in codes.ToCodeString())
                    {
                        AppendWordToCode(xiaoxiaoDic, code, value);
                    }
                }
            }

            var sb = new StringBuilder();
            foreach (var keyValuePair in xiaoxiaoDic)
            {
                sb.Append(keyValuePair.Key + " " + keyValuePair.Value + "\n");
            }

            return(new List <string>()
            {
                sb.ToString()
            });
        }

        /// <summary>
        /// Adds <paramref name="word"/> to the space-separated word list stored under <paramref name="code"/>.
        /// </summary>
        private static void AppendWordToCode(IDictionary <string, string> dic, string code, string word)
        {
            string existing;

            if (dic.TryGetValue(code, out existing))
            {
                dic[code] = existing + " " + word;
            }
            else
            {
                dic.Add(code, word);
            }
        }
 /// <summary>
 /// Converts one line of plain text into a word entry.
 /// </summary>
 /// <param name="line">Text of the line; it is used verbatim as the word.</param>
 /// <returns>A list containing the single entry built from <paramref name="line"/>.</returns>
 public virtual WordLibraryList ImportLine(string line)
 {
     // The code for the whole line is obtained from pinyinFactory.
     var codes = pinyinFactory.GetCodeOfString(line);
     var entry = new WordLibrary { Word = line, PinYin = ToArray(codes) };

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
Example #36
0
 /// <summary>
 /// Renders every entry with <c>ExportLine</c>, terminating each with CRLF.
 /// </summary>
 /// <param name="wlList">Entries to export.</param>
 /// <returns>The concatenated export text.</returns>
 public string Export(WordLibraryList wlList)
 {
     var output = new StringBuilder();
     foreach (WordLibrary entry in wlList)
     {
         output.Append(ExportLine(entry));
         output.Append("\r\n");
     }
     return output.ToString();
 }
 /// <summary>
 /// Renders every entry with <c>ExportLine</c>, terminating each with CRLF.
 /// </summary>
 /// <param name="wlList">Entries to export.</param>
 /// <returns>A single-element list holding the whole exported text.</returns>
 public IList<string> Export(WordLibraryList wlList)
 {
     var output = new StringBuilder();
     for (int index = 0; index < wlList.Count; index++)
     {
         output.Append(ExportLine(wlList[index]));
         output.Append("\r\n");
     }

     var files = new List<string>();
     files.Add(output.ToString());
     return files;
 }
Example #38
0
        /// <summary>
        /// Exports the word list as an XML document in the
        /// "http://www.microsoft.com/ime/dctx" namespace: a fixed dictionary header
        /// with a freshly generated GUID, followed by one <c>ExportLine</c> result per entry.
        /// </summary>
        /// <remarks>
        /// Export is best-effort: an entry whose <c>ExportLine</c> call throws is skipped,
        /// and the failure is written to the debug output instead of being dropped silently.
        /// </remarks>
        /// <param name="wlList">Entries to export.</param>
        /// <returns>A single-element list holding the whole XML text.</returns>
        public IList <string> Export(WordLibraryList wlList)
        {
            var sb = new StringBuilder();

            sb.Append(
                "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n<ns1:Dictionary xmlns:ns1=\"http://www.microsoft.com/ime/dctx\">");
            sb.Append(
                @"<ns1:DictionaryHeader>
    <ns1:DictionaryGUID>{" + Guid.NewGuid() + @"}</ns1:DictionaryGUID>
    <ns1:DictionaryLanguage>zh-cn</ns1:DictionaryLanguage>
    <ns1:FormatVersion>0</ns1:FormatVersion>
    <ns1:DictionaryVersion>1</ns1:DictionaryVersion>
    <ns1:DictionaryInfo Language=""zh-cn"">
      <ns1:ShortName>深蓝词库</ns1:ShortName>
      <ns1:LongName>深蓝词库转换而成</ns1:LongName>
      <ns1:Description>Dictionary for IME</ns1:Description>
      <ns1:Copyright>深蓝词库转换</ns1:Copyright>
      <ns1:CommentHeader1>CommentTitle1</ns1:CommentHeader1>
      <ns1:CommentHeader2>CommentTitle1</ns1:CommentHeader2>
      <ns1:CommentHeader3>CommentTitle1</ns1:CommentHeader3>
    </ns1:DictionaryInfo>
    <ns1:DictionaryInfo Language=""en-us"">
      <ns1:ShortName>Shenlan</ns1:ShortName>
      <ns1:LongName>Shenlan</ns1:LongName>
      <ns1:Description>Shenlan</ns1:Description>
      <ns1:Copyright>Shenlan</ns1:Copyright>
      <ns1:CommentHeader1>CommentTitle1</ns1:CommentHeader1>
      <ns1:CommentHeader2>CommentTitle1</ns1:CommentHeader2>
      <ns1:CommentHeader3>CommentTitle1</ns1:CommentHeader3>
    </ns1:DictionaryInfo>
    <ns1:ContentCategory>Genral</ns1:ContentCategory>
    <ns1:DictionaryType>Conversion</ns1:DictionaryType>
    <ns1:SourceURL>
    </ns1:SourceURL>
    <ns1:CommentInsertion>true</ns1:CommentInsertion>
    <ns1:IconID>25</ns1:IconID>
  </ns1:DictionaryHeader>
");
            for (int i = 0; i < wlList.Count; i++)
            {
                try
                {
                    // Render first so a failing entry contributes nothing to the output.
                    sb.Append(ExportLine(wlList[i]));
                    sb.Append("\r\n");
                }
                catch (Exception ex)
                {
                    // Keep going, but leave a trace of which entry was skipped and why.
                    System.Diagnostics.Debug.WriteLine("Export skipped entry " + i + ": " + ex.Message);
                }
            }
            sb.Append("</ns1:Dictionary>");
            return(new List <string>()
            {
                sb.ToString()
            });
        }
 /// <summary>
 /// Builds the export text: one <c>ExportLine</c> result per entry, each followed by CRLF.
 /// </summary>
 /// <param name="wlList">Entries to export.</param>
 /// <returns>The concatenated export text.</returns>
 public string Export(WordLibraryList wlList)
 {
     var buffer = new StringBuilder();
     for (int index = 0; index < wlList.Count; index++)
     {
         string rendered = ExportLine(wlList[index]);
         buffer.Append(rendered);
         buffer.Append("\r\n");
     }
     return buffer.ToString();
 }
Example #40
0
 /// <summary>
 /// Refreshes <c>codeGenerater</c> for the current <c>CodeType</c>, then renders every
 /// entry with <c>ExportLine</c>, appending <c>lineSplitString</c> after each one.
 /// </summary>
 /// <param name="wlList">Entries to export.</param>
 /// <returns>A single-element list holding the whole exported text.</returns>
 public IList<string> Export(WordLibraryList wlList)
 {
     codeGenerater = CodeTypeHelper.GetGenerater(CodeType);

     var output = new StringBuilder();
     foreach (WordLibrary entry in wlList)
     {
         output.Append(ExportLine(entry));
         output.Append(lineSplitString);
     }

     var files = new List<string>();
     files.Add(output.ToString());
     return files;
 }
Example #41
0
 /// <summary>
 /// Renders every entry with <c>ExportLine</c>, terminating each with CRLF.
 /// Before each entry, <c>number</c> is set to the percentage of entries not yet
 /// exported, rounded up.
 /// </summary>
 /// <param name="wlList">Entries to export.</param>
 /// <returns>A single-element list holding the whole exported text.</returns>
 public IList<string> Export(WordLibraryList wlList)
 {
     var output = new StringBuilder();
     int index = 0;
     while (index < wlList.Count)
     {
         int remaining = wlList.Count - index;
         number = (int) Math.Ceiling(remaining*100.0/wlList.Count);

         output.Append(ExportLine(wlList[index]));
         output.Append("\r\n");
         index++;
     }

     var files = new List<string>();
     files.Add(output.ToString());
     return files;
 }
Example #42
0
        /// <summary>
        /// Concatenates the <c>ExportLine</c> text of every entry, each followed by CRLF.
        /// </summary>
        /// <param name="wlList">Entries to export.</param>
        /// <returns>The concatenated export text.</returns>
        public string Export(WordLibraryList wlList)
        {
            var text = new StringBuilder();

            for (int index = 0; index < wlList.Count; index++)
            {
                text.Append(ExportLine(wlList[index]));
                text.Append("\r\n");
            }
            return text.ToString();
        }
Example #43
0
 /// <summary>
 /// Refreshes <c>codeGenerater</c> for the current <c>CodeType</c>, then renders every
 /// entry with <c>ExportLine</c>, terminating each with CRLF.
 /// </summary>
 /// <param name="wlList">Entries to export.</param>
 /// <returns>The concatenated export text.</returns>
 public string Export(WordLibraryList wlList)
 {
     codeGenerater = CodeTypeHelper.GetGenerater(CodeType);

     var output = new StringBuilder();
     foreach (WordLibrary entry in wlList)
     {
         output.Append(ExportLine(entry));
         output.Append("\r\n");
     }
     return output.ToString();
 }
Example #44
0
 /// <summary>
 /// Parses a tab-separated line of the form word, pinyin, rank. The pinyin field
 /// is split on apostrophes, dropping empty parts.
 /// </summary>
 /// <param name="line">The line to parse; it must have at least three tab-separated fields.</param>
 /// <returns>A list containing the single parsed entry.</returns>
 public WordLibraryList ImportLine(string line)
 {
     string[] fields = line.Split('\t');
     var entry = new WordLibrary
     {
         Word = fields[0],
         Rank = Convert.ToInt32(fields[2]),
         PinYin = fields[1].Split(new[] { '\'' }, StringSplitOptions.RemoveEmptyEntries)
     };

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
 /// <summary>
 ///     Converts one line of plain text into a word entry.
 /// </summary>
 /// <param name="line">Text of the line; it is used verbatim as the word.</param>
 /// <returns>
 ///     A list containing one entry whose word is <paramref name="line"/> and whose
 ///     code type is this instance's <c>CodeType</c>. No pinyin is assigned here.
 /// </returns>
 public virtual WordLibraryList ImportLine(string line)
 {
     var entry = new WordLibrary { Word = line, CodeType = CodeType };

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
        /// <summary>
        /// Exports only the words of the list, one per line, each terminated by CRLF.
        /// </summary>
        /// <param name="wlList">Entries to export.</param>
        /// <returns>A single-element list holding the whole exported text.</returns>
        public virtual IList<string> Export(WordLibraryList wlList)
        {
            var text = new StringBuilder();
            foreach (WordLibrary entry in wlList)
            {
                text.Append(entry.Word);
                text.Append("\r\n");
            }

            var files = new List<string>();
            files.Add(text.ToString());
            return files;
        }
Example #47
0
        /// <summary>
        /// Renders every entry with <c>ExportLine</c>, terminating each with CRLF.
        /// </summary>
        /// <param name="wlList">Entries to export.</param>
        /// <returns>The concatenated export text.</returns>
        public string Export(WordLibraryList wlList)
        {
            var text = new StringBuilder();

            foreach (WordLibrary entry in wlList)
            {
                text.Append(ExportLine(entry));
                text.Append("\r\n");
            }
            return text.ToString();
        }
Example #48
0
        /// <summary>
        /// Converts one line of plain text into a word entry.
        /// </summary>
        /// <param name="line">Text of the line; it is used verbatim as the word.</param>
        /// <returns>A list containing the single entry built from <paramref name="line"/>.</returns>
        public virtual WordLibraryList ImportLine(string line)
        {
            // The code for the whole line is obtained from pinyinFactory.
            var codes = pinyinFactory.GetCodeOfString(line);
            var entry = new WordLibrary { Word = line, PinYin = ToArray(codes) };

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
Example #49
0
        /// <summary>
        /// Exports the list as text that starts with a ";; -*- coding: utf-8 -*--" header
        /// line, followed by one <c>ExportLine</c> result per entry. Lines end with "\n".
        /// </summary>
        /// <param name="wlList">Entries to export.</param>
        /// <returns>A single-element list holding the whole exported text.</returns>
        public IList<string> Export(WordLibraryList wlList)
        {
            var text = new StringBuilder(";; -*- coding: utf-8 -*--\n");

            foreach (WordLibrary entry in wlList)
            {
                text.Append(ExportLine(entry));
                text.Append("\n");
            }

            var files = new List<string>();
            files.Add(text.ToString());
            return files;
        }
Example #50
0
 /// <summary>
 /// Exports each entry as "pinyin,word" followed by CRLF, where the pinyin
 /// syllables are joined with apostrophes.
 /// </summary>
 /// <param name="wlList">Entries to export.</param>
 /// <returns>The concatenated export text.</returns>
 public string Export(WordLibraryList wlList)
 {
     var output = new StringBuilder();
     foreach (WordLibrary entry in wlList)
     {
         string pinyin = entry.GetPinYinString("'", BuildType.None);
         output.Append(pinyin);
         output.Append(",");
         output.Append(entry.Word);
         output.Append("\r\n");
     }
     return output.ToString();
 }
Example #51
0
        /// <summary>
        /// Imports <c>StringData</c> through the text-import interface and checks the
        /// entry count plus the pinyin and word of the first three entries.
        /// </summary>
        public void ImportWithPinyinPart()
        {
            WordLibraryList wl = ((IWordLibraryTextImport)importer).ImportText(StringData);

            // Assert.AreEqual takes (expected, actual); this order keeps failure messages accurate.
            Assert.AreEqual(10, wl.Count);
            Assert.AreEqual("ren'min'hen'xing", wl[0].PinYinString);
            Assert.AreEqual("人民很行", wl[0].Word);
            Assert.AreEqual("ren'min'yin'hang", wl[1].PinYinString);
            Assert.AreEqual("人民银行", wl[1].Word);
            Assert.AreEqual("dong'li'wu'xian", wl[2].PinYinString);
            Assert.AreEqual("栋力无限", wl[2].Word);
        }
Example #52
0
        // Record layout (translated from the original note): 4 bytes = number x of entries
        // sharing one pinyin, 2 bytes = pinyin length n, n bytes = pinyin indices, then x times
        // (2 bytes word length y, y*2 bytes of Unicode word text, 2 bytes frequency,
        // 2 bytes unknown, 4 bytes unknown).
        // NOTE(review): the code reads the leading 4 bytes as two 16-bit values and uses the
        // word length as a byte count, which differs from the note above — confirm against the format.

        /// <summary>
        /// Reads all word entries from the binary dictionary file at <paramref name="path"/>.
        /// </summary>
        /// <remarks>
        /// The entry count is read as a 32-bit integer at 0x18 and records start at 0x30.
        /// Entries whose pinyin count differs from the word length are reported to the
        /// debug output and skipped. Reading also stops at the end of the file.
        /// </remarks>
        /// <param name="path">Path of the file to import.</param>
        /// <returns>The entries read from the file.</returns>
        public WordLibraryList Import(string path)
        {
            var pyAndWord = new WordLibraryList();

            // "using" closes the stream on every exit path; the original never closed it.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position   = 0x18;
                CountWord     = BinFileHelper.ReadInt32(fs);
                CurrentStatus = 0;
                fs.Position   = 0x30;

                // The end-of-file check keeps a truncated file from looping forever.
                while (CurrentStatus < CountWord && fs.Position < fs.Length)
                {
                    int samePyCount = BinFileHelper.ReadInt16(fs);
                    BinFileHelper.ReadInt16(fs); // unknown field, read only to advance the stream
                    short pyLength = BinFileHelper.ReadInt16(fs);
                    var   pyArray  = new string[pyLength / 2];
                    for (int i = 0; i < pyLength / 2; i++)
                    {
                        short idx = BinFileHelper.ReadInt16(fs);
                        try
                        {
                            pyArray[i] = PinYinDic[idx];
                        }
                        catch
                        {
                            // Index not present in the pinyin table: keep a visible placeholder.
                            pyArray[i] = "--";
                        }
                    }
                    for (int i = 0; i < samePyCount; i++)
                    {
                        short wordByteLength = BinFileHelper.ReadInt16(fs);
                        var   wordArray      = new byte[wordByteLength];
                        fs.Read(wordArray, 0, wordByteLength);
                        string word  = Encoding.Unicode.GetString(wordArray);
                        short  count = BinFileHelper.ReadInt16(fs);
                        BinFileHelper.ReadInt16(fs); // unknown field
                        BinFileHelper.ReadInt32(fs); // unknown field
                        if (pyArray.Length == word.Length)
                        {
                            var wl = new WordLibrary {
                                Rank = count, Word = word, PinYin = pyArray
                            };
                            pyAndWord.Add(wl);
                        }
                        else
                        {
                            Debug.WriteLine("Error data: word:[" + word + "] pinyin:[" + string.Join(",", pyArray) + "]");
                        }
                        CurrentStatus++;
                    }
                }
            }
            return(pyAndWord);
        }
Example #53
0
 /// <summary>
 /// Parses a space-separated line of the form "pinyin word". The pinyin field is
 /// split on apostrophes, dropping empty parts, and the rank is set to 1.
 /// </summary>
 /// <param name="line">The line to parse; it must have at least two space-separated fields.</param>
 /// <returns>A list containing the single parsed entry.</returns>
 public WordLibraryList ImportLine(string line)
 {
     string[] fields = line.Split(' ');
     string pinyin = fields[0];
     string text = fields[1];

     var entry = new WordLibrary
     {
         Word = text,
         Rank = 1,
         PinYin = pinyin.Split(new[] {'\''}, StringSplitOptions.RemoveEmptyEntries)
     };

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
Example #54
0
 /// <summary>
 /// Exports the word list as an XML document in the
 /// "http://www.microsoft.com/ime/dctx" namespace: a fixed dictionary header
 /// with a freshly generated GUID, followed by one <c>ExportLine</c> result per entry.
 /// </summary>
 /// <remarks>
 /// Export is best-effort: an entry whose <c>ExportLine</c> call throws is skipped,
 /// and the failure is written to the debug output instead of being dropped silently.
 /// </remarks>
 /// <param name="wlList">Entries to export.</param>
 /// <returns>The whole XML text.</returns>
 public string Export(WordLibraryList wlList)
 {
     var sb = new StringBuilder();
     sb.Append(
         "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n<ns1:Dictionary xmlns:ns1=\"http://www.microsoft.com/ime/dctx\">");
     sb.Append(
         @"<ns1:DictionaryHeader>
     <ns1:DictionaryGUID>{" + Guid.NewGuid() + @"}</ns1:DictionaryGUID>
     <ns1:DictionaryLanguage>zh-cn</ns1:DictionaryLanguage>
     <ns1:FormatVersion>0</ns1:FormatVersion>
     <ns1:DictionaryVersion>1</ns1:DictionaryVersion>
     <ns1:DictionaryInfo Language=""zh-cn"">
       <ns1:ShortName>深蓝词库</ns1:ShortName>
       <ns1:LongName>深蓝词库转换而成</ns1:LongName>
       <ns1:Description>Dictionary for IME</ns1:Description>
       <ns1:Copyright>深蓝词库转换</ns1:Copyright>
       <ns1:CommentHeader1>CommentTitle1</ns1:CommentHeader1>
       <ns1:CommentHeader2>CommentTitle1</ns1:CommentHeader2>
       <ns1:CommentHeader3>CommentTitle1</ns1:CommentHeader3>
     </ns1:DictionaryInfo>
     <ns1:DictionaryInfo Language=""en-us"">
       <ns1:ShortName>Shenlan</ns1:ShortName>
       <ns1:LongName>Shenlan</ns1:LongName>
       <ns1:Description>Shenlan</ns1:Description>
       <ns1:Copyright>Shenlan</ns1:Copyright>
       <ns1:CommentHeader1>CommentTitle1</ns1:CommentHeader1>
       <ns1:CommentHeader2>CommentTitle1</ns1:CommentHeader2>
       <ns1:CommentHeader3>CommentTitle1</ns1:CommentHeader3>
     </ns1:DictionaryInfo>
     <ns1:ContentCategory>Genral</ns1:ContentCategory>
     <ns1:DictionaryType>Conversion</ns1:DictionaryType>
     <ns1:SourceURL>
     </ns1:SourceURL>
     <ns1:CommentInsertion>true</ns1:CommentInsertion>
     <ns1:IconID>25</ns1:IconID>
       </ns1:DictionaryHeader>
     ");
     for (int i = 0; i < wlList.Count; i++)
     {
         try
         {
             // Render first so a failing entry contributes nothing to the output.
             sb.Append(ExportLine(wlList[i]));
             sb.Append("\r\n");
         }
         catch (Exception ex)
         {
             // Keep going, but leave a trace of which entry was skipped and why.
             System.Diagnostics.Debug.WriteLine("Export skipped entry " + i + ": " + ex.Message);
         }
     }
     sb.Append("</ns1:Dictionary>");
     return sb.ToString();
 }
Example #55
0
        /// <summary>
        /// Parses a tab-separated line of the form word, count. The entry is given an
        /// empty pinyin array.
        /// </summary>
        /// <param name="line">The line to parse; it must have at least two tab-separated fields.</param>
        /// <returns>A list containing the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] fields = line.Split('\t');

            var entry = new WordLibrary
            {
                Word = fields[0],
                Count = Convert.ToInt32(fields[1]),
                PinYin = new string[] {}
            };

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
Example #56
0
 /// <summary>
 /// Parses a tab-separated line of the form word, pinyin, count. The pinyin field
 /// is split on spaces, dropping empty parts.
 /// </summary>
 /// <param name="line">The line to parse; it must have at least three tab-separated fields.</param>
 /// <returns>A list containing the single parsed entry.</returns>
 public WordLibraryList ImportLine(string line)
 {
     string[] fields = line.Split('\t');
     string pinyin = fields[1];

     var entry = new WordLibrary
     {
         Word = fields[0],
         Count = Convert.ToInt32(fields[2]),
         PinYin = pinyin.Split(new[] {' '}, StringSplitOptions.RemoveEmptyEntries)
     };

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
Example #57
0
 /// <summary>
 /// Exports the list as text that starts with three fixed header lines and a blank
 /// line, followed by one <c>ExportLine</c> result per entry. Lines end with CRLF.
 /// </summary>
 /// <param name="wlList">Entries to export.</param>
 /// <returns>A single-element list holding the whole exported text.</returns>
 public IList<string> Export(WordLibraryList wlList)
 {
     var output = new StringBuilder();

     // Fixed file header.
     output.Append("名称=用户词库\r\n")
           .Append("作者=深蓝词库转换\r\n")
           .Append("编辑=1\r\n\r\n");

     foreach (WordLibrary entry in wlList)
     {
         output.Append(ExportLine(entry));
         output.Append("\r\n");
     }

     var files = new List<string>();
     files.Add(output.ToString());
     return files;
 }
Example #58
0
 /// <summary>
 /// Converts the file at <paramref name="path"/> to text with <c>ParseQpyd</c> and
 /// imports every non-empty "\n"-separated line through <c>ImportLine</c>.
 /// </summary>
 /// <param name="path">Path of the file to import.</param>
 /// <returns>All entries produced from the file's lines.</returns>
 public WordLibraryList Import(string path)
 {
     var result = new WordLibraryList();
     string[] lines = ParseQpyd(path).Split('\n');
     for (int index = 0; index < lines.Length; index++)
     {
         string current = lines[index];
         if (current == "")
         {
             continue;
         }
         result.AddWordLibraryList(ImportLine(current));
     }
     return result;
 }
Example #59
0
 /// <summary>
 /// Parses a space-separated line of the form "code word". The entry gets the
 /// default rank, a Cangjie code obtained from <c>pyGenerater</c> for the word, and
 /// then the line's own code under this instance's <c>CodeType</c>.
 /// </summary>
 /// <param name="line">The line to parse; it must have at least two space-separated fields.</param>
 /// <returns>A list containing the single parsed entry.</returns>
 public WordLibraryList ImportLine(string line)
 {
     string[] parts = line.Split(' ');
     var entry = new WordLibrary();
     string lineCode = parts[0];

     entry.Word = parts[1];
     entry.Rank = DefaultRank;

     var generated = pyGenerater.GetCodeOfString(entry.Word);
     entry.SetCode(CodeType.Cangjie, generated);
     entry.SetCode(CodeType, lineCode);

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
Example #60
0
 /// <summary>
 /// Splits <paramref name="str"/> into non-empty lines on CR/LF and imports every
 /// line except the first through <c>ImportLine</c>.
 /// </summary>
 /// <remarks>
 /// <c>CountWord</c> is set to the total number of lines (including the skipped
 /// first one) and <c>CurrentStatus</c> to the index of the line being processed.
 /// </remarks>
 /// <param name="str">The text to import.</param>
 /// <returns>All entries produced from the imported lines.</returns>
 public WordLibraryList ImportText(string str)
 {
     var result = new WordLibraryList();
     char[] lineBreaks = new[] {'\r','\n'};
     string[] lines = str.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);
     CountWord = lines.Length;

     // Index 0 is skipped on purpose: processing starts at the second line.
     int index = 1;
     while (index < lines.Length)
     {
         CurrentStatus = index;
         result.AddWordLibraryList(ImportLine(lines[index]));
         index++;
     }
     return result;
 }