/// <summary>
/// Reads a binary dictionary file and returns the words it contains.
/// </summary>
/// <remarks>
/// The word count is taken from the 4 bytes at file offset 12. Word records start at
/// 0x2400 and are 60 bytes apart; within a record, byte 10 holds the word length in
/// characters and the UTF-16 text starts at byte 12.
/// </remarks>
/// <param name="path">Path of the file to read.</param>
/// <returns>One <see cref="WordLibrary"/> per record, tagged with this importer's CodeType.</returns>
public WordLibraryList Import(string path)
        {
            const int userWordBase = 0x2400; // file offset of the first word record
            const int recordSize   = 60;     // distance between consecutive records

            var    result = new WordLibraryList();
            byte[] buffer = new byte[50];

            // "using" releases the file handle even when a seek, read or decode throws;
            // previously the stream was only closed on the success path.
            using (FileStream fp = File.OpenRead(path))
            {
                // Number of word records.
                fp.Seek(12, SeekOrigin.Begin);
                fp.Read(buffer, 0, 4);
                int cnt = bytesToIntLittle(buffer, 0, 4);

                for (int i = 0; i < cnt; i++)
                {
                    int recordStart = userWordBase + i * recordSize;

                    // Word length, in characters.
                    fp.Seek(recordStart + 10, SeekOrigin.Begin);
                    fp.Read(buffer, 0, 1);
                    int wordLen = bytesToIntLittle(buffer, 0, 1);

                    // Word text, two bytes per character.
                    fp.Seek(recordStart + 12, SeekOrigin.Begin);
                    fp.Read(buffer, 0, wordLen * 2);
                    string word = Encoding.Unicode.GetString(buffer, 0, wordLen * 2);

                    result.Add(new WordLibrary()
                    {
                        Word = word, CodeType = this.CodeType,
                    });
                }
            }
            return(result);
        }
예제 #2
0
        /// <summary>
        /// Parses a space-separated line whose first token is a code and whose remaining
        /// tokens are words sharing that code.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>One entry per word token; empty when the line has only the code.</returns>
        public override WordLibraryList ImportLine(string line)
        {
            string[] tokens = line.Split(' ');
            string   code   = tokens[0];
            var      result = new WordLibraryList();

            for (int idx = 1; idx < tokens.Length; idx++)
            {
                // The token is stored as-is (commas included).
                var entry = new WordLibrary();
                entry.Word = tokens[idx];
                entry.SetCode(CodeType, code);
                result.Add(entry);
            }
            return(result);
        }
예제 #3
0
        /// <summary>
        /// Imports the words returned by <c>Parse(path)</c> without generating any pinyin.
        /// </summary>
        /// <param name="path">Path handed to <c>Parse</c>.</param>
        /// <returns>One entry per parsed word, flagged as English and given DefaultRank as its count.</returns>
        public WordLibraryList Import(string path)
        {
            IList <string> parsed = Parse(path);
            var            result = new WordLibraryList();

            foreach (string item in parsed)
            {
                // No phonetic annotation is done during conversion, to keep it fast;
                // every entry is therefore marked as English.
                var entry = new WordLibrary();
                entry.IsEnglish = true;
                entry.Word      = item;
                entry.Count     = DefaultRank;
                result.Add(entry);
            }
            return(result);
        }
예제 #4
0
        /// <summary>
        /// Parses a space-separated line whose first token is a code and whose remaining
        /// tokens are words sharing that code.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>One entry per word token; empty when the line has only the code.</returns>
        public virtual WordLibraryList ImportLine(string line)
        {
            string[] tokens = line.Split(' ');
            string   code   = tokens[0];
            var      result = new WordLibraryList();

            for (int idx = 1; idx < tokens.Length; idx++)
            {
                // The token is stored as-is (commas included).
                var entry = new WordLibrary();
                entry.Word = tokens[idx];
                entry.SetCode(this.CodeType, code);
                result.Add(entry);
            }
            return result;
        }
예제 #5
0
        /// <summary>
        /// Parses plist-style XML and builds one entry per <c>//plist/array/dict</c> node.
        /// </summary>
        /// <remarks>
        /// For each dict node, the first <c>string</c> child is the word and the second is
        /// passed to <c>SetPinyinString</c>. CountWord and CurrentStatus are updated as the
        /// nodes are processed.
        /// </remarks>
        /// <param name="str">The XML text.</param>
        /// <returns>The imported entries, each with rank 1.</returns>
        public WordLibraryList ImportText(string str)
        {
            var doc = new XmlDocument();
            doc.LoadXml(str);

            var         result    = new WordLibraryList();
            XmlNodeList dictNodes = doc.SelectNodes("//plist/array/dict");

            CountWord = dictNodes.Count;
            int position = 0;
            foreach (XmlNode dictNode in dictNodes)
            {
                XmlNodeList strings = dictNode.SelectNodes("string");

                var entry = new WordLibrary();
                entry.Word = strings[0].InnerText;
                entry.Rank = 1;
                entry.SetPinyinString(strings[1].InnerText);
                CurrentStatus = position;
                result.Add(entry);
                position++;
            }

            return(result);
        }
예제 #6
0
        /// <summary>
        /// Parses a tab-separated line: field 0 is the word, field 1 a comma-separated
        /// list of zhuyin syllables that are converted with <c>ZhuyinHelper.GetPinyin</c>.
        /// </summary>
        /// <remarks>
        /// A syllable whose conversion throws is logged to the debug output and skipped.
        /// </remarks>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list holding the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] fields = line.Split('\t');
            var      entry  = new WordLibrary();

            entry.Word  = fields[0];
            entry.Count = DefaultRank;

            string[] syllables = fields[1].Split(',');
            var      pinyin    = new List <string>();

            for (int i = 0; i < syllables.Length; i++)
            {
                try
                {
                    pinyin.Add(ZhuyinHelper.GetPinyin(syllables[i]));
                }
                catch (Exception ex)
                {
                    Debug.WriteLine(ex.Message);
                }
            }
            entry.PinYin = pinyin.ToArray();

            var result = new WordLibraryList();
            result.Add(entry);
            return(result);
        }
예제 #7
0
        /// <summary>
        /// Exports a single user-defined-code entry through a pinyin-format pattern and
        /// checks the exact exported text.
        /// </summary>
        public void TestGeneratePinyinThen2String()
        {
            ParsePattern parser = new ParsePattern()
            {
                IsPinyinFormat  = true,
                CodeSplitType   = BuildType.FullContain,
                CodeSplitString = "~",
                ContainCode     = true,
                ContainRank     = true,
                SplitString     = "|",
                CodeType        = CodeType.Pinyin,
                LineSplitString = "\r",
                Sort            = new List <int>()
                {
                    2, 1, 3
                }
            };
            WordLibraryList wll = new WordLibraryList();
            WordLibrary     wl  = new WordLibrary()
            {
                Word = "深蓝", Rank = 123, CodeType = CodeType.UserDefine
            };

            wl.Codes = new Code();
            wl.Codes.Add(new[] { "sn" });
            wl.Codes.Add(new[] { "ln" });
            wll.Add(wl);
            selfDefining.UserDefiningPattern = parser;
            var str = selfDefining.Export(wll);

            // Expected value first, actual second, so a failure message labels them correctly.
            Assert.AreEqual("深蓝|~shen~lan~|123\r", str);
        }
예제 #8
0
        /// <summary>
        /// Parses a space-separated line "word py1 py2 ...", taking one pinyin token per
        /// character of the word.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>
        /// <c>null</c> when the line starts with ';'; otherwise a list holding the single
        /// parsed entry with count 1.
        /// </returns>
        public WordLibraryList ImportLine(string line)
        {
            bool isComment = line.Length > 0 && line[0] == ';';
            if (isComment)
            {
                return(null);
            }

            string[] tokens = line.Split(' ');
            string   word   = tokens[0];

            // Token i + 1 is the pinyin of character i.
            var pinyin = new string[word.Length];
            for (int ch = 0; ch < pinyin.Length; ch++)
            {
                pinyin[ch] = tokens[ch + 1];
            }

            var entry = new WordLibrary();
            entry.Word   = word;
            entry.Count  = 1;
            entry.PinYin = pinyin;

            var result = new WordLibraryList();
            result.Add(entry);
            return(result);
        }
예제 #9
0
        /// <summary>
        /// Loads a binary dictionary into GlobalCache and converts the cached words into
        /// a <see cref="WordLibraryList"/>.
        /// </summary>
        /// <remarks>
        /// Clears GlobalCache.CharList, GlobalCache.Stackes and GlobalCache.WordList before
        /// reading, so any previously cached content is discarded.
        /// </remarks>
        /// <param name="str">Path of the file to read.</param>
        /// <returns>One entry per item in GlobalCache.WordList after loading.</returns>
        public WordLibraryList Import(string str)
        {
            GlobalCache.CharList.Clear();
            GlobalCache.Stackes.Clear();
            GlobalCache.WordList.Clear();

            // "using" closes the file even if header parsing or tree loading throws;
            // previously the handle leaked on any exception.
            using (var fs = new FileStream(str, FileMode.Open, FileAccess.Read))
            {
                ParseHeader(fs);
                TouchPalChar rootChar = TouchPalChar.Load(fs); // load the first character

                LoadTree(fs, rootChar);
            }

            var wwl = new WordLibraryList();

            foreach (int i in GlobalCache.WordList.Keys)
            {
                TouchPalWord w  = GlobalCache.WordList[i];
                var          wl = new WordLibrary();
                wl.Count  = w.Count;
                wl.PinYin = w.PinYin.ToArray();
                wl.Word   = w.ChineseWord;
                wwl.Add(wl);
            }
            return(wwl);
        }
예제 #10
0
        /// <summary>
        /// Reads a binary phrase dictionary.
        /// </summary>
        /// <remarks>
        /// Four consecutive 32-bit values are read starting at file offset 0x10: the start
        /// of the offset table, the start of the phrase area, the end of the phrase area
        /// and the phrase count. Each phrase is then read with <c>ReadOnePhrase</c>, which
        /// is given the absolute position computed from the following table entry.
        /// </remarks>
        /// <param name="path">Path of the file to read.</param>
        /// <returns>All phrases for which <c>ReadOnePhrase</c> returned a non-null entry.</returns>
        public WordLibraryList Import(string path)
        {
            var pyAndWord = new WordLibraryList();

            // The stream was never closed before; "using" releases it on every path.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position = 0x10;
                var phrase_offset_start = BinFileHelper.ReadInt32(fs);
                var phrase_start        = BinFileHelper.ReadInt32(fs);
                var phrase_end          = BinFileHelper.ReadInt32(fs);
                var phrase_count        = BinFileHelper.ReadInt32(fs);

                fs.Position = phrase_offset_start;
                var offsets = ReadOffsets(fs, phrase_count);

                // Extra entry so that offsets[i + 1] exists for the last phrase.
                offsets.Add(phrase_end - phrase_start);

                fs.Position = phrase_start;
                for (var i = 0; i < phrase_count; i++)
                {
                    var wl = ReadOnePhrase(fs, phrase_start + offsets[i + 1]);
                    if (wl != null)
                    {
                        pyAndWord.Add(wl);
                    }
                }
            }
            return(pyAndWord);
        }
예제 #11
0
        /// <summary>
        /// Applies the shuangpin replacement (when PinyinType is not FullPinyin) and drops
        /// entries whose pinyin length exceeds 32 or whose word exceeds 64 characters.
        /// </summary>
        /// <param name="wlList">Entries to filter; each may be modified by the replacer.</param>
        /// <returns>A new list with the entries that passed both length checks.</returns>
        private WordLibraryList Filter(WordLibraryList wlList)
        {
            IReplaceFilter replacer = PinyinType != PinyinType.FullPinyin
                ? new ShuangpinReplacer(PinyinType)
                : null;
            var kept = new WordLibraryList();

            foreach (var entry in wlList)
            {
                if (replacer != null)
                {
                    replacer.Replace(entry);
                }

                // Length limits are checked after the replacement has been applied.
                bool tooLong = entry.GetPinYinLength() > 32 || entry.Word.Length > 64;
                if (!tooLong)
                {
                    kept.Add(entry);
                }
            }
            return(kept);
        }
예제 #12
0
 /// <summary>
 /// Builds one entry from a line using <c>UserDefiningPattern.BuildWordLibrary</c>.
 /// </summary>
 /// <param name="line">The line to parse.</param>
 /// <returns>A list holding the single entry that was built.</returns>
 public WordLibraryList ImportLine(string line)
 {
     var result = new WordLibraryList();
     result.Add(UserDefiningPattern.BuildWordLibrary(line));
     return result;
 }
예제 #13
0
        //private IWordCodeGenerater pyGenerater=new PinyinGenerater();
        /// <summary>
        /// Parses a tab-separated line: field 0 is the word, field 1 the code and
        /// field 2 the integer rank.
        /// </summary>
        /// <remarks>
        /// When CodeType is Pinyin the code is split on spaces into syllables; otherwise
        /// it is stored unchanged through <c>SetCode</c>.
        /// </remarks>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list holding the single parsed entry.</returns>
        public override WordLibraryList ImportLine(string line)
        {
            string[] fields = line.Split('\t');
            string   word   = fields[0];
            string   code   = fields[1];

            var entry = new WordLibrary();
            entry.Word = word;
            entry.Rank = Convert.ToInt32(fields[2]);

            if (CodeType != CodeType.Pinyin)
            {
                entry.SetCode(CodeType, code);
            }
            else
            {
                entry.PinYin = code.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            }

            var result = new WordLibraryList();
            result.Add(entry);
            return(result);
        }
예제 #14
0
        /// <summary>
        /// Parses dictionary XML in the <c>http://www.microsoft.com/ime/dctx</c> namespace
        /// and builds one entry per <c>Dictionary/DictionaryEntry</c> node.
        /// </summary>
        /// <remarks>
        /// <c>OutputString</c> supplies the word; <c>InputString</c> is split on spaces and
        /// the digits 1-4 to obtain the pinyin. CountWord and CurrentStatus are updated as
        /// the nodes are processed.
        /// </remarks>
        /// <param name="str">The XML text.</param>
        /// <returns>The imported entries, each with rank 1.</returns>
        public WordLibraryList ImportText(string str)
        {
            var doc = new XmlDocument();
            doc.LoadXml(str);

            var ns = new XmlNamespaceManager(doc.NameTable);
            ns.AddNamespace("ns1", "http://www.microsoft.com/ime/dctx");

            var         result  = new WordLibraryList();
            XmlNodeList entries = doc.SelectNodes("//ns1:Dictionary/ns1:DictionaryEntry", ns);

            CountWord = entries.Count;
            int position = 0;
            foreach (XmlNode node in entries)
            {
                string input  = node.SelectSingleNode("ns1:InputString", ns).InnerText;
                string output = node.SelectSingleNode("ns1:OutputString", ns).InnerText;

                var entry = new WordLibrary();
                entry.Word    = output;
                entry.Rank    = 1;
                entry.PinYin  = input.Split(new[] { ' ', '1', '2', '3', '4' }, StringSplitOptions.RemoveEmptyEntries);
                CurrentStatus = position;
                result.Add(entry);
                position++;
            }

            return(result);
        }
예제 #15
0
        /// <summary>
        /// Builds one entry from a line using <c>BuildWordLibrary</c>.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list holding the single entry that was built.</returns>
        public WordLibraryList ImportLine(string line)
        {
            var result = new WordLibraryList();

            result.Add(BuildWordLibrary(line));
            return(result);
        }
        /// <summary>
        /// Reads word records from a binary dictionary file.
        /// </summary>
        /// <remarks>
        /// The end position is the 32-bit value at file offset 0x60; records are read with
        /// <c>ImportWord</c> starting at 0x350 until the stream position equals that end
        /// position or <c>ImportWord</c> returns null. Records with an empty word or no
        /// pinyin are skipped. A record that throws is logged to the debug output and
        /// reading continues.
        /// </remarks>
        /// <param name="path">Path of the file to read.</param>
        /// <returns>The successfully read entries.</returns>
        public WordLibraryList Import(string path)
        {
            var wordLibraryList = new WordLibraryList();

            // "using" closes the file even if an exception escapes; previously the
            // handle leaked on any failure outside the inner try block.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position = 0x60;
                int endPosition = BinFileHelper.ReadInt32(fs);

                fs.Position   = 0x350;
                CurrentStatus = 0;
                do
                {
                    try
                    {
                        WordLibrary wl = ImportWord(fs);
                        if (wl == null)
                        {
                            break;
                        }
                        if (wl.Word != "" && wl.PinYin.Length > 0)
                        {
                            wordLibraryList.Add(wl);
                        }
                    }
                    catch (Exception ex)
                    {
                        Debug.WriteLine(ex.Message);
                        // Nothing more can be read once the end of the file is reached;
                        // stop instead of retrying (and failing) forever.
                        if (fs.Position >= fs.Length)
                        {
                            break;
                        }
                    }
                } while (fs.Position != endPosition);
            }

            return(wordLibraryList);
        }
예제 #17
0
 /// <summary>
 /// Converts one line of plain text into an entry, using the whole line as the word
 /// and the codes from <c>pinyinFactory.GetCodeOfString</c> as its pinyin.
 /// </summary>
 /// <param name="line">The line; used verbatim as the word.</param>
 /// <returns>A list holding the single entry.</returns>
 public virtual WordLibraryList ImportLine(string line)
 {
     var codes = pinyinFactory.GetCodeOfString(line);
     var entry = new WordLibrary
     {
         Word   = line,
         PinYin = ToArray(codes)
     };

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
예제 #18
0
 /// <summary>
 /// Converts one line of plain text into an entry, using the whole line as the word.
 /// No code is generated; only the importer's CodeType is recorded.
 /// </summary>
 /// <param name="line">The line; used verbatim as the word.</param>
 /// <returns>A list holding the single entry.</returns>
 public virtual WordLibraryList ImportLine(string line)
 {
     var entry = new WordLibrary
     {
         Word     = line,
         CodeType = CodeType
     };

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
예제 #19
0
 /// <summary>
 /// Parses a tab-separated line: field 0 is the word, field 1 the pinyin syllables
 /// separated by apostrophes, field 2 the integer rank.
 /// </summary>
 /// <param name="line">The line to parse.</param>
 /// <returns>A list holding the single parsed entry.</returns>
 public WordLibraryList ImportLine(string line)
 {
     string[] fields = line.Split('\t');
     var entry = new WordLibrary
     {
         Word   = fields[0],
         Rank   = Convert.ToInt32(fields[2]),
         PinYin = fields[1].Split(new[] { '\'' }, StringSplitOptions.RemoveEmptyEntries)
     };

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
예제 #20
0
 /// <summary>
 /// Parses a space-separated line: token 0 is the pinyin (syllables separated by
 /// apostrophes) and token 1 is the word.
 /// </summary>
 /// <param name="line">The line to parse.</param>
 /// <returns>A list holding the single parsed entry, with rank 1.</returns>
 public WordLibraryList ImportLine(string line)
 {
     string[] tokens = line.Split(' ');
     string pinyin = tokens[0];
     string word = tokens[1];

     var entry = new WordLibrary
     {
         Word   = word,
         Rank   = 1,
         PinYin = pinyin.Split(new[] {'\''}, StringSplitOptions.RemoveEmptyEntries)
     };

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
예제 #21
0
        // Record layout (translated from the original note): 4 bytes = number x of entries
        // sharing the same pinyin, 2 bytes = pinyin length n, n bytes = pinyin indices, then
        // x times (2 bytes word length y, word content in Unicode, 2 bytes frequency,
        // 2 bytes unknown, 4 bytes unknown).

        /// <summary>
        /// Reads a binary dictionary whose entries are grouped by shared pinyin.
        /// </summary>
        /// <remarks>
        /// CountWord is read from file offset 0x18 and groups start at 0x30. Pinyin indices
        /// are looked up in PinYinDic; an index whose lookup throws becomes "--". Words
        /// whose character count differs from the number of syllables are logged to the
        /// debug output and skipped. CurrentStatus counts every word read, kept or not.
        /// </remarks>
        /// <param name="path">Path of the file to read.</param>
        /// <returns>The entries whose word length matches their pinyin length.</returns>
        public WordLibraryList Import(string path)
        {
            var pyAndWord = new WordLibraryList();

            // The stream was never closed before; "using" releases it on every path.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position   = 0x18;
                CountWord     = BinFileHelper.ReadInt32(fs);
                CurrentStatus = 0;
                fs.Position   = 0x30;

                while (CurrentStatus < CountWord)
                {
                    int samePyCount = BinFileHelper.ReadInt16(fs);
                    BinFileHelper.ReadInt16(fs); // value not used; read to advance the stream
                    short pyLength = BinFileHelper.ReadInt16(fs);
                    var   pyArray  = new string[pyLength / 2];
                    for (int i = 0; i < pyLength / 2; i++)
                    {
                        short idx = BinFileHelper.ReadInt16(fs);
                        try
                        {
                            pyArray[i] = PinYinDic[idx];
                        }
                        catch
                        {
                            // Index that cannot be resolved: keep a placeholder syllable.
                            pyArray[i] = "--";
                        }
                    }
                    for (int i = 0; i < samePyCount; i++)
                    {
                        short wordByteLength = BinFileHelper.ReadInt16(fs);
                        var   wordArray      = new byte[wordByteLength];
                        fs.Read(wordArray, 0, wordByteLength);
                        string word  = Encoding.Unicode.GetString(wordArray);
                        short  count = BinFileHelper.ReadInt16(fs);
                        BinFileHelper.ReadInt16(fs); // value not used; read to advance the stream
                        BinFileHelper.ReadInt32(fs); // purpose unknown; read to advance the stream
                        if (pyArray.Length == word.Length)
                        {
                            var wl = new WordLibrary {
                                Rank = count, Word = word, PinYin = pyArray
                            };
                            pyAndWord.Add(wl);
                        }
                        else
                        {
                            Debug.WriteLine("Error data: word:[" + word + "] pinyin:[" + string.Join(",", pyArray) + "]");
                        }
                        CurrentStatus++;
                    }
                }
            }
            return(pyAndWord);
        }
예제 #22
0
        /// <summary>
        /// Converts one line of plain text into an entry, using the whole line as the word
        /// and the codes from <c>pinyinFactory.GetCodeOfString</c> as its pinyin.
        /// </summary>
        /// <param name="line">The line; used verbatim as the word.</param>
        /// <returns>A list holding the single entry.</returns>
        public virtual WordLibraryList ImportLine(string line)
        {
            var codes = pinyinFactory.GetCodeOfString(line);
            var entry = new WordLibrary
            {
                Word   = line,
                PinYin = ToArray(codes)
            };

            var result = new WordLibraryList();
            result.Add(entry);
            return(result);
        }
예제 #23
0
 /// <summary>
 /// Parses a tab-separated line: field 0 is the word, field 1 the space-separated
 /// pinyin syllables, field 2 the integer count.
 /// </summary>
 /// <param name="line">The line to parse.</param>
 /// <returns>A list holding the single parsed entry.</returns>
 public WordLibraryList ImportLine(string line)
 {
     string[] fields = line.Split('\t');
     string pinyin = fields[1];
     string word = fields[0];

     var entry = new WordLibrary
     {
         Word   = word,
         Count  = Convert.ToInt32(fields[2]),
         PinYin = pinyin.Split(new[] {' '}, StringSplitOptions.RemoveEmptyEntries)
     };

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
예제 #24
0
        /// <summary>
        /// Parses a tab-separated line: field 0 is the word and field 1 the integer count.
        /// The entry gets an empty pinyin array.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list holding the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] fields = line.Split('\t');
            string   word   = fields[0];

            var entry = new WordLibrary
            {
                Word   = word,
                Count  = Convert.ToInt32(fields[1]),
                PinYin = new string[] {}
            };

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
        /// <summary>
        /// Converts one line of plain text into an entry, using the whole line as the word.
        /// No code is generated; only the importer's CodeType is recorded.
        /// </summary>
        /// <param name="line">The line; used verbatim as the word.</param>
        /// <returns>A list holding the single entry.</returns>
        public virtual WordLibraryList ImportLine(string line)
        {
            var entry = new WordLibrary
            {
                Word     = line,
                CodeType = CodeType
            };

            var result = new WordLibraryList();
            result.Add(entry);
            return(result);
        }
예제 #26
0
 /// <summary>
 /// Parses a space-separated line: token 0 is the code and token 1 the word.
 /// </summary>
 /// <remarks>
 /// The entry first receives a Cangjie code from <c>pyGenerater.GetCodeOfString</c>,
 /// then the code from the line is set under the importer's CodeType.
 /// </remarks>
 /// <param name="line">The line to parse.</param>
 /// <returns>A list holding the single parsed entry, ranked with DefaultRank.</returns>
 public WordLibraryList ImportLine(string line)
 {
     string[] tokens = line.Split(' ');
     var entry = new WordLibrary();
     string code = tokens[0];

     entry.Word = tokens[1];
     entry.Rank = DefaultRank;
     var generated = pyGenerater.GetCodeOfString(entry.Word);
     entry.SetCode(CodeType.Cangjie, generated);
     entry.SetCode(CodeType, code);

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
예제 #27
0
        /// <summary>
        /// Parses a tab-separated line: field 0 is the word, field 1 the pinyin syllables
        /// separated by apostrophes, field 2 the integer rank.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list holding the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] fields = line.Split('\t');
            var entry = new WordLibrary
            {
                Word   = fields[0],
                Rank   = Convert.ToInt32(fields[2]),
                PinYin = fields[1].Split(new[] { '\'' }, StringSplitOptions.RemoveEmptyEntries)
            };

            var result = new WordLibraryList();
            result.Add(entry);
            return(result);
        }
예제 #28
0
 /// <summary>
 /// Parses a space-separated line: token 0 is the code and token 1 the word.
 /// </summary>
 /// <remarks>
 /// Pinyin comes from <c>pyGenerater.GetCodeOfString</c>; the code from the line is
 /// added under the importer's CodeType via <c>AddCode</c>.
 /// </remarks>
 /// <param name="line">The line to parse.</param>
 /// <returns>A list holding the single parsed entry, with DefaultRank as its count.</returns>
 public WordLibraryList ImportLine(string line)
 {
     string[] tokens = line.Split(' ');
     var entry = new WordLibrary();
     string code = tokens[0];

     entry.Word = tokens[1];
     entry.Count = DefaultRank;
     var generated = pyGenerater.GetCodeOfString(entry.Word);
     entry.PinYin = CollectionHelper.ToArray(generated);
     entry.AddCode(CodeType, code);

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
예제 #29
0
        /// <summary>
        /// Parses a comma-separated line: field 0 is the word and field 1 the integer rank.
        /// The entry gets an empty pinyin array.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list holding the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] fields = line.Split(',');
            string   word   = fields[0];
            int      rank   = Convert.ToInt32(fields[1]);

            var entry = new WordLibrary
            {
                Word   = word,
                Rank   = rank,
                PinYin = new string[] {}
            };

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
예제 #30
0
        /// <summary>
        /// Parses a space-separated line: token 0 is the word and token 1 the pinyin
        /// syllables separated by apostrophes.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list holding the single pinyin entry, ranked with DefaultRank.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] tokens = line.Split(' ');
            string   pinyin = tokens[1];
            string   word   = tokens[0];

            var entry = new WordLibrary();
            entry.CodeType = CodeType.Pinyin;
            entry.Word     = word;
            entry.Rank     = DefaultRank;
            entry.PinYin   = pinyin.Split(new[] {'\''}, StringSplitOptions.RemoveEmptyEntries);

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
예제 #31
0
        /// <summary>
        /// Parses a tab-separated line: field 0 is the word and field 1 the pinyin
        /// syllables separated by apostrophes.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list holding the single parsed entry, with count 1.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] fields = line.Split('\t');
            string   pinyin = fields[1];
            string   word   = fields[0];

            var entry = new WordLibrary
            {
                Word   = word,
                Count  = 1,
                PinYin = pinyin.Split(new[] { '\'' }, StringSplitOptions.RemoveEmptyEntries)
            };

            var result = new WordLibraryList();
            result.Add(entry);
            return(result);
        }
예제 #32
0
        /// <summary>
        /// Reads a "Freeime Dictionary" binary file.
        /// </summary>
        /// <remarks>
        /// The leading ASCII header is checked with <c>Debug.Assert</c> only. The 4 bytes at
        /// offset 0x23, decoded as UTF-16, select the code type: "拼音" maps to Pinyin and
        /// "五笔" to Wubi98. Phrases are read with <c>ReadOnePhrase</c> from 0x1B620 until
        /// the stream position reaches the end of the file.
        /// </remarks>
        /// <param name="path">Path of the file to read.</param>
        /// <returns>All phrases for which <c>ReadOnePhrase</c> returned a non-null entry.</returns>
        /// <exception cref="NotImplementedException">The type marker is neither of the two known values.</exception>
        public WordLibraryList Import(string path)
        {
            var pyAndWord = new WordLibraryList();

            // "using" closes the file on every path. Previously it was closed only when
            // the end of the file was reached, so the unknown-type exception below (or any
            // read error) leaked the handle.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position = 0x00;
                var headerstr = "Freeime Dictionary";
                var header    = Encoding.ASCII.GetString(BinFileHelper.ReadArray(fs, headerstr.Length));

                Debug.Assert(header.Equals(headerstr));

                DictCodeType curType;

                fs.Position = 0x23;
                var headerTypeBytes = BinFileHelper.ReadArray(fs, 4);
                var headerTypeStr   = Encoding.Unicode.GetString(headerTypeBytes);

                if (headerTypeStr.Equals("拼音"))
                {
                    curType = DictCodeType.Pinyin;
                }
                else if (headerTypeStr.Equals("五笔"))
                {
                    curType = DictCodeType.Wubi98;
                }
                else
                {
                    throw new NotImplementedException("未知词库,请在反馈中提交文件");
                }

                var phrase_start = 0x1B620; // where the 'a' entries are located

                fs.Position = phrase_start;
                do
                {
                    var wl = ReadOnePhrase(fs, curType);
                    if (wl != null)
                    {
                        pyAndWord.Add(wl);
                    }
                } while (fs.Length != fs.Position); // stop at end of file
            }
            return(pyAndWord);
        }
예제 #33
0
        /// <summary>
        /// Parses a tab-separated line: field 0 is the word and field 1 the integer rank.
        /// The entry gets an empty pinyin array.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list holding the single parsed entry.</returns>
        public override WordLibraryList ImportLine(string line)
        {
            string[] fields = line.Split('\t');
            string   word   = fields[0];

            var entry = new WordLibrary
            {
                Word   = word,
                Rank   = Convert.ToInt32(fields[1]),
                PinYin = new string[] {}
            };

            var result = new WordLibraryList();
            result.Add(entry);
            return(result);
        }
예제 #34
0
        /// <summary>
        /// Parses a tab-separated line: field 0 is the word, field 1 the space-separated
        /// pinyin syllables, field 2 the integer count.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list holding the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] fields = line.Split('\t');
            string   pinyin = fields[1];
            string   word   = fields[0];

            var entry = new WordLibrary
            {
                Word   = word,
                Count  = Convert.ToInt32(fields[2]),
                PinYin = pinyin.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
            };

            var result = new WordLibraryList();
            result.Add(entry);
            return(result);
        }
예제 #35
0
 /// <summary>
 /// Parses a space-separated line and imports token 1 as the word, generating its
 /// pinyin with <c>pinyinFactory.GetCodeOfString</c>. Token 0 is not used.
 /// </summary>
 /// <param name="line">The line to parse.</param>
 /// <returns>
 /// A list holding the entry, or an empty list when no pinyin was generated.
 /// </returns>
 public WordLibraryList ImportLine(string line)
 {
     string[] tokens = line.Split(' ');
     string word = tokens[1];

     var entry = new WordLibrary();
     entry.Word = word;
     entry.Count = DefaultRank;
     entry.PinYin = ToArray(pinyinFactory.GetCodeOfString(word));

     var result = new WordLibraryList();
     if (entry.PinYin.Length <= 0)
     {
         return result;
     }
     result.Add(entry);
     return result;
 }
예제 #36
0
 /// <summary>
 /// Keeps the first <c>Percentage</c> percent of the entries after sorting them by
 /// ascending Rank.
 /// </summary>
 /// <remarks>
 /// When Percentage is 100 the input list is returned unchanged. Otherwise the input
 /// list is sorted in place and a new list with the leading entries is returned.
 /// </remarks>
 /// <param name="list">Entries to filter; reordered in place unless Percentage is 100.</param>
 /// <returns>The input list itself, or a new list with the retained entries.</returns>
 public WordLibraryList Filter(WordLibraryList list)
 {
     if (Percentage == 100)
     {
         return list;
     }
     int count = list.Count*Percentage/100;
     // CompareTo instead of "a.Rank - b.Rank": the subtraction overflows for ranks of
     // opposite sign and large magnitude, which yields a wrong ordering.
     list.Sort((a, b) => a.Rank.CompareTo(b.Rank));
     var result = new WordLibraryList();
     for (int i = 0; i < count; i++)
     {
         result.Add(list[i]);
     }
     return result;
 }
예제 #37
0
 /// <summary>
 /// Parses the part of the line before the first comma as space-separated tokens:
 /// token 0 is the pinyin (syllables separated by apostrophes), token 1 the word and
 /// token 2 the integer rank.
 /// </summary>
 /// <param name="line">The line to parse.</param>
 /// <returns>A list holding the single parsed entry.</returns>
 public WordLibraryList ImportLine(string line)
 {
     // If the line contains commas, only the first segment is used.
     string head = line.Split(',')[0];
     string[] tokens = head.Split(' ');
     string pinyin = tokens[0];
     string word = tokens[1];
     int rank = Convert.ToInt32(tokens[2]);

     var entry = new WordLibrary
     {
         Word   = word,
         Rank   = rank,
         PinYin = pinyin.Split(new[] {'\''}, StringSplitOptions.RemoveEmptyEntries)
     };

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
        /// <summary>
        /// Parses a space-separated line: token 0 is the code and token 1 the word.
        /// </summary>
        /// <remarks>
        /// The entry first receives a Cangjie code from <c>pyGenerater.GetCodeOfString</c>,
        /// then the code from the line is set under the importer's CodeType.
        /// </remarks>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list holding the single parsed entry, ranked with DefaultRank.</returns>
        public override WordLibraryList ImportLine(string line)
        {
            string[] tokens = line.Split(' ');
            var      entry  = new WordLibrary();
            string   code   = tokens[0];

            entry.Word = tokens[1];
            entry.Rank = DefaultRank;
            var generated = pyGenerater.GetCodeOfString(entry.Word);
            entry.SetCode(CodeType.Cangjie, generated);
            entry.SetCode(CodeType, code);

            var result = new WordLibraryList();
            result.Add(entry);
            return(result);
        }
        /// <summary>
        /// Drops entries whose word is a single character or longer than 12 characters.
        /// </summary>
        /// <param name="wlList">Entries to filter.</param>
        /// <returns>A new list with the entries that were kept.</returns>
        private WordLibraryList Filter(WordLibraryList wlList)
        {
            var kept = new WordLibraryList();

            foreach (var entry in wlList)
            {
                // At most 12 characters are supported, and single characters are excluded.
                bool accepted = entry.Word.Length <= 12 && entry.Word.Length != 1;
                if (accepted)
                {
                    kept.Add(entry);
                }
            }
            return(kept);
        }
예제 #40
0
        /// <summary>
        /// Parses a space-separated line: token 0 is the code and token 1 the word.
        /// </summary>
        /// <remarks>
        /// Pinyin comes from <c>pyGenerater.GetCodeOfString</c>; the code from the line is
        /// set under the importer's CodeType via <c>SetCode</c>.
        /// </remarks>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list holding the single parsed entry, with DefaultRank as its count.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] tokens = line.Split(' ');
            var      entry  = new WordLibrary();
            string   code   = tokens[0];

            entry.Word  = tokens[1];
            entry.Count = DefaultRank;
            var generated = pyGenerater.GetCodeOfString(entry.Word);
            entry.PinYin = CollectionHelper.ToArray(generated);
            entry.SetCode(CodeType, code);

            var result = new WordLibraryList();
            result.Add(entry);
            return(result);
        }
예제 #41
0
        /// <summary>
        /// Parses a comma-separated line: field 0 is the word and field 1 the integer count.
        /// The entry gets an empty pinyin array.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list holding the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] fields = line.Split(',');
            string   word   = fields[0];
            int      count  = Convert.ToInt32(fields[1]);

            var entry = new WordLibrary
            {
                Word   = word,
                Count  = count,
                PinYin = new string[] {}
            };

            var result = new WordLibraryList();
            result.Add(entry);
            return(result);
        }
예제 #42
0
 /// <summary>
 /// Parses a line that begins with an apostrophe: the first space-separated token is
 /// the pinyin (syllables separated by apostrophes) and the second is the word.
 /// </summary>
 /// <param name="line">The line to parse.</param>
 /// <returns>
 /// A list holding the single parsed entry with rank 1, or <c>null</c> when the line
 /// does not start with an apostrophe.
 /// </returns>
 public override WordLibraryList ImportLine(string line)
 {
     if (line.IndexOf("'") != 0)
     {
         return(null);
     }

     string[] tokens = line.Split(' ');
     string   pinyin = tokens[0];
     string   word   = tokens[1];

     var entry = new WordLibrary
     {
         Word   = word,
         Rank   = 1,
         PinYin = pinyin.Split(new[] { '\'' }, StringSplitOptions.RemoveEmptyEntries)
     };

     var result = new WordLibraryList();
     result.Add(entry);
     return(result);
 }
예제 #43
0
        /// <summary>
        /// Imports the second tab-separated field of the line as the word.
        /// </summary>
        /// <remarks>
        /// The entry is tagged with the importer's CodeType. When <c>IsEnglish</c> reports
        /// the word as English, the word itself is also stored as its English code.
        /// </remarks>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list holding the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] fields = line.Split('\t');
            var      entry  = new WordLibrary();

            entry.Word      = fields[1];
            entry.CodeType  = CodeType;
            entry.IsEnglish = IsEnglish(entry.Word);
            if (entry.IsEnglish)
            {
                entry.SetCode(CodeType.English, entry.Word);
            }

            var result = new WordLibraryList();
            result.Add(entry);
            return(result);
        }
        /// <summary>
        /// Exports a single four-character pinyin entry with <c>Win10MsPinyinSelfStudy</c>
        /// and writes the first element of the result to the debug output.
        /// </summary>
        public void TestExport1()
        {
            var words = new WordLibraryList();
            var entry = new WordLibrary()
            {
                Word     = "曾毅曾诚",
                PinYin   = new string[] { "zeng", "yi", "zeng", "cheng" },
                CodeType = CodeType.Pinyin
            };

            words.Add(entry);

            var exporter = new Win10MsPinyinSelfStudy();
            var exported = exporter.Export(words);

            // No assertion is made; the test only checks that the export runs.
            Debug.WriteLine(exported[0]);
        }
예제 #45
0
 /// <summary>
 /// Parses a space-separated line: token 0 is stored as the entry's Wubi code and
 /// token 1 is the word.
 /// </summary>
 /// <param name="line">The line to parse.</param>
 /// <returns>
 /// A list holding the entry when its PinYin array is non-empty; otherwise an empty list.
 /// </returns>
 public WordLibraryList ImportLine(string line)
 {
     string[] tokens = line.Split(' ');
     string code = tokens[0];
     string word = tokens[1];

     var entry = new WordLibrary();
     entry.Word = word;
     entry.Rank = DefaultRank;
     entry.SetCode(CodeType.Wubi, code);

     var result = new WordLibraryList();
     // NOTE(review): PinYin is never assigned in this method, so whether this guard can
     // pass depends on how WordLibrary.PinYin behaves after SetCode - confirm.
     if (entry.PinYin.Length <= 0)
     {
         return result;
     }
     result.Add(entry);
     return result;
 }
예제 #46
0
        //private IWordCodeGenerater pyGenerater = new PinyinGenerater();
        /// <summary>
        /// Parses a space-separated line whose first token is a code and whose remaining
        /// tokens are words sharing that code.
        /// </summary>
        /// <param name="str">The line to parse.</param>
        /// <returns>One entry per word token, each with DefaultRank as its count.</returns>
        public WordLibraryList ImportLine(string str)
        {
            var result = new WordLibraryList();

            string[] tokens = str.Split(' ');
            int      index  = 1;
            while (index < tokens.Length)
            {
                var entry = new WordLibrary();
                entry.Word  = tokens[index];
                entry.Count = DefaultRank;
                entry.SetCode(CodeType, tokens[0]);
                result.Add(entry);
                index++;
            }
            return(result);
        }
예제 #47
0
        /// <summary>
        /// Parses CRLF-separated text where each non-empty line is tab-separated: field 0
        /// is the word, field 1 the integer count, field 2 the space-separated pinyin.
        /// </summary>
        /// <param name="str">The text to parse.</param>
        /// <returns>One entry per non-empty line.</returns>
        public WordLibraryList Import(string str)
        {
            var result = new WordLibraryList();
            string[] lines = str.Split(new string[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);

            foreach (string line in lines)
            {
                string[] fields = line.Split('\t');

                var entry = new WordLibrary();
                entry.Word = fields[0];
                entry.Count = Convert.ToInt32(fields[1]);
                entry.PinYin = fields[2].Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                result.Add(entry);
            }
            return result;
        }
        /// <summary>
        /// Exports every prefix of a sample string (1 character, 2 characters, ...) using
        /// the pattern from <c>InitPattern</c> and the "Test\\array30.txt" mapping table.
        /// </summary>
        public void TestExportExtCodeLots()
        {
            string str="深蓝词库转换测试代码";
            var list = new WordLibraryList();

            // Prefix of length 1, then 2, ... up to the whole string.
            for (int length = 1; length <= str.Length; length++)
            {
                list.Add(new WordLibrary() {Count = 10, IsEnglish = false, Word = str.Substring(0, length)});
            }

            export.UserDefiningPattern = InitPattern();
            export.UserDefiningPattern.MappingTablePath = "Test\\array30.txt";
            var exported = export.Export(list);
            Debug.WriteLine(exported);
            // NOTE(review): this asserts on the input constant, not on the export result -
            // confirm whether the result was meant to be checked.
            Assert.IsNotNullOrEmpty(str);
        }
예제 #49
0
 /// <summary>
 /// Parses a tab-separated line: column 0 is the word, column 1 is its pinyin
 /// with syllables separated by apostrophes.
 /// </summary>
 /// <param name="line">The line to parse.</param>
 /// <returns>A list holding the parsed entry, or an empty list when the pinyin column is empty.</returns>
 public WordLibraryList ImportLine(string line)
 {
     string[] fields = line.Split('\t');
     string pinyin = fields[1];
     var entry = new WordLibrary
     {
         Word = fields[0],
         Rank = 1,
         PinYin = pinyin.Split(new[] {'\''}, StringSplitOptions.RemoveEmptyEntries)
     };

     var result = new WordLibraryList();
     if (string.IsNullOrEmpty(pinyin))
     {
         // No pinyin at all: the entry is dropped.
         return result;
     }

     result.Add(entry);
     return result;
 }
예제 #50
0
 /// <summary>
 /// Parses a space-separated line and produces one entry per word token,
 /// with the code generated by <c>pinyinFactory</c>.
 /// </summary>
 /// <param name="line">The line to parse; token 0 is ignored by this method.</param>
 /// <returns>
 /// One entry for each token after the first whose generated code list is
 /// non-empty. Tokens that yield no code are skipped.
 /// </returns>
 public virtual WordLibraryList ImportLine(string line)
 {
     var wlList = new WordLibraryList();
     string[] strs = line.Split(' ');
     for (int i = 1; i < strs.Length; i++)
     {
         // Strip any commas embedded in the word.
         string word = strs[i].Replace(",", "");
         var list = pinyinFactory.GetCodeOfString(word);
         if (list.Count == 0)
         {
             continue;
         }

         // Add the word exactly once. The previous loop over list.Count never
         // used its index and added one identical entry per code element.
         var wl = new WordLibrary();
         wl.Word = word;
         wl.PinYin = ToArray(list);
         wlList.Add(wl);
     }
     return wlList;
 }
예제 #51
0
        /// <summary>
        /// Parses a space-separated line made of a word followed by one pinyin
        /// token per character of that word.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>
        /// <c>null</c> when the line starts with ';'; otherwise a list with the single parsed entry.
        /// </returns>
        public WordLibraryList ImportLine(string line)
        {
            bool startsWithSemicolon = line.Length > 0 && line[0] == ';';
            if (startsWithSemicolon)
            {
                return null;
            }

            string[] tokens = line.Split(' ');
            string word = tokens[0];

            // One pinyin token is taken for each character of the word, starting at token 1.
            var pinyin = new string[word.Length];
            for (int t = 1; t <= word.Length; t++)
            {
                pinyin[t - 1] = tokens[t];
            }

            var entry = new WordLibrary { Word = word, Rank = 1, PinYin = pinyin };
            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
예제 #52
0
        /// <summary>
        /// Parses a line of the form "word pinyin", where the pinyin syllables are
        /// separated by '|'.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>
        /// A list with the parsed entry, or an empty list when parsing throws
        /// (the line and the error message are written to the debug output).
        /// </returns>
        public WordLibraryList ImportLine(string line)
        {
            var result = new WordLibraryList();
            try
            {
                string[] parts = line.Split(' ');
                string pinyin = parts[1];
                var entry = new WordLibrary
                {
                    Word = parts[0],
                    Rank = 1,
                    PinYin = pinyin.Split(new[] {'|'}, StringSplitOptions.RemoveEmptyEntries)
                };
                result.Add(entry);
            }
            catch (Exception ex)
            {
                Debug.WriteLine(line + "\t" + ex.Message);
            }
            return result;
        }
예제 #53
0
        /// <summary>
        /// Parses a tab-separated line. Two columns mean an English entry
        /// (word, count); otherwise the columns are word, apostrophe-separated
        /// pinyin, and count.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        /// <returns>A list containing the single parsed entry.</returns>
        public WordLibraryList ImportLine(string line)
        {
            string[] columns = line.Split('\t');
            var entry = new WordLibrary();
            entry.Word = columns[0];

            bool isEnglish = columns.Length == 2;
            if (!isEnglish)
            {
                entry.PinYin = columns[1].Split(new[] {'\''}, StringSplitOptions.RemoveEmptyEntries);
                entry.Count = Convert.ToInt32(columns[2]);
            }
            else
            {
                entry.IsEnglish = true;
                entry.Count = Convert.ToInt32(columns[1]);
            }

            var result = new WordLibraryList();
            result.Add(entry);
            return result;
        }
예제 #54
0
 /// <summary>
 /// Loads the words returned by <c>Parse(path)</c> and turns each into an entry.
 /// Chinese words are annotated with generated pinyin; everything else is
 /// flagged as English.
 /// </summary>
 /// <param name="path">Path handed to <c>Parse</c>.</param>
 /// <returns>One entry per parsed word, each with the default rank as its count.</returns>
 public WordLibraryList Import(string path)
 {
     IWordCodeGenerater generator = new WordPinyinGenerater();
     var result = new WordLibraryList();
     foreach (string word in Parse(path))
     {
         var entry = new WordLibrary();
         if (!IsChinese(word))
         {
             entry.IsEnglish = true;
         }
         else
         {
             // Chinese words need a pinyin annotation.
             entry.PinYin = ToArray(generator.GetCodeOfString(word));
         }
         entry.Word = word;
         entry.Count = DefaultRank;
         result.Add(entry);
     }
     return result;
 }
예제 #55
0
 /// <summary>
 /// Loads the words returned by <c>Parse(path)</c> and wraps each one in an entry
 /// with the default rank.
 /// </summary>
 /// <param name="path">Path handed to <c>Parse</c>.</param>
 /// <returns>One entry per parsed word.</returns>
 public WordLibraryList Import(string path)
 {
     var result = new WordLibraryList();
     IList<string> words = Parse(path);
     foreach (string word in words)
     {
         // Pinyin annotation is deliberately skipped to keep conversion fast,
         // so every entry is flagged as English.
         var entry = new WordLibrary
         {
             IsEnglish = true,
             Word = word,
             Rank = DefaultRank
         };
         result.Add(entry);
     }
     return result;
 }
예제 #56
0
 /// <summary>
 /// Exports one entry whose stored codes are user-defined ("sn", "ln") through a
 /// pattern configured for pinyin, and checks the exact exported line.
 /// </summary>
 public void TestGeneratePinyinThen2String()
 {
     ParsePattern parser = new ParsePattern()
     {
         IsPinyinFormat = true,
         CodeSplitType = BuildType.FullContain,
         CodeSplitString = "~",
         ContainCode = true,
         ContainRank = true,
         SplitString = "|",
         CodeType = CodeType.Pinyin,
         LineSplitString = "\r",
         Sort = new List<int>() { 2, 1, 3 }
     };
     WordLibraryList wll = new WordLibraryList();
     WordLibrary wl = new WordLibrary() { Word = "深蓝", Rank = 123, CodeType = CodeType.UserDefine };
     wl.Codes = new Code();
     wl.Codes.Add(new[] { "sn" });
     wl.Codes.Add( new[] { "ln" });
     wll.Add(wl);
     selfDefining.UserDefiningPattern = parser;
     var str = selfDefining.Export(wll);
     // Expected value goes first so a failure message labels the two values correctly.
     Assert.AreEqual("深蓝|~shen~lan~|123\r", str);
 }
예제 #57
0
 //private IWordCodeGenerater pyGenerater = new PinyinGenerater();
 /// <summary>
 /// Parses a space-separated line: the first token is a code, and each later
 /// token is a word that gets that code added under the current code type.
 /// </summary>
 /// <param name="str">The line to parse.</param>
 /// <returns>One entry per token after the first.</returns>
 public WordLibraryList ImportLine(string str)
 {
     var result = new WordLibraryList();
     string[] tokens = str.Split(' ');

     // Index 0 holds the shared code, so the words begin at index 1.
     for (int idx = 1; idx < tokens.Length; idx++)
     {
         var entry = new WordLibrary { Word = tokens[idx], Count = DefaultRank };
         entry.AddCode(CodeType, tokens[0]);
         result.Add(entry);
     }

     return result;
 }
예제 #58
0
 /// <summary>
 /// Parses a tab-separated line: column 0 is the word, column 1 is a
 /// comma-separated list of zhuyin syllables that are converted to pinyin.
 /// </summary>
 /// <param name="line">The line to parse.</param>
 /// <returns>
 /// A list with the single parsed entry. Syllables whose conversion throws are
 /// left out of the pinyin array, and the error is written to the debug output.
 /// </returns>
 public WordLibraryList ImportLine(string line)
 {
     string[] columns = line.Split('\t');
     var entry = new WordLibrary { Word = columns[0], Rank = DefaultRank };

     var converted = new List<string>();
     string[] syllables = columns[1].Split(',');
     for (int i = 0; i < syllables.Length; i++)
     {
         try
         {
             converted.Add(ZhuyinHelper.GetPinyin(syllables[i]));
         }
         catch (Exception ex)
         {
             Debug.WriteLine(ex.Message);
         }
     }
     entry.PinYin = converted.ToArray();

     var result = new WordLibraryList();
     result.Add(entry);
     return result;
 }
예제 #59
0
        /// <summary>
        /// Reads word entries from a binary file, starting at byte offset 0x350 and
        /// calling <c>ImportWord</c> repeatedly until the end of the file.
        /// </summary>
        /// <param name="path">Path of the file to read.</param>
        /// <returns>
        /// Every entry that has a non-empty word and at least one pinyin element.
        /// An entry whose parsing throws is skipped after the error message is
        /// written to the debug output.
        /// </returns>
        public WordLibraryList Import(string path)
        {
            var wordLibraryList = new WordLibraryList();
            // "using" closes the stream even if something throws outside the per-entry try block.
            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                fs.Position = 0x350;
                do
                {
                    try
                    {
                        WordLibrary wl = ImportWord(fs);
                        if (wl.Word != "" && wl.PinYin.Length > 0)
                        {
                            wordLibraryList.Add(wl);
                        }
                    }
                    catch (Exception ex)
                    {
                        Debug.WriteLine(ex.Message);
                    }
                    // Compare with "<" rather than "!=": when the position is already past the
                    // end (e.g. a file shorter than 0x350 bytes) "!=" would never become false.
                } while (fs.Position < fs.Length);
            }

            return wordLibraryList;
        }
예제 #60
0
 /// <summary>
 /// Builds a new list containing only the entries accepted by <c>IsKeep</c>,
 /// in their original order. The input list is not modified.
 /// </summary>
 /// <param name="list">Entries to examine.</param>
 /// <returns>The accepted entries.</returns>
 private WordLibraryList Filter(WordLibraryList list)
 {
     var kept = new WordLibraryList();
     foreach (WordLibrary candidate in list)
     {
         if (!IsKeep(candidate))
         {
             continue;
         }
         kept.Add(candidate);
     }
     return kept;
 }