Пример #1
0
 // Original author's note (zengyi, 2010-11-14): written without a clear design; the code
 // is messy, but it appears to be correct.
 /// <summary>
 /// Formats a word entry as a single output line made of up to three columns
 /// (code, word, count) joined by <c>SplitString</c>.
 /// </summary>
 /// <param name="wl">
 /// Entry to format. When <c>ContainPinyin</c> is set, the entry is first passed to
 /// <c>CodingString</c> together with <c>Factory</c>, then its code string is read back.
 /// </param>
 /// <returns>
 /// The non-empty columns in ascending order of their <c>Sort</c> keys, separated by
 /// <c>SplitString</c> with no trailing separator. An empty string when every column is empty.
 /// </returns>
 public string BuildWLString(WordLibrary wl)
 {
     string py = "", cp = "";
     if (ContainPinyin)
     {
         CodingString(wl, Factory);
         py = wl.GetPinYinString(PinyinSplitString, PinyinSplitType);
     }
     if (ContainCipin)
     {
         cp = wl.Count.ToString();
     }

     // Sort[0], Sort[1] and Sort[2] are the ordering keys of the code, word and count columns.
     var dic = new Dictionary<int, string>();
     dic.Add(Sort[0], py);
     dic.Add(Sort[1], wl.Word);
     dic.Add(Sort[2], cp);
     var newSort = new List<int>(Sort);
     newSort.Sort();

     var parts = new List<string>();
     foreach (int x in newSort)
     {
         if (dic[x] != "")
         {
             parts.Add(dic[x]);
         }
     }

     // Join instead of "append separator, then cut at LastIndexOf": the old form threw when
     // no column was emitted (LastIndexOf returned -1) and could drop the last character
     // when SplitString was empty.
     return string.Join(SplitString, parts.ToArray());
 }
Пример #2
0
 /// <summary>
 /// Initializes the pattern with the column keys 1, 2, 3, a fixed sample entry
 /// and a <c>WordPinyinGenerater</c> as the code factory.
 /// </summary>
 public ParsePattern()
 {
     var defaultOrder = new List<int>();
     defaultOrder.Add(1);
     defaultOrder.Add(2);
     defaultOrder.Add(3);
     Sort = defaultOrder;

     // Sample entry; its six pinyin syllables line up with the six characters of the word.
     sample = new WordLibrary
         {
             Count = 1234,
             Word = "深蓝词库转换",
             PinYin = new[] {"shen", "lan", "ci", "ku", "zhuan", "huan"}
         };

     Factory = new WordPinyinGenerater();
 }
Пример #3
0
 /// <summary>
 /// Initializes the pattern with the column keys 1, 2, 3, a fixed sample entry
 /// and <c>IsPinyinFormat</c> switched on.
 /// </summary>
 public ParsePattern()
 {
     var defaultOrder = new List<int>();
     defaultOrder.Add(1);
     defaultOrder.Add(2);
     defaultOrder.Add(3);
     Sort = defaultOrder;

     // Sample entry; its six pinyin syllables line up with the six characters of the word.
     sample = new WordLibrary
         {
             Count = 1234,
             Word = "深蓝词库转换",
             PinYin = new[] {"shen", "lan", "ci", "ku", "zhuan", "huan"}
         };

     IsPinyinFormat = true;
 }
Пример #4
0
        /// <summary>
        /// Click handler: turns every non-empty line of <c>rtbFrom</c> into a word entry with a
        /// fixed count of 1234, generates its code and writes the formatted lines to <c>rtbTo</c>.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnTest_Click(object sender, EventArgs e)
        {
            // Nothing can be formatted until a pattern has been selected; tell the user
            // instead of failing with a NullReferenceException further down.
            if (SelectedParsePattern == null)
            {
                MessageBox.Show("请点击右上角按钮选择匹配规则");
                return;
            }

            var           lines = rtbFrom.Text.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
            StringBuilder sb    = new StringBuilder();

            foreach (var line in lines)
            {
                WordLibrary wl = new WordLibrary()
                {
                    Word = line.Trim(), Count = 1234
                };
                GenerateCode(wl);
                sb.Append(SelectedParsePattern.BuildWlString(wl)).Append("\r\n");
            }
            rtbTo.Text = sb.ToString();
        }
Пример #5
0
        /// <summary>
        /// Parses tab-separated text into a word list. Each line is expected to hold
        /// <c>word TAB count TAB space-separated codes</c>.
        /// </summary>
        /// <param name="str">Text to parse; null or empty yields an empty list.</param>
        /// <returns>One <c>WordLibrary</c> per non-empty line, in input order.</returns>
        /// <exception cref="IndexOutOfRangeException">A line has fewer than three tab-separated columns.</exception>
        /// <exception cref="FormatException">The count column is not a valid integer.</exception>
        public WordLibraryList Import(string str)
        {
            WordLibraryList wlList = new WordLibraryList();
            if (string.IsNullOrEmpty(str))
            {
                return wlList;
            }

            // Split on CR and LF individually so LF-only (or CR-only) input is still read line by
            // line; splitting on "\r\n" alone merged such input into one record and leaked the
            // following lines into the code column.
            var lines = str.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
            for (int i = 0; i < lines.Length; i++)
            {
                string line = lines[i];
                var c = line.Split('\t');

                WordLibrary wl = new WordLibrary();
                wl.Word = c[0];
                wl.Count = Convert.ToInt32(c[1]);
                wl.PinYin = c[2].Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                wlList.Add(wl);
            }
            return wlList;
        }
Пример #6
0
        /// <summary>
        /// Parses tab-separated text into a word list. Each line is expected to hold
        /// <c>word TAB count TAB space-separated codes</c>.
        /// </summary>
        /// <param name="str">Text to parse; null or empty yields an empty list.</param>
        /// <returns>One <c>WordLibrary</c> per non-empty line, in input order.</returns>
        /// <exception cref="IndexOutOfRangeException">A line has fewer than three tab-separated columns.</exception>
        /// <exception cref="FormatException">The count column is not a valid integer.</exception>
        public WordLibraryList Import(string str)
        {
            WordLibraryList wlList = new WordLibraryList();

            if (string.IsNullOrEmpty(str))
            {
                return(wlList);
            }

            // Split on CR and LF individually so LF-only (or CR-only) input is still read line by
            // line; splitting on "\r\n" alone merged such input into one record and leaked the
            // following lines into the code column.
            var             lines  = str.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

            for (int i = 0; i < lines.Length; i++)
            {
                string line = lines[i];
                var    c    = line.Split('\t');

                WordLibrary wl = new WordLibrary();
                wl.Word   = c[0];
                wl.Count  = Convert.ToInt32(c[1]);
                wl.PinYin = c[2].Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                wlList.Add(wl);
            }
            return(wlList);
        }
Пример #7
0
        /// <summary>
        /// Fills in the code of <paramref name="wl"/> according to the selected pattern:
        /// pinyin from <c>pyFactory</c> when the pattern is both pinyin and pinyin-formatted,
        /// otherwise a user-defined code from <c>selfFactory</c>.
        /// </summary>
        /// <param name="wl">Entry whose <c>Word</c> is encoded; it is updated in place.</param>
        private void GenerateCode(WordLibrary wl)
        {
            var text = wl.Word;

            // Pinyin path: store the generated codes as the entry's pinyin and stop.
            if (SelectedParsePattern.IsPinyin && SelectedParsePattern.IsPinyinFormat)
            {
                wl.PinYin = CollectionHelper.ToArray(pyFactory.GetCodeOfString(text, SelectedParsePattern.CodeSplitString));
                return;
            }

            // User-defined path: (re)load the mapping table when a path is configured,
            // then copy the pattern's settings onto the shared factory before encoding.
            if (string.IsNullOrEmpty(SelectedParsePattern.MappingTablePath) == false)
            {
                SelectedParsePattern.MappingTable = UserCodingHelper.GetCodingDict(SelectedParsePattern.MappingTablePath);
            }

            selfFactory.MappingDictionary = SelectedParsePattern.MappingTable;
            selfFactory.Is1Char1Code = SelectedParsePattern.IsPinyinFormat;
            selfFactory.MutiWordCodeFormat = SelectedParsePattern.MutiWordCodeFormat;

            var userCode = selfFactory.GetCodeOfString(text, SelectedParsePattern.CodeSplitString);
            wl.SetCode(CodeType.UserDefine, userCode);
        }
Пример #8
0
 /// <summary>
 /// Click handler: parses every non-empty line of <c>rtbFrom</c> with the selected pattern
 /// and appends each entry's display string to <c>rtbTo</c>. Any failure while parsing is
 /// reported with a message box; lines appended before the failure stay in <c>rtbTo</c>.
 /// </summary>
 /// <param name="sender">Event source (unused).</param>
 /// <param name="e">Event data (unused).</param>
 private void btnParse_Click(object sender, EventArgs e)
 {
     if (SelectedParsePattern != null)
     {
         rtbTo.Clear();
         try
         {
             string[] sourceLines = rtbFrom.Text.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
             for (int i = 0; i < sourceLines.Length; i++)
             {
                 string trimmed = sourceLines[i].Trim();
                 WordLibrary parsed = SelectedParsePattern.BuildWordLibrary(trimmed);
                 rtbTo.AppendText(parsed.ToDisplayString() + "\r\n");
             }
         }
         catch
         {
             // Any exception here is reported as "source does not match the custom rule".
             MessageBox.Show("无法识别源内容,请确认源内容与自定义规则匹配!");
         }
     }
     else
     {
         // No pattern selected yet: ask the user to pick one.
         MessageBox.Show("请点击右上角按钮选择匹配规则");
     }
 }
        /// <summary>
        /// Click handler: picks a code generator (pinyin when no mapping file path is entered,
        /// self-defined otherwise), installs it on the selected pattern, and writes the formatted
        /// form of every non-empty line of <c>rtbFrom</c> to <c>rtbTo</c>.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnConvertTest_Click(object sender, EventArgs e)
        {
            if (SelectedParsePattern == null)
            {
                MessageBox.Show("请点击右上角按钮选择匹配规则");
                return;
            }

            IWordCodeGenerater generator;
            if (!string.IsNullOrEmpty(txbFilePath.Text))
            {
                // A mapping file was given: use the self-defined generator and publish the path.
                generator = new SelfDefiningCodeGenerater();
                UserCodingHelper.FilePath = txbFilePath.Text;
            }
            else
            {
                generator = new WordPinyinGenerater();
            }
            SelectedParsePattern.Factory = generator;

            rtbTo.Clear();
            string[] sourceLines = rtbFrom.Text.Split(new[] {'\r', '\n'}, StringSplitOptions.RemoveEmptyEntries);
            for (int i = 0; i < sourceLines.Length; i++)
            {
                var entry = new WordLibrary();
                entry.Word = sourceLines[i].Trim();
                rtbTo.AppendText(SelectedParsePattern.BuildWLString(entry) + "\r\n");
            }
        }
Пример #10
0
 /// <summary>
 /// Replaces the entry's <c>PinYin</c> array with one code per character of its
 /// <c>Word</c>, each obtained from <paramref name="factory"/>.
 /// </summary>
 /// <param name="wl">Entry to encode; its <c>PinYin</c> is overwritten.</param>
 /// <param name="factory">Generator queried once per character via <c>GetCodeOfChar</c>.</param>
 public void CodingString(WordLibrary wl, IWordCodeGenerater factory)
 {
     string text = wl.Word;
     var perCharCodes = new string[text.Length];
     for (int i = 0; i < text.Length; i++)
     {
         perCharCodes[i] = factory.GetCodeOfChar(text[i]);
     }
     wl.PinYin = perCharCodes;
 }
Пример #11
0
        /// <summary>
        /// Parses one text line into a <c>WordLibrary</c>. The line is split on
        /// <c>SplitString</c>; the column at position k is assigned to the field whose
        /// <c>Sort</c> key is the k-th smallest (index 0 = code, 1 = word, 2 = count).
        /// </summary>
        /// <param name="line">Source line to parse.</param>
        /// <returns>The populated entry, with <c>PinYin</c> split from <c>PinYinString</c>.</returns>
        public WordLibrary BuildWordLibrary(string line)
        {
            var entry = new WordLibrary();
            string[] columns = line.Split(new[] {SplitString}, StringSplitOptions.RemoveEmptyEntries);
            var rankedKeys = new List<int>(Sort);
            rankedKeys.Sort();

            // NOTE(review): a field disabled by ContainPinyin/ContainCipin still occupies its
            // ranked position here, so its column is skipped rather than shifted; confirm that
            // callers always rank disabled fields last.
            // Field with the smallest key; processed without a length check.
            AssignColumn(entry, columns, 0, Sort.FindIndex(i => i == rankedKeys[0]));
            if (columns.Length > 1)
            {
                // Field with the middle key.
                AssignColumn(entry, columns, 1, Sort.FindIndex(i => i == rankedKeys[1]));
            }
            if (columns.Length > 2)
            {
                // Field with the largest key.
                AssignColumn(entry, columns, 2, Sort.FindIndex(i => i == rankedKeys[2]));
            }

            entry.PinYin = entry.PinYinString.Split(new[] {PinyinSplitString}, StringSplitOptions.RemoveEmptyEntries);
            return entry;
        }

        /// <summary>
        /// Stores <c>columns[column]</c> into the field selected by <paramref name="fieldIndex"/>:
        /// 0 = code string (only when <c>ContainPinyin</c>), 1 = word, 2 = count (only when
        /// <c>ContainCipin</c>). Any other index leaves the entry untouched.
        /// </summary>
        private void AssignColumn(WordLibrary wl, string[] columns, int column, int fieldIndex)
        {
            switch (fieldIndex)
            {
                case 0:
                    if (ContainPinyin)
                    {
                        wl.PinYinString = columns[column];
                    }
                    break;
                case 1:
                    wl.Word = columns[column];
                    break;
                case 2:
                    if (ContainCipin)
                    {
                        wl.Count = Convert.ToInt32(columns[column]);
                    }
                    break;
            }
        }
Пример #12
0
        /// <summary>
        /// Builds a multi-line preview of the current format from the built-in sample entry:
        /// one line per prefix of the sample word (1 character, 2 characters, ...), each
        /// formatted with <c>BuildWLString</c> and terminated by <c>"\r\n"</c>.
        /// </summary>
        /// <returns>The concatenated preview lines.</returns>
        public string BuildWLStringSample()
        {
            var samFactory = new SelfDefiningCodeGenerater();

            samFactory.MappingDictionary = new Dictionary<char, string>()
                {
                    {'深', "shen"},
                    {'蓝', "lan"},
                    {'词', "ci"},
                    {'库', "ku"},
                    {'转', "zhuan"},
                    {'换', "huan"}
                };

            // Temporarily swap in the sample factory. The finally block guarantees the real
            // factory is restored even if formatting throws, so a failed preview cannot leave
            // the pattern wired to the sample mapping.
            var temp = selfFactory;
            selfFactory = samFactory;
            try
            {
                string word = "";
                string result = "";
                List<string> codes = new List<string>();
                foreach (var c in sample.Word)
                {
                    word += c;
                    // The sample's pinyin array is indexed by character position.
                    codes.Add(sample.PinYin[word.Length - 1]);
                    var s = new WordLibrary();
                    s.Count = 1234;
                    s.Word = word;
                    s.PinYin = codes.ToArray();
                    result += BuildWLString(s) + "\r\n";
                }
                return result;
            }
            finally
            {
                selfFactory = temp;
            }
        }
Пример #13
0
 // Original author's note (zengyi, 2010-11-14): written without a clear design; the code
 // is messy, but it appears to be correct.
 // If wl already carries a pinyin array and the custom format is also a pinyin format,
 // only the formatting needs to be converted.
 /// <summary>
 /// Formats a word entry as a single output line made of up to three columns
 /// (code, word, rank) joined by <c>SplitString</c>.
 /// </summary>
 /// <param name="wl">Entry to format.</param>
 /// <returns>
 /// The non-empty columns in ascending order of their <c>Sort</c> keys, separated by
 /// <c>SplitString</c> with no trailing separator. An empty string when every column is empty.
 /// </returns>
 public string BuildWLString(WordLibrary wl)
 {
     string py = "", cp = "";
     if (ContainCode)
     {
         if (IsPinyinFormat)
         {
             // Pinyin format: reuse the codes already stored on the entry.
             py = wl.GetPinYinString(CodeSplitString, CodeSplitType);
         }
         else
         {
             // Otherwise generate the code from the word and take the first result.
             selfFactory.MutiWordCodeFormat = MutiWordCodeFormat;
             py = selfFactory.GetCodeOfString(wl.Word)[0];
         }
     }
     if (ContainRank)
     {
         cp = wl.Count.ToString();
     }

     // Sort[0], Sort[1] and Sort[2] are the ordering keys of the code, word and rank columns.
     var dic = new Dictionary<int, string>();
     dic.Add(Sort[0], py);
     dic.Add(Sort[1], wl.Word);
     dic.Add(Sort[2], cp);
     var newSort = new List<int>(Sort);
     newSort.Sort();

     var parts = new List<string>();
     foreach (int x in newSort)
     {
         if (dic[x] != "")
         {
             parts.Add(dic[x]);
         }
     }

     // Join instead of "append separator, then cut at LastIndexOf": the old form threw when
     // no column was emitted (LastIndexOf returned -1) and could drop the last character
     // when SplitString was empty.
     return string.Join(SplitString, parts.ToArray());
 }
Пример #14
0
 /// <summary>
 /// Runs the entry through every filter in <c>Filters</c>, stopping at the first rejection.
 /// </summary>
 /// <param name="wordLibrary">Entry to test.</param>
 /// <returns><c>true</c> when no filter rejects the entry (including when there are no filters).</returns>
 private bool IsKeep(WordLibrary wordLibrary)
 {
     foreach (ISingleFilter candidate in Filters)
     {
         bool accepted = candidate.IsKeep(wordLibrary);
         if (accepted)
         {
             continue;
         }
         return false;
     }
     return true;
 }