/// <summary>
/// Downloads the Yahoo Taiwan dictionary search page for <paramref name="word"/>
/// and scrapes the phonetic notation and description blocks out of the result list.
/// </summary>
/// <param name="word">The word to look up; it is passed through AppHelper.UrlEncode before being placed in the query string.</param>
/// <returns>
/// An <c>InfoPoco</c> constructed from <paramref name="word"/>. <c>Phonetic</c> is assigned only when a
/// result block contains the "pronunciation_pos" marker, and <c>Description</c> only when at least one
/// non-empty description fragment was collected.
/// </returns>
private InfoPoco CatchInfoAndReturnInfoPoco(string word)
{
    string requestUrl = string.Format(
        @"http://tw.dictionary.search.yahoo.com/search?p={0}",
        AppHelper.UrlEncode(word));

    string resultListHtml = string.Empty;
    using (WebClient client = new WebClient())
    {
        // Browser-like request headers, sent exactly as listed.
        client.Headers["Host"] = "tw.dictionary.search.yahoo.com";
        client.Headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:40.0) Gecko/20100101 Firefox/40.0";
        client.Headers["Accept"] = "*/*";
        client.Headers["Accept-Language"] = "zh-TW,zh;q=0.8,en-US;q=0.5,en;q=0.3";
        client.Headers["Referer"] = "http://tw.dictionary.search.yahoo.com/search";
        client.Headers["Origin"] = "http://tw.dictionary.search.yahoo.com";

        // The response bytes are decoded as UTF-8 (code page 65001) so Chinese text survives.
        byte[] payload = client.DownloadData(requestUrl);
        string pageText = Encoding.UTF8.GetString(payload);

        // Keep only the first <ol> whose class attribute mentions searchCenterMiddle.
        const string resultListPattern = @"<ol class="".+?searchCenterMiddle.+?"">.+?</ol>";
        resultListHtml = Regex.Match(pageText, resultListPattern).Value;
    }

    const string entryPattern = @"<div class=""dd algo.+?>(?<Content>.+?)</div></li>";
    const string phoneticPattern = @"<span.+?id=""pronunciation_pos"".+?>(?<Phonetic>.+?)</span>";

    MatchCollection entries = Regex.Matches(resultListHtml, entryPattern);
    int entryCount = entries.Count;

    InfoPoco info = new InfoPoco(word);
    StringBuilder descriptionHtml = new StringBuilder();

    // The first and last entries are skipped, so the loop covers indexes 1 .. Count - 2.
    for (int index = 1; index < entryCount - 1; index++)
    {
        string content = entries[index].Groups["Content"].Value;

        if (content.Contains("pronunciation_pos"))
        {
            // Phonetic entry: take the span's inner markup and strip any nested tags from it.
            string rawPhonetic = Regex.Match(content, phoneticPattern).Groups["Phonetic"].Value;
            info.Phonetic = Regex.Replace(rawPhonetic, @"<.+?>", "");
            continue;
        }

        // Every other entry is run through AppHelper.GetClearAttributeHtml and appended to the description.
        descriptionHtml.Append(AppHelper.GetClearAttributeHtml(content));
    }

    string joined = descriptionHtml.ToString();
    if (string.IsNullOrEmpty(joined))
    {
        return info;
    }

    // Wrap the collected fragments in the resource-defined description template.
    info.Description = string.Format(Resource1.DescriptTemplate, joined);
    return info;
}
/// <summary>
/// Copies the phonetic notation and description from <paramref name="infoPoco"/> onto
/// <paramref name="vocabulary"/> and sets its <c>IsCatchInfo</c> flag to <c>true</c>.
/// </summary>
/// <param name="vocabulary">The vocabulary entry to update in place.</param>
/// <param name="infoPoco">The source of the phonetic and description values.</param>
private void UpdateVocabularyInfo(Vocabulary vocabulary, InfoPoco infoPoco)
{
    // Copy the two looked-up fields first.
    vocabulary.Phonetic = infoPoco.Phonetic;
    vocabulary.Description = infoPoco.Description;

    // The flag is set unconditionally, even when the copied values are null or empty.
    vocabulary.IsCatchInfo = true;
}