/// <summary>
/// Rebuilds the level-2 entries (flag "c") of <c>pcatvList</c>.
/// Reloads the list through <c>ReadFile</c>, removes every existing level-2 entry,
/// then for each level-1 entry with flag "p", an empty <c>parentNo</c> and a non-blank
/// <c>nextUrl</c>, downloads <c>txt_url.Text + year + "/" + nextUrl</c> and adds one
/// level-2 entry per matched pair of links. The result is saved with <c>CreateFile</c>.
/// </summary>
/// <remarks>
/// If no level-1 entry qualifies, the method returns before calling <c>CreateFile</c>,
/// so the removal of level-2 entries is not written out in that case.
/// NOTE(review): "p"/"c" presumably stand for province/city — confirm against the data source.
/// </remarks>
private void DownC()
{
    pcatvList = ReadFile();
    // The loop below re-adds level-2 entries, so the old ones are dropped first.
    pcatvList.RemoveAll(x => x.level == 2);

    var curl = txt_url.Text + year;

    // The query runs over the result of Clone, not over pcatvList itself (which the
    // loop appends to). It is materialized once so the filter is not re-evaluated
    // for both the emptiness check and the loop.
    var pList = Clone<pcatv>(pcatvList)
        .Where(x => x.parentNo == "" && x.level == 1 && x.flag == "p" && !string.IsNullOrWhiteSpace(x.nextUrl))
        .ToList();
    if (pList.Count == 0)
    {
        return;
    }

    // Built once: the pattern is constant, so there is no need to re-create it per page.
    // Group 1 = href of the first link, group 2 = text of the first link,
    // group 3 = text of the second link in the adjacent table cell.
    Regex re = new Regex("<a .*?href=['\"](.*?)['\"].*?>(.*?)</a></td><td><a .*?href=['\"].*?['\"].*?>(.*?)</a>");

    foreach (pcatv p in pList)
    {
        var ccurl = curl + "/" + p.nextUrl;
        var html = getPage(ccurl);

        Match q = re.Match(html);
        while (q.Success)
        {
            pcatv c = new pcatv();
            c.flag = "c";
            c.level = 2;
            c.nextUrl = q.Groups[1].ToString();
            c.name = q.Groups[3].ToString();
            c.no = q.Groups[2].ToString();
            c.parentNo = p.no;
            pcatvList.Add(c);
            q = q.NextMatch();
        }
    }

    CreateFile(pcatvList);
}
/// <summary>
/// Rebuilds the level-5 entries (flag "v") of <c>pcatvList</c>.
/// Reloads the list through <c>ReadFile</c>, removes every existing level-5 entry,
/// then for each level-4 entry with flag "t" and a non-blank <c>nextUrl</c>, downloads
/// <c>txt_url.Text + year + "/" + nextUrl</c> and adds level-5 entries parsed from the page.
/// Progress is appended to <c>rtb_msg</c>, and the result is saved with <c>CreateFile</c>.
/// </summary>
/// <remarks>
/// Each page is scanned with two patterns: <c>villagetr</c> table rows (code + name, no link)
/// and pairs of links (href + code + name). All rows of a page are added before its link pairs.
/// If no level-4 entry qualifies, the method returns before calling <c>CreateFile</c>.
/// NOTE(review): "t" presumably stands for town — confirm against the data source.
/// </remarks>
private void DownV()
{
    pcatvList = ReadFile();
    // The loop below re-adds level-5 entries, so the old ones are dropped first.
    pcatvList.RemoveAll(x => x.level == 5);

    var curl = txt_url.Text + year;

    // Materialized once: the original deferred query was re-evaluated by Any(),
    // Count() and the foreach. It runs over the result of Clone, not over
    // pcatvList itself, which the loop appends to.
    var cTown = Clone<pcatv>(pcatvList)
        .Where(x => x.level == 4 && x.flag == "t" && !string.IsNullOrWhiteSpace(x.nextUrl))
        .ToList();
    if (cTown.Count == 0)
    {
        return;
    }

    rtb_msg.AppendText("共:" + cTown.Count + "行 \n");

    // Both patterns are constant, so they are built once instead of once per page.
    // Row pattern: group 1 = numeric code, group 2 = second cell's content.
    Regex re = new Regex(@"<tr class='villagetr'><td>(\d+)</td><td>(.*?)</td></tr>");
    // Link pattern: group 1 = href of the first link, group 2 = text of the first link,
    // group 3 = text of the second link in the adjacent table cell.
    Regex re1 = new Regex("<a .*?href=['\"](.*?)['\"].*?>(.*?)</a></td><td><a .*?href=['\"].*?['\"].*?>(.*?)</a>");

    foreach (pcatv c in cTown)
    {
        rtb_msg.AppendText("/" + c.nextUrl + "\n");
        var ccurl = curl + "/" + c.nextUrl;
        var html = getPage(ccurl);

        // Plain rows: these carry no link, so nextUrl is left empty.
        Match q = re.Match(html);
        while (q.Success)
        {
            pcatv a = new pcatv();
            a.flag = "v";
            a.level = 5;
            a.nextUrl = "";
            a.name = q.Groups[2].ToString();
            a.no = q.Groups[1].ToString();
            a.parentNo = c.no;
            pcatvList.Add(a);
            q = q.NextMatch();
        }

        // Linked rows: same target level and flag, but the href is kept in nextUrl.
        Match q1 = re1.Match(html);
        while (q1.Success)
        {
            pcatv a = new pcatv();
            a.flag = "v";
            a.level = 5;
            a.nextUrl = q1.Groups[1].ToString();
            a.name = q1.Groups[3].ToString();
            a.no = q1.Groups[2].ToString();
            a.parentNo = c.no;
            pcatvList.Add(a);
            q1 = q1.NextMatch();
        }
    }

    CreateFile(pcatvList);
}
/// <summary>
/// Downloads the page at <c>url</c> + <c>txt_p_url.Text</c> (with "{year}" replaced by
/// <c>year</c>) and appends one level-1 entry (flag "p", empty <c>parentNo</c>) to
/// <c>pcatvList</c> for every link whose content ends in <c>&lt;br/&gt;</c>.
/// The list is then saved with <c>CreateFile</c>.
/// </summary>
/// <remarks>
/// The entry's <c>no</c> is its href with ".html" replaced by ten zeros.
/// NOTE(review): "p" presumably stands for province — confirm against the data source.
/// </remarks>
private void DownP()
{
    var pageHtml = getPage(url + txt_p_url.Text.Replace("{year}", year));

    // Group 1 = href of the link, group 2 = link text preceding the <br/>.
    var linkPattern = new Regex("<a .*?href=['\"](.*?)['\"].*?>(.*?)<br/></a>");

    for (Match hit = linkPattern.Match(pageHtml); hit.Success; hit = hit.NextMatch())
    {
        var entry = new pcatv();
        entry.flag = "p";
        entry.level = 1;
        entry.nextUrl = hit.Groups[1].Value;
        entry.name = hit.Groups[2].Value;
        // Code is derived from the page file name: "<prefix>.html" -> "<prefix>0000000000".
        entry.no = entry.nextUrl.Replace(".html", "0000000000");
        entry.parentNo = "";
        pcatvList.Add(entry);
    }

    CreateFile(pcatvList);
}