/// <summary>
/// Downloads an Antara news page and trims it down to the article text,
/// locating the cut points with <c>RegexC.regexMatch</c>.
/// </summary>
/// <param name="url">Address of the article page to download.</param>
/// <returns>
/// The page text starting 20 characters after the "content_news" marker and ending
/// 10 characters before the "mt10" marker, with every "&lt;br&gt;" removed. When a marker
/// cannot be used, a message is written to the console and that trimming step is skipped.
/// </returns>
/// <remarks>
/// Any exception raised by <c>WebClient.DownloadString</c> is not caught here and
/// propagates to the caller.
/// </remarks>
public static string parseAntaraRegex(string url)
{
    string page;
    // WebClient is disposable; release it as soon as the download is done.
    using (WebClient W = new WebClient())
    {
        page = W.DownloadString(url);
    }

    // Test the raw match result BEFORE applying the offset. Adding 20 first turned
    // a failed match (-1) into 19, so the "not found" branch could never run and
    // the first 19 characters of the page were silently dropped.
    int idx = RegexC.regexMatch(page, "content_news");
    if (idx < 0)
    {
        Console.WriteLine("isi berita tidak ditemukan");
    }
    else
    {
        // Clamp so a marker sitting at the very end of the page cannot make Substring throw.
        int start = idx + 20;
        if (start > page.Length)
        {
            start = page.Length;
        }
        page = page.Substring(start);
    }

    // The article ends 10 characters before "mt10". A result below 10 means the marker
    // is missing (-1) or too close to the start to back up; both cases skip the cut.
    idx = RegexC.regexMatch(page, "mt10");
    if (idx < 10)
    {
        Console.WriteLine("isi berita tidak ditemukan");
    }
    else
    {
        page = page.Substring(0, idx - 10);
    }

    // Delete every "<br>" occurrence; -1 is the value the matcher uses for "no match".
    idx = RegexC.regexMatch(page, "<br>");
    while (idx != -1)
    {
        page = page.Remove(idx, 4);
        idx = RegexC.regexMatch(page, "<br>");
    }

    return page;
}
/// <summary>
/// Picks the site-specific parser for <paramref name="url"/> and runs it with the
/// string-matching algorithm selected by <paramref name="method"/>.
/// </summary>
/// <param name="url">Article address; also inspected to decide which site it belongs to.</param>
/// <param name="method">0 uses the KMP matcher, 1 uses the BM matcher, 2 uses the RegexC matcher.</param>
/// <returns>
/// The result of the selected parser, or "Salah URL" when the method code is not 0, 1 or 2
/// or the URL contains none of the known site markers.
/// </returns>
public static string parseHTML(string url, int method)
{
    // Within each method the sites are probed in the fixed order
    // detik.com, tempo.co, viva, antara; the first hit wins.
    if (method == 0)
    {
        if (KMP.kmpMatch(url, "detik.com") != -1)
        {
            return parseDetikKMP(url);
        }
        if (KMP.kmpMatch(url, "tempo.co") != -1)
        {
            return parseTempoKMP(url);
        }
        if (KMP.kmpMatch(url, "viva") != -1)
        {
            return parseVivaKMP(url);
        }
        if (KMP.kmpMatch(url, "antara") != -1)
        {
            return parseAntaraKMP(url);
        }
    }
    else if (method == 1)
    {
        if (BM.bmMatch(url, "detik.com") != -1)
        {
            return parseDetikBM(url);
        }
        if (BM.bmMatch(url, "tempo.co") != -1)
        {
            return parseTempoBM(url);
        }
        if (BM.bmMatch(url, "viva") != -1)
        {
            return parseVivaBM(url);
        }
        if (BM.bmMatch(url, "antara") != -1)
        {
            return parseAntaraBM(url);
        }
    }
    else if (method == 2)
    {
        if (RegexC.regexMatch(url, "detik.com") != -1)
        {
            return parseDetikRegex(url);
        }
        if (RegexC.regexMatch(url, "tempo.co") != -1)
        {
            return parseTempoRegex(url);
        }
        if (RegexC.regexMatch(url, "viva") != -1)
        {
            return parseVivaRegex(url);
        }
        if (RegexC.regexMatch(url, "antara") != -1)
        {
            return parseAntaraRegex(url);
        }
    }

    // Unknown method code or unrecognised site.
    return "Salah URL";
}
/// <summary>
/// Downloads a Viva news page and trims it down to the article text,
/// locating the cut points with <c>RegexC.regexMatch</c>.
/// </summary>
/// <param name="url">Address of the article page to download.</param>
/// <returns>
/// The text that follows the first "&lt;p&gt;" after the "article-content" and "description"
/// markers, cut at the next "&lt;/span&gt;", with all "&lt;p&gt;", "&lt;/p&gt;", "&lt;em&gt;" and
/// "&lt;/em&gt;" tags removed. When a marker is missing, a message is written to the console
/// and that trimming step is skipped.
/// </returns>
/// <remarks>
/// Any exception raised by <c>WebClient.DownloadString</c> is not caught here and
/// propagates to the caller.
/// </remarks>
public static string parseVivaRegex(string url)
{
    string page;
    // WebClient is disposable; release it as soon as the download is done.
    using (WebClient W = new WebClient())
    {
        page = W.DownloadString(url);
    }

    // Discard everything in front of the article container.
    int idx = RegexC.regexMatch(page, "article-content");
    if (idx < 0)
    {
        Console.WriteLine("isi berita tidak ditemukan");
    }
    else
    {
        page = page.Substring(idx);
    }

    // Then everything in front of the description section.
    idx = RegexC.regexMatch(page, "description");
    if (idx < 0)
    {
        Console.WriteLine("isi berita tidak ditemukan");
    }
    else
    {
        page = page.Substring(idx);
    }

    // Skip past the opening "<p>". The match result is checked before the tag length
    // is added: adding 3 first turned a failed match (-1) into 2, which bypassed the
    // "not found" branch and chopped two characters off the page.
    idx = RegexC.regexMatch(page, "<p>");
    if (idx < 0)
    {
        Console.WriteLine("isi berita tidak ditemukan");
    }
    else
    {
        page = page.Substring(idx + 3);
    }

    // The article body stops at the next closing span.
    idx = RegexC.regexMatch(page, "</span>");
    if (idx < 0)
    {
        Console.WriteLine("isi berita tidak ditemukan");
    }
    else
    {
        page = page.Substring(0, idx);
    }

    // Strip the remaining inline tags, one tag kind at a time, in this order.
    // -1 is the value the matcher uses for "no match".
    foreach (string tag in new string[] { "<p>", "</p>", "<em>", "</em>" })
    {
        idx = RegexC.regexMatch(page, tag);
        while (idx != -1)
        {
            page = page.Remove(idx, tag.Length);
            idx = RegexC.regexMatch(page, tag);
        }
    }

    return page;
}
/// <summary>
/// Downloads a Tempo news page and trims it down to the article text,
/// locating the cut points with <c>RegexC.regexMatch</c>.
/// </summary>
/// <param name="url">Address of the article page to download.</param>
/// <returns>
/// The page text after the second "666666" marker (or from "p-artikel" when the first
/// "666666" is absent), cut 10 characters past the start of the next "&lt;/span&gt;" and at
/// "&lt;!-- end artikel", with all "&lt;br /&gt;", "&lt;em&gt;", "&lt;/em&gt;", "&lt;/a&gt;" and
/// "&lt;/p&gt;" tags removed. When a marker is missing, a message is written to the console
/// and that trimming step is skipped.
/// </returns>
/// <remarks>
/// Any exception raised by <c>WebClient.DownloadString</c> is not caught here and
/// propagates to the caller.
/// </remarks>
public static String parseTempoRegex(string url)
{
    string page;
    // WebClient is disposable; release it as soon as the download is done.
    using (WebClient W = new WebClient())
    {
        page = W.DownloadString(url);
    }

    // Locate the article start: normally just after the second "666666" marker,
    // otherwise fall back to the "p-artikel" marker.
    int idx = RegexC.regexMatch(page, "666666");
    if (idx < 0)
    {
        idx = RegexC.regexMatch(page, "p-artikel");
        if (idx >= 0)
        {
            page = page.Substring(idx);
        }
        else
        {
            Console.WriteLine("isi berita tidak ditemukan1");
        }
    }
    else
    {
        page = page.Substring(idx + 6);
        idx = RegexC.regexMatch(page, "666666");
        if (idx < 0)
        {
            Console.WriteLine("isi berita tidak ditemukan2");
        }
        else
        {
            page = page.Substring(idx + 6);
        }
    }

    // Keep the text up to 10 characters past the start of "</span>". The match result
    // is checked before the offset is added: adding 10 first turned a failed match (-1)
    // into 9, so the page was truncated to 9 characters instead of reporting the miss.
    idx = RegexC.regexMatch(page, "</span>");
    if (idx < 0)
    {
        Console.WriteLine("isi berita tidak ditemukan3");
    }
    else
    {
        // Clamp so a "</span>" at the very end of the page cannot make Substring throw.
        int end = idx + 10;
        if (end > page.Length)
        {
            end = page.Length;
        }
        page = page.Substring(0, end);
    }

    // Drop the trailer that starts at the end-of-article comment.
    idx = RegexC.regexMatch(page, "<!-- end artikel");
    if (idx < 0)
    {
        Console.WriteLine("isi berita tidak ditemukan");
    }
    else
    {
        page = page.Substring(0, idx);
    }

    // Strip the remaining inline tags, one tag kind at a time, in this order.
    // -1 is the value the matcher uses for "no match".
    foreach (string tag in new string[] { "<br />", "<em>", "</em>", "</a>", "</p>" })
    {
        idx = RegexC.regexMatch(page, tag);
        while (idx != -1)
        {
            page = page.Remove(idx, tag.Length);
            idx = RegexC.regexMatch(page, tag);
        }
    }

    return page;
}
/// <summary>
/// Downloads a Detik news page and trims it down to the article text,
/// locating the cut points with <c>RegexC.regexMatch</c>.
/// </summary>
/// <param name="url">Address of the article page to download.</param>
/// <returns>
/// The page text from the "detikdetailtext" marker (or "p-artikel" as a fallback) up to
/// "&lt;!-- POLONG", starting after the first "&lt;/p&gt;", with every "&lt;br /&gt;" removed and
/// cut at the first "&lt;br/&gt;". When a marker is missing, a message is written to the
/// console and that trimming step is skipped.
/// </returns>
/// <remarks>
/// Any exception raised by <c>WebClient.DownloadString</c> is not caught here and
/// propagates to the caller.
/// </remarks>
public static String parseDetikRegex(string url)
{
    string page;
    // WebClient is disposable; release it as soon as the download is done.
    using (WebClient W = new WebClient())
    {
        page = W.DownloadString(url);
    }

    // Find the article container, trying the fallback marker when the primary one is absent.
    int idx = RegexC.regexMatch(page, "detikdetailtext");
    if (idx < 0)
    {
        idx = RegexC.regexMatch(page, "p-artikel");
    }
    if (idx < 0)
    {
        Console.WriteLine("isi berita tidak ditemukan1");
    }
    else
    {
        page = page.Substring(idx);
    }

    // Drop everything from the POLONG comment onwards.
    idx = RegexC.regexMatch(page, "<!-- POLONG");
    if (idx < 0)
    {
        Console.WriteLine("isi berita tidak ditemukan2");
    }
    else
    {
        page = page.Substring(0, idx);
    }

    // Skip past the first "</p>". The match result is checked before the tag length is
    // added: adding 4 first turned a failed match (-1) into 3, so message 3 could never
    // be printed and three characters were chopped off the page instead.
    idx = RegexC.regexMatch(page, "</p>");
    if (idx < 0)
    {
        Console.WriteLine("isi berita tidak ditemukan3");
    }
    else
    {
        page = page.Substring(idx + 4);
    }

    // Delete every "<br />" occurrence; -1 is the value the matcher uses for "no match".
    idx = RegexC.regexMatch(page, "<br />");
    while (idx != -1)
    {
        page = page.Remove(idx, 6);
        idx = RegexC.regexMatch(page, "<br />");
    }

    // The article text ends at the first "<br/>" (the variant without a space).
    idx = RegexC.regexMatch(page, "<br/>");
    if (idx < 0)
    {
        Console.WriteLine("isi berita tidak ditemukan4");
    }
    else
    {
        page = page.Substring(0, idx);
    }

    return page;
}