Пример #1
0
        /// <summary>
        /// Downloads an Antara news page and cuts the article text out of its HTML.
        /// </summary>
        /// <remarks>
        /// The text kept starts 20 characters after the start of the "content_news"
        /// marker and ends 10 characters before the start of the next "mt10" marker;
        /// every "&lt;br&gt;" tag is then removed. When a marker is missing a message
        /// is written to the console and that cut is skipped.
        /// </remarks>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>The extracted text, or the less-trimmed page when markers are missing.</returns>
        public static string parseAntaraRegex(string url)
        {
            string page;

            // WebClient is IDisposable; release it as soon as the download is done.
            using (WebClient client = new WebClient())
            {
                page = client.DownloadString(url);
            }

            // Test the raw match result before applying the offset: adding 20 to a
            // negative "not found" result would make the miss look like a hit.
            int idx = RegexC.regexMatch(page, "content_news");
            if (idx < 0)
            {
                Console.WriteLine("isi berita tidak ditemukan");
            }
            else
            {
                // Clamp so a marker close to the end of the page cannot overrun it.
                page = page.Substring(Math.Min(page.Length, idx + 20));
            }

            idx = RegexC.regexMatch(page, "mt10");
            if (idx < 0)
            {
                Console.WriteLine("isi berita tidak ditemukan");
            }
            else
            {
                // Clamp at 0 so a marker found in the first 10 characters is still
                // treated as found instead of being reported as missing.
                page = page.Substring(0, Math.Max(0, idx - 10));
            }

            // Drop every "<br>" tag, one occurrence per pass.
            idx = RegexC.regexMatch(page, "<br>");
            while (idx != -1)
            {
                page = page.Remove(idx, 4);
                idx  = RegexC.regexMatch(page, "<br>");
            }

            return page;
        }
Пример #2
0
        /// <summary>
        /// Chooses an article parser from the news-site name found in the URL and
        /// the requested matching method, and returns that parser's result.
        /// </summary>
        /// <param name="url">Article URL; it is also the text searched for the site name.</param>
        /// <param name="method">0 selects the KMP matcher and parsers, 1 the BM ones, 2 the Regex ones.</param>
        /// <returns>
        /// The chosen parser's output, or "Salah URL" when no site name is found
        /// or <paramref name="method"/> is not 0, 1 or 2.
        /// </returns>
        public static string parseHTML(string url, int method)
        {
            // Sites are probed in a fixed order: detik.com, tempo.co, viva, antara.
            if (method == 0)
            {
                if (KMP.kmpMatch(url, "detik.com") != -1)
                {
                    return parseDetikKMP(url);
                }
                if (KMP.kmpMatch(url, "tempo.co") != -1)
                {
                    return parseTempoKMP(url);
                }
                if (KMP.kmpMatch(url, "viva") != -1)
                {
                    return parseVivaKMP(url);
                }
                if (KMP.kmpMatch(url, "antara") != -1)
                {
                    return parseAntaraKMP(url);
                }
            }
            else if (method == 1)
            {
                if (BM.bmMatch(url, "detik.com") != -1)
                {
                    return parseDetikBM(url);
                }
                if (BM.bmMatch(url, "tempo.co") != -1)
                {
                    return parseTempoBM(url);
                }
                if (BM.bmMatch(url, "viva") != -1)
                {
                    return parseVivaBM(url);
                }
                if (BM.bmMatch(url, "antara") != -1)
                {
                    return parseAntaraBM(url);
                }
            }
            else if (method == 2)
            {
                if (RegexC.regexMatch(url, "detik.com") != -1)
                {
                    return parseDetikRegex(url);
                }
                if (RegexC.regexMatch(url, "tempo.co") != -1)
                {
                    return parseTempoRegex(url);
                }
                if (RegexC.regexMatch(url, "viva") != -1)
                {
                    return parseVivaRegex(url);
                }
                if (RegexC.regexMatch(url, "antara") != -1)
                {
                    return parseAntaraRegex(url);
                }
            }

            // Nothing matched: unknown site or unsupported method.
            return "Salah URL";
        }
Пример #3
0
        /// <summary>
        /// Downloads a Viva news page and cuts the article text out of its HTML.
        /// </summary>
        /// <remarks>
        /// The page is trimmed in sequence to start at "article-content", then at
        /// "description", then just after the next "&lt;p&gt;", and is cut off at the
        /// following "&lt;/span&gt;". All "&lt;p&gt;", "&lt;/p&gt;", "&lt;em&gt;" and
        /// "&lt;/em&gt;" tags are then removed. When a marker is missing a message is
        /// written to the console and that trimming step is skipped.
        /// </remarks>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>The extracted text, or the less-trimmed page when markers are missing.</returns>
        public static string parseVivaRegex(string url)
        {
            string page;

            // WebClient is IDisposable; release it as soon as the download is done.
            using (WebClient client = new WebClient())
            {
                page = client.DownloadString(url);
            }

            int idx = RegexC.regexMatch(page, "article-content");
            if (idx < 0)
            {
                Console.WriteLine("isi berita tidak ditemukan");
            }
            else
            {
                page = page.Substring(idx);
            }

            idx = RegexC.regexMatch(page, "description");
            if (idx < 0)
            {
                Console.WriteLine("isi berita tidak ditemukan");
            }
            else
            {
                page = page.Substring(idx);
            }

            // Check the raw match result first; adding the tag length to a negative
            // "not found" result would hide the miss and chop two characters off.
            idx = RegexC.regexMatch(page, "<p>");
            if (idx < 0)
            {
                Console.WriteLine("isi berita tidak ditemukan");
            }
            else
            {
                page = page.Substring(idx + 3);
            }

            idx = RegexC.regexMatch(page, "</span>");
            if (idx < 0)
            {
                Console.WriteLine("isi berita tidak ditemukan");
            }
            else
            {
                page = page.Substring(0, idx);
            }

            // Strip the remaining inline tags, one tag kind at a time, removing a
            // single occurrence per pass until the matcher finds no more.
            foreach (string tag in new[] { "<p>", "</p>", "<em>", "</em>" })
            {
                idx = RegexC.regexMatch(page, tag);
                while (idx != -1)
                {
                    page = page.Remove(idx, tag.Length);
                    idx  = RegexC.regexMatch(page, tag);
                }
            }

            return page;
        }
Пример #4
0
        /// <summary>
        /// Downloads a Tempo news page and cuts the article text out of its HTML.
        /// </summary>
        /// <remarks>
        /// The start of the text is located just after the second "666666" marker;
        /// when the first "666666" is absent the page is instead trimmed to start at
        /// "p-artikel". The page is then cut 10 characters past the start of the
        /// next "&lt;/span&gt;", cut again at "&lt;!-- end artikel", and finally all
        /// "&lt;br /&gt;", "&lt;em&gt;", "&lt;/em&gt;", "&lt;/a&gt;" and "&lt;/p&gt;"
        /// tags are removed. When a marker is missing a message is written to the
        /// console and that trimming step is skipped.
        /// </remarks>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>The extracted text, or the less-trimmed page when markers are missing.</returns>
        public static String parseTempoRegex(string url)
        {
            string page;

            // WebClient is IDisposable; release it as soon as the download is done.
            using (WebClient client = new WebClient())
            {
                page = client.DownloadString(url);
            }

            int idx = RegexC.regexMatch(page, "666666");
            if (idx < 0)
            {
                // Fallback marker for pages that do not contain "666666".
                idx = RegexC.regexMatch(page, "p-artikel");
                if (idx >= 0)
                {
                    page = page.Substring(idx);
                }
                else
                {
                    Console.WriteLine("isi berita tidak ditemukan1");
                }
            }
            else
            {
                // Skip past the first marker, then look for the second one.
                page = page.Substring(idx + 6);
                idx  = RegexC.regexMatch(page, "666666");
                if (idx < 0)
                {
                    Console.WriteLine("isi berita tidak ditemukan2");
                }
                else
                {
                    page = page.Substring(idx + 6);
                }
            }

            // Check the raw match result before adding the offset: a negative
            // "not found" result plus 10 is positive and would otherwise truncate
            // the page to a few characters instead of reporting the miss.
            idx = RegexC.regexMatch(page, "</span>");
            if (idx < 0)
            {
                Console.WriteLine("isi berita tidak ditemukan3");
            }
            else
            {
                // Clamp so a "</span>" near the end of the page cannot overrun it.
                page = page.Substring(0, Math.Min(page.Length, idx + 10));
            }

            idx = RegexC.regexMatch(page, "<!-- end artikel");
            if (idx < 0)
            {
                Console.WriteLine("isi berita tidak ditemukan");
            }
            else
            {
                page = page.Substring(0, idx);
            }

            // Strip the remaining inline tags, one tag kind at a time, removing a
            // single occurrence per pass until the matcher finds no more.
            foreach (string tag in new[] { "<br />", "<em>", "</em>", "</a>", "</p>" })
            {
                idx = RegexC.regexMatch(page, tag);
                while (idx != -1)
                {
                    page = page.Remove(idx, tag.Length);
                    idx  = RegexC.regexMatch(page, tag);
                }
            }

            return page;
        }
Пример #5
0
        /// <summary>
        /// Downloads a Detik news page and cuts the article text out of its HTML.
        /// </summary>
        /// <remarks>
        /// The page is trimmed to start at "detikdetailtext" (or at "p-artikel" when
        /// that marker is absent), cut off at "&lt;!-- POLONG", trimmed to start just
        /// after the next "&lt;/p&gt;", stripped of every "&lt;br /&gt;" tag, and
        /// finally cut off at the first "&lt;br/&gt;". When a marker is missing a
        /// numbered message is written to the console and that step is skipped.
        /// </remarks>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>The extracted text, or the less-trimmed page when markers are missing.</returns>
        public static String parseDetikRegex(string url)
        {
            string page;

            // WebClient is IDisposable; release it as soon as the download is done.
            using (WebClient client = new WebClient())
            {
                page = client.DownloadString(url);
            }

            int idx = RegexC.regexMatch(page, "detikdetailtext");
            if (idx < 0)
            {
                // Fallback marker for pages without "detikdetailtext".
                idx = RegexC.regexMatch(page, "p-artikel");
            }

            if (idx < 0)
            {
                Console.WriteLine("isi berita tidak ditemukan1");
            }
            else
            {
                page = page.Substring(idx);
            }

            idx = RegexC.regexMatch(page, "<!-- POLONG");
            if (idx < 0)
            {
                Console.WriteLine("isi berita tidak ditemukan2");
            }
            else
            {
                page = page.Substring(0, idx);
            }

            // Check the raw match result first; adding the tag length to a negative
            // "not found" result would hide the miss and chop three characters off.
            idx = RegexC.regexMatch(page, "</p>");
            if (idx < 0)
            {
                Console.WriteLine("isi berita tidak ditemukan3");
            }
            else
            {
                page = page.Substring(idx + 4);
            }

            // Drop every "<br />" tag, one occurrence per pass.
            idx = RegexC.regexMatch(page, "<br />");
            while (idx != -1)
            {
                page = page.Remove(idx, 6);
                idx  = RegexC.regexMatch(page, "<br />");
            }

            // The text ends at the first "<br/>" (the variant without a space).
            idx = RegexC.regexMatch(page, "<br/>");
            if (idx < 0)
            {
                Console.WriteLine("isi berita tidak ditemukan4");
            }
            else
            {
                page = page.Substring(0, idx);
            }

            return page;
        }