Пример #1
0
        /// <summary>
        /// Downloads the same page twice, first through the <c>DownloadTextFile</c> overload that takes
        /// no encoding and then through the overload that is given Big5 explicitly, and prints a preview
        /// of each result so the two decodings can be compared by eye.
        /// </summary>
        /// <remarks>
        /// Whitespace, ASCII letters/digits and a few markup characters are removed before printing so
        /// that mostly the non-ASCII text remains. The method makes no assertions.
        /// </remarks>
        public void InternetDownloadTextFileAndChangeEncoding()
        {
            // Upper bound on the number of characters printed per download.
            const int previewLength = 500;

            const string rule   = "----------------------------------------------------------------------------------------------------------------";
            const string footer = "################################################################################################################";

            MyHttpGet      http = new MyHttpGet();
            HttpStatusCode statusCode;

            // Presumably sample log output from an earlier run, kept for reference:
            // Downloaded, writing [11771] bytes to [C:\Users\albert\AppData\Local\Temp\tmp71F7.tmp]

            // this one has no encoding but is actually big 5
            string url = "http://cdp.sinica.edu.tw/";

            // First pass: no encoding supplied by the caller.
            string html = http.DownloadTextFile(url, out statusCode);

            html = Regex.Replace(html, @"\s", "");
            html = Regex.Replace(html, @"[a-zA-Z0-9\<\>/="";:.]", "");

            Console.WriteLine(rule);
            Console.WriteLine(url);
            Console.WriteLine(rule);
            // Clamp the length: Substring throws if the filtered text is shorter than the preview size.
            Console.WriteLine(html.Substring(0, Math.Min(previewLength, html.Length)));
            Console.WriteLine(footer);

            // Second pass: force Big5 decoding.
            html = http.DownloadTextFile(url, Encoding.GetEncoding("big5"), out statusCode);
            html = Regex.Replace(html, @"\s", "");
            html = Regex.Replace(html, @"[a-zA-Z0-9\<\>/="";:.]", "");

            Console.WriteLine(rule);
            Console.WriteLine(url);
            Console.WriteLine(rule);
            Console.WriteLine(html.Substring(0, Math.Min(previewLength, html.Length)));
            Console.WriteLine(footer);
        }
Пример #2
0
        /// <summary>
        /// Requests <c>http://localhost/wow.html</c> and verifies that the reported status is
        /// <see cref="HttpStatusCode.NotFound"/> and that the returned body mentions "404".
        /// </summary>
        public void Test404Localhost()
        {
            const string missingPageUrl = "http://localhost/wow.html";

            var            client = new MyHttpGet();
            HttpStatusCode status;

            // Download, then drop every whitespace character so the body prints compactly.
            var body = Regex.Replace(client.DownloadTextFile(missingPageUrl, out status), @"\s", "");

            Console.WriteLine(missingPageUrl);
            Console.WriteLine(body);

            Assert.AreEqual(HttpStatusCode.NotFound, status);
            Assert.IsTrue(body.Contains("404"));
        }
Пример #3
0
        /// <summary>
        /// Requests a page on www.edu.tw and verifies the error response: status
        /// <see cref="HttpStatusCode.NotFound"/>, a body that contains "charset=big5", "404" and the
        /// Chinese error text, and a status that <c>MyHttpGet.IsError</c> reports as an error.
        /// </summary>
        public void InternetTest404Big5()
        {
            const string url    = "http://www.edu.tw/EDU_WEB/EDU_MGT/MANDR/EDU6300001/bbs/1-4-2/1-4-2.html";
            const string rule   = "----------------------------------------------------------------------------------------------------------------";
            const string footer = "################################################################################################################";

            var            client = new MyHttpGet();
            HttpStatusCode status;

            string page = client.DownloadTextFile(url, out status);

            // Whitespace is removed before both printing and the substring checks below.
            page = Regex.Replace(page, @"\s", "");

            // Banner, URL, banner, page body, footer - emitted in that order.
            foreach (string line in new[] { rule, url, rule, page, footer })
            {
                Console.WriteLine(line);
            }

            Assert.AreEqual(HttpStatusCode.NotFound, status);
            Assert.IsTrue(page.Contains("charset=big5"));
            Assert.IsTrue(page.Contains("404"));
            Assert.IsTrue(page.Contains("找不到檔案或目錄"));
            Assert.IsTrue(client.IsError(status));
        }
Пример #4
0
        /// <summary>
        /// Downloads a series of pages, strips whitespace plus the ASCII letters, digits and punctuation
        /// matched by the filter pattern, and prints what is left of each page for manual inspection.
        /// </summary>
        /// <remarks>
        /// Most pages are previewed with at most <c>previewLength</c> characters; the Wikipedia and
        /// google.cn pages are written in full. The method makes no assertions.
        /// </remarks>
        public void InternetDownloadTextFiles2()
        {
            // Upper bound on the number of characters printed for previewed pages.
            const int previewLength = 100;

            const string rule   = "----------------------------------------------------------------------------------------------------------------";
            const string footer = "################################################################################################################";

            MyHttpGet      http = new MyHttpGet();
            HttpStatusCode statusCode;

            // Character class of ASCII letters, digits and markup/punctuation removed from every page.
            var theRegex = @"[a-zA-Z0-9\<\>/="";:.\(\)\[\]\$\-!,_{}'\#\+\&\*\?\|\\\%]";

            string html;
            string url;

            url  = "http://www.cnn.co.jp/";
            html = http.DownloadTextFile(url, out statusCode);
            html = Regex.Replace(html, @"\s", "");
            html = Regex.Replace(html, theRegex, "");
            Console.WriteLine(rule);
            Console.WriteLine(url);
            Console.WriteLine(rule);
            // Clamp the length: Substring throws if the filtered text is shorter than the preview size.
            Console.WriteLine(html.Substring(0, Math.Min(previewLength, html.Length)));
            Console.WriteLine(footer);

            url  = "http://arabic.cnn.com/";
            html = http.DownloadTextFile(url, out statusCode);
            html = Regex.Replace(html, @"\s", "");
            html = Regex.Replace(html, theRegex, "");
            Console.WriteLine(rule);
            Console.WriteLine(url);
            Console.WriteLine(rule);
            Console.WriteLine(html.Substring(0, Math.Min(previewLength, html.Length)));
            Console.WriteLine(footer);

            url  = "http://www8.hp.com/th/en/hp-news/article_detail.html?compURI=tcm:120-479498-16";
            html = http.DownloadTextFile(url, out statusCode);
            html = Regex.Replace(html, @"\s", "");
            html = Regex.Replace(html, theRegex, "");
            Console.WriteLine(rule);
            Console.WriteLine(url);
            Console.WriteLine(rule);
            Console.WriteLine(html.Substring(0, Math.Min(previewLength, html.Length)));
            Console.WriteLine(footer);

            // The percent-encoded URL is unescaped first, so the download is requested with the decoded form.
            url  = "http://te.wikipedia.org/wiki/%E0%B0%87%E0%B0%9C%E0%B1%8D%E0%B0%B0%E0%B0%BE%E0%B0%AF%E0%B0%BF%E0%B0%B2%E0%B1%8D";
            url  = Uri.UnescapeDataString(url);
            html = http.DownloadTextFile(url, out statusCode);
            html = Regex.Replace(html, @"\s", "");
            html = Regex.Replace(html, theRegex, "");
            Console.WriteLine(rule);
            Console.WriteLine(url);
            Console.WriteLine(rule);
            // The full filtered text of this page goes to Trace rather than Console.
            Trace.WriteLine(html);
            Console.WriteLine(footer);

            url  = "http://www.google.cn";
            html = http.DownloadTextFile(url, out statusCode);
            html = Regex.Replace(html, @"\s", "");
            html = Regex.Replace(html, theRegex, "");
            Console.WriteLine(rule);
            Console.WriteLine(url);
            Console.WriteLine(rule);
            // Printed in full, without a preview limit.
            Console.WriteLine(html);
            Console.WriteLine(footer);

            url  = "http://www.thehinditimes.com/BusinessNews.aspx?Region=Canada";
            html = http.DownloadTextFile(url, out statusCode);
            html = Regex.Replace(html, @"\s", "");
            html = Regex.Replace(html, theRegex, "");
            Console.WriteLine(rule);
            Console.WriteLine(url);
            Console.WriteLine(rule);
            Console.WriteLine(html.Substring(0, Math.Min(previewLength, html.Length)));
            Console.WriteLine(footer);
        }
Пример #5
0
        /// <summary>
        /// Downloads a series of pages, strips whitespace from each, and prints a short preview of the
        /// result for manual inspection.
        /// </summary>
        /// <remarks>
        /// The first four pages are previewed with only whitespace removed. The last three additionally
        /// have ASCII letters, digits and punctuation removed. The method makes no assertions.
        /// </remarks>
        public void InternetDownloadTextFiles1()
        {
            // Upper bounds on the number of characters printed per page.
            const int previewLength     = 100;
            const int longPreviewLength = 200;

            const string rule   = "----------------------------------------------------------------------------------------------------------------";
            const string footer = "################################################################################################################";

            MyHttpGet      http = new MyHttpGet();
            HttpStatusCode statusCode;

            string html;
            string url;

            // Character class of ASCII letters, digits and markup/punctuation; applied to the last three pages only.
            var theRegex = @"[a-zA-Z0-9\<\>/="";:.\(\)\[\]\$\-!,_{}'\#\+\&\*\?\|\\\%]";

            // The next four pages are shown with whitespace removed but without the ASCII filter.

            url  = "http://cn.yahoo.com";
            html = http.DownloadTextFile(url, out statusCode);
            html = Regex.Replace(html, @"\s", "");
            Console.WriteLine(rule);
            Console.WriteLine(url);
            Console.WriteLine(rule);
            // Clamp the length: Substring throws if the text is shorter than the preview size.
            Console.WriteLine(html.Substring(0, Math.Min(previewLength, html.Length)));
            Console.WriteLine(footer);

            url  = "http://www.bbc.co.uk";
            html = http.DownloadTextFile(url, out statusCode);
            html = Regex.Replace(html, @"\s", "");
            Console.WriteLine(rule);
            Console.WriteLine(url);
            Console.WriteLine(rule);
            Console.WriteLine(html.Substring(0, Math.Min(previewLength, html.Length)));
            Console.WriteLine(footer);

            url  = "http://www.microsoft.com";
            html = http.DownloadTextFile(url, out statusCode);
            html = Regex.Replace(html, @"\s", "");
            Console.WriteLine(rule);
            Console.WriteLine(url);
            Console.WriteLine(rule);
            Console.WriteLine(html.Substring(0, Math.Min(previewLength, html.Length)));
            Console.WriteLine(footer);

            url  = "http://www.medjugorje.hr/hr";
            html = http.DownloadTextFile(url, out statusCode);
            html = Regex.Replace(html, @"\s", "");
            Console.WriteLine(rule);
            Console.WriteLine(url);
            Console.WriteLine(rule);
            Console.WriteLine(html.Substring(0, Math.Min(longPreviewLength, html.Length)));
            Console.WriteLine(footer);

            // The remaining pages also have the ASCII filter applied before previewing.

            url  = "http://www.jpostlite.co.il";
            html = http.DownloadTextFile(url, out statusCode);
            html = Regex.Replace(html, @"\s", "");
            html = Regex.Replace(html, theRegex, "");
            Console.WriteLine(rule);
            Console.WriteLine(url);
            Console.WriteLine(rule);
            Console.WriteLine(html.Substring(0, Math.Min(previewLength, html.Length)));
            Console.WriteLine(footer);

            url  = "http://club.japantimes.co.jp/jt";             // japanese
            html = http.DownloadTextFile(url, out statusCode);
            html = Regex.Replace(html, @"\s", "");
            html = Regex.Replace(html, theRegex, "");
            Console.WriteLine(rule);
            Console.WriteLine(url);
            Console.WriteLine(rule);
            Console.WriteLine(html.Substring(0, Math.Min(previewLength, html.Length)));
            Console.WriteLine(footer);

            url  = "http://welcome.hp.com/country/jp/ja/cs/home.html";
            html = http.DownloadTextFile(url, out statusCode);
            html = Regex.Replace(html, @"\s", "");
            html = Regex.Replace(html, theRegex, "");
            Console.WriteLine(rule);
            Console.WriteLine(url);
            Console.WriteLine(rule);
            Console.WriteLine(html.Substring(0, Math.Min(previewLength, html.Length)));
            Console.WriteLine(footer);
        }