public void InternetDownloadTextFileAndChangeEncoding() { MyHttpGet http = new MyHttpGet(); HttpStatusCode statusCode; //Downloaded, writing [11771] bytes to [C:\Users\albert\AppData\Local\Temp\tmp71F7.tmp] // this one has no encoding but is actually big 5 string url = "http://cdp.sinica.edu.tw/"; string html = http.DownloadTextFile(url, out statusCode); html = Regex.Replace(html, @"\s", ""); html = Regex.Replace(html, @"[a-zA-Z0-9\<\>/="";:.]", ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(url); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 500)); Console.WriteLine("################################################################################################################"); html = http.DownloadTextFile(url, Encoding.GetEncoding("big5"), out statusCode); html = Regex.Replace(html, @"\s", ""); html = Regex.Replace(html, @"[a-zA-Z0-9\<\>/="";:.]", ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(url); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 500)); Console.WriteLine("################################################################################################################"); }
public void TestInternetConnection() { MyHttpGet http = new MyHttpGet(); bool ok = http.HasInternetConnection(); Console.WriteLine(ok); }
public void Test404Localhost() { MyHttpGet http = new MyHttpGet(); HttpStatusCode statusCode; string url = "http://localhost/wow.html"; string html = http.DownloadTextFile(url, out statusCode); html = Regex.Replace(html, @"\s", ""); Console.WriteLine(url); Console.WriteLine(html); Assert.AreEqual(HttpStatusCode.NotFound, statusCode); Assert.IsTrue(html.Contains("404")); }
public void InternetDownload13MegImage() { string url = @"http://imgsrc.hubblesite.org/hu/db/images/hs-2004-28-b-full_jpg.jpg"; // 13.8 MB string filename = @"c:\temp\hst2004.jpg"; File.Delete(filename); Assert.IsFalse(File.Exists(filename)); MyHttpGet http = new MyHttpGet(); HttpStatusCode statusCode; http.DownloadFile(url, filename, out statusCode); Assert.IsTrue(File.Exists(filename)); FileInfo fi = new FileInfo(filename); long expectedLength = 14537420; // 13.8 MB (14,537,420 bytes) Assert.AreEqual(expectedLength, fi.Length); }
public void InternetTest404Big5() { MyHttpGet http = new MyHttpGet(); HttpStatusCode statusCode; string url = "http://www.edu.tw/EDU_WEB/EDU_MGT/MANDR/EDU6300001/bbs/1-4-2/1-4-2.html"; string html = http.DownloadTextFile(url, out statusCode); html = Regex.Replace(html, @"\s", ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(url); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html); Console.WriteLine("################################################################################################################"); Assert.AreEqual(HttpStatusCode.NotFound, statusCode); Assert.IsTrue(html.Contains("charset=big5")); Assert.IsTrue(html.Contains("404")); Assert.IsTrue(html.Contains("找不到檔案或目錄")); Assert.IsTrue(http.IsError(statusCode)); }
public void TestCharsetEncoding() { MyHttpGet http = new MyHttpGet(); string htmlfile; Encoding encoding; // UTF-8 htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\sinica.binary.html"; // actually big5 encoding = http.GetEncoding(htmlfile); Console.WriteLine("[{0}] [{1}]", encoding.WebName, htmlfile); Assert.AreEqual("utf-8", encoding.WebName); // UTF-8 htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\bbc.binary.html"; encoding = http.GetEncoding(htmlfile); Console.WriteLine("[{0}] [{1}]", encoding.WebName, htmlfile); Assert.AreEqual("utf-8", encoding.WebName); // utf-8 htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\hpjap.binary.html"; encoding = http.GetEncoding(htmlfile); Console.WriteLine("[{0}] [{1}]", encoding.WebName, htmlfile); Assert.AreEqual("utf-8", encoding.WebName); // utf-8 htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\jerupost.binary.html"; encoding = http.GetEncoding(htmlfile); Console.WriteLine("[{0}] [{1}]", encoding.WebName, htmlfile); Assert.AreEqual("utf-8", encoding.WebName); // euc-jp htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\jptimes.binary.html"; encoding = http.GetEncoding(htmlfile); Console.WriteLine("[{0}] [{1}]", encoding.WebName, htmlfile); Assert.AreEqual("euc-jp", encoding.WebName); // utf-8 htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\ms.binary.html"; encoding = http.GetEncoding(htmlfile); Console.WriteLine("[{0}] [{1}]", encoding.WebName, htmlfile); Assert.AreEqual("utf-8", encoding.WebName); // gb2312 htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\cnnyahoo.binary.html"; encoding = http.GetEncoding(htmlfile); Console.WriteLine("[{0}] [{1}]", encoding.WebName, htmlfile); Assert.AreEqual("gb2312", encoding.WebName); // utf-8 htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\medj.binary.html"; encoding = http.GetEncoding(htmlfile); Console.WriteLine("[{0}] [{1}]", encoding.WebName, htmlfile); Assert.AreEqual("utf-8", encoding.WebName); // utf-8 htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\smh.binary.html"; encoding = http.GetEncoding(htmlfile); Console.WriteLine("[{0}] [{1}]", encoding.WebName, htmlfile); Assert.AreEqual("utf-8", encoding.WebName); }
public void TestBinaryToTextDecoding() { MyHttpGet http = new MyHttpGet(); string htmlfile; string html; htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\sinica.binary.html"; html = http.ConvertFileToString(htmlfile); html = Regex.Replace(html, @"\s", ""); html = Regex.Replace(html, @"[a-zA-Z0-9\<\>/="";:.]", ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(htmlfile); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 1000)); Console.WriteLine("################################################################################################################"); htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\sinica.binary.html"; Encoding big5 = Encoding.GetEncoding("big5"); html = http.ConvertFileToString(htmlfile, big5); html = Regex.Replace(html, @"\s", ""); html = Regex.Replace(html, @"[a-zA-Z0-9\<\>/="";:.]", ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(htmlfile); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 500)); Console.WriteLine("################################################################################################################"); htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\bbc.binary.html"; html = http.ConvertFileToString(htmlfile); html = Regex.Replace(html, @"\s", ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(htmlfile); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 1000)); Console.WriteLine("################################################################################################################"); htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\hpjap.binary.html"; html = http.ConvertFileToString(htmlfile); html = Regex.Replace(html, @"\s", ""); html = Regex.Replace(html, @"[a-zA-Z0-9\<\>/="";:.]", ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(htmlfile); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 1000)); Console.WriteLine("################################################################################################################"); htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\jerupost.binary.html"; html = http.ConvertFileToString(htmlfile); html = Regex.Replace(html, @"\s", ""); html = Regex.Replace(html, @"[a-zA-Z0-9\<\>/="";:.]", ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(htmlfile); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 1000)); Console.WriteLine("################################################################################################################"); htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\jptimes.binary.html"; html = http.ConvertFileToString(htmlfile); html = Regex.Replace(html, @"\s", ""); html = Regex.Replace(html, @"[a-zA-Z0-9\<\>/="";:.]", ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(htmlfile); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 1000)); Console.WriteLine("################################################################################################################"); htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\ms.binary.html"; html = http.ConvertFileToString(htmlfile); html = Regex.Replace(html, @"\s", ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(htmlfile); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 1000)); Console.WriteLine("################################################################################################################"); htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\cnnyahoo.binary.html"; html = http.ConvertFileToString(htmlfile); html = Regex.Replace(html, @"\s", ""); html = Regex.Replace(html, @"[a-zA-Z0-9\<\>/="";:.]", ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(htmlfile); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 1000)); Console.WriteLine("################################################################################################################"); htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\medj.binary.html"; html = http.ConvertFileToString(htmlfile); html = Regex.Replace(html, @"\s", ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(htmlfile); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 1000)); Console.WriteLine("################################################################################################################"); htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\smh.binary.html"; html = http.ConvertFileToString(htmlfile); html = Regex.Replace(html, @"\s", ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(htmlfile); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 1000)); Console.WriteLine("################################################################################################################"); }
public void TestHtmlMetaCharsetRegex() { MyHttpGet http = new MyHttpGet(); string htmlfile; string html; string charset; // blank htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\sinica.binary.html"; html = File.ReadAllText(htmlfile, Encoding.ASCII); charset = http.GetCharsetFromMetaTag(html); Console.WriteLine("[{0}] [{1}]", charset, htmlfile); Assert.AreEqual("", charset); // UTF-8 htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\bbc.binary.html"; html = File.ReadAllText(htmlfile, Encoding.ASCII); charset = http.GetCharsetFromMetaTag(html); Console.WriteLine("[{0}] [{1}]", charset, htmlfile); Assert.AreEqual("UTF-8", charset); // utf-8 htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\hpjap.binary.html"; html = File.ReadAllText(htmlfile, Encoding.ASCII); charset = http.GetCharsetFromMetaTag(html); Console.WriteLine("[{0}] [{1}]", charset, htmlfile); Assert.AreEqual("utf-8", charset); // utf-8 htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\jerupost.binary.html"; html = File.ReadAllText(htmlfile, Encoding.ASCII); charset = http.GetCharsetFromMetaTag(html); Console.WriteLine("[{0}] [{1}]", charset, htmlfile); Assert.AreEqual("utf-8", charset); // euc-jp htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\jptimes.binary.html"; html = File.ReadAllText(htmlfile, Encoding.ASCII); charset = http.GetCharsetFromMetaTag(html); Console.WriteLine("[{0}] [{1}]", charset, htmlfile); Assert.AreEqual("euc-jp", charset); // utf-8 htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\ms.binary.html"; html = File.ReadAllText(htmlfile, Encoding.ASCII); charset = http.GetCharsetFromMetaTag(html); Console.WriteLine("[{0}] [{1}]", charset, htmlfile); Assert.AreEqual("utf-8", charset); // gb2312 htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\cnnyahoo.binary.html"; html = File.ReadAllText(htmlfile, Encoding.ASCII); charset = http.GetCharsetFromMetaTag(html); Console.WriteLine("[{0}] [{1}]", charset, htmlfile); Assert.AreEqual("gb2312", charset); // blank htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\medj.binary.html"; html = File.ReadAllText(htmlfile, Encoding.ASCII); charset = http.GetCharsetFromMetaTag(html); Console.WriteLine("[{0}] [{1}]", charset, htmlfile); Assert.AreEqual("", charset); // blank htmlfile = @"c:\albyStuff\development\alby.core.2015\alby.core.test\web\htmlPages\smh.binary.html"; html = File.ReadAllText(htmlfile, Encoding.ASCII); charset = http.GetCharsetFromMetaTag(html); Console.WriteLine("[{0}] [{1}]", charset, htmlfile); Assert.AreEqual("", charset); }
public void InternetDownloadTextFilesAsBinary() { MyHttpGet http = new MyHttpGet(); HttpStatusCode statusCode; string url; string filename; //Downloaded, writing [15635] bytes to [c:\temp\chinasea.binary.html] url = "http://www.mimosapudica.org"; //ChinaSea/index_c.html" ; // big 5 filename = @"c:\temp\chinasea.binary.html"; http.DownloadFile(url, filename, out statusCode); Assert.IsTrue(http.IsOK(statusCode)); //Downloading binary, url = [http://www.smh.com.au] //Response CharacterSet [UTF-8] //Response header [Content-Type]=[text/html; charset=UTF-8] //Downloaded binary, read [197130] bytes //Downloaded binary, writing [197130] bytes to [c:\temp\smh.binary.html] url = "http://www.smh.com.au"; filename = @"c:\temp\smh.binary.html"; http.DownloadFile(url, filename, out statusCode); Assert.IsTrue(http.IsOK(statusCode)); //Downloading binary, url = [http://cn.yahoo.com] //Response CharacterSet [GBK] //Response header [Content-Type]=[text/html; charset=GBK] //Downloaded binary, read [186647] bytes //Downloaded binary, writing [186647] bytes to [c:\temp\cnnyahoo.binary.html] url = "http://cn.yahoo.com"; filename = @"c:\temp\cnnyahoo.binary.html"; http.DownloadFile(url, filename, out statusCode); Assert.IsTrue(http.IsOK(statusCode)); //Downloading binary, url = [http://www.bbc.co.uk] //Response CharacterSet [ISO-8859-1] //Response header [Content-Type]=[text/html] //Downloaded binary, read [84407] bytes //Downloaded binary, writing [84407] bytes to [c:\temp\bbc.binary.html] url = "http://www.bbc.co.uk"; filename = @"c:\temp\bbc.binary.html"; http.DownloadFile(url, filename, out statusCode); Assert.IsTrue(http.IsOK(statusCode)); //Downloading binary, url = [http://www.microsoft.com] //Response CharacterSet [utf-8] //Response ContentLength [92704] //Response header [Content-Type]=[text/html; charset=utf-8] //Downloaded binary, read [92704] bytes //Downloaded binary, writing [92704] bytes to [c:\temp\ms.binary.html] url = "http://www.microsoft.com"; filename = @"c:\temp\ms.binary.html"; http.DownloadFile(url, filename, out statusCode); Assert.IsTrue(http.IsOK(statusCode)); //Downloading binary, url = [http://www.medjugorje.hr/hr] //Response CharacterSet [utf-8] //Response ContentLength [18812] //Response header [Content-Length]=[18812] //Response header [Content-Type]=[text/html; charset=utf-8] //Downloaded binary, read [18812] bytes //Downloaded binary, writing [18812] bytes to [c:\temp\medj.binary.html] url = "http://www.medjugorje.hr/hr"; filename = @"c:\temp\medj.binary.html"; http.DownloadFile(url, filename, out statusCode); Assert.IsTrue(http.IsOK(statusCode)); //Downloading binary, url = [http://www.jpostlite.co.il] //Response CharacterSet [ISO-8859-1] //Response header [Content-Type]=[text/html] //Downloaded binary, read [7339] bytes //Downloaded binary, writing [7339] bytes to [c:\temp\jerupost.binary.html] url = "http://www.jpostlite.co.il"; filename = @"c:\temp\jerupost.binary.html"; http.DownloadFile(url, filename, out statusCode); Assert.IsTrue(http.IsOK(statusCode)); //Downloading binary, url = [http://welcome.hp.com/country/jp/ja/cs/home.html] //Response CharacterSet [ISO-8859-1] //Response ContentLength [101683] //Response header [Content-Length]=[101683] //Response header [Content-Type]=[text/html] //Downloaded binary, read [101683] bytes //Downloaded binary, writing [101683] bytes to [c:\temp\hpjap.binary.html] url = "http://welcome.hp.com/country/jp/ja/cs/home.html"; filename = @"c:\temp\hpjap.binary.html"; http.DownloadFile(url, filename, out statusCode); Assert.IsTrue(http.IsOK(statusCode)); }
public void InternetDownloadTextFiles2() { MyHttpGet http = new MyHttpGet(); HttpStatusCode statusCode; var theRegex = @"[a-zA-Z0-9\<\>/="";:.\(\)\[\]\$\-!,_{}'\#\+\&\*\?\|\\\%]"; string html; string url; url = "http://www.cnn.co.jp/"; html = http.DownloadTextFile(url, out statusCode); html = Regex.Replace(html, @"\s", ""); html = Regex.Replace(html, theRegex, ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(url); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 100)); Console.WriteLine("################################################################################################################"); url = "http://arabic.cnn.com/"; html = http.DownloadTextFile(url, out statusCode); html = Regex.Replace(html, @"\s", ""); html = Regex.Replace(html, theRegex, ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(url); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 100)); Console.WriteLine("################################################################################################################"); url = "http://www8.hp.com/th/en/hp-news/article_detail.html?compURI=tcm:120-479498-16"; html = http.DownloadTextFile(url, out statusCode); html = Regex.Replace(html, @"\s", ""); html = Regex.Replace(html, theRegex, ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(url); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 100)); Console.WriteLine("################################################################################################################"); url = "http://te.wikipedia.org/wiki/%E0%B0%87%E0%B0%9C%E0%B1%8D%E0%B0%B0%E0%B0%BE%E0%B0%AF%E0%B0%BF%E0%B0%B2%E0%B1%8D"; url = Uri.UnescapeDataString(url); html = http.DownloadTextFile(url, out statusCode); html = Regex.Replace(html, @"\s", ""); html = Regex.Replace(html, theRegex, ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(url); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Trace.WriteLine(html); Console.WriteLine("################################################################################################################"); url = "http://www.google.cn"; html = http.DownloadTextFile(url, out statusCode); html = Regex.Replace(html, @"\s", ""); html = Regex.Replace(html, theRegex, ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(url); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html); //.Substring( 0, 100 ) ) ; Console.WriteLine("################################################################################################################"); url = "http://www.thehinditimes.com/BusinessNews.aspx?Region=Canada"; html = http.DownloadTextFile(url, out statusCode); html = Regex.Replace(html, @"\s", ""); html = Regex.Replace(html, theRegex, ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(url); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 100)); Console.WriteLine("################################################################################################################"); }
public void InternetDownloadTextFiles1() { MyHttpGet http = new MyHttpGet(); HttpStatusCode statusCode; string html; string url; var theRegex = @"[a-zA-Z0-9\<\>/="";:.\(\)\[\]\$\-!,_{}'\#\+\&\*\?\|\\\%]"; url = "http://cn.yahoo.com"; html = http.DownloadTextFile(url, out statusCode); html = Regex.Replace(html, @"\s", ""); //html = Regex.Replace(html, theRegex, ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(url); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 100)); Console.WriteLine("################################################################################################################"); url = "http://www.bbc.co.uk"; html = http.DownloadTextFile(url, out statusCode); html = Regex.Replace(html, @"\s", ""); //html = Regex.Replace(html, theRegex, ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(url); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 100)); Console.WriteLine("################################################################################################################"); url = "http://www.microsoft.com"; html = http.DownloadTextFile(url, out statusCode); html = Regex.Replace(html, @"\s", ""); //html = Regex.Replace(html, theRegex, ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(url); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 100)); Console.WriteLine("################################################################################################################"); url = "http://www.medjugorje.hr/hr"; html = http.DownloadTextFile(url, out statusCode); html = Regex.Replace(html, @"\s", ""); //html = Regex.Replace(html, theRegex, ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(url); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 200)); Console.WriteLine("################################################################################################################"); url = "http://www.jpostlite.co.il"; html = http.DownloadTextFile(url, out statusCode); html = Regex.Replace(html, @"\s", ""); html = Regex.Replace(html, theRegex, ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(url); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 100)); Console.WriteLine("################################################################################################################"); url = "http://club.japantimes.co.jp/jt"; // japanese html = http.DownloadTextFile(url, out statusCode); html = Regex.Replace(html, @"\s", ""); html = Regex.Replace(html, theRegex, ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(url); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 100)); Console.WriteLine("################################################################################################################"); url = "http://welcome.hp.com/country/jp/ja/cs/home.html"; html = http.DownloadTextFile(url, out statusCode); html = Regex.Replace(html, @"\s", ""); html = Regex.Replace(html, theRegex, ""); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(url); Console.WriteLine("----------------------------------------------------------------------------------------------------------------"); Console.WriteLine(html.Substring(0, 100)); Console.WriteLine("################################################################################################################"); }