/// <summary>
/// Downloads the USCCB daily-readings page for the given date and returns the
/// markup of its main content element.
/// </summary>
/// <param name="dateTime">Date of the readings; it is embedded in the URL as MMddyy.</param>
/// <returns>
/// The serialized <c>div</c> whose id is <c>CS_Element_maincontent</c>, with two
/// mis-encoded quote sequences replaced by a plain double quote; <c>null</c> when
/// the document node or that element is missing.
/// </returns>
public static string GetReadings(DateTime dateTime)
{
    // Format with the invariant culture and without separators. The previous
    // "MM:dd:yy" + Replace(":", "") relied on ':' being rendered literally, but
    // ':' is the culture's time separator and the calendar is culture-specific,
    // so the URL could come out wrong on some locales.
    string date = dateTime.ToString("MMddyy", System.Globalization.CultureInfo.InvariantCulture);

    string s;
    using (WebClient w = new WebClient())
    {
        s = w.DownloadString("http://www.usccb.org/bible/readings/" + date + ".cfm");
    }

    var htmldoc = new HtmlDocument();
    htmldoc.LoadHtml(s);

    if (htmldoc.ParseErrors != null && htmldoc.ParseErrors.Any())
    {
        // TODO: handle parse errors; for now the document is used as parsed.
    }

    if (htmldoc.DocumentNode == null)
    {
        return null;
    }

    HtmlNode bodyNode = htmldoc.DocumentNode.SelectSingleNode("//div[@id='CS_Element_maincontent']");
    if (bodyNode == null)
    {
        // No main-content element on the page: report "nothing found" the same
        // way as a missing document node instead of throwing.
        return null;
    }

    // The two patterns below are mis-decoded (mojibake) quote sequences.
    // NOTE(review): the second pattern may originally have ended with an
    // invisible control character - confirm against the original file bytes.
    string returnString = bodyNode.WriteTo();
    returnString = returnString.Replace("“", "\"");
    returnString = returnString.Replace("â€", "\"");
    return returnString;
}
/// <summary>
/// Parses an XML feed string and maps every <c>item</c> element to a
/// <c>BindData</c> entry (link -> Tag, title -> Content, description -> Description).
/// </summary>
/// <param name="Data">XML text containing <c>item</c> elements.</param>
/// <returns>
/// One entry per <c>item</c>, in document order; <c>null</c> when the input cannot
/// be parsed or processed.
/// </returns>
public List<BindData> Retrive(string Data)
{
    try
    {
        List<BindData> list = new List<BindData>();
        XDocument doc = XDocument.Parse(Data);

        foreach (XElement ele in doc.Descendants("item"))
        {
            BindData d = new BindData();

            // The explicit XElement -> string conversion yields null for a missing
            // element, so one incomplete item no longer discards the whole feed.
            d.Tag = (string)ele.Element("link") ?? string.Empty;
            d.Content = (string)ele.Element("title") ?? string.Empty;
            string destocheck = (string)ele.Element("description") ?? string.Empty;

            // The description holds HTML; keep only its decoded text content.
            HtmlDocument HTdoc = new HtmlDocument();
            HTdoc.LoadHtml(destocheck);
            d.Description = HttpUtility.HtmlDecode(HTdoc.DocumentNode.InnerText);

            list.Add(d);
        }

        return list;
    }
    catch (Exception)
    {
        // Best effort: callers receive null when the feed cannot be processed.
        return null;
    }
}
/// <summary>
/// Parses an XML feed string and maps every <c>item</c> element to a
/// <c>DataToBind</c> entry, including the first image found in the item's
/// HTML description.
/// </summary>
/// <param name="Data">XML text containing <c>item</c> elements.</param>
/// <returns>
/// One entry per <c>item</c>, in document order; <c>null</c> when the input cannot
/// be parsed or processed.
/// </returns>
public List<DataToBind> Retrive(string Data)
{
    const string emptyImage = "/EasyPin;component/Images/Empty.png";

    try
    {
        List<DataToBind> list = new List<DataToBind>();
        XDocument doc = XDocument.Parse(Data);

        foreach (XElement ele in doc.Descendants("item"))
        {
            DataToBind d = new DataToBind();

            // The explicit XElement -> string conversion yields null for a missing
            // element, so one incomplete item no longer discards the whole feed.
            d.Tag = (string)ele.Element("link") ?? string.Empty;
            d.Content = (string)ele.Element("title") ?? string.Empty;
            string destocheck = (string)ele.Element("description") ?? string.Empty;

            HtmlDocument HTdoc = new HtmlDocument();
            HTdoc.LoadHtml(destocheck);

            // Use the first <img> with a non-empty src anywhere in the description.
            // The previous Element("//img") lookup matched a direct child by tag
            // name (not XPath), so it never found an image.
            string imageSource = null;
            foreach (HtmlNode img in HTdoc.DocumentNode.Descendants("img"))
            {
                HtmlAttribute src = img.Attributes["src"];
                if (src != null && !string.IsNullOrEmpty(src.Value))
                {
                    imageSource = src.Value;
                    break;
                }
            }
            d.Image = imageSource ?? emptyImage;

            d.Pubdate = (string)ele.Element("pubDate") ?? string.Empty;
            d.Description = HttpUtility.HtmlDecode(HTdoc.DocumentNode.InnerText);

            list.Add(d);
        }

        return list;
    }
    catch (Exception)
    {
        // Best effort: callers receive null when the feed cannot be processed.
        return null;
    }
}
/// <summary>
/// Attempts to fetch and load a HtmlDocument for a given URL.
/// Also determines the MIME-type for the stream and computes a hash if needed.
/// </summary>
/// <param name="url">URL to be loaded.</param>
/// <param name="timeout">Timeout for HttpWebRequest in seconds.</param>
/// <returns>
/// A document carrying the URL, the detected MIME type, the HtmlDocument and its hash.
/// When the MIME type is not text/html or text/xml the HtmlDocument is left unloaded.
/// Returns <c>null</c> when there is no response stream or when fetching or decoding throws.
/// </returns>
public static LunchRestaurantDocument GetLunchRestaurantDocumentForUrl(string url, int timeout)
{
    var document = new LunchRestaurantDocument { URL = url };
    var htmlDoc = new HtmlDocument();
    var allowedmimetypes = new[] { "text/html", "text/xml" };
    const int buffsize = 1024;

    try
    {
        var request = (HttpWebRequest)WebRequest.Create(GetUri(url));
        request.Timeout = timeout * 1000; // HttpWebRequest.Timeout is in milliseconds

        using (var response = (HttpWebResponse)request.GetResponse())
        using (var responseStream = response.GetResponseStream())
        {
            if (responseStream == null)
            {
                return null;
            }

            var headerEncoding = TryGetEncoding(response.ContentEncoding)
                                 ?? TryGetEncoding(response.CharacterSet)
                                 ?? Encoding.UTF8;

            // The first chunk is used for MIME sniffing before deciding to read the rest.
            var buf = new byte[buffsize];
            var count = responseStream.Read(buf, 0, buffsize);
            document.MimeType = MimeDetector.DetermineMIMEType(buf);

            if (Array.Exists(allowedmimetypes, mimetype => mimetype.Equals(document.MimeType)))
            {
                byte[] bytes;
                using (var ms = new MemoryStream())
                {
                    do
                    {
                        ms.Write(buf, 0, count);
                    }
                    while ((count = responseStream.Read(buf, 0, buffsize)) != 0);

                    // ToArray copies only the bytes written; GetBuffer would also
                    // expose the unused capacity as trailing zero bytes.
                    bytes = ms.ToArray();
                }

                // Prefer the encoding declared inside the markup over the header one.
                var docEncoding = htmlDoc.DetectEncodingHtml(headerEncoding.GetString(bytes));
                var convertedData = (docEncoding ?? headerEncoding).GetString(bytes);
                htmlDoc.LoadHtml(convertedData);
            }
            else
            {
                _logger.Info("Discarded invalid mimetype '{0}' for URL: {1}", document.MimeType, url);
            }
        }
    }
    catch (Exception ex)
    {
        // Still best effort (null result), but leave a trace of what went wrong.
        _logger.Info("Failed to load document for URL: {0} ({1})", url, ex.Message);
        return null;
    }

    if (htmlDoc.ParseErrors != null && htmlDoc.ParseErrors.Any())
    {
        // TODO: handle any parse errors
    }

    if (htmlDoc.DocumentNode != null)
    {
        document.HtmlDocument = htmlDoc;

        // let's also compute a hash for the document
        document.Hash = ComputeHashForDocument(htmlDoc, url);
    }

    return document;
}
/// <summary>
/// Downloads the page at <paramref name="sitesource"/>, decodes it with the charset
/// detected from its first bytes (UTF-8 when none is detected) and copies the cells
/// of the matching tables into <paramref name="khistoryResult"/>.
/// </summary>
/// <param name="sitesource">URL of the page to download.</param>
/// <param name="khistoryResult">
/// Grid that receives up to four header texts (from th cells) and rows of four
/// td values each. Processing stops after 20 td cells in total.
/// </param>
public void ExecuteDemo2(string sitesource, DataGridView khistoryResult)
{
    const int chunkSize = 1024;
    const int maxColumns = 4;
    const int cellLimit = 20;

    // Read the whole response body; the usings release the connection even when
    // reading throws.
    byte[] content;
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(sitesource);
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream stream = response.GetResponseStream())
    using (MemoryStream collected = new MemoryStream())
    {
        byte[] chunk = new byte[chunkSize];
        int read;
        while ((read = stream.Read(chunk, 0, chunk.Length)) > 0)
        {
            collected.Write(chunk, 0, read);
        }

        // Exactly the bytes received - no unused buffer space ends up in the HTML.
        content = collected.ToArray();
    }

    // Check the charset declared near the start of the document.
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    string head = Encoding.UTF8.GetString(content, 0, Math.Min(content.Length, chunkSize));
    Encoding encod = doc.DetectEncodingHtml(head) ?? Encoding.UTF8; // default
    doc.LoadHtml(encod.GetString(content));

    // SelectNodes returns null (not an empty collection) when nothing matches.
    var bodies = doc.DocumentNode.SelectNodes("//body");
    if (bodies == null)
    {
        return;
    }

    int cnt = 0;
    foreach (HtmlNode body in bodies)
    {
        if (cnt == cellLimit)
        {
            break;
        }

        // The leading '.' keeps each query relative to the current node; a bare
        // "//" would search the whole document again.
        var tables = body.SelectNodes(".//table[@summary = '6개등급(1급~6급)']");
        if (tables == null)
        {
            continue;
        }

        foreach (HtmlNode table in tables)
        {
            if (cnt == cellLimit)
            {
                break;
            }

            var headers = table.SelectNodes(".//th");
            if (headers != null)
            {
                int colCnt = 0;
                foreach (HtmlNode th in headers)
                {
                    // Never index past the columns the grid actually has.
                    if (colCnt == maxColumns || colCnt >= khistoryResult.Columns.Count)
                    {
                        break;
                    }
                    khistoryResult.Columns[colCnt++].HeaderText = th.InnerText;
                }
            }

            var cells = table.SelectNodes(".//tbody//td");
            if (cells == null)
            {
                continue;
            }

            List<string> rowList = new List<string>();
            foreach (HtmlNode td in cells)
            {
                if (cnt == cellLimit)
                {
                    break;
                }

                // Collect cells and emit a grid row every four values.
                rowList.Add(td.InnerText);
                if (rowList.Count == maxColumns)
                {
                    khistoryResult.Rows.Add(rowList.ToArray());
                    rowList.Clear();
                }
                cnt++;
            }
        }
    }
}
/// <summary>
/// Downloads the page at <paramref name="sitesource"/>, decodes it with the charset
/// detected from its first bytes (UTF-8 when none is detected) and copies the
/// <c>str</c>-classed cells of the <c>table_info_print</c> tables into
/// <paramref name="toeicResult"/>.
/// </summary>
/// <param name="sitesource">URL of the page to download.</param>
/// <param name="toeicResult">
/// Grid that receives up to four header texts (from th cells) and rows of four
/// filtered td values each.
/// </param>
public void ExecuteDemo(string sitesource, DataGridView toeicResult)
{
    const int chunkSize = 1024;
    const int maxColumns = 4;

    // Read the whole response body; the usings release the connection even when
    // reading throws.
    byte[] content;
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(sitesource);
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream stream = response.GetResponseStream())
    using (MemoryStream collected = new MemoryStream())
    {
        byte[] chunk = new byte[chunkSize];
        int read;
        while ((read = stream.Read(chunk, 0, chunk.Length)) > 0)
        {
            collected.Write(chunk, 0, read);
        }

        // Exactly the bytes received - no unused buffer space ends up in the HTML.
        content = collected.ToArray();
    }

    // Check the charset declared near the start of the document.
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    string head = Encoding.UTF8.GetString(content, 0, Math.Min(content.Length, chunkSize));
    Encoding encod = doc.DetectEncodingHtml(head) ?? Encoding.UTF8; // default
    doc.LoadHtml(encod.GetString(content));

    // SelectNodes returns null (not an empty collection) when nothing matches.
    var bodies = doc.DocumentNode.SelectNodes("//body");
    if (bodies == null)
    {
        return;
    }

    foreach (HtmlNode body in bodies)
    {
        // The leading '.' keeps each query relative to the current node; a bare
        // "//" would search the whole document again.
        var tables = body.SelectNodes(".//table[@class = 'table_info_print']");
        if (tables == null)
        {
            continue;
        }

        foreach (HtmlNode table in tables)
        {
            var headers = table.SelectNodes(".//th[@class = 'str']");
            if (headers != null)
            {
                int colCnt = 0;
                foreach (HtmlNode th in headers)
                {
                    // Never index past the columns the grid actually has.
                    if (colCnt == maxColumns || colCnt >= toeicResult.Columns.Count)
                    {
                        break;
                    }
                    toeicResult.Columns[colCnt++].HeaderText = th.InnerText;
                }
            }

            var cells = table.SelectNodes(".//td[@class = 'str']");
            if (cells == null)
            {
                continue;
            }

            List<string> rowList = new List<string>();
            foreach (HtmlNode td in cells)
            {
                // Remove ASCII letters and ';', turn '&' into a space, then drop
                // tabs and the star marker.
                string filtered1 = Regex.Replace(td.InnerText, "[A-Za-z;]", "");
                string filtered2 = Regex.Replace(filtered1, "[&]", " ");
                filtered2 = filtered2.Replace("\t", "");
                filtered2 = filtered2.Replace("★ ", "");

                // Collect cells and emit a grid row every four values.
                rowList.Add(filtered2);
                if (rowList.Count == maxColumns)
                {
                    toeicResult.Rows.Add(rowList.ToArray());
                    rowList.Clear();
                }
            }
        }
    }
}