/// <summary>
/// Parses <paramref name="html"/> on a thread-pool thread and builds one <see cref="TagImg"/>
/// for each <c>&lt;img&gt;</c> element found in it.
/// </summary>
/// <remarks>
/// An image is kept when <c>RegexUtil.ExtractBingImage</c> reports a match for the markup of the
/// image's grandparent element, or unconditionally when <paramref name="isHotspot"/> is set.
/// </remarks>
/// <param name="html">HTML markup to scan for images.</param>
/// <param name="isHotspot">
/// When <c>true</c>, every image is returned even if no detail URL could be extracted for it.
/// </param>
/// <returns>
/// The collected images, in document order. The list is empty when the markup contains no
/// <c>&lt;img&gt;</c> elements.
/// </returns>
public static async Task<List<TagImg>> GetImgFromHtml(string html, bool isHotspot = false)
{
    return await Task.Run(() =>
    {
        List<TagImg> list = new List<TagImg>();

        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(html);

        // SelectNodes yields null (not an empty collection) when nothing matches,
        // so markup without images must be handled before enumerating.
        var imgList = doc.DocumentNode.SelectNodes("//img");
        if (imgList == null)
        {
            return list;
        }

        foreach (var item in imgList)
        {
            TagImg tagImg = new TagImg();

            HtmlAgilityPack.HtmlAttribute tempAttribute = item.Attributes["alt"];
            tagImg.Alt = tempAttribute == null ? "" : tempAttribute.Value;

            tempAttribute = item.Attributes["src"];
            tagImg.Src = tempAttribute == null ? "" : tempAttribute.Value;

            // Sizes are scoped to the current image: an image without these attributes
            // must report 0 instead of inheriting the previous image's dimensions.
            // NOTE(review): the sample Bing markup carries "height"/"width" on <img>, while
            // this code reads "h"/"w" - confirm which attribute names are intended.
            int h = 0;
            int w = 0;

            tempAttribute = item.Attributes["h"];
            if (tempAttribute != null)
            {
                int.TryParse(tempAttribute.Value, out h);
            }

            tempAttribute = item.Attributes["w"];
            if (tempAttribute != null)
            {
                int.TryParse(tempAttribute.Value, out w);
            }

            // Search detail. In a Bing result the thumbnail sits two levels below the anchor
            // that carries the metadata:
            //   <a class="iusc" m="{...,"murl":"<full image url>",...}" href="/images/search?view=detailV2&...">
            //     <div class="img_cont hoff">
            //       <img class="mimg" height="208" width="299" src="https://tse3-mm.cn.bing.net/th?id=..." alt="..." />
            //     </div>
            //   </a>
            // hence the grandparent's markup is what gets handed to the extractor.
            var container = item.ParentNode == null ? null : item.ParentNode.ParentNode;

            bool matched = false;
            string detailUrl = string.Empty;
            if (container != null)
            {
                Tuple<bool, string> extractResult = RegexUtil.ExtractBingImage(container.OuterHtml);
                matched = extractResult.Item1;
                detailUrl = extractResult.Item2;
            }

            if (matched || isHotspot)
            {
                tagImg.DetailUrl = detailUrl;
                tagImg.Width = w;
                tagImg.Height = h;
                list.Add(tagImg);
            }
        }

        return list;
    });
}
/// <summary>
/// Reads the <c>src</c> attribute of <paramref name="tag"/>.
/// </summary>
/// <param name="tag">Image tag to inspect.</param>
/// <returns>The attribute's value, or an empty string when the tag has no <c>src</c> attribute.</returns>
public static string GetSrc(this TagImg tag)
{
    var srcAttribute = tag.Attribute("src");
    return srcAttribute == null ? string.Empty : srcAttribute.Value;
}
/// <summary>
/// Builds an image tag for the main image called <paramref name="name"/>, with an empty
/// <c>alt</c> text and, when <c>Urls.GetSize</c> returns a value, that width and height applied.
/// </summary>
/// <param name="name">Image name passed to <c>Urls.MainSys</c> and <c>Urls.Main</c>.</param>
/// <returns>The configured image tag.</returns>
public static TagImg Main(string name)
{
    var dimensions = Urls.GetSize(Urls.MainSys(name));
    var image = new TagImg().Src(Urls.Main(name)).Alt("");

    // No size available: return the tag without width/height.
    if (!dimensions.HasValue)
    {
        return image;
    }

    image.Size(dimensions.Value.Width, dimensions.Value.Height);
    return image;
}
/// <summary>
/// Renders the certificate block: the certificate image, a line break, a download button and,
/// for the <paramref name="ru"/> non-<paramref name="vendor"/> variant, a "share with friends" section.
/// </summary>
/// <param name="certImg">Certificate image tag; a vertical margin style is applied to it.</param>
/// <param name="sigId">Identifier forwarded to the download link.</param>
/// <param name="eng">Forwarded to the download link.</param>
/// <param name="vendor">Forwarded to the download link; when set, the share section is omitted.</param>
/// <param name="ru">Forwarded to the download link; the share section is shown only when set.</param>
/// <returns>The rendered markup as a string.</returns>
private string GetCertView(TagImg certImg, decimal sigId, bool eng = false, bool vendor = false, bool ru = false)
{
    return H.div[
        certImg.Style("margin:10px 0;"),
        H.br,
        Url.Graduate().DownloadCert(sigId, eng, vendor, ru, "Скачать").Class("ui-button"),
        // The share section is passed as null (no element) unless ru is set and vendor is not.
        ru && !vendor
            ? H.div[H.h3["Поделитесь с друзьями!"], Htmls.AddThis(Html)]
            : null
    ].ToString();
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>Lang</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New language code.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg lang(this TagImg tag, LangCode value)
{
    tag.Lang = value;
    return tag;
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>Style</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New style text.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg style(this TagImg tag, string value)
{
    tag.Style = value;
    return tag;
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>Name</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New name.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg name(this TagImg tag, string value)
{
    tag.Name = value;
    return tag;
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>IsMap</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New <c>IsMap</c> setting.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg ismap(this TagImg tag, IsMap value)
{
    tag.IsMap = value;
    return tag;
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>Width</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New width.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg width(this TagImg tag, Length value)
{
    tag.Width = value;
    return tag;
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>LongDesc</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New long-description value.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg longdesc(this TagImg tag, string value)
{
    tag.LongDesc = value;
    return tag;
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>OnDblClick</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New double-click handler text.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg ondblclick(this TagImg tag, string value)
{
    tag.OnDblClick = value;
    return tag;
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>Dir</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New <c>Dir</c> setting.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg dir(this TagImg tag, Dir value)
{
    tag.Dir = value;
    return tag;
}
/// <summary>
/// Applies both dimensions to <paramref name="tag"/> by calling <c>Width</c> and then <c>Height</c>.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="width">Value passed to <c>Width</c>.</param>
/// <param name="height">Value passed to <c>Height</c>.</param>
/// <returns>The result of the chained <c>Width(...).Height(...)</c> calls.</returns>
public static TagImg Size(this TagImg tag, object width, object height)
{
    var withWidth = tag.Width(width);
    return withWidth.Height(height);
}
/// <summary>
/// Requests <paramref name="url"/>, extracts the page title, the description meta content and
/// the <c>a</c>/<c>img</c>/<c>h1</c> tags from the response, stores the results through
/// <c>ParsingResultsEntities</c> and returns a text report.
/// </summary>
/// <param name="url">
/// Address to check. <c>http://</c> is prepended when the value starts with neither
/// <c>http://</c> nor <c>https://</c>.
/// </param>
/// <returns>
/// The text report for the UI. When the request fails with a <see cref="WebException"/> the
/// report contains only the status section. Returns <c>null</c> (after showing a message box)
/// when <paramref name="url"/> is null or empty.
/// </returns>
public string WebChecker(string url)
{
    const string http = "http://";
    const string https = "https://";

    string lblResult = String.Empty;
    string lblStatusDescription = String.Empty;
    string lblCause = String.Empty;
    string innerTextOfTitle = String.Empty;
    string contentOfTagMeta = String.Empty;
    string all = String.Empty;
    DateTime date = DateTime.Now;

    // Accumulate the per-tag report sections without repeated string reallocation.
    StringBuilder tagA = new StringBuilder();
    StringBuilder tagImg = new StringBuilder();
    StringBuilder innerTextOfH1 = new StringBuilder();

    Urls urllink = new Urls();
    List<TagA> tagAList = new List<TagA>();
    List<TagImg> tagImgList = new List<TagImg>();
    List<TagH1> tagH1List = new List<TagH1>();

    try
    {
        // Make sure a link was supplied at all.
        if (string.IsNullOrEmpty(url))
        {
            MessageBox.Show(@"Вы не вввели ссылку !");
            return null;
        }

        // Add the "http://" scheme when the link has none.
        if (!(url.StartsWith(http, StringComparison.Ordinal) || url.StartsWith(https, StringComparison.Ordinal)))
        {
            url = url.Insert(0, http);
        }

        // Create the request and fetch the response. An unreachable or invalid URL raises
        // WebException, which is handled by the catch block below.
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            if (request.HaveResponse)
            {
                lblResult = (response.StatusCode == HttpStatusCode.OK) ? "Сайт доступен" : "Сайт не доступен";
                lblStatusDescription = response.StatusDescription;
                lblCause = ((int)response.StatusCode).ToString();

                string respStream;
                using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("utf-8")))
                {
                    respStream = reader.ReadToEnd();
                }

                HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
                htmlDoc.LoadHtml(respStream);

                // Text of the <title> tag.
                var nodeTitle = htmlDoc.DocumentNode.SelectSingleNode("//title");
                if (nodeTitle != null)
                {
                    innerTextOfTitle = nodeTitle.InnerText;
                }

                // "content" attribute of the <meta> tag whose "name" attribute is "description".
                var nodeMeta = htmlDoc.DocumentNode.SelectNodes("//meta");
                if (nodeMeta != null)
                {
                    foreach (var tag in nodeMeta)
                    {
                        var nameAttribute = tag.Attributes["name"];
                        if (nameAttribute == null || nameAttribute.Value != "description")
                        {
                            continue;
                        }

                        // A description meta without a content attribute is skipped instead of crashing.
                        var contentAttribute = tag.Attributes["content"];
                        if (contentAttribute != null)
                        {
                            contentOfTagMeta = contentAttribute.Value;
                        }
                    }
                }

                // NOTE(review): urllink.Id is read below before urllink has been saved;
                // confirm that the child rows end up linked to the parent record as intended.

                // "href" attribute of every <a> tag.
                var nodesA = htmlDoc.DocumentNode.SelectNodes("//a");
                if (nodesA != null)
                {
                    foreach (var tag in nodesA)
                    {
                        var hrefAttribute = tag.Attributes["href"];
                        if (hrefAttribute != null)
                        {
                            var link = hrefAttribute.Value;
                            tagA.Append(link).Append("\n");
                            tagAList.Add(new TagA { UrlId = urllink.Id, Href = link });
                        }
                    }
                }

                // "src" attribute of every <img> tag.
                var nodesImg = htmlDoc.DocumentNode.SelectNodes("//img");
                if (nodesImg != null)
                {
                    foreach (var tag in nodesImg)
                    {
                        var srcAttribute = tag.Attributes["src"];
                        if (srcAttribute != null)
                        {
                            var src = srcAttribute.Value;
                            tagImg.Append(src).Append("\n");
                            tagImgList.Add(new TagImg { UrlId = urllink.Id, Src = src });
                        }
                    }
                }

                // Inner text of every <h1> tag.
                var nodesH1 = htmlDoc.DocumentNode.SelectNodes("//h1");
                if (nodesH1 != null)
                {
                    foreach (var tag in nodesH1)
                    {
                        innerTextOfH1.Append(tag.InnerText).Append("\n");
                        tagH1List.Add(new TagH1 { UrlId = urllink.Id, H1Text = tag.InnerText });
                    }
                }

                StringBuilder newContent = new StringBuilder();
                newContent.AppendLine("URL: " + url +
                                      "\nДата проверки: " + date +
                                      "\nСтатус: " + lblResult +
                                      "\nОписание статуса: " + lblStatusDescription +
                                      "\nКод статуса: " + lblCause +
                                      "\nTitle: " + innerTextOfTitle +
                                      "\nContent: " + contentOfTagMeta + "\n" +
                                      "\nТэги A: \n" + tagA + "\n" +
                                      "\nТэги Img: \n" + tagImg +
                                      "\nТэги H1: \n" + innerTextOfH1 + "\n");
                all = newContent.ToString();

                urllink.Url = url;
                urllink.Title = innerTextOfTitle;
                urllink.MetaContent = contentOfTagMeta;
                urllink.Status = lblResult;
                urllink.StatusDescription = lblStatusDescription;
                urllink.StatusCode = lblCause;
                urllink.DateOfParsing = date;

                using (var db = new ParsingResultsEntities())
                {
                    db.Urls.Add(urllink);
                    db.TagH1.AddRange(tagH1List);
                    db.TagA.AddRange(tagAList);
                    db.TagImg.AddRange(tagImgList);
                    db.SaveChanges();
                }
            }
        }

        request.Abort();
        return all; // hand the collected data back to the application UI
    }
    catch (WebException ex)
    {
        lblResult = @"Сайт не доступен";
        lblStatusDescription = ex.Message;

        // Report the HTTP status code when the server answered, otherwise the WebExceptionStatus.
        // The error response is disposed once its status has been read.
        using (WebResponse errorResponse = ex.Response)
        {
            HttpWebResponse httpError = errorResponse as HttpWebResponse;
            lblCause = httpError != null
                ? ((int)httpError.StatusCode).ToString()
                : ((int)ex.Status).ToString();
        }

        StringBuilder newContent = new StringBuilder();
        newContent.AppendLine("URL: " + url +
                              "\nДата проверки: " + date +
                              "\nСтатус: " + lblResult +
                              "\nОписание статуса: " + lblStatusDescription +
                              "\nКод статуса: " + lblCause + "\n");
        all = newContent.ToString();

        // Record what is known about the failed check; without this the saved row would be blank.
        urllink.Url = url;
        urllink.Status = lblResult;
        urllink.StatusDescription = lblStatusDescription;
        urllink.StatusCode = lblCause;
        urllink.DateOfParsing = date;

        using (var db = new ParsingResultsEntities())
        {
            db.Urls.Add(urllink);
            db.SaveChanges();
        }

        return all;
    }
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>Src</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New image source.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg src(this TagImg tag, string value)
{
    tag.Src = value;
    return tag;
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>OnMouseDown</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New mouse-down handler text.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg onmousedown(this TagImg tag, string value)
{
    tag.OnMouseDown = value;
    return tag;
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>Alt</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New alternative text.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg alt(this TagImg tag, string value)
{
    tag.Alt = value;
    return tag;
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>OnMouseUp</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New mouse-up handler text.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg onmouseup(this TagImg tag, string value)
{
    tag.OnMouseUp = value;
    return tag;
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>Height</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New height.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg height(this TagImg tag, Length value)
{
    tag.Height = value;
    return tag;
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>OnMouseMove</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New mouse-move handler text.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg onmousemove(this TagImg tag, string value)
{
    tag.OnMouseMove = value;
    return tag;
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>UseMap</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New <c>UseMap</c> value.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg usemap(this TagImg tag, string value)
{
    tag.UseMap = value;
    return tag;
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>OnMouseOut</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New mouse-out handler text.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg onmouseout(this TagImg tag, string value)
{
    tag.OnMouseOut = value;
    return tag;
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>Id</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New identifier.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg id(this TagImg tag, string value)
{
    tag.Id = value;
    return tag;
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>OnKeyPress</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New key-press handler text.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg onkeypress(this TagImg tag, string value)
{
    tag.OnKeyPress = value;
    return tag;
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>Class</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New class value.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg @class(this TagImg tag, string value)
{
    tag.Class = value;
    return tag;
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>OnKeyDown</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New key-down handler text.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg onkeydown(this TagImg tag, string value)
{
    tag.OnKeyDown = value;
    return tag;
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>Title</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New title.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg title(this TagImg tag, string value)
{
    tag.Title = value;
    return tag;
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>OnKeyUp</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New key-up handler text.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg onkeyup(this TagImg tag, string value)
{
    tag.OnKeyUp = value;
    return tag;
}
/// <summary>
/// Assigns <paramref name="value"/> to the tag's <c>XmlLang</c> property.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <param name="value">New <c>XmlLang</c> value.</param>
/// <returns>The same <paramref name="tag"/> instance, so calls can be chained.</returns>
public static TagImg xmllang(this TagImg tag, string value)
{
    tag.XmlLang = value;
    return tag;
}
/// <summary>
/// Applies the <c>float_left</c> class to <paramref name="tag"/> through its <c>Class</c> method.
/// </summary>
/// <param name="tag">Tag to modify.</param>
/// <returns>The result of the <c>Class</c> call.</returns>
public static TagImg FloatLeft(this TagImg tag)
{
    var floated = tag.Class("float_left");
    return floated;
}