/// <summary>
/// Renders a stored page record as a multi-line text report.
/// </summary>
/// <param name="getset">Record whose fields are written into the report.</param>
/// <returns>
/// Report text listing URL, status fields, title, description, link list, image list,
/// H1 text, time and ID, in that order.
/// </returns>
private string infofromdb(GetSet getset)
{
    // The layout adds no line break after the A-HREF ({6}) and Img tag ({7}) values;
    // whatever separators those values carry are emitted as-is.
    const string layout = "URL: {0}\r\nStatus:{1}\r\nDescription status:{2}\r\nStatus code:{3}\r\nTitle:{4}\r\nDescription:{5}\r\nA-HREF:\r\n{6}Img tag:\r\n{7}H1:{8}\r\nTime:{9}\r\nID:{10}\r\n";

    string recordId = getset.id.ToString();

    object[] fields =
    {
        getset.url,
        getset.httpstatus,
        getset.statusdes,
        getset.status,
        getset.title,
        getset.desc,
        getset.hrefTag,
        getset.imgTag,
        getset.h,
        getset.time,
        recordId
    };

    return string.Format(layout, fields);
}
/// <summary>
/// Requests <paramref name="website"/>, downloads its markup, extracts link targets, image
/// sources, H1 text, the meta description and the title, saves the result through
/// <c>rep.SaveInfo</c> and returns a text report.
/// </summary>
/// <param name="website">
/// Address of the page. It must start with "http" (which also covers "https"); otherwise a
/// message box is shown and no request is made.
/// </param>
/// <returns>
/// The report text, or the validation message, or the exception text when anything fails,
/// followed by "~" and the current value of <c>timetaken</c>. Exceptions are not propagated.
/// </returns>
public async Task<string> runparsing(string website)
{
    string answer = String.Empty;

    // Track this call's request/response locally so cleanup never touches objects left
    // in the fields by an earlier call.
    HttpWebRequest activeRequest = null;
    HttpWebResponse activeResponse = null;

    try
    {
        try
        {
            // URI schemes are case-insensitive; "http" as a prefix also matches "https".
            if (website == null || !website.StartsWith("http", StringComparison.OrdinalIgnoreCase))
            {
                const string invalidLink = "no correct link (without http or https)";
                MessageBox.Show(invalidLink);

                // Stop here: continuing would only produce an exception dump for the caller.
                return invalidLink + "~" + timetaken;
            }

            // Restart so the measurement covers this request only, not time left over
            // from previous calls. Assumes timer is a System.Diagnostics.Stopwatch.
            timer.Restart();

            activeRequest = (HttpWebRequest)WebRequest.Create(website);
            request = activeRequest;
            activeResponse = (HttpWebResponse)await activeRequest.GetResponseAsync();
            resp = activeResponse;

            if (activeRequest.HaveResponse)
            {
                timer.Stop();
                timetaken = timer.Elapsed;

                byte[] payload = await http.GetByteArrayAsync(website);

                // Decode the complete payload; limiting the count to Length - 1 lost the
                // final byte and threw on an empty body.
                // NOTE(review): entities are decoded before the markup is parsed, which can
                // turn escaped text into real tags - kept as in the original; confirm intent.
                source = WebUtility.HtmlDecode(Encoding.UTF8.GetString(payload));
                Document.LoadHtml(source);

                // "True"/"False": whether the status is exactly 200 OK.
                string httpstatuscode = (activeResponse.StatusCode == HttpStatusCode.OK).ToString();
                string statusdesc = activeResponse.StatusDescription;
                string statuscode = ((int)activeResponse.StatusCode).ToString();

                // Link targets, skipping bare "#" placeholders.
                var anchorNodes = Document.DocumentNode.SelectNodes("//a");
                if (anchorNodes != null)
                {
                    foreach (var tag in anchorNodes)
                    {
                        var href = tag.Attributes["href"];
                        if (href != null && href.Value != "#")
                        {
                            hrefTags.Add(href.Value);
                        }
                    }
                }

                // Image sources, skipping inline data: URIs.
                var imageNodes = Document.DocumentNode.SelectNodes("//img");
                if (imageNodes != null)
                {
                    foreach (var tag in imageNodes)
                    {
                        var srcAttribute = tag.Attributes["src"];
                        if (srcAttribute != null && !srcAttribute.Value.Contains("data:"))
                        {
                            imgTags.Add(srcAttribute.Value);
                        }
                    }
                }

                // Text of every H1, one per line.
                StringBuilder headings = new StringBuilder();
                var headingNodes = Document.DocumentNode.SelectNodes("//h1");
                if (headingNodes != null)
                {
                    foreach (var tag in headingNodes)
                    {
                        headings.Append(tag.InnerText).Append("\n");
                    }
                }
                string h1 = headings.ToString();

                // Meta description; the last matching tag wins. A description tag without a
                // content attribute is ignored instead of failing the whole parse.
                string descstr = String.Empty;
                var metaNodes = Document.DocumentNode.SelectNodes("//meta");
                if (metaNodes != null)
                {
                    foreach (var tag in metaNodes)
                    {
                        var nameAttribute = tag.Attributes["name"];
                        var contentAttribute = tag.Attributes["content"];
                        if (nameAttribute != null && nameAttribute.Value == "description" && contentAttribute != null)
                        {
                            descstr = contentAttribute.Value;
                        }
                    }
                }

                string titlestr = String.Empty;
                var titleNodes = Document.DocumentNode.SelectNodes("//title");
                if (titleNodes != null)
                {
                    var titleNode = titleNodes["title"];
                    if (titleNode != null)
                    {
                        titlestr = titleNode.InnerText;
                    }
                }

                // Render the collected lists once and reuse them for the report and the record.
                string hrefList = control.Listtostring(hrefTags);
                string imgList = control.Listtostring(imgTags);

                answer = String.Format(
                    "URL: {0}\r\nStatus:{1}\r\nDescription status:{2}\r\nStatus code:{3}\r\nTitle:{4}\r\nDescription:{5}\r\nA-HREF:\r\n{6}Img tag:\r\n{7}H1:{8}\r\n",
                    website, httpstatuscode, statusdesc, statuscode, titlestr, descstr, hrefList, imgList, h1);

                var info = new GetSet
                {
                    url = website,
                    httpstatus = httpstatuscode,
                    statusdes = statusdesc,
                    status = statuscode,
                    title = titlestr,
                    desc = descstr,
                    hrefTag = hrefList,
                    imgTag = imgList,
                    h = h1,
                    time = timetaken.ToString()
                };
                rep.SaveInfo(info);
            }
        }
        finally
        {
            // Runs on success and failure alike, so a failed parse cannot leave the timer
            // running, leak the response, or carry links/images into the next call.
            timer.Stop();
            hrefTags.Clear();
            imgTags.Clear();
            if (activeResponse != null)
            {
                activeResponse.Close();
            }
            if (activeRequest != null)
            {
                activeRequest.Abort();
            }
        }
    }
    catch (Exception ex)
    {
        // Failures are reported through the return value rather than thrown.
        answer = ex.ToString();
    }

    return answer + "~" + timetaken;
}