/// <summary>
/// Persists a URL record by invoking the <c>spr_URL_Save</c> stored procedure.
/// </summary>
/// <param name="url">Record whose fields are mapped onto the procedure parameters.</param>
/// <returns>Whatever <c>base.ExecuteSql</c> returns for the call.</returns>
/// <remarks>
/// Optional values (null/empty strings and unset nullable fields) are sent as
/// <see cref="DBNull.Value"/>. A CLR <c>null</c> parameter value tells ADO.NET to use
/// the parameter's default (the parameter is not transmitted), which makes the call
/// fail when the procedure declares no default for it.
/// </remarks>
public bool Save(XUrl url)
{
    List<SqlParameter> paramList = new List<SqlParameter>
    {
        new SqlParameter("@Id", url.Id),

        // Empty strings are stored as NULL, same as missing strings.
        new SqlParameter("@Name", string.IsNullOrEmpty(url.Name) ? (object)DBNull.Value : url.Name),
        new SqlParameter("@Description", string.IsNullOrEmpty(url.Description) ? (object)DBNull.Value : url.Description),
        new SqlParameter("@URL", string.IsNullOrEmpty(url.Url) ? (object)DBNull.Value : url.Url),

        new SqlParameter("@Created", url.Created),
        new SqlParameter("@CreatedBy", url.CreatedBy),

        new SqlParameter("@Deleted", url.Deleted.HasValue ? (object)url.Deleted.Value : DBNull.Value),
        new SqlParameter("@DeletedBy", url.DeletedBy.HasValue ? (object)url.DeletedBy.Value : DBNull.Value)
    };

    return base.ExecuteSql("spr_URL_Save", paramList);
}
/// <summary>
/// Reads one row from the [URLs] table by its [Id] and maps it onto an <c>XUrl</c>.
/// </summary>
/// <param name="id">Identifier bound to the <c>@Id</c> query parameter.</param>
/// <returns>
/// The mapped record with <c>IsNew</c> and <c>IsDirty</c> set to false, or null when
/// the reader is null or reports no rows.
/// </returns>
public XUrl Get(Guid id)
{
    string query = new StringBuilder()
        .AppendLine("SELECT [Name], [Description], [URL], [Created], [CreatedBy], [Deleted], [DeletedBy]")
        .AppendLine("FROM [URLs] WITH (NoLock) WHERE [Id] = @Id")
        .ToString();

    List<SqlParameter> args = new List<SqlParameter>();
    args.Add(new SqlParameter("@Id", id));

    using (SqlDataReader reader = base.OpenDataReaderInLine(query, args))
    {
        if (reader == null || !reader.HasRows)
        {
            return null;
        }

        XUrl record = new XUrl();
        reader.Read();
        record.Id = id;

        // Nullable text columns: leave the property untouched when the column is NULL.
        if (!reader.IsDBNull(0))
        {
            record.Name = reader.GetString(0);
        }

        if (!reader.IsDBNull(1))
        {
            record.Description = reader.GetString(1);
        }

        if (!reader.IsDBNull(2))
        {
            record.Url = reader.GetString(2);
        }

        // Creation columns are read unconditionally.
        record.Created = reader.GetDateTime(3);
        record.CreatedBy = reader.GetGuid(4);

        // Deletion columns are only set when the column holds a value.
        if (!reader.IsDBNull(5))
        {
            record.Deleted = reader.GetDateTime(5);
        }

        if (!reader.IsDBNull(6))
        {
            record.DeletedBy = reader.GetGuid(6);
        }

        // A freshly loaded record is neither new nor modified.
        record.IsNew = false;
        record.IsDirty = false;

        return record;
    }
}
/// <summary>
/// Checks that a URL record carries the minimum data required before saving.
/// </summary>
/// <param name="url">Record to check.</param>
/// <exception cref="LogicalException">
/// Thrown when the Id is the empty Guid, when the Url property is null or empty,
/// or when the Url property cannot be parsed by <see cref="Uri.TryCreate(string, UriKind, out Uri)"/>.
/// </exception>
private void Validate(XUrl url)
{
    bool hasNoId = url.Id == Guid.Empty;
    if (hasNoId)
    {
        throw new LogicalException("URL must have an Id");
    }

    string candidate = url.Url;
    if (string.IsNullOrEmpty(candidate))
    {
        throw new LogicalException("URL must have a URI for the URL property");
    }

    // Relative URIs are accepted as well as absolute ones.
    Uri parsed;
    bool parses = Uri.TryCreate(candidate, UriKind.RelativeOrAbsolute, out parsed);
    if (!parses)
    {
        throw new LogicalException("URL must have a valid URI for the URL property");
    }

    // A stricter regex-based check (scheme or "www." prefix) was sketched here and left
    // disabled; see
    // http://stackoverflow.com/questions/3228984/a-better-way-to-validate-url-in-c-sharp-than-try-catch
}
static object TaskLockCore = new object(); // guards the TaskDone counter
#region 抓取线程
/// <summary>
/// Crawl worker: visits the URL stored in slot <paramref name="Taskaisle"/>, collects
/// the links found on that page and queues every link that has not been seen yet.
/// </summary>
/// <param name="Taskaisle">
/// Index of the worker slot; selects both <c>ReadUrl[Taskaisle]</c> (page to visit)
/// and <c>WebDriver[Taskaisle]</c> (browser used to load it).
/// </param>
/// <remarks>
/// For every new link a separate thread downloads the target page to read its title;
/// links whose title looks like an error page or mojibake are discarded.
/// Any exception in the main body is printed and swallowed; <c>TaskDone</c> is
/// incremented in all cases.
/// NOTE(review): <c>ALLUrl</c> and <c>ReadUrl</c> are read and appended here without
/// any lock — confirm these collections are safe for concurrent use.
/// </remarks>
public static void SpiderCore(int Taskaisle)
{
    try
    {
        if (echo)
        {
            Console.WriteLine("访问:" + ReadUrl[Taskaisle]);
        }
        WebDriver[Taskaisle].Manage().Timeouts().ImplicitlyWait(TimeSpan.FromSeconds(3));
        WebDriver[Taskaisle].Navigate().GoToUrl(ReadUrl[Taskaisle]);

        // Disabled code used to record the visited page itself (URL + browser title)
        // into ALLUrl at this point.

        HtmlDocument page = new HtmlDocument();
        page.LoadHtml(WebDriver[Taskaisle].PageSource);
        HtmlNodeCollection hrefList = page.DocumentNode.SelectNodes(".//a[@href]");

        // SelectNodes yields null when nothing matches, so treat that as zero links.
        int hrefList_Count = 0;
        if (hrefList != null)
        {
            hrefList_Count = hrefList.Count;
        }

        for (int i2 = 0; i2 != hrefList_Count; i2++) // walk every anchor found on the page
        {
            HtmlNode href = hrefList[i2];
            HtmlAttribute att = href.Attributes["href"];
            bool IsNotOld = true;
            string HTTPUri = att.Value;

            // Rewrite links that do not contain "http" into absolute ones and blank out
            // anything too short or in an unsupported form.
            if (HTTPUri.Length < 2)
            {
                HTTPUri = "";
            }
            else if (HTTPUri.IndexOf("http") == -1 && HTTPUri.Substring(0, 2) == @"//")
            {
                // Protocol-relative link: only the leading "//" gets the scheme.
                // (Replace() would also rewrite any later "//" inside the path.)
                HTTPUri = "http:" + HTTPUri;
            }
            else if (HTTPUri.IndexOf("http") == -1 && HTTPUri.Substring(0, 2) == @"./")
            {
                // Only the leading "./" is swapped for the current page URL.
                // (Replace() would also rewrite every later "./", e.g. inside "../".)
                HTTPUri = ReadUrl[Taskaisle] + HTTPUri.Substring(2);
            }
            else if (HTTPUri.IndexOf("http") == -1 && HTTPUri.Substring(0, 1) == @"/")
            {
                HTTPUri = ReadUrl[Taskaisle] + HTTPUri.Substring(1, HTTPUri.Length - 1);
            }
            else if (HTTPUri.IndexOf("http") == -1)
            {
                HTTPUri = "";
            }

            // Skip links that were already collected or are already queued.
            for (int I_repeat = 0; I_repeat != ALLUrl.Count; I_repeat++)
            {
                if (ALLUrl[I_repeat].url == HTTPUri)
                {
                    IsNotOld = false;
                }
            }
            for (int I_repeat = 0; I_repeat != ReadUrl.Count; I_repeat++)
            {
                if (ReadUrl[I_repeat] == HTTPUri)
                {
                    IsNotOld = false;
                }
            }

            if (HTTPUri != "" && IsNotOld && HTTPUri.Length <= 250)
            {
                // Title-fetch thread: downloads the linked page and queues it if usable.
                Thread geturl = new Thread(() =>
                {
                    string geturlstring = HTTPUri;
                    try
                    {
                        HtmlAgilityPack.HtmlWeb get = new HtmlWeb();
                        HtmlDocument tdoc = get.Load(geturlstring);
                        XUrl DoneUrl = new XUrl();
                        DoneUrl.url = geturlstring;
                        if (tdoc != null)
                        {
                            // SelectSingleNode returns null for pages without a <title>;
                            // fall back to the URL instead of throwing and losing the link.
                            HtmlNode titleNode = tdoc.DocumentNode.SelectSingleNode("//title");
                            if (titleNode != null && titleNode.InnerText != null)
                            {
                                DoneUrl.Tile = titleNode.InnerText;
                            }
                            else
                            {
                                DoneUrl.Tile = geturlstring;
                            }

                            // Filter out error pages and titles that look mis-decoded
                            // (heuristic; known to need improvement).
                            if (DoneUrl.Tile != ""
                                && DoneUrl.Tile.IndexOf("404") == -1
                                && DoneUrl.Tile.IndexOf("NOT FOUND") == -1
                                && DoneUrl.Tile.IndexOf("not found") == -1
                                && DoneUrl.Tile.IndexOf("Not Found") == -1
                                && DoneUrl.Tile.IndexOf("¤") == -1
                                && DoneUrl.Tile.IndexOf("¢") == -1)
                            {
                                // Record the link and queue it for a later visit.
                                ALLUrl.Add(DoneUrl);
                                ReadUrl.Add(geturlstring);
                                if (echo)
                                {
                                    Console.WriteLine("添加链接:" + geturlstring + "标题:" + DoneUrl.Tile);
                                }
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        if (echo)
                        {
                            Console.WriteLine(ex.Message);
                        }
                    }
                });
                geturl.Start(); // start the title-fetch thread

                float nowcpu = cpuCounter.NextValue();
                if (ReadUrl.Count <= aisle)
                {
                    // While the queue is still small, spin until the fetch thread is no
                    // longer in the Running state before moving to the next link.
                    while (geturl.ThreadState == System.Threading.ThreadState.Running)
                    {
                    }
                }
                else if (nowcpu > cpumax && opti)
                {
                    // CPU throttling: pause, then interrupt the fetch thread if it is
                    // still running after one second.
                    if (echo)
                    {
                        Console.WriteLine("CPU总占用" + nowcpu + "超过设定值,开始限速");
                    }
                    Thread.Sleep(1000);
                    if (geturl.ThreadState == System.Threading.ThreadState.Running)
                    {
                        geturl.Interrupt();
                        Console.WriteLine("线程超时");
                    }
                    else
                    {
                        Debug.WriteLine(geturl.ThreadState);
                    }
                }
                // Deprecated ("CPU killer"): a disabled else-branch used to start a watchdog
                // thread that slept one second and aborted the fetch thread if still running.
            }
        }

        // Clear this slot's URL once the page has been processed.
        ReadUrl[Taskaisle] = null;
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex);
    }

    lock (TaskLockCore)
    {
        TaskDone++;
    }
    if (echo)
    {
        Console.WriteLine("访问完成");
    }
}
/// <summary>
/// Validates the record and, if validation passes, hands it to the data-access layer.
/// </summary>
/// <param name="url">Record to validate and save.</param>
/// <returns>The value returned by the data-access layer's <c>Save</c>.</returns>
public bool Save(XUrl url)
{
    // Validate throws on invalid input, so the save below is only reached for valid records.
    Validate(url);

    bool saved = _dal.Save(url);
    return saved;
}