Beispiel #1
0
        /// <summary>
        /// Persists a URL record by calling the <c>spr_URL_Save</c> stored procedure.
        /// </summary>
        /// <remarks>
        /// Optional values (empty or null strings, unset <c>Deleted</c>/<c>DeletedBy</c>) are sent as
        /// <c>DBNull.Value</c>. A <c>SqlParameter</c> whose value is a CLR <c>null</c> is treated as
        /// "not supplied", which makes the procedure fall back to its own default or fail when the
        /// parameter has none; <c>DBNull.Value</c> is what actually transmits SQL NULL.
        /// </remarks>
        /// <param name="url">The URL entity to store.</param>
        /// <returns>The result reported by <c>ExecuteSql</c> on the base class.</returns>
        public bool Save(XUrl url)
        {
            // Fully qualified so the method does not depend on a "using System;" directive.
            object dbNull = System.DBNull.Value;

            List<SqlParameter> paramList = new List<SqlParameter>
            {
                new SqlParameter("@Id", url.Id),

                // Empty strings are stored as NULL, exactly as before.
                new SqlParameter("@Name", string.IsNullOrEmpty(url.Name) ? dbNull : url.Name),
                new SqlParameter("@Description", string.IsNullOrEmpty(url.Description) ? dbNull : url.Description),
                new SqlParameter("@URL", string.IsNullOrEmpty(url.Url) ? dbNull : url.Url),

                new SqlParameter("@Created", url.Created),
                new SqlParameter("@CreatedBy", url.CreatedBy),

                new SqlParameter("@Deleted", url.Deleted.HasValue ? (object)url.Deleted.Value : dbNull),
                new SqlParameter("@DeletedBy", url.DeletedBy.HasValue ? (object)url.DeletedBy.Value : dbNull)
            };

            return base.ExecuteSql("spr_URL_Save", paramList);
        }
Beispiel #2
0
        /// <summary>
        /// Reads a single row from the <c>[URLs]</c> table by its id and maps it to an <c>XUrl</c>.
        /// </summary>
        /// <param name="id">Value bound to the <c>@Id</c> parameter of the query.</param>
        /// <returns>
        /// The populated entity with <c>IsNew</c> and <c>IsDirty</c> cleared, or <c>null</c> when the
        /// reader is null or returns no rows.
        /// </returns>
        public XUrl Get(Guid id)
        {
            string query = new StringBuilder()
                .AppendLine("SELECT [Name], [Description], [URL], [Created], [CreatedBy], [Deleted], [DeletedBy]")
                .AppendLine("FROM [URLs] WITH (NoLock) WHERE [Id] = @Id")
                .ToString();

            List<SqlParameter> args = new List<SqlParameter> { new SqlParameter("@Id", id) };

            using (SqlDataReader reader = base.OpenDataReaderInLine(query, args))
            {
                // Nothing to map: no reader at all, or the id matched no row.
                if (reader == null || !reader.HasRows)
                {
                    return null;
                }

                XUrl result = new XUrl();
                reader.Read();
                result.Id = id;

                // Column ordinals follow the SELECT list above; nullable columns are only
                // assigned when the database actually holds a value.
                if (!reader.IsDBNull(0)) { result.Name = reader.GetString(0); }
                if (!reader.IsDBNull(1)) { result.Description = reader.GetString(1); }
                if (!reader.IsDBNull(2)) { result.Url = reader.GetString(2); }

                result.Created   = reader.GetDateTime(3);
                result.CreatedBy = reader.GetGuid(4);

                if (!reader.IsDBNull(5)) { result.Deleted = reader.GetDateTime(5); }
                if (!reader.IsDBNull(6)) { result.DeletedBy = reader.GetGuid(6); }

                // Reset the tracking flags after all properties have been assigned.
                result.IsNew   = false;
                result.IsDirty = false;

                return result;
            }
        }
Beispiel #3
0
        /// <summary>
        /// Checks that a URL entity is fit to be saved.
        /// </summary>
        /// <param name="url">The entity to check.</param>
        /// <exception cref="LogicalException">
        /// Thrown when the id is the empty GUID, when the <c>Url</c> property is null or empty, or
        /// when <c>Url</c> cannot be parsed as a relative or absolute URI.
        /// </exception>
        private void Validate(XUrl url)
        {
            if (url.Id == Guid.Empty)
            {
                throw new LogicalException("URL must have an Id");
            }

            if (string.IsNullOrEmpty(url.Url))
            {
                throw new LogicalException("URL must have a URI for the URL property");
            }

            // Only the parse result matters; the Uri instance itself is discarded.
            Uri parsed;
            bool isWellFormed = Uri.TryCreate(url.Url, UriKind.RelativeOrAbsolute, out parsed);

            if (!isWellFormed)
            {
                throw new LogicalException("URL must have a valid URI for the URL property");
            }

            // A regex-based check (http/https/ftp/sftp or "www." prefixes) was tried here earlier and
            // is no longer used; see
            // http://stackoverflow.com/questions/3228984/a-better-way-to-validate-url-in-c-sharp-than-try-catch
        }
Beispiel #4
0
        static object TaskLockCore = new object(); // Thread lock; SpiderCore holds it while incrementing TaskDone.
        #region 抓取线程
        /// <summary>
        /// Crawl worker: loads the page stored in slot <paramref name="Taskaisle"/> of <c>ReadUrl</c>
        /// with the matching <c>WebDriver</c> instance, extracts every <c>a[href]</c> link and starts a
        /// title-fetch thread for each link that is not already known.
        /// </summary>
        /// <remarks>
        /// Each fetch thread appends accepted links to <c>ALLUrl</c> and <c>ReadUrl</c>. After a
        /// successful crawl the worker's own <c>ReadUrl</c> slot is set to <c>null</c>. <c>TaskDone</c>
        /// is incremented under <c>TaskLockCore</c> whether or not the crawl threw.
        /// NOTE(review): <c>ALLUrl</c> and <c>ReadUrl</c> are read and appended to from several threads
        /// here without synchronization; confirm that these collections are thread-safe.
        /// </remarks>
        /// <param name="Taskaisle">Worker slot index used to pick the URL and the web driver.</param>
        public static void SpiderCore(int Taskaisle)
        {
            try
            {
                if (echo)
                {
                    Console.WriteLine("访问:" + ReadUrl[Taskaisle]);
                }
                WebDriver[Taskaisle].Manage().Timeouts().ImplicitlyWait(TimeSpan.FromSeconds(3));
                WebDriver[Taskaisle].Navigate().GoToUrl(ReadUrl[Taskaisle]);

                HtmlDocument page = new HtmlDocument();
                page.LoadHtml(WebDriver[Taskaisle].PageSource);

                // SelectNodes returns null (not an empty collection) when nothing matches.
                HtmlNodeCollection hrefList = page.DocumentNode.SelectNodes(".//a[@href]");
                if (hrefList != null)
                {
                    // Walk every link found on the page.
                    foreach (HtmlNode href in hrefList)
                    {
                        HtmlAttribute att = href.Attributes["href"];
                        string HTTPUri    = att.Value;

                        // Rewrite links that do not contain "http" and discard useless or malformed
                        // ones. Only the leading marker is rewritten; the previous string.Replace
                        // calls also rewrote later "//" and "./" occurrences (e.g. inside "../").
                        // NOTE(review): relative links are appended to the page URL verbatim, which
                        // presumably relies on that URL ending in '/' - confirm.
                        bool hasHttp = HTTPUri.IndexOf("http", StringComparison.Ordinal) != -1;
                        if (HTTPUri.Length < 2)
                        {
                            HTTPUri = "";
                        }
                        else if (!hasHttp && HTTPUri.StartsWith("//", StringComparison.Ordinal))
                        {
                            HTTPUri = "http:" + HTTPUri;
                        }
                        else if (!hasHttp && HTTPUri.StartsWith("./", StringComparison.Ordinal))
                        {
                            HTTPUri = ReadUrl[Taskaisle] + HTTPUri.Substring(2);
                        }
                        else if (!hasHttp && HTTPUri.StartsWith("/", StringComparison.Ordinal))
                        {
                            HTTPUri = ReadUrl[Taskaisle] + HTTPUri.Substring(1);
                        }
                        else if (!hasHttp)
                        {
                            HTTPUri = "";
                        }

                        // Rejected or over-long links never start a fetch, so skip the scans below.
                        if (HTTPUri == "" || HTTPUri.Length > 250)
                        {
                            continue;
                        }

                        // Skip links that were already collected or are already queued.
                        bool alreadyKnown = false;
                        for (int i = 0; i < ALLUrl.Count && !alreadyKnown; i++)
                        {
                            if (ALLUrl[i].url == HTTPUri)
                            {
                                alreadyKnown = true;
                            }
                        }
                        for (int i = 0; i < ReadUrl.Count && !alreadyKnown; i++)
                        {
                            if (ReadUrl[i] == HTTPUri)
                            {
                                alreadyKnown = true;
                            }
                        }
                        if (alreadyKnown)
                        {
                            continue;
                        }

                        // Title-fetch thread.
                        Thread geturl = new Thread(() =>
                        {
                            string geturlstring = HTTPUri;
                            try
                            {
                                HtmlAgilityPack.HtmlWeb get = new HtmlWeb();
                                HtmlDocument tdoc           = get.Load(geturlstring);
                                XUrl DoneUrl = new XUrl();
                                DoneUrl.url  = geturlstring;
                                if (tdoc != null)
                                {
                                    // A page without a <title> element yields a null node. Check it
                                    // so the URL fallback below is reachable instead of throwing.
                                    HtmlNode titleNode = tdoc.DocumentNode.SelectSingleNode("//title");
                                    if (titleNode != null && titleNode.InnerText != null)
                                    {
                                        DoneUrl.Tile = titleNode.InnerText;
                                    }
                                    else
                                    {
                                        DoneUrl.Tile = geturlstring;
                                    }

                                    // Filter out unreachable pages and some garbled titles (needs improvement).
                                    if (DoneUrl.Tile != "" && DoneUrl.Tile.IndexOf("404") == -1 && DoneUrl.Tile.IndexOf("NOT FOUND") == -1 && DoneUrl.Tile.IndexOf("not found") == -1 && DoneUrl.Tile.IndexOf("Not Found") == -1 && DoneUrl.Tile.IndexOf("¤") == -1 && DoneUrl.Tile.IndexOf("¢") == -1)
                                    {
                                        // Record the link and queue it for crawling.
                                        ALLUrl.Add(DoneUrl);
                                        ReadUrl.Add(geturlstring);
                                        if (echo)
                                        {
                                            Console.WriteLine("添加链接:" + geturlstring + "标题:" + DoneUrl.Tile);
                                        }
                                    }
                                }
                            }
                            catch (Exception ex)
                            {
                                if (echo)
                                {
                                    Console.WriteLine(ex.Message);
                                }
                            }
                        });
                        geturl.Start(); // start the fetch thread

                        float nowcpu = cpuCounter.NextValue();
                        if (ReadUrl.Count <= aisle)
                        {
                            // While ReadUrl holds no more than `aisle` entries, wait for this fetch
                            // to finish before starting the next one. Join blocks without spinning;
                            // the previous empty while-loop on ThreadState burned a core and could
                            // fall through as soon as the thread entered a wait state.
                            geturl.Join();
                        }
                        else if (nowcpu > cpumax && opti)
                        {
                            if (echo)
                            {
                                Console.WriteLine("CPU总占用" + nowcpu + "超过设定值,开始限速");
                            }
                            Thread.Sleep(1000);
                            if (geturl.ThreadState == System.Threading.ThreadState.Running)
                            {
                                geturl.Interrupt();
                                Console.WriteLine("线程超时");
                            }
                            else
                            {
                                Debug.WriteLine(geturl.ThreadState);
                            }
                        }

                        // A watchdog thread that aborted fetches still running after one second
                        // used to be started here; it has been retired.
                    }
                }

                ReadUrl[Taskaisle] = null;
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);
            }
            lock (TaskLockCore)
            {
                TaskDone++;
            }
            if (echo)
            {
                Console.WriteLine("访问完成");
            }
        }
Beispiel #5
0
 /// <summary>
 /// Runs <c>Validate</c> on the URL and, if that does not throw, hands it to <c>_dal.Save</c>.
 /// </summary>
 /// <param name="url">The URL entity to validate and store.</param>
 /// <returns>The value returned by <c>_dal.Save</c>.</returns>
 public bool Save(XUrl url)
 {
     Validate(url);

     bool saved = _dal.Save(url);
     return saved;
 }