/// <summary>
/// Inserts a new row into <c>auto_capture_config</c> from the given configuration
/// and returns the value read back by <c>select last_insert_id()</c>.
/// </summary>
/// <param name="info">Configuration to persist; must not be null.</param>
/// <returns>The result of <c>GetLong</c> on the insert command (the generated row id).</returns>
/// <exception cref="ArgumentNullException"><paramref name="info"/> is null.</exception>
public long Add(AutoCaptureInfo info)
{
    if (info == null)
    {
        throw new ArgumentNullException("info");
    }

    var sqlTxt = @"insert into auto_capture_config (Name,CategoryId,ThridCategoryId,ListUrl,ListXPath,DetailUrl,DetailXpath,LinkBaseUrl) values (@Name,@CategoryId,@ThridCategoryId,@ListUrl,@ListXPath,@DetailUrl,@DetailXpath,@LinkBaseUrl); select last_insert_id();";

    using (DbCommand cmd = DbInstance.GetSqlStringCommand(sqlTxt))
    {
        SetCommandParameter(cmd, "Name", DbType.String, info.Name);
        SetCommandParameter(cmd, "CategoryId", DbType.Int64, info.CategoryId);
        // Bound as Int64 to agree with Edit; the previous UInt16 declaration could not
        // represent third-party category ids above 65535.
        SetCommandParameter(cmd, "ThridCategoryId", DbType.Int64, info.ThridCategoryId);
        SetCommandParameter(cmd, "ListUrl", DbType.String, info.ListUrl);
        SetCommandParameter(cmd, "ListXPath", DbType.String, info.ListXPath);
        SetCommandParameter(cmd, "DetailUrl", DbType.String, info.DetailUrl);
        SetCommandParameter(cmd, "DetailXpath", DbType.String, info.DetailXpath);
        SetCommandParameter(cmd, "LinkBaseUrl", DbType.String, info.LinkBaseUrl);

        return GetLong(cmd);
    }
}
/// <summary>
/// Updates an existing <c>auto_capture_config</c> row and, when the row's
/// <c>CategoryId</c> differs from the value stored before the update, applies the new
/// <c>CategoryId</c> to every <c>article</c> row with the same <c>ThirdCategoryId</c>.
/// </summary>
/// <param name="info">Configuration to save; must not be null and must carry an Id.</param>
/// <exception cref="ArgumentNullException"><paramref name="info"/> is null.</exception>
/// <exception cref="Exception"><c>info.Id</c> has no value.</exception>
public void Edit(AutoCaptureInfo info)
{
    if (info == null)
    {
        throw new ArgumentNullException("info");
    }

    if (!info.Id.HasValue)
    {
        throw new Exception("id不能为空");
    }

    var sqlTxt = @"update auto_capture_config set Name=@Name, CategoryId=@CategoryId, ThridCategoryId=@ThridCategoryId, ListUrl=@ListUrl, ListXPath=@ListXPath, DetailUrl=@DetailUrl, DetailXpath=@DetailXpath, LinkBaseUrl=@LinkBaseUrl where Id=@Id";

    // Read the stored category BEFORE the update so the change can be detected afterwards.
    // Id is numeric, so formatting it into the statement cannot inject SQL.
    // NOTE(review): this read, the update and the article update are separate statements;
    // no transaction is visible here.
    var sql = string.Format("select CategoryId from auto_capture_config where Id={0}", info.Id);
    var cid = GetNullAbleLong(sql);

    using (DbCommand cmd = DbInstance.GetSqlStringCommand(sqlTxt))
    {
        SetCommandParameter(cmd, "Name", DbType.String, info.Name);
        SetCommandParameter(cmd, "CategoryId", DbType.Int64, info.CategoryId);
        SetCommandParameter(cmd, "ThridCategoryId", DbType.Int64, info.ThridCategoryId);
        SetCommandParameter(cmd, "ListUrl", DbType.String, info.ListUrl);
        SetCommandParameter(cmd, "ListXPath", DbType.String, info.ListXPath);
        SetCommandParameter(cmd, "DetailUrl", DbType.String, info.DetailUrl);
        SetCommandParameter(cmd, "DetailXpath", DbType.String, info.DetailXpath);
        SetCommandParameter(cmd, "LinkBaseUrl", DbType.String, info.LinkBaseUrl);
        SetCommandParameter(cmd, "Id", DbType.Int64, info.Id);
        ExecSql(cmd);
    }

    if (cid != info.CategoryId)
    {
        // Parameterized like every other statement in this class instead of being
        // assembled with string.Format; a null CategoryId is passed through the parameter
        // the same way the config update above passes it.
        var articleSql = "update article set CategoryId=@CategoryId where ThirdCategoryId=@ThirdCategoryId;";
        using (DbCommand articleCmd = DbInstance.GetSqlStringCommand(articleSql))
        {
            SetCommandParameter(articleCmd, "CategoryId", DbType.Int64, info.CategoryId);
            SetCommandParameter(articleCmd, "ThirdCategoryId", DbType.Int64, info.ThridCategoryId);
            ExecSql(articleCmd);
        }
    }
}
/// <summary>
/// Captures articles for one configuration page by page, starting at page 1, and stores
/// each page through <c>ArticleDAL.Instance.Add</c>. The configuration is flagged as
/// <c>Capturing</c> for the duration of the run and reset to <c>Normal</c> afterwards,
/// whether the run succeeds or fails.
/// </summary>
/// <param name="cfg">Capture configuration; skipped when its status is already <c>Capturing</c>.</param>
/// <param name="existRefIds">Passed through unchanged to <c>CaptureList</c>.</param>
/// <exception cref="Exception">
/// Any failure while capturing or storing; the original exception is available as
/// <c>InnerException</c>.
/// </exception>
public static void Capture(AutoCaptureInfo cfg, Dictionary<long, long> existRefIds)
{
    if (cfg.Status == AutoCatureStatus.Capturing)
    {
        return;
    }

    // cfg.Id.Value throws if the configuration has no Id.
    AutoCaptureDAL.Instance.SetStatus(cfg.Id.Value, AutoCatureStatus.Capturing);

    var isLoop = true;
    var index = 1;
    long lastId = 0;

    try
    {
        do
        {
            var dataList = CaptureList(cfg, existRefIds, index);
            var lastId2 = GetDataListMaxId(dataList);

            // Stop on an empty page, or when the page's max id equals the previous page's:
            // this guards against an endless loop caused by the data itself.
            isLoop = (dataList != null && dataList.Count > 0) && lastId2 != lastId;

            if (lastId2 > 0 && lastId2 != lastId)
            {
                lastId = lastId2;
                ArticleDAL.Instance.Add(dataList);
            }

            index++;
        }
        while (isLoop);
    }
    catch (Exception ex)
    {
        // Keep the original exception as InnerException so its type and stack trace survive.
        throw new Exception("抓取数据失败,请检查抓取参数配置:" + ex.Message, ex);
    }
    finally
    {
        AutoCaptureDAL.Instance.SetStatus(cfg.Id.Value, AutoCatureStatus.Normal);
    }
}
/// <summary>
/// Downloads one list page for the given configuration, selects the nodes matched by
/// <c>cfg.ListXPath</c>, and builds an <c>ArticleDetailInfo</c> (including content and cover
/// fetched through <c>CaptureDetails</c>) for every node whose <c>href</c> carries a numeric
/// <c>infoid</c> value.
/// </summary>
/// <param name="cfg">Capture configuration supplying the URL templates and XPath expressions.</param>
/// <param name="existRefIds">
/// Optional map whose values are third-party ids and whose keys are assigned to
/// <c>model.Id</c> when a captured id matches; may be null or empty.
/// </param>
/// <param name="index">Page number substituted for <c>{pageindex}</c> in the list URL.</param>
/// <returns>
/// The captured articles, or null when the configuration is incomplete, the download
/// fails or is blank, or the XPath matches no nodes.
/// </returns>
public static List<ArticleDetailInfo> CaptureList(AutoCaptureInfo cfg, Dictionary<long, long> existRefIds, int index)
{
    if (cfg == null
        || string.IsNullOrWhiteSpace(cfg.ListUrl)
        || string.IsNullOrWhiteSpace(cfg.ListXPath)
        || string.IsNullOrWhiteSpace(cfg.DetailUrl)
        || string.IsNullOrWhiteSpace(cfg.DetailXpath))
    {
        return null;
    }

    // The templates are lowercased so the placeholders match regardless of how they were
    // typed. Invariant casing is used so the match does not depend on the thread culture.
    // NOTE(review): this lowercases the whole URL, not just the placeholder - confirm the
    // target sites are not case-sensitive in path or query.
    var listUrl = cfg.ListUrl.ToLowerInvariant()
        .Replace("{pageindex}", index.ToString())
        .Replace("{categoryid}", cfg.ThridCategoryId.ToString());
    var detailUrlTemplate = cfg.DetailUrl.ToLowerInvariant();

    var res = string.Empty;
    using (WebClient client = new WebClient())
    {
        client.Encoding = Encoding.Default;
        try
        {
            res = client.DownloadString(listUrl);
        }
        catch (Exception ex)
        {
            // Best effort: a failed download is logged and treated as an empty page.
            Point.Common.Core.SystemLoger.Current.Write(string.Format("获取[{0}]数据失败:{1}", listUrl, ex.Message));
        }
    }

    if (string.IsNullOrWhiteSpace(res))
    {
        return null;
    }

    var doc = new HtmlDocument();
    doc.LoadHtml(res);
    var rootNode = doc.DocumentNode;
    if (rootNode == null)
    {
        return null;
    }

    var list = rootNode.SelectNodes(cfg.ListXPath);
    if (list == null || list.Count == 0)
    {
        return null;
    }

    // Reverse lookup (third-party id -> existing key) built once, replacing a
    // Values.Contains + First scan per node. The first key seen for a value wins,
    // which matches what First(...) returned.
    Dictionary<long, long> idByRefId = null;
    if (existRefIds != null && existRefIds.Count > 0)
    {
        idByRefId = new Dictionary<long, long>(existRefIds.Count);
        foreach (var pair in existRefIds)
        {
            if (!idByRefId.ContainsKey(pair.Value))
            {
                idByRefId.Add(pair.Value, pair.Key);
            }
        }
    }

    var dataList = new List<ArticleDetailInfo>();
    foreach (var node in list)
    {
        // A node without an href cannot yield an id; skip it instead of letting a
        // null attribute abort the whole page.
        var hrefAttr = node.Attributes["href"];
        if (hrefAttr == null || string.IsNullOrWhiteSpace(hrefAttr.Value))
        {
            continue;
        }

        var titleAttr = node.Attributes["title"];
        var title = titleAttr != null ? titleAttr.Value : string.Empty;

        var refId = GetUrlParmsValue(hrefAttr.Value, "infoid");
        long _refId;
        if (!Int64.TryParse(refId, out _refId))
        {
            continue;
        }

        var model = new ArticleDetailInfo()
        {
            Title = title,
            ThirdId = _refId,
            ThirdCategoryId = cfg.ThridCategoryId,
            CategoryId = cfg.CategoryId,
            CreateDate = DateTime.Now
        };

        long existingId;
        if (idByRefId != null && idByRefId.TryGetValue(_refId, out existingId))
        {
            model.Id = existingId;
        }

        // Fetch the detail page for this article.
        var details_url = detailUrlTemplate.Replace("{articleid}", _refId.ToString());
        string cover;
        model.Content = CaptureDetails(details_url, cfg.DetailXpath, cfg.LinkBaseUrl, cfg.ThridCategoryId, out cover);
        model.Cover = cover;

        dataList.Add(model);
    }

    return dataList;
}