Пример #1
0
        /// <summary>
        /// Inserts a new row into <c>auto_capture_config</c> using the values carried by
        /// <paramref name="info"/>.
        /// </summary>
        /// <param name="info">Capture configuration to persist; must not be null.</param>
        /// <returns>
        /// The value produced by the trailing <c>select last_insert_id()</c> statement,
        /// as read by <c>GetLong</c>.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="info"/> is null.</exception>
        public long Add(AutoCaptureInfo info)
        {
            if (info == null)
            {
                throw new ArgumentNullException("info");
            }

            var sqlTxt = @"insert into auto_capture_config (Name,CategoryId,ThridCategoryId,ListUrl,ListXPath,DetailUrl,DetailXpath,LinkBaseUrl)
                                      values
                                      (@Name,@CategoryId,@ThridCategoryId,@ListUrl,@ListXPath,@DetailUrl,@DetailXpath,@LinkBaseUrl);
                                      select last_insert_id();";

            using (DbCommand cmd = DbInstance.GetSqlStringCommand(sqlTxt))
            {
                SetCommandParameter(cmd, "Name", DbType.String, info.Name);
                SetCommandParameter(cmd, "CategoryId", DbType.Int64, info.CategoryId);
                // Bound as Int64 to match how Edit binds the same column; the previous
                // UInt16 binding could not represent ids above 65535.
                SetCommandParameter(cmd, "ThridCategoryId", DbType.Int64, info.ThridCategoryId);
                SetCommandParameter(cmd, "ListUrl", DbType.String, info.ListUrl);
                SetCommandParameter(cmd, "ListXPath", DbType.String, info.ListXPath);
                SetCommandParameter(cmd, "DetailUrl", DbType.String, info.DetailUrl);
                SetCommandParameter(cmd, "DetailXpath", DbType.String, info.DetailXpath);
                SetCommandParameter(cmd, "LinkBaseUrl", DbType.String, info.LinkBaseUrl);

                return GetLong(cmd);
            }
        }
Пример #2
0
        /// <summary>
        /// Updates the <c>auto_capture_config</c> row identified by <c>info.Id</c> and, when the
        /// stored CategoryId differs from the new one, re-points the <c>article</c> rows that
        /// share the configuration's third-party category id to the new category.
        /// </summary>
        /// <param name="info">Configuration carrying the new values; must not be null and must have an Id.</param>
        /// <exception cref="ArgumentNullException"><paramref name="info"/> is null.</exception>
        /// <exception cref="ArgumentException"><c>info.Id</c> has no value.</exception>
        public void Edit(AutoCaptureInfo info)
        {
            if (info == null)
            {
                throw new ArgumentNullException("info");
            }

            if (!info.Id.HasValue)
            {
                // ArgumentException derives from Exception, so existing catch blocks keep working;
                // the message text is unchanged.
                throw new ArgumentException("id不能为空");
            }

            var sqlTxt = @"update auto_capture_config set
                                     Name=@Name,
                                     CategoryId=@CategoryId,
                                     ThridCategoryId=@ThridCategoryId,
                                     ListUrl=@ListUrl,
                                     ListXPath=@ListXPath,
                                     DetailUrl=@DetailUrl,
                                     DetailXpath=@DetailXpath,
                                     LinkBaseUrl=@LinkBaseUrl
                                     where Id=@Id";

            // Read the category currently stored BEFORE the update so the change can be detected below.
            // NOTE(review): Id is a Nullable value (bound as Int64 below), so the formatted text is
            // presumably numeric only; switch to a parameterized command if GetNullAbleLong offers one.
            var sql = string.Format("select CategoryId from auto_capture_config where Id={0}", info.Id);
            var cid = GetNullAbleLong(sql);

            using (DbCommand cmd = DbInstance.GetSqlStringCommand(sqlTxt))
            {
                SetCommandParameter(cmd, "Name", DbType.String, info.Name);
                SetCommandParameter(cmd, "CategoryId", DbType.Int64, info.CategoryId);
                SetCommandParameter(cmd, "ThridCategoryId", DbType.Int64, info.ThridCategoryId);
                SetCommandParameter(cmd, "ListUrl", DbType.String, info.ListUrl);
                SetCommandParameter(cmd, "ListXPath", DbType.String, info.ListXPath);
                SetCommandParameter(cmd, "DetailUrl", DbType.String, info.DetailUrl);
                SetCommandParameter(cmd, "DetailXpath", DbType.String, info.DetailXpath);
                SetCommandParameter(cmd, "LinkBaseUrl", DbType.String, info.LinkBaseUrl);

                SetCommandParameter(cmd, "Id", DbType.Int64, info.Id);
                ExecSql(cmd);
            }

            if (cid != info.CategoryId)
            {
                // Parameterized like the update above instead of string-formatted SQL, so an empty
                // value can no longer produce a malformed statement.
                // NOTE(review): relies on SetCommandParameter handling a null CategoryId the same
                // way it already must for the config update above.
                const string articleSql = "update article set CategoryId=@CategoryId where ThirdCategoryId=@ThirdCategoryId;";

                using (DbCommand articleCmd = DbInstance.GetSqlStringCommand(articleSql))
                {
                    SetCommandParameter(articleCmd, "CategoryId", DbType.Int64, info.CategoryId);
                    SetCommandParameter(articleCmd, "ThirdCategoryId", DbType.Int64, info.ThridCategoryId);
                    ExecSql(articleCmd);
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Runs a full capture for one configuration: pages through <c>CaptureList</c> starting at
        /// page 1 and hands each page of results to <c>ArticleDAL.Instance.Add</c>.
        /// Does nothing when the configuration is already marked as capturing.
        /// </summary>
        /// <param name="cfg">Capture configuration; must not be null and must have an Id.</param>
        /// <param name="existRefIds">Passed through unchanged to <c>CaptureList</c>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="cfg"/> is null.</exception>
        /// <exception cref="Exception">
        /// Any failure while paging is wrapped with a configuration hint; the original exception
        /// is available as <c>InnerException</c>.
        /// </exception>
        public static void Capture(AutoCaptureInfo cfg, Dictionary <long, long> existRefIds)
        {
            if (cfg == null)
            {
                throw new ArgumentNullException("cfg");
            }

            if (cfg.Status == AutoCatureStatus.Capturing)
            {
                return;
            }

            AutoCaptureDAL.Instance.SetStatus(cfg.Id.Value, AutoCatureStatus.Capturing);

            var  isLoop = true;
            var  index  = 1;
            long lastId = 0;

            try
            {
                do
                {
                    var dataList = CaptureList(cfg, existRefIds, index);
                    var lastId2  = GetDataListMaxId(dataList);

                    // Keep paging only while the page has rows and its max id differs from the
                    // previously stored one.
                    isLoop = (dataList != null && dataList.Count > 0) && lastId2 != lastId;

                    // Guard against data that would otherwise cause an infinite loop:
                    // only store a page whose max id is positive and new.
                    if (lastId2 > 0 && lastId2 != lastId)
                    {
                        lastId = lastId2;
                        ArticleDAL.Instance.Add(dataList);
                    }
                    index++;
                } while (isLoop);
            }
            catch (Exception ex)
            {
                // Keep the original exception as InnerException so the stack trace and
                // concrete type are not lost; the message text is unchanged.
                throw new Exception("抓取数据失败,请检查抓取参数配置:" + ex.Message, ex);
            }
            finally
            {
                // Always release the "capturing" flag, even when paging failed.
                AutoCaptureDAL.Instance.SetStatus(cfg.Id.Value, AutoCatureStatus.Normal);
            }
        }
Пример #4
0
        /// <summary>
        /// Downloads one page of the configured list URL, selects nodes with
        /// <c>cfg.ListXPath</c>, and builds an <c>ArticleDetailInfo</c> (including detail content
        /// and cover from <c>CaptureDetails</c>) for every node whose link carries a numeric
        /// <c>infoid</c> parameter.
        /// </summary>
        /// <param name="cfg">Capture configuration; list/detail URL and XPath values must be non-blank.</param>
        /// <param name="existRefIds">
        /// Optional map whose values are third-party ids already stored and whose keys are assigned
        /// to <c>model.Id</c> on a match. May be null or empty.
        /// </param>
        /// <param name="index">Page number substituted for the <c>{pageindex}</c> placeholder.</param>
        /// <returns>
        /// The captured items, or null when the configuration is incomplete, the download failed
        /// or was blank, or the XPath selected no nodes.
        /// </returns>
        public static List <ArticleDetailInfo> CaptureList(AutoCaptureInfo cfg, Dictionary <long, long> existRefIds, int index)
        {
            if (cfg == null ||
                string.IsNullOrWhiteSpace(cfg.ListUrl) ||
                string.IsNullOrWhiteSpace(cfg.ListXPath) ||
                string.IsNullOrWhiteSpace(cfg.DetailUrl) ||
                string.IsNullOrWhiteSpace(cfg.DetailXpath))
            {
                return null;
            }

            // Placeholders are matched case-insensitively, but the rest of the URL keeps its
            // original casing (lower-casing the whole URL can break case-sensitive paths/queries).
            var listUrl = ReplacePlaceholderIgnoreCase(cfg.ListUrl, "{pageindex}", index.ToString());
            listUrl = ReplacePlaceholderIgnoreCase(listUrl, "{categoryid}", cfg.ThridCategoryId.ToString());

            var res = string.Empty;
            using (WebClient client = new WebClient())
            {
                client.Encoding = Encoding.Default;
                try
                {
                    res = client.DownloadString(listUrl);
                }
                catch (Exception ex)
                {
                    // Best effort: a failed download is logged and treated as "no data".
                    Point.Common.Core.SystemLoger.Current.Write(string.Format("获取[{0}]数据失败:{1}", listUrl, ex.Message));
                }
            }

            if (string.IsNullOrWhiteSpace(res))
            {
                return null;
            }

            var doc = new HtmlDocument();
            doc.LoadHtml(res);

            var rootNode = doc.DocumentNode;
            if (rootNode == null)
            {
                return null;
            }

            var list = rootNode.SelectNodes(cfg.ListXPath);
            if (list == null || list.Count == 0)
            {
                return null;
            }

            // Reverse lookup (third-party id -> local id), built once instead of scanning the
            // dictionary twice per node. The first key seen for a value wins, matching the
            // previous First(...) behaviour.
            Dictionary<long, long> idByRefId = null;
            if (existRefIds != null && existRefIds.Count > 0)
            {
                idByRefId = new Dictionary<long, long>(existRefIds.Count);
                foreach (var pair in existRefIds)
                {
                    if (!idByRefId.ContainsKey(pair.Value))
                    {
                        idByRefId.Add(pair.Value, pair.Key);
                    }
                }
            }

            var dataList = new List <ArticleDetailInfo>();
            foreach (var node in list)
            {
                // The attribute indexer yields null for a missing attribute; a node without a
                // link cannot be captured, so it is skipped instead of crashing the whole page.
                var hrefAttr = node.Attributes["href"];
                if (hrefAttr == null)
                {
                    continue;
                }

                var titleAttr = node.Attributes["title"];
                var title     = titleAttr != null ? titleAttr.Value : string.Empty;
                var refId     = GetUrlParmsValue(hrefAttr.Value, "infoid");

                long _refId;
                if (!Int64.TryParse(refId, out _refId))
                {
                    continue;
                }

                var model = new ArticleDetailInfo()
                {
                    Title           = title,
                    ThirdId         = _refId,
                    ThirdCategoryId = cfg.ThridCategoryId,
                    CategoryId      = cfg.CategoryId,
                    CreateDate      = DateTime.Now
                };

                long existingId;
                if (idByRefId != null && idByRefId.TryGetValue(_refId, out existingId))
                {
                    model.Id = existingId;
                }

                // Fetch the detail page for this article.
                var details_url = ReplacePlaceholderIgnoreCase(cfg.DetailUrl, "{articleid}", _refId.ToString());
                var cover       = string.Empty;

                model.Content = CaptureDetails(details_url, cfg.DetailXpath, cfg.LinkBaseUrl, cfg.ThridCategoryId, out cover);
                model.Cover   = cover;
                dataList.Add(model);
            }

            return dataList;
        }

        /// <summary>
        /// Replaces every occurrence of <paramref name="placeholder"/> in
        /// <paramref name="template"/> with <paramref name="value"/>, ignoring case and leaving
        /// the rest of the template untouched.
        /// </summary>
        private static string ReplacePlaceholderIgnoreCase(string template, string placeholder, string value)
        {
            // A match evaluator is used so that '$' in the value is never treated as a
            // regex substitution token.
            return System.Text.RegularExpressions.Regex.Replace(
                template,
                System.Text.RegularExpressions.Regex.Escape(placeholder),
                match => value,
                System.Text.RegularExpressions.RegexOptions.IgnoreCase |
                System.Text.RegularExpressions.RegexOptions.CultureInvariant);
        }