/// <summary>
/// Saves a single entity by forwarding it to the crawler service.
/// </summary>
/// <param name="entity">The entity to save.</param>
/// <param name="isExistFields">Fields used to detect duplicate data. The original note suggests null or an empty string is passed when no duplicate check is wanted (the check itself happens in the service, not here).</param>
/// <param name="isUpdate">Whether the record should be updated when it turns out to be a duplicate.</param>
/// <param name="isUpdateCtx">Whether CtxHtml and Ctx should be updated when the record is a duplicate.</param>
/// <param name="existFields">Additional fields added as extra conditions when updating duplicate data.</param>
/// <returns>true on success, false on failure (the value reported by the service).</returns>
public static bool SaveEntity(object entity, string isExistFields, bool isUpdate = false, bool isUpdateCtx = false, string existFields = null)
{
    // A fresh service proxy is created for every call; all arguments are forwarded unchanged.
    return NewService().SaveEntity(entity, isExistFields, isUpdate, isUpdateCtx, existFields);
}
/// <summary>
/// Command-line entry point. Registers the "crawl" command, which crawls
/// starting from a seed video id, and executes the command line.
/// The handler's result (0 on success, 1 on failure) becomes the process exit code.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
static void Main(string[] args)
{
    var app = new CommandLineApplication() { Name = "Coffeetube Crawler" };

    app.Command("crawl", command =>
    {
        command.Description = "Begins to crawl from a seed video id.";

        var seedIdOption = command.Option("--seed", "Id of YouTube Seed Video", CommandOptionType.SingleValue);
        var connectionStringArg = command.Option("--mongo", "Mongo database connection string.", CommandOptionType.SingleValue);
        var youtubeApiKey = command.Option("--api", "Youtube api key.", CommandOptionType.SingleValue);
        var depth = command.Option("--depth", "Search depth.", CommandOptionType.SingleValue);

        command.OnExecute(async () =>
        {
            // The --api option wins; otherwise fall back to the YOUTUBE_API_KEY environment variable.
            var apiKey = youtubeApiKey.HasValue()
                ? youtubeApiKey.Value()
                : Environment.GetEnvironmentVariable("YOUTUBE_API_KEY");

            var logger = new LoggerFactory()
                .AddConsole()
                .CreateLogger<CrawlerService>();

            // Validate the depth up front: int.Parse on a missing option would throw
            // and be reported as a crawl error, hiding the real cause.
            int searchDepth;
            if (!int.TryParse(depth.Value(), out searchDepth))
            {
                logger.LogError("Missing or invalid --depth value; an integer search depth is required.");
                return 1;
            }

            var crawler = new CrawlerService(
                new YouTubeApi(apiKey),
                new MongoRepository<YouTubeVideo>(connectionStringArg.Value(), "testdb", "videos"),
                logger);

            logger.LogInformation("Starting crawler...");
            try
            {
                await crawler.Crawl(seedIdOption.Value(), searchDepth);
            }
            catch (Exception e)
            {
                logger.LogError(e, "Error during crawl.");
                // Do not fall through to the "complete" message after a failure.
                return 1;
            }

            logger.LogInformation("Crawl complete.");
            return 0;
        });
    });

    // Surface the command's result to the caller instead of discarding it.
    Environment.ExitCode = app.Execute(args);
}
/// <summary>
/// Creates a new <c>CrawlerService</c> proxy configured with <c>CrawServiceUrl</c>
/// and a timeout of 1000 * 300.
/// </summary>
/// <returns>The configured service proxy.</returns>
/// <exception cref="Exception">
/// Thrown when the proxy cannot be created or configured. The original failure
/// is logged and attached as <see cref="Exception.InnerException"/>.
/// </exception>
private static CrawlerService NewService()
{
    try
    {
        CrawlerService service = new CrawlerService();
        service.Url = CrawServiceUrl;
        // NOTE(review): presumably milliseconds, i.e. 300 seconds — confirm against the proxy type.
        service.Timeout = 1000 * 300;
        return service;
    }
    catch (Exception ex)
    {
        Logger.Error(ex);
        Logger.Error("连接服务器失败");
        // Keep the exception type callers already see, but preserve the root cause
        // so the original stack trace is not lost.
        throw new Exception("连接服务器失败。", ex);
    }
}
/// <summary>
/// The main entry point for the application. Initializes the IoC container, then either
/// hands the service to <c>ServiceBase.Run</c> (non-interactive session) or runs it
/// in the console until a key is pressed (interactive session).
/// </summary>
public static void Main()
{
    IoC.Init();
    var service = new CrawlerService();

    if (Environment.UserInteractive)
    {
        // Console mode: Ctrl+C also stops the service.
        Console.CancelKeyPress += (sender, e) => service.Stop();

        service.Start();
        Console.WriteLine("Service started");

        // Block until any key is pressed, then shut down.
        Console.ReadKey();

        service.Stop();
        Console.WriteLine("Service stopped");
    }
    else
    {
        ServiceBase.Run(new ServiceBase[] { service });
    }
}
/// <summary>
/// Saves a batch of entities through the crawler service. According to the original note,
/// each record is checked for duplicates based on <paramref name="isExistFields"/> before saving
/// (the check is performed by the service, not in this method).
/// </summary>
/// <param name="entityList">The entities to save; copied into an object array before the call.</param>
/// <param name="isExistFields">Fields used for the duplicate check; null or an empty string when no existence check is wanted.</param>
/// <param name="listBaseAttach">Attachments sent along with the entities; null is treated as an empty list.</param>
/// <param name="successList">Receives the records that were successfully inserted.</param>
/// <param name="isUpdate">Forwarded unchanged to the service's AddItem call.</param>
/// <param name="isUpdateHtlCtx">Forwarded unchanged to the service's AddItem call.</param>
/// <param name="isUpdateAttach">Forwarded unchanged to the service's AddItem call.</param>
/// <returns>The integer returned by the service's AddItem call.</returns>
public static int SaveDatas(IList entityList, string isExistFields, List<BaseAttach> listBaseAttach, out object[] successList, bool isUpdate, bool isUpdateHtlCtx, bool isUpdateAttach)
{
    List<BaseAttach> attachments = listBaseAttach ?? new List<BaseAttach>();

    CrawlerService client = NewService();

    // The service expects a plain object array, so copy the untyped list element by element.
    object[] entities = new object[entityList.Count];
    int next = 0;
    foreach (object entity in entityList)
    {
        entities[next++] = entity;
    }

    // The sixth argument is always passed as null from here.
    return client.AddItem(entities, isExistFields, attachments.ToArray(), isUpdate, isUpdateHtlCtx, null, isUpdateAttach, out successList);
}
/// <summary>
/// Console entry point: downloads the JD home page, then walks the JD phone search
/// result pages, accumulating the parsed products and printing the running total.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // Data crawling service
    CrawlerService service = new CrawlerService();

    #region JD home page product titles

    // Fetch the JD home page HTML. The title extraction below is disabled,
    // so the result is currently unused, but the request is still issued.
    string pageHtml = RequestHelper.HttpGetPageHtml("https://www.jd.com/");

    //// Home page product titles
    //string xpathFirst = "//*[@class='cate_menu_item']/a";
    //List<ProductInfo> productList = service.GetProductList(pageHtml, xpathFirst);

    #endregion

    #region JD mobile phone listings

    const string searchUrlPrefix = "https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&cid2=653&cid3=655&page=";

    List<ProductInfo> phoneList = new List<ProductInfo>();

    // Total number of product pages
    int totalPages = 200;

    // Only odd page numbers are requested: per the original note, each even page
    // currently returns the same data as the preceding odd page.
    for (int page = 1; page < totalPages - 1; page += 2)
    {
        // Download the HTML of this search result page
        string phoneHtml = RequestHelper.WebClientDownloadHtml(searchUrlPrefix + page);

        // Parse the phones on this page and add them to the running list
        phoneList.AddRange(service.GetPhoneList(phoneHtml));

        Console.WriteLine($"----------当前抓取总数{phoneList.Count}-----------");
    }

    #endregion

    Console.ReadKey();
}
/// <summary>
/// Executes a SQL statement through the crawler service and returns the result as a DataTable.
/// </summary>
/// <param name="sql">The SQL statement; it is sent to the service prefixed with "kdxx".</param>
/// <returns>The DataTable returned by the service.</returns>
public static DataTable GetDbData(string sql)
{
    CrawlerService client = NewService();

    // NOTE(review): the "kdxx" prefix is presumably a marker the service expects — confirm.
    string request = string.Concat("kdxx", sql);
    return client.GetDbData(request);
}
/// <summary>
/// Executes a SQL statement through the crawler service and returns the number of affected rows.
/// </summary>
/// <param name="sql">The SQL statement; it is sent to the service prefixed with "kdxx".</param>
/// <returns>The affected-row count reported by the service.</returns>
public static int ExecuteSql(string sql)
{
    CrawlerService client = NewService();

    // NOTE(review): the "kdxx" prefix is presumably a marker the service expects — confirm.
    string request = string.Concat("kdxx", sql);
    return client.ExecuteSql(request);
}
/// <summary>
/// Executes a SQL query through the crawler service and returns the first column of the first row.
/// </summary>
/// <param name="sql">The SQL statement; it is sent to the service prefixed with "kdxx".</param>
/// <returns>The scalar value returned by the service.</returns>
public static object ExecuteScalar(string sql)
{
    CrawlerService client = NewService();

    // NOTE(review): the "kdxx" prefix is presumably a marker the service expects — confirm.
    string request = string.Concat("kdxx", sql);
    return client.ExecuteScalar(request);
}
/// <summary>
/// Executes a stored procedure through the crawler service. Per the original note, the
/// default procedure cleans up duplicate and badly formatted records in the corporate database.
/// </summary>
/// <param name="proName">Name of the stored procedure to run; defaults to "UP_ClearCorpRepeat".</param>
public static void ExecuteProcedure(string proName = "UP_ClearCorpRepeat")
{
    // A fresh service proxy is created for every call; nothing is returned to the caller.
    NewService().ExecuteProcedure(proName);
}