Ejemplo n.º 1
0
        /// <summary>
        /// Saves a single entity by forwarding it to a freshly created <c>CrawlerService</c>.
        /// </summary>
        /// <param name="entity">The entity to save.</param>
        /// <param name="isExistFields">Field(s) used to detect duplicate data; pass null or an empty string when no duplicate check is wanted.</param>
        /// <param name="isUpdate">Whether the existing record should be updated when a duplicate is found.</param>
        /// <param name="isUpdateCtx">Whether CtxHtml and Ctx should be updated when a duplicate is found.</param>
        /// <param name="existFields">Additional fields added as extra conditions when updating a duplicate.</param>
        /// <returns>true on success, false on failure (the value reported by the service).</returns>
        public static bool SaveEntity(object entity, string isExistFields, bool isUpdate = false, bool isUpdateCtx = false, string existFields = null)
        {
            // Every argument is passed through unchanged; the service does the actual work.
            return NewService().SaveEntity(entity, isExistFields, isUpdate, isUpdateCtx, existFields);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Command-line entry point. Registers the "crawl" command, which crawls
        /// YouTube starting from a seed video id down to the requested depth and
        /// stores results through a Mongo-backed repository.
        /// </summary>
        /// <param name="args">Raw command-line arguments.</param>
        /// <remarks>
        /// The process exit code is 0 when the crawl completes and 1 when a
        /// required option is missing/invalid or the crawl throws.
        /// </remarks>
        static void Main(string[] args)
        {
            var app = new CommandLineApplication()
            {
                Name = "Coffeetube Crawler"
            };

            app.Command("crawl", command =>
            {
                command.Description = "Begins to crawl from a seed video id.";

                var seedIdOption        = command.Option("--seed", "Id of YouTube Seed Video", CommandOptionType.SingleValue);
                var connectionStringArg = command.Option("--mongo", "Mongo database connection string.", CommandOptionType.SingleValue);
                var youtubeApiKey       = command.Option("--api", "Youtube api key.", CommandOptionType.SingleValue);
                var depth = command.Option("--depth", "Search depth.", CommandOptionType.SingleValue);

                command.OnExecute(async() =>
                {
                    // Created first so that option-validation failures can be reported too.
                    var logger = new LoggerFactory()
                                 .AddConsole()
                                 .CreateLogger <CrawlerService>();

                    // The API key may come from the command line or from the environment.
                    var apiKey = youtubeApiKey.HasValue()
                        ? youtubeApiKey.Value()
                        : Environment.GetEnvironmentVariable("YOUTUBE_API_KEY");

                    if (string.IsNullOrEmpty(apiKey))
                    {
                        logger.LogError("No YouTube api key supplied: pass --api or set YOUTUBE_API_KEY.");
                        return(1);
                    }

                    if (!seedIdOption.HasValue())
                    {
                        logger.LogError("Missing required option --seed.");
                        return(1);
                    }

                    if (!connectionStringArg.HasValue())
                    {
                        logger.LogError("Missing required option --mongo.");
                        return(1);
                    }

                    // TryParse covers both a missing --depth (null value) and non-numeric input.
                    int searchDepth;
                    if (!int.TryParse(depth.Value(), out searchDepth))
                    {
                        logger.LogError("Option --depth is required and must be an integer.");
                        return(1);
                    }

                    var crawler = new CrawlerService(
                        new YouTubeApi(
                            apiKey),
                        new MongoRepository <YouTubeVideo>(
                            connectionStringArg.Value(),
                            "testdb",
                            "videos"),
                        logger);

                    logger.LogInformation("Starting crawler...");

                    try
                    {
                        await crawler.Crawl(seedIdOption.Value(), searchDepth);
                    }
                    catch (Exception e)
                    {
                        logger.LogError(e, "Error during crawl.");

                        // Do not report completion or success after a failed crawl.
                        return(1);
                    }

                    logger.LogInformation("Crawl complete.");

                    return(0);
                });
            });

            // Main returns void, so surface the command's result through the process exit code.
            Environment.ExitCode = app.Execute(args);
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Creates a <c>CrawlerService</c> client whose <c>Url</c> is set to
 /// <c>CrawServiceUrl</c> and whose <c>Timeout</c> is set to 300000.
 /// </summary>
 /// <returns>The configured service client.</returns>
 /// <exception cref="Exception">
 /// Creating or configuring the client failed. The original error is logged
 /// and attached as <see cref="Exception.InnerException"/>.
 /// </exception>
 private static CrawlerService NewService()
 {
     try
     {
         CrawlerService service = new CrawlerService();
         service.Url     = CrawServiceUrl;
         // NOTE(review): presumably milliseconds (i.e. 5 minutes) — confirm against the proxy type.
         service.Timeout = 1000 * 300;
         return(service);
     }
     catch (Exception ex)
     {
         Logger.Error(ex);
         Logger.Error("连接服务器失败");
         // Same exception type and message as before, but chain the cause so
         // callers and logs further up can still see what actually went wrong.
         throw new Exception("连接服务器失败。", ex);
     }
 }
Ejemplo n.º 4
0
 /// <summary>
 /// The main entry point for the application. Without an interactive user the
 /// crawler is handed to <c>ServiceBase.Run</c>; otherwise it is started in the
 /// console and stopped on a key press (or on Ctrl+C).
 /// </summary>
 public static void Main()
 {
     IoC.Init();
     var crawler = new CrawlerService();

     if (!Environment.UserInteractive)
     {
         // Non-interactive: let the service infrastructure host the crawler.
         ServiceBase.Run(new ServiceBase[] { crawler });
     }
     else
     {
         // Interactive: drive the service lifecycle by hand.
         Console.CancelKeyPress += (sender, e) => crawler.Stop();

         crawler.Start();
         Console.WriteLine("Service started");

         // Block until any key is pressed, then shut down.
         Console.ReadKey();

         crawler.Stop();
         Console.WriteLine("Service stopped");
     }
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Saves a batch of entities through <c>CrawlerService.AddItem</c>. Per the
        /// original documentation, each item is checked for duplicates using
        /// <paramref name="isExistFields"/> before it is saved.
        /// </summary>
        /// <param name="entityList">The entities to save; each element is forwarded as <c>object</c>.</param>
        /// <param name="isExistFields">Field(s) used for the duplicate check; pass null or an empty string when no check is wanted.</param>
        /// <param name="listBaseAttach">Attachments forwarded with the batch; null is treated as an empty list.</param>
        /// <param name="successList">Receives the items that were inserted successfully.</param>
        /// <param name="isUpdate">Forwarded unchanged to the service.</param>
        /// <param name="isUpdateHtlCtx">Forwarded unchanged to the service.</param>
        /// <param name="isUpdateAttach">Forwarded unchanged to the service.</param>
        /// <returns>The integer result returned by <c>CrawlerService.AddItem</c>.</returns>
        public static int SaveDatas(IList entityList, string isExistFields, List <BaseAttach> listBaseAttach, out object[] successList, bool isUpdate, bool isUpdateHtlCtx, bool isUpdateAttach)
        {
            // A missing attachment list is sent as an empty array.
            BaseAttach[] attachments = listBaseAttach == null
                ? new BaseAttach[0]
                : listBaseAttach.ToArray();

            CrawlerService client = NewService();

            // The service takes object[], so copy the non-generic list element by element.
            List <object> items = new List <object>(entityList.Count);
            foreach (object item in entityList)
            {
                items.Add(item);
            }

            // The sixth argument is always passed as null; its meaning is not visible from here.
            return client.AddItem(items.ToArray(), isExistFields, attachments, isUpdate, isUpdateHtlCtx, null, isUpdateAttach, out successList);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Console entry point: downloads the JD home page, then walks the
        /// odd-numbered pages of the JD phone search, accumulating the parsed
        /// products and printing the running total after each page.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Search URL for phones; the page number is appended to it.
            const string PhoneSearchUrl = "https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&cid2=653&cid3=655&page=";

            // Total number of result pages.
            const int TotalPages = 200;

            // Data-scraping service.
            CrawlerService crawler = new CrawlerService();

            #region JD home page product titles

            // Fetch the JD home page HTML.
            // NOTE(review): the result is currently unused. The title extraction that
            // consumed it (service.GetProductList with XPath
            // "//*[@class='cate_menu_item']/a") is disabled.
            string pageHtml = RequestHelper.HttpGetPageHtml("https://www.jd.com/");

            #endregion

            #region Scrape JD phone listings

            List <ProductInfo> phones = new List <ProductInfo>();

            // Even pages currently repeat the preceding odd page, so only odd pages
            // are requested.
            // NOTE(review): with this bound the last page fetched is 197 — confirm
            // whether page 199 is meant to be skipped.
            for (int page = 1; page < TotalPages - 1; page += 2)
            {
                // Download the search-result HTML for this page.
                string html = RequestHelper.WebClientDownloadHtml(PhoneSearchUrl + page);

                // Parse the phones on this page and add them to the running list.
                phones.AddRange(crawler.GetPhoneList(html));
                Console.WriteLine($"----------当前抓取总数{phones.Count}-----------");
            }

            #endregion

            Console.ReadKey();
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Executes a SQL statement through the crawler service and returns the result as a DataTable.
        /// </summary>
        /// <param name="sql">The SQL text; it is sent with the literal prefix "kdxx" prepended.</param>
        /// <returns>The DataTable returned by <c>CrawlerService.GetDbData</c>.</returns>
        public static DataTable GetDbData(string sql)
        {
            // NOTE(review): "kdxx" is presumably a marker the remote service expects — confirm.
            return NewService().GetDbData("kdxx" + sql);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Executes a SQL statement through the crawler service and returns the number of affected rows.
        /// </summary>
        /// <param name="sql">The SQL text; it is sent with the literal prefix "kdxx" prepended.</param>
        /// <returns>The integer returned by <c>CrawlerService.ExecuteSql</c>.</returns>
        public static int ExecuteSql(string sql)
        {
            // NOTE(review): "kdxx" is presumably a marker the remote service expects — confirm.
            return NewService().ExecuteSql("kdxx" + sql);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Runs a query through the crawler service and returns the value in the first column of the first row.
        /// </summary>
        /// <param name="sql">The SQL text; it is sent with the literal prefix "kdxx" prepended.</param>
        /// <returns>The object returned by <c>CrawlerService.ExecuteScalar</c>.</returns>
        public static object ExecuteScalar(string sql)
        {
            // NOTE(review): "kdxx" is presumably a marker the remote service expects — confirm.
            return NewService().ExecuteScalar("kdxx" + sql);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Executes a stored procedure through the crawler service. Per the original
        /// documentation, the default procedure cleans up duplicate and badly
        /// formatted records in the company database.
        /// </summary>
        /// <param name="proName">Name of the stored procedure to run; defaults to "UP_ClearCorpRepeat".</param>
        public static void ExecuteProcedure(string proName = "UP_ClearCorpRepeat")
        {
            // The name is forwarded as-is to a freshly created service client.
            NewService().ExecuteProcedure(proName);
        }