Ejemplo n.º 1
0
        /// <summary>
        /// Asynchronously crawls a single page: starts a PhantomJS browser, navigates to
        /// <paramref name="uri"/>, optionally runs a script and an operation against the page,
        /// and raises <c>OnCompleted</c> with the extracted <c>News</c>.
        /// </summary>
        /// <param name="uri">Address of the page to crawl.</param>
        /// <param name="script">Optional script executed in the page after navigation; skipped when null.</param>
        /// <param name="operation">Optional operation passed to <c>ExecuteAction</c> after the script; skipped when null.</param>
        /// <param name="cpc">Crawler configuration forwarded to <c>GetNews</c>.</param>
        /// <returns>
        /// A task that completes once the crawl attempt has finished. Failures inside the crawl
        /// are reported through <c>OnError</c> instead of being thrown to the caller.
        /// </returns>
        public async Task StartAsync(Uri uri, Script script, Operation operation, CrawlerPartConfig cpc)
        {
            // Upper bound (in whole seconds) on navigation + script/operation time before the
            // page is treated as timed out.
            const int MaxLoadSeconds = 30;

            await Task.Run(() =>
            {
                // Tracks whether the semaphore was actually entered, so the finally block never
                // releases a slot that was not acquired (e.g. when Wait() itself throws).
                var acquired = false;
                try
                {
                    Semaphore.Wait();
                    acquired = true;

                    var service        = PhantomJSDriverService.CreateDefaultService();
                    service.LoadImages = false;
                    var options        = new PhantomJSOptions();
                    var driver         = new PhantomJSDriver(service, options);
                    try
                    {
                        // UTC is used so a DST / time-zone shift cannot distort the elapsed time.
                        var startedAt = DateTime.UtcNow;
                        driver.Navigate().GoToUrl(uri.ToString());

                        if (script != null)
                        {
                            driver.ExecuteScript(script.Code, script.Args);
                        }
                        if (operation != null)
                        {
                            ExecuteAction(operation, driver);
                        }

                        var threadId = Thread.CurrentThread.ManagedThreadId;
                        var seconds  = Convert.ToInt32(DateTime.UtcNow.Subtract(startedAt).TotalSeconds);

                        // Original author's note: a page that takes too long to open may leave the
                        // driver unusable (garbage-collected), so slow pages are not processed.
                        if (seconds < MaxLoadSeconds)
                        {
                            News news = GetNews(driver, cpc, uri, threadId, seconds);
                            OnCompleted?.Invoke(this, new OnCompletedEventArgs(news));
                        }
                        else
                        {
                            // The driver is shut down by the finally block below.
                            WriteLog.InsertLogs(uri.ToString(), "打开网页超时");
                        }
                    }
                    catch (Exception ex)
                    {
                        OnError?.Invoke(this, new OnErrorEventArgs(uri, ex));
                    }
                    finally
                    {
                        // Single cleanup point for the browser on every path.
                        driver.Quit();
                    }
                }
                catch (Exception ex)
                {
                    // Covers failures before the driver exists (semaphore wait, service/driver
                    // start-up) as well as anything thrown during driver shutdown.
                    OnError?.Invoke(this, new OnErrorEventArgs(uri, ex));
                }
                finally
                {
                    if (acquired)
                    {
                        Semaphore.Release();
                    }
                }
            });
        }