Пример #1
0
        /// <summary>
        /// Exports the crawl records that belong to the given task as an Excel download.
        /// </summary>
        /// <param name="model">Task whose <c>Unique</c> value selects the crawl records.</param>
        /// <param name="isOptimize">Passed through unchanged to <c>ListToExcel</c>.</param>
        /// <returns>
        /// A script snippet that raises a browser alert when no record matches; otherwise the
        /// generated workbook as an <c>application/vnd.ms-excel</c> file result.
        /// </returns>
        public ActionResult Export(TaskEntity model, bool isOptimize)
        {
            // Newest crawl results first.
            var domain  = new CrawlerDomain();
            var records = domain.Get(t => t.TaskGuid == model.Unique)
                                .OrderByDescending(t => t.爬行更新时间)
                                .ToList();

            if (records.Count >= 1)
            {
                var workbook = records.ListToExcel(isOptimize);

                // The download name carries the current date and weekday.
                var fileName = "导出北京企业采集信息[" + DateTime.Now.ToString("yyyy-M-d dddd") + "].xls";
                return File(workbook, "application/vnd.ms-excel", fileName);
            }

            // Nothing to export: tell the user through a client-side alert.
            return Content("<script>alert('未找到内容');</script>");
        }
Пример #2
0
        /// <summary>
        /// Looks up a single company by name, first running a crawl for it when the name
        /// is not found among the stored target companies.
        /// </summary>
        /// <param name="guid">
        /// Identifier exposed to the view through <c>ViewBag.Guid</c> and converted into the
        /// <c>Unique</c> value of the task that is created for a new crawl.
        /// </param>
        /// <param name="searchInfo">Company name to look up.</param>
        /// <returns>
        /// The view, with the first crawl record matching <paramref name="searchInfo"/> as its
        /// model, or a <c>null</c> model when both arguments are blank or nothing matched.
        /// </returns>
        public ActionResult SingelSearch(string guid = null, string searchInfo = null)
        {
            CrawlerEntity found = null;

            // Nothing supplied: hand the view a fresh identifier and render it without a model.
            if (string.IsNullOrWhiteSpace(guid) && string.IsNullOrWhiteSpace(searchInfo))
            {
                ViewBag.Guid = Guid.NewGuid();
                return View(found);
            }

            ViewBag.Guid = guid;

            var knownCount = new TargeCompanyDomain().Get(t => t.CompanyName.Equals(searchInfo))?.Count;

            // A null count compares as "not greater than zero", i.e. not in the history.
            var alreadyInHistory = knownCount > 0;
            if (!alreadyInHistory)
            {
                // Not in the history yet: search online.
                var names = new List<string> { searchInfo };
                var model = new TaskEntity
                {
                    TaskType       = EnumTaskType.BjCrawler,
                    TaskName       = $"单个任务[{DateTime.Now.ToString("G")}]",
                    Unique         = Conv.ToGuid(guid),
                    TaskStateDicId = 1,
                    TaskNum        = 1,
                    CreateTime     = DateTime.Now,
                    IsSingelSearch = true
                };

                new TaskDomain().Add(model);
                new BaseData(model).InsertMetadata(names.ToList(), model.TaskName, model, taskEntity =>
                {
                    // Start four identical searches and continue as soon as any one of them finishes.
                    var searches = Enumerable.Range(0, 4)
                        .Select(_ =>
                        {
                            var search = new Task(() =>
                            {
                                var bjqyxy = new Crawler.Bjqyxy.BjCrawler(taskEntity, t => t.TaskGuid.Equals(taskEntity.Unique));
                                bjqyxy.SingelSearch(searchInfo);
                            });
                            search.Start();
                            return search;
                        })
                        .ToArray();

                    Task.WaitAny(searches);
                });
            }
            // else: the company already exists in the history, so no crawl is needed.

            found = new CrawlerDomain().Get(t => t.搜索名称 == searchInfo && t.称 != null).FirstOrDefault();
            return View(found);
        }
Пример #3
0
 /// <summary>
 /// Maps a crawler domain to its drawable resource id.
 /// </summary>
 /// <param name="domain">Domain to look up.</param>
 /// <returns>
 /// The matching <c>Resource.Drawable</c> id for Mandarake, Surugaya, Mercari, Yahoo and
 /// Lashinbang; <c>0</c> for any other value.
 /// </returns>
 public static int ToImageResource(this CrawlerDomain domain)
 {
     switch (domain)
     {
         case CrawlerDomain.Mandarake:
             return Resource.Drawable.mandarake;

         case CrawlerDomain.Surugaya:
             return Resource.Drawable.surugaya;

         case CrawlerDomain.Mercari:
             return Resource.Drawable.mercari;

         case CrawlerDomain.Yahoo:
             return Resource.Drawable.yahoo;

         case CrawlerDomain.Lashinbang:
             return Resource.Drawable.lashinbang;

         default:
             // No drawable is associated with this domain.
             return 0;
     }
 }
Пример #4
0
        /// <summary>
        /// Service start hook. Connects to the RabbitMQ server named in the
        /// <c>rabbitMQServerName</c> app setting and starts <c>workerCount</c> workers, each on
        /// its own task, that consume "add product" jobs.
        /// </summary>
        /// <remarks>
        /// For every job the handler decodes the message, resolves the company from the URL's
        /// domain and loads that company's configuration. When <c>_company.Status</c> is
        /// <c>WEB_CRAWLERDOMAIN</c> the page is only downloaded and parsed. Otherwise each link
        /// in the message is downloaded, optionally trimmed to the region delimited by the
        /// configured markers, analysed into a <c>Product</c>, and, when the product passes
        /// <c>IsSuccessData</c>, written to the database and announced on the index and image
        /// queues.
        /// The handler returns <c>true</c> after processing and <c>false</c> when cancellation
        /// was requested; how <c>Worker</c> interprets that value is not visible here.
        /// Any exception raised during start-up is logged and rethrown.
        /// </remarks>
        /// <param name="args">Start arguments supplied by the service host; not used here.</param>
        protected override void OnStart(string[] args)
        {
            log.Info("Start service");
            try
            {
                InitializeComponent();
                cancelTokenSource = new CancellationTokenSource();
                string rabbitMQServerName = ConfigurationManager.AppSettings["rabbitMQServerName"];
                workers        = new Worker[workerCount];
                rabbitMQServer = RabbitMQManager.GetRabbitMQServer(rabbitMQServerName);

                // SECURITY(review): database credentials are embedded in source. They should be
                // moved to protected configuration and rotated; left in place here because the
                // matching configuration keys are not visible from this method.
                string connectToSQL        = @"Data Source=172.22.30.86,1455;Initial Catalog=QT_2;Persist Security Info=True;User ID=qt_vn;Password=@F4sJ=l9/ryJt9MT;connection timeout=200";
                string connectToConnection = @"Data Source=42.112.28.93;Initial Catalog=QT_2;Persist Security Info=True;User ID=wss_price;Password=HzlRt4$$axzG-*UlpuL2gYDu;connection timeout=200";

                // NOTE(review): crawlerProductAdapter is never read below; it is kept in case its
                // construction has side effects - confirm and remove if it does not.
                CrawlerProductAdapter crawlerProductAdapter = new CrawlerProductAdapter(new SqlDb(connectToSQL));
                ProductAdapter        productAdapter        = new ProductAdapter(new SqlDb(connectToConnection));


                for (int i = 0; i < workerCount; i++)
                {
                    // "{0}" is the composite-format placeholder; the previous "{i}" was not a
                    // valid format item, so the message could not be formatted.
                    log.InfoFormat("Start worker {0}", i);
                    var worker = new Worker(AddProductToSqlJobName, false, rabbitMQServer);
                    workers[i] = worker;
                    var  token      = this.cancelTokenSource.Token;
                    Task workerTask = new Task(() =>
                    {
                        worker.JobHandler = (downloadImageJob) =>
                        {
                            try
                            {
                                token.ThrowIfCancellationRequested();

                                QT.Entities.CrawlerProduct.RabbitMQ.MsSaveProduct Mss = QT.Entities.CrawlerProduct.RabbitMQ.MsSaveProduct.GetDataFromMessage(downloadImageJob.Data);
                                string Url     = Mss.Url;
                                string Domain  = QT.Entities.Common.GetDomainFromUrl(Url);
                                long CompanyID = QT.Entities.Common.GetIDCompany(Domain);
                                QT.Entities.Configuration config = new QT.Entities.Configuration(CompanyID);
                                if (_company.Status == Common.CompanyStatus.WEB_CRAWLERDOMAIN)
                                {
                                    // NOTE(review): this branch downloads and parses the page but
                                    // nothing below uses obj or doc - confirm whether it is unfinished.
                                    QT.Entities.CrawlerDomain obj = new CrawlerDomain();
                                    string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(Url.Trim(), 15, 1);
                                    GABIZ.Base.HtmlAgilityPack.HtmlDocument doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                                    html = html.Replace("<form", "<div");
                                    html = html.Replace("</form", "</div");
                                    doc.LoadHtml(html);
                                }
                                else
                                {
                                    // The message URL may hold several links separated by SqlDb.arSplit.
                                    string[] arLink = Url.Trim().Split(SqlDb.arSplit, StringSplitOptions.RemoveEmptyEntries);
                                    foreach (var item in arLink)
                                    {
                                        QT.Entities.Product _product = new Product();
                                        string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(item, 45, 2);
                                        if (config.ContentAnanyticXPath.Count >= 1)
                                        {
                                            // Keep only the text from the first marker up to and
                                            // including the second marker, when they are present.
                                            int i1 = html.IndexOf(config.ContentAnanyticXPath[0]);
                                            if (i1 >= 0)
                                            {
                                                html = html.Substring(i1);
                                                if (config.ContentAnanyticXPath.Count >= 2)
                                                {
                                                    int i2 = html.IndexOf(config.ContentAnanyticXPath[1]);
                                                    if (i2 >= 0)
                                                    {
                                                        html = html.Substring(0, i2 + config.ContentAnanyticXPath[1].Length);
                                                    }
                                                }
                                            }
                                            html = html.Replace("<form", "<div");
                                            html = html.Replace("</form", "</div");
                                            html = Common.TidyCleanR(html);
                                        }

                                        // NOTE(review): _htmlSource is a member written by every
                                        // worker's handler - confirm that concurrent writes are acceptable.
                                        _htmlSource = html;
                                        GABIZ.Base.HtmlAgilityPack.HtmlDocument doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                                        html = html.Replace("<form", "<div");
                                        html = html.Replace("</form", "</div");
                                        doc.LoadHtml(html);

                                        List <Product> lstUpdateProduct = new List <Product>();
                                        List <Product> lstInsertProduct = new List <Product>();

                                        _product.Analytics(doc, item, config, true, _company.Domain);

                                        // _product was created above and already dereferenced, so a
                                        // null check here would be redundant.
                                        if (_product.IsSuccessData(config.CheckPrice))
                                        {
                                            if (productAdapter.CheckExistInDb(_product.ID))
                                            {
                                                lstUpdateProduct.Add(_product);
                                            }
                                            else
                                            {
                                                lstInsertProduct.Add(_product);
                                            }

                                            productAdapter.UpdateProductsChangeToDb(lstUpdateProduct);
                                            productAdapter.InsertListProduct(lstInsertProduct);

                                            productAdapter.PushQueueIndexCompany(config.CompanyID);
                                            productAdapter.PushQueueChangeChangeImage(new MQChangeImage()
                                            {
                                                ProductID = _product.ID,
                                                Type      = 1
                                            });

                                            log.InfoFormat("Saved {0} item product!", _product.Name);
                                        }
                                    }
                                }

                                return(true);
                            }
                            catch (OperationCanceledException)
                            {
                                // Cancellation was requested through cancelTokenSource.
                                log.Info("End worker");
                                return(false);
                            }
                        };
                        worker.Start();
                    }, token);
                    workerTask.Start();
                    log.InfoFormat("Worker {0} started", i);
                }
            }
            catch (Exception ex)
            {
                log.Error("Start error", ex);
                throw;
            }
        }
Пример #5
0
 /// <summary>
 /// Returns the crawler stored in <c>_crawlers</c> under the given domain.
 /// </summary>
 /// <param name="crawlerDomain">Key to look up in <c>_crawlers</c>.</param>
 /// <returns>The entry that the <c>_crawlers</c> indexer yields for <paramref name="crawlerDomain"/>.</returns>
 public ICrawler GetCrawler(CrawlerDomain crawlerDomain) => _crawlers[crawlerDomain];