Ejemplo n.º 1
0
        /// <summary>
        /// Starts a task that attaches a job handler to a <c>Worker</c> bound to
        /// "VatGia_Queue" on the "rabbitMQ177" server and then starts the worker.
        /// The method itself returns as soon as the task has been started.
        /// </summary>
        /// <remarks>
        /// The handler currently only decodes the payload and discards the result;
        /// it reports <c>true</c> for every job, including ones that fail to decode
        /// or throw. NOTE(review): <c>true</c> presumably acknowledges the message
        /// to the worker — confirm against <c>Worker.JobHandler</c>.
        /// </remarks>
        private void FindProduct()
        {
            var server   = RabbitMQManager.GetRabbitMQServer("rabbitMQ177");
            var consumer = new Worker("VatGia_Queue", false, server);

            Task consumerTask = new Task(() =>
            {
                log.Info("Start consumer!");

                consumer.JobHandler = (job) =>
                {
                    log.Info("Get job from MQ");
                    try
                    {
                        // The decoded job is not used yet; the call is kept so that
                        // decoding still runs for every message.
                        JobCrawler.Deserialize(job.Data);
                    }
                    catch (Exception error)
                    {
                        log.Error("Exception:", error);
                    }

                    // Every path (null payload, success, failure) reports true.
                    return true;
                };

                consumer.Start();
            });

            consumerTask.Start();
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Decodes a UTF-8 encoded JSON payload into a <see cref="JobCrawler"/>.
 /// </summary>
 /// <param name="JobCrawler">Raw message bytes containing UTF-8 JSON text.</param>
 /// <returns>
 /// The deserialized job, or <c>null</c> when the payload is null or empty,
 /// contains invalid UTF-8, or cannot be deserialized.
 /// </returns>
 public static JobCrawler Deserialize(byte[] JobCrawler)
 {
     // Handle the "no payload" case explicitly instead of relying on an exception
     // thrown by GetString(null) being swallowed below. The result is the same as
     // before: null.
     if (JobCrawler == null || JobCrawler.Length == 0)
     {
         return(null);
     }

     try
     {
         // Strict decoder: the second 'true' makes invalid byte sequences throw
         // instead of being replaced silently.
         Encoding enc     = new UTF8Encoding(true, true);
         string   strData = enc.GetString(JobCrawler);
         return(JsonConvert.DeserializeObject <JobCrawler>(strData));
     }
     catch (Exception)
     {
         // Deliberate best effort: any decoding or parsing failure is reported to
         // the caller as "no job".
         return(null);
     }
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Starts a consumer on "VatGia_Queue" that, for each job, downloads the job's
        /// page, scans its anchors and, for every not-yet-seen link matching the
        /// classification URL pattern, pushes a follow-up job and stores the link in
        /// <c>VatGiaClassification</c>. Blocks on <see cref="Console.ReadLine"/> after
        /// starting the consumer task.
        /// </summary>
        /// <remarks>
        /// The handler reports <c>true</c> for every job, including failed ones
        /// (failures are logged). Ids of stored links are buffered and written to
        /// <c>VisitedLink</c> once more than 100 have accumulated.
        /// NOTE(review): ids still in the buffer when the process stops are never
        /// written to <c>VisitedLink</c> — confirm whether that is acceptable.
        /// NOTE(review): a discovered link is added to the in-memory visited set
        /// before its job is consumed; if <c>PushJobToQueue</c> publishes to the queue
        /// this same consumer reads, that job will be skipped by the visited check
        /// below — confirm intended behavior.
        /// </remarks>
        public void FindClassification()
        {
            List <long> addedToDb = new List <long>();
            Dictionary <long, string> addedLink = this.LoadVisitedLink();
            ILog log = log4net.LogManager.GetLogger(typeof(Program));

            // Shape of the links that are queued and stored; constant for all jobs.
            string regexExtract = @"^.*vatgia.com\/\d+\/[^\/]*html$";

            var  rabbitMQServer = RabbitMQManager.GetRabbitMQServer("rabbitMQ177");
            var  worker         = new Worker("VatGia_Queue", false, rabbitMQServer);
            Task workerTask     = new Task(() =>
            {
                log.Info("Start consumer!");
                worker.JobHandler = (downloadImageJob) =>
                {
                    log.Info("Get job from MQ");
                    try
                    {
                        JobCrawler jobData = JobCrawler.Deserialize(downloadImageJob.Data);
                        if (jobData == null)
                        {
                            return(true);
                        }

                        Console.WriteLine("Get Job:" + jobData.urlDetail);
                        long IDUrl = Math.Abs(GABIZ.Base.Tools.getCRC64(jobData.urlDetail));
                        if (addedLink.ContainsKey(IDUrl))
                        {
                            // Page already handled (or loaded as visited at startup).
                            return(true);
                        }
                        addedLink.Add(IDUrl, "");

                        string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(jobData.urlDetail, 120, 2);
                        if (string.IsNullOrEmpty(html))
                        {
                            // Nothing to parse. Guarding here avoids handing a null
                            // document to the parser and failing via the catch-all.
                            log.Warn("Empty HTML for:" + jobData.urlDetail);
                            return(true);
                        }

                        GABIZ.Base.HtmlAgilityPack.HtmlDocument htmlDocument = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                        htmlDocument.LoadHtml(html);
                        var nodesLink = htmlDocument.DocumentNode.SelectNodes("//a");
                        if (nodesLink == null)
                        {
                            return(true);
                        }

                        foreach (var nodeLink in nodesLink)
                        {
                            // Anchors without an href (e.g. named anchors) used to throw
                            // here and abort the remaining links of the page.
                            var hrefAttribute = nodeLink.Attributes["href"];
                            if (hrefAttribute == null || hrefAttribute.Value == null)
                            {
                                continue;
                            }

                            string url = QT.Entities.Common.GetAbsoluteUrl(hrefAttribute.Value.ToString(), "http://vatgia.com");
                            if (string.IsNullOrEmpty(url))
                            {
                                // An empty URL can never match the pattern; a null one
                                // would throw in the calls below.
                                continue;
                            }

                            long IDUrlNew = Math.Abs(GABIZ.Base.Tools.getCRC64(url));
                            if (addedLink.ContainsKey(IDUrlNew) || !Regex.IsMatch(url, regexExtract))
                            {
                                continue;
                            }

                            PushJobToQueue(new JobCrawler()
                            {
                                level     = jobData.level + 1,
                                urlDetail = url
                            });

                            // Reuse the id computed above instead of hashing the URL again.
                            sqlSaveData.RunQuery("if not exists (select id from VatGiaClassification where id = @id) insert into VatGiaClassification (id, url) values (@id, @url)", CommandType.Text,
                                                 new System.Data.SqlClient.SqlParameter[] {
                                SqlDb.CreateParamteterSQL("@id", IDUrlNew, SqlDbType.BigInt),
                                SqlDb.CreateParamteterSQL("@url", url, SqlDbType.NVarChar)
                            });

                            addedLink.Add(IDUrlNew, "");
                            addedToDb.Add(IDUrlNew);

                            Console.WriteLine(url);

                            // The buffer only grows on the line above, so checking the
                            // threshold here is equivalent to checking on every link.
                            if (addedToDb.Count > 100)
                            {
                                foreach (var item in addedToDb)
                                {
                                    this.sqlVisited.RunQuery("if not exists (select id from visitedlink where id = @id) insert into VisitedLink (id) values (@id)"
                                                             , CommandType.Text
                                                             , new System.Data.SqlClient.SqlParameter[] {
                                        SqlDb.CreateParamteterSQL("@id", item, SqlDbType.BigInt)
                                    });
                                }
                                addedToDb.Clear();
                            }
                        }
                        return(true);
                    }
                    catch (Exception ex01)
                    {
                        log.Error("Exception:", ex01);
                        return(true);
                    }
                };
                worker.Start();
            });

            workerTask.Start();
            // Keep the calling thread alive while the consumer task runs.
            Console.ReadLine();
        }