/// <summary>
/// Starts a background task that consumes jobs from the "VatGia_Queue" queue on the
/// "rabbitMQ177" server.
/// </summary>
/// <remarks>
/// Every received payload is passed to <see cref="JobCrawler.Deserialize"/>, but the decoded
/// job is not used yet: the handler returns <c>true</c> on every path, including when an
/// exception is thrown (the exception is logged first).
/// The method returns as soon as the task has been started; it does not wait for the worker.
/// NOTE(review): <c>true</c> presumably tells the worker the message was handled — confirm
/// against the Worker implementation.
/// </remarks>
private void FindProduct()
{
    var server = RabbitMQManager.GetRabbitMQServer("rabbitMQ177");
    var consumer = new Worker("VatGia_Queue", false, server);

    var consumerTask = new Task(() =>
    {
        log.Info("Start consumer!");

        consumer.JobHandler = job =>
        {
            log.Info("Get job from MQ");

            try
            {
                // The decoded job is currently discarded; only failures are observable (via the log).
                JobCrawler.Deserialize(job.Data);
            }
            catch (Exception error)
            {
                log.Error("Exception:", error);
            }

            return true;
        };

        consumer.Start();
    });

    consumerTask.Start();
}
/// <summary>
/// Decodes a UTF-8 JSON payload into a <see cref="JobCrawler"/> instance.
/// </summary>
/// <param name="JobCrawler">Raw bytes containing the UTF-8 encoded JSON text.</param>
/// <returns>
/// The result of JSON deserialization, or <c>null</c> if any exception is thrown while
/// decoding the bytes or deserializing the text.
/// </returns>
public static JobCrawler Deserialize(byte[] JobCrawler)
{
    try
    {
        // Second constructor argument enables throwOnInvalidBytes, so malformed UTF-8
        // raises an exception and ends up in the catch below.
        var strictUtf8 = new UTF8Encoding(true, true);
        return JsonConvert.DeserializeObject<JobCrawler>(strictUtf8.GetString(JobCrawler));
    }
    catch (Exception)
    {
        // Best-effort contract: callers test the result for null instead of handling exceptions.
        return null;
    }
}
/// <summary>
/// Consumes crawl jobs from the "VatGia_Queue" queue, downloads each job's page and records
/// every not-yet-seen link whose absolute URL matches the classification pattern.
/// </summary>
/// <remarks>
/// For each matching new link the handler pushes a follow-up <see cref="JobCrawler"/> one level
/// deeper, inserts the link into VatGiaClassification (if absent) and remembers its id.
/// Ids of recorded links are buffered and written to VisitedLink only once more than 100 are
/// pending; a smaller remainder stays in memory.
/// Anchors without an <c>href</c> attribute are skipped so that one such anchor does not abort
/// processing of the remaining links on the page.
/// The handler returns <c>true</c> on every path; exceptions are logged and swallowed.
/// After starting the worker task the method blocks on <c>Console.ReadLine()</c>.
/// NOTE(review): <c>addedLink</c> and <c>addedToDb</c> are plain collections captured by the
/// handler; if Worker invokes JobHandler concurrently they need synchronization — confirm.
/// </remarks>
public void FindClassification()
{
    List<long> addedToDb = new List<long>();
    Dictionary<long, string> addedLink = this.LoadVisitedLink();
    ILog log = log4net.LogManager.GetLogger(typeof(Program));

    // Built once instead of once per page.
    // NOTE(review): the '.' in "vatgia.com" is unescaped and therefore matches any character;
    // the pattern is kept as-is — confirm whether that is intended.
    var classificationUrl = new Regex(@"^.*vatgia.com\/\d+\/[^\/]*html$");

    var rabbitMQServer = RabbitMQManager.GetRabbitMQServer("rabbitMQ177");
    var worker = new Worker("VatGia_Queue", false, rabbitMQServer);

    Task workerTask = new Task(() =>
    {
        log.Info("Start consumer!");

        worker.JobHandler = (downloadImageJob) =>
        {
            log.Info("Get job from MQ");
            try
            {
                JobCrawler jobData = JobCrawler.Deserialize(downloadImageJob.Data);
                if (jobData == null)
                {
                    return true;
                }

                Console.WriteLine("Get Job:" + jobData.urlDetail);

                long IDUrl = Math.Abs(GABIZ.Base.Tools.getCRC64(jobData.urlDetail));
                if (addedLink.ContainsKey(IDUrl))
                {
                    // Page already visited.
                    return true;
                }

                // Marked as visited before the download, so a failing page is not retried in this run.
                addedLink.Add(IDUrl, "");

                string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(jobData.urlDetail, 120, 2);
                GABIZ.Base.HtmlAgilityPack.HtmlDocument htmlDocument = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                htmlDocument.LoadHtml(html);

                var nodesLink = htmlDocument.DocumentNode.SelectNodes("//a");
                if (nodesLink == null)
                {
                    return true;
                }

                foreach (var nodeLink in nodesLink)
                {
                    var hrefAttribute = nodeLink.Attributes["href"];
                    if (hrefAttribute == null)
                    {
                        // <a> without href (e.g. a named anchor): nothing to follow.
                        continue;
                    }

                    string url = QT.Entities.Common.GetAbsoluteUrl(hrefAttribute.Value.ToString(), "http://vatgia.com");
                    long IDUrlNew = Math.Abs(GABIZ.Base.Tools.getCRC64(url));
                    if (addedLink.ContainsKey(IDUrlNew))
                    {
                        continue;
                    }

                    if (classificationUrl.IsMatch(url))
                    {
                        PushJobToQueue(new JobCrawler() { level = jobData.level + 1, urlDetail = url });

                        // IDUrlNew is the same CRC64-derived id; no need to hash the URL again.
                        sqlSaveData.RunQuery(
                            "if not exists (select id from VatGiaClassification where id = @id) insert into VatGiaClassification (id, url) values (@id, @url)",
                            CommandType.Text,
                            new System.Data.SqlClient.SqlParameter[]
                            {
                                SqlDb.CreateParamteterSQL("@id", IDUrlNew, SqlDbType.BigInt),
                                SqlDb.CreateParamteterSQL("@url", url, SqlDbType.NVarChar)
                            });

                        addedLink.Add(IDUrlNew, "");
                        addedToDb.Add(IDUrlNew);
                        Console.WriteLine(url);
                    }

                    // Flush the buffered ids to VisitedLink in batches of more than 100.
                    if (addedToDb.Count > 100)
                    {
                        foreach (var item in addedToDb)
                        {
                            this.sqlVisited.RunQuery(
                                "if not exists (select id from visitedlink where id = @id) insert into VisitedLink (id) values (@id)",
                                CommandType.Text,
                                new System.Data.SqlClient.SqlParameter[]
                                {
                                    SqlDb.CreateParamteterSQL("@id", item, SqlDbType.BigInt)
                                });
                        }

                        addedToDb.Clear();
                    }
                }

                return true;
            }
            catch (Exception ex01)
            {
                log.Error("Exception:", ex01);
                return true;
            }
        };

        worker.Start();
    });

    workerTask.Start();
    Console.ReadLine();
}