/// <summary>
/// Publishes one message per URL of the first pending parser task to the
/// machine-specific RabbitMQ queue, then marks every published task as queued.
/// </summary>
/// <param name="context">Job execution context; not used by this job.</param>
public void Execute(IJobExecutionContext context)
{
    var taskList = new List<TaskExecuterModel>();
    var tasklistdb = parserManager.GetAll()
        .Where(i => i.Status == Common.Enum.Status.Coming || i.Status == Common.Enum.Status.Infinite)
        .ToList();

    foreach (var task in tasklistdb)
    {
        var urlList = urlManager.GetAllUrls(task.IteratorSettings);
        foreach (var url in urlList)
        {
            taskList.Add(new TaskExecuterModel { TaskId = task.Id, GoodUrl = url });
        }

        // NOTE(review): only the first matching task is expanded per run. If that
        // task yields no URLs its status is never changed below, so it will be
        // picked again on the next run - confirm this is intentional.
        break;
    }

    if (taskList.Count == 0)
    {
        // Nothing to publish, so there is no reason to open a broker connection.
        return;
    }

    var queueName = "Queue-" + Environment.MachineName;

    ConnectionFactory connFactory = new ConnectionFactory();
    connFactory.uri = new Uri(System.Configuration.ConfigurationManager.AppSettings["RabbitMqConnection"]);

    // Open a single connection and channel for the whole batch instead of one per
    // message; both are disposed when the batch has been published.
    using (var conn = connFactory.CreateConnection())
    using (var channel = conn.CreateModel())
    {
        // ensure that the queue exists before we publish to it
        channel.QueueDeclare(queue: queueName,
                             durable: true,
                             exclusive: false,
                             autoDelete: false,
                             arguments: null);

        var serializer = new JavaScriptSerializer();
        var properties = channel.CreateBasicProperties();
        properties.Persistent = true;

        foreach (var mess in taskList)
        {
            // the data put on the queue must be a byte array
            var data = Encoding.UTF8.GetBytes(serializer.Serialize(mess));

            // publish to the "default exchange", with the queue name as the routing key
            channel.BasicPublish(exchange: "",
                                 routingKey: queueName,
                                 basicProperties: properties,
                                 body: data);
        }
    }

    // Statuses are updated only after every message has been handed to the broker.
    var ids_update = taskList.Select(i => i.TaskId).Distinct();
    foreach (var id in ids_update)
    {
        var obj = parserManager.Get(id);
        obj.Status = Common.Enum.Status.InQuery;
        parserManager.Update(obj);
    }
}
/// <summary>
/// Takes the next queued task message (if any), executes it while the parser
/// task is still within its end date, and stores the resulting task status.
/// </summary>
/// <param name="context">Job execution context; not used by this job.</param>
public void Execute(IJobExecutionContext context)
{
    TaskExecuter te = new TaskExecuter();
    TaskGetter tg = new TaskGetter();
    var obj = tg.GetTask();
    uOw.UpdateContext();
    if (obj == null)
    {
        return;
    }

    var endTime = parsermanager.Get(obj.TaskId).EndDate;

    // Default covers the case where the end date has already passed: the task
    // is not executed and is marked as finished.
    var nextStatus = Common.Enum.Status.Finished;
    if (endTime == null)
    {
        // No end date: execute and keep the task in the open-ended state.
        te.ExecuteTask(obj.TaskId, obj.GoodUrl);
        nextStatus = Common.Enum.Status.Infinite;
    }
    else if (DateTime.Now <= endTime)
    {
        // Still inside the scheduled window: execute and make the task pending again.
        te.ExecuteTask(obj.TaskId, obj.GoodUrl);
        nextStatus = Common.Enum.Status.Coming;
    }

    // Re-read the task after execution so the update is applied to a fresh copy.
    var task_s = parsermanager.Get(obj.TaskId);
    task_s.Status = nextStatus;
    parsermanager.Update(task_s);
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and parses it into a good using the
/// grabber configuration of the given parser task, then stores the good and a
/// price-history entry.
/// </summary>
/// <param name="parsertaskid">id of parser task</param>
/// <param name="url">url to parse</param>
/// <returns>
/// The parsed and stored GoodDTO, or null when the page could not be downloaded.
/// Failures while parsing individual fields are recorded in the execution log and
/// do not abort the task.
/// </returns>
public GoodDTO ExecuteTask(int parsertaskid, string url)
{
    const string imageNotAvailableUrl = @"http://www.kalahandi.info/wp-content/uploads/2016/05/sorry-image-not-available.png";

    ParserTaskDTO parsertask = parsermanager.Get(parsertaskid);

    // adding to local log storage; this entry is removed again when the task ends
    ExecutingInfoDTO taskinfo = new ExecutingInfoDTO()
    {
        GoodUrl = url,
        Status = ExecuteStatus.Executing,
        Date = DateTime.Now,
        ParserTaskId = parsertaskid
    };
    taskinfo.Id = taskinfoManager.Insert(taskinfo);

    // getting page source according to the configured download method
    HtmlDocument doc;
    try
    {
        string pageSource = "";
        SiteDownloader sw = new SiteDownloader();
        switch (parsertask.IteratorSettings.DownloadMethod)
        {
            case DownloadMethod.Direct:
                pageSource = sw.GetPageSouceDirectly(url);
                break;
            case DownloadMethod.Tor:
                pageSource = sw.GetPageSouce(url);
                break;
            default:
                break;
        }

        doc = new HtmlDocument();
        doc.LoadHtml(pageSource);
    }
    catch (Exception ex)
    {
        logger.Error(ex);
        RecordExecutionError(parsertaskid, url, "Can't download url");
        taskinfoManager.Delete(taskinfo);
        return null;
    }

    // gets configuration from parsertask
    GrabberSettingsDTO grabbersettings = parsertask.GrabberSettings;
    GoodDTO resultGood = new GoodDTO();
    resultGood.WebShop_Id = parsertask.WebShopId;
    resultGood.Category_Id = parsertask.CategoryId;

    // Last xpath that was attempted; used only to make error messages traceable.
    var xpathbuffer = "";

    // Parsing name by list of xpathes
    try
    {
        HtmlNode nameNode = SelectFirstMatch(doc, grabbersettings.Name, ref xpathbuffer);
        var name = nameNode != null ? nameNode.InnerHtml : "";
        resultGood.Name = StripHTML(name.Trim());
    }
    catch (Exception ex)
    {
        logger.Error(ex);
        RecordExecutionError(parsertaskid, url, "Can't parse name,-xpath: " + xpathbuffer);
    }

    // Parsing price by list of xpathes
    try
    {
        HtmlNode priceNode = SelectFirstMatch(doc, grabbersettings.Price, ref xpathbuffer);
        var price = priceNode != null ? priceNode.InnerHtml : "";
        if (price != "")
        {
            // NOTE(review): Convert.ToDecimal uses the current culture - confirm the
            // output of RemoveAllLetters matches the server's decimal separator.
            resultGood.Price = Convert.ToDecimal(this.RemoveAllLetters(price));
        }
    }
    catch (Exception ex)
    {
        logger.Error(ex);
        RecordExecutionError(parsertaskid, url, "Can't parse main price,-xpath: " + xpathbuffer);
    }

    // Parsing old price by list of xpathes
    try
    {
        HtmlNode oldPriceNode = SelectFirstMatch(doc, grabbersettings.OldPrice, ref xpathbuffer);
        var oldPrice = oldPriceNode != null ? oldPriceNode.InnerHtml : "";
        if (oldPrice != "")
        {
            resultGood.OldPrice = Convert.ToDecimal(this.RemoveAllLetters(oldPrice));
        }
    }
    catch (Exception ex)
    {
        logger.Error(ex);
        RecordExecutionError(parsertaskid, url, "Can't parse old price,-xpath: " + xpathbuffer);
    }

    // Parsing image link by list of xpathes
    try
    {
        string imagelink = null;
        foreach (var imglink in grabbersettings.ImgLink)
        {
            xpathbuffer = imglink;
            // SelectSingleNode returns null when nothing matches, so the next
            // xpath in the list is tried instead of failing on the first miss.
            HtmlNode value = doc.DocumentNode.SelectSingleNode(imglink + "/@src");
            if (value != null)
            {
                imagelink = value.Attributes["src"].Value;
                break;
            }
        }

        // Fall back to the placeholder when no xpath produced a usable link.
        resultGood.ImgLink = string.IsNullOrEmpty(imagelink) ? imageNotAvailableUrl : imagelink;
    }
    catch (Exception ex)
    {
        logger.Error(ex);
        resultGood.ImgLink = imageNotAvailableUrl;
        RecordExecutionError(parsertaskid, url, "Can't parse image link,-xpath: " + xpathbuffer);
    }

    resultGood.UrlLink = url;

    PropertyValuesDTO propertyValues = new PropertyValuesDTO();
    propertyValues.DictDoubleProperties = new Dictionary<int, double>();
    propertyValues.DictIntProperties = new Dictionary<int, int>();
    propertyValues.DictStringProperties = new Dictionary<int, string>();

    foreach (var propitem in grabbersettings.PropertyItems)
    {
        PropertyDTO property = propmanager.Get(propitem.Id);
        var htmlvalue = "";

        try
        {
            HtmlNode value = SelectFirstMatch(doc, propitem.Value, ref xpathbuffer);
            if (value != null)
            {
                htmlvalue = value.InnerHtml;
            }
        }
        catch (Exception ex)
        {
            logger.Error(ex);
            RecordExecutionError(parsertaskid, url, "Can't parse property" + property.Name + ",-xpath: " + xpathbuffer);
        }

        // Conversion is attempted even when nothing was found; a failed conversion
        // is recorded as an error and the property is left out of the result.
        try
        {
            switch (property.Type)
            {
                case PropertyType.Integer:
                    propertyValues.DictIntProperties.Add(propitem.Id, Convert.ToInt32(htmlvalue));
                    break;
                case PropertyType.Double:
                    propertyValues.DictDoubleProperties.Add(propitem.Id, Convert.ToDouble(htmlvalue));
                    break;
                case PropertyType.String:
                    propertyValues.DictStringProperties.Add(propitem.Id, StripHTML(htmlvalue));
                    break;
                default:
                    break;
            }
        }
        catch (Exception ex)
        {
            logger.Error(ex);
            RecordExecutionError(parsertaskid, url, "Can't convert value " + htmlvalue + " of " + property.Name + ",-xpath: " + xpathbuffer);
        }
    }

    resultGood.Status = true;
    resultGood.PropertyValues = propertyValues;
    goodwizardManager.InsertOrUpdate(resultGood);

    var newPrice = new PriceHistoryDTO
    {
        Url = resultGood.UrlLink,
        Price = resultGood.Price,
        Date = DateTime.Now,
        Name = resultGood.Name
    };
    priceManager.Insert(newPrice);

    // deleting from local log storage
    taskinfoManager.Delete(taskinfo);
    return resultGood;
}

/// <summary>
/// Inserts an error entry for the given parser task and url into the execution log.
/// </summary>
private void RecordExecutionError(int parsertaskid, string url, string message)
{
    taskinfoManager.Insert(new ExecutingInfoDTO()
    {
        GoodUrl = url,
        Status = ExecuteStatus.ErrorInsert,
        Date = DateTime.Now,
        ParserTaskId = parsertaskid,
        ErrorMessage = message
    });
}

/// <summary>
/// Returns the node selected by the first xpath that matches anything in the
/// document, or null when none match. <paramref name="lastXpath"/> is set to
/// every xpath before it is evaluated so callers can report it on failure.
/// </summary>
private static HtmlNode SelectFirstMatch(HtmlDocument doc, IEnumerable<string> xpaths, ref string lastXpath)
{
    foreach (var xpath in xpaths)
    {
        lastXpath = xpath;
        HtmlNode node = doc.DocumentNode.SelectSingleNode(xpath);
        if (node != null)
        {
            return node;
        }
    }

    return null;
}