Ejemplo n.º 1
0
        /// <summary>
        /// Publishes one message per task URL to the machine-specific RabbitMQ queue
        /// and then marks every published task as <c>InQuery</c>.
        /// </summary>
        /// <param name="context">Job execution context; not used by this method.</param>
        public void Execute(IJobExecutionContext context)
        {
            var taskList   = new List <TaskExecuterModel>();
            var tasklistdb = parserManager.GetAll()
                             .Where(i => i.Status == Common.Enum.Status.Coming || i.Status == Common.Enum.Status.Infinite)
                             .ToList();

            foreach (var task in tasklistdb)
            {
                var urlList = urlManager.GetAllUrls(task.IteratorSettings);
                foreach (var url in urlList)
                {
                    var taskExecute = new TaskExecuterModel();
                    taskExecute.TaskId  = task.Id;
                    taskExecute.GoodUrl = url;
                    taskList.Add(taskExecute);
                }

                // NOTE(review): only the first matching task is expanded per run because of
                // this break — confirm that this is intentional and not a debugging leftover.
                break;
            }

            // Nothing to publish: do not open a broker connection at all.
            if (taskList.Count > 0)
            {
                var queueName = "Queue-" + Environment.MachineName;

                ConnectionFactory connFactory = new ConnectionFactory();
                connFactory.uri = new Uri(System.Configuration.ConfigurationManager.AppSettings["RabbitMqConnection"]);

                // One connection and one channel are shared by all messages of this run;
                // both are disposed when the batch has been published.
                using (var conn = connFactory.CreateConnection())
                using (var channel = conn.CreateModel())
                {
                    // ensure that the queue exists before we publish to it
                    channel.QueueDeclare(queue: queueName,
                                         durable: true,
                                         exclusive: false,
                                         autoDelete: false,
                                         arguments: null);

                    var serializer = new JavaScriptSerializer();
                    var properties = channel.CreateBasicProperties();
                    properties.Persistent = true;

                    foreach (var mess in taskList)
                    {
                        // the data put on the queue must be a byte array
                        var data = Encoding.UTF8.GetBytes(serializer.Serialize(mess));

                        // publish to the "default exchange", with the queue name as the routing key
                        channel.BasicPublish(exchange: "",
                                             routingKey: queueName,
                                             basicProperties: properties,
                                             body: data);
                    }
                }
            }

            var ids_update = taskList.Select(i => i.TaskId).Distinct();

            foreach (var id in ids_update)
            {
                var obj = parserManager.Get(id);
                obj.Status = Common.Enum.Status.InQuery;
                parserManager.Update(obj);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Takes the next task from <c>TaskGetter.GetTask</c>, executes it unless its end date
        /// has already passed, and stores the resulting status on the parser task.
        /// </summary>
        /// <param name="context">Job execution context; not used by this method.</param>
        public void Execute(IJobExecutionContext context)
        {
            TaskExecuter te = new TaskExecuter();
            TaskGetter   tg = new TaskGetter();

            var obj = tg.GetTask();

            uOw.UpdateContext();

            if (obj == null)
            {
                return;
            }

            var endTime = parsermanager.Get(obj.TaskId).EndDate;

            // Default outcome: the end date has passed, so the task is not executed.
            var nextStatus = Common.Enum.Status.Finished;

            if (endTime == null)
            {
                // No end date: run the task and keep it repeating.
                te.ExecuteTask(obj.TaskId, obj.GoodUrl);
                nextStatus = Common.Enum.Status.Infinite;
            }
            else if (DateTime.Now <= endTime)
            {
                // End date not reached yet: run the task and queue it for the next round.
                te.ExecuteTask(obj.TaskId, obj.GoodUrl);
                nextStatus = Common.Enum.Status.Coming;
            }

            // The task is fetched again after execution, as the original code did,
            // before the new status is written.
            var task_s = parsermanager.Get(obj.TaskId);
            task_s.Status = nextStatus;
            parsermanager.Update(task_s);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Placeholder image stored when no image link can be extracted from the page.
        /// </summary>
        private const string MissingImageUrl = @"http://www.kalahandi.info/wp-content/uploads/2016/05/sorry-image-not-available.png";

        /// <summary>
        /// Parses input url by configuration from parser task
        /// </summary>
        /// <remarks>
        /// A failure while extracting a single field (name, price, old price, image, property)
        /// is logged and recorded as an error entry, and parsing continues with the next field.
        /// Only a failure to download or load the page aborts the whole operation.
        /// </remarks>
        /// <param name="parsertaskid">id of parser task</param>
        /// <param name="url">url to parse</param>
        /// <returns>New parsed GoodDTO, or <c>null</c> when the page could not be downloaded or loaded</returns>
        public GoodDTO ExecuteTask(int parsertaskid, string url)
        {
            ParserTaskDTO parsertask = parsermanager.Get(parsertaskid);
            HtmlDocument  doc        = null;

            //adding to local log storage; the entry is deleted again before the method returns
            ExecutingInfoDTO taskinfo = new ExecutingInfoDTO()
            {
                GoodUrl      = url,
                Status       = ExecuteStatus.Executing,
                Date         = DateTime.Now,
                ParserTaskId = parsertaskid
            };

            taskinfo.Id = taskinfoManager.Insert(taskinfo);

            //getting page source due to method
            string pageSource = "";

            try
            {
                SiteDownloader sw = new SiteDownloader();

                switch (parsertask.IteratorSettings.DownloadMethod)
                {
                case DownloadMethod.Direct:
                    pageSource = sw.GetPageSouceDirectly(url);
                    break;

                case DownloadMethod.Tor:
                    pageSource = sw.GetPageSouce(url);
                    break;

                default:
                    break;
                }

                doc = new HtmlDocument();
                doc.LoadHtml(pageSource);
            }
            catch (Exception ex)
            {
                logger.Error(ex);
                InsertErrorInfo(url, parsertaskid, "Can't download url");
                taskinfoManager.Delete(taskinfo);
                return(null);
            }

            //gets configuration from parsertask id
            GrabberSettingsDTO grabbersettings = parsertask.GrabberSettings;

            GoodDTO resultGood = new GoodDTO();

            resultGood.WebShop_Id  = parsertask.WebShopId;
            resultGood.Category_Id = parsertask.CategoryId;

            // Holds the xpath that was evaluated last, so error entries can report it.
            var xpathbuffer = "";

            //parsing name by list of xpaths
            try
            {
                HtmlNode nameNode = SelectFirstNode(doc, grabbersettings.Name, "", ref xpathbuffer);
                var      name     = nameNode != null ? nameNode.InnerHtml : "";
                resultGood.Name = StripHTML(name.Trim());
            }
            catch (Exception ex)
            {
                logger.Error(ex);
                InsertErrorInfo(url, parsertaskid, "Can't parse name,-xpath: " + xpathbuffer);
            }

            //parsing price by list of xpaths
            try
            {
                HtmlNode priceNode = SelectFirstNode(doc, grabbersettings.Price, "", ref xpathbuffer);
                var      price     = priceNode != null ? priceNode.InnerHtml : "";
                if (price != "")
                {
                    resultGood.Price = Convert.ToDecimal(this.RemoveAllLetters(price));
                }
            }
            catch (Exception ex)
            {
                logger.Error(ex);
                InsertErrorInfo(url, parsertaskid, "Can't parse main price,-xpath: " + xpathbuffer);
            }

            //parsing old price by list of xpaths
            try
            {
                HtmlNode oldPriceNode = SelectFirstNode(doc, grabbersettings.OldPrice, "", ref xpathbuffer);
                var      oldPrice     = oldPriceNode != null ? oldPriceNode.InnerHtml : "";
                if (oldPrice != "")
                {
                    resultGood.OldPrice = Convert.ToDecimal(this.RemoveAllLetters(oldPrice));
                }
            }
            catch (Exception ex)
            {
                logger.Error(ex);
                InsertErrorInfo(url, parsertaskid, "Can't parse old price,-xpath: " + xpathbuffer);
            }

            //parsing image link by list of xpaths
            try
            {
                var      imagelink = "";
                HtmlNode imageNode = SelectFirstNode(doc, grabbersettings.ImgLink, "/@src", ref xpathbuffer);
                if (imageNode != null)
                {
                    imagelink = imageNode.Attributes["src"].Value;
                }

                // The placeholder is applied after all xpaths were tried, so it is also used
                // when the xpath list is empty or the matched src attribute is blank.
                resultGood.ImgLink = string.IsNullOrEmpty(imagelink) ? MissingImageUrl : imagelink;
            }
            catch (Exception ex)
            {
                logger.Error(ex);
                resultGood.ImgLink = MissingImageUrl;
                InsertErrorInfo(url, parsertaskid, "Can't parse image link,-xpath: " + xpathbuffer);
            }

            resultGood.UrlLink = url;
            PropertyValuesDTO propertyValues = new PropertyValuesDTO();

            propertyValues.DictDoubleProperties = new Dictionary <int, double>();
            propertyValues.DictIntProperties    = new Dictionary <int, int>();
            propertyValues.DictStringProperties = new Dictionary <int, string>();

            foreach (var propitem in grabbersettings.PropertyItems)
            {
                PropertyDTO property  = propmanager.Get(propitem.Id);
                var         htmlvalue = "";

                //extracting the raw value by list of xpaths
                try
                {
                    HtmlNode value = SelectFirstNode(doc, propitem.Value, "", ref xpathbuffer);
                    if (value != null)
                    {
                        htmlvalue = value.InnerHtml;
                    }
                }
                catch (Exception ex)
                {
                    logger.Error(ex);
                    InsertErrorInfo(url, parsertaskid, "Can't parse property" + property.Name + ",-xpath: " + xpathbuffer);
                }

                //converting the raw value to the declared property type
                try
                {
                    switch (property.Type)
                    {
                    case PropertyType.Integer:
                        propertyValues.DictIntProperties.Add(propitem.Id, Convert.ToInt32(htmlvalue));
                        break;

                    case PropertyType.Double:
                        propertyValues.DictDoubleProperties.Add(propitem.Id, Convert.ToDouble(htmlvalue));
                        break;

                    case PropertyType.String:
                        propertyValues.DictStringProperties.Add(propitem.Id, StripHTML(htmlvalue));
                        break;

                    default:
                        break;
                    }
                }
                catch (Exception ex)
                {
                    logger.Error(ex);
                    InsertErrorInfo(url, parsertaskid, "Can't convert value " + htmlvalue + " of " + property.Name + ",-xpath: " + xpathbuffer);
                }
            }

            resultGood.Status         = true;
            resultGood.PropertyValues = propertyValues;
            goodwizardManager.InsertOrUpdate(resultGood);

            var newPrice = new PriceHistoryDTO();

            newPrice.Url   = resultGood.UrlLink;
            newPrice.Price = resultGood.Price;
            newPrice.Date  = DateTime.Now;
            newPrice.Name  = resultGood.Name;
            priceManager.Insert(newPrice);

            //deleting from local log storage
            taskinfoManager.Delete(taskinfo);
            return(resultGood);
        }

        /// <summary>
        /// Returns the node matched by the first xpath in <paramref name="xpaths"/> that matches anything.
        /// </summary>
        /// <param name="doc">Loaded document to query</param>
        /// <param name="xpaths">Candidate xpaths, tried in order</param>
        /// <param name="xpathSuffix">Text appended to every candidate before it is evaluated</param>
        /// <param name="lastXpath">Receives each candidate (without suffix) just before it is evaluated</param>
        /// <returns>The first matching node, or <c>null</c> when no candidate matches</returns>
        private static HtmlNode SelectFirstNode(HtmlDocument doc, IEnumerable<string> xpaths, string xpathSuffix, ref string lastXpath)
        {
            foreach (var xpath in xpaths)
            {
                lastXpath = xpath;

                // SelectSingleNode yields null for "no match", so the next candidate is tried
                // instead of failing on a null node collection.
                HtmlNode node = doc.DocumentNode.SelectSingleNode(xpath + xpathSuffix);
                if (node != null)
                {
                    return node;
                }
            }

            return null;
        }

        /// <summary>
        /// Inserts an <c>ErrorInsert</c> entry for the given url and parser task into the local log storage.
        /// </summary>
        /// <param name="url">Url that was being processed</param>
        /// <param name="parsertaskid">id of parser task</param>
        /// <param name="message">Error text stored with the entry</param>
        private void InsertErrorInfo(string url, int parsertaskid, string message)
        {
            ExecutingInfoDTO errorinfo = new ExecutingInfoDTO()
            {
                GoodUrl      = url,
                Status       = ExecuteStatus.ErrorInsert,
                Date         = DateTime.Now,
                ParserTaskId = parsertaskid,
                ErrorMessage = message
            };
            taskinfoManager.Insert(errorinfo);
        }