Exemple #1
0
        /// <summary>
        /// Stores the submitted grabber settings on the parser task identified by
        /// <paramref name="id"/>, triggers <c>deleteFilesManager.DeleteFiles()</c> and
        /// redirects to the <c>Index</c> action of the <c>UniversalParser</c> controller.
        /// </summary>
        /// <param name="id">Id of the parser task to update.</param>
        /// <param name="grabber">Grabber settings to attach to the task.</param>
        /// <returns>
        /// A redirect to <c>UniversalParser/Index</c> on success; a 404 result when
        /// <paramref name="id"/> is missing or no task is returned for it.
        /// </returns>
        public ActionResult Grabber(int? id, GrabberSettingsDTO grabber)
        {
            // Same guard as the id-only Grabber action: without it id.Value throws
            // InvalidOperationException when the id is absent.
            if (id == null)
            {
                return HttpNotFound();
            }

            var task = parserTaskManager.Get(id.Value);

            // NOTE(review): assumes Get returns null for an unknown id - confirm against the manager.
            if (task == null)
            {
                return HttpNotFound();
            }

            task.GrabberSettings = grabber;
            parserTaskManager.Update(task);

            deleteFilesManager.DeleteFiles();
            return RedirectToAction("Index", "UniversalParser");
        }
Exemple #2
0
        /// <summary>
        /// Prepares the grabber configuration view for a parser task.
        /// </summary>
        /// <remarks>
        /// Uses the task's existing grabber settings or, when there are none, builds new
        /// settings whose property items are mapped from the task category's properties list.
        /// Up to the first two URLs produced from the task's iterator settings are passed to
        /// <c>downloadManager.DownloadFromPath</c> and replaced in the link array by
        /// <c>/WebSites/{guid}.html</c> paths. The untouched URL list is serialized into
        /// <c>urlJsonData</c>. <c>Session["Length"]</c> and the <c>TempData</c> entries
        /// <c>CurrentPage</c>, <c>NextPage</c> and <c>AllSrc</c> are filled from the link array;
        /// <c>CurrentPage</c> / <c>NextPage</c> are null when the task has fewer than one / two URLs.
        /// </remarks>
        /// <param name="id">Id of the parser task.</param>
        /// <returns>
        /// The view bound to the grabber settings, or a 404 result when <paramref name="id"/>
        /// is missing or no task is returned for it.
        /// </returns>
        public ActionResult Grabber(int? id)
        {
            if (id == null)
            {
                return HttpNotFound();
            }

            var task = parserTaskManager.Get(id.Value);

            // NOTE(review): assumes Get returns null for an unknown id - confirm against the manager.
            if (task == null)
            {
                return HttpNotFound();
            }

            GrabberSettingsDTO grabber = task.GrabberSettings;
            if (grabber == null)
            {
                // No settings saved yet: seed them from the category's properties.
                grabber               = new GrabberSettingsDTO();
                grabber.Id            = id.Value;
                task.Category         = categoryManager.Get(task.CategoryId);
                grabber.PropertyItems = Mapper.Map<List<GrabberPropertyItemDTO>>(task.Category.PropertiesList);
            }

            var urlList = new List<string>();
            if (task.IteratorSettings != null)
            {
                urlList = urlManager.GetAllUrls(task.IteratorSettings) ?? new List<string>();
            }

            // jsonArray keeps the original URLs; arrayOfLinks gets local paths for the downloaded pages.
            var jsonArray    = urlList.ToArray();
            var arrayOfLinks = urlList.ToArray();

            // Only the first two pages are downloaded; clamp so shorter lists do not overflow.
            int downloadCount = Math.Min(2, arrayOfLinks.Length);
            for (int index = 0; index < downloadCount; index++)
            {
                if (!String.IsNullOrWhiteSpace(arrayOfLinks[index]))
                {
                    Guid result = downloadManager.DownloadFromPath(arrayOfLinks[index]);
                    arrayOfLinks[index] = "/WebSites/" + result + ".html";
                }
            }

            grabber.urlJsonData     = JsonConvert.SerializeObject(jsonArray);
            Session["Length"]       = arrayOfLinks.Length;
            TempData["CurrentPage"] = arrayOfLinks.Length > 0 ? arrayOfLinks[0] : null;
            TempData["NextPage"]    = arrayOfLinks.Length > 1 ? arrayOfLinks[1] : null;
            TempData["AllSrc"]      = arrayOfLinks;
            return View(grabber);
        }
Exemple #3
0
        /// <summary>
        /// Parses input url by configuration from parser task
        /// </summary>
        /// <remarks>
        /// An <c>Executing</c> info record is inserted before work starts and deleted again on
        /// every return path of this method. The page is downloaded with the method selected by the
        /// task's iterator settings, then name, price, old price, image link and every configured
        /// property are extracted by trying each configured XPath in order and taking the first
        /// one that matches. A failure in one section is logged, recorded as an
        /// <c>ErrorInsert</c> info record and does not stop the remaining sections. The resulting
        /// good is passed to <c>goodwizardManager.InsertOrUpdate</c> and a price history entry
        /// is inserted.
        /// </remarks>
        /// <param name="parsertaskid">id of parser task</param>
        /// <param name="url">url to parse</param>
        /// <returns>
        /// New parsed GoodDTO, or null when the page could not be downloaded or the task has
        /// no grabber settings.
        /// </returns>
        public GoodDTO ExecuteTask(int parsertaskid, string url)
        {
            const string missingImageUrl = @"http://www.kalahandi.info/wp-content/uploads/2016/05/sorry-image-not-available.png";

            ParserTaskDTO parsertask = parsermanager.Get(parsertaskid);
            HtmlDocument  doc        = null;

            // Marker record for the run in progress; deleted again before returning.
            ExecutingInfoDTO taskinfo = new ExecutingInfoDTO()
            {
                GoodUrl      = url,
                Status       = ExecuteStatus.Executing,
                Date         = DateTime.Now,
                ParserTaskId = parsertaskid
            };

            taskinfo.Id = taskinfoManager.Insert(taskinfo);

            // Download the page source with the configured method and load it into a document.
            try
            {
                SiteDownloader sw         = new SiteDownloader();
                string         pageSource = "";

                switch (parsertask.IteratorSettings.DownloadMethod)
                {
                case DownloadMethod.Direct:
                    pageSource = sw.GetPageSouceDirectly(url);
                    break;

                case DownloadMethod.Tor:
                    pageSource = sw.GetPageSouce(url);
                    break;

                default:
                    // Any other method leaves the source empty, so an empty document is parsed.
                    break;
                }

                doc = new HtmlDocument();
                doc.LoadHtml(pageSource);
            }
            catch (Exception ex)
            {
                logger.Error(ex);
                ReportExecutionError(parsertaskid, url, "Can't download url");
                taskinfoManager.Delete(taskinfo);
                return null;
            }

            GrabberSettingsDTO grabbersettings = parsertask.GrabberSettings;

            if (grabbersettings == null)
            {
                // Without settings there is nothing to extract; report once instead of failing
                // in every section and then crashing on the property loop.
                ReportExecutionError(parsertaskid, url, "Grabber settings are not configured for parser task");
                taskinfoManager.Delete(taskinfo);
                return null;
            }

            GoodDTO resultGood = new GoodDTO();

            resultGood.WebShop_Id  = parsertask.WebShopId;
            resultGood.Category_Id = parsertask.CategoryId;

            // Last XPath that was tried; used in error messages.
            string xpathbuffer = "";

            // Name
            try
            {
                HtmlNode nameNode = SelectFirstMatch(doc, grabbersettings.Name, null, ref xpathbuffer);
                string   name     = nameNode != null ? nameNode.InnerHtml : "";
                resultGood.Name = StripHTML(name.Trim());
            }
            catch (Exception ex)
            {
                logger.Error(ex);
                ReportExecutionError(parsertaskid, url, "Can't parse name,-xpath: " + xpathbuffer);
            }

            // Price
            // NOTE(review): Convert.ToDecimal uses the current culture - confirm that matches the scraped format.
            try
            {
                HtmlNode priceNode = SelectFirstMatch(doc, grabbersettings.Price, null, ref xpathbuffer);
                if (priceNode != null && priceNode.InnerHtml != "")
                {
                    resultGood.Price = Convert.ToDecimal(this.RemoveAllLetters(priceNode.InnerHtml));
                }
            }
            catch (Exception ex)
            {
                logger.Error(ex);
                ReportExecutionError(parsertaskid, url, "Can't parse main price,-xpath: " + xpathbuffer);
            }

            // Old price: a page without one is not an error, the value just stays unset.
            try
            {
                HtmlNode oldPriceNode = SelectFirstMatch(doc, grabbersettings.OldPrice, null, ref xpathbuffer);
                if (oldPriceNode != null && oldPriceNode.InnerHtml != "")
                {
                    resultGood.OldPrice = Convert.ToDecimal(this.RemoveAllLetters(oldPriceNode.InnerHtml));
                }
            }
            catch (Exception ex)
            {
                logger.Error(ex);
                ReportExecutionError(parsertaskid, url, "Can't parse old price,-xpath: " + xpathbuffer);
            }

            // Image link: read from the src attribute of the first matching node.
            try
            {
                HtmlNode imageNode = SelectFirstMatch(doc, grabbersettings.ImgLink, "/@src", ref xpathbuffer);
                if (imageNode != null)
                {
                    resultGood.ImgLink = imageNode.Attributes["src"].Value;
                }
            }
            catch (Exception ex)
            {
                logger.Error(ex);
                resultGood.ImgLink = missingImageUrl;
                ReportExecutionError(parsertaskid, url, "Can't parse image link,-xpath: " + xpathbuffer);
            }

            // Fall back to the placeholder whenever no usable link was found, including the
            // case of an empty XPath list.
            if (String.IsNullOrEmpty(resultGood.ImgLink))
            {
                resultGood.ImgLink = missingImageUrl;
            }

            resultGood.UrlLink = url;
            PropertyValuesDTO propertyValues = new PropertyValuesDTO();

            propertyValues.DictDoubleProperties = new Dictionary<int, double>();
            propertyValues.DictIntProperties    = new Dictionary<int, int>();
            propertyValues.DictStringProperties = new Dictionary<int, string>();

            foreach (var propitem in grabbersettings.PropertyItems)
            {
                PropertyDTO property  = propmanager.Get(propitem.Id);
                string      htmlvalue = "";

                try
                {
                    HtmlNode valueNode = SelectFirstMatch(doc, propitem.Value, null, ref xpathbuffer);
                    if (valueNode != null)
                    {
                        htmlvalue = valueNode.InnerHtml;
                    }
                }
                catch (Exception ex)
                {
                    logger.Error(ex);
                    ReportExecutionError(parsertaskid, url, "Can't parse property" + property.Name + ",-xpath: " + xpathbuffer);
                }

                // Store the raw value in the dictionary matching the property's declared type.
                try
                {
                    switch (property.Type)
                    {
                    case PropertyType.Integer:
                        propertyValues.DictIntProperties.Add(propitem.Id, Convert.ToInt32(htmlvalue));
                        break;

                    case PropertyType.Double:
                        propertyValues.DictDoubleProperties.Add(propitem.Id, Convert.ToDouble(htmlvalue));
                        break;

                    case PropertyType.String:
                        propertyValues.DictStringProperties.Add(propitem.Id, StripHTML(htmlvalue));
                        break;

                    default:
                        break;
                    }
                }
                catch (Exception ex)
                {
                    logger.Error(ex);
                    ReportExecutionError(parsertaskid, url, "Can't convert value " + htmlvalue + " of " + property.Name + ",-xpath: " + xpathbuffer);
                }
            }

            resultGood.Status         = true;
            resultGood.PropertyValues = propertyValues;
            goodwizardManager.InsertOrUpdate(resultGood);

            var newPrice = new PriceHistoryDTO();

            newPrice.Url   = resultGood.UrlLink;
            newPrice.Price = resultGood.Price;
            newPrice.Date  = DateTime.Now;
            newPrice.Name  = resultGood.Name;
            priceManager.Insert(newPrice);

            // The run is finished: remove the "Executing" marker record.
            taskinfoManager.Delete(taskinfo);
            return resultGood;
        }

        /// <summary>
        /// Inserts an <c>ErrorInsert</c> info record for the given task and url.
        /// </summary>
        /// <param name="parsertaskid">id of parser task</param>
        /// <param name="url">url that was being parsed</param>
        /// <param name="message">error text stored in the record</param>
        private void ReportExecutionError(int parsertaskid, string url, string message)
        {
            ExecutingInfoDTO errorinfo = new ExecutingInfoDTO()
            {
                GoodUrl      = url,
                Status       = ExecuteStatus.ErrorInsert,
                Date         = DateTime.Now,
                ParserTaskId = parsertaskid,
                ErrorMessage = message
            };

            taskinfoManager.Insert(errorinfo);
        }

        /// <summary>
        /// Tries each XPath in order and returns the first node found, or null when none matches.
        /// </summary>
        /// <param name="doc">document to query</param>
        /// <param name="xpaths">candidate XPath expressions, tried in order</param>
        /// <param name="suffix">text appended to every XPath before querying, or null for none</param>
        /// <param name="xpathbuffer">
        /// receives the XPath (without suffix) currently being tried, so the caller can report
        /// it even when the query throws
        /// </param>
        private static HtmlNode SelectFirstMatch(HtmlDocument doc, IEnumerable<string> xpaths, string suffix, ref string xpathbuffer)
        {
            foreach (string xpath in xpaths)
            {
                xpathbuffer = xpath;
                string query = suffix == null ? xpath : xpath + suffix;

                // SelectSingleNode yields null on no match, so the loop can fall through
                // to the next candidate XPath.
                HtmlNode node = doc.DocumentNode.SelectSingleNode(query);
                if (node != null)
                {
                    return node;
                }
            }

            return null;
        }