Beispiel #1
0
        /// <summary>
        /// Adds a new parser task to the parser repository.
        /// </summary>
        /// <param name="parsertask">DTO describing the task to store; may be null.</param>
        /// <returns>
        /// Id of the stored parser task, or -1 when <paramref name="parsertask"/> is null
        /// or an exception is thrown while resolving references or saving.
        /// </returns>
        public int Add(ParserTaskDTO parsertask)
        {
            if (parsertask == null)
            {
                return(-1);
            }

            ParserTask parsertaskDb = Mapper.Map <ParserTask>(parsertask);

            try
            {
                // Attach the referenced entities by id before inserting.
                parsertaskDb.Category   = uOW.CategoryRepo.GetByID(parsertaskDb.CategoryId);
                parsertaskDb.WebShop    = uOW.WebShopRepo.GetByID(parsertaskDb.WebShopId);
                // A freshly added task always starts as NotFinished.
                parsertaskDb.Status     = Common.Enum.Status.NotFinished;
                parsertaskDb.LastChange = DateTime.Now;
                uOW.ParserRepo.Insert(parsertaskDb);
                uOW.Save();
                return(parsertaskDb.Id);
            }
            catch (Exception ex)
            {
                // Hand the whole exception to the logger (not just ex.Message) so the
                // exception type, stack trace and inner exceptions are not lost.
                logger.Error(ex);
                return(-1);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Shows the grabber configuration page for a parser task.
        /// </summary>
        /// <remarks>
        /// Up to the first two URLs produced by the task's iterator settings are downloaded
        /// through downloadManager and replaced by their local "/WebSites/{guid}.html" paths;
        /// the untouched URL list is serialized into <c>grabber.urlJsonData</c>.
        /// </remarks>
        /// <param name="id">Id of the parser task.</param>
        /// <returns>
        /// The view bound to the grabber settings, or HttpNotFound when <paramref name="id"/>
        /// is missing or parserTaskManager returns no task for it.
        /// </returns>
        public ActionResult Grabber(int?id)
        {
            if (id == null)
            {
                return(HttpNotFound());
            }

            ParserTaskDTO task = parserTaskManager.Get(id.Value);

            if (task == null)
            {
                // Unknown task id: answer 404 instead of failing with a NullReferenceException.
                return(HttpNotFound());
            }

            GrabberSettingsDTO grabber = task.GrabberSettings;

            if (grabber == null)
            {
                // No saved grabber settings yet: start from the property list of the task's category.
                grabber               = new GrabberSettingsDTO();
                grabber.Id            = id.Value;
                task.Category         = categoryManager.Get(task.CategoryId);
                grabber.PropertyItems = Mapper.Map <List <GrabberPropertyItemDTO> >(task.Category.PropertiesList);
            }

            var urlList = new List <string>();

            if (task.IteratorSettings != null)
            {
                // Keep the empty list if the url manager hands back null.
                urlList = urlManager.GetAllUrls(task.IteratorSettings) ?? urlList;
            }

            // arrayOfLinks gets its first entries rewritten to local paths below;
            // jsonArray is a separate copy that keeps the original URLs.
            var arrayOfLinks = urlList.ToArray();
            var jsonArray    = urlList.ToArray();

            // Only the first two pages are pre-downloaded; there may be fewer than two URLs.
            int previewCount = Math.Min(2, arrayOfLinks.Length);

            for (int index = 0; index < previewCount; index++)
            {
                if (!String.IsNullOrWhiteSpace(arrayOfLinks[index]))
                {
                    Guid result = downloadManager.DownloadFromPath(arrayOfLinks[index]);
                    arrayOfLinks[index] = "/WebSites/" + result + ".html";
                }
            }

            grabber.urlJsonData     = JsonConvert.SerializeObject(jsonArray);
            Session["Length"]       = arrayOfLinks.Length;
            // Missing pages are stored as null rather than indexing past the end of the array.
            TempData["CurrentPage"] = arrayOfLinks.Length > 0 ? arrayOfLinks[0] : null;
            TempData["NextPage"]    = arrayOfLinks.Length > 1 ? arrayOfLinks[1] : null;
            TempData["AllSrc"]      = arrayOfLinks;
            return(View(grabber));
        }
Beispiel #3
0
        /// <summary>
        /// Saves the submitted parser task and redirects to the "Iterator" action.
        /// </summary>
        /// <param name="parsertask">Parser task data to save.</param>
        /// <param name="parsertaskid">
        /// Id of an existing task to update; when null the task is added as a new one.
        /// </param>
        /// <returns>
        /// Redirect to "Iterator" with the updated task's id, or with the value returned by
        /// parserTaskManager.Add for a new task.
        /// </returns>
        public ActionResult Settings(ParserTaskDTO parsertask, int?parsertaskid)
        {
            int targetId;

            if (parsertaskid.HasValue)
            {
                // Existing task: stamp the id onto the DTO and update it.
                targetId      = parsertaskid.Value;
                parsertask.Id = targetId;
                parserTaskManager.Update(parsertask);
            }
            else
            {
                // New task: it starts as NotFinished and the manager reports the id to use.
                parsertask.Status = Common.Enum.Status.NotFinished;
                targetId          = parserTaskManager.Add(parsertask);
            }

            return RedirectToAction("Iterator", new { id = targetId });
        }
Beispiel #4
0
        /// <summary>
        /// Shows the parser task settings page.
        /// </summary>
        /// <param name="id">Optional id of a parser task to load into the view model.</param>
        /// <returns>
        /// The view bound to a SettingsViewDTO holding the categories whose
        /// HasChildrenCategories is false, all shops, and the parser task when one
        /// was returned for <paramref name="id"/>.
        /// </returns>
        public ActionResult Settings(int?id)
        {
            var settingsView = new SettingsViewDTO();

            // Only categories without child categories are offered.
            settingsView.Categories = categoryManager.GetAll()
                                                     .Where(c => c.HasChildrenCategories == false)
                                                     .ToList();
            settingsView.Shops = shopManager.GetAll().ToList();

            if (id.HasValue)
            {
                ParserTaskDTO existing = parserTaskManager.Get(id.Value);

                // Leave ParserTask untouched when the manager returns nothing for this id.
                if (existing != null)
                {
                    settingsView.ParserTask = existing;
                }
            }

            return View(settingsView);
        }
Beispiel #5
0
        /// <summary>
        /// Updates a stored parser task from the given DTO.
        /// </summary>
        /// <remarks>
        /// Grabber and iterator settings are serialized to XML and overwritten only when the
        /// DTO carries them. The status is InQuery when the DTO asks for it; otherwise it is
        /// Infinite/Coming (depending on EndDate) when the DTO carries both settings, and
        /// NotFinished in every other case.
        /// </remarks>
        /// <param name="parsertask">DTO with the new values; its Id selects the stored task.</param>
        /// <returns>
        /// DTO mapped from the updated entity, or null when <paramref name="parsertask"/> is
        /// null or no stored task has its Id.
        /// </returns>
        public ParserTaskDTO Update(ParserTaskDTO parsertask)
        {
            if (parsertask == null)
            {
                // Same "nothing to update" answer as for an unknown id, instead of a NullReferenceException.
                return(null);
            }

            var temp = uOW.ParserRepo.Get(p => p.Id == parsertask.Id).FirstOrDefault();

            if (temp == null)
            {
                return(null);
            }

            var serializer = new ExtendedXmlSerializer();

            // Flags tell whether the incoming DTO carries each additional settings object.
            // NOTE(review): settings already stored on the entity are not considered here, so an
            // update without them resets the status to NotFinished - confirm this is intended.
            bool grabberSettingsFilled  = parsertask.GrabberSettings != null;
            bool iteratorSettingsFilled = parsertask.IteratorSettings != null;

            if (grabberSettingsFilled)
            {
                temp.GrabberSettings = serializer.Serialize(parsertask.GrabberSettings);
            }
            if (iteratorSettingsFilled)
            {
                temp.IteratorSettings = serializer.Serialize(parsertask.IteratorSettings);
            }

            if (parsertask.Status == Common.Enum.Status.InQuery)
            {
                // An explicit InQuery request wins over the computed status.
                temp.Status = Common.Enum.Status.InQuery;
            }
            else if (grabberSettingsFilled && iteratorSettingsFilled)
            {
                // Fully configured task: without an end date it is Infinite, otherwise Coming.
                temp.Status = parsertask.EndDate == null
                    ? Common.Enum.Status.Infinite
                    : Common.Enum.Status.Coming;
            }
            else
            {
                temp.Status = Common.Enum.Status.NotFinished;
            }

            temp.Priority    = parsertask.Priority;
            temp.Description = parsertask.Description;
            temp.EndDate     = parsertask.EndDate;

            temp.CategoryId = parsertask.CategoryId;
            temp.WebShopId  = parsertask.WebShopId;

            // Re-resolve the referenced entities for the (possibly changed) ids.
            temp.Category = uOW.CategoryRepo.GetByID(temp.CategoryId);
            temp.WebShop  = uOW.WebShopRepo.GetByID(temp.WebShopId);

            uOW.ParserRepo.SetStateModified(temp);
            uOW.Save();
            return(Mapper.Map <ParserTaskDTO>(temp));
        }
Beispiel #6
0
        /// <summary>
        /// Returns what urlManager.GetAllUrls produces for the iterator settings of a parser task.
        /// </summary>
        /// <remarks>
        /// NOTE(review): despite the method name, the result comes from the url manager, so it
        /// presumably holds links rather than good names - confirm against the callers.
        /// </remarks>
        /// <param name="model">Parser task whose IteratorSettings are passed to the url manager.</param>
        /// <returns>The list returned by urlManager.GetAllUrls.</returns>
        private List <string> GetAllNamesOfGoods(ParserTaskDTO model)
        {
            return urlManager.GetAllUrls(model.IteratorSettings);
        }
Beispiel #7
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/> and parses it into a good using the
        /// grabber configuration of a parser task.
        /// </summary>
        /// <remarks>
        /// An "Executing" record is inserted through taskinfoManager before work starts and is
        /// deleted when the method returns normally or gives up early. Every parsing problem is
        /// stored as a separate ErrorInsert record; a failure to parse a single field (name,
        /// price, old price, image, property) does not abort the run. The resulting good is
        /// passed to goodwizardManager.InsertOrUpdate and a price history entry is inserted.
        /// </remarks>
        /// <param name="parsertaskid">id of parser task</param>
        /// <param name="url">url to parse</param>
        /// <returns>
        /// The parsed good, or null when the page could not be downloaded/loaded or the task
        /// has no grabber settings.
        /// </returns>
        public GoodDTO ExecuteTask(int parsertaskid, string url)
        {
            // Placeholder stored when no image link could be extracted.
            const string defaultImageLink = @"http://www.kalahandi.info/wp-content/uploads/2016/05/sorry-image-not-available.png";

            ParserTaskDTO parsertask = parsermanager.Get(parsertaskid);
            HtmlDocument  doc        = null;

            // adding to local log storage
            ExecutingInfoDTO taskinfo = new ExecutingInfoDTO()
            {
                GoodUrl      = url,
                Status       = ExecuteStatus.Executing,
                Date         = DateTime.Now,
                ParserTaskId = parsertaskid
            };

            taskinfo.Id = taskinfoManager.Insert(taskinfo);

            // getting page source according to the configured download method
            string pageSource = "";

            try
            {
                SiteDownloader sw = new SiteDownloader();

                switch (parsertask.IteratorSettings.DownloadMethod)
                {
                case DownloadMethod.Direct:
                    pageSource = sw.GetPageSouceDirectly(url);
                    break;

                case DownloadMethod.Tor:
                    pageSource = sw.GetPageSouce(url);
                    break;

                default:
                    break;
                }

                doc = new HtmlDocument();
                doc.LoadHtml(pageSource);
            }
            catch (Exception ex)
            {
                // Also reached when the task or its iterator settings are missing.
                logger.Error(ex);
                InsertExecutingError(url, parsertaskid, "Can't download url");
                taskinfoManager.Delete(taskinfo);
                return(null);
            }

            GrabberSettingsDTO grabbersettings = parsertask.GrabberSettings;

            if (grabbersettings == null)
            {
                // Nothing to parse with: record it and clean up the "Executing" record
                // instead of crashing later with a NullReferenceException.
                InsertExecutingError(url, parsertaskid, "Grabber settings are not configured");
                taskinfoManager.Delete(taskinfo);
                return(null);
            }

            GoodDTO resultGood = new GoodDTO();

            resultGood.WebShop_Id  = parsertask.WebShopId;
            resultGood.Category_Id = parsertask.CategoryId;

            // Last xpath that was tried; used in the error messages below.
            var xpathbuffer = "";

            // Parsing name by list of xpaths
            try
            {
                string name = SelectFirstInnerHtml(doc, grabbersettings.Name, ref xpathbuffer);
                resultGood.Name = StripHTML(name.Trim());
            }
            catch (Exception ex)
            {
                logger.Error(ex);
                InsertExecutingError(url, parsertaskid, "Can't parse name,-xpath: " + xpathbuffer);
            }

            // Parsing price by list of xpaths
            try
            {
                string price = SelectFirstInnerHtml(doc, grabbersettings.Price, ref xpathbuffer);
                if (price != "")
                {
                    resultGood.Price = Convert.ToDecimal(this.RemoveAllLetters(price));
                }
            }
            catch (Exception ex)
            {
                logger.Error(ex);
                InsertExecutingError(url, parsertaskid, "Can't parse main price,-xpath: " + xpathbuffer);
            }

            // Parsing old price by list of xpaths
            try
            {
                string oldPrice = SelectFirstInnerHtml(doc, grabbersettings.OldPrice, ref xpathbuffer);
                if (oldPrice != "")
                {
                    resultGood.OldPrice = Convert.ToDecimal(this.RemoveAllLetters(oldPrice));
                }
            }
            catch (Exception ex)
            {
                logger.Error(ex);
                InsertExecutingError(url, parsertaskid, "Can't parse old price,-xpath: " + xpathbuffer);
            }

            // Parsing image link by list of xpaths
            try
            {
                var imagelink = "";
                foreach (var imglink in grabbersettings.ImgLink)
                {
                    xpathbuffer = imglink;
                    // SelectSingleNode returns null on "no match", so the next xpath is tried.
                    HtmlNode value = doc.DocumentNode.SelectSingleNode(imglink + "/@src");
                    if (value != null)
                    {
                        imagelink = value.Attributes["src"].Value;
                        break;
                    }
                }
                // Fall back to the placeholder when nothing usable was found,
                // including the case of an empty xpath list.
                resultGood.ImgLink = String.IsNullOrEmpty(imagelink) ? defaultImageLink : imagelink;
            }
            catch (Exception ex)
            {
                logger.Error(ex);
                resultGood.ImgLink = defaultImageLink;
                InsertExecutingError(url, parsertaskid, "Can't parse image link,-xpath: " + xpathbuffer);
            }

            resultGood.UrlLink = url;
            PropertyValuesDTO propertyValues = new PropertyValuesDTO();

            propertyValues.DictDoubleProperties = new Dictionary <int, double>();
            propertyValues.DictIntProperties    = new Dictionary <int, int>();
            propertyValues.DictStringProperties = new Dictionary <int, string>();

            if (grabbersettings.PropertyItems != null)
            {
                foreach (var propitem in grabbersettings.PropertyItems)
                {
                    PropertyDTO property = propmanager.Get(propitem.Id);

                    if (property == null)
                    {
                        // Without the property definition neither its name nor its type is known.
                        InsertExecutingError(url, parsertaskid, "Can't find property with id " + propitem.Id);
                        continue;
                    }

                    var htmlvalue = "";
                    try
                    {
                        htmlvalue = SelectFirstInnerHtml(doc, propitem.Value, ref xpathbuffer);
                    }
                    catch (Exception ex)
                    {
                        logger.Error(ex);
                        InsertExecutingError(url, parsertaskid, "Can't parse property" + property.Name + ",-xpath: " + xpathbuffer);
                    }

                    // Conversion is attempted even after a parse error (with an empty value).
                    try
                    {
                        switch (property.Type)
                        {
                        case PropertyType.Integer:
                            propertyValues.DictIntProperties.Add(propitem.Id, Convert.ToInt32(htmlvalue));
                            break;

                        case PropertyType.Double:
                            propertyValues.DictDoubleProperties.Add(propitem.Id, Convert.ToDouble(htmlvalue));
                            break;

                        case PropertyType.String:
                            propertyValues.DictStringProperties.Add(propitem.Id, StripHTML(htmlvalue));
                            break;

                        default:
                            break;
                        }
                    }
                    catch (Exception ex)
                    {
                        logger.Error(ex);
                        InsertExecutingError(url, parsertaskid, "Can't convert value " + htmlvalue + " of " + property.Name + ",-xpath: " + xpathbuffer);
                    }
                }
            }

            resultGood.Status         = true;
            resultGood.PropertyValues = propertyValues;
            goodwizardManager.InsertOrUpdate(resultGood);

            var newPrice = new PriceHistoryDTO();

            newPrice.Url   = resultGood.UrlLink;
            newPrice.Price = resultGood.Price;
            newPrice.Date  = DateTime.Now;
            newPrice.Name  = resultGood.Name;
            priceManager.Insert(newPrice);

            // deleting from local log storage
            taskinfoManager.Delete(taskinfo);
            return(resultGood);
        }

        /// <summary>
        /// Stores an ErrorInsert record with the given message for a url of a parser task.
        /// </summary>
        private void InsertExecutingError(string url, int parsertaskid, string message)
        {
            ExecutingInfoDTO errorinfo = new ExecutingInfoDTO()
            {
                GoodUrl      = url,
                Status       = ExecuteStatus.ErrorInsert,
                Date         = DateTime.Now,
                ParserTaskId = parsertaskid,
                ErrorMessage = message
            };

            taskinfoManager.Insert(errorinfo);
        }

        /// <summary>
        /// Tries the xpaths in order and returns the InnerHtml of the first node found,
        /// or an empty string when none of them matches. <paramref name="xpathbuffer"/>
        /// always holds the last xpath that was tried, so callers can report it on failure.
        /// </summary>
        private static string SelectFirstInnerHtml(HtmlDocument doc, IEnumerable <string> xpaths, ref string xpathbuffer)
        {
            foreach (var xpath in xpaths)
            {
                xpathbuffer = xpath;
                HtmlNode node = doc.DocumentNode.SelectSingleNode(xpath);
                if (node != null)
                {
                    return node.InnerHtml;
                }
            }

            return "";
        }