/// <summary>
/// Stores a new parser task.
/// The DTO is mapped to a <c>ParserTask</c> entity, its category and web shop are
/// resolved through the unit of work, the status is forced to NotFinished and the
/// last-change timestamp is set to the current time before the entity is inserted and saved.
/// </summary>
/// <param name="parsertask">Task description to store.</param>
/// <returns>
/// Id of the stored task, or -1 when <paramref name="parsertask"/> is null
/// or an exception is raised while resolving references or saving.
/// </returns>
public int Add(ParserTaskDTO parsertask)
{
    const int failedId = -1;

    if (parsertask == null)
    {
        return failedId;
    }

    ParserTask entity = Mapper.Map<ParserTask>(parsertask);

    try
    {
        // Resolve the navigation properties from the foreign keys carried by the DTO.
        entity.Category = uOW.CategoryRepo.GetByID(entity.CategoryId);
        entity.WebShop = uOW.WebShopRepo.GetByID(entity.WebShopId);

        // A freshly added task has no iterator/grabber settings yet.
        entity.Status = Common.Enum.Status.NotFinished;
        entity.LastChange = DateTime.Now;

        uOW.ParserRepo.Insert(entity);
        uOW.Save();

        return entity.Id;
    }
    catch (Exception ex)
    {
        logger.Error(ex.Message);
        return failedId;
    }
}
/// <summary>
/// Shows the grabber configuration view for a parser task.
/// Loads the task, reuses its grabber settings (or builds fresh ones from the task's
/// category properties), collects the task's URLs, downloads local copies of up to the
/// first two pages and exposes them to the view through Session/TempData.
/// </summary>
/// <param name="id">Id of the parser task.</param>
/// <returns>
/// The grabber view, or HttpNotFound when <paramref name="id"/> is null or no task
/// with that id exists.
/// </returns>
public ActionResult Grabber(int? id)
{
    if (id == null)
    {
        return HttpNotFound();
    }

    ParserTaskDTO task = parserTaskManager.Get(id.Value);
    if (task == null)
    {
        // Unknown task id: answer 404 instead of failing with a NullReferenceException.
        return HttpNotFound();
    }

    GrabberSettingsDTO grabber;
    if (task.GrabberSettings != null)
    {
        grabber = task.GrabberSettings;
    }
    else
    {
        // No settings stored yet: start from the property list of the task's category.
        grabber = new GrabberSettingsDTO();
        grabber.Id = id.Value;
        task.Category = categoryManager.Get(task.CategoryId);
        grabber.PropertyItems = Mapper.Map<List<GrabberPropertyItemDTO>>(task.Category.PropertiesList);
    }

    List<string> urlList = null;
    if (task.IteratorSettings != null)
    {
        urlList = urlManager.GetAllUrls(task.IteratorSettings);
    }
    if (urlList == null)
    {
        urlList = new List<string>();
    }

    // arrayOfLinks is rewritten below with local paths; jsonArray is a separate copy
    // that keeps the original URLs for serialization.
    string[] arrayOfLinks = urlList.ToArray();
    string[] jsonArray = urlList.ToArray();

    // Only the first two pages are downloaded; there may be fewer than two URLs.
    int previewCount = Math.Min(2, arrayOfLinks.Length);
    for (int index = 0; index < previewCount; index++)
    {
        if (!String.IsNullOrWhiteSpace(arrayOfLinks[index]))
        {
            Guid result = downloadManager.DownloadFromPath(arrayOfLinks[index]);
            arrayOfLinks[index] = "/WebSites/" + result + ".html";
        }
    }

    grabber.urlJsonData = JsonConvert.SerializeObject(jsonArray);
    Session["Length"] = arrayOfLinks.Length;

    // Missing pages are published as null rather than indexing past the end of the array.
    TempData["CurrentPage"] = arrayOfLinks.Length > 0 ? arrayOfLinks[0] : null;
    TempData["NextPage"] = arrayOfLinks.Length > 1 ? arrayOfLinks[1] : null;
    TempData["AllSrc"] = arrayOfLinks;

    return View(grabber);
}
/// <summary>
/// Saves the submitted parser task settings and continues to the iterator step.
/// When <paramref name="parsertaskid"/> is supplied the existing task is updated;
/// otherwise the task is marked NotFinished and added as a new one.
/// </summary>
/// <param name="parsertask">Submitted task settings.</param>
/// <param name="parsertaskid">Id of the task being edited, or null when creating a new task.</param>
/// <returns>Redirect to the "Iterator" action with the id of the edited or newly added task.</returns>
public ActionResult Settings(ParserTaskDTO parsertask, int? parsertaskid)
{
    int targetId;

    if (parsertaskid.HasValue)
    {
        // Editing: bind the posted DTO to the id taken from the request.
        targetId = parsertaskid.Value;
        parsertask.Id = targetId;
        parserTaskManager.Update(parsertask);
    }
    else
    {
        // Creating: the id comes back from the manager (it returns -1 on failure).
        parsertask.Status = Common.Enum.Status.NotFinished;
        targetId = parserTaskManager.Add(parsertask);
    }

    return RedirectToAction("Iterator", new { id = targetId });
}
/// <summary>
/// Shows the parser task settings form.
/// The view model always carries the categories that have no child categories and
/// all shops; when <paramref name="id"/> refers to an existing task, that task is
/// attached as well.
/// </summary>
/// <param name="id">Id of the task to edit, or null for an empty form.</param>
/// <returns>The settings view bound to a <c>SettingsViewDTO</c>.</returns>
public ActionResult Settings(int? id)
{
    var viewModel = new SettingsViewDTO();

    // Only categories without children are offered for selection.
    viewModel.Categories = categoryManager.GetAll()
        .Where(category => category.HasChildrenCategories == false)
        .ToList();
    viewModel.Shops = shopManager.GetAll().ToList();

    if (id.HasValue)
    {
        ParserTaskDTO existingTask = parserTaskManager.Get(id.Value);
        if (existingTask != null)
        {
            viewModel.ParserTask = existingTask;
        }
    }

    return View(viewModel);
}
/// <summary>
/// Updates a stored parser task from the supplied DTO.
/// Grabber and iterator settings are serialized onto the entity when present in the DTO,
/// and the status is recomputed: NotFinished unless both settings are supplied, otherwise
/// Infinite (no end date) or Coming (end date set); an incoming InQuery status always wins.
/// Priority, description, end date, category and web shop are copied over before saving.
/// </summary>
/// <param name="parsertask">New state of the task; its Id selects the stored task.</param>
/// <returns>
/// DTO mapped from the updated entity, or null when <paramref name="parsertask"/> is null
/// or no task with that Id exists.
/// </returns>
public ParserTaskDTO Update(ParserTaskDTO parsertask)
{
    if (parsertask == null)
    {
        // Treat a missing DTO like a missing task instead of failing inside the repository query.
        return null;
    }

    var temp = uOW.ParserRepo.Get(p => p.Id == parsertask.Id).FirstOrDefault();
    if (temp == null)
    {
        return null;
    }

    var serializer = new ExtendedXmlSerializer();

    // Only the settings carried by the incoming DTO count towards completeness.
    bool grabberSettingsFilled = parsertask.GrabberSettings != null;
    bool iteratorSettingsFilled = parsertask.IteratorSettings != null;

    if (grabberSettingsFilled)
    {
        temp.GrabberSettings = serializer.Serialize(parsertask.GrabberSettings);
    }

    if (iteratorSettingsFilled)
    {
        temp.IteratorSettings = serializer.Serialize(parsertask.IteratorSettings);
    }

    if (grabberSettingsFilled && iteratorSettingsFilled)
    {
        temp.Status = parsertask.EndDate == null
            ? Common.Enum.Status.Infinite
            : Common.Enum.Status.Coming;
    }
    else
    {
        temp.Status = Common.Enum.Status.NotFinished;
    }

    // An explicit InQuery request overrides the computed status.
    if (parsertask.Status == Common.Enum.Status.InQuery)
    {
        temp.Status = Common.Enum.Status.InQuery;
    }

    temp.Priority = parsertask.Priority;
    temp.Description = parsertask.Description;
    temp.EndDate = parsertask.EndDate;
    temp.CategoryId = parsertask.CategoryId;
    temp.WebShopId = parsertask.WebShopId;

    // Keep the navigation properties in sync with the (possibly changed) foreign keys.
    temp.Category = uOW.CategoryRepo.GetByID(temp.CategoryId);
    temp.WebShop = uOW.WebShopRepo.GetByID(temp.WebShopId);

    uOW.ParserRepo.SetStateModified(temp);
    uOW.Save();

    return Mapper.Map<ParserTaskDTO>(temp);
}
/// <summary>
/// Returns the list produced by <c>urlManager.GetAllUrls</c> for the task's iterator settings.
/// NOTE(review): the method name says "names of goods", but the call used here
/// is a URL lookup - confirm which one callers expect.
/// </summary>
/// <param name="model">Parser task whose iterator settings are passed to the URL manager.</param>
/// <returns>The list returned by the URL manager, unchanged.</returns>
private List<string> GetAllNamesOfGoods(ParserTaskDTO model)
{
    return urlManager.GetAllUrls(model.IteratorSettings);
}
/// <summary>
/// Parses the page at <paramref name="url"/> using the configuration of a parser task
/// and stores the resulting good together with a price-history record.
/// While the method runs, an "Executing" entry for the url is kept in the executing-info
/// storage; it is removed on success and on the early failure exits. Individual field
/// failures (name, price, old price, image, properties) are recorded as ErrorInsert
/// entries and do not abort the run.
/// </summary>
/// <param name="parsertaskid">id of parser task</param>
/// <param name="url">url to parse</param>
/// <returns>
/// The parsed and stored GoodDTO, or null when the page could not be downloaded/loaded
/// or the task has no grabber settings.
/// </returns>
public GoodDTO ExecuteTask(int parsertaskid, string url)
{
    const string imageNotAvailable = @"http://www.kalahandi.info/wp-content/uploads/2016/05/sorry-image-not-available.png";

    ParserTaskDTO parsertask = parsermanager.Get(parsertaskid);

    // adding to local log storage
    ExecutingInfoDTO taskinfo = new ExecutingInfoDTO()
    {
        GoodUrl = url,
        Status = ExecuteStatus.Executing,
        Date = DateTime.Now,
        ParserTaskId = parsertaskid
    };
    taskinfo.Id = taskinfoManager.Insert(taskinfo);

    // getting page source due to method
    HtmlDocument doc;
    try
    {
        string pageSource = "";
        SiteDownloader sw = new SiteDownloader();
        switch (parsertask.IteratorSettings.DownloadMethod)
        {
            case DownloadMethod.Direct:
                pageSource = sw.GetPageSouceDirectly(url);
                break;
            case DownloadMethod.Tor:
                pageSource = sw.GetPageSouce(url);
                break;
            default:
                break;
        }

        doc = new HtmlDocument();
        doc.LoadHtml(pageSource);
    }
    catch (Exception ex)
    {
        // Also covers a missing task or missing iterator settings (null dereference above).
        logger.Error(ex);
        LogExecutionError(parsertaskid, url, "Can't download url");
        taskinfoManager.Delete(taskinfo);
        return null;
    }

    // gets configuration from parsertask id
    GrabberSettingsDTO grabbersettings = parsertask.GrabberSettings;
    if (grabbersettings == null)
    {
        // Without grabber settings nothing can be extracted; fail like a download error
        // instead of throwing later from the property loop.
        LogExecutionError(parsertaskid, url, "Grabber settings are not configured");
        taskinfoManager.Delete(taskinfo);
        return null;
    }

    GoodDTO resultGood = new GoodDTO();
    resultGood.WebShop_Id = parsertask.WebShopId;
    resultGood.Category_Id = parsertask.CategoryId;

    // Holds the xpath tried last so error entries can report it.
    var xpathbuffer = "";

    // Parsing name by list of xpaths
    try
    {
        HtmlNode nameNode = SelectFirstMatch(doc, grabbersettings.Name, "", ref xpathbuffer);
        var name = nameNode != null ? nameNode.InnerHtml : "";
        resultGood.Name = StripHTML(name.Trim());
    }
    catch (Exception ex)
    {
        logger.Error(ex);
        LogExecutionError(parsertaskid, url, "Can't parse name,-xpath: " + xpathbuffer);
    }

    // Parsing price by list of xpaths
    try
    {
        HtmlNode priceNode = SelectFirstMatch(doc, grabbersettings.Price, "", ref xpathbuffer);
        var price = priceNode != null ? priceNode.InnerHtml : "";
        if (price != "")
        {
            resultGood.Price = Convert.ToDecimal(this.RemoveAllLetters(price));
        }
    }
    catch (Exception ex)
    {
        logger.Error(ex);
        LogExecutionError(parsertaskid, url, "Can't parse main price,-xpath: " + xpathbuffer);
    }

    // Parsing old price by list of xpaths
    try
    {
        HtmlNode oldPriceNode = SelectFirstMatch(doc, grabbersettings.OldPrice, "", ref xpathbuffer);
        var oldPrice = oldPriceNode != null ? oldPriceNode.InnerHtml : "";
        if (oldPrice != "")
        {
            resultGood.OldPrice = Convert.ToDecimal(this.RemoveAllLetters(oldPrice));
        }
    }
    catch (Exception ex)
    {
        logger.Error(ex);
        LogExecutionError(parsertaskid, url, "Can't parse old price,-xpath: " + xpathbuffer);
    }

    // Parsing image link by list of xpaths
    try
    {
        HtmlNode imageNode = SelectFirstMatch(doc, grabbersettings.ImgLink, "/@src", ref xpathbuffer);
        string imagelink = imageNode != null ? imageNode.Attributes["src"].Value : null;

        // Fall back to the placeholder when no xpath matched or the src is empty.
        resultGood.ImgLink = String.IsNullOrEmpty(imagelink) ? imageNotAvailable : imagelink;
    }
    catch (Exception ex)
    {
        logger.Error(ex);
        resultGood.ImgLink = imageNotAvailable;
        LogExecutionError(parsertaskid, url, "Can't parse image link,-xpath: " + xpathbuffer);
    }

    resultGood.UrlLink = url;

    PropertyValuesDTO propertyValues = new PropertyValuesDTO();
    propertyValues.DictDoubleProperties = new Dictionary<int, double>();
    propertyValues.DictIntProperties = new Dictionary<int, int>();
    propertyValues.DictStringProperties = new Dictionary<int, string>();

    foreach (var propitem in grabbersettings.PropertyItems)
    {
        PropertyDTO property = propmanager.Get(propitem.Id);
        var htmlvalue = "";

        try
        {
            HtmlNode value = SelectFirstMatch(doc, propitem.Value, "", ref xpathbuffer);
            if (value != null)
            {
                htmlvalue = value.InnerHtml;
            }
        }
        catch (Exception ex)
        {
            logger.Error(ex);
            LogExecutionError(parsertaskid, url, "Can't parse property" + property.Name + ",-xpath: " + xpathbuffer);
        }

        try
        {
            switch (property.Type)
            {
                case PropertyType.Integer:
                    propertyValues.DictIntProperties.Add(propitem.Id, Convert.ToInt32(htmlvalue));
                    break;
                case PropertyType.Double:
                    propertyValues.DictDoubleProperties.Add(propitem.Id, Convert.ToDouble(htmlvalue));
                    break;
                case PropertyType.String:
                    propertyValues.DictStringProperties.Add(propitem.Id, StripHTML(htmlvalue));
                    break;
                default:
                    break;
            }
        }
        catch (Exception ex)
        {
            logger.Error(ex);
            LogExecutionError(parsertaskid, url, "Can't convert value " + htmlvalue + " of " + property.Name + ",-xpath: " + xpathbuffer);
        }
    }

    resultGood.Status = true;
    resultGood.PropertyValues = propertyValues;
    goodwizardManager.InsertOrUpdate(resultGood);

    var newPrice = new PriceHistoryDTO();
    newPrice.Url = resultGood.UrlLink;
    newPrice.Price = resultGood.Price;
    newPrice.Date = DateTime.Now;
    newPrice.Name = resultGood.Name;
    priceManager.Insert(newPrice);

    // deleting from local log storage
    taskinfoManager.Delete(taskinfo);
    return resultGood;
}

/// <summary>
/// Records an ErrorInsert entry for the given url and task in the executing-info storage.
/// </summary>
private void LogExecutionError(int parsertaskid, string url, string message)
{
    ExecutingInfoDTO errorinfo = new ExecutingInfoDTO()
    {
        GoodUrl = url,
        Status = ExecuteStatus.ErrorInsert,
        Date = DateTime.Now,
        ParserTaskId = parsertaskid,
        ErrorMessage = message
    };
    taskinfoManager.Insert(errorinfo);
}

/// <summary>
/// Returns the first node matched by any xpath in the list (each with <paramref name="suffix"/>
/// appended), or null when none matches. <paramref name="lastXPath"/> receives the xpath
/// (without suffix) that was tried last.
/// </summary>
private static HtmlNode SelectFirstMatch(HtmlDocument doc, IEnumerable<string> xpaths, string suffix, ref string lastXPath)
{
    foreach (var xpath in xpaths)
    {
        lastXPath = xpath;

        // SelectSingleNode yields null on no match, so the next xpath can be tried.
        HtmlNode node = doc.DocumentNode.SelectSingleNode(xpath + suffix);
        if (node != null)
        {
            return node;
        }
    }

    return null;
}