/// <summary>
/// Downloads the page at <paramref name="uri"/> and parses its HTML into a supplier product.
/// </summary>
/// <param name="uri">Address of the page to request; also passed on to <c>Parse</c>.</param>
/// <returns>The product returned by <c>Parse</c> for the downloaded HTML.</returns>
public SupplierProduct ScraperProduct(string uri)
{
    // The page is requested with the loader's decompress flag switched on.
    var pageResponse = new PageLoader().RequestsHtml(uri, decompress: true);
    return Parse(uri, pageResponse.Html);
}
//public void ScrapeCatalog()
//{
//    DownloadPrice();
//}

/// <summary>
/// Requests the supplier's price page and parses it into price rows.
/// </summary>
/// <returns>The rows produced by <c>ParsePrice</c> from the downloaded HTML.</returns>
public List<TimyrPriceRow> DownloadPrice()
{
    // Price page address: the supplier base URI followed by the price query.
    var priceListUri = SupplierUri + "catalog.php?price=show";
    HtmlResponse priceResponse = new PageLoader().RequestsHtml(priceListUri);
    return ParsePrice(priceResponse.Html);
}
/// <summary>
/// Hides the test button, loads the Begemot price rows into <c>lots</c>, records the names of
/// image files already present in the two image folders, and starts the timer.
/// </summary>
private void Start()
{
    btnTest.Visibility = Visibility.Hidden;

    // Only rows with more than two items in stock are kept as lots.
    var begemotPriceRows = (new BegemotParser()).ParsePrice("");
    lots = begemotPriceRows.Where(r => r.CountInStock > 2).ToList();

    pageLoader = new PageLoader();
    sourceImagesFolder = new DirectoryInfo("images/SourceImages");
    imagesFolder = new DirectoryInfo("images");

    // GetFileNameWithoutExtension strips only the trailing extension and accepts files that
    // have none. Name.Replace(Extension, "") throws ArgumentException for an empty extension
    // and would also remove the extension text from the middle of a name.
    // Fully qualified because WPF code-behind files commonly import System.Windows.Shapes,
    // which also declares a type named Path.
    imagesToSkipDownload = sourceImagesFolder.GetFiles()
        .Select(f => System.IO.Path.GetFileNameWithoutExtension(f.Name))
        .ToList();
    imagesToSkipProccess = imagesFolder.GetFiles()
        .Select(f => System.IO.Path.GetFileNameWithoutExtension(f.Name))
        .ToList();

    timer = new Timer();
    timer.Interval = 1;
    timer.Tick += timer_Tick;
    timer.Start();
}
//public class TaimyrCategory
//{
//    public int Id { get; set; }
//    public string Title { get; set; }
//    public int ParentCategoryId { get; set; }
//    public TaimyrCategory()
//    {
//        ParentCategoryId = -1;
//        _allCategories.Add(this);
//    }
//    public TaimyrCategory Parent
//    {
//        get
//        {
//            var result = _allCategories.FirstOrDefault(c => c.Id == this.ParentCategoryId);
//            return result;
//        }
//    }
//    public List<TaimyrCategory> Childs
//    {
//        get
//        {
//            var result = _allCategories.Where(c => c.ParentCategoryId == this.Id).ToList();
//            return result;
//        }
//    }
//    public string Uri { get; set; }
//    static List<TaimyrCategory> _allCategories = new List<TaimyrCategory>();
//}

//private T IfModified<T>(SupplierProduct prod, T oval, T nval)
//{
//    var modified = !oval.Equals(nval);
//    if (modified && prod.Status == ScrapeStatus.Stable)
//    {
//        prod.Status = ScrapeStatus.Modified;
//        return nval;
//    }
//    return oval;
//}

/// <summary>
/// Creates or updates the supplier product for one price row from the HTML of its product page:
/// category chain, prices, description and images. Changes are saved through
/// <paramref name="context"/> once after the description and once after the images.
/// </summary>
/// <param name="context">Entity context; a newly created product is added to its <c>SupplierProductSet</c>.</param>
/// <param name="pRow">Price row supplying the title, the URI (which must contain <c>id=</c>) and the current price.</param>
/// <param name="html">HTML of the product page.</param>
/// <param name="supplier">Supplier whose products and categories are searched and whose <c>Discount</c> is used for the cost price.</param>
/// <returns>The created or updated product.</returns>
private SupplierProduct ParseTaimyrProduct(ShopEntities context, TimyrPriceRow pRow, string html, Supplier supplier)
{
    var doc = new HtmlDocument();
    doc.LoadHtml(html);

    // The source id is the text that follows "id=" in the row's URI.
    var idOnSource = pRow.Uri.Split(new[] { "id=" }, StringSplitOptions.RemoveEmptyEntries)[1];
    var product = supplier.Products.FirstOrDefault(p => p.IdOnSource == idOnSource);
    if (product == null)
    {
        product = new SupplierProduct();
        context.SupplierProductSet.Add(product);
        product.Set("IdOnSource", idOnSource);
        product.Set("Supplier", supplier);
    }
    product.Set("Title", pRow.Title);
    product.Set("UriOnSource", pRow.Uri);

    var infoDiv = doc.DocumentNode.NodeByXpath("/html/body/div[3]/div[2]");

    //------------------------------------------------
    // Categories
    //
    // SelectNodes yields null (not an empty collection) when nothing matches, so both the
    // paragraph and the link collection are checked before iterating.
    var categoryParagraph = infoDiv.NodeByXpath("p[1]");
    var categoryLinks = categoryParagraph == null ? null : categoryParagraph.SelectNodes("a");
    SupplierCategory parentCategory = null;
    if (categoryLinks != null)
    {
        foreach (var categoryLink in categoryLinks)
        {
            var tCatHref = categoryLink.GetAttributeValue("href", null);
            if (tCatHref == null || !tCatHref.Contains("?sid="))
            {
                continue;
            }

            var hrefParts = tCatHref.Split(new[] { "sid=" }, StringSplitOptions.RemoveEmptyEntries);
            if (hrefParts.Length < 2)
            {
                // "sid=" is present but nothing follows it, so there is no id to use.
                continue;
            }
            var sid = hrefParts[1];

            var tCat = supplier.Categories.FirstOrDefault(c => c.IdOnSource == sid);
            if (tCat == null)
            {
                tCat = new SupplierCategory();
                tCat.Set("IdOnSource", sid);
                tCat.Set("Supplier", supplier);
            }
            tCat.Set("Title", categoryLink.InnerText);
            tCat.Set("UriOnSource", tCatHref);

            // Each link becomes the child of the link before it.
            if (parentCategory != null)
            {
                tCat.Set("Parent", parentCategory);
            }
            parentCategory = tCat;
        }
    }
    // The last category in the chain (or null when none was found) is the product's category.
    product.Set("Category", parentCategory);

    //------------------------------------------------
    // Price without promotion (if such a price exists, the current price is a promotional one)
    //
    var priceB = infoDiv.PriceByXpath("p[3]/font[2]/s", "руб.");
    if (priceB != null)
    {
        // NOTE(review): IsSale is only ever set to true here; it is not cleared when the
        // crossed-out price is absent. Confirm whether that is intended.
        product.Set("IsSale", true);
    }
    var discountPrice = pRow.CurrentPrice;
    var price = priceB ?? discountPrice;
    var costPrice = discountPrice * (100m - supplier.Discount) / 100m;
    product.Set("Price", price);
    product.Set("DiscountPrice", pRow.CurrentPrice);
    product.Set("CostPrice", costPrice);

    //--------------------------------------------------
    // Description
    //
    // Every paragraph after the first three is appended as raw HTML.
    var infoParagraphs = infoDiv.SelectNodes("p");
    var sbDescription = new StringBuilder();
    if (infoParagraphs != null)
    {
        foreach (var descriptionP in infoParagraphs.Skip(3))
        {
            sbDescription.AppendLine(descriptionP.OuterHtml);
        }
    }
    product.Set("Description", sbDescription.ToString());
    context.SaveChanges();

    //--------------------------------------------------
    // Images
    //
    var pageLoader = new PageLoader();
    var imageTd = infoDiv.NodeByXpath("div[1]/table/tr/td");
    var imageLinks = imageTd == null ? null : imageTd.SelectNodes("a");
    if (imageLinks != null)
    {
        foreach (var imageLink in imageLinks)
        {
            var imageUri = imageLink.GetAttributeValue("href", null);
            if (string.IsNullOrEmpty(imageUri))
            {
                continue;
            }

            // Example href: "plugins/resize.php?f=../uploads/ct60-0.jpg&w=800"
            var nameParts = imageUri.Split(new[] { "plugins/resize.php?f=../uploads/", "&" }, StringSplitOptions.RemoveEmptyEntries);
            if (nameParts.Length == 0)
            {
                continue;
            }
            var imageName = nameParts[0];
            imageUri += "&s=0"; // remove the logo
            //var ext = new[] {".jpg", "png"}.FirstOrDefault(e => imageUri.Contains(e));

            var sourceUri = SupplierUri + imageUri;
            var savePath = string.Format("img/taimyr/{0}/{1}", product.IdOnSource, imageName);

            var image = product.Images.FirstOrDefault(im => im.LocalPath == savePath);
            if (image == null)
            {
                image = new Image();
                image.Set("UriOnSupplier", imageUri);
                image.Set("LocalPath", savePath);

                // NOTE(review): the full file path is passed here, whereas ParseImages passes
                // the folder only; confirm which one FileSystemUtils.GetFolder expects.
                FileSystemUtils.GetFolder(savePath);
                if (!File.Exists(savePath))
                {
                    pageLoader.RequestImage(sourceUri, savePath);
                }

                // Only a newly created image is added; one found above is already in the collection.
                product.Images.Add(image);
            }

            if (product.DefaultImage == null)
            {
                product.DefaultImage = image;
            }
        }
    }
    context.SaveChanges();
    return product;
}
/// <summary>
/// Downloads the product page referenced by a price row and parses it with
/// <c>ParseTaimyrProduct</c>.
/// </summary>
/// <param name="context">Entity context passed through to the parser.</param>
/// <param name="pRow">Price row whose <c>Uri</c> is appended to <c>SupplierUri</c> to form the page address.</param>
/// <param name="supplier">Supplier passed through to the parser.</param>
/// <returns>The product returned by <c>ParseTaimyrProduct</c>.</returns>
private SupplierProduct DownloadProduct(ShopEntities context, TimyrPriceRow pRow, Supplier supplier)
{
    var loader = new PageLoader();
    var pageHtml = loader.RequestsHtml(SupplierUri + pRow.Uri).Html;
    return ParseTaimyrProduct(context, pRow, pageHtml, supplier);
}
/// <summary>
/// Collects the image links from the page's "Gallery" element, registers every image that is
/// not yet among <paramref name="product"/>'s images (requesting the file when it is not on
/// disk), and saves the changes.
/// </summary>
/// <param name="doc">Parsed product page.</param>
/// <param name="product">Product whose <c>Images</c> and <c>DefaultImage</c> are updated.</param>
/// <param name="context">Entity context on which <c>SaveChanges</c> is called at the end.</param>
private static void ParseImages(HtmlDocument doc, SupplierProduct product, ShopEntities context)
{
    var imageLinks = new List<HtmlNode>();

    // Both lookups may come back empty, so nothing is added to the list unless it was found.
    var mainImageLink = doc.DocumentNode.NodeByXpath("//*[@id=\"Gallery\"]/div/a");
    if (mainImageLink != null)
    {
        imageLinks.Add(mainImageLink);
    }

    var otherImagesDiv = doc.DocumentNode.NodeByXpath("//*[@id=\"Gallery\"]/div[2]");
    if (otherImagesDiv != null)
    {
        // SelectNodes yields null rather than an empty collection when there is no match.
        var otherImagesLinks = otherImagesDiv.SelectNodes("a");
        if (otherImagesLinks != null)
        {
            imageLinks.AddRange(otherImagesLinks);
        }
    }

    string baseImageUri = "http://media2.24aul.ru/imgs/";
    var pageLoader = new PageLoader();
    foreach (var imageLink in imageLinks)
    {
        var imageUri = imageLink.GetAttributeValue("href", "");

        // The image name is what remains of the href once the base address is cut out.
        // An empty href, or one equal to the base address, leaves no name to use.
        var nameParts = imageUri.Split(new[] { baseImageUri }, StringSplitOptions.RemoveEmptyEntries);
        if (nameParts.Length == 0)
        {
            continue;
        }
        var imageName = nameParts[0];

        var saveFolder = string.Format("img/24auru/{0}", product.IdOnSource);
        var savePath = string.Format("{0}/{1}.jpg", saveFolder, imageName);

        var image = product.Images.FirstOrDefault(im => im.LocalPath == savePath);
        if (image == null)
        {
            image = new Image();
            image.Set("UriOnSupplier", imageUri);
            image.Set("LocalPath", savePath);

            FileSystemUtils.GetFolder(saveFolder);
            if (!File.Exists(savePath))
            {
                pageLoader.RequestImage(imageUri, savePath);
            }

            // Only a newly created image is added; one found above is already in the collection.
            product.Images.Add(image);
        }

        if (product.DefaultImage == null)
        {
            product.DefaultImage = image;
        }
    }
    context.SaveChanges();
}