Example #1
0
        /// <summary>
        /// Downloads the page at <paramref name="uri"/> and parses it into a supplier product.
        /// </summary>
        /// <param name="uri">Address of the product page; also passed on to <c>Parse</c>.</param>
        /// <returns>Whatever <c>Parse</c> produces for the downloaded HTML.</returns>
        public SupplierProduct ScraperProduct(string uri)
        {
            // The page is requested with decompression enabled.
            return Parse(uri, new PageLoader().RequestsHtml(uri, decompress: true).Html);
        }
Example #2
0
        //public void ScrapeCatalog()
        //{
        //    DownloadPrice();
        //}
        /// <summary>
        /// Requests the supplier's price page and parses it into price rows.
        /// </summary>
        /// <returns>The rows produced by <c>ParsePrice</c> from the downloaded HTML.</returns>
        public List<TimyrPriceRow> DownloadPrice()
        {
            // The price list lives at a fixed address relative to the supplier's base URI.
            var priceUri = SupplierUri + "catalog.php?price=show";

            HtmlResponse priceResponse = new PageLoader().RequestsHtml(priceUri);

            return ParsePrice(priceResponse.Html);
        }
Example #3
0
        /// <summary>
        /// Hides the test button, loads the Begemot price rows, prepares the image
        /// folders and their skip lists, and starts the timer.
        /// </summary>
        private void Start()
        {
            btnTest.Visibility = Visibility.Hidden;

            var begemotPriceRows = (new BegemotParser()).ParsePrice("");

            // Only rows with more than two items in stock are kept as lots.
            lots = begemotPriceRows.Where(r => r.CountInStock > 2).ToList();

            pageLoader = new PageLoader();

            sourceImagesFolder = new DirectoryInfo("images/SourceImages");
            imagesFolder = new DirectoryInfo("images");

            // Files already present in each folder are recorded by name without extension.
            // GetFileNameWithoutExtension strips only the trailing extension and accepts
            // extension-less files; string.Replace(f.Extension, "") throws ArgumentException
            // when the extension is empty and removes every occurrence of the extension text.
            // Fully qualified so it cannot clash with System.Windows.Shapes.Path.
            imagesToSkipDownload = sourceImagesFolder.GetFiles()
                .Select(f => System.IO.Path.GetFileNameWithoutExtension(f.Name))
                .ToList();
            imagesToSkipProccess = imagesFolder.GetFiles()
                .Select(f => System.IO.Path.GetFileNameWithoutExtension(f.Name))
                .ToList();

            timer = new Timer();
            timer.Interval = 1;
            timer.Tick += timer_Tick;

            timer.Start();
        }
Example #4
0
        //public class TaimyrCategory
        //{
        //    public int Id { get; set; }
        //    public string Title { get; set; }
        //    public int ParentCategoryId { get; set; }
        //    public TaimyrCategory()
        //    {
        //        ParentCategoryId = -1;
        //        _allCategories.Add(this);
        //    }
        //    public TaimyrCategory Parent
        //    {
        //        get
        //        {
        //            var result = _allCategories.FirstOrDefault(c => c.Id == this.ParentCategoryId);
        //            return result;
        //        }
        //    }
        //    public List<TaimyrCategory> Childs
        //    {
        //        get
        //        {
        //            var result = _allCategories.Where(c => c.ParentCategoryId == this.Id).ToList();
        //            return result;
        //        }
        //    }
        //    public string Uri { get; set; }
        //    static List<TaimyrCategory> _allCategories = new List<TaimyrCategory>();
        //}
        //private T IfModified<T>(SupplierProduct prod, T oval, T nval)
        //{
        //    var modified = !oval.Equals(nval);
        //    if (modified && prod.Status == ScrapeStatus.Stable)
        //    {
        //        prod.Status = ScrapeStatus.Modified;
        //        return nval;
        //    }
        //    return oval;
        //}
        /// <summary>
        /// Creates or updates the supplier product described by a price row, using the
        /// HTML of its product page for categories, prices, description and images.
        /// </summary>
        /// <param name="context">Entity context; a new product is added to its
        /// <c>SupplierProductSet</c>, and <c>SaveChanges</c> is called once after the
        /// text fields and once after the images.</param>
        /// <param name="pRow">Price row supplying the title, the URI (which must contain
        /// <c>id=</c>) and the current price.</param>
        /// <param name="html">HTML of the product page.</param>
        /// <param name="supplier">Supplier whose products, categories and discount are used.</param>
        /// <returns>The created or updated product.</returns>
        private SupplierProduct ParseTaimyrProduct(ShopEntities context, TimyrPriceRow pRow, string html, Supplier supplier)
        {
            var doc = new HtmlDocument();

            doc.LoadHtml(html);

            // The product id is whatever follows "id=" in the row's URI.
            var idOnSource = pRow.Uri.Split(new[] { "id=" }, StringSplitOptions.RemoveEmptyEntries)[1];

            var product = supplier.Products.FirstOrDefault(p => p.IdOnSource == idOnSource);

            if (product == null)
            {
                product = new SupplierProduct();

                context.SupplierProductSet.Add(product);

                product.Set("IdOnSource", idOnSource);
                product.Set("Supplier", supplier);
            }

            product.Set("Title", pRow.Title);
            product.Set("UriOnSource", pRow.Uri);

            var infoDiv = doc.DocumentNode.NodeByXpath("/html/body/div[3]/div[2]");

            //------------------------------------------------
            // Categories
            //
            // The links in the first paragraph are walked in order; each category with a
            // "?sid=" link becomes the parent of the next one, and the last one is the
            // product's category.
            var categoryParagraph = infoDiv.NodeByXpath("p[1]");

            // SelectNodes returns null (not an empty collection) when nothing matches.
            var categoryLinks = categoryParagraph == null ? null : categoryParagraph.SelectNodes("a");

            SupplierCategory parentCategory = null;
            if (categoryLinks != null)
            {
                foreach (var categoryLink in categoryLinks)
                {
                    var tCatHref = categoryLink.GetAttributeValue("href", null);

                    // Links without an href or without a category id are not categories.
                    if (tCatHref == null || !tCatHref.Contains("?sid=")) continue;

                    var sid = tCatHref.Split(new[] { "sid=" }, StringSplitOptions.RemoveEmptyEntries)[1];

                    var tCat = supplier.Categories.FirstOrDefault(c => c.IdOnSource == sid);

                    if (tCat == null)
                    {
                        tCat = new SupplierCategory();
                        tCat.Set("IdOnSource", sid);
                        tCat.Set("Supplier", supplier);
                    }

                    tCat.Set("Title", categoryLink.InnerText);
                    tCat.Set("UriOnSource", tCatHref);

                    if (parentCategory != null)
                    {
                        tCat.Set("Parent", parentCategory);
                    }

                    parentCategory = tCat;
                }
            }

            product.Set("Category", parentCategory);

            //------------------------------------------------
            // Price before the sale (if such a price is present, the current price is a sale price)
            //

            var priceB = infoDiv.PriceByXpath("p[3]/font[2]/s", "руб.");
            if (priceB != null)
            {
                // NOTE(review): IsSale is never reset when the struck-through price is
                // absent - confirm whether a product that left the sale should be cleared.
                product.Set("IsSale", true);
            }

            var discountPrice = pRow.CurrentPrice;
            var price = priceB ?? discountPrice;

            // Cost price is the current price reduced by the supplier's discount percentage.
            var costPrice = discountPrice * (100m - supplier.Discount) / 100m;

            product.Set("Price", price);
            product.Set("DiscountPrice", discountPrice);
            product.Set("CostPrice", costPrice);

            //--------------------------------------------------
            // Description
            //
            // Every paragraph after the first three is appended as raw HTML.
            var sbDescription = new StringBuilder();

            var paragraphs = infoDiv.SelectNodes("p");
            if (paragraphs != null)
            {
                foreach (var descriptionP in paragraphs.Skip(3))
                {
                    sbDescription.AppendLine(descriptionP.OuterHtml);
                }
            }

            product.Set("Description", sbDescription.ToString());

            context.SaveChanges();

            //--------------------------------------------------
            // Images
            //
            var pageLoader = new PageLoader();
            var imageTd = infoDiv.NodeByXpath("div[1]/table/tr/td");

            // A page without image links yields null here; treat it as "no images".
            var imageLinks = imageTd == null ? null : imageTd.SelectNodes("a");

            if (imageLinks != null)
            {
                foreach (var imageLink in imageLinks)
                {
                    var imageUri = imageLink.GetAttributeValue("href", null);
                    if (imageUri == null) continue;

                    // Example href: plugins/resize.php?f=../uploads/ct60-0.jpg&amp;w=800
                    var imageName = imageUri.Split(new[] { "plugins/resize.php?f=../uploads/", "&" }, StringSplitOptions.RemoveEmptyEntries)[0];
                    imageUri += "&s=0"; // removes the logo

                    var sourceUri = SupplierUri + imageUri;
                    var savePath = string.Format("img/taimyr/{0}/{1}", product.IdOnSource, imageName);

                    var image = product.Images.FirstOrDefault(im => im.LocalPath == savePath);
                    if (image == null)
                    {
                        image = new Image();

                        image.Set("UriOnSupplier", imageUri);
                        image.Set("LocalPath", savePath);

                        // NOTE(review): GetFolder receives the full file path, not its
                        // folder - confirm that is what it expects.
                        FileSystemUtils.GetFolder(savePath);

                        if (!File.Exists(savePath))
                        {
                            pageLoader.RequestImage(sourceUri, savePath);
                        }

                        // Only newly created images are attached; an image found above is
                        // already in the collection and must not be added a second time.
                        product.Images.Add(image);
                    }

                    if (product.DefaultImage == null) product.DefaultImage = image;
                }
            }

            context.SaveChanges();

            return product;
        }
Example #5
0
 /// <summary>
 /// Downloads the product page referenced by a price row and parses it into a product.
 /// </summary>
 /// <param name="context">Entity context forwarded to <c>ParseTaimyrProduct</c>.</param>
 /// <param name="pRow">Price row whose URI is appended to the supplier's base URI.</param>
 /// <param name="supplier">Supplier forwarded to <c>ParseTaimyrProduct</c>.</param>
 /// <returns>The product returned by <c>ParseTaimyrProduct</c>.</returns>
 private SupplierProduct DownloadProduct(ShopEntities context, TimyrPriceRow pRow, Supplier supplier)
 {
     var productHtml = new PageLoader().RequestsHtml(SupplierUri + pRow.Uri).Html;

     return ParseTaimyrProduct(context, pRow, productHtml, supplier);
 }
Example #6
0
        /// <summary>
        /// Collects the gallery image links from a product page, downloads the images
        /// that are not yet on disk, attaches new images to the product and saves.
        /// </summary>
        /// <param name="doc">Parsed product page; image links are read from the element with id <c>Gallery</c>.</param>
        /// <param name="product">Product whose <c>Images</c> and <c>DefaultImage</c> are updated.</param>
        /// <param name="context">Entity context; <c>SaveChanges</c> is called once at the end.</param>
        private static void ParseImages(HtmlDocument doc, SupplierProduct product, ShopEntities context)
        {
            var imageLinks = new List<HtmlNode>();

            // The main image link may be missing; a null entry would crash the loop below.
            var mainImageLink = doc.DocumentNode.NodeByXpath("//*[@id=\"Gallery\"]/div/a");
            if (mainImageLink != null)
            {
                imageLinks.Add(mainImageLink);
            }

            var otherImagesDiv = doc.DocumentNode.NodeByXpath("//*[@id=\"Gallery\"]/div[2]");
            if (otherImagesDiv != null)
            {
                // SelectNodes returns null (not an empty collection) when nothing matches,
                // and AddRange(null) throws.
                var otherImagesLinks = otherImagesDiv.SelectNodes("a");
                if (otherImagesLinks != null)
                {
                    imageLinks.AddRange(otherImagesLinks);
                }
            }

            string baseImageUri = "http://media2.24aul.ru/imgs/";
            var pageLoader = new PageLoader();

            // The target folder depends only on the product, so it is computed once.
            var saveFolder = string.Format("img/24auru/{0}", product.IdOnSource);

            foreach (var imageLink in imageLinks)
            {
                var imageUri = imageLink.GetAttributeValue("href", "");

                // The image name is the part of the URI that follows the media base address.
                var nameParts = imageUri.Split(new[] { baseImageUri }, StringSplitOptions.RemoveEmptyEntries);

                // A link without a usable href leaves nothing to name the file after; skip it.
                if (nameParts.Length == 0) continue;

                var imageName = nameParts[0];
                var savePath = string.Format("{0}/{1}.jpg", saveFolder, imageName);

                var image = product.Images.FirstOrDefault(im => im.LocalPath == savePath);
                if (image == null)
                {
                    image = new Image();

                    image.Set("UriOnSupplier", imageUri);
                    image.Set("LocalPath", savePath);
                    FileSystemUtils.GetFolder(saveFolder);

                    if (!File.Exists(savePath))
                    {
                        pageLoader.RequestImage(imageUri, savePath);
                    }

                    // Only newly created images are attached; an image found above is
                    // already in the collection and must not be added a second time.
                    product.Images.Add(image);
                }

                if (product.DefaultImage == null) product.DefaultImage = image;
            }

            context.SaveChanges();
        }