コード例 #1
0
ファイル: Scrapper24AuRu.cs プロジェクト: poolsar/LotCreator
        /// <summary>
        /// Reads the category links of a lot page and creates or updates the matching
        /// <see cref="SupplierCategory"/> chain, linking each category to the one that precedes it.
        /// </summary>
        /// <param name="doc">Parsed HTML document of the lot page.</param>
        /// <param name="supplier">Supplier whose known categories are searched by <c>UriOnSource</c>.</param>
        /// <returns>
        /// The category built from the last qualifying link, or <c>null</c> when the page
        /// contains no qualifying category link.
        /// </returns>
        private static SupplierCategory ParseCategory(HtmlDocument doc, Supplier supplier)
        {
            const string catalogRootUri = "http://krsk.24au.ru/auction/";

            SupplierCategory parentCategory = null;

            // NodeByXpath is a project helper not visible here; a null result is tolerated defensively.
            var linksContainer = doc.DocumentNode.NodeByXpath("//*[@id=\"lot-content-col\"]/div[1]/div");
            if (linksContainer == null)
            {
                return null;
            }

            // HtmlAgilityPack's SelectNodes returns null (not an empty collection) when nothing matches.
            var categoryLinks = linksContainer.SelectNodes("a");
            if (categoryLinks == null)
            {
                return null;
            }

            foreach (var categoryLink in categoryLinks)
            {
                var tCatHref = categoryLink.GetAttributeValue("href", null);

                // Make sure the link actually points at a category below the catalog root.
                if (tCatHref == null) continue;
                if (tCatHref == catalogRootUri) continue;
                if (!tCatHref.Contains(catalogRootUri)) continue;

                var catUrlParts = tCatHref.Split(new[] { catalogRootUri, @"/", @"\" }, StringSplitOptions.RemoveEmptyEntries);
                if (catUrlParts.Length == 0) continue;

                // Reuse the supplier's existing category for this URI, or create a new one.
                var tCat = supplier.Categories.FirstOrDefault(c => c.UriOnSource == tCatHref);
                if (tCat == null)
                {
                    tCat = new SupplierCategory();
                    tCat.Set("UriOnSource", tCatHref);
                    tCat.Set("Supplier", supplier);
                }

                tCat.Set("Title", categoryLink.InnerText);

                // Each qualifying link becomes the child of the previous qualifying link.
                if (parentCategory != null)
                {
                    tCat.Set("Parent", parentCategory);
                }

                parentCategory = tCat;
            }

            return parentCategory;
        }
コード例 #2
0
ファイル: TaimyrScrapper.cs プロジェクト: poolsar/LotCreator
        //public class TaimyrCategory
        //{
        //    public int Id { get; set; }
        //    public string Title { get; set; }
        //    public int ParentCategoryId { get; set; }
        //    public TaimyrCategory()
        //    {
        //        ParentCategoryId = -1;
        //        _allCategories.Add(this);
        //    }
        //    public TaimyrCategory Parent
        //    {
        //        get
        //        {
        //            var result = _allCategories.FirstOrDefault(c => c.Id == this.ParentCategoryId);
        //            return result;
        //        }
        //    }
        //    public List<TaimyrCategory> Childs
        //    {
        //        get
        //        {
        //            var result = _allCategories.Where(c => c.ParentCategoryId == this.Id).ToList();
        //            return result;
        //        }
        //    }
        //    public string Uri { get; set; }
        //    static List<TaimyrCategory> _allCategories = new List<TaimyrCategory>();
        //}
        //private T IfModified<T>(SupplierProduct prod, T oval, T nval)
        //{
        //    var modified = !oval.Equals(nval);
        //    if (modified && prod.Status == ScrapeStatus.Stable)
        //    {
        //        prod.Status = ScrapeStatus.Modified;
        //        return nval;
        //    }
        //    return oval;
        //}
        /// <summary>
        /// Parses a product page and creates or updates the corresponding <see cref="SupplierProduct"/>:
        /// title, URI, category chain, prices, description and images. Changes are saved through
        /// <paramref name="context"/> before and after the image processing.
        /// </summary>
        /// <param name="context">Entity context used to add a new product and to save changes.</param>
        /// <param name="pRow">Price-list row supplying the product URI, title and current price.</param>
        /// <param name="html">Raw HTML of the product page.</param>
        /// <param name="supplier">Supplier owning the product; its products and categories are searched for existing entries.</param>
        /// <returns>The created or updated product.</returns>
        private SupplierProduct ParseTaimyrProduct(ShopEntities context, TimyrPriceRow pRow, string html, Supplier supplier)
        {
            var doc = new HtmlDocument();

            doc.LoadHtml(html);

            // The product id is whatever follows "id=" in the row URI; a URI without it still
            // fails here exactly as before, so callers see the same exception type.
            var idOnSource = pRow.Uri.Split(new[] { "id=" }, StringSplitOptions.RemoveEmptyEntries)[1];

            var product = supplier.Products.FirstOrDefault(p => p.IdOnSource == idOnSource);

            if (product == null)
            {
                product = new SupplierProduct();

                context.SupplierProductSet.Add(product);

                product.Set("IdOnSource", idOnSource);
                product.Set("Supplier", supplier);
            }

            product.Set("Title", pRow.Title);
            product.Set("UriOnSource", pRow.Uri);

            var infoDiv = doc.DocumentNode.NodeByXpath("/html/body/div[3]/div[2]");

            //------------------------------------------------
            // categories
            //
            // HtmlAgilityPack's SelectNodes returns null when nothing matches, so guard the loop.
            var categoryLinks = infoDiv.NodeByXpath("p[1]").SelectNodes("a");

            SupplierCategory parentCategory = null;
            if (categoryLinks != null)
            {
                foreach (var categoryLink in categoryLinks)
                {
                    var tCatHref = categoryLink.GetAttributeValue("href", null);

                    // Anchors without an href, or without a "?sid=" parameter, are not category links.
                    if (tCatHref == null) continue;
                    if (!tCatHref.Contains("?sid=")) continue;

                    var sidParts = tCatHref.Split(new[] { "sid=" }, StringSplitOptions.RemoveEmptyEntries);

                    // "?sid=" with nothing after it yields no id part; skip instead of indexing out of range.
                    if (sidParts.Length < 2) continue;

                    var sid = sidParts[1];

                    var tCat = supplier.Categories.FirstOrDefault(c => c.IdOnSource == sid);

                    if (tCat == null)
                    {
                        tCat = new SupplierCategory();
                        tCat.Set("IdOnSource", sid);
                        tCat.Set("Supplier", supplier);
                    }

                    tCat.Set("Title", categoryLink.InnerText);
                    tCat.Set("UriOnSource", tCatHref);

                    // Each qualifying link becomes the child of the previous qualifying link.
                    if (parentCategory != null)
                    {
                        tCat.Set("Parent", parentCategory);
                    }

                    parentCategory = tCat;
                }
            }

            // The product is assigned the last category of the chain (null when none was found).
            product.Set("Category", parentCategory);

            //------------------------------------------------
            // price without the promotion (if such a price is present,
            // the current price is a promotional one)
            //

            var priceB = infoDiv.PriceByXpath("p[3]/font[2]/s", "руб.");
            if (priceB != null)
            {
                product.Set("IsSale", true);
            }

            var discountPrice = pRow.CurrentPrice;
            var price = priceB ?? discountPrice;
            var costPrice = discountPrice * (100m - supplier.Discount) / 100m;

            product.Set("Price", price);
            product.Set("DiscountPrice", discountPrice);
            product.Set("CostPrice", costPrice);

            //--------------------------------------------------
            // description: every <p> after the first three, concatenated as raw HTML
            //
            var paragraphs = infoDiv.SelectNodes("p");

            StringBuilder sbDescription = new StringBuilder();

            if (paragraphs != null)
            {
                foreach (var descriptionP in paragraphs.Skip(3))
                {
                    sbDescription.AppendLine(descriptionP.OuterHtml);
                }
            }

            product.Set("Description", sbDescription.ToString());

            context.SaveChanges();

            //--------------------------------------------------
            // images
            //
            var pageLoader = new PageLoader();
            var imageTd = infoDiv.NodeByXpath("div[1]/table/tr/td");

            // A product page without image links must not fail after the product was already saved.
            var imageLinks = imageTd != null ? imageTd.SelectNodes("a") : null;

            if (imageLinks != null)
            {
                foreach (var imageLink in imageLinks)
                {
                    var imageUri = imageLink.GetAttributeValue("href", null);
                    if (imageUri == null) continue;

                    // Example href: plugins/resize.php?f=../uploads/ct60-0.jpg&amp;w=800
                    var imageNameParts = imageUri.Split(new[] { "plugins/resize.php?f=../uploads/", "&" }, StringSplitOptions.RemoveEmptyEntries);
                    if (imageNameParts.Length == 0) continue;

                    var imageName = imageNameParts[0];
                    imageUri += "&s=0"; // removes the logo from the image (per the original author's note)

                    var sourceUri = SupplierUri + imageUri;
                    var savePath = string.Format("img/taimyr/{0}/{1}", product.IdOnSource, imageName);

                    var image = product.Images.FirstOrDefault(im => im.LocalPath == savePath);
                    if (image == null)
                    {
                        image = new Image();

                        image.Set("UriOnSupplier", imageUri);
                        image.Set("LocalPath", savePath);

                        // NOTE(review): presumably ensures the target folder exists - confirm in FileSystemUtils.
                        FileSystemUtils.GetFolder(savePath);

                        if (!File.Exists(savePath))
                        {
                            pageLoader.RequestImage(sourceUri, savePath);
                        }

                        // Only images that are not yet attached are added, so an existing
                        // entry is never added a second time.
                        product.Images.Add(image);
                    }

                    if (product.DefaultImage == null) product.DefaultImage = image;
                }
            }

            context.SaveChanges();

            return product;
        }