/// <summary>
/// Scrapes the mdi-toys.ru catalogue and exports the result to "mdi-toys.csv".
/// </summary>
/// <remarks>
/// Category links are read from the catalogue page, each link is resolved by the parser into
/// category/product objects, and the collection is written with the WebAsyst headers and
/// formatter. All selectors are position-based XPath expressions tied to the site's markup.
/// </remarks>
public static void Parse()
{
    const string siteRoot = @"https://mdi-toys.ru";

    // Category pages expose a single property: the caption of the active menu entry.
    var categoryFields = new Dictionary<string, Search<string>>();
    categoryFields["Наименование"] = (page, ctx) =>
    {
        var activeEntry = page.SelectSingleNode(@"//ul[@class]/li[contains(@class, ""active"")]/a");
        return activeEntry.InnerText;
    };

    // Product pages: every property is read from a fixed slot of the details column.
    var productFields = new Dictionary<string, Search<string>>();
    productFields["Наименование"] = (page, ctx) =>
        page.SelectSingleNode(@"//h1").InnerHtml;
    productFields[@"""Код артикула"""] = (page, ctx) =>
        page.SelectSingleNode(@"//*[@id='top']/div[6]/div[1]/div[2]/div[1]/span[2]").InnerText;
    productFields[@"Габариты"] = (page, ctx) =>
        page.SelectSingleNode(@"//*[@id='top']/div[6]/div[1]/div[2]/div[5]/span[2]").InnerText;
    productFields[@"Цена"] = (page, ctx) =>
        page.SelectSingleNode(@"//*[@id='top']/div[6]/div[1]/div[2]/div[6]/a/span[1]").InnerText;
    productFields[@"Описание"] = (page, ctx) =>
    {
        // The seventh block comes first, followed by the inner blocks joined with newlines.
        var lead = page.SelectSingleNode(@"//*[@id='top']/div[6]/div[1]/div[2]/div[7]").InnerHtml;
        var extraBlocks = page
            ._SelectNodes(@"//*[@id='top']/div[6]/div[1]/div[2]/div[position() > 1 and position() < last() - 1]")
            .Select(x => x.InnerHtml);
        return lead + string.Join("\n", extraBlocks);
    };

    // Image sources are site-relative, so the site root is prepended.
    var productLists = new Dictionary<string, Search<string[]>>();
    productLists["Изображения"] = (page, ctx) =>
    {
        var pictures = page._SelectNodes(@"//*[@id='top']/div[6]/div[1]/div[1]/div[1]/div/img");
        return pictures
            .Select(img => siteRoot + img.Attributes["src"].Value)
            .ToArray();
    };

    var parser = new LiquiMolyClass(
        // A page with a filter block is treated as a category listing.
        isCategory: page => page._SelectNodes(@"//div[@class='filter-block']").Count != 0,
        findProducts: (page, ctx) =>
        {
            var anchors = page._SelectNodes(@"//*[@id='products_grid']/div/div/a");
            return anchors
                .Select(a => new ArgumentObject(a.Attributes["href"].Value))
                .ToArray();
        },
        singlePropertiesCategory: categoryFields,
        singlePropertiesProduct: productFields,
        pluralPropertiesProduct: productLists
    );

    var catalogue = new ArgumentObject(url: "https://mdi-toys.ru/catalog/", args: new object[] { 0 });
    var categoryLinks = parser.GetLinks(
        catalogue,
        @".//*[@id='top']/div[6]/div/div/ul[1]/li[position() > 1]/a");
    var collection = parser.GetProductOrCategory(categoryLinks);

    Import.Write(
        path: "mdi-toys.csv",
        collection: collection.ToArray(),
        headers: Constants.WebAsystKeys,
        format: Constants.WebAsystFormatter);
}
/// <summary>
/// Scrapes the td-dvoriki.com toy shop and exports the result to "tddvoriki.csv".
/// </summary>
/// <remarks>
/// A page is treated as a category when it does not contain the product-zoom container.
/// The collection is written with the WebAsyst headers and formatter.
/// </remarks>
public static void Parse()
{
    var parser = new LiquiMolyClass(
        isCategory: node => node
            ._SelectNodes(@".//*[@id='appPartZoomCompprd1appPart1zvc_Zoom_ProductBundle__0_0_def_0']")
            .Count == 0,
        findProducts: (node, args) => node
            ._SelectNodes(@"//*[contains(@class,'pgg_cg1_galleryDisplayer')]/div/a")
            .Select(x => new ArgumentObject(x.Attributes["href"].Value))
            .ToArray(),
        singlePropertiesCategory: new Dictionary<string, Search<string>>
        {
            // The category name is a constant rather than scraped from the page.
            ["Наименование"] = (node, args) => "ТД-Дворики",
        },
        singlePropertiesProduct: new Dictionary<string, Search<string>>
        {
            ["Наименование"] = (node, args) => node
                .SelectSingleNode(@".//*[@id='appPartZoomCompprd1appPart1zvc_ZoomDetails_ProductBundle__0_0_2_titlerichTextContainer']/p")
                .InnerText,

            // Overview followed by details. Each half is parenthesised because '+' binds
            // tighter than '??'; without the parentheses the details half was silently
            // dropped whenever the overview was present.
            ["Описание"] = (node, args) =>
                (node
                    ?.SelectSingleNode(@".//*[@id='appPartZoomCompprd1appPart1zvc_ZoomDetails_ProductBundle__0_0_2_overviewrichTextContainer']/p")
                    ?.InnerHtml ?? String.Empty)
                + (node
                    ?.SelectSingleNode(@".//*[@id='appPartZoomCompprd1appPart1zvc_ZoomDetails_ProductBundle__0_0_2_detailsrichTextContainer']/p")
                    ?.InnerHtml ?? String.Empty),

            // Price text with the currency label and comma separators removed.
            ["Цена"] = (node, args) => node
                .SelectSingleNode(@".//*[@id='appPartZoomCompprd1appPart1zvc_ZoomDetails_ProductBundle__0_0_2_pricerichTextContainer']/p")
                .InnerText.Replace("руб.", String.Empty).Replace(",", String.Empty),
        },
        pluralPropertiesProduct: new Dictionary<string, Search<string[]>>
        {
            ["Изображения"] = (node, args) => node
                ._SelectNodes(@"//div[@class='s_xUSelectableSliderGalleryDefaultSkinimageItem']//img")
                .Select(x => x.Attributes["src"].Value)
                .Select(src =>
                {
                    // Keep only the part of the URL before the "v1" marker. When the marker
                    // is absent the source is kept whole instead of throwing.
                    var cut = src.IndexOf("v1", StringComparison.Ordinal);
                    return cut >= 0 ? src.Substring(0, cut) : src;
                })
                .ToArray()
        }
    );

    var argument = new ArgumentObject(
        url: @"http://www.td-dvoriki.com/toy-shop-cjg9",
        args: new object[] { 0 });

    var collection = parser.GetProductOrCategory(argument);

    Import.Write(
        path: "tddvoriki.csv",
        collection: collection.ToArray(),
        headers: Constants.WebAsystKeys,
        format: Constants.WebAsystFormatter);
}
/// <summary>
/// Scrapes a fixed list of igr.ru catalogue sections, merges the result with the data returned
/// by <c>IgrRuDataExtractorExample.Extract()</c>, and exports everything to
/// "..\..\..\CSV\igrRu.csv".
/// </summary>
/// <remarks>
/// Scraped categories are keyed by name and products by the "Код" property for the merge.
/// Two placeholder category objects ("Temporary2" and "!IgrRu") are combined with the merged
/// collection through <c>Extend</c> before the export.
/// </remarks>
public static void Parse()
{
    var random = new Random();
    var mainUrl = @"http://igr.ru/";
    var addedUrl = @"http://igr.ru/cat.php?pgsize=1000&sort=1&pgsort=1&days=9000&rub=207&prub=";

    // --- product properties -------------------------------------------------------------
    var singlePropertiesProduct = new Dictionary<string, Search<string>>();
    singlePropertiesProduct["Наименование"] = (page, ctx) =>
        page.SelectSingleNode(@"//h2").InnerText;
    singlePropertiesProduct["Код"] = (page, ctx) =>
    {
        var codeText = page.SelectSingleNode(@"//*[contains(text(), 'Код')]/../text()[1]");
        return codeText.InnerText.Trim();
    };
    singlePropertiesProduct["Описание"] = (page, ctx) =>
    {
        // The description paragraph is optional; a missing one yields an empty string.
        var paragraph = page.SelectSingleNode(@"//p[@class='textsm' and preceding-sibling::table]");
        return paragraph?.InnerHtml ?? String.Empty;
    };
    singlePropertiesProduct["Валюта"] = (page, ctx) => "RUB";
    singlePropertiesProduct[@"""Доступен для заказа"""] = (page, ctx) => "1";
    singlePropertiesProduct[@"Статус"] = (page, ctx) => "1";

    // Derived properties reuse the extractors registered above.
    singlePropertiesProduct["Заголовок"] = (page, ctx) =>
        singlePropertiesProduct["Наименование"](page, ctx);
    singlePropertiesProduct[@"""Ссылка на витрину"""] = (page, ctx) =>
    {
        // Product name plus a random number, passed through GetHumanLink.
        var rawLink = singlePropertiesProduct["Наименование"](page, ctx) + "-SBT-" + random.Next();
        return Humanization.GetHumanLink(rawLink);
    };
    singlePropertiesProduct[@"""Краткое описание"""] = (page, ctx) =>
    {
        // Text of the description up to the first full stop.
        var fullText = singlePropertiesProduct["Описание"](page, ctx);
        return fullText.Split('.')[0];
    };

    // --- category and image properties --------------------------------------------------
    var categoryFields = new Dictionary<string, Search<string>>();
    categoryFields["Наименование"] = (page, ctx) =>
    {
        // The name is prefixed with as many '!' characters as the first argument says.
        var marks = new string('!', (int)ctx.Args[0]);
        return marks + page.SelectSingleNode(@"//img[@alt]/following-sibling::a[1]").InnerText;
    };

    var imageFields = new Dictionary<string, Search<string[]>>();
    imageFields["Изображения"] = (page, ctx) => page
        ._SelectNodes(@"//td[@class='textsm']/a[img]")
        // Build the absolute link, drop its first 29 characters (the site root plus the
        // leading part of the link path) and the ".jpg" extension.
        .Select(anchor => (mainUrl + anchor.Attributes["href"].Value)
            .Substring(29)
            .Replace(".jpg", string.Empty))
        .Select(x => $"http://img.simba-trade.ru/site1/{x}_c.jpg")
        .ToArray();

    var parser = new LiquiMolyClass(
        isCategory: page => page._SelectNodes(@"//*[@class='good_good']").Count != 0,
        findProducts: (page, ctx) =>
        {
            var detailLinks = page._SelectNodes(@"//*[@class='good_good']//a[contains(text(), 'Подробнее')]");
            return detailLinks
                .Select(a => new ArgumentObject(mainUrl + a.Attributes["href"].Value))
                .ToArray();
        },
        singlePropertiesCategory: categoryFields,
        singlePropertiesProduct: singlePropertiesProduct,
        pluralPropertiesProduct: imageFields,
        encoding: Encoding.Default
    );

    // Section identifiers appended to the listing URL.
    var sectionIds = new[] { 651, 382, 385, 387, 827, 678, 680, 672, 208, 677, 682 };
    var arguments = sectionIds
        .Select(id => new ArgumentObject(url: addedUrl + id, args: new object[] { 2 }));

    var collection = Merger.Merge(
        collection: parser.GetProductOrCategory(arguments),
        other: IgrRuDataExtractorExample.Extract(),
        setKeyCollection: item => item.IsCategory
            ? item.SingleProperties["Наименование"]
            : item.SingleProperties["Код"],
        setKeyOtherCollection: item => item.SingleProperties["Код"]);

    var placeholders = new[]
    {
        new ProductCategoryObject(
            new Dictionary<string, string> { ["Наименование"] = "Temporary2" },
            isCategory: true),
        new ProductCategoryObject(
            new Dictionary<string, string> { ["Наименование"] = "!IgrRu" },
            isCategory: true)
    };
    collection = placeholders.Extend(collection);

    Import.Write(
        path: @"..\..\..\CSV\igrRu.csv",
        collection: collection.ToArray(),
        headers: Constants.WebAsystKeys,
        format: Constants.WebAsystFormatter);
}
/// <summary>
/// Scrapes valda.ru starting from the links found on the home page and exports the result to
/// "..\..\..\CSV\valda.csv".
/// </summary>
/// <remarks>
/// Two placeholder category objects ("Temporary2" and "!valda") are combined with the scraped
/// collection through <c>Extend</c> before the export.
/// </remarks>
public static void Parse()
{
    var random = new Random();
    var URL = @"http://valda.ru";

    var singlePropertiesProduct = new Dictionary<string, Search<string>>
    {
        // The price is stored in a data attribute of the second span, or of the third one
        // when the second is missing or has no such attribute.
        ["Цена"] = (node, args) => node
            .SelectSingleNode(@"//div[@class='add2cart']/span[2]")
            ?.Attributes["data-price"]?.Value
            ?? node.SelectSingleNode(@"//div[@class='add2cart']/span[3]")
                .Attributes["data-price"].Value,

        // Parenthesised so the fallback applies to the scraped text. Previously '+' bound
        // tighter than '??', which left the fallback unreachable. The resulting value is
        // the same: "VD-" alone when the node is missing.
        [@"""Код артикула"""] = (node, args) => "VD-" + (node
            .SelectSingleNode(@"//div[@itemprop]/*[contains(@class, 'hint')]")
            ?.InnerText ?? string.Empty),

        ["Наименование"] = (node, args) => node
            .SelectSingleNode(@"//article/h1/span")
            .InnerText,

        // Concatenation of three optional fragments; a missing fragment contributes nothing.
        ["Описание"] = (node, args) =>
            (node.SelectSingleNode(@".//*[@id='product-description']")?.InnerHtml ?? string.Empty)
            + (node.SelectSingleNode(@".//*[@id='cart-form']/p")?.InnerHtml ?? string.Empty)
            + (node.SelectSingleNode(@".//*[@id='cart-form']/table")?.InnerHtml ?? string.Empty),

        ["Валюта"] = (node, o) => "RUB",
        [@"""Доступен для заказа"""] = (node, o) => "1",
        [@"Статус"] = (node, o) => "1",

        [@"""Зачеркнутая цена"""] = (node, args) => node
            .SelectSingleNode(@"//span[@class='compare-at-price nowrap']")
            ?.InnerText?.Replace(" ", string.Empty) ?? string.Empty,
    };

    singlePropertiesProduct["Заголовок"] = (node, args) =>
        singlePropertiesProduct["Наименование"](node, args);

    // Product name plus a random number, passed through GetHumanLink.
    singlePropertiesProduct[@"""Ссылка на витрину"""] = (node, args) =>
        Humanization.GetHumanLink(
            singlePropertiesProduct["Наименование"](node, args) + "-VD-" + random.Next());

    var parser = new LiquiMolyClass(
        isCategory: node => node
            ._SelectNodes(@"//*[@id='product-list']")
            .Count != 0,
        findProducts: (node, args) => node
            ._SelectNodes(@"//*[@id='product-list']/ul/li/a")
            .Select(x => new ArgumentObject(URL + x.Attributes["href"].Value))
            .ToArray(),
        singlePropertiesProduct: singlePropertiesProduct,
        pluralPropertiesProduct: new Dictionary<string, Search<string[]>>
        {
            ["Изображения"] = (node, args) =>
            {
                var gallery = node
                    ._SelectNodes(@"//div[@id='product-gallery']/div/a")
                    .Select(x => URL + x.Attributes["href"].Value)
                    .ToArray();

                // With an empty gallery, fall back to the single main product image.
                return gallery.Length != 0
                    ? gallery
                    : new[]
                    {
                        URL + node.SelectSingleNode(@".//*[@id='product-image']")
                            .Attributes["src"].Value
                    };
            }
        },
        singlePropertiesCategory: new Dictionary<string, Search<string>>
        {
            // The name is prefixed with as many '!' characters as the first argument says.
            ["Наименование"] = (node, args) => new string('!', (int)args.Args[0]) + node
                .SelectSingleNode(@".//*[@id='page-content']/h1")
                .InnerText.Trim()
        }
    );

    var argument = new ArgumentObject(
        url: URL,
        args: new object[] { 2 });

    var collection = parser.GetProductOrCategory(
        parser.GetLinks(argument, @".//*[@id='page-content']/div[1]/ul/li/a", prefix: URL));

    collection = new[]
    {
        new ProductCategoryObject(
            new Dictionary<string, string> { ["Наименование"] = "Temporary2" },
            isCategory: true),
        new ProductCategoryObject(
            new Dictionary<string, string> { ["Наименование"] = "!valda" },
            isCategory: true)
    }.Extend(collection);

    Import.Write(
        path: @"..\..\..\CSV\valda.csv",
        collection: collection.ToArray(),
        headers: Constants.WebAsystKeys,
        format: Constants.WebAsystFormatter);
}
/// <summary>
/// Scrapes six listing pages of the FLIGHT PERCUSSION brand on dynatone.ru and exports the
/// result to "../../../CSV/dynaTone.csv".
/// </summary>
/// <remarks>
/// Two placeholder category objects ("Temporary2" and "!DynaTone") are combined with the
/// scraped collection through <c>Extend</c> before the export.
/// </remarks>
public static void Parse()
{
    var URL = @"http://dynatone.ru";
    var addedUrl = @"http://dynatone.ru/products.php?group=993876&type=&brand=FLIGHT+PERCUSSION&cat=0&order=status&orderWay=desc&p=";

    // --- product properties -------------------------------------------------------------
    var singlePropertiesProduct = new Dictionary<string, Search<string>>();
    singlePropertiesProduct["Наименование"] = (page, ctx) =>
        page.SelectSingleNode(@"//h1/span[@class='bk_name']").InnerText;
    singlePropertiesProduct[@"""Код артикула"""] = (page, ctx) =>
    {
        var codeCell = page.SelectSingleNode(@"//td[@valign='middle']/b");
        return "DNT-" + codeCell.InnerText.Trim();
    };
    singlePropertiesProduct["Цена"] = (page, ctx) =>
    {
        var priceSpan = page.SelectSingleNode(@"//h1[@align='center']/span");
        return priceSpan.InnerText.Replace(" ", string.Empty);
    };
    singlePropertiesProduct["Описание"] = (page, ctx) =>
    {
        // Concatenated HTML of the matched paragraphs; no match gives an empty string.
        var paragraphs = page
            ._SelectNodes(@"//table[@align='left']/../p[1]")
            ?.ToArray() ?? new HtmlNode[0];
        return string.Concat(paragraphs.Select(p => p.InnerHtml));
    };
    singlePropertiesProduct["Страна-производитель"] = (page, ctx) =>
    {
        var madeIn = page
            .SelectSingleNode(@"//table[@align='left']/../ul//li[contains(text(), ""Сделано"")]")
            ?.InnerText ?? string.Empty;

        // Texts of at most one character are treated as "no country".
        if (madeIn.Length <= 1)
        {
            return string.Empty;
        }

        return GetNormalizeCountry(madeIn);
    };
    singlePropertiesProduct["Валюта"] = (page, ctx) => "RUB";
    singlePropertiesProduct[@"""Доступен для заказа"""] = (page, ctx) => "1";
    singlePropertiesProduct[@"Статус"] = (page, ctx) => "1";

    // Derived properties reuse the extractors registered above.
    singlePropertiesProduct["Заголовок"] = (page, ctx) =>
        singlePropertiesProduct["Наименование"](page, ctx);
    singlePropertiesProduct[@"""Ссылка на витрину"""] = (page, ctx) =>
    {
        var title = singlePropertiesProduct["Наименование"](page, ctx);
        var code = singlePropertiesProduct[@"""Код артикула"""](page, ctx);
        return Humanization.GetHumanLink(title + "-" + code);
    };

    // --- category and image properties --------------------------------------------------
    var categoryFields = new Dictionary<string, Search<string>>();
    categoryFields["Наименование"] = (page, ctx) =>
    {
        // The name is prefixed with as many '!' characters as the first argument says.
        var marks = new string('!', (int)ctx.Args[0]);
        var caption = page.SelectSingleNode(@"//*[@class='shapkasearch']/b/a[1]").InnerText;
        return marks + caption.Trim();
    };

    var imageFields = new Dictionary<string, Search<string[]>>();
    imageFields["Изображения"] = (page, ctx) => page
        ._SelectNodes(@"//td[@align='center']/img")
        .Select(img => URL + img.Attributes["src"].Value)
        .ToArray();

    var parser = new LiquiMolyClass(
        isCategory: page => page._SelectNodes(@"//*[@class='shapka'][2]/a").Count != 0,
        findProducts: (page, ctx) =>
        {
            var productLinks = page._SelectNodes(@"//td[@class='tovname']/*[@class='descrcut']/../a[1]");
            return productLinks
                .Select(a => new ArgumentObject(URL + a.Attributes["href"].Value))
                .ToArray();
        },
        singlePropertiesCategory: categoryFields,
        singlePropertiesProduct: singlePropertiesProduct,
        pluralPropertiesProduct: imageFields,
        encoding: Encoding.Default
    );

    // Listing pages 1 to 6; the page number is appended to the listing URL.
    var arguments = Enumerable.Range(1, 6)
        .Select(pageNo => new ArgumentObject(url: addedUrl + pageNo, args: new object[] { 2 }));

    var collection = parser.GetProductOrCategory(arguments);

    var placeholders = new[]
    {
        new ProductCategoryObject(
            new Dictionary<string, string> { ["Наименование"] = "Temporary2" },
            isCategory: true),
        new ProductCategoryObject(
            new Dictionary<string, string> { ["Наименование"] = "!DynaTone" },
            isCategory: true)
    };
    collection = placeholders.Extend(collection);

    Import.Write(
        path: "../../../CSV/dynaTone.csv",
        collection: collection.ToArray(),
        headers: Constants.WebAsystKeys,
        format: Constants.WebAsystFormatter);
}
/// <summary>
/// Scrapes the lavatoys.ru catalogue (following the top paginator) and exports the result to
/// "../../../CSV/lavaToys.csv".
/// </summary>
/// <remarks>
/// A page is treated as a category when it has no "back" button. Two placeholder category
/// objects ("Temporary2" and "!LavaToys") are combined with the scraped collection through
/// <c>Extend</c> before the export.
/// </remarks>
public static void Parse()
{
    var URL = @"http://www.lavatoys.ru";

    var singlePropertiesProduct = new Dictionary<string, Search<string>>
    {
        ["Наименование"] = (node, args) => node
            .SelectSingleNode(@"//*[@class='toy__title']")
            .InnerText,
        [@"""Код артикула"""] = (node, args) => "LVS-" + node
            .SelectSingleNode(@"//*[@class='toy__article']")
            .InnerText,

        // The old price is optional; all whitespace is removed from both prices.
        [@"""Зачеркнутая цена"""] = (node, args) => Regex.Replace(
            node.SelectSingleNode(@"//*[@class='toy__old-price']")?.InnerText ?? string.Empty,
            @"\s+",
            string.Empty),
        ["Цена"] = (node, args) => Regex.Replace(
            node.SelectSingleNode(@"//*[@class='toy__price-right']/strong").InnerText,
            @"\s+",
            string.Empty),

        ["Описание"] = (node, args) => string.Format(
            "Издает звук {0} при нажатии на игрушку",
            node.SelectSingleNode(@"//*[@class='toy__song']")?.InnerText ?? string.Empty),

        // The second parameter row is reported as a height only when it is given in "см".
        ["Размеры"] = (node, args) =>
        {
            var length = node
                .SelectSingleNode(@"//*[@class='toy__params-row'][2]/td[2]")
                .InnerText;
            return length.Contains("см")
                ? "Высота: " + length
                : string.Empty;
        },

        ["Валюта"] = (node, o) => "RUB",
        [@"""Доступен для заказа"""] = (node, o) => "1",
        [@"Статус"] = (node, o) => "1",
    };

    singlePropertiesProduct["Заголовок"] = (node, args) =>
        singlePropertiesProduct["Наименование"](node, args);
    singlePropertiesProduct[@"""Ссылка на витрину"""] = (node, args) =>
        Humanization.GetHumanLink(
            singlePropertiesProduct["Наименование"](node, args)
            + "-"
            + singlePropertiesProduct[@"""Код артикула"""](node, args));

    var parser = new LiquiMolyClass(
        isCategory: node => node
            ._SelectNodes(@"//*[@class='js-content__back-button']")
            .Count == 0,
        findProducts: (node, args) => node
            ._SelectNodes(@"//a[@class='catalog-item__image-wrapper']")
            .Select(x => new ArgumentObject(URL + x.Attributes["href"].Value))
            .ToArray(),
        singlePropertiesCategory: new Dictionary<string, Search<string>>
        {
            ["Наименование"] = (node, args) =>
            {
                // The heading is "<label>: <name>" and the part after the first colon is
                // used. A heading without a colon is used whole instead of throwing.
                var parts = node
                    .SelectSingleNode(@"//h1[@class='catalog-items__title']")
                    .InnerText
                    .Split(':');
                var name = parts.Length > 1 ? parts[1] : parts[0];

                // The name is prefixed with as many '!' characters as the first argument says.
                return new string('!', (int)args.Args[0]) + name.Trim();
            }
        },
        singlePropertiesProduct: singlePropertiesProduct,
        pluralPropertiesProduct: new Dictionary<string, Search<string[]>>
        {
            ["Изображения"] = (node, args) => node
                ._SelectNodes(@"//*[@class='js-widget']")
                .Select(x => URL + x.Attributes["src"].Value)
                .ToArray()
        },
        encoding: Encoding.UTF8,
        xPathPagination: (node, args) => node
            ._SelectNodes(@"//nav[@class='paginator paginator_position_top']/a[@class='paginator__item']")
            .Select(x => new ArgumentObject(URL + x.Attributes["href"].Value))
            .ToArray()
    );

    var argument = new ArgumentObject(
        url: @"http://www.lavatoys.ru/catalogue/",
        args: new object[] { 2 });

    // The link prefix is the site root, reused from URL rather than repeated as a literal.
    var collection = parser.GetProductOrCategory(
        parser.GetLinks(
            argument,
            @"//*[@class='nav__col nav__col_width_half nav__col_type_catalog-groups']/a[@class='nav__item']",
            prefix: URL));

    collection = new[]
    {
        new ProductCategoryObject(
            new Dictionary<string, string> { ["Наименование"] = "Temporary2" },
            isCategory: true),
        new ProductCategoryObject(
            new Dictionary<string, string> { ["Наименование"] = "!LavaToys" },
            isCategory: true)
    }.Extend(collection);

    Import.Write(
        path: "../../../CSV/lavaToys.csv",
        collection: collection.ToArray(),
        headers: Constants.WebAsystKeys,
        format: Constants.WebAsystFormatter);
}
/// <summary>
/// Scrapes the tomik.ru "domino" catalogue section (descending into sub-catalogues) and
/// exports the result to "tomik.csv".
/// </summary>
/// <remarks>
/// A page is treated as a category when it has no "popular-items-cell-T" heading.
/// The collection is written with the WebAsyst headers and formatter.
/// </remarks>
public static void Parse()
{
    var url = "http://tomik.ru/";

    var singlePropertiesProduct = new Dictionary<string, Search<string>>()
    {
        [@"Наименование"] = (node, args) => node
            .SelectSingleNode(@"//h1[@class]")
            .InnerText,
        [@"Цена"] = (node, args) => node
            .SelectSingleNode(@"//div[contains(@class, 'price')]/span")
            .InnerText,

        // The rows below skip a fixed number of leading characters with Substring.
        // NOTE(review): presumably that is the row's caption; confirm against the markup.
        [@"Деталей"] = (node, args) => node
            .SelectSingleNode(@".//*[@id='toy-block']/div/div/div/div/div/div[3]/div[1]/div[2]")
            .InnerText.Substring(9),
        [@"""Код артикула"""] = (node, args) => node
            .SelectSingleNode(@".//*[@id='toy-block']/div/div/div/div/div/div[3]/div[1]/div[3]")
            .InnerText.Substring(9),
        [@"Габариты"] = (node, args) => node
            .SelectSingleNode(@".//*[@id='toy-block']/div/div/div/div/div/div[3]/div[1]/div[4]")
            .InnerText.Substring(8),
        [@"Вес"] = (node, args) => node
            .SelectSingleNode(@".//*[@id='toy-block']/div/div/div/div/div/div[3]/div[1]/div[5]")
            .InnerText.Substring(5),

        ["Валюта"] = (node, o) => "RUB",
        [@"""Доступен для заказа"""] = (node, o) => "1",
        [@"Статус"] = (node, o) => "1",
    };

    singlePropertiesProduct["Заголовок"] = (node, args) =>
        singlePropertiesProduct["Наименование"](node, args);
    singlePropertiesProduct[@"""Ссылка на витрину"""] = (node, args) =>
        Humanization.GetHumanLink(singlePropertiesProduct["Наименование"](node, args));

    var parser = new LiquiMolyClass(
        isCategory: node => node
            ._SelectNodes(@"//h1[@class=""popular-items-cell-T""]")
            .Count == 0,
        // Sub-catalogues are visited with the first argument increased by one.
        findSubcatalogs: (node, args) => node
            ._SelectNodes(@"//div[@class=""container-fluid""]/div[3]/div/a")
            .Select(x => new ArgumentObject(
                url: url + x.Attributes["href"].Value,
                args: new object[] { (int)args.Args[0] + 1 }))
            .ToArray(),
        findProducts: (node, args) => node
            ._SelectNodes(@"//section[contains(@class, ""items-category"")]/div/div/div/div/div/a")
            .Select(x => new ArgumentObject(url: url + x.Attributes["href"].Value, args: args.Args))
            .ToArray(),
        pluralPropertiesProduct: new Dictionary<string, Search<string[]>>
        {
            // Missing or empty sources are dropped BEFORE the site root is prepended.
            // The previous check ran on the already-prefixed URL and could never match.
            ["Изображения"] = (node, args) => node
                ._SelectNodes(@"//img[@id]")
                .Select(x => x.Attributes["src"]?.Value)
                .Where(src => !string.IsNullOrEmpty(src))
                .Select(src => url + src)
                .Distinct()
                .ToArray()
        },
        singlePropertiesCategory: new Dictionary<string, Search<string>>
        {
            // The name is prefixed with as many '!' characters as the first argument says.
            ["Наименование"] = (node, args) => new string('!', (int)args.Args[0]) + node
                .SelectSingleNode(@"//h1")
                .InnerText,
        },
        singlePropertiesProduct: singlePropertiesProduct
    );

    var argument = new ArgumentObject(
        url: @"http://tomik.ru/katalog/nastolnye-igry/domino/",
        args: new object[] { 0 });

    // NOTE(review): 'links' (the sidebar categories) is built but not passed to the parser;
    // only the single section above is scraped. The GetLinks call is kept because its side
    // effects are not visible from this method.
    var links = parser
        .GetLinks(argument, @".//*[@id='sidebar']/div[2]/ul/li/a")
        .Select(x => new ArgumentObject(url: url + x.Url, args: x.Args));

    var collection = parser.GetProductOrCategory(argument);

    Import.Write(
        path: "tomik.csv",
        collection: collection.ToArray(),
        headers: Constants.WebAsystKeys,
        format: Constants.WebAsystFormatter);
}
/// <summary>
/// Scrapes a fixed list of playdorado.ru category pages (requested with "?limit=1000") and
/// exports the result to "..\..\..\CSV\playdorado.csv".
/// </summary>
/// <remarks>
/// Two placeholder category objects ("Temporary2" and "!Playdorado") are combined with the
/// scraped collection through <c>Extend</c> before the export.
/// </remarks>
public static void Parse()
{
    var suffix = @"?limit=1000";

    var singlePropertiesProduct = new Dictionary<string, Search<string>>
    {
        ["Наименование"] = (node, args) => node
            .SelectSingleNode(@"//h1")
            .InnerText.Trim(),

        // Raw text of the properties block; the article code and the dimensions below are
        // extracted from it with regular expressions.
        ["props"] = (node, args) => node
            .SelectSingleNode(@"//div[@class='description']")
            .InnerText,

        ["Цена"] = (node, args) => node
            .SelectSingleNode(@"//span[@itemprop='price']")
            .InnerText.Replace("р.", string.Empty).Replace(" ", string.Empty),

        ["Валюта"] = (node, o) => "RUB",
        [@"""Доступен для заказа"""] = (node, o) => "1",
        [@"Статус"] = (node, o) => "1",

        // The description is split at '.', '!' and '?'; pieces containing "Пластмастер"
        // are dropped and the rest is re-joined with '.'.
        ["Описание"] = (node, args) => string.Join(".", node
            .SelectSingleNode(@".//div[@id='tab-description']")
            .InnerText.Split(new[] { '.', '!', '?' })
            .Where(x => !x.Contains("Пластмастер"))),
    };

    singlePropertiesProduct[@"""Код артикула"""] = (node, args) =>
        "PLO-" + Regex.Match(singlePropertiesProduct["props"](node, args), @"Артикул:\s*(\w+)")
            .Groups[1].Value;

    singlePropertiesProduct[@"Размеры"] = (node, args) =>
    {
        // The properties text is extracted once and reused for the match.
        var text = singlePropertiesProduct["props"](node, args);
        var match = Regex.Match(
            text,
            @"Длина:\s+([\w\.]+\s*мм).*Ширина:\s+([\w\.]+\s*мм).*Высота:\s+([\w\.]+\s*мм)",
            RegexOptions.Singleline);

        // Without all three dimensions return an empty value, not the bare " x  x " separators.
        if (!match.Success)
        {
            return string.Empty;
        }

        var result = match.Groups;
        return $"{result[1]} x {result[2]} x {result[3]}";
    };

    singlePropertiesProduct["Заголовок"] = (node, args) =>
        singlePropertiesProduct["Наименование"](node, args);
    singlePropertiesProduct[@"""Ссылка на витрину"""] = (node, args) =>
        Humanization.GetHumanLink(
            singlePropertiesProduct["Наименование"](node, args)
            + "-"
            + singlePropertiesProduct[@"""Код артикула"""](node, args));

    var parser = new LiquiMolyClass(
        isCategory: node => node
            ._SelectNodes(@"//b[contains(text(), 'Сортировка')]")
            .Count != 0,
        findProducts: (node, args) => node
            ._SelectNodes(@"//*[@id='content']//div[@class='name']/a")
            .Select(x => new ArgumentObject(x.Attributes["href"].Value))
            .ToArray(),
        singlePropertiesCategory: new Dictionary<string, Search<string>>
        {
            // The name is prefixed with as many '!' characters as the first argument says.
            ["Наименование"] = (node, args) => new string('!', (int)args.Args[0]) + node
                .SelectSingleNode(@".//*[@id='column-left']//a[@class='active']")
                .InnerText
        },
        pluralPropertiesProduct: new Dictionary<string, Search<string[]>>
        {
            ["Изображения"] = (node, args) =>
            {
                // Remove(4, 1) deletes the fifth character of each link.
                // NOTE(review): presumably this turns "https" into "http"; confirm.
                var zoomLinks = node
                    ._SelectNodes(@"//a[contains(@id, 'zoom')]")
                    .Select(x => x.Attributes["href"].Value.Remove(4, 1))
                    .ToArray();
                var topLinks = node
                    ._SelectNodes(@"//div[@class='zoom-top']/a")
                    .Select(x => x.Attributes["href"].Value.Remove(4, 1))
                    .ToArray();
                return zoomLinks.Extend(topLinks).ToArray();
            }
        },
        singlePropertiesProduct: singlePropertiesProduct
    );

    var arguments = new[]
        {
            @"https://playdorado.ru/piramidy/",
            @"https://playdorado.ru/katalki/",
            @"https://playdorado.ru/razvivaushie/",
            @"https://playdorado.ru/igrovye/",
            @"https://playdorado.ru/kukly/",
            @"https://playdorado.ru/posuda/",
            @"https://playdorado.ru/transport/",
            @"https://playdorado.ru/sport/",
            @"https://playdorado.ru/sezonnye/",
            @"https://playdorado.ru/pvcl/",
            @"https://playdorado.ru/licenzionnaya-produkciya/"
        }
        .Select(x => new ArgumentObject(url: x + suffix, args: new object[] { 2 }));

    var collection = parser.GetProductOrCategory(arguments);

    collection = new[]
    {
        new ProductCategoryObject(
            new Dictionary<string, string> { ["Наименование"] = "Temporary2" },
            isCategory: true),
        new ProductCategoryObject(
            new Dictionary<string, string> { ["Наименование"] = "!Playdorado" },
            isCategory: true)
    }.Extend(collection);

    Import.Write(
        path: @"..\..\..\CSV\playdorado.csv",
        collection: collection.ToArray(),
        headers: Constants.WebAsystKeys,
        format: Constants.WebAsystFormatter);
}
/// <summary>
/// Scrapes a fixed list of polesie-toys.com catalogue sections, merges the result with the
/// data returned by <c>PolisieToysDataExtractorExample.Extract()</c>, and exports everything
/// to "../../../CSV/polisieToys.csv".
/// </summary>
/// <remarks>
/// A page is treated as a category when it has no product fields table. Scraped categories
/// are keyed by name and products by the "Артикул" property for the merge. Two placeholder
/// category objects ("Temporary2" and "!PolisieToys") are combined with the merged collection
/// through <c>Extend</c> before the export.
/// </remarks>
public static void Parse()
{
    var addedUrl = @"http://www.polesie-toys.com/cat/";

    var singlePropertiesProduct = new Dictionary<string, Search<string>>
    {
        ["Наименование"] = (node, args) => node
            .SelectSingleNode(@"//h1")
            .InnerText,

        // The same table cell feeds both the prefixed shop code and the bare merge key.
        [@"""Код артикула"""] = (node, args) => "PLS-" + node
            .SelectSingleNode(@"//*[@class='catalog-item-fields-table']//tr[1]/td[2]")
            .InnerText.Trim(),
        ["Артикул"] = (node, args) => node
            .SelectSingleNode(@"//*[@class='catalog-item-fields-table']//tr[1]/td[2]")
            .InnerText.Trim(),

        // Optional description tab followed by the fields table. The left half is
        // parenthesised because '+' binds tighter than '??'; without the parentheses the
        // table was only emitted when the description tab was missing.
        ["Описание"] = (node, args) =>
            (node.SelectSingleNode(@".//*[@id='catalog_item_view_tabs_item_0']")?.InnerText
                ?? string.Empty)
            + node.SelectSingleNode(@"//*[@class='catalog-item-fields-table']").InnerHtml,

        ["Валюта"] = (node, o) => "RUB",
        [@"""Доступен для заказа"""] = (node, o) => "1",
        [@"Статус"] = (node, o) => "1",
    };

    singlePropertiesProduct["Заголовок"] = (node, args) =>
        singlePropertiesProduct["Наименование"](node, args);
    singlePropertiesProduct[@"""Ссылка на витрину"""] = (node, args) =>
        Humanization.GetHumanLink(
            singlePropertiesProduct["Наименование"](node, args)
            + "-"
            + singlePropertiesProduct[@"""Код артикула"""](node, args));

    var parser = new LiquiMolyClass(
        isCategory: node => node
            ._SelectNodes(@"//*[@class='catalog-item-fields-table']")
            .Count == 0,
        // Sub-catalogues (except links whose text contains "Сервисное") are visited with
        // the first argument increased by one.
        findSubcatalogs: (node, args) => node
            ._SelectNodes(@"//*[@class='catalog-categories-div']/div[2]/a[not (contains (text(), 'Сервисное'))]")
            .Select(x => new ArgumentObject(
                x.Attributes["href"].Value,
                new object[] { (int)args.Args[0] + 1 }))
            .ToArray(),
        findProducts: (node, args) => node
            ._SelectNodes(@"//*[@class='catalog-items-div']/div[3]/a")
            .Select(x => new ArgumentObject(x.Attributes["href"].Value))
            .ToArray(),
        singlePropertiesCategory: new Dictionary<string, Search<string>>
        {
            // The name is prefixed with as many '!' characters as the first argument says.
            ["Наименование"] = (node, args) => new string('!', (int)args.Args[0]) + node
                .SelectSingleNode(@"//h1").InnerText
        },
        singlePropertiesProduct: singlePropertiesProduct,
        pluralPropertiesProduct: new Dictionary<string, Search<string[]>>
        {
            ["Изображения"] = (node, args) => node
                ._SelectNodes(@"//*[@class='img-thumbnail']")
                .Select(x => x.Attributes["src"].Value)
                .ToArray()
        },
        encoding: Encoding.UTF8
    );

    // Section slugs appended to the catalogue URL.
    var arguments = new[]
        {
            "yelektromobili",
            "katalki",
            "konstruktory",
            "transport",
            "voennaya-tehnika",
            "razvivayushhie_igrushki",
            "igrovye_kompleksy",
            "letnij_assortiment",
            "mebel-dlya-kukol",
            "posuda_dlya_kukol",
            "nabory_produktov",
            "produkciya_v_displeyah",
            "tovary_hozyajstvenno_bytovogo_naznacheniya"
        }
        .Select(x => new ArgumentObject(url: addedUrl + x, args: new object[] { 2 }));

    var collection = Merger.Merge(
        collection: parser.GetProductOrCategory(arguments),
        other: PolisieToysDataExtractorExample.Extract(),
        setKeyCollection: o => o.IsCategory
            ? o.SingleProperties["Наименование"]
            : o.SingleProperties["Артикул"],
        setKeyOtherCollection: o => o.SingleProperties["Артикул"]);

    collection = new[]
    {
        new ProductCategoryObject(
            new Dictionary<string, string> { ["Наименование"] = "Temporary2" },
            isCategory: true),
        new ProductCategoryObject(
            new Dictionary<string, string> { ["Наименование"] = "!PolisieToys" },
            isCategory: true)
    }.Extend(collection);

    Import.Write(
        path: "../../../CSV/polisieToys.csv",
        collection: collection.ToArray(),
        headers: Constants.WebAsystKeys,
        format: Constants.WebAsystFormatter);
}
/// <summary>
/// Scrapes the geokont.ru product groups and exports the result to
/// "../../../CSV/geoCont.csv".
/// </summary>
/// <remarks>
/// Two placeholder category objects ("Temporary2" and "!GeoCont") are combined with the
/// scraped collection through <c>Extend</c> before the export.
/// </remarks>
public static void Parse()
{
    var random = new Random();
    var URL = @"http://www.geokont.ru";
    var badString = "НЕТ В НАЛИЧИИ";

    var singlePropertiesProduct = new Dictionary<string, Search<string>>
    {
        ["Наименование"] = (node, args) => node
            .SelectSingleNode(@"//*[@class='prodHead']/h3")
            .InnerText,

        ["Цена"] = (node, args) =>
        {
            var compact = Regex.Replace(
                node.SelectSingleNode(@"//*[@class='prodCard'][2]/p[4]/text()").InnerText,
                @"\s+",
                string.Empty);

            // The last seven characters are cut off.
            // NOTE(review): presumably a currency suffix; confirm against the page.
            // Shorter text yields an empty price instead of an ArgumentOutOfRangeException.
            return compact.Length >= 7
                ? compact.Substring(0, compact.Length - 7).Replace(" ", "")
                : string.Empty;
        },

        // Every product gets the same constant code.
        [@"""Код артикула"""] = (node, args) => "GEK",

        ["Описание"] = (node, args) =>
        {
            // Optional first paragraph with the out-of-stock marker removed, followed by
            // the second paragraph.
            var first = node
                .SelectSingleNode(@"//*[@class='prodCard'][2]/p[1]/text()")
                ?.InnerText.Replace(badString, string.Empty) ?? string.Empty;
            var second = node
                .SelectSingleNode(@"//*[@class='prodCard'][2]/p[2]")
                .InnerText;
            return first + second;
        },

        ["Валюта"] = (node, o) => "RUB",
        [@"""Доступен для заказа"""] = (node, o) => "1",
        [@"Статус"] = (node, o) => "1",
    };

    singlePropertiesProduct["Заголовок"] = (node, args) =>
        singlePropertiesProduct["Наименование"](node, args);

    // Product name plus a random number, passed through GetHumanLink.
    singlePropertiesProduct[@"""Ссылка на витрину"""] = (node, args) =>
        Humanization.GetHumanLink(
            singlePropertiesProduct["Наименование"](node, args) + "-GEK-" + random.Next());

    var parser = new LiquiMolyClass(
        isCategory: node => node
            ._SelectNodes(@"//*[@class='cardsPr']/div/*[@class='smallProdTbl']")
            .Count != 0,
        findProducts: (node, args) => node
            ._SelectNodes(@"//*[@class='container-fluid body-content']/div[last()]//td[@class='smProdHead']/a")
            .Select(x => new ArgumentObject(URL + x.Attributes["href"].Value))
            .ToArray(),
        singlePropertiesCategory: new Dictionary<string, Search<string>>
        {
            // The group heading is cut out of the raw HTML with a regular expression and
            // prefixed with as many '!' characters as the first argument says.
            ["Наименование"] = (node, args) =>
                new string('!', (int)args.Args[0])
                + Regex.Match(
                        node.InnerHtml,
                        @"<h2 class=""grpHead"">(.*?)</h2>",
                        RegexOptions.Singleline)
                    .Groups[1].ToString().Trim()
        },
        singlePropertiesProduct: singlePropertiesProduct,
        pluralPropertiesProduct: new Dictionary<string, Search<string[]>>
        {
            ["Изображения"] = (node, args) => node
                ._SelectNodes(@"//a[@id='popupImg']")
                .Select(x => URL + x.Attributes["href"].Value)
                .ToArray()
        },
        encoding: Encoding.UTF8
    );

    var argument = new ArgumentObject(
        url: @"http://www.geokont.ru/ProductGroup/",
        args: new object[] { 2 });

    var collection = parser.GetProductOrCategory(
        parser.GetLinks(argument, @"//*[@class='vBlock' and position() < last()]/a", prefix: URL));

    collection = new[]
    {
        new ProductCategoryObject(
            new Dictionary<string, string> { ["Наименование"] = "Temporary2" },
            isCategory: true),
        new ProductCategoryObject(
            new Dictionary<string, string> { ["Наименование"] = "!GeoCont" },
            isCategory: true)
    }.Extend(collection);

    Import.Write(
        path: "../../../CSV/geoCont.csv",
        collection: collection.ToArray(),
        headers: Constants.WebAsystKeys,
        format: Constants.WebAsystFormatter);
}
/// <summary>
/// Scrapes pelikan-7.ru starting from the category menu of the home page (descending into
/// sub-catalogues and following pagination) and exports the result to "pelikan7.csv".
/// </summary>
/// <remarks>
/// A page is treated as a category when it contains an element with class "list".
/// The collection is written with the WebAsyst headers and formatter.
/// </remarks>
public static void Parse()
{
    var siteUrl = @"http://www.pelikan-7.ru";

    var singlePropertiesProduct = new Dictionary<string, Search<string>>
    {
        ["Наименование"] = (node, o) => node
            .SelectSingleNode(@"//div[@id='content']//h1")
            .InnerText,
        ["Цена"] = (node, o) => node
            .SelectSingleNode(@"//div[@id=""content""]//table/tr[1]/td[2]/span")
            .InnerText.Replace("руб.", String.Empty)
            .Replace(" ", String.Empty),
        [@"""Код артикула"""] = (node, o) => node
            .SelectSingleNode(@"//div[@id='content']/div[2]/div/table/tr/td[2]/table/tr[3]/td[2]")
            .InnerText,
        ["Размеры"] = (node, o) => node
            .SelectSingleNode(@"//div[@id='content']/div[2]/div/table/tr/td[2]/table/tr[5]/td[2]")
            .InnerText,
        ["Описание"] = (node, o) => node
            .SelectSingleNode(@"//div[@id='description']")
            .InnerHtml,
        ["Валюта"] = (node, o) => "RUB",
        [@"""Доступен для заказа"""] = (node, o) => "1",
        [@"Статус"] = (node, o) => "1",
    };

    singlePropertiesProduct["Заголовок"] = (node, args) =>
        singlePropertiesProduct["Наименование"](node, args);
    singlePropertiesProduct[@"""Ссылка на витрину"""] = (node, args) =>
        Humanization.GetHumanLink(singlePropertiesProduct["Наименование"](node, args));

    // Text of the description up to the first full stop.
    singlePropertiesProduct[@"""Краткое описание"""] = (node, args) =>
        singlePropertiesProduct["Описание"](node, args)
            .Split('.')[0];

    var parser = new LiquiMolyClass(
        isCategory: node => node.SelectNodes(@"//*[@class=""list""]")?.Any() ?? false,
        findProducts: (node, o) => node
            ._SelectNodes(@"//a[img[contains(@id, 'image')]]")
            .Select(x => new ArgumentObject(url: x.Attributes["href"].Value))
            .ToArray(),
        // Sub-catalogues are visited with the first argument increased by one.
        findSubcatalogs: (node, o) => node
            ._SelectNodes(@"//div[@class=""citem""]/a[img]")
            .Select(x => new ArgumentObject(
                url: x.Attributes["href"].Value,
                args: new object[] { (int)o.Args[0] + 1 }))
            .ToArray(),
        singlePropertiesCategory: new Dictionary<string, Search<string>>
        {
            // The name is prefixed with as many '!' characters as the first argument says.
            ["Наименование"] = (node, o) => new string('!', (int)o.Args[0]) + node
                .SelectSingleNode(@"//div[@id='content']//h1")
                .InnerText
        },
        singlePropertiesProduct: singlePropertiesProduct,
        pluralPropertiesProduct: new Dictionary<string, Search<string[]>>
        {
            // SelectNodes returns null when nothing matches, so a product without gallery
            // links yields an empty array instead of a NullReferenceException.
            ["Изображения"] = (node, o) => node
                .SelectNodes(@"//div[@id='content']//table//a[@class=""sc_menu_item""]")
                ?.Select(x => x.Attributes["href"].Value)
                .ToArray() ?? new string[0]
        },
        xPathPagination: (node, args) => node
            ._SelectNodes(@"//div[@class=""pagination""]/div[@class=""links""]/a[position() < last() - 1]")
            .Select(x => WebUtility.HtmlDecode(x.Attributes["href"].Value))
            // NOTE(review): the links used to be prefixed with "http://oksva-tm.ru", a
            // different site. Absolute links are now used as-is and relative ones are
            // resolved against this site; confirm which form the pages really emit.
            .Select(href => href.StartsWith("http", StringComparison.OrdinalIgnoreCase)
                ? href
                : siteUrl + "/" + href.TrimStart('/'))
            .Select(href => new ArgumentObject(href))
            .ToArray()
    );

    var argument = new ArgumentObject(
        url: @"http://www.pelikan-7.ru/index.php?route=common/home",
        args: new object[] { 0 });

    var collection = parser.GetProductOrCategory(
        parser.GetLinks(argument, @".//*[@id='category_menu']/ul/li/a"));

    Import.Write(
        path: "pelikan7.csv",
        collection: collection.ToArray(),
        headers: Constants.WebAsystKeys,
        format: Constants.WebAsystFormatter);
}
/// <summary>
/// Crawls catalogue section 36355 of gratwest.ru (the first page plus eleven further
/// pages) and writes the result, preceded by two fixed category rows, to gratwest.csv.
/// </summary>
public static void Parse()
{
    var siteRoot = @"http://www.gratwest.ru";

    // Single-valued product fields, keyed by the CSV column they fill.
    var productFields = new Dictionary<string, Search<string>>();

    productFields["Наименование"] = (node, args) =>
    {
        var heading = node.SelectSingleNode(@"//h1");
        return heading.InnerText.Trim();
    };
    productFields[@"""Код артикула"""] = (node, args) =>
    {
        var skuNode = node.SelectSingleNode(@".//div[@class='clearfix' and count(child::div)=3]/div[2]/b[1]");
        return "GW-" + skuNode.InnerText;
    };
    productFields["Цена"] = (node, args) =>
    {
        var priceText = node
            .SelectSingleNode(@".//*[contains(@id,'ajax_div_')]/p[2]/nobr/span")
            .InnerText;
        // Drop the currency word and any spaces.
        return priceText.Replace("руб", string.Empty).Replace(" ", string.Empty);
    };
    productFields["Размеры"] = (node, args) =>
    {
        var sizeNode = node.SelectSingleNode(@".//div[@class='clearfix' and count(child::div)=3]/div[2]/b[3]");
        return sizeNode.InnerText;
    };
    productFields["Валюта"] = (node, args) => "RUB";
    productFields[@"""Доступен для заказа"""] = (node, args) => "1";
    productFields[@"Статус"] = (node, args) => "1";
    productFields["Описание"] = (node, args) =>
    {
        // Description is the extra-info block followed by the detail table markup.
        var extraInfo = node.SelectSingleNode(@".//*[@id='detail_dop_info']").InnerHtml;
        var detailTable = node.SelectSingleNode(@".//table[@class='catalog-detail']").InnerHtml;
        return extraInfo + detailTable;
    };

    // Derived fields reuse the extractors registered above.
    productFields["Заголовок"] = (node, args) => productFields["Наименование"](node, args);
    productFields[@"""Ссылка на витрину"""] = (node, args) =>
    {
        var title = productFields["Наименование"](node, args);
        var sku = productFields[@"""Код артикула"""](node, args);
        return Humanization.GetHumanLink(title + "-" + sku);
    };

    var categoryFields = new Dictionary<string, Search<string>>
    {
        // The name is prefixed with Args[0] exclamation marks.
        ["Наименование"] = (node, args) =>
        {
            var marker = new string('!', (int)args.Args[0]);
            return marker + node.SelectSingleNode(@".//*[@id='breadcrumb']/span").InnerText;
        },
    };

    var imageFields = new Dictionary<string, Search<string[]>>
    {
        ["Изображения"] = (node, args) =>
        {
            var images = node._SelectNodes(@"//div[@class='clearfix' and count(child::div)=3]/div[1]//img");
            return images
                .Select(img => mainUrlOf(siteRoot) + img.Attributes["src"].Value)
                .ToArray();
        },
    };

    var parser = new LiquiMolyClass(
        isCategory: node =>
        {
            var sortingBlocks = node._SelectNodes(@"//*[@class='catalog-item-sorting']");
            return sortingBlocks.Count != 0;
        },
        findProducts: (node, args) => node
            ._SelectNodes(@"//*[@class='catalog-item-title']/a")
            .Select(link => new ArgumentObject(siteRoot + link.Attributes["href"].Value))
            .ToArray(),
        singlePropertiesProduct: productFields,
        singlePropertiesCategory: categoryFields,
        pluralPropertiesProduct: imageFields,
        encoding: Encoding.Default,
        // Pages 2 through 12 of the section are always requested.
        xPathPagination: (node, args) => Enumerable
            .Range(2, 11)
            .Select(page => new ArgumentObject($"http://www.gratwest.ru/catalog/36355/?elcount=20&PAGEN_1={page}"))
            .ToArray()
    );

    var startPage = new ArgumentObject(
        url: @"http://www.gratwest.ru/catalog/36355/",
        args: new object[] { 2 });

    var collection = parser.GetProductOrCategory(startPage);

    // Two fixed category rows are placed in front of the scraped items.
    var temporaryCategory = new ProductCategoryObject(
        new Dictionary<string, string> { ["Наименование"] = "Temporary2" },
        isCategory: true);
    var brandCategory = new ProductCategoryObject(
        new Dictionary<string, string> { ["Наименование"] = "!Gratwest" },
        isCategory: true);
    collection = new[] { temporaryCategory, brandCategory }.Extend(collection);

    Import.Write(
        path: @"..\..\..\CSV\gratwest.csv",
        collection: collection.ToArray(),
        headers: Constants.WebAsystKeys,
        format: Constants.WebAsystFormatter);

    // Identity helper kept as a lambda-free expression of the site root.
}
/// <summary>
/// Crawls rntoys.com, starting from the links in the product-category menu, and writes
/// the result, preceded by two fixed category rows, to rntoys.csv.
/// </summary>
public static void Parse()
{
    var brandTag = "(RNToys)";
    var host = @"http://www.rntoys.com";

    // Single-valued product fields, keyed by the CSV column they fill.
    var productFields = new Dictionary<string, Search<string>>
    {
        // Page heading with the brand tag removed.
        ["Наименование"] = (node, args) =>
        {
            var heading = node.SelectSingleNode(@"//h1").InnerText;
            return heading.Replace(brandTag, String.Empty);
        },
        [@"""Код артикула"""] = (node, args) =>
        {
            var skuNode = node.SelectSingleNode(@".//*[@id='ctl00_MainContent_panTopRight']/div[1]/span");
            return "RS-" + skuNode.InnerText;
        },
        // Price with the currency abbreviation and all whitespace removed.
        ["Цена"] = (node, args) =>
        {
            var priceText = node
                .SelectSingleNode(@".//*[@id='ctl00_MainContent_panTopRight']/div[2]/span")
                .InnerText
                .Replace("р.", String.Empty);
            return Regex.Replace(priceText, @"\s+", string.Empty);
        },
        ["Вес"] = (node, args) =>
            node.SelectSingleNode(@".//*[@id='ctl00_MainContent_panWeight']/span").InnerText,
        ["Размеры"] = (node, args) =>
            node.SelectSingleNode(@".//*[@id='ctl00_MainContent_panDimensions']/span").InnerText,
        ["Валюта"] = (node, args) => "RUB",
        [@"""Доступен для заказа"""] = (node, args) => "1",
        [@"Статус"] = (node, args) => "1",
        // Country comes from the alt text of the country icon.
        [@"Страна-производитель"] = (node, args) =>
        {
            var icon = node.SelectSingleNode(@".//*[@id='ctl00_MainContent_countryElement_iconCountry']");
            return icon.Attributes["alt"].Value;
        },
        ["Описание"] = (node, args) =>
            node.SelectSingleNode(@".//*[@id='ctl00_MainContent_panFeature']/span").InnerText
    };

    // Derived fields reuse the extractors registered above.
    productFields["Заголовок"] = (node, args) => productFields["Наименование"](node, args);
    productFields[@"""Ссылка на витрину"""] = (node, args) =>
    {
        var title = productFields["Наименование"](node, args);
        var sku = productFields[@"""Код артикула"""](node, args);
        return Humanization.GetHumanLink(title + "-" + sku);
    };

    var categoryFields = new Dictionary<string, Search<string>>
    {
        // The name is prefixed with Args[0] exclamation marks.
        ["Наименование"] = (node, args) =>
        {
            var marker = new string('!', (int)args.Args[0]);
            return marker + node.SelectSingleNode(@"//h1").InnerText;
        },
        // Markup of the heading's parent element.
        ["Описание"] = (node, args) => node.SelectSingleNode(@"//h1/..").InnerHtml
    };

    var imageFields = new Dictionary<string, Search<string[]>>
    {
        ["Изображения"] = (node, args) => node
            ._SelectNodes(@".//*[@id='ctl00_MainContent_panImageContainer']/div/div/img")
            .Select(img => HttpUtility.UrlPathEncode(host + img.Attributes["src"].Value))
            .ToArray()
    };

    var parser = new LiquiMolyClass(
        isCategory: node =>
        {
            var selectedMenuLinks = node._SelectNodes(@"//a[contains(@class, 'rpSelected')]");
            return selectedMenuLinks.Count != 0;
        },
        findProducts: (node, args) => node
            ._SelectNodes(@"//div[@class='mm-pl-tiles-Item']/div/div[2]/a")
            .Select(link => new ArgumentObject(host + link.Attributes["href"].Value))
            .ToArray(),
        singlePropertiesCategory: categoryFields,
        singlePropertiesProduct: productFields,
        pluralPropertiesProduct: imageFields
    );

    var startPage = new ArgumentObject(
        url: host,
        args: new object[] { 2 });

    var categoryLinks = parser.GetLinks(
        startPage,
        @".//*[@id='ctl00_productCategoriesMenu_menu']/ul/li/a",
        host);
    var collection = parser.GetProductOrCategory(categoryLinks);

    // Two fixed category rows are placed in front of the scraped items.
    var temporaryCategory = new ProductCategoryObject(
        new Dictionary<string, string> { ["Наименование"] = "Temporary2" },
        isCategory: true);
    var brandCategory = new ProductCategoryObject(
        new Dictionary<string, string> { ["Наименование"] = "!Rntoys" },
        isCategory: true);
    collection = new[] { temporaryCategory, brandCategory }.Extend(collection);

    Import.Write(
        path: @"..\..\..\CSV\rntoys.csv",
        collection: collection.ToArray(),
        headers: Constants.WebAsystKeys,
        format: Constants.WebAsystFormatter);
}
/// <summary>
/// Crawls vesna.kirov.ru, starting from the links in the header navigation menu, and
/// writes the result, preceded by two fixed category rows, to VesnaKirov.csv.
/// </summary>
public static void Parse()
{
    var random = new Random();
    var URL = @"http://www.vesna.kirov.ru";
    // Query string appended to every category link by GetLinks.
    var suffix = @"?page_count=1000&sort=PROPERTY_IS_AVAILABLE|DESC&PAGEN_1=13";

    // Single-valued product fields, keyed by the CSV column they fill.
    var singlePropertiesProduct = new Dictionary<string, Search<string>>
    {
        ["Наименование"] = (node, args) => node
            .SelectSingleNode(@"//h1")
            ?.InnerText ?? string.Empty,
        [@"""Код артикула"""] = (node, args) =>
        {
            // One-second pause per product (presumably request throttling).
            Thread.Sleep(1000);
            Console.WriteLine("Delay");

            var text = node
                .SelectSingleNode(@".//*[@id='content']/div[2]/div/div/div[1]/div[2]/div[2]")
                ?.InnerText;

            // The first 9 characters are skipped (presumably a label preceding the code).
            // Missing or shorter text yields an empty code instead of throwing.
            var code = text != null && text.Length > 9
                ? text.Substring(9)
                : string.Empty;
            return "VS-" + code;
        },
        ["Цена"] = (node, args) => node
            .SelectSingleNode(@".//*[@id='content']//div[contains(@class, 'pro-roght-price')]")
            ?.InnerText?.TrimEnd(new[] { '₽', ' ' }) ?? string.Empty,
        ["Описание"] = (node, args) =>
        {
            // Each tab is resolved separately: '+' binds tighter than '??', so a single
            // "a ?? empty + b ?? empty" expression would discard the second tab whenever
            // the first one is present.
            var firstTab = node
                .SelectSingleNode(@".//*[@id='tabs']/div/div[1]")
                ?.InnerHtml ?? string.Empty;
            var secondTab = node
                .SelectSingleNode(@".//*[@id='tabs']/div/div[2]")
                ?.InnerHtml?.Replace("pro-info-list", string.Empty) ?? string.Empty;
            return firstTab + secondTab;
        },
        ["Валюта"] = (node, o) => "RUB",
        [@"""Доступен для заказа"""] = (node, o) => "1",
        [@"Статус"] = (node, o) => "1",
    };

    // Derived fields reuse the extractors registered above.
    singlePropertiesProduct["Заголовок"] = (node, args) =>
        singlePropertiesProduct["Наименование"](node, args);
    // A random number is appended to the storefront link.
    singlePropertiesProduct[@"""Ссылка на витрину"""] = (node, args) =>
        Humanization.GetHumanLink(
            singlePropertiesProduct["Наименование"](node, args) + "-VS-" + random.Next());

    var parser = new LiquiMolyClass(
        isCategory: node => node
            ._SelectNodes(@"//span[contains(text(), 'Сортировка')]")
            .Count != 0,
        findProducts: (node, args) => node
            ._SelectNodes(@"//div[@class='catalog-item']/div[1]/a[1]")
            .Select(x => new ArgumentObject(URL + x.Attributes["href"].Value))
            .ToArray(),
        singlePropertiesCategory: new Dictionary<string, Search<string>>
        {
            // The name is prefixed with Args[0] exclamation marks.
            ["Наименование"] = (node, args) => new string('!', (int)args.Args[0]) + node
                .SelectSingleNode(@"//h1")
                .InnerText,
        },
        pluralPropertiesProduct: new Dictionary<string, Search<string[]>>
        {
            ["Изображения"] = (node, args) => node
                ._SelectNodes(@"//*[contains(@class,'pic-default')]/div/a/img")
                .Select(x => URL + x.Attributes["data-zoom-image"].Value)
                .ToArray()
        },
        singlePropertiesProduct: singlePropertiesProduct
    );

    var arguments = new ArgumentObject(
        url: URL,
        //url: @"http://vesna.kirov.ru/catalog/igrushki-iz-pvh/" + suffix,
        args: new object[] { 2 });

    var collection = parser.GetProductOrCategory(parser.GetLinks(
        args: arguments,
        prefix: URL,
        xPath: @".//*[@id='header']/div[2]/div/nav/ul/li[1]/div/ul/li/a",
        suffix: suffix));
    //var collection = parser.GetProductOrCategory(arguments);

    // Two fixed category rows are placed in front of the scraped items.
    collection = new[]
    {
        new ProductCategoryObject(
            new Dictionary<string, string> { ["Наименование"] = "Temporary2" },
            isCategory: true),
        new ProductCategoryObject(
            new Dictionary<string, string> { ["Наименование"] = "!VesnaKirov" },
            isCategory: true)
    }.Extend(collection);

    Import.Write(
        path: @"..\..\..\CSV\VesnaKirov.csv",
        collection: collection.ToArray(),
        headers: Constants.WebAsystKeys,
        format: Constants.WebAsystFormatter);
}
/// <summary>
/// Crawls the masteras.ru shop, starting from the category links on the /shop/ page, and
/// writes the result, preceded by two fixed category rows, to masteras.csv.
/// </summary>
public static void Parse()
{
    var URL = @"https://masteras.ru";

    // Single-valued product fields, keyed by the CSV column they fill.
    var singlePropertiesProduct = new Dictionary<string, Search<string>>
    {
        ["Наименование"] = (node, args) => node
            .SelectSingleNode(@"//*[@id='product-top']/div[2]/h1")
            .InnerText,
        [@"""Код артикула"""] = (node, args) => "SER-" + node
            .SelectSingleNode(@"//*[@class='sku']/span")
            .InnerText
            .Trim(),
        ["Цена"] = (node, args) => node
            .SelectSingleNode(@"//*[@id='product-top']/div[2]/div[1]/p/span")
            ?.InnerText
            .Replace(" ", string.Empty) ?? string.Empty,
        ["Описание"] = (node, args) => node
            .SelectSingleNode(@"//*[@class='hide-me closed']")
            ?.InnerText ?? string.Empty,
        ["Валюта"] = (node, o) => "RUB",
        [@"""Доступен для заказа"""] = (node, o) => "1",
        [@"Статус"] = (node, o) => "1",
    };

    // Derived fields reuse the extractors registered above.
    singlePropertiesProduct["Заголовок"] = (node, args) =>
        singlePropertiesProduct["Наименование"](node, args);
    singlePropertiesProduct[@"""Ссылка на витрину"""] = (node, args) =>
        Humanization.GetHumanLink(
            singlePropertiesProduct["Наименование"](node, args)
            + "-"
            + singlePropertiesProduct[@"""Код артикула"""](node, args));

    var parser = new LiquiMolyClass(
        // A page without an SKU element is treated as a category.
        isCategory: node => node
            ._SelectNodes(@"//*[@class='sku']/span")
            .Count == 0,
        findProducts: (node, args) =>
        {
            // Products are only collected when Args[0] is greater than 1.
            if ((int)args.Args[0] <= 1)
            {
                return new ArgumentObject[0];
            }

            // _SelectNodes is used instead of SelectNodes so that a page without
            // product links yields an empty array rather than a null dereference
            // (assumes _SelectNodes never returns null, as its other call sites imply).
            return node
                ._SelectNodes(@"//a[@class='product-title']")
                .Select(x => new ArgumentObject(x.Attributes["href"].Value))
                .ToArray();
        },
        singlePropertiesCategory: new Dictionary<string, Search<string>>
        {
            ["Наименование"] = (node, args) =>
            {
                var a = node
                    .SelectSingleNode(@"//*[@id='cat-description']/h2")
                    ?.InnerText.Trim() ?? string.Empty;
                var b = node
                    .SelectSingleNode(@"//*[@id='categories']/h2")
                    ?.InnerText.Trim() ?? string.Empty;
                // Prefer the description heading; fall back to the categories heading.
                return new string('!', (int)args.Args[0]) + (a == string.Empty ? b : a);
            }
        },
        singlePropertiesProduct: singlePropertiesProduct,
        pluralPropertiesProduct: new Dictionary<string, Search<string[]>>
        {
            ["Изображения"] = (node, args) => node
                ._SelectNodes(@"//*[@id='photos']/div/a/img")
                .Select(x => x.Attributes["src"].Value)
                .ToArray()
        },
        xPathPagination: (node, args) =>
        {
            // The second-to-last pager item is read as the last page number.
            var lastPageText = node
                .SelectSingleNode(@"//*[@class='page-numbers']/li[last() -1]/a")
                ?.InnerText;

            // No pager, non-numeric text or a single page: nothing more to fetch.
            int lastPage;
            if (!int.TryParse(lastPageText, out lastPage) || lastPage < 2)
            {
                return new ArgumentObject[0];
            }

            return Enumerable.Range(2, lastPage - 1)
                .Select(x => args.Url + $"page/{x}/")
                .Select(x => new ArgumentObject(x))
                .ToArray();
        },
        encoding: Encoding.UTF8
    );

    var argument = new ArgumentObject(
        url: @"https://masteras.ru/shop/",
        args: new object[] { 2 });

    var collection = parser.GetProductOrCategory(
        parser.GetLinks(argument, @"//*[@class='descr']/../../a", prefix: ""));

    // Two fixed category rows are placed in front of the scraped items.
    collection = new[]
    {
        new ProductCategoryObject(
            new Dictionary<string, string> { ["Наименование"] = "Temporary2" },
            isCategory: true),
        new ProductCategoryObject(
            new Dictionary<string, string> { ["Наименование"] = "!Masteras" },
            isCategory: true)
    }.Extend(collection);

    //collection = JoinerArticles.JoinInOrderEnumerable(collection, "Наименование",
    //    productFieldsForPluralProp: new[] { "Изображения" },
    //    productFieldsForSingleProp: new[] { "Описание" });

    Import.Write(
        path: "../../../CSV/masteras.csv",
        collection: collection.ToArray(),
        headers: Constants.WebAsystKeys,
        format: Constants.WebAsystFormatter);
}
/// <summary>
/// Crawls addinol.ru, starting from the first sub-menu links of page id=15804, merges the
/// result with the output of <c>AddinolDataExtractorExample.Extract()</c>, joins entries
/// by name, and writes everything under a single "Addinol" category row to addinol.csv.
/// </summary>
public static void Parse()
{
    var host = @"http://addinol.ru";

    // Single-valued product fields, keyed by the CSV column they fill.
    var productFields = new Dictionary<string, Search<string>>();
    productFields["Наименование"] = (node, args) => node.SelectSingleNode(@"//h1").InnerText;
    productFields["Описание"] = (node, args) =>
    {
        var firstParagraph = node.SelectSingleNode(@"//*[@id='art_container']/p");
        return firstParagraph?.InnerHtml ?? string.Empty;
    };
    productFields["Валюта"] = (node, args) => "RUB";
    productFields[@"""Доступен для заказа"""] = (node, args) => "1";
    productFields[@"Статус"] = (node, args) => "1";

    // Derived fields reuse the name extractor registered above.
    productFields["Заголовок"] = (node, args) => productFields["Наименование"](node, args);
    productFields[@"""Ссылка на витрину"""] = (node, args) =>
    {
        var title = productFields["Наименование"](node, args);
        return Humanization.GetHumanLink(title);
    };

    var categoryFields = new Dictionary<string, Search<string>>
    {
        ["Наименование"] = (node, args) =>
        {
            var topLevel = node
                .SelectSingleNode(@"//li[contains(@class, 'submenu_sel')]/a")
                .InnerText;
            var secondLevel = node
                .SelectSingleNode(@".//*[contains(@class, 'submenu_2_sel')]/a")
                ?.InnerText;

            // The selected second-level entry wins when present; the name is
            // prefixed with Args[0] exclamation marks.
            var marker = new string('!', (int)args.Args[0]);
            return marker + (secondLevel ?? topLevel).Trim();
        }
    };

    var imageFields = new Dictionary<string, Search<string[]>>
    {
        ["Изображения"] = (node, args) => node
            ._SelectNodes(@"//*[@id='right_col']/div/a")
            .Select(link => link.Attributes["href"].Value)
            .ToArray()
    };

    var parser = new LiquiMolyClass(
        // A page without the article container is treated as a category.
        isCategory: node =>
        {
            var articleContainers = node._SelectNodes(@"//*[@id='art_container']");
            return articleContainers.Count == 0;
        },
        findProducts: (node, args) => node
            ._SelectNodes(@"//h2/a")
            .Select(link => new ArgumentObject(host + link.Attributes["href"].Value))
            .ToArray(),
        singlePropertiesCategory: categoryFields,
        findSubcatalogs: (node, args) =>
        {
            // Sub-catalogues are only followed while Args[0] is below 2.
            if ((int)args.Args[0] >= 2)
            {
                return new ArgumentObject[0];
            }

            return node
                ._SelectNodes(@"//li[contains(@class, 'submenu_sel')]/ul/li/a")
                .Select(link => new ArgumentObject(
                    host + link.Attributes["href"].Value,
                    new object[] { (int)args.Args[0] + 1 }))
                .ToArray();
        },
        singlePropertiesProduct: productFields,
        pluralPropertiesProduct: imageFields,
        encoding: Encoding.UTF8
    );

    var startPage = new ArgumentObject(
        url: @"http://addinol.ru/index.php?id=15804",
        args: new object[] { 1 });

    var menuLinks = parser.GetLinks(
        startPage,
        @"//*[@class='submenu' and position() < 4]/a",
        prefix: host);
    var collection = parser.GetProductOrCategory(menuLinks);

    // Both sides of the merge are keyed by the "Наименование" property.
    collection = Merger.Merge(
        collection: collection,
        other: AddinolDataExtractorExample.Extract(),
        setKeyCollection: item => item.SingleProperties["Наименование"],
        setKeyOtherCollection: item => item.SingleProperties["Наименование"]);

    collection = JoinerArticles.JoinInOrderEnumerable(
        collection,
        "Наименование",
        productFieldsForPluralProp: new[] { "Изображения" },
        productFieldsForSingleProp: new[] { "Описание" });

    // A single fixed category row is placed in front of the items.
    var brandCategory = new ProductCategoryObject(
        new Dictionary<string, string> { ["Наименование"] = "Addinol" },
        isCategory: true);
    collection = new[] { brandCategory }.Extend(collection);

    Import.Write(
        path: "../../../CSV/addinol.csv",
        collection: collection.ToArray(),
        headers: Constants.WebAsystKeys,
        format: Constants.WebAsystFormatter);
}
/// <summary>
/// Crawls the oksva-tm.ru catalogue, starting from the category links on /catalog/, and
/// writes the result, preceded by two fixed category rows, to oksvatm.csv.
/// </summary>
public static void Parse()
{
    var random = new Random();
    var host = @"http://oksva-tm.ru";

    // Single-valued product fields, keyed by the CSV column they fill.
    var productFields = new Dictionary<string, Search<string>>();

    productFields["Цена"] = (node, args) =>
    {
        var priceText = node
            ?.SelectSingleNode(@".//*[contains(@id,'node')]/div[1]/div[2]/span[2]")
            ?.InnerText;
        if (priceText == null)
        {
            return string.Empty;
        }

        // Drop the currency abbreviation and any spaces.
        return priceText.Replace("р.", string.Empty).Replace(" ", string.Empty);
    };
    productFields["Описание"] = (node, args) =>
    {
        var body = node?.SelectSingleNode(@".//*[contains(@id, 'node')]//div[@property]");
        return body?.InnerText ?? string.Empty;
    };
    // Every product gets the same constant code.
    productFields[@"""Код артикула"""] = (node, args) => "OKS";
    productFields["Размеры"] = (node, args) =>
    {
        var sizeNode = node?.SelectSingleNode(@".//*[contains(@id,'node')]/div[1]/div[9]/span[2]");
        return sizeNode?.InnerText ?? string.Empty;
    };
    productFields["Наименование"] = (node, args) =>
    {
        var heading = node.SelectSingleNode(@"//h1");
        return heading.InnerText.Trim();
    };
    productFields["Валюта"] = (node, args) => "RUB";
    productFields[@"""Доступен для заказа"""] = (node, args) => "1";
    productFields[@"Статус"] = (node, args) => "1";
    // A children's-age field ("Возраст детей") was previously sketched here, reading
    // //div[@class='field-label' and contains(text(), 'Возраст')]/../div[2]/div; it is disabled.
    productFields[@"Материал"] = (node, args) =>
    {
        var material = node.SelectSingleNode(
            @"//div[@class='field-label' and contains(text(), 'Материал')]/../div[2]/div/text()");
        return material?.InnerText ?? string.Empty;
    };

    // Derived fields reuse the name extractor registered above.
    productFields["Заголовок"] = (node, args) => productFields["Наименование"](node, args);
    // A random number is appended to the storefront link.
    productFields[@"""Ссылка на витрину"""] = (node, args) =>
    {
        var title = productFields["Наименование"](node, args);
        return Humanization.GetHumanLink(title + "-OKS-" + random.Next());
    };

    var categoryFields = new Dictionary<string, Search<string>>
    {
        // The name is prefixed with Args[0] exclamation marks.
        ["Наименование"] = (node, args) =>
        {
            var marker = new string('!', (int)args.Args[0]);
            return marker + node.SelectSingleNode(@"//*[@id='page-title']").InnerText.Trim();
        }
    };

    var imageFields = new Dictionary<string, Search<string[]>>
    {
        ["Изображения"] = (node, args) => node
            ._SelectNodes(@"//*[contains(@id,'node')]//img")
            .Select(img => img.Attributes["src"].Value)
            .ToArray(),
    };

    var parser = new LiquiMolyClass(
        // A page without any element whose id contains "node" is treated as a category.
        isCategory: node =>
        {
            var nodeElements = node._SelectNodes(@".//*[contains(@id,'node')]");
            return nodeElements.Count == 0;
        },
        findProducts: (node, args) => node
            ._SelectNodes(@"//table/tbody/tr/td[2]/a")
            .Select(link => new ArgumentObject(host + link.Attributes["href"].Value))
            .ToArray(),
        singlePropertiesCategory: categoryFields,
        singlePropertiesProduct: productFields,
        pluralPropertiesProduct: imageFields,
        xPathPagination: (node, args) => node
            ._SelectNodes(@".//*[@id='block-system-main']/div/div[2]/div[2]/ul/li[position() < last() - 1]/a")
            .Select(link => WebUtility.HtmlDecode(host + link.Attributes["href"].Value))
            .Select(pageUrl => new ArgumentObject(pageUrl))
            .ToArray()
    );

    var startPage = new ArgumentObject(
        url: @"http://oksva-tm.ru/catalog/",
        //prefix: @"http://oksva-tm.ru/catalog/15",
        args: new object[] { 2 });

    var categoryLinks = parser.GetLinks(
        startPage,
        @".//*[@id='block-system-main']/div/div[1]/div/table/tbody/tr/td/div/span/a",
        prefix: host);
    var collection = parser.GetProductOrCategory(categoryLinks);
    //parser.GetProductOrCategory(argument);

    // Two fixed category rows are placed in front of the scraped items.
    var temporaryCategory = new ProductCategoryObject(
        new Dictionary<string, string> { ["Наименование"] = "Temporary2" },
        isCategory: true);
    var brandCategory = new ProductCategoryObject(
        new Dictionary<string, string> { ["Наименование"] = "!Оксва-тм" },
        isCategory: true);
    collection = new[] { temporaryCategory, brandCategory }.Extend(collection);

    Import.Write(
        path: @"..\..\..\CSV\oksvatm.csv",
        collection: collection.ToArray(),
        headers: Constants.WebAsystKeys,
        format: Constants.WebAsystFormatter);
}