/// <summary>
/// Lazily yields the vacancies of the given category, walking the paginated
/// listing ("/pg1", "/pg2", ...) until the stop condition is reached.
/// </summary>
/// <param name="category">Key of the category to look up in <c>this.category</c>.</param>
/// <param name="date">Date passed through to <c>ParseVacancyHeader</c>.</param>
/// <returns>
/// A lazily evaluated sequence of vacancies; empty when the category is unknown
/// or is mapped to an empty URL.
/// </returns>
public override IEnumerable<Vacancy> ParseByDate(string category, DateTime date)
{
    // NOTE(review): presumably this.category is a dictionary, so a failed lookup
    // yields a default pair whose Value is null - confirm against the field type.
    var item = this.category.FirstOrDefault(x => x.Key == category && x.Value != string.Empty);
    if (item.Value == null)
    {
        yield break;
    }

    int numberOfPages = GetNumberOfPages(item.Value);
    checkDate = false;

    for (int i = 1; i <= numberOfPages; i++)
    {
        // Null-safe class test: tables without a class attribute used to throw here.
        HtmlNode listTable = new HtmlWeb().Load(item.Value + "/pg" + i).DocumentNode
            .Descendants("table")
            .FirstOrDefault(x => x.Attributes["class"] != null
                                 && x.Attributes["class"].Value == "f-vacancylist-tablewrap");
        if (listTable == null)
        {
            // The page has no vacancy table; skip it instead of failing the whole enumeration.
            continue;
        }

        HtmlNodeCollection vacancyCollection = listTable.ChildNodes;
        foreach (var itemNode in vacancyCollection)
        {
            // The last child of the table is never treated as a vacancy row.
            if (itemNode == vacancyCollection[vacancyCollection.Count - 1])
            {
                continue;
            }

            // checkDate is raised by the vacancy parser when it meets a vacancy older
            // than the requested date; it is cleared again unless dayAgo has this value.
            if (checkDate && dayAgo != "1 день назад")
            {
                checkDate = false;
            }

            if (checkDate)
            {
                // Previously a plain 'break' left only the inner loop, so every remaining
                // page was still downloaded just to break again on its first row.
                yield break;
            }

            Vacancy vacancy = new Vacancy { ParseSiteId = webSiteId, Сategory = item.Key };
            ParseVacancyHeader(itemNode, ref vacancy, date);

            // ParseVacancyHeader receives the vacancy by ref and may null it out to reject it.
            if (vacancy != null)
            {
                yield return vacancy;
            }
        }
    }
}
/// <summary>
/// Reads the parameter table found under the <c>div.d_des</c> element and copies
/// the recognised cells (web site, employment type, contact person, experience,
/// phone) into <paramref name="vacancy"/>.
/// </summary>
/// <param name="node">Node on which the XPath query is executed.</param>
/// <param name="vacancy">Vacancy whose properties are filled in.</param>
private void ParseSecondTemplateVacancyParams(HtmlNode node, ref Vacancy vacancy)
{
    try
    {
        HtmlNode table = node.SelectSingleNode("//div[@class='d_des']").ChildNodes["table"];

        foreach (HtmlNode row in table.ChildNodes)
        {
            if (row.NodeType != HtmlNodeType.Element)
            {
                continue;
            }

            foreach (HtmlNode cell in row.ChildNodes)
            {
                if (cell.NodeType != HtmlNodeType.Element)
                {
                    continue;
                }

                // The cell text contains both the label and the value; the label decides the target field.
                string text = cell.InnerText;

                if (text.Contains("Сайт"))
                {
                    vacancy.CompanyWebSite = text.Trim();
                }
                else if (text.Contains("Вид занятости"))
                {
                    vacancy.TypeOfEmployment = text.Trim();
                }
                else if (text.Contains("Контактное лицо"))
                {
                    vacancy.ContactPerson = text.Trim();
                }
                else if (text.Contains("Опыт работы"))
                {
                    vacancy.Experience = text.Trim();
                }
                else if (text.Contains("Телефон"))
                {
                    // For the phone only the text of the cell's last child is stored.
                    vacancy.PhoneNumber = cell.LastChild.InnerText.Trim();
                }
            }
        }
    }
    catch
    {
        // Best effort: any failure (missing table, unexpected markup) leaves
        // the vacancy with whatever was filled in so far.
    }
}
/// <summary>
/// Downloads every listing page of a category and, for each vacancy link found,
/// downloads the vacancy page and extracts its fields.
/// </summary>
/// <param name="keyCategory">Key of the category in <c>this.category</c>.</param>
/// <returns>
/// The parsed vacancies, or <c>null</c> when the category lookup fails or the
/// category has no URL.
/// </returns>
private async Task<IEnumerable<Vacancy>> ParserCategory(string keyCategory)
{
    try
    {
        this.url = this.category[keyCategory];
    }
    catch
    {
        // Unknown key: callers receive null, exactly as before.
        return null;
    }

    // A null URL used to slip past the '!= string.Empty' test and crawl "site + prefPage".
    if (string.IsNullOrEmpty(this.url))
    {
        return null;
    }

    List<Vacancy> list = new List<Vacancy>();
    string pageUrlPrefix = this.site + this.url + this.prefPage;

    for (int i = 1; ; i++)
    {
        // AngleSharp documents are disposable; release each one as soon as it has been read.
        using (var document = await BrowsingContext.New(config).OpenAsync(pageUrlPrefix + i))
        {
            var cells = document.QuerySelectorAll("#center > div > div.row > div.col-md-8.col-left > div.card.card-hover.card-visited.job-link > h2 > a");
            if (cells.Length == 0) // no links on the page: the listing has ended
            {
                break;
            }

            foreach (var item in cells)
            {
                string href = site + item.GetAttribute("href");

                Vacancy vacancy = new Vacancy();
                vacancy.ParseSiteId = this.siteId;
                vacancy.PublicationDate = DateTime.Today;
                vacancy.VacancyHref = href;

                using (var link = await BrowsingContext.New(config).OpenAsync(href))
                {
                    // Missing elements are simply skipped; previously a missing title threw,
                    // which also silently discarded the company and every attribute.
                    var titleNode = link.QuerySelector("div.card > h1.cut-top.wordwrap");
                    if (titleNode != null)
                    {
                        vacancy.Title = titleNode.TextContent;
                    }

                    var companyNode = link.QuerySelector("dl.dl-horizontal > dd > a > b");
                    if (companyNode != null)
                    {
                        vacancy.Company = companyNode.TextContent;
                    }

                    try
                    {
                        foreach (var el in link.QuerySelectorAll("dl.dl-horizontal > dt"))
                        {
                            SwitchAttributeByName(ref vacancy, el);
                        }
                    }
                    catch (Exception)
                    {
                        // Best effort: an attribute that cannot be mapped must not lose the vacancy.
                    }

                    var salaryNode = link.QuerySelector("div.card > h3.wordwrap");
                    if (salaryNode != null)
                    {
                        vacancy.Salary = salaryNode.TextContent;
                    }

                    // The description is the concatenated text of all matching paragraphs.
                    foreach (var el in link.QuerySelectorAll("div.card > div.overflow > p"))
                    {
                        vacancy.Description += el.TextContent;
                    }
                }

                list.Add(vacancy);
            }
        }
    }

    return list;
}
/// <summary>
/// Collects the vacancies of an olx.ua job category that are not older than
/// <paramref name="date"/>, walking the listing page by page.
/// </summary>
/// <param name="keyCategory">Key of the category in <c>categoryCollection</c>.</param>
/// <param name="date">Vacancies whose <c>getTime</c> value is earlier than this are skipped.</param>
/// <returns>
/// The collected vacancies (empty when the category maps to an empty path), or
/// <c>null</c> when the category lookup fails.
/// </returns>
public override IEnumerable<Vacancy> ParseByDate(string keyCategory, DateTime date)
{
    // NOTE(review): unlike ParseByCategory, the vacancies built here get neither
    // ParseSiteId nor a category assigned - confirm whether that is intended.
    List<Vacancy> tempList = new List<Vacancy>();

    string categoryPath;
    try
    {
        categoryPath = categoryCollection[keyCategory];
    }
    catch
    {
        // Unknown key: callers receive null, exactly as before.
        return null;
    }

    // An empty path always produced an empty result; decide that before any download.
    if (categoryPath == "")
    {
        return tempList;
    }

    string baseUrl = "https://www.olx.ua/rabota/" + categoryPath;
    HtmlWeb web = new HtmlWeb();
    HtmlDocument doc = web.Load(baseUrl);

    // The page count is read from the 4th-from-last child of the pager.
    // A listing without a pager is treated as a single page instead of throwing.
    int pageCount = 1;
    HtmlNode pager = doc.DocumentNode.SelectSingleNode("//div[@class='pager rel clr']");
    if (pager != null && pager.ChildNodes.Count >= 4)
    {
        int parsedCount;
        if (int.TryParse(pager.ChildNodes[pager.ChildNodes.Count - 4].InnerText.Trim(), out parsedCount) && parsedCount > 0)
        {
            pageCount = parsedCount;
        }
    }

    // Counts the listings seen so far (kept and too-old alike); a too-old listing
    // ends the scan only once at least five listings have been seen.
    int countFive = 0;

    // '<=' so the last page is parsed too; the old 'page < pageCount' loop
    // downloaded it but never read it, and read nothing for one-page listings.
    for (int page = 1; page <= pageCount; page++)
    {
        if (page > 1)
        {
            doc = web.Load(baseUrl + "/?page=" + page);
        }

        HtmlNodeCollection articles = doc.DocumentNode.SelectNodes("//table//tbody//tr[@class='wrap']//td//article");
        if (articles == null)
        {
            // SelectNodes returns null when nothing matches.
            continue;
        }

        foreach (HtmlNode node in articles)
        {
            Vacancy newVacancy = new Vacancy();

            // All lookups below are relative to the article. The previous "//..." paths
            // were evaluated against the whole document, not against this node.
            HtmlNode titleNode = node.SelectSingleNode("div[1]//h3");
            if (titleNode != null)
            {
                newVacancy.Title = titleNode.InnerText.Trim();

                HtmlNode linkNode = node.SelectSingleNode("div[1]//h3//a");
                HtmlAttribute hrefAttribute = linkNode == null ? null : linkNode.Attributes["href"];
                if (hrefAttribute == null)
                {
                    // Without a link the listing can be neither dated nor detailed.
                    continue;
                }

                string link = hrefAttribute.Value;

                // getTime is evaluated once per listing (it was called twice before).
                if (getTime(link) < date)
                {
                    if (countFive >= 5)
                    {
                        return tempList;
                    }

                    countFive++;
                    continue;
                }

                newVacancy.VacancyHref = link;
                getInnerInformation(link, ref newVacancy);
            }

            HtmlNode cityNode = node.SelectSingleNode("div[1]//ul//li[1]");
            if (cityNode != null)
            {
                // Keep only the part before the first comma.
                string city = cityNode.InnerText;
                int comma = city.IndexOf(',');
                newVacancy.Location = comma >= 0 ? city.Remove(comma) : city;
            }

            // Leading '.' keeps the query inside this article; the absolute form gave
            // every vacancy the first salary found in the document.
            HtmlNode salaryNode = node.SelectSingleNode(".//div[@class='list-item__col list-item__col--price']//div[@class='list-item__price']");
            if (salaryNode != null)
            {
                newVacancy.Salary = salaryNode.InnerText.Trim();
            }

            tempList.Add(newVacancy);
            countFive++;
        }
    }

    return tempList;
}
/// <summary>
/// Collects all vacancies of an olx.ua job category, walking the listing page by page.
/// </summary>
/// <param name="keyCategory">Key of the category in <c>categoryCollection</c>.</param>
/// <returns>
/// The collected vacancies (empty when the category maps to an empty path), or
/// <c>null</c> when the category lookup fails. If a page cannot be downloaded
/// after several attempts, the vacancies gathered so far are returned.
/// </returns>
public override IEnumerable<Vacancy> ParseByCategory(string keyCategory)
{
    // Upper bound for download attempts per page; the old loop retried forever.
    const int maxLoadAttempts = 3;

    List<Vacancy> tempList = new List<Vacancy>();

    string categoryPath;
    try
    {
        categoryPath = categoryCollection[keyCategory];
    }
    catch
    {
        // Unknown key: callers receive null, exactly as before.
        return null;
    }

    // An empty path always produced an empty result; decide that before any download.
    if (categoryPath == "")
    {
        return tempList;
    }

    string baseUrl = "https://www.olx.ua/rabota/" + categoryPath;
    HtmlWeb web = new HtmlWeb();
    HtmlDocument doc = web.Load(baseUrl);

    // The page count is read from the 4th-from-last child of the pager.
    // A listing without a pager is treated as a single page instead of throwing.
    int pageCount = 1;
    HtmlNode pager = doc.DocumentNode.SelectSingleNode("//div[@class='pager rel clr']");
    if (pager != null && pager.ChildNodes.Count >= 4)
    {
        int parsedCount;
        if (int.TryParse(pager.ChildNodes[pager.ChildNodes.Count - 4].InnerText.Trim(), out parsedCount) && parsedCount > 0)
        {
            pageCount = parsedCount;
        }
    }

    // Pages are numbered from 1. The old counter started at 0, which parsed the
    // first page twice ("?page=1" after the initial load) and never read the last one.
    for (int page = 1; page <= pageCount; page++)
    {
        if (page > 1)
        {
            string pageUrl = baseUrl + "/?page=" + page;
            doc = null;
            for (int attempt = 0; attempt < maxLoadAttempts && doc == null; attempt++)
            {
                try
                {
                    doc = web.Load(pageUrl);
                }
                catch
                {
                    // Download failed; try again until the attempt budget is used up.
                }
            }

            if (doc == null)
            {
                // Persistent failure: stop here rather than spin forever.
                break;
            }
        }

        HtmlNodeCollection articles = doc.DocumentNode.SelectNodes("//table//tbody//tr[@class='wrap']//td//article");
        if (articles == null)
        {
            // SelectNodes returns null when nothing matches.
            continue;
        }

        foreach (HtmlNode node in articles)
        {
            Vacancy newVacancy = null;
            try
            {
                newVacancy = new Vacancy() { ParseSiteId = id };
                newVacancy.Сategory = keyCategory;

                // Lookups are relative to the article; the previous "//h3" and "//ul"
                // guards were evaluated against the whole document.
                HtmlNode titleNode = node.SelectSingleNode("div[1]//h3");
                if (titleNode != null)
                {
                    newVacancy.Title = titleNode.InnerText.Trim();
                    string link = node.SelectSingleNode("div[1]//h3//a").Attributes["href"].Value;
                    newVacancy.VacancyHref = link;
                    getInnerInformation(link, ref newVacancy);
                }

                HtmlNode cityNode = node.SelectSingleNode("div[1]//ul//li[1]");
                if (cityNode != null)
                {
                    // Keep only the part before the first comma.
                    string city = cityNode.InnerText;
                    int comma = city.IndexOf(',');
                    newVacancy.Location = comma >= 0 ? city.Remove(comma) : city;
                }
            }
            catch
            {
                // Best effort: a listing that fails half-way is still kept with
                // whatever fields were filled in before the failure.
            }

            // getInnerInformation takes the vacancy by ref and may have nulled it.
            if (newVacancy != null)
            {
                tempList.Add(newVacancy);
            }
        }
    }

    return tempList;
}
/// <summary>
/// Lazily yields the jobs.ua vacancies of a category whose publication date
/// equals the calendar day of <paramref name="date"/>.
/// </summary>
/// <param name="keyCategory">Key of the category in <c>category</c>.</param>
/// <param name="date">Requested publication day; any time-of-day part is ignored.</param>
/// <returns>
/// A lazily evaluated sequence of matching vacancies; empty when the category
/// lookup fails. After the first page, enumeration stops at the first vacancy
/// whose date does not match.
/// </returns>
public override IEnumerable<Vacancy> ParseByDate(string keyCategory, DateTime date)
{
    string valuecategory;
    try
    {
        valuecategory = category[keyCategory];
    }
    catch
    {
        yield break;
    }

    // Publication dates are built without a time-of-day, so compare against midnight.
    DateTime requestedDay = date.Date;

    string href = "https://jobs.ua/vacancy/" + valuecategory;
    int countpages = GetnumbersOfPage(href);

    for (int i = 1; i <= countpages; i++)
    {
        string pageUrl = href + "/page-" + i;

        HtmlDocument document;
        try
        {
            document = new HtmlWeb().Load(pageUrl);
        }
        catch
        {
            // Fall back to the project's alternative loader when the plain download fails.
            document = GetDokumentByURL(pageUrl);
        }

        HtmlNode[] links;
        try
        {
            links = document.DocumentNode.SelectNodes("//ul[@class='b-vacancy__list js-items_block']").ToArray();
        }
        catch
        {
            // NOTE(review): the fallback is given the category URL without the page
            // suffix, unlike the loader above - confirm that this is intended.
            links = GetHodeByUrl(href);
        }

        if (links == null || links.Length == 0)
        {
            // No vacancy list on this page; move on instead of failing on links[0].
            continue;
        }

        foreach (var item in links[0].ChildNodes.Where(x => x.NodeType != HtmlNodeType.Text))
        {
            // GetVacancyByNode reports failure by returning null; it was dereferenced
            // unconditionally before, aborting the whole enumeration on one bad node.
            Vacancy tempVacancy = GetVacancyByNode(item, keyCategory);
            if (tempVacancy == null)
            {
                continue;
            }

            if (tempVacancy.PublicationDate != requestedDay)
            {
                // On the first page non-matching vacancies are skipped; on later
                // pages the first mismatch ends the scan.
                if (i != 1)
                {
                    yield break;
                }
            }
            else
            {
                yield return tempVacancy;
            }
        }
    }
}
/// <summary>
/// Builds a <c>Vacancy</c> from one list-item node of the vacancy list, dispatching
/// on the value of each child's first attribute.
/// </summary>
/// <param name="node">List-item node to read; <c>null</c> yields <c>null</c>.</param>
/// <param name="category">Category stored on the resulting vacancy.</param>
/// <returns>
/// The populated vacancy, or <c>null</c> when <paramref name="node"/> is null,
/// the date text cannot be matched, or any other parsing step throws.
/// </returns>
private Vacancy GetVacancyByNode(HtmlNode node, string category)
{
    try
    {
        if (node == null)
        {
            return null;
        }

        Vacancy tempVacancy = new Vacancy();
        tempVacancy.Сategory = category;
        tempVacancy.ParseSiteId = siteId;

        foreach (var itemNode in node.ChildNodes.Where(x => x.NodeType != HtmlNodeType.Text))
        {
            // Nodes without attributes (e.g. comments) cannot be classified; previously
            // indexing Attributes[0] on them threw and discarded the whole vacancy.
            if (itemNode.Attributes.Count == 0)
            {
                continue;
            }

            switch (itemNode.Attributes[0].Value)
            {
                case "b-vacancy__top":
                    // '&&' is the intended filter (skip text nodes and <br>); with '||'
                    // the predicate was true for every node.
                    foreach (var childNode in itemNode.ChildNodes.Where(x => x.NodeType != HtmlNodeType.Text && x.Name != "br"))
                    {
                        if (childNode.Name == "a")
                        {
                            tempVacancy.VacancyHref = childNode.Attributes["href"].Value;
                            tempVacancy = GetContentFromHttp(tempVacancy.VacancyHref, tempVacancy);
                            tempVacancy.Title = childNode.InnerText;
                        }
                        else if (childNode.Name == "div")
                        {
                            tempVacancy.Salary = childNode.InnerText.Replace(" ", "");
                        }
                        else if (childNode.Name == "span")
                        {
                            // The old 'input.Replace(...)' call discarded its result, so the
                            // regex has always seen the unmodified text; that is kept.
                            MatchCollection match = regex.Matches(childNode.InnerText);
                            if (match.Count == 0)
                            {
                                // Same outcome as before (indexing threw), minus the exception.
                                return null;
                            }

                            // Group 1 is used as the day and group 2 as the month name.
                            DateTime published = new DateTime(
                                DateTime.Now.Year,
                                GetNumberMounth(match[0].Groups[2].Value),
                                Convert.ToInt32(match[0].Groups[1].Value));

                            // The page shows no year. A date in the future means the
                            // vacancy was published last year (e.g. seen in January, dated December).
                            if (published > DateTime.Today)
                            {
                                published = published.AddYears(-1);
                            }

                            tempVacancy.PublicationDate = published;
                            break; // nothing after the date span is read
                        }
                    }

                    break;

                case "b-vacancy__tech":
                    foreach (var childNode in itemNode.ChildNodes.Where(item => item.NodeType != HtmlNodeType.Text))
                    {
                        // Null-safe class test: a child without a class attribute used to throw.
                        HtmlAttribute classAttribute = childNode.Attributes["class"];
                        if (classAttribute != null && classAttribute.Value == "b-vacancy__tech__item")
                        {
                            tempVacancy.Company = childNode.InnerText.Replace(" ", "").Replace(" ", " ");
                        }
                        else
                        {
                            if (childNode.ChildNodes.Count > 2)
                            {
                                tempVacancy.Location = childNode.ChildNodes[2].InnerText;
                            }

                            break; // the first non-item child ends this section
                        }
                    }

                    break;

                case "b-vacancy__tech__item":
                    // Child 1 holds the label and child 3 the value; shorter items carry nothing usable.
                    if (itemNode.ChildNodes.Count > 3)
                    {
                        switch (itemNode.ChildNodes[1].InnerText)
                        {
                            case "Образование":
                                tempVacancy.Education = itemNode.ChildNodes[3].InnerText;
                                break;
                            case "Опыт работы":
                                tempVacancy.Experience = itemNode.ChildNodes[3].InnerText;
                                break;
                            case "График работы":
                                tempVacancy.TypeOfEmployment = itemNode.ChildNodes[3].InnerText;
                                break;
                            default:
                                break;
                        }
                    }

                    break;

                default:
                    break;
            }
        }

        return tempVacancy;
    }
    catch
    {
        // Any unexpected markup invalidates the vacancy; callers must handle null.
        return null;
    }
}
/// <summary>
/// Downloads the vacancy page at <c>vacancy.VacancyHref</c>, reads its publication
/// date and fills in parameters and description using whichever of the known
/// page templates is present.
/// </summary>
/// <param name="node">Not used by this method.</param>
/// <param name="vacancy">
/// Vacancy to complete. Set to <c>null</c> when a non-default
/// <paramref name="date"/> is given and the vacancy is older than it.
/// </param>
/// <param name="date">Oldest accepted publication date; the default value disables the check.</param>
/// <returns>
/// The same reference as <paramref name="vacancy"/> after the call: <c>null</c> for
/// a rejected vacancy, otherwise the vacancy with whatever could be parsed.
/// </returns>
private Vacancy ParseVacancy(HtmlNode node, ref Vacancy vacancy, DateTime date)
{
    try
    {
        HtmlNode root = new HtmlWeb().Load(vacancy.VacancyHref).DocumentNode;

        // Only the first 10 characters of the meta value are parsed. This is
        // machine-readable data, so use the invariant culture instead of the
        // thread's current culture (whose calendar or format may differ).
        string publishedText = root.SelectSingleNode("//meta[@property='article:published_time']")
            .Attributes["content"].Value.Substring(0, 10);
        vacancy.PublicationDate = DateTime.Parse(publishedText, System.Globalization.CultureInfo.InvariantCulture);

        if (date != new DateTime() && vacancy.PublicationDate < date)
        {
            // Signal the listing loop that an older vacancy was reached.
            checkDate = true;
            vacancy = null;
            return vacancy;
        }

        // Templates are probed in a fixed order; the first one present wins.
        // Each XPath is evaluated once (it was run twice: for the test and for the call).
        HtmlNode innerWrapper = root.SelectSingleNode("//div[@class='f-vacancy-inner-wrapper']");
        if (innerWrapper != null)
        {
            ParceFirstTemplateVacancyParams(innerWrapper, ref vacancy);
            ParseVacancyDescription(root.SelectSingleNode("//div[@class='f-vacancy-description']").ChildNodes["div"].ChildNodes["div"], ref vacancy);
            return vacancy;
        }

        HtmlNode popupParams = root.SelectSingleNode("//div[@id='content_vcVwPopup_VacancyViewInner1_pnlBody']//span//table//tbody//tr[3]//td//div[1]");
        if (popupParams != null)
        {
            ParseThirdTemplateVacancyParams(popupParams, ref vacancy);
            ParseVacancyDescription(root.SelectSingleNode("//div[@class='descr']"), ref vacancy);
            return vacancy;
        }

        HtmlNode descr = root.SelectSingleNode("//div[@class='descr']");
        if (descr != null)
        {
            ParseVacancyDescription(descr, ref vacancy);
            return vacancy;
        }

        HtmlNode popupDescription = root.SelectSingleNode("//div[@id='content_vcVwPopup_VacancyViewInner1_pnlBody']//span//div//table//tr//td[2]//div");
        if (popupDescription != null)
        {
            ParseVacancyDescription(popupDescription, ref vacancy);
            return vacancy;
        }

        HtmlNode nestedDescription = root.SelectSingleNode("//*[@id='content_vcVwPopup_VacancyViewInner1_pnlBody']//span//table//tbody//tr[2]//td//table//tbody//tr//td[2]//div[2]");
        if (nestedDescription != null)
        {
            ParseVacancyDescription(nestedDescription, ref vacancy);
            return vacancy;
        }

        HtmlNode dDes = root.SelectSingleNode("//div[@class='d_des']");
        if (dDes != null)
        {
            bool hasItems = root.SelectSingleNode("//div[@class='d-items']") != null;
            if (hasItems && dDes.LastChild.Name == "div")
            {
                // The description is the trailing <div> of the d_des container.
                ParseThirdTemplateVacancyParams(dDes, ref vacancy);
                ParseVacancyDescription(dDes.LastChild, ref vacancy);
            }
            else if (hasItems)
            {
                ParseThirdTemplateVacancyParams(dDes, ref vacancy);
                ParseVacancyDescription(dDes, ref vacancy);
            }
            else
            {
                HtmlNode dDesIn = root.SelectSingleNode("//div[@class='d_des_in']");
                if (dDesIn != null)
                {
                    ParseVacancyDescription(dDesIn, ref vacancy);
                }
                else
                {
                    ParseSecondTemplateVacancyParams(dDes, ref vacancy);
                    ParseVacancyDescription(dDes, ref vacancy);
                }
            }
        }

        return vacancy;
    }
    catch
    {
        // Best effort: on any failure return the vacancy with what was parsed so far.
        return vacancy;
    }
}