Ejemplo n.º 1
0
        /// <summary>
        /// Lazily yields the vacancies of one category by walking its paginated listing
        /// (<c>{categoryUrl}/pg{N}</c>) and parsing every row of the list table.
        /// </summary>
        /// <param name="category">Key of the category; matched against the entries of <c>this.category</c>.</param>
        /// <param name="date">Passed through unchanged to <c>ParseVacancyHeader</c>.</param>
        /// <returns>
        /// A deferred sequence of vacancies. Nothing is yielded when the category is unknown
        /// or its stored URL is empty.
        /// </returns>
        /// <remarks>
        /// The <c>checkDate</c> and <c>dayAgo</c> members are read here but assigned elsewhere
        /// (presumably inside <c>ParseVacancyHeader</c> - confirm against that method).
        /// </remarks>
        public override IEnumerable <Vacancy> ParseByDate(string category, DateTime date)
        {
            var item = this.category.Where(x => x.Key == category && x.Value != string.Empty).FirstOrDefault();

            if (item.Value == null)
            {
                yield break;
            }

            int numberOfPages = GetNumberOfPages(item.Value);
            checkDate = false;
            var web = new HtmlWeb();

            for (int i = 1; i <= numberOfPages; i++)
            {
                // GetAttributeValue tolerates <table> elements without a class attribute;
                // indexing Attributes["class"].Value would throw on the first such table.
                var listTable = web.Load(item.Value + "/pg" + i).DocumentNode
                                .Descendants("table")
                                .FirstOrDefault(x => x.GetAttributeValue("class", string.Empty) == "f-vacancylist-tablewrap");

                if (listTable == null)
                {
                    // The page has no vacancy table (layout change / error page): skip it instead of crashing.
                    continue;
                }

                HtmlNodeCollection vacancyCollection = listTable.ChildNodes;
                foreach (var itemNode in vacancyCollection)
                {
                    // The last child of the table is never parsed as a vacancy row.
                    if (itemNode == vacancyCollection[vacancyCollection.Count - 1])
                    {
                        continue;
                    }

                    // Re-arm parsing when the stop flag is set but the last seen age label differs.
                    if (checkDate && dayAgo != "1&nbsp;день назад")
                    {
                        checkDate = false;
                    }

                    if (checkDate)
                    {
                        // Leaves only the row loop; the page loop goes on with the next page.
                        break;
                    }

                    Vacancy vacancy = new Vacancy {
                        ParseSiteId = webSiteId, Сategory = item.Key
                    };
                    ParseVacancyHeader(itemNode, ref vacancy, date);

                    // ParseVacancyHeader receives the vacancy by ref and may replace it with null.
                    if (vacancy != null)
                    {
                        yield return(vacancy);
                    }
                }
            }
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Fills company site, employment type, contact person, experience and phone number
 /// of <paramref name="vacancy"/> from the table found inside the <c>d_des</c> block.
 /// </summary>
 /// <param name="node">Node used as the starting point of the XPath lookup.</param>
 /// <param name="vacancy">Vacancy whose properties are assigned; left untouched when nothing matches.</param>
 /// <remarks>
 /// Best-effort: the method never throws. Missing markup is handled with explicit null
 /// checks, so one malformed cell no longer discards the remaining rows.
 /// </remarks>
 private void ParseSecondTemplateVacancyParams(HtmlNode node, ref Vacancy vacancy)
 {
     if (node == null || vacancy == null)
     {
         return;
     }

     try
     {
         // NOTE: the expression starts with "//", so it is evaluated from the document root
         // rather than relative to `node`; kept as in the original.
         HtmlNode container = node.SelectSingleNode("//div[@class='d_des']");
         HtmlNode table     = container == null ? null : container.ChildNodes["table"];
         if (table == null)
         {
             return;
         }

         foreach (var rowNode in table.ChildNodes)
         {
             if (rowNode.NodeType != HtmlNodeType.Element)
             {
                 continue;
             }

             foreach (var cellNode in rowNode.ChildNodes)
             {
                 if (cellNode.NodeType != HtmlNodeType.Element)
                 {
                     continue;
                 }

                 string text = cellNode.InnerText;

                 if (text.Contains("Сайт"))
                 {
                     vacancy.CompanyWebSite = text.Trim();
                 }
                 else if (text.Contains("Вид занятости"))
                 {
                     vacancy.TypeOfEmployment = text.Trim();
                 }
                 else if (text.Contains("Контактное лицо"))
                 {
                     vacancy.ContactPerson = text.Trim();
                 }
                 else if (text.Contains("Опыт работы"))
                 {
                     vacancy.Experience = text.Trim();
                 }
                 else if (text.Contains("Телефон") && cellNode.LastChild != null)
                 {
                     // Only the last child of the cell is used for the phone number.
                     vacancy.PhoneNumber = cellNode.LastChild.InnerText.Trim();
                 }
             }
         }
     }
     catch (System.Exception ex)
     {
         // Deliberately non-fatal, but no longer silent.
         System.Diagnostics.Debug.WriteLine("ParseSecondTemplateVacancyParams failed: " + ex);
     }
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Crawls the listing pages of one category (page 1, 2, ... until a page contains no
        /// vacancy links) and opens every linked vacancy page to fill a <c>Vacancy</c>.
        /// </summary>
        /// <param name="keyCategory">Key looked up in <c>this.category</c> to get the category URL part.</param>
        /// <returns>
        /// The parsed vacancies, or <c>null</c> when the key lookup fails or the category URL is empty.
        /// </returns>
        /// <remarks>
        /// Each field is read independently: a vacancy page without a title still gets its
        /// company, attributes, salary and description parsed.
        /// </remarks>
        private async Task <IEnumerable <Vacancy> > ParserCategory(string keyCategory)
        {
            try
            {
                this.url = this.category[keyCategory];
            }
            catch
            {
                // Lookup failed (for example an unknown key): callers get null, as before.
                return(null);
            }

            if (string.IsNullOrEmpty(this.url))
            {
                return(null);
            }

            List <Vacancy> list       = new List <Vacancy>();
            var            pagePrefix = this.site + this.url + this.prefPage;

            for (int i = 1; ; i++)
            {
                var document = await BrowsingContext.New(config).OpenAsync(pagePrefix + i);

                var cells = document.QuerySelectorAll("#center > div > div.row > div.col-md-8.col-left > div.card.card-hover.card-visited.job-link > h2 > a");

                if (cells.Length == 0)    // no vacancy links on this page: pagination is exhausted
                {
                    break;
                }

                foreach (var item in cells)
                {
                    string vacancyUrl = site + item.GetAttribute("href");

                    Vacancy vacancy = new Vacancy();
                    vacancy.ParseSiteId     = this.siteId;
                    vacancy.PublicationDate = DateTime.Today;

                    var link = await BrowsingContext.New(config).OpenAsync(vacancyUrl);
                    vacancy.VacancyHref = vacancyUrl;

                    // QuerySelector returns null when nothing matches, so check instead of catching.
                    var titleNode = link.QuerySelector("div.card > h1.cut-top.wordwrap");
                    if (titleNode != null)
                    {
                        vacancy.Title = titleNode.TextContent;
                    }

                    var companyNode = link.QuerySelector("dl.dl-horizontal > dd > a > b");
                    if (companyNode != null)
                    {
                        vacancy.Company = companyNode.TextContent;
                    }

                    try
                    {
                        foreach (var el in link.QuerySelectorAll("dl.dl-horizontal > dt"))
                        {
                            SwitchAttributeByName(ref vacancy, el);
                        }
                    }
                    catch (Exception ex)
                    {
                        // Attribute parsing stays best-effort, but the failure is now visible.
                        System.Diagnostics.Debug.WriteLine("ParserCategory: attribute parsing failed for " + vacancyUrl + ": " + ex);
                    }

                    var salaryNode = link.QuerySelector("div.card > h3.wordwrap");
                    if (salaryNode != null)
                    {
                        vacancy.Salary = salaryNode.TextContent;
                    }

                    foreach (var el in link.QuerySelectorAll("div.card > div.overflow > p"))
                    {
                        vacancy.Description += el.TextContent;
                    }

                    list.Add(vacancy);
                }
            }

            return(list);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Collects the vacancies of one olx.ua job category that are not older than
        /// <paramref name="date"/>, walking the paginated listing.
        /// </summary>
        /// <param name="keyCategory">Key looked up in <c>categoryCollection</c> to get the category path.</param>
        /// <param name="date">Listings for which <c>getTime(link)</c> is earlier than this value are dropped.</param>
        /// <returns>
        /// The collected vacancies; an empty list when the category path is empty;
        /// <c>null</c> when the key lookup throws.
        /// </returns>
        /// <remarks>
        /// <c>countFive</c> counts every processed listing, kept or too old. Once at least five
        /// have been processed, the first too-old listing ends the crawl.
        /// All per-listing XPath expressions are relative to the listing node; an expression
        /// starting with "//" would search the whole document and return the first listing's data.
        /// </remarks>
        public override IEnumerable <Vacancy> ParseByDate(string keyCategory, DateTime date)
        {
            List <Vacancy> tempList  = new List <Vacancy>();
            int            countFive = 0;
            string         categoryPath;

            try
            {
                categoryPath = categoryCollection[keyCategory];
            }
            catch
            {
                return(null);
            }

            if (categoryPath == "")
            {
                // Nothing to crawl; decided before any network access.
                return(tempList);
            }

            string baseUrl = "https://www.olx.ua/rabota/" + categoryPath;
            var    Webget  = new HtmlWeb();
            var    doc     = Webget.Load(baseUrl);

            // The page count is read from the 4th child from the end of the pager
            // (presumably the last page link - verify against the live markup).
            // A missing or unreadable pager is treated as a single page.
            int pageCount = 1;
            var pager     = doc.DocumentNode.SelectSingleNode("//div[@class='pager rel clr']");
            if (pager != null && pager.ChildNodes.Count >= 4)
            {
                int parsedCount;
                if (int.TryParse(pager.ChildNodes[pager.ChildNodes.Count - 4].InnerText.Trim(), out parsedCount) && parsedCount > 0)
                {
                    pageCount = parsedCount;
                }
            }

            // Inclusive upper bound: the last page is parsed too.
            for (int page = 1; page <= pageCount; page++)
            {
                if (page > 1)
                {
                    doc = Webget.Load(baseUrl + "/?page=" + page);
                }

                var articles = doc.DocumentNode.SelectNodes("//table//tbody//tr[@class='wrap']//td//article");
                if (articles == null)
                {
                    // SelectNodes returns null when nothing matches.
                    continue;
                }

                foreach (var node in articles)
                {
                    Vacancy newVacancy = new Vacancy();

                    var titleNode = node.SelectSingleNode("div[1]//h3");
                    if (titleNode != null)
                    {
                        newVacancy.Title = titleNode.InnerText.Trim();

                        var    anchor = node.SelectSingleNode("div[1]//h3//a");
                        string link   = anchor == null ? null : anchor.GetAttributeValue("href", null);
                        if (string.IsNullOrEmpty(link))
                        {
                            // Without a link the listing can neither be dated nor detailed: skip it.
                            continue;
                        }

                        // Resolve the publication time once per listing.
                        var published = getTime(link);
                        if (published < date)
                        {
                            if (countFive >= 5)
                            {
                                return(tempList);
                            }

                            countFive++;
                            continue;
                        }

                        newVacancy.VacancyHref = link;
                        getInnerInformation(link, ref newVacancy);
                    }

                    var cityNode = node.SelectSingleNode("div[1]//ul//li[1]");
                    if (cityNode != null)
                    {
                        // Keep only the part before the first comma.
                        string city  = cityNode.InnerText;
                        int    comma = city.IndexOf(',');
                        newVacancy.Location = comma >= 0 ? city.Remove(comma) : city;
                    }

                    var salaryNode = node.SelectSingleNode(".//div[@class='list-item__col list-item__col--price']//div[@class='list-item__price']");
                    if (salaryNode != null)
                    {
                        newVacancy.Salary = salaryNode.InnerText.Trim();
                    }

                    tempList.Add(newVacancy);
                    countFive++;
                }
            }

            return(tempList);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Collects all vacancies of one olx.ua job category by walking its paginated listing.
        /// </summary>
        /// <param name="keyCategory">Key looked up in <c>categoryCollection</c>; also stored as the vacancy category.</param>
        /// <returns>
        /// The collected vacancies; an empty list when the category path is empty;
        /// <c>null</c> when the key lookup throws.
        /// </returns>
        /// <remarks>
        /// Pages are numbered from 1 up to and including the page count, so the first page is
        /// loaded once and the last page is parsed. Follow-up pages are loaded with a bounded
        /// number of attempts; when a page cannot be loaded the crawl stops and the vacancies
        /// gathered so far are returned.
        /// </remarks>
        public override IEnumerable <Vacancy> ParseByCategory(string keyCategory)
        {
            const int MaxLoadAttempts = 3;

            List <Vacancy> tempList = new List <Vacancy>();
            string         categoryPath;

            try
            {
                categoryPath = categoryCollection[keyCategory];
            }
            catch
            {
                return(null);
            }

            if (categoryPath == "")
            {
                // Nothing to crawl; decided before any network access.
                return(tempList);
            }

            string baseUrl = "https://www.olx.ua/rabota/" + categoryPath;
            var    Webget  = new HtmlWeb();
            var    doc     = Webget.Load(baseUrl);

            // The page count is read from the 4th child from the end of the pager
            // (presumably the last page link - verify against the live markup).
            // A missing or unreadable pager is treated as a single page.
            int pageCount = 1;
            var pager     = doc.DocumentNode.SelectSingleNode("//div[@class='pager rel clr']");
            if (pager != null && pager.ChildNodes.Count >= 4)
            {
                int parsedCount;
                if (int.TryParse(pager.ChildNodes[pager.ChildNodes.Count - 4].InnerText.Trim(), out parsedCount) && parsedCount > 0)
                {
                    pageCount = parsedCount;
                }
            }

            for (int page = 1; page <= pageCount; page++)
            {
                if (page > 1)
                {
                    string pageUrl = baseUrl + "/?page=" + page;

                    doc = null;
                    for (int attempt = 1; attempt <= MaxLoadAttempts && doc == null; attempt++)
                    {
                        try
                        {
                            doc = Webget.Load(pageUrl);
                        }
                        catch (Exception ex)
                        {
                            System.Diagnostics.Debug.WriteLine("ParseByCategory: load attempt " + attempt + " failed for " + pageUrl + ": " + ex);
                        }
                    }

                    if (doc == null)
                    {
                        // Give up instead of retrying forever.
                        break;
                    }
                }

                var articles = doc.DocumentNode.SelectNodes("//table//tbody//tr[@class='wrap']//td//article");
                if (articles == null)
                {
                    // SelectNodes returns null when nothing matches.
                    continue;
                }

                foreach (var node in articles)
                {
                    Vacancy newVacancy = null;
                    try
                    {
                        newVacancy = new Vacancy()
                        {
                            ParseSiteId = id
                        };
                        newVacancy.Сategory = keyCategory;

                        // Paths are relative to the listing node; "//h3" would search the whole document.
                        var titleNode = node.SelectSingleNode("div[1]//h3");
                        if (titleNode != null)
                        {
                            newVacancy.Title = titleNode.InnerText.Trim();

                            var    anchor = node.SelectSingleNode("div[1]//h3//a");
                            string link   = anchor == null ? null : anchor.GetAttributeValue("href", null);
                            if (!string.IsNullOrEmpty(link))
                            {
                                newVacancy.VacancyHref = link;
                                getInnerInformation(link, ref newVacancy);
                            }
                        }

                        var cityNode = node.SelectSingleNode("div[1]//ul//li[1]");
                        if (cityNode != null)
                        {
                            // Keep only the part before the first comma.
                            string city  = cityNode.InnerText;
                            int    comma = city.IndexOf(',');
                            newVacancy.Location = comma >= 0 ? city.Remove(comma) : city;
                        }
                    }
                    catch (Exception ex)
                    {
                        // A partially filled vacancy is still kept (as before); the failure is now logged.
                        System.Diagnostics.Debug.WriteLine("ParseByCategory: listing parse failed: " + ex);
                    }

                    if (newVacancy != null)
                    {
                        tempList.Add(newVacancy);
                    }
                }
            }

            return(tempList);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Lazily yields the jobs.ua vacancies of one category whose publication date equals
        /// <paramref name="date"/>, walking the listing pages <c>/page-1 .. /page-N</c>.
        /// </summary>
        /// <param name="keyCategory">Key looked up in <c>category</c>; also passed to <c>GetVacancyByNode</c>.</param>
        /// <param name="date">Compared for exact equality with each vacancy's <c>PublicationDate</c>.</param>
        /// <returns>
        /// A deferred sequence of matching vacancies. The sequence ends at the first
        /// non-matching vacancy found on any page after the first one.
        /// </returns>
        /// <remarks>
        /// List items that cannot be parsed (<c>GetVacancyByNode</c> returns null) are skipped.
        /// </remarks>
        public override IEnumerable <Vacancy> ParseByDate(string keyCategory, DateTime date)
        {
            const int MaxParseAttempts = 3;

            string valuecategory = null;

            try
            {
                valuecategory = category[keyCategory];
            }
            catch
            {
                yield break;
            }

            string href       = "https://jobs.ua/vacancy/" + valuecategory;
            int    countpages = GetnumbersOfPage(href);

            for (int i = 1; i <= countpages; i++)
            {
                string       pageUrl  = href + "/page-" + i;
                HtmlDocument document = null;

                try
                {
                    document = new HtmlWeb().Load(pageUrl);
                }
                catch
                {
                    // Fall back to the project's alternative loader.
                    document = GetDokumentByURL(pageUrl);
                }

                // SelectNodes returns null when nothing matches; check instead of catching.
                var listNodes = document == null
                                ? null
                                : document.DocumentNode.SelectNodes("//ul[@class='b-vacancy__list js-items_block']");

                // NOTE(review): the fallback is called with the category URL without the page
                // suffix, exactly as in the original - confirm whether that is intended.
                HtmlNode[] links = listNodes != null ? listNodes.ToArray() : GetHodeByUrl(href);

                if (links == null || links.Length == 0)
                {
                    continue;
                }

                foreach (var item in links[0].ChildNodes.Where(x => x.NodeType != HtmlNodeType.Text))
                {
                    // Retry only when parsing throws, and only a bounded number of times.
                    Vacancy tempVacancy = null;
                    for (int attempt = 1; attempt <= MaxParseAttempts; attempt++)
                    {
                        try
                        {
                            tempVacancy = GetVacancyByNode(item, keyCategory);
                            break;
                        }
                        catch (Exception ex)
                        {
                            System.Diagnostics.Debug.WriteLine("ParseByDate: attempt " + attempt + " failed: " + ex);
                        }
                    }

                    if (tempVacancy == null)
                    {
                        // Unparseable list item: skip it instead of dereferencing null.
                        continue;
                    }

                    if (tempVacancy.PublicationDate != date)
                    {
                        // Non-matching entries on the first page are skipped;
                        // on later pages the first one ends the crawl.
                        if (i != 1)
                        {
                            yield break;
                        }
                    }
                    else
                    {
                        yield return(tempVacancy);
                    }
                }
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Builds a <c>Vacancy</c> from one list-item node of the vacancy listing.
        /// </summary>
        /// <param name="node">The list item; its non-text children are dispatched on the value of their first attribute.</param>
        /// <param name="category">Stored as the vacancy category.</param>
        /// <returns>
        /// The filled vacancy, or <c>null</c> when <paramref name="node"/> is null or an
        /// unexpected error occurs while parsing.
        /// </returns>
        /// <remarks>
        /// Children without attributes, rows with fewer cells than expected and date labels the
        /// regex cannot match are skipped individually instead of discarding the whole vacancy.
        /// The publication year is always taken from <c>DateTime.Now</c>.
        /// </remarks>
        private Vacancy GetVacancyByNode(HtmlNode node, string category)
        {
            try
            {
                if (node == null)
                {
                    return(null);
                }

                Vacancy tempVacancy = new Vacancy();

                tempVacancy.Сategory    = category;
                tempVacancy.ParseSiteId = siteId;

                foreach (var itemNode in node.ChildNodes.Where(x => x.NodeType != HtmlNodeType.Text))
                {
                    if (itemNode.Attributes.Count == 0)
                    {
                        // Nothing to dispatch on (e.g. a comment or a bare element).
                        continue;
                    }

                    switch (itemNode.Attributes[0].Value)
                    {
                        case "b-vacancy__top":
                            // "&&": skip text nodes AND <br>; the former "||" let every node through.
                            foreach (var childNode in itemNode.ChildNodes.Where(x => x.NodeType != HtmlNodeType.Text && x.Name != "br"))
                            {
                                if (childNode.Name == "a")
                                {
                                    var hrefAttribute = childNode.Attributes["href"];
                                    if (hrefAttribute != null)
                                    {
                                        tempVacancy.VacancyHref = hrefAttribute.Value;
                                        tempVacancy             = GetContentFromHttp(tempVacancy.VacancyHref, tempVacancy);
                                    }

                                    tempVacancy.Title = childNode.InnerText;
                                }
                                else if (childNode.Name == "div")
                                {
                                    tempVacancy.Salary = childNode.InnerText.Replace("&nbsp;", "");
                                }
                                else if (childNode.Name == "span")
                                {
                                    string input = childNode.InnerText;

                                    // The original called input.Replace(...) and discarded the result,
                                    // so the regex always saw the raw text. Try the raw text first
                                    // (unchanged behaviour), then the text with "&nbsp;" stripped.
                                    MatchCollection match = regex.Matches(input);
                                    if (match.Count == 0)
                                    {
                                        match = regex.Matches(input.Replace("&nbsp;", ""));
                                    }

                                    if (match.Count > 0)
                                    {
                                        tempVacancy.PublicationDate = new DateTime(DateTime.Now.Year,
                                                                                   GetNumberMounth(match[0].Groups[2].Value),
                                                                                   Convert.ToInt32(match[0].Groups[1].Value));
                                    }

                                    // The date label is the last child of interest in this block.
                                    break;
                                }
                            }
                            break;

                        case "b-vacancy__tech":
                            foreach (var childNode in itemNode.ChildNodes.Where(item => item.NodeType != HtmlNodeType.Text))
                            {
                                if (childNode.GetAttributeValue("class", string.Empty) == "b-vacancy__tech__item")
                                {
                                    tempVacancy.Company = childNode.InnerText.Replace(" ", "").Replace("&nbsp;", " ");
                                }
                                else
                                {
                                    if (childNode.ChildNodes.Count > 2)
                                    {
                                        tempVacancy.Location = childNode.ChildNodes[2].InnerText;
                                    }

                                    break;
                                }
                            }
                            break;

                        case "b-vacancy__tech__item":
                            // Label is read from child 1, value from child 3.
                            if (itemNode.ChildNodes.Count > 3)
                            {
                                string value = itemNode.ChildNodes[3].InnerText;
                                switch (itemNode.ChildNodes[1].InnerText)
                                {
                                    case "Образование":
                                        tempVacancy.Education = value;
                                        break;

                                    case "Опыт работы":
                                        tempVacancy.Experience = value;
                                        break;

                                    case "График работы":
                                        tempVacancy.TypeOfEmployment = value;
                                        break;

                                    default:
                                        break;
                                }
                            }
                            break;

                        default:
                            break;
                    }
                }

                return(tempVacancy);
            }
            catch (Exception ex)
            {
                // Callers treat null as "could not parse"; keep that contract but log the cause.
                System.Diagnostics.Debug.WriteLine("GetVacancyByNode failed: " + ex);
                return(null);
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Loads the page at <c>vacancy.VacancyHref</c>, stores its publication date and fills
        /// the vacancy using the first page template that matches.
        /// </summary>
        /// <param name="node">Not used by this method.</param>
        /// <param name="vacancy">Vacancy to fill; set to <c>null</c> when it is older than <paramref name="date"/>.</param>
        /// <param name="date">Cut-off date; the default <c>DateTime</c> value disables the check.</param>
        /// <returns>
        /// The same reference that <paramref name="vacancy"/> holds on exit: <c>null</c> for a
        /// vacancy older than the cut-off, otherwise the (possibly partially filled) vacancy.
        /// Any exception ends parsing early and the vacancy is returned as it is at that point.
        /// </returns>
        private Vacancy ParseVacancy(HtmlNode node, ref Vacancy vacancy, DateTime date)
        {
            try
            {
                HtmlNode root = new HtmlWeb().Load(vacancy.VacancyHref).DocumentNode;

                // Only the first 10 characters of the meta content are used for the date.
                string publishedRaw = root.SelectSingleNode("//meta[@property='article:published_time']").Attributes["content"].Value;
                vacancy.PublicationDate = Convert.ToDateTime(publishedRaw.Substring(0, 10));

                if (date != default(DateTime) && vacancy.PublicationDate < date)
                {
                    // Raise the stop flag and drop the vacancy.
                    checkDate = true;
                    vacancy   = null;
                    return(null);
                }

                // Templates are probed in a fixed order; the first match wins.
                HtmlNode innerWrapper = root.SelectSingleNode("//div[@class='f-vacancy-inner-wrapper']");
                if (innerWrapper != null)
                {
                    ParceFirstTemplateVacancyParams(innerWrapper, ref vacancy);
                    ParseVacancyDescription(root.SelectSingleNode("//div[@class='f-vacancy-description']").ChildNodes["div"].ChildNodes["div"], ref vacancy);
                    return(vacancy);
                }

                HtmlNode popupParams = root.SelectSingleNode("//div[@id='content_vcVwPopup_VacancyViewInner1_pnlBody']//span//table//tbody//tr[3]//td//div[1]");
                if (popupParams != null)
                {
                    ParseThirdTemplateVacancyParams(popupParams, ref vacancy);
                    ParseVacancyDescription(root.SelectSingleNode("//div[@class='descr']"), ref vacancy);
                    return(vacancy);
                }

                HtmlNode descr = root.SelectSingleNode("//div[@class='descr']");
                if (descr != null)
                {
                    ParseVacancyDescription(descr, ref vacancy);
                    return(vacancy);
                }

                HtmlNode popupCell = root.SelectSingleNode("//div[@id='content_vcVwPopup_VacancyViewInner1_pnlBody']//span//div//table//tr//td[2]//div");
                if (popupCell != null)
                {
                    ParseVacancyDescription(popupCell, ref vacancy);
                    return(vacancy);
                }

                HtmlNode popupNested = root.SelectSingleNode("//*[@id='content_vcVwPopup_VacancyViewInner1_pnlBody']//span//table//tbody//tr[2]//td//table//tbody//tr//td[2]//div[2]");
                if (popupNested != null)
                {
                    ParseVacancyDescription(popupNested, ref vacancy);
                    return(vacancy);
                }

                HtmlNode dDes = root.SelectSingleNode("//div[@class='d_des']");
                if (dDes == null)
                {
                    // No known template matched.
                    return(vacancy);
                }

                if (root.SelectSingleNode("//div[@class='d-items']") != null)
                {
                    // Decide where the description lives before the params parser runs.
                    bool endsWithDiv = dDes.LastChild.Name == "div";

                    ParseThirdTemplateVacancyParams(dDes, ref vacancy);
                    ParseVacancyDescription(endsWithDiv ? dDes.LastChild : dDes, ref vacancy);
                    return(vacancy);
                }

                HtmlNode dDesInner = root.SelectSingleNode("//div[@class='d_des_in']");
                if (dDesInner != null)
                {
                    ParseVacancyDescription(dDesInner, ref vacancy);
                }
                else
                {
                    ParseSecondTemplateVacancyParams(dDes, ref vacancy);
                    ParseVacancyDescription(dDes, ref vacancy);
                }

                return(vacancy);
            }
            catch
            {
                // Best-effort: hand back whatever has been filled so far.
                return(vacancy);
            }
        }