Beispiel #1
0
        /// <summary>
        /// Loads <paramref name="url"/> with AngleSharp, extracts the page title, the
        /// "keywords" and "description" meta tags and the hyperlinks, then opens the same
        /// address in headless Chrome to capture a screenshot.
        /// </summary>
        /// <param name="url">Address of the page to scrape.</param>
        /// <returns>
        /// A model holding the title, the meta description and keywords (null when the page
        /// does not declare them), the collected link targets and the screenshot.
        /// </returns>
        private async Task <WebScraperModel> GetPageData(string url)
        {
            var config   = Configuration.Default.WithDefaultLoader();
            var context  = BrowsingContext.New(config);
            var document = await context.OpenAsync(url);

            // The head and the whole markup are re-parsed as stand-alone documents, exactly as
            // before, so that attribute lookup and anchor Href resolution stay unchanged.
            var parser       = new AngleSharp.Html.Parser.HtmlParser();
            var headDocument = parser.ParseDocument(document.Head.InnerHtml);
            var fullDocument = parser.ParseDocument(document.DocumentElement.InnerHtml);

            // Materialized once: the list is searched twice below.
            var metaTags = headDocument.All.Where(x => x.LocalName == "meta").ToList();

            // A page is not required to declare either meta tag; "?." yields null instead of
            // throwing a NullReferenceException when the tag is absent.
            var keywords = metaTags
                           .FirstOrDefault(x => x.GetAttribute("Name") == "keywords")
                           ?.GetAttribute("Content");
            var description = metaTags
                              .FirstOrDefault(x => x.GetAttribute("Name") == "description")
                              ?.GetAttribute("Content");

            // Anchors without an href attribute are ignored (GetAttribute returns null for them).
            // OfType skips any matched element that is not an HTML anchor instead of throwing.
            var linksPath = fullDocument.QuerySelectorAll("a")
                            .Where(a => a.GetAttribute("href")?.StartsWith("h", StringComparison.Ordinal) == true)
                            .OfType <IHtmlAnchorElement>()
                            .Select(a => a.Href)
                            .ToList();

            WebScraperModel mymodel = new WebScraperModel();

            mymodel.title         = document.Title;
            mymodel.description   = description;
            mymodel.keywords      = keywords;
            mymodel.AllHyperLinks = linksPath;

            ChromeOptions options = new ChromeOptions();

            options.AddArgument("headless"); // Comment out to see the browser window.
            var driver = new ChromeDriver(Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), options);

            try
            {
                driver.Navigate().GoToUrl(url);
                var screenshot = ((ITakesScreenshot)driver).GetScreenshot();

                // Saved under a random name in the current working directory.
                screenshot.SaveAsFile(Guid.NewGuid() + ".png");
                mymodel.screenShot = screenshot;
            }
            finally
            {
                // Always end the WebDriver session, even when navigation or the screenshot
                // fails, so that no browser process is left running.
                driver.Quit();
            }

            return(mymodel);
        }
Beispiel #2
0
        /// <summary>
        /// Fetches the page's HTML source through the loader and runs the parser on it.
        /// Raises <c>OnStart</c> first and <c>OnComplited</c> with the parse result at the end;
        /// when the loader returns no content the method stops without raising <c>OnComplited</c>.
        /// </summary>
        /// <remarks>
        /// The method is <c>async void</c>, so callers cannot await it or observe its exceptions.
        /// </remarks>
        public async void StartParse()
        {
            OnStart?.Invoke(this);

            // Download the page source.
            string html = await _loader.GetSource();

            if (!string.IsNullOrEmpty(html))
            {
                // Build the DOM with AngleSharp.
                var           htmlParser = new AngleSharp.Html.Parser.HtmlParser();
                IHtmlDocument dom        = await htmlParser.ParseDocumentAsync(html);

                // Parse first, then notify: the result is computed even when nobody listens.
                T parsed = Parser.Parse(dom);

                OnComplited?.Invoke(this, parsed);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Walks the page ids from <c>parserSettings.StartPoint</c> to
        /// <c>parserSettings.EndPoint</c> (inclusive): each page is loaded, parsed and reported
        /// through <c>OnNewData</c>. <c>OnCompleted</c> is raised when the range is exhausted or
        /// as soon as <c>isActive</c> is found to be false before a page is processed.
        /// </summary>
        private async void Worker()
        {
            int pageId = parserSettings.StartPoint;

            while (pageId <= parserSettings.EndPoint)
            {
                // Stop requested: report completion and leave without touching the flag.
                if (!isActive)
                {
                    OnCompleted?.Invoke(this);
                    return;
                }

                var html = await htmlLoader.GetSourceByPageId(pageId);

                // A fresh DOM parser is created for every page.
                var dom = await new Parser().ParseDocumentAsync(html);

                var parsed = parser.Parse(dom, Settings.ClassName, Settings.QuerySelector);

                OnNewData?.Invoke(this, parsed);

                pageId++;
            }

            // Whole range processed: notify listeners, then mark the worker as idle.
            OnCompleted?.Invoke(this);
            isActive = false;
        }
Beispiel #4
0
        /// <summary>
        /// Scrapes the doctor lists published on blsbg.eu, one regional list (city) at a time,
        /// and adds every listed doctor to the repository. All additions are saved with a
        /// single <c>SaveChangesAsync</c> call at the end.
        /// </summary>
        /// <remarks>
        /// A page that cannot be downloaded after five attempts is skipped. Table rows that do
        /// not have the expected cells or a numeric UIN are skipped as well.
        /// </remarks>
        /// <returns>Always 1.</returns>
        public async Task <int> AddAsyncDoctor()
        {
            // NOTE(review): names for ids 28 and 29 exist below, but the loop has always stopped
            // at 27, so those two lists are never scraped - confirm whether that is intended.
            const int lastCityNumber = 27;
            const int maxAttempts    = 5;
            const int retryDelayMs   = 500;

            // The total reported by the page is divided by this value to get the page count.
            const int pageSize = 25;

            // City name for each list id used in the URL; the index is the id (ids start at 1).
            string[] cityNames =
            {
                null,            // 0: unused
                null,            // 1: skipped below
                null,            // 2: skipped below
                "Бургас",
                "Варна",
                "Велико Търново",
                "Видин",
                "Враца",
                "Габрово",
                "Добрич",
                "Кърджали",
                "Кюстендил",
                "Ловеч",
                "Монтана",
                "Пазарджик",
                "Перник",
                "Плевен",
                "Пловдив",
                "Разград",
                "Русе",
                "Силистра",
                "Сливен",
                "Смолян",
                "София",         // 23: skipped below
                "София-област",
                "Стара Загора",
                "Търговище",
                "Хасково",
                "Шумен",
                "Ямбол",
            };

            var parser = new AngleSharp.Html.Parser.HtmlParser();

            using (var client = new HttpClient())
            {
                for (int cityNumber = 1; cityNumber <= lastCityNumber; cityNumber++)
                {
                    // Lists 1, 2 and 23 are deliberately left out. The original code labelled
                    // 1 and 2 as "готово" ("done") - presumably imported earlier; TODO confirm.
                    if (cityNumber == 1 || cityNumber == 2 || cityNumber == 23)
                    {
                        continue;
                    }

                    var city = cityNames[cityNumber];

                    // Starts at one page and is corrected from the summary of every page read.
                    var numberOfPages = 1.0;
                    for (var page = 1; page <= numberOfPages; page++)
                    {
                        var    url  = $"https://blsbg.eu/bg/medics/unionlist/{cityNumber}?UIN_page={page}&ajax=yw0";
                        string html = null;
                        for (var attempt = 0; attempt < maxAttempts; attempt++)
                        {
                            try
                            {
                                using (var response = await client.GetAsync(url))
                                {
                                    html = await response.Content.ReadAsStringAsync();
                                }

                                break;
                            }
                            catch (Exception ex) when (ex is HttpRequestException ||
                                                       ex is TaskCanceledException ||
                                                       ex is System.IO.IOException)
                            {
                                // Transient network failure or timeout: mark it and retry
                                // after a short pause without blocking the thread.
                                Console.Write('!');
                                await Task.Delay(retryDelayMs);
                            }
                        }

                        if (string.IsNullOrWhiteSpace(html))
                        {
                            continue;
                        }

                        var document = parser.ParseDocument(html);

                        // The last word of the "summary" element is read as the total number
                        // of doctors. When it is missing or not numeric the previous page
                        // count is kept instead of throwing.
                        var summary = document.GetElementsByClassName("summary");
                        if (summary.Length > 0)
                        {
                            var    words = summary[0].TextContent.Split(' ');
                            double total;
                            if (double.TryParse(
                                    words[words.Length - 1],
                                    System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                                    System.Globalization.CultureInfo.InvariantCulture,
                                    out total))
                            {
                                numberOfPages = Math.Ceiling(total / pageSize);
                            }
                        }

                        // The first row is skipped (presumably the table header).
                        foreach (var row in document.GetElementsByTagName("tr").Skip(1))
                        {
                            var cells = row.GetElementsByTagName("td");

                            // Cells 0, 2 and 3 are read below, so at least four are required.
                            if (cells.Length < 4)
                            {
                                continue;
                            }

                            long uin;
                            if (!long.TryParse(
                                    cells[0].TextContent,
                                    System.Globalization.NumberStyles.Integer,
                                    System.Globalization.CultureInfo.InvariantCulture,
                                    out uin))
                            {
                                continue;
                            }

                            var addDoctor = new UserAcc()
                            {
                                UIN       = uin,
                                Name      = cells[2].TextContent,
                                City      = city,
                                Specialty = cells[3].TextContent,
                            };

                            await this.addRepository.AddAsync(addDoctor);
                        }
                    }
                }
            }

            await this.addRepository.SaveChangesAsync();

            return(1);
        }
Beispiel #5
0
        /// <summary>
        /// Scrapes the doctor listing pages <paramref name="fromId"/> to
        /// <paramref name="toId"/> (inclusive) of bestdoctors.bg and adds every doctor card
        /// found to the repository. All additions are saved with a single
        /// <c>SaveChangesAsync</c> call at the end.
        /// </summary>
        /// <param name="fromId">Number of the first listing page to read.</param>
        /// <param name="toId">Number of the last listing page to read.</param>
        /// <remarks>
        /// A page that cannot be downloaded after ten attempts is skipped. A card that lacks
        /// any of the elements read below is skipped as well.
        /// </remarks>
        /// <returns>Always 1.</returns>
        public async Task <int> AddDoctor(int fromId, int toId)
        {
            const int maxAttempts  = 10;
            const int retryDelayMs = 500;

            var parser = new AngleSharp.Html.Parser.HtmlParser();

            using (var client = new HttpClient())
            {
                for (var page = fromId; page <= toId; page++)
                {
                    // Progress marker: one '^' per requested page.
                    Console.Write('^');
                    var    url  = $"https://bestdoctors.bg/doctors/p/{page}";
                    string html = null;
                    for (var attempt = 0; attempt < maxAttempts; attempt++)
                    {
                        try
                        {
                            using (var response = await client.GetAsync(url))
                            {
                                html = await response.Content.ReadAsStringAsync();
                            }

                            break;
                        }
                        catch (Exception ex) when (ex is HttpRequestException ||
                                                   ex is TaskCanceledException ||
                                                   ex is System.IO.IOException)
                        {
                            // Transient network failure or timeout: mark it and retry after
                            // a short pause without blocking the thread.
                            Console.Write('!');
                            await Task.Delay(retryDelayMs);
                        }
                    }

                    if (string.IsNullOrWhiteSpace(html))
                    {
                        continue;
                    }

                    var document = parser.ParseDocument(html);
                    foreach (var card in document.GetElementsByClassName("docbox"))
                    {
                        var headings   = card.GetElementsByTagName("h4");
                        var paragraphs = card.GetElementsByTagName("p");
                        var spans      = card.GetElementsByTagName("span");
                        var images     = card.GetElementsByTagName("img");

                        // Skip a card that lacks any element indexed below instead of
                        // aborting the whole import with an out-of-range exception.
                        if (headings.Length == 0 ||
                            paragraphs.Length < 2 ||
                            spans.Length == 0 ||
                            images.Length == 0 ||
                            images[0].Attributes.Length == 0)
                        {
                            continue;
                        }

                        var docName   = headings[0].TextContent.Replace("\n", string.Empty).Trim();
                        var city      = paragraphs[1].TextContent
                                        .Replace(",", string.Empty)
                                        .Replace("\n", string.Empty)
                                        .Trim();
                        var specialty = spans[0].TextContent.Replace("\n", string.Empty).Trim();

                        // NOTE(review): the value of the image's FIRST attribute is stored,
                        // whatever that attribute is - presumably "src"; verify against the site.
                        var imageSrc = images[0].Attributes[0].Value;

                        var addDoctor = new UserAcc()
                        {
                            Name      = docName,
                            City      = city,
                            Specialty = specialty,
                            FilePath  = imageSrc,
                        };

                        // The second link of a card, when present, is stored as SecondName
                        // (the original code treated it as the hospital name).
                        var links = card.GetElementsByTagName("a");
                        if (links.Length > 1)
                        {
                            addDoctor.SecondName = links[1].TextContent.Replace("\n", string.Empty).Trim();
                        }

                        await this.addRepository.AddAsync(addDoctor);
                    }
                }
            }

            await this.addRepository.SaveChangesAsync();

            return(1);
        }