Пример #1
0
        /// <summary>
        /// Downloads every list item whose <c>Done</c> flag is still false, keeping at most
        /// <c>maxItems</c> downloads in flight at a time.
        /// </summary>
        /// <param name="state">Scraper state passed through to the load, fix and download helpers.</param>
        /// <returns>
        /// A task that completes once no pending items are left to start and every started
        /// download has finished.
        /// </returns>
        private async Task _scrapePhase3Inner(ScraperWinWinStateModel state)
        {
            const int maxItems    = 50;
            const int pollDelayMs = 1000 * 3;

            var listItems = await _loadListItemsAsync(state);

            await _fixListItems(listItems, state);

            // Picks up to `count` items that are not marked as done yet.
            List<ShortItemDtoModel> NeedToDo(int count) =>
                listItems.Where(x => x.Value.Done == false).Select(x => x.Value).Take(count).ToList();

            var running  = new List<Task<bool>>();
            var needToDo = NeedToDo(maxItems);

            // A plain `while` (instead of do/while) avoids a pointless delay when nothing is pending.
            while (needToDo.Count > 0)
            {
                foreach (var shortItem in needToDo)
                {
                    var item = shortItem;

                    // Mark the item as done up front so it is not selected again while in flight;
                    // the flag is overwritten with the real outcome when the download completes,
                    // so a failed download becomes eligible for another attempt.
                    item.Done = true;
                    running.Add(Task.Run(async () => item.Done = await _downloadItemAsync(item, state)));
                }

                // Wait asynchronously instead of blocking a thread-pool thread.
                await Task.Delay(pollDelayMs);

                // `running` is never empty here: at least one task was added above.
                await Task.WhenAny(running);
                running.RemoveAll(x => x.IsCompleted);

                // Refill only the free slots.
                needToDo = NeedToDo(maxItems - running.Count);
            }

            await Task.WhenAll(running);
        }
Пример #2
0
        /// <summary>
        /// Navigates the main browser window to the WinWin search-results page and waits until
        /// the element with id <c>carAreasDV</c> is visible, retrying on any exception.
        /// </summary>
        /// <param name="selenoidState">Holds the browser window and the wait object used for navigation.</param>
        /// <param name="state">Scraper state forwarded to <c>_initSelenoidBase</c> when the session is re-initialized.</param>
        /// <remarks>
        /// Up to 20 attempts are made. After a failed attempt (except the last one) the Selenoid
        /// session is re-initialized and the thread sleeps for 5 seconds. If the last attempt also
        /// fails, the method returns without reporting the failure.
        /// </remarks>
        private void _openMainPage(SelenoidStateModel selenoidState, ScraperWinWinStateModel state)
        {
            const string url         = "https://www.winwin.co.il/RealEstate/ForRent/Search/SearchResults/RealEstatePage.aspx?search=8bdb5277c594afddcf9414e7541fd518";
            const int    maxAttempts = 20;

            for (var attempt = 1; attempt <= maxAttempts; attempt++)
            {
                try
                {
                    selenoidState.WindowMain.Navigate().GoToUrl(url);

                    selenoidState.WaitMain.Until(ExpectedConditions.ElementIsVisible(By.Id("carAreasDV")));

                    // Page is loaded and the marker element is visible.
                    return;
                }
                catch
                {
                    if (attempt >= maxAttempts)
                    {
                        // Attempts exhausted: give up silently, as before.
                        return;
                    }

                    // Start from a fresh browser session before the next attempt.
                    _initSelenoidBase(selenoidState, state);
                    Thread.Sleep(1000 * 5);
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Merges the short items stored in every page file into the persisted list of items,
        /// adding only items whose id is not in the list yet, and saves the list.
        /// </summary>
        /// <param name="state">Scraper state used to locate the page files and the list-items file.</param>
        /// <returns>A task that completes after the merged list has been saved and the phase logged as done.</returns>
        private async Task ScrapePhase2_GenerateListItemsAsync(ScraperWinWinStateModel state)
        {
            SetWorkPhaseBase("GenerateListItems", state);

            var listFilePages = _loadListPages(state);
            var listItems     = await _loadListItemsAsync(state);

            foreach (var filePage in listFilePages)
            {
                var filename       = $"{filePage.FullName}";
                var listShortItems = JsonConvert.DeserializeObject<List<ShortItemDtoModel>>(await File.ReadAllTextAsync(filename));

                // An empty file or a literal `null` deserializes to null; treat it as a page without items.
                if (listShortItems is null)
                {
                    continue;
                }

                foreach (var shortItem in listShortItems)
                {
                    if (listItems.ContainsKey(shortItem.ItemId))
                    {
                        // Already known: keep the existing entry (and its Done flag) untouched.
                        continue;
                    }

                    shortItem.Done = false;
                    listItems.Add(shortItem.ItemId, shortItem);
                }
            }

            await _saveListItemsAsync(listItems, state);

            LogDone(state);
        }
Пример #4
0
        /// <summary>
        /// Optionally runs the WinWin scraper, then converts its domain model into an Excel file
        /// and stores that file through <c>ArchiveRepository</c>.
        /// </summary>
        /// <param name="isNew">Value assigned to <c>IsNew</c> on the scraper state.</param>
        /// <param name="needScrape">When false, <c>Scrape()</c> is skipped and only the export and archive steps run.</param>
        private void _winWinScrapeThenSaveStore(bool isNew = false, bool needScrape = true)
        {
            _log($"Start WinWinScrapeThenSaveStore (isNew={isNew})");

            var state   = new ScraperWinWinStateModel { IsNew = isNew };
            var scraper = new ScraperWinWin(state);

            if (needScrape)
            {
                scraper.Scrape();
            }

            // Export: domain model -> Excel data -> file on disk.
            var domainModel = scraper.GetDomainModel();
            var excel       = new ExcelWinWinService(state);
            var savedPath   = excel.SaveToFile(excel.CreateExcel(domainModel));

            new ArchiveRepository().Save(savedPath, state.TypeScraper);

            _log($"End WinWinScrapeThenSaveStore (isNew={isNew}), Spent time {_calcSpentTime2String(state)}");
        }
Пример #5
0
        /// <summary>
        /// Runs scrape phase 3: sets the work phase to "DownloadItems", runs the inner
        /// download routine and logs completion.
        /// </summary>
        /// <param name="state">Scraper state passed to every step of the phase.</param>
        private async Task ScrapePhase3Async(ScraperWinWinStateModel state)
        {
            const string phaseName = "DownloadItems";

            SetWorkPhaseBase(phaseName, state);
            await _scrapePhase3Inner(state);
            LogDone(state);
        }
Пример #6
0
        /// <summary>
        /// Tells whether a JSON file named after the item's id exists in the items directory.
        /// </summary>
        /// <param name="shortItem">Item whose <c>ItemId</c> forms the file name.</param>
        /// <param name="state">Scraper state providing <c>ItemsPath</c>.</param>
        /// <returns>True when <c>{ItemsPath}/{ItemId}.json</c> exists.</returns>
        private bool _isDownloadedItem(ShortItemDtoModel shortItem, ScraperWinWinStateModel state)
        {
            return File.Exists($"{state.ItemsPath}/{shortItem.ItemId}.json");
        }
Пример #7
0
        /// <summary>
        /// Serializes the item as indented JSON into <c>{ItemsPath}/{ItemId}.json</c> and logs the save.
        /// </summary>
        /// <param name="item">Item to persist; its <c>ItemId</c> forms the file name.</param>
        /// <param name="state">Scraper state providing <c>ItemsPath</c>.</param>
        private async Task _saveItemDtoAsync(AdItemWinWinDtoModel item, ScraperWinWinStateModel state)
        {
            var targetPath = $"{state.ItemsPath}/{item.ItemId}.json";
            var json       = JsonConvert.SerializeObject(item, Newtonsoft.Json.Formatting.Indented);

            await File.WriteAllTextAsync(targetPath, json);

            _log($"Save item {item.ItemId}, filename:{targetPath}");
        }
Пример #8
0
        /// <summary>
        /// Counts the items returned by <c>_statusWorkspace_AmountItemsFromPages_GetItems</c>.
        /// </summary>
        /// <param name="state">Scraper state forwarded to the item loader.</param>
        /// <returns>Number of entries in the loaded list; the list is not de-duplicated here.</returns>
        private int _statusWorkspace_AmountItemsFromPages(ScraperWinWinStateModel state)
        {
            return _statusWorkspace_AmountItemsFromPages_GetItems(state).Count;
        }
Пример #9
0
        /// <summary>
        /// Writes the configuration as indented JSON to the file named by <c>state.ConfigFilename</c>
        /// and logs the save.
        /// </summary>
        /// <param name="config">Configuration to serialize.</param>
        /// <param name="state">Scraper state providing the target file name.</param>
        private void _saveScraperConfig(ScraperWinWinConfigModel config, ScraperWinWinStateModel state)
        {
            var targetPath = state.ConfigFilename;
            var json       = JsonConvert.SerializeObject(config, Newtonsoft.Json.Formatting.Indented);

            File.WriteAllText(targetPath, json);

            _log($"Save config:{targetPath} is done");
        }
Пример #10
0
        /// <summary>
        /// Runs scrape phase 1: sets the work phase to "Phase-1", runs the inner routine
        /// and logs completion.
        /// </summary>
        /// <param name="state">Scraper state passed to every step of the phase.</param>
        private async Task ScrapePhase1Async(ScraperWinWinStateModel state)
        {
            const string phaseName = "Phase-1";

            SetWorkPhaseBase(phaseName, state);
            await _scrapePhase1InnerAsync(state);
            LogDone(state);
        }
Пример #11
0
        /// <summary>
        /// Writes the page list as indented JSON to the file named by <c>state.ListPagesFilename</c>
        /// and logs the save.
        /// </summary>
        /// <param name="list">Dictionary keyed by page number; the meaning of the bool value is not defined here.</param>
        /// <param name="state">Scraper state providing the target file name.</param>
        private void _saveListPage(Dictionary<int, bool> list, ScraperWinWinStateModel state)
        {
            var targetPath = $"{state.ListPagesFilename}";
            var json       = JsonConvert.SerializeObject(list, Newtonsoft.Json.Formatting.Indented);

            File.WriteAllText(targetPath, json);

            _log($"Saved list-page:{state.ListPagesFilename}");
        }
Пример #12
0
        /// <summary>
        /// Writes the region list as indented JSON to the file named by
        /// <c>state.ListRegionsFilename</c> and logs the save.
        /// </summary>
        /// <param name="list">Regions to serialize.</param>
        /// <param name="state">Scraper state providing the target file name.</param>
        private void _saveListRegions(List<RegionModel> list, ScraperWinWinStateModel state)
        {
            var targetPath = $"{state.ListRegionsFilename}";
            var json       = JsonConvert.SerializeObject(list, Newtonsoft.Json.Formatting.Indented);

            File.WriteAllText(targetPath, json);

            _log($"Save list-regions, filename:{targetPath}");
        }
Пример #13
0
        /// <summary>
        /// Reads <c>{ItemsPath}/{itemId}.json</c> and deserializes it into a <c>ShortItemDtoModel</c>.
        /// </summary>
        /// <param name="itemId">Id that forms the file name.</param>
        /// <param name="state">Scraper state providing <c>ItemsPath</c>.</param>
        /// <returns>The deserialized item.</returns>
        private ShortItemDtoModel _loadShortItems(string itemId, ScraperWinWinStateModel state)
        {
            var json = File.ReadAllText($"{state.ItemsPath}/{itemId}.json");

            return JsonConvert.DeserializeObject<ShortItemDtoModel>(json);
        }
Пример #14
0
        /// <summary>
        /// Returns the number of result pages. Currently a stub that always returns 0.
        /// </summary>
        /// <param name="state">Scraper state; not used by the current implementation.</param>
        /// <returns>Always 0.</returns>
        private int _getAmountPages(ScraperWinWinStateModel state)
        {
            // TODO(review): page detection is disabled. The previously intended code was:
            //   var page = _loadPageAsync(1, state).Result;
            //   amountPages = _detectLastNumPage(page);
            const int amountPages = 0;

            return amountPages;
        }
Пример #15
0
        /// <summary>
        /// Appends one line with the comma-separated item ids to the error log file
        /// named by <c>state.LogErrorFilename</c>.
        /// </summary>
        /// <param name="list">Item ids to record.</param>
        /// <param name="state">Scraper state providing the error log file name.</param>
        private void _regErrorScrape_ListItemIds(List<string> list, ScraperWinWinStateModel state)
        {
            var logPath = state.LogErrorFilename;
            var joined  = string.Join(",", list);

            File.AppendAllText(logPath, $"Errors itemIds:{joined}\r\n");

            _log($"Save errors");
        }
Пример #16
0
        /// <summary>
        /// Loads the list of regions and downloads the pages of each region, one region after another.
        /// </summary>
        /// <param name="state">Scraper state forwarded to the region loader and the page downloader.</param>
        private async Task _scrapePhase2Inner(ScraperWinWinStateModel state)
        {
            var regions = await _loadListRegionsAsync(state);

            // Regions are handled sequentially; _downloadRegionPages returns only when it is done.
            regions.ForEach(region => _downloadRegionPages(region, state));
        }
Пример #17
0
        /// <summary>
        /// Extracts short items from the nodes matching the CSS selectors <c>.paid</c> and
        /// <c>.TitleData</c>, in that order.
        /// </summary>
        /// <param name="document">Parsed HTML page.</param>
        /// <param name="state">Scraper state; not used by this method.</param>
        /// <returns>Items from <c>.paid</c> nodes followed by items from <c>.TitleData</c> nodes.</returns>
        private List<ShortItemDtoModel> _parsePage(HtmlDocument document, ScraperWinWinStateModel state)
        {
            var items = new List<ShortItemDtoModel>();

            foreach (var selector in new[] { ".paid", ".TitleData" })
            {
                items.AddRange(_getItemShortFromNodes(document.DocumentNode.CssSelect(selector)));
            }

            return items;
        }
Пример #18
0
        /// <summary>
        /// Quits the main browser window, if there is one, and logs that the service was closed.
        /// </summary>
        /// <param name="selenoidState">Holds the browser window to quit.</param>
        /// <param name="state">Scraper state; not used by this method.</param>
        private void _closeSelenoidBase(SelenoidStateModel selenoidState, ScraperWinWinStateModel state)
        {
            if (selenoidState.WindowMain == null)
            {
                // Nothing was opened, so there is nothing to close.
                return;
            }

            selenoidState.WindowMain.Quit();

            _log($"Close Selenoid Service done");
        }
Пример #19
0
        /// <summary>
        /// Starts a download task for every page number from 1 to <c>region.AmountPages</c>
        /// and blocks until all of them have finished.
        /// </summary>
        /// <param name="region">Region whose pages are downloaded.</param>
        /// <param name="state">Scraper state forwarded to the page downloader.</param>
        private void _downloadRegionPages(RegionModel region, ScraperWinWinStateModel state)
        {
            // One task per page; all pages of the region are started at once.
            Func<int, Task<bool>> startPage = page => Task.Run(() => _downloadRegionPageAsync(page, region, state));

            var pageTasks = Enumerable.Range(1, region.AmountPages)
                                      .Select(startPage)
                                      .ToArray();

            Task.WaitAll(pageTasks);
        }
Пример #20
0
        /// <summary>
        /// Creates the scraper around the given state, or around a fresh
        /// <c>ScraperWinWinStateModel</c> when none is supplied.
        /// </summary>
        /// <param name="state">State to use; may be null.</param>
        public ScraperWinWin(ScraperWinWinStateModel state = null)
        {
            _state = state ?? new ScraperWinWinStateModel();

            //_config = _loadScraperConfig(state);
        }
Пример #21
0
        /// <summary>
        /// Reads every file in the pages directory and concatenates the short items they contain.
        /// </summary>
        /// <param name="state">Scraper state providing <c>PagesPath</c>.</param>
        /// <returns>All items from all page files, in the order the files are returned by the directory listing.</returns>
        private List<ShortItemDtoModel> _loadShortItemsFromPages(ScraperWinWinStateModel state)
        {
            var result = new List<ShortItemDtoModel>();
            var path   = new DirectoryInfo(state.PagesPath);

            foreach (var file in path.GetFiles())
            {
                var pageItems = JsonConvert.DeserializeObject<List<ShortItemDtoModel>>(File.ReadAllText(file.FullName));

                // An empty file or a literal `null` deserializes to null, and AddRange(null) would throw.
                if (pageItems is null)
                {
                    continue;
                }

                result.AddRange(pageItems);
            }

            return result;
        }
Пример #22
0
        /// <summary>
        /// Reads the given file, deserializes it into an <c>AdItemWinWinDtoModel</c> and logs
        /// the file name without its directory.
        /// </summary>
        /// <param name="file">Path of the JSON file to read.</param>
        /// <param name="state">Scraper state; not used by this method.</param>
        /// <returns>The deserialized item.</returns>
        private async Task<AdItemWinWinDtoModel> _loadItemDtoAsync(string file, ScraperWinWinStateModel state)
        {
            var path = $"{file}";
            var json = await File.ReadAllTextAsync(path);
            var dto  = JsonConvert.DeserializeObject<AdItemWinWinDtoModel>(json);

            _log($"Load itemDto from filename:{Path.GetFileName(path)}");

            return dto;
        }
Пример #23
0
        /// <summary>
        /// Loads the list of regions from the file named by <c>state.ListRegionsFilename</c>.
        /// </summary>
        /// <param name="state">Scraper state providing the file name.</param>
        /// <returns>
        /// The deserialized regions, or an empty list when the file does not exist or
        /// contains no list. Never null.
        /// </returns>
        private async Task<List<RegionModel>> _loadListRegionsAsync(ScraperWinWinStateModel state)
        {
            var list     = new List<RegionModel>();
            var filename = $"{state.ListRegionsFilename}";

            if (File.Exists(filename))
            {
                // An empty file or a literal `null` deserializes to null; keep the empty default
                // so callers can always enumerate the result.
                list = JsonConvert.DeserializeObject<List<RegionModel>>(await File.ReadAllTextAsync(filename)) ?? list;
            }

            _log($"Load list-regions is done (file:{filename})");

            return list;
        }
Пример #24
0
        /// <summary>
        /// Builds a dictionary with one entry per page number from 1 to the value returned by
        /// <c>_getAmountPages</c>, each mapped to false, and saves it with <c>_saveListPage</c>.
        /// </summary>
        /// <param name="state">Scraper state forwarded to the page-count and save helpers.</param>
        private void _generateNewListPages(ScraperWinWinStateModel state)
        {
            _log($"Generate new list-pages");

            var pageCount = _getAmountPages(state);

            // Every page starts with the flag set to false.
            var pages = Enumerable.Range(1, pageCount)
                                  .ToDictionary(number => number, number => false);

            _log($"Generated list-pages is done");

            _saveListPage(pages, state);
        }
Пример #25
0
        /// <summary>
        /// Loads the scraper configuration from the file named by <c>state.ConfigFilename</c>;
        /// when the file does not exist, creates a default configuration and saves it.
        /// </summary>
        /// <param name="state">Scraper state providing the config file name.</param>
        /// <returns>The deserialized configuration, or the newly created default one.</returns>
        private ScraperWinWinConfigModel _loadScraperConfig(ScraperWinWinStateModel state)
        {
            var configPath = state.ConfigFilename;

            if (File.Exists(configPath))
            {
                return JsonConvert.DeserializeObject<ScraperWinWinConfigModel>(File.ReadAllText(configPath));
            }

            // First run: persist the defaults so the file exists next time.
            var defaults = new ScraperWinWinConfigModel();
            _saveScraperConfig(defaults, state);

            return defaults;
        }
Пример #26
0
        /// <summary>
        /// Reads every page file and maps each short item it contains to an <c>ItemTest</c>
        /// carrying the item id and <c>Done = false</c>.
        /// </summary>
        /// <param name="state">Scraper state forwarded to the page-file lookup.</param>
        /// <returns>One <c>ItemTest</c> per item found in the page files; no de-duplication is done.</returns>
        private List<ItemTest> _statusWorkspace_AmountItemsFromPages_GetItems(ScraperWinWinStateModel state)
        {
            var listPages = _statusWorkspace_AmountPages_GetFilesBase(state);
            var list      = new List<ItemTest>();

            foreach (var page in listPages)
            {
                var pageData = JsonConvert.DeserializeObject<List<ShortItemDtoModel>>(File.ReadAllText(page.FullName));

                // An empty file or a literal `null` deserializes to null; treat it as a page without items.
                if (pageData is null)
                {
                    continue;
                }

                list.AddRange(pageData.Select(x => new ItemTest()
                {
                    Id = x.ItemId, Done = false
                }));
            }

            return list;
        }
Пример #27
0
        /// <summary>
        /// Loads the item's page, parses it into a DTO and saves the DTO.
        /// </summary>
        /// <param name="shortItem">Item to download.</param>
        /// <param name="state">Scraper state forwarded to the load and save helpers.</param>
        /// <returns>True when all three steps succeeded; false when any of them threw (the message is logged).</returns>
        private async Task<bool> _downloadItemAsync(ShortItemDtoModel shortItem, ScraperWinWinStateModel state)
        {
            try
            {
                var page = await _loadItemPageAsync(shortItem, state);
                var dto  = await _parseItemPageAsync(page, shortItem);

                await _saveItemDtoAsync(dto, state);

                return true;
            }
            catch (Exception exception)
            {
                _log($"Error-z2. {exception.Message}");

                return false;
            }
        }
Пример #28
0
        /// <summary>
        /// Console entry point: sets the current thread culture to en-US, creates the scraper,
        /// runs <c>UpdateRepository</c> and then loads a single item DTO by a hard-coded id.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            Thread.CurrentThread.CurrentCulture = new CultureInfo("en-US");

            var state   = new ScraperWinWinStateModel { IsNew = false };
            var scraper = new ScraperWinWin(state);

            UpdateRepository();

            // Other entry points, currently disabled:
            //Scrape(scraper);
            //GetExcelFile(scraper);
            //PrintSaveStatus(scraper);

            // Blocks until the item is loaded; the result itself is not used.
            _ = scraper.GetItemDtoAsync("4389448").Result;
        }
Пример #29
0
        /// <summary>
        /// Downloads the ad page of the given item from winwin.co.il.
        /// </summary>
        /// <param name="shortItem">Item whose <c>ItemId</c> is placed into the page URL.</param>
        /// <param name="state">Scraper state; not used by this method.</param>
        /// <returns>The loaded HTML document, or null when loading threw (the message is logged).</returns>
        private async Task<HtmlDocument> _loadItemPageAsync(ShortItemDtoModel shortItem, ScraperWinWinStateModel state)
        {
            var itemId = shortItem.ItemId;

            _log($"Loading item-id:{itemId}");

            var url    = $"https://www.winwin.co.il/RealEstate/ForRent/Ads/RealEstateAds,{itemId}.aspx";
            var webGet = new HtmlWeb();

            try
            {
                return await webGet.LoadFromWebAsync(url);
            }
            catch (Exception exception)
            {
                _log($"Error-z3. {exception.Message}");

                return null;
            }
        }
Пример #30
0
        /// <summary>
        /// Loads the dictionary of list items from the file named by <c>state.ListItemsFilename</c>.
        /// When the file does not exist, an empty dictionary is saved and returned.
        /// </summary>
        /// <param name="state">Scraper state providing the file name.</param>
        /// <returns>
        /// The deserialized dictionary, or an empty one when the file is missing or contains
        /// no dictionary. Never null.
        /// </returns>
        private async Task<Dictionary<string, ShortItemDtoModel>> _loadListItemsAsync(ScraperWinWinStateModel state)
        {
            var filename = $"{state.ListItemsFilename}";

            if (File.Exists(filename))
            {
                var loaded = JsonConvert.DeserializeObject<Dictionary<string, ShortItemDtoModel>>(await File.ReadAllTextAsync(filename));

                // An empty file or a literal `null` deserializes to null; fall back to an empty
                // dictionary so callers can query the result without a null check.
                return loaded ?? new Dictionary<string, ShortItemDtoModel>();
            }

            // No file yet: create it with an empty dictionary.
            var listItems = new Dictionary<string, ShortItemDtoModel>();

            await _saveListItemsAsync(listItems, state);

            return listItems;
        }