Exemple #1
0
        /// <summary>
        /// Reads the pre-load page list from the file named by <c>state.PathListPreLoads</c>.
        /// </summary>
        /// <param name="state">Scraper state that supplies the path of the list file.</param>
        /// <returns>
        /// Whatever the JSON deserializer produced for the file content, or <c>null</c> when the
        /// file does not exist or reading/deserializing it throws (the error is logged, not rethrown).
        /// </returns>
        private async Task<Dictionary<int, bool>> _loadListPreLoadsAsync(ScraperYad2StateModel state)
        {
            _log($"Load list-pre-loads starting");
            var path = $"{state.PathListPreLoads}";

            // Nothing to load: report the missing file and leave early.
            if (!File.Exists(path))
            {
                _log($"no file {path}");
                return null;
            }

            Dictionary<int, bool> loaded = null;

            try
            {
                var json = await File.ReadAllTextAsync(path);
                loaded = JsonConvert.DeserializeObject<Dictionary<int, bool>>(json);
                _log($"Load list-pre-loads is complete");
            }
            catch (Exception exception)
            {
                _log($"Load list-pre-loads is fail");
                _log($"Error L2. {exception.Message}");
            }

            return loaded;
        }
        /// <summary>
        /// Runs the Yad2 pipeline end to end: optionally scrapes, builds the domain model,
        /// renders it to Excel, saves the file and hands the saved path to the archive repository.
        /// </summary>
        /// <param name="isNew">Value copied into <c>ScraperYad2StateModel.IsNew</c> for this run.</param>
        /// <param name="needScrape">When <c>false</c>, the scrape step is skipped and the domain model is built directly.</param>
        private void _yad2ScrapeThenSaveStore(bool isNew = false, bool needScrape = true)
        {
            _log($"Start Yad2ScrapeThenSaveStore (isNew={isNew})");

            var runState = new ScraperYad2StateModel { IsNew = isNew };
            var yad2 = new ScraperYad2(runState);

            if (needScrape)
            {
                yad2.Scrape();
            }

            var domainModel = yad2.GetDomainModel();

            // Render the domain model to an Excel document and persist it.
            var excel = new ExcelYad2Service(runState);
            var workbook = excel.CreateExcel(domainModel);
            var savedPath = excel.SaveToFile(workbook);

            new ArchiveRepository().Save(savedPath, runState.TypeScraper);

            _log($"End Yad2ScrapeThenSaveStore (isNew={isNew}), Spent time {_calcSpentTime2String(runState)}");
        }
Exemple #3
0
        /// <summary>
        /// Reads the item list from the file named by <c>state.PathListItems</c>.
        /// </summary>
        /// <param name="state">Scraper state that supplies the path of the list file.</param>
        /// <returns>
        /// Whatever the JSON deserializer produced for the file content, or <c>null</c> when the
        /// file does not exist or reading/deserializing it throws (the failure is logged, not rethrown).
        /// </returns>
        private async Task<Dictionary<string, bool>> _loadListItemsAsync(ScraperYad2StateModel state)
        {
            Dictionary<string, bool> result = null;

            _log($"Load list-items");

            try
            {
                var filename = state.PathListItems;

                if (File.Exists(filename))
                {
                    result = JsonConvert.DeserializeObject<Dictionary<string, bool>>(await File.ReadAllTextAsync(filename));
                    _log($"Load list-items is completed");
                }
                else
                {
                    // A missing file is an expected outcome, not an exceptional one: report it
                    // directly instead of throwing a bare Exception just to reach the catch block
                    // (which only produced a generic, uninformative message).
                    _log($"Load list-items is fail");
                    _log($"Error L1. no file {filename}");
                }
            }
            catch (Exception exception)
            {
                _log($"Load list-items is fail");
                _log($"Error L1. {exception.Message}");
            }

            return result;
        }
Exemple #4
0
        /// <summary>
        /// Downloads one pre-load feed page into <c>{state.PathPreLoads}/page-{page}.json</c>.
        /// </summary>
        /// <param name="page">Page number placed in the request URL and in the target file name.</param>
        /// <param name="state">Scraper state that supplies the pre-loads directory.</param>
        /// <returns>The value returned by <c>_downloadFilenameAsync</c> for this URL/file pair.</returns>
        private async Task<bool> _downloadPreLoadAsync(int page, ScraperYad2StateModel state)
        {
            var sourceUrl = $"https://www.yad2.co.il/api/pre-load/getFeedIndex/realestate/rent?page={page}&compact-req=1";
            var targetFile = $"{state.PathPreLoads}/page-{page}.json";

            return await _downloadFilenameAsync(sourceUrl, targetFile);
        }
Exemple #5
0
        /// <summary>
        /// Downloads a single item document into <c>{state.ItemsPath}/{item}.json</c>.
        /// </summary>
        /// <param name="item">Item identifier placed in the request URL and in the target file name.</param>
        /// <param name="state">Scraper state that supplies the items directory.</param>
        /// <returns>The value returned by <c>_downloadFilenameAsync</c> for this URL/file pair.</returns>
        private async Task<bool> _downloadItemAsync(string item, ScraperYad2StateModel state)
        {
            var sourceUrl = $"https://www.yad2.co.il/api/item/{item}";
            var targetFile = $"{state.ItemsPath}/{item}.json";

            return await _downloadFilenameAsync(sourceUrl, targetFile);
        }
Exemple #6
0
        /// <summary>
        /// Counts the distinct, non-null item ids found in the downloaded pages,
        /// comparing ids case-insensitively (ordinal).
        /// </summary>
        /// <param name="state">Scraper state forwarded to <c>_statusWorkspace_AmountItemsFromPages_GetItems</c>.</param>
        /// <returns>The number of unique ids; items whose id is <c>null</c> are not counted.</returns>
        private int _statusWorkspace_AmountItemUniquesFromPages(ScraperYad2StateModel state)
        {
            var items = _statusWorkspace_AmountItemsFromPages_GetItems(state);

            // Filter null ids up front instead of mapping them to a "NULL" sentinel key:
            // the sentinel excluded a genuine id spelled "NULL" from the count and could collide
            // with it when building the intermediate dictionary (duplicate-key ArgumentException).
            return items
                .Select(x => x.Id)
                .Where(id => id != null)
                .Distinct(StringComparer.OrdinalIgnoreCase)
                .Count();
        }
Exemple #7
0
        /// <summary>
        /// Serializes <paramref name="config"/> as indented JSON and writes it to <c>state.ConfigFilename</c>.
        /// </summary>
        /// <param name="config">Configuration object to persist.</param>
        /// <param name="state">Scraper state that supplies the config file name.</param>
        private void _saveConfig(ScraperYad2ConfigModel config, ScraperYad2StateModel state)
        {
            var path = state.ConfigFilename;

            _log($"Start saving config:{path}");

            var json = JsonConvert.SerializeObject(config, Newtonsoft.Json.Formatting.Indented);
            File.WriteAllText(path, json);

            _log($"Save config:{path} is done");
        }
Exemple #8
0
        /// <summary>
        /// Phase 1: makes sure a pre-load page list exists. When no list can be loaded, downloads
        /// page 1, reads the last page number from its pagination data, and saves a list with one
        /// entry per page (page 1 already marked as done).
        /// </summary>
        /// <param name="state">Scraper state forwarded to the load/download/save helpers.</param>
        /// <returns>A task that completes when the list has been saved, was already present, or all download attempts failed.</returns>
        private async Task _scrapePhase1_GenerateListPreLoadsAsync(ScraperYad2StateModel state)
        {
            SetWorkPhaseBase($"GenerateListPreLoads", state);
            var listPreLoads = await _loadListPreLoadsAsync(state);

            if (listPreLoads != null)
            {
                // An existing list was loaded, so generation is skipped.
                _log($"Generate list-pre-loads is missing");
                return;
            }

            _log($"Generate new list-pre-loads");

            const int tryCountMax = 10;
            // Single source of truth for the pause: the old log claimed 10 sec while the code waited 3.
            var retryPause = TimeSpan.FromSeconds(3);

            for (var attempt = 1; attempt <= tryCountMax; attempt++)
            {
                var isDoneGetPage1 = await _downloadPreLoadAsync(page: 1, state: state);

                if (isDoneGetPage1)
                {
                    var page1Dto = _getPreloadFromFilestore(page: 1, state: state);
                    var lastPage = int.Parse(page1Dto.Pagination.LastPage);

                    _log($"Detect {lastPage} pages");

                    var list = Enumerable.Range(1, lastPage).ToDictionary(pageNumber => pageNumber, pageNumber => false);

                    // Page 1 has just been downloaded, so it does not need to be fetched again.
                    list[1] = true;

                    await _saveListPreLoadsAsync(list, state);
                    return;
                }

                if (attempt < tryCountMax)
                {
                    _log($"Try count {attempt + 1}/{tryCountMax}. Pause {retryPause.TotalSeconds} sec.");

                    // Asynchronous wait: Thread.Sleep would block the calling thread inside an async method.
                    await Task.Delay(retryPause);
                }
                else
                {
                    _log($"Tried {attempt} times to download PreLoad Page1. Stop scrap process.");
                }
            }
        }
Exemple #9
0
        /// <summary>
        /// Creates the scraper, falling back to a fresh <c>ScraperYad2StateModel</c> when no state
        /// is supplied, and loads the configuration for that state.
        /// </summary>
        /// <param name="state">State to use; <c>null</c> means "create a default one".</param>
        public ScraperYad2(ScraperYad2StateModel state = null)
        {
            _state = state ?? new ScraperYad2StateModel();
            _config = _loadConfig((ScraperYad2StateModel)_state);
        }
Exemple #10
0
 /// <summary>
 /// Applies command-line switches to <paramref name="state"/>. The only recognized switch is
 /// <c>-new</c> (matched case-insensitively), which sets <c>state.IsNew</c> to <c>true</c>.
 /// </summary>
 /// <param name="args">Command-line arguments; a <c>null</c> array or <c>null</c> elements are ignored.</param>
 /// <param name="state">State object that receives the parsed flags.</param>
 private static void _parseParams(string[] args, ScraperYad2StateModel state)
 {
     if (args == null)
     {
         return;
     }

     foreach (var arg in args)
     {
         // Ordinal, case-insensitive comparison: no culture-dependent lowercasing, no per-argument
         // string allocation, and a null element simply does not match.
         if (string.Equals(arg, "-new", System.StringComparison.OrdinalIgnoreCase))
         {
             state.IsNew = true;
         }
     }
 }
Exemple #11
0
        /// <summary>
        /// Phase 5: makes sure an items-contacts list exists. When none can be loaded, collects
        /// the non-empty item ids from every file in <c>state.PathPreLoads</c>, writes repeated ids
        /// to <c>state.ListItemsContactsDublicatesFilename</c>, and saves the unique ids
        /// (all marked as not done) through <c>_saveListItemsContactsAsync</c>.
        /// </summary>
        /// <param name="state">Scraper state that supplies the directories and file names used here.</param>
        /// <returns>A task that completes when the list has been saved or was already present.</returns>
        private async Task _scrapePhase5_GenerateListItemsContactsAsync(ScraperYad2StateModel state)
        {
            SetWorkPhaseBase($"GenerateListItemsContacts", state);
            var listItemsContacts = await _loadListItemsContactsAsync(state);

            if (listItemsContacts != null)
            {
                // An existing list was loaded, so generation is skipped.
                _log($"Generate list items-contacts is missing");
                return;
            }

            _log($"Generate new list items-contacts");

            listItemsContacts = new Dictionary<string, bool>();
            var dublicates = new List<string>();

            foreach (var file in Directory.GetFiles(state.PathPreLoads))
            {
                // Asynchronous read so the method does not block its thread on disk I/O.
                var fileData = await File.ReadAllTextAsync(file);
                var data = JsonConvert.DeserializeObject<PreloadDtoModel>(fileData);

                // A file that deserializes to null or lacks the feed would otherwise abort the
                // whole phase with a NullReferenceException; report it and move on.
                var feedItems = data?.Feed?.feed_items;
                if (feedItems == null)
                {
                    _log($"Skip pre-load file without feed items: {file}");
                    continue;
                }

                var ids = feedItems.Where(x => !string.IsNullOrEmpty(x.id)).Select(x => x.id);

                foreach (var id in ids)
                {
                    // TryAdd does the membership test and the insert in a single lookup.
                    if (!listItemsContacts.TryAdd(id, false))
                    {
                        dublicates.Add(id);
                    }
                }
            }

            _log($"Generate new list items-contacts is completed");

            _log($"Total uniq:{listItemsContacts.Count}, Dublicate:{dublicates.Count}, Total:{listItemsContacts.Count + dublicates.Count}");

            await File.WriteAllTextAsync($"{state.ListItemsContactsDublicatesFilename}", JsonConvert.SerializeObject(dublicates, Formatting.Indented));

            await _saveListItemsContactsAsync(listItemsContacts, state);
        }
Exemple #12
0
        /// <summary>
        /// Loads the scraper configuration from <c>state.ConfigFilename</c>. When the file does not
        /// exist, a default configuration is created, saved through <c>_saveConfig</c> and returned.
        /// </summary>
        /// <param name="state">Scraper state that supplies the config file name.</param>
        /// <returns>
        /// The stored configuration, or a default <c>ScraperYad2ConfigModel</c> when the file is
        /// missing or its content deserializes to <c>null</c>. Never <c>null</c>.
        /// </returns>
        private ScraperYad2ConfigModel _loadConfig(ScraperYad2StateModel state)
        {
            var filename = state.ConfigFilename;

            if (!File.Exists(filename))
            {
                var defaults = new ScraperYad2ConfigModel();
                _saveConfig(defaults, state);
                return defaults;
            }

            var stored = JsonConvert.DeserializeObject<ScraperYad2ConfigModel>(File.ReadAllText(filename));

            // The deserializer can hand back null (e.g. for an empty file); fall back to in-memory
            // defaults rather than returning null. The existing file is deliberately left untouched.
            return stored ?? new ScraperYad2ConfigModel();
        }
Exemple #13
0
        /// <summary>
        /// Phase 2: loads the pre-load page list and, if it is available, downloads the pages it lists.
        /// </summary>
        /// <param name="state">Scraper state forwarded to the load and download helpers.</param>
        /// <returns><c>false</c> when the page list could not be loaded; otherwise <c>true</c>.</returns>
        private async Task<bool> _scrapePhase2_GetPreLoads(ScraperYad2StateModel state)
        {
            SetWorkPhaseBase($"GetPreLoads", state);

            var pages = await _loadListPreLoadsAsync(state);

            if (pages == null)
            {
                _log($"Fail load list-pre-loads");
                return false;
            }

            await _scrapePhase2_DownloadsPreLoadsAsync(pages, state);
            return true;
        }
Exemple #14
0
        /// <summary>
        /// Counts the item files whose content cannot be read or deserialized as <c>Phase3ObjectDto</c>.
        /// </summary>
        /// <param name="state">Scraper state forwarded to <c>_statusWorkspace_AmountItemsFromPath_GetFilesBase</c>.</param>
        /// <returns>The number of files for which reading or deserialization threw.</returns>
        private int _statusWorkspace_AmountItemsWithWrongDataFromPath(ScraperYad2StateModel state)
        {
            var files = _statusWorkspace_AmountItemsFromPath_GetFilesBase(state);
            var brokenCount = 0;

            foreach (var file in files)
            {
                try
                {
                    // Only whether this throws matters; the deserialized object itself is not used.
                    JsonConvert.DeserializeObject<Phase3ObjectDto>(File.ReadAllText(file.FullName));
                }
                catch (Exception exception)
                {
                    brokenCount++;
                    _log($"Error w1. {exception.Message}");
                }
            }

            return brokenCount;
        }
Exemple #15
0
        /// <summary>
        /// Phase 4: loads the item list and, if it is available, downloads the items it lists.
        /// </summary>
        /// <param name="state">Scraper state forwarded to the load and download helpers.</param>
        /// <returns><c>false</c> when the item list could not be loaded; otherwise <c>true</c>.</returns>
        private async Task<bool> _scrapePhase4_GetItems(ScraperYad2StateModel state)
        {
            SetWorkPhaseBase($"GetItems", state);

            var items = await _loadListItemsAsync(state);

            if (items == null)
            {
                _log($"Fail load list-items");
                return false;
            }

            await _scrapePhase4_DownloadsItemsAsync(items, state);
            return true;
        }
Exemple #16
0
        /// <summary>
        /// Phase 6: loads the items-contacts list and, if it is available, downloads the contacts it lists.
        /// </summary>
        /// <param name="state">Scraper state forwarded to the load and download helpers.</param>
        /// <returns><c>false</c> when the items-contacts list could not be loaded; otherwise <c>true</c>.</returns>
        private async Task<bool> _scrapePhase6_GetItemsContacts(ScraperYad2StateModel state)
        {
            var result = true;

            SetWorkPhaseBase($"GetItemsContacts", state);

            var listItemsContacts = await _loadListItemsContactsAsync(state);

            if (listItemsContacts != null)
            {
                await _scrapePhase6_DownloadsItemsContactsAsync(listItemsContacts, state);
            }
            else
            {
                // Report through _log like the sibling phases (this was a stray Console.WriteLine),
                // and name the list that actually failed to load.
                _log($"Fail load list items-contacts");
                result = false;
            }

            return result;
        }
Exemple #17
0
        /// <summary>
        /// Runs scrape phases 1 through 6 in order.
        /// </summary>
        /// <param name="state">Scraper state forwarded to every phase.</param>
        /// <returns>
        /// <c>true</c> only when no phase threw and phases 2, 4 and 6 all reported success;
        /// <c>false</c> otherwise. Exceptions are logged here and not rethrown.
        /// </returns>
        private async Task<bool> _scrapingAsync(ScraperYad2StateModel state)
        {
            var result = false;

            try
            {
                // Every phase still runs, as before; the difference is that the outcomes reported
                // by phases 2, 4 and 6 are no longer discarded when computing the overall result.
                await _scrapePhase1_GenerateListPreLoadsAsync(state);
                var preLoadsOk = await _scrapePhase2_GetPreLoads(state);
                await _scrapePhase3_GenerateListItems(state);
                var itemsOk = await _scrapePhase4_GetItems(state);
                await _scrapePhase5_GenerateListItemsContactsAsync(state);
                var contactsOk = await _scrapePhase6_GetItemsContacts(state);

                result = preLoadsOk && itemsOk && contactsOk;
            }
            catch (Exception exception)
            {
                _log($"Erro-x1. {exception.Message} / {exception.StackTrace}");
            }

            return result;
        }
Exemple #18
0
        /// <summary>
        /// Builds one <c>ItemTest</c> (with <c>Done = false</c>) for every feed item found in the
        /// page files returned by <c>_statusWorkspace_AmountPages_GetFilesBase</c>.
        /// </summary>
        /// <param name="state">Scraper state forwarded to the page-file lookup.</param>
        /// <returns>All items from all pages, in file order; duplicates are kept.</returns>
        private List<ItemTest> _statusWorkspace_AmountItemsFromPages_GetItems(ScraperYad2StateModel state)
        {
            var collected = new List<ItemTest>();

            foreach (var pageFile in _statusWorkspace_AmountPages_GetFilesBase(state))
            {
                var json = File.ReadAllText(pageFile.FullName);
                var pageDto = JsonConvert.DeserializeObject<PreloadDtoModel>(json);

                var pageItems = pageDto.Feed.feed_items
                    .Select(x => new ItemTest { Id = x.id, Done = false })
                    .ToList();

                collected.AddRange(pageItems);
            }

            return collected;
        }
Exemple #19
0
        /// <summary>
        /// Program entry point: pins the current thread culture to en-US, constructs a scraper
        /// with <c>IsNew = true</c>, and runs <c>UpdateRepository</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (not read by this method).</param>
        static void Main(string[] args)
        {
            Thread.CurrentThread.CurrentCulture = new CultureInfo("en-US");

            var initialState = new ScraperYad2StateModel { IsNew = true };

            // Only the disabled calls below use this instance; it is still constructed as before.
            var scraper = new ScraperYad2(initialState);

            UpdateRepository();

            // Alternative actions, currently disabled:
            //Scrape(scraper);
            //GetExcelFile(scraper);
            //PrintSaveStatus(scraper);
            //SaveDomainModel(scraper);
            //LoadDomainModelFromFile(scraper);
        }
Exemple #20
0
        /// <summary>
        /// Reads <c>{state.PathPreLoads}/page-{page}.json</c> and deserializes it as <c>PreloadDtoModel</c>.
        /// </summary>
        /// <param name="page">Page number used to build the file name.</param>
        /// <param name="state">Scraper state that supplies the pre-loads directory.</param>
        /// <returns>The deserialized page document.</returns>
        private PreloadDtoModel _getPreloadFromFilestore(int page, ScraperYad2StateModel state)
        {
            var path = $"{state.PathPreLoads}/page-{page}.json";
            var json = File.ReadAllText(path);

            return JsonConvert.DeserializeObject<PreloadDtoModel>(json);
        }
Exemple #21
0
 /// <summary>
 /// Writes <paramref name="list"/> as indented JSON to the file named by <c>state.PathListItems</c>.
 /// </summary>
 /// <param name="list">Item-id-to-flag dictionary to persist.</param>
 /// <param name="state">Scraper state that supplies the target path.</param>
 private async Task _saveListItemsAsync(Dictionary<string, bool> list, ScraperYad2StateModel state)
 {
     _log($"Save {state.PathListItems}");

     var target = $"{state.PathListItems}";
     var json = JsonConvert.SerializeObject(list, Formatting.Indented);

     await File.WriteAllTextAsync(target, json);
 }
Exemple #22
0
        /// <summary>
        /// Returns the total number of items found across all pages, duplicates included.
        /// </summary>
        /// <param name="state">Scraper state forwarded to <c>_statusWorkspace_AmountItemsFromPages_GetItems</c>.</param>
        /// <returns>The length of the collected item list.</returns>
        private int _statusWorkspace_AmountItemsFromPages(ScraperYad2StateModel state)
            => _statusWorkspace_AmountItemsFromPages_GetItems(state).Count;
Exemple #23
0
        /// <summary>
        /// Writes <paramref name="list"/> as indented JSON to the file named by <c>state.PathListPreLoads</c>.
        /// </summary>
        /// <param name="list">Page-number-to-flag dictionary to persist.</param>
        /// <param name="state">Scraper state that supplies the target path.</param>
        private async Task _saveListPreLoadsAsync(Dictionary<int, bool> list, ScraperYad2StateModel state)
        {
            _log($"Save list-pre-loads: {state.PathListPreLoads}");

            var target = $"{state.PathListPreLoads}";
            var json = JsonConvert.SerializeObject(list, Formatting.Indented);

            await File.WriteAllTextAsync(target, json);
        }
Exemple #24
0
        /// <summary>
        /// Phase 7: builds the domain model by reading, for every id in the item list, the item
        /// file and its contacts file and combining them via <c>AdItemYad2DomainModel.FromDto</c>.
        /// </summary>
        /// <param name="state">Scraper state that supplies the list and the item/contact directories.</param>
        /// <returns>
        /// The successfully converted items, or <c>null</c> when the item list is missing or empty.
        /// Items that fail to read or convert are skipped and recorded in the final log line.
        /// </returns>
        private async Task<List<AdItemYad2DomainModel>> _scrapePhase7_GenerateDomainModelAsync(ScraperYad2StateModel state)
        {
            var itemIds = await _loadListItemsAsync(state) ?? new Dictionary<string, bool>();

            // The output list is only created when there is at least one id to process.
            var domainItems = itemIds.Count > 0 ? new List<AdItemYad2DomainModel>() : null;

            var report = new StringBuilder(1000);
            var processed = 0;

            foreach (var id in itemIds.Keys)
            {
                try
                {
                    var itemJson = await File.ReadAllTextAsync($"{state.ItemsPath}/{id}.json");
                    var itemDto = JsonConvert.DeserializeObject<Phase3ObjectDto>(itemJson);

                    var contactsJson = await File.ReadAllTextAsync($"{state.PathItemsContacts}/item-contacts-{id}.json");
                    var contactsDto = JsonConvert.DeserializeObject<Phase3ObjectContactsDto>(contactsJson);

                    domainItems.Add(new AdItemYad2DomainModel().FromDto(itemDto, contactsDto));

                    report.Append($",{id}=ok");
                }
                catch (Exception exception)
                {
                    report.Append($",{id}=fail, {exception.Message}");
                }

                processed++;

                // Progress message every 3000 processed ids.
                if (processed % 3000 == 0)
                {
                    _log($"Parsed {processed} items");
                }
            }

            _log($"Parsed items are {report}");

            return domainItems;
        }
Exemple #25
0
        /// <summary>
        /// Downloads every page in <paramref name="list"/> that is not yet flagged as done, in
        /// batches of <c>state.CountScrapers</c> concurrent downloads, marking each page done as
        /// soon as its download succeeds and saving the list as it goes.
        /// </summary>
        /// <remarks>
        /// Failed pages stay pending and are retried after a pause of
        /// <c>state.CountWaitSecondForFailRequest</c> seconds. There is no retry cap: the method
        /// loops until every page is flagged as done.
        /// </remarks>
        /// <param name="list">Page-number-to-done map; updated in place.</param>
        /// <param name="state">Scraper state that supplies batch size, retry pause and paths.</param>
        /// <returns>A task that completes once all pages are flagged as done and the list is saved.</returns>
        private async Task _scrapePhase2_DownloadsPreLoadsAsync(Dictionary<int, bool> list, ScraperYad2StateModel state)
        {
            Func<List<int>> getPending = () => list.Where(x => !x.Value).Select(x => x.Key).ToList();
            Action showStat = () =>
            {
                var done = list.Count(x => x.Value);
                _log($"Downloads: done: {done}, balance: {list.Count - done}, total: {list.Count}");
            };

            showStat();

            // A non-positive batch size would take zero pages per round and spin forever.
            var amountGetters = state.CountScrapers < 1 ? 1 : state.CountScrapers;

            var pending = getPending();

            if (pending.Count > 0)
            {
                _log($"Use {amountGetters} threads");
            }

            while (pending.Count > 0)
            {
                var batch = pending.Take(amountGetters).ToList();

                // Task.Run is kept so the downloads of a batch start concurrently even if the
                // download helper does synchronous work before its first await.
                var tasks = batch
                    .Select(page => Task.Run(() => _downloadPreLoadAsync(page, state)))
                    .ToList();

                // Persist the progress of earlier batches while this batch is downloading.
                await _saveListPreLoadsAsync(list, state);

                try
                {
                    // Await instead of the blocking Task.WaitAll: its 5-minute timeout result was
                    // ignored and the following .Result reads blocked anyway, so it had no effect.
                    await Task.WhenAll(tasks);
                }
                catch (Exception exception)
                {
                    _log($"Error D2. {exception.Message}");
                }

                // Every task has finished by now (successfully or not). Judge each page on its
                // own so that one faulted download no longer discards the rest of the batch.
                var failedPages = new List<int>();

                for (var i = 0; i < batch.Count; i++)
                {
                    var task = tasks[i];

                    if (task.Status == TaskStatus.RanToCompletion && task.Result)
                    {
                        list[batch[i]] = true;
                    }
                    else
                    {
                        failedPages.Add(batch[i]);
                    }
                }

                if (failedPages.Count > 0)
                {
                    // Name the pages that really failed; the old code indexed past the end of the
                    // batch and therefore always reported page 0.
                    _log($"Error pass page {string.Join(", ", failedPages)}");
                    _log($"Pause {state.CountWaitSecondForFailRequest} sec");

                    // Asynchronous wait: Thread.Sleep would block the calling thread inside an async method.
                    await Task.Delay(TimeSpan.FromSeconds(state.CountWaitSecondForFailRequest));
                }

                showStat();

                pending = getPending();
            }

            _log($"List pre-loads completed");
            await _saveListPreLoadsAsync(list, state);
        }