Esempio n. 1
0
        /// <summary>
        /// Fetches the finn.no real-estate search result page, extracts the listings from
        /// the HTML, inserts the ones whose <c>RealtyId</c> is not among the documents
        /// returned by <c>RealtyRepository.FilterToday</c>, and reports the counts.
        /// </summary>
        /// <returns>
        /// A JSON object with <c>total</c> (validated listings found on the page),
        /// <c>newRealties</c> (listings inserted by this call), <c>duplicateRealties</c>
        /// (listings already stored) and <c>newToday</c> (previously stored + newly inserted).
        /// </returns>
        public IActionResult Today()
        {
            var uri       = new Uri("https://www.finn.no/realestate/homes/search.html?location=0.20061&published=1&rows=9999");
            var container = "<div class=\"unit flex align-items-stretch result-item\">";
            var content   = Browser.Fetch(uri);

            var parser = new TextParser(content)
                         .Strip(TextParser.WhiteSpace)
                         .Strip(TextParser.ScriptTags);

            // Every occurrence of the container marker starts one result item.
            var indices    = parser.FindIndices(container);
            var partitions = parser.Partition(indices);

            // Build the pattern once instead of once per chunk.
            var promotedPattern = new Regex(@"id=""promoted-[0-9]{3,16}""");

            // Chunks carrying a "promoted-<digits>" id are excluded from the result.
            var chunks = parser.Chunk(partitions)
                         .Where(e => !promotedPattern.IsMatch(e));

            var models = new List<RealtyModel>();

            foreach (var chunk in chunks)
            {
                var realty = new RealtyParser().Parse(chunk);

                if (TryValidateModel(realty))
                {
                    models.Add(realty);
                }
                else
                {
                    Logger.LogError("Realty did not pass validation");
                    Logger.LogError(chunk);
                }
            }

            // Materialize the mapping: a deferred Select would re-run AutoMapper on every
            // enumeration and hand out different instances each time.
            var documents = models.Select(e => AutoMapper.Mapper.Map<RealtyDocument>(e)).ToList();
            var todays    = Repository.FindAny(RealtyRepository.FilterToday).ToList();

            // Split into new/duplicate in a single pass so each document is compared
            // against the stored ones exactly once.
            var newRealties    = new List<RealtyDocument>();
            var duplicateCount = 0;

            foreach (var document in documents)
            {
                if (todays.Any(item => item.RealtyId == document.RealtyId))
                {
                    duplicateCount++;
                }
                else
                {
                    newRealties.Add(document);
                }
            }

            if (newRealties.Any())
            {
                Repository.InsertMany(newRealties);
            }

            return Json(new {
                total = documents.Count,
                newRealties = newRealties.Count,
                duplicateRealties = duplicateCount,
                newToday = todays.Count + newRealties.Count,
            });
        }
Esempio n. 2
0
        /// <summary>
        /// Parses the stored HTML snapshot "today-180927.html" from the data path the same
        /// way a live result page is parsed, and reports how its listings compare with the
        /// documents returned by <c>RealtyRepository.FilterToday</c>. Nothing is written to
        /// the repository.
        /// </summary>
        /// <returns>
        /// A JSON object with <c>total</c> (validated listings found in the file),
        /// <c>newRealties</c> (listings not yet stored), <c>duplicateRealties</c> (listings
        /// already stored), <c>newToday</c> (stored + not yet stored) and <c>documents</c>
        /// (all mapped listings).
        /// </returns>
        public IActionResult Parse()
        {
            var file      = Utilities.ReadFile(Environment.GetDataPath(), "today-180927.html");
            var container = "<div class=\"unit flex align-items-stretch result-item\">";

            var parser = new TextParser(file)
                         .Strip(TextParser.WhiteSpace)
                         .Strip(TextParser.ScriptTags);

            // Every occurrence of the container marker starts one result item.
            var indices    = parser.FindIndices(container);
            var partitions = parser.Partition(indices);

            // Build the pattern once instead of once per chunk.
            var promotedPattern = new Regex(@"id=""promoted-[0-9]{3,16}""");

            // Chunks carrying a "promoted-<digits>" id are excluded from the result.
            var chunks = parser.Chunk(partitions)
                         .Where(e => !promotedPattern.IsMatch(e));

            var models = new List<RealtyModel>();

            foreach (var chunk in chunks)
            {
                var realty = new RealtyParser().Parse(chunk);

                if (TryValidateModel(realty))
                {
                    models.Add(realty);
                }
                else
                {
                    Logger.LogError("Realty did not pass validation");
                    Logger.LogError(chunk);
                }
            }

            // Materialize the mapping: the list is counted, compared and serialized below,
            // and a deferred Select would re-run AutoMapper for each of those passes.
            var documents = models.Select(e => AutoMapper.Mapper.Map<RealtyDocument>(e)).ToList();
            var todays    = Repository.FindAny(RealtyRepository.FilterToday).ToList();

            // Only the counts are reported, so tally new/duplicate in a single pass.
            var newCount       = 0;
            var duplicateCount = 0;

            foreach (var document in documents)
            {
                if (todays.Any(item => item.RealtyId == document.RealtyId))
                {
                    duplicateCount++;
                }
                else
                {
                    newCount++;
                }
            }

            return Json(new {
                total = documents.Count,
                newRealties = newCount,
                duplicateRealties = duplicateCount,
                newToday = todays.Count + newCount,
                documents = documents,
            });
        }