/// <summary>
/// Fetches the finn.no real-estate search page, splits it into result-item chunks,
/// parses and validates each chunk into a <c>RealtyModel</c>, maps the valid models to
/// <c>RealtyDocument</c>s, and inserts those whose <c>RealtyId</c> is not already among
/// the documents returned by the repository for <c>RealtyRepository.FilterToday</c>.
/// </summary>
/// <returns>
/// A JSON object with the counts <c>total</c> (mapped documents), <c>newRealties</c>
/// (documents inserted), <c>duplicateRealties</c> (documents already present) and
/// <c>newToday</c> (previously stored count plus newly inserted count).
/// </returns>
public IActionResult Today() {
    var uri = new Uri("https://www.finn.no/realestate/homes/search.html?location=0.20061&published=1&rows=9999");
    var container = "<div class=\"unit flex align-items-stretch result-item\">";

    // Built once instead of once per chunk; used to drop promoted result items.
    var promoted = new Regex(@"id=""promoted-[0-9]{3,16}""");

    var content = Browser.Fetch(uri);
    var parser = new TextParser(content)
        .Strip(TextParser.WhiteSpace)
        .Strip(TextParser.ScriptTags);
    var indices = parser.FindIndices(container);
    var partitions = parser.Partition(indices);
    var chunks = parser.Chunk(partitions)
        .Where(e => !promoted.IsMatch(e));

    // Parse every chunk; keep only the models that pass validation and log the rest.
    var models = new List<RealtyModel>();
    foreach (var chunk in chunks) {
        var realty = new RealtyParser().Parse(chunk);
        if (TryValidateModel(realty)) {
            models.Add(realty);
        } else {
            // NOTE(review): if Logger is a Microsoft.Extensions.Logging ILogger, the chunk
            // is used as a message template here, so braces in it may be misread - confirm.
            Logger.LogError("Realty did not pass validation");
            Logger.LogError(chunk);
        }
    }

    // Materialize once: the mapped documents are counted, filtered and inserted below,
    // and a deferred query would re-run the mapping on every one of those passes.
    var documents = models
        .Select(e => AutoMapper.Mapper.Map<RealtyDocument>(e))
        .ToList();
    var todays = Repository.FindAny(RealtyRepository.FilterToday).ToList();

    var newRealties = documents
        .Where(e => !todays.Any(item => item.RealtyId == e.RealtyId))
        .ToList();

    // Every document is either new or a duplicate, so the duplicates are the remainder.
    var duplicateCount = documents.Count - newRealties.Count;

    if (newRealties.Count > 0) {
        Repository.InsertMany(newRealties);
    }

    return Json(new {
        total = documents.Count,
        newRealties = newRealties.Count,
        duplicateRealties = duplicateCount,
        newToday = todays.Count() + newRealties.Count,
    });
}
/// <summary>
/// Reads the saved page <c>today-180927.html</c> from the data path, splits it into
/// result-item chunks, parses and validates each chunk into a <c>RealtyModel</c>, maps
/// the valid models to <c>RealtyDocument</c>s, and compares them by <c>RealtyId</c>
/// against the documents returned by the repository for
/// <c>RealtyRepository.FilterToday</c>. Nothing is inserted by this action.
/// </summary>
/// <returns>
/// A JSON object with the counts <c>total</c>, <c>newRealties</c>,
/// <c>duplicateRealties</c> and <c>newToday</c>, plus the mapped <c>documents</c>.
/// </returns>
public IActionResult Parse() {
    var file = Utilities.ReadFile(Environment.GetDataPath(), "today-180927.html");
    var container = "<div class=\"unit flex align-items-stretch result-item\">";

    // Built once instead of once per chunk; used to drop promoted result items.
    var promoted = new Regex(@"id=""promoted-[0-9]{3,16}""");

    var parser = new TextParser(file)
        .Strip(TextParser.WhiteSpace)
        .Strip(TextParser.ScriptTags);
    var indices = parser.FindIndices(container);
    var partitions = parser.Partition(indices);
    var chunks = parser.Chunk(partitions)
        .Where(e => !promoted.IsMatch(e));

    // Parse every chunk; keep only the models that pass validation and log the rest.
    var models = new List<RealtyModel>();
    foreach (var chunk in chunks) {
        var realty = new RealtyParser().Parse(chunk);
        if (TryValidateModel(realty)) {
            models.Add(realty);
        } else {
            // NOTE(review): if Logger is a Microsoft.Extensions.Logging ILogger, the chunk
            // is used as a message template here, so braces in it may be misread - confirm.
            Logger.LogError("Realty did not pass validation");
            Logger.LogError(chunk);
        }
    }

    // Materialize once: the mapped documents are counted, filtered and serialized below,
    // and a deferred query would re-run the mapping on every one of those passes.
    var documents = models
        .Select(e => AutoMapper.Mapper.Map<RealtyDocument>(e))
        .ToList();
    var todays = Repository.FindAny(RealtyRepository.FilterToday).ToList();

    var newRealties = documents
        .Where(e => !todays.Any(item => item.RealtyId == e.RealtyId))
        .ToList();

    // Every document is either new or a duplicate, so the duplicates are the remainder.
    var duplicateCount = documents.Count - newRealties.Count;

    return Json(new {
        total = documents.Count,
        newRealties = newRealties.Count,
        duplicateRealties = duplicateCount,
        newToday = todays.Count() + newRealties.Count,
        documents = documents,
    });
}