Exemplo n.º 1
0
        /// <summary>
        /// Builds a <see cref="UsedCarViewModel"/> from a <see cref="UsedCarModel"/>.
        /// </summary>
        /// <param name="model">The car record whose attributes are copied.</param>
        /// <returns>
        /// A new view model with the same attributes; <c>Year</c> is the parsed integer
        /// value of <c>model.Year</c>, or 0 when that text is not a valid integer.
        /// </returns>
        public UsedCarViewModel CreateFrom(UsedCarModel model)
        {
            UsedCarViewModel viewModel = new UsedCarViewModel();

            // The source year is textual; fall back to the default int when it cannot be parsed.
            int parsedYear;
            if (int.TryParse(model.Year, out parsedYear))
            {
                viewModel.Year = parsedYear;
            }
            else
            {
                viewModel.Year = default(int);
            }

            viewModel.Brand          = model.Brand;
            viewModel.Model          = model.Model;
            viewModel.Body           = model.Body;
            viewModel.Gearbox        = model.Gearbox;
            viewModel.Fuel           = model.Fuel;
            viewModel.Mileage        = model.Mileage;
            viewModel.EngineCapacity = model.EngineCapacity;
            viewModel.Price          = model.Price;

            return viewModel;
        }
        /// <summary>
        /// HTTP-triggered function that estimates a used car price from the JSON request body.
        /// </summary>
        /// <param name="req">Incoming POST request; its body is deserialized into a <see cref="UsedCarModel"/>.</param>
        /// <param name="log">Logger that receives the request trace and any prediction failure.</param>
        /// <returns>
        /// An <see cref="OkObjectResult"/> wrapping the predicted price, or wrapping <c>null</c> when
        /// deserialization or prediction failed (the failure is logged rather than propagated).
        /// </returns>
        public async Task <IActionResult> Run(
            [HttpTrigger(AuthorizationLevel.Function, "post", Route = null)] HttpRequest req,
            ILogger log)
        {
            log.LogInformation("C# HTTP trigger function processed a request.");

            string requestBody;

            // Dispose the reader as soon as the body has been read.
            using (StreamReader reader = new StreamReader(req.Body))
            {
                requestBody = await reader.ReadToEndAsync();
            }

            UsedCarPricePrediction prediction = null;

            try
            {
                UsedCarModel   data    = JsonConvert.DeserializeObject <UsedCarModel>(requestBody);
                UsedCarMlModel mlModel = data.ToMlModel();
                prediction = _predictionEnginePool.Predict(modelName: "UsedCarsPricePredictionModel", example: mlModel);
            }
            catch (Exception e)
            {
                // Best effort: the caller still receives a 200 response with a null price.
                // The body is passed as a template argument so it is never interpreted as a message template.
                log.LogError(e, "Error trying to estimate the price for {RequestBody}", requestBody);
            }

            return(new OkObjectResult(prediction?.Price));
        }
        /// <summary>
        /// Downloads the advert page at <c>usedCar.Url</c> and populates the record from its HTML.
        /// </summary>
        /// <param name="usedCar">The car record to scrape; it is updated and returned.</param>
        /// <returns>
        /// The record returned by <c>ParseDocument</c> when the download succeeded, otherwise the
        /// input record marked as <see cref="ProcessingStatus.Invalid"/>. In both cases
        /// <c>LastModified</c> is refreshed.
        /// </returns>
        public override UsedCarModel ScrapeAdvert(UsedCarModel usedCar)
        {
            HttpStatusCode responseStatus = HttpStatusCode.NotFound;
            string         pageHtml       = HttpUtils.DownloadPage(usedCar.Url, ref responseStatus);

            if (responseStatus != HttpStatusCode.OK)
            {
                // The page could not be fetched, so the advert is flagged as invalid.
                usedCar.Scraped = (int)ProcessingStatus.Invalid;
            }
            else
            {
                HtmlDocument document = new HtmlDocument { OptionFixNestedTags = true };
                document.LoadHtml(pageHtml);

                // ParseErrors holds any problems reported while loading the markup; they are only logged.
                var parseErrors = document.ParseErrors;
                if (parseErrors != null && parseErrors.Any())
                {
                    Log.Debug("Error trying to scrape " + usedCar.Url);
                    foreach (var parseError in parseErrors)
                    {
                        Log.Debug(parseError.ToString());
                    }
                }

                usedCar = ParseDocument(usedCar, document, pageHtml);

                if (document.DocumentNode != null)
                {
                    usedCar.Scraped = (int)ProcessingStatus.Processed;
                }
            }

            usedCar.LastModified = DateTime.Now;

            // Program.UrlsScraped is incremented atomically for every advert handled.
            Interlocked.Increment(ref Program.UrlsScraped);

            return usedCar;
        }
        /// <summary>
        /// Scrapes all unprocessed adverts whose URL contains one of the scraper's base URLs,
        /// working through the database in batches of up to 1000 records.
        /// </summary>
        /// <param name="scraper">Scraper that supplies the base URLs and performs the per-advert scrape.</param>
        /// <remarks>
        /// The loop ends only when the query returns no rows, so it relies on
        /// <c>ScrapeAdvert</c> moving each record out of the unprocessed state.
        /// </remarks>
        public static void ScrapeAdverts(Scraper scraper)
        {
            using (var context = new UsedCarsDbContext())
            {
                for (;;)
                {
                    List<UsedCarModel> batch = context.Usedcars
                                               .Where(candidate => candidate.Scraped == (int)ProcessingStatus.Unprocessed && scraper.BaseUrls.Any(baseUrl => candidate.Url.Contains(baseUrl)))
                                               .Take(1000)
                                               .ToList();

                    // Nothing left to scrape for this scraper.
                    if (batch.Count == 0)
                    {
                        return;
                    }

                    foreach (UsedCarModel advert in batch)
                    {
                        scraper.ScrapeAdvert(advert);
                    }

                    // Persist the whole batch before fetching the next one.
                    context.SaveChanges();
                }
            }
        }
        /// <summary>
        /// Downloads the advert page at <c>usedCar.Url</c> and fills the record with the title,
        /// description, feature values, engine capacity, mileage and price found in its HTML.
        /// </summary>
        /// <param name="usedCar">The car record to scrape; it is updated in place and returned.</param>
        /// <returns>
        /// The same record. It is marked <see cref="ProcessingStatus.Processed"/> after a successful
        /// parse, or <see cref="ProcessingStatus.Invalid"/> when the page could not be downloaded.
        /// If parsing throws, the record is returned as populated so far.
        /// </returns>
        public override UsedCarModel ScrapeAdvert(UsedCarModel usedCar)
        {
            HttpStatusCode status = HttpStatusCode.NotFound;
            string         html   = HttpUtils.DownloadPage(usedCar.Url, ref status);

            if (status == HttpStatusCode.OK)
            {
                HtmlDocument htmlDoc = new HtmlDocument();
                htmlDoc.OptionFixNestedTags = true;
                htmlDoc.LoadHtml(html);

                if (htmlDoc.DocumentNode != null)
                {
                    try
                    {
                        // An <h1> with no child nodes has a null FirstChild; treat it like a missing
                        // title instead of aborting the whole scrape with a NullReferenceException.
                        usedCar.Title = htmlDoc.DocumentNode.SelectSingleNode("//h1")?.FirstChild?.InnerText;
                        usedCar.Title = HtmlUtils.SanitizeString(usedCar.Title);

                        string description = htmlDoc.DocumentNode.Descendants()
                                             .FirstOrDefault(n => n.Name == "div" && n.HasClass("offer-description__description"))
                                             ?.InnerText;
                        usedCar.Description = HtmlUtils.SanitizeString(description);

                        // Features
                        usedCar.OferitDe = ScrapeAdvertFeature("Oferit de", htmlDoc);
                        usedCar.Model    = ScrapeAdvertFeature("Model", htmlDoc);
                        usedCar.Brand    = ScrapeAdvertFeature("Brand", htmlDoc);
                        usedCar.Year     = ScrapeAdvertFeature("Anul fabricatiei", htmlDoc);
                        usedCar.Fuel     = ScrapeAdvertFeature("Fuel", htmlDoc);
                        usedCar.Gearbox  = ScrapeAdvertFeature("Cutie de viteze", htmlDoc);
                        usedCar.Body     = ScrapeAdvertFeature("Body", htmlDoc);
                        usedCar.Color    = ScrapeAdvertFeature("Color", htmlDoc);
                        usedCar.Stare    = FixCarState(ScrapeAdvertFeature("Stare", htmlDoc));

                        // Numeric fields are assigned only when the text parses; otherwise they keep their value.
                        float engineCapacity;
                        if (float.TryParse(ScrapeAdvertFeature("Capacitate cilindrica", htmlDoc, removeNonNumericCharacters: true, capacitateMotor: true), out engineCapacity))
                        {
                            usedCar.EngineCapacity = engineCapacity;
                        }

                        float mileage;
                        if (float.TryParse(ScrapeAdvertFeature("Km", htmlDoc, removeNonNumericCharacters: true), out mileage))
                        {
                            usedCar.Mileage = mileage;
                        }

                        // Price
                        string priceText = htmlDoc.DocumentNode.Descendants()
                                           .FirstOrDefault(n => n.Name == "span" && n.HasClass("offer-price__number"))
                                           ?.InnerText;
                        priceText = HtmlUtils.RemoveNonNumericCharacters(priceText);

                        float price;
                        if (float.TryParse(priceText, out price))
                        {
                            usedCar.Price = price;
                        }

                        usedCar.Scraped = (int)ProcessingStatus.Processed;
                    }
                    catch (Exception e)
                    {
                        Log.Debug(e, "Error trying to scrape " + usedCar.Url);

                        // NOTE(review): this path leaves Scraped, LastModified and the scraped
                        // counter untouched - confirm that is intended.
                        return(usedCar);
                    }
                }
            }
            else
            {
                usedCar.Scraped = (int)ProcessingStatus.Invalid;
            }
            usedCar.LastModified = DateTime.Now;

            Interlocked.Increment(ref Program.UrlsScraped);

            return(usedCar);
        }
        /// <summary>
        /// Populates <paramref name="usedCar"/> with the price, title, description and detail
        /// values found in an already loaded advert document.
        /// </summary>
        /// <param name="usedCar">The car record to fill; it is updated in place and returned.</param>
        /// <param name="htmlDoc">The parsed advert page.</param>
        /// <param name="html">The raw page markup; it is only written to the log when parsing fails.</param>
        /// <returns>
        /// The same record. Exceptions raised while parsing are caught and logged, and the record is
        /// returned as populated so far.
        /// </returns>
        private UsedCarModel ParseDocument(UsedCarModel usedCar, HtmlDocument htmlDoc, string html)
        {
            try
            {
                // Price
                var priceText = htmlDoc.DocumentNode.Descendants("strong")
                                .FirstOrDefault(d => d.Attributes.Contains("class") && d.Attributes["class"].Value.Contains("pricelabel__value"))
                                ?.InnerText;
                if (priceText != null)
                {
                    float price;
                    if (float.TryParse(HtmlUtils.RemoveNonNumericCharacters(priceText), out price))
                    {
                        usedCar.Price = price;
                    }
                }

                // Year of manufacture
                usedCar.Year = GetDetail(htmlDoc, "An de fabricatie");

                usedCar.Title       = HtmlUtils.RemoveNewLineCharacters(htmlDoc.DocumentNode.SelectSingleNode("//h1")?.InnerText);
                usedCar.Description = HtmlUtils.RemoveNewLineCharacters(htmlDoc.GetElementbyId("textContent")?.InnerText);

                // Offered by
                usedCar.OferitDe = GetDetail(htmlDoc, "Oferit de");

                // Model
                usedCar.Model = GetDetail(htmlDoc, "Model");

                // Fuel (looked up once; assumes GetDetail is a pure lookup on the document - TODO confirm)
                usedCar.Fuel = GetDetail(htmlDoc, "Fuel");

                // Body
                usedCar.Body = GetDetail(htmlDoc, "Body");

                // Condition
                usedCar.Stare = GetDetail(htmlDoc, "Stare");

                // Brand
                usedCar.Brand = GetDetail(htmlDoc, "Brand");

                // Color
                usedCar.Color = GetDetail(htmlDoc, "Color");

                // Gearbox
                usedCar.Gearbox = GetDetail(htmlDoc, "Cutie de viteze");

                // Mileage: parsed with TryParse, like the price, so malformed text skips only
                // this field instead of aborting the remaining ones.
                var mileageText = GetDetail(htmlDoc, "Turnover", true);
                float mileage;
                if (mileageText != null && float.TryParse(mileageText, out mileage))
                {
                    usedCar.Mileage = mileage;
                }

                // Engine capacity
                var engineCapacityText = GetDetail(htmlDoc, "Capacitate motor", true);
                float engineCapacity;
                if (engineCapacityText != null && float.TryParse(engineCapacityText, out engineCapacity))
                {
                    usedCar.EngineCapacity = engineCapacity;
                }

                return(usedCar);
            }
            catch (Exception e)
            {
                // Report on the console and in the log, then dump the markup that failed to parse.
                Console.WriteLine("Error trying to scrape " + usedCar.Url);
                Console.WriteLine(e.Message);
                Log.Debug(e, "Error trying to scrape " + usedCar.Url);
                Log.Information("HTML *************");
                Log.Information(html);
                Log.Information(Environment.NewLine);
                Log.Information(Environment.NewLine);

                return(usedCar);
            }
        }
Exemplo n.º 7
0
 /// <summary>
 /// Scrapes the advert for the given car; each concrete scraper supplies its own implementation.
 /// </summary>
 /// <param name="car">The car record to scrape.</param>
 /// <returns>The <see cref="UsedCarModel"/> produced by the concrete implementation.</returns>
 public abstract UsedCarModel ScrapeAdvert(UsedCarModel car);
        /// <summary>
        /// Loads the used car data and builds the inventory view model: the full car list plus
        /// the distinct, sorted year and make options, each preceded by a "None" entry.
        /// </summary>
        /// <returns>The view rendered with the populated <see cref="UsedCarModel"/>.</returns>
        public ActionResult UsedCarInventory()
        {
            IRepository  repository = new Repository();
            UsedCarModel viewModel  = new UsedCarModel();
            DataTable    table      = repository.getUsedCarsData("none", "none");

            // Full list.
            viewModel.carsdata = DBList.CovertDataTableToList <CarsData>(table);

            // Year filter: every year is turned into text first, then parsed, de-duplicated and sorted.
            List <CarYear> yearRows    = DBList.CovertDataTableToList <CarYear>(table);
            List <string>  yearTexts   = yearRows.ConvertAll(row => row.Year.ToString());
            List <int>     sortedYears = yearTexts.ConvertAll(text => int.Parse(text)).Distinct().ToList();
            sortedYears.Sort();

            List <SelectListItem> yearOptions = new List <SelectListItem>
            {
                new SelectListItem { Text = "None", Value = "none" }
            };
            foreach (int year in sortedYears)
            {
                yearOptions.Add(new SelectListItem { Value = year.ToString(), Text = year.ToString() });
            }
            viewModel.caryears = yearOptions;

            // Make filter: distinct make names in sorted order.
            List <CarMake> makeRows    = DBList.CovertDataTableToList <CarMake>(table);
            List <string>  sortedMakes = makeRows.ConvertAll(row => row.Make.ToString()).Distinct().ToList();
            sortedMakes.Sort();

            List <SelectListItem> makeOptions = new List <SelectListItem>
            {
                new SelectListItem { Text = "None", Value = "none" }
            };
            foreach (string make in sortedMakes)
            {
                makeOptions.Add(new SelectListItem { Value = make, Text = make });
            }
            viewModel.carmakes = makeOptions;

            return View(viewModel);
        }