/// <summary>
/// Loads the vehicles stored for the matching extraction with the highest Id.
/// </summary>
/// <param name="arguments">Make, model, From and To values the stored extraction must equal.</param>
/// <param name="sourceSystem">Source-system value the stored extraction must equal.</param>
/// <returns>
/// A new <see cref="ExtractionResults"/> holding one vehicle per stored row, or
/// <c>null</c> when no stored extraction matches.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="arguments"/> is <c>null</c>.</exception>
public IExtractionResults Read(IExtractionArguments arguments, string sourceSystem)
        {
            if (arguments == null) throw new ArgumentNullException("arguments");

            // FirstOrDefault fetches the newest match (highest Id) in a single query; the
            // previous Any() probe followed by First() queried the same data twice.
            var mostRecentExtractionResult = _dataContext.ExtractionResultsViews
                .Where(sqlResult => sqlResult.MakeName == arguments.Make &&
                    sqlResult.ModelName == arguments.Model &&
                    sqlResult.From == arguments.From &&
                    sqlResult.To == arguments.To &&
                    sqlResult.SourceSystem == sourceSystem)
                .OrderByDescending(p => p.Id)
                .FirstOrDefault();

            // Callers receive null (not an empty result) when nothing was stored.
            if (mostRecentExtractionResult == null) return null;

            IExtractionResults results = new ExtractionResults();

            var vehicles = _dataContext.Vehicles.Where(p => p.ExtractionResults_FK == mostRecentExtractionResult.Id);
            foreach (var vehicle in vehicles)
            {
                // Make and model come from the extraction header, not from the vehicle row.
                results.Vehicles.Add(new Core.Extraction.Vehicle()
                {
                    Make = mostRecentExtractionResult.MakeName,
                    Model = mostRecentExtractionResult.ModelName,
                    Milage = vehicle.Milage.ToString(),
                    Price = Convert.ToDouble(vehicle.Price),
                    Title = vehicle.Title,
                    Year = vehicle.Year
                });
            }

            return results;
        }
        /// <summary>
        /// Scrapes the first results page, then every remaining page in parallel,
        /// appending all scraped vehicles to <paramref name="extractionResults"/>.
        /// </summary>
        /// <param name="args">Arguments handed to the page scraper for URL building and scraping.</param>
        /// <param name="extractionResults">Receives the vehicles; its Start and Stop methods bracket the run.</param>
        /// <exception cref="ArgumentNullException"><paramref name="extractionResults"/> is <c>null</c>.</exception>
        public void Extract(IExtractionArguments args, IExtractionResults extractionResults)
        {
            if (extractionResults == null)
            {
                throw new ArgumentNullException("extractionResults");
            }

            _log.Debug("Extraction starting");
            extractionResults.Start();

            // The first page is handled on its own because the remaining page URLs are read from it.
            var startUrl = _pageScraper.GetFirstPageUrl(args);
            var landingPage = _htmlWebWrapper.Load(startUrl.OriginalString);
            extractionResults.Vehicles.AddRange(_pageScraper.Scrape(args, landingPage));

            var followUpUrls = _pageScraper.GetRemainingUrls(landingPage);
            Parallel.ForEach(followUpUrls, url =>
            {
                // Pages are downloaded concurrently; scraping and appending run under the lock.
                var page = _htmlWebWrapper.Load(url);
                lock (_lockObject)
                {
                    var sink = extractionResults.Vehicles;
                    sink.AddRange(_pageScraper.Scrape(args, page));
                }
            });

            extractionResults.Stop();
        }
예제 #3
0
        /// <summary>
        /// Builds the address of the first results page for the requested make and model.
        /// </summary>
        /// <param name="extractionArgs">Supplies the Make and Model inserted into the address.</param>
        /// <returns>The root URL followed by "/{Make}__{Model}.html".</returns>
        public Uri GetFirstPageUrl(IExtractionArguments extractionArgs)
        {
            // Example of the resulting address: http://www.goo-net.com/usedcar/LEXUS__IS_F.html
            const string firstPagePattern = "{0}/{1}__{2}.html";

            var root = GooNetArgumentBuilder.RootUrl;
            var make = extractionArgs.Make;
            var model = extractionArgs.Model;

            return new Uri(string.Format(firstPagePattern, root, make, model));
        }
        /// <summary>
        /// Builds the first results-page address: the base URI formatted with make and model,
        /// followed by one year segment per year in the From..To range and the literal "search".
        /// The result is also stored in the _firstPageUrl field.
        /// </summary>
        /// <param name="args">Supplies Make, Model and the inclusive From/To year range.</param>
        /// <returns>The address that was stored in _firstPageUrl.</returns>
        public Uri GetFirstPageUrl(IExtractionArguments args)
        {
            var baseAddress = string.Format(_baseUri.OriginalString, args.Make, args.Model);
            _firstPageUrl = new Uri(baseAddress);

            // One formatted segment per year, From and To inclusive; empty when From > To.
            var yearSegments = string.Empty;
            int year = args.From;
            while (year <= args.To)
            {
                yearSegments = yearSegments + string.Format(yearString, year);
                year++;
            }

            _firstPageUrl = new Uri(string.Concat(_firstPageUrl.OriginalString, yearSegments, "search"));
            return _firstPageUrl;
        }
        /// <summary>
        /// Test setup: creates mock collaborators, stubs the scraper and the results holder
        /// to return empty vehicle lists, and builds the engine under test.
        /// </summary>
        public void Init()
        {
            // Mocked collaborators.
            _htmlWrapper = MockRepository.GenerateMock<IHtmlWebWrapper>();
            _log = MockRepository.GenerateMock<ILog>();
            _extractionArgs = MockRepository.GenerateMock<IExtractionArguments>();
            _extractionResults = MockRepository.GenerateMock<IExtractionResults>();
            _pageScraper = MockRepository.GenerateMock<IAutoTraderZaPageScraper>();

            // Any Scrape call yields an empty list.
            var scrapedVehicles = new List<IVehicle>();
            _pageScraper
                .Stub(scraper => scraper.Scrape(Arg<IExtractionArguments>.Is.Anything, Arg<HtmlDocument>.Is.Anything))
                .Return(scrapedVehicles);

            // The results holder exposes its own, separate empty list.
            var collectedVehicles = new List<IVehicle>();
            _extractionResults
                .Stub(holder => holder.Vehicles)
                .Return(collectedVehicles);

            // Fixture values.
            _dummyUri = new Uri("http://dummyUrl.com");
            _dummyFirstDocument = new HtmlDocument();

            _extractorEngine = new AutoTraderExtractionEngine(_htmlWrapper, _log, _pageScraper);
        }
예제 #6
0
        /// <summary>
        /// Extracts one vehicle per listing block found on a results page. Both the regular
        /// listing blocks and the "no_goo_area" listing blocks are read, in that order.
        /// </summary>
        /// <param name="args">Supplies the Make and Model assigned to every scraped vehicle.</param>
        /// <param name="page">The parsed results page.</param>
        /// <returns>The scraped vehicles; empty when the page contains no listing blocks.</returns>
        public IList<IVehicle> Scrape(IExtractionArguments args, HtmlDocument page)
        {
            IList<IVehicle> vehicles = new List<IVehicle>();

            var nodes = page.DocumentNode.SelectNodes("//div[@class='box_item_detail section ']");
            var gooNodes = page.DocumentNode.SelectNodes("//div[@class='box_item_detail section no_goo_area']");

            // SelectNodes yields null when nothing matches. Collect both kinds into a local
            // list so that a page with no listings produces an empty result instead of a
            // NullReferenceException, and the collections returned by the parser stay untouched.
            var listings = new List<HtmlNode>();
            if (nodes != null)
                listings.AddRange(nodes);
            if (gooNodes != null)
                listings.AddRange(gooNodes);

            foreach (var node in listings)
            {
                Vehicle vehicle = new Vehicle();
                vehicle.Title =
                    node.SelectNodes("div/div[@class='heading_inner']")
                        .First()
                        .InnerText.Trim()
                        .Replace("&nbsp;", " ")
                        .Replace("\t", string.Empty)
                        .Replace("\n", string.Empty);
                vehicle.Make = args.Make;
                vehicle.Model = args.Model;
                vehicle.Milage = node.SelectNodes("div/div/table/tr/td[@class='w63']").First().InnerText;

                // A price that cannot be parsed is recorded as 0.
                double price = 0;
                double.TryParse(node.SelectNodes("div/div/table/tr/td/div[@class='priceInfo']/p/em").First().InnerText,
                  out price);
                vehicle.Price = price;

                // The year cell carries extra text starting at "("; keep only what precedes it.
                // When there is no "(", the whole cell text is parsed rather than failing in Substring.
                var dirtyYear = node.SelectNodes("div/div/table/tr/td[@class='w66']")[0].InnerText;
                var parenIndex = dirtyYear.IndexOf('(');
                if (parenIndex >= 0)
                    dirtyYear = dirtyYear.Substring(0, parenIndex);

                vehicle.Year = int.Parse(dirtyYear);
                vehicles.Add(vehicle);
            }

            return vehicles;
        }
        /// <summary>
        /// Converts every search-result block on the page into a vehicle.
        /// </summary>
        /// <param name="args">Supplies the Make and Model assigned to every scraped vehicle.</param>
        /// <param name="page">The parsed results page.</param>
        /// <returns>The scraped vehicles; empty when the page has no search-result blocks.</returns>
        public IList<IVehicle> Scrape(IExtractionArguments args, HtmlDocument page)
        {
            var scraped = new List<IVehicle>();

            var resultNodes = page.DocumentNode.SelectNodes("//div[@class='searchResult   ']");
            if (resultNodes != null)
            {
                foreach (var resultNode in resultNodes)
                {
                    IVehicle current = new Vehicle();
                    current.Make = args.Make;
                    current.Model = args.Model;

                    var titleNode = resultNode.SelectSingleNode(".//h2[@class='serpTitle']");
                    current.Title = titleNode.InnerText.Trim();

                    // Price, mileage and year are each read from the result block by a helper.
                    current.Price = GetVehiclePrice(resultNode);
                    current.Milage = GetMilage(resultNode);
                    current.Year = GetAge(resultNode);

                    scraped.Add(current);
                    _log.DebugFormat("Added {0}", current);
                }
            }

            return scraped;
        }
 /// <summary>
 /// Test setup: mocks the extraction arguments and the logger, then builds a real
 /// page scraper around the mocked logger.
 /// </summary>
 public void Init()
 {
     _extractionArguments = MockRepository.GenerateMock<IExtractionArguments>();

     // The scraper is a real instance; only the logger handed to it is a mock.
     var logMock = MockRepository.GenerateMock<ILog>();
     _log = logMock;
     _pageScraper = new AutoTraderZaPageScraper(logMock);
 }
        /// <summary>
        /// Stores an extraction header (model, From/To, timestamp) together with one row per
        /// scraped vehicle.
        /// </summary>
        /// <param name="arguments">Supplies Make, Model, From and To for the header.</param>
        /// <param name="extractionResults">Supplies the vehicles to store.</param>
        /// <param name="sourceSystem">Name of an existing source system.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="arguments"/> or <paramref name="extractionResults"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// The source system, make or model does not exist in the database.
        /// </exception>
        public void Write(IExtractionArguments arguments, IExtractionResults extractionResults, string sourceSystem)
        {
            if (arguments == null) throw new ArgumentNullException("arguments");
            if (extractionResults == null) throw new ArgumentNullException("extractionResults");

            _log.DebugFormat("Writing extractionResults for {0}", extractionResults);

            // Resolve source system -> make -> model; each must already exist.
            var sourceSystemEntity = _dataContext.SourceSystems.FirstOrDefault(p => p.Name == sourceSystem);
            if (sourceSystemEntity == null)
                throw new ArgumentException("SourceSystem does not exist in the database", "sourceSystem");

            var makeEntity = _dataContext.Makes.FirstOrDefault(p => p.MakeName == arguments.Make && p.SourceSystem == sourceSystemEntity);
            if (makeEntity == null)
                throw new ArgumentException("Make does not exist in the database", "arguments");

            var modelEntity = makeEntity.Models.FirstOrDefault(p => p.ModelName == arguments.Model);
            if (modelEntity == null)
                throw new ArgumentException("Model does not exist in the database", "arguments");

            var resultEntity = new ExtractionResult() { Model = modelEntity, From = arguments.From, To = arguments.To, ExtractionDateTime = DateTime.Now };
            _dataContext.ExtractionResults.InsertOnSubmit(resultEntity);

            foreach (var vehicle in extractionResults.Vehicles)
            {
                // Vehicles are linked to the header through the association, not a key value,
                // so the header does not have to be submitted first.
                _dataContext.Vehicles.InsertOnSubmit(new Vehicle()
                {
                    ExtractionResult = resultEntity,
                    Milage = SafeInt( vehicle.Milage),
                    Year = vehicle.Year,
                    Price = Convert.ToDecimal( vehicle.Price),
                    Title = vehicle.Title
                });
            }

            // Submit header and vehicles together so a failure while storing vehicles does not
            // leave a header without rows behind.
            // NOTE(review): relies on SubmitChanges applying all pending inserts in one
            // transaction (LINQ to SQL DataContext behaviour) - confirm for this data context.
            _dataContext.SubmitChanges();
        }
 /// <summary>
 /// Not implemented: every call throws <see cref="NotImplementedException"/>.
 /// </summary>
 /// <param name="arguments">Unused.</param>
 /// <param name="results">Never assigned, because the method always throws.</param>
 /// <returns>Never returns.</returns>
 /// <exception cref="NotImplementedException">Always thrown.</exception>
 public bool TryRead(IExtractionArguments arguments, out IExtractionResults results)
 {
     throw new NotImplementedException();
 }