/// <summary>
/// Returns the names of all persisted scrape configurations.
/// </summary>
/// <returns>One name per stored <c>ScrapeInfo</c>; empty when none exist.</returns>
public IEnumerable<string> Get()
{
    List<ScrapeInfo> scrapeInfos = ScrapeInfo.LoadScrapeInfos();
    var names = new List<string>();
    foreach (ScrapeInfo info in scrapeInfos)
    {
        names.Add(info.Name);
    }
    return names;
}
/// <summary>
/// Creates an uploader over the four detection records; all four are required.
/// </summary>
/// <exception cref="ArgumentNullException">Thrown when any argument is null.</exception>
public Uploader(ScrapeInfo scrapeInfo, DimensionInfo dimensionInfo, InspectionInfo inspectionInfo, AeiInfo aei)
{
    if (scrapeInfo is null)
    {
        throw new ArgumentNullException(nameof(scrapeInfo));
    }
    if (dimensionInfo is null)
    {
        throw new ArgumentNullException(nameof(dimensionInfo));
    }
    if (inspectionInfo is null)
    {
        throw new ArgumentNullException(nameof(inspectionInfo));
    }
    if (aei is null)
    {
        throw new ArgumentNullException(nameof(aei));
    }

    this.scrapeInfo = scrapeInfo;
    this.dimensionInfo = dimensionInfo;
    this.inspectionInfo = inspectionInfo;
    this.aei = aei;
}
/// <summary>
/// Runs the scrape configuration with the given name and returns its result link info.
/// </summary>
/// <param name="name">Name of the stored scrape configuration to run.</param>
/// <returns>The scraper's result for the matching configuration.</returns>
public ResultLinkInfo Get(string name)
{
    List<ScrapeInfo> scrapeInfos = ScrapeInfo.LoadScrapeInfos();
    // FirstOrDefault(predicate) replaces the redundant Where().FirstOrDefault() pair.
    // NOTE(review): this is null when no configuration matches `name`; GetResult is
    // presumably expected to cope — confirm against HtmlElementScraper.
    ScrapeInfo scrapeInfo = scrapeInfos.FirstOrDefault(x => x.Name == name);
    HtmlElementScraper elementScraper = new HtmlElementScraper();
    // The original allocated a ResultLinkInfo and immediately overwrote it; the
    // throwaway allocation is removed.
    return elementScraper.GetResult(scrapeInfo);
}
/// <summary>
/// Loads the company/stock rows captured by the given scrape and attaches the
/// scrape's metadata to the first row.
/// </summary>
/// <param name="scrapeInfoRepo">Repository used to look up the scrape metadata.</param>
/// <param name="scrapeId">Identifier of the scrape run to load.</param>
/// <returns>The combined rows; empty when the scrape produced no data.</returns>
public List<CombinedStockDataVM> GetStocksDataByScrapeId(IScrapeInfoRepository scrapeInfoRepo, int scrapeId)
{
    using (IDbConnection connection = new SqlConnection(config.GetConnectionString("ScraperData")))
    {
        ScrapeInfo scrapeInfo = scrapeInfoRepo.GetScrapeInfo(connection, scrapeId);
        List<CombinedStockDataVM> stocksData = connection.Query<Company, StockData, CombinedStockDataVM>(
            "dbo.uspStocksData_Companies_GetByScrapeId @ScrapeId",
            MapResults,
            new { ScrapeId = scrapeId },
            splitOn: "LastPrice").ToList();

        // Fixed: the original indexed stocksData[0] unconditionally, which threw
        // ArgumentOutOfRangeException for scrape ids that returned no rows.
        if (stocksData.Count > 0)
        {
            stocksData[0].ScrapeInfo = scrapeInfo;
        }
        return stocksData;
    }
}
/// <summary>
/// Scrapes the first inventory item's details (VIN, price, make, model, photo)
/// from the page at <c>url</c> using XPath navigation.
/// </summary>
/// <returns>A populated <see cref="ScrapeInfo"/> for the first listed vehicle.</returns>
private static ScrapeInfo useXPath()
{
    HtmlWeb webClient = new HtmlWeb();
    HtmlDocument firstInventoryPage = webClient.Load(url);

    // Anchor all subsequent queries on the product container flagged as position 1.
    HtmlNodeNavigator navigator = (HtmlNodeNavigator)firstInventoryPage.DocumentNode
        .SelectSingleNode("//div[contains(@class,\"hproduct\")][@data-index-position=\"1\"]")
        .CreateNavigator();

    // Local helper: evaluate an XPath relative to the anchored navigator.
    string ValueAt(string xpath) => navigator.SelectSingleNode(xpath).Value;

    return new ScrapeInfo
    {
        Vin = ValueAt("@data-vin"),
        Price = ValueAt("//span[contains(@class,\"internetPrice\")]//span[@class=\"value\"]/text()"),
        Make = ValueAt("@data-make"),
        Model = ValueAt("@data-model"),
        PhotoUrl = ValueAt("//div[@class=\"media\"]//img/@src"),
    };
}
/// <summary>
/// Loads the page at <c>url</c> in headless Chromium via PuppeteerSharp and
/// captures its rendered HTML.
/// </summary>
/// <returns>
/// Currently an empty <see cref="ScrapeInfo"/> — the fetched HTML is not yet
/// parsed into fields (matching the original behavior).
/// </returns>
private static async Task<ScrapeInfo> usePuppeteer()
{
    ScrapeInfo values = new ScrapeInfo();
    await new BrowserFetcher().DownloadAsync(BrowserFetcher.DefaultRevision);
    var options = new LaunchOptions { Headless = true };

    // Fixed: the original never disposed the browser or page, leaking a headless
    // Chromium process on every call.
    using (var browser = await Puppeteer.LaunchAsync(options))
    using (var page = await browser.NewPageAsync())
    {
        var response = await page.GoToAsync(url);
        string html = await page.GetContentAsync();
        // TODO: parse `html` into `values` (Vin, Price, Make, Model, PhotoUrl).
    }
    return values;
}
/// <summary>
/// Interactive entry point: repeatedly asks which scraping strategy to use,
/// runs it, prints the scraped vehicle details, and offers to go again.
/// </summary>
static async Task Main(string[] args)
{
    bool again = true;
    while (again)
    {
        ScrapeInfo info;
        Console.WriteLine("Would you like to scrape information using [x]path or [p]uppeteer?");
        string parseType = Console.ReadLine().ToLower();
        switch (parseType)
        {
            case "r":
                // Regex strategy is stubbed out for now.
                Console.WriteLine("This functionality isn't ready yet");
                info = new ScrapeInfo();
                //info = useRegex();
                break;
            case "x":
                info = useXPath();
                break;
            case "p":
                info = await usePuppeteer();
                break;
            default:
                Console.WriteLine("Unrecognized input, would you like to try again? y/n");
                again = Console.ReadLine().ToLower() == "y";
                // Skip the result printout; go straight to the next prompt (or exit).
                continue;
        }
        Console.WriteLine($"VIN: {info.Vin}\nPrice: {info.Price}\nMake: {info.Make}\nModel: {info.Model}\nCover Photo: {info.PhotoUrl}");
        Console.WriteLine("Would you like to try again? y/n");
        again = Console.ReadLine().ToLower() == "y";
    }
    Console.WriteLine("Goodbye");
    Console.ReadLine();
    return;
}
/// <summary>
/// Prints the scrape timestamp followed by one tab-separated console row per
/// company: symbol/name, last price, signed change, signed percent change,
/// share volume, and trade date.
/// </summary>
/// <param name="scrapeInfo">Metadata (date, time zone) of the scrape being shown.</param>
private void DisplayStocksData(ScrapeInfo scrapeInfo)
{
    Console.WriteLine("Data scraped on: " + scrapeInfo.ScrapeDate.ToString("MMM, dd yyyy h:mm tt") + " " + scrapeInfo.TimeZone);
    Console.WriteLine();
    foreach (StockDataCompany row in stocksData)
    {
        var company = row.ScrapedCompanyData;
        var stock = row.ScrapedStockData;

        Console.Write(company.SymbolName + " " + company.CompanyName + "\t");
        Console.Write("{0:F2} \t", stock.LastPrice);

        // Positive movements get an explicit leading '+'.
        var change = stock.Change;
        Console.Write(change > 0 ? "+{0:F2}\t" : "{0:F2}\t", change);
        var percentChange = stock.PercentChange;
        Console.Write(percentChange > 0 ? "+{0:F2}\t" : "{0:F2}\t", percentChange);

        Console.WriteLine(stock.Shares + "\t" + stock.TradeDate?.ToString("MMM, dd yyyy"));
    }
    Console.WriteLine();
}
/// <summary>
/// Scrapes the first inventory item's details out of the raw page HTML using
/// regular expressions bounded by the data-index-position="1"/"2" markers.
/// </summary>
/// <returns>A populated <see cref="ScrapeInfo"/> for the first listed vehicle.</returns>
private static ScrapeInfo useRegex()
{
    ScrapeInfo values = new ScrapeInfo();
    HtmlWeb webClient = new HtmlWeb();
    HtmlDocument firstInventoryPage = webClient.Load(url);
    string html = firstInventoryPage.Text;

    Regex vinRegex = new Regex("data-index-position=\"1\"[\\s\\S]*?data-vin=\"([^\"]+)\"[\\s\\S]*?data-index-position=\"2\"", RegexOptions.Multiline | RegexOptions.IgnoreCase);
    Regex priceRegex = new Regex("data-index-position=\"1\"[\\s\\S]*?class=\"value\">(\\$[\\d,.]+)<[\\s\\S]*?data-index-position=\"2\"", RegexOptions.Multiline | RegexOptions.IgnoreCase);
    Regex makeRegex = new Regex("data-make=\"([^\"]+)\"", RegexOptions.Multiline | RegexOptions.IgnoreCase);
    Regex modelRegex = new Regex("data-model=\"([^\"]+)\"", RegexOptions.Multiline | RegexOptions.IgnoreCase);
    // Fixed: the tail of this pattern was whitespace-mangled
    // ("[\s\S] *? data - index - position = \"2\"") and could never match real markup.
    Regex photoUrlRegex = new Regex("data-index-position=\"1\"[\\s\\S]*?<img(?=[^>]*class=\"[^\"]*thumb[^\"]* \") src=\"([^\"]+)\"[\\s\\S]*?data-index-position=\"2\"", RegexOptions.Multiline | RegexOptions.IgnoreCase);

    // Fixed: the original read Matches(html)[1].Value — the *second* match's full
    // matched text. The intent is the first match's capture group (group 1).
    values.Vin = vinRegex.Match(html).Groups[1].Value;
    values.Price = priceRegex.Match(html).Groups[1].Value;
    values.Make = makeRegex.Match(html).Groups[1].Value;
    values.Model = modelRegex.Match(html).Groups[1].Value;
    values.PhotoUrl = photoUrlRegex.Match(html).Groups[1].Value;
    return values;
}
// Fetches `url` with the requested HTTP verb ("GET" or "POST", case-insensitive)
// and packages the response into a ScrapeInfo: Content, ContentType, FinalUrl
// (after redirects), ResponseCode, and Success (true only for 200 OK).
// Returns null when the request fails with no usable HTTP response
// (e.g. DNS failure, timeout) or on any other unexpected exception.
// NOTE(review): an unrecognized `method` leaves objResponse null and silently
// returns an empty ScrapeInfo — confirm callers only pass GET/POST.
private ScrapeInfo Scrape(string url, string method, IEnumerable <KeyValuePair <string, string> > postData)
{
    var retVal = new ScrapeInfo();
    HttpWebResponse objResponse = null;
    try
    {
        // Dispatch on verb; postData is only used for POST.
        if (method.ToUpper() == "GET")
        {
            objResponse = this.PerformGet(url);
        }
        else if (method.ToUpper() == "POST")
        {
            objResponse = this.PerformPost(url, postData);
        }
        if (objResponse != null)
        {
            // Try to fetch the page from the given URL, in case of any error return null string
            using (objResponse)
            {
                // In case of page not found error, return null string
                if (objResponse.StatusCode != HttpStatusCode.NotFound)
                {
                    // If there is a proper response then read the contents in the response and return these contents
                    using (var sr = new StreamReader(objResponse.GetResponseStream()))
                    {
                        retVal.Content = sr.ReadToEnd();
                        sr.Close();
                    }
                }
                // Metadata is recorded even for 404s (Content stays null in that case).
                retVal.ContentType = objResponse.ContentType;
                retVal.FinalUrl = objResponse.ResponseUri.ToString();
                retVal.ResponseCode = objResponse.StatusCode;
                retVal.Success = objResponse.StatusCode == HttpStatusCode.OK;
            }
        }
    }
    catch (WebException wex)
    {
        // ProtocolError means the server DID respond, just with an error status
        // (4xx/5xx); the response attached to the exception is still usable.
        if (wex.Status == WebExceptionStatus.ProtocolError)
        {
            // Try to fetch the page from the given URL, in case of any error return null string
            using (objResponse = wex.Response as HttpWebResponse)
            {
                if (objResponse != null)
                {
                    retVal.Success = false;
                    retVal.ResponseCode = objResponse.StatusCode;
                    try
                    {
                        // Although the response code indicates some problem there may still be some content.
                        using (var sr = new StreamReader(objResponse.GetResponseStream()))
                        {
                            retVal.Content = sr.ReadToEnd();
                            sr.Close();
                        }
                    }
                    catch (Exception)
                    {
                        // Best-effort read of the error body; secondary failures are
                        // deliberately ignored — status code is already recorded.
                    }
                }
            }
        }
        else
        {
            // No HTTP response at all (DNS, timeout, connection refused, ...).
            retVal = null;
        }
    }
    catch
    {
        // Any other unexpected failure is reported to the caller as null.
        retVal = null;
    }
    return(retVal);
}
/// <summary>
/// Loads whichever wayside detection files are present (AEI, flaw inspection,
/// geometric dimension, scrape), fills in defaults for the missing ones, and
/// uploads the combined train record, then clears the pending files.
/// </summary>
/// <exception cref="Exception">Thrown when none of the four files exist.</exception>
public static void Load()
{
    int axleCount;
    DateTime time;
    // Parsed results are cached so each present file is parsed exactly once
    // (the original re-parsed the priority file a second time further down).
    AeiInfo aei = null;
    InspectionInfo inspection = null;
    DimensionInfo dimension = null;
    ScrapeInfo scrape = null;

    // Determine the train's detection time and axle count.
    // Source priority: AEI -> flaw inspection -> geometric dimension -> scrape.
    if (AeiFile != null)
    {
        aei = AeiInfo.Create(AeiFile);
        axleCount = aei.AxleCount;
        time = aei.Time;
    }
    else if (InspectionFile != null)
    {
        inspection = InspectionInfo.Create(InspectionFile);
        // NOTE(review): axle count is estimated as 32 axles per formation — confirm.
        axleCount = inspection.Formations.Count * 32;
        time = DateTime.ParseExact(inspection.Time, "yyyyMMddHHmmss", null);
    }
    else if (DimensionFile != null)
    {
        dimension = DimensionInfo.Create(DimensionFile);
        axleCount = dimension.BaseInfo.AxleCount;
        time = DateTime.ParseExact(dimension.BaseInfo.DetectionTime, "yyyyMMddHHmmss", null);
    }
    else if (ScrapeFile != null)
    {
        scrape = ScrapeInfo.Create(ScrapeFile);
        axleCount = scrape.AxleCnt;
        time = DateTime.ParseExact(scrape.Time, "yyyyMMddHHmmss", null);
    }
    else
    {
        // None of the AEI / flaw-inspection / dimension / scrape messages exist;
        // nothing can be stored. (Message text kept verbatim for callers/logs.)
        throw new Exception("AEI,探伤,几何尺寸,擦伤报文均不存在,无法入库");
    }

    // Snap the measured axle count to the nominal formation size (32 or 64 axles).
    if (axleCount >= 22 && axleCount <= 42)
    {
        axleCount = 32;
    }
    else if (axleCount > 42 && axleCount <= 74)
    {
        axleCount = 64;
    }

    // Parse any remaining files, or synthesize defaults for the missing ones.
    if (scrape is null)
    {
        scrape = ScrapeFile is null ? ScrapeInfo.CreateDefault(axleCount, time) : ScrapeInfo.Create(ScrapeFile);
    }
    if (aei is null)
    {
        aei = AeiFile is null ? AeiInfo.CreateDefault(axleCount, time) : AeiInfo.Create(AeiFile);
    }
    if (dimension is null)
    {
        dimension = DimensionFile is null ? DimensionInfo.CreateDefault(axleCount, time) : DimensionInfo.Create(DimensionFile);
    }
    if (inspection is null)
    {
        inspection = InspectionFile is null ? InspectionInfo.CreateDefault(axleCount, time) : InspectionInfo.Create(InspectionFile);
    }

    var loader = new Uploader(scrape, dimension, inspection, aei);
    loader.Insert();
    Clear();
}