/// <summary>
/// Loads the player's profile page and scrapes current SR, competitive match
/// counts, non-prestige level and the prestige border/rank image URLs into
/// the class's static fields, using XPath expressions read from
/// XPathElements.xml.
/// </summary>
private static void ParseWebPage()
{
    HtmlWeb web = new HtmlWeb();

    // Walk three directories up from the build output folder to reach the
    // project root, then locate the XPath configuration file.
    // Fix: Path.Combine replaces the previous raw string concatenation of
    // FullName values with a hard-coded @"\..." suffix. (The original TODO
    // still applies: copying the file to the output directory at build time
    // would be sturdier than walking up the directory tree.)
    string baseDirectory = System.AppDomain.CurrentDomain.BaseDirectory;
    string projectRoot = Directory.GetParent(Directory.GetParent(Directory.GetParent(baseDirectory).FullName).FullName).FullName;
    string xpathXmlPath = Path.Combine(projectRoot, "Assets", "Data", "XPathElements.xml");
    XDocument xpathXml = XDocument.Load(xpathXmlPath);

    try
    {
        HtmlDocument playOverwatchPage = web.Load(PlayerInformation.WebsiteToScrapeFrom);
        Console.WriteLine("Full URL to parse from: " + PlayerInformation.WebsiteToScrapeFrom);

        // Parses website information.
        // TODO change the keys to a separate Key->Value file
        currentSr = HtmlScraper.XPathScraper(playOverwatchPage, getXmlValueByElement(xpathXml, "CurrentSr"));
        competitiveMatchesWon = HtmlScraper.XPathScraper(playOverwatchPage, getXmlValueByElement(xpathXml, "CompetitiveMatchesWon"));
        competitiveMatchesPlayed = HtmlScraper.XPathScraper(playOverwatchPage, getXmlValueByElement(xpathXml, "CompetitiveMatchesTotal"));
        nonPrestigeLevel = HtmlScraper.XPathScraper(playOverwatchPage, getXmlValueByElement(xpathXml, "NonPrestigeLevel"));
        prestigeBorderUrl = HtmlScraper.HtmlElementScraper(playOverwatchPage, getXmlValueByElement(xpathXml, "PrestigeBorder"), "style");
        prestigeRankUrl = HtmlScraper.HtmlElementScraper(playOverwatchPage, getXmlValueByElement(xpathXml, "PrestigeRank"), "style");
    }
    catch (System.Net.WebException e)
    {
        Console.WriteLine("Error loading page: " + e.Message);
    }
    catch (NullReferenceException e)
    {
        // NOTE(review): catching NullReferenceException papers over missing
        // HTML nodes; null checks inside the scrapers would be preferable.
        // Preserved as-is so existing failure behavior is unchanged.
        Console.WriteLine("Error loading competitive information " + e.Message);
    }
}
/// <summary>
/// Smoke test: verifies that parsing the target site completes without
/// throwing. (Name kept as-is — renaming a public test would change the
/// external interface.)
/// </summary>
public async Task TestCrape()
{
    var scraper = new HtmlScraper("https://batdongsan.com.vn/");

    await scraper.ParseAsync();

    // Reaching this line means ParseAsync did not throw.
    Assert.True(true);
}
/// <summary>
/// Scrapes the routes of a single train between two stations on a given date.
/// </summary>
/// <param name="date">Travel date used to build the route URL.</param>
/// <param name="trainNo">Train number to look up.</param>
/// <param name="fromId">Identifier of the departure station.</param>
/// <param name="toId">Identifier of the arrival station.</param>
/// <returns>The scraped train routes.</returns>
private IEnumerable<ITrainRoute> GetTrainRoutes(DateTime date, int trainNo, string fromId, string toId)
{
    var extractor = new TrainRouteExtractor(fromId, toId, date);
    var scraper = new HtmlScraper<TrainRoute>(extractor);
    return scraper.Scrape(_settingsProvider.GetRouteUrl(date, trainNo));
}
/// <summary>
/// Scrapes the full list of stations from the configured "all stations" URL.
/// </summary>
/// <returns>The scraped stations.</returns>
private IEnumerable<IStation> GetStations()
{
    var scraper = new HtmlScraper<Station>(new StationExtractor());
    var stationsUrl = _settingsProvider.GetAllStationsUrl();
    return scraper.Scrape(stationsUrl);
}
/// <summary>
/// Scrapes all routes departing from the given station.
/// </summary>
/// <param name="fromId">Identifier of the departure station.</param>
/// <returns>The scraped routes.</returns>
private IEnumerable<IRoute> GetStationRoutes(string fromId)
{
    var scraper = new HtmlScraper<Route>(new StationRouteExtractor(fromId));
    var routesUrl = _settingsProvider.GetStationRoutesUrl(fromId);
    return scraper.Scrape(routesUrl);
}
/// <summary>
/// Retrieves the scraped route of the specified train between two stations
/// for the given travel date.
/// </summary>
/// <param name="date">Travel date used to build the route URL.</param>
/// <param name="trainNo">Train number to look up.</param>
/// <param name="fromId">Identifier of the departure station.</param>
/// <param name="toId">Identifier of the arrival station.</param>
/// <returns>The scraped train routes.</returns>
private IEnumerable<ITrainRoute> GetTrainRoutes(DateTime date, int trainNo, string fromId, string toId)
{
    var routeUrl = _settingsProvider.GetRouteUrl(date, trainNo);
    var extractor = new TrainRouteExtractor(fromId, toId, date);
    return new HtmlScraper<TrainRoute>(extractor).Scrape(routeUrl);
}
/// <summary>
/// Retrieves all scraped routes that depart from the given station.
/// </summary>
/// <param name="fromId">Identifier of the departure station.</param>
/// <returns>The scraped routes.</returns>
private IEnumerable<IRoute> GetStationRoutes(string fromId)
{
    var url = _settingsProvider.GetStationRoutesUrl(fromId);
    return new HtmlScraper<Route>(new StationRouteExtractor(fromId)).Scrape(url);
}
/// <summary>
/// Retrieves the scraped list of all stations.
/// </summary>
/// <returns>The scraped stations.</returns>
private IEnumerable<IStation> GetStations()
{
    var url = _settingsProvider.GetAllStationsUrl();
    return new HtmlScraper<Station>(new StationExtractor()).Scrape(url);
}
/// <summary>
/// Runs the scraper and returns all collected entries.
/// </summary>
/// <param name="htmlScraper">Scraper injected per-request via [FromServices].</param>
/// <returns>The scraped entries as a list.</returns>
public async Task<List<EntryDto>> Index([FromServices] HtmlScraper htmlScraper)
{
    // Fix: the original captured DateTime.Now before and after the run into
    // locals that were never used — dead code, removed. If timing should be
    // logged, reintroduce it with System.Diagnostics.Stopwatch rather than
    // DateTime.Now subtraction.
    var entries = await htmlScraper.Run();
    return entries.ToList();
}
/// <summary>
/// WebForms page lifecycle handler: loads scraped HTML serialized as XML
/// into the XmlDataSource, then rebinds the repeater, writing the repeater
/// item counts to the page trace before and after the bind for diagnostics.
/// </summary>
protected void Page_Load(object sender, EventArgs e)
{
    // Scrape once per page load; OuterXml yields the full serialized document.
    string xml = HtmlScraper.GetHtmlAsXml().OuterXml;
    XmlDataSource1.Data = xml;
    Trace.Write("INFO", xml);
    // Bind the data source first so the repeater bind below sees fresh data —
    // the order of these DataBind calls is deliberate.
    XmlDataSource1.DataBind();
    Trace.Write("Info", "PRE BIND: Repeater Item Count " + this.Repeater1.Items.Count);
    this.Repeater1.DataBind();
    Trace.Write("Info", "POST BIND: Repeater Item Count " + this.Repeater1.Items.Count);
}
/// <summary>
/// Callback entry point: unpacks the (HtmlDocument, Logger) state tuple and
/// scrapes message metadata from the document.
/// </summary>
/// <param name="state">Expected to be a Tuple&lt;HtmlDocument, Logger&gt;.</param>
/// <returns>The scraped message metadata.</returns>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="state"/> is not the expected tuple type.
/// </exception>
private MessageMetaData[] GetMessages(object state)
{
    var args = state as Tuple<HtmlDocument, Logger>;
    // Fix: the original dereferenced the `as`-cast result without a null
    // check, so a wrong state object surfaced as a NullReferenceException.
    // Fail fast with a descriptive exception instead.
    if (args == null)
    {
        throw new ArgumentException("Expected state to be a Tuple<HtmlDocument, Logger>.", nameof(state));
    }

    var htmlDoc = args.Item1;
    var logger = args.Item2;
    HtmlScraper scraper = new HtmlScraper(logger, htmlDoc);
    return scraper.Scrape();
}
/// <summary>
/// Issues an authenticated GET request through the scraper and deserializes
/// the response body into the requested response type.
/// </summary>
/// <param name="url">Target URL.</param>
/// <param name="accessToken">Access token for the request.</param>
/// <param name="clientSecret">Client secret for the request.</param>
/// <param name="scraper">Optional scraper; a default one is created when omitted.</param>
/// <returns>The deserialized response.</returns>
private static TResponse Get<TResponse>(string url, string accessToken, string clientSecret, IHtmlScraper scraper = null)
    where TResponse : ResponseBase, new()
{
    // Fall back to the default scraper when the caller did not supply one.
    if (scraper.IsNull())
    {
        scraper = new HtmlScraper();
    }

    RequestHelper.MakeGetRequest(url, accessToken, clientSecret, scraper);

    // The scraper captures the response body as a side effect of the request.
    return Deserialize<TResponse>(scraper.LastContent);
}
/// <summary>
/// Downloads the three weather pages for the selected city and computes the
/// averaged temperature, wind, humidity and pressure across the sources.
/// </summary>
/// <returns>The populated data model for the selected provincial city.</returns>
private async Task<ProvincialCityDataModel> GetProvincialCityData()
{
    ProvincialCityDataModel provCityDataObj = new ProvincialCityDataModel();

    // Fix: the original blocked with .Result inside an async method — a
    // classic deadlock risk when a synchronization context is present (e.g.
    // a UI thread). Await the download instead.
    _returnedHtmlDocs = await HtmlScraper.GetHtmlDocs(
        _webInteriaDict[SelectedCity], _webGisDict[SelectedCity], _webOnetDict[SelectedCity]);

    // Offload the averaging work to the thread pool; Task.Run is the
    // recommended modern form of Task.Factory.StartNew for this use.
    await Task.Run(() =>
    {
        provCityDataObj.AverageTemperature = HtmlScraper.AverageTemp(_returnedHtmlDocs[0], _returnedHtmlDocs[1], _returnedHtmlDocs[2]);
        provCityDataObj.AverageWind = HtmlScraper.AverageWind(_returnedHtmlDocs[0], _returnedHtmlDocs[1], _returnedHtmlDocs[2]);
        provCityDataObj.AverageHumidity = HtmlScraper.AverageHumidity(_returnedHtmlDocs[0], _returnedHtmlDocs[1], _returnedHtmlDocs[2]);
        provCityDataObj.AveragePressure = HtmlScraper.AveragePressure(_returnedHtmlDocs[0], _returnedHtmlDocs[1], _returnedHtmlDocs[2]);
    });

    return provCityDataObj;
}
/// <summary>
/// Test arrangement/act step: scrapes a local resource (served by
/// ResourceHtmlFetcher instead of the network) with a paged instruction that
/// maps Amazon search-result entries, storing the results for the assertion
/// phase.
/// </summary>
public override void When()
{
    // Fetch pages from embedded test resources rather than over HTTP.
    IHtmlScraper scraper = new HtmlScraper(c => { c.UseHtmlFetcher(new ResourceHtmlFetcher()); });
    var instruction = new HtmlScrapeInstruction<AmazonEntry>(config =>
    {
        // Resolves relative hrefs against the Amazon base URL.
        var linkConverter = new LinkConverter("https://www.amazon.com");
        config
            .ScrapeUrl(Resource)
            // Pages 1..3 via the "page" query parameter.
            .WithPaging("page", pagingStart: 1, pagingIncrement: 1, pagingEnd: 3)
            // One entry per ".s-item-container" element; Title and Url are
            // pulled from the detail-page anchor within each entry.
            .AsEntries(e => e.FromCssSelector(".s-item-container")
                .MapProperty(p => p.Title, p => p.FromCssSelector("a.s-access-detail-page h2"))
                .MapProperty(p => p.Url, p => p.FromCssSelector("a.s-access-detail-page").UsingConverter(linkConverter)));
    });
    // NOTE(review): .Result blocks on the async scrape. Acceptable in this
    // synchronous test override, but a deadlock risk wherever a
    // synchronization context is present — confirm the test host has none.
    _results = scraper.Scrape(instruction).Result;
}
/// <summary>
/// Scrapes course and assignment marks from TeachAssist.
/// </summary>
/// <param name="loginData">The data representing the student login.</param>
/// <returns>A string representing the json information.</returns>
public string ScrapeMarks(UserInfo loginData)
{
    var login = new HtmlScraper(loginData);
    var htmlFiles = login.ScrapeHtml();

    ObjectParser scraper = new ObjectParser();
    var courses = scraper.ScrapeMarks(htmlFiles);

    // Hoisted out of the loop — the settings never change per course.
    var settings = new JsonSerializerSettings()
    {
        ReferenceLoopHandling = ReferenceLoopHandling.Ignore,
        Formatting = Formatting.Indented
    };

    // Fix: `json += ...` in a loop is O(n^2) in total string length; build
    // with StringBuilder instead. Output is byte-identical to the original.
    // NOTE(review): concatenating one JSON object per course yields a string
    // that is NOT a single valid JSON document; callers may depend on this
    // shape, so it is preserved — consider serializing `courses` as one
    // array in a future interface revision.
    var json = new System.Text.StringBuilder();
    courses.ForEach(c => json.Append(JsonConvert.SerializeObject(c, settings)));
    return json.ToString();
}