Ejemplo n.º 1
0
        /// <summary>
        /// Loads the XPath definitions from <c>Assets/Data/XPathElements.xml</c>, downloads the
        /// page at <c>PlayerInformation.WebsiteToScrapeFrom</c> and stores the scraped values in
        /// the competitive/prestige fields.
        /// </summary>
        /// <remarks>
        /// <see cref="System.Net.WebException"/> and <see cref="NullReferenceException"/> raised
        /// while loading or scraping are reported on the console and not rethrown. Failures while
        /// locating or loading the XML file happen before the <c>try</c> block and propagate.
        /// </remarks>
        private static void ParseWebPage()
        {
            HtmlWeb web = new HtmlWeb();

            //TODO Find decent way to do this
            // The XML file is looked up three directory levels above the application base directory.
            string baseDirectory = System.AppDomain.CurrentDomain.BaseDirectory;
            string assetsRoot    = Directory.GetParent(Directory.GetParent(Directory.GetParent(baseDirectory).FullName).FullName).FullName;

            // Path.Combine inserts the platform's separator instead of a hard-coded '\'.
            string    xpathXmlPath = Path.Combine(assetsRoot, "Assets", "Data", "XPathElements.xml");
            XDocument xpathXml     = XDocument.Load(xpathXmlPath);

            try
            {
                HtmlDocument playOverwatchPage = web.Load(PlayerInformation.WebsiteToScrapeFrom);
                Console.WriteLine("Full URL to parse from: " + PlayerInformation.WebsiteToScrapeFrom);

                //Parses website information
                //TODO change the keys to a separate Key->Value file
                currentSr                = HtmlScraper.XPathScraper(playOverwatchPage, getXmlValueByElement(xpathXml, "CurrentSr"));
                competitiveMatchesWon    = HtmlScraper.XPathScraper(playOverwatchPage, getXmlValueByElement(xpathXml, "CompetitiveMatchesWon"));
                competitiveMatchesPlayed = HtmlScraper.XPathScraper(playOverwatchPage, getXmlValueByElement(xpathXml, "CompetitiveMatchesTotal"));
                nonPrestigeLevel         = HtmlScraper.XPathScraper(playOverwatchPage, getXmlValueByElement(xpathXml, "NonPrestigeLevel"));

                // The prestige values are read from the "style" attribute of the matched elements.
                prestigeBorderUrl = HtmlScraper.HtmlElementScraper(playOverwatchPage, getXmlValueByElement(xpathXml, "PrestigeBorder"), "style");
                prestigeRankUrl   = HtmlScraper.HtmlElementScraper(playOverwatchPage, getXmlValueByElement(xpathXml, "PrestigeRank"), "style");
            }
            catch (System.Net.WebException e)
            {
                Console.WriteLine("Error loading page: " + e.Message);
            }
            catch (NullReferenceException e)
            {
                Console.WriteLine("Error loading competitive information " + e.Message);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Smoke test: constructs a scraper for the batdongsan.com.vn home page and awaits
        /// <c>ParseAsync</c>. It passes as long as no exception is thrown.
        /// </summary>
        public async Task TestCrape()
        {
            HtmlScraper scraper = new HtmlScraper("https://batdongsan.com.vn/");

            await scraper.ParseAsync();

            // Nothing from the parse is inspected; reaching this line is the success criterion.
            Assert.True(true);
        }
 /// <summary>
 /// Scrapes the route page for the given date and train number, extracting train routes
 /// with a <c>TrainRouteExtractor</c> built from <paramref name="fromId"/>,
 /// <paramref name="toId"/> and <paramref name="date"/>.
 /// </summary>
 /// <param name="date">Date passed to both the extractor and the URL provider.</param>
 /// <param name="trainNo">Train number used to build the URL.</param>
 /// <param name="fromId">Origin identifier passed to the extractor.</param>
 /// <param name="toId">Destination identifier passed to the extractor.</param>
 /// <returns>Whatever the scraper produces for the route URL.</returns>
 private IEnumerable<ITrainRoute> GetTrainRoutes(DateTime date, int trainNo, string fromId, string toId)
 {
     var scraper = new HtmlScraper<TrainRoute>(new TrainRouteExtractor(fromId, toId, date));
     var routeUrl = _settingsProvider.GetRouteUrl(date, trainNo);

     return scraper.Scrape(routeUrl);
 }
 /// <summary>
 /// Scrapes the "all stations" URL supplied by the settings provider using a
 /// <c>StationExtractor</c>.
 /// </summary>
 /// <returns>Whatever the scraper produces for the stations URL.</returns>
 private IEnumerable<IStation> GetStations()
 {
     var scraper = new HtmlScraper<Station>(new StationExtractor());
     var stationsUrl = _settingsProvider.GetAllStationsUrl();

     return scraper.Scrape(stationsUrl);
 }
 /// <summary>
 /// Scrapes the station-routes URL for <paramref name="fromId"/> using a
 /// <c>StationRouteExtractor</c> built from the same identifier.
 /// </summary>
 /// <param name="fromId">Station identifier passed to the extractor and the URL provider.</param>
 /// <returns>Whatever the scraper produces for the station-routes URL.</returns>
 private IEnumerable<IRoute> GetStationRoutes(string fromId)
 {
     var scraper = new HtmlScraper<Route>(new StationRouteExtractor(fromId));
     var stationRoutesUrl = _settingsProvider.GetStationRoutesUrl(fromId);

     return scraper.Scrape(stationRoutesUrl);
 }
        /// <summary>
        /// Builds a train-route scraper for the given endpoints and date, then scrapes the
        /// route URL resolved from <paramref name="date"/> and <paramref name="trainNo"/>.
        /// </summary>
        /// <param name="date">Date passed to both the extractor and the URL provider.</param>
        /// <param name="trainNo">Train number used to build the URL.</param>
        /// <param name="fromId">Origin identifier passed to the extractor.</param>
        /// <param name="toId">Destination identifier passed to the extractor.</param>
        /// <returns>The result of scraping the route URL.</returns>
        private IEnumerable <ITrainRoute> GetTrainRoutes(DateTime date, int trainNo, string fromId, string toId)
        {
            var routeExtractor = new TrainRouteExtractor(fromId, toId, date);
            var routeScraper = new HtmlScraper<TrainRoute>(routeExtractor);

            return routeScraper.Scrape(_settingsProvider.GetRouteUrl(date, trainNo));
        }
        /// <summary>
        /// Builds a station-route scraper for <paramref name="fromId"/> and scrapes the URL the
        /// settings provider returns for that station.
        /// </summary>
        /// <param name="fromId">Station identifier passed to the extractor and the URL provider.</param>
        /// <returns>The result of scraping the station-routes URL.</returns>
        private IEnumerable <IRoute> GetStationRoutes(string fromId)
        {
            var routeExtractor = new StationRouteExtractor(fromId);
            var routeScraper = new HtmlScraper<Route>(routeExtractor);

            return routeScraper.Scrape(_settingsProvider.GetStationRoutesUrl(fromId));
        }
        /// <summary>
        /// Builds a station scraper and scrapes the "all stations" URL returned by the
        /// settings provider.
        /// </summary>
        /// <returns>The result of scraping the stations URL.</returns>
        private IEnumerable <IStation> GetStations()
        {
            var stationExtractor = new StationExtractor();
            var stationScraper = new HtmlScraper<Station>(stationExtractor);

            return stationScraper.Scrape(_settingsProvider.GetAllStationsUrl());
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Runs the injected scraper and returns the entries it produces as a list.
        /// </summary>
        /// <param name="htmlScraper">Scraper resolved from the service container.</param>
        /// <returns>The scraper's entries, materialized into a <see cref="List{T}"/>.</returns>
        public async Task <List <EntryDto> > Index([FromServices] HtmlScraper htmlScraper)
        {
            // The previous start/end timestamps were computed but never read, so they are gone.
            var entries = await htmlScraper.Run();

            return entries.ToList();
        }
Ejemplo n.º 10
0
    /// <summary>
    /// Page load handler: feeds the scraped XML into <c>XmlDataSource1</c>, binds it, then binds
    /// <c>Repeater1</c>, tracing the repeater's item count before and after its bind.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    protected void Page_Load(object sender, EventArgs e)
    {
        string scrapedXml = HtmlScraper.GetHtmlAsXml().OuterXml;

        // Hand the markup to the data source and trace it before binding.
        XmlDataSource1.Data = scrapedXml;
        Trace.Write("INFO", scrapedXml);
        XmlDataSource1.DataBind();

        int itemsBeforeBind = Repeater1.Items.Count;
        Trace.Write("Info", "PRE BIND: Repeater Item Count " + itemsBeforeBind);

        Repeater1.DataBind();

        int itemsAfterBind = Repeater1.Items.Count;
        Trace.Write("Info", "POST BIND: Repeater Item Count " + itemsAfterBind);
    }
Ejemplo n.º 11
0
        /// <summary>
        /// Unpacks the document and logger from <paramref name="state"/> and scrapes the
        /// document for messages.
        /// </summary>
        /// <param name="state">
        /// A <c>Tuple&lt;HtmlDocument, Logger&gt;</c> carrying the document (Item1) and the
        /// logger (Item2).
        /// </param>
        /// <returns>The messages returned by the scraper.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="state"/> is null or is not the expected tuple type.
        /// </exception>
        private MessageMetaData[] GetMessages(object state)
        {
            var args = state as Tuple <HtmlDocument, Logger>;

            // 'as' yields null for a null or mismatched argument; fail with a clear message
            // instead of a NullReferenceException on the first member access.
            if (args == null)
            {
                throw new ArgumentException("Expected a non-null Tuple<HtmlDocument, Logger>.", "state");
            }

            var htmlDoc = args.Item1;
            var logger  = args.Item2;

            HtmlScraper scraper = new HtmlScraper(logger, htmlDoc);

            return scraper.Scrape();
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Issues a GET request through <c>RequestHelper</c> and deserializes the scraper's
        /// last content into <typeparamref name="TResponse"/>.
        /// </summary>
        /// <param name="url">Request URL.</param>
        /// <param name="accessToken">Access token forwarded to the request helper.</param>
        /// <param name="clientSecret">Client secret forwarded to the request helper.</param>
        /// <param name="scraper">Scraper to use; a new <c>HtmlScraper</c> is created when none is given.</param>
        /// <returns>The deserialized response.</returns>
        private static TResponse Get <TResponse>(string url, string accessToken, string clientSecret,
                                                 IHtmlScraper scraper = null) where TResponse : ResponseBase, new()
        {
            // Fall back to a default scraper when the caller did not supply one.
            IHtmlScraper activeScraper = scraper.IsNull() ? new HtmlScraper() : scraper;

            RequestHelper.MakeGetRequest(url, accessToken, clientSecret, activeScraper);

            // The request helper leaves the response body on the scraper.
            return Deserialize <TResponse>(activeScraper.LastContent);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Fetches the three HTML documents for <c>SelectedCity</c> and computes the averaged
        /// temperature, wind, humidity and pressure from them.
        /// </summary>
        /// <returns>A model populated with the four averaged values.</returns>
        private async Task <ProvincialCityDataModel> GetProvincialCityData()
        {
            ProvincialCityDataModel provCityDataObj = new ProvincialCityDataModel();

            var city = SelectedCity;

            // Await instead of blocking on .Result: blocking inside an async method can deadlock
            // on a synchronization context and wraps failures in AggregateException.
            _returnedHtmlDocs = await HtmlScraper.GetHtmlDocs(_webInteriaDict[city], _webGisDict[city], _webOnetDict[city]);

            // Snapshot the field so the background work keeps using the documents fetched above
            // even if the field is reassigned in the meantime.
            var docs = _returnedHtmlDocs;

            await Task.Run(() =>
            {
                provCityDataObj.AverageTemperature = HtmlScraper.AverageTemp(docs[0], docs[1], docs[2]);

                provCityDataObj.AverageWind = HtmlScraper.AverageWind(docs[0], docs[1], docs[2]);

                provCityDataObj.AverageHumidity = HtmlScraper.AverageHumidity(docs[0], docs[1], docs[2]);

                provCityDataObj.AveragePressure = HtmlScraper.AveragePressure(docs[0], docs[1], docs[2]);
            });

            return provCityDataObj;
        }
Ejemplo n.º 14
0
        // Builds a scraper that fetches HTML through ResourceHtmlFetcher, scrapes pages 1..3 of
        // Resource for ".s-item-container" entries (title and URL), and stores the outcome in _results.
        public override void When()
        {
            IHtmlScraper htmlScraper = new HtmlScraper(options =>
            {
                options.UseHtmlFetcher(new ResourceHtmlFetcher());
            });

            var scrapeInstruction = new HtmlScrapeInstruction <AmazonEntry>(builder =>
            {
                // Converter applied to the scraped link of each entry.
                var urlConverter = new LinkConverter("https://www.amazon.com");

                builder
                    .ScrapeUrl(Resource)
                    .WithPaging("page", pagingStart: 1, pagingIncrement: 1, pagingEnd: 3)
                    .AsEntries(entry => entry
                        .FromCssSelector(".s-item-container")
                        .MapProperty(item => item.Title, field => field.FromCssSelector("a.s-access-detail-page h2"))
                        .MapProperty(item => item.Url, field => field.FromCssSelector("a.s-access-detail-page").UsingConverter(urlConverter)));
            });

            // Blocks until the scrape has completed; this override is synchronous.
            _results = htmlScraper.Scrape(scrapeInstruction).Result;
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Scrapes course and assignment marks from TeachAssist.
        /// </summary>
        /// <param name="loginData">The data representing the student login.</param>
        /// <returns>
        /// The indented JSON of every scraped course, concatenated in order with no separator
        /// between courses; an empty string when there are no courses.
        /// </returns>
        public string ScrapeMarks(UserInfo loginData)
        {
            var login     = new HtmlScraper(loginData);
            var htmlFiles = login.ScrapeHtml();

            ObjectParser scraper = new ObjectParser();
            var          courses = scraper.ScrapeMarks(htmlFiles);

            // The settings are identical for every course, so build them once.
            var settings = new JsonSerializerSettings()
            {
                ReferenceLoopHandling = ReferenceLoopHandling.Ignore,
                Formatting            = Formatting.Indented
            };

            // StringBuilder avoids re-copying the accumulated text on every append.
            var json = new System.Text.StringBuilder();

            foreach (var course in courses)
            {
                json.Append(JsonConvert.SerializeObject(course, settings));
            }

            return json.ToString();
        }