/// <summary>
/// Builds a <see cref="ScrapeResult"/> from the incoming request and saves it under the
/// application's "~/results/" folder, using the file name exposed by the result.
/// </summary>
/// <param name="context">HTTP context providing the request and the server path mapping.</param>
public void ProcessRequest(HttpContext context)
{
    var result = new ScrapeResult(context.Request);
    var server = context.Server;

    // NOTE(review): FileName belongs to a result built from the request — confirm it cannot
    // contain directory segments before it is appended to the results folder.
    string targetPath = server.MapPath("~/results/" + result.FileName);

    result.Save(targetPath);
}
/// <summary>
/// Builds the label names and values for a measured metric.
/// </summary>
/// <remarks>
/// Labels are collected in this order: the labels of the scrape result, the dimension of the
/// measured metric (when it is dimensional), and finally the custom labels of the metric
/// definition. A custom label never overrides a label that is already present; the conflict
/// is logged as a warning and the custom label is ignored.
/// </remarks>
/// <param name="metricDefinition">Metric definition that may carry custom labels; may be null.</param>
/// <param name="scrapeResult">Scrape result whose labels seed the label set.</param>
/// <param name="measuredMetric">Measured metric that may contribute a dimension label.</param>
/// <returns>The label names and the matching label values, in the same order.</returns>
private (string[] Names, string[] Values) DetermineLabels(PrometheusMetricDefinition metricDefinition, ScrapeResult scrapeResult, MeasuredMetric measuredMetric)
{
    var labels = new Dictionary<string, string>(scrapeResult.Labels);

    if (measuredMetric.IsDimensional)
    {
        // Label keys are identifiers, not display text, so lower-case them culture-independently.
        labels.Add(measuredMetric.DimensionName.ToLowerInvariant(), measuredMetric.DimensionValue);
    }

    if (metricDefinition?.Labels?.Any() == true)
    {
        foreach (var customLabel in metricDefinition.Labels)
        {
            if (labels.TryGetValue(customLabel.Key, out var existingValue))
            {
                // All three placeholders need braces, otherwise the values are never rendered.
                _logger.LogWarning("Custom label {CustomLabelName} was already specified with value '{LabelValue}' instead of '{CustomLabelValue}'. Ignoring...", customLabel.Key, existingValue, customLabel.Value);
                continue;
            }

            labels.Add(customLabel.Key, customLabel.Value);
        }
    }

    return (labels.Keys.ToArray(), labels.Values.ToArray());
}
/// <summary>
/// Produces the absolute URL of the next page by prefixing the result's
/// <c>NextUrl</c> with the site root.
/// </summary>
/// <param name="scrapeResult">Scrape result that may carry a next-page URL.</param>
/// <returns>The absolute URL, or <c>null</c> when the result has no next URL.</returns>
private static string GetNextUrl(ScrapeResult scrapeResult)
{
    if (scrapeResult.NextUrl == null)
    {
        return null;
    }

    return "http://flights.thomson.co.uk" + scrapeResult.NextUrl;
}
/// <summary>
/// Reports a scraped metric to every configured sink.
/// </summary>
/// <remarks>
/// A report is started for each sink in turn and all of them are awaited together.
/// </remarks>
/// <param name="metricName">Name of the metric; must not be null or whitespace.</param>
/// <param name="metricDescription">Description of the metric.</param>
/// <param name="scrapedMetricResult">Scraped result to report; must not be null.</param>
public async Task ReportMetricAsync(string metricName, string metricDescription, ScrapeResult scrapedMetricResult)
{
    Guard.NotNullOrWhitespace(metricName, nameof(metricName));
    Guard.NotNull(scrapedMetricResult, nameof(scrapedMetricResult));

    var pendingReports = new List<Task>();

    foreach (var sink in _configuredSinks)
    {
        pendingReports.Add(ReportMetricAsync(sink, metricName, metricDescription, scrapedMetricResult));
    }

    await Task.WhenAll(pendingReports);
}
/// <summary>
/// Reports a scraped metric to a single sink.
/// </summary>
/// <remarks>
/// An exception thrown by the sink is logged as critical and not rethrown.
/// </remarks>
/// <param name="sink">Sink to report to; must not be null.</param>
/// <param name="metricName">Name of the metric; must not be null or whitespace.</param>
/// <param name="metricDescription">Description of the metric.</param>
/// <param name="scrapedMetricResult">Scraped result to report; it and its metric values must not be null.</param>
private async Task ReportMetricAsync(IMetricSink sink, string metricName, string metricDescription, ScrapeResult scrapedMetricResult)
{
    Guard.NotNull(sink, nameof(sink));
    Guard.NotNullOrWhitespace(metricName, nameof(metricName));
    Guard.NotNull(scrapedMetricResult, nameof(scrapedMetricResult));
    Guard.NotNull(scrapedMetricResult.MetricValues, nameof(scrapedMetricResult.MetricValues));

    try
    {
        await sink.ReportMetricAsync(metricName, metricDescription, scrapedMetricResult);
    }
    catch (Exception sinkFailure)
    {
        // Log and swallow: the failure stays inside this method.
        Logger.LogCritical(sinkFailure, "Failed to write {MetricName} metric for sink {SinkType}", metricName, sink.Type);
    }
}
/// <summary>
/// Returns the scrape result for a crawled page. This implementation does not read
/// the page and always returns <c>null</c>.
/// </summary>
/// <param name="page">The crawled page (unused).</param>
/// <returns>Always <c>null</c>.</returns>
public ScrapeResult GetScrapeResult(CrawledPage page)
{
    return null;
}
/// <summary>
/// Reports every measured value of a scrape result, without any labels.
/// </summary>
/// <remarks>
/// A report is started for each measured metric and all of them are awaited together.
/// </remarks>
/// <param name="metricName">Name of the metric; must not be null or empty.</param>
/// <param name="metricDescription">Description of the metric.</param>
/// <param name="scrapeResult">Scrape result to report; it and its metric values must not be null.</param>
public async Task ReportMetricAsync(string metricName, string metricDescription, ScrapeResult scrapeResult)
{
    Guard.NotNullOrEmpty(metricName, nameof(metricName));
    Guard.NotNull(scrapeResult, nameof(scrapeResult));
    Guard.NotNull(scrapeResult.MetricValues, nameof(scrapeResult.MetricValues));

    var pendingReports = new List<Task>();

    foreach (var measurement in scrapeResult.MetricValues)
    {
        // A measurement without a value is reported as zero.
        var valueToReport = measurement.Value ?? 0;
        var noLabels = new Dictionary<string, string>();

        pendingReports.Add(ReportMetricAsync(metricName, metricDescription, valueToReport, noLabels));
    }

    await Task.WhenAll(pendingReports);
}
/// <summary>
/// Reports every measured value of a scrape result together with its labels.
/// </summary>
/// <remarks>
/// For each measured metric the value is determined, the Prometheus definition and the default
/// labels are fetched from the metrics declaration provider, the labels are determined, and a
/// report is started. All reports are awaited together.
/// </remarks>
/// <param name="metricName">Name of the metric; must not be null or empty.</param>
/// <param name="metricDescription">Description of the metric.</param>
/// <param name="scrapeResult">Scrape result to report; it and its metric values must not be null.</param>
public async Task ReportMetricAsync(string metricName, string metricDescription, ScrapeResult scrapeResult)
{
    Guard.NotNullOrEmpty(metricName, nameof(metricName));
    Guard.NotNull(scrapeResult, nameof(scrapeResult));
    Guard.NotNull(scrapeResult.MetricValues, nameof(scrapeResult.MetricValues));

    var pendingReports = new List<Task>();

    foreach (var measurement in scrapeResult.MetricValues)
    {
        var valueToReport = DetermineMetricMeasurement(measurement);

        // NOTE(review): neither lookup depends on the current measurement — they could
        // presumably be fetched once before the loop; confirm the provider calls are side-effect free.
        var prometheusDefinition = _metricsDeclarationProvider.GetPrometheusDefinition(metricName);
        var defaultLabels = _metricsDeclarationProvider.GetDefaultLabels();

        var labelsToReport = DetermineLabels(prometheusDefinition, scrapeResult, measurement, defaultLabels);

        pendingReports.Add(ReportMetricAsync(metricName, metricDescription, valueToReport, labelsToReport));
    }

    await Task.WhenAll(pendingReports);
}
/// <summary>
/// Builds the complete, key-ordered label set for a measured metric.
/// </summary>
/// <remarks>
/// Labels are gathered in this order, every key being sanitized for Prometheus first:
/// scrape result labels, the metric dimension (when dimensional), custom labels of the metric
/// definition, default labels, and the "tenant_id" label. Custom labels, default labels and the
/// tenant id never replace a key that is already present. When label configuration is available
/// the labels are passed through <c>LabelTransformer.TransformLabels</c> before being ordered.
/// </remarks>
/// <param name="metricDefinition">Metric definition that may carry custom labels; may be null.</param>
/// <param name="scrapeResult">Scrape result whose labels seed the label set.</param>
/// <param name="measuredMetric">Measured metric that may contribute a dimension label.</param>
/// <param name="defaultLabels">Labels added only when their key is not present yet.</param>
/// <returns>A new dictionary holding the labels ordered by key.</returns>
private Dictionary<string, string> DetermineLabels(PrometheusMetricDefinition metricDefinition, ScrapeResult scrapeResult, MeasuredMetric measuredMetric, Dictionary<string, string> defaultLabels)
{
    var sanitizedScrapeLabels = scrapeResult.Labels.Select(label => new KeyValuePair<string, string>(label.Key.SanitizeForPrometheusLabelKey(), label.Value));
    var labels = new Dictionary<string, string>(sanitizedScrapeLabels);

    if (measuredMetric.IsDimensional)
    {
        var dimensionKey = measuredMetric.DimensionName.SanitizeForPrometheusLabelKey();
        labels.Add(dimensionKey, measuredMetric.DimensionValue);
    }

    if (metricDefinition?.Labels?.Any() == true)
    {
        foreach (var customLabel in metricDefinition.Labels)
        {
            var customLabelKey = customLabel.Key.SanitizeForPrometheusLabelKey();

            if (labels.TryGetValue(customLabelKey, out var existingValue))
            {
                // The value that is already present wins; the custom label is dropped.
                _logger.LogWarning("Custom label {CustomLabelName} was already specified with value '{LabelValue}' instead of '{CustomLabelValue}'. Ignoring...", customLabel.Key, existingValue, customLabel.Value);
            }
            else
            {
                labels.Add(customLabelKey, customLabel.Value);
            }
        }
    }

    foreach (var defaultLabel in defaultLabels)
    {
        var defaultLabelKey = defaultLabel.Key.SanitizeForPrometheusLabelKey();
        if (!labels.ContainsKey(defaultLabelKey))
        {
            labels.Add(defaultLabelKey, defaultLabel.Value);
        }
    }

    // The tenant id comes from the metrics declaration and only fills a missing key.
    var metricsDeclaration = _metricsDeclarationProvider.Get(applyDefaults: true);
    if (!labels.ContainsKey("tenant_id"))
    {
        labels.Add("tenant_id", metricsDeclaration.AzureMetadata.TenantId);
    }

    // Apply the configured label transformation, if label configuration exists.
    var labelConfiguration = _prometheusConfiguration.CurrentValue.Labels;
    if (labelConfiguration != null)
    {
        labels = LabelTransformer.TransformLabels(labelConfiguration.Transformation, labels);
    }

    return labels
        .OrderBy(kvp => kvp.Key)
        .ToDictionary(kvp => kvp.Key, kvp => kvp.Value);
}
/// <summary>
/// Builds the label set for a measured metric.
/// </summary>
/// <remarks>
/// Labels are gathered in this order, every key being sanitized for Prometheus first:
/// scrape result labels, the metric dimension (when dimensional), and the custom labels of the
/// metric definition. A custom label never overrides a label that is already present; the
/// conflict is logged as a warning and the custom label is ignored.
/// </remarks>
/// <param name="metricDefinition">Metric definition that may carry custom labels; may be null.</param>
/// <param name="scrapeResult">Scrape result whose labels seed the label set.</param>
/// <param name="measuredMetric">Measured metric that may contribute a dimension label.</param>
/// <returns>The labels keyed by their sanitized name.</returns>
private Dictionary<string, string> DetermineLabels(PrometheusMetricDefinition metricDefinition, ScrapeResult scrapeResult, MeasuredMetric measuredMetric)
{
    var labels = new Dictionary<string, string>(scrapeResult.Labels.Select(label => new KeyValuePair<string, string>(label.Key.SanitizeForPrometheusLabelKey(), label.Value)));

    if (measuredMetric.IsDimensional)
    {
        labels.Add(measuredMetric.DimensionName.SanitizeForPrometheusLabelKey(), measuredMetric.DimensionValue);
    }

    if (metricDefinition?.Labels?.Any() == true)
    {
        foreach (var customLabel in metricDefinition.Labels)
        {
            var customLabelKey = customLabel.Key.SanitizeForPrometheusLabelKey();

            if (labels.TryGetValue(customLabelKey, out var existingValue))
            {
                // All three placeholders need braces, otherwise the values are never rendered.
                _logger.LogWarning("Custom label {CustomLabelName} was already specified with value '{LabelValue}' instead of '{CustomLabelValue}'. Ignoring...", customLabel.Key, existingValue, customLabel.Value);
                continue;
            }

            labels.Add(customLabelKey, customLabel.Value);
        }
    }

    return labels;
}
/// <summary>
/// Pages through the remote movie listing (50 per page, sorted by "date_added" descending)
/// and creates the retrieved movies through the scrape session.
/// </summary>
/// <remarks>
/// When the session has a <c>ScrapeFrom</c> value only movies uploaded after it are kept.
/// Paging stops when a response has no movie list, when no movies remain after filtering,
/// or when fewer movies than the page limit remain. Between pages the configured
/// <c>RemoteScrapeDelay</c> is awaited when it is greater than zero.
/// </remarks>
/// <param name="session">Session providing the scrape start date and receiving the created movies.</param>
/// <param name="cancellationToken">Token passed to the listing call and to the delay.</param>
/// <returns>A result holding the number of movies and torrents that were added.</returns>
public async Task<ScrapeResult> ScrapeAsync(IScrapeSession session, CancellationToken cancellationToken = default)
{
    var result = new ScrapeResult();
    var options = _options.Value;

    _logger.LogInformation("Scraping yts movies from {fromDate}", session.ScrapeFrom);

    for (var page = 1; page < int.MaxValue; page++)
    {
        _logger.LogInformation("scraping page {page}", page);

        var listRequest = new YtsListMoviesRequest
        {
            Page = page,
            Limit = 50,
            OrderBy = "desc",
            SortBy = "date_added"
        };

        var response = await _client.ListMoviesAsync(listRequest, cancellationToken);
        if (response.Movies is null)
        {
            break;
        }

        // Keep only movies newer than the session's start date, when one is set.
        var movies = session.ScrapeFrom.HasValue
            ? response.Movies
                .Where(movie => movie.DateUploaded > session.ScrapeFrom.Value)
                .ToList()
            : response.Movies;

        if (!movies.Any())
        {
            break;
        }

        _logger.LogInformation("retrieved {movieCount} movies", movies.Count);

        var createRequests = _mapper.Map<ICollection<CreateMovieRequest>>(movies);
        await session.CreateMoviesAsync(createRequests);

        _logger.LogInformation("added {movieCount} movies", createRequests.Count);

        result.MovieCount += createRequests.Count;
        result.TorrentCount += createRequests.Sum(created => created.Torrents?.Count ?? 0);

        // Fewer movies than the page limit: stop paging.
        if (movies.Count < listRequest.Limit)
        {
            break;
        }

        if (options.RemoteScrapeDelay > TimeSpan.Zero)
        {
            await Task.Delay(options.RemoteScrapeDelay, cancellationToken);
        }
    }

    _logger.LogInformation("done");

    return result;
}
/// <summary>
/// Gets all the courses (with their sections and professors) from the database and refreshes
/// the CAPE review of every course/professor pair from the matching CAPE page.
/// </summary>
/// <remarks>
/// For each pair the CAPE page URL is obtained from <c>GenerateURL</c> and scraped with
/// <c>InsertDataFromHtmlPage</c>. An existing <c>Cape</c> of the course that belongs to the
/// professor is updated in place; otherwise a new one is created and added to both the professor
/// and the course. Changes are saved after every 100 courses (following a 10 second pause) and
/// once more at the end. Nothing is done when no database context is available.
/// </remarks>
public void Update()
{
    // Without a context there is nothing to read or save.
    if (_context == null)
    {
        return;
    }

    // After this many courses the scraper pauses and saves the changes made so far.
    const int coursesPerBatch = 100;
    const int batchPauseMilliseconds = 10000;

    // NOTE(review): course.Cape is read below but is not part of this query — confirm it is
    // loaded some other way, otherwise existing reviews are never found.
    var courses = _context.Courses.Include(c => c.Sections)
                                  .ThenInclude(s => s.Professor)
                                  .ToList();

    // Professor/course pairs without a CAPE review page; kept for inspection while debugging.
    List<Tuple<string, string>> profcourseNotFound = new List<Tuple<string, string>>();

    // Number of courses handled so far.
    int coursesProcessed = 0;

    foreach (var course in courses)
    {
        // Names of the professors already handled for this course.
        var professorsSeen = new HashSet<string>();

        foreach (var section in course.Sections)
        {
            Professor currProfessor = section.Professor;

            // A section without a professor has nothing to scrape.
            if (currProfessor == null)
            {
                continue;
            }

            // Update the CAPE review of each professor only once per course.
            if (!professorsSeen.Add(currProfessor.Name))
            {
                continue;
            }

            // Get the CAPE page for this professor and course.
            string capePageURL = GenerateURL(currProfessor.Name, course.CourseAbbreviation);

            if (string.IsNullOrEmpty(capePageURL))
            {
                // No CAPE review page for this professor/course pair.
                profcourseNotFound.Add(Tuple.Create(currProfessor.Name, course.CourseAbbreviation));
                continue;
            }

            ScrapeResult scrapedCapePage = InsertDataFromHtmlPage(capePageURL);
            if (scrapedCapePage == null)
            {
                // Nothing could be scraped, so there is nothing to store.
                continue;
            }

            // Look for a review this course already has for the professor (the last match wins).
            Cape capeReview = null;
            foreach (Cape existingCape in course.Cape)
            {
                if (existingCape.Professor == currProfessor)
                {
                    capeReview = existingCape;
                }
            }

            // A review that was not found has to be created and attached afterwards.
            bool addToDb = capeReview == null;
            if (addToDb)
            {
                capeReview = new Cape();
            }

            // Copy the scraped values onto the review.
            capeReview.Term = scrapedCapePage.Term;
            capeReview.StudentsEnrolled = scrapedCapePage.StudentsEnrolled;
            capeReview.NumberOfEvaluation = scrapedCapePage.NumberOfEvaluation;
            capeReview.RecommendedClass = scrapedCapePage.RecommendedClass;
            capeReview.RecommendedProfessor = scrapedCapePage.RecommendedProfessor;
            capeReview.StudyHoursPerWeek = scrapedCapePage.StudyHoursPerWeek;
            capeReview.AverageGradeExpected = scrapedCapePage.AverageGradeExpected;
            capeReview.AverageGradeReceived = scrapedCapePage.AverageGradeReceived;
            capeReview.URL = capePageURL;

            if (addToDb)
            {
                currProfessor.Cape.Add(capeReview);
                course.Cape.Add(capeReview);
            }
        }

        // After every batch of courses, wait and then save what has been collected so far.
        coursesProcessed++;
        if (coursesProcessed % coursesPerBatch == 0)
        {
            System.Threading.Thread.Sleep(batchPauseMilliseconds);
            _context.SaveChanges();
        }
    }

    // Save the remaining changes.
    _context.SaveChanges();
}
/// <summary>
/// Scrapes a single Cape page specified by the URL input parameter.
/// Note that this works only for FA12 term to the present.
/// </summary>
/// <param name="Url">URL of single Cape page to scrape</param>
/// <returns>
/// A ScrapeResult object containing the data, or <c>null</c> when the course name on the page
/// has fewer than two parts (Cape too old to scrape), when it differs from <c>CourseName</c>,
/// or when the page has no "recommend class" row.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="Url"/> is null or empty.</exception>
public ScrapeResult InsertDataFromHtmlPage(string Url)
{
    // Check for null or empty URL
    if (String.IsNullOrEmpty(Url))
    {
        throw new ArgumentNullException(nameof(Url));
    }

    // Load the HTML document from the URL
    HtmlWeb web = new HtmlWeb();
    var htmlDoc = web.Load(Url);
    HtmlNode root = htmlDoc.DocumentNode;

    // Get the course name
    string fullScrapedCourse = root.SelectSingleNode(CapeXPaths.CourseNamePath)?.InnerText ?? "N/A";
    string[] splitCourse = fullScrapedCourse.Split(" ");

    // Cape is too old to scrape
    if (splitCourse.Length < 2)
    {
        return null;
    }

    string courseName = splitCourse[0] + " " + splitCourse[1];

    // Check if we're looking at the right course scrape
    if (CourseName != courseName)
    {
        return null;
    }

    // Plain values; each falls back to a default when its node is missing
    string instructorName = root.SelectSingleNode(CapeXPaths.InstrNamePath)?.InnerText ?? "N/A";
    string term = root.SelectSingleNode(CapeXPaths.TermPath)?.InnerText ?? "N/A";
    string enrollment = root.SelectSingleNode(CapeXPaths.EnrollmentPath)?.InnerText ?? "0";
    string evalsSubmitted = root.SelectSingleNode(CapeXPaths.EvalsSubmittedPath)?.InnerText ?? "0";

    // Percentage of students that recommend the course; a page without this row is not usable
    HtmlNode recCourseRow = root.SelectSingleNode(CapeXPaths.RecommendClassTblRow);
    if (recCourseRow == null)
    {
        return null;
    }

    string recCourseMean = recCourseRow.SelectSingleNode(CapeXPaths.TblRowToMean)?.InnerText ?? "0";

    // Percentage of students that recommend the professor; a missing row or cell counts as 0
    string recProfMean = root.SelectSingleNode(CapeXPaths.RecommendProfTblRow)
                             ?.SelectSingleNode(CapeXPaths.TblRowToMean)
                             ?.InnerText ?? "0";

    // Average hours of studying per week; a missing row or cell counts as 0
    string studyHoursMean = root.SelectSingleNode(CapeXPaths.HoursPerWeekTblRow)
                                ?.SelectSingleNode(CapeXPaths.TblRowToMean)
                                ?.InnerText ?? "0";

    // Average grades: only the part inside the parentheses is kept
    string avgGradeExpected = ExtractParenthesizedGrade(root.SelectSingleNode(CapeXPaths.AvgGradeExpectedNode)?.InnerText);
    string avgGradeReceived = ExtractParenthesizedGrade(root.SelectSingleNode(CapeXPaths.AvgGradeReceivedNode)?.InnerText);

    // Scraped numbers are parsed the same way regardless of the machine's regional settings
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Make a scrapeResult object to store the result
    ScrapeResult result = new ScrapeResult()
    {
        InstructorName = instructorName,
        Term = term,
        StudentsEnrolled = Convert.ToInt32(enrollment, invariant),
        NumberOfEvaluation = Convert.ToInt32(evalsSubmitted, invariant),
        RecommendedClass = Convert.ToDecimal(recCourseMean, invariant),
        RecommendedProfessor = Convert.ToDecimal(recProfMean, invariant),
        StudyHoursPerWeek = Convert.ToDecimal(studyHoursMean, invariant),
        AverageGradeExpected = Convert.ToDecimal(avgGradeExpected, invariant),
        AverageGradeReceived = Convert.ToDecimal(avgGradeReceived, invariant)
    };

    return result;
}

/// <summary>
/// Returns the text that follows the first '(' of a scraped grade with all parentheses removed,
/// or "0" when the text is null, empty, or contains no '('.
/// </summary>
/// <param name="gradeText">Scraped grade text; may be null.</param>
private static string ExtractParenthesizedGrade(string gradeText)
{
    if (String.IsNullOrEmpty(gradeText))
    {
        // The Cape page has no grade value
        return "0";
    }

    int openParen = gradeText.IndexOf('(');
    if (openParen < 0)
    {
        // No parenthesized part; Substring(-1) would throw
        return "0";
    }

    return gradeText.Substring(openParen).Replace("(", "").Replace(")", "");
}
/// <summary>
/// Handles a request by creating a <see cref="ScrapeResult"/> from it and saving that
/// result to a file in the "~/results/" folder, named after the result's file name.
/// </summary>
/// <param name="context">HTTP context providing the request and the server path mapping.</param>
public void ProcessRequest(HttpContext context)
{
    var scraped = new ScrapeResult(context.Request);
    var pathMapper = context.Server;

    // NOTE(review): the file name is taken from a request-derived object — verify it is
    // restricted to a plain file name before relying on this path.
    string destination = pathMapper.MapPath("~/results/" + scraped.FileName);

    scraped.Save(destination);
}