Ejemplo n.º 1
0
 /// <summary>
 /// Builds a <c>ScrapeResult</c> from the incoming request and saves it
 /// to the server path mapped from <c>~/results/</c> plus the result's file name.
 /// </summary>
 /// <param name="context">HTTP context whose request is scraped and whose server maps the target path.</param>
 public void ProcessRequest(HttpContext context)
 {
     var result = new ScrapeResult(context.Request);

     // NOTE(review): FileName's origin is not visible here; if it is derived from
     // request data, confirm it cannot escape the results folder.
     var virtualPath = "~/results/" + result.FileName;
     var physicalPath = context.Server.MapPath(virtualPath);

     result.Save(physicalPath);
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds the label set for a measured metric: the scrape result's labels, plus the
        /// metric dimension (when the metric is dimensional), plus any custom labels from the
        /// metric definition that are not already present.
        /// </summary>
        /// <param name="metricDefinition">Metric definition whose custom labels are merged in; may be null.</param>
        /// <param name="scrapeResult">Scrape result providing the initial labels.</param>
        /// <param name="measuredMetric">Measured metric providing the optional dimension label.</param>
        /// <returns>The label names and the label values as two arrays taken from the same dictionary.</returns>
        private (string[] Names, string[] Values) DetermineLabels(PrometheusMetricDefinition metricDefinition, ScrapeResult scrapeResult, MeasuredMetric measuredMetric)
        {
            var labels = new Dictionary<string, string>(scrapeResult.Labels);

            if (measuredMetric.IsDimensional)
            {
                // Label keys are identifiers, so lower-case them independently of the current culture.
                labels.Add(measuredMetric.DimensionName.ToLowerInvariant(), measuredMetric.DimensionValue);
            }

            if (metricDefinition?.Labels?.Any() == true)
            {
                foreach (var customLabel in metricDefinition.Labels)
                {
                    // An already-present label wins over the custom one; report the conflict and move on.
                    if (labels.TryGetValue(customLabel.Key, out var existingValue))
                    {
                        _logger.LogWarning("Custom label {CustomLabelName} was already specified with value '{LabelValue}' instead of '{CustomLabelValue}'. Ignoring...", customLabel.Key, existingValue, customLabel.Value);
                        continue;
                    }

                    labels.Add(customLabel.Key, customLabel.Value);
                }
            }

            return (labels.Keys.ToArray(), labels.Values.ToArray());
        }
 /// <summary>
 /// Returns the absolute URL of the next page, or null when the scrape result has no next URL.
 /// </summary>
 /// <param name="scrapeResult">Scrape result whose <c>NextUrl</c> is appended to the site root.</param>
 /// <returns>The site root followed by <c>NextUrl</c>, or null.</returns>
 private static string GetNextUrl(ScrapeResult scrapeResult)
 {
     if (scrapeResult.NextUrl == null)
     {
         return null;
     }

     return "http://flights.thomson.co.uk" + scrapeResult.NextUrl;
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Reports the scraped metric to every configured sink and completes once all sinks are done.
        /// </summary>
        /// <param name="metricName">Name of the metric; must not be null or whitespace.</param>
        /// <param name="metricDescription">Description of the metric.</param>
        /// <param name="scrapedMetricResult">Scraped result to report; must not be null.</param>
        public async Task ReportMetricAsync(string metricName, string metricDescription, ScrapeResult scrapedMetricResult)
        {
            Guard.NotNullOrWhitespace(metricName, nameof(metricName));
            Guard.NotNull(scrapedMetricResult, nameof(scrapedMetricResult));

            // Start one report per sink first, then await them together.
            var pendingReports = new List<Task>();
            foreach (var configuredSink in _configuredSinks)
            {
                pendingReports.Add(ReportMetricAsync(configuredSink, metricName, metricDescription, scrapedMetricResult));
            }

            await Task.WhenAll(pendingReports);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Reports the scraped metric to a single sink. Any exception raised by the sink is
        /// logged at critical level and not rethrown.
        /// </summary>
        /// <param name="sink">Sink to report to; must not be null.</param>
        /// <param name="metricName">Name of the metric; must not be null or whitespace.</param>
        /// <param name="metricDescription">Description of the metric.</param>
        /// <param name="scrapedMetricResult">Scraped result to report; it and its metric values must not be null.</param>
        private async Task ReportMetricAsync(IMetricSink sink, string metricName, string metricDescription, ScrapeResult scrapedMetricResult)
        {
            Guard.NotNull(sink, nameof(sink));
            Guard.NotNullOrWhitespace(metricName, nameof(metricName));
            Guard.NotNull(scrapedMetricResult, nameof(scrapedMetricResult));
            Guard.NotNull(scrapedMetricResult.MetricValues, nameof(scrapedMetricResult.MetricValues));

            try
            {
                var sinkReport = sink.ReportMetricAsync(metricName, metricDescription, scrapedMetricResult);
                await sinkReport;
            }
            catch (Exception sinkFailure)
            {
                // The failure is contained here so it does not propagate to the caller.
                Logger.LogCritical(sinkFailure, "Failed to write {MetricName} metric for sink {SinkType}", metricName, sink.Type);
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Placeholder implementation: ignores the crawled page and always returns null.
        /// </summary>
        /// <param name="page">Crawled page (currently unused).</param>
        /// <returns>Always null.</returns>
        public ScrapeResult GetScrapeResult(CrawledPage page)
        {
            return null;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Reports every measured value of the scrape result under the given metric name,
        /// each with an empty label set, and completes once all reports are done.
        /// </summary>
        /// <param name="metricName">Name of the metric; must not be null or empty.</param>
        /// <param name="metricDescription">Description of the metric.</param>
        /// <param name="scrapeResult">Scrape result; it and its metric values must not be null.</param>
        public async Task ReportMetricAsync(string metricName, string metricDescription, ScrapeResult scrapeResult)
        {
            Guard.NotNullOrEmpty(metricName, nameof(metricName));
            Guard.NotNull(scrapeResult, nameof(scrapeResult));
            Guard.NotNull(scrapeResult.MetricValues, nameof(scrapeResult.MetricValues));

            var pendingReports = new List<Task>();
            foreach (var measurement in scrapeResult.MetricValues)
            {
                // A missing measurement is reported as zero.
                var reportedValue = measurement.Value ?? 0;
                var noLabels = new Dictionary<string, string>();

                pendingReports.Add(ReportMetricAsync(metricName, metricDescription, reportedValue, noLabels));
            }

            await Task.WhenAll(pendingReports);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Reports every measured value of the scrape result under the given metric name,
        /// attaching the labels determined for each measurement, and completes once all
        /// reports are done.
        /// </summary>
        /// <param name="metricName">Name of the metric; must not be null or empty.</param>
        /// <param name="metricDescription">Description of the metric.</param>
        /// <param name="scrapeResult">Scrape result; it and its metric values must not be null.</param>
        public async Task ReportMetricAsync(string metricName, string metricDescription, ScrapeResult scrapeResult)
        {
            Guard.NotNullOrEmpty(metricName, nameof(metricName));
            Guard.NotNull(scrapeResult, nameof(scrapeResult));
            Guard.NotNull(scrapeResult.MetricValues, nameof(scrapeResult.MetricValues));

            var pendingReports = new List<Task>();
            foreach (var measurement in scrapeResult.MetricValues)
            {
                var measuredValue = DetermineMetricMeasurement(measurement);

                // NOTE(review): the definition and default labels do not depend on the measurement;
                // they could be fetched once before the loop if the provider calls have no side effects.
                var prometheusDefinition = _metricsDeclarationProvider.GetPrometheusDefinition(metricName);
                var fallbackLabels = _metricsDeclarationProvider.GetDefaultLabels();

                var labelsForMeasurement = DetermineLabels(prometheusDefinition, scrapeResult, measurement, fallbackLabels);

                pendingReports.Add(ReportMetricAsync(metricName, metricDescription, measuredValue, labelsForMeasurement));
            }

            await Task.WhenAll(pendingReports);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Builds the label set for a measured metric. Sources are applied in this order:
        /// scrape-result labels, the metric dimension (when dimensional), custom labels from the
        /// metric definition, default labels, and finally <c>tenant_id</c>. Custom, default and
        /// tenant labels never overwrite a key that is already present. The configured label
        /// transformation is applied afterwards, and the result is returned ordered by key.
        /// </summary>
        /// <param name="metricDefinition">Metric definition whose custom labels are merged in; may be null.</param>
        /// <param name="scrapeResult">Scrape result providing the initial labels.</param>
        /// <param name="measuredMetric">Measured metric providing the optional dimension label.</param>
        /// <param name="defaultLabels">Labels added only when their sanitized key is not yet present.</param>
        /// <returns>A new dictionary built from the labels sorted by key.</returns>
        private Dictionary <string, string> DetermineLabels(PrometheusMetricDefinition metricDefinition, ScrapeResult scrapeResult, MeasuredMetric measuredMetric, Dictionary <string, string> defaultLabels)
        {
            var sanitizedScrapeLabels = scrapeResult.Labels.Select(label => new KeyValuePair<string, string>(label.Key.SanitizeForPrometheusLabelKey(), label.Value));
            var labels = new Dictionary<string, string>(sanitizedScrapeLabels);

            if (measuredMetric.IsDimensional)
            {
                var dimensionKey = measuredMetric.DimensionName.SanitizeForPrometheusLabelKey();
                labels.Add(dimensionKey, measuredMetric.DimensionValue);
            }

            if (metricDefinition?.Labels?.Any() == true)
            {
                foreach (var customLabel in metricDefinition.Labels)
                {
                    var sanitizedKey = customLabel.Key.SanitizeForPrometheusLabelKey();

                    string existingValue;
                    if (!labels.TryGetValue(sanitizedKey, out existingValue))
                    {
                        labels.Add(sanitizedKey, customLabel.Value);
                    }
                    else
                    {
                        // The label that is already present wins; only report the conflict.
                        _logger.LogWarning("Custom label {CustomLabelName} was already specified with value '{LabelValue}' instead of '{CustomLabelValue}'. Ignoring...", customLabel.Key, existingValue, customLabel.Value);
                    }
                }
            }

            foreach (var defaultLabel in defaultLabels)
            {
                var sanitizedKey = defaultLabel.Key.SanitizeForPrometheusLabelKey();
                if (labels.ContainsKey(sanitizedKey))
                {
                    continue;
                }

                labels.Add(sanitizedKey, defaultLabel.Value);
            }

            // Add the tenant id unless a label with that key was already supplied.
            var declaration = _metricsDeclarationProvider.Get(applyDefaults: true);
            if (!labels.ContainsKey("tenant_id"))
            {
                labels.Add("tenant_id", declaration.AzureMetadata.TenantId);
            }

            // Apply the configured transformation only when label configuration is present.
            var labelConfiguration = _prometheusConfiguration.CurrentValue.Labels;
            if (labelConfiguration != null)
            {
                labels = LabelTransformer.TransformLabels(_prometheusConfiguration.CurrentValue.Labels.Transformation, labels);
            }

            return labels
                .OrderBy(entry => entry.Key)
                .ToDictionary(entry => entry.Key, entry => entry.Value);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Builds the label set for a measured metric: the scrape result's labels (keys sanitized),
        /// plus the metric dimension (when the metric is dimensional), plus any custom labels from
        /// the metric definition whose sanitized key is not already present.
        /// </summary>
        /// <param name="metricDefinition">Metric definition whose custom labels are merged in; may be null.</param>
        /// <param name="scrapeResult">Scrape result providing the initial labels.</param>
        /// <param name="measuredMetric">Measured metric providing the optional dimension label.</param>
        /// <returns>The merged labels keyed by sanitized label name.</returns>
        private Dictionary <string, string> DetermineLabels(PrometheusMetricDefinition metricDefinition, ScrapeResult scrapeResult, MeasuredMetric measuredMetric)
        {
            var labels = new Dictionary <string, string>(scrapeResult.Labels.Select(label => new KeyValuePair <string, string>(label.Key.SanitizeForPrometheusLabelKey(), label.Value)));

            if (measuredMetric.IsDimensional)
            {
                labels.Add(measuredMetric.DimensionName.SanitizeForPrometheusLabelKey(), measuredMetric.DimensionValue);
            }

            if (metricDefinition?.Labels?.Any() == true)
            {
                foreach (var customLabel in metricDefinition.Labels)
                {
                    var customLabelKey = customLabel.Key.SanitizeForPrometheusLabelKey();

                    // An already-present label wins over the custom one; report the conflict and move on.
                    string existingValue;
                    if (labels.TryGetValue(customLabelKey, out existingValue))
                    {
                        // Every value passed below needs its own {Placeholder} in the template.
                        _logger.LogWarning("Custom label {CustomLabelName} was already specified with value '{LabelValue}' instead of '{CustomLabelValue}'. Ignoring...", customLabel.Key, existingValue, customLabel.Value);
                        continue;
                    }

                    labels.Add(customLabelKey, customLabel.Value);
                }
            }

            return labels;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Pages through the remote movie list (50 per page, newest first by date added),
        /// creates the retrieved movies through the session, and returns the totals.
        /// Paging stops when a response has no movies, when no movie passes the
        /// <c>ScrapeFrom</c> filter, or when fewer movies than the page limit remain.
        /// </summary>
        /// <param name="session">Scrape session providing the optional start date and the movie creation call.</param>
        /// <param name="cancellationToken">Token passed to the remote list call and to the inter-page delay.</param>
        /// <returns>A <c>ScrapeResult</c> with the accumulated movie and torrent counts.</returns>
        public async Task<ScrapeResult> ScrapeAsync(IScrapeSession session, CancellationToken cancellationToken = default)
        {
            var summary = new ScrapeResult();
            var settings = _options.Value;

            _logger.LogInformation("Scraping yts movies from {fromDate}", session.ScrapeFrom);

            for (var page = 1; page < int.MaxValue; page++)
            {
                _logger.LogInformation("scraping page {page}", page);

                var listRequest = new YtsListMoviesRequest
                {
                    Page    = page,
                    Limit   = 50,
                    OrderBy = "desc",
                    SortBy  = "date_added"
                };
                var listResponse = await _client.ListMoviesAsync(listRequest, cancellationToken);

                // No movie list in the response: nothing further to page through.
                if (listResponse.Movies is null)
                {
                    break;
                }

                // When a start date is set, keep only movies uploaded after it.
                var movies = session.ScrapeFrom.HasValue
                    ? response_filter(listResponse)
                    : listResponse.Movies;

                if (!movies.Any())
                {
                    break;
                }

                _logger.LogInformation("retrieved {movieCount} movies", movies.Count);

                var createRequests = _mapper.Map<ICollection<CreateMovieRequest>>(movies);
                await session.CreateMoviesAsync(createRequests);

                _logger.LogInformation("added {movieCount} movies", createRequests.Count);

                summary.MovieCount   += createRequests.Count;
                summary.TorrentCount += createRequests.Sum(x => x.Torrents?.Count ?? 0);

                // Fewer kept movies than the page limit ends the paging.
                var reachedLastPage = movies.Count < listRequest.Limit;
                if (reachedLastPage)
                {
                    break;
                }

                // Optional pause between remote calls.
                if (settings.RemoteScrapeDelay > TimeSpan.Zero)
                {
                    await Task.Delay(settings.RemoteScrapeDelay, cancellationToken);
                }
            }

            _logger.LogInformation("done");

            return summary;

            // Local helper: movies of the response uploaded after the session's start date.
            dynamic response_filter(dynamic response)
            {
                return null;
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Loads every course (with its sections and their professors) from the database and,
        /// for each distinct professor name teaching a course, scrapes the matching CAPE page
        /// and stores or refreshes the CAPE review on the professor and the course.
        /// Changes are saved after every 100 courses and once more at the end.
        /// </summary>
        /// <exception cref="InvalidOperationException">Thrown when no database context is available.</exception>
        public void Update()
        {
            // Fail with a clear message instead of a NullReferenceException further down.
            if (_context == null)
            {
                throw new InvalidOperationException("A database context is required to update CAPE reviews.");
            }

            // Getting the list of courses, their sections and each section's professor.
            var courses = _context.Courses
                          .Include(c => c.Sections)
                          .ThenInclude(s => s.Professor)
                          .ToList();

            // Professor/course pairs for which no CAPE page URL could be generated.
            // Only written, never read here; kept for inspection while debugging.
            List<Tuple<string, string>> profcourseNotFound = new List<Tuple<string, string>>();

            // Number of courses processed so far; drives the periodic pause and save.
            int coursesProcessed = 0;

            foreach (var course in courses)
            {
                // Names of the professors already handled for this course.
                var handledProfessors = new HashSet<string>();

                // Iterate over a copy of the sections of this course.
                foreach (var section in course.Sections.ToList())
                {
                    Professor currProfessor = section.Professor;

                    // A section without a professor has nothing to scrape.
                    if (currProfessor == null)
                    {
                        continue;
                    }

                    // Update the CAPE review for each professor only one time per course.
                    if (!handledProfessors.Add(currProfessor.Name))
                    {
                        continue;
                    }

                    // Note: Cape has already been initialized in the database model
                    // as an empty list.
                    // NOTE(review): course.Cape is not part of the Include chain above; confirm
                    // existing reviews are loaded, otherwise the lookup below never finds them.

                    // Gets the CAPE page for the specific professor and course.
                    string capePageURL = GenerateURL(currProfessor.Name, course.CourseAbbreviation);

                    // No CAPE page for this professor/course pair.
                    if (string.IsNullOrEmpty(capePageURL))
                    {
                        profcourseNotFound.Add(Tuple.Create(currProfessor.Name, course.CourseAbbreviation));
                        continue;
                    }

                    ScrapeResult scrapedCapePage = InsertDataFromHtmlPage(capePageURL);

                    // Nothing was scraped, so there is nothing to store or refresh.
                    if (scrapedCapePage == null)
                    {
                        continue;
                    }

                    // Check if we already have the cape for this professor and course;
                    // when several match, the last one is the one refreshed.
                    Cape capeReview = null;
                    foreach (Cape existingCape in course.Cape)
                    {
                        if (existingCape.Professor == currProfessor)
                        {
                            capeReview = existingCape;
                        }
                    }

                    // If no cape was found then a new one has to be added to the database.
                    bool addToDb = capeReview == null;
                    if (addToDb)
                    {
                        capeReview = new Cape();
                    }

                    // Copy the scraped values onto the review.
                    capeReview.Term                 = scrapedCapePage.Term;
                    capeReview.StudentsEnrolled     = scrapedCapePage.StudentsEnrolled;
                    capeReview.NumberOfEvaluation   = scrapedCapePage.NumberOfEvaluation;
                    capeReview.RecommendedClass     = scrapedCapePage.RecommendedClass;
                    capeReview.RecommendedProfessor = scrapedCapePage.RecommendedProfessor;
                    capeReview.StudyHoursPerWeek    = scrapedCapePage.StudyHoursPerWeek;
                    capeReview.AverageGradeExpected = scrapedCapePage.AverageGradeExpected;
                    capeReview.AverageGradeReceived = scrapedCapePage.AverageGradeReceived;
                    capeReview.URL = capePageURL;

                    // Attach a new review to both the professor and the course.
                    if (addToDb)
                    {
                        currProfessor.Cape.Add(capeReview);
                        course.Cape.Add(capeReview);
                    }
                }

                // After every 100 courses, wait for 10 seconds and save what was collected so far.
                coursesProcessed++;
                if (coursesProcessed % 100 == 0)
                {
                    System.Threading.Thread.Sleep(10000);
                    _context.SaveChanges();
                }
            }

            // Save the remaining changes in the database.
            _context.SaveChanges();
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Scrapes a single Cape page specified by the URL input parameter.
        /// Note that this works only for FA12 term to the present.
        /// </summary>
        /// <param name="Url">URL of single Cape page to scrape</param>
        /// <returns>
        /// A ScrapeResult containing the scraped data, or null when the course title has fewer
        /// than two space-separated parts, does not match <c>CourseName</c>, or the
        /// "recommend class" row is missing.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="Url"/> is null or empty.</exception>
        public ScrapeResult InsertDataFromHtmlPage(string Url)
        {
            // Check for null or empty URL
            if (String.IsNullOrEmpty(Url))
            {
                throw new ArgumentNullException(nameof(Url));
            }

            // HTML read from url and assign into var htmlDoc
            HtmlWeb web     = new HtmlWeb();
            var     htmlDoc = web.Load(Url);

            // Get the course name
            HtmlNode crseNameNode      = htmlDoc.DocumentNode.SelectSingleNode(CapeXPaths.CourseNamePath);
            string   fullScrapedCourse = (crseNameNode != null) ? crseNameNode.InnerText : "N/A";

            string[] splitCourse = fullScrapedCourse.Split(" ");

            // Cape is too old to scrape
            if (splitCourse.Length < 2)
            {
                return(null);
            }

            string courseName = splitCourse[0] + " " + splitCourse[1];

            // Check if we're looking at the right course scrape
            if (CourseName != courseName)
            {
                return(null);
            }

            // Gather professor and check for null return
            HtmlNode instrNameNode  = htmlDoc.DocumentNode.SelectSingleNode(CapeXPaths.InstrNamePath);
            string   instructorName = (instrNameNode != null) ? instrNameNode.InnerText : "N/A";

            // Gather term and check for null return
            HtmlNode termNode = htmlDoc.DocumentNode.SelectSingleNode(CapeXPaths.TermPath);
            string   term     = (termNode != null) ? termNode.InnerText : "N/A";

            // Gather enrollment and check for null return
            HtmlNode enrollmentNode = htmlDoc.DocumentNode.SelectSingleNode(CapeXPaths.EnrollmentPath);
            string   enrollment     = (enrollmentNode != null) ? enrollmentNode.InnerText : "0";

            // Gather number of evaluations and check for null return
            // (the check must be on the evaluations node itself, not on the enrollment node).
            HtmlNode evalsSubmittedNode = htmlDoc.DocumentNode.SelectSingleNode(CapeXPaths.EvalsSubmittedPath);
            string   evalsSubmitted     = (evalsSubmittedNode != null) ? evalsSubmittedNode.InnerText : "0";

            // Gather percentage of enrolled students that recommend course and check for null return
            HtmlNode recCourseRow = htmlDoc.DocumentNode.SelectSingleNode(CapeXPaths.RecommendClassTblRow);

            if (recCourseRow == null)
            {
                return(null);
            }
            HtmlNode recCourseNode = recCourseRow.SelectSingleNode(CapeXPaths.TblRowToMean);
            string   recCourseMean = (recCourseNode != null) ? recCourseNode.InnerText : "0";

            // Gather percentage of enrolled students that recommend professor;
            // a missing row falls back to "0" the same way a missing mean cell does.
            HtmlNode recProfRow  = htmlDoc.DocumentNode.SelectSingleNode(CapeXPaths.RecommendProfTblRow);
            HtmlNode recProfNode = recProfRow?.SelectSingleNode(CapeXPaths.TblRowToMean);
            string   recProfMean = (recProfNode != null) ? recProfNode.InnerText : "0";

            // Gather average hours of studying per week; a missing row falls back to "0".
            HtmlNode studyHoursRow  = htmlDoc.DocumentNode.SelectSingleNode(CapeXPaths.HoursPerWeekTblRow);
            HtmlNode studyHoursNode = studyHoursRow?.SelectSingleNode(CapeXPaths.TblRowToMean);
            string   studyHoursMean = (studyHoursNode != null) ? studyHoursNode.InnerText : "0";

            // Gather average grade expected / received; only the parenthesized part of the text is kept.
            string avgGradeExpected = ExtractParenthesizedGrade(htmlDoc.DocumentNode.SelectSingleNode(CapeXPaths.AvgGradeExpectedNode)?.InnerText);
            string avgGradeReceived = ExtractParenthesizedGrade(htmlDoc.DocumentNode.SelectSingleNode(CapeXPaths.AvgGradeReceivedNode)?.InnerText);

            // Make a scrapeResult object to store the result.
            // NOTE(review): the Convert calls throw FormatException when the scraped text is not
            // numeric, and they parse with the current culture — confirm that is acceptable.
            ScrapeResult result = new ScrapeResult()
            {
                InstructorName       = instructorName,
                Term                 = term,
                StudentsEnrolled     = Convert.ToInt32(enrollment),
                NumberOfEvaluation   = Convert.ToInt32(evalsSubmitted),
                RecommendedClass     = Convert.ToDecimal(recCourseMean),
                RecommendedProfessor = Convert.ToDecimal(recProfMean),
                StudyHoursPerWeek    = Convert.ToDecimal(studyHoursMean),
                AverageGradeExpected = Convert.ToDecimal(avgGradeExpected),
                AverageGradeReceived = Convert.ToDecimal(avgGradeReceived)
            };

            return(result);
        }

        /// <summary>
        /// Returns the text from the first '(' onward with all parentheses removed,
        /// or "0" when the text is null, empty, or contains no '('.
        /// </summary>
        private static string ExtractParenthesizedGrade(string gradeText)
        {
            if (String.IsNullOrEmpty(gradeText))
            {
                return "0";
            }

            int openParenIndex = gradeText.IndexOf('(');

            // Without an opening parenthesis there is no value to extract.
            if (openParenIndex < 0)
            {
                return "0";
            }

            return gradeText.Substring(openParenIndex).Replace("(", "").Replace(")", "");
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Builds a <c>ScrapeResult</c> from the incoming request and saves it
        /// to the server path mapped from <c>~/results/</c> plus the result's file name.
        /// </summary>
        /// <param name="context">HTTP context whose request is scraped and whose server maps the target path.</param>
        public void ProcessRequest(HttpContext context)
        {
            var result = new ScrapeResult(context.Request);

            // NOTE(review): FileName's origin is not visible here; if it is derived from
            // request data, confirm it cannot escape the results folder.
            var virtualPath = "~/results/" + result.FileName;
            var physicalPath = context.Server.MapPath(virtualPath);

            result.Save(physicalPath);
        }