/// <summary>
/// Reads the upcoming-passes file of <paramref name="site"/> and turns every well-formed line into an
/// <see cref="UpcomingSatellitePass"/>.
/// </summary>
/// <param name="site">
/// Site name. An empty string selects <c>/upcoming_passes.txt</c>; any other value selects
/// <c>/{site}/upcoming_passes.txt</c>.
/// </param>
/// <returns>
/// An empty list when the file does not exist; <c>null</c> when the file's last-modified stamp equals the
/// one remembered for this site from the previous read; otherwise the list of parsed passes.
/// </returns>
/// <remarks>
/// Each line must consist of exactly seven comma-separated fields. Field 0 and 1 are read as start/end time
/// in seconds since 1970-01-01 UTC, field 2 as the maximum elevation and field 4 as the satellite name
/// (spaces removed). Lines that do not fit are logged and skipped.
/// </remarks>
private IList<UpcomingSatellitePass> Scrape(string site)
{
    var upcomingPassFileInfo = _fileProvider.GetFileInfo(site == "" ? "/upcoming_passes.txt" : $"/{site}/upcoming_passes.txt");
    if (!upcomingPassFileInfo.Exists)
    {
        _logger.LogWarning("Upcoming Passes file does not exist!");
        return new List<UpcomingSatellitePass>();
    }

    var currentLastModified = upcomingPassFileInfo.LastModified;
    if (currentLastModified != DateTimeOffset.MinValue
        && _lastModifiedBySite.TryGetValue(site, out var savedLastModified)
        && currentLastModified == savedLastModified)
    {
        _logger.LogInformation("Upcoming passes file has not been modified");
        return null;
    }

    // The stamp is remembered before parsing, so a bad line must never throw: an exception here would
    // make every following call report "not modified" until the file changes again.
    _lastModifiedBySite[site] = currentLastModified;

    var unixEpoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);
    // Bounds are computed on ticks (integer arithmetic) so they are exact and AddSeconds cannot overflow.
    var minEpochSeconds = (DateTime.MinValue.Ticks - unixEpoch.Ticks) / TimeSpan.TicksPerSecond;
    var maxEpochSeconds = (DateTime.MaxValue.Ticks - unixEpoch.Ticks) / TimeSpan.TicksPerSecond;

    // Parses a whole number of epoch seconds; long is used so timestamps after 2038-01-19 do not overflow.
    bool TryParseEpochSeconds(string text, out DateTime parsed)
    {
        parsed = default(DateTime);
        if (!long.TryParse(text, NumberStyles.Integer, CultureInfo.InvariantCulture, out var seconds)
            || seconds < minEpochSeconds
            || seconds > maxEpochSeconds)
        {
            return false;
        }

        parsed = unixEpoch.AddSeconds(seconds);
        return true;
    }

    var toReturn = new List<UpcomingSatellitePass>();
    using (var sr = new StreamReader(upcomingPassFileInfo.CreateReadStream(), Encoding.UTF8))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            var splitLine = line.Split(',');
            if (splitLine.Length != 7
                || !TryParseEpochSeconds(splitLine[0], out var startTime)
                || !TryParseEpochSeconds(splitLine[1], out var endTime)
                || !int.TryParse(splitLine[2], NumberStyles.Integer, CultureInfo.InvariantCulture, out var maxElevation))
            {
                // Log the raw line: passing the string[] itself would be expanded into the params
                // argument list and only its first element would end up in the message.
                _logger.LogWarning("Invalid Line: {LineContent}", line);
                continue;
            }

            toReturn.Add(new UpcomingSatellitePass
            {
                Site = site,
                StartTime = startTime,
                EndTime = endTime,
                SatelliteName = splitLine[4].Replace(" ", ""),
                MaxElevation = maxElevation
            });
        }
    }

    return toReturn;
}
/// <summary>
/// Walks <c>{site}/meta/{year}/{month}</c> and inserts a <see cref="SatellitePass"/> for every metadata file
/// that is not stored yet, has a matching <c>-RAW.png</c> image and carries valid metadata.
/// </summary>
/// <param name="cancellationToken">Checked at the top of every year, month and file iteration; when set, the current loop is left.</param>
/// <param name="site">Site name. An empty string means the directories live directly under the root.</param>
/// <remarks>
/// The file key (meta file name without extension) must start with a <c>yyyyMMdd-HHmmss</c> UTC stamp; the text
/// from index 16 on is used as satellite name. Files with an unusable name or unusable metadata are remembered in
/// <c>_invalidMetaPasses</c> and skipped on later runs. Enhancement images listed in the month's image
/// directory that do not fit the recorded channels are deleted. Exceptions are logged and counted,
/// never propagated.
/// </remarks>
private void Scrape(CancellationToken cancellationToken, string site)
{
    try
    {
        _logger.LogInformation("starting pass scrape for site {Site}", site);
        var sw = Stopwatch.StartNew();

        // NOTE(review): stored passes are matched by FileKey only, while invalid passes are keyed per site
        // through GetUniquePassKey - confirm that file keys are unique across sites.
        var existingPasses = _satellitePassRepository.Get().Select(x => x.FileKey).ToHashSet();
        var baseUrl = site == "" ? "" : ("/" + site);

        var unixEpoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);
        // Exact bounds (integer tick arithmetic) for which unixEpoch.AddSeconds stays inside DateTime's range.
        var minEpochSeconds = (DateTime.MinValue.Ticks - unixEpoch.Ticks) / TimeSpan.TicksPerSecond;
        var maxEpochSeconds = (DateTime.MaxValue.Ticks - unixEpoch.Ticks) / TimeSpan.TicksPerSecond;

        var yearsDir = _fileProvider.GetDirectoryContents($"{baseUrl}/meta");
        foreach (var year in yearsDir.Where(x => x.IsDirectory).Select(x => x.Name).OrderBy(x => x))
        {
            if (cancellationToken.IsCancellationRequested)
            {
                break;
            }

            var monthsDir = _fileProvider.GetDirectoryContents($"{baseUrl}/meta/{year}");
            foreach (var month in monthsDir.Where(x => x.IsDirectory).Select(x => x.Name).OrderBy(x => x))
            {
                if (cancellationToken.IsCancellationRequested)
                {
                    break;
                }

                var imageDir = $"{baseUrl}/images/{year}/{month}";
                var monthDir = _fileProvider.GetDirectoryContents($"{baseUrl}/meta/{year}/{month}");
                var monthImagesDir = _fileProvider.GetDirectoryContents(imageDir);
                _logger.LogInformation("scraping {ScrapeMonth}", $"{year}-{month}");

                foreach (var metaFileInfo in monthDir.OrderBy(x => x.Name))
                {
                    if (cancellationToken.IsCancellationRequested)
                    {
                        break;
                    }

                    var fileKey = Path.GetFileNameWithoutExtension(metaFileInfo.Name);
                    if (existingPasses.Contains(fileKey) || _invalidMetaPasses.Contains(GetUniquePassKey(site, fileKey)))
                    {
                        continue;
                    }

                    _logger.LogInformation("scraping {FileKey}", fileKey);

                    // A file whose name does not follow the expected pattern must not throw: the exception
                    // would end this scrape and every later one at the same file.
                    if (fileKey.Length < 16
                        || !DateTime.TryParseExact(
                            fileKey.Substring(0, 15),
                            "yyyyMMdd-HHmmss",
                            CultureInfo.InvariantCulture,
                            DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal,
                            out var startTime))
                    {
                        _logger.LogWarning("file name invalid for {FileKey}", fileKey);
                        _invalidMetaPasses.Add(GetUniquePassKey(site, fileKey));
                        continue;
                    }

                    var rawImage = _fileProvider.GetFileInfo($"{imageDir}/{fileKey}-RAW.png");
                    if (!rawImage.Exists)
                    {
                        _logger.LogInformation("no raw image for {FileKey}", fileKey);
                        continue;
                    }

                    var satName = fileKey.Substring(16);

                    string metaData;
                    using (var sr = new StreamReader(metaFileInfo.CreateReadStream()))
                    {
                        metaData = sr.ReadToEnd();
                    }

                    // END_TIME is optional, but when present it has to be a usable number of epoch seconds.
                    // Parsed with the invariant culture like every other value of the metadata file.
                    DateTime? endTime = null;
                    var endTimeValid = true;
                    var endTimeMatch = Regex.Match(metaData, @"^END_TIME=(.*)$", RegexOptions.Multiline);
                    if (endTimeMatch.Success)
                    {
                        endTimeValid = double.TryParse(endTimeMatch.Groups[1].Value, NumberStyles.Float, CultureInfo.InvariantCulture, out var endSeconds)
                            && endSeconds >= minEpochSeconds
                            && endSeconds <= maxEpochSeconds;
                        if (endTimeValid)
                        {
                            endTime = unixEpoch.AddSeconds(endSeconds);
                        }
                    }

                    Match channelAMatch = Regex.Match(metaData, @"^CHAN_A=Channel A: (.*) \(.*\)$", RegexOptions.Multiline);
                    Match channelBMatch = Regex.Match(metaData, @"^CHAN_B=Channel B: (.*) \(.*\)$", RegexOptions.Multiline);
                    Match gainMatch = Regex.Match(metaData, @"^GAIN=Gain: (.*)$", RegexOptions.Multiline);
                    Match maxElevMatch = Regex.Match(metaData, @"^MAXELEV=(.*)$", RegexOptions.Multiline);

                    if (!endTimeValid
                        || !channelAMatch.Success
                        || !channelBMatch.Success
                        || !gainMatch.Success
                        || !double.TryParse(gainMatch.Groups[1].Value, NumberStyles.Float, CultureInfo.InvariantCulture, out var gainRaw)
                        || !maxElevMatch.Success
                        || !int.TryParse(maxElevMatch.Groups[1].Value, NumberStyles.Integer, CultureInfo.InvariantCulture, out var maxElev))
                    {
                        _logger.LogInformation("metadata invalid for {FileKey}", fileKey);
                        _invalidMetaPasses.Add(GetUniquePassKey(site, fileKey));
                        continue;
                    }

                    var channelA = channelAMatch.Groups[1].Value;
                    var channelB = channelBMatch.Groups[1].Value;
                    var gain = -gainRaw; // stored with inverted sign

                    // True when the month's image directory lists "{fileKey}-{suffix}.png".
                    bool ImageListed(string suffix) => monthImagesDir.Any(x => x.Name == $"{fileKey}-{suffix}.png");

                    // Deletes, in the given order, every listed image with one of the suffixes.
                    void DeleteListedImages(params string[] suffixes)
                    {
                        foreach (var suffix in suffixes)
                        {
                            if (ImageListed(suffix))
                            {
                                _fileProvider.DeleteFile($"{imageDir}/{fileKey}-{suffix}.png");
                            }
                        }
                    }

                    var hasChannel4 = channelA == "4" || channelB == "4";
                    var hasChannel1Or2 = channelA == "1" || channelA == "2" || channelB == "1" || channelB == "2";

                    var enhancementTypes = EnhancementTypes.None;

                    // MSA needs channel 4 together with channel 1 or 2.
                    if (hasChannel4 && hasChannel1Or2)
                    {
                        enhancementTypes |= EnhancementTypes.Msa;
                    }
                    else
                    {
                        DeleteListedImages("MSA", "MSA-merc", "MSA-stereo");
                    }

                    // MCIR, THERM, ZA and NO only need channel 4.
                    if (hasChannel4)
                    {
                        enhancementTypes |= EnhancementTypes.Mcir;
                        enhancementTypes |= EnhancementTypes.Therm;
                        enhancementTypes |= EnhancementTypes.Za;
                        enhancementTypes |= EnhancementTypes.No;
                    }
                    else
                    {
                        DeleteListedImages("MCIR", "THERM", "ZA", "NO", "THERM-merc", "THERM-stereo");
                    }

                    // A projection counts only when its enhancement applies and its image is listed.
                    var projectionTypes = ProjectionTypes.None;
                    if (enhancementTypes.HasFlag(EnhancementTypes.Msa) && ImageListed("MSA-merc"))
                    {
                        projectionTypes |= ProjectionTypes.MsaMercator;
                    }

                    if (enhancementTypes.HasFlag(EnhancementTypes.Msa) && ImageListed("MSA-stereo"))
                    {
                        projectionTypes |= ProjectionTypes.MsaStereographic;
                    }

                    if (enhancementTypes.HasFlag(EnhancementTypes.Therm) && ImageListed("THERM-merc"))
                    {
                        projectionTypes |= ProjectionTypes.ThermMercator;
                    }

                    if (enhancementTypes.HasFlag(EnhancementTypes.Therm) && ImageListed("THERM-stereo"))
                    {
                        projectionTypes |= ProjectionTypes.ThermStereographic;
                    }

                    var toInsert = new SatellitePass
                    {
                        Site = site,
                        ImageDir = imageDir,
                        FileKey = fileKey,
                        StartTime = startTime,
                        EndTime = endTime,
                        SatelliteName = satName,
                        ChannelA = channelA,
                        ChannelB = channelB,
                        Gain = gain,
                        MaxElevation = maxElev,
                        EnhancementTypes = enhancementTypes,
                        ProjectionTypes = projectionTypes
                    };

                    // The thumbnail is taken from the MSA image when it applies and exists, else from the RAW image.
                    IFileInfo thumbnailSource = null;
                    string thumbnailEnhancementType = null;
                    if (enhancementTypes.HasFlag(EnhancementTypes.Msa))
                    {
                        var msaImage = _fileProvider.GetFileInfo($"{imageDir}/{fileKey}-MSA.png");
                        if (msaImage.Exists)
                        {
                            thumbnailSource = msaImage;
                            thumbnailEnhancementType = "MSA";
                        }
                    }

                    if (thumbnailSource == null)
                    {
                        thumbnailSource = rawImage;
                        thumbnailEnhancementType = "RAW";
                    }

                    using (var imageStream = thumbnailSource.CreateReadStream())
                    {
                        toInsert.ThumbnailUri = GetThumbnail(imageStream);
                        toInsert.ThumbnailEnhancementType = thumbnailEnhancementType;
                    }

                    _satellitePassRepository.Insert(toInsert);
                    _passCounter.WithLabels(satName).Inc();
                    if (endTime.HasValue)
                    {
                        _passDurationCounter.WithLabels(satName).Inc((endTime.Value - startTime).TotalSeconds);
                    }

                    _logger.LogInformation("{FileKey} successfully scraped", fileKey);
                }
            }
        }

        // NOTE(review): a run that was left early because of cancellation is counted as "success" as well.
        sw.Stop();
        _scrapeCounter.WithLabels("success").Inc();
        _scrapeDurationCounter.Inc(sw.Elapsed.TotalSeconds);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error while scraping!");
        _scrapeCounter.WithLabels("error").Inc();
    }

    _logger.LogInformation("scrape done!");
}