/// <summary>
/// Lambda handler that returns every archive album found in the DynamoDB table
/// named by the <c>ARCHIVE_TABLE</c> environment variable.
/// </summary>
/// <remarks>
/// Albums are read from the <c>gsiAlbums</c> index using the key condition
/// <c>sort_key = "alb"</c>. All result pages are consumed, so the response is
/// complete even when the query exceeds a single DynamoDB page.
/// </remarks>
/// <param name="request">The incoming API Gateway proxy request (only logged).</param>
/// <param name="context">The Lambda invocation context; its logger is used for diagnostics.</param>
/// <returns>
/// A 200 response whose body is the JSON-serialized list of albums, with the
/// CORS headers <c>Access-Control-Allow-Origin</c> and <c>Access-Control-Allow-Credentials</c> set.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the <c>ARCHIVE_TABLE</c> environment variable is missing or empty.
/// </exception>
public async Task<APIGatewayProxyResponse> GetArchiveAlbums(APIGatewayProxyRequest request, ILambdaContext context)
{
    var logger = context.Logger;
    logger.LogLine($"request {JsonConvert.SerializeObject(request)}");
    logger.LogLine($"context {JsonConvert.SerializeObject(context)}");

    var archiveTablename = Environment.GetEnvironmentVariable("ARCHIVE_TABLE");
    logger.LogLine($"archiveTablename {archiveTablename}");

    // Fail with a clear message instead of an opaque SDK error further down.
    if (string.IsNullOrEmpty(archiveTablename))
    {
        throw new InvalidOperationException("The ARCHIVE_TABLE environment variable is not set.");
    }

    var albumList = new List<ArchiveAlbum>();

    using (var client = new AmazonDynamoDBClient())
    {
        var db_request = new QueryRequest
        {
            TableName = archiveTablename,
            IndexName = "gsiAlbums",
            KeyConditionExpression = "sort_key = :v_Id",
            ExpressionAttributeValues = new Dictionary<string, AttributeValue>
            {
                { ":v_Id", new AttributeValue { S = "alb" } }
            }
        };

        // A single Query call returns at most one page of results; keep asking
        // until DynamoDB stops handing back a LastEvaluatedKey.
        while (true)
        {
            var result = await client.QueryAsync(db_request);

            foreach (var item in result.Items)
            {
                albumList.Add(new ArchiveAlbum
                {
                    Name = item["name"].S,
                    Owner = item["owner"].S,
                    // DynamoDB numbers arrive as culture-neutral strings.
                    Year = Convert.ToInt32(item["year"].N, System.Globalization.CultureInfo.InvariantCulture),
                    Id = new Guid(item["id"].S),
                    DateCreated = item["datecreated"].S,
                    DateArchived = item["date_archived"].S
                });
            }

            var lastKey = result.LastEvaluatedKey;
            if (lastKey == null || lastKey.Count == 0)
            {
                break;
            }

            db_request.ExclusiveStartKey = lastKey;
        }
    }

    return new APIGatewayProxyResponse
    {
        StatusCode = 200,
        Headers = new Dictionary<string, string>
        {
            { "Access-Control-Allow-Origin", "*" },
            { "Access-Control-Allow-Credentials", "true" }
        },
        Body = JsonConvert.SerializeObject(albumList)
    };
}
/// <summary>
/// Loads the page at <c>archiveAlbum.AlbumFileContentsUrl</c> and lazily yields one
/// <see cref="ArchiveFile"/> per table row of its download directory listing
/// (presumably an archive.org directory page; confirm against the caller).
/// </summary>
/// <remarks>
/// The first table row is skipped. Rows whose title does not resolve to a known show
/// (<c>DetermineArchiveFileShow</c> yields a null key) are skipped as well.
/// Because this is an iterator, the page is only fetched once enumeration starts.
/// </remarks>
/// <param name="archiveAlbum">The album whose file listing should be scraped.</param>
/// <returns>A lazily evaluated sequence of the files listed for the album.</returns>
/// <exception cref="FormatException">
/// Thrown during enumeration when a row's date cell is not in <c>dd-MMM-yyyy HH:mm</c> form.
/// </exception>
/// <exception cref="InvalidOperationException">
/// Thrown during enumeration when an expected element (container, listing, tbody, cell, link) is missing.
/// </exception>
public static IEnumerable<ArchiveFile> ScrapeArchiveFiles(ArchiveAlbum archiveAlbum)
{
    var context = BrowsingContext.New(Configuration.Default.WithDefaultLoader());

    // An iterator method cannot await, so the document load is blocked on here.
    using (var document = context.OpenAsync(archiveAlbum.AlbumFileContentsUrl).GetAwaiter().GetResult())
    {
        var maincontent = document.GetElementById("maincontent");
        var container = maincontent.GetElementsByClassName("container-ia").First();
        var directoryListing = container.GetElementsByClassName("download-directory-listing").First();
        var tbody = directoryListing.GetElementsByTagName("tbody").First();
        var fileNodeList = tbody.GetElementsByTagName("tr").Skip(1);

        foreach (var fileNode in fileNodeList)
        {
            // Look the cells up once per row: link/title, date, size.
            var cells = fileNode.GetElementsByTagName("td");

            var fileLinkElement = cells.First().GetElementsByTagName("a").First();
            var fileLinkPath = fileLinkElement.GetAttribute("href");
            var fileTitle = fileLinkElement.TextContent;
            var fileDate = cells.Skip(1).First().TextContent;
            var fileSize = cells.Skip(2).First().TextContent;

            var archiveFileType = DetermineArchiveFileType(fileTitle);
            var show = DetermineArchiveFileShow(fileTitle, out var showKey);
            if (showKey == null)
            {
                continue;
            }

            var postShowStr = fileTitle.Replace(showKey, "");
            var airDate = DetermineArchiveFileAirDate(postShowStr).GetValueOrDefault();
            var approximateBytes = DetermineArchiveFileSizeBytes(fileSize);

            // The listing timestamp is "day-month-year hours:minutes". Parse it with the
            // invariant culture so English month abbreviations work on any host locale.
            if (!DateTime.TryParseExact(
                    fileDate,
                    "dd-MMM-yyyy HH:mm",
                    DateTimeFormatInfo.InvariantInfo,
                    DateTimeStyles.AllowLeadingWhite | DateTimeStyles.AllowTrailingWhite,
                    out var lastModifiedDate))
            {
                throw new FormatException(
                    $"Cannot parse dateTime from string {fileDate.Quote()}.");
            }

            yield return new ArchiveFile(
                fileLinkPath,
                archiveFileType,
                show,
                archiveAlbum,
                fileLinkPath,
                $"{archiveAlbum.AlbumFileContentsUrl}{fileLinkPath}",
                airDate,
                fileTitle,
                lastModifiedDate,
                approximateBytes);
        }
    }
}
/// <summary>
/// Queries archive.org for "Opie and Anthony" items and seeds the database with one
/// <see cref="ArchiveAlbum"/> per item and one <see cref="ArchiveFile"/> per MP3 file in it.
/// </summary>
/// <remarks>
/// Items whose title does not contain a parsable <c>O&amp;A-year-month</c> prefix, and files whose
/// name does not contain a parsable <c>O&amp;A-year-month-day</c> stamp, are reported and skipped
/// rather than aborting the whole run. All additions are persisted with a single
/// <c>SaveChanges</c> call at the end.
/// </remarks>
/// <param name="arg">Not used by this command.</param>
public override void Execute(string arg)
{
    XConsole.WriteLine($"Seeding archive.org database shows to ArchiveFiles table.", Swatch.Cyan);

    using var context = new CoreContext();

    XConsole.WriteLine($" Querying Opie and Anthony shows...", Swatch.Cyan);

    var archiveApi = new InternetArchiveAPI();
    var queryBuilder = IAQueryBuilder
        .Builder
        .WithUploader("*****@*****.**")
        .FromCreator("Opie and Anthony")
        .WithSort(
            IAQueryFields.Title,
            IASortDirection.Ascending)
        .WithFields(
            IAQueryFields.Creator,
            IAQueryFields.Date,
            IAQueryFields.Identifier,
            IAQueryFields.Title)
        .WithRows(10000)
        .WithOutputKind(APIDataOutputKind.JSON)
        .WithCallback("callback")
        .WithShouldSave(true);

    var regex = new Regex("O&A-(?<year>[0-9]*)-(?<month>[0-9]*)");
    var fullShowRegex = new Regex("O&A-(?<year>[0-9]*)-(?<month>[0-9]*)-(?<day>[0-9]*)");

    foreach (var archiveItem in archiveApi.Query(queryBuilder))
    {
        // A title that does not match leaves the groups empty; skip the item
        // instead of letting a FormatException abort the whole seed.
        var match = regex.Match(archiveItem.Title);
        if (!int.TryParse(match.Groups["year"].Value, out var year) ||
            !int.TryParse(match.Groups["month"].Value, out var month))
        {
            XConsole.WriteLine(
                $"\tERROR: Cannot parse year and month from title {archiveItem.Title.Quote()} " +
                $"for item {archiveItem.Identifier}. Skipping item.",
                Swatch.Red);
            continue;
        }

        var archiveAlbum = new ArchiveAlbum(
            archiveItem.Identifier,
            ContentCreator.Opie_and_Anthony,
            archiveItem.Description,
            archiveItem.UploadDate,
            year,
            month);

        context.ArchiveAlbums.Add(archiveAlbum);

        XConsole.WriteLine(
            $"Item: {archiveItem.Identifier} - {archiveItem.Title}",
            Swatch.Teal);

        foreach (var file in archiveItem.GetItemFiles())
        {
            // Only MP3 files are seeded.
            if (!file.FileName.EndsWith(".mp3", StringComparison.Ordinal))
            {
                continue;
            }

            var decodedFileName = file.FileName.UrlDecode();
            var fullShowMatch = fullShowRegex.Match(decodedFileName);

            if (!TryParseDatePart("year", fullShowMatch.Groups["year"].Value, decodedFileName, out var fullShowYear) ||
                !TryParseDatePart("month", fullShowMatch.Groups["month"].Value, decodedFileName, out var fullShowMonth) ||
                !TryParseDatePart("day", fullShowMatch.Groups["day"].Value, decodedFileName, out var fullShowDay))
            {
                continue;
            }

            DateTime showAirDate;
            try
            {
                showAirDate = new DateTime(fullShowYear, fullShowMonth, fullShowDay);
            }
            catch (ArgumentOutOfRangeException)
            {
                // The numbers parsed but do not form a real calendar date (e.g. month 13).
                XConsole.WriteLine(
                    $"\t\tERROR: {fullShowYear}-{fullShowMonth}-{fullShowDay} does not represent a valid " +
                    $"DateTime. Defaulting to DateTime.MinValue",
                    Swatch.Red);
                showAirDate = DateTime.MinValue;
            }

            var targetUrl =
                $"https://archive.org/download/{archiveItem.Identifier}/{decodedFileName}";

            context.ArchiveFiles.Add(
                new ArchiveFile(
                    file.FileName,
                    ArchiveFileTypeInfo.MP3,
                    Show.OpieAndAnthonyShow,
                    archiveAlbum,
                    archiveItem.Identifier,
                    targetUrl,
                    showAirDate,
                    archiveItem.Title,
                    archiveItem.UploadDate,
                    -1));

            XConsole
                .Write($"Adding file: ", Swatch.Cyan)
                .WriteLine($"{targetUrl}", Swatch.Pink);
        }
    }

    XConsole.Write($"Saving to database...", Swatch.Cyan);
    context.SaveChanges();
    XConsole.WriteLine($"Complete", Swatch.Teal);

    // Parses one numeric date component of a file name; logs an error and returns false on failure.
    bool TryParseDatePart(string partName, string text, string input, out int value)
    {
        if (int.TryParse(text, out value))
        {
            return true;
        }

        XConsole.WriteLine(
            $"\t\tERROR: Cannot parse {partName} int from string {text.Quote()} " +
            $"for input {input.Quote()}",
            Swatch.Red);
        return false;
    }
}