/// <summary>
/// Prints C# <c>WPTerm</c> field declarations to the console for the tags found in the
/// metadata of archived ".mp4" files.
/// </summary>
/// <remarks>
/// The set of known tags starts as <c>WPTerm.AllTags</c>. For every ".mp4" file returned by the
/// Internet Archive query, the associated JSON metadata file is parsed and its tag list walked.
/// A tag whose friendly name and tag name are both unknown is added to the known set and printed
/// as a declaration using the next free ID. Any other tag is printed as a commented-out
/// declaration with ID -1.
/// </remarks>
public static void GenerateCSTagDeclarationsFromXml()
{
    var addedTags = new List<WPTerm>();
    addedTags.AddRange(WPTerm.AllTags);

    var archiveApi = new InternetArchiveAPI();
    var queryBuilder = InternetArchiveQueryBuilder
        .Builder
        .WithUploader("Dr. E. Michael Jones")
        .WithSort(
            IAQueryFields.Title,
            IASortDirection.Ascending)
        .WithFields(
            IAQueryFields.Creator,
            IAQueryFields.Date,
            IAQueryFields.Description,
            IAQueryFields.Identifier,
            IAQueryFields.MediaType,
            IAQueryFields.Title)
        .WithRows(5000)
        .WithOutputKind(APIDataOutputKind.JSON)
        .WithCallback("callback")
        .WithShouldSave(true);

    // New terms are numbered starting right after the already-known ones.
    var index = addedTags.Count;

    foreach (var archiveItem in archiveApi.Query(queryBuilder))
    {
        foreach (var file in archiveItem.GetItemFiles())
        {
            // File extensions are identifiers, not linguistic text: compare ordinally.
            if (!file.FileName.EndsWith(".mp4", StringComparison.Ordinal))
            {
                continue;
            }

            var uploadIndex = archiveItem.Identifier.Replace("emj-archive-", "");
            var localJsonFilePath = JsonMetadataFileAssociator
                .GetAssociatedJsonFile(uploadIndex, file.FileName, out _);
            var jsonResultMetadata = JsonYouTubeMetadataParser
                .ParseJsonYouTubeMetadata(localJsonFilePath);
            var flattenedMetadata = new YouTubeVideoFlattenedMetadata(jsonResultMetadata);

            if (flattenedMetadata.TagList == null)
            {
                continue;
            }

            foreach (var tagText in flattenedMetadata.TagList)
            {
                var wpTerm = new WPTerm(index, tagText);

                // A tag is a duplicate when either its friendly name or its C# identifier is taken.
                var alreadyKnown = addedTags.Any(t =>
                    t.TagFriendlyName == wpTerm.TagFriendlyName
                    || t.TagName == wpTerm.TagName);

                if (!alreadyKnown)
                {
                    addedTags.Add(wpTerm);

                    // The closing ')' before ';' is required for the emitted declaration to compile.
                    Console.WriteLine($"public static readonly WPTerm {wpTerm.TagName} = new WPTerm({wpTerm.WPTermID}, {wpTerm.TagName.Quote()}, {wpTerm.TagFriendlyName.Quote()}, {wpTerm.HtmlEncodedTagName.Quote()});");
                    Console.WriteLine();
                    index++;
                }
                else
                {
                    // Duplicates are still emitted, but commented out and with a placeholder ID.
                    Console.WriteLine($"//public static readonly WPTerm {wpTerm.TagName} = new WPTerm(-1, {wpTerm.TagName.Quote()}, {wpTerm.TagFriendlyName.Quote()}, {wpTerm.HtmlEncodedTagName.Quote()});");
                    Console.WriteLine();
                }
            }
        }
    }
}
/// <summary>
/// Lazily produces one <c>WPPostItem</c> for every ".mp4" file returned by the Internet Archive
/// query for the uploader "Dr. E. Michael Jones".
/// </summary>
/// <remarks>
/// This is an iterator: nothing runs, including the console output, until the result is
/// enumerated. The "Generation complete." and "saved to sql" messages are written once, after the
/// last post has been yielded. Post IDs start at 1 and advance by 2 per post; each post refers to
/// the following ID as its thumbnail.
/// </remarks>
/// <returns>A deferred sequence of the generated posts.</returns>
public static IEnumerable<WPPostItem> GetWPPosts()
{
    Console.WriteLine($"Generating WordPress Posts...");
    Console.WriteLine();

    var archiveApi = new InternetArchiveAPI();
    var queryBuilder = InternetArchiveQueryBuilder
        .Builder
        .WithUploader("Dr. E. Michael Jones")
        .WithSort(
            IAQueryFields.Title,
            IASortDirection.Ascending)
        .WithFields(
            IAQueryFields.Creator,
            IAQueryFields.Date,
            IAQueryFields.Description,
            IAQueryFields.Identifier,
            IAQueryFields.MediaType,
            IAQueryFields.Title)
        .WithRows(5000)
        .WithOutputKind(APIDataOutputKind.JSON)
        .WithCallback("callback")
        .WithShouldSave(true);

    // Each post consumes two consecutive IDs: the post itself and its thumbnail.
    var thumbnailIndex = 1;

    foreach (var archiveItem in archiveApi.Query(queryBuilder))
    {
        foreach (var file in archiveItem.GetItemFiles())
        {
            // File extensions are identifiers, not linguistic text: compare ordinally.
            if (!file.FileName.EndsWith(".mp4", StringComparison.Ordinal))
            {
                continue;
            }

            var uploadIndex = archiveItem.Identifier.Replace("emj-archive-", "");
            var localJsonFilePath = JsonMetadataFileAssociator
                .GetAssociatedJsonFile(uploadIndex, file.FileName, out _);
            var jsonResultMetadata = JsonYouTubeMetadataParser
                .ParseJsonYouTubeMetadata(localJsonFilePath);
            var flattenedMetadata = new YouTubeVideoFlattenedMetadata(jsonResultMetadata);

            // Build the URL slug: lower-case, spaces to dashes, everything else non-alphanumeric dropped.
            // ToLowerInvariant keeps the slug independent of the machine's current culture.
            var encodedFileNameCharArray = file
                .Title
                .ToLowerInvariant()
                .Replace(" ", "-")
                .Where(t => t.IsLetterOrDigit() || t == '-')
                .ToArray();
            var encodedFileName = new string(encodedFileNameCharArray);
            var linkPath = $@"/videos/{encodedFileName}";

            var videoThumbnail = AlternativeThumbnailAssociator.GetPrimaryAssociatedThumbnailFileUrl(file);

            // Only tags that resolve to an already-declared term are attached to the post.
            var terms = new List<WPTerm>();
            if (flattenedMetadata.TagList != null)
            {
                foreach (var tag in flattenedMetadata.TagList)
                {
                    var term = WPTerm.FromFriendlyNameOrNull(tag.ToTitleCase());
                    if (term != null)
                    {
                        terms.Add(term);
                    }
                }
            }

            var postItem = WPPostItem
                .Builder
                .WithPostID(thumbnailIndex)
                .WithPostName(encodedFileName)
                .WithPostTitle(flattenedMetadata.Title)
                .WithPostLink(linkPath)
                .WithPostStatus(WPStatus.Publish)
                .WithAuthor(WPAuthor.EMichaelJones)
                .WithPublicationDate(flattenedMetadata.VideoPublishedDate)
                .WithPostDate(flattenedMetadata.VideoPublishedDate)
                .WithPostDateGmt(flattenedMetadata.VideoPublishedDate)
                .WithCategories(
                    WPCategory.CensoredVideos)
                .WithTerms(terms.ToArray())
                .WithPostComments(new List<WPPostComment>())
                .WithPostContent(
                    $@"<iframe src=""https://archive.org/download/{archiveItem.Identifier}/{file.FileName}"" width=""640"" height=""480"" frameborder=""0"" webkitallowfullscreen=""true"" mozallowfullscreen=""true"" allowfullscreen=""""></iframe>")
                .WithPostExcerpt(flattenedMetadata.Description)
                .WithPostThumbnailId(thumbnailIndex + 1)
                .Build();

            // NOTE(review): this attachment is built but never yielded or stored, so the thumbnail
            // ID referenced by the post above is not emitted by this method. Confirm whether the
            // attachment should be returned to the caller as well.
            var thumbnailItem = WPAttachmentItem
                .Builder
                .WithPostID(thumbnailIndex + 1)
                .WithPostName("")
                .WithPostLink("")
                .WithPostTitle("")
                .WithAttachmentUrl($"{videoThumbnail}")
                .WithStatus("inherit")
                .WithAuthor(WPAuthor.EMichaelJones)
                .WithPublicationDate(flattenedMetadata.VideoPublishedDate)
                .WithPostDate(flattenedMetadata.VideoPublishedDate)
                .WithPostDateGmt(flattenedMetadata.VideoPublishedDate)
                .WithPostContent("")
                .WithPostExcerpt("")
                .Build();

            yield return postItem;
            thumbnailIndex += 2;
        }
    }

    // Reported once, after every archive item has been processed (previously this ran inside
    // the outer loop and was printed once per archive item).
    Console.WriteLine($"Generation complete.");
    // NOTE(review): nothing is persisted by this method; confirm this message is still wanted.
    Console.WriteLine($"saved to sql");
}
/// <summary>
/// Queries archive.org for Opie and Anthony items and adds one <c>ArchiveAlbum</c> per item and
/// one <c>ArchiveFile</c> per ".mp3" file to the database context, saving once at the end.
/// </summary>
/// <remarks>
/// The album year and month are parsed from the item title, and the air date of each show from
/// the URL-decoded file name. An item or file whose date parts cannot be parsed is reported in
/// red and skipped. A file whose parts parse but do not form a valid date is stored with
/// <c>DateTime.MinValue</c>.
/// </remarks>
/// <param name="arg">Command argument; not read by this implementation.</param>
public override void Execute(string arg)
{
    XConsole.WriteLine($"Seeding archive.org database shows to ArchiveFiles table.", Swatch.Cyan);

    using var context = new CoreContext();

    XConsole.WriteLine($" Querying Opie and Anthony shows...", Swatch.Cyan);

    var archiveApi = new InternetArchiveAPI();
    var queryBuilder = IAQueryBuilder
        .Builder
        .WithUploader("*****@*****.**")
        .FromCreator("Opie and Anthony")
        .WithSort(
            IAQueryFields.Title,
            IASortDirection.Ascending)
        .WithFields(
            IAQueryFields.Creator,
            IAQueryFields.Date,
            IAQueryFields.Identifier,
            IAQueryFields.Title)
        .WithRows(10000)
        .WithOutputKind(APIDataOutputKind.JSON)
        .WithCallback("callback")
        .WithShouldSave(true);

    // Album titles carry year and month; individual show files additionally carry the day.
    var albumRegex = new Regex(
        "O&A-(?<year>[0-9]*)-(?<month>[0-9]*)");
    var fullShowRegex = new Regex(
        "O&A-(?<year>[0-9]*)-(?<month>[0-9]*)-(?<day>[0-9]*)");

    foreach (var archiveItem in archiveApi.Query(queryBuilder))
    {
        var albumMatch = albumRegex.Match(archiveItem.Title);
        var yearStr = albumMatch.Groups["year"].Value;
        var monthStr = albumMatch.Groups["month"].Value;

        // A title that does not follow the expected pattern yields empty groups. Skip that item
        // (as is done for files below) instead of letting int.Parse abort the whole run before
        // anything is saved.
        if (!int.TryParse(yearStr, out var year) || !int.TryParse(monthStr, out var month))
        {
            XConsole.WriteLine(
                $"\tERROR: Cannot parse album year/month from strings {yearStr.Quote()}/{monthStr.Quote()} " +
                $"for item {archiveItem.Identifier} - {archiveItem.Title}",
                Swatch.Red);
            continue;
        }

        var archiveAlbum = new ArchiveAlbum(
            archiveItem.Identifier,
            ContentCreator.Opie_and_Anthony,
            archiveItem.Description,
            archiveItem.UploadDate,
            year,
            month);

        context.ArchiveAlbums.Add(archiveAlbum);

        XConsole.WriteLine(
            $"Item: {archiveItem.Identifier} - {archiveItem.Title}",
            Swatch.Teal);

        foreach (var file in archiveItem.GetItemFiles())
        {
            // File extensions are identifiers, not linguistic text: compare ordinally.
            if (!file.FileName.EndsWith(".mp3", StringComparison.Ordinal))
            {
                continue;
            }

            var decodedFileName = file.FileName.UrlDecode();
            var fullShowMatch = fullShowRegex.Match(decodedFileName);
            var fullShowYearStr = fullShowMatch.Groups["year"].Value;
            var fullShowMonthStr = fullShowMatch.Groups["month"].Value;
            var fullShowDayStr = fullShowMatch.Groups["day"].Value;

            if (!int.TryParse(fullShowYearStr, out var fullShowYear))
            {
                XConsole.WriteLine(
                    $"\t\tERROR: Cannot parse year int from string {fullShowYearStr.Quote()} " +
                    $"for input {decodedFileName.Quote()}",
                    Swatch.Red);
                continue;
            }

            if (!int.TryParse(fullShowMonthStr, out var fullShowMonth))
            {
                XConsole.WriteLine(
                    $"\t\tERROR: Cannot parse month int from string {fullShowMonthStr.Quote()} " +
                    $"for input {decodedFileName.Quote()}",
                    Swatch.Red);
                continue;
            }

            if (!int.TryParse(fullShowDayStr, out var fullShowDay))
            {
                XConsole.WriteLine(
                    $"\t\tERROR: Cannot parse day int from string {fullShowDayStr.Quote()} " +
                    $"for input {decodedFileName.Quote()}",
                    Swatch.Red);
                continue;
            }

            DateTime showAirDate;
            try
            {
                showAirDate = new DateTime(
                    fullShowYear,
                    fullShowMonth,
                    fullShowDay);
            }
            catch (ArgumentOutOfRangeException)
            {
                // Only out-of-range date components are expected here; anything else should surface.
                XConsole.WriteLine(
                    $"\t\tERROR: {fullShowYear}-{fullShowMonth}-{fullShowDay} does not represent a valid " +
                    $"DateTime. Defaulting to DateTime.MinValue",
                    Swatch.Red);
                showAirDate = DateTime.MinValue;
            }

            var targetUrl =
                $"https://archive.org/download/{archiveItem.Identifier}/{decodedFileName}";

            context.ArchiveFiles.Add(
                new ArchiveFile(
                    file.FileName,
                    ArchiveFileTypeInfo.MP3,
                    Show.OpieAndAnthonyShow,
                    archiveAlbum,
                    archiveItem.Identifier,
                    targetUrl,
                    showAirDate,
                    archiveItem.Title,
                    archiveItem.UploadDate,
                    -1));

            XConsole
                .Write($"Adding file: ", Swatch.Cyan)
                .WriteLine($"{targetUrl}", Swatch.Pink);
        }
    }

    // All albums and files are persisted in a single save at the end of the run.
    XConsole
        .Write($"Saving to database...", Swatch.Cyan);
    context.SaveChanges();
    XConsole
        .WriteLine($"Complete", Swatch.Teal);
}