/// <summary>
/// Picks the thumbnail that sits in the middle of the sequence returned by
/// <see cref="GetAssociatedThumbnailFiles"/> for the given file.
/// </summary>
/// <param name="iaItem">Item whose thumbnail files are searched.</param>
/// <param name="iaFile">File whose associated thumbnails are wanted.</param>
/// <returns>
/// The element at index <c>count / 2</c> (rounded down) of the associated
/// thumbnails.
/// </returns>
/// <exception cref="IndexOutOfRangeException">
/// No thumbnail is associated with <paramref name="iaFile"/>.
/// </exception>
public static InternetArchiveFile GetMiddleAssociatedThumbnailFile(
    InternetArchiveItem iaItem,
    InternetArchiveFile iaFile)
{
    // Materialize once so the count and the indexed lookup see the same data.
    InternetArchiveFile[] candidates =
        GetAssociatedThumbnailFiles(iaItem, iaFile).ToArray();

    // Integer division rounds down for non-negative lengths, which matches
    // flooring the halved length.
    int middle = candidates.Length / 2;

    return candidates[middle];
}
/// <summary>
/// Lazily yields the thumbnail files of <paramref name="iaItem"/> whose
/// extension-less title starts with the extension-less title of
/// <paramref name="iaFile"/>.
/// </summary>
/// <param name="iaItem">
/// Item handed to <c>ArchiveFileInterpreter.ScrapeArchiveThumbnailFiles</c>
/// to obtain the candidate thumbnails.
/// </param>
/// <param name="iaFile">File whose title is used as the required prefix.</param>
/// <returns>
/// The matching thumbnails, in the order the candidates are enumerated.
/// This is an iterator: nothing runs until the result is enumerated.
/// </returns>
public static IEnumerable<InternetArchiveFile> GetAssociatedThumbnailFiles(
    InternetArchiveItem iaItem,
    InternetArchiveFile iaFile)
{
    var thumbnailFiles = ArchiveFileInterpreter.ScrapeArchiveThumbnailFiles(iaItem);

    // The prefix does not depend on the candidate, so compute it once.
    var videoFileName = iaFile.Title.RemoveFileExtension();

    foreach (var thumbnailFile in thumbnailFiles)
    {
        var thumbnailFileName = thumbnailFile.Title.RemoveFileExtension();

        // File names are identifiers, not linguistic text: compare ordinally
        // so the match does not vary with the current culture.
        if (thumbnailFileName.StartsWith(videoFileName, StringComparison.Ordinal))
        {
            yield return thumbnailFile;
        }
    }
}
/// <summary>
/// Loads the download page of <paramref name="internetArchiveItem"/> and
/// lazily yields one <see cref="InternetArchiveFile"/> per row of its
/// directory-listing table.
/// </summary>
/// <param name="internetArchiveItem">
/// Item whose download page URL is obtained via
/// <c>GetItemDownloadPageUrl()</c>.
/// </param>
/// <returns>
/// Files in table order, each tagged with a zero-based running index.
/// This is an iterator: the page is fetched only once enumeration starts.
/// </returns>
/// <exception cref="FormatException">
/// A row's date cell does not match <c>dd-MMM-yyyy HH:mm</c>.
/// </exception>
/// <exception cref="InvalidOperationException">
/// An expected element (container, listing table, body, cell or link) is
/// missing from the page.
/// </exception>
public static IEnumerable<InternetArchiveFile> ScrapeArchiveFiles(
    InternetArchiveItem internetArchiveItem)
{
    var context = BrowsingContext.New(
        Configuration.Default.WithDefaultLoader());
    var downloadPageUrl = internetArchiveItem.GetItemDownloadPageUrl();

    // NOTE(review): this blocks on an async call because the method is a
    // synchronous iterator; callers on a synchronization context should be
    // aware of the deadlock risk.
    using (var document = context
        .OpenAsync(downloadPageUrl)
        .GetAwaiter()
        .GetResult())
    {
        var maincontent = document
            .GetElementById("maincontent");
        var container = maincontent
            .GetElementsByClassName("container-ia")
            .First();
        var directoryListing = container
            .GetElementsByClassName("download-directory-listing")
            .First();
        var tbody = directoryListing
            .GetElementsByTagName("tbody")
            .First();

        // The first row is skipped — presumably a non-file row such as a
        // parent-directory link; confirm against the live page.
        var fileNodeList = tbody
            .GetElementsByTagName("tr")
            .Skip(1);

        var itemIndex = 0;
        foreach (var fileNode in fileNodeList)
        {
            // Query the cells once per row instead of once per column.
            var cells = fileNode
                .GetElementsByTagName("td")
                .ToArray();

            var fileLinkElement = cells
                .First()
                .GetElementsByTagName("a")
                .First();
            var fileLinkPath = fileLinkElement.GetAttribute("href");
            var fileTitle = fileLinkElement.TextContent;
            var fileDate = cells.Skip(1).First().TextContent;
            var fileSize = cells.Skip(2).First().TextContent;

            var fileKind = DetermineIAFileKind(fileTitle);
            var approximateBytes = DetermineArchiveFileSizeBytes(fileSize);

            // "HH:mm" reads the time as hours:minutes (the previous "ss:mm"
            // stored the hour field as seconds). The invariant format info
            // keeps the English month abbreviations parseable regardless of
            // the machine's current culture.
            if (!DateTime.TryParseExact(
                fileDate,
                "dd-MMM-yyyy HH:mm",
                DateTimeFormatInfo.InvariantInfo,
                DateTimeStyles.None,
                out var lastModifiedDate))
            {
                throw new FormatException(
                    $"Cannot parse dateTime from string {fileDate.Quote()}.");
            }

            yield return new InternetArchiveFile(
                internetArchiveItem,
                fileLinkPath,
                fileKind,
                fileTitle,
                lastModifiedDate,
                approximateBytes,
                itemIndex);

            itemIndex++;
        }
    }
}