/// <summary>
/// recomputes AudioOrder for the approved recitations of a poem
/// </summary>
/// <remarks>
/// Score of a recitation = earlyness advantage + up-votes (votes of the owner are not counted)
/// - 5 * (sum of NumberOfLinesAffected of its approved mistakes).
/// Recitations are ranked by descending score; on equal scores the recitation with the lower Id wins.
/// </remarks>
/// <param name="context">database context used to read the recitations/votes/mistakes and to store the new order</param>
/// <param name="poemId">matched against GanjoorPostId of recitations</param>
/// <param name="update">when true, the reordered recitations are passed to context.Update and changes are saved</param>
/// <returns></returns>
private async Task _ReOrderPoemRecitationsAsync(RMuseumDbContext context, int poemId, bool update = true)
{
    var recitations = await context.Recitations
        .Where(r => r.ReviewStatus == AudioReviewStatus.Approved && r.GanjoorPostId == poemId)
        .OrderBy(r => r.Id) //this causes the oldest recitations to become the first one
        .ToListAsync();

    //scores[i] always describes recitations[i]
    List<RecitationOrderingViewModel> scores = new List<RecitationOrderingViewModel>(recitations.Count);
    for (var i = 0; i < recitations.Count; i++)
    {
        var recitation = recitations[i];
        RecitationOrderingViewModel score = new RecitationOrderingViewModel()
        {
            RecitationId = recitation.Id,
            EarlynessAdvantage = recitations.Count - 1 - i,
            UpVotes = await context.RecitationUserUpVotes.AsNoTracking()
                .Where(r => r.RecitationId == recitation.Id && r.UserId != recitation.OwnerId)
                .CountAsync(),
            Mistakes = await context.RecitationApprovedMistakes.AsNoTracking()
                .Where(m => m.RecitationId == recitation.Id)
                .SumAsync(m => m.NumberOfLinesAffected)
        };
        score.TotalScores = score.EarlynessAdvantage + score.UpVotes - (5 * score.Mistakes);
        scores.Add(score);
    }

    //OrderByDescending is a stable sort (List<T>.Sort is not), so recitations having equal scores
    //keep their oldest-first relative order
    var ranking = Enumerable.Range(0, recitations.Count)
        .OrderByDescending(index => scores[index].TotalScores)
        .ToList();

    for (var position = 0; position < ranking.Count; position++)
    {
        var rankedRecitation = recitations[ranking[position]];
        rankedRecitation.AudioOrder = position + 1;
        scores[ranking[position]].ComputedOrder = position + 1;
        if (update)
        {
            context.Update(rankedRecitation);
        }
    }

    if (update)
    {
        await context.SaveChangesAsync();
    }
}
/// <summary>
/// queues a background job which passes every comment containing a link through _ProcessCommentHtml
/// and stores the comments whose html is changed by it
/// </summary>
/// <returns>always a successful result, the outcome of the job itself is reported through its long running job record</returns>
public RServiceResult<bool> FindAndFixLongUrlsInComments()
{
    _backgroundTaskQueue.QueueBackgroundWorkItem
    (
        async token =>
        {
            //this is a long running job, so it works on its own context (_context might already be freed/collected by GC)
            using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
            {
                var progressService = new LongRunningJobProgressServiceEF(context);
                var job = (await progressService.NewJob("FindAndFixLongUrlsInComments", "Query data")).Result;

                try
                {
                    var linkedComments = await context.GanjoorComments
                        .Where(c => c.HtmlComment.Contains("href="))
                        .ToArrayAsync();

                    await progressService.UpdateJob(job.Id, 0, $"Examining {linkedComments.Length} Comments");

                    int reportedPercent = 0;
                    int index = 0;
                    foreach (var comment in linkedComments)
                    {
                        //the reported value goes up by one step each time the real percentage is ahead of it
                        if (index * 100 / linkedComments.Length > reportedPercent)
                        {
                            reportedPercent++;
                            await progressService.UpdateJob(job.Id, reportedPercent);
                        }
                        index++;

                        string processedHtml = await _ProcessCommentHtml(comment.HtmlComment, context);
                        if (processedHtml == comment.HtmlComment)
                        {
                            continue; //nothing to store for this comment
                        }

                        comment.HtmlComment = processedHtml;
                        context.Update(comment);
                        await context.SaveChangesAsync();
                    }

                    await progressService.UpdateJob(job.Id, 100, "", true);
                }
                catch (Exception exp)
                {
                    await progressService.UpdateJob(job.Id, 100, "", false, exp.ToString());
                }
            }
        }
    );

    return new RServiceResult<bool>(true);
}
/// <summary>
/// import from server folder
/// </summary>
/// <remarks>
/// Validates the request, stores a new <see cref="ImportJob"/> and queues a background work item which
/// turns every *.jpg file of the folder into a page of a new draft artifact. Progress and failures are
/// written to the job record, the returned result only tells if the job could be queued.
/// </remarks>
/// <param name="folderPath">C:\Tools\batches\florence</param>
/// <param name="friendlyUrl">shahname-florence</param>
/// <param name="srcUrl">https://t.me/dr_khatibi_abolfazl/888</param>
/// <returns>true if the job is queued, false (with an error message) otherwise</returns>
private async Task<RServiceResult<bool>> StartImportingFromServerFolder(string folderPath, string friendlyUrl, string srcUrl)
{
    try
    {
        if (
            (
            await _context.ImportJobs
                .Where(j => j.JobType == JobType.ServerFolder && j.ResourceNumber == folderPath && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted))
                .SingleOrDefaultAsync()
            )
            !=
            null
            )
        {
            return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing server folder {folderPath}");
        }

        if (string.IsNullOrEmpty(friendlyUrl))
        {
            return new RServiceResult<bool>(false, $"Friendly url is empty, server folder {folderPath}");
        }

        if (
            (await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).SingleOrDefaultAsync())
            !=
            null
            )
        {
            return new RServiceResult<bool>(false, $"duplicated friendly url '{friendlyUrl}'");
        }

        ImportJob job = new ImportJob()
        {
            JobType = JobType.ServerFolder,
            ResourceNumber = folderPath,
            FriendlyUrl = friendlyUrl,
            SrcUrl = srcUrl,
            QueueTime = DateTime.Now,
            ProgressPercent = 0,
            Status = ImportJobStatus.NotStarted
        };

        await _context.ImportJobs.AddAsync
            (
            job
            );

        await _context.SaveChangesAsync();

        _backgroundTaskQueue.QueueBackgroundWorkItem
            (
            async token =>
            {
                try
                {
                    using (RMuseumDbContext context = new RMuseumDbContext(Configuration))
                    {
                        RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from server folder {job.ResourceNumber}", $"extracted from server folder {job.ResourceNumber}")
                        {
                            Status = PublishStatus.Draft,
                            DateTime = DateTime.Now,
                            LastModified = DateTime.Now,
                            CoverItemIndex = 0,
                            FriendlyUrl = friendlyUrl
                        };

                        List<RTagValue> meta = new List<RTagValue>();
                        RTagValue tag;

                        tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1);
                        meta.Add(tag);

                        //each tag is added exactly once (the Manuscript tag used to be added twice)
                        tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1);
                        meta.Add(tag);

                        //job state is saved through short-lived contexts, independent of the import context
                        using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                        {
                            job.StartTime = DateTime.Now;
                            job.Status = ImportJobStatus.Running;
                            job.SrcContent = "";
                            importJobUpdaterDb.Update(job);
                            await importJobUpdaterDb.SaveChangesAsync();
                        }

                        List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();

                        //NOTE(review): Directory.GetFiles does not document a guaranteed order and page numbers
                        //follow the returned order - confirm whether an explicit sort is needed here
                        string[] fileNames = Directory.GetFiles(job.ResourceNumber, "*.jpg");
                        int order = 0;
                        foreach (string fileName in fileNames)
                        {
                            using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                            {
                                job.ProgressPercent = order * 100 / (decimal)fileNames.Length;
                                importJobUpdaterDb.Update(job);
                                await importJobUpdaterDb.SaveChangesAsync();
                            }

                            order++;

                            //stored images are named by their 4 digit page number
                            string imageFileName = $"{order}".PadLeft(4, '0') + ".jpg";

                            RArtifactItemRecord page = new RArtifactItemRecord()
                            {
                                Name = $"تصویر {order}",
                                NameInEnglish = $"Image {order} of {book.NameInEnglish}",
                                Description = "",
                                DescriptionInEnglish = "",
                                Order = order,
                                FriendlyUrl = $"p{$"{order}".PadLeft(4, '0')}",
                                LastModified = DateTime.Now
                            };

                            page.Tags = new RTagValue[] { };

                            //delete files already stored under the target name (presumably left from an earlier attempt)
                            foreach (string sizeFolder in new string[] { "orig", "norm", "thumb" })
                            {
                                string existingFilePath = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl, sizeFolder, imageFileName);
                                if (File.Exists(existingFilePath))
                                {
                                    File.Delete(existingFilePath);
                                }
                            }

                            using (FileStream imageStream = new FileStream(fileName, FileMode.Open))
                            {
                                RServiceResult<RPictureFile> picture = await _pictureFileService.Add(page.Name, page.Description, 1, null, job.SrcUrl, imageStream, imageFileName, friendlyUrl);
                                if (picture.Result == null)
                                {
                                    //caught by the catch block of this work item which marks the job as failed
                                    throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                                }

                                page.Images = new RPictureFile[] { picture.Result };
                                page.CoverImageIndex = 0;

                                if (book.CoverItemIndex == (order - 1))
                                {
                                    book.CoverImage = RPictureFile.Duplicate(picture.Result);
                                }
                            }

                            pages.Add(page);
                        }

                        book.Tags = meta.ToArray();
                        book.Items = pages.ToArray();
                        book.ItemCount = pages.Count;

                        if (pages.Count == 0)
                        {
                            using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                            {
                                job.EndTime = DateTime.Now;
                                job.Status = ImportJobStatus.Failed;
                                job.Exception = "Pages.Count == 0";
                                importJobUpdaterDb.Update(job);
                                await importJobUpdaterDb.SaveChangesAsync();
                            }
                            return;
                        }

                        await context.Artifacts.AddAsync(book);
                        await context.SaveChangesAsync();

                        job.ProgressPercent = 100;
                        job.Status = ImportJobStatus.Succeeded;
                        job.ArtifactId = book.Id;
                        job.EndTime = DateTime.Now;
                        context.Update(job);
                        await context.SaveChangesAsync();
                    }
                }
                catch (Exception exp)
                {
                    using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                    {
                        job.EndTime = DateTime.Now;
                        job.Status = ImportJobStatus.Failed;
                        job.Exception = exp.ToString();
                        importJobUpdaterDb.Update(job);
                        await importJobUpdaterDb.SaveChangesAsync();
                    }
                }
            }
            );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// from https://curiosity.lib.harvard.edu
/// </summary>
/// <remarks>
/// Stores a new <see cref="ImportJob"/> and queues a background work item which downloads the catalog page,
/// converts its dt/dd pairs to tags, finds the pds.lib.harvard.edu resource number in the page and passes it
/// to _InternalHarvardJsonImport to get the pages of the new draft artifact.
/// </remarks>
/// <param name="url">example: https://curiosity.lib.harvard.edu/islamic-heritage-project/catalog/40-990114893240203941</param>
/// <param name="friendlyUrl"></param>
/// <returns>true if the job is queued, false (with an error message) otherwise</returns>
private async Task<RServiceResult<bool>> StartImportingFromHarvard(string url, string friendlyUrl)
{
    try
    {
        var unfinishedJob = await _context.ImportJobs
            .Where(j => j.JobType == JobType.Harvard && j.ResourceNumber == url && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted))
            .SingleOrDefaultAsync();
        if (unfinishedJob != null)
        {
            return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing {url}");
        }

        if (string.IsNullOrEmpty(friendlyUrl))
        {
            return new RServiceResult<bool>(false, $"Friendly url is empty, url = {url}");
        }

        var artifactHavingSameUrl = await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).SingleOrDefaultAsync();
        if (artifactHavingSameUrl != null)
        {
            return new RServiceResult<bool>(false, $"duplicated friendly url '{friendlyUrl}'");
        }

        ImportJob job = new ImportJob()
        {
            JobType = JobType.Harvard,
            ResourceNumber = url,
            FriendlyUrl = friendlyUrl,
            SrcUrl = url,
            QueueTime = DateTime.Now,
            ProgressPercent = 0,
            Status = ImportJobStatus.NotStarted
        };
        await _context.ImportJobs.AddAsync(job);
        await _context.SaveChangesAsync();

        _backgroundTaskQueue.QueueBackgroundWorkItem
            (
            async token =>
            {
                //stores the failure on the job record through a short-lived context
                Func<string, Task> markJobAsFailed = async reason =>
                {
                    using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                    {
                        job.EndTime = DateTime.Now;
                        job.Status = ImportJobStatus.Failed;
                        job.Exception = reason;
                        importJobUpdaterDb.Update(job);
                        await importJobUpdaterDb.SaveChangesAsync();
                    }
                };

                try
                {
                    using (var client = new HttpClient())
                    using (var result = await client.GetAsync(url))
                    {
                        if (!result.IsSuccessStatusCode)
                        {
                            await markJobAsFailed($"Http result is not ok ({result.StatusCode}) for {url}");
                            return;
                        }

                        using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                        {
                            RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from {url}", $"extracted from {url}")
                            {
                                Status = PublishStatus.Draft,
                                DateTime = DateTime.Now,
                                LastModified = DateTime.Now,
                                CoverItemIndex = 0,
                                FriendlyUrl = friendlyUrl
                            };

                            List<RTagValue> meta = new List<RTagValue>();
                            RTagValue tag;

                            string html = await result.Content.ReadAsStringAsync();

                            using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                            {
                                job.StartTime = DateTime.Now;
                                job.Status = ImportJobStatus.Running;
                                job.SrcContent = html;
                                importJobUpdaterDb.Update(job);
                                await importJobUpdaterDb.SaveChangesAsync();
                            }

                            //every <dt ...>name:</dt> <dd ...>value(s)</dd> pair of the page becomes one tag per value
                            int cursor = html.IndexOf("<dt");
                            while (cursor != -1)
                            {
                                cursor = html.IndexOf(">", cursor);
                                if (cursor == -1)
                                {
                                    break;
                                }
                                cursor++;
                                //tag name is the text between the end of <dt ...> and the next colon
                                string tagName = html.Substring(cursor, html.IndexOf(":", cursor) - cursor);

                                cursor = html.IndexOf("<dd", cursor);
                                if (cursor == -1)
                                {
                                    break;
                                }
                                cursor = html.IndexOf(">", cursor);
                                if (cursor == -1)
                                {
                                    break;
                                }
                                cursor++;
                                string tagValues = html.Substring(cursor, html.IndexOf("</dd>", cursor) - cursor);

                                foreach (string tagValuePart in tagValues.Split("<br/>", StringSplitOptions.RemoveEmptyEntries))
                                {
                                    bool href = tagValuePart.IndexOf("<a href=") != -1;
                                    string tagValue = tagValuePart;
                                    if (href)
                                    {
                                        //keep only the text between the opening and the closing part of the link
                                        tagValue = tagValue.Substring(tagValue.IndexOf('>') + 1);
                                        tagValue = tagValue.Substring(0, tagValue.IndexOf('<'));
                                    }

                                    tag = await TagHandler.PrepareAttribute(context, tagName, tagValue, 1);
                                    if (href)
                                    {
                                        tag.ValueSupplement = tagValue;
                                    }
                                    meta.Add(tag);
                                }

                                cursor = html.IndexOf("<dt", cursor + 1);
                            }

                            tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Source", "Harvard University Islamic Heritage Project", 1);
                            tag.ValueSupplement = $"{job.SrcUrl}";
                            meta.Add(tag);

                            //resource number is the text between the viewer url prefix and the next double quote
                            int viewerLinkIndex = html.IndexOf("https://pds.lib.harvard.edu/pds/view/");
                            if (viewerLinkIndex == -1)
                            {
                                await markJobAsFailed("Not found https://pds.lib.harvard.edu/pds/view/");
                                return;
                            }
                            viewerLinkIndex += "https://pds.lib.harvard.edu/pds/view/".Length;

                            string hardvardResourceNumber = html.Substring(viewerLinkIndex, html.IndexOf('\"', viewerLinkIndex) - viewerLinkIndex);

                            var importResult = await _InternalHarvardJsonImport(hardvardResourceNumber, job, friendlyUrl, context, book, meta);
                            List<RArtifactItemRecord> pages = importResult.Result;
                            if (pages == null)
                            {
                                return;
                            }

                            book.Tags = meta.ToArray();
                            book.Items = pages.ToArray();
                            book.ItemCount = pages.Count;

                            if (pages.Count == 0)
                            {
                                await markJobAsFailed("Pages.Count == 0");
                                return;
                            }

                            await context.Artifacts.AddAsync(book);
                            await context.SaveChangesAsync();

                            job.ProgressPercent = 100;
                            job.Status = ImportJobStatus.Succeeded;
                            job.ArtifactId = book.Id;
                            job.EndTime = DateTime.Now;
                            context.Update(job);
                            await context.SaveChangesAsync();
                        }
                    }
                }
                catch (Exception exp)
                {
                    await markJobAsFailed(exp.ToString());
                }
            }
            );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// downloads the IIIF manifest of a Harvard resource and builds one artifact page per canvas of its first sequence
/// </summary>
/// <remarks>
/// Images already stored for a page (orig, norm and thumb all present) are recovered through
/// RecoverFromeFiles, otherwise the image is downloaded (retried while the server answers 503).
/// When the manifest or an image can not be downloaded, the job is marked as failed here and a null result is returned.
/// </remarks>
/// <param name="hardvardResourceNumber">the number following drs: in the manifest url</param>
/// <param name="job">import job, its progress (and failure) is saved by this method</param>
/// <param name="friendlyUrl">name of the artifact folder under the image storage path</param>
/// <param name="context">context passed to TagHandler.PrepareAttribute</param>
/// <param name="book">artifact being built, its names/descriptions and cover image are set here</param>
/// <param name="meta">tag list of the artifact, Title and Contributor Names tags are added to it</param>
/// <returns>the pages, or a null result (with "failed" as the message) when a download fails</returns>
private async Task<RServiceResult<List<RArtifactItemRecord>>> _InternalHarvardJsonImport(string hardvardResourceNumber, ImportJob job, string friendlyUrl, RMuseumDbContext context, RArtifactMasterRecord book, List<RTagValue> meta)
{
    List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();

    using (var client = new HttpClient())
    {
        using (var jsonResult = await client.GetAsync($"https://iiif.lib.harvard.edu/manifests/drs:{hardvardResourceNumber}"))
        {
            if (jsonResult.IsSuccessStatusCode)
            {
                string json = await jsonResult.Content.ReadAsStringAsync();
                var parsed = JObject.Parse(json);
                book.Name = book.NameInEnglish = book.Description = book.DescriptionInEnglish =
                    parsed.SelectToken("label").Value<string>();

                RTagValue tag;

                tag = await TagHandler.PrepareAttribute(context, "Title", book.Name, 1);
                meta.Add(tag);

                tag = await TagHandler.PrepareAttribute(context, "Contributor Names", "تعیین نشده", 1);
                meta.Add(tag);

                List<string> labels = new List<string>();
                foreach (JToken structure in parsed.SelectTokens("$.structures[*].label"))
                {
                    labels.Add(structure.Value<string>());
                }

                int order = 0;
                var canvases = parsed.SelectToken("sequences").First().SelectToken("canvases").ToArray();
                int pageCount = canvases.Length;
                foreach (JToken canvas in canvases)
                {
                    order++;
                    using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                    {
                        job.ProgressPercent = order * 100 / (decimal)pageCount;
                        importJobUpdaterDb.Update(job);
                        await importJobUpdaterDb.SaveChangesAsync();
                    }

                    string label = canvas.SelectToken("label").Value<string>();
                    //a structure label containing the canvas label replaces it
                    //NOTE(review): SingleOrDefault throws when more than one structure label contains the canvas label
                    string containingStructureLabel = labels.Where(l => l.IndexOf(label) != -1).SingleOrDefault();
                    if (containingStructureLabel != null)
                    {
                        label = containingStructureLabel;
                    }

                    string imageUrl = canvas.SelectTokens("images[*]").First().SelectToken("resource").SelectToken("@id").Value<string>();

                    RArtifactItemRecord page = new RArtifactItemRecord()
                    {
                        Name = $"تصویر {order}",
                        NameInEnglish = label,
                        Description = "",
                        DescriptionInEnglish = "",
                        Order = order,
                        FriendlyUrl = $"p{$"{order}".PadLeft(4, '0')}",
                        LastModified = DateTime.Now
                    };

                    tag = await TagHandler.PrepareAttribute(context, "Source", "Harvard University Islamic Heritage Project", 1);
                    tag.ValueSupplement = imageUrl;
                    page.Tags = new RTagValue[] { tag };

                    if (!string.IsNullOrEmpty(imageUrl))
                    {
                        //stored images are named by their 4 digit page number
                        string imageFileName = $"{order}".PadLeft(4, '0') + ".jpg";
                        string artifactFolder = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl);
                        string origPath = Path.Combine(Path.Combine(artifactFolder, "orig"), imageFileName);
                        string normPath = Path.Combine(Path.Combine(artifactFolder, "norm"), imageFileName);
                        string thumbPath = Path.Combine(Path.Combine(artifactFolder, "thumb"), imageFileName);

                        bool recovered = false;
                        if (File.Exists(origPath) && File.Exists(normPath) && File.Exists(thumbPath))
                        {
                            RServiceResult<RPictureFile> picture = await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1,
                                imageUrl,
                                origPath,
                                normPath,
                                thumbPath,
                                imageFileName, friendlyUrl);
                            if (picture.Result != null)
                            {
                                recovered = true;
                                page.Images = new RPictureFile[] { picture.Result };
                                page.CoverImageIndex = 0;

                                if (book.CoverItemIndex == (order - 1))
                                {
                                    book.CoverImage = RPictureFile.Duplicate(picture.Result);
                                }

                                pages.Add(page);
                            }
                        }

                        if (!recovered)
                        {
                            //an incomplete (or unrecoverable) set of stored files is removed before downloading again
                            foreach (string existingFilePath in new string[] { origPath, normPath, thumbPath })
                            {
                                if (File.Exists(existingFilePath))
                                {
                                    File.Delete(existingFilePath);
                                }
                            }

                            const int importRetryCount = 5;
                            const int importRetryInitialSleep = 500;

                            var imageResult = await client.GetAsync(imageUrl);
                            try
                            {
                                //retry only on 503, waiting longer before each new attempt
                                int retryCount = 0;
                                while (retryCount < importRetryCount && !imageResult.IsSuccessStatusCode && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable)
                                {
                                    imageResult.Dispose();
                                    //Task.Delay does not block the thread while waiting (Thread.Sleep does)
                                    await Task.Delay(importRetryInitialSleep * (retryCount + 1));
                                    imageResult = await client.GetAsync(imageUrl);
                                    retryCount++;
                                }

                                if (imageResult.IsSuccessStatusCode)
                                {
                                    using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync())
                                    {
                                        RServiceResult<RPictureFile> picture = await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, imageFileName, friendlyUrl);
                                        if (picture.Result == null)
                                        {
                                            //not handled here, it is left to the caller
                                            throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                                        }

                                        page.Images = new RPictureFile[] { picture.Result };
                                        page.CoverImageIndex = 0;

                                        if (book.CoverItemIndex == (order - 1))
                                        {
                                            book.CoverImage = RPictureFile.Duplicate(picture.Result);
                                        }

                                        pages.Add(page);
                                    }
                                }
                                else
                                {
                                    using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                                    {
                                        job.EndTime = DateTime.Now;
                                        job.Status = ImportJobStatus.Failed;
                                        job.Exception = $"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}";
                                        importJobUpdaterDb.Update(job);
                                        await importJobUpdaterDb.SaveChangesAsync();
                                    }

                                    return new RServiceResult<List<RArtifactItemRecord>>(null, "failed");
                                }
                            }
                            finally
                            {
                                //the response is released on every path, including exceptions
                                imageResult.Dispose();
                            }

                            //kept from the original code: a collection is forced after every downloaded image
                            GC.Collect();
                        }
                    }
                }
            }
            else
            {
                using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                {
                    job.EndTime = DateTime.Now;
                    job.Status = ImportJobStatus.Failed;
                    job.Exception = $"Http result is not ok ({jsonResult.StatusCode}) for https://iiif.lib.harvard.edu/manifests/drs:{hardvardResourceNumber}";
                    importJobUpdaterDb.Update(job);
                    await importJobUpdaterDb.SaveChangesAsync();
                }
                return new RServiceResult<List<RArtifactItemRecord>>(null, "failed");
            }
        }
    }

    return new RServiceResult<List<RArtifactItemRecord>>(pages);
}
/// <summary>
/// import GanjoorPage entity data from MySql
/// </summary>
/// <remarks>
/// Queues a background job which runs these steps in order:
/// music catalogue import, phase 1 (ganja_posts rows to GanjoorPages), phase 2 (FullUrl/FullTitle of pages
/// having no FullUrl), phase 3 (poem meta data from ganja_postmeta), poem songs import (approved, then pending),
/// phase N (ganja_mimages rows mark links as displayed on page) and finally comments import.
/// Progress and errors are reported through a long running job record.
/// </remarks>
/// <returns>true if the job is queued, false (with the exception text) if queuing fails</returns>
public RServiceResult<bool> ImportFromMySql()
{
    try
    {
        _backgroundTaskQueue.QueueBackgroundWorkItem
            (
            async token =>
            {
                using (RMuseumDbContext context = new RMuseumDbContext(Configuration)) //this is long running job, so _context might be already been freed/collected by GC
                using (RMuseumDbContext contextReport = new RMuseumDbContext(Configuration)) //progress is saved through a separate context
                {
                    LongRunningJobProgressServiceEF jobProgressServiceEF = new LongRunningJobProgressServiceEF(contextReport);
                    var job = (await jobProgressServiceEF.NewJob("GanjoorService:ImportFromMySql", "pre open connection")).Result;

                    if (string.IsNullOrEmpty(Configuration.GetSection("AudioMySqlServer")["ReportedCommentsDatabase"]))
                    {
                        await jobProgressServiceEF.UpdateJob(job.Id, job.Progress, "", false, "ReportedCommentsDatabase is not set");
                        return;
                    }

                    MusicCatalogueService catalogueService = new MusicCatalogueService(Configuration, context);
                    RServiceResult<bool> musicCatalogueRes = await catalogueService.ImportFromMySql("MusicCatalogueImportFromMySql", jobProgressServiceEF, job);
                    if (!musicCatalogueRes.Result)
                    {
                        return;
                    }

                    try
                    {
                        //the same connection string is used by all MySql queries of this job
                        string mySqlConnectionString =
                            $"server={Configuration.GetSection("AudioMySqlServer")["Server"]};uid={Configuration.GetSection("AudioMySqlServer")["Username"]};pwd={Configuration.GetSection("AudioMySqlServer")["Password"]};database={Configuration.GetSection("AudioMySqlServer")["Database"]};charset=utf8;convert zero datetime=True";

                        //phase 1: every row of ganja_posts becomes a GanjoorPage
                        using (MySqlConnection connection = new MySqlConnection(mySqlConnectionString))
                        {
                            connection.Open();
                            using (MySqlDataAdapter src = new MySqlDataAdapter(
                                "SELECT ID, post_author, post_date, post_date_gmt, post_content, post_title, post_category, post_excerpt, post_status, comment_status, ping_status, post_password, post_name, to_ping, pinged, post_modified, post_modified_gmt, post_content_filtered, post_parent, guid, menu_order, post_type, post_mime_type, comment_count, " +
                                "COALESCE((SELECT meta_value FROM ganja_postmeta WHERE post_id = ID AND meta_key='_wp_page_template'), '') AS template," +
                                "(SELECT meta_value FROM ganja_postmeta WHERE post_id = ID AND meta_key='otherpoetid') AS other_poet_id " +
                                "FROM ganja_posts",
                                connection))
                            {
                                using (DataTable srcData = new DataTable())
                                {
                                    job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase 1 - mysql 1")).Result;

                                    await src.FillAsync(srcData);

                                    job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase 1 - processing mysql data")).Result;

                                    foreach (DataRow row in srcData.Rows)
                                    {
                                        //open (not closed for comments) posts are poem pages, other page types are recognized by their template
                                        GanjoorPageType pageType =
                                            row["post_type"].ToString() == "post" && row["comment_status"].ToString() != "closed" ?
                                                GanjoorPageType.PoemPage
                                                :
                                                row["template"].ToString() == "comspage.php" ?
                                                GanjoorPageType.AllComments
                                                :
                                                row["template"].ToString() == "relations.php" ?
                                                GanjoorPageType.ProsodySimilars
                                                :
                                                row["template"].ToString() == "vazn.php" ?
                                                GanjoorPageType.ProsodyAndStats
                                                :
                                                GanjoorPageType.None;

                                        //post_author 1 means the page has no poet
                                        int? poetId = row["post_author"].ToString() == "1" ? (int?)null : int.Parse(row["post_author"].ToString());
                                        if (poetId == 36)//رشحه
                                        {
                                            continue;
                                        }

                                        if (poetId != null)
                                        {
                                            //pages of poets missing from the destination database are skipped
                                            if (!await context.GanjoorPoets.Where(poet => poet.Id == poetId).AnyAsync())
                                            {
                                                continue;
                                            }
                                        }

                                        GanjoorPage page = new GanjoorPage()
                                        {
                                            Id = int.Parse(row["ID"].ToString()),
                                            GanjoorPageType = pageType,
                                            Published = true,
                                            PageOrder = -1,
                                            Title = row["post_title"].ToString(),
                                            UrlSlug = row["post_name"].ToString(),
                                            HtmlText = row["post_content"].ToString(),
                                            ParentId = row["post_parent"].ToString() == "0" ? (int?)null : int.Parse(row["post_parent"].ToString()),
                                            PoetId = poetId,
                                            SecondPoetId = row["other_poet_id"] == DBNull.Value ? (int?)null : int.Parse(row["other_poet_id"].ToString()),
                                            PostDate = (DateTime)row["post_date"]
                                        };

                                        if (pageType == GanjoorPageType.PoemPage)
                                        {
                                            //poem pages having no poem with the same id are skipped
                                            var poem = await context.GanjoorPoems.Where(p => p.Id == page.Id).FirstOrDefaultAsync();
                                            if (poem == null)
                                            {
                                                continue;
                                            }
                                            page.PoemId = poem.Id;
                                        }

                                        if (poetId != null && pageType == GanjoorPageType.None)
                                        {
                                            //a page having the slug of a root category is a poet page, the slug of a child category makes it a category page
                                            GanjoorCat cat = await context.GanjoorCategories.Where(c => c.PoetId == poetId && c.ParentId == null && c.UrlSlug == page.UrlSlug).SingleOrDefaultAsync();
                                            if (cat != null)
                                            {
                                                page.GanjoorPageType = GanjoorPageType.PoetPage;
                                                page.CatId = cat.Id;
                                            }
                                            else
                                            {
                                                cat = await context.GanjoorCategories.Where(c => c.PoetId == poetId && c.ParentId != null && c.UrlSlug == page.UrlSlug).SingleOrDefaultAsync();
                                                if (cat != null)
                                                {
                                                    page.GanjoorPageType = GanjoorPageType.CatPage;
                                                    page.CatId = cat.Id;
                                                }
                                            }
                                        }

                                        context.GanjoorPages.Add(page);
                                    }
                                }
                            }
                        }

                        job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase 1 - finalizing")).Result;

                        await context.SaveChangesAsync();

                        //phase 2: FullUrl and FullTitle of pages having no FullUrl
                        job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase 2 - pre fetch data")).Result;

                        var orphanPages = await context.GanjoorPages.Include(p => p.Poem).Where(p => p.FullUrl == null).ToListAsync();

                        job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase 2 - post fetch data")).Result;

                        int orphanPageIndex = 0;
                        foreach (var page in orphanPages)
                        {
                            job = (await jobProgressServiceEF.UpdateJob(job.Id, orphanPageIndex++, "phase 2")).Result;

                            string fullUrl = page.UrlSlug;
                            string fullTitle = page.Title;

                            if (page.GanjoorPageType == GanjoorPageType.PoemPage)
                            {
                                fullTitle = page.Poem.FullTitle;
                                fullUrl = page.Poem.FullUrl;
                            }
                            else
                            {
                                if (page.ParentId != null)
                                {
                                    //slugs and titles of all ancestors are put before the slug and title of the page
                                    GanjoorPage parent = await context.GanjoorPages.Where(p => p.Id == page.ParentId).SingleAsync();
                                    while (parent != null)
                                    {
                                        fullUrl = parent.UrlSlug + "/" + fullUrl;
                                        fullTitle = parent.Title + " » " + fullTitle;
                                        parent = parent.ParentId == null ? null : await context.GanjoorPages.Where(p => p.Id == parent.ParentId).SingleAsync();
                                    }
                                }
                                else
                                {
                                    GanjoorCat cat = await context.GanjoorCategories.Where(c => c.PoetId == page.PoetId && c.UrlSlug == page.UrlSlug).SingleOrDefaultAsync();
                                    if (cat != null)
                                    {
                                        fullUrl = cat.FullUrl;
                                        //the walk stops when a parent category is not found (used to end in a NullReferenceException)
                                        while (cat != null && cat.ParentId != null)
                                        {
                                            cat = await context.GanjoorCategories.Where(c => c.Id == cat.ParentId).SingleOrDefaultAsync();
                                            if (cat != null)
                                            {
                                                fullTitle = cat.Title + " » " + fullTitle;
                                            }
                                        }
                                    }
                                    else
                                    {
                                        cat = await context.GanjoorCategories.Where(c => c.PoetId == page.PoetId && c.ParentId == null).SingleOrDefaultAsync();
                                        if (cat != null)
                                        {
                                            fullUrl = $"{cat.UrlSlug}/{page.UrlSlug}";
                                        }
                                    }
                                }
                            }

                            //a non-empty FullUrl always starts with a slash
                            if (!string.IsNullOrEmpty(fullUrl) && fullUrl.IndexOf('/') != 0)
                            {
                                fullUrl = $"/{fullUrl}";
                            }
                            page.FullUrl = fullUrl;
                            page.FullTitle = fullTitle;

                            context.Update(page);
                        }

                        job = (await jobProgressServiceEF.UpdateJob(job.Id, job.Progress, "phase 2 - finalizing")).Result;

                        await context.SaveChangesAsync();

                        //phase 3: poem meta data
                        job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase 3 - pre mysql data fetch")).Result;

                        using (MySqlConnection connection = new MySqlConnection(mySqlConnectionString))
                        {
                            connection.Open();
                            using (MySqlDataAdapter src = new MySqlDataAdapter(
                                "SELECT meta_key, post_id, meta_value FROM ganja_postmeta WHERE meta_key IN ( 'vazn', 'ravi', 'src', 'srcslug', 'oldtag' )",
                                connection))
                            {
                                job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase 3 - mysql 2")).Result;
                                using (DataTable srcData = new DataTable())
                                {
                                    await src.FillAsync(srcData);

                                    job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase 3 - processing meta data")).Result;

                                    int r = 0;
                                    foreach (DataRow row in srcData.Rows)
                                    {
                                        job = (await jobProgressServiceEF.UpdateJob(job.Id, r++, "phase 3 - processing meta data")).Result;

                                        int poemId = int.Parse(row["post_id"].ToString());
                                        var poem = await context.GanjoorPoems.Where(p => p.Id == poemId).FirstOrDefaultAsync();
                                        if (poem == null)
                                        {
                                            continue;
                                        }

                                        string metaKey = row["meta_key"].ToString();
                                        string metaValue = row["meta_value"].ToString();
                                        switch (metaKey)
                                        {
                                            case "vazn":
                                                {
                                                    //a metre is created (and saved at once, so that its Id can be used) when its rhythm is new
                                                    GanjoorMetre metre = await context.GanjoorMetres.Where(m => m.Rhythm == metaValue).SingleOrDefaultAsync();
                                                    if (metre == null)
                                                    {
                                                        metre = new GanjoorMetre()
                                                        {
                                                            Rhythm = metaValue,
                                                            VerseCount = 0
                                                        };
                                                        context.GanjoorMetres.Add(metre);
                                                        await context.SaveChangesAsync();
                                                    }

                                                    poem.GanjoorMetreId = metre.Id;
                                                }
                                                break;
                                            case "ravi":
                                                poem.RhymeLetters = metaValue;
                                                break;
                                            case "src":
                                                poem.SourceName = metaValue;
                                                break;
                                            case "srcslug":
                                                poem.SourceUrlSlug = metaValue;
                                                break;
                                            case "oldtag":
                                                poem.OldTag = metaValue;
                                                //known old tags are mapped to their page urls
                                                switch (poem.OldTag)
                                                {
                                                    case "بدایع":
                                                        poem.OldTagPageUrl = "/saadi/badaye";
                                                        break;
                                                    case "خواتیم":
                                                        poem.OldTagPageUrl = "/saadi/khavatim";
                                                        break;
                                                    case "طیبات":
                                                        poem.OldTagPageUrl = "/saadi/tayyebat";
                                                        break;
                                                    case "غزلیات قدیم":
                                                        poem.OldTagPageUrl = "/saadi/ghazaliyat-e-ghadim";
                                                        break;
                                                    case "ملمعات":
                                                        poem.OldTagPageUrl = "/saadi/molammaat";
                                                        break;
                                                }
                                                break;
                                        }

                                        context.GanjoorPoems.Update(poem);
                                    }
                                }
                            }
                        }

                        job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase 3 - finalizing meta data")).Result;
                        await context.SaveChangesAsync();

                        //poem songs: approved ones first, then pending ones
                        var resApprovedPoemSongs = await _ImportPoemSongsDataFromMySql("_ImportPoemSongsDataFromMySql", context, jobProgressServiceEF, job, true);
                        if (!resApprovedPoemSongs.Result)
                        {
                            return;
                        }

                        var resPendingPoemSongs = await _ImportPoemSongsDataFromMySql("_ImportPoemSongsDataFromMySql", context, jobProgressServiceEF, job, false);
                        if (!resPendingPoemSongs.Result)
                        {
                            return;
                        }

                        //phase N: links listed in ganja_mimages are displayed on their poem pages
                        using (MySqlConnection connection = new MySqlConnection(mySqlConnectionString))
                        {
                            connection.Open();
                            using (MySqlDataAdapter src = new MySqlDataAdapter(
                                "SELECT poem_id, mimage_id FROM ganja_mimages",
                                connection))
                            {
                                job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase N - mysql N")).Result;
                                using (DataTable srcData = new DataTable())
                                {
                                    await src.FillAsync(srcData);

                                    job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase N - processing meta data")).Result;

                                    int r = 0;
                                    foreach (DataRow row in srcData.Rows)
                                    {
                                        job = (await jobProgressServiceEF.UpdateJob(job.Id, r++, "phase N - processing meta data")).Result;

                                        int poemId = int.Parse(row["poem_id"].ToString());
                                        Guid imageId = Guid.Parse(row["mimage_id"].ToString());

                                        var link = await context.GanjoorLinks.Include(l => l.Item).ThenInclude(item => item.Images).
                                                Where(l => l.GanjoorPostId == poemId && l.Item.Images.First().Id == imageId)
                                                .FirstOrDefaultAsync();
                                        if (link != null)
                                        {
                                            link.DisplayOnPage = true;
                                            context.GanjoorLinks.Update(link);
                                        }
                                    }
                                }
                            }
                        }
                        job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase N - finalizing meta data")).Result;
                        await context.SaveChangesAsync();
                    }
                    catch (Exception jobExp)
                    {
                        //NOTE(review): the job is reported as failed here, but the comments import below still runs
                        //and the job can be reported as finished afterwards - confirm that this is intended
                        await jobProgressServiceEF.UpdateJob(job.Id, job.Progress, "", false, jobExp.ToString());
                    }

                    var resComments = await _ImportCommentsDataFromMySql("_ImportCommentsDataFromMySql", context, jobProgressServiceEF, job);
                    if (!resComments.Result)
                    {
                        return;
                    }

                    await jobProgressServiceEF.UpdateJob(job.Id, 100, "Finished", true);
                }
            });

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// import from https://viewer.cbl.ie
/// </summary>
/// <remarks>
/// Rejects the request when a non-failed/non-aborted job for the same resource exists, when
/// <paramref name="friendlyUrl"/> is empty, or when an artifact already uses that friendly url.
/// Otherwise it stores a new <see cref="ImportJob"/> and queues a background work item that walks the
/// page images (1, 2, 3, ...) until the server answers 403 or 404, reusing image files already present
/// on disk where possible, and finally stores the pages as a draft artifact.
/// </remarks>
/// <param name="resourceNumber">119</param>
/// <param name="friendlyUrl">golestan-baysonghori</param>
/// <returns>true when the job was queued, otherwise false together with a message</returns>
private async Task <RServiceResult <bool> > StartImportingFromChesterBeatty(string resourceNumber, string friendlyUrl)
{
    try
    {
        string srcUrl = $"https://viewer.cbl.ie/viewer/object/Per_{resourceNumber}/1/";

        if (
            (
            await _context.ImportJobs
                .Where(j => j.JobType == JobType.ChesterBeatty && j.ResourceNumber == resourceNumber && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted))
                .SingleOrDefaultAsync()
            )
            != null
            )
        {
            return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing {srcUrl}");
        }

        if (string.IsNullOrEmpty(friendlyUrl))
        {
            return new RServiceResult<bool>(false, $"Friendly url is empty, server folder {srcUrl}");
        }

        if ((await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).SingleOrDefaultAsync()) != null)
        {
            return new RServiceResult<bool>(false, $"duplicated friendly url '{friendlyUrl}'");
        }

        ImportJob job = new ImportJob()
        {
            JobType = JobType.ChesterBeatty,
            ResourceNumber = resourceNumber,
            FriendlyUrl = friendlyUrl,
            SrcUrl = srcUrl,
            QueueTime = DateTime.Now,
            ProgressPercent = 0,
            Status = ImportJobStatus.NotStarted
        };

        await _context.ImportJobs.AddAsync(job);
        await _context.SaveChangesAsync();

        // Marks the job as failed through its own short-lived context, so the failure is written
        // independently of whatever is pending on the import context.
        async Task FailJobAsync(string reason)
        {
            using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
            {
                job.EndTime = DateTime.Now;
                job.Status = ImportJobStatus.Failed;
                job.Exception = reason;
                importJobUpdaterDb.Update(job);
                await importJobUpdaterDb.SaveChangesAsync();
            }
        }

        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                try
                {
                    using (RMuseumDbContext context = new RMuseumDbContext(Configuration))
                    {
                        RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from url {job.ResourceNumber}", $"extracted from url {job.ResourceNumber}")
                        {
                            Status = PublishStatus.Draft,
                            DateTime = DateTime.Now,
                            LastModified = DateTime.Now,
                            CoverItemIndex = 0,
                            FriendlyUrl = friendlyUrl,
                        };

                        List<RTagValue> meta = new List<RTagValue>();
                        RTagValue tag;

                        tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1);
                        meta.Add(tag);

                        tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1);
                        meta.Add(tag);

                        tag = await TagHandler.PrepareAttribute(context, "Source", "Chester Beatty Digital Collections", 1);
                        tag.ValueSupplement = srcUrl;
                        meta.Add(tag);

                        using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                        {
                            job.StartTime = DateTime.Now;
                            job.Status = ImportJobStatus.Running;
                            job.SrcContent = "";
                            importJobUpdaterDb.Update(job);
                            await importJobUpdaterDb.SaveChangesAsync();
                        }

                        List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();
                        string bookFolder = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl);
                        const int importRetryCount = 5;
                        const int importRetryInitialSleep = 500; // milliseconds, multiplied by the attempt number
                        int order = 0;

                        using (var client = new HttpClient())
                        {
                            while (true)
                            {
                                order++;

                                // the page number is stored in ProgressPercent while the job is running
                                using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                                {
                                    job.ProgressPercent = order;
                                    importJobUpdaterDb.Update(job);
                                    await importJobUpdaterDb.SaveChangesAsync();
                                }

                                RArtifactItemRecord page = new RArtifactItemRecord()
                                {
                                    Name = $"تصویر {order}",
                                    NameInEnglish = $"Image {order} of {book.NameInEnglish}",
                                    Description = "",
                                    DescriptionInEnglish = "",
                                    Order = order,
                                    FriendlyUrl = $"p{$"{order}".PadLeft(4, '0')}",
                                    LastModified = DateTime.Now
                                };

                                string imageUrl = $"https://viewer.cbl.ie/viewer/rest/image/Per_{resourceNumber}/Per{resourceNumber}_{$"{order}".PadLeft(3, '0')}.jpg/full/!10000,10000/0/default.jpg?ignoreWatermark=true";

                                page.Tags = new RTagValue[] { };

                                string fileName = $"{order}".PadLeft(4, '0') + ".jpg";
                                string origPath = Path.Combine(Path.Combine(bookFolder, "orig"), fileName);
                                string normPath = Path.Combine(Path.Combine(bookFolder, "norm"), fileName);
                                string thumbPath = Path.Combine(Path.Combine(bookFolder, "thumb"), fileName);

                                RPictureFile pageImage = null;

                                // reuse the files when all three sizes are already on disk
                                if (File.Exists(origPath) && File.Exists(normPath) && File.Exists(thumbPath))
                                {
                                    RServiceResult<RPictureFile> recoveredPicture = await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1,
                                        imageUrl, origPath, normPath, thumbPath, fileName, friendlyUrl);
                                    pageImage = recoveredPicture.Result;
                                }

                                if (pageImage == null)
                                {
                                    // remove partial leftovers before downloading again
                                    foreach (string stalePath in new string[] { origPath, normPath, thumbPath })
                                    {
                                        if (File.Exists(stalePath))
                                        {
                                            File.Delete(stalePath);
                                        }
                                    }

                                    var imageResult = await client.GetAsync(imageUrl);
                                    try
                                    {
                                        if (imageResult.StatusCode == HttpStatusCode.Forbidden || imageResult.StatusCode == HttpStatusCode.NotFound)
                                        {
                                            break; // the server has no image for this page number: end of the book
                                        }

                                        int retryCount = 0;
                                        while (retryCount < importRetryCount && !imageResult.IsSuccessStatusCode && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable)
                                        {
                                            imageResult.Dispose();
                                            // Task.Delay instead of Thread.Sleep: do not block a pool thread while backing off
                                            await Task.Delay(importRetryInitialSleep * (retryCount + 1));
                                            imageResult = await client.GetAsync(imageUrl);
                                            retryCount++;
                                        }

                                        if (!imageResult.IsSuccessStatusCode)
                                        {
                                            await FailJobAsync($"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}");
                                            return;
                                        }

                                        using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync())
                                        {
                                            RServiceResult<RPictureFile> picture = await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, fileName, friendlyUrl);
                                            if (picture.Result == null)
                                            {
                                                throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                                            }
                                            pageImage = picture.Result;
                                        }
                                    }
                                    finally
                                    {
                                        // covers the break, return and exception paths as well as the normal one
                                        imageResult.Dispose();
                                    }

                                    // kept from the original implementation (images are requested at up to 10000x10000)
                                    GC.Collect();
                                }

                                page.Images = new RPictureFile[] { pageImage };
                                page.CoverImageIndex = 0;

                                if (book.CoverItemIndex == (order - 1))
                                {
                                    book.CoverImage = RPictureFile.Duplicate(pageImage);
                                }

                                tag = await TagHandler.PrepareAttribute(context, "Source", "Chester Beatty Digital Collections", 1);
                                tag.ValueSupplement = $"https://viewer.cbl.ie/viewer/object/Per_{resourceNumber}/{$"{order}".PadLeft(3, '0')}/";
                                page.Tags = new RTagValue[] { tag };

                                // Each page is added exactly once here. Previously it was added inside the
                                // recovered/downloaded branch and again at the end of the loop body, which
                                // duplicated every item and doubled ItemCount.
                                pages.Add(page);
                            }
                        }

                        book.Tags = meta.ToArray();
                        book.Items = pages.ToArray();
                        book.ItemCount = pages.Count;

                        if (pages.Count == 0)
                        {
                            await FailJobAsync("Pages.Count == 0");
                            return;
                        }

                        await context.Artifacts.AddAsync(book);
                        await context.SaveChangesAsync();

                        job.ProgressPercent = 100;
                        job.Status = ImportJobStatus.Succeeded;
                        job.ArtifactId = book.Id;
                        job.EndTime = DateTime.Now;
                        context.Update(job);
                        await context.SaveChangesAsync();
                    }
                }
                catch (Exception exp)
                {
                    await FailJobAsync(exp.ToString());
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// start updating stats page
/// </summary>
/// <remarks>
/// Queues a background job that counts verses positioned as Right or CenteredVerse1 for published poets,
/// once grouped by poet and once grouped by metre, renders both rankings as HTML tables into the page
/// whose FullUrl is "/vazn", stores the per-metre counts in VerseCount and then calls
/// _UpdatePoetStatsPage for every counted poet.
/// </remarks>
/// <param name="editingUserId">user id handed to the page update helpers</param>
/// <returns>true once the job is queued; false with the exception text when queuing throws</returns>
public RServiceResult <bool> StartUpdatingStatsPage(Guid editingUserId)
{
    try
    {
        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>())) //this is long running job, so _context might be already been freed/collected by GC
                {
                    LongRunningJobProgressServiceEF jobProgressServiceEF = new LongRunningJobProgressServiceEF(context);
                    var job = (await jobProgressServiceEF.NewJob("UpdateStatsPage", "Total Poets Stats")).Result;

                    try
                    {
                        var poetsCoupletCounts =
                            await context.GanjoorVerses.Include(v => v.Poem).ThenInclude(p => p.Cat).ThenInclude(c => c.Poet).AsNoTracking()
                                .Where(v => v.Poem.Cat.Poet.Published && (v.VersePosition == VersePosition.Right || v.VersePosition == VersePosition.CenteredVerse1))
                                .GroupBy(v => new { v.Poem.Cat.PoetId })
                                .Select(g => new { PoetId = g.Key.PoetId, Count = g.Count() })
                                .ToListAsync();
                        poetsCoupletCounts.Sort((a, b) => b.Count - a.Count);
                        var sumPoetsCouplets = poetsCoupletCounts.Sum(c => c.Count);

                        var rhythmsCoupletCounts =
                            await context.GanjoorVerses.Include(v => v.Poem).ThenInclude(p => p.Cat).ThenInclude(c => c.Poet).AsNoTracking()
                                .Where(v => v.Poem.Cat.Poet.Published && (v.VersePosition == VersePosition.Right || v.VersePosition == VersePosition.CenteredVerse1))
                                .GroupBy(v => new { v.Poem.GanjoorMetreId })
                                .Select(g => new { GanjoorMetreId = g.Key.GanjoorMetreId, Count = g.Count() })
                                .ToListAsync();
                        rhythmsCoupletCounts.Sort((a, b) => b.Count - a.Count);
                        var sumRhythmsCouplets = rhythmsCoupletCounts.Sum(c => c.Count);

                        var dbPage = await context.GanjoorPages.Where(p => p.FullUrl == "/vazn").SingleAsync();
                        var poets = await context.GanjoorPoets.ToListAsync();

                        // The page grows by several fragments per row; a builder avoids re-copying the
                        // whole text on every append (fully qualified so no extra using is needed).
                        var html = new System.Text.StringBuilder();
                        // one culture instance is enough for every percentage that gets formatted
                        var persianCulture = new CultureInfo("fa-IR");

                        html.AppendLine($"<p>تا تاریخ {LanguageUtils.FormatDate(DateTime.Now)} مجموعاً {LanguageUtils.FormatMoney(sumPoetsCouplets)} بیت شعر از طریق سایت گنجور در دسترس قرار گرفته است. در جدول زیر که شاعران در آنها بر اساس تعداد ابیات اشعارشان به صورت نزولی مرتب شدهاند با کلیک بر روی نام هر شاعر میتوانید آمار اوزان اشعار او را مشاهده کنید.</p>");
                        html.AppendLine("<p>توجه فرمایید که این آمار به دلایلی از قبیل وجود چند نسخه از آثار شعرا در سایت (مثل آثار خیام یا وجود متن خلاصه و کامل هفت اورنگ جامی) و همینطور یک بیت محسوب شدن مصرعهای بند قالبهای ترکیبی مثل مخمسها تقریبی و حدودی است و افزونگی دارد.</p>");

                        // table 1: poets ordered by couplet count
                        html.AppendLine("<table>");
                        html.AppendLine("<tr class=\"h\">");
                        html.AppendLine("<td class=\"c1\">ردیف</td>");
                        html.AppendLine("<td class=\"c2\">شاعر</td>");
                        html.AppendLine("<td class=\"c3\">تعداد ابیات</td>");
                        html.AppendLine("<td class=\"c4\">درصد از کل</td>");
                        html.AppendLine("</tr>");

                        for (int i = 0; i < poetsCoupletCounts.Count; i++)
                        {
                            var poetId = poetsCoupletCounts[i].PoetId;
                            var poetCount = poetsCoupletCounts[i].Count;

                            // the poet's root category (no parent) supplies the link target
                            var poetRootCat = await context.GanjoorCategories.Where(c => c.ParentId == null && c.PoetId == poetId).SingleAsync();
                            var poetNickname = poets.Where(p => p.Id == poetId).Single().Nickname;

                            html.AppendLine(i % 2 == 0 ? "<tr class=\"e\">" : "<tr>");
                            html.AppendLine($"<td class=\"c1\">{(i + 1).ToPersianNumbers()}</td>");
                            html.AppendLine($"<td class=\"c2\"><a href=\"{poetRootCat.FullUrl}/vazn\">{poetNickname}</a></td>");
                            html.AppendLine($"<td class=\"c3\">{LanguageUtils.FormatMoney(poetCount)}</td>");
                            html.AppendLine($"<td class=\"c4\">{(poetCount * 100.0 / sumPoetsCouplets).ToString("N2", persianCulture).ToPersianNumbers()}</td>");
                            html.AppendLine("</tr>");
                        }
                        html.AppendLine("</table>");

                        var rhythms = await context.GanjoorMetres.ToListAsync();

                        // table 2: metres ordered by couplet count
                        html.AppendLine("<p>فهرست زیر نیز آمار اشعار گنجور را از لحاظ اوزان عروضی نشان میدهد:</p>");
                        html.AppendLine("<table>");
                        html.AppendLine("<tr class=\"h\">");
                        html.AppendLine("<td class=\"c1\">ردیف</td>");
                        html.AppendLine("<td class=\"c2\">وزن</td>");
                        html.AppendLine("<td class=\"c3\">تعداد ابیات</td>");
                        html.AppendLine("<td class=\"c4\">درصد از کل</td>");
                        html.AppendLine("</tr>");

                        for (int i = 0; i < rhythmsCoupletCounts.Count; i++)
                        {
                            var metreId = rhythmsCoupletCounts[i].GanjoorMetreId;
                            var metreCount = rhythmsCoupletCounts[i].Count;

                            // single lookup, reused for both the VerseCount update and the link text
                            var rhythm = rhythms.Where(r => r.Id == metreId).SingleOrDefault();
                            if (rhythm != null)
                            {
                                rhythm.VerseCount = metreCount;
                                context.Update(rhythm);
                            }

                            string rhythmName;
                            if (metreId == null)
                            {
                                // group of poems that have no metre assigned
                                rhythmName = "وزنیابی نشده";
                            }
                            else
                            {
                                if (rhythm == null)
                                {
                                    // fail with a readable message instead of a bare NullReferenceException
                                    throw new InvalidOperationException($"GanjoorMetre with id {metreId} is referenced by poems but was not found.");
                                }
                                rhythmName = $"<a href=\"/vazn/?v={Uri.EscapeDataString(rhythm.Rhythm)}\">{rhythm.Rhythm}</a>";
                            }

                            html.AppendLine(i % 2 == 0 ? "<tr class=\"e\">" : "<tr>");
                            html.AppendLine($"<td class=\"c1\">{(i + 1).ToPersianNumbers()}</td>");
                            html.AppendLine($"<td class=\"c2\">{rhythmName}</td>");
                            html.AppendLine($"<td class=\"c3\">{LanguageUtils.FormatMoney(metreCount)}</td>");
                            html.AppendLine($"<td class=\"c4\">{(metreCount * 100.0 / sumRhythmsCouplets).ToString("N2", persianCulture).ToPersianNumbers()}</td>");
                            html.AppendLine("</tr>");
                        }
                        html.AppendLine("</table>");

                        await context.SaveChangesAsync(); //store rhythm[s].VerseCount

                        await _UpdatePageHtmlText(context, editingUserId, dbPage, "به روزرسانی خودکار صفحهٔ آمار وزنها", html.ToString());

                        foreach (var poetInfo in poetsCoupletCounts)
                        {
                            var poet = poets.Where(p => p.Id == poetInfo.PoetId).Single();
                            // NOTE(review): the poet id is passed in the progress argument here — confirm that is intended
                            await jobProgressServiceEF.UpdateJob(job.Id, poetInfo.PoetId, poet.Nickname);
                            await _UpdatePoetStatsPage(editingUserId, poet, rhythms, context);
                        }

                        await jobProgressServiceEF.UpdateJob(job.Id, 100, "", true);
                    }
                    catch (Exception exp)
                    {
                        await jobProgressServiceEF.UpdateJob(job.Id, 100, "", false, exp.ToString());
                    }
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// start removing original images
/// </summary>
/// <remarks>
/// Queues a background job that, for every picture file selected by the query below, moves the stored
/// file from PictureFileService:StoragePath to PictureFileService:TrashStoragePath (overwriting an
/// existing target) and clears StoredFileName on the record. Files that are missing on disk are skipped.
/// </remarks>
/// <returns>true once the job is queued; false with the exception text when queuing throws</returns>
public RServiceResult <bool> StartRemovingOriginalImages()
{
    try
    {
        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>())) //this is long running job, so _context might be already been freed/collected by GC
                {
                    LongRunningJobProgressServiceEF jobProgressServiceEF = new LongRunningJobProgressServiceEF(context);
                    var job = (await jobProgressServiceEF.NewJob("RemovingOriginalImages", "Removing")).Result;

                    try
                    {
                        var srcPath = Configuration.GetSection("PictureFileService")["StoragePath"];
                        var trashPath = Configuration.GetSection("PictureFileService")["TrashStoragePath"];

                        var images = await context.PictureFiles
                            .Where(p => p.StoredFileName != null && p.SrcUrl != null && p.NormalSizeImageStoredFileName.IndexOf("orig") != 0)
                            .ToListAsync();

                        int progress = 0;
                        for (int i = 0; i < images.Count; i++)
                        {
                            var image = images[i];

                            // CreateDirectory is a no-op for existing folders and creates missing parents,
                            // so "orig" is also created when the trash folder itself already exists
                            // (previously it was only created together with a brand new trash folder).
                            string targetDir = Path.Combine(trashPath, image.FolderName);
                            Directory.CreateDirectory(Path.Combine(targetDir, "orig"));

                            string srcFileName = Path.Combine(Path.Combine(srcPath, image.FolderName), image.StoredFileName);
                            if (!File.Exists(srcFileName))
                            {
                                continue;
                            }

                            string targetFileName = Path.Combine(targetDir, image.StoredFileName);
                            File.Move(srcFileName, targetFileName, true);

                            image.StoredFileName = null;
                            context.Update(image);
                            // Persist right after the move so the record never keeps pointing at a file that
                            // is gone; the block previously relied on some other call flushing this context.
                            await context.SaveChangesAsync();

                            int percent = i * 100 / images.Count;
                            if (percent > progress)
                            {
                                progress = percent;
                                await jobProgressServiceEF.UpdateJob(job.Id, progress);
                            }
                        }

                        await jobProgressServiceEF.UpdateJob(job.Id, 100, "", true);
                    }
                    catch (Exception exp)
                    {
                        await jobProgressServiceEF.UpdateJob(job.Id, 100, "", false, exp.ToString());
                    }
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// import from http://www.qajarwomen.org
/// </summary>
/// <remarks>
/// Validates the request, stores a new <see cref="ImportJob"/> and queues a background work item that
/// builds a draft artifact from the pages returned by _InternalHarvardJsonImport.
/// </remarks>
/// <param name="hardvardResourceNumber">43117279</param>
/// <param name="friendlyUrl">atame</param>
/// <param name="srcUrl">http://www.qajarwomen.org/fa/items/1018A10.html</param>
/// <returns>true when the job was queued, otherwise false together with a message</returns>
private async Task <RServiceResult <bool> > StartImportingFromHarvardDirectly(string hardvardResourceNumber, string friendlyUrl, string srcUrl)
{
    try
    {
        // a job counts as active unless it has failed or was aborted
        var activeJob = await _context.ImportJobs
            .Where(j => j.JobType == JobType.HarvardDirect && j.ResourceNumber == hardvardResourceNumber && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted))
            .SingleOrDefaultAsync();
        if (activeJob != null)
        {
            return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing harvard direct resource number {hardvardResourceNumber}");
        }

        if (string.IsNullOrEmpty(friendlyUrl))
        {
            return new RServiceResult<bool>(false, $"Friendly url is empty, harvard direct resource number {hardvardResourceNumber}");
        }

        var artifactWithSameUrl = await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).SingleOrDefaultAsync();
        if (artifactWithSameUrl != null)
        {
            return new RServiceResult<bool>(false, $"duplicated friendly url '{friendlyUrl}'");
        }

        ImportJob job = new ImportJob()
        {
            JobType = JobType.HarvardDirect,
            ResourceNumber = hardvardResourceNumber,
            FriendlyUrl = friendlyUrl,
            SrcUrl = srcUrl,
            QueueTime = DateTime.Now,
            ProgressPercent = 0,
            Status = ImportJobStatus.NotStarted
        };
        await _context.ImportJobs.AddAsync(job);
        await _context.SaveChangesAsync();

        // writes a failed state for the job through a separate, short-lived context
        async Task MarkJobFailedAsync(string failure)
        {
            using (RMuseumDbContext jobDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
            {
                job.EndTime = DateTime.Now;
                job.Status = ImportJobStatus.Failed;
                job.Exception = failure;
                jobDb.Update(job);
                await jobDb.SaveChangesAsync();
            }
        }

        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                try
                {
                    using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                    {
                        RArtifactMasterRecord artifact = new RArtifactMasterRecord($"extracted from harvard resource number {job.ResourceNumber}", $"extracted from harvard resource number {job.ResourceNumber}")
                        {
                            Status = PublishStatus.Draft,
                            DateTime = DateTime.Now,
                            LastModified = DateTime.Now,
                            CoverItemIndex = 0,
                            FriendlyUrl = friendlyUrl
                        };

                        // fixed book-level attributes, added in this order
                        List<RTagValue> bookTags = new List<RTagValue>();
                        var plainAttributes = new (string Name, string Value)[]
                        {
                            ("Notes", "وارد شده از سایت دنیای زنان در عصر قاجار"),
                            ("Type", "Book"),
                            ("Type", "Manuscript"),
                        };
                        foreach (var attribute in plainAttributes)
                        {
                            bookTags.Add(await TagHandler.PrepareAttribute(context, attribute.Name, attribute.Value, 1));
                        }

                        // the source attribute additionally carries the source url
                        RTagValue sourceTag = await TagHandler.PrepareAttribute(context, "Source", "دنیای زنان در عصر قاجار", 1);
                        sourceTag.ValueSupplement = $"{job.SrcUrl}";
                        bookTags.Add(sourceTag);

                        using (RMuseumDbContext jobDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                        {
                            job.StartTime = DateTime.Now;
                            job.Status = ImportJobStatus.Running;
                            job.SrcContent = $"https://iiif.lib.harvard.edu/manifests/drs:{hardvardResourceNumber}";
                            jobDb.Update(job);
                            await jobDb.SaveChangesAsync();
                        }

                        var importResult = await _InternalHarvardJsonImport(hardvardResourceNumber, job, friendlyUrl, context, artifact, bookTags);
                        List<RArtifactItemRecord> importedPages = importResult.Result;
                        if (importedPages == null)
                        {
                            // nothing more is recorded on the job here
                            return;
                        }

                        artifact.Tags = bookTags.ToArray();
                        artifact.Items = importedPages.ToArray();
                        artifact.ItemCount = importedPages.Count;

                        if (importedPages.Count == 0)
                        {
                            await MarkJobFailedAsync("Pages.Count == 0");
                            return;
                        }

                        await context.Artifacts.AddAsync(artifact);
                        await context.SaveChangesAsync();

                        // the artifact id is read after the save above and stored on the job
                        job.ProgressPercent = 100;
                        job.Status = ImportJobStatus.Succeeded;
                        job.ArtifactId = artifact.Id;
                        job.EndTime = DateTime.Now;
                        context.Update(job);
                        await context.SaveChangesAsync();
                    }
                }
                catch (Exception exp)
                {
                    await MarkJobFailedAsync(exp.ToString());
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary> /// from http://www.bl.uk /// </summary> /// <param name="resourceNumber">grenville_xli_f001r</param> /// <param name="friendlyUrl"></param> /// <returns></returns> private async Task <RServiceResult <bool> > StartImportingFromBritishLibrary(string resourceNumber, string friendlyUrl) { string url = $"http://www.bl.uk/manuscripts/Viewer.aspx?ref={resourceNumber}"; if ( ( await _context.ImportJobs .Where(j => j.JobType == JobType.BritishLibrary && j.ResourceNumber == resourceNumber && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted)) .SingleOrDefaultAsync() ) != null ) { return(new RServiceResult <bool>(false, $"Job is already scheduled or running for importing {url}")); } if (string.IsNullOrEmpty(friendlyUrl)) { friendlyUrl = resourceNumber; } if ( (await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).SingleOrDefaultAsync()) != null ) { return(new RServiceResult <bool>(false, $"duplicated artifact friendly url '{friendlyUrl}'")); } ImportJob job = new ImportJob() { JobType = JobType.BritishLibrary, ResourceNumber = resourceNumber, FriendlyUrl = friendlyUrl, SrcUrl = url, QueueTime = DateTime.Now, ProgressPercent = 0, Status = ImportJobStatus.NotStarted }; await _context.ImportJobs.AddAsync ( job ); await _context.SaveChangesAsync(); try { _backgroundTaskQueue.QueueBackgroundWorkItem ( async token => { try { using (var client = new HttpClient()) { client.Timeout = TimeSpan.FromMinutes(5); using (var result = await client.GetAsync(url)) { if (result.IsSuccessStatusCode) { using (RMuseumDbContext context = new RMuseumDbContext(Configuration)) { if ( (await context.Artifacts.Where(a => a.FriendlyUrl == job.FriendlyUrl).SingleOrDefaultAsync()) != null ) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "aborted because of duplicated friendly url"; importJobUpdaterDb.Update(job); await 
importJobUpdaterDb.SaveChangesAsync(); } return; } RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from {url}", $"extracted from {url}") { Status = PublishStatus.Draft, DateTime = DateTime.Now, LastModified = DateTime.Now, CoverItemIndex = 0, FriendlyUrl = friendlyUrl }; List <RTagValue> meta = new List <RTagValue>(); RTagValue tag; string html = await result.Content.ReadAsStringAsync(); using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.StartTime = DateTime.Now; job.Status = ImportJobStatus.Running; job.SrcContent = html; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } int nIdxStart = html.IndexOf("PageList"); if (nIdxStart == -1) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "PageList not found"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } nIdxStart = html.IndexOf("value=\"", nIdxStart); if (nIdxStart == -1) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "value after PageList not found"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } nIdxStart += "value=\"".Length; string strPageList = html.Substring(nIdxStart, html.IndexOf('"', nIdxStart) - nIdxStart); nIdxStart = html.IndexOf("TextList"); if (nIdxStart == -1) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "TextList not found"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } nIdxStart = html.IndexOf("value=\"", nIdxStart); if (nIdxStart == -1) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = 
DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "value after TextList not found"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } nIdxStart += "value=\"".Length; string strTextList = html.Substring(nIdxStart, html.IndexOf('"', nIdxStart) - nIdxStart); nIdxStart = html.IndexOf("TitleList"); if (nIdxStart == -1) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "TitleList not found"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } nIdxStart = html.IndexOf("value=\"", nIdxStart); if (nIdxStart == -1) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "value after TitleList not found"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } nIdxStart += "value=\"".Length; string strTitleList = html.Substring(nIdxStart, html.IndexOf('"', nIdxStart) - nIdxStart); string[] PageUrls = strPageList.Split("||", StringSplitOptions.None); string[] PageTexts = strTextList.Split("||", StringSplitOptions.None); string[] PageTitles = strTitleList.Split("||", StringSplitOptions.None); if (PageUrls.Length != PageTexts.Length || PageTexts.Length != PageTitles.Length) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "PageUrls.Length != PageTexts.Length || PageTexts.Length != PageTitles.Length"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } tag = await TagHandler.PrepareAttribute(context, "Title", "Untitled", 1); meta.Add(tag); tag = await TagHandler.PrepareAttribute(context, "Contributor Names", "Unknown", 1); meta.Add(tag); tag = await 
TagHandler.PrepareAttribute(context, "Type", "Book", 1); meta.Add(tag); tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1); meta.Add(tag); book.Tags = meta.ToArray(); tag = await TagHandler.PrepareAttribute(context, "Source", "British Library", 1); string viewerUrl = $"http://www.bl.uk/manuscripts/FullDisplay.aspx?ref={resourceNumber.Substring(0, resourceNumber.LastIndexOf('_'))}"; tag.ValueSupplement = viewerUrl; meta.Add(tag); List <RArtifactItemRecord> pages = new List <RArtifactItemRecord>(); int order = 0; for (int i = 0; i < PageUrls.Length; i++) { if (PageUrls[i] == "##") { continue; } order++; using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.ProgressPercent = order; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } RArtifactItemRecord page = new RArtifactItemRecord() { Name = $"تصویر {order}", NameInEnglish = $"Image {order}", Description = "", DescriptionInEnglish = "", Order = order, FriendlyUrl = $"p{$"{order}".PadLeft(4, '0')}", LastModified = DateTime.Now }; List <RTagValue> pageTags = new List <RTagValue>(); tag = await TagHandler.PrepareAttribute(context, "Source", "British Library", 1); tag.ValueSupplement = $"http://www.bl.uk/manuscripts/Viewer.aspx?ref={PageUrls[i]}"; pageTags.Add(tag); if (!string.IsNullOrEmpty(PageTitles[i])) { RTagValue toc = await TagHandler.PrepareAttribute(context, "Title in TOC", PageTitles[i], 1); toc.ValueSupplement = "1"; //font size pageTags.Add(toc); } if (!string.IsNullOrEmpty(PageTexts[i])) { tag = await TagHandler.PrepareAttribute(context, "Label", PageTexts[i], 1); pageTags.Add(tag); } page.Tags = pageTags.ToArray(); bool recovered = false; if ( File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "orig"), $"{order}".PadLeft(4, '0') + ".jpg") ) && File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "norm"), 
$"{order}".PadLeft(4, '0') + ".jpg") ) && File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "thumb"), $"{order}".PadLeft(4, '0') + ".jpg") ) ) { RServiceResult <RPictureFile> picture = await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1, viewerUrl, Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "orig"), $"{order}".PadLeft(4, '0') + ".jpg"), Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "norm"), $"{order}".PadLeft(4, '0') + ".jpg"), Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "thumb"), $"{order}".PadLeft(4, '0') + ".jpg"), $"{order}".PadLeft(4, '0') + ".jpg", friendlyUrl); if (picture.Result != null) { recovered = true; page.Images = new RPictureFile[] { picture.Result }; page.CoverImageIndex = 0; if (book.CoverItemIndex == (order - 1)) { book.CoverImage = RPictureFile.Duplicate(picture.Result); } pages.Add(page); } } if (!recovered) { if ( File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "orig"), $"{order}".PadLeft(4, '0') + ".jpg") ) ) { File.Delete ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "orig"), $"{order}".PadLeft(4, '0') + ".jpg") ); } if ( File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "norm"), $"{order}".PadLeft(4, '0') + ".jpg") ) ) { File.Delete ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "norm"), $"{order}".PadLeft(4, '0') + ".jpg") ); } if ( File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "thumb"), $"{order}".PadLeft(4, '0') + ".jpg") ) ) { File.Delete ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "thumb"), $"{order}".PadLeft(4, 
'0') + ".jpg") ); } /* * failed multithread attempt: * * BLTileMixer mixer = new BLTileMixer(); * RServiceResult<Stream> blResult = await mixer.DownloadMix(PageUrls[i], order); */ Dictionary <(int x, int y), Image> tiles = new Dictionary <(int x, int y), Image>(); int max_x = -1; for (int x = 0; ; x++) { string imageUrl = $"http://www.bl.uk/manuscripts/Proxy.ashx?view={PageUrls[i]}_files/13/{x}_0.jpg"; var imageResult = await client.GetAsync(imageUrl); int _ImportRetryCount = 5; int _ImportRetryInitialSleep = 500; int retryCount = 0; while (retryCount < _ImportRetryCount && !imageResult.IsSuccessStatusCode && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable) { imageResult.Dispose(); Thread.Sleep(_ImportRetryInitialSleep * (retryCount + 1)); imageResult = await client.GetAsync(imageUrl); retryCount++; } if (imageResult.IsSuccessStatusCode) { using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync()) { imageStream.Position = 0; try { Image tile = Image.FromStream(imageStream); tiles.Add((x, 0), tile); max_x = x; } catch (Exception aexp) { if (aexp is ArgumentException) { break; } throw aexp; } } } else { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = $"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } imageResult.Dispose(); return; } } int max_y = -1; for (int y = 1; ; y++) { string imageUrl = $"http://www.bl.uk/manuscripts/Proxy.ashx?view={PageUrls[i]}_files/13/0_{y}.jpg"; var imageResult = await client.GetAsync(imageUrl); int _ImportRetryCount = 5; int _ImportRetryInitialSleep = 500; int retryCount = 0; while (retryCount < _ImportRetryCount && !imageResult.IsSuccessStatusCode && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable) { imageResult.Dispose(); Thread.Sleep(_ImportRetryInitialSleep * 
(retryCount + 1)); imageResult = await client.GetAsync(imageUrl); retryCount++; } if (imageResult.IsSuccessStatusCode) { using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync()) { if (imageStream.Length <= 248) { break; } imageStream.Position = 0; try { Image tile = Image.FromStream(imageStream); tiles.Add((0, y), tile); max_y = y; } catch (Exception aexp) { if (aexp is ArgumentException) { break; } throw aexp; } } } else { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = $"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } imageResult.Dispose(); return; } } for (int x = 0; x <= max_x; x++) { for (int y = 0; y <= max_y; y++) { if (tiles.TryGetValue((x, y), out Image tmp) == false) { string imageUrl = $"http://www.bl.uk/manuscripts/Proxy.ashx?view={PageUrls[i]}_files/13/{x}_{y}.jpg"; var imageResult = await client.GetAsync(imageUrl); int _ImportRetryCount = 5; int _ImportRetryInitialSleep = 500; int retryCount = 0; while (retryCount < _ImportRetryCount && !imageResult.IsSuccessStatusCode && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable) { imageResult.Dispose(); Thread.Sleep(_ImportRetryInitialSleep * (retryCount + 1)); imageResult = await client.GetAsync(imageUrl); retryCount++; } if (imageResult.IsSuccessStatusCode) { using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync()) { if (imageStream.Length == 0) { break; } imageStream.Position = 0; tiles.Add((x, y), Image.FromStream(imageStream)); } } else { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = $"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}"; importJobUpdaterDb.Update(job); await 
importJobUpdaterDb.SaveChangesAsync(); } imageResult.Dispose(); return; } } } } int tileWidth = tiles[(0, 0)].Width; int tileHeight = tiles[(0, 0)].Height;
/// <summary>
/// Queues a background job that imports a manuscript from http://pudl.princeton.edu/
/// </summary>
/// <remarks>
/// The method itself only validates the request, stores an <see cref="ImportJob"/> row and queues the work item.
/// The queued work item downloads the metadata XML, builds tags from it, downloads (or recovers from already
/// stored files) one image per page and finally stores the artifact. Every failure inside the work item is
/// written to the job record (status, end time and exception text) instead of being thrown to the caller.
/// </remarks>
/// <param name="resourceNumber">Princeton object id, e.g. dj52w476m</param>
/// <param name="friendlyUrl">friendly url of the new artifact; <paramref name="resourceNumber"/> is used when this is null or empty</param>
/// <returns>
/// true when the job was queued; false with an error message when an unfinished job already exists for the
/// resource, when the friendly url is already used by an artifact, or when queueing throws
/// </returns>
private async Task<RServiceResult<bool>> StartImportingFromPrinceton(string resourceNumber, string friendlyUrl)
{
    string url = $"http://pudl.princeton.edu/mdCompiler2.php?obj={resourceNumber}";

    var unfinishedJob = await _context.ImportJobs
        .Where(j => j.JobType == JobType.Princeton && j.ResourceNumber == resourceNumber && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted))
        .SingleOrDefaultAsync();
    if (unfinishedJob != null)
    {
        return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing {url}");
    }

    if (string.IsNullOrEmpty(friendlyUrl))
    {
        friendlyUrl = resourceNumber;
    }

    var existingArtifact = await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).SingleOrDefaultAsync();
    if (existingArtifact != null)
    {
        return new RServiceResult<bool>(false, $"duplicated friendly url '{friendlyUrl}'");
    }

    ImportJob job = new ImportJob()
    {
        JobType = JobType.Princeton,
        ResourceNumber = resourceNumber,
        FriendlyUrl = friendlyUrl,
        SrcUrl = url,
        QueueTime = DateTime.Now,
        ProgressPercent = 0,
        Status = ImportJobStatus.NotStarted
    };
    await _context.ImportJobs.AddAsync(job);
    await _context.SaveChangesAsync();

    // Marks the job as failed and persists it through its own short-lived context.
    async Task FailJobAsync(string exceptionText)
    {
        using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
        {
            job.EndTime = DateTime.Now;
            job.Status = ImportJobStatus.Failed;
            job.Exception = exceptionText;
            importJobUpdaterDb.Update(job);
            await importJobUpdaterDb.SaveChangesAsync();
        }
    }

    try
    {
        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                try
                {
                    using (var client = new HttpClient())
                    using (var result = await client.GetAsync(url))
                    {
                        if (!result.IsSuccessStatusCode)
                        {
                            await FailJobAsync($"Http result is not ok ({result.StatusCode}) for {url}");
                            return;
                        }

                        using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                        {
                            RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from {url}", $"extracted from {url}")
                            {
                                Status = PublishStatus.Draft,
                                DateTime = DateTime.Now,
                                LastModified = DateTime.Now,
                                CoverItemIndex = 0,
                                FriendlyUrl = friendlyUrl
                            };
                            List<RTagValue> meta = new List<RTagValue>();
                            RTagValue tag;

                            string xml = await result.Content.ReadAsStringAsync();

                            using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                            {
                                job.StartTime = DateTime.Now;
                                job.Status = ImportJobStatus.Running;
                                job.SrcContent = xml;
                                importJobUpdaterDb.Update(job);
                                await importJobUpdaterDb.SaveChangesAsync();
                            }

                            XElement elObject = XDocument.Parse(xml).Root;

                            // descriptive metadata: one tag per <value>, ordered inside its <property>
                            foreach (var prop in elObject.Element("dmd").Element("properties").Elements("property"))
                            {
                                var labelElement = prop.Element("label");
                                if (labelElement == null)
                                {
                                    continue;
                                }

                                string label = labelElement.Value.Replace(":", "");
                                int valueOrder = 1;
                                foreach (var value in prop.Elements("valueGrp").Elements("value"))
                                {
                                    tag = await TagHandler.PrepareAttribute(context, label, value.Value, valueOrder);

                                    // links that start with http://localhost are not stored as supplements
                                    var href = value.Attribute("href");
                                    if (href != null && href.Value.IndexOf("http://localhost") != 0)
                                    {
                                        tag.ValueSupplement = href.Value;
                                    }

                                    meta.Add(tag);

                                    if (label == "Title")
                                    {
                                        book.Name = book.NameInEnglish = book.Description = book.DescriptionInEnglish = value.Value;
                                    }
                                    valueOrder++;
                                }
                            }

                            tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Source", "Princeton Digital Library of Islamic Manuscripts", 1);
                            tag.ValueSupplement = $"http://pudl.princeton.edu/objects/{job.ResourceNumber}";
                            meta.Add(tag);

                            book.Tags = meta.ToArray();

                            List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();
                            string storagePath = _pictureFileService.ImageStoragePath;

                            // page images come from the structure of type RelatedObjects
                            foreach (var structure in elObject.Elements("structure"))
                            {
                                if (structure.Attribute("type")?.Value != "RelatedObjects")
                                {
                                    continue;
                                }

                                var orderedList = structure.Element("div")?.Element("OrderedList");
                                if (orderedList == null)
                                {
                                    await FailJobAsync("structure[RelatedObjects].div.OrderedList is null");
                                    return;
                                }

                                int pageCount = orderedList.Elements("div").Count();
                                int inlineOrder = 0;
                                foreach (var div in orderedList.Elements("div"))
                                {
                                    inlineOrder++;
                                    using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                                    {
                                        job.ProgressPercent = inlineOrder * 100 / (decimal)pageCount;
                                        importJobUpdaterDb.Update(job);
                                        await importJobUpdaterDb.SaveChangesAsync();
                                    }

                                    int order = int.Parse(div.Attribute("order").Value);
                                    string paddedOrder = $"{order}".PadLeft(4, '0');
                                    string fileName = paddedOrder + ".jpg";

                                    RArtifactItemRecord page = new RArtifactItemRecord()
                                    {
                                        Name = $"تصویر {order}",
                                        NameInEnglish = div.Attribute("label").Value,
                                        Description = "",
                                        DescriptionInEnglish = "",
                                        Order = order,
                                        FriendlyUrl = $"p{paddedOrder}",
                                        LastModified = DateTime.Now
                                    };

                                    // the image id is the part of the img attribute after the last ':'
                                    string imageUrl = div.Attribute("img").Value;
                                    imageUrl = "https://libimages.princeton.edu/loris/" + imageUrl.Substring(imageUrl.LastIndexOf(":") + 1);
                                    imageUrl += $"/full/,{div.Attribute("h").Value}/0/default.jpg";

                                    tag = await TagHandler.PrepareAttribute(context, "Source", "Princeton Digital Library of Islamic Manuscripts", 1);
                                    tag.ValueSupplement = imageUrl;
                                    page.Tags = new RTagValue[] { tag };

                                    if (string.IsNullOrEmpty(imageUrl))
                                    {
                                        continue;
                                    }

                                    string origPath = Path.Combine(storagePath, friendlyUrl, "orig", fileName);
                                    string normPath = Path.Combine(storagePath, friendlyUrl, "norm", fileName);
                                    string thumbPath = Path.Combine(storagePath, friendlyUrl, "thumb", fileName);

                                    // when all three renditions already exist on disk, try to reuse them instead of downloading
                                    if (File.Exists(origPath) && File.Exists(normPath) && File.Exists(thumbPath))
                                    {
                                        RServiceResult<RPictureFile> recoveredPicture = await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1,
                                            imageUrl, origPath, normPath, thumbPath, fileName, friendlyUrl);
                                        if (recoveredPicture.Result != null)
                                        {
                                            page.Images = new RPictureFile[] { recoveredPicture.Result };
                                            page.CoverImageIndex = 0;
                                            if (book.CoverItemIndex == (order - 1))
                                            {
                                                book.CoverImage = RPictureFile.Duplicate(recoveredPicture.Result);
                                            }
                                            pages.Add(page);
                                            continue;
                                        }
                                    }

                                    // not recovered: remove whatever partial renditions exist before downloading again
                                    foreach (string stalePath in new[] { origPath, normPath, thumbPath })
                                    {
                                        if (File.Exists(stalePath))
                                        {
                                            File.Delete(stalePath);
                                        }
                                    }

                                    const int importRetryCount = 5;
                                    const int importRetryInitialSleep = 500;

                                    var imageResult = await client.GetAsync(imageUrl);
                                    try
                                    {
                                        // retry only on 503, waiting a bit longer before each new attempt
                                        for (int retryCount = 0; retryCount < importRetryCount && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable; retryCount++)
                                        {
                                            imageResult.Dispose();
                                            // Task.Delay instead of Thread.Sleep: do not block a thread while waiting
                                            await Task.Delay(importRetryInitialSleep * (retryCount + 1));
                                            imageResult = await client.GetAsync(imageUrl);
                                        }

                                        if (!imageResult.IsSuccessStatusCode)
                                        {
                                            await FailJobAsync($"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}");
                                            return;
                                        }

                                        using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync())
                                        {
                                            RServiceResult<RPictureFile> picture = await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, fileName, friendlyUrl);
                                            if (picture.Result == null)
                                            {
                                                throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                                            }

                                            page.Images = new RPictureFile[] { picture.Result };
                                            page.CoverImageIndex = 0;
                                            if (book.CoverItemIndex == (order - 1))
                                            {
                                                book.CoverImage = RPictureFile.Duplicate(picture.Result);
                                            }
                                            pages.Add(page);
                                        }
                                    }
                                    finally
                                    {
                                        // also releases the response when adding the picture throws
                                        imageResult.Dispose();
                                    }

                                    // kept from the original implementation
                                    // NOTE(review): presumably frees image buffers between pages - confirm it is still needed
                                    GC.Collect();
                                }
                            }

                            // leaf / folio side labels come from the structure of type Physical
                            foreach (var structure in elObject.Elements("structure"))
                            {
                                if (structure.Attribute("type")?.Value != "Physical")
                                {
                                    continue;
                                }

                                var manuscript = structure.Element("RTLBoundManuscript");
                                if (manuscript == null)
                                {
                                    continue;
                                }

                                foreach (var leaf in manuscript.Elements("Leaf"))
                                {
                                    foreach (var side in leaf.Elements("Side"))
                                    {
                                        int pageOrder = int.Parse(side.Attribute("order").Value);
                                        tag = await TagHandler.PrepareAttribute(context, "Leaf Side", side.Attribute("label").Value, 100);
                                        RArtifactItemRecord page = pages.Where(p => p.Order == pageOrder).SingleOrDefault();
                                        if (page != null)
                                        {
                                            List<RTagValue> tags = new List<RTagValue>(page.Tags);
                                            tags.Add(tag);
                                            page.Tags = tags;
                                        }
                                    }
                                }

                                foreach (var folio in manuscript.Elements("Folio"))
                                {
                                    foreach (var side in folio.Elements("Side"))
                                    {
                                        int pageOrder = int.Parse(side.Attribute("order").Value);
                                        tag = await TagHandler.PrepareAttribute(context, "Folio Side", folio.Attribute("label").Value + ":" + side.Attribute("label").Value, 101);
                                        RArtifactItemRecord page = pages.Where(p => p.Order == pageOrder).SingleOrDefault();
                                        if (page != null)
                                        {
                                            List<RTagValue> tags = new List<RTagValue>(page.Tags);
                                            tags.Add(tag);
                                            page.Tags = tags;
                                        }
                                    }
                                }
                            }

                            book.Items = pages.ToArray();
                            book.ItemCount = pages.Count;

                            if (pages.Count == 0)
                            {
                                await FailJobAsync("Pages.Count == 0");
                                return;
                            }

                            await context.Artifacts.AddAsync(book);
                            await context.SaveChangesAsync();

                            job.ProgressPercent = 100;
                            job.Status = ImportJobStatus.Succeeded;
                            job.ArtifactId = book.Id;
                            job.EndTime = DateTime.Now;
                            context.Update(job);
                            await context.SaveChangesAsync();
                        }
                    }
                }
                catch (Exception exp)
                {
                    await FailJobAsync(exp.ToString());
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// Queues a background job that imports a manuscript from http://www.library.upenn.edu/
/// </summary>
/// <remarks>
/// The method itself only validates the request, stores an <see cref="ImportJob"/> row and queues the work item.
/// The queued work item reads the page-turner html of the resource, builds tags from its record info table and
/// then walks the pages one by one (page 1 uses the image link of the first html, later pages request their own
/// page-turner html) until a page or its image is not found. Every failure inside the work item is written to
/// the job record (status, end time and exception text) instead of being thrown to the caller.
/// </remarks>
/// <param name="resourceNumber">Penn Libraries record id, e.g. MEDREN_9949222153503681</param>
/// <param name="friendlyUrl">friendly url of the new artifact; <paramref name="resourceNumber"/> is used when this is null or empty</param>
/// <returns>
/// true when the job was queued; false with an error message when an unfinished job already exists for the
/// resource, when the friendly url is already used by an artifact, or when queueing throws
/// </returns>
private async Task<RServiceResult<bool>> StartImportingFromPenLibraries(string resourceNumber, string friendlyUrl)
{
    string url = $"http://dla.library.upenn.edu/dla/medren/pageturn.html?id={resourceNumber}&rotation=0&size=0";

    var unfinishedJob = await _context.ImportJobs
        .Where(j => j.JobType == JobType.PennLibraries && j.ResourceNumber == resourceNumber && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted))
        .SingleOrDefaultAsync();
    if (unfinishedJob != null)
    {
        return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing {url}");
    }

    if (string.IsNullOrEmpty(friendlyUrl))
    {
        friendlyUrl = resourceNumber;
    }

    var existingArtifact = await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).SingleOrDefaultAsync();
    if (existingArtifact != null)
    {
        return new RServiceResult<bool>(false, $"duplicated artifact friendly url '{friendlyUrl}'");
    }

    ImportJob job = new ImportJob()
    {
        JobType = JobType.PennLibraries,
        ResourceNumber = resourceNumber,
        FriendlyUrl = friendlyUrl,
        SrcUrl = url,
        QueueTime = DateTime.Now,
        ProgressPercent = 0,
        Status = ImportJobStatus.NotStarted
    };
    await _context.ImportJobs.AddAsync(job);
    await _context.SaveChangesAsync();

    // Marks the job as failed and persists it through its own short-lived context.
    async Task FailJobAsync(string exceptionText)
    {
        using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
        {
            job.EndTime = DateTime.Now;
            job.Status = ImportJobStatus.Failed;
            job.Exception = exceptionText;
            importJobUpdaterDb.Update(job);
            await importJobUpdaterDb.SaveChangesAsync();
        }
    }

    try
    {
        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                // every page image link on the page-turner html starts with this prefix
                const string resolverPrefix = "https://repo.library.upenn.edu/djatoka/resolver?";

                try
                {
                    using (var client = new HttpClient())
                    {
                        client.Timeout = TimeSpan.FromMinutes(5);
                        using (var result = await client.GetAsync(url))
                        {
                            if (!result.IsSuccessStatusCode)
                            {
                                await FailJobAsync($"Http result is not ok ({result.StatusCode}) for {url}");
                                return;
                            }

                            using (RMuseumDbContext context = new RMuseumDbContext(Configuration))
                            {
                                // the friendly url may have been taken while the job was waiting in the queue
                                if ((await context.Artifacts.Where(a => a.FriendlyUrl == job.FriendlyUrl).SingleOrDefaultAsync()) != null)
                                {
                                    await FailJobAsync("aborted because of duplicated friendly url");
                                    return;
                                }

                                RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from {url}", $"extracted from {url}")
                                {
                                    Status = PublishStatus.Draft,
                                    DateTime = DateTime.Now,
                                    LastModified = DateTime.Now,
                                    CoverItemIndex = 0,
                                    FriendlyUrl = friendlyUrl
                                };
                                List<RTagValue> meta = new List<RTagValue>();
                                RTagValue tag;

                                string html = await result.Content.ReadAsStringAsync();

                                using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                                {
                                    job.StartTime = DateTime.Now;
                                    job.Status = ImportJobStatus.Running;
                                    job.SrcContent = html;
                                    importJobUpdaterDb.Update(job);
                                    await importJobUpdaterDb.SaveChangesAsync();
                                }

                                string title = "";
                                int tagOrder = 1;

                                int nIdxStart = html.IndexOf(resolverPrefix);
                                if (nIdxStart == -1)
                                {
                                    await FailJobAsync(resolverPrefix + " not found");
                                    return;
                                }

                                // the link is taken up to the closing quote of its attribute; html-encoded ampersands are decoded
                                string firstImageUrl = html.Substring(nIdxStart, html.IndexOf('"', nIdxStart) - nIdxStart).Replace("&amp;", "&");

                                // record info table: each "recordinfolabel" cell is followed by a "recordinfotext" cell
                                nIdxStart = html.IndexOf("recordinfolabel");
                                while (nIdxStart != -1)
                                {
                                    nIdxStart += "recordinfolabel\">".Length;
                                    int nIdxEnd = html.IndexOf(":", nIdxStart);
                                    string recordinfolabel = html.Substring(nIdxStart, nIdxEnd - nIdxStart);

                                    nIdxStart = html.IndexOf("recordinfotext", nIdxEnd);
                                    nIdxStart += "recordinfotext\">".Length;
                                    nIdxEnd = html.IndexOf("</td>", nIdxStart);
                                    string recordinfotext = html.Substring(nIdxStart, nIdxEnd - nIdxStart)
                                        .Replace("</div>", "<div>")
                                        .Replace("\n", "")
                                        .Replace("\r", "")
                                        .Trim();

                                    // closing div tags were turned into opening ones above, so one split yields the values
                                    string[] values = recordinfotext.Split("<div>", StringSplitOptions.RemoveEmptyEntries);
                                    foreach (string value in values)
                                    {
                                        string trimmedValue = value.Trim();
                                        if (trimmedValue.Length == 0)
                                        {
                                            continue;
                                        }

                                        if (recordinfolabel == "Title")
                                        {
                                            title = trimmedValue;
                                            tag = await TagHandler.PrepareAttribute(context, "Title", title, 1);
                                        }
                                        else if (recordinfolabel == "Author")
                                        {
                                            tag = await TagHandler.PrepareAttribute(context, "Contributor Names", trimmedValue, 1);
                                        }
                                        else
                                        {
                                            tag = await TagHandler.PrepareAttribute(context, recordinfolabel, trimmedValue, tagOrder++);
                                        }
                                        meta.Add(tag);
                                    }

                                    nIdxStart = html.IndexOf("recordinfolabel", nIdxEnd);
                                }

                                tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1);
                                meta.Add(tag);

                                tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1);
                                meta.Add(tag);

                                tag = await TagHandler.PrepareAttribute(context, "Source", "Penn Libraries", 1);
                                string viewerUrl = $"http://dla.library.upenn.edu/dla/medren/detail.html?id={resourceNumber}";
                                tag.ValueSupplement = viewerUrl;
                                meta.Add(tag);

                                book.Name = book.NameInEnglish = book.Description = book.DescriptionInEnglish = title;
                                book.Tags = meta.ToArray();

                                List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();
                                string storagePath = _pictureFileService.ImageStoragePath;

                                // the number of pages is not known in advance: keep going until a page or its image is missing
                                for (int order = 1; ; order++)
                                {
                                    using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                                    {
                                        job.ProgressPercent = order;
                                        importJobUpdaterDb.Update(job);
                                        await importJobUpdaterDb.SaveChangesAsync();
                                    }

                                    string imageUrl = firstImageUrl;
                                    string paddedOrder = $"{order}".PadLeft(4, '0');
                                    string fileName = paddedOrder + ".jpg";

                                    RArtifactItemRecord page = new RArtifactItemRecord()
                                    {
                                        Name = $"تصویر {order}",
                                        NameInEnglish = $"Image {order}",
                                        Description = "",
                                        DescriptionInEnglish = "",
                                        Order = order,
                                        FriendlyUrl = $"p{paddedOrder}",
                                        LastModified = DateTime.Now
                                    };

                                    tag = await TagHandler.PrepareAttribute(context, "Source", "Penn Libraries", 1);
                                    tag.ValueSupplement = viewerUrl;
                                    page.Tags = new RTagValue[] { tag };

                                    if (string.IsNullOrEmpty(imageUrl))
                                    {
                                        continue;
                                    }

                                    string origPath = Path.Combine(storagePath, friendlyUrl, "orig", fileName);
                                    string normPath = Path.Combine(storagePath, friendlyUrl, "norm", fileName);
                                    string thumbPath = Path.Combine(storagePath, friendlyUrl, "thumb", fileName);

                                    // when all three renditions already exist on disk, try to reuse them instead of downloading
                                    if (File.Exists(origPath) && File.Exists(normPath) && File.Exists(thumbPath))
                                    {
                                        RServiceResult<RPictureFile> recoveredPicture = await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1,
                                            imageUrl, origPath, normPath, thumbPath, fileName, friendlyUrl);
                                        if (recoveredPicture.Result != null)
                                        {
                                            page.Images = new RPictureFile[] { recoveredPicture.Result };
                                            page.CoverImageIndex = 0;
                                            if (book.CoverItemIndex == (order - 1))
                                            {
                                                book.CoverImage = RPictureFile.Duplicate(recoveredPicture.Result);
                                            }
                                            pages.Add(page);
                                            continue;
                                        }
                                    }

                                    // not recovered: remove whatever partial renditions exist before downloading again
                                    foreach (string stalePath in new[] { origPath, normPath, thumbPath })
                                    {
                                        if (File.Exists(stalePath))
                                        {
                                            File.Delete(stalePath);
                                        }
                                    }

                                    if (order > 1)
                                    {
                                        // pages after the first one have their own page-turner html that carries the image link
                                        string pageUrl = $"http://dla.library.upenn.edu/dla/medren/pageturn.html?id={resourceNumber}&doubleside=0&rotation=0&size=0&currentpage={order}";
                                        using (var pageResult = await client.GetAsync(pageUrl))
                                        {
                                            if (pageResult.StatusCode == HttpStatusCode.NotFound)
                                            {
                                                break; //finished
                                            }

                                            string pageHtml = await pageResult.Content.ReadAsStringAsync();
                                            int resolverIndex = pageHtml.IndexOf(resolverPrefix);
                                            if (resolverIndex == -1)
                                            {
                                                // a page without an image link marks the end of the book
                                                break; //finished
                                            }

                                            imageUrl = pageHtml.Substring(resolverIndex, pageHtml.IndexOf('"', resolverIndex) - resolverIndex).Replace("&amp;", "&");
                                        }
                                    }

                                    const int importRetryCount = 200;
                                    const int importRetryInitialSleep = 500;

                                    var imageResult = await client.GetAsync(imageUrl);
                                    try
                                    {
                                        if (imageResult.StatusCode == HttpStatusCode.NotFound)
                                        {
                                            break; //finished
                                        }

                                        // retry only on 503, waiting a bit longer before each new attempt
                                        for (int retryCount = 0; retryCount < importRetryCount && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable; retryCount++)
                                        {
                                            imageResult.Dispose();
                                            // Task.Delay instead of Thread.Sleep: do not block a thread while waiting
                                            await Task.Delay(importRetryInitialSleep * (retryCount + 1));
                                            imageResult = await client.GetAsync(imageUrl);
                                        }

                                        if (!imageResult.IsSuccessStatusCode)
                                        {
                                            await FailJobAsync($"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}");
                                            return;
                                        }

                                        using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync())
                                        {
                                            RServiceResult<RPictureFile> picture = await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, fileName, friendlyUrl);
                                            if (picture.Result == null)
                                            {
                                                throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                                            }

                                            page.Images = new RPictureFile[] { picture.Result };
                                            page.CoverImageIndex = 0;
                                            if (book.CoverItemIndex == (order - 1))
                                            {
                                                book.CoverImage = RPictureFile.Duplicate(picture.Result);
                                            }
                                            pages.Add(page);
                                        }
                                    }
                                    finally
                                    {
                                        // also releases the response on the "finished" and exception paths
                                        imageResult.Dispose();
                                    }

                                    // kept from the original implementation
                                    // NOTE(review): presumably frees image buffers between pages - confirm it is still needed
                                    GC.Collect();
                                }

                                book.Items = pages.ToArray();
                                book.ItemCount = pages.Count;

                                if (pages.Count == 0)
                                {
                                    await FailJobAsync("Pages.Count == 0");
                                    return;
                                }

                                await context.Artifacts.AddAsync(book);
                                await context.SaveChangesAsync();

                                job.ProgressPercent = 100;
                                job.Status = ImportJobStatus.Succeeded;
                                job.ArtifactId = book.Id;
                                job.EndTime = DateTime.Now;
                                context.Update(job);
                                await context.SaveChangesAsync();
                            }
                        }
                    }
                }
                catch (Exception exp)
                {
                    await FailJobAsync(exp.ToString());
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// Queues a background job that imports a manuscript from
/// http://www.thedigitalwalters.org/01_ACCESS_WALTERS_MANUSCRIPTS.html
/// </summary>
/// <remarks>
/// The method itself only validates the request, stores an <see cref="ImportJob"/> row and queues the work item.
/// The queued work item downloads the TEI description of the manuscript, builds tags from its header, downloads
/// (or recovers from already stored files) every graphic whose url contains "sap.jpg" and finally stores the
/// artifact. Every failure inside the work item is written to the job record (status, end time and exception
/// text) instead of being thrown to the caller.
/// </remarks>
/// <param name="resourceNumber">Walters manuscript number, e.g. W619</param>
/// <param name="friendlyUrl">friendly url of the new artifact, e.g. golestan-walters-01; <paramref name="resourceNumber"/> is used when this is null or empty</param>
/// <returns>
/// true when the job was queued; false with an error message when an unfinished job already exists for the
/// resource, when the friendly url is already used by an artifact, or when queueing throws
/// </returns>
private async Task<RServiceResult<bool>> StartImportingFromWalters(string resourceNumber, string friendlyUrl)
{
    string url = $"http://www.thedigitalwalters.org/Data/WaltersManuscripts/ManuscriptDescriptions/{resourceNumber}_tei.xml";

    var unfinishedJob = await _context.ImportJobs
        .Where(j => j.JobType == JobType.Walters && j.ResourceNumber == resourceNumber && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted))
        .SingleOrDefaultAsync();
    if (unfinishedJob != null)
    {
        return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing {url}");
    }

    if (string.IsNullOrEmpty(friendlyUrl))
    {
        friendlyUrl = resourceNumber;
    }

    var existingArtifact = await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).SingleOrDefaultAsync();
    if (existingArtifact != null)
    {
        return new RServiceResult<bool>(false, $"duplicated friendly url '{friendlyUrl}'");
    }

    ImportJob job = new ImportJob()
    {
        JobType = JobType.Walters,
        ResourceNumber = resourceNumber,
        FriendlyUrl = friendlyUrl,
        SrcUrl = url,
        QueueTime = DateTime.Now,
        ProgressPercent = 0,
        Status = ImportJobStatus.NotStarted
    };
    await _context.ImportJobs.AddAsync(job);
    await _context.SaveChangesAsync();

    // Marks the job as failed and persists it through its own short-lived context.
    async Task FailJobAsync(string exceptionText)
    {
        using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
        {
            job.EndTime = DateTime.Now;
            job.Status = ImportJobStatus.Failed;
            job.Exception = exceptionText;
            importJobUpdaterDb.Update(job);
            await importJobUpdaterDb.SaveChangesAsync();
        }
    }

    try
    {
        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                try
                {
                    using (var client = new HttpClient())
                    using (var result = await client.GetAsync(url))
                    {
                        if (!result.IsSuccessStatusCode)
                        {
                            await FailJobAsync($"Http result is not ok ({result.StatusCode}) for {url}");
                            return;
                        }

                        using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                        {
                            RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from {url}", $"extracted from {url}")
                            {
                                Status = PublishStatus.Draft,
                                DateTime = DateTime.Now,
                                LastModified = DateTime.Now,
                                CoverItemIndex = 0,
                                FriendlyUrl = friendlyUrl
                            };
                            List<RTagValue> meta = new List<RTagValue>();
                            RTagValue tag;

                            string xml = await result.Content.ReadAsStringAsync();

                            using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                            {
                                job.StartTime = DateTime.Now;
                                job.Status = ImportJobStatus.Running;
                                job.SrcContent = xml;
                                importJobUpdaterDb.Update(job);
                                await importJobUpdaterDb.SaveChangesAsync();
                            }

                            XElement elObject = XDocument.Parse(xml).Root;
                            XNamespace tei = "http://www.tei-c.org/ns/1.0";

                            // teiHeader/fileDesc of the document; throws when one of the two elements is missing
                            XElement FileDesc() => elObject.Elements(tei + "teiHeader").First().Elements(tei + "fileDesc").First();

                            tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1);
                            meta.Add(tag);

                            // The four header reads below are best effort on purpose: any exception raised while
                            // reading one of them is swallowed and the import goes on without that piece of metadata.

                            // title: only the first one is used
                            try
                            {
                                var titleElement = FileDesc().Elements(tei + "titleStmt").First().Elements(tei + "title").FirstOrDefault();
                                if (titleElement != null)
                                {
                                    string label = titleElement.Value;
                                    book.Name = book.NameInEnglish = book.Description = book.DescriptionInEnglish = label;
                                    tag = await TagHandler.PrepareAttribute(context, "Title", label, 1);
                                    meta.Add(tag);
                                }
                            }
                            catch
                            {
                                //ignore non-existing = null tags
                            }

                            // author: only the first one is used
                            try
                            {
                                var authorElement = FileDesc().Elements(tei + "titleStmt").First().Elements(tei + "author").FirstOrDefault();
                                if (authorElement != null)
                                {
                                    tag = await TagHandler.PrepareAttribute(context, "Contributor Names", authorElement.Value, 1);
                                    meta.Add(tag);
                                }
                            }
                            catch
                            {
                                //ignore non-existing = null tags
                            }

                            // responsibility statement: the name of the first one is used
                            try
                            {
                                var respStmtElement = FileDesc().Elements(tei + "titleStmt").First().Elements(tei + "respStmt").FirstOrDefault();
                                if (respStmtElement != null)
                                {
                                    string label = respStmtElement.Elements(tei + "name").First().Value;
                                    tag = await TagHandler.PrepareAttribute(context, "Contributor Names", label, 1);
                                    meta.Add(tag);
                                }
                            }
                            catch
                            {
                                //ignore non-existing = null tags
                            }

                            // notes: every note becomes a tag
                            try
                            {
                                foreach (var note in FileDesc().Elements(tei + "notesStmt").First().Elements(tei + "note"))
                                {
                                    tag = await TagHandler.PrepareAttribute(context, "Notes", note.Value, 1);
                                    meta.Add(tag);
                                }
                            }
                            catch
                            {
                                //ignore non-existing = null tags
                            }

                            tag = await TagHandler.PrepareAttribute(context, "Source", "Digitized Walters Manuscripts", 1);
                            tag.ValueSupplement = $"http://www.thedigitalwalters.org/Data/WaltersManuscripts/html/{job.ResourceNumber}/";
                            meta.Add(tag);

                            book.Tags = meta.ToArray();

                            List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();
                            string storagePath = _pictureFileService.ImageStoragePath;
                            int order = 0;

                            foreach (var surface in elObject.Elements(tei + "facsimile").First().Elements(tei + "surface"))
                            {
                                foreach (var graphic in surface.Elements(tei + "graphic"))
                                {
                                    // only the graphics whose url contains "sap.jpg" are imported
                                    string graphicUrl = graphic.Attribute("url").Value;
                                    if (!graphicUrl.Contains("sap.jpg"))
                                    {
                                        continue;
                                    }

                                    order++;
                                    using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                                    {
                                        job.ProgressPercent = order;
                                        importJobUpdaterDb.Update(job);
                                        await importJobUpdaterDb.SaveChangesAsync();
                                    }

                                    string paddedOrder = $"{order}".PadLeft(4, '0');
                                    string fileName = paddedOrder + ".jpg";

                                    RArtifactItemRecord page = new RArtifactItemRecord()
                                    {
                                        Name = $"تصویر {order}",
                                        NameInEnglish = $"Image {order}",
                                        Description = "",
                                        DescriptionInEnglish = "",
                                        Order = order,
                                        FriendlyUrl = $"p{paddedOrder}",
                                        LastModified = DateTime.Now
                                    };

                                    string imageUrl = $"http://www.thedigitalwalters.org/Data/WaltersManuscripts/{resourceNumber}/data/W.{resourceNumber.Substring(1)}/{graphicUrl}";

                                    tag = await TagHandler.PrepareAttribute(context, "Source", "Digitized Walters Manuscripts", 1);
                                    tag.ValueSupplement = imageUrl;
                                    page.Tags = new RTagValue[] { tag };

                                    if (string.IsNullOrEmpty(imageUrl))
                                    {
                                        continue;
                                    }

                                    string origPath = Path.Combine(storagePath, friendlyUrl, "orig", fileName);
                                    string normPath = Path.Combine(storagePath, friendlyUrl, "norm", fileName);
                                    string thumbPath = Path.Combine(storagePath, friendlyUrl, "thumb", fileName);

                                    // when all three renditions already exist on disk, try to reuse them instead of downloading
                                    if (File.Exists(origPath) && File.Exists(normPath) && File.Exists(thumbPath))
                                    {
                                        RServiceResult<RPictureFile> recoveredPicture = await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1,
                                            imageUrl, origPath, normPath, thumbPath, fileName, friendlyUrl);
                                        if (recoveredPicture.Result != null)
                                        {
                                            page.Images = new RPictureFile[] { recoveredPicture.Result };
                                            page.CoverImageIndex = 0;
                                            if (book.CoverItemIndex == (order - 1))
                                            {
                                                book.CoverImage = RPictureFile.Duplicate(recoveredPicture.Result);
                                            }
                                            pages.Add(page);
                                            continue;
                                        }
                                    }

                                    // not recovered: remove whatever partial renditions exist before downloading again
                                    foreach (string stalePath in new[] { origPath, normPath, thumbPath })
                                    {
                                        if (File.Exists(stalePath))
                                        {
                                            File.Delete(stalePath);
                                        }
                                    }

                                    const int importRetryCount = 5;
                                    const int importRetryInitialSleep = 500;

                                    var imageResult = await client.GetAsync(imageUrl);
                                    try
                                    {
                                        // retry only on 503, waiting a bit longer before each new attempt
                                        for (int retryCount = 0; retryCount < importRetryCount && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable; retryCount++)
                                        {
                                            imageResult.Dispose();
                                            // Task.Delay instead of Thread.Sleep: do not block a thread while waiting
                                            await Task.Delay(importRetryInitialSleep * (retryCount + 1));
                                            imageResult = await client.GetAsync(imageUrl);
                                        }

                                        if (!imageResult.IsSuccessStatusCode)
                                        {
                                            await FailJobAsync($"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}");
                                            return;
                                        }

                                        using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync())
                                        {
                                            RServiceResult<RPictureFile> picture = await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, fileName, friendlyUrl);
                                            if (picture.Result == null)
                                            {
                                                throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                                            }

                                            page.Images = new RPictureFile[] { picture.Result };
                                            page.CoverImageIndex = 0;
                                            if (book.CoverItemIndex == (order - 1))
                                            {
                                                book.CoverImage = RPictureFile.Duplicate(picture.Result);
                                            }
                                            pages.Add(page);
                                        }
                                    }
                                    finally
                                    {
                                        // also releases the response when adding the picture throws
                                        imageResult.Dispose();
                                    }

                                    // kept from the original implementation
                                    // NOTE(review): presumably frees image buffers between pages - confirm it is still needed
                                    GC.Collect();
                                }
                            }

                            book.Items = pages.ToArray();
                            book.ItemCount = pages.Count;

                            if (pages.Count == 0)
                            {
                                await FailJobAsync("Pages.Count == 0");
                                return;
                            }

                            await context.Artifacts.AddAsync(book);
                            await context.SaveChangesAsync();

                            job.ProgressPercent = 100;
                            job.Status = ImportJobStatus.Succeeded;
                            job.ArtifactId = book.Id;
                            job.EndTime = DateTime.Now;
                            context.Update(job);
                            await context.SaveChangesAsync();
                        }
                    }
                }
                catch (Exception exp)
                {
                    await FailJobAsync(exp.ToString());
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// Queues a background job that imports a book from HathiTrust (https://catalog.hathitrust.org).
/// The job reads the catalog record XML, turns its data fields into tags, then downloads page
/// images one by one until the server starts repeating the same image, and finally stores the
/// result as a draft artifact.
/// </summary>
/// <param name="resourceNumber">HathiTrust catalog record number, e.g. 006814127</param>
/// <param name="friendlyUrl">friendly url of the new artifact; when null or empty, <paramref name="resourceNumber"/> is used</param>
/// <returns>
/// A successful result once the work item is queued (the import itself reports through the <c>ImportJob</c> row);
/// a failed result when a job for this resource exists that is neither failed nor aborted, when the friendly url
/// is already taken by an artifact, or when queueing throws.
/// </returns>
private async Task<RServiceResult<bool>> StartImportingFromHathiTrust(string resourceNumber, string friendlyUrl)
{
    string url = $"https://catalog.hathitrust.org/Record/{resourceNumber}.xml";

    // AnyAsync: only existence matters here, and it does not throw when several rows match.
    bool jobExists = await _context.ImportJobs.AnyAsync
        (
        j =>
            j.JobType == JobType.HathiTrust
            && j.ResourceNumber == resourceNumber
            && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted)
        );
    if (jobExists)
    {
        return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing {url}");
    }

    if (string.IsNullOrEmpty(friendlyUrl))
    {
        friendlyUrl = resourceNumber;
    }

    if (await _context.Artifacts.AnyAsync(a => a.FriendlyUrl == friendlyUrl))
    {
        return new RServiceResult<bool>(false, $"duplicated artifact friendly url '{friendlyUrl}'");
    }

    ImportJob job = new ImportJob()
    {
        JobType = JobType.HathiTrust,
        ResourceNumber = resourceNumber,
        FriendlyUrl = friendlyUrl,
        SrcUrl = url,
        QueueTime = DateTime.Now,
        ProgressPercent = 0,
        Status = ImportJobStatus.NotStarted
    };
    await _context.ImportJobs.AddAsync(job);
    await _context.SaveChangesAsync();

    const int importRetryCount = 200;
    const int importRetryInitialSleep = 500; // milliseconds, multiplied by the attempt number

    // Persists the current state of the job through a short-lived context of its own.
    async Task SaveJobAsync()
    {
        using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
        {
            importJobUpdaterDb.Update(job);
            await importJobUpdaterDb.SaveChangesAsync();
        }
    }

    // Marks the job as failed with the given message and persists it.
    async Task FailJobAsync(string error)
    {
        job.EndTime = DateTime.Now;
        job.Status = ImportJobStatus.Failed;
        job.Exception = error;
        await SaveJobAsync();
    }

    try
    {
        _backgroundTaskQueue.QueueBackgroundWorkItem
            (
            async token =>
            {
                try
                {
                    using (var client = new HttpClient())
                    using (var result = await client.GetAsync(url))
                    {
                        if (!result.IsSuccessStatusCode)
                        {
                            await FailJobAsync($"Http result is not ok ({result.StatusCode}) for {url}");
                            return;
                        }

                        using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                        {
                            // the friendly url may have been taken between queueing and execution
                            if (await context.Artifacts.AnyAsync(a => a.FriendlyUrl == job.FriendlyUrl))
                            {
                                await FailJobAsync("aborted because of duplicated friendly url");
                                return;
                            }

                            RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from {url}", $"extracted from {url}")
                            {
                                Status = PublishStatus.Draft,
                                DateTime = DateTime.Now,
                                LastModified = DateTime.Now,
                                CoverItemIndex = 0,
                                FriendlyUrl = friendlyUrl
                            };

                            List<RTagValue> meta = new List<RTagValue>();
                            RTagValue tag;

                            string xml = await result.Content.ReadAsStringAsync();

                            job.StartTime = DateTime.Now;
                            job.Status = ImportJobStatus.Running;
                            job.SrcContent = xml;
                            await SaveJobAsync();

                            string title = "";
                            string author = "";
                            string pdfResourceNumber = "";
                            int tagOrder = 1;

                            XElement elObject = XDocument.Parse(xml).Root;
                            foreach (var datafield in elObject.Element("record").Elements("datafield"))
                            {
                                tagOrder++;
                                if (datafield.Attribute("tag") == null)
                                {
                                    continue;
                                }

                                string hathiTrustTag = datafield.Attribute("tag").Value;
                                switch (hathiTrustTag)
                                {
                                    case "245":
                                    case "246":
                                        // title: subfields a and f
                                        foreach (var subfield in datafield.Elements("subfield"))
                                        {
                                            string code = subfield.Attribute("code")?.Value;
                                            if (code == "a" || code == "f")
                                            {
                                                title = (title + " " + subfield.Value).Trim();
                                            }
                                        }
                                        break;
                                    case "100":
                                        // author: subfields a and d
                                        foreach (var subfield in datafield.Elements("subfield"))
                                        {
                                            string code = subfield.Attribute("code")?.Value;
                                            if (code == "a" || code == "d")
                                            {
                                                author = (author + " " + subfield.Value).Trim();
                                            }
                                        }
                                        break;
                                    case "HOL":
                                        // subfield p carries the id used by the image and viewer urls below
                                        foreach (var subfield in datafield.Elements("subfield"))
                                        {
                                            if (subfield.Attribute("code")?.Value == "p")
                                            {
                                                pdfResourceNumber = subfield.Value;
                                            }
                                        }
                                        break;
                                    default:
                                        // every other numeric field in 100..900 becomes a "Notes" tag
                                        if (int.TryParse(hathiTrustTag, out int tmp) && tmp >= 100 && tmp <= 900)
                                        {
                                            string note = "";
                                            foreach (var subfield in datafield.Elements("subfield"))
                                            {
                                                if (subfield.Attribute("code") != null)
                                                {
                                                    note = (note + " " + subfield.Value).Trim();
                                                }
                                            }
                                            tag = await TagHandler.PrepareAttribute(context, "Notes", note, tagOrder);
                                            meta.Add(tag);
                                        }
                                        break;
                                }
                            }

                            if (string.IsNullOrEmpty(pdfResourceNumber))
                            {
                                await FailJobAsync("pdfResourceNumber not found");
                                return;
                            }

                            tag = await TagHandler.PrepareAttribute(context, "Title", title, 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Contributor Names", author, 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Source", "HathiTrust Digital Library", 1);
                            string viewerUrl = $"https://babel.hathitrust.org/cgi/pt?id={pdfResourceNumber}";
                            tag.ValueSupplement = viewerUrl;
                            meta.Add(tag);

                            book.Name = book.NameInEnglish = book.Description = book.DescriptionInEnglish = title;
                            book.Tags = meta.ToArray();

                            List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();
                            string lastMD5hash = "";
                            int order = 0;

                            // The page count is not known in advance: pages are requested by sequence number
                            // until two consecutive downloads produce an identical thumbnail.
                            while (true)
                            {
                                order++;

                                // the total is unknown, so the running page number is stored as the progress value
                                job.ProgressPercent = order;
                                await SaveJobAsync();

                                string paddedOrder = $"{order}".PadLeft(4, '0');
                                string fileName = paddedOrder + ".jpg";
                                string bookFolder = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl);
                                string origPath = Path.Combine(bookFolder, "orig", fileName);
                                string normPath = Path.Combine(bookFolder, "norm", fileName);
                                string thumbPath = Path.Combine(bookFolder, "thumb", fileName);

                                string imageUrl = $"https://babel.hathitrust.org/cgi/imgsrv/image?id={pdfResourceNumber};seq={order};size=1000;rotation=0";

                                RArtifactItemRecord page = new RArtifactItemRecord()
                                {
                                    Name = $"تصویر {order}",
                                    NameInEnglish = $"Image {order}",
                                    Description = "",
                                    DescriptionInEnglish = "",
                                    Order = order,
                                    FriendlyUrl = $"p{paddedOrder}",
                                    LastModified = DateTime.Now
                                };

                                tag = await TagHandler.PrepareAttribute(context, "Source", "HathiTrust Digital Library", 1);
                                tag.ValueSupplement = viewerUrl;
                                page.Tags = new RTagValue[] { tag };

                                // When all three renditions are already on disk, reuse them instead of downloading again.
                                if (File.Exists(origPath) && File.Exists(normPath) && File.Exists(thumbPath))
                                {
                                    RServiceResult<RPictureFile> recoveredPicture =
                                        await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1, imageUrl, origPath, normPath, thumbPath, fileName, friendlyUrl);
                                    if (recoveredPicture.Result != null)
                                    {
                                        page.Images = new RPictureFile[] { recoveredPicture.Result };
                                        page.CoverImageIndex = 0;
                                        if (book.CoverItemIndex == (order - 1))
                                        {
                                            book.CoverImage = RPictureFile.Duplicate(recoveredPicture.Result);
                                        }
                                        pages.Add(page);
                                        continue;
                                    }
                                }

                                // Not recoverable: remove whatever partial set of files is left before downloading.
                                foreach (string stalePath in new[] { origPath, normPath, thumbPath })
                                {
                                    if (File.Exists(stalePath))
                                    {
                                        File.Delete(stalePath);
                                    }
                                }

                                var imageResult = await client.GetAsync(imageUrl);
                                try
                                {
                                    // retry only on 503, waiting longer after every attempt
                                    int retryCount = 0;
                                    while (retryCount < importRetryCount && !imageResult.IsSuccessStatusCode && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable)
                                    {
                                        imageResult.Dispose();
                                        await Task.Delay(importRetryInitialSleep * (retryCount + 1));
                                        imageResult = await client.GetAsync(imageUrl);
                                        retryCount++;
                                    }

                                    if (!imageResult.IsSuccessStatusCode)
                                    {
                                        await FailJobAsync($"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}");
                                        return;
                                    }

                                    using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync())
                                    {
                                        RServiceResult<RPictureFile> picture =
                                            await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, fileName, friendlyUrl);
                                        if (picture.Result == null)
                                        {
                                            throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                                        }

                                        string md5hash;
                                        using (var md5 = MD5.Create())
                                        {
                                            md5hash = string.Join("", md5.ComputeHash(File.ReadAllBytes(thumbPath)).Select(x => x.ToString("X2")));
                                        }

                                        if (md5hash == lastMD5hash)
                                        {
                                            // same thumbnail as the previous download: treated as the end of the book,
                                            // so the files just written for this page are discarded
                                            File.Delete(origPath);
                                            File.Delete(normPath);
                                            File.Delete(thumbPath);
                                            break;
                                        }
                                        lastMD5hash = md5hash;

                                        page.Images = new RPictureFile[] { picture.Result };
                                        page.CoverImageIndex = 0;
                                        if (book.CoverItemIndex == (order - 1))
                                        {
                                            book.CoverImage = RPictureFile.Duplicate(picture.Result);
                                        }
                                        pages.Add(page);
                                    }
                                }
                                finally
                                {
                                    // runs on break, return and exceptions as well, so the response is never leaked
                                    imageResult.Dispose();
                                }

                                GC.Collect();
                            }

                            book.Items = pages.ToArray();
                            book.ItemCount = pages.Count;

                            if (pages.Count == 0)
                            {
                                await FailJobAsync("Pages.Count == 0");
                                return;
                            }

                            await context.Artifacts.AddAsync(book);
                            await context.SaveChangesAsync();

                            job.ProgressPercent = 100;
                            job.Status = ImportJobStatus.Succeeded;
                            job.ArtifactId = book.Id;
                            job.EndTime = DateTime.Now;
                            context.Update(job);
                            await context.SaveChangesAsync();
                        }
                    }
                }
                catch (Exception exp)
                {
                    // any failure inside the work item is recorded on the job instead of being rethrown
                    await FailJobAsync(exp.ToString());
                }
            }
            );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// Queues a background job that imports a manuscript from Cambridge Digital Library (http://cudl.lib.cam.ac.uk).
/// The job reads the item's JSON description, turns its displayable descriptive metadata into tags,
/// downloads the image of every entry in <c>pages</c> and stores the result as a draft artifact.
/// </summary>
/// <param name="resourceNumber">item identifier, e.g. MS-RAS-00258</param>
/// <param name="friendlyUrl">friendly url of the new artifact; when null or empty, <paramref name="resourceNumber"/> is used</param>
/// <returns>
/// A successful result once the work item is queued (the import itself reports through the <c>ImportJob</c> row);
/// a failed result when a job for this resource exists that is neither failed nor aborted, when the friendly url
/// is already taken by an artifact, or when queueing throws.
/// </returns>
private async Task<RServiceResult<bool>> StartImportingFromCambridge(string resourceNumber, string friendlyUrl)
{
    string url = $"http://cudl.lib.cam.ac.uk/view/{resourceNumber}.json";

    // AnyAsync: only existence matters here, and it does not throw when several rows match.
    bool jobExists = await _context.ImportJobs.AnyAsync
        (
        j =>
            j.JobType == JobType.Cambridge
            && j.ResourceNumber == resourceNumber
            && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted)
        );
    if (jobExists)
    {
        return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing {url}");
    }

    if (string.IsNullOrEmpty(friendlyUrl))
    {
        friendlyUrl = resourceNumber;
    }

    if (await _context.Artifacts.AnyAsync(a => a.FriendlyUrl == friendlyUrl))
    {
        return new RServiceResult<bool>(false, $"duplicated artifact friendly url '{friendlyUrl}'");
    }

    ImportJob job = new ImportJob()
    {
        JobType = JobType.Cambridge,
        ResourceNumber = resourceNumber,
        FriendlyUrl = friendlyUrl,
        SrcUrl = url,
        QueueTime = DateTime.Now,
        ProgressPercent = 0,
        Status = ImportJobStatus.NotStarted
    };
    await _context.ImportJobs.AddAsync(job);
    await _context.SaveChangesAsync();

    const int importRetryCount = 200;
    const int importRetryInitialSleep = 500; // milliseconds, multiplied by the attempt number

    // Persists the current state of the job through a short-lived context of its own.
    // NOTE(review): this importer builds its contexts from Configuration while the sibling importers
    // pass a default DbContextOptions instance - confirm which one is intended.
    async Task SaveJobAsync()
    {
        using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
        {
            importJobUpdaterDb.Update(job);
            await importJobUpdaterDb.SaveChangesAsync();
        }
    }

    // Marks the job as failed with the given message and persists it.
    async Task FailJobAsync(string error)
    {
        job.EndTime = DateTime.Now;
        job.Status = ImportJobStatus.Failed;
        job.Exception = error;
        await SaveJobAsync();
    }

    try
    {
        _backgroundTaskQueue.QueueBackgroundWorkItem
            (
            async token =>
            {
                try
                {
                    using (var client = new HttpClient())
                    using (var result = await client.GetAsync(url))
                    {
                        if (!result.IsSuccessStatusCode)
                        {
                            await FailJobAsync($"Http result is not ok ({result.StatusCode}) for {url}");
                            return;
                        }

                        using (RMuseumDbContext context = new RMuseumDbContext(Configuration))
                        {
                            // the friendly url may have been taken between queueing and execution
                            if (await context.Artifacts.AnyAsync(a => a.FriendlyUrl == job.FriendlyUrl))
                            {
                                await FailJobAsync("aborted because of duplicated friendly url");
                                return;
                            }

                            RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from {url}", $"extracted from {url}")
                            {
                                Status = PublishStatus.Draft,
                                DateTime = DateTime.Now,
                                LastModified = DateTime.Now,
                                CoverItemIndex = 0,
                                FriendlyUrl = friendlyUrl
                            };

                            List<RTagValue> meta = new List<RTagValue>();
                            RTagValue tag;

                            string json = await result.Content.ReadAsStringAsync();
                            var parsed = JObject.Parse(json);

                            // NOTE(review): both lookups dereference the token without a null check, and SelectToken
                            // with a [*] wildcard presumably throws when the path matches several tokens - either way
                            // the exception lands in the catch below and the job is marked as failed. Confirm that is intended.
                            book.Name = book.NameInEnglish =
                                parsed.SelectToken("logicalStructures[*].label").Value<string>();
                            book.Description = book.DescriptionInEnglish =
                                Regex.Replace(
                                    parsed.SelectToken("descriptiveMetadata[*].abstract.displayForm").Value<string>(),
                                    "<.*?>", string.Empty);

                            // Every descriptive metadata entry flagged for display becomes a tag (html markup stripped).
                            int tagOrder = 1;
                            foreach (JToken descriptiveMetadata in parsed.SelectTokens("$.descriptiveMetadata[*]").Children())
                            {
                                foreach (JToken child in descriptiveMetadata.Children())
                                {
                                    JToken labelToken = child.SelectToken("label");
                                    JToken displayToken = child.SelectToken("display");
                                    if (labelToken == null || displayToken == null)
                                    {
                                        continue;
                                    }
                                    if (displayToken.Value<string>() != "True")
                                    {
                                        continue;
                                    }

                                    string metaName = labelToken.Value<string>();
                                    JToken displayFormToken = child.SelectToken("displayForm");
                                    if (displayFormToken != null)
                                    {
                                        string metaValue = Regex.Replace(displayFormToken.Value<string>(), "<.*?>", string.Empty);
                                        tag = await TagHandler.PrepareAttribute(context, metaName, metaValue, tagOrder++);
                                        meta.Add(tag);
                                    }
                                    else if (child.SelectToken("value") != null)
                                    {
                                        // multi-valued entry: one tag per value that has a display form
                                        foreach (JToken value in child.SelectTokens("value").Children())
                                        {
                                            JToken valueDisplayForm = value.SelectToken("displayForm");
                                            if (valueDisplayForm != null)
                                            {
                                                string metaValue = Regex.Replace(valueDisplayForm.Value<string>(), "<.*?>", string.Empty);
                                                tag = await TagHandler.PrepareAttribute(context, metaName, metaValue, tagOrder++);
                                                meta.Add(tag);
                                            }
                                        }
                                    }
                                }
                            }

                            string imageReproPageURL = "https://image01.cudl.lib.cam.ac.uk";

                            job.StartTime = DateTime.Now;
                            job.Status = ImportJobStatus.Running;
                            job.SrcContent = json;
                            await SaveJobAsync();

                            tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Source", "University of Cambridge Digital Library", 1);
                            string viewerUrl = $"http://cudl.lib.cam.ac.uk/view/{resourceNumber}";
                            tag.ValueSupplement = viewerUrl;
                            meta.Add(tag);

                            book.Tags = meta.ToArray();

                            List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();
                            int order = 0;
                            foreach (JToken pageToken in parsed.SelectTokens("$.pages").Children())
                            {
                                order++;

                                // the running page number is stored as the progress value
                                job.ProgressPercent = order;
                                await SaveJobAsync();

                                string paddedOrder = $"{order}".PadLeft(4, '0');
                                string fileName = paddedOrder + ".jpg";
                                string bookFolder = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl);
                                string origPath = Path.Combine(bookFolder, "orig", fileName);
                                string normPath = Path.Combine(bookFolder, "norm", fileName);
                                string thumbPath = Path.Combine(bookFolder, "thumb", fileName);

                                string imageUrl = imageReproPageURL + pageToken.SelectToken("downloadImageURL").Value<string>();

                                RArtifactItemRecord page = new RArtifactItemRecord()
                                {
                                    Name = $"تصویر {order}",
                                    NameInEnglish = $"Image {order}",
                                    Description = "",
                                    DescriptionInEnglish = "",
                                    Order = order,
                                    FriendlyUrl = $"p{paddedOrder}",
                                    LastModified = DateTime.Now
                                };

                                List<RTagValue> pageMata = new List<RTagValue>();
                                tag = await TagHandler.PrepareAttribute(context, "Source", "University of Cambridge Digital Library", 1);
                                tag.ValueSupplement = $"{viewerUrl}/{order}";
                                pageMata.Add(tag);

                                JToken pageLabel = pageToken.SelectToken("label");
                                if (pageLabel != null)
                                {
                                    tag = await TagHandler.PrepareAttribute(context, "Label", pageLabel.Value<string>(), 1);
                                    pageMata.Add(tag);
                                }

                                page.Tags = pageMata.ToArray();

                                // When all three renditions are already on disk, reuse them instead of downloading again.
                                if (File.Exists(origPath) && File.Exists(normPath) && File.Exists(thumbPath))
                                {
                                    RServiceResult<RPictureFile> recoveredPicture =
                                        await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1, imageUrl, origPath, normPath, thumbPath, fileName, friendlyUrl);
                                    if (recoveredPicture.Result != null)
                                    {
                                        page.Images = new RPictureFile[] { recoveredPicture.Result };
                                        page.CoverImageIndex = 0;
                                        if (book.CoverItemIndex == (order - 1))
                                        {
                                            book.CoverImage = RPictureFile.Duplicate(recoveredPicture.Result);
                                        }
                                        pages.Add(page);
                                        continue;
                                    }
                                }

                                // Not recoverable: remove whatever partial set of files is left before downloading.
                                foreach (string stalePath in new[] { origPath, normPath, thumbPath })
                                {
                                    if (File.Exists(stalePath))
                                    {
                                        File.Delete(stalePath);
                                    }
                                }

                                var imageResult = await client.GetAsync(imageUrl);
                                try
                                {
                                    if (imageResult.StatusCode == HttpStatusCode.NotFound)
                                    {
                                        break; //finished
                                    }

                                    // retry only on 503, waiting longer after every attempt
                                    int retryCount = 0;
                                    while (retryCount < importRetryCount && !imageResult.IsSuccessStatusCode && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable)
                                    {
                                        imageResult.Dispose();
                                        await Task.Delay(importRetryInitialSleep * (retryCount + 1));
                                        imageResult = await client.GetAsync(imageUrl);
                                        retryCount++;
                                    }

                                    if (!imageResult.IsSuccessStatusCode)
                                    {
                                        await FailJobAsync($"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}");
                                        return;
                                    }

                                    using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync())
                                    {
                                        RServiceResult<RPictureFile> picture =
                                            await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, fileName, friendlyUrl);
                                        if (picture.Result == null)
                                        {
                                            throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                                        }

                                        page.Images = new RPictureFile[] { picture.Result };
                                        page.CoverImageIndex = 0;
                                        if (book.CoverItemIndex == (order - 1))
                                        {
                                            book.CoverImage = RPictureFile.Duplicate(picture.Result);
                                        }
                                        pages.Add(page);
                                    }
                                }
                                finally
                                {
                                    // runs on break, return and exceptions as well, so the response is never leaked
                                    imageResult.Dispose();
                                }

                                GC.Collect();
                            }

                            book.Items = pages.ToArray();
                            book.ItemCount = pages.Count;

                            if (pages.Count == 0)
                            {
                                await FailJobAsync("Pages.Count == 0");
                                return;
                            }

                            await context.Artifacts.AddAsync(book);
                            await context.SaveChangesAsync();

                            job.ProgressPercent = 100;
                            job.Status = ImportJobStatus.Succeeded;
                            job.ArtifactId = book.Id;
                            job.EndTime = DateTime.Now;
                            context.Update(job);
                            await context.SaveChangesAsync();
                        }
                    }
                }
                catch (Exception exp)
                {
                    // any failure inside the work item is recorded on the job instead of being rethrown
                    await FailJobAsync(exp.ToString());
                }
            }
            );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary> /// from https://www.loc.gov /// </summary> /// <param name="resourceNumber"> /// <example> /// m084 /// </example> /// </param> /// <param name="friendlyUrl"> /// <example> /// boostan1207 /// </example> /// </param> /// <param name="resourcePrefix"></param> /// <example> /// plmp /// </example> /// <returns></returns> private async Task <RServiceResult <bool> > StartImportingFromTheLibraryOfCongress(string resourceNumber, string friendlyUrl, string resourcePrefix) { string url = $"https://www.loc.gov/resource/{resourcePrefix}.{resourceNumber}/?fo=json&st=gallery"; if ( ( await _context.ImportJobs .Where(j => j.JobType == JobType.Loc && j.ResourceNumber == resourceNumber && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted)) .SingleOrDefaultAsync() ) != null ) { return(new RServiceResult <bool>(false, $"Job is already scheduled or running for importing {url}")); } if (string.IsNullOrEmpty(friendlyUrl)) { friendlyUrl = resourceNumber; } if ( (await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).SingleOrDefaultAsync()) != null ) { return(new RServiceResult <bool>(false, $"duplicated friendly url '{friendlyUrl}'")); } ImportJob job = new ImportJob() { JobType = JobType.Loc, ResourceNumber = resourceNumber, FriendlyUrl = friendlyUrl, SrcUrl = url, QueueTime = DateTime.Now, ProgressPercent = 0, Status = ImportJobStatus.NotStarted }; await _context.ImportJobs.AddAsync ( job ); await _context.SaveChangesAsync(); try { _backgroundTaskQueue.QueueBackgroundWorkItem ( async token => { try { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions <RMuseumDbContext>())) { job.StartTime = DateTime.Now; job.Status = ImportJobStatus.Running; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } int pageCount = 0; int representative_index = 0; //اول یک صفحه را میخوانیم تا تعداد صفحات را مشخص کنیم using (var client = new HttpClient()) { using (var result = await 
client.GetAsync(url)) { if (result.IsSuccessStatusCode) { string json = await result.Content.ReadAsStringAsync(); var parsed = JObject.Parse(json); pageCount = parsed.SelectToken("resource.segment_count").Value <int>(); representative_index = parsed.SelectToken("resource.representative_index").Value <int>(); } else { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions <RMuseumDbContext>())) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = $"Http result is not ok ({result.StatusCode}) for {url}"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } } } //here might be problems: loc json does not return correct answer when number of segments are more than 1000 /* * if (pageCount > 1000) * { * job.Exception = $"Page count ({pageCount}) was cut to 1000 for this artifact due to loc bug."; * pageCount = 1000; * } */ //حالا که تعداد صفحات را داریم دوباره میخوانیم url = $"https://www.loc.gov/resource/{resourcePrefix}.{resourceNumber}/?c={pageCount}&fo=json&st=gallery"; using (var client = new HttpClient()) { using (var result = await client.GetAsync(url)) { if (result.IsSuccessStatusCode) { //here is a problem, this method could be called from a background service where _context is disposed, so I need to renew it using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions <RMuseumDbContext>())) { RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from {url}", $"extracted from {url}") { Status = PublishStatus.Draft, DateTime = DateTime.Now, LastModified = DateTime.Now, CoverItemIndex = representative_index, FriendlyUrl = friendlyUrl }; string json = await result.Content.ReadAsStringAsync(); job.SrcContent = json; var parsed = JObject.Parse(json); var segmentsArray = parsed.SelectToken("segments").ToArray(); //here might be problems: loc json does not return correct answer when number of segments are more than 1000 //I've added some 
temporary solutions prior //Here I want to log any paradox I encounter: if (segmentsArray.Length != pageCount) { job.Exception = $"Page count ({pageCount}) is not equal to number of returned resources ({segmentsArray.Length})."; } List <RTagValue> meta = new List <RTagValue>(); string string_value = await HandleSimpleValue(context, parsed, meta, "item.title", "Title"); if (!string.IsNullOrWhiteSpace(string_value)) { book.Name = string_value; book.NameInEnglish = string_value; } await HandleSimpleValue(context, parsed, meta, "item.date", "Date"); string_value = await HandleListValue(context, parsed, meta, "item.other_title", "Other Title"); if (!string.IsNullOrWhiteSpace(string_value)) { book.Name = string_value; } await HandleListValue(context, parsed, meta, "item.contributor_names", "Contributor Names"); await HandleSimpleValue(context, parsed, meta, "item.shelf_id", "Shelf ID"); await HandleListValue(context, parsed, meta, "item.created_published", "Created / Published"); await HandleListValue(context, parsed, meta, "item.subject_headings", "Subject Headings"); await HandleListValue(context, parsed, meta, "item.notes", "Notes"); await HandleListValue(context, parsed, meta, "item.medium", "Medium"); await HandleListValue(context, parsed, meta, "item.call_number", "Call Number/Physical Location"); await HandleListValue(context, parsed, meta, "item.digital_id", "Digital Id"); await HandleSimpleValue(context, parsed, meta, "item.library_of_congress_control_number", "Library of Congress Control Number"); await HandleChildrenValue(context, parsed, meta, "item.language", "Language"); await HandleListValue(context, parsed, meta, "item.online_format", "Online Format"); await HandleListValue(context, parsed, meta, "item.number_oclc", "OCLC Number"); string_value = await HandleListValue(context, parsed, meta, "item.description", "Description"); if (!string.IsNullOrEmpty(string_value)) { book.Description = string_value; book.DescriptionInEnglish = string_value; } await 
HandleSimpleValue(context, parsed, meta, "cite_this.chicago", "Chicago citation style"); await HandleSimpleValue(context, parsed, meta, "cite_this.apa", "APA citation style"); await HandleSimpleValue(context, parsed, meta, "cite_this.mla", "MLA citation style"); await HandleChildrenValue(context, parsed, meta, "item.dates", "Dates"); await HandleChildrenValue(context, parsed, meta, "item.contributors", "Contributors"); await HandleChildrenValue(context, parsed, meta, "item.location", "Location"); await HandleListValue(context, parsed, meta, "item.rights", "Rights & Access"); RTagValue tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1); meta.Add(tag); tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1); meta.Add(tag); tag = await TagHandler.PrepareAttribute(context, "Source", "Library of Congress, African and Middle East Division, Near East Section Persian Manuscript Collection", 1); tag.ValueSupplement = url; string_value = parsed.SelectToken("item.id").Value <string>(); if (!string.IsNullOrWhiteSpace(string_value)) { tag.ValueSupplement = string_value; } meta.Add(tag); book.Tags = meta.ToArray(); int order = 0; List <RArtifactItemRecord> pages = new List <RArtifactItemRecord>(); //due to loc bug for books with more than 1000 pages relying on segmentsArray changed to hard coded image urls and .... 
//foreach (JToken segment in segmentsArray) for (int pageIndex = 1; pageIndex <= pageCount; pageIndex++) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions <RMuseumDbContext>())) { job.ProgressPercent = order * 100 / (decimal)pageCount; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } order++; RArtifactItemRecord page = new RArtifactItemRecord() { Name = $"تصویر {order}", NameInEnglish = $"Image {pageIndex} of {book.NameInEnglish}", //segment.SelectToken("title").Value<string>(), Description = "", DescriptionInEnglish = "", Order = order, FriendlyUrl = $"p{$"{order}".PadLeft(4, '0')}", LastModified = DateTime.Now }; tag = await TagHandler.PrepareAttribute(context, "Source", "Library of Congress, African and Middle East Division, Near East Section Persian Manuscript Collection", 1); tag.ValueSupplement = $"http://www.loc.gov/resource/{resourcePrefix}.{resourceNumber}/?sp={pageIndex}"; //segment.SelectToken("id").Value<string>(); page.Tags = new RTagValue[] { tag }; string imageUrlPart = $"{pageIndex}".PadLeft(4, '0'); string imageUrl = $"https://tile.loc.gov/image-services/iiif/service:amed:{resourcePrefix}:{resourceNumber}:{imageUrlPart}/full/pct:100/0/default.jpg"; //string imageUrl = $"https://tile.loc.gov/image-services/iiif/service:rbc:{resourcePrefix}:2015:{resourceNumber}:{imageUrlPart}/full/pct:100/0/default.jpg"; /* * List<string> list = segment.SelectToken("image_url").ToObject<List<string>>(); * if (list != null && list.Count > 0) * { * for (int i = 0; i < list.Count; i++) * { * if (list[i].IndexOf(".jpg") != -1) * { * if (imageUrl == "") * imageUrl = list[i]; * else * { * if (imageUrl.IndexOf("#h=") != -1 && imageUrl.IndexOf("&w=", imageUrl.IndexOf("#h=")) != -1) * { * int h1 = int.Parse(imageUrl.Substring(imageUrl.IndexOf("#h=") + "#h=".Length, imageUrl.IndexOf("&w=") - imageUrl.IndexOf("#h=") - "&w=".Length)); * if (list[i].IndexOf("#h=") != -1 && list[i].IndexOf("&w=", 
list[i].IndexOf("#h=")) != -1) * { * int h2 = int.Parse(list[i].Substring(list[i].IndexOf("#h=") + "#h=".Length, list[i].IndexOf("&w=") - list[i].IndexOf("#h=") - "&w=".Length)); * * if (h2 > h1) * { * imageUrl = list[i]; * } * } * } * else * imageUrl = list[i]; * * } * } * } * } */ if (!string.IsNullOrEmpty(imageUrl)) { //imageUrl = "https:" + imageUrl.Substring(0, imageUrl.IndexOf('#')); bool recovered = false; if ( File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "orig"), $"{order}".PadLeft(4, '0') + ".jpg") ) && File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "norm"), $"{order}".PadLeft(4, '0') + ".jpg") ) && File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "thumb"), $"{order}".PadLeft(4, '0') + ".jpg") ) ) { RServiceResult <RPictureFile> picture = await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1, imageUrl, Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "orig"), $"{order}".PadLeft(4, '0') + ".jpg"), Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "norm"), $"{order}".PadLeft(4, '0') + ".jpg"), Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "thumb"), $"{order}".PadLeft(4, '0') + ".jpg"), $"{order}".PadLeft(4, '0') + ".jpg", friendlyUrl); if (picture.Result != null) { recovered = true; page.Images = new RPictureFile[] { picture.Result }; page.CoverImageIndex = 0; if (book.CoverItemIndex == (order - 1)) { book.CoverImage = RPictureFile.Duplicate(picture.Result); } pages.Add(page); } } if (!recovered) { if ( File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "orig"), $"{order}".PadLeft(4, '0') + ".jpg") ) ) { File.Delete ( 
Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "orig"), $"{order}".PadLeft(4, '0') + ".jpg") ); } if ( File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "norm"), $"{order}".PadLeft(4, '0') + ".jpg") ) ) { File.Delete ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "norm"), $"{order}".PadLeft(4, '0') + ".jpg") ); } if ( File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "thumb"), $"{order}".PadLeft(4, '0') + ".jpg") ) ) { File.Delete ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "thumb"), $"{order}".PadLeft(4, '0') + ".jpg") ); } var imageResult = await client.GetAsync(imageUrl); int _ImportRetryCount = 5; int _ImportRetryInitialSleep = 500; int retryCount = 0; while (retryCount < _ImportRetryCount && !imageResult.IsSuccessStatusCode && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable) { imageResult.Dispose(); Thread.Sleep(_ImportRetryInitialSleep * (retryCount + 1)); imageResult = await client.GetAsync(imageUrl); retryCount++; } if (imageResult.IsSuccessStatusCode) { using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync()) { RServiceResult <RPictureFile> picture = await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, $"{order}".PadLeft(4, '0') + ".jpg", friendlyUrl); if (picture.Result == null) { throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}"); } page.Images = new RPictureFile[] { picture.Result }; page.CoverImageIndex = 0; if (book.CoverItemIndex == (order - 1)) { book.CoverImage = RPictureFile.Duplicate(picture.Result); } pages.Add(page); } } else { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions <RMuseumDbContext>())) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; 
job.Exception = $"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } imageResult.Dispose(); return; } imageResult.Dispose(); GC.Collect(); } } } book.Items = pages.ToArray(); book.ItemCount = pages.Count; if (book.CoverImage == null && pages.Count > 0) { book.CoverImage = RPictureFile.Duplicate(pages[0].Images.First()); } await context.Artifacts.AddAsync(book); await context.SaveChangesAsync(); job.ProgressPercent = 100; job.Status = ImportJobStatus.Succeeded; job.ArtifactId = book.Id; job.EndTime = DateTime.Now; context.Update(job); await context.SaveChangesAsync(); } } else { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions <RMuseumDbContext>())) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = $"Http result is not ok ({result.StatusCode}) for {url}"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } } } } catch (Exception exp) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions <RMuseumDbContext>())) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = exp.ToString(); importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } } } ); return(new RServiceResult <bool>(true)); } catch (Exception exp) { return(new RServiceResult <bool>(false, exp.ToString())); } }