/// <summary>
/// Assigns a metre to the poems of one category, reporting progress through a long running job record.
/// </summary>
/// <param name="catId">id of the category whose poems are processed</param>
/// <param name="retag">when true, poems which already have a metre are processed again</param>
/// <param name="rhythm">when not empty, this rhythm is assigned to every processed poem instead of looking one up per poem</param>
/// <returns></returns>
private async Task _FindCategoryPoemsRhythmsInternal(int catId, bool retag, string rhythm)
{
    // long running job: the injected _context might already be gone, so a dedicated context is created here
    using (RMuseumDbContext db = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
    {
        LongRunningJobProgressServiceEF progress = new LongRunningJobProgressServiceEF(db);
        var job = (await progress.NewJob($"FindCategoryPoemsRhythms Cat {catId}", "Query data")).Result;
        try
        {
            var allMetres = await db.GanjoorMetres.OrderBy(m => m.Rhythm).AsNoTracking().ToArrayAsync();
            var rhythmNames = allMetres.Select(m => m.Rhythm).ToArray();

            // a non-empty rhythm argument must match exactly one known metre
            GanjoorMetre fixedMetre = null;
            if (!string.IsNullOrEmpty(rhythm))
            {
                fixedMetre = allMetres.Single(m => m.Rhythm == rhythm);
            }

            var catPoems = await db.GanjoorPoems.Where(p => p.CatId == catId).ToListAsync();
            int processed = 0;
            using (HttpClient http = new HttpClient())
            {
                foreach (var poem in catPoems)
                {
                    // poems which already have a metre are only touched when retagging
                    if (!retag && poem.GanjoorMetreId != null)
                    {
                        continue;
                    }

                    await progress.UpdateJob(job.Id, processed++);

                    if (fixedMetre != null)
                    {
                        poem.GanjoorMetreId = fixedMetre.Id;
                        db.GanjoorPoems.Update(poem);
                        await db.SaveChangesAsync();
                    }
                    else
                    {
                        var found = await _FindPoemRhythm(poem.Id, db, http, rhythmNames);
                        if (!string.IsNullOrEmpty(found.Result))
                        {
                            poem.GanjoorMetreId = allMetres.Single(m => m.Rhythm == found.Result).Id;
                            db.GanjoorPoems.Update(poem);
                            await db.SaveChangesAsync();
                        }
                    }

                    bool hasMetre = poem.GanjoorMetreId != null;
                    bool hasRhyme = !string.IsNullOrEmpty(poem.RhymeLetters);
                    if (hasMetre && hasRhyme)
                    {
                        await _UpdateRelatedPoems(db, (int)poem.GanjoorMetreId, poem.RhymeLetters);
                    }
                }
            }

            await progress.UpdateJob(job.Id, 99);
            await progress.UpdateJob(job.Id, 100, "", true);
        }
        catch (Exception exp)
        {
            // the failure is recorded on the job instead of being rethrown
            await progress.UpdateJob(job.Id, 100, "", false, exp.ToString());
        }
    }
}
/// <summary>
/// Regenerates the table of contents of the page of every descendant category of <paramref name="catId"/>,
/// storing a snapshot of the previous page text before it is overwritten.
/// </summary>
/// <param name="userId">user recorded on the snapshots as the one making the old text obsolete</param>
/// <param name="context">database context used for all reads and writes</param>
/// <param name="jobProgressServiceEF">progress reporter of the enclosing job</param>
/// <param name="job">the enclosing job record</param>
/// <param name="catId">parent category whose direct children are processed (recursively)</param>
/// <returns></returns>
private async Task _GeneratingSubCatsTOC(Guid userId, RMuseumDbContext context, LongRunningJobProgressServiceEF jobProgressServiceEF, RLongRunningJobStatus job, int catId)
{
    var children = await context.GanjoorCategories
        .AsNoTracking()
        .Where(c => c.ParentId == catId)
        .ToListAsync();

    foreach (var child in children)
    {
        // the id of the category being processed is reported as the progress value
        await jobProgressServiceEF.UpdateJob(job.Id, child.Id);

        var childPage = await context.GanjoorPages.SingleAsync(p => p.FullUrl == child.FullUrl);

        // keep the text that is about to be replaced
        var previousVersion = new GanjoorPageSnapshot()
        {
            GanjoorPageId = childPage.Id,
            MadeObsoleteByUserId = userId,
            HtmlText = childPage.HtmlText,
            Note = "تولید گروهی فهرستهای زیربخشها",
            RecordDate = DateTime.Now
        };
        context.GanjoorPageSnapshots.Add(previousVersion);

        var toc = await _GenerateTableOfContents(context, child.Id, GanjoorTOC.TitlesAndFirstVerse);
        childPage.HtmlText = toc.Result;
        context.GanjoorPages.Update(childPage);
        await context.SaveChangesAsync();

        // depth-first descent into the sub categories of this child
        await _GeneratingSubCatsTOC(userId, context, jobProgressServiceEF, job, child.Id);
    }
}
/// <summary>
/// Queues a background job which creates a numbering (named after the category title) for every
/// category that has a parent and lacks such a numbering, and starts counting for each new one.
/// </summary>
/// <returns>a successful result as soon as the job is queued</returns>
public RServiceResult<bool> GenerateMissingDefaultNumberings()
{
    _backgroundTaskQueue.QueueBackgroundWorkItem(async token =>
    {
        // long running job: the injected _context might already be gone, so a dedicated context is created here
        using (RMuseumDbContext db = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
        {
            LongRunningJobProgressServiceEF progress = new LongRunningJobProgressServiceEF(db);
            var job = (await progress.NewJob("GenerateMissingDefaultNumberings", "Query")).Result;
            try
            {
                var childCats = await db.GanjoorCategories.Where(c => c.ParentId != null).ToListAsync();
                foreach (var cat in childCats)
                {
                    var existing = await db.GanjoorNumberings
                        .FirstOrDefaultAsync(n => n.StartCatId == cat.Id && n.Name == cat.Title);
                    if (existing != null)
                    {
                        continue;
                    }

                    await progress.UpdateJob(job.Id, 0, cat.FullUrl);

                    GanjoorNumbering created = new GanjoorNumbering()
                    {
                        Name = cat.Title,
                        StartCatId = cat.Id,
                        EndCatId = cat.Id
                    };
                    db.GanjoorNumberings.Add(created);
                    await db.SaveChangesAsync();

                    // start counting
                    Recount(created.Id);
                }

                await db.SaveChangesAsync();
                await progress.UpdateJob(job.Id, 100, "", true);
            }
            catch (Exception exp)
            {
                await progress.UpdateJob(job.Id, 100, "", false, exp.ToString());
            }
        }
    });

    return new RServiceResult<bool>(true);
}
/// <summary>
/// Queues a background job which fills the OriginalSourceUrl / LinkToOriginalSource values of the
/// GanjoorLink rows from the "source" tag of the linked item.
/// </summary>
/// <returns>
/// a successful result as soon as the job is queued; a failed result carrying the exception text
/// when queuing itself throws
/// </returns>
public RServiceResult<bool> StartFillingGanjoorLinkOriginalSources()
{
    try
    {
        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                // long running job: the injected _context might already be gone, so a dedicated context is created here
                using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                {
                    LongRunningJobProgressServiceEF jobProgressServiceEF = new LongRunningJobProgressServiceEF(context);
                    var job = (await jobProgressServiceEF.NewJob("FillingGanjoorLinkOriginalSources", "Updating")).Result;
                    try
                    {
                        var links = await context.GanjoorLinks.ToListAsync();
                        foreach (var link in links)
                        {
                            // lambda parameters are named explicitly so they cannot be confused with a loop index
                            var itemInfo = await context.Items
                                .Include(item => item.Tags)
                                .ThenInclude(tagValue => tagValue.RTag)
                                .Where(item => item.Id == link.ItemId)
                                .SingleAsync();

                            var sourceTag = itemInfo.Tags.Where(t => t.RTag.FriendlyUrl == "source").FirstOrDefault();
                            if (sourceTag == null)
                            {
                                continue;
                            }

                            string sourceUrl = sourceTag.ValueSupplement;

                            // ordinal prefix test: IndexOf(string) is culture-sensitive and scans the whole value
                            if (!string.IsNullOrEmpty(sourceUrl) && sourceUrl.StartsWith("http", StringComparison.Ordinal))
                            {
                                link.OriginalSourceUrl = sourceUrl;
                                link.LinkToOriginalSource = true;
                                context.GanjoorLinks.Update(link);
                            }
                        }

                        // all link changes are persisted together at the end
                        await context.SaveChangesAsync();
                        await jobProgressServiceEF.UpdateJob(job.Id, 100, "", true);
                    }
                    catch (Exception exp)
                    {
                        await jobProgressServiceEF.UpdateJob(job.Id, 100, "", false, exp.ToString());
                    }
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// Recomputes AudioOrder of the approved recitations of a poem.
/// Score of a recitation = earlyness advantage + up votes cast by users other than its owner
/// - 5 * lines affected by approved mistakes; the highest score gets AudioOrder 1.
/// </summary>
/// <param name="context">database context used for reads and (optionally) writes</param>
/// <param name="poemId">id of the poem whose recitations are reordered</param>
/// <param name="update">when false the new orders are only set on the loaded entities and nothing is saved</param>
/// <returns></returns>
private async Task _ReOrderPoemRecitationsAsync(RMuseumDbContext context, int poemId, bool update = true)
{
    var recitations = await context.Recitations
        .Where(r => r.ReviewStatus == AudioReviewStatus.Approved && r.GanjoorPostId == poemId)
        .OrderBy(r => r.Id) // oldest recitations come first
        .ToListAsync();

    for (var i = 0; i < recitations.Count; i++)
    {
        var recitation = recitations[i];

        RecitationOrderingViewModel score = new RecitationOrderingViewModel()
        {
            RecitationId = recitation.Id,
            EarlynessAdvantage = recitations.Count - 1 - i,
            UpVotes = await context.RecitationUserUpVotes.AsNoTracking()
                .Where(r => r.RecitationId == recitation.Id && r.UserId != recitation.OwnerId)
                .CountAsync(),
            Mistakes = await context.RecitationApprovedMistakes.AsNoTracking()
                .Where(m => m.RecitationId == recitation.Id)
                .SumAsync(m => m.NumberOfLinesAffected)
        };
        score.TotalScores = score.EarlynessAdvantage + score.UpVotes - (5 * score.Mistakes);

        // AudioOrder temporarily holds the score; it is replaced by the final rank below
        recitation.AudioOrder = score.TotalScores;
    }

    // OrderByDescending is a stable sort, so recitations with equal scores keep the
    // oldest-first order they were loaded in (List<T>.Sort gives no such guarantee)
    var ranked = recitations.OrderByDescending(r => r.AudioOrder).ToList();

    for (var rank = 0; rank < ranked.Count; rank++)
    {
        ranked[rank].AudioOrder = rank + 1;
        if (update)
        {
            context.Update(ranked[rank]);
        }
    }

    if (update)
    {
        await context.SaveChangesAsync();
    }
}
/// <summary>
/// Queues a background job which passes every comment containing "href=" through
/// _ProcessCommentHtml and saves the comments whose html text comes back changed.
/// </summary>
/// <returns>a successful result as soon as the job is queued</returns>
public RServiceResult<bool> FindAndFixLongUrlsInComments()
{
    _backgroundTaskQueue.QueueBackgroundWorkItem(async token =>
    {
        // long running job: the injected _context might already be gone, so a dedicated context is created here
        using (RMuseumDbContext db = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
        {
            LongRunningJobProgressServiceEF progress = new LongRunningJobProgressServiceEF(db);
            var job = (await progress.NewJob("FindAndFixLongUrlsInComments", "Query data")).Result;
            try
            {
                var candidates = await db.GanjoorComments.Where(c => c.HtmlComment.Contains("href=")).ToArrayAsync();
                await progress.UpdateJob(job.Id, 0, $"Examining {candidates.Length} Comments");

                int reportedPercent = 0;
                int index = 0;
                foreach (var comment in candidates)
                {
                    // the reported percent advances by one step whenever the real percent gets ahead of it
                    if (index * 100 / candidates.Length > reportedPercent)
                    {
                        reportedPercent++;
                        await progress.UpdateJob(job.Id, reportedPercent);
                    }
                    index++;

                    string processed = await _ProcessCommentHtml(comment.HtmlComment, db);
                    if (processed == comment.HtmlComment)
                    {
                        continue;
                    }

                    comment.HtmlComment = processed;
                    db.Update(comment);
                    await db.SaveChangesAsync();
                }

                await progress.UpdateJob(job.Id, 100, "", true);
            }
            catch (Exception exp)
            {
                await progress.UpdateJob(job.Id, 100, "", false, exp.ToString());
            }
        }
    });

    return new RServiceResult<bool>(true);
}
/// <summary>
/// Builds (without saving it) a published tag value for the tag named <paramref name="aName"/>,
/// creating and saving the tag itself first when no tag with that name or English name exists.
/// </summary>
/// <param name="db">database context</param>
/// <param name="aName">tag name, matched against both Name and NameInEnglish</param>
/// <param name="aValue">value, used for both Value and ValueInEnglish</param>
/// <param name="order">order of the new tag value</param>
/// <returns>the new, not yet persisted tag value</returns>
public static async Task<RTagValue> PrepareAttribute(RMuseumDbContext db, string aName, string aValue, int order)
{
    RTag tagType = await db.Tags.SingleOrDefaultAsync(a => a.Name == aName || a.NameInEnglish == aName);
    if (tagType == null)
    {
        // a new tag is placed after the current last one
        int maxOrder = 0;
        if (await db.Tags.CountAsync() != 0)
        {
            maxOrder = await db.Tags.MaxAsync(a => a.Order);
        }

        tagType = new RTag()
        {
            Name = aName,
            NameInEnglish = aName,
            PluralName = $"{aName}s",
            PluralNameInEnglish = $"{aName}s",
            Order = maxOrder + 1,
            Status = PublishStatus.Published,
            GlobalValue = true
        };
        await db.Tags.AddAsync(tagType);
        await db.SaveChangesAsync();
    }

    RTagValue result = new RTagValue()
    {
        Order = order,
        Value = aValue,
        ValueInEnglish = aValue,
        ValueSupplement = "",
        RTag = tagType,
        Status = PublishStatus.Published
    };

    bool searchable = tagType.TagType == RTagType.Search || tagType.TagType == RTagType.LinkSearch;
    if (!searchable)
    {
        return result;
    }

    // for searchable tags reuse the value and friendly url of an existing value with the same English value
    RTagValue existing = await db.TagValues
        .FirstOrDefaultAsync(v => v.RTagId == tagType.Id && v.ValueInEnglish == aValue && !string.IsNullOrEmpty(v.FriendlyUrl));
    if (existing != null)
    {
        result.Value = existing.Value;
        result.FriendlyUrl = existing.FriendlyUrl;
    }

    return result;
}
/// <summary>
/// Replaces the html text of a page, storing a snapshot of the previous text first.
/// </summary>
/// <param name="context">database context</param>
/// <param name="userId">user recorded on the snapshot as the one making the old text obsolete</param>
/// <param name="page">page to update</param>
/// <param name="note">note stored on the snapshot</param>
/// <param name="htmlText">new html text of the page</param>
/// <returns></returns>
private async Task _UpdatePageHtmlText(RMuseumDbContext context, Guid userId, GanjoorPage page, string note, string htmlText)
{
    // the snapshot is built before the page changes, so it carries the old text
    GanjoorPageSnapshot previousVersion = new GanjoorPageSnapshot()
    {
        GanjoorPageId = page.Id,
        MadeObsoleteByUserId = userId,
        HtmlText = page.HtmlText,
        Note = note,
        RecordDate = DateTime.Now
    };
    context.GanjoorPageSnapshots.Add(previousVersion);

    page.HtmlText = htmlText;
    context.GanjoorPages.Update(page);

    // snapshot and page change are saved together
    await context.SaveChangesAsync();
}
/// <summary>
/// update related poems info (after metreId or rhyme for one of these poems changes)
/// </summary>
/// <param name="context">database context</param>
/// <param name="metreId">metre id shared by the poems to refresh</param>
/// <param name="rhyme">rhyme letters shared by the poems to refresh</param>
/// <returns>true on success; on any exception a failed result carrying the exception text</returns>
private async Task<RServiceResult<bool>> _UpdateRelatedPoems(RMuseumDbContext context, int metreId, string rhyme)
{
    try
    {
        var matchingPoemIds = await context.GanjoorPoems
            .AsNoTracking()
            .Where(p => p.GanjoorMetreId == metreId && p.RhymeLetters == rhyme)
            .Select(p => p.Id)
            .ToListAsync();

        for (int index = 0; index < matchingPoemIds.Count; index++)
        {
            await _UpdatePoemRelatedPoemsInfoNoSaveChanges(context, matchingPoemIds[index]);
        }

        // the per-poem helper does not save, so everything is persisted here in one go
        await context.SaveChangesAsync();
        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// import from server folder: registers an import job and queues a background task which turns
/// every *.jpg file of the folder into one page of a new draft artifact
/// </summary>
/// <param name="folderPath">C:\Tools\batches\florence</param>
/// <param name="friendlyUrl">shahname-florence</param>
/// <param name="srcUrl">https://t.me/dr_khatibi_abolfazl/888</param>
/// <returns>
/// true when the job is queued; a failed result when a job for the same folder is already
/// scheduled or running, the friendly url is empty or already used, or scheduling throws
/// </returns>
private async Task<RServiceResult<bool>> StartImportingFromServerFolder(string folderPath, string friendlyUrl, string srcUrl)
{
    try
    {
        if (
            (
            await _context.ImportJobs
                .Where(j => j.JobType == JobType.ServerFolder && j.ResourceNumber == folderPath && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted))
                .SingleOrDefaultAsync()
            )
            != null
            )
        {
            return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing server folder {folderPath}");
        }

        if (string.IsNullOrEmpty(friendlyUrl))
        {
            return new RServiceResult<bool>(false, $"Friendly url is empty, server folder {folderPath}");
        }

        if ((await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).SingleOrDefaultAsync()) != null)
        {
            return new RServiceResult<bool>(false, $"duplicated friendly url '{friendlyUrl}'");
        }

        ImportJob job = new ImportJob()
        {
            JobType = JobType.ServerFolder,
            ResourceNumber = folderPath,
            FriendlyUrl = friendlyUrl,
            SrcUrl = srcUrl,
            QueueTime = DateTime.Now,
            ProgressPercent = 0,
            Status = ImportJobStatus.NotStarted
        };

        await _context.ImportJobs.AddAsync(job);
        await _context.SaveChangesAsync();

        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                try
                {
                    using (RMuseumDbContext context = new RMuseumDbContext(Configuration))
                    {
                        RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from server folder {job.ResourceNumber}", $"extracted from server folder {job.ResourceNumber}")
                        {
                            Status = PublishStatus.Draft,
                            DateTime = DateTime.Now,
                            LastModified = DateTime.Now,
                            CoverItemIndex = 0,
                            FriendlyUrl = friendlyUrl
                        };

                        // each prepared tag value is added exactly once
                        // (the "Manuscript" value used to be added to the list twice)
                        List<RTagValue> meta = new List<RTagValue>();
                        RTagValue tag;

                        tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1);
                        meta.Add(tag);

                        tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1);
                        meta.Add(tag);

                        // job state changes are saved through short-lived contexts of their own
                        using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                        {
                            job.StartTime = DateTime.Now;
                            job.Status = ImportJobStatus.Running;
                            job.SrcContent = "";
                            importJobUpdaterDb.Update(job);
                            await importJobUpdaterDb.SaveChangesAsync();
                        }

                        List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();
                        string[] fileNames = Directory.GetFiles(job.ResourceNumber, "*.jpg");
                        int order = 0;
                        foreach (string fileName in fileNames)
                        {
                            using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                            {
                                job.ProgressPercent = order * 100 / (decimal)fileNames.Length;
                                importJobUpdaterDb.Update(job);
                                await importJobUpdaterDb.SaveChangesAsync();
                            }

                            order++;

                            // zero padded 1-based page number, e.g. 0001
                            string paddedOrder = $"{order}".PadLeft(4, '0');
                            string imageFileName = paddedOrder + ".jpg";

                            RArtifactItemRecord page = new RArtifactItemRecord()
                            {
                                Name = $"تصویر {order}",
                                NameInEnglish = $"Image {order} of {book.NameInEnglish}",
                                Description = "",
                                DescriptionInEnglish = "",
                                Order = order,
                                FriendlyUrl = $"p{paddedOrder}",
                                LastModified = DateTime.Now
                            };

                            page.Tags = new RTagValue[] { };

                            // delete any image already stored under the same name in the three variant folders
                            foreach (string variant in new string[] { "orig", "norm", "thumb" })
                            {
                                string existingImagePath = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl, variant, imageFileName);
                                if (File.Exists(existingImagePath))
                                {
                                    File.Delete(existingImagePath);
                                }
                            }

                            // the source image is only read, so it is opened read-only
                            // (the default access is ReadWrite, which fails on read-only files)
                            using (FileStream imageStream = new FileStream(fileName, FileMode.Open, FileAccess.Read))
                            {
                                RServiceResult<RPictureFile> picture = await _pictureFileService.Add(page.Name, page.Description, 1, null, job.SrcUrl, imageStream, imageFileName, friendlyUrl);
                                if (picture.Result == null)
                                {
                                    // caught by the outer catch, which records the text on the job
                                    throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                                }

                                page.Images = new RPictureFile[] { picture.Result };
                                page.CoverImageIndex = 0;

                                if (book.CoverItemIndex == (order - 1))
                                {
                                    book.CoverImage = RPictureFile.Duplicate(picture.Result);
                                }
                            }

                            pages.Add(page);
                        }

                        book.Tags = meta.ToArray();
                        book.Items = pages.ToArray();
                        book.ItemCount = pages.Count;

                        if (pages.Count == 0)
                        {
                            using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                            {
                                job.EndTime = DateTime.Now;
                                job.Status = ImportJobStatus.Failed;
                                job.Exception = "Pages.Count == 0";
                                importJobUpdaterDb.Update(job);
                                await importJobUpdaterDb.SaveChangesAsync();
                            }
                            return;
                        }

                        await context.Artifacts.AddAsync(book);
                        await context.SaveChangesAsync();

                        job.ProgressPercent = 100;
                        job.Status = ImportJobStatus.Succeeded;
                        job.ArtifactId = book.Id;
                        job.EndTime = DateTime.Now;
                        context.Update(job);
                        await context.SaveChangesAsync();
                    }
                }
                catch (Exception exp)
                {
                    using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                    {
                        job.EndTime = DateTime.Now;
                        job.Status = ImportJobStatus.Failed;
                        job.Exception = exp.ToString();
                        importJobUpdaterDb.Update(job);
                        await importJobUpdaterDb.SaveChangesAsync();
                    }
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// Imports, recursively, the child categories and the poems of one sqlite category into the
/// database and appends a link for each imported category / poem to the html text of the parent
/// category page.
/// </summary>
/// <param name="context">target database context</param>
/// <param name="sqlite">open connection to the source sqlite database</param>
/// <param name="poetId">id of the poet the imported pages are assigned to</param>
/// <param name="sqliteParentCatId">id (in sqlite) of the category whose children and poems are imported</param>
/// <param name="parentCat">already imported category corresponding to <paramref name="sqliteParentCatId"/></param>
/// <param name="parentFullTitle">full title prefix used for the full titles of the imported items</param>
/// <param name="jobProgressServiceEF">progress reporter of the enclosing job</param>
/// <param name="job">the enclosing job record</param>
/// <param name="parentPagId">id of the page used as ParentId of the created pages</param>
/// <returns>an empty string on success, otherwise the text of the exception which stopped the import</returns>
private async Task <string> _ImportSQLiteCatChildren(RMuseumDbContext context, IDbConnection sqlite, int poetId, int sqliteParentCatId, GanjoorCat parentCat, string parentFullTitle, LongRunningJobProgressServiceEF jobProgressServiceEF, RLongRunningJobStatus job, int parentPagId)
{
    try
    {
        // links to everything imported below, appended to the parent category page at the end
        string catHtmlText = "";
        foreach (var cat in await sqlite.QueryAsync($"SELECT * FROM cat WHERE parent_id = {sqliteParentCatId} ORDER BY id"))
        {
            await jobProgressServiceEF.UpdateJob(job.Id, 0, $"Importing - {cat.text}");
            // ids are assigned manually: next category id = current maximum + 1
            var poetCatId = 1 + await context.GanjoorCategories.MaxAsync(c => c.Id);
            string catTitle = cat.text;
            string url = GPersianTextSync.Farglisize(catTitle);
            if (catTitle.IndexOf('|') != -1)
            {
                // a title of the form "title|slug" carries its own url slug
                string[] catParts = catTitle.Split('|', StringSplitOptions.RemoveEmptyEntries);
                if (catParts.Length == 2)
                {
                    catTitle = catParts[0].Trim();
                    url = catParts[1].Trim();
                }
            }
            else
            {
                // fixed slugs for well known category titles; any other title keeps the Farglisize result
                switch (catTitle)
                {
                    case "دیوان اشعار":
                        url = "divan";
                        break;
                    case "قصاید":
                    case "قصائد":
                    case "قصیده":
                    case "قصیده ها":
                        url = "ghaside";
                        break;
                    case "غزلیات":
                    case "غزل":
                    case "غزل ها":
                        url = "ghazal";
                        break;
                    case "قطعات":
                    case "مقطعات":
                    case "قطعه":
                        url = "ghete";
                        break;
                    case "مثنویات":
                    case "مثنوی":
                    case "مثنوی ها":
                        url = "masnavi";
                        break;
                    case "ترکیبات":
                    case "ترکیب بند":
                        url = "tarkib";
                        break;
                    case "ترجیعات":
                    case "ترجیع بند":
                        url = "tarjee";
                        break;
                    case "مسمطات":
                    case "مسمط":
                        url = "mosammat";
                        break;
                    case "مخمسات":
                    case "مخمس":
                        url = "mokhammas";
                        break;
                    case "رباعیات":
                    case "رباعی":
                    case "رباعی ها":
                        url = "robaee";
                        break;
                    case "ملمعات":
                    case "ملمع":
                        url = "molamma";
                        break;
                    case "هجویات":
                    case "هجو":
                        url = "hajv";
                        break;
                    case "هزلیات":
                    case "هزل":
                        url = "hazl";
                        break;
                    case "مراثی":
                    case "مرثیه":
                    case "رثا":
                    case "مرثیه ها":
                        url = "marsie";
                        break;
                    case "مفردات":
                        url = "mofradat";
                        break;
                    case "ملحقات":
                        url = "molhaghat";
                        break;
                    case "اشعار عربی":
                        url = "arabi";
                        break;
                    // NOTE(review): the first two labels below look identical; presumably one of them
                    // contains an invisible character (e.g. a zero-width non-joiner) — confirm in the real file
                    case "ماده تاریخها":
                    case "ماده تاریخها":
                    case "ماده تاریخ":
                        url = "tarikh";
                        break;
                    case "معمیات":
                        url = "moammiyat";
                        break;
                    case "چیستان":
                        url = "chistan";
                        break;
                    case "لغز":
                    case "لغزها":
                        url = "loghaz";
                        break;
                }
            }
            GanjoorCat dbCat = new GanjoorCat()
            {
                Id = poetCatId,
                PoetId = poetId,
                Title = catTitle,
                UrlSlug = url,
                FullUrl = $"{parentCat.FullUrl}/{url}",
                ParentId = parentCat.Id,
                TableOfContentsStyle = GanjoorTOC.Analyse,
                Published = true,
            };
            context.GanjoorCategories.Add(dbCat);
            // the new page id must be above both the largest page id and the largest poem id
            var maxPageId = await context.GanjoorPages.MaxAsync(p => p.Id);
            if (await context.GanjoorPoems.MaxAsync(p => p.Id) > maxPageId)
            {
                maxPageId = await context.GanjoorPoems.MaxAsync(p => p.Id);
            }
            var catPageId = 1 + maxPageId;
            // skip any id which is already used by a poem
            while (await context.GanjoorPoems.Where(p => p.Id == catPageId).AnyAsync())
            {
                catPageId++;
            }
            GanjoorPage dbPageCat = new GanjoorPage()
            {
                Id = catPageId,
                GanjoorPageType = GanjoorPageType.CatPage,
                Published = false,
                PageOrder = -1,
                Title = dbCat.Title,
                FullTitle = $"{parentFullTitle} » {dbCat.Title}",
                UrlSlug = dbCat.UrlSlug,
                FullUrl = dbCat.FullUrl,
                HtmlText = "",
                PoetId = poetId,
                CatId = poetCatId,
                PostDate = DateTime.Now,
                ParentId = parentPagId
            };
            context.GanjoorPages.Add(dbPageCat);
            await context.SaveChangesAsync();
            catHtmlText += $"<p><a href=\"{dbCat.FullUrl}\">{dbCat.Title}</a></p>{Environment.NewLine}";
            // recurse into the children of the category just created; a non-empty result is an error text
            var resChild = await _ImportSQLiteCatChildren(context, sqlite, poetId, (int)cat.id, dbCat, $"{parentFullTitle} » {dbCat.Title}", jobProgressServiceEF, job, dbPageCat.Id);
            if (!string.IsNullOrEmpty(resChild))
            {
                return(resChild);
            }
        }
        // first poem id = 1 + the larger of the largest poem id and the largest page id
        // (a poem and its page share the same id, see dbPoemPage below)
        var maxPoemId = await context.GanjoorPoems.MaxAsync(p => p.Id);
        if (await context.GanjoorPages.MaxAsync(p => p.Id) > maxPoemId)
        {
            maxPoemId = await context.GanjoorPages.MaxAsync(p => p.Id);
        }
        var poemId = 1 + maxPoemId;
        int poemNumber = 0;
        foreach (var poem in await sqlite.QueryAsync($"SELECT * FROM poem WHERE cat_id = {sqliteParentCatId} ORDER BY id"))
        {
            poemNumber++;
            await jobProgressServiceEF.UpdateJob(job.Id, poemNumber, "", false);
            string title = poem.title;
            // default slug is sh<number>; a title of the form "title|slug" overrides it
            string urlSlug = $"sh{poemNumber}";
            if (title.IndexOf('|') != -1)
            {
                string[] titleParts = title.Split('|', StringSplitOptions.RemoveEmptyEntries);
                if (titleParts.Length == 2)
                {
                    title = titleParts[0].Trim();
                    urlSlug = titleParts[1].Trim();
                }
            }
            GanjoorPoem dbPoem = new GanjoorPoem()
            {
                Id = poemId,
                CatId = parentCat.Id,
                Title = title,
                UrlSlug = urlSlug,
                FullTitle = $"{parentFullTitle} » {title}",
                FullUrl = $"{parentCat.FullUrl}/{urlSlug}",
                Published = true,
            };
            List <GanjoorVerse> poemVerses = new List <GanjoorVerse>();
            foreach (var verse in await sqlite.QueryAsync($"SELECT * FROM verse WHERE poem_id = {poem.id} ORDER BY vorder"))
            {
                int vOrder = int.Parse(verse.vorder.ToString());
                int position = int.Parse(verse.position.ToString());
                string text = verse.text;
                GanjoorVerse dbVerse = new GanjoorVerse()
                {
                    PoemId = poemId,
                    VOrder = vOrder,
                    VersePosition = (VersePosition)position,
                    // NOTE(review): both arguments of the second Replace look like a single space here;
                    // presumably the first one is a double or non-breaking space — confirm in the real file
                    Text = text.Replace("ـ", "").Replace(" ", " ").ApplyCorrectYeKe().Trim()
                };
                poemVerses.Add(dbVerse);
            }
            if (poemVerses.Count == 0)
            {
                // a poem without verses is not imported and does not consume a poem number
                poemNumber--;
                continue;
            }
            dbPoem.PlainText = PreparePlainText(poemVerses);
            dbPoem.HtmlText = PrepareHtmlText(poemVerses);
            context.GanjoorPoems.Add(dbPoem);
            await context.SaveChangesAsync();
            foreach (var dbVerse in poemVerses)
            {
                context.GanjoorVerses.Add(dbVerse);
                // verses are saved one by one so that their ids are assigned in order
                await context.SaveChangesAsync();
            }
            await _FillPoemCoupletIndices(context, poemId);
            try
            {
                var poemRhymeLettersRes = LanguageUtils.FindRhyme(poemVerses);
                if (!string.IsNullOrEmpty(poemRhymeLettersRes.Rhyme))
                {
                    dbPoem.RhymeLetters = poemRhymeLettersRes.Rhyme;
                    context.GanjoorPoems.Update(dbPoem);
                }
            }
            catch
            {
                // any failure while finding the rhyme is ignored; the poem is imported without rhyme letters
            }
            GanjoorPage dbPoemPage = new GanjoorPage()
            {
                Id = poemId,
                GanjoorPageType = GanjoorPageType.PoemPage,
                Published = false,
                PageOrder = -1,
                Title = dbPoem.Title,
                FullTitle = dbPoem.FullTitle,
                UrlSlug = dbPoem.UrlSlug,
                FullUrl = dbPoem.FullUrl,
                HtmlText = dbPoem.HtmlText,
                PoetId = poetId,
                CatId = parentCat.Id,
                PoemId = poemId,
                PostDate = DateTime.Now,
                ParentId = parentPagId
            };
            context.GanjoorPages.Add(dbPoemPage);
            await context.SaveChangesAsync();
            catHtmlText += $"<p><a href=\"{dbPoemPage.FullUrl}\">{dbPoemPage.Title}</a></p>{Environment.NewLine}";
            poemId++;
        }
        if (!string.IsNullOrEmpty(catHtmlText))
        {
            // append the collected links to the page of the parent category
            var parentCatPage = await context.GanjoorPages.Where(p => p.FullUrl == parentCat.FullUrl).SingleAsync();
            parentCatPage.HtmlText += catHtmlText;
            context.GanjoorPages.Update(parentCatPage);
        }
        await context.SaveChangesAsync();
    }
    catch (Exception exp)
    {
        // errors are reported to the caller as text instead of being thrown
        return(exp.ToString());
    }
    return("");
}
/// <summary>
/// Apply corrections from sqlite: stores the uploaded sqlite file on the server and queues a
/// background job which compares its poems (matched by id) with the database and applies title
/// and verse differences to the poems of the given poet. Every changed poem gets a page snapshot
/// and a system comment listing the changes.
/// </summary>
/// <param name="poetId">only poems whose category belongs to this poet are modified</param>
/// <param name="file">uploaded sqlite database, expected to contain exactly one poet</param>
/// <param name="note">note stored on the snapshots and quoted in the generated comments</param>
/// <returns>
/// true when the job is queued; a failed result carrying the exception text when storing the
/// file or queuing the job throws
/// </returns>
public async Task<RServiceResult<bool>> ApplyCorrectionsFromSqlite(int poetId, IFormFile file, string note)
{
    try
    {
        string dir = Path.Combine($"{Configuration.GetSection("PictureFileService")["StoragePath"]}", "SQLiteImports");
        if (!Directory.Exists(dir))
        {
            Directory.CreateDirectory(dir);
        }

        // file.FileName is supplied by the client: keep only its file name part so that the
        // upload can never be written outside of the import folder
        string filePath = Path.Combine(dir, Path.GetFileName(file.FileName));
        if (File.Exists(filePath))
        {
            File.Delete(filePath);
        }
        using (FileStream fsMain = new FileStream(filePath, FileMode.Create))
        {
            await file.CopyToAsync(fsMain);
        }

        // changes are recorded under the system user
        string email = $"{Configuration.GetSection("Ganjoor")["SystemEmail"]}";
        var userId = (await _appUserService.FindUserByEmail(email)).Result.Id;

        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                try
                {
                    // long running job: the injected _context might already be gone, so a dedicated context is created here
                    using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                    {
                        LongRunningJobProgressServiceEF jobProgressServiceEF = new LongRunningJobProgressServiceEF(context);
                        var job = (await jobProgressServiceEF.NewJob("ApplyCorrectionsFromSqlite", "Query data")).Result;
                        try
                        {
                            SqliteConnectionStringBuilder connectionStringBuilder = new SqliteConnectionStringBuilder();
                            connectionStringBuilder.DataSource = filePath;
                            using (SqliteConnection sqliteConnection = new SqliteConnection(connectionStringBuilder.ToString()))
                            {
                                await sqliteConnection.OpenAsync();
                                IDbConnection sqlite = sqliteConnection;

                                var poets = (await sqlite.QueryAsync("SELECT * FROM poet")).ToList();
                                if (poets.Count != 1)
                                {
                                    await jobProgressServiceEF.UpdateJob(job.Id, 100, "", false, "poets count in sqlite db is not equal to 1");
                                    // stop here: previously the job went on, applied the corrections
                                    // and finally overwrote this failure with a success state
                                    return;
                                }

                                int poemNumber = 0;
                                foreach (var poem in await sqlite.QueryAsync($"SELECT * FROM poem ORDER BY id"))
                                {
                                    poemNumber++;
                                    await jobProgressServiceEF.UpdateJob(job.Id, poemNumber, "", false);

                                    int poemId = (int)poem.id;
                                    GanjoorPoem dbPoem = await context.GanjoorPoems.Include(p => p.Cat).Where(p => p.Id == poemId).SingleOrDefaultAsync();
                                    if (dbPoem == null)
                                    {
                                        continue;
                                    }
                                    if (dbPoem.Cat.PoetId != poetId)
                                    {
                                        continue;
                                    }

                                    string comment = $"<p>تغییرات حاصل از پردازش {note}</p>{Environment.NewLine}";
                                    bool anyChanges = false;

                                    // the snapshot is built from the page before anything is modified
                                    var dbPage = await context.GanjoorPages.Where(p => p.Id == poemId).SingleOrDefaultAsync();
                                    GanjoorPageSnapshot snapshot = new GanjoorPageSnapshot()
                                    {
                                        GanjoorPageId = poemId,
                                        MadeObsoleteByUserId = (Guid)userId,
                                        RecordDate = DateTime.Now,
                                        Note = note,
                                        Title = dbPage.Title,
                                        UrlSlug = dbPage.UrlSlug,
                                        HtmlText = dbPage.HtmlText,
                                    };

                                    string poemTitle = poem.title;
                                    if (poemTitle != dbPoem.Title)
                                    {
                                        anyChanges = true;
                                        comment += $"<p>تغییر عنوان از «{dbPoem.Title}» به «{poemTitle}»</p>{Environment.NewLine}";
                                        dbPoem.Title = poemTitle;
                                        // NOTE(review): the full title is built from the category FullUrl — confirm this is intended
                                        dbPoem.FullTitle = $"{dbPoem.Cat.FullUrl} » {dbPoem.Title}";
                                        context.GanjoorPoems.Update(dbPoem);
                                    }

                                    var sqliteVerses = new List<dynamic>(await sqlite.QueryAsync($"SELECT * FROM verse WHERE poem_id = {poem.id} ORDER BY vorder"));
                                    var dbVerses = await context.GanjoorVerses.Where(v => v.PoemId == poemId).OrderBy(v => v.VOrder).ToListAsync();

                                    // compare the verses both sides have, position by position;
                                    // vIndex == -1 marks a poem whose verse orders do not line up
                                    int vIndex = 0;
                                    while (vIndex < sqliteVerses.Count && vIndex < dbVerses.Count)
                                    {
                                        if (sqliteVerses[vIndex].vorder != dbVerses[vIndex].VOrder)
                                        {
                                            vIndex = -1;
                                            break;
                                        }

                                        string text = sqliteVerses[vIndex].text;
                                        // NOTE(review): both arguments of the second Replace look like a single space here;
                                        // presumably the first one is a double or non-breaking space — confirm in the real file
                                        text = text.Replace("ـ", "").Replace(" ", " ").ApplyCorrectYeKe().Trim();
                                        if (text == dbVerses[vIndex].Text)
                                        {
                                            vIndex++;
                                            continue;
                                        }

                                        comment += $"<p>تغییر مصرع {vIndex + 1} از «{dbVerses[vIndex].Text}» به «{text}»</p>{Environment.NewLine}".ToPersianNumbers();
                                        dbVerses[vIndex].Text = text;
                                        context.GanjoorVerses.Update(dbVerses[vIndex]);
                                        anyChanges = true;
                                        vIndex++;
                                    }

                                    if (vIndex != -1)
                                    {
                                        // verses which exist only in the database are removed
                                        while (vIndex < dbVerses.Count)
                                        {
                                            comment += $"<p>حذف مصرع {vIndex + 1} با متن «{dbVerses[vIndex].Text}»</p>{Environment.NewLine}".ToPersianNumbers();
                                            context.GanjoorVerses.Remove(dbVerses[vIndex]);
                                            vIndex++;
                                            anyChanges = true;
                                        }

                                        // verses which exist only in the sqlite file are added
                                        while (vIndex < sqliteVerses.Count)
                                        {
                                            string text = sqliteVerses[vIndex].text;
                                            text = text.Replace("ـ", "").Replace(" ", " ").ApplyCorrectYeKe().Trim();
                                            int vOrder = int.Parse(sqliteVerses[vIndex].vorder.ToString());
                                            int position = int.Parse(sqliteVerses[vIndex].position.ToString());
                                            comment += $"<p>اضافه شدن مصرع {vIndex + 1} با متن «{text}»</p>{Environment.NewLine}".ToPersianNumbers();
                                            context.GanjoorVerses.Add
                                            (
                                                new GanjoorVerse()
                                                {
                                                    PoemId = poemId,
                                                    VOrder = vOrder,
                                                    VersePosition = (VersePosition)position,
                                                    Text = text
                                                }
                                            );
                                            vIndex++;
                                            anyChanges = true;
                                        }

                                        if (anyChanges)
                                        {
                                            await _FillPoemCoupletIndices(context, poemId);

                                            GanjoorComment sysComment = new GanjoorComment()
                                            {
                                                UserId = userId,
                                                AuthorIpAddress = "127.0.0.1",
                                                CommentDate = DateTime.Now,
                                                HtmlComment = comment,
                                                PoemId = poemId,
                                                Status = PublishStatus.Published,
                                            };
                                            context.GanjoorComments.Add(sysComment);
                                            context.GanjoorPageSnapshots.Add(snapshot);
                                            await context.SaveChangesAsync();

                                            // renumber the verses 1..n after removals / additions
                                            var poemVerses = await context.GanjoorVerses.Where(v => v.PoemId == poemId).OrderBy(v => v.VOrder).ToListAsync();
                                            bool needsNewVOrder = false;
                                            for (int i = 0; i < poemVerses.Count; i++)
                                            {
                                                if (poemVerses[i].VOrder != (i + 1))
                                                {
                                                    poemVerses[i].VOrder = i + 1;
                                                    needsNewVOrder = true;
                                                }
                                            }
                                            if (needsNewVOrder)
                                            {
                                                context.GanjoorVerses.UpdateRange(poemVerses);
                                            }

                                            dbPoem.PlainText = PreparePlainText(poemVerses);
                                            dbPoem.HtmlText = PrepareHtmlText(poemVerses);
                                            dbPage.HtmlText = dbPoem.HtmlText;
                                            dbPage.Title = dbPoem.Title;
                                            dbPage.FullTitle = dbPoem.FullTitle;

                                            try
                                            {
                                                var poemRhymeLettersRes = LanguageUtils.FindRhyme(poemVerses);
                                                if (!string.IsNullOrEmpty(poemRhymeLettersRes.Rhyme))
                                                {
                                                    dbPoem.RhymeLetters = poemRhymeLettersRes.Rhyme;
                                                }
                                            }
                                            catch
                                            {
                                                // best effort: a failure while finding the rhyme must not abort the corrections
                                            }

                                            context.GanjoorPoems.Update(dbPoem);
                                            context.GanjoorPages.Update(dbPage);
                                            await context.SaveChangesAsync();
                                        }
                                    }
                                }

                                await jobProgressServiceEF.UpdateJob(job.Id, 100, "", true);
                            }
                        }
                        catch (Exception exp)
                        {
                            await jobProgressServiceEF.UpdateJob(job.Id, 100, "", false, exp.ToString());
                        }
                    }
                }
                finally
                {
                    // runs after the sqlite connection is disposed, on every exit path (including the early return)
                    File.Delete(filePath);
                }
            }
        );
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }

    return new RServiceResult<bool>(true);
}
/// <summary>
/// import from http://www.qajarwomen.org: registers an import job and queues a background task
/// which builds a draft artifact from the harvard manifest of the given resource number
/// </summary>
/// <param name="hardvardResourceNumber">43117279</param>
/// <param name="friendlyUrl">atame</param>
/// <param name="srcUrl">http://www.qajarwomen.org/fa/items/1018A10.html</param>
/// <returns>
/// true when the job is queued; a failed result when a job for the same resource is already
/// scheduled or running, the friendly url is empty or already used, or scheduling throws
/// </returns>
private async Task<RServiceResult<bool>> StartImportingFromHarvardDirectly(string hardvardResourceNumber, string friendlyUrl, string srcUrl)
{
    try
    {
        var activeJob = await _context.ImportJobs
            .Where(j => j.JobType == JobType.HarvardDirect && j.ResourceNumber == hardvardResourceNumber && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted))
            .SingleOrDefaultAsync();
        if (activeJob != null)
        {
            return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing harvard direct resource number {hardvardResourceNumber}");
        }

        if (string.IsNullOrEmpty(friendlyUrl))
        {
            return new RServiceResult<bool>(false, $"Friendly url is empty, harvard direct resource number {hardvardResourceNumber}");
        }

        var sameUrlArtifact = await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).SingleOrDefaultAsync();
        if (sameUrlArtifact != null)
        {
            return new RServiceResult<bool>(false, $"duplicated friendly url '{friendlyUrl}'");
        }

        ImportJob job = new ImportJob()
        {
            JobType = JobType.HarvardDirect,
            ResourceNumber = hardvardResourceNumber,
            FriendlyUrl = friendlyUrl,
            SrcUrl = srcUrl,
            QueueTime = DateTime.Now,
            ProgressPercent = 0,
            Status = ImportJobStatus.NotStarted
        };
        await _context.ImportJobs.AddAsync(job);
        await _context.SaveChangesAsync();

        // marks the job as failed through a short-lived context of its own
        async Task MarkJobFailedAsync(string reason)
        {
            using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
            {
                job.EndTime = DateTime.Now;
                job.Status = ImportJobStatus.Failed;
                job.Exception = reason;
                importJobUpdaterDb.Update(job);
                await importJobUpdaterDb.SaveChangesAsync();
            }
        }

        _backgroundTaskQueue.QueueBackgroundWorkItem(async token =>
        {
            try
            {
                using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                {
                    string bookName = $"extracted from harvard resource number {job.ResourceNumber}";
                    string bookNameInEnglish = $"extracted from harvard resource number {job.ResourceNumber}";
                    RArtifactMasterRecord book = new RArtifactMasterRecord(bookName, bookNameInEnglish)
                    {
                        Status = PublishStatus.Draft,
                        DateTime = DateTime.Now,
                        LastModified = DateTime.Now,
                        CoverItemIndex = 0,
                        FriendlyUrl = friendlyUrl
                    };

                    // the tag values are prepared one after another, in this order
                    List<RTagValue> meta = new List<RTagValue>
                    {
                        await TagHandler.PrepareAttribute(context, "Notes", "وارد شده از سایت دنیای زنان در عصر قاجار", 1),
                        await TagHandler.PrepareAttribute(context, "Type", "Book", 1),
                        await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1)
                    };

                    RTagValue sourceTag = await TagHandler.PrepareAttribute(context, "Source", "دنیای زنان در عصر قاجار", 1);
                    sourceTag.ValueSupplement = $"{job.SrcUrl}";
                    meta.Add(sourceTag);

                    using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                    {
                        job.StartTime = DateTime.Now;
                        job.Status = ImportJobStatus.Running;
                        job.SrcContent = $"https://iiif.lib.harvard.edu/manifests/drs:{hardvardResourceNumber}";
                        importJobUpdaterDb.Update(job);
                        await importJobUpdaterDb.SaveChangesAsync();
                    }

                    var importResult = await _InternalHarvardJsonImport(hardvardResourceNumber, job, friendlyUrl, context, book, meta);
                    List<RArtifactItemRecord> pages = importResult.Result;
                    if (pages == null)
                    {
                        // nothing more is done here when the import yields no page list
                        return;
                    }

                    book.Tags = meta.ToArray();
                    book.Items = pages.ToArray();
                    book.ItemCount = pages.Count;

                    if (pages.Count == 0)
                    {
                        await MarkJobFailedAsync("Pages.Count == 0");
                        return;
                    }

                    await context.Artifacts.AddAsync(book);
                    await context.SaveChangesAsync();

                    job.ProgressPercent = 100;
                    job.Status = ImportJobStatus.Succeeded;
                    job.ArtifactId = book.Id;
                    job.EndTime = DateTime.Now;
                    context.Update(job);
                    await context.SaveChangesAsync();
                }
            }
            catch (Exception exp)
            {
                await MarkJobFailedAsync(exp.ToString());
            }
        });

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// save ganjoor poem probable metre
/// </summary>
/// <remarks>
/// A null, empty or whitespace-only <paramref name="metre"/> is handled exactly like the literal
/// "dismissed": the probable metre record is kept and only marked as dismissed.
/// Any other value is assigned to the poem as its metre (a new <see cref="GanjoorMetre"/> with
/// a zero verse count is created when no metre with that rhythm exists), the probable metre
/// record is removed, and a background work item calls _UpdateRelatedPoems for the previous and
/// for the new metre of the poem.
/// </remarks>
/// <param name="id">probable metre id</param>
/// <param name="metre">rhythm to assign to the poem, or "dismissed" / empty / null to dismiss the suggestion</param>
/// <returns>true on success; otherwise a false result carrying the exception text</returns>
public async Task<RServiceResult<bool>> SaveGanjoorPoemProbableMetre(int id, string metre)
{
    try
    {
        var item = await _context.GanjoorPoemProbableMetres.Where(p => p.Id == id).SingleAsync();

        // a null metre used to end up in a NullReferenceException here; it is now treated like an empty one
        metre = (metre ?? "").Trim();
        if (string.IsNullOrEmpty(metre))
        {
            metre = "dismissed";
        }

        if (metre == "dismissed")
        {
            item.Metre = "dismissed";
            _context.Update(item);
            await _context.SaveChangesAsync();
            return new RServiceResult<bool>(true);
        }

        var rhythm = await _context.GanjoorMetres.AsNoTracking().Where(m => m.Rhythm == metre).SingleOrDefaultAsync();
        if (rhythm == null)
        {
            // unknown rhythm: register it so that the poem can reference it
            rhythm = new GanjoorMetre()
            {
                Rhythm = metre,
                VerseCount = 0
            };
            _context.GanjoorMetres.Add(rhythm);
            await _context.SaveChangesAsync();
        }

        var poem = await _context.GanjoorPoems.Where(p => p.Id == item.PoemId).SingleAsync();
        int? oldMetreId = poem.GanjoorMetreId;
        poem.GanjoorMetreId = rhythm.Id;
        _context.Update(poem);
        _context.Remove(item);
        await _context.SaveChangesAsync();

        // snapshot what the background work item needs, so that it does not have to touch an
        // entity tracked by _context, which may no longer be alive when the work item runs
        int? newMetreId = poem.GanjoorMetreId;
        string rhymeLetters = poem.RhymeLetters;

        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                // this is a long running job, so it works on a context of its own instead of _context
                using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                {
                    if (!string.IsNullOrEmpty(rhymeLetters))
                    {
                        if (oldMetreId != null)
                        {
                            await _UpdateRelatedPoems(context, (int)oldMetreId, rhymeLetters);
                            await context.SaveChangesAsync();
                        }

                        if (newMetreId != null)
                        {
                            await _UpdateRelatedPoems(context, (int)newMetreId, rhymeLetters);
                            await context.SaveChangesAsync();
                        }
                    }
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// start counting
/// </summary>
/// <remarks>
/// Only queues the work: a background work item deletes the verse numbers already stored for the
/// numbering, walks the categories returned by _FindTargetCategories, stores a new
/// <see cref="GanjoorVerseNumber"/> per counted verse and finally writes the totals back to the
/// numbering. Progress and failures are reported through a long running job record.
/// </remarks>
/// <param name="numberingId">id of the numbering to be recounted</param>
/// <returns>always a successful result (the counting itself runs in the background)</returns>
public RServiceResult<bool> Recount(int numberingId)
{
    _backgroundTaskQueue.QueueBackgroundWorkItem
    (
        async token =>
        {
            // long running job: use a dedicated context, _context might already be gone by the time this runs
            using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
            {
                LongRunningJobProgressServiceEF progressService = new LongRunningJobProgressServiceEF(context);
                var job = (await progressService.NewJob("NumberingService::Count", "Query Cats")).Result;

                try
                {
                    var numbering = await context.GanjoorNumberings
                        .Include(n => n.StartCat)
                        .Where(n => n.Id == numberingId)
                        .SingleOrDefaultAsync();
                    var targetCats = await _FindTargetCategories(context, numbering.StartCat, numbering.EndCatId);

                    // throw away the result of any previous count
                    await progressService.UpdateJob(job.Id, 0, "Deleting Old Data");
                    var staleNumbers = await context.GanjoorVerseNumbers
                        .Where(n => n.NumberingId == numberingId)
                        .ToListAsync();
                    context.GanjoorVerseNumbers.RemoveRange(staleNumbers);
                    await context.SaveChangesAsync();

                    await progressService.UpdateJob(job.Id, 0, "Counting");

                    int lineCounter = 0;        // every counted line, couplet or paragraph
                    int coupletCounter = 0;     // counted lines which are poem verses
                    int paragraphCounter = 0;   // counted lines which are not poem verses
                    int allVersesCounter = 0;   // all verses of the visited poems, unfiltered

                    foreach (var cat in targetCats)
                    {
                        var catPoems = await context.GanjoorPoems.AsNoTracking()
                            .Where(p => p.CatId == cat.Id)
                            .OrderBy(p => p.Id)
                            .ToListAsync();
                        await progressService.UpdateJob(job.Id, 0, $"Counting:: {cat.Title}");

                        foreach (var poem in catPoems)
                        {
                            int poemVerseCount = await context.GanjoorVerses.Where(v => v.PoemId == poem.Id).CountAsync();
                            allVersesCounter += poemVerseCount;

                            // second halves of couplets and comments do not get a number of their own
                            var countedVerses = await context.GanjoorVerses.AsNoTracking()
                                .Where(v => v.PoemId == poem.Id && v.VersePosition != VersePosition.Left && v.VersePosition != VersePosition.CenteredVerse2 && v.VersePosition != VersePosition.Comment)
                                .OrderBy(v => v.VOrder)
                                .ToListAsync();

                            int coupletIndex = 0;
                            foreach (var verse in countedVerses)
                            {
                                lineCounter++;

                                bool isPoemVerse = verse.VersePosition == VersePosition.Right
                                                   ||
                                                   verse.VersePosition == VersePosition.CenteredVerse1;

                                int sameTypeNumber;
                                if (isPoemVerse)
                                {
                                    coupletCounter++;
                                    sameTypeNumber = coupletCounter;
                                }
                                else
                                {
                                    paragraphCounter++;
                                    sameTypeNumber = paragraphCounter;
                                }

                                context.GanjoorVerseNumbers.Add
                                (
                                    new GanjoorVerseNumber()
                                    {
                                        NumberingId = numberingId,
                                        PoemId = poem.Id,
                                        CoupletIndex = coupletIndex,
                                        Number = lineCounter,
                                        IsPoemVerse = isPoemVerse,
                                        SameTypeNumber = sameTypeNumber
                                    }
                                );

                                coupletIndex++;
                            }
                        }
                    }

                    numbering.TotalLines = lineCounter;
                    numbering.TotalVerses = allVersesCounter;
                    numbering.TotalCouplets = coupletCounter;
                    numbering.TotalParagraphs = paragraphCounter;
                    numbering.LastCountingDate = DateTime.Now;
                    context.GanjoorNumberings.Update(numbering);

                    // the new verse numbers and the totals are saved together
                    await context.SaveChangesAsync();

                    await progressService.UpdateJob(job.Id, 100, "", true);
                }
                catch (Exception exp)
                {
                    await progressService.UpdateJob(job.Id, 100, "", false, exp.ToString());
                }
            }
        }
    );

    return new RServiceResult<bool>(true);
}
/// <summary>
/// from http://www.library.upenn.edu/
/// </summary>
/// <remarks>
/// Validates the request (no unfinished job for the same resource, no artifact with the same
/// friendly url), stores a new <see cref="ImportJob"/> and queues a background work item which
/// scrapes the page-turner html: the record info table becomes the tags of the book, and the
/// pages are then visited one by one until a page or an image answers with 404 or no image link
/// is found. Images already present on disk (orig, norm and thumb) are recovered instead of
/// being downloaded again. The outcome is written to the job record.
/// </remarks>
/// <param name="resourceNumber">MEDREN_9949222153503681</param>
/// <param name="friendlyUrl">friendly url of the new artifact, <paramref name="resourceNumber"/> is used when it is empty</param>
/// <returns>true when the job has been queued; otherwise false along with the reason</returns>
private async Task<RServiceResult<bool>> StartImportingFromPenLibraries(string resourceNumber, string friendlyUrl)
{
    // every page of the page-turner links its image through this resolver
    const string imageResolverPrefix = "https://repo.library.upenn.edu/djatoka/resolver?";
    // retry policy for image downloads answered with 503 (Service Unavailable)
    const int importRetryCount = 200;
    const int importRetryInitialSleep = 500;

    string url = $"http://dla.library.upenn.edu/dla/medren/pageturn.html?id={resourceNumber}&rotation=0&size=0";

    if (
        (
        await _context.ImportJobs
            .Where(j => j.JobType == JobType.PennLibraries && j.ResourceNumber == resourceNumber && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted))
            .SingleOrDefaultAsync()
        )
        !=
        null
        )
    {
        return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing {url}");
    }

    if (string.IsNullOrEmpty(friendlyUrl))
    {
        friendlyUrl = resourceNumber;
    }

    if (
        (await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).SingleOrDefaultAsync())
        !=
        null
        )
    {
        return new RServiceResult<bool>(false, $"duplicated artifact friendly url '{friendlyUrl}'");
    }

    ImportJob job = new ImportJob()
    {
        JobType = JobType.PennLibraries,
        ResourceNumber = resourceNumber,
        FriendlyUrl = friendlyUrl,
        SrcUrl = url,
        QueueTime = DateTime.Now,
        ProgressPercent = 0,
        Status = ImportJobStatus.NotStarted
    };
    await _context.ImportJobs.AddAsync(job);
    await _context.SaveChangesAsync();

    // marks the job as failed through a short-lived context of its own
    Func<string, Task> failJob = async reason =>
    {
        using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
        {
            job.EndTime = DateTime.Now;
            job.Status = ImportJobStatus.Failed;
            job.Exception = reason;
            importJobUpdaterDb.Update(job);
            await importJobUpdaterDb.SaveChangesAsync();
        }
    };

    try
    {
        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                try
                {
                    using (var client = new HttpClient())
                    {
                        client.Timeout = TimeSpan.FromMinutes(5);
                        using (var result = await client.GetAsync(url))
                        {
                            if (!result.IsSuccessStatusCode)
                            {
                                await failJob($"Http result is not ok ({result.StatusCode}) for {url}");
                                return;
                            }

                            using (RMuseumDbContext context = new RMuseumDbContext(Configuration))
                            {
                                // the friendly url might have been taken while the job was waiting in the queue
                                if (
                                    (await context.Artifacts.Where(a => a.FriendlyUrl == job.FriendlyUrl).SingleOrDefaultAsync())
                                    !=
                                    null
                                    )
                                {
                                    await failJob("aborted because of duplicated friendly url");
                                    return;
                                }

                                RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from {url}", $"extracted from {url}")
                                {
                                    Status = PublishStatus.Draft,
                                    DateTime = DateTime.Now,
                                    LastModified = DateTime.Now,
                                    CoverItemIndex = 0,
                                    FriendlyUrl = friendlyUrl
                                };

                                List<RTagValue> meta = new List<RTagValue>();
                                RTagValue tag;

                                string html = await result.Content.ReadAsStringAsync();

                                using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                                {
                                    job.StartTime = DateTime.Now;
                                    job.Status = ImportJobStatus.Running;
                                    job.SrcContent = html;
                                    importJobUpdaterDb.Update(job);
                                    await importJobUpdaterDb.SaveChangesAsync();
                                }

                                string title = "";
                                string author = "";
                                int tagOrder = 1;

                                int nIdxStart = html.IndexOf(imageResolverPrefix, StringComparison.Ordinal);
                                if (nIdxStart == -1)
                                {
                                    await failJob("https://repo.library.upenn.edu/djatoka/resolver? not found");
                                    return;
                                }

                                // the link is taken out of an html attribute, so its ampersands are entity encoded
                                string firstImageUrl = html.Substring(nIdxStart, html.IndexOf('"', nIdxStart) - nIdxStart).Replace("&amp;", "&");

                                // record info table: label cells end with a colon, text cells hold one <div> per value
                                nIdxStart = html.IndexOf("recordinfolabel", StringComparison.Ordinal);
                                while (nIdxStart != -1)
                                {
                                    nIdxStart += "recordinfolabel\">".Length;
                                    int nIdxEnd = html.IndexOf(":", nIdxStart, StringComparison.Ordinal);
                                    string recordinfolabel = html.Substring(nIdxStart, nIdxEnd - nIdxStart);

                                    nIdxStart = html.IndexOf("recordinfotext", nIdxEnd, StringComparison.Ordinal);
                                    nIdxStart += "recordinfotext\">".Length;
                                    nIdxEnd = html.IndexOf("</td>", nIdxStart, StringComparison.Ordinal);
                                    string recordinfotext = html.Substring(nIdxStart, nIdxEnd - nIdxStart).Replace("</div>", "<div>").Replace("\n", "").Replace("\r", "").Trim();

                                    string[] values = recordinfotext.Split("<div>", StringSplitOptions.RemoveEmptyEntries);
                                    foreach (string value in values)
                                    {
                                        if (value.Trim().Length == 0)
                                        {
                                            continue;
                                        }

                                        if (recordinfolabel == "Title")
                                        {
                                            title = value.Trim();
                                            tag = await TagHandler.PrepareAttribute(context, "Title", title, 1);
                                            meta.Add(tag);
                                        }
                                        else if (recordinfolabel == "Author")
                                        {
                                            author = value.Trim();
                                            tag = await TagHandler.PrepareAttribute(context, "Contributor Names", author, 1);
                                            meta.Add(tag);
                                        }
                                        else
                                        {
                                            tag = await TagHandler.PrepareAttribute(context, recordinfolabel, value.Trim(), tagOrder++);
                                            meta.Add(tag);
                                        }
                                    }

                                    nIdxStart = html.IndexOf("recordinfolabel", nIdxEnd, StringComparison.Ordinal);
                                }

                                tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1);
                                meta.Add(tag);

                                tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1);
                                meta.Add(tag);

                                tag = await TagHandler.PrepareAttribute(context, "Source", "Penn Libraries", 1);
                                string viewerUrl = $"http://dla.library.upenn.edu/dla/medren/detail.html?id={resourceNumber}";
                                tag.ValueSupplement = viewerUrl;
                                meta.Add(tag);

                                book.Name = book.NameInEnglish = book.Description = book.DescriptionInEnglish = title;
                                book.Tags = meta.ToArray();

                                List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();
                                int order = 0;

                                // the number of pages is not known in advance: keep going until a request signals the end
                                while (true)
                                {
                                    order++;

                                    using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                                    {
                                        job.ProgressPercent = order;
                                        importJobUpdaterDb.Update(job);
                                        await importJobUpdaterDb.SaveChangesAsync();
                                    }

                                    string imageUrl = firstImageUrl;

                                    RArtifactItemRecord page = new RArtifactItemRecord()
                                    {
                                        Name = $"تصویر {order}",
                                        NameInEnglish = $"Image {order}",
                                        Description = "",
                                        DescriptionInEnglish = "",
                                        Order = order,
                                        FriendlyUrl = $"p{$"{order}".PadLeft(4, '0')}",
                                        LastModified = DateTime.Now
                                    };

                                    tag = await TagHandler.PrepareAttribute(context, "Source", "Penn Libraries", 1);
                                    tag.ValueSupplement = viewerUrl;
                                    page.Tags = new RTagValue[] { tag };

                                    if (!string.IsNullOrEmpty(imageUrl))
                                    {
                                        string fileName = $"{order}".PadLeft(4, '0') + ".jpg";
                                        string artifactFolder = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl);
                                        string origPath = Path.Combine(artifactFolder, "orig", fileName);
                                        string normPath = Path.Combine(artifactFolder, "norm", fileName);
                                        string thumbPath = Path.Combine(artifactFolder, "thumb", fileName);

                                        bool recovered = false;
                                        if (File.Exists(origPath) && File.Exists(normPath) && File.Exists(thumbPath))
                                        {
                                            // files of a previous (interrupted) import are reused instead of being downloaded again
                                            RServiceResult<RPictureFile> picture = await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1,
                                                imageUrl,
                                                origPath,
                                                normPath,
                                                thumbPath,
                                                fileName, friendlyUrl);
                                            if (picture.Result != null)
                                            {
                                                recovered = true;
                                                page.Images = new RPictureFile[] { picture.Result };
                                                page.CoverImageIndex = 0;

                                                if (book.CoverItemIndex == (order - 1))
                                                {
                                                    book.CoverImage = RPictureFile.Duplicate(picture.Result);
                                                }

                                                pages.Add(page);
                                            }
                                        }

                                        if (!recovered)
                                        {
                                            // remove leftovers of an incomplete file set before downloading
                                            foreach (string stalePath in new string[] { origPath, normPath, thumbPath })
                                            {
                                                if (File.Exists(stalePath))
                                                {
                                                    File.Delete(stalePath);
                                                }
                                            }

                                            if (order > 1)
                                            {
                                                // the image link of the first page is already known, the other pages have to be fetched
                                                string pageUrl = $"http://dla.library.upenn.edu/dla/medren/pageturn.html?id={resourceNumber}&doubleside=0&rotation=0&size=0&currentpage={order}";
                                                using (var pageResult = await client.GetAsync(pageUrl))
                                                {
                                                    if (pageResult.StatusCode == HttpStatusCode.NotFound)
                                                    {
                                                        break; //finished
                                                    }

                                                    string pageHtml = await pageResult.Content.ReadAsStringAsync();
                                                    int imageIdx = pageHtml.IndexOf(imageResolverPrefix, StringComparison.Ordinal);
                                                    if (imageIdx == -1)
                                                    {
                                                        // a page after the first one without an image link means there are no more pages
                                                        break; //finished
                                                    }

                                                    imageUrl = pageHtml.Substring(imageIdx, pageHtml.IndexOf('"', imageIdx) - imageIdx).Replace("&amp;", "&");
                                                }
                                            }

                                            var imageResult = await client.GetAsync(imageUrl);
                                            try
                                            {
                                                if (imageResult.StatusCode == HttpStatusCode.NotFound)
                                                {
                                                    break; //finished
                                                }

                                                // back off a little longer after every 503 without blocking a thread
                                                int retryCount = 0;
                                                while (retryCount < importRetryCount && !imageResult.IsSuccessStatusCode && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable)
                                                {
                                                    imageResult.Dispose();
                                                    await Task.Delay(importRetryInitialSleep * (retryCount + 1));
                                                    imageResult = await client.GetAsync(imageUrl);
                                                    retryCount++;
                                                }

                                                if (!imageResult.IsSuccessStatusCode)
                                                {
                                                    await failJob($"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}");
                                                    return;
                                                }

                                                using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync())
                                                {
                                                    RServiceResult<RPictureFile> picture = await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, fileName, friendlyUrl);
                                                    if (picture.Result == null)
                                                    {
                                                        throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                                                    }

                                                    page.Images = new RPictureFile[] { picture.Result };
                                                    page.CoverImageIndex = 0;

                                                    if (book.CoverItemIndex == (order - 1))
                                                    {
                                                        book.CoverImage = RPictureFile.Duplicate(picture.Result);
                                                    }

                                                    pages.Add(page);
                                                }
                                            }
                                            finally
                                            {
                                                // also covers the 404, failure and exception exits
                                                imageResult.Dispose();
                                            }

                                            // kept from the original implementation, which forces a collection after every downloaded image
                                            GC.Collect();
                                        }
                                    }
                                }

                                book.Items = pages.ToArray();
                                book.ItemCount = pages.Count;

                                if (pages.Count == 0)
                                {
                                    await failJob("Pages.Count == 0");
                                    return;
                                }

                                await context.Artifacts.AddAsync(book);
                                await context.SaveChangesAsync();

                                job.ProgressPercent = 100;
                                job.Status = ImportJobStatus.Succeeded;
                                job.ArtifactId = book.Id;
                                job.EndTime = DateTime.Now;
                                context.Update(job);
                                await context.SaveChangesAsync();
                            }
                        }
                    }
                }
                catch (Exception exp)
                {
                    await failJob(exp.ToString());
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// import comments (and the abuse reports filed for them) from the MySql database
/// </summary>
/// <remarks>
/// Rows of ganja_comments (pingbacks excluded) are turned into <see cref="GanjoorComment"/>
/// records; rows pointing to a poem which does not exist in <paramref name="context"/> are
/// skipped. Reports returned by _MySqlImportReportedComments are re-attached to the imported
/// comment through the original MySql comment id. Finally every comment whose author email
/// equals the email of a user is assigned to that user.
/// </remarks>
/// <param name="jobName">prefix of the progress messages</param>
/// <param name="context">database context receiving the imported data</param>
/// <param name="jobProgressServiceEF">progress reporting service</param>
/// <param name="job">long running job record to be updated</param>
/// <returns>true on success; otherwise false along with the exception text (the job is marked as failed, too)</returns>
private async Task<RServiceResult<bool>> _ImportCommentsDataFromMySql(string jobName, RMuseumDbContext context, LongRunningJobProgressServiceEF jobProgressServiceEF, RLongRunningJobStatus job)
{
    try
    {
        job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, $"{jobName} - processing reported comments")).Result;

        List<GanjoorCommentAbuseReport> reportedComments = await _MySqlImportReportedComments();

        job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, $"{jobName} - pre mysql data fetch")).Result;

        string connectionString =
            $"server={Configuration.GetSection("AudioMySqlServer")["Server"]};uid={Configuration.GetSection("AudioMySqlServer")["Username"]};pwd={Configuration.GetSection("AudioMySqlServer")["Password"]};database={Configuration.GetSection("AudioMySqlServer")["Database"]};charset=utf8;convert zero datetime=True";

        using (MySqlConnection connection = new MySqlConnection
            (
            connectionString
            ))
        {
            connection.Open();
            using (MySqlDataAdapter src = new MySqlDataAdapter(
                "SELECT comment_ID, comment_post_ID, comment_author, comment_author_email, comment_author_url, comment_author_IP, comment_date, comment_content, comment_approved FROM ganja_comments WHERE comment_type <> 'pingback' ORDER BY comment_ID",
                connection))
            {
                job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, $"{jobName} - mysql")).Result;
                using (DataTable data = new DataTable())
                {
                    await src.FillAsync(data);

                    job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, $"{jobName} - processing approved poem songs")).Result;

                    int count = data.Rows.Count;
                    int i = 0;
                    int percent = -1;

                    foreach (DataRow row in data.Rows)
                    {
                        GanjoorComment comment = new GanjoorComment()
                        {
                            PoemId = int.Parse(row["comment_post_ID"].ToString()),
                            AuthorName = row["comment_author"].ToString(),
                            AuthorEmail = row["comment_author_email"].ToString(),
                            AuthorUrl = row["comment_author_url"].ToString(),
                            AuthorIpAddress = row["comment_author_IP"].ToString(),
                            CommentDate = (DateTime)row["comment_date"],
                            HtmlComment = _PrepareCommentHtml(row["comment_content"].ToString()),
                            Status = row["comment_approved"].ToString() == "1" ? PublishStatus.Published : PublishStatus.Awaiting
                        };

                        // comments of poems which have not been imported are dropped
                        var poem = await context.GanjoorPoems.Where(p => p.Id == comment.PoemId).SingleOrDefaultAsync();
                        if (poem == null)
                        {
                            continue;
                        }

                        context.GanjoorComments.Add(comment);

                        // reports are looked up by the id of the comment itself (comment_ID);
                        // comment_post_ID is the id of the poem and was used here by mistake
                        int originalCommentId = int.Parse(row["comment_ID"].ToString());
                        var complaints = reportedComments.Where(c => c.GanjoorCommentId == originalCommentId).ToList();
                        if (complaints.Count > 0)
                        {
                            await context.SaveChangesAsync(); //save this comment to make its ID valid
                            foreach (var complaint in complaints)
                            {
                                context.GanjoorReportedComments.Add
                                (
                                    new GanjoorCommentAbuseReport()
                                    {
                                        GanjoorCommentId = comment.Id,
                                        ReasonCode = complaint.ReasonCode,
                                        ReasonText = complaint.ReasonText,
                                    }
                                );
                            }
                        }

                        i++;

                        // report progress only when the integer percentage has moved on
                        if (i * 100 / count > percent)
                        {
                            percent = i * 100 / count;
                            job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, $"{jobName} - {i} of {count}")).Result;
                        }
                    }

                    await context.SaveChangesAsync();
                }
            }
        }

        job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, $"{jobName} - assigning comments to users")).Result;

        foreach (var user in await context.Users.ToListAsync())
        {
            foreach (var comment in await context.GanjoorComments.Where(u => u.AuthorEmail == user.Email).ToListAsync())
            {
                comment.UserId = user.Id;
                context.GanjoorComments.Update(comment);
            }
        }
        await context.SaveChangesAsync();

        job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, $"{jobName} - finished")).Result;

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        await jobProgressServiceEF.UpdateJob(job.Id, job.Progress, "", false, exp.ToString());
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// from http://pudl.princeton.edu/
/// </summary>
/// <remarks>
/// Validates the request (no unfinished job for the same resource, no artifact with the same
/// friendly url), stores a new <see cref="ImportJob"/> and queues a background work item which
/// downloads the metadata xml of the object: the "dmd" properties become the tags of the book,
/// the ordered list of the "RelatedObjects" structure becomes its pages, and the "Physical"
/// structure adds leaf / folio side tags to those pages. Images already present on disk (orig,
/// norm and thumb) are recovered instead of being downloaded again. The outcome is written to
/// the job record.
/// </remarks>
/// <param name="resourceNumber">dj52w476m</param>
/// <param name="friendlyUrl">friendly url of the new artifact, <paramref name="resourceNumber"/> is used when it is empty</param>
/// <returns>true when the job has been queued; otherwise false along with the reason</returns>
private async Task<RServiceResult<bool>> StartImportingFromPrinceton(string resourceNumber, string friendlyUrl)
{
    // retry policy for image downloads answered with 503 (Service Unavailable)
    const int importRetryCount = 5;
    const int importRetryInitialSleep = 500;

    string url = $"http://pudl.princeton.edu/mdCompiler2.php?obj={resourceNumber}";

    if (
        (
        await _context.ImportJobs
            .Where(j => j.JobType == JobType.Princeton && j.ResourceNumber == resourceNumber && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted))
            .SingleOrDefaultAsync()
        )
        !=
        null
        )
    {
        return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing {url}");
    }

    if (string.IsNullOrEmpty(friendlyUrl))
    {
        friendlyUrl = resourceNumber;
    }

    if (
        (await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).SingleOrDefaultAsync())
        !=
        null
        )
    {
        return new RServiceResult<bool>(false, $"duplicated friendly url '{friendlyUrl}'");
    }

    ImportJob job = new ImportJob()
    {
        JobType = JobType.Princeton,
        ResourceNumber = resourceNumber,
        FriendlyUrl = friendlyUrl,
        SrcUrl = url,
        QueueTime = DateTime.Now,
        ProgressPercent = 0,
        Status = ImportJobStatus.NotStarted
    };
    await _context.ImportJobs.AddAsync(job);
    await _context.SaveChangesAsync();

    // marks the job as failed through a short-lived context of its own
    Func<string, Task> failJob = async reason =>
    {
        using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
        {
            job.EndTime = DateTime.Now;
            job.Status = ImportJobStatus.Failed;
            job.Exception = reason;
            importJobUpdaterDb.Update(job);
            await importJobUpdaterDb.SaveChangesAsync();
        }
    };

    try
    {
        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                try
                {
                    using (var client = new HttpClient())
                    {
                        using (var result = await client.GetAsync(url))
                        {
                            if (!result.IsSuccessStatusCode)
                            {
                                await failJob($"Http result is not ok ({result.StatusCode}) for {url}");
                                return;
                            }

                            using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                            {
                                RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from {url}", $"extracted from {url}")
                                {
                                    Status = PublishStatus.Draft,
                                    DateTime = DateTime.Now,
                                    LastModified = DateTime.Now,
                                    CoverItemIndex = 0,
                                    FriendlyUrl = friendlyUrl
                                };

                                List<RTagValue> meta = new List<RTagValue>();
                                RTagValue tag;

                                string xml = await result.Content.ReadAsStringAsync();

                                using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                                {
                                    job.StartTime = DateTime.Now;
                                    job.Status = ImportJobStatus.Running;
                                    job.SrcContent = xml;
                                    importJobUpdaterDb.Update(job);
                                    await importJobUpdaterDb.SaveChangesAsync();
                                }

                                XElement elObject = XDocument.Parse(xml).Root;

                                // descriptive metadata: every labeled property becomes one tag per value
                                foreach (var prop in elObject.Element("dmd").Element("properties").Elements("property"))
                                {
                                    if (prop.Element("label") == null)
                                    {
                                        continue;
                                    }

                                    string label = prop.Element("label").Value.Replace(":", "");
                                    int order = 1;
                                    foreach (var value in prop.Elements("valueGrp").Elements("value"))
                                    {
                                        tag = await TagHandler.PrepareAttribute(context, label, value.Value, order);
                                        if (value.Attribute("href") != null)
                                        {
                                            // links starting with http://localhost are not kept
                                            if (value.Attribute("href").Value.IndexOf("http://localhost") != 0)
                                            {
                                                tag.ValueSupplement = value.Attribute("href").Value;
                                            }
                                        }
                                        meta.Add(tag);

                                        if (label == "Title")
                                        {
                                            book.Name = book.NameInEnglish = book.Description = book.DescriptionInEnglish = value.Value;
                                        }

                                        order++;
                                    }
                                }

                                tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1);
                                meta.Add(tag);

                                tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1);
                                meta.Add(tag);

                                tag = await TagHandler.PrepareAttribute(context, "Source", "Princeton Digital Library of Islamic Manuscripts", 1);
                                tag.ValueSupplement = $"http://pudl.princeton.edu/objects/{job.ResourceNumber}";
                                meta.Add(tag);

                                book.Tags = meta.ToArray();

                                List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();

                                // pages: the ordered list of the "RelatedObjects" structure
                                foreach (var structure in elObject.Elements("structure"))
                                {
                                    if (structure.Attribute("type") != null && structure.Attribute("type").Value == "RelatedObjects")
                                    {
                                        if (structure.Element("div") == null || structure.Element("div").Element("OrderedList") == null)
                                        {
                                            await failJob("structure[RelatedObjects].div.OrderedList is null");
                                            return;
                                        }

                                        int pageCount = structure.Element("div").Element("OrderedList").Elements("div").Count();
                                        int inlineOrder = 0;
                                        foreach (var div in structure.Element("div").Element("OrderedList").Elements("div"))
                                        {
                                            inlineOrder++;
                                            using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                                            {
                                                job.ProgressPercent = inlineOrder * 100 / (decimal)pageCount;
                                                importJobUpdaterDb.Update(job);
                                                await importJobUpdaterDb.SaveChangesAsync();
                                            }

                                            int order = int.Parse(div.Attribute("order").Value);
                                            RArtifactItemRecord page = new RArtifactItemRecord()
                                            {
                                                Name = $"تصویر {order}",
                                                NameInEnglish = div.Attribute("label").Value,
                                                Description = "",
                                                DescriptionInEnglish = "",
                                                Order = order,
                                                FriendlyUrl = $"p{$"{order}".PadLeft(4, '0')}",
                                                LastModified = DateTime.Now
                                            };

                                            // the image url is built from what follows the last ':' of the img attribute and the h attribute
                                            string imageUrl = div.Attribute("img").Value;
                                            imageUrl = "https://libimages.princeton.edu/loris/" + imageUrl.Substring(imageUrl.LastIndexOf(":") + 1);
                                            imageUrl += $"/full/,{div.Attribute("h").Value}/0/default.jpg";

                                            tag = await TagHandler.PrepareAttribute(context, "Source", "Princeton Digital Library of Islamic Manuscripts", 1);
                                            tag.ValueSupplement = imageUrl;
                                            page.Tags = new RTagValue[] { tag };

                                            if (!string.IsNullOrEmpty(imageUrl))
                                            {
                                                string fileName = $"{order}".PadLeft(4, '0') + ".jpg";
                                                string artifactFolder = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl);
                                                string origPath = Path.Combine(artifactFolder, "orig", fileName);
                                                string normPath = Path.Combine(artifactFolder, "norm", fileName);
                                                string thumbPath = Path.Combine(artifactFolder, "thumb", fileName);

                                                bool recovered = false;
                                                if (File.Exists(origPath) && File.Exists(normPath) && File.Exists(thumbPath))
                                                {
                                                    // files of a previous (interrupted) import are reused instead of being downloaded again
                                                    RServiceResult<RPictureFile> picture = await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1,
                                                        imageUrl,
                                                        origPath,
                                                        normPath,
                                                        thumbPath,
                                                        fileName, friendlyUrl);
                                                    if (picture.Result != null)
                                                    {
                                                        recovered = true;
                                                        page.Images = new RPictureFile[] { picture.Result };
                                                        page.CoverImageIndex = 0;

                                                        if (book.CoverItemIndex == (order - 1))
                                                        {
                                                            book.CoverImage = RPictureFile.Duplicate(picture.Result);
                                                        }

                                                        pages.Add(page);
                                                    }
                                                }

                                                if (!recovered)
                                                {
                                                    // remove leftovers of an incomplete file set before downloading
                                                    foreach (string stalePath in new string[] { origPath, normPath, thumbPath })
                                                    {
                                                        if (File.Exists(stalePath))
                                                        {
                                                            File.Delete(stalePath);
                                                        }
                                                    }

                                                    var imageResult = await client.GetAsync(imageUrl);
                                                    try
                                                    {
                                                        // back off a little longer after every 503 without blocking a thread
                                                        int retryCount = 0;
                                                        while (retryCount < importRetryCount && !imageResult.IsSuccessStatusCode && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable)
                                                        {
                                                            imageResult.Dispose();
                                                            await Task.Delay(importRetryInitialSleep * (retryCount + 1));
                                                            imageResult = await client.GetAsync(imageUrl);
                                                            retryCount++;
                                                        }

                                                        if (!imageResult.IsSuccessStatusCode)
                                                        {
                                                            await failJob($"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}");
                                                            return;
                                                        }

                                                        using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync())
                                                        {
                                                            RServiceResult<RPictureFile> picture = await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, fileName, friendlyUrl);
                                                            if (picture.Result == null)
                                                            {
                                                                throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                                                            }

                                                            page.Images = new RPictureFile[] { picture.Result };
                                                            page.CoverImageIndex = 0;

                                                            if (book.CoverItemIndex == (order - 1))
                                                            {
                                                                book.CoverImage = RPictureFile.Duplicate(picture.Result);
                                                            }

                                                            pages.Add(page);
                                                        }
                                                    }
                                                    finally
                                                    {
                                                        // also covers the failure and exception exits
                                                        imageResult.Dispose();
                                                    }

                                                    // kept from the original implementation, which forces a collection after every downloaded image
                                                    GC.Collect();
                                                }
                                            }
                                        }
                                    }
                                }

                                // physical structure: tag the imported pages with their leaf / folio side
                                foreach (var structure in elObject.Elements("structure"))
                                {
                                    if (structure.Attribute("type") != null && structure.Attribute("type").Value == "Physical")
                                    {
                                        if (structure.Element("RTLBoundManuscript") != null)
                                        {
                                            foreach (var leaf in structure.Element("RTLBoundManuscript").Elements("Leaf"))
                                            {
                                                foreach (var side in leaf.Elements("Side"))
                                                {
                                                    int pageOrder = int.Parse(side.Attribute("order").Value);
                                                    tag = await TagHandler.PrepareAttribute(context, "Leaf Side", side.Attribute("label").Value, 100);
                                                    RArtifactItemRecord page = pages.Where(p => p.Order == pageOrder).SingleOrDefault();
                                                    if (page != null)
                                                    {
                                                        List<RTagValue> tags = new List<RTagValue>(page.Tags);
                                                        tags.Add(tag);
                                                        page.Tags = tags;
                                                    }
                                                }
                                            }

                                            foreach (var folio in structure.Element("RTLBoundManuscript").Elements("Folio"))
                                            {
                                                foreach (var side in folio.Elements("Side"))
                                                {
                                                    int pageOrder = int.Parse(side.Attribute("order").Value);
                                                    tag = await TagHandler.PrepareAttribute(context, "Folio Side", folio.Attribute("label").Value + ":" + side.Attribute("label").Value, 101);
                                                    RArtifactItemRecord page = pages.Where(p => p.Order == pageOrder).SingleOrDefault();
                                                    if (page != null)
                                                    {
                                                        List<RTagValue> tags = new List<RTagValue>(page.Tags);
                                                        tags.Add(tag);
                                                        page.Tags = tags;
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }

                                book.Items = pages.ToArray();
                                book.ItemCount = pages.Count;

                                if (pages.Count == 0)
                                {
                                    // message aligned with the other importers (it used to read "ages.Count == 0")
                                    await failJob("Pages.Count == 0");
                                    return;
                                }

                                await context.Artifacts.AddAsync(book);
                                await context.SaveChangesAsync();

                                job.ProgressPercent = 100;
                                job.Status = ImportJobStatus.Succeeded;
                                job.ArtifactId = book.Id;
                                job.EndTime = DateTime.Now;
                                context.Update(job);
                                await context.SaveChangesAsync();
                            }
                        }
                    }
                }
                catch (Exception exp)
                {
                    await failJob(exp.ToString());
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// import GanjoorPage entity data from MySql
/// </summary>
/// <remarks>
/// The work is queued on the background task queue and reported through a long running job record
/// named "GanjoorService:ImportFromMySql". The phases visible in the progress messages are:
/// phase 1 - copy rows of ganja_posts into GanjoorPages,
/// phase 2 - compute FullUrl / FullTitle of pages which have none,
/// phase 3 - copy poem meta data (vazn, ravi, src, srcslug, oldtag) from ganja_postmeta,
/// then poem songs, "phase N" (ganja_mimages => GanjoorLinks.DisplayOnPage) and finally comments.
/// </remarks>
/// <returns>
/// true as soon as the work item is queued (not when the import is finished),
/// false along with the exception text if queuing itself throws
/// </returns>
public RServiceResult<bool> ImportFromMySql()
{
    try
    {
        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                //this is long running job, so _context might be already been freed/collected by GC
                //a second context is dedicated to progress reporting
                using (RMuseumDbContext context = new RMuseumDbContext(Configuration))
                using (RMuseumDbContext contextReport = new RMuseumDbContext(Configuration))
                {
                    LongRunningJobProgressServiceEF jobProgressServiceEF = new LongRunningJobProgressServiceEF(contextReport);
                    var job = (await jobProgressServiceEF.NewJob("GanjoorService:ImportFromMySql", "pre open connection")).Result;

                    if (string.IsNullOrEmpty(Configuration.GetSection("AudioMySqlServer")["ReportedCommentsDatabase"]))
                    {
                        await jobProgressServiceEF.UpdateJob(job.Id, job.Progress, "", false, "ReportedCommentsDatabase is not set");
                        return;
                    }

                    MusicCatalogueService catalogueService = new MusicCatalogueService(Configuration, context);
                    RServiceResult<bool> musicCatalogueRes = await catalogueService.ImportFromMySql("MusicCatalogueImportFromMySql", jobProgressServiceEF, job);
                    if (!musicCatalogueRes.Result)
                    {
                        return;
                    }

                    try
                    {
                        //the same connection string is used by all three MySql connections below
                        var mySqlSettings = Configuration.GetSection("AudioMySqlServer");
                        string mySqlConnectionString =
                            $"server={mySqlSettings["Server"]};uid={mySqlSettings["Username"]};pwd={mySqlSettings["Password"]};database={mySqlSettings["Database"]};charset=utf8;convert zero datetime=True";

                        using (MySqlConnection connection = new MySqlConnection(mySqlConnectionString))
                        {
                            connection.Open();
                            using (MySqlDataAdapter src = new MySqlDataAdapter(
                                "SELECT ID, post_author, post_date, post_date_gmt, post_content, post_title, post_category, post_excerpt, post_status, comment_status, ping_status, post_password, post_name, to_ping, pinged, post_modified, post_modified_gmt, post_content_filtered, post_parent, guid, menu_order, post_type, post_mime_type, comment_count, " +
                                "COALESCE((SELECT meta_value FROM ganja_postmeta WHERE post_id = ID AND meta_key='_wp_page_template'), '') AS template," +
                                "(SELECT meta_value FROM ganja_postmeta WHERE post_id = ID AND meta_key='otherpoetid') AS other_poet_id " +
                                "FROM ganja_posts",
                                connection))
                            {
                                using (DataTable srcData = new DataTable())
                                {
                                    job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase 1 - mysql 1")).Result;

                                    await src.FillAsync(srcData);

                                    job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase 1 - processing mysql data")).Result;

                                    foreach (DataRow row in srcData.Rows)
                                    {
                                        //open-for-comment posts are poem pages, other rows are classified by their page template
                                        GanjoorPageType pageType =
                                            row["post_type"].ToString() == "post" && row["comment_status"].ToString() != "closed" ?
                                                GanjoorPageType.PoemPage
                                                :
                                                row["template"].ToString() == "comspage.php" ?
                                                    GanjoorPageType.AllComments
                                                    :
                                                    row["template"].ToString() == "relations.php" ?
                                                        GanjoorPageType.ProsodySimilars
                                                        :
                                                        row["template"].ToString() == "vazn.php" ?
                                                            GanjoorPageType.ProsodyAndStats
                                                            :
                                                            GanjoorPageType.None;

                                        //author 1 is stored as "no poet"
                                        int? poetId = row["post_author"].ToString() == "1" ? (int?)null : int.Parse(row["post_author"].ToString());
                                        if (poetId == 36)//rows authored by poet id 36 (Rashheh) are skipped
                                        {
                                            continue;
                                        }

                                        if (poetId != null)
                                        {
                                            //pages of poets which do not exist in the target database are skipped
                                            if (!await context.GanjoorPoets.Where(poet => poet.Id == poetId).AnyAsync())
                                            {
                                                continue;
                                            }
                                        }

                                        GanjoorPage page = new GanjoorPage()
                                        {
                                            Id = int.Parse(row["ID"].ToString()),
                                            GanjoorPageType = pageType,
                                            Published = true,
                                            PageOrder = -1,
                                            Title = row["post_title"].ToString(),
                                            UrlSlug = row["post_name"].ToString(),
                                            HtmlText = row["post_content"].ToString(),
                                            ParentId = row["post_parent"].ToString() == "0" ? (int?)null : int.Parse(row["post_parent"].ToString()),
                                            PoetId = poetId,
                                            SecondPoetId = row["other_poet_id"] == DBNull.Value ? (int?)null : int.Parse(row["other_poet_id"].ToString()),
                                            PostDate = (DateTime)row["post_date"]
                                        };

                                        if (pageType == GanjoorPageType.PoemPage)
                                        {
                                            //a poem page without its poem is not imported
                                            var poem = await context.GanjoorPoems.Where(p => p.Id == page.Id).FirstOrDefaultAsync();
                                            if (poem == null)
                                            {
                                                continue;
                                            }
                                            page.PoemId = poem.Id;
                                        }

                                        if (poetId != null && pageType == GanjoorPageType.None)
                                        {
                                            //a page whose slug matches a root category is the poet page, a match with a sub category makes it a category page
                                            GanjoorCat cat = await context.GanjoorCategories.Where(c => c.PoetId == poetId && c.ParentId == null && c.UrlSlug == page.UrlSlug).SingleOrDefaultAsync();
                                            if (cat != null)
                                            {
                                                page.GanjoorPageType = GanjoorPageType.PoetPage;
                                                page.CatId = cat.Id;
                                            }
                                            else
                                            {
                                                cat = await context.GanjoorCategories.Where(c => c.PoetId == poetId && c.ParentId != null && c.UrlSlug == page.UrlSlug).SingleOrDefaultAsync();
                                                if (cat != null)
                                                {
                                                    page.GanjoorPageType = GanjoorPageType.CatPage;
                                                    page.CatId = cat.Id;
                                                }
                                            }
                                        }

                                        context.GanjoorPages.Add(page);
                                    }
                                }
                            }
                        }

                        job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase 1 - finalizing")).Result;

                        await context.SaveChangesAsync();

                        job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase 2 - pre fetch data")).Result;

                        var orphanPages = await context.GanjoorPages.Include(p => p.Poem).Where(p => p.FullUrl == null).ToListAsync();

                        job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase 2 - post fetch data")).Result;

                        int i = 0;
                        foreach (var page in orphanPages)
                        {
                            job = (await jobProgressServiceEF.UpdateJob(job.Id, i++, "phase 2")).Result;

                            string fullUrl = page.UrlSlug;
                            string fullTitle = page.Title;

                            if (page.GanjoorPageType == GanjoorPageType.PoemPage)
                            {
                                fullTitle = page.Poem.FullTitle;
                                fullUrl = page.Poem.FullUrl;
                            }
                            else
                            {
                                if (page.ParentId != null)
                                {
                                    //climb the page hierarchy and prefix slug / title of every ancestor
                                    GanjoorPage parent = await context.GanjoorPages.Where(p => p.Id == page.ParentId).SingleAsync();
                                    while (parent != null)
                                    {
                                        fullUrl = parent.UrlSlug + "/" + fullUrl;
                                        fullTitle = parent.Title + " » " + fullTitle;
                                        parent = parent.ParentId == null ? null : await context.GanjoorPages.Where(p => p.Id == parent.ParentId).SingleAsync();
                                    }
                                }
                                else
                                {
                                    GanjoorCat cat = await context.GanjoorCategories.Where(c => c.PoetId == page.PoetId && c.UrlSlug == page.UrlSlug).SingleOrDefaultAsync();
                                    if (cat != null)
                                    {
                                        fullUrl = cat.FullUrl;
                                        //the lookup may return null for a dangling ParentId, so null is checked
                                        //before the next iteration dereferences the category
                                        while (cat != null && cat.ParentId != null)
                                        {
                                            var parentCatId = cat.ParentId;
                                            cat = await context.GanjoorCategories.Where(c => c.Id == parentCatId).SingleOrDefaultAsync();
                                            if (cat != null)
                                            {
                                                fullTitle = cat.Title + " » " + fullTitle;
                                            }
                                        }
                                    }
                                    else
                                    {
                                        //no category of its own: put the page under the root category of its poet
                                        cat = await context.GanjoorCategories.Where(c => c.PoetId == page.PoetId && c.ParentId == null).SingleOrDefaultAsync();
                                        if (cat != null)
                                        {
                                            fullUrl = $"{cat.UrlSlug}/{page.UrlSlug}";
                                        }
                                    }
                                }
                            }

                            //non empty urls always start with a slash
                            if (!string.IsNullOrEmpty(fullUrl) && fullUrl.IndexOf('/') != 0)
                            {
                                fullUrl = $"/{fullUrl}";
                            }

                            page.FullUrl = fullUrl;
                            page.FullTitle = fullTitle;

                            context.Update(page);
                        }

                        job = (await jobProgressServiceEF.UpdateJob(job.Id, job.Progress, "phase 2 - finalizing")).Result;

                        await context.SaveChangesAsync();

                        job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase 3 - pre mysql data fetch")).Result;

                        using (MySqlConnection connection = new MySqlConnection(mySqlConnectionString))
                        {
                            connection.Open();
                            using (MySqlDataAdapter src = new MySqlDataAdapter(
                                "SELECT meta_key, post_id, meta_value FROM ganja_postmeta WHERE meta_key IN ( 'vazn', 'ravi', 'src', 'srcslug', 'oldtag' )",
                                connection))
                            {
                                job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase 3 - mysql 2")).Result;
                                using (DataTable srcData = new DataTable())
                                {
                                    await src.FillAsync(srcData);

                                    job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase 3 - processing meta data")).Result;

                                    int r = 0;
                                    foreach (DataRow row in srcData.Rows)
                                    {
                                        job = (await jobProgressServiceEF.UpdateJob(job.Id, r++, "phase 3 - processing meta data")).Result;

                                        int poemId = int.Parse(row["post_id"].ToString());
                                        var poem = await context.GanjoorPoems.Where(p => p.Id == poemId).FirstOrDefaultAsync();
                                        if (poem == null)
                                        {
                                            continue;
                                        }

                                        string metaKey = row["meta_key"].ToString();
                                        string metaValue = row["meta_value"].ToString();
                                        switch (metaKey)
                                        {
                                            case "vazn":
                                                {
                                                    //a metre is created on first use and saved at once so that its Id can be assigned
                                                    GanjoorMetre metre = await context.GanjoorMetres.Where(m => m.Rhythm == metaValue).SingleOrDefaultAsync();
                                                    if (metre == null)
                                                    {
                                                        metre = new GanjoorMetre()
                                                        {
                                                            Rhythm = metaValue,
                                                            VerseCount = 0
                                                        };
                                                        context.GanjoorMetres.Add(metre);
                                                        await context.SaveChangesAsync();
                                                    }
                                                    poem.GanjoorMetreId = metre.Id;
                                                }
                                                break;
                                            case "ravi":
                                                poem.RhymeLetters = metaValue;
                                                break;
                                            case "src":
                                                poem.SourceName = metaValue;
                                                break;
                                            case "srcslug":
                                                poem.SourceUrlSlug = metaValue;
                                                break;
                                            case "oldtag":
                                                poem.OldTag = metaValue;
                                                //known old tags are mapped to their pages, any other tag leaves OldTagPageUrl untouched
                                                switch (poem.OldTag)
                                                {
                                                    case "بدایع":
                                                        poem.OldTagPageUrl = "/saadi/badaye";
                                                        break;
                                                    case "خواتیم":
                                                        poem.OldTagPageUrl = "/saadi/khavatim";
                                                        break;
                                                    case "طیبات":
                                                        poem.OldTagPageUrl = "/saadi/tayyebat";
                                                        break;
                                                    case "غزلیات قدیم":
                                                        poem.OldTagPageUrl = "/saadi/ghazaliyat-e-ghadim";
                                                        break;
                                                    case "ملمعات":
                                                        poem.OldTagPageUrl = "/saadi/molammaat";
                                                        break;
                                                }
                                                break;
                                        }

                                        context.GanjoorPoems.Update(poem);
                                    }
                                }
                            }
                        }

                        job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase 3 - finalizing meta data")).Result;
                        await context.SaveChangesAsync();

                        //approved songs first, pending ones next
                        var resApprovedPoemSongs = await _ImportPoemSongsDataFromMySql("_ImportPoemSongsDataFromMySql", context, jobProgressServiceEF, job, true);
                        if (!resApprovedPoemSongs.Result)
                        {
                            return;
                        }

                        var resPendingPoemSongs = await _ImportPoemSongsDataFromMySql("_ImportPoemSongsDataFromMySql", context, jobProgressServiceEF, job, false);
                        if (!resPendingPoemSongs.Result)
                        {
                            return;
                        }

                        using (MySqlConnection connection = new MySqlConnection(mySqlConnectionString))
                        {
                            connection.Open();
                            using (MySqlDataAdapter src = new MySqlDataAdapter(
                                "SELECT poem_id, mimage_id FROM ganja_mimages",
                                connection))
                            {
                                job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase N - mysql N")).Result;
                                using (DataTable srcData = new DataTable())
                                {
                                    await src.FillAsync(srcData);

                                    job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase N - processing meta data")).Result;

                                    int r = 0;
                                    foreach (DataRow row in srcData.Rows)
                                    {
                                        job = (await jobProgressServiceEF.UpdateJob(job.Id, r++, "phase N - processing meta data")).Result;

                                        int poemId = int.Parse(row["poem_id"].ToString());
                                        Guid imageId = Guid.Parse(row["mimage_id"].ToString());

                                        //the link of this poem whose item has the image as its first image is displayed on the poem page
                                        var link = await context.GanjoorLinks.Include(l => l.Item).ThenInclude(item => item.Images).
                                            Where(l => l.GanjoorPostId == poemId && l.Item.Images.First().Id == imageId)
                                            .FirstOrDefaultAsync();
                                        if (link != null)
                                        {
                                            link.DisplayOnPage = true;
                                            context.GanjoorLinks.Update(link);
                                        }
                                    }
                                }
                            }
                        }

                        job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, "phase N - finalizing meta data")).Result;
                        await context.SaveChangesAsync();
                    }
                    catch (Exception jobExp)
                    {
                        await jobProgressServiceEF.UpdateJob(job.Id, job.Progress, "", false, jobExp.ToString());
                        //stop here like every other failure path of this job: going on would import the comments
                        //on top of a half imported state and then overwrite the failure with "Finished"
                        return;
                    }

                    var resComments = await _ImportCommentsDataFromMySql("_ImportCommentsDataFromMySql", context, jobProgressServiceEF, job);
                    if (!resComments.Result)
                    {
                        return;
                    }

                    await jobProgressServiceEF.UpdateJob(job.Id, 100, "Finished", true);
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// remove user data
/// </summary>
/// <remarks>
/// Records which must outlive the user (reviews, corrections, reports, suggested links, music tracks,
/// page snapshots, translations, suggested poet notes and pictures) are handed over to the account whose
/// email is configured as Ganjoor:DeleteUserEmail. Visits, bookmarks, upload sessions, the user's own
/// recitations, recitation error reports, comments and recitation up-votes are deleted.
/// </remarks>
/// <param name="userId">id of the user whose data is removed</param>
/// <returns>
/// a failed result if <paramref name="userId"/> is the system user or the deleted-user placeholder,
/// otherwise whatever the base implementation returns
/// </returns>
public override async Task<RServiceResult<bool>> RemoveUserData(Guid userId)
{
    RMuseumDbContext db = _context as RMuseumDbContext;

    // Loads the selected rows, rewrites their user reference and persists them immediately.
    // Saving after every single step is intentional: the original author noted tracking related
    // problems otherwise, especially when the same table is processed more than once.
    async Task HandOverAsync<TEntity>(IQueryable<TEntity> selection, Action<TEntity> rewrite) where TEntity : class
    {
        var rows = await selection.ToListAsync();
        rows.ForEach(rewrite);
        db.UpdateRange(rows);
        await db.SaveChangesAsync();
    }

    // Loads the selected rows and deletes them, saving immediately.
    async Task PurgeAsync<TEntity>(IQueryable<TEntity> selection) where TEntity : class
    {
        var rows = await selection.ToListAsync();
        db.RemoveRange(rows);
        await db.SaveChangesAsync();
    }

    // neither the system account nor the placeholder account may be removed
    string systemEmail = $"{Configuration.GetSection("Ganjoor")["SystemEmail"]}";
    var systemUserId = (Guid)(await FindUserByEmail(systemEmail)).Result.Id;
    if (systemUserId == userId)
    {
        return new RServiceResult<bool>(false, "تلاش برای حذف کاربر سیستمی");
    }

    string deletedUserEmail = $"{Configuration.GetSection("Ganjoor")["DeleteUserEmail"]}";
    var deletedUserId = (Guid)(await FindUserByEmail(deletedUserEmail)).Result.Id;
    if (deletedUserId == userId)
    {
        return new RServiceResult<bool>(false, "تلاش برای حذف کاربر سیستمی کاربر حذف شده");
    }

    // ---- records handed over to the placeholder account ----
    await HandOverAsync(db.Recitations.Where(x => x.ReviewerId == userId), x => x.ReviewerId = deletedUserId);

    await HandOverAsync(db.GanjoorPoemCorrections.Where(x => x.UserId == userId), x => x.UserId = deletedUserId);
    await HandOverAsync(db.GanjoorPoemCorrections.Where(x => x.ReviewerUserId == userId), x => x.ReviewerUserId = deletedUserId);

    await HandOverAsync(db.GanjoorReportedComments.Where(x => x.ReportedById == userId), x => x.ReportedById = deletedUserId);

    await HandOverAsync(db.GanjoorLinks.Where(x => x.SuggestedById == userId), x => x.SuggestedById = deletedUserId);
    await HandOverAsync(db.GanjoorLinks.Where(x => x.ReviewerId == userId), x => x.ReviewerId = deletedUserId);

    await HandOverAsync(db.PinterestLinks.Where(x => x.SuggestedById == userId), x => x.SuggestedById = deletedUserId);
    await HandOverAsync(db.PinterestLinks.Where(x => x.ReviewerId == userId), x => x.ReviewerId = deletedUserId);

    await HandOverAsync(db.GanjoorPoemMusicTracks.Where(x => x.SuggestedById == userId), x => x.SuggestedById = deletedUserId);

    await HandOverAsync(db.GanjoorPageSnapshots.Where(x => x.MadeObsoleteByUserId == userId), x => x.MadeObsoleteByUserId = deletedUserId);

    await HandOverAsync(db.GanjoorPoemTranslations.Where(x => x.UserId == userId), x => x.UserId = deletedUserId);

    await HandOverAsync(db.GanjoorPoetSuggestedSpecLines.Where(x => x.SuggestedById == userId), x => x.SuggestedById = deletedUserId);
    await HandOverAsync(db.GanjoorPoetSuggestedPictures.Where(x => x.SuggestedById == userId), x => x.SuggestedById = deletedUserId);

    // ---- records deleted together with the user ----
    await PurgeAsync(db.GanjoorUserPoemVisits.Where(x => x.UserId == userId));
    await PurgeAsync(db.UserBookmarks.Where(x => x.RAppUserId == userId));
    await PurgeAsync(db.UploadSessions.Where(x => x.UseId == userId));
    await PurgeAsync(db.Recitations.Where(x => x.OwnerId == userId));
    await PurgeAsync(db.GanjoorUserBookmarks.Where(x => x.UserId == userId));
    await PurgeAsync(db.RecitationErrorReports.Where(x => x.ReporterId == userId));

    // comments are deleted one by one through the private helper; according to the original note
    // the comment service could not be initialized from here, so its DeleteMyComment is not used
    var userComments = await db.GanjoorComments.Where(x => x.UserId == userId).ToListAsync();
    foreach (var userComment in userComments)
    {
        await _DeleteComment(db, userComment.Id);
    }

    // every removed up-vote is followed by a re-ordering of the recitations of the affected poem
    var upVotes = await db.RecitationUserUpVotes.Where(x => x.UserId == userId).ToListAsync();
    foreach (var upVote in upVotes)
    {
        int votedPoemId = await db.Recitations.AsNoTracking()
                                    .Where(x => x.Id == upVote.RecitationId)
                                    .Select(x => x.GanjoorPostId)
                                    .SingleAsync();
        db.Remove(upVote);
        await db.SaveChangesAsync();
        await _ReOrderPoemRecitationsAsync(db, votedPoemId);
    }

    // notifications are deleted by the base implementation; it runs last because
    // some of the operations above might produce new notifications
    return await base.RemoveUserData(userId);
}
/// <summary>
/// import from https://viewer.cbl.ie
/// </summary>
/// <remarks>
/// After validating the request an <c>ImportJob</c> record is saved and the actual import is queued as a
/// background work item. Page images are requested one after another (1, 2, 3, ...) until the server
/// answers Forbidden or NotFound; images already present on disk (orig, norm and thumb) are reused
/// instead of being downloaded again. The artifact is stored as a draft.
/// </remarks>
/// <param name="resourceNumber">119</param>
/// <param name="friendlyUrl">golestan-baysonghori</param>
/// <returns>
/// true when the job is queued (not when it is finished); false along with a message if a job for the
/// same resource is pending, <paramref name="friendlyUrl"/> is empty or already used, or an exception is thrown
/// </returns>
private async Task<RServiceResult<bool>> StartImportingFromChesterBeatty(string resourceNumber, string friendlyUrl)
{
    try
    {
        string srcUrl = $"https://viewer.cbl.ie/viewer/object/Per_{resourceNumber}/1/";

        //any job for this resource which has not failed or been aborted blocks a new one
        if (
            (
            await _context.ImportJobs
                .Where(j => j.JobType == JobType.ChesterBeatty && j.ResourceNumber == resourceNumber && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted))
                .SingleOrDefaultAsync()
            )
            !=
            null
            )
        {
            return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing {srcUrl}");
        }

        if (string.IsNullOrEmpty(friendlyUrl))
        {
            return new RServiceResult<bool>(false, $"Friendly url is empty, server folder {srcUrl}");
        }

        if (
            (await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).SingleOrDefaultAsync())
            !=
            null
            )
        {
            return new RServiceResult<bool>(false, $"duplicated friendly url '{friendlyUrl}'");
        }

        ImportJob job = new ImportJob()
        {
            JobType = JobType.ChesterBeatty,
            ResourceNumber = resourceNumber,
            FriendlyUrl = friendlyUrl,
            SrcUrl = srcUrl,
            QueueTime = DateTime.Now,
            ProgressPercent = 0,
            Status = ImportJobStatus.NotStarted
        };

        await _context.ImportJobs.AddAsync(job);
        await _context.SaveChangesAsync();

        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                //marks the job as failed through a context of its own, so the update does not depend on the state of the import context
                async Task FailJobAsync(string reason)
                {
                    using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                    {
                        job.EndTime = DateTime.Now;
                        job.Status = ImportJobStatus.Failed;
                        job.Exception = reason;
                        importJobUpdaterDb.Update(job);
                        await importJobUpdaterDb.SaveChangesAsync();
                    }
                }

                try
                {
                    using (RMuseumDbContext context = new RMuseumDbContext(Configuration))
                    {
                        RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from url {job.ResourceNumber}", $"extracted from url {job.ResourceNumber}")
                        {
                            Status = PublishStatus.Draft,
                            DateTime = DateTime.Now,
                            LastModified = DateTime.Now,
                            CoverItemIndex = 0,
                            FriendlyUrl = friendlyUrl,
                        };

                        List<RTagValue> meta = new List<RTagValue>();
                        RTagValue tag;

                        tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1);
                        meta.Add(tag);

                        tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1);
                        meta.Add(tag);

                        tag = await TagHandler.PrepareAttribute(context, "Source", "Chester Beatty Digital Collections", 1);
                        tag.ValueSupplement = srcUrl;
                        meta.Add(tag);

                        using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                        {
                            job.StartTime = DateTime.Now;
                            job.Status = ImportJobStatus.Running;
                            job.SrcContent = "";
                            importJobUpdaterDb.Update(job);
                            await importJobUpdaterDb.SaveChangesAsync();
                        }

                        const int importRetryCount = 5;
                        const int importRetryInitialSleep = 500; //milliseconds, multiplied by the attempt number

                        List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();
                        int order = 0;

                        using (var client = new HttpClient())
                        {
                            do
                            {
                                order++;

                                //the running page number is what is reported as progress
                                using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                                {
                                    job.ProgressPercent = order;
                                    importJobUpdaterDb.Update(job);
                                    await importJobUpdaterDb.SaveChangesAsync();
                                }

                                string order3Digits = $"{order}".PadLeft(3, '0');
                                string order4Digits = $"{order}".PadLeft(4, '0');
                                string fileName = order4Digits + ".jpg";

                                RArtifactItemRecord page = new RArtifactItemRecord()
                                {
                                    Name = $"تصویر {order}",
                                    NameInEnglish = $"Image {order} of {book.NameInEnglish}",
                                    Description = "",
                                    DescriptionInEnglish = "",
                                    Order = order,
                                    FriendlyUrl = $"p{order4Digits}",
                                    LastModified = DateTime.Now
                                };

                                string imageUrl = $"https://viewer.cbl.ie/viewer/rest/image/Per_{resourceNumber}/Per{resourceNumber}_{order3Digits}.jpg/full/!10000,10000/0/default.jpg?ignoreWatermark=true";
                                string pageUrl = $"https://viewer.cbl.ie/viewer/object/Per_{resourceNumber}/{order3Digits}/";

                                page.Tags = new RTagValue[] { };

                                string origPath = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl, "orig", fileName);
                                string normPath = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl, "norm", fileName);
                                string thumbPath = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl, "thumb", fileName);

                                //reuse the files of a previous (interrupted) import if all three sizes are there
                                bool recovered = false;
                                if (File.Exists(origPath) && File.Exists(normPath) && File.Exists(thumbPath))
                                {
                                    RServiceResult<RPictureFile> picture = await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1,
                                        imageUrl,
                                        origPath, normPath, thumbPath,
                                        fileName, friendlyUrl);
                                    if (picture.Result != null)
                                    {
                                        recovered = true;
                                        page.Images = new RPictureFile[] { picture.Result };
                                        page.CoverImageIndex = 0;

                                        if (book.CoverItemIndex == (order - 1))
                                        {
                                            book.CoverImage = RPictureFile.Duplicate(picture.Result);
                                        }

                                        tag = await TagHandler.PrepareAttribute(context, "Source", "Chester Beatty Digital Collections", 1);
                                        tag.ValueSupplement = pageUrl;
                                        page.Tags = new RTagValue[] { tag };

                                        pages.Add(page);
                                    }
                                }

                                if (!recovered)
                                {
                                    //get rid of an incomplete or unusable set of files before downloading
                                    foreach (string leftover in new string[] { origPath, normPath, thumbPath })
                                    {
                                        if (File.Exists(leftover))
                                        {
                                            File.Delete(leftover);
                                        }
                                    }

                                    var imageResult = await client.GetAsync(imageUrl);
                                    try
                                    {
                                        //NOTE(review): Forbidden / NotFound is taken as "no more pages" - presumably how the viewer answers past the last image
                                        if (imageResult.StatusCode == HttpStatusCode.Forbidden || imageResult.StatusCode == HttpStatusCode.NotFound)
                                        {
                                            break;
                                        }

                                        //retry with growing delay while the service is unavailable
                                        int retryCount = 0;
                                        while (retryCount < importRetryCount && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable)
                                        {
                                            imageResult.Dispose();
                                            await Task.Delay(importRetryInitialSleep * (retryCount + 1));
                                            imageResult = await client.GetAsync(imageUrl);
                                            retryCount++;
                                        }

                                        if (imageResult.IsSuccessStatusCode)
                                        {
                                            using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync())
                                            {
                                                RServiceResult<RPictureFile> picture = await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, fileName, friendlyUrl);
                                                if (picture.Result == null)
                                                {
                                                    //caught by the outer catch which marks the job as failed
                                                    throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                                                }

                                                page.Images = new RPictureFile[] { picture.Result };
                                                page.CoverImageIndex = 0;

                                                if (book.CoverItemIndex == (order - 1))
                                                {
                                                    book.CoverImage = RPictureFile.Duplicate(picture.Result);
                                                }

                                                tag = await TagHandler.PrepareAttribute(context, "Source", "Chester Beatty Digital Collections", 1);
                                                tag.ValueSupplement = pageUrl;
                                                page.Tags = new RTagValue[] { tag };

                                                pages.Add(page);
                                            }
                                        }
                                        else
                                        {
                                            await FailJobAsync($"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}");
                                            return;
                                        }
                                    }
                                    finally
                                    {
                                        //covers break, return and exceptions as well as the normal path
                                        imageResult.Dispose();
                                    }

                                    //kept from the original code - presumably to limit memory growth caused by the large images
                                    GC.Collect();
                                }

                                //the page has already been added by the branch which produced its image;
                                //adding it again here would duplicate every item and double ItemCount
                            }
                            while (true);
                        }

                        book.Tags = meta.ToArray();
                        book.Items = pages.ToArray();
                        book.ItemCount = pages.Count;

                        if (pages.Count == 0)
                        {
                            await FailJobAsync("Pages.Count == 0");
                            return;
                        }

                        await context.Artifacts.AddAsync(book);
                        await context.SaveChangesAsync();

                        job.ProgressPercent = 100;
                        job.Status = ImportJobStatus.Succeeded;
                        job.ArtifactId = book.Id;
                        job.EndTime = DateTime.Now;
                        context.Update(job);
                        await context.SaveChangesAsync();
                    }
                }
                catch (Exception exp)
                {
                    await FailJobAsync(exp.ToString());
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// from https://catalog.hathitrust.org
/// <para>
/// Registers an <see cref="ImportJob"/> and queues a background work item that downloads the
/// catalog record, turns its datafields into tags and then fetches page images one by one.
/// </para>
/// </summary>
/// <param name="resourceNumber">006814127</param>
/// <param name="friendlyUrl">friendly url of the artifact to create; <paramref name="resourceNumber"/> is used when it is null or empty</param>
/// <returns>
/// a successful result once the work item is queued; a failed result with a message when a job for
/// this record is already registered (and not failed/aborted), when the friendly url is already
/// taken, or when queuing throws
/// </returns>
private async Task<RServiceResult<bool>> StartImportingFromHathiTrust(string resourceNumber, string friendlyUrl)
{
    string url = $"https://catalog.hathitrust.org/Record/{resourceNumber}.xml";

    // Any job for this record that is not Failed/Aborted blocks a new import.
    if (
        (
        await _context.ImportJobs
            .Where(j => j.JobType == JobType.HathiTrust && j.ResourceNumber == resourceNumber && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted))
            .SingleOrDefaultAsync()
        )
        != null
        )
    {
        return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing {url}");
    }

    if (string.IsNullOrEmpty(friendlyUrl))
    {
        friendlyUrl = resourceNumber;
    }

    if ((await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).SingleOrDefaultAsync()) != null)
    {
        return new RServiceResult<bool>(false, $"duplicated artifact friendly url '{friendlyUrl}'");
    }

    ImportJob job = new ImportJob()
    {
        JobType = JobType.HathiTrust,
        ResourceNumber = resourceNumber,
        FriendlyUrl = friendlyUrl,
        SrcUrl = url,
        QueueTime = DateTime.Now,
        ProgressPercent = 0,
        Status = ImportJobStatus.NotStarted
    };
    await _context.ImportJobs.AddAsync(job);
    await _context.SaveChangesAsync();

    // Stores a Failed state for the job using a short-lived context of its own.
    async Task FailJobAsync(string reason)
    {
        using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
        {
            job.EndTime = DateTime.Now;
            job.Status = ImportJobStatus.Failed;
            job.Exception = reason;
            importJobUpdaterDb.Update(job);
            await importJobUpdaterDb.SaveChangesAsync();
        }
    }

    try
    {
        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                try
                {
                    using (var client = new HttpClient())
                    using (var result = await client.GetAsync(url))
                    {
                        if (!result.IsSuccessStatusCode)
                        {
                            await FailJobAsync($"Http result is not ok ({result.StatusCode}) for {url}");
                            return;
                        }

                        using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                        {
                            // Re-check at run time: the friendly url may have been taken while the job was waiting in the queue.
                            if ((await context.Artifacts.Where(a => a.FriendlyUrl == job.FriendlyUrl).SingleOrDefaultAsync()) != null)
                            {
                                await FailJobAsync("aborted because of duplicated friendly url");
                                return;
                            }

                            RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from {url}", $"extracted from {url}")
                            {
                                Status = PublishStatus.Draft,
                                DateTime = DateTime.Now,
                                LastModified = DateTime.Now,
                                CoverItemIndex = 0,
                                FriendlyUrl = friendlyUrl
                            };

                            List<RTagValue> meta = new List<RTagValue>();
                            RTagValue tag;

                            string xml = await result.Content.ReadAsStringAsync();

                            using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                            {
                                job.StartTime = DateTime.Now;
                                job.Status = ImportJobStatus.Running;
                                job.SrcContent = xml;
                                importJobUpdaterDb.Update(job);
                                await importJobUpdaterDb.SaveChangesAsync();
                            }

                            string title = "";
                            string author = "";
                            string pdfResourceNumber = "";
                            int tagOrder = 1;

                            XElement elObject = XDocument.Parse(xml).Root;
                            foreach (var datafield in elObject.Element("record").Elements("datafield"))
                            {
                                // Counted for every datafield, including the ones skipped below.
                                tagOrder++;
                                var tagAttribute = datafield.Attribute("tag");
                                if (tagAttribute == null)
                                {
                                    continue;
                                }

                                string hathiTrustTag = tagAttribute.Value;
                                switch (hathiTrustTag)
                                {
                                    case "245":
                                    case "246":
                                        // title: subfields a and f
                                        foreach (var subfield in datafield.Elements("subfield"))
                                        {
                                            var code = subfield.Attribute("code");
                                            if (code != null && (code.Value == "a" || code.Value == "f"))
                                            {
                                                title = (title + " " + subfield.Value).Trim();
                                            }
                                        }
                                        break;
                                    case "100":
                                        // author: subfields a and d
                                        foreach (var subfield in datafield.Elements("subfield"))
                                        {
                                            var code = subfield.Attribute("code");
                                            if (code != null && (code.Value == "a" || code.Value == "d"))
                                            {
                                                author = (author + " " + subfield.Value).Trim();
                                            }
                                        }
                                        break;
                                    case "HOL":
                                        // subfield p supplies the id used below for the viewer and image urls
                                        foreach (var subfield in datafield.Elements("subfield"))
                                        {
                                            var code = subfield.Attribute("code");
                                            if (code != null && code.Value == "p")
                                            {
                                                pdfResourceNumber = subfield.Value;
                                            }
                                        }
                                        break;
                                    default:
                                        {
                                            // Every other numeric tag in [100, 900] becomes a "Notes" tag.
                                            if (int.TryParse(hathiTrustTag, out int tmp) && tmp >= 100 && tmp <= 900)
                                            {
                                                string note = "";
                                                foreach (var subfield in datafield.Elements("subfield"))
                                                {
                                                    if (subfield.Attribute("code") != null)
                                                    {
                                                        note = (note + " " + subfield.Value).Trim();
                                                    }
                                                }
                                                tag = await TagHandler.PrepareAttribute(context, "Notes", note, tagOrder);
                                                meta.Add(tag);
                                            }
                                        }
                                        break;
                                }
                            }

                            if (string.IsNullOrEmpty(pdfResourceNumber))
                            {
                                await FailJobAsync("pdfResourceNumber not found");
                                return;
                            }

                            tag = await TagHandler.PrepareAttribute(context, "Title", title, 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Contributor Names", author, 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Source", "HathiTrust Digital Library", 1);
                            string viewerUrl = $"https://babel.hathitrust.org/cgi/pt?id={pdfResourceNumber}";
                            tag.ValueSupplement = viewerUrl;
                            meta.Add(tag);

                            book.Name = book.NameInEnglish = book.Description = book.DescriptionInEnglish = title;
                            book.Tags = meta.ToArray();

                            List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();

                            const int importRetryCount = 200;
                            const int importRetryInitialSleep = 500;
                            string bookImagesPath = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl);

                            string lastMD5hash = "";
                            int order = 0;

                            // The page count is not known in advance: the loop ends when a downloaded
                            // page's thumbnail is byte-identical to the previously downloaded one.
                            // NOTE(review): pages recovered from disk do not update lastMD5hash - confirm
                            // that is intended when an interrupted import is resumed.
                            while (true)
                            {
                                order++;

                                using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                                {
                                    // Stores the page number, not a real percentage.
                                    job.ProgressPercent = order;
                                    importJobUpdaterDb.Update(job);
                                    await importJobUpdaterDb.SaveChangesAsync();
                                }

                                string imageUrl = $"https://babel.hathitrust.org/cgi/imgsrv/image?id={pdfResourceNumber};seq={order};size=1000;rotation=0";

                                RArtifactItemRecord page = new RArtifactItemRecord()
                                {
                                    Name = $"تصویر {order}",
                                    NameInEnglish = $"Image {order}",
                                    Description = "",
                                    DescriptionInEnglish = "",
                                    Order = order,
                                    FriendlyUrl = "p" + $"{order}".PadLeft(4, '0'),
                                    LastModified = DateTime.Now
                                };

                                tag = await TagHandler.PrepareAttribute(context, "Source", "HathiTrust Digital Library", 1);
                                tag.ValueSupplement = viewerUrl;
                                page.Tags = new RTagValue[] { tag };

                                string fileName = $"{order}".PadLeft(4, '0') + ".jpg";
                                string origPath = Path.Combine(bookImagesPath, "orig", fileName);
                                string normPath = Path.Combine(bookImagesPath, "norm", fileName);
                                string thumbPath = Path.Combine(bookImagesPath, "thumb", fileName);

                                // Reuse the files of an earlier run when all three renditions are present.
                                bool recovered = false;
                                if (File.Exists(origPath) && File.Exists(normPath) && File.Exists(thumbPath))
                                {
                                    RServiceResult<RPictureFile> recoveredPicture = await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1,
                                        imageUrl, origPath, normPath, thumbPath, fileName, friendlyUrl);
                                    if (recoveredPicture.Result != null)
                                    {
                                        recovered = true;
                                        page.Images = new RPictureFile[] { recoveredPicture.Result };
                                        page.CoverImageIndex = 0;
                                        if (book.CoverItemIndex == (order - 1))
                                        {
                                            book.CoverImage = RPictureFile.Duplicate(recoveredPicture.Result);
                                        }
                                        pages.Add(page);
                                    }
                                }
                                if (recovered)
                                {
                                    continue;
                                }

                                // Remove partial leftovers before downloading again.
                                foreach (string stalePath in new[] { origPath, normPath, thumbPath })
                                {
                                    if (File.Exists(stalePath))
                                    {
                                        File.Delete(stalePath);
                                    }
                                }

                                var imageResult = await client.GetAsync(imageUrl);
                                try
                                {
                                    // Retry only on 503, waiting a little longer each time without blocking the thread.
                                    int retryCount = 0;
                                    while (retryCount < importRetryCount && !imageResult.IsSuccessStatusCode && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable)
                                    {
                                        imageResult.Dispose();
                                        await Task.Delay(importRetryInitialSleep * (retryCount + 1));
                                        imageResult = await client.GetAsync(imageUrl);
                                        retryCount++;
                                    }

                                    if (!imageResult.IsSuccessStatusCode)
                                    {
                                        await FailJobAsync($"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}");
                                        return;
                                    }

                                    using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync())
                                    {
                                        RServiceResult<RPictureFile> picture = await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, fileName, friendlyUrl);
                                        if (picture.Result == null)
                                        {
                                            throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                                        }

                                        bool lastPage = false;
                                        using (var md5 = MD5.Create())
                                        {
                                            string md5hash = string.Join("", md5.ComputeHash(File.ReadAllBytes(thumbPath)).Select(x => x.ToString("X2")));
                                            if (md5hash == lastMD5hash)
                                            {
                                                // Same thumbnail as the previous download: drop it and stop.
                                                File.Delete(origPath);
                                                File.Delete(normPath);
                                                File.Delete(thumbPath);
                                                lastPage = true;
                                            }
                                            lastMD5hash = md5hash;
                                        }

                                        if (lastPage)
                                        {
                                            break;
                                        }

                                        page.Images = new RPictureFile[] { picture.Result };
                                        page.CoverImageIndex = 0;
                                        if (book.CoverItemIndex == (order - 1))
                                        {
                                            book.CoverImage = RPictureFile.Duplicate(picture.Result);
                                        }
                                        pages.Add(page);
                                    }
                                }
                                finally
                                {
                                    // Runs on break, return and exceptions too, so the response is never leaked.
                                    imageResult.Dispose();
                                }

                                GC.Collect();
                            }

                            book.Items = pages.ToArray();
                            book.ItemCount = pages.Count;

                            if (pages.Count == 0)
                            {
                                await FailJobAsync("Pages.Count == 0");
                                return;
                            }

                            await context.Artifacts.AddAsync(book);
                            await context.SaveChangesAsync();

                            job.ProgressPercent = 100;
                            job.Status = ImportJobStatus.Succeeded;
                            job.ArtifactId = book.Id;
                            job.EndTime = DateTime.Now;
                            context.Update(job);
                            await context.SaveChangesAsync();
                        }
                    }
                }
                catch (Exception exp)
                {
                    await FailJobAsync(exp.ToString());
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// from http://www.thedigitalwalters.org/01_ACCESS_WALTERS_MANUSCRIPTS.html
/// <para>
/// Registers an <see cref="ImportJob"/> and queues a background work item that downloads the
/// TEI description of the manuscript, turns its header into tags and imports every
/// facsimile graphic whose url contains "sap.jpg" as a page.
/// </para>
/// </summary>
/// <param name="resourceNumber">W619</param>
/// <param name="friendlyUrl">golestan-walters-01 (<paramref name="resourceNumber"/> is used when it is null or empty)</param>
/// <returns>
/// a successful result once the work item is queued; a failed result with a message when a job for
/// this resource is already registered (and not failed/aborted), when the friendly url is already
/// taken, or when queuing throws
/// </returns>
private async Task<RServiceResult<bool>> StartImportingFromWalters(string resourceNumber, string friendlyUrl)
{
    string url = $"http://www.thedigitalwalters.org/Data/WaltersManuscripts/ManuscriptDescriptions/{resourceNumber}_tei.xml";

    // Any job for this resource that is not Failed/Aborted blocks a new import.
    if (
        (
        await _context.ImportJobs
            .Where(j => j.JobType == JobType.Walters && j.ResourceNumber == resourceNumber && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted))
            .SingleOrDefaultAsync()
        )
        != null
        )
    {
        return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing {url}");
    }

    if (string.IsNullOrEmpty(friendlyUrl))
    {
        friendlyUrl = resourceNumber;
    }

    if ((await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).SingleOrDefaultAsync()) != null)
    {
        return new RServiceResult<bool>(false, $"duplicated friendly url '{friendlyUrl}'");
    }

    ImportJob job = new ImportJob()
    {
        JobType = JobType.Walters,
        ResourceNumber = resourceNumber,
        FriendlyUrl = friendlyUrl,
        SrcUrl = url,
        QueueTime = DateTime.Now,
        ProgressPercent = 0,
        Status = ImportJobStatus.NotStarted
    };
    await _context.ImportJobs.AddAsync(job);
    await _context.SaveChangesAsync();

    // Stores a Failed state for the job using a short-lived context of its own.
    async Task FailJobAsync(string reason)
    {
        using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
        {
            job.EndTime = DateTime.Now;
            job.Status = ImportJobStatus.Failed;
            job.Exception = reason;
            importJobUpdaterDb.Update(job);
            await importJobUpdaterDb.SaveChangesAsync();
        }
    }

    try
    {
        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                try
                {
                    using (var client = new HttpClient())
                    using (var result = await client.GetAsync(url))
                    {
                        if (!result.IsSuccessStatusCode)
                        {
                            await FailJobAsync($"Http result is not ok ({result.StatusCode}) for {url}");
                            return;
                        }

                        using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                        {
                            RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from {url}", $"extracted from {url}")
                            {
                                Status = PublishStatus.Draft,
                                DateTime = DateTime.Now,
                                LastModified = DateTime.Now,
                                CoverItemIndex = 0,
                                FriendlyUrl = friendlyUrl
                            };

                            List<RTagValue> meta = new List<RTagValue>();
                            RTagValue tag;

                            string xml = await result.Content.ReadAsStringAsync();

                            using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                            {
                                job.StartTime = DateTime.Now;
                                job.Status = ImportJobStatus.Running;
                                job.SrcContent = xml;
                                importJobUpdaterDb.Update(job);
                                await importJobUpdaterDb.SaveChangesAsync();
                            }

                            // All elements read below live in the TEI namespace.
                            XNamespace tei = "http://www.tei-c.org/ns/1.0";
                            XElement elObject = XDocument.Parse(xml).Root;

                            tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1);
                            meta.Add(tag);

                            // Each header section below is optional: anything thrown while reading it
                            // (e.g. First() on a missing element) is deliberately ignored.
                            try
                            {
                                // first title only
                                foreach (var prop in elObject
                                    .Elements(tei + "teiHeader").First()
                                    .Elements(tei + "fileDesc").First()
                                    .Elements(tei + "titleStmt").First()
                                    .Elements(tei + "title"))
                                {
                                    string label = prop.Value;
                                    book.Name = book.NameInEnglish = book.Description = book.DescriptionInEnglish = label;

                                    tag = await TagHandler.PrepareAttribute(context, "Title", label, 1);
                                    meta.Add(tag);
                                    break;
                                }
                            }
                            catch
                            {
                                //ignore non-existing = null tags
                            }

                            try
                            {
                                // first author only
                                foreach (var prop in elObject
                                    .Elements(tei + "teiHeader").First()
                                    .Elements(tei + "fileDesc").First()
                                    .Elements(tei + "titleStmt").First()
                                    .Elements(tei + "author"))
                                {
                                    string label = prop.Value;
                                    tag = await TagHandler.PrepareAttribute(context, "Contributor Names", label, 1);
                                    meta.Add(tag);
                                    break;
                                }
                            }
                            catch
                            {
                                //ignore non-existing = null tags
                            }

                            try
                            {
                                // name of the first responsibility statement only
                                foreach (var prop in elObject
                                    .Elements(tei + "teiHeader").First()
                                    .Elements(tei + "fileDesc").First()
                                    .Elements(tei + "titleStmt").First()
                                    .Elements(tei + "respStmt"))
                                {
                                    string label = prop.Elements(tei + "name").First().Value;
                                    tag = await TagHandler.PrepareAttribute(context, "Contributor Names", label, 1);
                                    meta.Add(tag);
                                    break;
                                }
                            }
                            catch
                            {
                                //ignore non-existing = null tags
                            }

                            try
                            {
                                // every note
                                foreach (var prop in elObject
                                    .Elements(tei + "teiHeader").First()
                                    .Elements(tei + "fileDesc").First()
                                    .Elements(tei + "notesStmt").First()
                                    .Elements(tei + "note"))
                                {
                                    string label = prop.Value;
                                    tag = await TagHandler.PrepareAttribute(context, "Notes", label, 1);
                                    meta.Add(tag);
                                }
                            }
                            catch
                            {
                                //ignore non-existing = null tags
                            }

                            tag = await TagHandler.PrepareAttribute(context, "Source", "Digitized Walters Manuscripts", 1);
                            tag.ValueSupplement = $"http://www.thedigitalwalters.org/Data/WaltersManuscripts/html/{job.ResourceNumber}/";
                            meta.Add(tag);

                            book.Tags = meta.ToArray();

                            List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();

                            const int importRetryCount = 5;
                            const int importRetryInitialSleep = 500;
                            string bookImagesPath = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl);

                            int order = 0;
                            foreach (var surface in elObject
                                .Elements(tei + "facsimile").First()
                                .Elements(tei + "surface"))
                            {
                                foreach (var graphic in surface.Elements(tei + "graphic"))
                                {
                                    // Only graphics whose url contains "sap.jpg" are imported; a graphic
                                    // without a url attribute is skipped instead of failing the whole job.
                                    var urlAttribute = graphic.Attribute("url");
                                    if (urlAttribute == null || !urlAttribute.Value.Contains("sap.jpg"))
                                    {
                                        continue;
                                    }

                                    order++;

                                    using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                                    {
                                        // Stores the page number, not a real percentage.
                                        job.ProgressPercent = order;
                                        importJobUpdaterDb.Update(job);
                                        await importJobUpdaterDb.SaveChangesAsync();
                                    }

                                    RArtifactItemRecord page = new RArtifactItemRecord()
                                    {
                                        Name = $"تصویر {order}",
                                        NameInEnglish = $"Image {order}",
                                        Description = "",
                                        DescriptionInEnglish = "",
                                        Order = order,
                                        FriendlyUrl = "p" + $"{order}".PadLeft(4, '0'),
                                        LastModified = DateTime.Now
                                    };

                                    string imageUrl = $"http://www.thedigitalwalters.org/Data/WaltersManuscripts/{resourceNumber}/data/W.{resourceNumber.Substring(1)}/{urlAttribute.Value}";

                                    tag = await TagHandler.PrepareAttribute(context, "Source", "Digitized Walters Manuscripts", 1);
                                    tag.ValueSupplement = imageUrl;
                                    page.Tags = new RTagValue[] { tag };

                                    string fileName = $"{order}".PadLeft(4, '0') + ".jpg";
                                    string origPath = Path.Combine(bookImagesPath, "orig", fileName);
                                    string normPath = Path.Combine(bookImagesPath, "norm", fileName);
                                    string thumbPath = Path.Combine(bookImagesPath, "thumb", fileName);

                                    // Reuse the files of an earlier run when all three renditions are present.
                                    bool recovered = false;
                                    if (File.Exists(origPath) && File.Exists(normPath) && File.Exists(thumbPath))
                                    {
                                        RServiceResult<RPictureFile> recoveredPicture = await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1,
                                            imageUrl, origPath, normPath, thumbPath, fileName, friendlyUrl);
                                        if (recoveredPicture.Result != null)
                                        {
                                            recovered = true;
                                            page.Images = new RPictureFile[] { recoveredPicture.Result };
                                            page.CoverImageIndex = 0;
                                            if (book.CoverItemIndex == (order - 1))
                                            {
                                                book.CoverImage = RPictureFile.Duplicate(recoveredPicture.Result);
                                            }
                                            pages.Add(page);
                                        }
                                    }
                                    if (recovered)
                                    {
                                        continue;
                                    }

                                    // Remove partial leftovers before downloading again.
                                    foreach (string stalePath in new[] { origPath, normPath, thumbPath })
                                    {
                                        if (File.Exists(stalePath))
                                        {
                                            File.Delete(stalePath);
                                        }
                                    }

                                    var imageResult = await client.GetAsync(imageUrl);
                                    try
                                    {
                                        // Retry only on 503, waiting a little longer each time without blocking the thread.
                                        int retryCount = 0;
                                        while (retryCount < importRetryCount && !imageResult.IsSuccessStatusCode && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable)
                                        {
                                            imageResult.Dispose();
                                            await Task.Delay(importRetryInitialSleep * (retryCount + 1));
                                            imageResult = await client.GetAsync(imageUrl);
                                            retryCount++;
                                        }

                                        if (!imageResult.IsSuccessStatusCode)
                                        {
                                            await FailJobAsync($"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}");
                                            return;
                                        }

                                        using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync())
                                        {
                                            RServiceResult<RPictureFile> picture = await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, fileName, friendlyUrl);
                                            if (picture.Result == null)
                                            {
                                                throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                                            }

                                            page.Images = new RPictureFile[] { picture.Result };
                                            page.CoverImageIndex = 0;
                                            if (book.CoverItemIndex == (order - 1))
                                            {
                                                book.CoverImage = RPictureFile.Duplicate(picture.Result);
                                            }
                                            pages.Add(page);
                                        }
                                    }
                                    finally
                                    {
                                        // Runs on return and exceptions too, so the response is never leaked.
                                        imageResult.Dispose();
                                    }

                                    GC.Collect();
                                }
                            }

                            book.Items = pages.ToArray();
                            book.ItemCount = pages.Count;

                            if (pages.Count == 0)
                            {
                                await FailJobAsync("Pages.Count == 0");
                                return;
                            }

                            await context.Artifacts.AddAsync(book);
                            await context.SaveChangesAsync();

                            job.ProgressPercent = 100;
                            job.Status = ImportJobStatus.Succeeded;
                            job.ArtifactId = book.Id;
                            job.EndTime = DateTime.Now;
                            context.Update(job);
                            await context.SaveChangesAsync();
                        }
                    }
                }
                catch (Exception exp)
                {
                    await FailJobAsync(exp.ToString());
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// examine site pages for broken links
/// <para>
/// Queues a background work item that clears the previously stored health check errors, scans
/// the html of every page for absolute links (<c>href="http...</c>) and records an error for each
/// <c>http://ganjoor.net</c> link and for each <c>https://ganjoor.net</c> link whose path does not
/// match exactly one page.
/// </para>
/// </summary>
/// <returns>always a successful result; the outcome of the scan itself is reported through the long running job record</returns>
public RServiceResult<bool> HealthCheckContents()
{
    _backgroundTaskQueue.QueueBackgroundWorkItem
    (
        async token =>
        {
            using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>())) //this is long running job, so _context might be already been freed/collected by GC
            {
                LongRunningJobProgressServiceEF jobProgressServiceEF = new LongRunningJobProgressServiceEF(context);
                var job = (await jobProgressServiceEF.NewJob("HealthCheckContents", "Query data")).Result;

                try
                {
                    var pages = await context.GanjoorPages.ToArrayAsync();

                    await jobProgressServiceEF.UpdateJob(job.Id, 0, "Examining Pages");

                    // Start from a clean slate: errors of the previous run are dropped.
                    var previousErrors = await context.GanjoorHealthCheckErrors.ToArrayAsync();
                    context.RemoveRange(previousErrors);
                    await context.SaveChangesAsync();

                    const string secureSiteRoot = "https://ganjoor.net";
                    int percent = 0;
                    for (int i = 0; i < pages.Length; i++)
                    {
                        // Report the real percentage (the job is only updated when it grows).
                        int currentPercent = (int)((long)i * 100 / pages.Length);
                        if (currentPercent > percent)
                        {
                            percent = currentPercent;
                            await jobProgressServiceEF.UpdateJob(job.Id, percent);
                        }

                        // A page without html simply has no links to examine.
                        string html = pages[i].HtmlText ?? "";

                        foreach (string fragment in html.Split(new[] { "href=\"" }, StringSplitOptions.RemoveEmptyEntries))
                        {
                            if (!fragment.StartsWith("http", StringComparison.Ordinal))
                            {
                                continue;
                            }

                            // Skip an unterminated attribute instead of letting Substring throw and abort the whole scan.
                            int closingQuote = fragment.IndexOf('"');
                            if (closingQuote < 0)
                            {
                                continue;
                            }

                            string url = fragment.Substring(0, closingQuote);

                            // Home page and the query based vazn/simi pages are not checked.
                            if (
                                url == "https://ganjoor.net"
                                ||
                                url == "https://ganjoor.net/"
                                ||
                                url.StartsWith("https://ganjoor.net/vazn/?", StringComparison.Ordinal)
                                ||
                                url.StartsWith("https://ganjoor.net/simi/?v", StringComparison.Ordinal)
                                )
                            {
                                continue;
                            }

                            if (url.StartsWith("http://ganjoor.net", StringComparison.Ordinal))
                            {
                                // Non-https internal link: recorded with both flags false.
                                context.GanjoorHealthCheckErrors.Add
                                (
                                    new GanjoorHealthCheckError()
                                    {
                                        ReferrerPageUrl = pages[i].FullUrl,
                                        TargetUrl = url,
                                        BrokenLink = false,
                                        MulipleTargets = false
                                    }
                                );
                                await context.SaveChangesAsync();
                            }
                            else if (url.StartsWith(secureSiteRoot, StringComparison.Ordinal))
                            {
                                // Compare the site relative path, without a trailing slash, to page urls.
                                string testUrl = url.Substring(secureSiteRoot.Length);
                                if (testUrl.Length > 0 && testUrl[testUrl.Length - 1] == '/')
                                {
                                    testUrl = testUrl.Substring(0, testUrl.Length - 1);
                                }

                                var pageCount = await context.GanjoorPages.Where(p => p.FullUrl == testUrl).CountAsync();
                                if (pageCount != 1)
                                {
                                    context.GanjoorHealthCheckErrors.Add
                                    (
                                        new GanjoorHealthCheckError()
                                        {
                                            ReferrerPageUrl = pages[i].FullUrl,
                                            TargetUrl = url,
                                            BrokenLink = pageCount == 0,
                                            MulipleTargets = pageCount != 0
                                        }
                                    );
                                    await context.SaveChangesAsync();
                                }
                            }
                        }
                    }

                    await jobProgressServiceEF.UpdateJob(job.Id, 100, "", true);
                }
                catch (Exception exp)
                {
                    await jobProgressServiceEF.UpdateJob(job.Id, 100, "", false, exp.ToString());
                }
            }
        }
    );

    return new RServiceResult<bool>(true);
}
/// <summary>
/// from http://cudl.lib.cam.ac.uk
/// <para>
/// Registers an <see cref="ImportJob"/> and queues a background work item that downloads the
/// json description of the item, turns its displayable descriptive metadata into tags and
/// imports the image of every entry of its "pages" array.
/// </para>
/// </summary>
/// <param name="resourceNumber">MS-RAS-00258</param>
/// <param name="friendlyUrl">friendly url of the artifact to create; <paramref name="resourceNumber"/> is used when it is null or empty</param>
/// <returns>
/// a successful result once the work item is queued; a failed result with a message when a job for
/// this resource is already registered (and not failed/aborted), when the friendly url is already
/// taken, or when queuing throws
/// </returns>
private async Task<RServiceResult<bool>> StartImportingFromCambridge(string resourceNumber, string friendlyUrl)
{
    string url = $"http://cudl.lib.cam.ac.uk/view/{resourceNumber}.json";

    // Any job for this resource that is not Failed/Aborted blocks a new import.
    if (
        (
        await _context.ImportJobs
            .Where(j => j.JobType == JobType.Cambridge && j.ResourceNumber == resourceNumber && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted))
            .SingleOrDefaultAsync()
        )
        != null
        )
    {
        return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing {url}");
    }

    if (string.IsNullOrEmpty(friendlyUrl))
    {
        friendlyUrl = resourceNumber;
    }

    if ((await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).SingleOrDefaultAsync()) != null)
    {
        return new RServiceResult<bool>(false, $"duplicated artifact friendly url '{friendlyUrl}'");
    }

    ImportJob job = new ImportJob()
    {
        JobType = JobType.Cambridge,
        ResourceNumber = resourceNumber,
        FriendlyUrl = friendlyUrl,
        SrcUrl = url,
        QueueTime = DateTime.Now,
        ProgressPercent = 0,
        Status = ImportJobStatus.NotStarted
    };
    await _context.ImportJobs.AddAsync(job);
    await _context.SaveChangesAsync();

    // Stores a Failed state for the job using a short-lived context of its own.
    async Task FailJobAsync(string reason)
    {
        using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
        {
            job.EndTime = DateTime.Now;
            job.Status = ImportJobStatus.Failed;
            job.Exception = reason;
            importJobUpdaterDb.Update(job);
            await importJobUpdaterDb.SaveChangesAsync();
        }
    }

    try
    {
        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                try
                {
                    using (var client = new HttpClient())
                    using (var result = await client.GetAsync(url))
                    {
                        if (!result.IsSuccessStatusCode)
                        {
                            await FailJobAsync($"Http result is not ok ({result.StatusCode}) for {url}");
                            return;
                        }

                        using (RMuseumDbContext context = new RMuseumDbContext(Configuration))
                        {
                            // Re-check at run time: the friendly url may have been taken while the job was waiting in the queue.
                            if ((await context.Artifacts.Where(a => a.FriendlyUrl == job.FriendlyUrl).SingleOrDefaultAsync()) != null)
                            {
                                await FailJobAsync("aborted because of duplicated friendly url");
                                return;
                            }

                            RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from {url}", $"extracted from {url}")
                            {
                                Status = PublishStatus.Draft,
                                DateTime = DateTime.Now,
                                LastModified = DateTime.Now,
                                CoverItemIndex = 0,
                                FriendlyUrl = friendlyUrl
                            };

                            List<RTagValue> meta = new List<RTagValue>();
                            RTagValue tag;

                            string json = await result.Content.ReadAsStringAsync();
                            var parsed = JObject.Parse(json);

                            book.Name = book.NameInEnglish =
                                parsed.SelectToken("logicalStructures[*].label").Value<string>();

                            // The "<.*?>" replacements strip html tags from display text.
                            book.Description = book.DescriptionInEnglish =
                                Regex.Replace(
                                    parsed.SelectToken("descriptiveMetadata[*].abstract.displayForm").Value<string>(),
                                    "<.*?>", string.Empty);

                            int tagOrder = 1;
                            foreach (JToken descriptiveMetadata in parsed.SelectTokens("$.descriptiveMetadata[*]").Children())
                            {
                                foreach (JToken child in descriptiveMetadata.Children())
                                {
                                    // Only entries that have a label and are flagged display == "True" become tags.
                                    JToken label = child.SelectToken("label");
                                    if (label == null)
                                    {
                                        continue;
                                    }
                                    JToken display = child.SelectToken("display");
                                    if (display == null || display.Value<string>() != "True")
                                    {
                                        continue;
                                    }

                                    string metaName = label.Value<string>();
                                    string metaValue = "";

                                    JToken displayForm = child.SelectToken("displayForm");
                                    if (displayForm != null)
                                    {
                                        metaValue = Regex.Replace(displayForm.Value<string>(), "<.*?>", string.Empty);
                                        tag = await TagHandler.PrepareAttribute(context, metaName, metaValue, tagOrder++);
                                        meta.Add(tag);
                                    }
                                    else if (child.SelectToken("value") != null)
                                    {
                                        // No display form on the entry itself: one tag per child of "value" that has one.
                                        foreach (JToken value in child.SelectTokens("value").Children())
                                        {
                                            JToken valueDisplayForm = value.SelectToken("displayForm");
                                            if (valueDisplayForm != null)
                                            {
                                                metaValue = Regex.Replace(valueDisplayForm.Value<string>(), "<.*?>", string.Empty);
                                                tag = await TagHandler.PrepareAttribute(context, metaName, metaValue, tagOrder++);
                                                meta.Add(tag);
                                            }
                                        }
                                    }
                                }
                            }

                            string imageReproPageURL = "https://image01.cudl.lib.cam.ac.uk";

                            using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                            {
                                job.StartTime = DateTime.Now;
                                job.Status = ImportJobStatus.Running;
                                job.SrcContent = json;
                                importJobUpdaterDb.Update(job);
                                await importJobUpdaterDb.SaveChangesAsync();
                            }

                            tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Source", "University of Cambridge Digital Library", 1);
                            string viewerUrl = $"http://cudl.lib.cam.ac.uk/view/{resourceNumber}";
                            tag.ValueSupplement = viewerUrl;
                            meta.Add(tag);

                            book.Tags = meta.ToArray();

                            List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();

                            const int importRetryCount = 200;
                            const int importRetryInitialSleep = 500;
                            string bookImagesPath = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl);

                            int order = 0;
                            foreach (JToken pageToken in parsed.SelectTokens("$.pages").Children())
                            {
                                order++;

                                using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                                {
                                    // Stores the page number, not a real percentage.
                                    job.ProgressPercent = order;
                                    importJobUpdaterDb.Update(job);
                                    await importJobUpdaterDb.SaveChangesAsync();
                                }

                                string imageUrl = imageReproPageURL + pageToken.SelectToken("downloadImageURL").Value<string>();

                                RArtifactItemRecord page = new RArtifactItemRecord()
                                {
                                    Name = $"تصویر {order}",
                                    NameInEnglish = $"Image {order}",
                                    Description = "",
                                    DescriptionInEnglish = "",
                                    Order = order,
                                    FriendlyUrl = "p" + $"{order}".PadLeft(4, '0'),
                                    LastModified = DateTime.Now
                                };

                                List<RTagValue> pageMata = new List<RTagValue>();
                                tag = await TagHandler.PrepareAttribute(context, "Source", "University of Cambridge Digital Library", 1);
                                tag.ValueSupplement = $"{viewerUrl}/{order}";
                                pageMata.Add(tag);

                                JToken pageLabel = pageToken.SelectToken("label");
                                if (pageLabel != null)
                                {
                                    tag = await TagHandler.PrepareAttribute(context, "Label", pageLabel.Value<string>(), 1);
                                    pageMata.Add(tag);
                                }

                                page.Tags = pageMata.ToArray();

                                string fileName = $"{order}".PadLeft(4, '0') + ".jpg";
                                string origPath = Path.Combine(bookImagesPath, "orig", fileName);
                                string normPath = Path.Combine(bookImagesPath, "norm", fileName);
                                string thumbPath = Path.Combine(bookImagesPath, "thumb", fileName);

                                // Reuse the files of an earlier run when all three renditions are present.
                                bool recovered = false;
                                if (File.Exists(origPath) && File.Exists(normPath) && File.Exists(thumbPath))
                                {
                                    RServiceResult<RPictureFile> recoveredPicture = await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1,
                                        imageUrl, origPath, normPath, thumbPath, fileName, friendlyUrl);
                                    if (recoveredPicture.Result != null)
                                    {
                                        recovered = true;
                                        page.Images = new RPictureFile[] { recoveredPicture.Result };
                                        page.CoverImageIndex = 0;
                                        if (book.CoverItemIndex == (order - 1))
                                        {
                                            book.CoverImage = RPictureFile.Duplicate(recoveredPicture.Result);
                                        }
                                        pages.Add(page);
                                    }
                                }
                                if (recovered)
                                {
                                    continue;
                                }

                                // Remove partial leftovers before downloading again.
                                foreach (string stalePath in new[] { origPath, normPath, thumbPath })
                                {
                                    if (File.Exists(stalePath))
                                    {
                                        File.Delete(stalePath);
                                    }
                                }

                                var imageResult = await client.GetAsync(imageUrl);
                                try
                                {
                                    if (imageResult.StatusCode == HttpStatusCode.NotFound)
                                    {
                                        break; //finished
                                    }

                                    // Retry only on 503, waiting a little longer each time without blocking the thread.
                                    int retryCount = 0;
                                    while (retryCount < importRetryCount && !imageResult.IsSuccessStatusCode && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable)
                                    {
                                        imageResult.Dispose();
                                        await Task.Delay(importRetryInitialSleep * (retryCount + 1));
                                        imageResult = await client.GetAsync(imageUrl);
                                        retryCount++;
                                    }

                                    if (!imageResult.IsSuccessStatusCode)
                                    {
                                        await FailJobAsync($"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}");
                                        return;
                                    }

                                    using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync())
                                    {
                                        RServiceResult<RPictureFile> picture = await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, fileName, friendlyUrl);
                                        if (picture.Result == null)
                                        {
                                            throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                                        }

                                        page.Images = new RPictureFile[] { picture.Result };
                                        page.CoverImageIndex = 0;
                                        if (book.CoverItemIndex == (order - 1))
                                        {
                                            book.CoverImage = RPictureFile.Duplicate(picture.Result);
                                        }
                                        pages.Add(page);
                                    }
                                }
                                finally
                                {
                                    // Runs on break, return and exceptions too, so the response is never leaked.
                                    imageResult.Dispose();
                                }

                                GC.Collect();
                            }

                            book.Items = pages.ToArray();
                            book.ItemCount = pages.Count;

                            if (pages.Count == 0)
                            {
                                await FailJobAsync("Pages.Count == 0");
                                return;
                            }

                            await context.Artifacts.AddAsync(book);
                            await context.SaveChangesAsync();

                            job.ProgressPercent = 100;
                            job.Status = ImportJobStatus.Succeeded;
                            job.ArtifactId = book.Id;
                            job.EndTime = DateTime.Now;
                            context.Update(job);
                            await context.SaveChangesAsync();
                        }
                    }
                }
                catch (Exception exp)
                {
                    await FailJobAsync(exp.ToString());
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// Reads the IIIF manifest of a Harvard resource and creates one artifact item (page) per canvas.
/// For every canvas the picture is either recovered from files already present in the picture
/// storage or downloaded again; job progress and http failures are persisted while it runs.
/// </summary>
/// <param name="hardvardResourceNumber">resource number appended to https://iiif.lib.harvard.edu/manifests/drs:</param>
/// <param name="job">import job whose progress and failure state are saved through short-lived contexts</param>
/// <param name="friendlyUrl">friendly url of the artifact, also used as the name of its folder under the image storage path</param>
/// <param name="context">database context handed to TagHandler.PrepareAttribute</param>
/// <param name="book">artifact record; its names, descriptions and cover image are filled in here</param>
/// <param name="meta">artifact level tags; "Title" and "Contributor Names" tags are appended to it</param>
/// <returns>
/// the imported pages; a null list with the error text "failed" when the manifest or an image
/// request does not succeed (the job is marked as failed in that case)
/// </returns>
private async Task<RServiceResult<List<RArtifactItemRecord>>> _InternalHarvardJsonImport(string hardvardResourceNumber, ImportJob job, string friendlyUrl, RMuseumDbContext context, RArtifactMasterRecord book, List<RTagValue> meta)
{
    // a 503 response is retried up to maxRetryCount times, waiting retryBaseDelayMs * attempt between tries
    const int maxRetryCount = 5;
    const int retryBaseDelayMs = 500;

    // saves the failed state of the job through a context of its own
    async Task MarkJobFailedAsync(string reason)
    {
        using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
        {
            job.EndTime = DateTime.Now;
            job.Status = ImportJobStatus.Failed;
            job.Exception = reason;
            importJobUpdaterDb.Update(job);
            await importJobUpdaterDb.SaveChangesAsync();
        }
    }

    List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();
    using (var client = new HttpClient())
    using (var jsonResult = await client.GetAsync($"https://iiif.lib.harvard.edu/manifests/drs:{hardvardResourceNumber}"))
    {
        if (!jsonResult.IsSuccessStatusCode)
        {
            await MarkJobFailedAsync($"Http result is not ok ({jsonResult.StatusCode}) for https://iiif.lib.harvard.edu/manifests/drs:{hardvardResourceNumber}");
            return new RServiceResult<List<RArtifactItemRecord>>(null, "failed");
        }

        string json = await jsonResult.Content.ReadAsStringAsync();
        var parsed = JObject.Parse(json);

        // the manifest label is used for all four name/description fields of the artifact
        book.Name = book.NameInEnglish = book.Description = book.DescriptionInEnglish = parsed.SelectToken("label").Value<string>();

        RTagValue tag = await TagHandler.PrepareAttribute(context, "Title", book.Name, 1);
        meta.Add(tag);
        tag = await TagHandler.PrepareAttribute(context, "Contributor Names", "تعیین نشده", 1);
        meta.Add(tag);

        // labels of the manifest structures: one that contains a canvas label replaces that label below
        List<string> labels = new List<string>();
        foreach (JToken structure in parsed.SelectTokens("$.structures[*].label"))
        {
            labels.Add(structure.Value<string>());
        }

        int order = 0;
        var canvases = parsed.SelectToken("sequences").First().SelectToken("canvases").ToArray();
        int pageCount = canvases.Length;
        foreach (JToken canvas in canvases)
        {
            order++;
            using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
            {
                job.ProgressPercent = order * 100 / (decimal)pageCount;
                importJobUpdaterDb.Update(job);
                await importJobUpdaterDb.SaveChangesAsync();
            }

            // looked up once; SingleOrDefault still throws when several structure labels contain the canvas label
            string canvasLabel = canvas.SelectToken("label").Value<string>();
            string label = labels.Where(l => l.IndexOf(canvasLabel) != -1).SingleOrDefault() ?? canvasLabel;

            string imageUrl = canvas.SelectTokens("images[*]").First().SelectToken("resource").SelectToken("@id").Value<string>();
            string paddedOrder = $"{order}".PadLeft(4, '0');

            RArtifactItemRecord page = new RArtifactItemRecord()
            {
                Name = $"تصویر {order}",
                NameInEnglish = label,
                Description = "",
                DescriptionInEnglish = "",
                Order = order,
                FriendlyUrl = $"p{paddedOrder}",
                LastModified = DateTime.Now
            };

            tag = await TagHandler.PrepareAttribute(context, "Source", "Harvard University Islamic Heritage Project", 1);
            tag.ValueSupplement = imageUrl;
            page.Tags = new RTagValue[] { tag };

            if (string.IsNullOrEmpty(imageUrl))
            {
                // a canvas without an image url produces no page
                continue;
            }

            // the three renditions of a page image live in sibling folders under the artifact folder
            string fileName = paddedOrder + ".jpg";
            string artifactFolder = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl);
            string origPath = Path.Combine(Path.Combine(artifactFolder, "orig"), fileName);
            string normPath = Path.Combine(Path.Combine(artifactFolder, "norm"), fileName);
            string thumbPath = Path.Combine(Path.Combine(artifactFolder, "thumb"), fileName);

            bool recovered = false;
            if (File.Exists(origPath) && File.Exists(normPath) && File.Exists(thumbPath))
            {
                RServiceResult<RPictureFile> picture = await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1, imageUrl, origPath, normPath, thumbPath, fileName, friendlyUrl);
                if (picture.Result != null)
                {
                    recovered = true;
                    page.Images = new RPictureFile[] { picture.Result };
                    page.CoverImageIndex = 0;
                    if (book.CoverItemIndex == (order - 1))
                    {
                        book.CoverImage = RPictureFile.Duplicate(picture.Result);
                    }
                    pages.Add(page);
                }
            }

            if (recovered)
            {
                continue;
            }

            // files that could not be recovered are removed before the image is downloaded again
            foreach (string stalePath in new string[] { origPath, normPath, thumbPath })
            {
                if (File.Exists(stalePath))
                {
                    File.Delete(stalePath);
                }
            }

            var imageResult = await client.GetAsync(imageUrl);
            try
            {
                int retryCount = 0;
                while (retryCount < maxRetryCount && !imageResult.IsSuccessStatusCode && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable)
                {
                    imageResult.Dispose();
                    // asynchronous wait: does not block the thread running the import
                    await Task.Delay(retryBaseDelayMs * (retryCount + 1));
                    imageResult = await client.GetAsync(imageUrl);
                    retryCount++;
                }

                if (!imageResult.IsSuccessStatusCode)
                {
                    await MarkJobFailedAsync($"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}");
                    return new RServiceResult<List<RArtifactItemRecord>>(null, "failed");
                }

                using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync())
                {
                    RServiceResult<RPictureFile> picture = await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, fileName, friendlyUrl);
                    if (picture.Result == null)
                    {
                        throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                    }

                    page.Images = new RPictureFile[] { picture.Result };
                    page.CoverImageIndex = 0;
                    if (book.CoverItemIndex == (order - 1))
                    {
                        book.CoverImage = RPictureFile.Duplicate(picture.Result);
                    }
                    pages.Add(page);
                }
            }
            finally
            {
                // releases the response on every path, including exceptions
                imageResult.Dispose();
            }

            // NOTE(review): explicit collection after each downloaded image, kept as is;
            // presumably meant to cap memory while importing large scans - confirm it is still needed
            GC.Collect();
        }
    }

    return new RServiceResult<List<RArtifactItemRecord>>(pages);
}
/// <summary>
/// import catalogue from ganjoor.net MySql db:
/// golha collections with their programs and tracks, then singers with their albums and tracks
/// </summary>
/// <param name="jobName">prefix of the progress messages reported for the job</param>
/// <param name="jobProgressServiceEF">service used to report progress and failure of the job</param>
/// <param name="job">long running job being reported on</param>
/// <returns>true on success; false along with the exception text when anything fails</returns>
public async Task<RServiceResult<bool>> ImportFromMySql(string jobName, LongRunningJobProgressServiceEF jobProgressServiceEF, RLongRunningJobStatus job)
{
    try
    {
        // the configuration section is resolved once and reused for every connection string part
        var mySqlSettings = Configuration.GetSection("AudioMySqlServer");
        string connectionString = $"server={mySqlSettings["Server"]};uid={mySqlSettings["SongsUsername"]};pwd={mySqlSettings["SongsPassword"]};database={mySqlSettings["SongsDatabase"]};charset=utf8;convert zero datetime=True";

        using (RMuseumDbContext context = new RMuseumDbContext(Configuration)) //this is a long running job, so it works on a context of its own instead of _context
        using (MySqlConnection connection = new MySqlConnection(connectionString))
        {
            // runs a query on the connection and returns its rows in a new table the caller disposes
            async Task<DataTable> LoadTableAsync(string query)
            {
                DataTable table = new DataTable();
                try
                {
                    using (MySqlDataAdapter adapter = new MySqlDataAdapter(query, connection))
                    {
                        await adapter.FillAsync(table);
                    }
                    return table;
                }
                catch
                {
                    table.Dispose();
                    throw;
                }
            }

            job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, $"{jobName} - import golha data - pre open connection")).Result;

            // opened asynchronously so the calling thread is not blocked while connecting
            await connection.OpenAsync();

            using (DataTable collectionRows = await LoadTableAsync("SELECT col_id, name FROM golha_collections ORDER BY col_id"))
            {
                foreach (DataRow row in collectionRows.Rows)
                {
                    GolhaCollection collection = new GolhaCollection()
                    {
                        Id = int.Parse(row["col_id"].ToString()),
                        Name = row["name"].ToString(),
                        Programs = new List<GolhaProgram>()
                    };

                    job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, $"{jobName} - import golha data - golha_collections: {collection.Id}")).Result;

                    // only integers parsed from the source rows are interpolated into the queries below
                    using (DataTable programRows = await LoadTableAsync($"SELECT program_id, title, progarm_order, url, mp3 FROM golha_programs WHERE col_id = {collection.Id} ORDER BY program_id"))
                    {
                        foreach (DataRow golhaProgram in programRows.Rows)
                        {
                            GolhaProgram program = new GolhaProgram()
                            {
                                Id = int.Parse(golhaProgram["program_id"].ToString()),
                                Title = golhaProgram["title"].ToString(),
                                ProgramOrder = int.Parse(golhaProgram["progarm_order"].ToString()),
                                Url = golhaProgram["url"].ToString(),
                                Mp3 = golhaProgram["mp3"].ToString(),
                                Tracks = new List<GolhaTrack>()
                            };

                            using (DataTable trackRows = await LoadTableAsync($"SELECT track_id, track_no, timing, title FROM golha_tracks WHERE program_id = {program.Id} ORDER BY track_no"))
                            {
                                foreach (DataRow golhaTrack in trackRows.Rows)
                                {
                                    program.Tracks.Add
                                    (
                                        new GolhaTrack()
                                        {
                                            Id = int.Parse(golhaTrack["track_id"].ToString()),
                                            TrackNo = int.Parse(golhaTrack["track_no"].ToString()),
                                            Timing = golhaTrack["timing"].ToString(),
                                            Title = golhaTrack["title"].ToString(),
                                            Blocked = false,
                                        }
                                    );
                                }
                            }

                            collection.Programs.Add(program);
                        }
                    }

                    // every golha collection is saved as soon as it is complete
                    context.GolhaCollections.Add(collection);
                    await context.SaveChangesAsync();
                }
            }

            job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, $"{jobName} - import singers data")).Result;

            using (DataTable singerRows = await LoadTableAsync("SELECT artist_id, artist_name, artist_beeptunesurl FROM ganja_artists ORDER BY artist_id"))
            {
                foreach (DataRow row in singerRows.Rows)
                {
                    int artistId = int.Parse(row["artist_id"].ToString());
                    GanjoorSinger singer = new GanjoorSinger()
                    {
                        Name = row["artist_name"].ToString(),
                        Url = row["artist_beeptunesurl"].ToString(),
                        Albums = new List<GanjoorAlbum>()
                    };

                    using (DataTable albumRows = await LoadTableAsync($"SELECT album_id, album_name, album_beeptunesurl FROM ganja_albums WHERE album_artistid = {artistId} ORDER BY album_id"))
                    {
                        foreach (DataRow rowAlbum in albumRows.Rows)
                        {
                            int albumId = int.Parse(rowAlbum["album_id"].ToString());
                            GanjoorAlbum album = new GanjoorAlbum()
                            {
                                Name = rowAlbum["album_name"].ToString(),
                                Url = rowAlbum["album_beeptunesurl"].ToString(),
                                Tracks = new List<GanjoorTrack>()
                            };

                            using (DataTable albumTrackRows = await LoadTableAsync($"SELECT track_name, track_beeptunesurl FROM ganja_tracks WHERE album_id = {albumId} ORDER BY track_id"))
                            {
                                foreach (DataRow rowTrack in albumTrackRows.Rows)
                                {
                                    album.Tracks.Add
                                    (
                                        new GanjoorTrack()
                                        {
                                            Name = rowTrack["track_name"].ToString(),
                                            Url = rowTrack["track_beeptunesurl"].ToString(),
                                            Blocked = false
                                        }
                                    );
                                }
                            }

                            singer.Albums.Add(album);
                        }
                    }

                    // singers are only added here; they are saved together after the loop
                    context.GanjoorSingers.Add(singer);
                    job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, $"{jobName} - import singers data - {singer.Name}")).Result;
                }
            }

            job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, $"{jobName} - finalizing singers data")).Result;
            await context.SaveChangesAsync();
        }

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        await jobProgressServiceEF.UpdateJob(job.Id, job.Progress, "", false, exp.ToString());
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// queues a background import of an artifact from https://curiosity.lib.harvard.edu
/// </summary>
/// <param name="url">catalog page url, example: https://curiosity.lib.harvard.edu/islamic-heritage-project/catalog/40-990114893240203941</param>
/// <param name="friendlyUrl">friendly url of the artifact to create; must be non-empty and not used by an existing artifact</param>
/// <returns>true when the job has been queued; false with a message when a check fails or queuing throws</returns>
private async Task<RServiceResult<bool>> StartImportingFromHarvard(string url, string friendlyUrl)
{
    try
    {
        // a job for the same url that is neither failed nor aborted blocks a new one
        var unfinishedJob = await _context.ImportJobs
                            .Where(j => j.JobType == JobType.Harvard && j.ResourceNumber == url && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted))
                            .SingleOrDefaultAsync();
        if (unfinishedJob != null)
        {
            return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing {url}");
        }

        if (string.IsNullOrEmpty(friendlyUrl))
        {
            return new RServiceResult<bool>(false, $"Friendly url is empty, url = {url}");
        }

        var sameUrlArtifact = await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).SingleOrDefaultAsync();
        if (sameUrlArtifact != null)
        {
            return new RServiceResult<bool>(false, $"duplicated friendly url '{friendlyUrl}'");
        }

        ImportJob job = new ImportJob()
        {
            JobType = JobType.Harvard,
            ResourceNumber = url,
            FriendlyUrl = friendlyUrl,
            SrcUrl = url,
            QueueTime = DateTime.Now,
            ProgressPercent = 0,
            Status = ImportJobStatus.NotStarted
        };
        await _context.ImportJobs.AddAsync(job);
        await _context.SaveChangesAsync();

        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                // stores a failed state on the job through a context of its own
                async Task FailJobAsync(string error)
                {
                    using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                    {
                        job.EndTime = DateTime.Now;
                        job.Status = ImportJobStatus.Failed;
                        job.Exception = error;
                        importJobUpdaterDb.Update(job);
                        await importJobUpdaterDb.SaveChangesAsync();
                    }
                }

                try
                {
                    using (var client = new HttpClient())
                    using (var result = await client.GetAsync(url))
                    {
                        if (!result.IsSuccessStatusCode)
                        {
                            await FailJobAsync($"Http result is not ok ({result.StatusCode}) for {url}");
                            return;
                        }

                        using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                        {
                            RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from {url}", $"extracted from {url}")
                            {
                                Status = PublishStatus.Draft,
                                DateTime = DateTime.Now,
                                LastModified = DateTime.Now,
                                CoverItemIndex = 0,
                                FriendlyUrl = friendlyUrl
                            };
                            List<RTagValue> meta = new List<RTagValue>();

                            string html = await result.Content.ReadAsStringAsync();

                            // the job is marked as running and keeps the downloaded page
                            using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                            {
                                job.StartTime = DateTime.Now;
                                job.Status = ImportJobStatus.Running;
                                job.SrcContent = html;
                                importJobUpdaterDb.Update(job);
                                await importJobUpdaterDb.SaveChangesAsync();
                            }

                            // every <dt ...>name: ... <dd ...>values</dd> pair of the page becomes one tag per <br/> separated value
                            for (int cursor = html.IndexOf("<dt"); cursor != -1; cursor = html.IndexOf("<dt", cursor + 1))
                            {
                                cursor = html.IndexOf(">", cursor);
                                if (cursor == -1)
                                {
                                    break;
                                }
                                cursor++;
                                string tagName = html.Substring(cursor, html.IndexOf(":", cursor) - cursor);

                                cursor = html.IndexOf("<dd", cursor);
                                if (cursor == -1)
                                {
                                    break;
                                }
                                cursor = html.IndexOf(">", cursor);
                                if (cursor == -1)
                                {
                                    break;
                                }
                                cursor++;
                                string tagValues = html.Substring(cursor, html.IndexOf("</dd>", cursor) - cursor);

                                foreach (string rawValue in tagValues.Split("<br/>", StringSplitOptions.RemoveEmptyEntries))
                                {
                                    bool isLink = rawValue.IndexOf("<a href=") != -1;
                                    string tagValue = rawValue;
                                    if (isLink)
                                    {
                                        // keeps only the text between the first '>' and the next '<'
                                        string afterOpeningTag = tagValue.Substring(tagValue.IndexOf('>') + 1);
                                        tagValue = afterOpeningTag.Substring(0, afterOpeningTag.IndexOf('<'));
                                    }

                                    RTagValue valueTag = await TagHandler.PrepareAttribute(context, tagName, tagValue, 1);
                                    if (isLink)
                                    {
                                        valueTag.ValueSupplement = tagValue;
                                    }
                                    meta.Add(valueTag);
                                }
                            }

                            RTagValue bookTypeTag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1);
                            meta.Add(bookTypeTag);
                            RTagValue manuscriptTypeTag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1);
                            meta.Add(manuscriptTypeTag);
                            RTagValue sourceTag = await TagHandler.PrepareAttribute(context, "Source", "Harvard University Islamic Heritage Project", 1);
                            sourceTag.ValueSupplement = $"{job.SrcUrl}";
                            meta.Add(sourceTag);

                            // the resource number is the text following the viewer link prefix, up to the next double quote
                            int resourceStart = html.IndexOf("https://pds.lib.harvard.edu/pds/view/");
                            if (resourceStart == -1)
                            {
                                await FailJobAsync("Not found https://pds.lib.harvard.edu/pds/view/");
                                return;
                            }
                            resourceStart += "https://pds.lib.harvard.edu/pds/view/".Length;
                            string hardvardResourceNumber = html.Substring(resourceStart, html.IndexOf('\"', resourceStart) - resourceStart);

                            var importResult = await _InternalHarvardJsonImport(hardvardResourceNumber, job, friendlyUrl, context, book, meta);
                            List<RArtifactItemRecord> pages = importResult.Result;
                            if (pages == null)
                            {
                                // nothing more is recorded here for a null page list
                                return;
                            }

                            book.Tags = meta.ToArray();
                            book.Items = pages.ToArray();
                            book.ItemCount = pages.Count;

                            if (pages.Count == 0)
                            {
                                await FailJobAsync("Pages.Count == 0");
                                return;
                            }

                            await context.Artifacts.AddAsync(book);
                            await context.SaveChangesAsync();

                            job.ProgressPercent = 100;
                            job.Status = ImportJobStatus.Succeeded;
                            job.ArtifactId = book.Id;
                            job.EndTime = DateTime.Now;
                            context.Update(job);
                            await context.SaveChangesAsync();
                        }
                    }
                }
                catch (Exception exp)
                {
                    await FailJobAsync(exp.ToString());
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// imports poem music tracks from the ganja_ptracks table of a MySql db into GanjoorPoemMusicTracks
/// </summary>
/// <param name="jobName">prefix of the progress messages reported for the job</param>
/// <param name="context">database context the tracks are added to and saved through</param>
/// <param name="jobProgressServiceEF">service used to report progress and failure of the job</param>
/// <param name="job">long running job being reported on</param>
/// <param name="approved">
/// true reads the db configured by Username/Password/Database, false the one configured by
/// SongsUsername/SongsPassword/SongsDatabase; the value is also stored on every imported track
/// </param>
/// <returns>true on success; false along with the exception text when anything fails</returns>
private async Task<RServiceResult<bool>> _ImportPoemSongsDataFromMySql(string jobName, RMuseumDbContext context, LongRunningJobProgressServiceEF jobProgressServiceEF, RLongRunningJobStatus job, bool approved)
{
    try
    {
        job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, $"{jobName} - pre mysql data fetch")).Result;

        // the configuration section is resolved once; only the credential/database keys depend on "approved"
        var mySqlSettings = Configuration.GetSection("AudioMySqlServer");
        string connectionString =
            approved ?
            $"server={mySqlSettings["Server"]};uid={mySqlSettings["Username"]};pwd={mySqlSettings["Password"]};database={mySqlSettings["Database"]};charset=utf8;convert zero datetime=True"
            :
            $"server={mySqlSettings["Server"]};uid={mySqlSettings["SongsUsername"]};pwd={mySqlSettings["SongsPassword"]};database={mySqlSettings["SongsDatabase"]};charset=utf8;convert zero datetime=True";

        using (MySqlConnection connection = new MySqlConnection(connectionString))
        {
            // opened asynchronously so the calling thread is not blocked while connecting
            await connection.OpenAsync();

            using (MySqlDataAdapter src = new MySqlDataAdapter(
                "SELECT poem_id, artist_name, artist_beeptunesurl, album_name, album_beeptunesurl, track_name, track_beeptunesurl, ptrack_typeid FROM ganja_ptracks ORDER BY id",
                connection))
            {
                job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, $"{jobName} - mysql")).Result;

                using (DataTable data = new DataTable())
                {
                    await src.FillAsync(data);

                    job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, $"{jobName} - processing approved poem songs")).Result;

                    foreach (DataRow row in data.Rows)
                    {
                        PoemMusicTrack track = new PoemMusicTrack()
                        {
                            TrackType = (PoemMusicTrackType)int.Parse(row["ptrack_typeid"].ToString()),
                            PoemId = int.Parse(row["poem_id"].ToString()),
                            ArtistName = row["artist_name"].ToString(),
                            ArtistUrl = row["artist_beeptunesurl"].ToString(),
                            AlbumName = row["album_name"].ToString(),
                            AlbumUrl = row["album_beeptunesurl"].ToString(),
                            TrackName = row["track_name"].ToString(),
                            TrackUrl = row["track_beeptunesurl"].ToString(),
                            ApprovalDate = DateTime.Now,
                            Description = "",
                            Approved = approved
                        };

                        // rows pointing to a poem that does not exist in the target db are skipped
                        var poem = await context.GanjoorPoems.Where(p => p.Id == track.PoemId).SingleOrDefaultAsync();
                        if (poem == null)
                        {
                            continue;
                        }

                        switch (track.TrackType)
                        {
                            case PoemMusicTrackType.BeepTunesOrKhosousi:
                            case PoemMusicTrackType.iTunes:
                                {
                                    // links the track to the catalogue track / singer having the same url, when there is one
                                    GanjoorTrack catalogueTrack = await context.GanjoorMusicCatalogueTracks.Where(m => m.Url == track.TrackUrl).FirstOrDefaultAsync();
                                    if (catalogueTrack != null)
                                    {
                                        track.GanjoorTrackId = catalogueTrack.Id;
                                    }

                                    GanjoorSinger singer = await context.GanjoorSingers.Where(s => s.Url == track.ArtistUrl).FirstOrDefaultAsync();
                                    if (singer != null)
                                    {
                                        track.SingerId = singer.Id;
                                    }
                                }
                                break;
                            case PoemMusicTrackType.Golha:
                                {
                                    // for golha rows the artist url column is parsed as the golha track id
                                    // and the artist name is merged into the album name
                                    track.AlbumName = $"{track.ArtistName} » {track.AlbumName}";
                                    track.ArtistName = "";
                                    track.GolhaTrackId = int.Parse(track.ArtistUrl);
                                    track.ArtistUrl = "";
                                }
                                break;
                        }

                        context.GanjoorPoemMusicTracks.Add(track);
                    }

                    job = (await jobProgressServiceEF.UpdateJob(job.Id, 0, $"{jobName} - finalizing approved poem songs data")).Result;

                    // all tracks are saved together
                    await context.SaveChangesAsync();
                }
            }
        }

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        await jobProgressServiceEF.UpdateJob(job.Id, job.Progress, "", false, exp.ToString());
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary> /// from http://www.bl.uk /// </summary> /// <param name="resourceNumber">grenville_xli_f001r</param> /// <param name="friendlyUrl"></param> /// <returns></returns> private async Task <RServiceResult <bool> > StartImportingFromBritishLibrary(string resourceNumber, string friendlyUrl) { string url = $"http://www.bl.uk/manuscripts/Viewer.aspx?ref={resourceNumber}"; if ( ( await _context.ImportJobs .Where(j => j.JobType == JobType.BritishLibrary && j.ResourceNumber == resourceNumber && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted)) .SingleOrDefaultAsync() ) != null ) { return(new RServiceResult <bool>(false, $"Job is already scheduled or running for importing {url}")); } if (string.IsNullOrEmpty(friendlyUrl)) { friendlyUrl = resourceNumber; } if ( (await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).SingleOrDefaultAsync()) != null ) { return(new RServiceResult <bool>(false, $"duplicated artifact friendly url '{friendlyUrl}'")); } ImportJob job = new ImportJob() { JobType = JobType.BritishLibrary, ResourceNumber = resourceNumber, FriendlyUrl = friendlyUrl, SrcUrl = url, QueueTime = DateTime.Now, ProgressPercent = 0, Status = ImportJobStatus.NotStarted }; await _context.ImportJobs.AddAsync ( job ); await _context.SaveChangesAsync(); try { _backgroundTaskQueue.QueueBackgroundWorkItem ( async token => { try { using (var client = new HttpClient()) { client.Timeout = TimeSpan.FromMinutes(5); using (var result = await client.GetAsync(url)) { if (result.IsSuccessStatusCode) { using (RMuseumDbContext context = new RMuseumDbContext(Configuration)) { if ( (await context.Artifacts.Where(a => a.FriendlyUrl == job.FriendlyUrl).SingleOrDefaultAsync()) != null ) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "aborted because of duplicated friendly url"; importJobUpdaterDb.Update(job); await 
importJobUpdaterDb.SaveChangesAsync(); } return; } RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from {url}", $"extracted from {url}") { Status = PublishStatus.Draft, DateTime = DateTime.Now, LastModified = DateTime.Now, CoverItemIndex = 0, FriendlyUrl = friendlyUrl }; List <RTagValue> meta = new List <RTagValue>(); RTagValue tag; string html = await result.Content.ReadAsStringAsync(); using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.StartTime = DateTime.Now; job.Status = ImportJobStatus.Running; job.SrcContent = html; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } int nIdxStart = html.IndexOf("PageList"); if (nIdxStart == -1) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "PageList not found"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } nIdxStart = html.IndexOf("value=\"", nIdxStart); if (nIdxStart == -1) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "value after PageList not found"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } nIdxStart += "value=\"".Length; string strPageList = html.Substring(nIdxStart, html.IndexOf('"', nIdxStart) - nIdxStart); nIdxStart = html.IndexOf("TextList"); if (nIdxStart == -1) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "TextList not found"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } nIdxStart = html.IndexOf("value=\"", nIdxStart); if (nIdxStart == -1) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = 
DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "value after TextList not found"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } nIdxStart += "value=\"".Length; string strTextList = html.Substring(nIdxStart, html.IndexOf('"', nIdxStart) - nIdxStart); nIdxStart = html.IndexOf("TitleList"); if (nIdxStart == -1) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "TitleList not found"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } nIdxStart = html.IndexOf("value=\"", nIdxStart); if (nIdxStart == -1) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "value after TitleList not found"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } nIdxStart += "value=\"".Length; string strTitleList = html.Substring(nIdxStart, html.IndexOf('"', nIdxStart) - nIdxStart); string[] PageUrls = strPageList.Split("||", StringSplitOptions.None); string[] PageTexts = strTextList.Split("||", StringSplitOptions.None); string[] PageTitles = strTitleList.Split("||", StringSplitOptions.None); if (PageUrls.Length != PageTexts.Length || PageTexts.Length != PageTitles.Length) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "PageUrls.Length != PageTexts.Length || PageTexts.Length != PageTitles.Length"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } tag = await TagHandler.PrepareAttribute(context, "Title", "Untitled", 1); meta.Add(tag); tag = await TagHandler.PrepareAttribute(context, "Contributor Names", "Unknown", 1); meta.Add(tag); tag = await 
TagHandler.PrepareAttribute(context, "Type", "Book", 1); meta.Add(tag); tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1); meta.Add(tag); book.Tags = meta.ToArray(); tag = await TagHandler.PrepareAttribute(context, "Source", "British Library", 1); string viewerUrl = $"http://www.bl.uk/manuscripts/FullDisplay.aspx?ref={resourceNumber.Substring(0, resourceNumber.LastIndexOf('_'))}"; tag.ValueSupplement = viewerUrl; meta.Add(tag); List <RArtifactItemRecord> pages = new List <RArtifactItemRecord>(); int order = 0; for (int i = 0; i < PageUrls.Length; i++) { if (PageUrls[i] == "##") { continue; } order++; using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.ProgressPercent = order; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } RArtifactItemRecord page = new RArtifactItemRecord() { Name = $"تصویر {order}", NameInEnglish = $"Image {order}", Description = "", DescriptionInEnglish = "", Order = order, FriendlyUrl = $"p{$"{order}".PadLeft(4, '0')}", LastModified = DateTime.Now }; List <RTagValue> pageTags = new List <RTagValue>(); tag = await TagHandler.PrepareAttribute(context, "Source", "British Library", 1); tag.ValueSupplement = $"http://www.bl.uk/manuscripts/Viewer.aspx?ref={PageUrls[i]}"; pageTags.Add(tag); if (!string.IsNullOrEmpty(PageTitles[i])) { RTagValue toc = await TagHandler.PrepareAttribute(context, "Title in TOC", PageTitles[i], 1); toc.ValueSupplement = "1"; //font size pageTags.Add(toc); } if (!string.IsNullOrEmpty(PageTexts[i])) { tag = await TagHandler.PrepareAttribute(context, "Label", PageTexts[i], 1); pageTags.Add(tag); } page.Tags = pageTags.ToArray(); bool recovered = false; if ( File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "orig"), $"{order}".PadLeft(4, '0') + ".jpg") ) && File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "norm"), 
$"{order}".PadLeft(4, '0') + ".jpg") ) && File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "thumb"), $"{order}".PadLeft(4, '0') + ".jpg") ) ) { RServiceResult <RPictureFile> picture = await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1, viewerUrl, Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "orig"), $"{order}".PadLeft(4, '0') + ".jpg"), Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "norm"), $"{order}".PadLeft(4, '0') + ".jpg"), Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "thumb"), $"{order}".PadLeft(4, '0') + ".jpg"), $"{order}".PadLeft(4, '0') + ".jpg", friendlyUrl); if (picture.Result != null) { recovered = true; page.Images = new RPictureFile[] { picture.Result }; page.CoverImageIndex = 0; if (book.CoverItemIndex == (order - 1)) { book.CoverImage = RPictureFile.Duplicate(picture.Result); } pages.Add(page); } } if (!recovered) { if ( File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "orig"), $"{order}".PadLeft(4, '0') + ".jpg") ) ) { File.Delete ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "orig"), $"{order}".PadLeft(4, '0') + ".jpg") ); } if ( File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "norm"), $"{order}".PadLeft(4, '0') + ".jpg") ) ) { File.Delete ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "norm"), $"{order}".PadLeft(4, '0') + ".jpg") ); } if ( File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "thumb"), $"{order}".PadLeft(4, '0') + ".jpg") ) ) { File.Delete ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "thumb"), $"{order}".PadLeft(4, 
'0') + ".jpg") ); } /* * failed multithread attempt: * * BLTileMixer mixer = new BLTileMixer(); * RServiceResult<Stream> blResult = await mixer.DownloadMix(PageUrls[i], order); */ Dictionary <(int x, int y), Image> tiles = new Dictionary <(int x, int y), Image>(); int max_x = -1; for (int x = 0; ; x++) { string imageUrl = $"http://www.bl.uk/manuscripts/Proxy.ashx?view={PageUrls[i]}_files/13/{x}_0.jpg"; var imageResult = await client.GetAsync(imageUrl); int _ImportRetryCount = 5; int _ImportRetryInitialSleep = 500; int retryCount = 0; while (retryCount < _ImportRetryCount && !imageResult.IsSuccessStatusCode && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable) { imageResult.Dispose(); Thread.Sleep(_ImportRetryInitialSleep * (retryCount + 1)); imageResult = await client.GetAsync(imageUrl); retryCount++; } if (imageResult.IsSuccessStatusCode) { using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync()) { imageStream.Position = 0; try { Image tile = Image.FromStream(imageStream); tiles.Add((x, 0), tile); max_x = x; } catch (Exception aexp) { if (aexp is ArgumentException) { break; } throw aexp; } } } else { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = $"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } imageResult.Dispose(); return; } } int max_y = -1; for (int y = 1; ; y++) { string imageUrl = $"http://www.bl.uk/manuscripts/Proxy.ashx?view={PageUrls[i]}_files/13/0_{y}.jpg"; var imageResult = await client.GetAsync(imageUrl); int _ImportRetryCount = 5; int _ImportRetryInitialSleep = 500; int retryCount = 0; while (retryCount < _ImportRetryCount && !imageResult.IsSuccessStatusCode && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable) { imageResult.Dispose(); Thread.Sleep(_ImportRetryInitialSleep * 
(retryCount + 1)); imageResult = await client.GetAsync(imageUrl); retryCount++; } if (imageResult.IsSuccessStatusCode) { using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync()) { if (imageStream.Length <= 248) { break; } imageStream.Position = 0; try { Image tile = Image.FromStream(imageStream); tiles.Add((0, y), tile); max_y = y; } catch (Exception aexp) { if (aexp is ArgumentException) { break; } throw aexp; } } } else { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = $"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } imageResult.Dispose(); return; } } for (int x = 0; x <= max_x; x++) { for (int y = 0; y <= max_y; y++) { if (tiles.TryGetValue((x, y), out Image tmp) == false) { string imageUrl = $"http://www.bl.uk/manuscripts/Proxy.ashx?view={PageUrls[i]}_files/13/{x}_{y}.jpg"; var imageResult = await client.GetAsync(imageUrl); int _ImportRetryCount = 5; int _ImportRetryInitialSleep = 500; int retryCount = 0; while (retryCount < _ImportRetryCount && !imageResult.IsSuccessStatusCode && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable) { imageResult.Dispose(); Thread.Sleep(_ImportRetryInitialSleep * (retryCount + 1)); imageResult = await client.GetAsync(imageUrl); retryCount++; } if (imageResult.IsSuccessStatusCode) { using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync()) { if (imageStream.Length == 0) { break; } imageStream.Position = 0; tiles.Add((x, y), Image.FromStream(imageStream)); } } else { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = $"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}"; importJobUpdaterDb.Update(job); await 
importJobUpdaterDb.SaveChangesAsync(); } imageResult.Dispose(); return; } } } } int tileWidth = tiles[(0, 0)].Width; int tileHeight = tiles[(0, 0)].Height;