/// <summary>
/// Rotates the stored original of a picture by 90, 180 or 270 degrees and has ProcessImage
/// regenerate its files from the rotated image.
/// </summary>
/// <remarks>
/// The current thumbnail, normal size and original files are renamed to "*.bak" before the rotated
/// image is processed. The backups are deleted once the updated record has been saved; if processing
/// or saving fails they are moved back so that the stored files keep matching the saved record.
/// </remarks>
/// <param name="id">id of the picture file record</param>
/// <param name="degIn90mul">rotation angle in degrees, one of 90, 180 or 270</param>
/// <returns>the updated picture file, or a null result carrying an error message</returns>
public async Task<RServiceResult<RPictureFile>> RotateImage(Guid id, int degIn90mul)
{
    if (degIn90mul != 90 && degIn90mul != 180 && degIn90mul != 270)
    {
        return new RServiceResult<RPictureFile>(null, "degIn90mul could only be equal to these 3 values: 90, 180 and 270");
    }

    RPictureFile rPictureFile = await _context.PictureFiles
                                                .Where(p => p.Id == id)
                                                .SingleOrDefaultAsync();
    if (rPictureFile == null)
    {
        // Previously this case surfaced as a NullReferenceException inside GetImagePath.
        return new RServiceResult<RPictureFile>(null, $"picture file not found, id = {id}");
    }

    // Resolve the paths once, before ProcessImage gets hold of the record.
    // NOTE(review): ProcessImage may assign new stored file names to the record - confirm; with
    // paths captured here the backups are found again either way.
    string origPath = GetImagePath(rPictureFile).Result;
    string[] storedPaths =
    {
        GetImagePath(rPictureFile, "thumb").Result,
        GetImagePath(rPictureFile, "norm").Result,
        origPath
    };

    using (MemoryStream msRotated = new MemoryStream())
    {
        using (Image img = Image.FromFile(origPath))
        using (EncoderParameters jpegParameters = new EncoderParameters(1))
        {
            RotateFlipType rotation =
                degIn90mul == 90 ? RotateFlipType.Rotate90FlipNone
                : degIn90mul == 180 ? RotateFlipType.Rotate180FlipNone
                : RotateFlipType.Rotate270FlipNone;
            img.RotateFlip(rotation);

            // Re-encode as JPEG with quality 92.
            jpegParameters.Param[0] = new EncoderParameter(Encoder.Quality, 92L);
            img.Save(msRotated, GetEncoder(ImageFormat.Jpeg), jpegParameters);
        }

        // Save leaves the stream positioned at its end; rewind so a consumer reading from the
        // current position sees the image. NOTE(review): redundant if ProcessImage rewinds itself.
        msRotated.Position = 0;

        int movedCount = 0;
        bool committed = false;
        RServiceResult<RPictureFile> result;
        try
        {
            foreach (string storedPath in storedPaths)
            {
                File.Move(storedPath, storedPath + ".bak");
                movedCount++;
            }

            result = await ProcessImage(null, rPictureFile, msRotated, rPictureFile.OriginalFileName);
            if (!string.IsNullOrEmpty(result.ExceptionString))
            {
                // committed stays false, so the finally block restores the backups.
                return new RServiceResult<RPictureFile>(null, result.ExceptionString);
            }

            result.Result.LastModified = DateTime.Now;
            _context.PictureFiles.Update(result.Result);
            await _context.SaveChangesAsync();
            committed = true;
        }
        finally
        {
            if (!committed)
            {
                // Put back every file that was renamed, replacing whatever a failed run left behind.
                for (int i = 0; i < movedCount; i++)
                {
                    string backupPath = storedPaths[i] + ".bak";
                    if (File.Exists(backupPath))
                    {
                        if (File.Exists(storedPaths[i]))
                        {
                            File.Delete(storedPaths[i]);
                        }
                        File.Move(backupPath, storedPaths[i]);
                    }
                }
            }
        }

        // The new state is saved: the backups are no longer needed.
        foreach (string storedPath in storedPaths)
        {
            File.Delete(storedPath + ".bak");
        }

        return result;
    }
}
/// <summary>
/// Generates a cropped JPEG (for notes) from the normal size image of a picture, using a rectangle
/// given in thumbnail coordinates, and stores it through the simple image storage in "CroppedImages".
/// </summary>
/// <remarks>
/// The rectangle is scaled from thumbnail to normal size coordinates to pick the source area. The
/// output width is that scaled width capped at the thumbnail width; the output height keeps the
/// aspect ratio of the requested rectangle.
/// </remarks>
/// <param name="id">id of the picture file record</param>
/// <param name="left">left of the crop rectangle, in thumbnail coordinates</param>
/// <param name="top">top of the crop rectangle, in thumbnail coordinates</param>
/// <param name="width">width of the crop rectangle, in thumbnail coordinates; must be positive</param>
/// <param name="height">height of the crop rectangle, in thumbnail coordinates; must be positive</param>
/// <returns>the stored cropped image, or a null result carrying an error message</returns>
public async Task<RServiceResult<RImage>> GenerateCroppedImageBasedOnThumbnailCoordinates(Guid id, decimal left, decimal top, decimal width, decimal height)
{
    try
    {
        // Both values are used as divisors below; reject them up front with a readable message.
        if (width <= 0 || height <= 0)
        {
            return new RServiceResult<RImage>(null, "width and height must be greater than zero");
        }

        RPictureFile rPictureFile = await _context.PictureFiles
                                                    .Where(p => p.Id == id)
                                                    .SingleOrDefaultAsync();
        if (rPictureFile == null)
        {
            return new RServiceResult<RImage>(null, $"picture file not found, id = {id}");
        }

        if (rPictureFile.ThumbnailImageWidth <= 0 || rPictureFile.ThumbnailImageHeight <= 0)
        {
            return new RServiceResult<RImage>(null, "thumbnail dimensions of the picture file are not set");
        }

        // Source rectangle: the requested rectangle scaled to normal size image coordinates.
        int sourceLeft = (int)(left * rPictureFile.NormalSizeImageWidth / rPictureFile.ThumbnailImageWidth);
        int sourceTop = (int)(top * rPictureFile.NormalSizeImageHeight / rPictureFile.ThumbnailImageHeight);
        int sourceWidth = (int)(width * rPictureFile.NormalSizeImageWidth / rPictureFile.ThumbnailImageWidth);
        int sourceHeight = (int)(height * rPictureFile.NormalSizeImageHeight / rPictureFile.ThumbnailImageHeight);

        // Output size: never wider than the thumbnail, same aspect ratio as the request.
        int adjustedImageWidth = sourceWidth;
        if (adjustedImageWidth > rPictureFile.ThumbnailImageWidth)
        {
            adjustedImageWidth = rPictureFile.ThumbnailImageWidth;
        }
        int adjustedImageHeight = (int)(height * adjustedImageWidth / width);

        string normalImagePath = GetImagePath(rPictureFile, "norm").Result;

        using (Image targetImage = new Bitmap(adjustedImageWidth, adjustedImageHeight))
        {
            using (Graphics g = Graphics.FromImage(targetImage))
            using (Image img = Image.FromFile(normalImagePath))
            using (ImageAttributes imageAttributes = new ImageAttributes())
            {
                g.DrawImage(
                    img,
                    new Rectangle(0, 0, adjustedImageWidth, adjustedImageHeight),
                    sourceLeft,
                    sourceTop,
                    sourceWidth,
                    sourceHeight,
                    GraphicsUnit.Pixel,
                    imageAttributes);
            }

            using (MemoryStream ms = new MemoryStream())
            {
                targetImage.Save(ms, ImageFormat.Jpeg);
                ms.Position = 0;

                // Invariant formatting: culture specific decimal separators (e.g. '/' or ',') must
                // not end up in a file name.
                string croppedFileName = FormattableString.Invariant(
                    $"{Path.GetFileNameWithoutExtension(rPictureFile.OriginalFileName)}-cropped-{left}-{top}-{width}-{height}.jpg");

                RServiceResult<RImage> res = await _simpleImageStorage.Add(null, ms, croppedFileName, "CroppedImages");
                if (!string.IsNullOrEmpty(res.ExceptionString))
                {
                    return res;
                }

                RImage image = res.Result;
                _context.GeneralImages.Add(image);
                await _context.SaveChangesAsync();
                return new RServiceResult<RImage>(image);
            }
        }
    }
    catch (Exception exp)
    {
        return new RServiceResult<RImage>(null, exp.ToString());
    }
}
/// <summary>
/// Builds the full path of one of the stored files of a picture.
/// </summary>
/// <param name="image">picture file record whose folder and stored file names are used</param>
/// <param name="sz">"thumb" for the thumbnail, "norm" for the normal size image, anything else for the original</param>
/// <returns>
/// ImageStoragePath combined with the folder name and the selected file name; a null result when the
/// selected file name is null or empty
/// </returns>
public RServiceResult<string> GetImagePath(RPictureFile image, string sz = "orig")
{
    string storedName;
    switch (sz)
    {
        case "thumb":
            storedName = image.ThumbnailImageStoredFileName;
            break;
        case "norm":
            storedName = image.NormalSizeImageStoredFileName;
            break;
        default:
            storedName = image.StoredFileName;
            break;
    }

    if (!string.IsNullOrEmpty(storedName))
    {
        string fullPath = Path.Combine(ImageStoragePath, image.FolderName, storedName);
        return new RServiceResult<string>(fullPath);
    }

    return new RServiceResult<string>(null);
}
/// <summary>
/// Rebuilds a draft picture file record from three image files that already exist on disk
/// (original, normal size and thumbnail).
/// </summary>
/// <remarks>
/// The record is only built in memory: this method does not touch the database, so persisting it is
/// left to the caller. The three paths are stored as given in the stored file name properties, and
/// the content type is always set to "image/jpeg".
/// </remarks>
/// <param name="title">title, also used as the English title</param>
/// <param name="description">description, also used as the English description</param>
/// <param name="order">order of the picture</param>
/// <param name="srcurl">source url of the picture</param>
/// <param name="orignalFilePath">path of the original image file</param>
/// <param name="normalFilePath">path of the normal size image file</param>
/// <param name="thumbFilePath">path of the thumbnail image file</param>
/// <param name="originalFileNameForStreams">value stored as the original file name</param>
/// <param name="imageFolderName">folder name; the current date formatted as "yyyy-MM" is used when null or empty</param>
/// <returns>the rebuilt record; the returned task is faulted when a file cannot be read</returns>
public Task<RServiceResult<RPictureFile>> RecoverFromeFiles
(
    string title,
    string description,
    int order,
    string srcurl,
    string orignalFilePath,
    string normalFilePath,
    string thumbFilePath,
    string originalFileNameForStreams,
    string imageFolderName
)
{
    try
    {
        // One timestamp for every date field of the record.
        DateTime now = DateTime.Now;

        RPictureFile pictureFile = new RPictureFile()
        {
            Title = title,
            TitleInEnglish = title,
            Description = description,
            DescriptionInEnglish = description,
            Order = order,
            SrcUrl = srcurl,
            Status = PublishStatus.Draft,
            DataTime = now,
            LastModified = now,
            LastModifiedMeta = now,
            FolderName = string.IsNullOrEmpty(imageFolderName) ? now.ToString("yyyy-MM") : imageFolderName,
            ContentType = "image/jpeg",
            OriginalFileName = originalFileNameForStreams,
            StoredFileName = orignalFilePath,
            NormalSizeImageStoredFileName = normalFilePath,
            ThumbnailImageStoredFileName = thumbFilePath,
            // File system metadata gives the size without loading the whole image into memory.
            // The int cast keeps the value type the former byte[].Length expression had.
            FileSizeInBytes = checked((int)new FileInfo(orignalFilePath).Length)
        };

        using (Image img = Image.FromFile(orignalFilePath))
        {
            pictureFile.ImageWidth = img.Width;
            pictureFile.ImageHeight = img.Height;
        }

        using (Image img = Image.FromFile(normalFilePath))
        {
            pictureFile.NormalSizeImageWidth = img.Width;
            pictureFile.NormalSizeImageHeight = img.Height;
        }

        using (Image img = Image.FromFile(thumbFilePath))
        {
            pictureFile.ThumbnailImageWidth = img.Width;
            pictureFile.ThumbnailImageHeight = img.Height;
        }

        // Not saved here on purpose: this method can be called from a background service where the
        // injected _context is already disposed.
        return Task.FromResult(new RServiceResult<RPictureFile>(pictureFile));
    }
    catch (Exception exp)
    {
        // Nothing is awaited any more; report failures through the task, as an async method would.
        return Task.FromException<RServiceResult<RPictureFile>>(exp);
    }
}
/// <summary>
/// Builds the full path of one of the stored files of a picture.
/// </summary>
/// <param name="image">picture file record whose folder and stored file names are used</param>
/// <param name="sz">"thumb" for the thumbnail, "norm" for the normal size image, anything else for the original</param>
/// <returns>
/// ImageStoragePath combined with the folder name and the selected file name; when building the path
/// throws, a null result carrying the exception text
/// </returns>
public RServiceResult<string> GetImagePath(RPictureFile image, string sz = "orig")
{
    try
    {
        string storedName;
        if (sz == "thumb")
        {
            storedName = image.ThumbnailImageStoredFileName;
        }
        else if (sz == "norm")
        {
            storedName = image.NormalSizeImageStoredFileName;
        }
        else
        {
            storedName = image.StoredFileName;
        }

        string fullPath = Path.Combine(ImageStoragePath, image.FolderName, storedName);
        return new RServiceResult<string>(fullPath);
    }
    catch (Exception failure)
    {
        return new RServiceResult<string>(null, failure.ToString());
    }
}
/// <summary>
/// Queues a background job that imports a manuscript from the Cambridge Digital Library
/// (http://cudl.lib.cam.ac.uk) as a draft artifact.
/// </summary>
/// <remarks>
/// The request is rejected when an import job for the same resource exists whose status is neither
/// Failed nor Aborted, or when an artifact with the same friendly url exists. The queued work
/// downloads the JSON description of the resource, turns its displayable metadata into tags and then
/// walks its pages: a page whose original, normal and thumbnail files are already present under the
/// image storage path is rebuilt from those files, every other page image is downloaded. A "not
/// found" response for a page image ends the page loop. Failures are recorded on the import job.
/// </remarks>
/// <param name="resourceNumber">resource number, e.g. MS-RAS-00258</param>
/// <param name="friendlyUrl">friendly url of the new artifact; resourceNumber is used when null or empty</param>
/// <returns>true when the job has been queued, otherwise false together with the reason</returns>
private async Task<RServiceResult<bool>> StartImportingFromCambridge(string resourceNumber, string friendlyUrl)
{
    string url = $"http://cudl.lib.cam.ac.uk/view/{resourceNumber}.json";

    // AnyAsync only asks whether a matching row exists; unlike SingleOrDefaultAsync it does not throw
    // when several rows match.
    bool jobExists = await _context.ImportJobs
        .Where(j => j.JobType == JobType.Cambridge
                    && j.ResourceNumber == resourceNumber
                    && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted))
        .AnyAsync();
    if (jobExists)
    {
        return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing {url}");
    }

    if (string.IsNullOrEmpty(friendlyUrl))
    {
        friendlyUrl = resourceNumber;
    }

    if (await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).AnyAsync())
    {
        return new RServiceResult<bool>(false, $"duplicated artifact friendly url '{friendlyUrl}'");
    }

    ImportJob job = new ImportJob()
    {
        JobType = JobType.Cambridge,
        ResourceNumber = resourceNumber,
        FriendlyUrl = friendlyUrl,
        SrcUrl = url,
        QueueTime = DateTime.Now,
        ProgressPercent = 0,
        Status = ImportJobStatus.NotStarted
    };
    await _context.ImportJobs.AddAsync(job);
    await _context.SaveChangesAsync();

    try
    {
        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                // Saves the current state of the job through a short-lived context of its own.
                Func<Task> saveJobAsync = async () =>
                {
                    using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                    {
                        importJobUpdaterDb.Update(job);
                        await importJobUpdaterDb.SaveChangesAsync();
                    }
                };

                // Marks the job as failed with the given message and saves it.
                Func<string, Task> failJobAsync = reason =>
                {
                    job.EndTime = DateTime.Now;
                    job.Status = ImportJobStatus.Failed;
                    job.Exception = reason;
                    return saveJobAsync();
                };

                try
                {
                    using (var client = new HttpClient())
                    using (var result = await client.GetAsync(url))
                    {
                        if (!result.IsSuccessStatusCode)
                        {
                            await failJobAsync($"Http result is not ok ({result.StatusCode}) for {url}");
                            return;
                        }

                        using (RMuseumDbContext context = new RMuseumDbContext(Configuration))
                        {
                            // Checked again: an artifact with this url may have been added while
                            // the job was waiting in the queue.
                            if (await context.Artifacts.Where(a => a.FriendlyUrl == job.FriendlyUrl).AnyAsync())
                            {
                                await failJobAsync("aborted because of duplicated friendly url");
                                return;
                            }

                            RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from {url}", $"extracted from {url}")
                            {
                                Status = PublishStatus.Draft,
                                DateTime = DateTime.Now,
                                LastModified = DateTime.Now,
                                CoverItemIndex = 0,
                                FriendlyUrl = friendlyUrl
                            };

                            List<RTagValue> meta = new List<RTagValue>();
                            RTagValue tag;

                            string json = await result.Content.ReadAsStringAsync();
                            var parsed = JObject.Parse(json);

                            book.Name = book.NameInEnglish =
                                parsed.SelectToken("logicalStructures[*].label").Value<string>();
                            // Markup is stripped from the abstract and from every display form below.
                            book.Description = book.DescriptionInEnglish =
                                Regex.Replace(
                                    parsed.SelectToken("descriptiveMetadata[*].abstract.displayForm").Value<string>(),
                                    "<.*?>", string.Empty);

                            // One tag per metadata entry flagged display == "True": taken from its own
                            // displayForm or, failing that, from each displayForm under its "value".
                            int tagOrder = 1;
                            foreach (JToken descriptiveMetadata in parsed.SelectTokens("$.descriptiveMetadata[*]").Children())
                            {
                                foreach (JToken child in descriptiveMetadata.Children())
                                {
                                    JToken labelToken = child.SelectToken("label");
                                    JToken displayToken = child.SelectToken("display");
                                    if (labelToken == null || displayToken == null || displayToken.Value<string>() != "True")
                                    {
                                        continue;
                                    }

                                    string metaName = labelToken.Value<string>();
                                    JToken displayForm = child.SelectToken("displayForm");
                                    if (displayForm != null)
                                    {
                                        string metaValue = Regex.Replace(displayForm.Value<string>(), "<.*?>", string.Empty);
                                        tag = await TagHandler.PrepareAttribute(context, metaName, metaValue, tagOrder++);
                                        meta.Add(tag);
                                    }
                                    else if (child.SelectToken("value") != null)
                                    {
                                        foreach (JToken value in child.SelectTokens("value").Children())
                                        {
                                            JToken valueDisplayForm = value.SelectToken("displayForm");
                                            if (valueDisplayForm != null)
                                            {
                                                string metaValue = Regex.Replace(valueDisplayForm.Value<string>(), "<.*?>", string.Empty);
                                                tag = await TagHandler.PrepareAttribute(context, metaName, metaValue, tagOrder++);
                                                meta.Add(tag);
                                            }
                                        }
                                    }
                                }
                            }

                            string imageReproPageURL = "https://image01.cudl.lib.cam.ac.uk";

                            job.StartTime = DateTime.Now;
                            job.Status = ImportJobStatus.Running;
                            job.SrcContent = json;
                            await saveJobAsync();

                            tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Source", "University of Cambridge Digital Library", 1);
                            string viewerUrl = $"http://cudl.lib.cam.ac.uk/view/{resourceNumber}";
                            tag.ValueSupplement = viewerUrl;
                            meta.Add(tag);

                            book.Tags = meta.ToArray();

                            List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();
                            int order = 0;
                            foreach (JToken pageToken in parsed.SelectTokens("$.pages").Children())
                            {
                                order++;

                                // NOTE(review): this stores the page number, not a percentage.
                                job.ProgressPercent = order;
                                await saveJobAsync();

                                string imageUrl = imageReproPageURL + pageToken.SelectToken("downloadImageURL").Value<string>();
                                string pageNumber = $"{order}".PadLeft(4, '0');

                                RArtifactItemRecord page = new RArtifactItemRecord()
                                {
                                    Name = $"تصویر {order}",
                                    NameInEnglish = $"Image {order}",
                                    Description = "",
                                    DescriptionInEnglish = "",
                                    Order = order,
                                    FriendlyUrl = "p" + pageNumber,
                                    LastModified = DateTime.Now
                                };

                                List<RTagValue> pageMata = new List<RTagValue>();
                                tag = await TagHandler.PrepareAttribute(context, "Source", "University of Cambridge Digital Library", 1);
                                tag.ValueSupplement = $"{viewerUrl}/{order}";
                                pageMata.Add(tag);

                                JToken pageLabel = pageToken.SelectToken("label");
                                if (pageLabel != null)
                                {
                                    tag = await TagHandler.PrepareAttribute(context, "Label", pageLabel.Value<string>(), 1);
                                    pageMata.Add(tag);
                                }
                                page.Tags = pageMata.ToArray();

                                // Files of this page, possibly left by an earlier import attempt.
                                // (imageUrl always starts with imageReproPageURL, so the former check
                                // for an empty url could never fail and has been dropped.)
                                string pageFileName = pageNumber + ".jpg";
                                string origFilePath = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl, "orig", pageFileName);
                                string normFilePath = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl, "norm", pageFileName);
                                string thumbFilePath = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl, "thumb", pageFileName);

                                RPictureFile pageImage = null;
                                if (File.Exists(origFilePath) && File.Exists(normFilePath) && File.Exists(thumbFilePath))
                                {
                                    RServiceResult<RPictureFile> recoveredPicture =
                                        await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1,
                                            imageUrl, origFilePath, normFilePath, thumbFilePath, pageFileName, friendlyUrl);
                                    pageImage = recoveredPicture.Result;
                                }

                                if (pageImage == null)
                                {
                                    // Remove whatever incomplete set of files exists, then download.
                                    foreach (string stalePath in new[] { origFilePath, normFilePath, thumbFilePath })
                                    {
                                        if (File.Exists(stalePath))
                                        {
                                            File.Delete(stalePath);
                                        }
                                    }

                                    HttpResponseMessage imageResult = await client.GetAsync(imageUrl);
                                    try
                                    {
                                        if (imageResult.StatusCode == HttpStatusCode.NotFound)
                                        {
                                            break; //finished
                                        }

                                        // Retry while the server answers "service unavailable",
                                        // waiting a little longer before each attempt.
                                        const int importRetryCount = 200;
                                        const int importRetryInitialSleep = 500;
                                        int retryCount = 0;
                                        while (retryCount < importRetryCount
                                               && !imageResult.IsSuccessStatusCode
                                               && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable)
                                        {
                                            imageResult.Dispose();
                                            // Task.Delay instead of Thread.Sleep: no thread is blocked while waiting.
                                            await Task.Delay(importRetryInitialSleep * (retryCount + 1));
                                            imageResult = await client.GetAsync(imageUrl);
                                            retryCount++;
                                        }

                                        if (!imageResult.IsSuccessStatusCode)
                                        {
                                            await failJobAsync($"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}");
                                            return;
                                        }

                                        using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync())
                                        {
                                            RServiceResult<RPictureFile> picture =
                                                await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, pageFileName, friendlyUrl);
                                            if (picture.Result == null)
                                            {
                                                throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                                            }
                                            pageImage = picture.Result;
                                        }
                                    }
                                    finally
                                    {
                                        // Also covers the "not found" break, which used to leak the response.
                                        imageResult.Dispose();
                                    }

                                    // NOTE(review): explicit collection kept from the original code,
                                    // presumably to release image buffers between pages - confirm it is still needed.
                                    GC.Collect();
                                }

                                page.Images = new RPictureFile[] { pageImage };
                                page.CoverImageIndex = 0;
                                if (book.CoverItemIndex == (order - 1))
                                {
                                    book.CoverImage = RPictureFile.Duplicate(pageImage);
                                }
                                pages.Add(page);
                            }

                            book.Items = pages.ToArray();
                            book.ItemCount = pages.Count;

                            if (pages.Count == 0)
                            {
                                await failJobAsync("Pages.Count == 0");
                                return;
                            }

                            await context.Artifacts.AddAsync(book);
                            await context.SaveChangesAsync();

                            job.ProgressPercent = 100;
                            job.Status = ImportJobStatus.Succeeded;
                            job.ArtifactId = book.Id;
                            job.EndTime = DateTime.Now;
                            context.Update(job);
                            await context.SaveChangesAsync();
                        }
                    }
                }
                catch (Exception exp)
                {
                    await failJobAsync(exp.ToString());
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// Queues a background job that imports a manuscript from the Digital Walters
/// (http://www.thedigitalwalters.org/01_ACCESS_WALTERS_MANUSCRIPTS.html) as a draft artifact.
/// </summary>
/// <remarks>
/// The request is rejected when an import job for the same resource exists whose status is neither
/// Failed nor Aborted, or when an artifact with the same friendly url exists. The queued work
/// downloads the TEI description of the manuscript, turns its title, author, first responsibility
/// statement and notes into tags when they are present, and imports every graphic whose url contains
/// "sap.jpg" as a page. A page whose original, normal and thumbnail files are already present under
/// the image storage path is rebuilt from those files, every other page image is downloaded.
/// Failures, including errors raised while preparing tags, are recorded on the import job.
/// </remarks>
/// <param name="resourceNumber">resource number, e.g. W619</param>
/// <param name="friendlyUrl">friendly url of the new artifact, e.g. golestan-walters-01; resourceNumber is used when null or empty</param>
/// <returns>true when the job has been queued, otherwise false together with the reason</returns>
private async Task<RServiceResult<bool>> StartImportingFromWalters(string resourceNumber, string friendlyUrl)
{
    string url = $"http://www.thedigitalwalters.org/Data/WaltersManuscripts/ManuscriptDescriptions/{resourceNumber}_tei.xml";

    // AnyAsync only asks whether a matching row exists; unlike SingleOrDefaultAsync it does not throw
    // when several rows match.
    bool jobExists = await _context.ImportJobs
        .Where(j => j.JobType == JobType.Walters
                    && j.ResourceNumber == resourceNumber
                    && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted))
        .AnyAsync();
    if (jobExists)
    {
        return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing {url}");
    }

    if (string.IsNullOrEmpty(friendlyUrl))
    {
        friendlyUrl = resourceNumber;
    }

    if (await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).AnyAsync())
    {
        return new RServiceResult<bool>(false, $"duplicated friendly url '{friendlyUrl}'");
    }

    ImportJob job = new ImportJob()
    {
        JobType = JobType.Walters,
        ResourceNumber = resourceNumber,
        FriendlyUrl = friendlyUrl,
        SrcUrl = url,
        QueueTime = DateTime.Now,
        ProgressPercent = 0,
        Status = ImportJobStatus.NotStarted
    };
    await _context.ImportJobs.AddAsync(job);
    await _context.SaveChangesAsync();

    try
    {
        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                // Saves the current state of the job through a short-lived context of its own.
                // NOTE(review): other import methods build their contexts from Configuration;
                // confirm that default DbContextOptions are what is wanted here.
                Func<Task> saveJobAsync = async () =>
                {
                    using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                    {
                        importJobUpdaterDb.Update(job);
                        await importJobUpdaterDb.SaveChangesAsync();
                    }
                };

                // Marks the job as failed with the given message and saves it.
                Func<string, Task> failJobAsync = reason =>
                {
                    job.EndTime = DateTime.Now;
                    job.Status = ImportJobStatus.Failed;
                    job.Exception = reason;
                    return saveJobAsync();
                };

                try
                {
                    using (var client = new HttpClient())
                    using (var result = await client.GetAsync(url))
                    {
                        if (!result.IsSuccessStatusCode)
                        {
                            await failJobAsync($"Http result is not ok ({result.StatusCode}) for {url}");
                            return;
                        }

                        using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                        {
                            RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from {url}", $"extracted from {url}")
                            {
                                Status = PublishStatus.Draft,
                                DateTime = DateTime.Now,
                                LastModified = DateTime.Now,
                                CoverItemIndex = 0,
                                FriendlyUrl = friendlyUrl
                            };

                            List<RTagValue> meta = new List<RTagValue>();
                            RTagValue tag;

                            string xml = await result.Content.ReadAsStringAsync();

                            job.StartTime = DateTime.Now;
                            job.Status = ImportJobStatus.Running;
                            job.SrcContent = xml;
                            await saveJobAsync();

                            XElement elObject = XDocument.Parse(xml).Root;
                            XNamespace tei = "http://www.tei-c.org/ns/1.0";

                            tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1);
                            meta.Add(tag);

                            // Optional header elements are looked up without throwing: a missing
                            // element just yields no tag. Errors raised while preparing a tag are
                            // no longer swallowed; they reach the job level catch below.
                            XElement fileDesc = elObject.Elements(tei + "teiHeader").FirstOrDefault()
                                                       ?.Elements(tei + "fileDesc").FirstOrDefault();
                            XElement titleStmt = fileDesc?.Elements(tei + "titleStmt").FirstOrDefault();
                            XElement notesStmt = fileDesc?.Elements(tei + "notesStmt").FirstOrDefault();

                            XElement title = titleStmt?.Elements(tei + "title").FirstOrDefault();
                            if (title != null)
                            {
                                string label = title.Value;
                                book.Name = book.NameInEnglish = book.Description = book.DescriptionInEnglish = label;
                                tag = await TagHandler.PrepareAttribute(context, "Title", label, 1);
                                meta.Add(tag);
                            }

                            XElement author = titleStmt?.Elements(tei + "author").FirstOrDefault();
                            if (author != null)
                            {
                                tag = await TagHandler.PrepareAttribute(context, "Contributor Names", author.Value, 1);
                                meta.Add(tag);
                            }

                            // Only the first responsibility statement is used, through its first name.
                            XElement responsibleName = titleStmt?.Elements(tei + "respStmt").FirstOrDefault()
                                                                ?.Elements(tei + "name").FirstOrDefault();
                            if (responsibleName != null)
                            {
                                tag = await TagHandler.PrepareAttribute(context, "Contributor Names", responsibleName.Value, 1);
                                meta.Add(tag);
                            }

                            if (notesStmt != null)
                            {
                                foreach (XElement note in notesStmt.Elements(tei + "note"))
                                {
                                    tag = await TagHandler.PrepareAttribute(context, "Notes", note.Value, 1);
                                    meta.Add(tag);
                                }
                            }

                            tag = await TagHandler.PrepareAttribute(context, "Source", "Digitized Walters Manuscripts", 1);
                            tag.ValueSupplement = $"http://www.thedigitalwalters.org/Data/WaltersManuscripts/html/{job.ResourceNumber}/";
                            meta.Add(tag);

                            book.Tags = meta.ToArray();

                            List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();
                            int order = 0;

                            // A missing facsimile element is an error (First throws) and fails the job.
                            foreach (var surface in elObject.Elements(tei + "facsimile").First().Elements(tei + "surface"))
                            {
                                foreach (var graphic in surface.Elements(tei + "graphic"))
                                {
                                    if (!graphic.Attribute("url").Value.Contains("sap.jpg"))
                                    {
                                        continue;
                                    }

                                    order++;

                                    // NOTE(review): this stores the page number, not a percentage.
                                    job.ProgressPercent = order;
                                    await saveJobAsync();

                                    string pageNumber = $"{order}".PadLeft(4, '0');

                                    RArtifactItemRecord page = new RArtifactItemRecord()
                                    {
                                        Name = $"تصویر {order}",
                                        NameInEnglish = $"Image {order}",
                                        Description = "",
                                        DescriptionInEnglish = "",
                                        Order = order,
                                        FriendlyUrl = "p" + pageNumber,
                                        LastModified = DateTime.Now
                                    };

                                    string imageUrl = $"http://www.thedigitalwalters.org/Data/WaltersManuscripts/{resourceNumber}/data/W.{resourceNumber.Substring(1)}/{graphic.Attribute("url").Value}";

                                    tag = await TagHandler.PrepareAttribute(context, "Source", "Digitized Walters Manuscripts", 1);
                                    tag.ValueSupplement = imageUrl;
                                    page.Tags = new RTagValue[] { tag };

                                    // Files of this page, possibly left by an earlier import attempt.
                                    // (imageUrl always starts with a literal prefix, so the former
                                    // check for an empty url could never fail and has been dropped.)
                                    string pageFileName = pageNumber + ".jpg";
                                    string origFilePath = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl, "orig", pageFileName);
                                    string normFilePath = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl, "norm", pageFileName);
                                    string thumbFilePath = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl, "thumb", pageFileName);

                                    RPictureFile pageImage = null;
                                    if (File.Exists(origFilePath) && File.Exists(normFilePath) && File.Exists(thumbFilePath))
                                    {
                                        RServiceResult<RPictureFile> recoveredPicture =
                                            await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1,
                                                imageUrl, origFilePath, normFilePath, thumbFilePath, pageFileName, friendlyUrl);
                                        pageImage = recoveredPicture.Result;
                                    }

                                    if (pageImage == null)
                                    {
                                        // Remove whatever incomplete set of files exists, then download.
                                        foreach (string stalePath in new[] { origFilePath, normFilePath, thumbFilePath })
                                        {
                                            if (File.Exists(stalePath))
                                            {
                                                File.Delete(stalePath);
                                            }
                                        }

                                        HttpResponseMessage imageResult = await client.GetAsync(imageUrl);
                                        try
                                        {
                                            // Retry while the server answers "service unavailable",
                                            // waiting a little longer before each attempt.
                                            const int importRetryCount = 5;
                                            const int importRetryInitialSleep = 500;
                                            int retryCount = 0;
                                            while (retryCount < importRetryCount
                                                   && !imageResult.IsSuccessStatusCode
                                                   && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable)
                                            {
                                                imageResult.Dispose();
                                                // Task.Delay instead of Thread.Sleep: no thread is blocked while waiting.
                                                await Task.Delay(importRetryInitialSleep * (retryCount + 1));
                                                imageResult = await client.GetAsync(imageUrl);
                                                retryCount++;
                                            }

                                            if (!imageResult.IsSuccessStatusCode)
                                            {
                                                await failJobAsync($"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}");
                                                return;
                                            }

                                            using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync())
                                            {
                                                RServiceResult<RPictureFile> picture =
                                                    await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, pageFileName, friendlyUrl);
                                                if (picture.Result == null)
                                                {
                                                    throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                                                }
                                                pageImage = picture.Result;
                                            }
                                        }
                                        finally
                                        {
                                            imageResult.Dispose();
                                        }

                                        // NOTE(review): explicit collection kept from the original code,
                                        // presumably to release image buffers between pages - confirm it is still needed.
                                        GC.Collect();
                                    }

                                    page.Images = new RPictureFile[] { pageImage };
                                    page.CoverImageIndex = 0;
                                    if (book.CoverItemIndex == (order - 1))
                                    {
                                        book.CoverImage = RPictureFile.Duplicate(pageImage);
                                    }
                                    pages.Add(page);
                                }
                            }

                            book.Items = pages.ToArray();
                            book.ItemCount = pages.Count;

                            if (pages.Count == 0)
                            {
                                await failJobAsync("Pages.Count == 0");
                                return;
                            }

                            await context.Artifacts.AddAsync(book);
                            await context.SaveChangesAsync();

                            job.ProgressPercent = 100;
                            job.Status = ImportJobStatus.Succeeded;
                            job.ArtifactId = book.Id;
                            job.EndTime = DateTime.Now;
                            context.Update(job);
                            await context.SaveChangesAsync();
                        }
                    }
                }
                catch (Exception exp)
                {
                    await failJobAsync(exp.ToString());
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// Queues a background job that imports the *.jpg files of a folder on the server as the pages of a
/// new draft artifact.
/// </summary>
/// <remarks>
/// The request is rejected when an import job for the same folder exists whose status is neither
/// Failed nor Aborted, when the friendly url is empty, or when an artifact with the same friendly url
/// exists. The queued work adds the files in file name order, one page per file; previously stored
/// files of a page under the image storage path are deleted before the page image is added.
/// Failures are recorded on the import job.
/// </remarks>
/// <param name="folderPath">folder on the server, e.g. C:\Tools\batches\florence</param>
/// <param name="friendlyUrl">friendly url of the new artifact, e.g. shahname-florence; required</param>
/// <param name="srcUrl">source url recorded on the job and its pictures, e.g. https://t.me/dr_khatibi_abolfazl/888</param>
/// <returns>true when the job has been queued, otherwise false together with the reason</returns>
private async Task<RServiceResult<bool>> StartImportingFromServerFolder(string folderPath, string friendlyUrl, string srcUrl)
{
    try
    {
        // AnyAsync only asks whether a matching row exists; unlike SingleOrDefaultAsync it does not
        // throw when several rows match.
        bool jobExists = await _context.ImportJobs
            .Where(j => j.JobType == JobType.ServerFolder
                        && j.ResourceNumber == folderPath
                        && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted))
            .AnyAsync();
        if (jobExists)
        {
            return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing server folder {folderPath}");
        }

        if (string.IsNullOrEmpty(friendlyUrl))
        {
            return new RServiceResult<bool>(false, $"Friendly url is empty, server folder {folderPath}");
        }

        if (await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).AnyAsync())
        {
            return new RServiceResult<bool>(false, $"duplicated friendly url '{friendlyUrl}'");
        }

        ImportJob job = new ImportJob()
        {
            JobType = JobType.ServerFolder,
            ResourceNumber = folderPath,
            FriendlyUrl = friendlyUrl,
            SrcUrl = srcUrl,
            QueueTime = DateTime.Now,
            ProgressPercent = 0,
            Status = ImportJobStatus.NotStarted
        };
        await _context.ImportJobs.AddAsync(job);
        await _context.SaveChangesAsync();

        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                // Saves the current state of the job through a short-lived context of its own.
                Func<Task> saveJobAsync = async () =>
                {
                    using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                    {
                        importJobUpdaterDb.Update(job);
                        await importJobUpdaterDb.SaveChangesAsync();
                    }
                };

                // Marks the job as failed with the given message and saves it.
                Func<string, Task> failJobAsync = reason =>
                {
                    job.EndTime = DateTime.Now;
                    job.Status = ImportJobStatus.Failed;
                    job.Exception = reason;
                    return saveJobAsync();
                };

                try
                {
                    using (RMuseumDbContext context = new RMuseumDbContext(Configuration))
                    {
                        RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from server folder {job.ResourceNumber}", $"extracted from server folder {job.ResourceNumber}")
                        {
                            Status = PublishStatus.Draft,
                            DateTime = DateTime.Now,
                            LastModified = DateTime.Now,
                            CoverItemIndex = 0,
                            FriendlyUrl = friendlyUrl
                        };

                        List<RTagValue> meta = new List<RTagValue>();
                        RTagValue tag;

                        tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1);
                        meta.Add(tag);

                        // Added once; the same tag instance used to be added to the list twice.
                        tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1);
                        meta.Add(tag);

                        job.StartTime = DateTime.Now;
                        job.Status = ImportJobStatus.Running;
                        job.SrcContent = "";
                        await saveJobAsync();

                        List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();

                        // Directory.GetFiles does not guarantee any order, and the position of a
                        // file decides its page number: sort explicitly.
                        string[] fileNames = Directory.GetFiles(job.ResourceNumber, "*.jpg");
                        Array.Sort(fileNames, StringComparer.OrdinalIgnoreCase);

                        int order = 0;
                        foreach (string fileName in fileNames)
                        {
                            // Progress counts the files finished so far (order is still zero based here).
                            job.ProgressPercent = order * 100 / (decimal)fileNames.Length;
                            await saveJobAsync();

                            order++;
                            string pageNumber = $"{order}".PadLeft(4, '0');

                            RArtifactItemRecord page = new RArtifactItemRecord()
                            {
                                Name = $"تصویر {order}",
                                NameInEnglish = $"Image {order} of {book.NameInEnglish}",
                                Description = "",
                                DescriptionInEnglish = "",
                                Order = order,
                                FriendlyUrl = "p" + pageNumber,
                                LastModified = DateTime.Now
                            };
                            page.Tags = new RTagValue[] { };

                            // Delete files stored for this page by an earlier import attempt.
                            string pageFileName = pageNumber + ".jpg";
                            foreach (string sizeFolder in new[] { "orig", "norm", "thumb" })
                            {
                                string stalePath = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl, sizeFolder, pageFileName);
                                if (File.Exists(stalePath))
                                {
                                    File.Delete(stalePath);
                                }
                            }

                            // Read-only access is enough; FileMode.Open alone requests read/write
                            // access and fails on read-only source files.
                            using (FileStream imageStream = File.OpenRead(fileName))
                            {
                                RServiceResult<RPictureFile> picture =
                                    await _pictureFileService.Add(page.Name, page.Description, 1, null, job.SrcUrl, imageStream, pageFileName, friendlyUrl);
                                if (picture.Result == null)
                                {
                                    throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                                }

                                page.Images = new RPictureFile[] { picture.Result };
                                page.CoverImageIndex = 0;

                                if (book.CoverItemIndex == (order - 1))
                                {
                                    book.CoverImage = RPictureFile.Duplicate(picture.Result);
                                }
                            }

                            pages.Add(page);
                        }

                        book.Tags = meta.ToArray();
                        book.Items = pages.ToArray();
                        book.ItemCount = pages.Count;

                        if (pages.Count == 0)
                        {
                            await failJobAsync("Pages.Count == 0");
                            return;
                        }

                        await context.Artifacts.AddAsync(book);
                        await context.SaveChangesAsync();

                        job.ProgressPercent = 100;
                        job.Status = ImportJobStatus.Succeeded;
                        job.ArtifactId = book.Id;
                        job.EndTime = DateTime.Now;
                        context.Update(job);
                        await context.SaveChangesAsync();
                    }
                }
                catch (Exception exp)
                {
                    await failJobAsync(exp.ToString());
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// Imports the pages described by a Harvard IIIF manifest
/// (https://iiif.lib.harvard.edu/manifests/drs:{hardvardResourceNumber}).
/// The manifest label becomes the book name/description, "Title" and "Contributor Names" tags are
/// appended to <paramref name="meta"/>, and for every canvas the image is either recovered from
/// already stored files or downloaded and stored through the picture file service.
/// </summary>
/// <param name="hardvardResourceNumber">resource number appended to the manifest url</param>
/// <param name="job">import job; its progress and (on failure) its end state are persisted here</param>
/// <param name="friendlyUrl">friendly url of the artifact, also used as the image storage sub folder</param>
/// <param name="context">database context used to prepare tags</param>
/// <param name="book">artifact record which receives the name, description and cover image</param>
/// <param name="meta">book level tag list which is appended to</param>
/// <returns>
/// the imported pages; a null result with the message "failed" when the manifest or an image
/// request does not succeed (the job is marked as failed in that case)
/// </returns>
private async Task<RServiceResult<List<RArtifactItemRecord>>> _InternalHarvardJsonImport(string hardvardResourceNumber, ImportJob job, string friendlyUrl, RMuseumDbContext context, RArtifactMasterRecord book, List<RTagValue> meta)
{
    const int importRetryCount = 5;
    const int importRetryInitialSleep = 500; // milliseconds, multiplied by the attempt number

    List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();

    // Persists the failed state of the job through a short-lived context and builds the failure result.
    async Task<RServiceResult<List<RArtifactItemRecord>>> FailJobAsync(string reason)
    {
        using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
        {
            job.EndTime = DateTime.Now;
            job.Status = ImportJobStatus.Failed;
            job.Exception = reason;
            importJobUpdaterDb.Update(job);
            await importJobUpdaterDb.SaveChangesAsync();
        }
        return new RServiceResult<List<RArtifactItemRecord>>(null, "failed");
    }

    // {ImageStoragePath}/{friendlyUrl}/{sizeFolder}/{0001}.jpg
    string StoredImagePath(string sizeFolder, int pageOrder)
    {
        return Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), sizeFolder), $"{pageOrder}".PadLeft(4, '0') + ".jpg");
    }

    // File.Delete throws when the directory itself is missing, hence the explicit existence check.
    void DeleteIfExists(string path)
    {
        if (File.Exists(path))
        {
            File.Delete(path);
        }
    }

    // Links the stored picture to the page, uses it as the book cover when applicable and registers the page.
    void AttachImage(RArtifactItemRecord page, RPictureFile image, int pageOrder)
    {
        page.Images = new RPictureFile[] { image };
        page.CoverImageIndex = 0;
        if (book.CoverItemIndex == (pageOrder - 1))
        {
            book.CoverImage = RPictureFile.Duplicate(image);
        }
        pages.Add(page);
    }

    using (var client = new HttpClient())
    using (var jsonResult = await client.GetAsync($"https://iiif.lib.harvard.edu/manifests/drs:{hardvardResourceNumber}"))
    {
        if (!jsonResult.IsSuccessStatusCode)
        {
            return await FailJobAsync($"Http result is not ok ({jsonResult.StatusCode}) for https://iiif.lib.harvard.edu/manifests/drs:{hardvardResourceNumber}");
        }

        string json = await jsonResult.Content.ReadAsStringAsync();
        var parsed = JObject.Parse(json);

        book.Name = book.NameInEnglish = book.Description = book.DescriptionInEnglish = parsed.SelectToken("label").Value<string>();

        RTagValue tag = await TagHandler.PrepareAttribute(context, "Title", book.Name, 1);
        meta.Add(tag);

        tag = await TagHandler.PrepareAttribute(context, "Contributor Names", "تعیین نشده", 1);
        meta.Add(tag);

        List<string> labels = new List<string>();
        foreach (JToken structure in parsed.SelectTokens("$.structures[*].label"))
        {
            labels.Add(structure.Value<string>());
        }

        var canvases = parsed.SelectToken("sequences").First().SelectToken("canvases").ToArray();
        int pageCount = canvases.Length;
        int order = 0;

        foreach (JToken canvas in canvases)
        {
            order++;

            using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
            {
                job.ProgressPercent = order * 100 / (decimal)pageCount;
                importJobUpdaterDb.Update(job);
                await importJobUpdaterDb.SaveChangesAsync();
            }

            // Prefer the structure label that contains the canvas label, but only when the match is
            // unambiguous: several matches (e.g. "1" inside "10" and "11") used to make SingleOrDefault
            // throw and abort the whole import, now the canvas label is simply kept.
            string label = canvas.SelectToken("label").Value<string>();
            List<string> matchingLabels = labels
                .Where(l => l != null && l.IndexOf(label, StringComparison.Ordinal) != -1)
                .Take(2)
                .ToList();
            if (matchingLabels.Count == 1)
            {
                label = matchingLabels[0];
            }

            string imageUrl = canvas.SelectTokens("images[*]").First().SelectToken("resource").SelectToken("@id").Value<string>();
            string paddedOrder = $"{order}".PadLeft(4, '0');

            RArtifactItemRecord page = new RArtifactItemRecord()
            {
                Name = $"تصویر {order}",
                NameInEnglish = label,
                Description = "",
                DescriptionInEnglish = "",
                Order = order,
                FriendlyUrl = $"p{paddedOrder}",
                LastModified = DateTime.Now
            };

            tag = await TagHandler.PrepareAttribute(context, "Source", "Harvard University Islamic Heritage Project", 1);
            tag.ValueSupplement = imageUrl;
            page.Tags = new RTagValue[] { tag };

            if (string.IsNullOrEmpty(imageUrl))
            {
                // a canvas without an image url produces no page
                continue;
            }

            string origPath = StoredImagePath("orig", order);
            string normPath = StoredImagePath("norm", order);
            string thumbPath = StoredImagePath("thumb", order);
            string storedFileName = paddedOrder + ".jpg";

            // 1. try to reuse the files of a previous (interrupted) import
            bool recovered = false;
            if (File.Exists(origPath) && File.Exists(normPath) && File.Exists(thumbPath))
            {
                RServiceResult<RPictureFile> recoveredPicture = await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1,
                    imageUrl, origPath, normPath, thumbPath, storedFileName, friendlyUrl);
                if (recoveredPicture.Result != null)
                {
                    recovered = true;
                    AttachImage(page, recoveredPicture.Result, order);
                }
            }

            if (recovered)
            {
                continue;
            }

            // 2. otherwise remove any partial leftovers and download the image
            DeleteIfExists(origPath);
            DeleteIfExists(normPath);
            DeleteIfExists(thumbPath);

            var imageResult = await client.GetAsync(imageUrl);
            try
            {
                // retry "503 Service Unavailable" with a linearly growing, non-blocking delay
                int retryCount = 0;
                while (retryCount < importRetryCount && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable)
                {
                    imageResult.Dispose();
                    await Task.Delay(importRetryInitialSleep * (retryCount + 1));
                    imageResult = await client.GetAsync(imageUrl);
                    retryCount++;
                }

                if (!imageResult.IsSuccessStatusCode)
                {
                    return await FailJobAsync($"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}");
                }

                using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync())
                {
                    RServiceResult<RPictureFile> picture = await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, storedFileName, friendlyUrl);
                    if (picture.Result == null)
                    {
                        throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                    }

                    AttachImage(page, picture.Result, order);
                }
            }
            finally
            {
                // also releases the response when storing the picture throws
                imageResult.Dispose();
            }

            // kept from the original implementation: release image buffers eagerly between pages
            GC.Collect();
        }
    }

    return new RServiceResult<List<RArtifactItemRecord>>(pages);
}
/// <summary>
/// import from https://viewer.cbl.ie
/// </summary>
/// <remarks>
/// Validates the request, stores a new import job and queues the actual import as a background
/// work item. The background item fetches page images one by one (recovering already stored files
/// when possible) until the server answers Forbidden or NotFound, then stores the artifact.
/// </remarks>
/// <param name="resourceNumber">119</param>
/// <param name="friendlyUrl">golestan-baysonghori</param>
/// <returns>
/// true when the job has been queued; false with a message when a job for the resource already
/// exists, the friendly url is empty or duplicated, or queuing throws
/// </returns>
private async Task<RServiceResult<bool>> StartImportingFromChesterBeatty(string resourceNumber, string friendlyUrl)
{
    try
    {
        string srcUrl = $"https://viewer.cbl.ie/viewer/object/Per_{resourceNumber}/1/";

        // existence checks: AnyAsync does not throw when more than one row matches
        bool jobExists = await _context.ImportJobs
            .AnyAsync(j => j.JobType == JobType.ChesterBeatty && j.ResourceNumber == resourceNumber && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted));
        if (jobExists)
        {
            return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing {srcUrl}");
        }

        if (string.IsNullOrEmpty(friendlyUrl))
        {
            return new RServiceResult<bool>(false, $"Friendly url is empty, server folder {srcUrl}");
        }

        if (await _context.Artifacts.AnyAsync(a => a.FriendlyUrl == friendlyUrl))
        {
            return new RServiceResult<bool>(false, $"duplicated friendly url '{friendlyUrl}'");
        }

        ImportJob job = new ImportJob()
        {
            JobType = JobType.ChesterBeatty,
            ResourceNumber = resourceNumber,
            FriendlyUrl = friendlyUrl,
            SrcUrl = srcUrl,
            QueueTime = DateTime.Now,
            ProgressPercent = 0,
            Status = ImportJobStatus.NotStarted
        };
        await _context.ImportJobs.AddAsync(job);
        await _context.SaveChangesAsync();

        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                const int importRetryCount = 5;
                const int importRetryInitialSleep = 500; // milliseconds, multiplied by the attempt number

                // Persists the failed state of the job through a short-lived context.
                async Task FailJobAsync(string reason)
                {
                    using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                    {
                        job.EndTime = DateTime.Now;
                        job.Status = ImportJobStatus.Failed;
                        job.Exception = reason;
                        importJobUpdaterDb.Update(job);
                        await importJobUpdaterDb.SaveChangesAsync();
                    }
                }

                // {ImageStoragePath}/{friendlyUrl}/{sizeFolder}/{0001}.jpg
                string StoredImagePath(string sizeFolder, int pageOrder)
                {
                    return Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), sizeFolder), $"{pageOrder}".PadLeft(4, '0') + ".jpg");
                }

                // File.Delete throws when the directory itself is missing, hence the explicit existence check.
                void DeleteIfExists(string path)
                {
                    if (File.Exists(path))
                    {
                        File.Delete(path);
                    }
                }

                try
                {
                    using (RMuseumDbContext context = new RMuseumDbContext(Configuration))
                    {
                        RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from url {job.ResourceNumber}", $"extracted from url {job.ResourceNumber}")
                        {
                            Status = PublishStatus.Draft,
                            DateTime = DateTime.Now,
                            LastModified = DateTime.Now,
                            CoverItemIndex = 0,
                            FriendlyUrl = friendlyUrl,
                        };

                        List<RTagValue> meta = new List<RTagValue>();
                        RTagValue tag;

                        tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1);
                        meta.Add(tag);

                        tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1);
                        meta.Add(tag);

                        tag = await TagHandler.PrepareAttribute(context, "Source", "Chester Beatty Digital Collections", 1);
                        tag.ValueSupplement = srcUrl;
                        meta.Add(tag);

                        using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                        {
                            job.StartTime = DateTime.Now;
                            job.Status = ImportJobStatus.Running;
                            job.SrcContent = "";
                            importJobUpdaterDb.Update(job);
                            await importJobUpdaterDb.SaveChangesAsync();
                        }

                        List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();
                        int order = 0;

                        using (var client = new HttpClient())
                        {
                            // the page count is not known in advance: loop until the server reports a missing page
                            while (true)
                            {
                                order++;

                                using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration))
                                {
                                    job.ProgressPercent = order;
                                    importJobUpdaterDb.Update(job);
                                    await importJobUpdaterDb.SaveChangesAsync();
                                }

                                string paddedOrder = $"{order}".PadLeft(4, '0');
                                string viewerOrder = $"{order}".PadLeft(3, '0');

                                RArtifactItemRecord page = new RArtifactItemRecord()
                                {
                                    Name = $"تصویر {order}",
                                    NameInEnglish = $"Image {order} of {book.NameInEnglish}",
                                    Description = "",
                                    DescriptionInEnglish = "",
                                    Order = order,
                                    FriendlyUrl = $"p{paddedOrder}",
                                    LastModified = DateTime.Now
                                };

                                string imageUrl = $"https://viewer.cbl.ie/viewer/rest/image/Per_{resourceNumber}/Per{resourceNumber}_{viewerOrder}.jpg/full/!10000,10000/0/default.jpg?ignoreWatermark=true";

                                page.Tags = new RTagValue[] { };

                                string origPath = StoredImagePath("orig", order);
                                string normPath = StoredImagePath("norm", order);
                                string thumbPath = StoredImagePath("thumb", order);
                                string storedFileName = paddedOrder + ".jpg";

                                RPictureFile pageImage = null;

                                // 1. try to reuse the files of a previous (interrupted) import
                                if (File.Exists(origPath) && File.Exists(normPath) && File.Exists(thumbPath))
                                {
                                    RServiceResult<RPictureFile> recoveredPicture = await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1,
                                        imageUrl, origPath, normPath, thumbPath, storedFileName, friendlyUrl);
                                    pageImage = recoveredPicture.Result;
                                }

                                // 2. otherwise remove any partial leftovers and download the image
                                if (pageImage == null)
                                {
                                    DeleteIfExists(origPath);
                                    DeleteIfExists(normPath);
                                    DeleteIfExists(thumbPath);

                                    var imageResult = await client.GetAsync(imageUrl);
                                    try
                                    {
                                        if (imageResult.StatusCode == HttpStatusCode.Forbidden || imageResult.StatusCode == HttpStatusCode.NotFound)
                                        {
                                            // no such page: the previous one was the last page
                                            break;
                                        }

                                        // retry "503 Service Unavailable" with a linearly growing, non-blocking delay
                                        int retryCount = 0;
                                        while (retryCount < importRetryCount && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable)
                                        {
                                            imageResult.Dispose();
                                            await Task.Delay(importRetryInitialSleep * (retryCount + 1));
                                            imageResult = await client.GetAsync(imageUrl);
                                            retryCount++;
                                        }

                                        if (!imageResult.IsSuccessStatusCode)
                                        {
                                            await FailJobAsync($"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}");
                                            return;
                                        }

                                        using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync())
                                        {
                                            RServiceResult<RPictureFile> picture = await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, storedFileName, friendlyUrl);
                                            if (picture.Result == null)
                                            {
                                                throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                                            }
                                            pageImage = picture.Result;
                                        }
                                    }
                                    finally
                                    {
                                        // runs for break / return / throw as well, so the response never leaks
                                        imageResult.Dispose();
                                    }

                                    // kept from the original implementation: release image buffers eagerly between pages
                                    GC.Collect();
                                }

                                page.Images = new RPictureFile[] { pageImage };
                                page.CoverImageIndex = 0;
                                if (book.CoverItemIndex == (order - 1))
                                {
                                    book.CoverImage = RPictureFile.Duplicate(pageImage);
                                }

                                tag = await TagHandler.PrepareAttribute(context, "Source", "Chester Beatty Digital Collections", 1);
                                tag.ValueSupplement = $"https://viewer.cbl.ie/viewer/object/Per_{resourceNumber}/{viewerOrder}/";
                                page.Tags = new RTagValue[] { tag };

                                // single registration point: the page used to be added here AND inside each
                                // branch above, which stored every page twice and doubled ItemCount
                                pages.Add(page);
                            }
                        }

                        book.Tags = meta.ToArray();
                        book.Items = pages.ToArray();
                        book.ItemCount = pages.Count;

                        if (pages.Count == 0)
                        {
                            await FailJobAsync("Pages.Count == 0");
                            return;
                        }

                        await context.Artifacts.AddAsync(book);
                        await context.SaveChangesAsync();

                        job.ProgressPercent = 100;
                        job.Status = ImportJobStatus.Succeeded;
                        job.ArtifactId = book.Id;
                        job.EndTime = DateTime.Now;
                        context.Update(job);
                        await context.SaveChangesAsync();
                    }
                }
                catch (Exception exp)
                {
                    await FailJobAsync(exp.ToString());
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// from https://catalog.hathitrust.org
/// </summary>
/// <remarks>
/// Validates the request, stores a new import job and queues the actual import as a background
/// work item. The background item reads the catalog record xml (title, author, notes and the
/// resource id found in the "HOL" datafield), then fetches page images by increasing sequence
/// number until the stored thumbnail of a page has the same MD5 hash as the previous download,
/// which is treated as the end of the book.
/// </remarks>
/// <param name="resourceNumber">006814127</param>
/// <param name="friendlyUrl">friendly url of the artifact; falls back to <paramref name="resourceNumber"/> when empty</param>
/// <returns>
/// true when the job has been queued; false with a message when a job for the resource already
/// exists, the friendly url is duplicated, or queuing throws
/// </returns>
private async Task<RServiceResult<bool>> StartImportingFromHathiTrust(string resourceNumber, string friendlyUrl)
{
    string url = $"https://catalog.hathitrust.org/Record/{resourceNumber}.xml";

    // existence checks: AnyAsync does not throw when more than one row matches
    bool jobExists = await _context.ImportJobs
        .AnyAsync(j => j.JobType == JobType.HathiTrust && j.ResourceNumber == resourceNumber && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted));
    if (jobExists)
    {
        return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing {url}");
    }

    if (string.IsNullOrEmpty(friendlyUrl))
    {
        friendlyUrl = resourceNumber;
    }

    if (await _context.Artifacts.AnyAsync(a => a.FriendlyUrl == friendlyUrl))
    {
        return new RServiceResult<bool>(false, $"duplicated artifact friendly url '{friendlyUrl}'");
    }

    ImportJob job = new ImportJob()
    {
        JobType = JobType.HathiTrust,
        ResourceNumber = resourceNumber,
        FriendlyUrl = friendlyUrl,
        SrcUrl = url,
        QueueTime = DateTime.Now,
        ProgressPercent = 0,
        Status = ImportJobStatus.NotStarted
    };
    await _context.ImportJobs.AddAsync(job);
    await _context.SaveChangesAsync();

    try
    {
        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                const int importRetryCount = 200;
                const int importRetryInitialSleep = 500; // milliseconds, multiplied by the attempt number

                // Persists the failed state of the job through a short-lived context.
                async Task FailJobAsync(string reason)
                {
                    using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                    {
                        job.EndTime = DateTime.Now;
                        job.Status = ImportJobStatus.Failed;
                        job.Exception = reason;
                        importJobUpdaterDb.Update(job);
                        await importJobUpdaterDb.SaveChangesAsync();
                    }
                }

                // {ImageStoragePath}/{friendlyUrl}/{sizeFolder}/{0001}.jpg
                string StoredImagePath(string sizeFolder, int pageOrder)
                {
                    return Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), sizeFolder), $"{pageOrder}".PadLeft(4, '0') + ".jpg");
                }

                // File.Delete throws when the directory itself is missing, hence the explicit existence check.
                void DeleteIfExists(string path)
                {
                    if (File.Exists(path))
                    {
                        File.Delete(path);
                    }
                }

                // Appends (space separated, trimmed) the values of all subfields whose "code" attribute is accepted.
                string AppendSubfields(XElement datafield, string current, Func<string, bool> acceptCode)
                {
                    foreach (var subfield in datafield.Elements("subfield"))
                    {
                        var codeAttribute = subfield.Attribute("code");
                        if (codeAttribute != null && acceptCode(codeAttribute.Value))
                        {
                            current = (current + " " + subfield.Value).Trim();
                        }
                    }
                    return current;
                }

                try
                {
                    using (var client = new HttpClient())
                    using (var result = await client.GetAsync(url))
                    {
                        if (!result.IsSuccessStatusCode)
                        {
                            await FailJobAsync($"Http result is not ok ({result.StatusCode}) for {url}");
                            return;
                        }

                        using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                        {
                            // the friendly url may have been taken while the job was waiting in the queue
                            if (await context.Artifacts.AnyAsync(a => a.FriendlyUrl == job.FriendlyUrl))
                            {
                                await FailJobAsync("aborted because of duplicated friendly url");
                                return;
                            }

                            RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from {url}", $"extracted from {url}")
                            {
                                Status = PublishStatus.Draft,
                                DateTime = DateTime.Now,
                                LastModified = DateTime.Now,
                                CoverItemIndex = 0,
                                FriendlyUrl = friendlyUrl
                            };

                            List<RTagValue> meta = new List<RTagValue>();
                            RTagValue tag;

                            string xml = await result.Content.ReadAsStringAsync();

                            using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                            {
                                job.StartTime = DateTime.Now;
                                job.Status = ImportJobStatus.Running;
                                job.SrcContent = xml;
                                importJobUpdaterDb.Update(job);
                                await importJobUpdaterDb.SaveChangesAsync();
                            }

                            string title = "";
                            string author = "";
                            string pdfResourceNumber = "";
                            int tagOrder = 1;

                            XElement elObject = XDocument.Parse(xml).Root;
                            foreach (var datafield in elObject.Element("record").Elements("datafield"))
                            {
                                tagOrder++;

                                var tagAttribute = datafield.Attribute("tag");
                                if (tagAttribute == null)
                                {
                                    continue;
                                }

                                string hathiTrustTag = tagAttribute.Value;
                                switch (hathiTrustTag)
                                {
                                    case "245":
                                    case "246":
                                        title = AppendSubfields(datafield, title, c => c == "a" || c == "f");
                                        break;
                                    case "100":
                                        author = AppendSubfields(datafield, author, c => c == "a" || c == "d");
                                        break;
                                    case "HOL":
                                        // the last subfield with code "p" wins
                                        foreach (var subfield in datafield.Elements("subfield"))
                                        {
                                            var codeAttribute = subfield.Attribute("code");
                                            if (codeAttribute != null && codeAttribute.Value == "p")
                                            {
                                                pdfResourceNumber = subfield.Value;
                                            }
                                        }
                                        break;
                                    default:
                                        // every other numeric datafield in 100..900 is stored as a "Notes" tag
                                        if (int.TryParse(hathiTrustTag, out int numericTag) && numericTag >= 100 && numericTag <= 900)
                                        {
                                            string note = AppendSubfields(datafield, "", c => true);
                                            tag = await TagHandler.PrepareAttribute(context, "Notes", note, tagOrder);
                                            meta.Add(tag);
                                        }
                                        break;
                                }
                            }

                            if (string.IsNullOrEmpty(pdfResourceNumber))
                            {
                                await FailJobAsync("pdfResourceNumber not found");
                                return;
                            }

                            tag = await TagHandler.PrepareAttribute(context, "Title", title, 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Contributor Names", author, 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1);
                            meta.Add(tag);

                            tag = await TagHandler.PrepareAttribute(context, "Source", "HathiTrust Digital Library", 1);
                            string viewerUrl = $"https://babel.hathitrust.org/cgi/pt?id={pdfResourceNumber}";
                            tag.ValueSupplement = viewerUrl;
                            meta.Add(tag);

                            book.Name = book.NameInEnglish = book.Description = book.DescriptionInEnglish = title;
                            book.Tags = meta.ToArray();

                            List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();
                            // NOTE(review): the hash is only refreshed for downloaded pages, not for recovered
                            // ones, so the first download after a run of recovered pages is never detected
                            // as a repeated page — confirm whether that matters for resumed imports.
                            string lastMD5hash = "";
                            int order = 0;

                            while (true)
                            {
                                order++;

                                using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                                {
                                    job.ProgressPercent = order;
                                    importJobUpdaterDb.Update(job);
                                    await importJobUpdaterDb.SaveChangesAsync();
                                }

                                string imageUrl = $"https://babel.hathitrust.org/cgi/imgsrv/image?id={pdfResourceNumber};seq={order};size=1000;rotation=0";
                                string paddedOrder = $"{order}".PadLeft(4, '0');

                                RArtifactItemRecord page = new RArtifactItemRecord()
                                {
                                    Name = $"تصویر {order}",
                                    NameInEnglish = $"Image {order}",
                                    Description = "",
                                    DescriptionInEnglish = "",
                                    Order = order,
                                    FriendlyUrl = $"p{paddedOrder}",
                                    LastModified = DateTime.Now
                                };

                                tag = await TagHandler.PrepareAttribute(context, "Source", "HathiTrust Digital Library", 1);
                                tag.ValueSupplement = viewerUrl;
                                page.Tags = new RTagValue[] { tag };

                                string origPath = StoredImagePath("orig", order);
                                string normPath = StoredImagePath("norm", order);
                                string thumbPath = StoredImagePath("thumb", order);
                                string storedFileName = paddedOrder + ".jpg";

                                // 1. try to reuse the files of a previous (interrupted) import
                                if (File.Exists(origPath) && File.Exists(normPath) && File.Exists(thumbPath))
                                {
                                    RServiceResult<RPictureFile> recoveredPicture = await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1,
                                        imageUrl, origPath, normPath, thumbPath, storedFileName, friendlyUrl);
                                    if (recoveredPicture.Result != null)
                                    {
                                        page.Images = new RPictureFile[] { recoveredPicture.Result };
                                        page.CoverImageIndex = 0;
                                        if (book.CoverItemIndex == (order - 1))
                                        {
                                            book.CoverImage = RPictureFile.Duplicate(recoveredPicture.Result);
                                        }
                                        pages.Add(page);
                                        continue;
                                    }
                                }

                                // 2. otherwise remove any partial leftovers and download the image
                                DeleteIfExists(origPath);
                                DeleteIfExists(normPath);
                                DeleteIfExists(thumbPath);

                                var imageResult = await client.GetAsync(imageUrl);
                                try
                                {
                                    // retry "503 Service Unavailable" with a linearly growing, non-blocking delay
                                    int retryCount = 0;
                                    while (retryCount < importRetryCount && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable)
                                    {
                                        imageResult.Dispose();
                                        await Task.Delay(importRetryInitialSleep * (retryCount + 1));
                                        imageResult = await client.GetAsync(imageUrl);
                                        retryCount++;
                                    }

                                    if (!imageResult.IsSuccessStatusCode)
                                    {
                                        await FailJobAsync($"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}");
                                        return;
                                    }

                                    using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync())
                                    {
                                        RServiceResult<RPictureFile> picture = await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, storedFileName, friendlyUrl);
                                        if (picture.Result == null)
                                        {
                                            throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                                        }

                                        // MD5 is only a cheap "same picture as before" fingerprint here, not a security measure
                                        string md5hash;
                                        using (var md5 = MD5.Create())
                                        {
                                            md5hash = string.Join("", md5.ComputeHash(File.ReadAllBytes(thumbPath)).Select(x => x.ToString("X2")));
                                        }

                                        bool lastPage = md5hash == lastMD5hash;
                                        if (lastPage)
                                        {
                                            // same picture as the previous download: discard it and stop
                                            File.Delete(origPath);
                                            File.Delete(normPath);
                                            File.Delete(thumbPath);
                                        }
                                        lastMD5hash = md5hash;

                                        if (lastPage)
                                        {
                                            break;
                                        }

                                        page.Images = new RPictureFile[] { picture.Result };
                                        page.CoverImageIndex = 0;
                                        if (book.CoverItemIndex == (order - 1))
                                        {
                                            book.CoverImage = RPictureFile.Duplicate(picture.Result);
                                        }
                                        pages.Add(page);
                                    }
                                }
                                finally
                                {
                                    // runs for break / return / throw as well, so the response never leaks
                                    imageResult.Dispose();
                                }

                                // kept from the original implementation: release image buffers eagerly between pages
                                GC.Collect();
                            }

                            book.Items = pages.ToArray();
                            book.ItemCount = pages.Count;

                            if (pages.Count == 0)
                            {
                                await FailJobAsync("Pages.Count == 0");
                                return;
                            }

                            await context.Artifacts.AddAsync(book);
                            await context.SaveChangesAsync();

                            job.ProgressPercent = 100;
                            job.Status = ImportJobStatus.Succeeded;
                            job.ArtifactId = book.Id;
                            job.EndTime = DateTime.Now;
                            context.Update(job);
                            await context.SaveChangesAsync();
                        }
                    }
                }
                catch (Exception exp)
                {
                    await FailJobAsync(exp.ToString());
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary>
/// Stores an image below {ImageStoragePath}/{pictureFile.FolderName} in three versions
/// ("orig", "norm" and "thumb" sub folders) and fills <paramref name="pictureFile"/> with the
/// content type, size, stored file names and pixel dimensions of each version.
/// The image is taken from <paramref name="uploadedImage"/> when it is not null, otherwise from
/// <paramref name="stream"/> (which is then treated as a jpeg named <paramref name="originalFileNameForStreams"/>).
/// </summary>
/// <param name="uploadedImage">uploaded form file, may be null when <paramref name="stream"/> is supplied</param>
/// <param name="pictureFile">record to fill; its FolderName selects the storage folder</param>
/// <param name="stream">seekable image stream, used only when <paramref name="uploadedImage"/> is null</param>
/// <param name="originalFileNameForStreams">original file name to record for stream input</param>
/// <returns>the filled record, or a null result with a message when both inputs are null or a directory cannot be created</returns>
private async Task<RServiceResult<RPictureFile>> ProcessImage(IFormFile uploadedImage, RPictureFile pictureFile, Stream stream, string originalFileNameForStreams)
{
    if (uploadedImage == null && stream == null)
    {
        return new RServiceResult<RPictureFile>(null, "ProcessImage: uploadedImage == null && stream == null");
    }

    pictureFile.ContentType = uploadedImage == null ? "image/jpeg" : uploadedImage.ContentType;
    pictureFile.FileSizeInBytes = uploadedImage == null ? stream.Length : uploadedImage.Length;
    pictureFile.OriginalFileName = uploadedImage == null ? originalFileNameForStreams : uploadedImage.FileName;

    string fullDirStorePath = Path.Combine(ImageStoragePath, pictureFile.FolderName);
    string originalStorePath = Path.Combine(fullDirStorePath, "orig");
    string normalStorePath = Path.Combine(fullDirStorePath, "norm");
    string thumbStorePath = Path.Combine(fullDirStorePath, "thumb");

    foreach (string path in new string[] { fullDirStorePath, originalStorePath, normalStorePath, thumbStorePath })
    {
        if (!Directory.Exists(path))
        {
            try
            {
                Directory.CreateDirectory(path);
            }
            catch
            {
                return new RServiceResult<RPictureFile>(null, $"ProcessImage: create dir failed {path}");
            }
        }
    }

    string ext = uploadedImage == null ? ".jpg" : Path.GetExtension(uploadedImage.FileName);
    string baseName = Path.GetFileNameWithoutExtension(pictureFile.OriginalFileName);

    // Returns "<baseName><extension>", or "<baseName>-<guid><extension>" when that name is already
    // taken inside the directory; fullPath receives the matching absolute path.
    string ReserveFileName(string directory, string extension, out string fullPath)
    {
        string fileName = baseName + extension;
        fullPath = Path.Combine(directory, fileName);
        while (File.Exists(fullPath))
        {
            fileName = baseName + "-" + Guid.NewGuid().ToString() + extension;
            fullPath = Path.Combine(directory, fileName);
        }
        return fileName;
    }

    // Draws source scaled to width x height and writes the result as a jpeg file.
    // The intermediate bitmap is disposed here; it used to be leaked.
    void SaveScaledJpeg(Image source, int width, int height, string targetPath)
    {
        using (Image scaled = new Bitmap(width, height))
        {
            using (Graphics graphics = Graphics.FromImage(scaled))
            {
                graphics.DrawImage(source, 0, 0, width, height);
            }
            using (FileStream output = new FileStream(targetPath, FileMode.Create))
            {
                scaled.Save(output, ImageFormat.Jpeg);
            }
        }
    }

    // --- original: stored byte for byte ---
    string originalFileName = ReserveFileName(originalStorePath, ext, out string originalFileStorePath);
    pictureFile.StoredFileName = $@"orig\{originalFileName}";

    using (FileStream fsMain = new FileStream(originalFileStorePath, FileMode.Create))
    {
        if (uploadedImage != null)
        {
            await uploadedImage.CopyToAsync(fsMain);
        }
        else
        {
            stream.Position = 0;
            await stream.CopyToAsync(fsMain);
        }
    }

    using (MemoryStream ms = new MemoryStream())
    {
        if (uploadedImage != null)
        {
            await uploadedImage.CopyToAsync(ms);
        }
        else
        {
            stream.Position = 0;
            await stream.CopyToAsync(ms);
        }

        // the copy leaves the stream positioned at its end; decode from the beginning
        ms.Position = 0;

        using (Image img = Bitmap.FromStream(ms))
        {
            pictureFile.ImageWidth = img.Width;
            pictureFile.ImageHeight = img.Height;

            // --- normal size ---
            if (img.Width <= NormalImageMaxWidth && img.Height <= NormalImageMaxHeight && ext.Equals(".jpg", StringComparison.InvariantCultureIgnoreCase))
            {
                // a small enough jpeg doubles as its own normal size version
                pictureFile.NormalSizeImageStoredFileName = pictureFile.StoredFileName;
                pictureFile.NormalSizeImageWidth = img.Width;
                pictureFile.NormalSizeImageHeight = img.Height;
            }
            else
            {
                // fit to the maximum width first, then to the maximum height, keeping the aspect ratio;
                // Math.Max(1, ...) keeps integer division from producing a 0 pixel side, which Bitmap rejects
                pictureFile.NormalSizeImageWidth = NormalImageMaxWidth;
                pictureFile.NormalSizeImageHeight = Math.Max(1, NormalImageMaxWidth * pictureFile.ImageHeight / pictureFile.ImageWidth);
                if (pictureFile.NormalSizeImageHeight > NormalImageMaxHeight)
                {
                    pictureFile.NormalSizeImageHeight = NormalImageMaxHeight;
                    pictureFile.NormalSizeImageWidth = Math.Max(1, NormalImageMaxHeight * pictureFile.ImageWidth / pictureFile.ImageHeight);
                }

                string normalFileName = ReserveFileName(normalStorePath, ".jpg", out string normalFileStorePath);
                pictureFile.NormalSizeImageStoredFileName = $@"norm\{normalFileName}";

                SaveScaledJpeg(img, pictureFile.NormalSizeImageWidth, pictureFile.NormalSizeImageHeight, normalFileStorePath);
            }

            // --- thumbnail (always generated, scaled from the original image) ---
            pictureFile.ThumbnailImageWidth = ThumbnailImageWidth;
            pictureFile.ThumbnailImageHeight = Math.Max(1, ThumbnailImageWidth * pictureFile.ImageHeight / pictureFile.ImageWidth);
            if (pictureFile.ThumbnailImageHeight > ThumbnailImageMaxHeight)
            {
                pictureFile.ThumbnailImageHeight = ThumbnailImageMaxHeight;
                pictureFile.ThumbnailImageWidth = Math.Max(1, pictureFile.ThumbnailImageHeight * pictureFile.ImageWidth / pictureFile.ImageHeight);
            }

            string thumbFileName = ReserveFileName(thumbStorePath, ".jpg", out string thumbFileStorePath);
            pictureFile.ThumbnailImageStoredFileName = $@"thumb\{thumbFileName}";

            SaveScaledJpeg(img, pictureFile.ThumbnailImageWidth, pictureFile.ThumbnailImageHeight, thumbFileStorePath);
        }
    }

    return new RServiceResult<RPictureFile>(pictureFile);
}
/// <summary> /// from http://www.bl.uk /// </summary> /// <param name="resourceNumber">grenville_xli_f001r</param> /// <param name="friendlyUrl"></param> /// <returns></returns> private async Task <RServiceResult <bool> > StartImportingFromBritishLibrary(string resourceNumber, string friendlyUrl) { string url = $"http://www.bl.uk/manuscripts/Viewer.aspx?ref={resourceNumber}"; if ( ( await _context.ImportJobs .Where(j => j.JobType == JobType.BritishLibrary && j.ResourceNumber == resourceNumber && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted)) .SingleOrDefaultAsync() ) != null ) { return(new RServiceResult <bool>(false, $"Job is already scheduled or running for importing {url}")); } if (string.IsNullOrEmpty(friendlyUrl)) { friendlyUrl = resourceNumber; } if ( (await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).SingleOrDefaultAsync()) != null ) { return(new RServiceResult <bool>(false, $"duplicated artifact friendly url '{friendlyUrl}'")); } ImportJob job = new ImportJob() { JobType = JobType.BritishLibrary, ResourceNumber = resourceNumber, FriendlyUrl = friendlyUrl, SrcUrl = url, QueueTime = DateTime.Now, ProgressPercent = 0, Status = ImportJobStatus.NotStarted }; await _context.ImportJobs.AddAsync ( job ); await _context.SaveChangesAsync(); try { _backgroundTaskQueue.QueueBackgroundWorkItem ( async token => { try { using (var client = new HttpClient()) { client.Timeout = TimeSpan.FromMinutes(5); using (var result = await client.GetAsync(url)) { if (result.IsSuccessStatusCode) { using (RMuseumDbContext context = new RMuseumDbContext(Configuration)) { if ( (await context.Artifacts.Where(a => a.FriendlyUrl == job.FriendlyUrl).SingleOrDefaultAsync()) != null ) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "aborted because of duplicated friendly url"; importJobUpdaterDb.Update(job); await 
importJobUpdaterDb.SaveChangesAsync(); } return; } RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from {url}", $"extracted from {url}") { Status = PublishStatus.Draft, DateTime = DateTime.Now, LastModified = DateTime.Now, CoverItemIndex = 0, FriendlyUrl = friendlyUrl }; List <RTagValue> meta = new List <RTagValue>(); RTagValue tag; string html = await result.Content.ReadAsStringAsync(); using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.StartTime = DateTime.Now; job.Status = ImportJobStatus.Running; job.SrcContent = html; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } int nIdxStart = html.IndexOf("PageList"); if (nIdxStart == -1) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "PageList not found"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } nIdxStart = html.IndexOf("value=\"", nIdxStart); if (nIdxStart == -1) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "value after PageList not found"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } nIdxStart += "value=\"".Length; string strPageList = html.Substring(nIdxStart, html.IndexOf('"', nIdxStart) - nIdxStart); nIdxStart = html.IndexOf("TextList"); if (nIdxStart == -1) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "TextList not found"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } nIdxStart = html.IndexOf("value=\"", nIdxStart); if (nIdxStart == -1) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = 
DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "value after TextList not found"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } nIdxStart += "value=\"".Length; string strTextList = html.Substring(nIdxStart, html.IndexOf('"', nIdxStart) - nIdxStart); nIdxStart = html.IndexOf("TitleList"); if (nIdxStart == -1) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "TitleList not found"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } nIdxStart = html.IndexOf("value=\"", nIdxStart); if (nIdxStart == -1) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "value after TitleList not found"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } nIdxStart += "value=\"".Length; string strTitleList = html.Substring(nIdxStart, html.IndexOf('"', nIdxStart) - nIdxStart); string[] PageUrls = strPageList.Split("||", StringSplitOptions.None); string[] PageTexts = strTextList.Split("||", StringSplitOptions.None); string[] PageTitles = strTitleList.Split("||", StringSplitOptions.None); if (PageUrls.Length != PageTexts.Length || PageTexts.Length != PageTitles.Length) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "PageUrls.Length != PageTexts.Length || PageTexts.Length != PageTitles.Length"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } tag = await TagHandler.PrepareAttribute(context, "Title", "Untitled", 1); meta.Add(tag); tag = await TagHandler.PrepareAttribute(context, "Contributor Names", "Unknown", 1); meta.Add(tag); tag = await 
TagHandler.PrepareAttribute(context, "Type", "Book", 1); meta.Add(tag); tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1); meta.Add(tag); book.Tags = meta.ToArray(); tag = await TagHandler.PrepareAttribute(context, "Source", "British Library", 1); string viewerUrl = $"http://www.bl.uk/manuscripts/FullDisplay.aspx?ref={resourceNumber.Substring(0, resourceNumber.LastIndexOf('_'))}"; tag.ValueSupplement = viewerUrl; meta.Add(tag); List <RArtifactItemRecord> pages = new List <RArtifactItemRecord>(); int order = 0; for (int i = 0; i < PageUrls.Length; i++) { if (PageUrls[i] == "##") { continue; } order++; using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.ProgressPercent = order; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } RArtifactItemRecord page = new RArtifactItemRecord() { Name = $"تصویر {order}", NameInEnglish = $"Image {order}", Description = "", DescriptionInEnglish = "", Order = order, FriendlyUrl = $"p{$"{order}".PadLeft(4, '0')}", LastModified = DateTime.Now }; List <RTagValue> pageTags = new List <RTagValue>(); tag = await TagHandler.PrepareAttribute(context, "Source", "British Library", 1); tag.ValueSupplement = $"http://www.bl.uk/manuscripts/Viewer.aspx?ref={PageUrls[i]}"; pageTags.Add(tag); if (!string.IsNullOrEmpty(PageTitles[i])) { RTagValue toc = await TagHandler.PrepareAttribute(context, "Title in TOC", PageTitles[i], 1); toc.ValueSupplement = "1"; //font size pageTags.Add(toc); } if (!string.IsNullOrEmpty(PageTexts[i])) { tag = await TagHandler.PrepareAttribute(context, "Label", PageTexts[i], 1); pageTags.Add(tag); } page.Tags = pageTags.ToArray(); bool recovered = false; if ( File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "orig"), $"{order}".PadLeft(4, '0') + ".jpg") ) && File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "norm"), 
$"{order}".PadLeft(4, '0') + ".jpg") ) && File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "thumb"), $"{order}".PadLeft(4, '0') + ".jpg") ) ) { RServiceResult <RPictureFile> picture = await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1, viewerUrl, Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "orig"), $"{order}".PadLeft(4, '0') + ".jpg"), Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "norm"), $"{order}".PadLeft(4, '0') + ".jpg"), Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "thumb"), $"{order}".PadLeft(4, '0') + ".jpg"), $"{order}".PadLeft(4, '0') + ".jpg", friendlyUrl); if (picture.Result != null) { recovered = true; page.Images = new RPictureFile[] { picture.Result }; page.CoverImageIndex = 0; if (book.CoverItemIndex == (order - 1)) { book.CoverImage = RPictureFile.Duplicate(picture.Result); } pages.Add(page); } } if (!recovered) { if ( File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "orig"), $"{order}".PadLeft(4, '0') + ".jpg") ) ) { File.Delete ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "orig"), $"{order}".PadLeft(4, '0') + ".jpg") ); } if ( File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "norm"), $"{order}".PadLeft(4, '0') + ".jpg") ) ) { File.Delete ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "norm"), $"{order}".PadLeft(4, '0') + ".jpg") ); } if ( File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "thumb"), $"{order}".PadLeft(4, '0') + ".jpg") ) ) { File.Delete ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "thumb"), $"{order}".PadLeft(4, 
'0') + ".jpg") ); } /* * failed multithread attempt: * * BLTileMixer mixer = new BLTileMixer(); * RServiceResult<Stream> blResult = await mixer.DownloadMix(PageUrls[i], order); */ Dictionary <(int x, int y), Image> tiles = new Dictionary <(int x, int y), Image>(); int max_x = -1; for (int x = 0; ; x++) { string imageUrl = $"http://www.bl.uk/manuscripts/Proxy.ashx?view={PageUrls[i]}_files/13/{x}_0.jpg"; var imageResult = await client.GetAsync(imageUrl); int _ImportRetryCount = 5; int _ImportRetryInitialSleep = 500; int retryCount = 0; while (retryCount < _ImportRetryCount && !imageResult.IsSuccessStatusCode && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable) { imageResult.Dispose(); Thread.Sleep(_ImportRetryInitialSleep * (retryCount + 1)); imageResult = await client.GetAsync(imageUrl); retryCount++; } if (imageResult.IsSuccessStatusCode) { using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync()) { imageStream.Position = 0; try { Image tile = Image.FromStream(imageStream); tiles.Add((x, 0), tile); max_x = x; } catch (Exception aexp) { if (aexp is ArgumentException) { break; } throw aexp; } } } else { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = $"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } imageResult.Dispose(); return; } } int max_y = -1; for (int y = 1; ; y++) { string imageUrl = $"http://www.bl.uk/manuscripts/Proxy.ashx?view={PageUrls[i]}_files/13/0_{y}.jpg"; var imageResult = await client.GetAsync(imageUrl); int _ImportRetryCount = 5; int _ImportRetryInitialSleep = 500; int retryCount = 0; while (retryCount < _ImportRetryCount && !imageResult.IsSuccessStatusCode && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable) { imageResult.Dispose(); Thread.Sleep(_ImportRetryInitialSleep * 
(retryCount + 1)); imageResult = await client.GetAsync(imageUrl); retryCount++; } if (imageResult.IsSuccessStatusCode) { using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync()) { if (imageStream.Length <= 248) { break; } imageStream.Position = 0; try { Image tile = Image.FromStream(imageStream); tiles.Add((0, y), tile); max_y = y; } catch (Exception aexp) { if (aexp is ArgumentException) { break; } throw aexp; } } } else { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = $"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } imageResult.Dispose(); return; } } for (int x = 0; x <= max_x; x++) { for (int y = 0; y <= max_y; y++) { if (tiles.TryGetValue((x, y), out Image tmp) == false) { string imageUrl = $"http://www.bl.uk/manuscripts/Proxy.ashx?view={PageUrls[i]}_files/13/{x}_{y}.jpg"; var imageResult = await client.GetAsync(imageUrl); int _ImportRetryCount = 5; int _ImportRetryInitialSleep = 500; int retryCount = 0; while (retryCount < _ImportRetryCount && !imageResult.IsSuccessStatusCode && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable) { imageResult.Dispose(); Thread.Sleep(_ImportRetryInitialSleep * (retryCount + 1)); imageResult = await client.GetAsync(imageUrl); retryCount++; } if (imageResult.IsSuccessStatusCode) { using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync()) { if (imageStream.Length == 0) { break; } imageStream.Position = 0; tiles.Add((x, y), Image.FromStream(imageStream)); } } else { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = $"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}"; importJobUpdaterDb.Update(job); await 
importJobUpdaterDb.SaveChangesAsync(); } imageResult.Dispose(); return; } } } } int tileWidth = tiles[(0, 0)].Width; int tileHeight = tiles[(0, 0)].Height;
/// <summary>
/// Schedules a background import of a manuscript from the Princeton Digital Library
/// (http://pudl.princeton.edu/).
/// </summary>
/// <remarks>
/// The method itself only validates the request, stores an <see cref="ImportJob"/> in the
/// <see cref="ImportJobStatus.NotStarted"/> state and queues the actual work. The queued work item
/// downloads the object's metadata XML, converts its properties to tags, then walks the
/// "RelatedObjects" structure to create one item per page (reusing image files already present
/// in the storage folder, otherwise downloading them) and finally walks the "Physical" structure
/// to attach leaf / folio side tags. Every failure inside the work item is recorded on the job
/// (status <see cref="ImportJobStatus.Failed"/>, message in its Exception field) instead of being thrown.
/// </remarks>
/// <param name="resourceNumber">Princeton object identifier, e.g. dj52w476m</param>
/// <param name="friendlyUrl">friendly url of the artifact to be created; when null or empty, <paramref name="resourceNumber"/> is used</param>
/// <returns>
/// true when the work item was queued; false (with a message) when a job for the same resource exists
/// that is neither failed nor aborted, when the friendly url is already used by an artifact,
/// or when queuing throws.
/// </returns>
private async Task<RServiceResult<bool>> StartImportingFromPrinceton(string resourceNumber, string friendlyUrl)
{
    // Retry policy for image downloads answered with 503 (Service Unavailable):
    // wait 500ms, 1000ms, 1500ms, ... before each new attempt, at most 5 retries.
    const int ImportRetryCount = 5;
    const int ImportRetryInitialSleep = 500;

    string url = $"http://pudl.princeton.edu/mdCompiler2.php?obj={resourceNumber}";

    var unfinishedJob = await _context.ImportJobs
        .Where(j => j.JobType == JobType.Princeton
                    && j.ResourceNumber == resourceNumber
                    && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted))
        .SingleOrDefaultAsync();
    if (unfinishedJob != null)
    {
        return new RServiceResult<bool>(false, $"Job is already scheduled or running for importing {url}");
    }

    if (string.IsNullOrEmpty(friendlyUrl))
    {
        friendlyUrl = resourceNumber;
    }

    var sameUrlArtifact = await _context.Artifacts
        .Where(a => a.FriendlyUrl == friendlyUrl)
        .SingleOrDefaultAsync();
    if (sameUrlArtifact != null)
    {
        return new RServiceResult<bool>(false, $"duplicated friendly url '{friendlyUrl}'");
    }

    ImportJob job = new ImportJob()
    {
        JobType = JobType.Princeton,
        ResourceNumber = resourceNumber,
        FriendlyUrl = friendlyUrl,
        SrcUrl = url,
        QueueTime = DateTime.Now,
        ProgressPercent = 0,
        Status = ImportJobStatus.NotStarted
    };

    await _context.ImportJobs.AddAsync(job);
    await _context.SaveChangesAsync();

    // Marks the job as failed and persists it through a short-lived context of its own,
    // exactly as every failure path of the import does.
    // NOTE(review): sibling importers in this file build their contexts from Configuration
    // rather than from empty DbContextOptions - confirm which construction is intended here.
    async Task FailJobAsync(string reason)
    {
        using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
        {
            job.EndTime = DateTime.Now;
            job.Status = ImportJobStatus.Failed;
            job.Exception = reason;
            importJobUpdaterDb.Update(job);
            await importJobUpdaterDb.SaveChangesAsync();
        }
    }

    try
    {
        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                try
                {
                    using (var client = new HttpClient())
                    using (var result = await client.GetAsync(url))
                    {
                        if (!result.IsSuccessStatusCode)
                        {
                            await FailJobAsync($"Http result is not ok ({result.StatusCode}) for {url}");
                            return;
                        }

                        using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                        {
                            RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from {url}", $"extracted from {url}")
                            {
                                Status = PublishStatus.Draft,
                                DateTime = DateTime.Now,
                                LastModified = DateTime.Now,
                                CoverItemIndex = 0,
                                FriendlyUrl = friendlyUrl
                            };

                            List<RTagValue> meta = new List<RTagValue>();
                            string xml = await result.Content.ReadAsStringAsync();

                            // The job is switched to Running only after the source document has been fetched;
                            // the raw document is stored on the job.
                            using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                            {
                                job.StartTime = DateTime.Now;
                                job.Status = ImportJobStatus.Running;
                                job.SrcContent = xml;
                                importJobUpdaterDb.Update(job);
                                await importJobUpdaterDb.SaveChangesAsync();
                            }

                            XElement elObject = XDocument.Parse(xml).Root;

                            // ---- book level metadata: every labelled <property> becomes one tag per <value> ----
                            foreach (XElement prop in elObject.Element("dmd").Element("properties").Elements("property"))
                            {
                                XElement labelElement = prop.Element("label");
                                if (labelElement == null)
                                {
                                    continue;
                                }

                                string label = labelElement.Value.Replace(":", "");
                                int valueOrder = 1;
                                foreach (XElement value in prop.Elements("valueGrp").Elements("value"))
                                {
                                    RTagValue propertyTag = await TagHandler.PrepareAttribute(context, label, value.Value, valueOrder);

                                    // links that start with http://localhost are not kept as the tag's supplement
                                    XAttribute href = value.Attribute("href");
                                    if (href != null && !href.Value.StartsWith("http://localhost", StringComparison.Ordinal))
                                    {
                                        propertyTag.ValueSupplement = href.Value;
                                    }

                                    meta.Add(propertyTag);

                                    if (label == "Title")
                                    {
                                        book.Name = book.NameInEnglish = book.Description = book.DescriptionInEnglish = value.Value;
                                    }

                                    valueOrder++;
                                }
                            }

                            meta.Add(await TagHandler.PrepareAttribute(context, "Type", "Book", 1));
                            meta.Add(await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1));

                            RTagValue bookSourceTag = await TagHandler.PrepareAttribute(context, "Source", "Princeton Digital Library of Islamic Manuscripts", 1);
                            bookSourceTag.ValueSupplement = $"http://pudl.princeton.edu/objects/{job.ResourceNumber}";
                            meta.Add(bookSourceTag);

                            book.Tags = meta.ToArray();

                            List<RArtifactItemRecord> pages = new List<RArtifactItemRecord>();

                            // Attaches the picture to the item, uses it as the book cover when the item is
                            // the cover item, and adds the item to the book's pages.
                            void AttachPicture(RArtifactItemRecord item, int itemOrder, RPictureFile image)
                            {
                                item.Images = new RPictureFile[] { image };
                                item.CoverImageIndex = 0;
                                if (book.CoverItemIndex == (itemOrder - 1))
                                {
                                    book.CoverImage = RPictureFile.Duplicate(image);
                                }
                                pages.Add(item);
                            }

                            // Appends a tag to the already imported page having the given order (if there is one).
                            void AddTagToPage(int targetOrder, RTagValue sideTag)
                            {
                                RArtifactItemRecord target = pages.Where(p => p.Order == targetOrder).SingleOrDefault();
                                if (target != null)
                                {
                                    List<RTagValue> tags = new List<RTagValue>(target.Tags);
                                    tags.Add(sideTag);
                                    target.Tags = tags;
                                }
                            }

                            // ---- pages: the ordered <div> list of the "RelatedObjects" structure ----
                            foreach (XElement structure in elObject.Elements("structure"))
                            {
                                if (structure.Attribute("type") == null || structure.Attribute("type").Value != "RelatedObjects")
                                {
                                    continue;
                                }

                                XElement orderedList = structure.Element("div")?.Element("OrderedList");
                                if (orderedList == null)
                                {
                                    await FailJobAsync("structure[RelatedObjects].div.OrderedList is null");
                                    return;
                                }

                                List<XElement> pageDivs = orderedList.Elements("div").ToList();
                                int pageCount = pageDivs.Count;
                                int inlineOrder = 0;

                                foreach (XElement div in pageDivs)
                                {
                                    inlineOrder++;

                                    using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions<RMuseumDbContext>()))
                                    {
                                        job.ProgressPercent = inlineOrder * 100 / (decimal)pageCount;
                                        importJobUpdaterDb.Update(job);
                                        await importJobUpdaterDb.SaveChangesAsync();
                                    }

                                    int order = int.Parse(div.Attribute("order").Value);
                                    string paddedOrder = $"{order}".PadLeft(4, '0');

                                    RArtifactItemRecord page = new RArtifactItemRecord()
                                    {
                                        Name = $"تصویر {order}",
                                        NameInEnglish = div.Attribute("label").Value,
                                        Description = "",
                                        DescriptionInEnglish = "",
                                        Order = order,
                                        FriendlyUrl = $"p{paddedOrder}",
                                        LastModified = DateTime.Now
                                    };

                                    // The image address is built from whatever follows the last ':' of the "img"
                                    // attribute; the "h" attribute is passed as the requested height.
                                    string imageId = div.Attribute("img").Value;
                                    string imageUrl = "https://libimages.princeton.edu/loris/" + imageId.Substring(imageId.LastIndexOf(':') + 1);
                                    imageUrl += $"/full/,{div.Attribute("h").Value}/0/default.jpg";

                                    RTagValue pageSourceTag = await TagHandler.PrepareAttribute(context, "Source", "Princeton Digital Library of Islamic Manuscripts", 1);
                                    pageSourceTag.ValueSupplement = imageUrl;
                                    page.Tags = new RTagValue[] { pageSourceTag };

                                    // Locations where the three stored renditions of this page are expected.
                                    string fileName = paddedOrder + ".jpg";
                                    string bookFolder = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl);
                                    string origPath = Path.Combine(bookFolder, "orig", fileName);
                                    string normPath = Path.Combine(bookFolder, "norm", fileName);
                                    string thumbPath = Path.Combine(bookFolder, "thumb", fileName);

                                    // If all three files are already on disk, try to rebuild the picture
                                    // record from them instead of downloading the image again.
                                    bool recovered = false;
                                    if (File.Exists(origPath) && File.Exists(normPath) && File.Exists(thumbPath))
                                    {
                                        RServiceResult<RPictureFile> recoveredPicture = await _pictureFileService.RecoverFromeFiles
                                            (
                                            page.Name, page.Description, 1, imageUrl,
                                            origPath, normPath, thumbPath,
                                            fileName, friendlyUrl
                                            );
                                        if (recoveredPicture.Result != null)
                                        {
                                            recovered = true;
                                            AttachPicture(page, order, recoveredPicture.Result);
                                        }
                                    }

                                    if (recovered)
                                    {
                                        continue;
                                    }

                                    // Remove whichever of the three files exist before downloading again.
                                    // The existence check is kept on purpose: File.Delete throws when the folder is missing.
                                    foreach (string stalePath in new[] { origPath, normPath, thumbPath })
                                    {
                                        if (File.Exists(stalePath))
                                        {
                                            File.Delete(stalePath);
                                        }
                                    }

                                    var imageResult = await client.GetAsync(imageUrl);
                                    try
                                    {
                                        int retryCount = 0;
                                        while (retryCount < ImportRetryCount && !imageResult.IsSuccessStatusCode && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable)
                                        {
                                            imageResult.Dispose();
                                            // non-blocking wait: the work item is async, so the thread is not held while backing off
                                            await Task.Delay(ImportRetryInitialSleep * (retryCount + 1));
                                            imageResult = await client.GetAsync(imageUrl);
                                            retryCount++;
                                        }

                                        if (!imageResult.IsSuccessStatusCode)
                                        {
                                            await FailJobAsync($"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}");
                                            return;
                                        }

                                        using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync())
                                        {
                                            RServiceResult<RPictureFile> picture = await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, fileName, friendlyUrl);
                                            if (picture.Result == null)
                                            {
                                                // caught by the catch of this work item, which records it on the job
                                                throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                                            }

                                            AttachPicture(page, order, picture.Result);
                                        }
                                    }
                                    finally
                                    {
                                        // the response is released on every path, including exceptions
                                        imageResult.Dispose();
                                    }

                                    // Kept from the original implementation.
                                    // NOTE(review): presumably added to limit memory growth while importing many large images - confirm it is still needed.
                                    GC.Collect();
                                }
                            }

                            // ---- leaf / folio side tags from the "Physical" structure ----
                            foreach (XElement structure in elObject.Elements("structure"))
                            {
                                if (structure.Attribute("type") == null || structure.Attribute("type").Value != "Physical")
                                {
                                    continue;
                                }

                                XElement manuscript = structure.Element("RTLBoundManuscript");
                                if (manuscript == null)
                                {
                                    continue;
                                }

                                foreach (XElement leaf in manuscript.Elements("Leaf"))
                                {
                                    foreach (XElement side in leaf.Elements("Side"))
                                    {
                                        int sideOrder = int.Parse(side.Attribute("order").Value);
                                        RTagValue leafTag = await TagHandler.PrepareAttribute(context, "Leaf Side", side.Attribute("label").Value, 100);
                                        AddTagToPage(sideOrder, leafTag);
                                    }
                                }

                                foreach (XElement folio in manuscript.Elements("Folio"))
                                {
                                    foreach (XElement side in folio.Elements("Side"))
                                    {
                                        int sideOrder = int.Parse(side.Attribute("order").Value);
                                        RTagValue folioTag = await TagHandler.PrepareAttribute(context, "Folio Side", folio.Attribute("label").Value + ":" + side.Attribute("label").Value, 101);
                                        AddTagToPage(sideOrder, folioTag);
                                    }
                                }
                            }

                            book.Items = pages.ToArray();
                            book.ItemCount = pages.Count;

                            if (pages.Count == 0)
                            {
                                await FailJobAsync("pages.Count == 0");
                                return;
                            }

                            await context.Artifacts.AddAsync(book);
                            await context.SaveChangesAsync();

                            job.ProgressPercent = 100;
                            job.Status = ImportJobStatus.Succeeded;
                            job.ArtifactId = book.Id;
                            job.EndTime = DateTime.Now;

                            context.Update(job);
                            await context.SaveChangesAsync();
                        }
                    }
                }
                catch (Exception exp)
                {
                    // anything unexpected (network, malformed XML, storage, database) ends the job as failed
                    await FailJobAsync(exp.ToString());
                }
            }
        );

        return new RServiceResult<bool>(true);
    }
    catch (Exception exp)
    {
        return new RServiceResult<bool>(false, exp.ToString());
    }
}
/// <summary> /// from http://www.library.upenn.edu/ /// </summary> /// <param name="resourceNumber">MEDREN_9949222153503681</param> /// <param name="friendlyUrl"></param> /// <returns></returns> private async Task <RServiceResult <bool> > StartImportingFromPenLibraries(string resourceNumber, string friendlyUrl) { string url = $"http://dla.library.upenn.edu/dla/medren/pageturn.html?id={resourceNumber}&rotation=0&size=0"; if ( ( await _context.ImportJobs .Where(j => j.JobType == JobType.PennLibraries && j.ResourceNumber == resourceNumber && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted)) .SingleOrDefaultAsync() ) != null ) { return(new RServiceResult <bool>(false, $"Job is already scheduled or running for importing {url}")); } if (string.IsNullOrEmpty(friendlyUrl)) { friendlyUrl = resourceNumber; } if ( (await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).SingleOrDefaultAsync()) != null ) { return(new RServiceResult <bool>(false, $"duplicated artifact friendly url '{friendlyUrl}'")); } ImportJob job = new ImportJob() { JobType = JobType.PennLibraries, ResourceNumber = resourceNumber, FriendlyUrl = friendlyUrl, SrcUrl = url, QueueTime = DateTime.Now, ProgressPercent = 0, Status = ImportJobStatus.NotStarted }; await _context.ImportJobs.AddAsync ( job ); await _context.SaveChangesAsync(); try { _backgroundTaskQueue.QueueBackgroundWorkItem ( async token => { try { using (var client = new HttpClient()) { client.Timeout = TimeSpan.FromMinutes(5); using (var result = await client.GetAsync(url)) { if (result.IsSuccessStatusCode) { using (RMuseumDbContext context = new RMuseumDbContext(Configuration)) { if ( (await context.Artifacts.Where(a => a.FriendlyUrl == job.FriendlyUrl).SingleOrDefaultAsync()) != null ) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "aborted because of duplicated friendly url"; 
importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from {url}", $"extracted from {url}") { Status = PublishStatus.Draft, DateTime = DateTime.Now, LastModified = DateTime.Now, CoverItemIndex = 0, FriendlyUrl = friendlyUrl }; List <RTagValue> meta = new List <RTagValue>(); RTagValue tag; string html = await result.Content.ReadAsStringAsync(); using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.StartTime = DateTime.Now; job.Status = ImportJobStatus.Running; job.SrcContent = html; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } string title = ""; string author = ""; int tagOrder = 1; int nIdxStart = html.IndexOf("https://repo.library.upenn.edu/djatoka/resolver?"); if (nIdxStart == -1) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = "https://repo.library.upenn.edu/djatoka/resolver? 
not found"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } string firstImageUrl = html.Substring(nIdxStart, html.IndexOf('"', nIdxStart) - nIdxStart).Replace("&", "&"); nIdxStart = html.IndexOf("recordinfolabel"); while (nIdxStart != -1) { nIdxStart += "recordinfolabel\">".Length; int nIdxEnd = html.IndexOf(":", nIdxStart); string recordinfolabel = html.Substring(nIdxStart, nIdxEnd - nIdxStart); nIdxStart = html.IndexOf("recordinfotext", nIdxEnd); nIdxStart += "recordinfotext\">".Length; nIdxEnd = html.IndexOf("</td>", nIdxStart); string recordinfotext = html.Substring(nIdxStart, nIdxEnd - nIdxStart).Replace("</div>", "<div>").Replace("\n", "").Replace("\r", "").Trim(); string[] values = recordinfotext.Split("<div>", StringSplitOptions.RemoveEmptyEntries); foreach (string value in values) { if (value.Trim().Length == 0) { continue; } if (recordinfolabel == "Title") { title = value.Trim(); tag = await TagHandler.PrepareAttribute(context, "Title", title, 1); meta.Add(tag); } else if (recordinfolabel == "Author") { author = value.Trim(); tag = await TagHandler.PrepareAttribute(context, "Contributor Names", author, 1); meta.Add(tag); } else { tag = await TagHandler.PrepareAttribute(context, recordinfolabel, value.Trim(), tagOrder++); meta.Add(tag); } } nIdxStart = html.IndexOf("recordinfolabel", nIdxEnd); } tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1); meta.Add(tag); tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1); meta.Add(tag); tag = await TagHandler.PrepareAttribute(context, "Source", "Penn Libraries", 1); string viewerUrl = $"http://dla.library.upenn.edu/dla/medren/detail.html?id={resourceNumber}"; tag.ValueSupplement = viewerUrl; meta.Add(tag); book.Name = book.NameInEnglish = book.Description = book.DescriptionInEnglish = title; book.Tags = meta.ToArray(); List <RArtifactItemRecord> pages = new List <RArtifactItemRecord>(); int order = 0; while (true) { order++; using 
(RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.ProgressPercent = order; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } string imageUrl = firstImageUrl; RArtifactItemRecord page = new RArtifactItemRecord() { Name = $"تصویر {order}", NameInEnglish = $"Image {order}", Description = "", DescriptionInEnglish = "", Order = order, FriendlyUrl = $"p{$"{order}".PadLeft(4, '0')}", LastModified = DateTime.Now }; tag = await TagHandler.PrepareAttribute(context, "Source", "Penn Libraries", 1); tag.ValueSupplement = viewerUrl; page.Tags = new RTagValue[] { tag }; if (!string.IsNullOrEmpty(imageUrl)) { bool recovered = false; if ( File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "orig"), $"{order}".PadLeft(4, '0') + ".jpg") ) && File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "norm"), $"{order}".PadLeft(4, '0') + ".jpg") ) && File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "thumb"), $"{order}".PadLeft(4, '0') + ".jpg") ) ) { RServiceResult <RPictureFile> picture = await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1, imageUrl, Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "orig"), $"{order}".PadLeft(4, '0') + ".jpg"), Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "norm"), $"{order}".PadLeft(4, '0') + ".jpg"), Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "thumb"), $"{order}".PadLeft(4, '0') + ".jpg"), $"{order}".PadLeft(4, '0') + ".jpg", friendlyUrl); if (picture.Result != null) { recovered = true; page.Images = new RPictureFile[] { picture.Result }; page.CoverImageIndex = 0; if (book.CoverItemIndex == (order - 1)) { book.CoverImage = RPictureFile.Duplicate(picture.Result); } pages.Add(page); 
} } if (!recovered) { if ( File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "orig"), $"{order}".PadLeft(4, '0') + ".jpg") ) ) { File.Delete ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "orig"), $"{order}".PadLeft(4, '0') + ".jpg") ); } if ( File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "norm"), $"{order}".PadLeft(4, '0') + ".jpg") ) ) { File.Delete ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "norm"), $"{order}".PadLeft(4, '0') + ".jpg") ); } if ( File.Exists ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "thumb"), $"{order}".PadLeft(4, '0') + ".jpg") ) ) { File.Delete ( Path.Combine(Path.Combine(Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl), "thumb"), $"{order}".PadLeft(4, '0') + ".jpg") ); } if (order > 1) { string pageUrl = $"http://dla.library.upenn.edu/dla/medren/pageturn.html?id={resourceNumber}&doubleside=0&rotation=0&size=0¤tpage={order}"; var pageResult = await client.GetAsync(pageUrl); if (pageResult.StatusCode == HttpStatusCode.NotFound) { break; //finished } string pageHtml = await pageResult.Content.ReadAsStringAsync(); nIdxStart = pageHtml.IndexOf("https://repo.library.upenn.edu/djatoka/resolver?"); if (nIdxStart == -1) { if (order > 1) { break; //finished } using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = $"https://repo.library.upenn.edu/djatoka/resolver? 
not found on page {order}"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } imageUrl = pageHtml.Substring(nIdxStart, pageHtml.IndexOf('"', nIdxStart) - nIdxStart).Replace("&", "&"); } var imageResult = await client.GetAsync(imageUrl); if (imageResult.StatusCode == HttpStatusCode.NotFound) { break; //finished } int _ImportRetryCount = 200; int _ImportRetryInitialSleep = 500; int retryCount = 0; while (retryCount < _ImportRetryCount && !imageResult.IsSuccessStatusCode && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable) { imageResult.Dispose(); Thread.Sleep(_ImportRetryInitialSleep * (retryCount + 1)); imageResult = await client.GetAsync(imageUrl); retryCount++; } if (imageResult.IsSuccessStatusCode) { using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync()) { RServiceResult <RPictureFile> picture = await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, $"{order}".PadLeft(4, '0') + ".jpg", friendlyUrl); if (picture.Result == null) { throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}"); } page.Images = new RPictureFile[] { picture.Result }; page.CoverImageIndex = 0; if (book.CoverItemIndex == (order - 1)) { book.CoverImage = RPictureFile.Duplicate(picture.Result); } pages.Add(page); } } else { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = $"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } imageResult.Dispose(); return; } imageResult.Dispose(); GC.Collect(); } } } book.Items = pages.ToArray(); book.ItemCount = pages.Count; if (pages.Count == 0) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = 
"Pages.Count == 0"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } await context.Artifacts.AddAsync(book); await context.SaveChangesAsync(); job.ProgressPercent = 100; job.Status = ImportJobStatus.Succeeded; job.ArtifactId = book.Id; job.EndTime = DateTime.Now; context.Update(job); await context.SaveChangesAsync(); } } else { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = $"Http result is not ok ({result.StatusCode}) for {url}"; importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } return; } } } } catch (Exception exp) { using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(Configuration)) { job.EndTime = DateTime.Now; job.Status = ImportJobStatus.Failed; job.Exception = exp.ToString(); importJobUpdaterDb.Update(job); await importJobUpdaterDb.SaveChangesAsync(); } } } ); return(new RServiceResult <bool>(true)); } catch (Exception exp) { return(new RServiceResult <bool>(false, exp.ToString())); } }
/// <summary>
/// Registers an import job for a Library of Congress (https://www.loc.gov) resource and queues
/// the actual import as a background work item. The background item reads the resource JSON,
/// builds the book record with its tags, then recovers or downloads one image per page.
/// </summary>
/// <param name="resourceNumber">
/// <example>
/// m084
/// </example>
/// </param>
/// <param name="friendlyUrl">
/// Friendly url of the new artifact; when null or empty, <paramref name="resourceNumber"/> is used.
/// <example>
/// boostan1207
/// </example>
/// </param>
/// <param name="resourcePrefix">
/// <example>
/// plmp
/// </example>
/// </param>
/// <returns>
/// true when the job was queued; false with an explanatory message when a non-failed/non-aborted
/// job already exists for the resource, when the friendly url is already taken, or when queuing throws.
/// Failures inside the background item are recorded on the job record, not returned here.
/// </returns>
private async Task <RServiceResult <bool> > StartImportingFromTheLibraryOfCongress(string resourceNumber, string friendlyUrl, string resourcePrefix)
{
    string url = $"https://www.loc.gov/resource/{resourcePrefix}.{resourceNumber}/?fo=json&st=gallery";

    // Refuse to start when a job for this resource exists that is neither failed nor aborted.
    if (
        (
            await _context.ImportJobs
                .Where(j => j.JobType == JobType.Loc && j.ResourceNumber == resourceNumber && !(j.Status == ImportJobStatus.Failed || j.Status == ImportJobStatus.Aborted))
                .SingleOrDefaultAsync()
        )
        != null
        )
    {
        return(new RServiceResult <bool>(false, $"Job is already scheduled or running for importing {url}"));
    }

    if (string.IsNullOrEmpty(friendlyUrl))
    {
        friendlyUrl = resourceNumber;
    }

    if (
        (await _context.Artifacts.Where(a => a.FriendlyUrl == friendlyUrl).SingleOrDefaultAsync())
        != null
        )
    {
        return(new RServiceResult <bool>(false, $"duplicated friendly url '{friendlyUrl}'"));
    }

    ImportJob job = new ImportJob()
    {
        JobType = JobType.Loc,
        ResourceNumber = resourceNumber,
        FriendlyUrl = friendlyUrl,
        SrcUrl = url,
        QueueTime = DateTime.Now,
        ProgressPercent = 0,
        Status = ImportJobStatus.NotStarted
    };

    await _context.ImportJobs.AddAsync(job);
    await _context.SaveChangesAsync();

    try
    {
        _backgroundTaskQueue.QueueBackgroundWorkItem
        (
            async token =>
            {
                // Retry policy for image downloads answered with 503 (Service Unavailable):
                // the wait grows linearly, ImportRetryInitialSleep * attempt number (in milliseconds).
                const int ImportRetryCount = 5;
                const int ImportRetryInitialSleep = 500;

                try
                {
                    using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions <RMuseumDbContext>()))
                    {
                        job.StartTime = DateTime.Now;
                        job.Status = ImportJobStatus.Running;
                        importJobUpdaterDb.Update(job);
                        await importJobUpdaterDb.SaveChangesAsync();
                    }

                    int pageCount = 0;
                    int representative_index = 0;

                    // A single client serves the two metadata requests and every image download of this job.
                    using (var client = new HttpClient())
                    {
                        // First read the resource once to learn the number of pages.
                        using (var result = await client.GetAsync(url))
                        {
                            if (!result.IsSuccessStatusCode)
                            {
                                await MarkLocImportJobFailedAsync(job, $"Http result is not ok ({result.StatusCode}) for {url}");
                                return;
                            }

                            string json = await result.Content.ReadAsStringAsync();
                            var parsed = JObject.Parse(json);
                            pageCount = parsed.SelectToken("resource.segment_count").Value <int>();
                            representative_index = parsed.SelectToken("resource.representative_index").Value <int>();
                        }

                        // Here might be problems: loc json does not return a correct answer when the number of
                        // segments is more than 1000. An earlier workaround capped pageCount at 1000; it is disabled.

                        // Now that the page count is known, read the resource again asking for all segments.
                        url = $"https://www.loc.gov/resource/{resourcePrefix}.{resourceNumber}/?c={pageCount}&fo=json&st=gallery";

                        using (var result = await client.GetAsync(url))
                        {
                            if (!result.IsSuccessStatusCode)
                            {
                                await MarkLocImportJobFailedAsync(job, $"Http result is not ok ({result.StatusCode}) for {url}");
                                return;
                            }

                            // This code runs from a background service where _context may already be disposed,
                            // so a fresh context is created for the import itself.
                            using (RMuseumDbContext context = new RMuseumDbContext(new DbContextOptions <RMuseumDbContext>()))
                            {
                                RArtifactMasterRecord book = new RArtifactMasterRecord($"extracted from {url}", $"extracted from {url}")
                                {
                                    Status = PublishStatus.Draft,
                                    DateTime = DateTime.Now,
                                    LastModified = DateTime.Now,
                                    CoverItemIndex = representative_index,
                                    FriendlyUrl = friendlyUrl
                                };

                                string json = await result.Content.ReadAsStringAsync();
                                job.SrcContent = json;

                                var parsed = JObject.Parse(json);
                                var segmentsArray = parsed.SelectToken("segments").ToArray();

                                // The segment list is only used to record a mismatch with the reported page count
                                // (see the more-than-1000-segments problem above); pages are generated from pageCount.
                                if (segmentsArray.Length != pageCount)
                                {
                                    job.Exception = $"Page count ({pageCount}) is not equal to number of returned resources ({segmentsArray.Length}).";
                                }

                                List <RTagValue> meta = new List <RTagValue>();

                                string string_value = await HandleSimpleValue(context, parsed, meta, "item.title", "Title");
                                if (!string.IsNullOrWhiteSpace(string_value))
                                {
                                    book.Name = string_value;
                                    book.NameInEnglish = string_value;
                                }

                                await HandleSimpleValue(context, parsed, meta, "item.date", "Date");

                                string_value = await HandleListValue(context, parsed, meta, "item.other_title", "Other Title");
                                if (!string.IsNullOrWhiteSpace(string_value))
                                {
                                    book.Name = string_value;
                                }

                                await HandleListValue(context, parsed, meta, "item.contributor_names", "Contributor Names");
                                await HandleSimpleValue(context, parsed, meta, "item.shelf_id", "Shelf ID");
                                await HandleListValue(context, parsed, meta, "item.created_published", "Created / Published");
                                await HandleListValue(context, parsed, meta, "item.subject_headings", "Subject Headings");
                                await HandleListValue(context, parsed, meta, "item.notes", "Notes");
                                await HandleListValue(context, parsed, meta, "item.medium", "Medium");
                                await HandleListValue(context, parsed, meta, "item.call_number", "Call Number/Physical Location");
                                await HandleListValue(context, parsed, meta, "item.digital_id", "Digital Id");
                                await HandleSimpleValue(context, parsed, meta, "item.library_of_congress_control_number", "Library of Congress Control Number");
                                await HandleChildrenValue(context, parsed, meta, "item.language", "Language");
                                await HandleListValue(context, parsed, meta, "item.online_format", "Online Format");
                                await HandleListValue(context, parsed, meta, "item.number_oclc", "OCLC Number");

                                string_value = await HandleListValue(context, parsed, meta, "item.description", "Description");
                                if (!string.IsNullOrEmpty(string_value))
                                {
                                    book.Description = string_value;
                                    book.DescriptionInEnglish = string_value;
                                }

                                await HandleSimpleValue(context, parsed, meta, "cite_this.chicago", "Chicago citation style");
                                await HandleSimpleValue(context, parsed, meta, "cite_this.apa", "APA citation style");
                                await HandleSimpleValue(context, parsed, meta, "cite_this.mla", "MLA citation style");
                                await HandleChildrenValue(context, parsed, meta, "item.dates", "Dates");
                                await HandleChildrenValue(context, parsed, meta, "item.contributors", "Contributors");
                                await HandleChildrenValue(context, parsed, meta, "item.location", "Location");
                                await HandleListValue(context, parsed, meta, "item.rights", "Rights & Access");

                                RTagValue tag = await TagHandler.PrepareAttribute(context, "Type", "Book", 1);
                                meta.Add(tag);

                                tag = await TagHandler.PrepareAttribute(context, "Type", "Manuscript", 1);
                                meta.Add(tag);

                                tag = await TagHandler.PrepareAttribute(context, "Source", "Library of Congress, African and Middle East Division, Near East Section Persian Manuscript Collection", 1);
                                // The request url is the default supplement; item.id replaces it when present.
                                // A missing item.id token must not abort the import, hence the null-conditional.
                                tag.ValueSupplement = url;
                                string_value = parsed.SelectToken("item.id")?.Value <string>();
                                if (!string.IsNullOrWhiteSpace(string_value))
                                {
                                    tag.ValueSupplement = string_value;
                                }
                                meta.Add(tag);

                                book.Tags = meta.ToArray();

                                int order = 0;
                                List <RArtifactItemRecord> pages = new List <RArtifactItemRecord>();
                                string artifactFolder = Path.Combine(_pictureFileService.ImageStoragePath, friendlyUrl);

                                // Due to the loc problem with books of more than 1000 pages, pages are no longer taken
                                // from segmentsArray; page links and image urls are generated from the page index.
                                for (int pageIndex = 1; pageIndex <= pageCount; pageIndex++)
                                {
                                    using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions <RMuseumDbContext>()))
                                    {
                                        job.ProgressPercent = order * 100 / (decimal)pageCount;
                                        importJobUpdaterDb.Update(job);
                                        await importJobUpdaterDb.SaveChangesAsync();
                                    }

                                    order++;

                                    RArtifactItemRecord page = new RArtifactItemRecord()
                                    {
                                        Name = $"تصویر {order}",
                                        NameInEnglish = $"Image {pageIndex} of {book.NameInEnglish}",
                                        Description = "",
                                        DescriptionInEnglish = "",
                                        Order = order,
                                        FriendlyUrl = $"p{$"{order}".PadLeft(4, '0')}",
                                        LastModified = DateTime.Now
                                    };

                                    tag = await TagHandler.PrepareAttribute(context, "Source", "Library of Congress, African and Middle East Division, Near East Section Persian Manuscript Collection", 1);
                                    tag.ValueSupplement = $"http://www.loc.gov/resource/{resourcePrefix}.{resourceNumber}/?sp={pageIndex}";
                                    page.Tags = new RTagValue[] { tag };

                                    // Hard-coded image url pattern. A previous version picked the tallest ".jpg" entry from
                                    // each segment's image_url list; another pattern tried earlier was
                                    // service:rbc:{resourcePrefix}:2015:{resourceNumber}:{imageUrlPart}.
                                    string imageUrlPart = $"{pageIndex}".PadLeft(4, '0');
                                    string imageUrl = $"https://tile.loc.gov/image-services/iiif/service:amed:{resourcePrefix}:{resourceNumber}:{imageUrlPart}/full/pct:100/0/default.jpg";

                                    // Local file locations of the three stored variants of this page.
                                    string imageFileName = $"{order}".PadLeft(4, '0') + ".jpg";
                                    string origPath = Path.Combine(Path.Combine(artifactFolder, "orig"), imageFileName);
                                    string normPath = Path.Combine(Path.Combine(artifactFolder, "norm"), imageFileName);
                                    string thumbPath = Path.Combine(Path.Combine(artifactFolder, "thumb"), imageFileName);

                                    // When all three variants are already on disk, try to reuse them instead of downloading.
                                    bool recovered = false;
                                    if (File.Exists(origPath) && File.Exists(normPath) && File.Exists(thumbPath))
                                    {
                                        RServiceResult <RPictureFile> picture = await _pictureFileService.RecoverFromeFiles(page.Name, page.Description, 1, imageUrl, origPath, normPath, thumbPath, imageFileName, friendlyUrl);
                                        if (picture.Result != null)
                                        {
                                            recovered = true;
                                            page.Images = new RPictureFile[] { picture.Result };
                                            page.CoverImageIndex = 0;

                                            if (book.CoverItemIndex == (order - 1))
                                            {
                                                book.CoverImage = RPictureFile.Duplicate(picture.Result);
                                            }

                                            pages.Add(page);
                                        }
                                    }

                                    if (recovered)
                                    {
                                        continue;
                                    }

                                    // Remove whatever partial set of variants exists before downloading again.
                                    foreach (string stalePath in new string[] { origPath, normPath, thumbPath })
                                    {
                                        if (File.Exists(stalePath))
                                        {
                                            File.Delete(stalePath);
                                        }
                                    }

                                    var imageResult = await client.GetAsync(imageUrl);
                                    try
                                    {
                                        int retryCount = 0;
                                        while (retryCount < ImportRetryCount && imageResult.StatusCode == HttpStatusCode.ServiceUnavailable)
                                        {
                                            imageResult.Dispose();
                                            // Non-blocking back-off: do not hold a thread-pool thread while waiting.
                                            await Task.Delay(ImportRetryInitialSleep * (retryCount + 1));
                                            imageResult = await client.GetAsync(imageUrl);
                                            retryCount++;
                                        }

                                        if (!imageResult.IsSuccessStatusCode)
                                        {
                                            await MarkLocImportJobFailedAsync(job, $"Http result is not ok ({imageResult.StatusCode}) for page {order}, url {imageUrl}");
                                            return;
                                        }

                                        using (Stream imageStream = await imageResult.Content.ReadAsStreamAsync())
                                        {
                                            RServiceResult <RPictureFile> picture = await _pictureFileService.Add(page.Name, page.Description, 1, null, imageUrl, imageStream, imageFileName, friendlyUrl);
                                            if (picture.Result == null)
                                            {
                                                // Caught by the outer handler, which records the text on the job.
                                                throw new Exception($"_pictureFileService.Add : {picture.ExceptionString}");
                                            }

                                            page.Images = new RPictureFile[] { picture.Result };
                                            page.CoverImageIndex = 0;

                                            if (book.CoverItemIndex == (order - 1))
                                            {
                                                book.CoverImage = RPictureFile.Duplicate(picture.Result);
                                            }

                                            pages.Add(page);
                                        }
                                    }
                                    finally
                                    {
                                        // Release the response on every path, including the exception above.
                                        imageResult.Dispose();
                                    }

                                    // NOTE(review): explicit collection kept from the original code; presumably it limits
                                    // memory growth while processing many large images - confirm it is still needed.
                                    GC.Collect();
                                }

                                book.Items = pages.ToArray();
                                book.ItemCount = pages.Count;

                                // Fall back to the first page's image when no page matched CoverItemIndex.
                                if (book.CoverImage == null && pages.Count > 0)
                                {
                                    book.CoverImage = RPictureFile.Duplicate(pages[0].Images.First());
                                }

                                await context.Artifacts.AddAsync(book);
                                await context.SaveChangesAsync();

                                job.ProgressPercent = 100;
                                job.Status = ImportJobStatus.Succeeded;
                                job.ArtifactId = book.Id;
                                job.EndTime = DateTime.Now;
                                context.Update(job);
                                await context.SaveChangesAsync();
                            }
                        }
                    }
                }
                catch (Exception exp)
                {
                    await MarkLocImportJobFailedAsync(job, exp.ToString());
                }
            }
        );

        return(new RServiceResult <bool>(true));
    }
    catch (Exception exp)
    {
        return(new RServiceResult <bool>(false, exp.ToString()));
    }
}

/// <summary>
/// Stamps the job with an end time, the Failed status and the given error text, then saves it
/// through a short-lived context of its own.
/// </summary>
/// <param name="job">job record to update</param>
/// <param name="exception">text stored in the job's Exception field</param>
private static async Task MarkLocImportJobFailedAsync(ImportJob job, string exception)
{
    using (RMuseumDbContext importJobUpdaterDb = new RMuseumDbContext(new DbContextOptions <RMuseumDbContext>()))
    {
        job.EndTime = DateTime.Now;
        job.Status = ImportJobStatus.Failed;
        job.Exception = exception;
        importJobUpdaterDb.Update(job);
        await importJobUpdaterDb.SaveChangesAsync();
    }
}