// Test helper: uploads a file from the local Resources folder to blob storage and publishes a BlobLoaded event.
protected async Task<Guid> LoadBlob(IPublishEndpoint endpoint, Guid userId, string bucket, string fileName,
    string contentType = "application/octet-stream", IDictionary<string, object> metadata = null)
{
    var path = Path.Combine(Directory.GetCurrentDirectory(), "Resources", fileName);
    if (!File.Exists(path))
    {
        throw new FileNotFoundException(path);
    }

    var blobId = await _blobStorage.AddFileAsync(fileName, File.OpenRead(path), contentType, bucket, metadata);
    var blobInfo = await _blobStorage.GetFileInfo(blobId, bucket);

    await endpoint.Publish<BlobLoaded>(new
    {
        //CorrelationId = blobInfo.Metadata != null ? blobInfo.Metadata.ContainsKey("correlationId") ? new Guid(blobInfo.Metadata["correlationId"].ToString()) : Guid.Empty,
        BlobInfo = new LoadedBlobInfo(blobId, fileName, blobInfo.Length, userId, blobInfo.UploadDateTime, blobInfo.MD5, bucket, blobInfo.Metadata),
        TimeStamp = DateTimeOffset.UtcNow
    });

    //Thread.Sleep(100);
    Log.Debug($"BlobLoaded: {fileName}; BlobId: {blobId}");

    return blobId;
}
public async Task AddBlob(Guid blobId, Guid userId, string bucket, string fileName, IDictionary<string, object> metadata = null)
{
    var path = Path.Combine(Directory.GetCurrentDirectory(), "Resources", fileName);
    if (File.Exists(path))
    {
        await _blobStorage.AddFileAsync(blobId, fileName, File.OpenRead(path), "application/octet-stream", bucket, metadata);
    }
}
public async Task Consume(ConsumeContext<GeneratePdfFromHtml> context)
{
    var message = context.Message;
    try
    {
        // Fetch the page and extract its <title> for the PdfGenerated event.
        string title = "no-title";
        using (var client = new WebClient())
        using (var stream = client.OpenRead(message.Url))
        using (var reader = new StreamReader(stream, Encoding.UTF8))
        {
            var content = reader.ReadToEnd();
            title = Regex.Match(content, @"\<title\b[^>]*\>\s*(?<Title>[\s\S]*?)\</title\>", RegexOptions.IgnoreCase).Groups["Title"].Value;
        }

        var pdfBytes = HtmlToPdf.GetPdfAsByteArray(message.Url);
        var dataStream = new MemoryStream(pdfBytes);
        dataStream.Seek(0, SeekOrigin.Begin);

        var blobId = Guid.NewGuid();
        await blobStorage.AddFileAsync(blobId, $"{blobId}.pdf", dataStream, "application/pdf", message.Bucket);
        var blobInfo = await blobStorage.GetFileInfo(blobId, message.Bucket);

        await context.Publish<PdfGenerated>(new
        {
            Id = NewId.NextGuid(), CorrelationId = message.CorrelationId, UserId = message.UserId,
            Bucket = message.Bucket, Title = title, BlobId = blobId, PageId = message.Id,
            Lenght = blobInfo.Length, Md5 = blobInfo.MD5
        });
    }
    catch (Exception e)
    {
        await context.Publish<PdfGenerationFailed>(new
        {
            Id = NewId.NextGuid(), CorrelationId = message.CorrelationId, UserId = message.UserId,
            Message = $"Can not get pdf from url {message.Url}. Details: {e.Message}"
        });
    }
}
public async Task Send_command_to_parse_valid_cif_should_publish_one_RecordParsed_and_one_FileParsed_event([Frozen] FileParsedEvent expectedEvent)
{
    try
    {
        await _harness.Start();

        var blobId = await _blobStorage.AddFileAsync("1100110.cif", Resource._1100110, "chemical/x-cif", BUCKET);

        await _harness.InputQueueSendEndpoint.Send<ParseFile>(new
        {
            expectedEvent.Id, Bucket = BUCKET, BlobId = blobId,
            expectedEvent.CorrelationId, expectedEvent.UserId
        });

        _consumer.Consumed.Select<ParseFile>().Any().Should().BeTrue();
        _harness.Published.Select<FileParsed>().Any().Should().BeTrue();

        var allEvents = _harness.Published.ToList();

        var parsed = allEvents.Select<RecordParsed>().FirstOrDefault();
        parsed.Should().NotBeNull();
        parsed.Should().BeEquivalentTo(new
        {
            FileId = expectedEvent.Id,
            Bucket = BUCKET,
            Index = 0L,
            expectedEvent.UserId
        },
        options => options.ExcludingMissingMembers());
        parsed.Fields.Count().Should().Be(22);
    }
    finally
    {
        await _harness.Stop();
    }
}
public async Task Send_command_to_parse_valid_rxn_should_publish_one_RecordParsed_one_FileParsed_event([Frozen] FileParsedEvent expectedEvent)
{
    try
    {
        await _harness.Start();

        var blobId = await _blobStorage.AddFileAsync("10001.rxn", Resource._10001, "chemical/x-mdl-rxnfile", BUCKET);

        await _harness.InputQueueSendEndpoint.Send<ParseFile>(new
        {
            expectedEvent.Id, Bucket = BUCKET, BlobId = blobId,
            expectedEvent.CorrelationId, expectedEvent.UserId
        });

        var res = _consumer.Consumed.Select<ParseFile>().Any();
        res.Should().BeTrue();

        var allEvents = _harness.Published.ToList();
        allEvents.Where(e => e.MessageType == typeof(RecordParsed)).Count().Should().Be(1);

        var parsed = allEvents.Select<FileParsed>().FirstOrDefault();
        parsed.Should().NotBeNull();
        parsed.Should().BeEquivalentTo(expectedEvent, options => options
            .Excluding(p => p.TimeStamp)
            .Excluding(p => p.Fields)
            .Excluding(p => p.TotalRecords));
        parsed.TotalRecords.Should().Be(1);
    }
    finally
    {
        await _harness.Stop();
    }
}
public async Task<IActionResult> Post()
{
    string bucket = User.FindFirst("sub").Value;

    Log.Debug("Request to standardization");

    if (!IsMultipartContentType(Request.ContentType))
    {
        return new UnsupportedMediaTypeResult();
    }

    Log.Debug("POSTing files...");

    var boundary = HeaderUtilities.RemoveQuotes(MediaTypeHeaderValue.Parse(Request.ContentType).Boundary);
    var reader = new MultipartReader(boundary.Value, Request.Body);

    MultipartSection section;
    while ((section = await reader.ReadNextSectionAsync()) != null)
    {
        var contentDisposition = section.GetContentDispositionHeader();
        if (contentDisposition.IsFileDisposition())
        {
            var fileSection = section.AsFileSection();
            if (fileSection.FileName.ToLower().EndsWith(".mol"))
            {
                Log.Debug($"Saving file {fileSection.FileName}");

                var blobId = await _blobStorage.AddFileAsync(fileSection.FileName, fileSection.FileStream, fileSection.Section.ContentType, bucket);

                await _bus.Publish<Standardize>(new
                {
                    Id = NewId.NextGuid(), Bucket = bucket, BlobId = blobId,
                    CorrelationId = Guid.Empty, UserId = UserId
                });

                // Upload only one file and return.
                return CreatedAtRoute("GetStandardization", new { id = blobId }, null);
            }
        }
    }

    return BadRequest();
}
public async Task Consume(ConsumeContext<ValidateStandardize> context)
{
    try
    {
        using (var blob = await blobStorage.GetFileAsync(context.Message.BlobId, context.Message.Bucket))
        using (var reader = new StreamReader(blob.GetContentAsStream()))
        {
            string mol = reader.ReadToEnd();

            var validResult = validation.Validate(mol);
            var standardResult = standardization.Standardize(mol);

            var newId = Guid.NewGuid();
            var issues = standardResult.Issues.Concat(validResult.Issues);
            var record = new StandardizedValidatedRecord
            {
                StandardizedId = newId,
                Issues = IssuesResolver.ResolveIssues(issues, issuesConfig)
            };

            var bucket = context.Message.Id.ToString();
            await blobStorage.AddFileAsync(newId, $"{newId}.mol", new MemoryStream(Encoding.UTF8.GetBytes(standardResult.Standardized)), "chemical/x-mdl-molfile", bucket);

            await context.Publish<ValidatedStandardized>(new
            {
                Id = context.Message.Id, Record = record, UserId = context.Message.UserId,
                TimeStamp = DateTimeOffset.UtcNow, CorrelationId = context.Message.CorrelationId
            });
        }
    }
    catch (Exception ex)
    {
        await context.Publish<StandardizationValidationFailed>(new
        {
            Id = context.Message.Id, UserId = context.Message.UserId,
            TimeStamp = DateTimeOffset.UtcNow, CorrelationId = context.Message.CorrelationId,
            Message = $"Blob with id {context.Message.BlobId} from bucket {context.Message.Bucket} cannot be validated and standardized, or was not found. Error: {ex.Message}"
        });
    }
}
public async Task<IActionResult> Post()
{
    string bucket = UserId.ToString();

    Log.Information("Request to calculate ChemicalProperties");

    if (!IsMultipartContentType(Request.ContentType))
    {
        return new UnsupportedMediaTypeResult();
    }

    Log.Information("POSTing files...");

    var boundary = HeaderUtilities.RemoveQuotes(MediaTypeHeaderValue.Parse(Request.ContentType).Boundary);
    var reader = new MultipartReader(boundary.Value, Request.Body);

    MultipartSection section;
    while ((section = await reader.ReadNextSectionAsync()) != null)
    {
        var contentDisposition = section.GetContentDispositionHeader();
        if (contentDisposition.IsFileDisposition())
        {
            var fileSection = section.AsFileSection();
            if (fileSection.FileName.ToLower().EndsWith(".mol"))
            {
                Log.Information($"Saving file {fileSection.FileName}");

                var id = await _blobStorage.AddFileAsync(fileSection.FileName, fileSection.FileStream, fileSection.Section.ContentType, bucket);

                //await _commandSender.Send(new CalculateChemicalProperties(Guid.NewGuid(), Guid.NewGuid(), UserId, bucket, id));

                // Upload only one file and return.
                return CreatedAtRoute("Get", new { id = id }, null);
            }
        }
    }

    return BadRequest();
}
public async Task Consume(ConsumeContext<ConvertToPdf> context)
{
    var blobId = Guid.NewGuid();
    var path = Path.Combine(Directory.GetCurrentDirectory(), "Resources", "Abdelaziz A Full_manuscript.pdf");
    if (!System.IO.File.Exists(path))
    {
        throw new FileNotFoundException(path);
    }

    await _blobStorage.AddFileAsync(blobId, $"{blobId}.pdf", System.IO.File.OpenRead(path), "application/pdf", context.Message.Bucket);

    await context.Publish<ConvertedToPdf>(new
    {
        Bucket = context.Message.Bucket, BlobId = blobId, Id = context.Message.Id,
        UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow, CorrelationId = context.Message.CorrelationId
    });
}
public async Task<IActionResult> UploadFiles()
{
    if (!IsMultipartContentType(Request.ContentType))
    {
        return new UnsupportedMediaTypeResult();
    }

    Log.Information("POSTing files...");

    var boundary = HeaderUtilities.RemoveQuotes(MediaTypeHeaderValue.Parse(Request.ContentType).Boundary);
    var reader = new MultipartReader(boundary.Value, Request.Body);

    MultipartSection section;
    IList<Image> imagesRequest = null;
    var result = new List<FileImages>();

    while ((section = await reader.ReadNextSectionAsync()) != null)
    {
        var contentDisposition = section.GetContentDispositionHeader();

        // The optional form part describes the images to generate for the uploaded files.
        if (contentDisposition.IsFormDisposition() && imagesRequest is null)
        {
            var formSection = section.AsFormDataSection();
            string formValue = await formSection.GetValueAsync();
            imagesRequest = JsonConvert.DeserializeObject<IList<Image>>(formValue, new JsonSerializerSettings
            {
                Error = delegate(object sender, Newtonsoft.Json.Serialization.ErrorEventArgs args) { args.ErrorContext.Handled = true; }
            });
        }

        if (contentDisposition.IsFileDisposition())
        {
            var fileSection = section.AsFileSection();
            if (string.IsNullOrEmpty(fileSection.FileName))
            {
                Log.Information("Empty file section");
                continue;
            }

            Log.Information($"Saving file {fileSection.FileName}");
            var blobId = await _blobStorage.AddFileAsync(Path.GetFileName(fileSection.FileName), fileSection.FileStream, fileSection.Section.ContentType, _bucket);

            // Fall back to the configured default image when no image request was supplied.
            if (imagesRequest is null || !imagesRequest.Any())
            {
                imagesRequest = new Image[]
                {
                    new Image
                    {
                        Width = int.Parse(_configuration["DefaultImage:Width"]),
                        Format = _configuration["DefaultImage:Format"],
                        Height = int.Parse(_configuration["DefaultImage:Height"])
                    }
                };
            }

            foreach (var requestedImage in imagesRequest)
            {
                requestedImage.Id = NewId.Next().ToGuid();
                if (requestedImage.Width <= 0)
                {
                    requestedImage.Width = int.Parse(_configuration["DefaultImage:Width"]);
                }
                if (requestedImage.Height <= 0)
                {
                    requestedImage.Height = int.Parse(_configuration["DefaultImage:Height"]);
                }
            }

            var fileImages = new FileImages { Id = blobId, Images = imagesRequest, Bucket = _bucket };
            //await _imagesMetaCollection.InsertOneAsync(fileImages);

            foreach (var requestedImage in imagesRequest)
            {
                await _bus.Publish<GenerateImage>(new
                {
                    Id = requestedImage.Id, Bucket = _bucket, BlobId = blobId,
                    Image = requestedImage, UserId = new Guid(_bucket)
                });
            }

            result.Add(fileImages);
        }
    }

    return Ok(result);
}
// Test double: stores a canned resource file and publishes WebPageProcessed for a fixed set of URLs; fails everything else.
public async Task Consume(ConsumeContext<ProcessWebPage> context)
{
    var message = context.Message;

    async Task PublishCannedResult(string resourceFileName, string blobFileName, string contentType)
    {
        var blobId = NewId.NextGuid();
        var path = Path.Combine(Directory.GetCurrentDirectory(), "Resources", resourceFileName);
        if (!System.IO.File.Exists(path))
        {
            throw new FileNotFoundException(path);
        }

        await _blobStorage.AddFileAsync(blobId, blobFileName, System.IO.File.OpenRead(path), contentType, message.Bucket);
        await _blobStorage.GetFileInfo(blobId, message.Bucket);

        await context.Publish<WebPageProcessed>(new
        {
            Id = message.Id, CorrelationId = message.CorrelationId, UserId = message.UserId,
            BlobId = blobId, Bucket = message.Bucket
        });
    }

    switch (message.Url)
    {
        case "https://en.wikipedia.org/wiki/Aspirin":
            await PublishCannedResult("wiki.json", "wiki.mol", "application/json");
            break;

        case "http://www.chemspider.com/Chemical-Structure.2157.html?rid=d8424976-d183-431d-9d19-b663a5c4b1df":
            await PublishCannedResult("chemspider.json", "chemspider.mol", "application/json");
            break;

        case "http://lifescience.opensource.epam.com/indigo/api/#loading-molecules-and-query-molecules":
            await PublishCannedResult("Generic.pdf", "Generic.pdf", "application/json");
            break;

        default:
            await context.Publish<WebPageProcessFailed>(new
            {
                Id = message.Id, CorrelationId = message.CorrelationId, UserId = message.UserId,
                Message = "Cannot import file. Format is not supported."
            });
            break;
    }
}
public async Task Handle(ParseFile message, CancellationToken token)
{
    try
    {
        var blob = await blobStorage.GetFileAsync(message.BlobId, message.Bucket);

        string txtData = null;
        Dictionary<string, byte[]> images = null; //tables = null;

        switch (Path.GetExtension(blob.Info.FileName).ToLower())
        {
            case ".pdf":
                if (((OperationType)message.ByteTypes & OperationType.Text) != 0)
                {
                    txtData = PdfImporter.GetText(blob.GetContentAsStream(), blob.Info.FileName);
                }
                if (((OperationType)message.ByteTypes & OperationType.Images) != 0)
                {
                    images = PdfImporter.GetImagesAsBytes(blob.GetContentAsStream(), blob.Info.FileName);
                }
                if (((OperationType)message.ByteTypes & OperationType.Tables) != 0)
                {
                    //tables
                }
                break;
            default:
                await eventPublisher.Publish(new FileParseFailed(message.Id, message.CorrelationId, message.UserId, $"Cannot find file parser for {blob.Info.FileName}"));
                break;
        }

        string bucket = message.Bucket;

        if (txtData != null)
        {
            var txtFileId = Guid.NewGuid();
            await blobStorage.AddFileAsync(txtFileId, $"Text from {blob.Info.FileName}.txt", new MemoryStream(Encoding.UTF8.GetBytes(txtData)), "text/plain", bucket);
            await eventPublisher.Publish(new TextExported(message.Id, message.CorrelationId, message.UserId, txtFileId));
        }

        if (images != null && images.Count != 0)
        {
            var imgCount = 0;
            foreach (var img in images)
            {
                if (img.Value != null)
                {
                    var imgId = Guid.NewGuid();
                    await blobStorage.AddFileAsync(imgId, $"{img.Key}", new MemoryStream(img.Value), "image/jpeg", bucket);
                    imgCount++;
                    await eventPublisher.Publish(new ImageExported(imgId, message.CorrelationId, message.UserId, message.BlobId, imgCount));
                }
            }
        }

        await eventPublisher.Publish(new FileParsed(message.Id, message.CorrelationId, message.UserId, message.ByteTypes));
    }
    catch (Exception e)
    {
        await eventPublisher.Publish(new FileParseFailed(message.Id, message.CorrelationId, message.UserId, $"Cannot parse pdf file from bucket {message.Bucket} with Id {message.BlobId}. Error: {e.Message}"));
    }
}
public async Task<IActionResult> Post(string bucket)
{
    if (!IsMultipartContentType(Request.ContentType))
    {
        return BadRequest();
    }

    var ids = new List<Guid>();

    Log.Information("POSTing files...");

    var boundary = HeaderUtilities.RemoveQuotes(MediaTypeHeaderValue.Parse(Request.ContentType).Boundary);
    var reader = new MultipartReader(boundary.Value, Request.Body);

    MultipartSection section;
    IDictionary<string, object> metadata = new Dictionary<string, object>();
    bool isFileLoaded = false;

    while ((section = await reader.ReadNextSectionAsync()) != null)
    {
        var contentDisposition = section.GetContentDispositionHeader();

        if (contentDisposition.IsFormDisposition())
        {
            // Clear the metadata accumulator once a file has been uploaded and start collecting metadata for the next file.
            if (isFileLoaded)
            {
                metadata.Clear();
                isFileLoaded = false;
            }

            var formDataSection = section.AsFormDataSection();
            string key = formDataSection.Name;
            string value = await formDataSection.GetValueAsync();
            if (!string.Equals(value, "null", StringComparison.OrdinalIgnoreCase))
            {
                metadata.Add(key, value);
            }
        }

        if (contentDisposition.IsFileDisposition())
        {
            var fileSection = section.AsFileSection();
            Log.Information($"Saving file {fileSection.FileName}");

            var id = await _blobStorage.AddFileAsync(Path.GetFileName(fileSection.FileName), fileSection.FileStream, fileSection.Section.ContentType, bucket, metadata);
            ids.Add(id);

            var blobInfo = await _blobStorage.GetFileInfo(id, bucket);

            await _bus.Publish<BlobLoaded>(new
            {
                BlobInfo = new LoadedBlobInfo(id, fileSection.FileName, blobInfo.Length, UserID, blobInfo.UploadDateTime, blobInfo.MD5, bucket, metadata),
                TimeStamp = DateTimeOffset.UtcNow
            });

            isFileLoaded = true;
        }
    }

    return Ok(ids);
}
public async Task Consume(ConsumeContext<ProcessWebPage> context)
{
    var message = context.Message;
    try
    {
        string content = "";
        var meta = new Dictionary<string, object>();

        if (message.Url.ToLower().Contains("chemspider"))
        {
            var cs = new Chemspider(new List<string> { message.Url });
            content = cs.Content;
            meta = cs.Meta;
        }

        if (message.Url.Contains("wikipedia"))
        {
            try
            {
                var wiki = new Wikipedia(new List<string> { message.Url });
                content = wiki.Content;
                meta = wiki.Meta;
            }
            catch (Exception)
            {
                // Swallow Wikipedia parsing errors; empty content falls through to the "no blob" publish below.
            }
        }

        if (content != "" && content != "{}")
        {
            Guid blobId = Guid.NewGuid();
            string fileName = $"{blobId}.json";
            await blobStorage.AddFileAsync(blobId, fileName, new MemoryStream(Encoding.UTF8.GetBytes(content)), "application/json", message.Bucket, meta);

            await context.Publish<WebPageProcessed>(new
            {
                Id = message.Id, CorrelationId = message.CorrelationId, UserId = message.UserId,
                BlobId = blobId, Bucket = message.Bucket
            });
        }
        else
        {
            await context.Publish<WebPageProcessed>(new
            {
                Id = message.Id, CorrelationId = message.CorrelationId, UserId = message.UserId
            });
        }
    }
    catch (Exception e)
    {
        await context.Publish<WebPageProcessFailed>(new
        {
            Id = message.Id, CorrelationId = message.CorrelationId, UserId = message.UserId,
            Message = e.Message
        });
    }
}
// Test double: publishes canned parse results for a fixed set of well-known test file names.
public async Task Consume(ConsumeContext<ParseFile> context)
{
    var blob = await _blobStorage.GetFileAsync(context.Message.BlobId, context.Message.Bucket);

    switch (blob.Info.FileName.ToLower())
    {
        case "ringcount_0.mol":
        {
            var blobId = Guid.NewGuid();
            var startTime = DateTimeOffset.UtcNow;
            await _blobStorage.AddFileAsync(blobId, $"{blobId}.mol", blob.GetContentAsStream(), "chemical/x-mdl-molfile", context.Message.Bucket);

            await context.Publish<FileParsed>(new
            {
                Id = context.Message.Id, TotalRecords = 1, ParsedRecords = 0, FailedRecords = 1,
                CorrelationId = context.Message.CorrelationId, UserId = context.Message.UserId, TimeStamp = startTime
            });
            await context.Publish<RecordParseFailed>(new
            {
                Id = NewId.NextGuid(), FileId = context.Message.Id, Index = 0,
                Message = "molfile loader: ring bond count is allowed only for queries",
                CorrelationId = context.Message.CorrelationId, UserId = context.Message.UserId, TimeStamp = startTime
            });
            break;
        }
        case "aspirin.mol":
        {
            var blobId = Guid.NewGuid();
            await _blobStorage.AddFileAsync(blobId, $"{blobId}.mol", blob.GetContentAsStream(), "chemical/x-mdl-molfile", context.Message.Bucket);

            await context.Publish<RecordParsed>(new
            {
                Id = NewId.NextGuid(), FileId = context.Message.Id, Index = 0,
                Fields = new Field[]
                {
                    new Field("StdInChI", "InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)"),
                    new Field("StdInChIKey", "BSYNRYMUTXBXSQ-UHFFFAOYSA-N"),
                    new Field("SMILES", "CC(OC1=C(C(=O)O)C=CC=C1)=O")
                },
                Bucket = context.Message.Bucket, BlobId = blobId,
                CorrelationId = context.Message.CorrelationId, UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow
            });
            await context.Publish<FileParsed>(new
            {
                Id = context.Message.Id, TotalRecords = 1, Fields = new string[] { "StdInChI", "StdInChIKey", "SMILES" },
                CorrelationId = context.Message.CorrelationId, UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow
            });
            break;
        }
        case "test_solubility.sdf":
        {
            await context.Publish<RecordParseFailed>(new
            {
                Id = NewId.NextGuid(), FileId = context.Message.Id, Index = 0,
                Message = "sdffile loader: could not process file",
                CorrelationId = context.Message.CorrelationId, UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow
            });

            var blobId = Guid.NewGuid();
            await _blobStorage.AddFileAsync(blobId, $"{blobId}.mol", blob.GetContentAsStream(), "chemical/x-mdl-molfile", context.Message.Bucket);

            await context.Publish<RecordParsed>(new
            {
                Id = NewId.NextGuid(), FileId = context.Message.Id, Index = 1,
                Fields = new Field[]
                {
                    new Field("StdInChI", "InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)"),
                    new Field("StdInChIKey", "BSYNRYMUTXBXSQ-UHFFFAOYSA-N"),
                    new Field("SMILES", "CC(OC1=C(C(=O)O)C=CC=C1)=O")
                },
                Bucket = context.Message.Bucket, BlobId = blobId,
                CorrelationId = context.Message.CorrelationId, UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow
            });
            await context.Publish<FileParsed>(new
            {
                Id = context.Message.Id, TotalRecords = 2, ParsedRecords = 1, FailedRecords = 1,
                Fields = new string[] { "StdInChI", "StdInChIKey", "SMILES" },
                CorrelationId = context.Message.CorrelationId, UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow
            });
            break;
        }
        case "invalid_sdf_with_20_records_where_first_and_second_are_invalid.sdf":
        {
            await context.Publish<RecordParseFailed>(new
            {
                Id = NewId.NextGuid(), FileId = context.Message.Id, Index = 0,
                Message = "sdffile loader: could not process file",
                CorrelationId = context.Message.CorrelationId, UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow
            });
            await context.Publish<RecordParseFailed>(new
            {
                Id = NewId.NextGuid(), FileId = context.Message.Id, Index = 1,
                Message = "sdffile loader: could not process file",
                CorrelationId = context.Message.CorrelationId, UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow
            });

            for (var i = 2; i < 20; i++)
            {
                var blobId = Guid.NewGuid();
                await _blobStorage.AddFileAsync(blobId, $"{blobId}.mol", blob.GetContentAsStream(), "chemical/x-mdl-molfile", context.Message.Bucket);

                await context.Publish<RecordParsed>(new
                {
                    Id = NewId.NextGuid(), FileId = context.Message.Id, Index = i,
                    Fields = new Field[]
                    {
                        new Field("StdInChI", "InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)"),
                        new Field("StdInChIKey", "BSYNRYMUTXBXSQ-UHFFFAOYSA-N"),
                        new Field("SMILES", "CC(OC1=C(C(=O)O)C=CC=C1)=O")
                    },
                    Bucket = context.Message.Bucket, BlobId = blobId,
                    CorrelationId = context.Message.CorrelationId, UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow
                });
            }

            await context.Publish<FileParsed>(new
            {
                Id = context.Message.Id, TotalRecords = 20, ParsedRecords = 19, FailedRecords = 1,
                Fields = new string[] { "StdInChI", "StdInChIKey", "SMILES" },
                CorrelationId = context.Message.CorrelationId, UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow
            });
            break;
        }
        case "drugbank_10_records.sdf":
        case "combined lysomotrophic.sdf":
        {
            var totalRecords = 2;
            for (var i = 0; i < totalRecords; i++)
            {
                var blobId = Guid.NewGuid();
                await _blobStorage.AddFileAsync(blobId, $"{blobId}.mol", blob.GetContentAsStream(), "chemical/x-mdl-molfile", context.Message.Bucket);

                await context.Publish<RecordParsed>(new
                {
                    Id = NewId.NextGuid(), FileId = context.Message.Id, Index = i,
                    Fields = new Field[]
                    {
                        new Field("StdInChI", $"StdInChI-{i}"),
                        new Field("StdInChIKey", $"StdInChIKey-{i}"),
                        new Field("SMILES", $"SMILES-{i}")
                    },
                    Bucket = context.Message.Bucket, BlobId = blobId,
                    CorrelationId = context.Message.CorrelationId, UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow
                });
            }

            await context.Publish<FileParsed>(new
            {
                Id = context.Message.Id, TotalRecords = totalRecords, Fields = new string[] { "StdInChI", "StdInChIKey", "SMILES" },
                CorrelationId = context.Message.CorrelationId, UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow
            });
            break;
        }
        case "125_11mos.cdx":
        {
            var totalRecords = 3;
            for (var i = 0; i < totalRecords; i++)
            {
                var blobId = Guid.NewGuid();
                await _blobStorage.AddFileAsync(blobId, $"{blobId}.mol", blob.GetContentAsStream(), "chemical/x-mdl-molfile", context.Message.Bucket);

                await context.Publish<RecordParsed>(new
                {
                    Id = NewId.NextGuid(), FileId = context.Message.Id, Index = i, Fields = new Field[] { },
                    Bucket = context.Message.Bucket, BlobId = blobId,
                    CorrelationId = context.Message.CorrelationId, UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow
                });
            }

            await context.Publish<FileParsed>(new
            {
                Id = context.Message.Id, TotalRecords = totalRecords, Fields = new string[] { },
                CorrelationId = context.Message.CorrelationId, UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow
            });
            break;
        }
        default:
            await context.Publish<FileParseFailed>(new
            {
                Id = context.Message.Id,
                Message = $"Cannot parse chemical file {blob.Info.FileName}. Format is not supported.",
                CorrelationId = context.Message.CorrelationId, UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow
            });
            break;
    }
}
public async Task Consume(ConsumeContext<ParseFile> context)
{
    long totalRecords = 0;
    try
    {
        var blob = await blobStorage.GetFileAsync(context.Message.BlobId, context.Message.Bucket);

        IEnumerable<Record> records = null;
        switch (Path.GetExtension(blob.Info.FileName).ToLower())
        {
            case ".rdf":
            case ".rxn":
                records = new RdfParser.RdfParser(blob.GetContentAsStream());
                break;
            default:
                await context.Publish<FileParseFailed>(new
                {
                    Id = context.Message.Id, UserId = context.Message.UserId,
                    TimeStamp = DateTimeOffset.UtcNow, CorrelationId = context.Message.CorrelationId,
                    Message = $"Cannot find file parser for {blob.Info.FileName}"
                });
                return; // no parser, nothing left to do
        }

        string bucket = context.Message.Bucket;
        List<string> fields = new List<string>();

        var enumerator = records.GetEnumerator();
        while (enumerator.MoveNext())
        {
            totalRecords++;
            try
            {
                var record = enumerator.Current;
                var blobId = NewId.NextGuid();

                fields.AddRange(record.Properties.Select(p => p.Name).Where(n => !fields.Contains(n)).ToList());

                await blobStorage.AddFileAsync(blobId, $"{blobId}.rxn", new MemoryStream(Encoding.UTF8.GetBytes(record.Data)), "chemical/x-mdl-rxnfile", bucket);

                await context.Publish<RecordParsed>(new
                {
                    Id = NewId.NextGuid(), FileId = context.Message.Id, Bucket = bucket, BlobId = blobId, Index = record.Index,
                    Fields = record.Properties?.Select(p => new Field(p.Name, p.Value)),
                    UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow, CorrelationId = context.Message.CorrelationId
                });
            }
            catch (Exception ex)
            {
                await context.Publish<RecordParseFailed>(new
                {
                    Id = NewId.NextGuid(), FileId = context.Message.Id,
                    UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow, CorrelationId = context.Message.CorrelationId,
                    Message = $"Cannot parse reaction record #{totalRecords} from file {context.Message.Id}. Error: {ex.Message}"
                });
            }

            // Temporary limitation: we don't want to process more than 100 records inside any file.
            if (totalRecords >= 100)
            {
                break;
            }
        }

        await context.Publish<FileParsed>(new
        {
            Id = context.Message.Id, TotalRecords = totalRecords, Fields = fields,
            UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow, CorrelationId = context.Message.CorrelationId
        });
    }
    catch (Exception e)
    {
        await context.Publish<FileParseFailed>(new
        {
            Id = context.Message.Id, UserId = context.Message.UserId,
            TimeStamp = DateTimeOffset.UtcNow, CorrelationId = context.Message.CorrelationId,
            Message = $"Cannot parse reaction file from bucket {context.Message.Bucket} with Id {context.Message.Id}. Error: {e.Message}"
        });
    }
}
public async Task Consume(ConsumeContext<ConvertToPdf> context)
{
    try
    {
        var blob = await blobStorage.GetFileAsync(context.Message.BlobId, context.Message.Bucket);

        Stream data = null;
        IConvert converter = null;

        // Copy the blob to a temp file; FileOptions.DeleteOnClose removes it when the stream is closed.
        var tempFilePath = Path.GetTempFileName();
        using (var fileStream = File.Create(tempFilePath))
        {
            blob.GetContentAsStream().CopyTo(fileStream);
        }

        using (FileStream fs = new FileStream(tempFilePath, FileMode.Open, FileAccess.ReadWrite, FileShare.None, 4096, FileOptions.RandomAccess | FileOptions.DeleteOnClose))
        {
            switch (Path.GetExtension(blob.Info.FileName).ToLower())
            {
                case ".doc":
                case ".docx":
                case ".odt":
                    converter = new DocToPdf();
                    data = converter.Convert(fs);
                    break;
                case ".xls":
                case ".xlsx":
                case ".ods":
                    converter = new XlsToPdf();
                    data = converter.Convert(fs);
                    break;
                case ".ppt":
                case ".pptx":
                case ".odp":
                    converter = new PptToPdf();
                    data = converter.Convert(fs);
                    break;
                default:
                    await context.Publish<ConvertToPdfFailed>(new
                    {
                        Id = context.Message.Id, UserId = context.Message.UserId,
                        TimeStamp = DateTimeOffset.UtcNow, CorrelationId = context.Message.CorrelationId,
                        Message = $"Cannot find file converter for {blob.Info.FileName}"
                    });
                    break;
            }

            string bucket = context.Message.Bucket;

            if (data != null)
            {
                var blobId = Guid.NewGuid();
                data.Seek(0, SeekOrigin.Begin);
                await blobStorage.AddFileAsync(blobId, $"{blobId}.pdf", data, "application/pdf", bucket);

                await context.Publish<ConvertedToPdf>(new
                {
                    Bucket = bucket, BlobId = blobId, Id = context.Message.Id,
                    UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow, CorrelationId = context.Message.CorrelationId
                });
            }
        }
    }
    catch (Exception e)
    {
        await context.Publish<ConvertToPdfFailed>(new
        {
            Id = context.Message.Id, UserId = context.Message.UserId,
            TimeStamp = DateTimeOffset.UtcNow, CorrelationId = context.Message.CorrelationId,
            Message = $"Cannot convert file to pdf from bucket {context.Message.Bucket} with Id {context.Message.BlobId}. Error: {e.Message}"
        });
    }
}
public async Task Consume(ConsumeContext<GenerateImage> context)
{
    try
    {
        var blobInfo = await _blobStorage.GetFileInfo(context.Message.BlobId, context.Message.Bucket);
        string extension = Path.GetExtension(blobInfo.FileName);

        if (!FileRasterizer.Supports(extension))
        {
            throw new InvalidDataException($"Unsupported file type {extension}");
        }

        using (var stream = new MemoryStream())
        {
            await _blobStorage.DownloadFileToStreamAsync(context.Message.BlobId, stream, context.Message.Bucket);
            stream.Position = 0;

            byte[] imageBytes;
            if (context.Message.Image.Format.ToLower() != "svg")
            {
                // Raster formats: rasterize the source and scale it to the requested size.
                var format = context.Message.Image.Format.ParseImageFormat();
                var rasterizer = new FileRasterizer();

                Log.Information($"Rasterizing source '{context.Message.BlobId}'");
                var image = rasterizer.Rasterize(stream, extension);
                image = image.Scale(context.Message.Image.Width, context.Message.Image.Height);
                imageBytes = image.Convert(format);
            }
            else
            {
                // SVG: render mol/rxn sources through Indigo.
                string data = System.Text.Encoding.ASCII.GetString(stream.ToArray());
                switch (extension.ToLower())
                {
                    case ".mol":
                        imageBytes = new IndigoAdapter().Mol2Image(data, context.Message.Image.Format, context.Message.Image.Width, context.Message.Image.Height);
                        break;
                    case ".rxn":
                        imageBytes = new IndigoAdapter().Rxn2Image(data, context.Message.Image.Format, context.Message.Image.Width, context.Message.Image.Height);
                        break;
                    default:
                        throw new InvalidDataException($"Unsupported file type {extension} for {context.Message.Image.Format} generation");
                }
            }

            Log.Information($"Saving image file {context.Message.Image.Id} as {context.Message.Image.Format}");

            await _blobStorage.AddFileAsync(
                id: context.Message.Image.Id,
                fileName: $"{blobInfo.FileName}.{$"{context.Message.Image.Format}".ToLower()}",
                source: imageBytes,
                contentType: $"{context.Message.Image.Format}".GetMimeType(),
                bucketName: context.Message.Bucket,
                metadata: new Dictionary<string, object> { { "SourceId", context.Message.BlobId } });

            Log.Information($"Image file {context.Message.Image.Id} as {context.Message.Image.Format} saved.");
        }

        context.Message.Image.MimeType = context.Message.Image.Format.GetMimeType();

        await context.Publish<ImageGenerated>(new
        {
            Id = context.Message.Id, Bucket = context.Message.Bucket, BlobId = context.Message.BlobId,
            Image = context.Message.Image, CorrelationId = context.Message.CorrelationId,
            UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow
        });
    }
    catch (Exception e)
    {
        context.Message.Image.Exception = e.Message;

        await context.Publish<ImageGenerationFailed>(new
        {
            Id = context.Message.Id, Image = context.Message.Image,
            CorrelationId = context.Message.CorrelationId, UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow
        });
    }
}
public async Task Consume(ConsumeContext<ParseWebPage> context)
{
    var blob = await blobStorage.GetFileAsync(context.Message.BlobId, context.Message.Bucket);
    var importedFrom = blob.Info.Metadata["ImportedFrom"];

    IEnumerable<Record> records = null;
    switch (importedFrom)
    {
        case "wikipedia.org":
            records = new WikipediaReader(blob.GetContentAsStream());
            break;
        default:
            break;
    }

    var bucket = context.Message.Bucket;
    long totalRecords = 0;
    List<string> fields = new List<string>();

    foreach (var record in records)
    {
        var blobId = Guid.NewGuid();

        // Pick the blob extension and MIME type based on the parsed record type.
        var extension = "";
        var mimetype = "";
        switch (record.Type)
        {
            case RecordType.Chemical:
                extension = "mol";
                mimetype = "chemical/x-mdl-molfile";
                break;
            case RecordType.Crystal:
                extension = "cif";
                mimetype = "chemical/x-cif";
                break;
            case RecordType.Reaction:
                extension = "rxn";
                mimetype = "chemical/x-mdl-rxn";
                break;
            case RecordType.Spectrum:
                extension = "jdx";
                mimetype = "chemical/x-jcamp-dx";
                break;
            default:
                extension = "txt";
                mimetype = "text/plain";
                break;
        }

        await blobStorage.AddFileAsync(blobId, $"{blobId}.{extension}", new MemoryStream(Encoding.UTF8.GetBytes(record.Data ?? "")), mimetype, bucket);

        fields.AddRange(record.Properties.Select(p => p.Name).Where(n => !fields.Contains(n)).ToList());

        await context.Publish<RecordParsed>(new
        {
            Id = NewId.NextGuid(), CorrelationId = context.Message.CorrelationId, UserId = context.Message.UserId,
            FileId = context.Message.Id, Bucket = bucket, BlobId = blobId, Index = record.Index,
            Fields = record.Properties?.Select(p => new Field(p.Name, p.Value))
        });

        totalRecords++;
    }

    await context.Publish<WebPageParsed>(new
    {
        Id = context.Message.Id, CorrelationId = context.Message.CorrelationId, UserId = context.Message.UserId,
        TotalRecords = totalRecords, Fields = fields
    });
}
public static async Task AddFileAsync(this IBlobStorage storage, Guid id, string fileName, byte[] source,
    string contentType = "application/octet-stream", string bucketName = null, IDictionary<string, object> metadata = null)
{
    await storage.AddFileAsync(id, fileName, new MemoryStream(source), contentType, bucketName, metadata);
}
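// Usage sketch for the byte[] overload above (hypothetical call site, not taken from the repository):
// it wraps an already-materialized payload in a MemoryStream and delegates to the stream-based
// IBlobStorage.AddFileAsync, which is how callers such as the image generation consumer store rendered bytes.
// The variable names, file name and bucket below are placeholders.
//
//     byte[] imageBytes = File.ReadAllBytes("molecule.png");   // hypothetical payload
//     Guid blobId = Guid.NewGuid();
//     await storage.AddFileAsync(blobId, "molecule.png", imageBytes, "image/png", "images-bucket",
//         new Dictionary<string, object> { { "SourceId", blobId } });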
public async Task Consume(ConsumeContext<ParseFile> context)
{
    try
    {
        var blob = await blobStorage.GetFileAsync(context.Message.BlobId, context.Message.Bucket);

        var fields = new List<string>();
        IEnumerable<Record> records = null;

        switch (Path.GetExtension(blob.Info.FileName).ToLower())
        {
            case ".dx":
            case ".jdx":
                records = new JcampReader(blob.GetContentAsStream());
                break;
            default:
                await context.Publish<FileParseFailed>(new
                {
                    Id = context.Message.Id, UserId = context.Message.UserId,
                    TimeStamp = DateTimeOffset.UtcNow, CorrelationId = context.Message.CorrelationId,
                    Message = $"Cannot find file parser for {blob.Info.FileName}"
                });
                return; // no parser, nothing left to do
        }

        long totalRecords = 0;
        string bucket = context.Message.Bucket;

        foreach (var record in records)
        {
            var blobId = NewId.NextGuid();

            fields.AddRange(record.Properties?.Select(p => p.Name).Where(n => !fields.Contains(n)).ToList());

            await blobStorage.AddFileAsync(blobId, blobId + Path.GetExtension(blob.Info.FileName).ToLower(), new MemoryStream(Encoding.UTF8.GetBytes(record.Data)), "chemical/x-jcamp-dx", bucket);

            await context.Publish<RecordParsed>(new
            {
                Id = NewId.NextGuid(), FileId = context.Message.Id, Bucket = bucket, BlobId = blobId, Index = record.Index,
                Fields = record.Properties?.Select(p => new Field(p.Name, p.Value)),
                UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow, CorrelationId = context.Message.CorrelationId
            });

            totalRecords++;
        }

        await context.Publish<FileParsed>(new
        {
            Id = context.Message.Id, TotalRecords = totalRecords, Fields = fields,
            UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow, CorrelationId = context.Message.CorrelationId
        });
    }
    catch (Exception e)
    {
        await context.Publish<FileParseFailed>(new
        {
            Id = context.Message.Id, UserId = context.Message.UserId,
            TimeStamp = DateTimeOffset.UtcNow, CorrelationId = context.Message.CorrelationId,
            Message = $"Cannot parse spectra file from bucket {context.Message.Bucket} with Id {context.Message.BlobId}. Error: {e.Message}"
        });
    }
}
public async Task Consume(ConsumeContext<ParseFile> context)
{
    var blob = await _blobStorage.GetFileAsync(context.Message.BlobId, context.Message.Bucket);

    switch (blob.Info.FileName.ToLower())
    {
        case "13csample.jdx":
            await context.Publish<FileParseFailed>(new
            {
                Id = context.Message.Id, UserId = context.Message.UserId,
                TimeStamp = DateTimeOffset.UtcNow, CorrelationId = context.Message.CorrelationId,
                Message = $"Cannot parse spectra file {blob.Info.FileName}."
            });
            break;

        case "2-methyl-1-propanol.jdx":
            var blobId = Guid.NewGuid();
            await _blobStorage.AddFileAsync(blobId, $"{blobId}.jdx", blob.GetContentAsStream(), "chemical/x-jcamp-dx", context.Message.Bucket);

            var fields = new Field[] { new Field("Field1", "Value1"), new Field("Field2", "Value2") };

            await context.Publish<RecordParsed>(new
            {
                Id = NewId.NextGuid(), FileId = context.Message.Id, Bucket = context.Message.Bucket, BlobId = blobId, Index = 0, Fields = fields,
                UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow, CorrelationId = context.Message.CorrelationId
            });
            await context.Publish<FileParsed>(new
            {
                Id = context.Message.Id, TotalRecords = 1, Fields = fields.Select(f => f.Name),
                UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow, CorrelationId = context.Message.CorrelationId
            });
            break;

        default:
            await context.Publish<FileParseFailed>(new
            {
                Id = context.Message.Id, UserId = context.Message.UserId,
                TimeStamp = DateTimeOffset.UtcNow, CorrelationId = context.Message.CorrelationId,
                Message = $"Cannot parse spectra file {blob.Info.FileName}. Format is not supported."
            });
            break;
    }
}
public async Task Consume(ConsumeContext<ParseFile> context)
{
    var failedRecords = 0;
    var parsedRecords = 0;

    try
    {
        var blob = await blobStorage.GetFileAsync(context.Message.BlobId, context.Message.Bucket);
        if (blob == null)
        {
            throw new FileNotFoundException($"Blob with Id {context.Message.BlobId} not found in bucket {context.Message.Bucket}");
        }

        IEnumerable<Record> records = null;
        switch (Path.GetExtension(blob.Info.FileName).ToLower())
        {
            case ".mol":
            case ".sdf":
                records = new SdfIndigoParser(blob.GetContentAsStream());
                break;
            case ".cdx":
                records = new CdxParser.CdxParser(blob.GetContentAsStream());
                break;
            default:
                await context.Publish<FileParseFailed>(new
                {
                    Id = context.Message.Id,
                    Message = $"Cannot parse chemical file {blob.Info.FileName}. Format is not supported.",
                    CorrelationId = context.Message.CorrelationId, UserId = context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow
                });
                return;
        }

        var bucket = context.Message.Bucket;
        var index = 0;
        List<string> fields = new List<string>();

        var enumerator = records.GetEnumerator();
        while (enumerator.MoveNext())
        {
            try
            {
                var record = enumerator.Current;
                var blobId = Guid.NewGuid();

                await blobStorage.AddFileAsync(blobId, $"{blobId}.mol", new MemoryStream(Encoding.UTF8.GetBytes(record.Data)), "chemical/x-mdl-molfile", bucket);

                fields.AddRange(record.Properties.Select(p => p.Name).Where(n => !fields.Contains(n)).ToList());

                await context.Publish<RecordParsed>(new
                {
                    Id = NewId.NextGuid(), FileId = context.Message.Id, Index = index,
                    Fields = record.Properties?.Select(p => new Field(p.Name, p.Value)),
                    Bucket = bucket, BlobId = blobId,
                    context.Message.CorrelationId, context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow
                });

                parsedRecords++;
            }
            catch (Exception ex)
            {
                await context.Publish<RecordParseFailed>(new
                {
                    Id = NewId.NextGuid(), FileId = context.Message.Id, Index = index, ex.Message,
                    context.Message.CorrelationId, context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow
                });

                failedRecords++;
            }

            index++;

            // Temporary limitation: we don't want to process more than 100 records inside any file.
            if (index >= 100)
            {
                break;
            }
        }

        await context.Publish<FileParsed>(new
        {
            context.Message.Id,
            FailedRecords = failedRecords, ParsedRecords = parsedRecords, TotalRecords = parsedRecords + failedRecords,
            Fields = fields,
            context.Message.CorrelationId, context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow
        });
    }
    catch (Exception ex)
    {
        await context.Publish<FileParseFailed>(new
        {
            context.Message.Id,
            FailedRecords = failedRecords, ParsedRecords = parsedRecords, TotalRecords = parsedRecords + failedRecords,
            ex.Message,
            context.Message.CorrelationId, context.Message.UserId, TimeStamp = DateTimeOffset.UtcNow
        });
    }
}