Esempio n. 1
0
        /// <summary>
        /// Uploads a file from the "Resources" folder of the current working directory to blob
        /// storage and publishes a <c>BlobLoaded</c> event describing the stored blob.
        /// </summary>
        /// <param name="endpoint">Endpoint used to publish the <c>BlobLoaded</c> event.</param>
        /// <param name="userId">User id recorded in the published blob info.</param>
        /// <param name="bucket">Storage bucket that receives the file.</param>
        /// <param name="fileName">Name of the file inside "Resources"; also used as the blob file name.</param>
        /// <param name="contentType">Content type stored with the blob.</param>
        /// <param name="metadata">Optional metadata passed to the storage together with the file.</param>
        /// <returns>The id the storage assigned to the uploaded blob.</returns>
        /// <exception cref="FileNotFoundException">The resource file does not exist.</exception>
        protected async Task<Guid> LoadBlob(IPublishEndpoint endpoint, Guid userId, string bucket, string fileName, string contentType = "application/octet-stream", IDictionary<string, object> metadata = null)
        {
            var path = Path.Combine(Directory.GetCurrentDirectory(), "Resources", fileName);

            if (!File.Exists(path))
            {
                throw new FileNotFoundException(path);
            }

            Guid blobId;

            // Release the file handle as soon as the upload has completed.
            using (var fileStream = File.OpenRead(path))
            {
                blobId = await _blobStorage.AddFileAsync(fileName, fileStream, contentType, bucket, metadata);
            }

            // Read the stored info back so the event carries the values reported by the storage.
            var blobInfo = await _blobStorage.GetFileInfo(blobId, bucket);

            await endpoint.Publish<BlobLoaded>(new
            {
                BlobInfo  = new LoadedBlobInfo(blobId, fileName, blobInfo.Length, userId, blobInfo.UploadDateTime, blobInfo.MD5, bucket, blobInfo.Metadata),
                TimeStamp = DateTimeOffset.UtcNow
            });

            Log.Debug($"BlobLoaded: {fileName}; BlobId: {blobId}");

            return blobId;
        }
Esempio n. 2
0
        /// <summary>
        /// Stores a file from the "Resources" folder of the current working directory in blob
        /// storage under a caller-supplied blob id. Does nothing when the file does not exist.
        /// </summary>
        /// <param name="blobId">Id to assign to the stored blob.</param>
        /// <param name="userId">Not used by this method.</param>
        /// <param name="bucket">Storage bucket that receives the file.</param>
        /// <param name="fileName">Name of the file inside "Resources"; also used as the blob file name.</param>
        /// <param name="metadata">Optional metadata passed to the storage together with the file.</param>
        public async Task AddBlob(Guid blobId, Guid userId, string bucket, string fileName, IDictionary<string, object> metadata = null)
        {
            var path = Path.Combine(Directory.GetCurrentDirectory(), "Resources", fileName);

            if (!File.Exists(path))
            {
                return;
            }

            // Dispose the file handle once the upload has finished.
            using (var fileStream = File.OpenRead(path))
            {
                await _blobStorage.AddFileAsync(blobId, fileName, fileStream, "application/octet-stream", bucket, metadata);
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Downloads the page at the requested URL to extract its title, renders the URL to PDF,
        /// stores the PDF as a new blob and publishes <c>PdfGenerated</c>. Any exception is
        /// reported as a <c>PdfGenerationFailed</c> event instead of being rethrown.
        /// </summary>
        /// <param name="context">Consume context carrying the <c>GeneratePdfFromHtml</c> command.</param>
        public async Task Consume(ConsumeContext<GeneratePdfFromHtml> context)
        {
            var message = context.Message;

            try
            {
                string title;

                // The client, the response stream and the reader are all disposed when the page has been read.
                using (var client = new WebClient())
                using (var stream = await client.OpenReadTaskAsync(message.Url))
                using (var reader = new StreamReader(stream, Encoding.UTF8))
                {
                    var content = await reader.ReadToEndAsync();

                    // Yields an empty string when the page has no <title> element.
                    title = Regex.Match(content, @"\<title\b[^>]*\>\s*(?<Title>[\s\S]*?)\</title\>", RegexOptions.IgnoreCase).Groups["Title"].Value;
                }

                var pdfBytes = HtmlToPdf.GetPdfAsByteArray(message.Url);

                var blobId = Guid.NewGuid();

                using (var dataStream = new MemoryStream(pdfBytes))
                {
                    await blobStorage.AddFileAsync(blobId, $"{blobId}.pdf", dataStream, "application/pdf", message.Bucket);
                }

                var blobInfo = await blobStorage.GetFileInfo(blobId, message.Bucket);

                await context.Publish<PdfGenerated>(new
                {
                    Id            = NewId.NextGuid(),
                    CorrelationId = message.CorrelationId,
                    UserId        = message.UserId,
                    Bucket        = message.Bucket,
                    Title         = title,
                    BlobId        = blobId,
                    PageId        = message.Id,
                    // "Lenght" is the property name used by the message contract; it must stay as is.
                    Lenght        = blobInfo.Length,
                    Md5           = blobInfo.MD5
                });
            }
            catch (Exception e)
            {
                await context.Publish<PdfGenerationFailed>(new
                {
                    Id            = NewId.NextGuid(),
                    CorrelationId = message.CorrelationId,
                    UserId        = message.UserId,
                    Message       = $"Can not get pdf from url {message.Url}. Details: {e.Message}"
                });
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Sends a <c>ParseFile</c> command for a CIF resource and verifies that the command is
        /// consumed, that a <c>FileParsed</c> event is published, and that the first
        /// <c>RecordParsed</c> event matches the expected file id, bucket, index and user.
        /// </summary>
        /// <param name="expectedEvent">Frozen fixture supplying the ids used in the command and the assertions.</param>
        public async Task Send_command_to_parse_valid_cif_should_publish_one_RecordParsed_and_one_FileParsed_event([Frozen] FileParsedEvent expectedEvent)
        {
            try
            {
                await _harness.Start();

                var blobId = await _blobStorage.AddFileAsync("1100110.cif", Resource._1100110, "chemical/x-cif", BUCKET);

                await _harness.InputQueueSendEndpoint.Send<ParseFile>(new
                {
                    expectedEvent.Id,
                    Bucket = BUCKET,
                    BlobId = blobId,
                    expectedEvent.CorrelationId,
                    expectedEvent.UserId
                });

                // Assert the outcome of both harness queries instead of discarding it, so the test
                // fails when the command is not consumed or FileParsed is never published.
                _consumer.Consumed.Select<ParseFile>().Any().Should().BeTrue();
                _harness.Published.Select<FileParsed>().Any().Should().BeTrue();

                var allEvents = _harness.Published.ToList();

                var parsed = allEvents.Select<RecordParsed>().FirstOrDefault();
                parsed.Should().NotBeNull();
                parsed.ShouldBeEquivalentTo(new
                {
                    FileId = expectedEvent.Id,
                    Bucket = BUCKET,
                    Index  = 0L,
                    expectedEvent.UserId
                },
                                            options => options.ExcludingMissingMembers()
                                            );
                parsed.Fields.Count().Should().Be(22);
            }
            finally
            {
                await _harness.Stop();
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Sends a <c>ParseFile</c> command for an RXN resource and checks that it is consumed,
        /// that exactly one <c>RecordParsed</c> event is published, and that the first
        /// <c>FileParsed</c> event matches the expected event (ignoring time stamp, fields and
        /// total records) with a total of one record.
        /// </summary>
        /// <param name="expectedEvent">Frozen fixture supplying the ids used in the command and the assertions.</param>
        public async Task Send_command_to_parse_valid_rxn_should_publish_one_RecordParsed_one_FileParsed_event([Frozen] FileParsedEvent expectedEvent)
        {
            try
            {
                await _harness.Start();

                var uploadedBlobId = await _blobStorage.AddFileAsync("10001.rxn", Resource._10001, "chemical/x-mdl-rxnfile", BUCKET);

                var command = new
                {
                    expectedEvent.Id,
                    Bucket = BUCKET,
                    BlobId = uploadedBlobId,
                    expectedEvent.CorrelationId,
                    expectedEvent.UserId
                };

                await _harness.InputQueueSendEndpoint.Send<ParseFile>(command);

                // The command must have reached the consumer.
                _consumer.Consumed.Select<ParseFile>().Any().Should().BeTrue();

                var published = _harness.Published.ToList();

                // Exactly one record is expected for a single-reaction file.
                published.Count(evt => evt.MessageType == typeof(RecordParsed)).Should().Be(1);

                var fileParsed = published.Select<FileParsed>().FirstOrDefault();
                fileParsed.Should().NotBeNull();
                fileParsed.Should().BeEquivalentTo(
                    expectedEvent,
                    cfg => cfg
                        .Excluding(m => m.TimeStamp)
                        .Excluding(m => m.Fields)
                        .Excluding(m => m.TotalRecords));
                fileParsed.TotalRecords.Should().Be(1);
            }
            finally
            {
                await _harness.Stop();
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Accepts a multipart upload, stores the first ".mol" file section in the caller's bucket
        /// and publishes a <c>Standardize</c> command for it.
        /// </summary>
        /// <returns>
        /// 201 pointing at the "GetStandardization" route for the stored blob; 401 when the "sub"
        /// claim is missing; 415 when the request is not multipart; 400 when no ".mol" file
        /// section was found.
        /// </returns>
        public async Task<IActionResult> Post()
        {
            // The "sub" claim names the bucket; without it there is nowhere to store the upload.
            string bucket = User.FindFirst("sub")?.Value;

            if (bucket == null)
            {
                return Unauthorized();
            }

            Log.Debug("Request to standardization");

            if (!IsMultipartContentType(Request.ContentType))
            {
                return new UnsupportedMediaTypeResult();
            }

            Log.Debug("POSTing files...");

            var boundary = HeaderUtilities.RemoveQuotes(MediaTypeHeaderValue.Parse(Request.ContentType).Boundary);
            var reader   = new MultipartReader(boundary.Value, Request.Body);

            MultipartSection section;

            while ((section = await reader.ReadNextSectionAsync()) != null)
            {
                var contentDisposition = section.GetContentDispositionHeader();

                if (!contentDisposition.IsFileDisposition())
                {
                    continue;
                }

                var fileSection = section.AsFileSection();

                // Ordinal, case-insensitive extension check: independent of the server culture.
                if (!fileSection.FileName.EndsWith(".mol", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                Log.Debug($"Saving file {fileSection.FileName}");

                var blobId = await _blobStorage.AddFileAsync(fileSection.FileName, fileSection.FileStream, fileSection.Section.ContentType, bucket);

                await _bus.Publish<Standardize>(new
                {
                    Id            = NewId.Next(),
                    Bucket        = bucket,
                    BlobId        = blobId,
                    CorrelationId = Guid.Empty,
                    UserId        = UserId
                });

                // Only the first matching file is processed; remaining sections are ignored.
                return CreatedAtRoute("GetStandardization", new { id = blobId }, null);
            }

            return BadRequest();
        }
        /// <summary>
        /// Reads a blob as text, validates and standardizes it, stores the standardized result as
        /// a new ".mol" blob and publishes <c>ValidatedStandardized</c>. Any exception is reported
        /// as a <c>StandardizationValidationFailed</c> event instead of being rethrown.
        /// </summary>
        /// <param name="context">Consume context carrying the <c>ValidateStandardize</c> command.</param>
        public async Task Consume(ConsumeContext<ValidateStandardize> context)
        {
            var message = context.Message;

            try
            {
                using (var blob = await blobStorage.GetFileAsync(message.BlobId, message.Bucket))
                using (var reader = new StreamReader(blob.GetContentAsStream()))
                {
                    string mol = await reader.ReadToEndAsync();

                    var validResult    = validation.Validate(mol);
                    var standardResult = standardization.Standardize(mol);

                    var newId  = Guid.NewGuid();
                    var issues = standardResult.Issues.Concat(validResult.Issues);

                    var record = new StandardizedValidatedRecord
                    {
                        StandardizedId = newId,
                        Issues         = IssuesResolver.ResolveIssues(issues, issuesConfig)
                    };

                    // NOTE(review): the result is stored in a bucket named after the message id,
                    // not in message.Bucket - confirm this is intended.
                    var bucket = message.Id.ToString();

                    using (var standardizedStream = new MemoryStream(Encoding.UTF8.GetBytes(standardResult.Standardized)))
                    {
                        await blobStorage.AddFileAsync(newId, $"{newId}.mol", standardizedStream, "chemical/x-mdl-molfile", bucket);
                    }

                    await context.Publish<ValidatedStandardized>(new
                    {
                        Id            = message.Id,
                        Record        = record,
                        UserId        = message.UserId,
                        TimeStamp     = DateTimeOffset.UtcNow,
                        CorrelationId = message.CorrelationId
                    });
                }
            }
            catch (Exception ex)
            {
                await context.Publish<StandardizationValidationFailed>(new
                {
                    Id            = message.Id,
                    UserId        = message.UserId,
                    TimeStamp     = DateTimeOffset.UtcNow,
                    CorrelationId = message.CorrelationId,
                    Message       = $"Blob with id {message.BlobId} from bucket {message.Bucket} can not be validated and standardized or not found. Error: {ex.Message}"
                });
            }
        }
Esempio n. 8
0
        /// <summary>
        /// Accepts a multipart upload and stores the first ".mol" file section in the bucket
        /// named after the current user id.
        /// </summary>
        /// <returns>
        /// 201 pointing at the "Get" route for the stored blob; 415 when the request is not
        /// multipart; 400 when no ".mol" file section was found.
        /// </returns>
        public async Task<IActionResult> Post()
        {
            string bucket = UserId.ToString();

            Log.Information("Request to calculate ChemicalProperties");

            if (!IsMultipartContentType(Request.ContentType))
            {
                return new UnsupportedMediaTypeResult();
            }

            Log.Information("POSTing files...");

            var boundary = HeaderUtilities.RemoveQuotes(MediaTypeHeaderValue.Parse(Request.ContentType).Boundary);
            var reader   = new MultipartReader(boundary.Value, Request.Body);

            MultipartSection section;

            while ((section = await reader.ReadNextSectionAsync()) != null)
            {
                var contentDisposition = section.GetContentDispositionHeader();

                if (!contentDisposition.IsFileDisposition())
                {
                    continue;
                }

                var fileSection = section.AsFileSection();

                // Ordinal, case-insensitive extension check: independent of the server culture.
                if (!fileSection.FileName.EndsWith(".mol", System.StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                Log.Information($"Saving file {fileSection.FileName}");

                var id = await _blobStorage.AddFileAsync(fileSection.FileName, fileSection.FileStream, fileSection.Section.ContentType, bucket);

                // Only the first matching file is stored; remaining sections are ignored.
                return CreatedAtRoute("Get", new { id = id }, null);
            }

            return BadRequest();
        }
Esempio n. 9
0
        /// <summary>
        /// Stores a fixed PDF from the "Resources" folder of the current working directory as a
        /// new blob in the requested bucket and publishes <c>ConvertedToPdf</c> for it.
        /// </summary>
        /// <param name="context">Consume context carrying the <c>ConvertToPdf</c> command.</param>
        /// <exception cref="FileNotFoundException">The resource PDF does not exist.</exception>
        public async Task Consume(ConsumeContext<ConvertToPdf> context)
        {
            var message = context.Message;
            var blobId  = Guid.NewGuid();

            var path = Path.Combine(Directory.GetCurrentDirectory(), "Resources", "Abdelaziz A Full_manuscript.pdf");

            if (!System.IO.File.Exists(path))
            {
                throw new FileNotFoundException(path);
            }

            // Dispose the file handle once the upload has finished.
            using (var pdfStream = System.IO.File.OpenRead(path))
            {
                await _blobStorage.AddFileAsync(blobId, $"{blobId}.pdf", pdfStream, "application/pdf", message.Bucket);
            }

            await context.Publish<ConvertedToPdf>(new
            {
                Bucket        = message.Bucket,
                BlobId        = blobId,
                Id            = message.Id,
                UserId        = message.UserId,
                TimeStamp     = DateTimeOffset.UtcNow,
                CorrelationId = message.CorrelationId
            });
        }
Esempio n. 10
0
        /// <summary>
        /// Accepts a multipart upload. The first form section is parsed as a JSON list of
        /// requested images; every named file section is stored in <c>_bucket</c> and one
        /// <c>GenerateImage</c> command is published per requested image. When no images were
        /// requested, a single image built from the "DefaultImage" configuration is used.
        /// </summary>
        /// <returns>
        /// 415 when the request is not multipart; otherwise 200 with one <c>FileImages</c> entry
        /// per stored file.
        /// </returns>
        public async Task<IActionResult> UploadFiles()
        {
            if (!IsMultipartContentType(Request.ContentType))
            {
                return new UnsupportedMediaTypeResult();
            }

            Log.Information($"POSTing files...");

            var mediaType = MediaTypeHeaderValue.Parse(Request.ContentType);
            var boundary  = HeaderUtilities.RemoveQuotes(mediaType.Boundary);
            var multipart = new MultipartReader(boundary.Value, Request.Body);

            IList<Image> requestedImages = null;
            var uploaded = new List<FileImages>();

            for (var section = await multipart.ReadNextSectionAsync(); section != null; section = await multipart.ReadNextSectionAsync())
            {
                var disposition = section.GetContentDispositionHeader();

                // Only the first form section is read as the image request.
                if (disposition.IsFormDisposition() && requestedImages == null)
                {
                    var json = await section.AsFormDataSection().GetValueAsync();

                    // Deserialization errors are marked as handled rather than thrown.
                    var lenientSettings = new JsonSerializerSettings
                    {
                        Error = (sender, args) => args.ErrorContext.Handled = true
                    };

                    requestedImages = JsonConvert.DeserializeObject<IList<Image>>(json, lenientSettings);
                }

                if (!disposition.IsFileDisposition())
                {
                    continue;
                }

                var file = section.AsFileSection();

                if (string.IsNullOrEmpty(file.FileName))
                {
                    Log.Information($"Empty file section");
                    continue;
                }

                Log.Information($"Saving file {file.FileName}");

                var blobId = await _blobStorage.AddFileAsync(Path.GetFileName(file.FileName), file.FileStream, file.Section.ContentType, _bucket);

                // Fall back to a single default image when nothing (or an empty list) was requested.
                if (requestedImages == null || requestedImages.Count == 0)
                {
                    requestedImages = new[]
                    {
                        new Image
                        {
                            Width  = int.Parse(_configuration["DefaultImage:Width"]),
                            Format = _configuration["DefaultImage:Format"],
                            Height = int.Parse(_configuration["DefaultImage:Height"])
                        }
                    };
                }

                // NOTE(review): the same Image instances are reused for every file in the request,
                // so the ids assigned here also overwrite those referenced by earlier entries of
                // the result - confirm whether that is intended.
                foreach (var image in requestedImages)
                {
                    image.Id = NewId.Next().ToGuid();

                    if (image.Width <= 0)
                    {
                        image.Width = int.Parse(_configuration["DefaultImage:Width"]);
                    }

                    if (image.Height <= 0)
                    {
                        image.Height = int.Parse(_configuration["DefaultImage:Height"]);
                    }
                }

                var entry = new FileImages
                {
                    Id     = blobId,
                    Images = requestedImages,
                    Bucket = _bucket
                };

                //await _imagesMetaCollection.InsertOneAsync(fileImages);

                foreach (var image in requestedImages)
                {
                    await _bus.Publish<GenerateImage>(new
                    {
                        Id     = image.Id,
                        Bucket = _bucket,
                        BlobId = blobId,
                        Image  = image,
                        UserId = new Guid(_bucket)
                    });
                }

                uploaded.Add(entry);
            }

            return Ok(uploaded);
        }
Esempio n. 11
0
        /// <summary>
        /// Stub handler for <c>ProcessWebPage</c>: for three known URLs it stores a canned file
        /// from the "Resources" folder as a blob and publishes <c>WebPageProcessed</c>; for any
        /// other URL it publishes <c>WebPageProcessFailed</c>.
        /// </summary>
        /// <param name="context">Consume context carrying the <c>ProcessWebPage</c> command.</param>
        /// <exception cref="FileNotFoundException">The canned resource file for a known URL does not exist.</exception>
        public async Task Consume(ConsumeContext<ProcessWebPage> context)
        {
            var message = context.Message;

            switch (message.Url)
            {
                case "https://en.wikipedia.org/wiki/Aspirin":
                    await PublishCannedResource(context, "wiki.json", "wiki.mol");
                    break;

                case "http://www.chemspider.com/Chemical-Structure.2157.html?rid=d8424976-d183-431d-9d19-b663a5c4b1df":
                    await PublishCannedResource(context, "chemspider.json", "chemspider.mol");
                    break;

                case "http://lifescience.opensource.epam.com/indigo/api/#loading-molecules-and-query-molecules":
                    await PublishCannedResource(context, "Generic.pdf", "Generic.pdf");
                    break;

                default:
                    await context.Publish<WebPageProcessFailed>(new
                    {
                        Id            = message.Id,
                        CorrelationId = message.CorrelationId,
                        UserId        = message.UserId,
                        Message       = "Cannot import file. Format is not supported."
                    });
                    break;
            }
        }

        // Stores Resources/<resourceFile> as a new blob named <blobFileName> in the message's
        // bucket (content type "application/json", as in every original branch) and publishes
        // WebPageProcessed for it.
        private async Task PublishCannedResource(ConsumeContext<ProcessWebPage> context, string resourceFile, string blobFileName)
        {
            var message = context.Message;
            var blobId  = NewId.NextGuid();

            var path = Path.Combine(Directory.GetCurrentDirectory(), "Resources", resourceFile);

            if (!System.IO.File.Exists(path))
            {
                throw new FileNotFoundException(path);
            }

            // Dispose the file handle once the upload has finished.
            using (var resourceStream = System.IO.File.OpenRead(path))
            {
                await _blobStorage.AddFileAsync(blobId, blobFileName, resourceStream, "application/json", message.Bucket);
            }

            // The returned info is not used; the call is kept so the storage is still queried
            // for the new blob before the event goes out, exactly as before.
            await _blobStorage.GetFileInfo(blobId, message.Bucket);

            await context.Publish<WebPageProcessed>(new
            {
                Id            = message.Id,
                CorrelationId = message.CorrelationId,
                UserId        = message.UserId,
                BlobId        = blobId,
                Bucket        = message.Bucket
            });
        }
Esempio n. 12
0
        /// <summary>
        /// Extracts text and/or images from a PDF blob according to the operation flags in
        /// <c>message.ByteTypes</c>, stores each result as a new blob and publishes
        /// <c>TextExported</c> / <c>ImageExported</c> followed by <c>FileParsed</c>.
        /// Publishes <c>FileParseFailed</c> when the extension is not ".pdf" or when any
        /// exception occurs; no <c>FileParsed</c> event follows a failure.
        /// </summary>
        /// <param name="message">Command identifying the blob, its bucket and the requested operations.</param>
        /// <param name="token">Cancellation token; not observed by this handler.</param>
        public async Task Handle(ParseFile message, CancellationToken token)
        {
            try
            {
                using (var blob = await blobStorage.GetFileAsync(message.BlobId, message.Bucket))
                {
                    string txtData = null;

                    Dictionary<string, byte[]> images = null;

                    switch (Path.GetExtension(blob.Info.FileName).ToLowerInvariant())
                    {
                        case ".pdf":

                            if (((OperationType)message.ByteTypes & OperationType.Text) != 0)
                            {
                                txtData = PdfImporter.GetText(blob.GetContentAsStream(), blob.Info.FileName);
                            }

                            if (((OperationType)message.ByteTypes & OperationType.Images) != 0)
                            {
                                images = PdfImporter.GetImagesAsBytes(blob.GetContentAsStream(), blob.Info.FileName);
                            }

                            // OperationType.Tables is accepted, but table extraction is not implemented.
                            break;

                        default:
                            await eventPublisher.Publish(new FileParseFailed(message.Id, message.CorrelationId, message.UserId, $"Cannot find file parser for {blob.Info.FileName}"));

                            // Stop here: a failed file must not also be reported as parsed.
                            return;
                    }

                    string bucket = message.Bucket;

                    if (txtData != null)
                    {
                        var txtFileId = Guid.NewGuid();

                        using (var textStream = new MemoryStream(Encoding.UTF8.GetBytes(txtData)))
                        {
                            await blobStorage.AddFileAsync(txtFileId, $"Text from {blob.Info.FileName}.txt", textStream, "text/plain", bucket);
                        }

                        await eventPublisher.Publish(new TextExported(message.Id, message.CorrelationId, message.UserId, txtFileId));
                    }

                    if (images != null)
                    {
                        var imgCount = 0;

                        foreach (var img in images)
                        {
                            // Entries without image bytes are skipped and do not consume an index.
                            if (img.Value == null)
                            {
                                continue;
                            }

                            var imgId = Guid.NewGuid();

                            using (var imageStream = new MemoryStream(img.Value))
                            {
                                await blobStorage.AddFileAsync(imgId, img.Key, imageStream, "image/jpeg", bucket);
                            }

                            imgCount++;

                            await eventPublisher.Publish(new ImageExported(imgId, message.CorrelationId, message.UserId, message.BlobId, imgCount));
                        }
                    }

                    await eventPublisher.Publish(new FileParsed(message.Id, message.CorrelationId, message.UserId, message.ByteTypes));
                }
            }
            catch (Exception e)
            {
                await eventPublisher.Publish(new FileParseFailed(message.Id, message.CorrelationId, message.UserId, $"Cannot parse pdf file from bucket {message.Bucket} with Id {message.BlobId}. Error: {e.Message}"));
            }
        }
Esempio n. 13
0
        /// <summary>
        /// Accepts a multipart upload into the given bucket. Form sections are accumulated as
        /// metadata for the next file section; each file section is stored with the metadata
        /// collected so far and announced with a <c>BlobLoaded</c> event. The first form section
        /// after a file starts a fresh metadata set.
        /// </summary>
        /// <param name="bucket">Storage bucket that receives the uploaded files.</param>
        /// <returns>400 when the request is not multipart; otherwise 200 with the ids of all stored blobs.</returns>
        public async Task<IActionResult> Post(string bucket)
        {
            if (!IsMultipartContentType(Request.ContentType))
            {
                return BadRequest();
            }

            var ids = new List<Guid>();

            Log.Information("POSTing files...");

            var boundary = HeaderUtilities.RemoveQuotes(MediaTypeHeaderValue.Parse(Request.ContentType).Boundary);
            var reader   = new MultipartReader(boundary.Value, Request.Body);

            MultipartSection section;

            IDictionary<string, object> metadata = new Dictionary<string, object>();
            bool isFileLoaded = false;

            while ((section = await reader.ReadNextSectionAsync()) != null)
            {
                var contentDisposition = section.GetContentDispositionHeader();

                if (contentDisposition.IsFormDisposition())
                {
                    // A form field after a file starts the metadata of the next file.
                    if (isFileLoaded)
                    {
                        metadata.Clear();
                        isFileLoaded = false;
                    }

                    var formDataSection = section.AsFormDataSection();

                    string key   = formDataSection.Name;
                    string value = await formDataSection.GetValueAsync();

                    // The literal text "null" (any casing) is treated as "no value".
                    if (!string.Equals(value, "null", StringComparison.OrdinalIgnoreCase))
                    {
                        // Indexer instead of Add: a repeated field overwrites the earlier value
                        // rather than failing the whole request with an ArgumentException.
                        metadata[key] = value;
                    }
                }

                if (contentDisposition.IsFileDisposition())
                {
                    var fileSection = section.AsFileSection();

                    Log.Information($"Saving file {fileSection.FileName}");

                    // Use the same directory-free name for the stored blob and the event.
                    var fileName = Path.GetFileName(fileSection.FileName);

                    // Snapshot so a later Clear() of the accumulator cannot alter what was
                    // handed to the storage or placed into the published event.
                    var fileMetadata = new Dictionary<string, object>(metadata);

                    var id = await _blobStorage.AddFileAsync(fileName, fileSection.FileStream, fileSection.Section.ContentType, bucket, fileMetadata);

                    ids.Add(id);

                    var blobInfo = await _blobStorage.GetFileInfo(id, bucket);

                    await _bus.Publish<BlobLoaded>(new
                    {
                        BlobInfo  = new LoadedBlobInfo(id, fileName, blobInfo.Length, UserID, blobInfo.UploadDateTime, blobInfo.MD5, bucket, fileMetadata),
                        TimeStamp = DateTimeOffset.UtcNow
                    });

                    isFileLoaded = true;
                }
            }

            return Ok(ids);
        }
Esempio n. 14
0
        /// <summary>
        /// Processes a web page whose URL mentions "chemspider" or "wikipedia": the extracted
        /// content is stored as a JSON blob and announced with <c>WebPageProcessed</c>. When no
        /// content was produced, <c>WebPageProcessed</c> is published without blob information.
        /// Exceptions other than Wikipedia extraction errors are reported as
        /// <c>WebPageProcessFailed</c>.
        /// </summary>
        /// <param name="context">Consume context carrying the <c>ProcessWebPage</c> command.</param>
        public async Task Consume(ConsumeContext<ProcessWebPage> context)
        {
            var message = context.Message;

            try
            {
                string content = "";
                var    meta    = new Dictionary<string, object>();

                // Both source checks are ordinal and case-insensitive so they behave the same way.
                if (message.Url.IndexOf("chemspider", StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    var cs = new Chemspider(new List<string> { message.Url });
                    content = cs.Content;
                    meta    = cs.Meta;
                }

                if (message.Url.IndexOf("wikipedia", StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    try
                    {
                        var wiki = new Wikipedia(new List<string> { message.Url });
                        content = wiki.Content;
                        meta    = wiki.Meta;
                    }
                    catch (Exception)
                    {
                        // Best effort: a Wikipedia extraction failure leaves content/meta as they
                        // were, and the page is then reported as processed without a blob.
                    }
                }

                if (content != "" && content != "{}")
                {
                    Guid   blobId   = Guid.NewGuid();
                    string fileName = $"{blobId}.json";

                    using (var contentStream = new MemoryStream(Encoding.UTF8.GetBytes(content)))
                    {
                        await blobStorage.AddFileAsync(blobId, fileName, contentStream, "application/json", message.Bucket, meta);
                    }

                    await context.Publish<WebPageProcessed>(new
                    {
                        Id            = message.Id,
                        CorrelationId = message.CorrelationId,
                        UserId        = message.UserId,
                        BlobId        = blobId,
                        Bucket        = message.Bucket
                    });
                }
                else
                {
                    // Nothing was extracted: the event carries no BlobId/Bucket.
                    await context.Publish<WebPageProcessed>(new
                    {
                        Id            = message.Id,
                        CorrelationId = message.CorrelationId,
                        UserId        = message.UserId
                    });
                }
            }
            catch (Exception e)
            {
                await context.Publish<WebPageProcessFailed>(new
                {
                    Id            = message.Id,
                    CorrelationId = message.CorrelationId,
                    UserId        = message.UserId,
                    Message       = e.Message
                });
            }
        }
Esempio n. 15
0
        /// <summary>
        /// Publishes canned parsing results for a fixed set of well-known file names.
        /// The blob referenced by the message is loaded, its file name is matched
        /// (case-insensitively) against the hard-coded fixtures below, and the
        /// corresponding <c>RecordParsed</c> / <c>RecordParseFailed</c> / <c>FileParsed</c>
        /// events are published. Any other file name results in <c>FileParseFailed</c>.
        /// </summary>
        /// <param name="context">Consume context carrying the <c>ParseFile</c> command.</param>
        public async Task Consume(ConsumeContext <ParseFile> context)
        {
            var message = context.Message;
            var blob    = await _blobStorage.GetFileAsync(message.BlobId, message.Bucket);

            // Invariant lower-casing keeps the case labels matching regardless of the current culture.
            switch (blob.Info.FileName.ToLowerInvariant())
            {
            case "ringcount_0.mol":
            {
                var blobId    = Guid.NewGuid();
                var startTime = DateTimeOffset.UtcNow;

                // A copy of the source is stored, although none of the events below reference its id.
                await _blobStorage.AddFileAsync(blobId, $"{blobId}.mol", blob.GetContentAsStream(), "chemical/x-mdl-molfile", message.Bucket);

                await context.Publish <FileParsed>(new
                    {
                        Id            = message.Id,
                        TotalRecords  = 1,
                        ParsedRecords = 0,
                        FailedRecords = 1,
                        CorrelationId = message.CorrelationId,
                        UserId        = message.UserId,
                        TimeStamp     = startTime
                    });

                await context.Publish <RecordParseFailed>(new
                    {
                        Id            = NewId.NextGuid(),
                        FileId        = message.Id,
                        Index         = 0,
                        Message       = "molfile loader: ring bond count is allowed only for queries",
                        CorrelationId = message.CorrelationId,
                        UserId        = message.UserId,
                        TimeStamp     = startTime
                    });

                break;
            }

            case "aspirin.mol":
            {
                var blobId = Guid.NewGuid();
                await _blobStorage.AddFileAsync(blobId, $"{blobId}.mol", blob.GetContentAsStream(), "chemical/x-mdl-molfile", message.Bucket);

                await context.Publish <RecordParsed>(new
                    {
                        Id     = NewId.NextGuid(),
                        FileId = message.Id,
                        Index  = 0,
                        Fields = new Field[] {
                            new Field("StdInChI", "InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)"),
                            new Field("StdInChIKey", "BSYNRYMUTXBXSQ-UHFFFAOYSA-N"),
                            new Field("SMILES", "CC(OC1=C(C(=O)O)C=CC=C1)=O")
                        },
                        Bucket        = message.Bucket,
                        BlobId        = blobId,
                        CorrelationId = message.CorrelationId,
                        UserId        = message.UserId,
                        TimeStamp     = DateTimeOffset.UtcNow
                    });

                await context.Publish <FileParsed>(new
                    {
                        Id            = message.Id,
                        TotalRecords  = 1,
                        Fields        = new string[] { "StdInChI", "StdInChIKey", "SMILES" },
                        CorrelationId = message.CorrelationId,
                        UserId        = message.UserId,
                        TimeStamp     = DateTimeOffset.UtcNow
                    });

                break;
            }

            case "test_solubility.sdf":
            {
                // Record 0 fails, record 1 succeeds.
                await context.Publish <RecordParseFailed>(new
                    {
                        Id            = NewId.NextGuid(),
                        FileId        = message.Id,
                        Index         = 0,
                        Message       = "sdffile loader: could not process file",
                        CorrelationId = message.CorrelationId,
                        UserId        = message.UserId,
                        TimeStamp     = DateTimeOffset.UtcNow
                    });

                var blobId = Guid.NewGuid();
                await _blobStorage.AddFileAsync(blobId, $"{blobId}.mol", blob.GetContentAsStream(), "chemical/x-mdl-molfile", message.Bucket);

                await context.Publish <RecordParsed>(new
                    {
                        Id     = NewId.NextGuid(),
                        FileId = message.Id,
                        Index  = 1,
                        Fields = new Field[] {
                            new Field("StdInChI", "InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)"),
                            new Field("StdInChIKey", "BSYNRYMUTXBXSQ-UHFFFAOYSA-N"),
                            new Field("SMILES", "CC(OC1=C(C(=O)O)C=CC=C1)=O")
                        },
                        Bucket        = message.Bucket,
                        BlobId        = blobId,
                        CorrelationId = message.CorrelationId,
                        UserId        = message.UserId,
                        TimeStamp     = DateTimeOffset.UtcNow
                    });

                await context.Publish <FileParsed>(new
                    {
                        Id            = message.Id,
                        TotalRecords  = 2,
                        ParsedRecords = 1,
                        FailedRecords = 1,
                        Fields        = new string[] { "StdInChI", "StdInChIKey", "SMILES" },
                        CorrelationId = message.CorrelationId,
                        UserId        = message.UserId,
                        TimeStamp     = DateTimeOffset.UtcNow
                    });

                break;
            }

            case "invalid_sdf_with_20_records_where_first_and_second_are_invalid.sdf":
            {
                const int totalRecords  = 20;
                const int failedRecords = 2;

                // Records 0 and 1 fail...
                for (var i = 0; i < failedRecords; i++)
                {
                    await context.Publish <RecordParseFailed>(new
                        {
                            Id            = NewId.NextGuid(),
                            FileId        = message.Id,
                            Index         = i,
                            Message       = "sdffile loader: could not process file",
                            CorrelationId = message.CorrelationId,
                            UserId        = message.UserId,
                            TimeStamp     = DateTimeOffset.UtcNow
                        });
                }

                // ...and records 2..19 succeed.
                for (var i = failedRecords; i < totalRecords; i++)
                {
                    var blobId = Guid.NewGuid();
                    await _blobStorage.AddFileAsync(blobId, $"{blobId}.mol", blob.GetContentAsStream(), "chemical/x-mdl-molfile", message.Bucket);

                    await context.Publish <RecordParsed>(new
                        {
                            Id     = NewId.NextGuid(),
                            FileId = message.Id,
                            Index  = i,
                            Fields = new Field[] {
                                new Field("StdInChI", "InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)"),
                                new Field("StdInChIKey", "BSYNRYMUTXBXSQ-UHFFFAOYSA-N"),
                                new Field("SMILES", "CC(OC1=C(C(=O)O)C=CC=C1)=O")
                            },
                            Bucket        = message.Bucket,
                            BlobId        = blobId,
                            CorrelationId = message.CorrelationId,
                            UserId        = message.UserId,
                            TimeStamp     = DateTimeOffset.UtcNow
                        });
                }

                // The summary must agree with the per-record events published above:
                // 2 failures and 18 successes (previously reported as 1 and 19).
                await context.Publish <FileParsed>(new
                    {
                        Id            = message.Id,
                        TotalRecords  = totalRecords,
                        ParsedRecords = totalRecords - failedRecords,
                        FailedRecords = failedRecords,
                        Fields        = new string[] { "StdInChI", "StdInChIKey", "SMILES" },
                        CorrelationId = message.CorrelationId,
                        UserId        = message.UserId,
                        TimeStamp     = DateTimeOffset.UtcNow
                    });

                break;
            }

            case "drugbank_10_records.sdf":
            case "combined lysomotrophic.sdf":
            {
                var totalRecords = 2;

                for (var i = 0; i < totalRecords; i++)
                {
                    var blobId = Guid.NewGuid();
                    await _blobStorage.AddFileAsync(blobId, $"{blobId}.mol", blob.GetContentAsStream(), "chemical/x-mdl-molfile", message.Bucket);

                    await context.Publish <RecordParsed>(new
                        {
                            Id     = NewId.NextGuid(),
                            FileId = message.Id,
                            Index  = i,
                            Fields = new Field[] {
                                new Field("StdInChI", $"StdInChI-{i}"),
                                new Field("StdInChIKey", $"StdInChIKey-{i}"),
                                new Field("SMILES", $"SMILES-{i}")
                            },
                            Bucket        = message.Bucket,
                            BlobId        = blobId,
                            CorrelationId = message.CorrelationId,
                            UserId        = message.UserId,
                            TimeStamp     = DateTimeOffset.UtcNow
                        });
                }

                await context.Publish <FileParsed>(new
                    {
                        Id            = message.Id,
                        TotalRecords  = totalRecords,
                        Fields        = new string[] { "StdInChI", "StdInChIKey", "SMILES" },
                        CorrelationId = message.CorrelationId,
                        UserId        = message.UserId,
                        TimeStamp     = DateTimeOffset.UtcNow
                    });

                break;
            }

            case "125_11mos.cdx":
            {
                var totalRecords = 3;

                // Records from this fixture carry no fields at all.
                for (var i = 0; i < totalRecords; i++)
                {
                    var blobId = Guid.NewGuid();
                    await _blobStorage.AddFileAsync(blobId, $"{blobId}.mol", blob.GetContentAsStream(), "chemical/x-mdl-molfile", message.Bucket);

                    await context.Publish <RecordParsed>(new
                        {
                            Id            = NewId.NextGuid(),
                            FileId        = message.Id,
                            Index         = i,
                            Fields        = new Field[] {},
                            Bucket        = message.Bucket,
                            BlobId        = blobId,
                            CorrelationId = message.CorrelationId,
                            UserId        = message.UserId,
                            TimeStamp     = DateTimeOffset.UtcNow
                        });
                }

                await context.Publish <FileParsed>(new
                    {
                        Id            = message.Id,
                        TotalRecords  = totalRecords,
                        Fields        = new string[] {},
                        CorrelationId = message.CorrelationId,
                        UserId        = message.UserId,
                        TimeStamp     = DateTimeOffset.UtcNow
                    });

                break;
            }

            default:
                await context.Publish <FileParseFailed>(new
                {
                    Id            = message.Id,
                    Message       = $"Cannot parse chemical file {blob.Info.FileName}. Format is not supported.",
                    CorrelationId = message.CorrelationId,
                    UserId        = message.UserId,
                    TimeStamp     = DateTimeOffset.UtcNow
                });

                break;
            }
        }
Esempio n. 16
0
        /// <summary>
        /// Parses a reaction file (<c>.rdf</c> / <c>.rxn</c>) referenced by the message.
        /// Each record is stored as its own <c>.rxn</c> blob and announced with
        /// <c>RecordParsed</c>; a record that fails is announced with <c>RecordParseFailed</c>.
        /// A final <c>FileParsed</c> event carries the record count and the union of
        /// property names. Unsupported extensions and any unexpected error result in a
        /// single <c>FileParseFailed</c> event.
        /// </summary>
        /// <param name="context">Consume context carrying the <c>ParseFile</c> command.</param>
        public async Task Consume(ConsumeContext <ParseFile> context)
        {
            long totalRecords = 0;

            try
            {
                var blob = await blobStorage.GetFileAsync(context.Message.BlobId, context.Message.Bucket);

                IEnumerable <Record> records;

                // Invariant lower-casing: extension matching must not depend on the current culture.
                switch (Path.GetExtension(blob.Info.FileName).ToLowerInvariant())
                {
                case ".rdf":
                case ".rxn":
                    records = new RdfParser.RdfParser(blob.GetContentAsStream());
                    break;

                default:
                    await context.Publish <FileParseFailed>(new
                    {
                        Id            = context.Message.Id,
                        UserId        = context.Message.UserId,
                        TimeStamp     = DateTimeOffset.UtcNow,
                        CorrelationId = context.Message.CorrelationId,
                        Message       = $"Cannot find file parser for {blob.Info.FileName}"
                    });

                    // Stop here: falling through would dereference a null record source and
                    // cause a second, misleading FileParseFailed from the outer catch.
                    return;
                }

                string        bucket = context.Message.Bucket;
                List <string> fields = new List <string>();

                // foreach disposes the enumerator once the loop is left.
                foreach (var record in records)
                {
                    totalRecords++;
                    try
                    {
                        var blobId = NewId.NextGuid();

                        // Properties may be absent; collect each new property name once.
                        if (record.Properties != null)
                        {
                            foreach (var name in record.Properties.Select(p => p.Name))
                            {
                                if (!fields.Contains(name))
                                {
                                    fields.Add(name);
                                }
                            }
                        }

                        await blobStorage.AddFileAsync(blobId, $"{blobId}.rxn", new MemoryStream(Encoding.UTF8.GetBytes(record.Data)), "chemical/x-mdl-rxnfile", bucket);

                        await context.Publish <RecordParsed>(new
                        {
                            Id            = NewId.NextGuid(),
                            FileId        = context.Message.Id,
                            Bucket        = bucket,
                            BlobId        = blobId,
                            Index         = record.Index,
                            Fields        = record.Properties?.Select(p => new Field(p.Name, p.Value)),
                            UserId        = context.Message.UserId,
                            TimeStamp     = DateTimeOffset.UtcNow,
                            CorrelationId = context.Message.CorrelationId
                        });
                    }
                    catch (Exception ex)
                    {
                        // A single bad record must not abort the rest of the file.
                        await context.Publish <RecordParseFailed>(new
                        {
                            Id            = NewId.NextGuid(),
                            FileId        = context.Message.Id,
                            UserId        = context.Message.UserId,
                            TimeStamp     = DateTimeOffset.UtcNow,
                            CorrelationId = context.Message.CorrelationId,
                            Message       = $"Cannot parse reaction record #{totalRecords} from file {context.Message.Id}. Error: {ex.Message}"
                        });
                    }

                    //  temporary limitation: we don't want to process more than 100 records inside any file
                    if (totalRecords >= 100)
                    {
                        break;
                    }
                }

                await context.Publish <FileParsed>(new
                {
                    Id            = context.Message.Id,
                    TotalRecords  = totalRecords,
                    Fields        = fields,
                    UserId        = context.Message.UserId,
                    TimeStamp     = DateTimeOffset.UtcNow,
                    CorrelationId = context.Message.CorrelationId
                });
            }
            catch (Exception e)
            {
                await context.Publish <FileParseFailed>(new
                {
                    Id            = context.Message.Id,
                    UserId        = context.Message.UserId,
                    TimeStamp     = DateTimeOffset.UtcNow,
                    CorrelationId = context.Message.CorrelationId,
                    Message       = $"Cannot parse reaction file from bucket {context.Message.Bucket} with Id {context.Message.Id}. Error: {e.Message}"
                });
            }
        }
Esempio n. 17
0
        /// <summary>
        /// Converts an office document blob to PDF. The converter is chosen from the
        /// blob's file extension; the source is spooled to a temporary file, converted,
        /// stored as a new <c>.pdf</c> blob in the same bucket and announced with
        /// <c>ConvertedToPdf</c>. An unsupported extension or any error results in
        /// <c>ConvertToPdfFailed</c>.
        /// </summary>
        /// <param name="context">Consume context carrying the <c>ConvertToPdf</c> command.</param>
        public async Task Consume(ConsumeContext <ConvertToPdf> context)
        {
            try
            {
                var blob = await blobStorage.GetFileAsync(context.Message.BlobId, context.Message.Bucket);

                IConvert converter;

                // Pick the converter before touching the disk so unsupported files cost no I/O.
                // Invariant lower-casing: extension matching must not depend on the current culture.
                switch (Path.GetExtension(blob.Info.FileName).ToLowerInvariant())
                {
                case ".doc":
                case ".docx":
                case ".odt":
                    converter = new DocToPdf();
                    break;

                case ".xls":
                case ".xlsx":
                case ".ods":
                    converter = new XlsToPdf();
                    break;

                case ".ppt":
                case ".pptx":
                case ".odp":
                    converter = new PptToPdf();
                    break;

                default:
                    await context.Publish <ConvertToPdfFailed>(new
                    {
                        Id            = context.Message.Id,
                        UserId        = context.Message.UserId,
                        TimeStamp     = DateTimeOffset.UtcNow,
                        CorrelationId = context.Message.CorrelationId,
                        Message       = $"Cannot find file converter for {blob.Info.FileName}"
                    });

                    return;
                }

                var tempFilePath = Path.GetTempFileName();

                // Open the temp file once with DeleteOnClose so it is removed on every exit
                // path, including a failed download or conversion.
                using (FileStream fs = new FileStream(tempFilePath, FileMode.Open, FileAccess.ReadWrite, FileShare.None,
                                                      4096, FileOptions.RandomAccess | FileOptions.DeleteOnClose))
                {
                    await blob.GetContentAsStream().CopyToAsync(fs);

                    // Hand the converter a stream positioned at the start of the document.
                    fs.Seek(0, SeekOrigin.Begin);

                    // Dispose the converter output once it has been uploaded.
                    using (Stream data = converter.Convert(fs))
                    {
                        if (data != null)
                        {
                            string bucket = context.Message.Bucket;
                            var    blobId = Guid.NewGuid();

                            data.Seek(0, SeekOrigin.Begin);

                            await blobStorage.AddFileAsync(blobId, $"{blobId}.pdf", data, "application/pdf", bucket);

                            await context.Publish <ConvertedToPdf>(new
                            {
                                Bucket        = bucket,
                                BlobId        = blobId,
                                Id            = context.Message.Id,
                                UserId        = context.Message.UserId,
                                TimeStamp     = DateTimeOffset.UtcNow,
                                CorrelationId = context.Message.CorrelationId
                            });
                        }
                    }
                }
            }
            catch (Exception e)
            {
                await context.Publish <ConvertToPdfFailed>(new
                {
                    Id            = context.Message.Id,
                    UserId        = context.Message.UserId,
                    TimeStamp     = DateTimeOffset.UtcNow,
                    CorrelationId = context.Message.CorrelationId,
                    Message       = $"Cannot convert file to pdf from bucket {context.Message.Bucket} with Id {context.Message.BlobId}. Error: {e.Message}"
                });
            }
        }
Esempio n. 18
0
        /// <summary>
        /// Generates an image for the blob referenced by the message and stores it under
        /// the requested image id. Non-SVG formats are produced by rasterizing the source
        /// and scaling it to the requested size; SVG is produced through
        /// <c>IndigoAdapter</c> for <c>.mol</c> and <c>.rxn</c> sources only.
        /// Publishes <c>ImageGenerated</c> on success and <c>ImageGenerationFailed</c>
        /// (with the error text placed on the image descriptor) on any failure.
        /// </summary>
        /// <param name="context">Consume context carrying the <c>GenerateImage</c> command.</param>
        public async Task Consume(ConsumeContext <GenerateImage> context)
        {
            try
            {
                var blobInfo = await _blobStorage.GetFileInfo(context.Message.BlobId, context.Message.Bucket);

                string extension = Path.GetExtension(blobInfo.FileName);

                if (!FileRasterizer.Supports(extension))
                {
                    throw new InvalidDataException($"Unsupported file type {extension}");
                }

                string imageFormat = context.Message.Image.Format;

                using (var stream = new MemoryStream())
                {
                    await _blobStorage.DownloadFileToStreamAsync(context.Message.BlobId, stream, context.Message.Bucket);

                    stream.Position = 0;

                    byte[] imageBytes;

                    // Ordinal comparison: the format token is an identifier, not linguistic text.
                    if (!string.Equals(imageFormat, "svg", StringComparison.OrdinalIgnoreCase))
                    {
                        var format     = imageFormat.ParseImageFormat();
                        var rasterizer = new FileRasterizer();

                        Log.Information($"Rasterizing source '{context.Message.BlobId}'");

                        var image = rasterizer.Rasterize(stream, extension);
                        image      = image.Scale(context.Message.Image.Width, context.Message.Image.Height);
                        imageBytes = image.Convert(format);
                    }
                    else
                    {
                        // Decode as UTF-8 (a superset of ASCII) so non-ASCII characters in the
                        // source text are not replaced with '?'.
                        string data = Encoding.UTF8.GetString(stream.ToArray());

                        switch (extension.ToLowerInvariant())
                        {
                        case ".mol":
                            imageBytes = new IndigoAdapter().Mol2Image(data, imageFormat, context.Message.Image.Width, context.Message.Image.Height);
                            break;

                        case ".rxn":
                            imageBytes = new IndigoAdapter().Rxn2Image(data, imageFormat, context.Message.Image.Width, context.Message.Image.Height);
                            break;

                        default:
                            throw new InvalidDataException($"Unsupported file type {extension} for {imageFormat} generation");
                        }
                    }

                    Log.Information($"Saving image file {context.Message.Image.Id} as {imageFormat}");

                    // The stored name is the source file name with the lower-cased format appended.
                    var imageFileName = $"{blobInfo.FileName}.{(imageFormat ?? string.Empty).ToLowerInvariant()}";

                    await _blobStorage.AddFileAsync(
                        id : context.Message.Image.Id,
                        fileName : imageFileName,
                        source : imageBytes,
                        contentType : $"{imageFormat}".GetMimeType(),
                        bucketName : context.Message.Bucket,
                        metadata : new Dictionary <string, object> {
                            { "SourceId", context.Message.BlobId }
                        }
                        );

                    Log.Information($"Image file {context.Message.Image.Id} as {imageFormat} saved.");
                }

                context.Message.Image.MimeType = context.Message.Image.Format.GetMimeType();

                await context.Publish <ImageGenerated>(new
                {
                    Id            = context.Message.Id,
                    Bucket        = context.Message.Bucket,
                    BlobId        = context.Message.BlobId,
                    Image         = context.Message.Image,
                    CorrelationId = context.Message.CorrelationId,
                    UserId        = context.Message.UserId,
                    TimeStamp     = DateTimeOffset.UtcNow
                });
            }
            catch (Exception e)
            {
                // The failure reason travels on the image descriptor itself.
                context.Message.Image.Exception = e.Message;

                await context.Publish <ImageGenerationFailed>(new
                {
                    Id            = context.Message.Id,
                    Image         = context.Message.Image,
                    CorrelationId = context.Message.CorrelationId,
                    UserId        = context.Message.UserId,
                    TimeStamp     = DateTimeOffset.UtcNow
                });
            }
        }
Esempio n. 19
0
        /// <summary>
        /// Parses a downloaded web page blob into records. The reader is chosen from the
        /// blob's <c>ImportedFrom</c> metadata value. Every record is stored as a separate
        /// blob whose extension and content type depend on the record type, and announced
        /// with <c>RecordParsed</c>; a final <c>WebPageParsed</c> event carries the record
        /// count and the union of property names.
        /// </summary>
        /// <param name="context">Consume context carrying the <c>ParseWebPage</c> command.</param>
        /// <exception cref="NotSupportedException">
        /// The blob's <c>ImportedFrom</c> value has no matching reader.
        /// </exception>
        public async Task Consume(ConsumeContext <ParseWebPage> context)
        {
            var blob = await blobStorage.GetFileAsync(context.Message.BlobId, context.Message.Bucket);

            var importedFrom = blob.Info.Metadata["ImportedFrom"];

            IEnumerable <Record> records;

            switch (importedFrom)
            {
            case "wikipedia.org":
                records = new WikipediaReader(blob.GetContentAsStream());
                break;

            default:
                // Fail with a descriptive error instead of a NullReferenceException
                // when the record loop would otherwise enumerate a null source.
                throw new NotSupportedException($"Cannot find web page parser for source '{importedFrom}'");
            }

            var           bucket       = context.Message.Bucket;
            long          totalRecords = 0;
            List <string> fields       = new List <string>();

            foreach (var record in records)
            {
                var blobId = Guid.NewGuid();

                string extension;
                string mimetype;

                switch (record.Type)
                {
                case RecordType.Chemical:
                    extension = "mol";
                    mimetype  = "chemical/x-mdl-molfile";
                    break;

                case RecordType.Crystal:
                    extension = "cif";
                    mimetype  = "chemical/x-cif";
                    break;

                case RecordType.Reaction:
                    extension = "rxn";
                    // Same content type the reaction file parser uses for .rxn blobs.
                    mimetype  = "chemical/x-mdl-rxnfile";
                    break;

                case RecordType.Spectrum:
                    extension = "jdx";
                    mimetype  = "chemical/x-jcamp-dx";
                    break;

                default:
                    extension = "txt";
                    mimetype  = "text/plain";
                    break;
                }

                // A record without data is stored as an empty blob.
                await blobStorage.AddFileAsync(blobId, $"{blobId}.{extension}", new MemoryStream(Encoding.UTF8.GetBytes(record.Data ?? "")), mimetype, bucket);

                // Properties may be absent; collect each new property name once,
                // even when a single record repeats a name.
                if (record.Properties != null)
                {
                    foreach (var name in record.Properties.Select(p => p.Name))
                    {
                        if (!fields.Contains(name))
                        {
                            fields.Add(name);
                        }
                    }
                }

                await context.Publish <RecordParsed>(new
                {
                    Id            = NewId.NextGuid(),
                    CorrelationId = context.Message.CorrelationId,
                    UserId        = context.Message.UserId,
                    TimeStamp     = DateTimeOffset.UtcNow,
                    FileId        = context.Message.Id,
                    Bucket        = bucket,
                    BlobId        = blobId,
                    Index         = record.Index,
                    Fields        = record.Properties?.Select(p => new Field(p.Name, p.Value))
                });

                totalRecords++;
            }

            await context.Publish <WebPageParsed>(new
            {
                Id            = context.Message.Id,
                CorrelationId = context.Message.CorrelationId,
                UserId        = context.Message.UserId,
                TotalRecords  = totalRecords,
                Fields        = fields
            });
        }
Esempio n. 20
0
 /// <summary>
 /// Convenience overload that stores an in-memory byte array as a blob by wrapping
 /// it in a <see cref="MemoryStream"/> and delegating to the stream-based
 /// <c>AddFileAsync</c> of <paramref name="storage"/>.
 /// </summary>
 /// <param name="storage">Blob storage that receives the file.</param>
 /// <param name="id">Identifier to store the blob under.</param>
 /// <param name="fileName">File name recorded for the blob.</param>
 /// <param name="source">Raw content of the file.</param>
 /// <param name="contentType">Content type recorded for the blob.</param>
 /// <param name="bucketName">Target bucket name, passed through unchanged.</param>
 /// <param name="metadata">Optional metadata, passed through unchanged.</param>
 public static async Task AddFileAsync(this IBlobStorage storage, Guid id, string fileName, byte[] source, string contentType = "application/octet-stream", string bucketName = null, IDictionary <string, object> metadata = null)
 {
     var content = new MemoryStream(source);

     await storage.AddFileAsync(id, fileName, content, contentType, bucketName, metadata);
 }
Esempio n. 21
0
        /// <summary>
        /// Parses a JCAMP-DX spectra file (<c>.dx</c> / <c>.jdx</c>) referenced by the
        /// message. Each record is stored as its own blob (keeping the source extension)
        /// and announced with <c>RecordParsed</c>; a final <c>FileParsed</c> event carries
        /// the record count and the union of property names. Unsupported extensions and
        /// any error result in a single <c>FileParseFailed</c> event.
        /// </summary>
        /// <param name="context">Consume context carrying the <c>ParseFile</c> command.</param>
        public async Task Consume(ConsumeContext <ParseFile> context)
        {
            try
            {
                var blob = await blobStorage.GetFileAsync(context.Message.BlobId, context.Message.Bucket);

                var fields = new List <string>();

                // Invariant lower-casing: extension matching must not depend on the current culture.
                // Computed once; it is also the extension given to every record blob.
                var extension = Path.GetExtension(blob.Info.FileName).ToLowerInvariant();

                IEnumerable <Record> records;

                switch (extension)
                {
                case ".dx":
                case ".jdx":
                    records = new JcampReader(blob.GetContentAsStream());
                    break;

                default:
                    await context.Publish <FileParseFailed>(new
                    {
                        Id            = context.Message.Id,
                        UserId        = context.Message.UserId,
                        TimeStamp     = DateTimeOffset.UtcNow,
                        CorrelationId = context.Message.CorrelationId,
                        Message       = $"Cannot find file parser for {blob.Info.FileName}"
                    });

                    // Stop here: falling through would enumerate a null record source and
                    // cause a second, misleading FileParseFailed from the outer catch.
                    return;
                }

                long   totalRecords = 0;
                string bucket       = context.Message.Bucket;

                foreach (var record in records)
                {
                    var blobId = NewId.NextGuid();

                    // Properties may be absent (AddRange would throw on null);
                    // collect each new property name once.
                    if (record.Properties != null)
                    {
                        foreach (var name in record.Properties.Select(p => p.Name))
                        {
                            if (!fields.Contains(name))
                            {
                                fields.Add(name);
                            }
                        }
                    }

                    await blobStorage.AddFileAsync(blobId, $"{blobId}{extension}", new MemoryStream(Encoding.UTF8.GetBytes(record.Data)), "chemical/x-jcamp-dx", bucket);

                    await context.Publish <RecordParsed>(new
                    {
                        Id            = NewId.NextGuid(),
                        FileId        = context.Message.Id,
                        Bucket        = bucket,
                        BlobId        = blobId,
                        Index         = record.Index,
                        Fields        = record.Properties?.Select(p => new Field(p.Name, p.Value)),
                        UserId        = context.Message.UserId,
                        TimeStamp     = DateTimeOffset.UtcNow,
                        CorrelationId = context.Message.CorrelationId
                    });

                    totalRecords++;
                }

                await context.Publish <FileParsed>(new
                {
                    Id            = context.Message.Id,
                    TotalRecords  = totalRecords,
                    Fields        = fields,
                    UserId        = context.Message.UserId,
                    TimeStamp     = DateTimeOffset.UtcNow,
                    CorrelationId = context.Message.CorrelationId
                });
            }
            catch (Exception e)
            {
                await context.Publish <FileParseFailed>(new
                {
                    Id            = context.Message.Id,
                    UserId        = context.Message.UserId,
                    TimeStamp     = DateTimeOffset.UtcNow,
                    CorrelationId = context.Message.CorrelationId,
                    Message       = $"Cannot parse spectra file from bucket {context.Message.Bucket} with Id {context.Message.BlobId}. Error: {e.Message}"
                });
            }
        }
Esempio n. 22
0
        /// <summary>
        /// Publishes canned spectra-parsing results keyed on the blob's file name:
        /// <c>2-methyl-1-propanol.jdx</c> yields one stored record plus
        /// <c>RecordParsed</c> and <c>FileParsed</c>; <c>13csample.jdx</c> and every
        /// other name yield <c>FileParseFailed</c> with their respective messages.
        /// </summary>
        /// <param name="context">Consume context carrying the <c>ParseFile</c> command.</param>
        public async Task Consume(ConsumeContext <ParseFile> context)
        {
            var command    = context.Message;
            var source     = await _blobStorage.GetFileAsync(command.BlobId, command.Bucket);
            var sourceName = source.Info.FileName.ToLower();

            if (sourceName == "2-methyl-1-propanol.jdx")
            {
                // Store a copy of the source as the single parsed record.
                var recordBlobId = Guid.NewGuid();
                await _blobStorage.AddFileAsync(recordBlobId, $"{recordBlobId}.jdx", source.GetContentAsStream(), "chemical/x-jcamp-dx", command.Bucket);

                var recordFields = new Field[] {
                    new Field("Field1", "Value1"),
                    new Field("Field2", "Value2")
                };

                await context.Publish <RecordParsed>(new
                {
                    Id            = NewId.NextGuid(),
                    FileId        = command.Id,
                    Bucket        = command.Bucket,
                    BlobId        = recordBlobId,
                    Index         = 0,
                    Fields        = recordFields,
                    UserId        = command.UserId,
                    TimeStamp     = DateTimeOffset.UtcNow,
                    CorrelationId = command.CorrelationId
                });

                await context.Publish <FileParsed>(new
                {
                    Id            = command.Id,
                    TotalRecords  = 1,
                    Fields        = recordFields.Select(f => f.Name),
                    UserId        = command.UserId,
                    TimeStamp     = DateTimeOffset.UtcNow,
                    CorrelationId = command.CorrelationId
                });

                return;
            }

            // Every remaining file name fails; only the wording of the reason differs.
            string reason;

            if (sourceName == "13csample.jdx")
            {
                reason = $"Cannot parse spectra file {source.Info.FileName}.";
            }
            else
            {
                reason = $"Cannot parse spectra file {source.Info.FileName}. Format is not supported.";
            }

            await context.Publish <FileParseFailed>(new
            {
                Id            = command.Id,
                UserId        = command.UserId,
                TimeStamp     = DateTimeOffset.UtcNow,
                CorrelationId = command.CorrelationId,
                Message       = reason
            });
        }
Esempio n. 23
0
        /// <summary>
        /// Handles a <c>ParseFile</c> command for a chemical file: splits the blob into records, stores each
        /// record as its own <c>.mol</c> blob and reports progress through published events.
        /// </summary>
        /// <remarks>
        /// The parser is chosen by file extension (<c>.mol</c>/<c>.sdf</c> or <c>.cdx</c>); any other extension
        /// publishes <c>FileParseFailed</c> and returns. For every record either <c>RecordParsed</c> or
        /// <c>RecordParseFailed</c> is published, and <c>FileParsed</c> is published once the loop ends.
        /// Any exception that escapes the per-record handling (including a missing blob) is reported as
        /// <c>FileParseFailed</c> carrying the counters gathered so far; it is not rethrown.
        /// </remarks>
        /// <param name="context">Consume context carrying the <c>ParseFile</c> message (blob id, bucket, file id, user id, correlation id).</param>
        public async Task Consume(ConsumeContext <ParseFile> context)
        {
            // Temporary limitation: we don't want to process more than 100 records inside any file.
            const int maxRecordsPerFile = 100;

            var failedRecords = 0;
            var parsedRecords = 0;

            try
            {
                var blob = await blobStorage.GetFileAsync(context.Message.BlobId, context.Message.Bucket);

                if (blob == null)
                {
                    throw new FileNotFoundException($"Blob with Id {context.Message.BlobId} not found in bucket {context.Message.Bucket}");
                }

                IEnumerable <Record> records;

                // Invariant lower-casing keeps the extension matching independent of the current thread culture.
                switch (Path.GetExtension(blob.Info.FileName).ToLowerInvariant())
                {
                case ".mol":
                case ".sdf":
                    records = new SdfIndigoParser(blob.GetContentAsStream());
                    break;

                case ".cdx":
                    records = new CdxParser.CdxParser(blob.GetContentAsStream());
                    break;

                default:
                    await context.Publish <FileParseFailed>(new
                    {
                        Id            = context.Message.Id,
                        Message       = $"Cannot parse chemical file {blob.Info.FileName}. Format is not supported.",
                        CorrelationId = context.Message.CorrelationId,
                        UserId        = context.Message.UserId,
                        TimeStamp     = DateTimeOffset.UtcNow
                    });

                    return;
                }

                var bucket = context.Message.Bucket;
                var index  = 0;

                // 'fields' keeps first-seen order for the FileParsed event; 'knownFields' gives O(1) de-duplication.
                var fields      = new List <string>();
                var knownFields = new HashSet <string>();

                // The enumerator is driven manually (rather than with foreach) so that reading Current stays
                // inside the per-record try block; 'using' makes sure the enumerator is disposed afterwards.
                using (var enumerator = records.GetEnumerator())
                {
                    while (enumerator.MoveNext())
                    {
                        try
                        {
                            var record = enumerator.Current;

                            var blobId = Guid.NewGuid();

                            using (var molStream = new MemoryStream(Encoding.UTF8.GetBytes(record.Data)))
                            {
                                await blobStorage.AddFileAsync(blobId, $"{blobId}.mol", molStream, "chemical/x-mdl-molfile", bucket);
                            }

                            // Properties may be null (the RecordParsed payload below already allows for it),
                            // so collect field names only when there are any.
                            if (record.Properties != null)
                            {
                                foreach (var name in record.Properties.Select(p => p.Name))
                                {
                                    if (knownFields.Add(name))
                                    {
                                        fields.Add(name);
                                    }
                                }
                            }

                            await context.Publish <RecordParsed>(new
                            {
                                Id     = NewId.NextGuid(),
                                FileId = context.Message.Id,
                                Index  = index,
                                Fields = record.Properties?.Select(p => new Field(p.Name, p.Value)),
                                Bucket = bucket,
                                BlobId = blobId,
                                context.Message.CorrelationId,
                                context.Message.UserId,
                                TimeStamp = DateTimeOffset.UtcNow
                            });

                            parsedRecords++;
                        }
                        catch (Exception ex)
                        {
                            // A single bad record must not abort the whole file: report it and carry on.
                            await context.Publish <RecordParseFailed>(new
                            {
                                Id     = NewId.NextGuid(),
                                FileId = context.Message.Id,
                                Index  = index,
                                ex.Message,
                                context.Message.CorrelationId,
                                context.Message.UserId,
                                TimeStamp = DateTimeOffset.UtcNow
                            });

                            failedRecords++;
                        }

                        index++;

                        if (index >= maxRecordsPerFile)
                        {
                            break;
                        }
                    }
                }

                await context.Publish <FileParsed>(new
                {
                    context.Message.Id,
                    FailedRecords = failedRecords,
                    ParsedRecords = parsedRecords,
                    TotalRecords  = parsedRecords + failedRecords,
                    Fields        = fields,
                    context.Message.CorrelationId,
                    context.Message.UserId,
                    TimeStamp = DateTimeOffset.UtcNow
                });
            }
            catch (Exception ex)
            {
                // File-level failure: report it together with whatever was counted before the error.
                await context.Publish <FileParseFailed>(new
                {
                    context.Message.Id,
                    FailedRecords = failedRecords,
                    ParsedRecords = parsedRecords,
                    TotalRecords  = parsedRecords + failedRecords,
                    ex.Message,
                    context.Message.CorrelationId,
                    context.Message.UserId,
                    TimeStamp = DateTimeOffset.UtcNow
                });
            }
        }