Exemplo n.º 1
0
        /// <summary>
        /// Parses the <c>Resource.nr_ahr</c> bytes with <c>SdfIndigoParser</c> and checks that
        /// every parsed item is a <c>FileParser.Record</c> with non-null <c>Data</c>.
        /// </summary>
        public void ReadSdfUsingIndigoMethodByBoris()
        {
            var source = new MemoryStream(Resource.nr_ahr);
            var parsed = new SdfIndigoParser(source).ToList();

            parsed.ForEach(item =>
            {
                Assert.IsType<FileParser.Record>(item);
                Assert.NotNull(item.Data);
            });
        }
Exemplo n.º 2
0
        /// <summary>
        /// Streams <c>Resources/chembl_23.sdf</c> through <c>SdfIndigoParser</c> and checks that
        /// every produced item is a <c>FileParser.Record</c> with non-null <c>Data</c>.
        /// </summary>
        public void ReadSdfUsingIndigoMethod1MRecords()
        {
            // Path.Combine keeps the relative path valid on non-Windows hosts as well.
            var path = Path.Combine("Resources", "chembl_23.sdf");

            // The file handle is released even when an assertion fails mid-enumeration.
            using (var stream = File.Open(path, FileMode.Open))
            {
                // The parser is enumerated directly (no ToList) so records are not
                // materialized all at once; the stream must stay open for the whole loop.
                var records = new SdfIndigoParser(stream);

                foreach (var record in records)
                {
                    Assert.IsType<FileParser.Record>(record);
                    Assert.NotNull(record.Data);
                }
            }
        }
        /// <summary>
        /// Parses the <c>Resource._S__Glutamic_Acid</c> bytes with <c>SdfIndigoParser</c> and
        /// checks that each parsed item exposes non-null <c>Data</c>.
        /// </summary>
        public void ReadMolMethod()
        {
            var molBytes = new MemoryStream(Resource._S__Glutamic_Acid);
            var parsed = new SdfIndigoParser(molBytes).ToList();

            // Only the payload is verified here; record type and record count are not asserted.
            for (var i = 0; i < parsed.Count; i++)
            {
                Assert.NotNull(parsed[i].Data);
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Parses the dictionary-extract SDF resource with <c>SdfIndigoParser</c> and checks that
        /// every parsed item is a <c>FileParser.Record</c>.
        /// </summary>
        public void ReadSdfUsingIndigoMethod2()
        {
            var records = new SdfIndigoParser(new MemoryStream(Resource.AChemo_extract_of_dictionary_names_and_CAS_numbers_For_dictionary_for_deposition)).ToList();

            // A null entry fails Assert.IsType as well, so no separate null scan is needed.
            // Data is intentionally not asserted for this resource.
            foreach (var record in records)
            {
                Assert.IsType<FileParser.Record>(record);
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Parses the dictionary-extract SDF resource with <c>SdfIndigoParser</c>, checks that every
        /// parsed item is a <c>FileParser.Record</c>, and expects exactly 2117 records.
        /// </summary>
        public void ReadSdfMethod()
        {
            var stream = new MemoryStream(Resource.AChemo_extract_of_dictionary_names_and_CAS_numbers_For_dictionary_for_deposition);
            var parsed = new SdfIndigoParser(stream).ToList();

            // Type check only; Data is not asserted for this resource.
            parsed.ForEach(item => Assert.IsType<FileParser.Record>(item));

            parsed.Count().Should().Be(2117);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Loads a blob and exposes its chemical records as an enumerator of <c>Record</c>.
        /// </summary>
        /// <param name="fileId">Identifier of the blob to read.</param>
        /// <param name="bucket">Bucket passed to the blob storage lookup.</param>
        /// <returns>
        /// An enumerator that maps each parsed record to a <c>Record</c>; the mapping is a LINQ
        /// projection, so parsing happens as the enumerator is advanced.
        /// </returns>
        /// <exception cref="FileNotFoundException">The blob storage returned no blob for <paramref name="fileId"/>.</exception>
        /// <exception cref="NotSupportedException">The blob's file extension is not .mol, .sdf or .cdx.</exception>
        public async Task <IEnumerator <Record> > GetRecords(Guid fileId, string bucket)
        {
            var blob = await blobStorage.GetFileAsync(fileId, bucket);

            if (blob == null)
            {
                throw new FileNotFoundException($"Blob with Id {fileId} not found.");
            }

            // Invariant lower-casing keeps extension matching independent of the current culture;
            // a null extension (null file name) simply falls through to the default branch.
            var extension = Path.GetExtension(blob.Info.FileName)?.ToLowerInvariant();

            IEnumerable<FileParser.Record> records;

            switch (extension)
            {
            case ".mol":
            case ".sdf":
                records = new SdfIndigoParser(blob.GetContentAsStream());
                break;

            case ".cdx":
                records = new CdxParser.CdxParser(blob.GetContentAsStream());
                break;

            default:
                // Previously this path assigned null and crashed with a NullReferenceException below.
                throw new NotSupportedException($"Cannot parse chemical file {blob.Info.FileName}. Format is not supported.");
            }

            return records.Select(r => new Record
            {
                Mol = r.Data,
                // Properties may be absent on a parsed record; expose an empty sequence in that case.
                Properties = (r.Properties ?? Enumerable.Empty<FileParser.Property>()).Select(p => new PropertyValue
                {
                    Name = "Properties.Fields." + p.Name,
                    Value = p.Value
                }),
                Index = r.Index
            }).GetEnumerator();
        }
Exemplo n.º 7
0
        /// <summary>
        /// Handles a <c>ParseFile</c> message: loads the referenced blob, parses it record by record,
        /// stores each record's data as a new .mol blob and publishes per-record and per-file events.
        /// </summary>
        /// <param name="context">Consume context carrying the <c>ParseFile</c> message and used for publishing.</param>
        /// <remarks>
        /// Publishes <c>RecordParsed</c> / <c>RecordParseFailed</c> for each processed record, then
        /// <c>FileParsed</c>. Unsupported extensions and any exception outside the per-record
        /// handling result in <c>FileParseFailed</c> instead. At most 100 records are processed.
        /// </remarks>
        public async Task Consume(ConsumeContext <ParseFile> context)
        {
            // Temporary limitation: we don't want to process more than this many records inside any file.
            const int maxRecords = 100;

            var failedRecords = 0;
            var parsedRecords = 0;

            try
            {
                var blob = await blobStorage.GetFileAsync(context.Message.BlobId, context.Message.Bucket);

                if (blob == null)
                {
                    throw new FileNotFoundException($"Blob with Id {context.Message.BlobId} not found in bucket {context.Message.Bucket}");
                }

                IEnumerable<Record> records = null;

                // Invariant lower-casing keeps extension matching culture-independent; a null
                // extension falls through to the "not supported" branch.
                switch (Path.GetExtension(blob.Info.FileName)?.ToLowerInvariant())
                {
                case ".mol":
                case ".sdf":
                    records = new SdfIndigoParser(blob.GetContentAsStream());
                    break;

                case ".cdx":
                    records = new CdxParser.CdxParser(blob.GetContentAsStream());
                    break;

                default:
                    await context.Publish<FileParseFailed>(new
                    {
                        Id            = context.Message.Id,
                        Message       = $"Cannot parse chemical file {blob.Info.FileName}. Format is not supported.",
                        CorrelationId = context.Message.CorrelationId,
                        UserId        = context.Message.UserId,
                        TimeStamp     = DateTimeOffset.UtcNow
                    });

                    return;
                }

                var bucket = context.Message.Bucket;
                var index  = 0;

                // 'fields' keeps first-seen order for the FileParsed event; 'seenFields' gives
                // O(1) duplicate detection, including duplicates inside a single record.
                var fields     = new List<string>();
                var seenFields = new HashSet<string>();

                // The enumerator is disposed even if the loop exits early or throws.
                using (var enumerator = records.GetEnumerator())
                {
                    while (enumerator.MoveNext())
                    {
                        try
                        {
                            var record = enumerator.Current;

                            var blobId = Guid.NewGuid();

                            await blobStorage.AddFileAsync(blobId, $"{blobId}.mol", new MemoryStream(Encoding.UTF8.GetBytes(record.Data)), "chemical/x-mdl-molfile", bucket);

                            // Properties may be null (the publish below already guards for that).
                            if (record.Properties != null)
                            {
                                foreach (var property in record.Properties)
                                {
                                    if (seenFields.Add(property.Name))
                                    {
                                        fields.Add(property.Name);
                                    }
                                }
                            }

                            await context.Publish<RecordParsed>(new
                            {
                                Id     = NewId.NextGuid(),
                                FileId = context.Message.Id,
                                Index  = index,
                                Fields = record.Properties?.Select(p => new Field(p.Name, p.Value)),
                                Bucket = bucket,
                                BlobId = blobId,
                                context.Message.CorrelationId,
                                context.Message.UserId,
                                TimeStamp = DateTimeOffset.UtcNow
                            });

                            parsedRecords++;
                        }
                        catch (Exception ex)
                        {
                            // A single bad record must not abort the whole file; report it and move on.
                            await context.Publish<RecordParseFailed>(new
                            {
                                Id     = NewId.NextGuid(),
                                FileId = context.Message.Id,
                                Index  = index,
                                ex.Message,
                                context.Message.CorrelationId,
                                context.Message.UserId,
                                TimeStamp = DateTimeOffset.UtcNow
                            });

                            failedRecords++;
                        }

                        index++;

                        if (index >= maxRecords)
                        {
                            break;
                        }
                    }
                }

                await context.Publish<FileParsed>(new
                {
                    context.Message.Id,
                    FailedRecords = failedRecords,
                    ParsedRecords = parsedRecords,
                    TotalRecords  = parsedRecords + failedRecords,
                    Fields        = fields,
                    context.Message.CorrelationId,
                    context.Message.UserId,
                    TimeStamp = DateTimeOffset.UtcNow
                });
            }
            catch (Exception ex)
            {
                // Anything that escapes per-record handling (missing blob, parser failure while
                // advancing, publish failure) is reported as a file-level failure.
                await context.Publish<FileParseFailed>(new
                {
                    context.Message.Id,
                    FailedRecords = failedRecords,
                    ParsedRecords = parsedRecords,
                    TotalRecords  = parsedRecords + failedRecords,
                    ex.Message,
                    context.Message.CorrelationId,
                    context.Message.UserId,
                    TimeStamp = DateTimeOffset.UtcNow
                });
            }
        }