Пример #1
0
        private static IEnumerable <AnnotationRange> GetAnnotationRanges(NirvanaConfig config, GenomeAssembly genomeAssembly)
        {
            string cachePathPrefix = LambdaUtilities.GetCachePathPrefix(genomeAssembly);

            IntervalForest <IGene>            geneIntervalForest;
            IDictionary <string, IChromosome> refNameToChromosome;
            List <long> blockOffsets;

            using (var tabixStream = PersistentStreamUtils.GetReadStream(config.tabixUrl))
                using (var tabixReader = new BinaryReader(new BlockGZipStream(tabixStream, CompressionMode.Decompress)))
                    using (var referenceStream = PersistentStreamUtils.GetReadStream(LambdaUrlHelper.GetRefUrl(genomeAssembly)))
                        using (var sequenceProvider = new ReferenceSequenceProvider(referenceStream))
                            using (var taProvider = new TranscriptAnnotationProvider(cachePathPrefix, sequenceProvider, null))
                            {
                                long vcfSize       = HttpUtilities.GetLength(config.vcfUrl);
                                int  numPartitions = Math.Max(Math.Min((int)((vcfSize - 1) / MinPartitionSize + 1), MaxNumPartitions), MinNumPartitions);

                                var tabixIndex = Reader.Read(tabixReader, sequenceProvider.RefNameToChromosome);
                                blockOffsets = PartitionUtilities.GetFileOffsets(config.vcfUrl, numPartitions, tabixIndex);

                                IntervalArray <ITranscript>[] transcriptIntervalArrays = taProvider.TranscriptIntervalArrays;
                                geneIntervalForest  = GeneForestGenerator.GetGeneForest(transcriptIntervalArrays);
                                refNameToChromosome = sequenceProvider.RefNameToChromosome;
                            }

            IEnumerable <AnnotationRange> annotationRanges = PartitionUtilities.GenerateAnnotationRanges(blockOffsets, config.vcfUrl, geneIntervalForest, refNameToChromosome);

            return(annotationRanges);
        }
Пример #2
0
        public ValidationResult Run(ValidationConfig config, ILambdaContext context)
        {
            string snsTopicArn = null;

            try
            {
                LogUtilities.UpdateLogger(context.Logger, null);
                LogUtilities.LogLambdaInfo(context, CommandLineUtilities.InformationalVersion);
                LogUtilities.LogObject("Config", config);
                LogUtilities.Log(new[] { LambdaUrlHelper.UrlBaseEnvironmentVariableName, LambdaUtilities.SnsTopicKey });
                LambdaUtilities.GarbageCollect();
                snsTopicArn = LambdaUtilities.GetEnvironmentVariable(LambdaUtilities.SnsTopicKey);

                config.Validate();
                GenomeAssembly genomeAssembly = GenomeAssemblyHelper.Convert(config.genomeAssembly);

                string nirvanaS3Ref = LambdaUrlHelper.GetRefUrl(genomeAssembly);
                var    refProvider  = ProviderUtilities.GetSequenceProvider(nirvanaS3Ref);

                using (var stream = PersistentStreamUtils.GetReadStream(config.customStrUrl))
                    TryLoadStrFile(stream, genomeAssembly, refProvider);
            }
            catch (Exception exception)
            {
                return(HandleException(config.id, exception, snsTopicArn));
            }

            return(GetSuccessOutput(config.id));
        }
Пример #3
0
        private static AnnotationResources GetAnnotationResources(AnnotationConfig annotationConfig)
        {
            var    genomeAssembly  = GenomeAssemblyHelper.Convert(annotationConfig.genomeAssembly);
            string cachePathPrefix = LambdaUrlHelper.GetCacheFolder().UrlCombine(genomeAssembly.ToString()).UrlCombine(LambdaUrlHelper.DefaultCacheSource);
            string nirvanaS3Ref    = LambdaUrlHelper.GetRefUrl(genomeAssembly);
            string saManifestUrl   = LambdaUtilities.GetManifestUrl(annotationConfig.supplementaryAnnotations ?? "latest", genomeAssembly);
            var    metrics         = new PerformanceMetrics();

            var annotationResources = new AnnotationResources(nirvanaS3Ref, cachePathPrefix,
                                                              saManifestUrl == null? null: new List <string> {
                saManifestUrl
            },
                                                              annotationConfig.customAnnotations,
                                                              annotationConfig.customStrUrl,
                                                              false,
                                                              false,
                                                              false,
                                                              metrics);

            using (var tabixStream = PersistentStreamUtils.GetReadStream(annotationConfig.tabixUrl))
            {
                annotationResources.InputStartVirtualPosition = GetTabixVirtualPosition(annotationConfig.annotationRange, tabixStream, annotationResources.SequenceProvider.RefNameToChromosome);
            }

            Logger.WriteLine($"Tabix position :{annotationResources.InputStartVirtualPosition}");

            return(annotationResources);
        }
Пример #4
0
        public static string GetCachePathPrefix(int vepVersion, GenomeAssembly genomeAssembly)
        {
            string cacheFolder = LambdaUrlHelper.GetBaseUrl() + "ab0cf104f39708eabd07b8cb67e149ba-Cache/26/";
            string suffix      = $"{genomeAssembly}/{LambdaUrlHelper.DefaultCacheSource}";

            return(UrlCombine(vepVersion == 84 ? $"{cacheFolder}VEP84/" : cacheFolder, suffix));
        }
Пример #5
0
        public static string GetCachePathPrefix(int vepVersion, GenomeAssembly genomeAssembly)
        {
            string suffix = $"{genomeAssembly}/{LambdaUrlHelper.DefaultCacheSource}";

            //LambdaUrlHelper.GetBaseUrl() +
            switch (vepVersion)
            {
            case 84:
                return(UrlCombine($"{LambdaUrlHelper.GetBaseUrl()+LambdaUrlHelper.S3CacheFolderBase}/26/VEP84/", suffix));

            default:
                return(UrlCombine($"{LambdaUrlHelper.GetBaseUrl()+LambdaUrlHelper.S3CacheFolder}", suffix));
            }
        }
Пример #6
0
        private static AnnotationResources GetAnnotationResources(SingleConfig lambdaConfig)
        {
            GenomeAssembly genomeAssembly  = GenomeAssemblyHelper.Convert(lambdaConfig.genomeAssembly);
            string         cachePathPrefix = CacheUtilities.GetCachePathPrefix(lambdaConfig.vepVersion, genomeAssembly);
            string         nirvanaS3Ref    = LambdaUrlHelper.GetRefUrl(genomeAssembly);

            string annotatorVersion = "Nirvana " + CommandLineUtilities.GetVersion(Assembly.GetAssembly(typeof(SingleAnnotationLambda)));
            var    metrics          = new PerformanceMetrics();

            Logger.WriteLine($"Cache prefix: {cachePathPrefix}");
            //todo: get customStrTsv from lambdaConfig
            var annotationResources = new AnnotationResources(nirvanaS3Ref, cachePathPrefix,
                                                              null, lambdaConfig.customAnnotations, null, false, false, false, metrics)
            {
                AnnotatorVersionTag = annotatorVersion
            };

            return(annotationResources);
        }
Пример #7
0
        public static string GetManifestUrl(string version, GenomeAssembly genomeAssembly, string baseUrl = null)
        {
            if (string.IsNullOrEmpty(version))
            {
                version = "latest";
            }
            string s3BaseUrl = LambdaUrlHelper.GetBaseUrl(baseUrl);

            switch (version)
            {
            case "latest":
                return($"{s3BaseUrl}latest_SA_{genomeAssembly}.txt");

            case "release":
                return($"{s3BaseUrl}DRAGEN_3.4_{genomeAssembly}.txt");

            default:
                return($"{s3BaseUrl}{version}_SA_{genomeAssembly}.txt");
            }
        }
Пример #8
0
 public static string GetCachePathPrefix(GenomeAssembly genomeAssembly, string baseUrl = null)
 {
     return(LambdaUrlHelper.GetCacheFolder(baseUrl).UrlCombine(genomeAssembly.ToString())
            .UrlCombine(LambdaUrlHelper.DefaultCacheSource));
 }
Пример #9
0
 public void GetS3RefLocation_AsExpected()
 {
     Assert.Equal(LambdaUrlHelper.GetRefPrefix("whatever") + "GRCh37" + LambdaUrlHelper.RefSuffix, LambdaUrlHelper.GetRefUrl(GenomeAssembly.GRCh37, "whatever"));
 }
Пример #10
0
 public void GetDataUrlBase_AsExpected()
 {
     Assert.Equal($"http://somewhere.on.the.earth/ab0cf104f39708eabd07b8cb67e149ba-Cache/{CacheConstants.DataVersion}/", LambdaUrlHelper.GetCacheFolder("http://somewhere.on.the.earth/"));
     Assert.Equal($"http://somewhere.on.the.earth/d95867deadfe690e40f42068d6b59df8-References/{ReferenceSequenceCommon.HeaderVersion}/Homo_sapiens.", LambdaUrlHelper.GetRefPrefix("http://somewhere.on.the.earth/"));
 }
Пример #11
0
        private static VariantAnnotationsParser GetVariantAnnotationsParserFromCustomTsvStream(PersistentStream customTsvStream)
        {
            var parser = VariantAnnotationsParser.Create(new StreamReader(GZipUtilities.GetAppropriateStream(customTsvStream)));

            parser.SequenceProvider = new ReferenceSequenceProvider(PersistentStreamUtils.GetReadStream(LambdaUrlHelper.GetRefUrl(parser.Assembly)));

            return(parser);
        }
Пример #12
0
 public void GetDataUrlBase_AsExpected()
 {
     Assert.Equal("http://somewhere.on.the.earth/ab0cf104f39708eabd07b8cb67e149ba-Cache/26/", LambdaUrlHelper.GetCacheFolder("http://somewhere.on.the.earth/"));
     Assert.Equal("http://somewhere.on.the.earth/d95867deadfe690e40f42068d6b59df8-References/7/Homo_sapiens.", LambdaUrlHelper.GetRefPrefix("http://somewhere.on.the.earth/"));
 }
Пример #13
0
 private static GeneAnnotationsParser GetGeneAnnotationsParserFromCustomTsvStream(PersistentStream customTsvStream)
 {
     var(entrezGeneIdToSymbol, ensemblGeneIdToSymbol) = GeneUtilities.ParseUniversalGeneArchive(null, LambdaUrlHelper.GetUgaUrl());
     return(GeneAnnotationsParser.Create(new StreamReader(GZipUtilities.GetAppropriateStream(customTsvStream)), entrezGeneIdToSymbol, ensemblGeneIdToSymbol));
 }
Пример #14
0
        public static CustomResult Create(CustomConfig config, string inputBaseName, CustomResult result, IS3Client s3Client)
        {
            string ngaFileName     = inputBaseName + SaCommon.NgaFileSuffix;
            string localNgaPath    = Path.Combine(Path.GetTempPath(), ngaFileName);
            string localSchemaPath = localNgaPath + SaCommon.JsonSchemaSuffix;
            string localLogPath    = Path.Combine(Path.GetTempPath(), LogFileName);

            HttpUtilities.ValidateUrl(LambdaUrlHelper.GetUgaUrl());
            var outputFiles = new List <string>();

            using (var aes = new AesCryptoServiceProvider())
            {
                FileMetadata ngaMetadata, schemaMetadata, logMetaData;
                using (var logStream = FileUtilities.GetCreateStream(localLogPath))
                    using (var logCryptoStream = new CryptoStream(logStream, aes.CreateEncryptor(), CryptoStreamMode.Write))
                        using (var logMd5Stream = new MD5Stream(logCryptoStream))
                            //
                            using (var customTsvStream = (PersistentStream)PersistentStreamUtils.GetReadStream(config.tsvUrl))
                                using (var parser = GetGeneAnnotationsParserFromCustomTsvStream(customTsvStream))
                                    //
                                    using (var ngaStream = FileUtilities.GetCreateStream(localNgaPath))
                                        using (var ngaCryptoStream = new CryptoStream(ngaStream, aes.CreateEncryptor(), CryptoStreamMode.Write))
                                            using (var ngaMd5Stream = new MD5Stream(ngaCryptoStream))
                                                //
                                                using (var schemaStream = FileUtilities.GetCreateStream(localSchemaPath))
                                                    using (var schemaCryptoStream = new CryptoStream(schemaStream, aes.CreateEncryptor(), CryptoStreamMode.Write))
                                                        using (var schemaMd5Stream = new MD5Stream(schemaCryptoStream))
                                                        {
                                                            using (var ngaWriter = CaUtilities.GetNgaWriter(ngaMd5Stream, parser, config.tsvUrl))
                                                                using (var schemaWriter = new StreamWriter(schemaMd5Stream))
                                                                    using (var logWriter = new StreamWriter(logMd5Stream))
                                                                    {
                                                                        ngaWriter.Write(parser.GetItems(config.skipGeneIdValidation, logWriter));
                                                                        schemaWriter.Write(parser.JsonSchema);
                                                                    }
                                                            //all the writers have to be disposed before GetFileMetaData is called

                                                            ngaMetadata    = ngaMd5Stream.GetFileMetadata();
                                                            schemaMetadata = schemaMd5Stream.GetFileMetadata();
                                                            logMetaData    = logMd5Stream.GetFileMetadata();
                                                        }

                if (config.skipGeneIdValidation)
                {
                    string logS3Key = string.Join('/', config.outputDir.path.Trim('/'), LogFileName);
                    Logger.WriteLine("uploading log file to " + logS3Key);
                    s3Client.DecryptUpload(config.outputDir.bucketName, logS3Key, localLogPath, aes, logMetaData);
                }

                string nsaS3Path    = string.Join('/', config.outputDir.path.Trim('/'), ngaFileName);
                string schemaS3Path = nsaS3Path + SaCommon.JsonSchemaSuffix;

                s3Client.DecryptUpload(config.outputDir.bucketName, nsaS3Path, localNgaPath, aes, ngaMetadata);
                s3Client.DecryptUpload(config.outputDir.bucketName, schemaS3Path, localSchemaPath, aes, schemaMetadata);


                outputFiles.Add(ngaFileName);
                outputFiles.Add(ngaFileName + SaCommon.JsonSchemaSuffix);

                LambdaUtilities.DeleteTempOutput();

                return(CustomAnnotationLambda.GetSuccessResult(config, result, outputFiles));
            }
        }