/// <summary>
/// Computes the annotation ranges for the VCF file referenced by <paramref name="config"/>.
/// The VCF is split into a bounded number of partitions using its tabix index, and the
/// resulting file offsets are handed to <c>PartitionUtilities.GenerateAnnotationRanges</c>
/// together with the gene interval forest and the reference-name dictionary.
/// </summary>
/// <param name="config">Supplies <c>tabixUrl</c> and <c>vcfUrl</c>.</param>
/// <param name="genomeAssembly">Selects the cache path prefix and the reference URL.</param>
/// <returns>The ranges produced by <c>PartitionUtilities.GenerateAnnotationRanges</c>.</returns>
private static IEnumerable<AnnotationRange> GetAnnotationRanges(NirvanaConfig config, GenomeAssembly genomeAssembly)
{
    string cachePathPrefix = LambdaUtilities.GetCachePathPrefix(genomeAssembly);

    // These three values are captured inside the using scope and consumed after it closes.
    IntervalForest<IGene> geneForest;
    IDictionary<string, IChromosome> chromosomesByName;
    List<long> partitionOffsets;

    using (var tabixStream = PersistentStreamUtils.GetReadStream(config.tabixUrl))
    using (var tabixReader = new BinaryReader(new BlockGZipStream(tabixStream, CompressionMode.Decompress)))
    using (var referenceStream = PersistentStreamUtils.GetReadStream(LambdaUrlHelper.GetRefUrl(genomeAssembly)))
    using (var sequenceProvider = new ReferenceSequenceProvider(referenceStream))
    using (var taProvider = new TranscriptAnnotationProvider(cachePathPrefix, sequenceProvider, null))
    {
        long vcfSize = HttpUtilities.GetLength(config.vcfUrl);

        // ceil(vcfSize / MinPartitionSize), then capped by MaxNumPartitions and floored by MinNumPartitions
        int sizeBasedPartitions = (int)((vcfSize - 1) / MinPartitionSize + 1);
        int cappedPartitions = Math.Min(sizeBasedPartitions, MaxNumPartitions);
        int numPartitions = Math.Max(cappedPartitions, MinNumPartitions);

        var tabixIndex = Reader.Read(tabixReader, sequenceProvider.RefNameToChromosome);
        partitionOffsets = PartitionUtilities.GetFileOffsets(config.vcfUrl, numPartitions, tabixIndex);

        IntervalArray<ITranscript>[] transcriptsByChromosome = taProvider.TranscriptIntervalArrays;
        geneForest = GeneForestGenerator.GetGeneForest(transcriptsByChromosome);

        chromosomesByName = sequenceProvider.RefNameToChromosome;
    }

    return PartitionUtilities.GenerateAnnotationRanges(partitionOffsets, config.vcfUrl, geneForest, chromosomesByName);
}
/// <summary>
/// Lambda entry point: logs the invocation, validates <paramref name="config"/> and tries to
/// load the custom STR file found at <c>config.customStrUrl</c>.
/// </summary>
/// <param name="config">Validation request; its <c>id</c> is echoed in the result.</param>
/// <param name="context">Lambda context providing the logger.</param>
/// <returns>
/// The value of <c>GetSuccessOutput</c> when everything succeeds; otherwise the value of
/// <c>HandleException</c> for the exception that was caught.
/// </returns>
public ValidationResult Run(ValidationConfig config, ILambdaContext context)
{
    // Stays null until the environment variable has been read, so the handler can tell the difference.
    string snsTopicArn = null;

    try
    {
        LogUtilities.UpdateLogger(context.Logger, null);
        LogUtilities.LogLambdaInfo(context, CommandLineUtilities.InformationalVersion);
        LogUtilities.LogObject("Config", config);
        LogUtilities.Log(new[] { LambdaUrlHelper.UrlBaseEnvironmentVariableName, LambdaUtilities.SnsTopicKey });

        LambdaUtilities.GarbageCollect();

        snsTopicArn = LambdaUtilities.GetEnvironmentVariable(LambdaUtilities.SnsTopicKey);

        config.Validate();

        GenomeAssembly assembly = GenomeAssemblyHelper.Convert(config.genomeAssembly);
        string referenceUrl = LambdaUrlHelper.GetRefUrl(assembly);
        var refProvider = ProviderUtilities.GetSequenceProvider(referenceUrl);

        using (var strStream = PersistentStreamUtils.GetReadStream(config.customStrUrl))
        {
            TryLoadStrFile(strStream, assembly, refProvider);
        }
    }
    catch (Exception exception)
    {
        return HandleException(config.id, exception, snsTopicArn);
    }

    // Deliberately outside the try block: a failure here is not routed through HandleException.
    return GetSuccessOutput(config.id);
}
/// <summary>
/// Builds the <c>AnnotationResources</c> for an annotation request and sets its
/// <c>InputStartVirtualPosition</c> from the tabix index at <c>annotationConfig.tabixUrl</c>.
/// </summary>
/// <param name="annotationConfig">
/// Supplies the genome assembly, supplementary annotation version (defaults to "latest" when null),
/// custom annotations, custom STR URL, tabix URL and annotation range.
/// </param>
/// <returns>The populated annotation resources.</returns>
private static AnnotationResources GetAnnotationResources(AnnotationConfig annotationConfig)
{
    var genomeAssembly = GenomeAssemblyHelper.Convert(annotationConfig.genomeAssembly);

    // <cache folder>/<assembly>/<default cache source>
    string cacheFolder = LambdaUrlHelper.GetCacheFolder();
    string assemblyCacheFolder = cacheFolder.UrlCombine(genomeAssembly.ToString());
    string cachePathPrefix = assemblyCacheFolder.UrlCombine(LambdaUrlHelper.DefaultCacheSource);

    string nirvanaS3Ref = LambdaUrlHelper.GetRefUrl(genomeAssembly);

    string saVersion = annotationConfig.supplementaryAnnotations ?? "latest";
    string saManifestUrl = LambdaUtilities.GetManifestUrl(saVersion, genomeAssembly);

    var metrics = new PerformanceMetrics();

    // A missing manifest URL is passed on as a null list rather than an empty one.
    List<string> saManifestUrls = null;
    if (saManifestUrl != null)
    {
        saManifestUrls = new List<string> { saManifestUrl };
    }

    var annotationResources = new AnnotationResources(
        nirvanaS3Ref,
        cachePathPrefix,
        saManifestUrls,
        annotationConfig.customAnnotations,
        annotationConfig.customStrUrl,
        false,
        false,
        false,
        metrics);

    using (var tabixStream = PersistentStreamUtils.GetReadStream(annotationConfig.tabixUrl))
    {
        var refNameToChromosome = annotationResources.SequenceProvider.RefNameToChromosome;
        annotationResources.InputStartVirtualPosition =
            GetTabixVirtualPosition(annotationConfig.annotationRange, tabixStream, refNameToChromosome);
    }

    Logger.WriteLine($"Tabix position :{annotationResources.InputStartVirtualPosition}");
    return annotationResources;
}
/// <summary>
/// Returns the cache path prefix for the given VEP version and genome assembly.
/// VEP version 84 resolves to a "VEP84/" sub-folder of the cache folder; every other
/// version resolves to the cache folder itself.
/// </summary>
/// <param name="vepVersion">VEP version requested by the caller.</param>
/// <param name="genomeAssembly">Genome assembly, used as the first component of the suffix.</param>
/// <returns>The cache folder combined with "&lt;assembly&gt;/&lt;default cache source&gt;".</returns>
public static string GetCachePathPrefix(int vepVersion, GenomeAssembly genomeAssembly)
{
    string defaultFolder = LambdaUrlHelper.GetBaseUrl() + "ab0cf104f39708eabd07b8cb67e149ba-Cache/26/";
    string assemblySuffix = $"{genomeAssembly}/{LambdaUrlHelper.DefaultCacheSource}";

    if (vepVersion == 84)
    {
        return UrlCombine($"{defaultFolder}VEP84/", assemblySuffix);
    }

    return UrlCombine(defaultFolder, assemblySuffix);
}
/// <summary>
/// Returns the cache path prefix for the given VEP version and genome assembly.
/// VEP version 84 uses "&lt;base URL&gt;&lt;S3CacheFolderBase&gt;/26/VEP84/"; every other version uses
/// "&lt;base URL&gt;&lt;S3CacheFolder&gt;".
/// </summary>
/// <param name="vepVersion">VEP version requested by the caller.</param>
/// <param name="genomeAssembly">Genome assembly, used as the first component of the suffix.</param>
/// <returns>The selected folder combined with "&lt;assembly&gt;/&lt;default cache source&gt;".</returns>
public static string GetCachePathPrefix(int vepVersion, GenomeAssembly genomeAssembly)
{
    string assemblySuffix = $"{genomeAssembly}/{LambdaUrlHelper.DefaultCacheSource}";

    string cacheFolder = vepVersion == 84
        ? $"{LambdaUrlHelper.GetBaseUrl()+LambdaUrlHelper.S3CacheFolderBase}/26/VEP84/"
        : $"{LambdaUrlHelper.GetBaseUrl()+LambdaUrlHelper.S3CacheFolder}";

    return UrlCombine(cacheFolder, assemblySuffix);
}
/// <summary>
/// Builds the <c>AnnotationResources</c> for a single-annotation request and tags them with
/// the annotator version ("Nirvana " followed by the version of the assembly that contains
/// <c>SingleAnnotationLambda</c>).
/// </summary>
/// <param name="lambdaConfig">Supplies the genome assembly, VEP version and custom annotations.</param>
/// <returns>The annotation resources with <c>AnnotatorVersionTag</c> set.</returns>
private static AnnotationResources GetAnnotationResources(SingleConfig lambdaConfig)
{
    GenomeAssembly assembly = GenomeAssemblyHelper.Convert(lambdaConfig.genomeAssembly);
    string cachePathPrefix = CacheUtilities.GetCachePathPrefix(lambdaConfig.vepVersion, assembly);
    string referenceUrl = LambdaUrlHelper.GetRefUrl(assembly);

    Assembly lambdaAssembly = Assembly.GetAssembly(typeof(SingleAnnotationLambda));
    string versionTag = $"Nirvana {CommandLineUtilities.GetVersion(lambdaAssembly)}";

    var metrics = new PerformanceMetrics();

    Logger.WriteLine($"Cache prefix: {cachePathPrefix}");

    //todo: get customStrTsv from lambdaConfig
    return new AnnotationResources(
        referenceUrl,
        cachePathPrefix,
        null,
        lambdaConfig.customAnnotations,
        null,
        false,
        false,
        false,
        metrics)
    {
        AnnotatorVersionTag = versionTag
    };
}
/// <summary>
/// Returns the URL of the supplementary annotation manifest for a version and genome assembly.
/// </summary>
/// <param name="version">
/// "latest", "release", or an explicit version label. A null or empty value is treated as "latest".
/// </param>
/// <param name="genomeAssembly">Genome assembly embedded in the manifest file name.</param>
/// <param name="baseUrl">Optional base URL forwarded to <c>LambdaUrlHelper.GetBaseUrl</c>.</param>
/// <returns>The manifest URL below the resolved base URL.</returns>
public static string GetManifestUrl(string version, GenomeAssembly genomeAssembly, string baseUrl = null)
{
    string requestedVersion = string.IsNullOrEmpty(version) ? "latest" : version;
    string s3BaseUrl = LambdaUrlHelper.GetBaseUrl(baseUrl);

    if (requestedVersion == "latest")
    {
        return $"{s3BaseUrl}latest_SA_{genomeAssembly}.txt";
    }

    if (requestedVersion == "release")
    {
        return $"{s3BaseUrl}DRAGEN_3.4_{genomeAssembly}.txt";
    }

    return $"{s3BaseUrl}{requestedVersion}_SA_{genomeAssembly}.txt";
}
/// <summary>
/// Returns the cache path prefix for a genome assembly:
/// the cache folder, then the assembly name, then the default cache source.
/// </summary>
/// <param name="genomeAssembly">Genome assembly whose name forms the second path component.</param>
/// <param name="baseUrl">Optional base URL forwarded to <c>LambdaUrlHelper.GetCacheFolder</c>.</param>
/// <returns>The combined cache path prefix.</returns>
public static string GetCachePathPrefix(GenomeAssembly genomeAssembly, string baseUrl = null)
{
    string cacheFolder = LambdaUrlHelper.GetCacheFolder(baseUrl);
    string assemblyFolder = cacheFolder.UrlCombine(genomeAssembly.ToString());

    return assemblyFolder.UrlCombine(LambdaUrlHelper.DefaultCacheSource);
}
/// <summary>
/// Checks that the reference URL for GRCh37 is the reference prefix, the assembly name and
/// the reference suffix concatenated in that order.
/// </summary>
public void GetS3RefLocation_AsExpected()
{
    string expectedUrl = LambdaUrlHelper.GetRefPrefix("whatever") + "GRCh37" + LambdaUrlHelper.RefSuffix;

    string observedUrl = LambdaUrlHelper.GetRefUrl(GenomeAssembly.GRCh37, "whatever");

    Assert.Equal(expectedUrl, observedUrl);
}
/// <summary>
/// Checks the cache folder and reference prefix derived from an explicit base URL; the
/// version components come from <c>CacheConstants.DataVersion</c> and
/// <c>ReferenceSequenceCommon.HeaderVersion</c>.
/// </summary>
public void GetDataUrlBase_AsExpected()
{
    string expectedCacheFolder = $"http://somewhere.on.the.earth/ab0cf104f39708eabd07b8cb67e149ba-Cache/{CacheConstants.DataVersion}/";
    string observedCacheFolder = LambdaUrlHelper.GetCacheFolder("http://somewhere.on.the.earth/");
    Assert.Equal(expectedCacheFolder, observedCacheFolder);

    string expectedRefPrefix = $"http://somewhere.on.the.earth/d95867deadfe690e40f42068d6b59df8-References/{ReferenceSequenceCommon.HeaderVersion}/Homo_sapiens.";
    string observedRefPrefix = LambdaUrlHelper.GetRefPrefix("http://somewhere.on.the.earth/");
    Assert.Equal(expectedRefPrefix, observedRefPrefix);
}
/// <summary>
/// Creates a <c>VariantAnnotationsParser</c> over the custom TSV stream and attaches a
/// reference sequence provider for the assembly reported by the parser.
/// </summary>
/// <param name="customTsvStream">Stream of the custom TSV; passed through <c>GZipUtilities.GetAppropriateStream</c>.</param>
/// <returns>The parser with its <c>SequenceProvider</c> set.</returns>
private static VariantAnnotationsParser GetVariantAnnotationsParserFromCustomTsvStream(PersistentStream customTsvStream)
{
    var tsvReader = new StreamReader(GZipUtilities.GetAppropriateStream(customTsvStream));
    var parser = VariantAnnotationsParser.Create(tsvReader);

    // The reference can only be chosen once the parser has been created and exposes its assembly.
    var referenceStream = PersistentStreamUtils.GetReadStream(LambdaUrlHelper.GetRefUrl(parser.Assembly));
    parser.SequenceProvider = new ReferenceSequenceProvider(referenceStream);

    return parser;
}
/// <summary>
/// Checks the cache folder and reference prefix derived from an explicit base URL against
/// fixed expected URLs.
/// </summary>
public void GetDataUrlBase_AsExpected()
{
    string observedCacheFolder = LambdaUrlHelper.GetCacheFolder("http://somewhere.on.the.earth/");
    Assert.Equal("http://somewhere.on.the.earth/ab0cf104f39708eabd07b8cb67e149ba-Cache/26/", observedCacheFolder);

    string observedRefPrefix = LambdaUrlHelper.GetRefPrefix("http://somewhere.on.the.earth/");
    Assert.Equal("http://somewhere.on.the.earth/d95867deadfe690e40f42068d6b59df8-References/7/Homo_sapiens.", observedRefPrefix);
}
/// <summary>
/// Creates a <c>GeneAnnotationsParser</c> over the custom TSV stream, supplying the two
/// gene-ID-to-symbol lookups parsed from the universal gene archive at
/// <c>LambdaUrlHelper.GetUgaUrl()</c>.
/// </summary>
/// <param name="customTsvStream">Stream of the custom TSV; passed through <c>GZipUtilities.GetAppropriateStream</c>.</param>
/// <returns>The parser returned by <c>GeneAnnotationsParser.Create</c>.</returns>
private static GeneAnnotationsParser GetGeneAnnotationsParserFromCustomTsvStream(PersistentStream customTsvStream)
{
    var (entrezIdToSymbol, ensemblIdToSymbol) =
        GeneUtilities.ParseUniversalGeneArchive(null, LambdaUrlHelper.GetUgaUrl());

    var tsvReader = new StreamReader(GZipUtilities.GetAppropriateStream(customTsvStream));

    return GeneAnnotationsParser.Create(tsvReader, entrezIdToSymbol, ensemblIdToSymbol);
}
/// <summary>
/// Converts the gene annotation TSV at <c>config.tsvUrl</c> into an NGA file and its JSON
/// schema, writes both (plus a log file) encrypted to the temp folder, uploads them through
/// <paramref name="s3Client"/> and returns the success result.
/// </summary>
/// <param name="config">Supplies the TSV URL, the output bucket and path, and <c>skipGeneIdValidation</c>.</param>
/// <param name="inputBaseName">Base name from which the NGA and schema file names are derived.</param>
/// <param name="result">Result object forwarded to <c>CustomAnnotationLambda.GetSuccessResult</c>.</param>
/// <param name="s3Client">Client used for the uploads.</param>
/// <returns>The value of <c>CustomAnnotationLambda.GetSuccessResult</c> listing the NGA and schema file names.</returns>
public static CustomResult Create(CustomConfig config, string inputBaseName, CustomResult result, IS3Client s3Client)
{
    string tempFolder = Path.GetTempPath();
    string ngaFileName = inputBaseName + SaCommon.NgaFileSuffix;
    string localNgaPath = Path.Combine(tempFolder, ngaFileName);
    string localSchemaPath = localNgaPath + SaCommon.JsonSchemaSuffix;
    string localLogPath = Path.Combine(tempFolder, LogFileName);

    HttpUtilities.ValidateUrl(LambdaUrlHelper.GetUgaUrl());

    using (var aes = new AesCryptoServiceProvider())
    {
        FileMetadata ngaMetadata;
        FileMetadata schemaMetadata;
        FileMetadata logMetadata;

        // log output: file -> AES encryptor -> MD5Stream
        using (var logStream = FileUtilities.GetCreateStream(localLogPath))
        using (var logCryptoStream = new CryptoStream(logStream, aes.CreateEncryptor(), CryptoStreamMode.Write))
        using (var logMd5Stream = new MD5Stream(logCryptoStream))
        // input TSV and its parser
        using (var customTsvStream = (PersistentStream)PersistentStreamUtils.GetReadStream(config.tsvUrl))
        using (var parser = GetGeneAnnotationsParserFromCustomTsvStream(customTsvStream))
        // NGA output: file -> AES encryptor -> MD5Stream
        using (var ngaStream = FileUtilities.GetCreateStream(localNgaPath))
        using (var ngaCryptoStream = new CryptoStream(ngaStream, aes.CreateEncryptor(), CryptoStreamMode.Write))
        using (var ngaMd5Stream = new MD5Stream(ngaCryptoStream))
        // schema output: file -> AES encryptor -> MD5Stream
        using (var schemaStream = FileUtilities.GetCreateStream(localSchemaPath))
        using (var schemaCryptoStream = new CryptoStream(schemaStream, aes.CreateEncryptor(), CryptoStreamMode.Write))
        using (var schemaMd5Stream = new MD5Stream(schemaCryptoStream))
        {
            using (var ngaWriter = CaUtilities.GetNgaWriter(ngaMd5Stream, parser, config.tsvUrl))
            using (var schemaWriter = new StreamWriter(schemaMd5Stream))
            using (var logWriter = new StreamWriter(logMd5Stream))
            {
                ngaWriter.Write(parser.GetItems(config.skipGeneIdValidation, logWriter));
                schemaWriter.Write(parser.JsonSchema);
            }

            // The writers must already be disposed when the metadata is read,
            // so these calls have to stay after the inner using block.
            ngaMetadata = ngaMd5Stream.GetFileMetadata();
            schemaMetadata = schemaMd5Stream.GetFileMetadata();
            logMetadata = logMd5Stream.GetFileMetadata();
        }

        string bucketName = config.outputDir.bucketName;
        string outputFolder = config.outputDir.path.Trim('/');

        // The log file is only uploaded when gene ID validation was skipped.
        if (config.skipGeneIdValidation)
        {
            string logS3Key = string.Join('/', outputFolder, LogFileName);
            Logger.WriteLine($"uploading log file to {logS3Key}");
            s3Client.DecryptUpload(bucketName, logS3Key, localLogPath, aes, logMetadata);
        }

        string ngaS3Key = string.Join('/', outputFolder, ngaFileName);
        string schemaS3Key = ngaS3Key + SaCommon.JsonSchemaSuffix;

        s3Client.DecryptUpload(bucketName, ngaS3Key, localNgaPath, aes, ngaMetadata);
        s3Client.DecryptUpload(bucketName, schemaS3Key, localSchemaPath, aes, schemaMetadata);

        var outputFiles = new List<string>
        {
            ngaFileName,
            ngaFileName + SaCommon.JsonSchemaSuffix
        };

        LambdaUtilities.DeleteTempOutput();

        return CustomAnnotationLambda.GetSuccessResult(config, result, outputFiles);
    }
}