/// <summary>
/// Loads the transcript cache located via <paramref name="pathPrefix"/> and opens the SIFT and
/// PolyPhen prediction caches derived from the same prefix.
/// </summary>
/// <param name="pathPrefix">prefix handed to <c>CacheConstants</c> to derive the transcript, SIFT and PolyPhen paths</param>
/// <param name="sequenceProvider">source of the sequence, the chromosome dictionaries and the assembly</param>
/// <param name="conservationProvider">may be null; when present, its version is appended to <c>DataSourceVersions</c></param>
public TranscriptAnnotationProvider(string pathPrefix, ISequenceProvider sequenceProvider, ProteinConservationProvider conservationProvider)
        {
            Name                  = "Transcript annotation provider";
            _sequence             = sequenceProvider.Sequence;
            _refNameToChromosome  = sequenceProvider.RefNameToChromosome;
            _conservationProvider = conservationProvider;

            // the transcript stream is only kept open for the duration of the cache initialization
            var transcriptPath = CacheConstants.TranscriptPath(pathPrefix);
            using (var transcriptStream = PersistentStreamUtils.GetReadStream(transcriptPath))
            {
                (_transcriptCache, TranscriptIntervalArrays, VepVersion) =
                    InitiateCache(transcriptStream, sequenceProvider.RefIndexToChromosome, sequenceProvider.Assembly);
            }

            Assembly           = _transcriptCache.Assembly;
            DataSourceVersions = _transcriptCache.DataSourceVersions;

            // TODO: this is not great. We should not be using IEnumerables if we have to resort to strange stuff like this
            if (conservationProvider != null)
            {
                DataSourceVersions = DataSourceVersions.Concat(new[] { conservationProvider.Version });
            }

            // the prediction streams are stored in fields next to the readers built on top of them
            var siftPath = CacheConstants.SiftPath(pathPrefix);
            _siftStream  = PersistentStreamUtils.GetReadStream(siftPath);
            _siftReader  = new PredictionCacheReader(_siftStream, PredictionCacheReader.SiftDescriptions);

            var polyphenPath = CacheConstants.PolyPhenPath(pathPrefix);
            _polyphenStream  = PersistentStreamUtils.GetReadStream(polyphenPath);
            _polyphenReader  = new PredictionCacheReader(_polyphenStream, PredictionCacheReader.PolyphenDescriptions);
        }
Пример #2
0
        /// <summary>
        /// Builds the JSON document (header section followed by a "genes" array) for the requested gene symbols.
        /// </summary>
        /// <param name="input">request carrying the gene symbols and any additional NGA URLs</param>
        /// <param name="saManifestFilePath">location of the supplementary annotation manifest</param>
        /// <param name="saPathPrefix">prefix prepended to each manifest entry</param>
        /// <returns>the serialized JSON string</returns>
        /// <exception cref="UserErrorException">thrown when the resulting string is longer than 6,000,000 characters</exception>
        public static string GetGeneAnnotation(GeneConfig input, string saManifestFilePath, string saPathPrefix)
        {
            var ngaFiles = GetNgaFileList(saManifestFilePath, saPathPrefix, input.ngaUrls).ToList();
            var provider = new GeneAnnotationProvider(PersistentStreamUtils.GetStreams(ngaFiles));

            var output = new StringBuilder(1024 * 1024);
            var json   = new JsonObject(output);

            output.Append(JsonObject.OpenBrace);
            json.AddStringValue(JasixCommons.HeaderSectionTag, GetHeader(provider), false);

            // symbols without an annotation yield null/empty strings; those are left out of the output
            var annotatedGenes = (from geneSymbol in input.geneSymbols
                                  let annotation = provider.Annotate(geneSymbol)
                                  where !string.IsNullOrEmpty(annotation)
                                  select annotation).ToList();

            json.AddStringValues("genes", annotatedGenes, false);
            output.Append(JsonObject.CloseBrace);

            // AWS lambda response message can not be larger than 6MB
            if (output.Length <= 6_000_000)
            {
                return output.ToString();
            }

            throw new UserErrorException("Too many genes provided in the request. Please decrease the number of genes and try again later.");
        }
Пример #3
0
        /// <summary>
        /// Lambda entry point: validates the configuration, resolves the reference for the requested genome
        /// assembly and attempts to load the custom STR file referenced by <c>config.customStrUrl</c>.
        /// </summary>
        /// <param name="config">validation request</param>
        /// <param name="context">lambda context; its logger is installed before anything is logged</param>
        /// <returns>the success output, or the result produced by <c>HandleException</c> if anything in the try block throws</returns>
        public ValidationResult Run(ValidationConfig config, ILambdaContext context)
        {
            // remains null if an exception occurs before the environment variable has been read
            string snsTopicArn = null;

            try
            {
                LogUtilities.UpdateLogger(context.Logger, null);
                LogUtilities.LogLambdaInfo(context, CommandLineUtilities.InformationalVersion);
                LogUtilities.LogObject("Config", config);
                LogUtilities.Log(new[] { LambdaUrlHelper.UrlBaseEnvironmentVariableName, LambdaUtilities.SnsTopicKey });
                LambdaUtilities.GarbageCollect();
                snsTopicArn = LambdaUtilities.GetEnvironmentVariable(LambdaUtilities.SnsTopicKey);

                config.Validate();
                GenomeAssembly assembly = GenomeAssemblyHelper.Convert(config.genomeAssembly);

                string referenceUrl     = LambdaUrlHelper.GetRefUrl(assembly);
                var    sequenceProvider = ProviderUtilities.GetSequenceProvider(referenceUrl);

                using (var strStream = PersistentStreamUtils.GetReadStream(config.customStrUrl))
                {
                    TryLoadStrFile(strStream, assembly, sequenceProvider);
                }
            }
            catch (Exception exception)
            {
                return HandleException(config.id, exception, snsTopicArn);
            }

            return GetSuccessOutput(config.id);
        }
Пример #4
0
        /// <summary>
        /// Lazily enumerates the NGA files: first every manifest entry (prefixed with
        /// <paramref name="saPathPrefix"/>) whose suffix equals <c>SaCommon.NgaFileSuffix</c>, then every
        /// entry of <paramref name="ngaFiles"/>.
        /// </summary>
        /// <param name="saManifestPath">path of the manifest, read line by line</param>
        /// <param name="saPathPrefix">prefix concatenated in front of each manifest line</param>
        /// <param name="ngaFiles">additional files appended as-is; may be null</param>
        public static IEnumerable <string> GetNgaFileList(string saManifestPath, string saPathPrefix, string[] ngaFiles)
        {
            using (var manifestReader = new StreamReader(PersistentStreamUtils.GetReadStream(saManifestPath)))
            {
                for (string entry = manifestReader.ReadLine(); entry != null; entry = manifestReader.ReadLine())
                {
                    string candidatePath = saPathPrefix + entry;
                    string suffix        = candidatePath.GetFileSuffix(true);

                    // only gene annotation files are of interest here
                    if (suffix != SaCommon.NgaFileSuffix)
                    {
                        continue;
                    }

                    yield return candidatePath;
                }
            }

            if (ngaFiles != null)
            {
                foreach (string extraFile in ngaFiles)
                {
                    yield return extraFile;
                }
            }
        }
Пример #5
0
        /// <summary>
        /// Creates the annotation resources for a lambda annotation job and records the tabix virtual
        /// position at which the requested annotation range starts.
        /// </summary>
        /// <param name="annotationConfig">job configuration (assembly, supplementary annotation version, custom annotations, tabix URL, range)</param>
        /// <returns>the resources with <c>InputStartVirtualPosition</c> populated</returns>
        private static AnnotationResources GetAnnotationResources(AnnotationConfig annotationConfig)
        {
            var    assembly     = GenomeAssemblyHelper.Convert(annotationConfig.genomeAssembly);
            string cachePrefix  = LambdaUrlHelper.GetCacheFolder().UrlCombine(assembly.ToString()).UrlCombine(LambdaUrlHelper.DefaultCacheSource);
            string referenceUrl = LambdaUrlHelper.GetRefUrl(assembly);

            // fall back to the "latest" supplementary annotations when no version was requested
            string saVersion   = annotationConfig.supplementaryAnnotations ?? "latest";
            string manifestUrl = LambdaUtilities.GetManifestUrl(saVersion, assembly);
            var    metrics     = new PerformanceMetrics();

            List <string> manifestUrls = null;
            if (manifestUrl != null)
            {
                manifestUrls = new List <string> { manifestUrl };
            }

            var resources = new AnnotationResources(referenceUrl, cachePrefix, manifestUrls,
                                                    annotationConfig.customAnnotations,
                                                    annotationConfig.customStrUrl,
                                                    false,
                                                    false,
                                                    false,
                                                    metrics);

            using (var tabixStream = PersistentStreamUtils.GetReadStream(annotationConfig.tabixUrl))
            {
                var refNameToChromosome = resources.SequenceProvider.RefNameToChromosome;
                resources.InputStartVirtualPosition = GetTabixVirtualPosition(annotationConfig.annotationRange, tabixStream, refNameToChromosome);
            }

            Logger.WriteLine($"Tabix position :{resources.InputStartVirtualPosition}");

            return resources;
        }
Пример #6
0
        /// <summary>
        /// Splits the VCF referenced by <c>config.vcfUrl</c> into annotation ranges, using the tabix index
        /// to obtain file offsets and a gene interval forest built from the transcript cache.
        /// </summary>
        /// <param name="config">job configuration providing the VCF and tabix URLs</param>
        /// <param name="genomeAssembly">assembly used to locate the cache and the reference</param>
        /// <returns>the ranges produced by <c>PartitionUtilities.GenerateAnnotationRanges</c></returns>
        private static IEnumerable <AnnotationRange> GetAnnotationRanges(NirvanaConfig config, GenomeAssembly genomeAssembly)
        {
            string cachePathPrefix = LambdaUtilities.GetCachePathPrefix(genomeAssembly);

            // values that must outlive the readers/providers below
            List <long> fileOffsets;
            IntervalForest <IGene>            geneForest;
            IDictionary <string, IChromosome> chromosomesByName;

            using (var tabixStream = PersistentStreamUtils.GetReadStream(config.tabixUrl))
            using (var tabixReader = new BinaryReader(new BlockGZipStream(tabixStream, CompressionMode.Decompress)))
            using (var referenceStream = PersistentStreamUtils.GetReadStream(LambdaUrlHelper.GetRefUrl(genomeAssembly)))
            using (var sequenceProvider = new ReferenceSequenceProvider(referenceStream))
            using (var taProvider = new TranscriptAnnotationProvider(cachePathPrefix, sequenceProvider, null))
            {
                // one partition per MinPartitionSize bytes (rounded up), bounded by MinNumPartitions and MaxNumPartitions
                long vcfSize          = HttpUtilities.GetLength(config.vcfUrl);
                int  wantedPartitions = (int)((vcfSize - 1) / MinPartitionSize + 1);
                int  numPartitions    = Math.Max(Math.Min(wantedPartitions, MaxNumPartitions), MinNumPartitions);

                var tabixIndex = Reader.Read(tabixReader, sequenceProvider.RefNameToChromosome);
                fileOffsets = PartitionUtilities.GetFileOffsets(config.vcfUrl, numPartitions, tabixIndex);

                IntervalArray <ITranscript>[] transcriptArrays = taProvider.TranscriptIntervalArrays;
                geneForest        = GeneForestGenerator.GetGeneForest(transcriptArrays);
                chromosomesByName = sequenceProvider.RefNameToChromosome;
            }

            return PartitionUtilities.GenerateAnnotationRanges(fileOffsets, config.vcfUrl, geneForest, chromosomesByName);
        }
Пример #7
0
        /// <summary>
        /// Creates a variant annotations parser on top of the custom TSV stream and attaches a reference
        /// sequence provider matching the assembly reported by the parser.
        /// </summary>
        /// <param name="customTsvStream">stream containing the custom TSV</param>
        /// <returns>the parser with its <c>SequenceProvider</c> set</returns>
        private static VariantAnnotationsParser GetVariantAnnotationsParserFromCustomTsvStream(PersistentStream customTsvStream)
        {
            var tsvReader = new StreamReader(GZipUtilities.GetAppropriateStream(customTsvStream));
            var parser    = VariantAnnotationsParser.Create(tsvReader);

            // the reference can only be chosen once the parser exposes the assembly
            var referenceUrl = LambdaUrlHelper.GetRefUrl(parser.Assembly);
            parser.SequenceProvider = new ReferenceSequenceProvider(PersistentStreamUtils.GetReadStream(referenceUrl));

            return parser;
        }
Пример #8
0
        /// <summary>
        /// Opens <paramref name="filePath"/> for reading and wraps it in the stream that matches the
        /// compression algorithm identified from the file header.
        /// </summary>
        /// <param name="filePath">location of the file to open</param>
        /// <returns>the stream returned by <c>GetAppropriateStream</c> for a freshly opened read stream</returns>
        public static Stream GetAppropriateReadStream(string filePath)
        {
            // The header is read through a separate, short-lived stream. It used to be left open
            // (leaking the underlying handle/connection); the using block now guarantees its disposal.
            using (var headerStream = PersistentStreamUtils.GetReadStream(filePath))
            {
                var header               = GetHeader(headerStream);
                var compressionAlgorithm = IdentifyCompressionAlgorithm(header);

                // a second stream is opened so the caller receives one positioned at the start of the file
                var fileStream = PersistentStreamUtils.GetReadStream(filePath);

                return GetAppropriateStream(fileStream, compressionAlgorithm);
            }
        }
Пример #9
0
        /// <summary>
        /// Identifies the compression algorithm from the file header and returns a matching read stream
        /// over a freshly opened stream for <paramref name="filePath"/>.
        /// </summary>
        /// <param name="filePath">location of the file to open</param>
        /// <returns>the stream produced by <c>GetAppropriateStream</c></returns>
        public static Stream GetAppropriateReadStream(string filePath)
        {
            CompressionAlgorithm algorithm;

            // first pass: a short-lived stream that is only used to inspect the header
            using (var probeStream = PersistentStreamUtils.GetReadStream(filePath))
            {
                byte[] headerBytes = GetHeader(probeStream);
                algorithm = IdentifyCompressionAlgorithm(headerBytes);
            }

            // second pass: the stream that is actually handed back to the caller
            var contentStream = PersistentStreamUtils.GetReadStream(filePath);
            return GetAppropriateStream(contentStream, algorithm);
        }
Пример #10
0
        /// <summary>
        /// Determines whether the TSV at <paramref name="tsvUrl"/> is a gene annotation file by checking
        /// whether its second line starts with <c>#geneSymbol</c>.
        /// </summary>
        /// <param name="tsvUrl">location of the custom TSV</param>
        /// <returns>true when the second line starts with the gene symbol marker</returns>
        /// <exception cref="UserErrorException">thrown when the file has fewer than two lines</exception>
        private static bool IsGeneAnnotationTsv(string tsvUrl)
        {
            using (var customTsvStream = (PersistentStream)PersistentStreamUtils.GetReadStream(tsvUrl))
                using (var reader = new StreamReader(customTsvStream))
                {
                    // the first line is skipped; only the second line is inspected
                    reader.ReadLine();
                    string secondLine = reader.ReadLine();
                    if (secondLine == null)
                    {
                        throw new UserErrorException("The input TSV file has less than two lines");
                    }

                    // The marker is a fixed file-format token, so compare ordinally: the culture-sensitive
                    // default of StartsWith(string) can give culture-dependent answers.
                    return secondLine.StartsWith("#geneSymbol", System.StringComparison.Ordinal);
                }
        }
Пример #11
0
        /// <summary>
        /// Returns the STR TSV stream: the file at <paramref name="customTsvPath"/> when a path is given,
        /// otherwise the embedded resource for <paramref name="genomeAssembly"/>.
        /// </summary>
        /// <param name="genomeAssembly">assembly used to build the embedded resource name</param>
        /// <param name="customTsvPath">optional path of a user-supplied TSV; null selects the embedded resource</param>
        /// <exception cref="NullReferenceException">thrown when no stream could be obtained</exception>
        private static Stream GetTsvStream(GenomeAssembly genomeAssembly, string customTsvPath)
        {
            //since we are using the executing assembly, we cannot move the following lines about getting stream further upstream.
            var    executingAssembly    = System.Reflection.Assembly.GetExecutingAssembly();
            string embeddedResourceName = $"RepeatExpansions.Resources.RepeatExpansions.{genomeAssembly}.tsv";

            Stream tsvStream;
            if (customTsvPath != null)
            {
                tsvStream = PersistentStreamUtils.GetReadStream(customTsvPath);
            }
            else
            {
                tsvStream = executingAssembly.GetManifestResourceStream(embeddedResourceName);
            }

            if (tsvStream != null)
            {
                return tsvStream;
            }

            throw new NullReferenceException("Unable to read from the STR resource file");
        }
Пример #12
0
                       refNameToChromosome, int numRefSeqs) GetDictionaries(string referencePath)
        {
            // NOTE(review): the beginning of this signature (the start of the tuple return type) lies outside this view.
            // Reads the two chromosome dictionaries and the number of reference sequences from the
            // compressed reference located at referencePath.
            IDictionary <ushort, IChromosome> refIndexToChromosome;
            IDictionary <string, IChromosome> refNameToChromosome;
            int numRefSeqs;

            // the reader is disposed as soon as the three values have been copied out of it
            using (var reader = new CompressedSequenceReader(PersistentStreamUtils.GetReadStream(referencePath)))
            {
                refIndexToChromosome = reader.RefIndexToChromosome;
                refNameToChromosome  = reader.RefNameToChromosome;
                numRefSeqs           = reader.NumRefSeqs;
            }

            return(refIndexToChromosome, refNameToChromosome, numRefSeqs);
        }
Пример #13
0
        /// <summary>
        /// Creates the annotation resources from the configured paths and, when supplementary annotation
        /// directories are present, scans the input VCF so the variant positions are known up front.
        /// </summary>
        /// <returns>the initialized annotation resources</returns>
        private static AnnotationResources GetAnnotationResources()
        {
            var resources = new AnnotationResources(_refSequencePath, _inputCachePrefix, SupplementaryAnnotationDirectories, null, _pluginDirectory, _disableRecomposition, _forceMitochondrialAnnotation);

            // the position scan only happens when there are supplementary annotation directories
            if (SupplementaryAnnotationDirectories.Count != 0)
            {
                var persistentVcfStream = new PersistentStream(PersistentStreamUtils.GetReadStream(_vcfPath),
                                                               ConnectUtilities.GetFileConnectFunc(_vcfPath), 0);

                using (var preloadVcfStream = GZipUtilities.GetAppropriateStream(persistentVcfStream))
                {
                    resources.GetVariantPositions(preloadVcfStream, null);
                }
            }

            return resources;
        }
Пример #14
0
        /// <summary>
        /// Creates the supplementary annotation provider from the NSA and NSI files listed in
        /// <paramref name="files"/>; readers are ordered ordinally by their JSON key.
        /// </summary>
        /// <param name="files">annotation file listing; may be null</param>
        /// <returns>null when <paramref name="files"/> is null or no reader could be created, otherwise the provider</returns>
        public static IAnnotationProvider GetNsaProvider(AnnotationFiles files)
        {
            if (files == null)
            {
                return null;
            }

            //todo: use using block to release nsa streams
            var nsaReaders = files.NsaFiles?
                                 .Select(nsaFile => new NsaReader(PersistentStreamUtils.GetReadStream(nsaFile.Nsa), PersistentStreamUtils.GetReadStream(nsaFile.Idx)))
                                 .OrderBy(reader => reader.JsonKey, StringComparer.Ordinal)
                                 .ToArray() ?? new INsaReader[0];

            //todo: use using block to release nsi streams
            var nsiReaders = files.NsiFiles?
                                 .Select(nsiFile => NsiReader.Read(PersistentStreamUtils.GetReadStream(nsiFile)))
                                 .OrderBy(reader => reader.JsonKey, StringComparer.Ordinal)
                                 .ToArray() ?? new INsiReader[0];

            // without a single reader there is nothing to provide
            bool hasReaders = nsaReaders.Length > 0 || nsiReaders.Length > 0;

            return hasReaders ? new NsaProvider(nsaReaders, nsiReaders) : null;
        }
Пример #15
0
        /// <summary>
        /// For every block-based file offset, reads the first genomic position found at that offset in the
        /// VCF, moves it to a proper start position with respect to the gene interval forest and finally
        /// merges consecutive equal results.
        /// </summary>
        /// <param name="blockBasedOffsets">file offsets into the block-compressed VCF</param>
        /// <param name="vcfUrl">location of the VCF</param>
        /// <param name="geneIntervalForest">gene intervals passed to <c>FindProperStartPosition</c></param>
        /// <param name="refNameToChromosome">chromosome lookup by reference name</param>
        /// <returns>the adjusted start positions after merging consecutive equal values</returns>
        private static AnnotationPosition[] AdjustPartitionGenomicStarts(IReadOnlyList <long> blockBasedOffsets, string vcfUrl,
                                                                         IIntervalForest <IGene> geneIntervalForest, IDictionary <string, IChromosome> refNameToChromosome)
        {
            int partitionCount = blockBasedOffsets.Count;
            var starts         = new AnnotationPosition[partitionCount];

            for (var i = 0; i < partitionCount; i++)
            {
                long offset           = blockBasedOffsets[i];
                bool isFirstPartition = i == 0;

                using (var vcfStream = PersistentStreamUtils.GetReadStream(vcfUrl, offset))
                using (var bgzipStream = new BlockGZipStream(vcfStream, CompressionMode.Decompress))
                {
                    var firstPosition = GetFirstGenomicPosition(bgzipStream, isFirstPartition);
                    starts[i] = FindProperStartPosition(firstPosition, geneIntervalForest, refNameToChromosome);
                }
            }

            return MergeConsecutiveEqualValues(starts).ToArray();
        }
Пример #16
0
        /// <summary>
        /// Loads the transcript cache located via <paramref name="pathPrefix"/> and creates the SIFT and
        /// PolyPhen prediction cache readers derived from the same prefix.
        /// </summary>
        /// <param name="pathPrefix">prefix handed to <c>CacheConstants</c> to derive the transcript, SIFT and PolyPhen paths</param>
        /// <param name="sequenceProvider">source of the sequence, the chromosome dictionary and the assembly</param>
        public TranscriptAnnotationProvider(string pathPrefix, ISequenceProvider sequenceProvider)
        {
            Name      = "Transcript annotation provider";
            _sequence = sequenceProvider.Sequence;

            // The transcript stream was previously never disposed. It is not used after InitiateCache
            // returns, so it is now released as soon as the cache has been initialized.
            // NOTE(review): assumes InitiateCache finishes reading from the stream before it returns - confirm.
            using (var transcriptStream = PersistentStreamUtils.GetReadStream(CacheConstants.TranscriptPath(pathPrefix)))
            {
                (_transcriptCache, TranscriptIntervalArrays, VepVersion) = InitiateCache(transcriptStream, sequenceProvider.RefIndexToChromosome, sequenceProvider.Assembly);
            }

            Assembly           = _transcriptCache.Assembly;
            DataSourceVersions = _transcriptCache.DataSourceVersions;

            // The prediction streams are handed to the readers stored in fields and are therefore left open here.
            // NOTE(review): presumably the readers keep reading from them after construction - verify who disposes them.
            var siftStream = PersistentStreamUtils.GetReadStream(CacheConstants.SiftPath(pathPrefix));

            _siftReader = new PredictionCacheReader(siftStream, PredictionCacheReader.SiftDescriptions);

            var polyphenStream = PersistentStreamUtils.GetReadStream(CacheConstants.PolyPhenPath(pathPrefix));

            _polyphenReader = new PredictionCacheReader(polyphenStream, PredictionCacheReader.PolyphenDescriptions);
        }
Пример #17
0
        /// <summary>
        /// Test fixture setup: loads the tabix index of the remote Mother.vcf.gz test file and creates a
        /// <c>Search</c> instance over the remote VCF stream.
        /// </summary>
        public SearchTestsRemoteMother()
        {
            var chr2 = new Chromosome("chr2", "2", 1);

            // the same chromosome is reachable through both of its names
            var refNameToChromosome = new Dictionary <string, IChromosome>();
            refNameToChromosome[chr2.EnsemblName] = chr2;
            refNameToChromosome[chr2.UcscName]    = chr2;

            Index tabixIndex;

            // the index stream is only needed while the index is being parsed
            using (var tabixStream = PersistentStreamUtils.GetReadStream("https://illumina-annotation.s3.amazonaws.com/Test/Mother.vcf.gz.tbi"))
            {
                tabixIndex = Reader.GetTabixIndex(tabixStream, refNameToChromosome);
            }

            // the VCF stream is handed over to the Search instance
            var remoteVcfStream = PersistentStreamUtils.GetReadStream("https://illumina-annotation.s3.amazonaws.com/Test/Mother.vcf.gz");

            _search = new Search(tabixIndex, remoteVcfStream);
        }
Пример #18
0
        /// <summary>
        /// Creates the annotation resources from the configured paths and, when supplementary annotation
        /// directories are present, scans the input VCF so the variant positions are known up front.
        /// The logger is silenced when the output file name is "-".
        /// </summary>
        /// <returns>the initialized annotation resources</returns>
        private static AnnotationResources GetAnnotationResources()
        {
            bool outputIsDash = _outputFileName == "-";
            if (outputIsDash)
            {
                Logger.Silence();
            }

            var performanceMetrics = new PerformanceMetrics();

            var resources = new AnnotationResources(_refSequencePath, _inputCachePrefix,
                                                    SupplementaryAnnotationDirectories, null, _customStrTsv,
                                                    _disableRecomposition, _forceMitochondrialAnnotation, _useLegacyVids, performanceMetrics);

            // the position scan only happens when there are supplementary annotation directories
            if (SupplementaryAnnotationDirectories.Count != 0)
            {
                using (var preloadVcfStream = GZipUtilities.GetAppropriateStream(PersistentStreamUtils.GetReadStream(_vcfPath)))
                {
                    resources.GetVariantPositions(preloadVcfStream, null);
                }
            }

            return resources;
        }
Пример #19
0
        /// <summary>
        /// Converts the custom TSV at <c>config.tsvUrl</c> into supplementary annotation files. The NSA file,
        /// its index and its JSON schema are written AES-encrypted to the temp folder and uploaded through
        /// <paramref name="s3Client"/> when at least one NSA item was written; when the parser also yielded
        /// intervals, an NSI file and its schema are produced and uploaded the same way.
        /// </summary>
        /// <param name="config">job configuration (TSV URL and output directory)</param>
        /// <param name="inputFileName">name of the input TSV; everything from the first ".tsv" on is trimmed to form the base name</param>
        /// <param name="result">result object; its <c>genomeAssembly</c> is set from the parser</param>
        /// <param name="s3Client">client used for the decrypting uploads</param>
        /// <returns>the success result listing the names of the uploaded files</returns>
        public static CustomResult Create(CustomConfig config, string inputFileName, CustomResult result, IS3Client s3Client)
        {
            // all intermediate files live in the temp folder and are derived from the input base name
            string tempPath        = Path.GetTempPath();
            string inputBaseName   = inputFileName.TrimEndFromFirst(".tsv");
            string nsaFileName     = inputBaseName + SaCommon.SaFileSuffix;
            string localNsaPath    = Path.Combine(tempPath, nsaFileName);
            string localIndexPath  = localNsaPath + SaCommon.IndexSufix;
            string localSchemaPath = localNsaPath + SaCommon.JsonSchemaSuffix;

            var outputFiles = new List <string>();

            // the same AES provider encrypts the local files and is handed to DecryptUpload below
            using (var aes = new AesCryptoServiceProvider())
            {
                FileMetadata nsaMetadata, indexMetadata, schemaMetadata;

                // values produced inside the using scopes below that are needed after those scopes close
                List <CustomInterval> intervals;
                string            jsonTag;
                SaJsonSchema      intervalJsonSchema;
                DataSourceVersion version;
                GenomeAssembly    genomeAssembly;
                int       nsaItemsCount;
                ReportFor reportFor;

                // each output is layered as file stream -> crypto stream (AES encryptor) -> MD5 stream
                using (var customTsvStream = (PersistentStream)PersistentStreamUtils.GetReadStream(config.tsvUrl))
                    using (var parser = GetVariantAnnotationsParserFromCustomTsvStream(customTsvStream))
                        //
                        using (var nsaStream = FileUtilities.GetCreateStream(localNsaPath))
                            using (var nsaCryptoStream = new CryptoStream(nsaStream, aes.CreateEncryptor(), CryptoStreamMode.Write))
                                using (var nsaMd5Stream = new MD5Stream(nsaCryptoStream))
                                    //
                                    using (var indexStream = FileUtilities.GetCreateStream(localIndexPath))
                                        using (var indexCryptoStream = new CryptoStream(indexStream, aes.CreateEncryptor(), CryptoStreamMode.Write))
                                            using (var indexMd5Stream = new MD5Stream(indexCryptoStream))
                                                //
                                                using (var schemaStream = FileUtilities.GetCreateStream(localSchemaPath))
                                                    using (var schemaCryptoStream = new CryptoStream(schemaStream, aes.CreateEncryptor(), CryptoStreamMode.Write))
                                                        using (var schemaMd5Stream = new MD5Stream(schemaCryptoStream))
                                                        {
                                                            genomeAssembly        = parser.Assembly;
                                                            result.genomeAssembly = genomeAssembly.ToString();
                                                            reportFor             = parser.ReportFor;

                                                            // the writers are disposed before the file metadata is requested from the MD5 streams
                                                            using (var nsaWriter = CaUtilities.GetNsaWriter(nsaMd5Stream, indexMd5Stream, parser, inputFileName, parser.SequenceProvider, out version))
                                                                using (var schemaWriter = new StreamWriter(schemaMd5Stream))
                                                                {
                                                                    (jsonTag, nsaItemsCount, intervalJsonSchema, intervals) = CaUtilities.WriteSmallVariants(parser, nsaWriter, schemaWriter);
                                                                }

                                                            nsaMetadata    = nsaMd5Stream.GetFileMetadata();
                                                            indexMetadata  = indexMd5Stream.GetFileMetadata();
                                                            schemaMetadata = schemaMd5Stream.GetFileMetadata();
                                                        }

                // the NSA trio is only uploaded (and reported) when at least one item was written
                if (nsaItemsCount > 0)
                {
                    string nsaS3Path    = string.Join('/', config.outputDir.path.Trim('/'), nsaFileName);
                    string indexS3Path  = nsaS3Path + SaCommon.IndexSufix;
                    string schemaS3Path = nsaS3Path + SaCommon.JsonSchemaSuffix;

                    s3Client.DecryptUpload(config.outputDir.bucketName, nsaS3Path, localNsaPath, aes, nsaMetadata);
                    s3Client.DecryptUpload(config.outputDir.bucketName, indexS3Path, localIndexPath, aes,
                                           indexMetadata);
                    s3Client.DecryptUpload(config.outputDir.bucketName, schemaS3Path, localSchemaPath, aes,
                                           schemaMetadata);

                    outputFiles.Add(nsaFileName);
                    outputFiles.Add(nsaFileName + SaCommon.IndexSufix);
                    outputFiles.Add(nsaFileName + SaCommon.JsonSchemaSuffix);
                }

                // no intervals: nothing more to write
                // NOTE(review): this early return does not call LambdaUtilities.DeleteTempOutput(), unlike the path at the end of the method - confirm this is intended
                if (intervals == null)
                {
                    return(CustomAnnotationLambda.GetSuccessResult(config, result, outputFiles));
                }

                FileMetadata nsiMetadata, nsiSchemaMetadata;
                string       nsiFileName        = inputBaseName + SaCommon.SiFileSuffix;
                string       localNsiPath       = Path.Combine(tempPath, nsiFileName);
                string       localNsiSchemaPath = localNsiPath + SaCommon.JsonSchemaSuffix;
                //
                // same file -> crypto -> MD5 layering as above, this time for the interval (NSI) file and its schema
                using (var nsiStream = FileUtilities.GetCreateStream(localNsiPath))
                    using (var nsiCryptoStream = new CryptoStream(nsiStream, aes.CreateEncryptor(), CryptoStreamMode.Write))
                        using (var nsiMd5Stream = new MD5Stream(nsiCryptoStream))
                            //
                            using (var nsiSchemaSteam = FileUtilities.GetCreateStream(localNsiSchemaPath))
                                using (var nsiSchemaCryptoStream =
                                           new CryptoStream(nsiSchemaSteam, aes.CreateEncryptor(), CryptoStreamMode.Write))
                                    using (var nsiSchemaMd5Stream = new MD5Stream(nsiSchemaCryptoStream))
                                    {
                                        using (var nsiWriter = CaUtilities.GetNsiWriter(nsiMd5Stream, version, genomeAssembly, jsonTag, reportFor))
                                            using (var schemaWriter = new StreamWriter(nsiSchemaMd5Stream))
                                            {
                                                nsiWriter.Write(intervals);
                                                schemaWriter.Write(intervalJsonSchema);
                                            }

                                        nsiMetadata       = nsiMd5Stream.GetFileMetadata();
                                        nsiSchemaMetadata = nsiSchemaMd5Stream.GetFileMetadata();
                                    }

                string nsiS3Path           = string.Join('/', config.outputDir.path.Trim('/'), nsiFileName);
                string nsiSchemaS3PathFile = nsiS3Path + SaCommon.JsonSchemaSuffix;

                s3Client.DecryptUpload(config.outputDir.bucketName, nsiS3Path, localNsiPath, aes, nsiMetadata);
                s3Client.DecryptUpload(config.outputDir.bucketName, nsiSchemaS3PathFile, localNsiSchemaPath, aes,
                                       nsiSchemaMetadata);

                outputFiles.Add(nsiFileName);
                outputFiles.Add(nsiFileName + SaCommon.JsonSchemaSuffix);
            }

            LambdaUtilities.DeleteTempOutput();

            return(CustomAnnotationLambda.GetSuccessResult(config, result, outputFiles));
        }
Пример #20
0
 /// <summary>
 /// Creates the gene annotation provider from the NGA files listed in <paramref name="files"/>.
 /// </summary>
 /// <param name="files">annotation file listing; may be null</param>
 /// <returns>null when <paramref name="files"/> or its <c>NgaFiles</c> is null, otherwise the provider</returns>
 /// <remarks>
 /// The guard used to test <c>NsiFiles</c> although the provider is built from <c>NgaFiles</c>. That passed a
 /// null <c>NgaFiles</c> to <c>GetStreams</c> and skipped available NGA files when <c>NsiFiles</c> was null.
 /// </remarks>
 public static IGeneAnnotationProvider GetGeneAnnotationProvider(AnnotationFiles files) => files?.NgaFiles == null ? null : new GeneAnnotationProvider(PersistentStreamUtils.GetStreams(files.NgaFiles));
Пример #21
0
 /// <summary>
 /// Creates the reference-minor provider from the RMA file and its index.
 /// </summary>
 /// <param name="files">annotation file listing; may be null</param>
 /// <returns>null when <paramref name="files"/> is null or its <c>RefMinorFile</c> equals the default value, otherwise the provider</returns>
 public static IRefMinorProvider GetRefMinorProvider(AnnotationFiles files)
 {
     if (files == null || files.RefMinorFile == default)
     {
         return null;
     }

     return new RefMinorProvider(PersistentStreamUtils.GetReadStream(files.RefMinorFile.Rma),
                                 PersistentStreamUtils.GetReadStream(files.RefMinorFile.Idx));
 }
Пример #22
0
 /// <summary>
 /// Creates the low-complexity-region provider from the corresponding file.
 /// </summary>
 /// <param name="files">annotation file listing; may be null</param>
 /// <returns>null when <paramref name="files"/> or its <c>LowComplexityRegionFile</c> is null, otherwise the provider</returns>
 public static IAnnotationProvider GetLcrProvider(AnnotationFiles files)
 {
     if (files?.LowComplexityRegionFile == null)
     {
         return null;
     }

     return new LcrProvider(PersistentStreamUtils.GetReadStream(files.LowComplexityRegionFile));
 }
Пример #23
0
 /// <summary>
 /// Creates the conservation score provider from the NPD file and its index.
 /// </summary>
 /// <param name="files">annotation file listing; may be null</param>
 /// <returns>null when <paramref name="files"/> is null or its <c>ConservationFile</c> equals the default value, otherwise the provider</returns>
 public static IAnnotationProvider GetConservationProvider(AnnotationFiles files)
 {
     if (files == null || files.ConservationFile == default)
     {
         return null;
     }

     return new ConservationScoreProvider(PersistentStreamUtils.GetReadStream(files.ConservationFile.Npd),
                                          PersistentStreamUtils.GetReadStream(files.ConservationFile.Idx));
 }
Пример #24
0
 /// <summary>
 /// Creates the protein conservation provider from the corresponding file.
 /// </summary>
 /// <param name="files">annotation file listing; may be null</param>
 /// <returns>null when <paramref name="files"/> is null or its <c>ProteinConservationFile</c> is null or empty, otherwise the provider</returns>
 public static ProteinConservationProvider GetProteinConservationProvider(AnnotationFiles files)
 {
     if (files == null || string.IsNullOrEmpty(files.ProteinConservationFile))
     {
         return null;
     }

     return new ProteinConservationProvider(PersistentStreamUtils.GetReadStream(files.ProteinConservationFile));
 }
Пример #25
0
 /// <summary>
 /// Creates a reference sequence provider that reads from the compressed reference at the given path.
 /// </summary>
 /// <param name="compressedReferencePath">location of the compressed reference file</param>
 /// <returns>a new <c>ReferenceSequenceProvider</c> over a freshly opened read stream</returns>
 public static ISequenceProvider GetSequenceProvider(string compressedReferencePath) =>
     new ReferenceSequenceProvider(PersistentStreamUtils.GetReadStream(compressedReferencePath));
Пример #26
0
        /// <summary>
        /// Lambda entry point that annotates (a range of) the VCF at <c>config.vcfUrl</c>. The JSON output and
        /// its jasix index are written AES-encrypted to the temp folder and then uploaded to the output
        /// directory through the S3 client obtained from <c>config.outputDir</c>.
        /// </summary>
        /// <param name="config">annotation job configuration</param>
        /// <param name="context">lambda context (logger and remaining time)</param>
        /// <returns>the success output, or the result produced by <c>HandleException</c> when anything throws</returns>
        // ReSharper disable once UnusedMember.Global
        public AnnotationResult Run(AnnotationConfig config, ILambdaContext context)
        {
            var result = new AnnotationResult {
                id = config.id
            };
            // stays null if an exception occurs before the environment variable has been read
            string snsTopicArn = null;
            // passed to the logger setup here and to HandleException on failure
            var    runLog      = new StringBuilder();

            try
            {
                LogUtilities.UpdateLogger(context.Logger, runLog);
                LogUtilities.LogLambdaInfo(context, CommandLineUtilities.InformationalVersion);
                LogUtilities.LogObject("Config", config);
                LogUtilities.Log(new[] { LambdaUrlHelper.UrlBaseEnvironmentVariableName, LambdaUtilities.SnsTopicKey });

                LambdaUtilities.GarbageCollect();
                LambdaUtilities.DeleteTempOutput();

                snsTopicArn = LambdaUtilities.GetEnvironmentVariable(LambdaUtilities.SnsTopicKey);

                string vcfUrl = config.vcfUrl;

                using (var annotationResources = GetAnnotationResources(config))
                {
                    // a start position of -1 short-circuits to a success output without producing any file
                    if (annotationResources.InputStartVirtualPosition == -1)
                    {
                        return(GetSuccessOutput(result));
                    }

                    long fileOffset = VirtualPosition.From(annotationResources.InputStartVirtualPosition).FileOffset;

                    // first pass over the VCF (starting at fileOffset): collect the variant positions
                    // NOTE(review): config.annotationRange is dereferenced here without a null check although the code below treats null as a valid value - confirm ToGenomicRange tolerates null
                    // NOTE(review): the BlockGZipStream wrapper created here is not disposed explicitly; only the underlying stream is
                    using (var preloadVcfStream = PersistentStreamUtils.GetReadStream(vcfUrl, fileOffset))
                    {
                        annotationResources.GetVariantPositions(new BlockGZipStream(preloadVcfStream, CompressionMode.Decompress), config.annotationRange.ToGenomicRange(annotationResources.SequenceProvider.RefNameToChromosome));
                    }

                    Logger.WriteLine("Scan for positions to preload complete.");

                    // the same AES provider encrypts the local files and is handed to DecryptUpload below
                    using (var aes = new AesCryptoServiceProvider())
                    {
                        FileMetadata jsonMetadata, jasixMetadata;
                        string       jsonPath  = Path.GetTempPath() + LambdaUrlHelper.JsonSuffix;
                        string       jasixPath = jsonPath + LambdaUrlHelper.JsonIndexSuffix;

                        // second pass: the input is re-opened at fileOffset; a separate header stream from the start of
                        // the VCF is only opened when an annotation range was given.
                        // Each output is layered as file stream -> crypto stream (AES encryptor) -> MD5 stream.
                        using (var inputVcfStream = new BlockGZipStream(PersistentStreamUtils.GetReadStream(vcfUrl, fileOffset), CompressionMode.Decompress))
                            using (var headerStream = config.annotationRange == null ? null : new BlockGZipStream(PersistentStreamUtils.GetReadStream(vcfUrl), CompressionMode.Decompress))
                                //
                                using (var jsonFileStream = FileUtilities.GetCreateStream(jsonPath))
                                    using (var jsonCryptoStream = new CryptoStream(jsonFileStream, aes.CreateEncryptor(), CryptoStreamMode.Write))
                                        using (var jsonMd5Stream = new MD5Stream(jsonCryptoStream))
                                            //
                                            using (var jasixFileStream = FileUtilities.GetCreateStream(jasixPath))
                                                using (var jasixCryptoStream = new CryptoStream(jasixFileStream, aes.CreateEncryptor(), CryptoStreamMode.Write))
                                                    using (var jasixMd5Stream = new MD5Stream(jasixCryptoStream))
                                                    {
                                                        // without a range a NullVcfFilter is used, otherwise a VcfFilter built from the range
                                                        IVcfFilter vcfFilter = config.annotationRange == null
                                ? new NullVcfFilter() as IVcfFilter
                                : new VcfFilter(config.annotationRange.ToGenomicRange(annotationResources.SequenceProvider.RefNameToChromosome));

                                                        // the compressing stream is disposed before the metadata is requested from the MD5 streams
                                                        using (var jsonCompressStream = new BlockGZipStream(jsonMd5Stream, CompressionMode.Compress))
                                                        {
                                                            StreamAnnotation.Annotate(headerStream, inputVcfStream, jsonCompressStream, jasixMd5Stream, annotationResources, vcfFilter, true);
                                                        }

                                                        Logger.WriteLine("Annotation done.");

                                                        jsonMetadata  = jsonMd5Stream.GetFileMetadata();
                                                        jasixMetadata = jasixMd5Stream.GetFileMetadata();
                                                    }

                        result.filePath = S3Utilities.GetKey(config.outputDir.path, config.outputPrefix + LambdaUrlHelper.JsonSuffix);
                        string jasixKey = result.filePath + LambdaUrlHelper.JsonIndexSuffix;

                        // the index is uploaded before the JSON file itself
                        var s3Client = config.outputDir.GetS3Client(context.RemainingTime);
                        s3Client.DecryptUpload(config.outputDir.bucketName, jasixKey, jasixPath, aes, jasixMetadata);
                        s3Client.DecryptUpload(config.outputDir.bucketName, result.filePath, jsonPath, aes, jsonMetadata);

                        Logger.WriteLine("Nirvana result files uploaded.");
                    }
                }

                LambdaUtilities.DeleteTempOutput();
                // an empty file path at this point is reported as a failure through the catch block below
                if (string.IsNullOrEmpty(result.filePath))
                {
                    throw new FileNotFoundException();
                }

                return(GetSuccessOutput(result));
            }
            catch (Exception exception)
            {
                // temp output is removed on failure as well, before the error result is built
                LambdaUtilities.DeleteTempOutput();
                return(HandleException(runLog, result, exception, snsTopicArn));
            }
        }
Пример #27
0
 /// <summary>
 /// Opens a read stream for <paramref name="referencePath"/> via
 /// <c>PersistentStreamUtils.GetReadStream</c> and wraps it in a <c>CompressedSequenceReader</c>.
 /// </summary>
 /// <param name="referencePath">Location of the reference sequence passed to <c>GetReadStream</c>.</param>
 /// <returns>A new <c>CompressedSequenceReader</c> constructed over the opened stream.</returns>
 private static CompressedSequenceReader GetSequenceReader(string referencePath)
 {
     var referenceStream = PersistentStreamUtils.GetReadStream(referencePath);
     return new CompressedSequenceReader(referenceStream);
 }
Пример #28
0
        /// <summary>
        /// Converts the custom TSV at <c>config.tsvUrl</c> into a gene annotation (NGA) file plus its
        /// JSON schema. The outputs (and a log) are written to the temp directory through AES
        /// <c>CryptoStream</c>s and then handed to <c>s3Client.DecryptUpload</c> for upload.
        /// </summary>
        /// <param name="config">Supplies the TSV URL, the <c>skipGeneIdValidation</c> flag and the output bucket/path.</param>
        /// <param name="inputBaseName">Base name to which the NGA file suffix is appended.</param>
        /// <param name="result">Result object forwarded to <c>CustomAnnotationLambda.GetSuccessResult</c>.</param>
        /// <param name="s3Client">Client used to upload the generated files.</param>
        /// <returns>Whatever <c>CustomAnnotationLambda.GetSuccessResult</c> returns for the uploaded file names.</returns>
        public static CustomResult Create(CustomConfig config, string inputBaseName, CustomResult result, IS3Client s3Client)
        {
            string tempDirectory  = Path.GetTempPath();
            string ngaFileName    = inputBaseName + SaCommon.NgaFileSuffix;
            string schemaFileName = ngaFileName + SaCommon.JsonSchemaSuffix;
            string ngaTempPath    = Path.Combine(tempDirectory, ngaFileName);
            string schemaTempPath = ngaTempPath + SaCommon.JsonSchemaSuffix;
            string logTempPath    = Path.Combine(tempDirectory, LogFileName);

            HttpUtilities.ValidateUrl(LambdaUrlHelper.GetUgaUrl());

            using (var aes = new AesCryptoServiceProvider())
            {
                FileMetadata ngaMetadata, schemaMetadata, logMetadata;

                // log output: file -> encryptor -> MD5
                using (var logFile = FileUtilities.GetCreateStream(logTempPath))
                using (var logEncrypted = new CryptoStream(logFile, aes.CreateEncryptor(), CryptoStreamMode.Write))
                using (var logHashed = new MD5Stream(logEncrypted))
                // custom TSV input and its parser
                using (var tsvStream = (PersistentStream)PersistentStreamUtils.GetReadStream(config.tsvUrl))
                using (var tsvParser = GetGeneAnnotationsParserFromCustomTsvStream(tsvStream))
                // NGA output: file -> encryptor -> MD5
                using (var ngaFile = FileUtilities.GetCreateStream(ngaTempPath))
                using (var ngaEncrypted = new CryptoStream(ngaFile, aes.CreateEncryptor(), CryptoStreamMode.Write))
                using (var ngaHashed = new MD5Stream(ngaEncrypted))
                // schema output: file -> encryptor -> MD5
                using (var schemaFile = FileUtilities.GetCreateStream(schemaTempPath))
                using (var schemaEncrypted = new CryptoStream(schemaFile, aes.CreateEncryptor(), CryptoStreamMode.Write))
                using (var schemaHashed = new MD5Stream(schemaEncrypted))
                {
                    using (var ngaWriter = CaUtilities.GetNgaWriter(ngaHashed, tsvParser, config.tsvUrl))
                    using (var schemaWriter = new StreamWriter(schemaHashed))
                    using (var logWriter = new StreamWriter(logHashed))
                    {
                        ngaWriter.Write(tsvParser.GetItems(config.skipGeneIdValidation, logWriter));
                        schemaWriter.Write(tsvParser.JsonSchema);
                    }

                    // the metadata is only read once every writer above has been disposed
                    ngaMetadata    = ngaHashed.GetFileMetadata();
                    schemaMetadata = schemaHashed.GetFileMetadata();
                    logMetadata    = logHashed.GetFileMetadata();
                }

                string bucket    = config.outputDir.bucketName;
                string keyPrefix = config.outputDir.path.Trim('/');

                // the log file is only uploaded when gene ID validation was skipped
                if (config.skipGeneIdValidation)
                {
                    string logKey = string.Join('/', keyPrefix, LogFileName);
                    Logger.WriteLine("uploading log file to " + logKey);
                    s3Client.DecryptUpload(bucket, logKey, logTempPath, aes, logMetadata);
                }

                string ngaKey    = string.Join('/', keyPrefix, ngaFileName);
                string schemaKey = ngaKey + SaCommon.JsonSchemaSuffix;

                s3Client.DecryptUpload(bucket, ngaKey, ngaTempPath, aes, ngaMetadata);
                s3Client.DecryptUpload(bucket, schemaKey, schemaTempPath, aes, schemaMetadata);

                var outputFiles = new List<string> { ngaFileName, schemaFileName };

                LambdaUtilities.DeleteTempOutput();

                return CustomAnnotationLambda.GetSuccessResult(config, result, outputFiles);
            }
        }