Beispiel #1
0
 /// <summary>
 /// Stores the supplied readers and collaborators, and derives the variant factory and the
 /// reference-name lookup from the sequence provider.
 /// </summary>
 /// <param name="headerReader">Reader kept in <c>_headerReader</c>.</param>
 /// <param name="vcfLineReader">Reader kept in <c>_reader</c>.</param>
 /// <param name="sequenceProvider">Passed to the <c>VariantFactory</c> and queried for <c>RefNameToChromosome</c>.</param>
 /// <param name="refMinorProvider">Kept in <c>_refMinorProvider</c>.</param>
 /// <param name="vcfFilter">Kept in <c>_vcfFilter</c>.</param>
 private VcfReader(
     StreamReader headerReader,
     StreamReader vcfLineReader,
     ISequenceProvider sequenceProvider,
     IRefMinorProvider refMinorProvider,
     IVcfFilter vcfFilter)
 {
     // input readers
     _headerReader = headerReader;
     _reader = vcfLineReader;

     // injected collaborators
     _refMinorProvider = refMinorProvider;
     _vcfFilter = vcfFilter;

     // members derived from the sequence provider
     _variantFactory = new VariantFactory(sequenceProvider);
     _refNameToChromosome = sequenceProvider.RefNameToChromosome;
 }
Beispiel #2
0
 /// <summary>
 /// Stores the supplied readers and providers, and builds the variant factory from the
 /// provider's sequence and the variant ID creator.
 /// </summary>
 /// <param name="headerReader">Reader kept in <c>_headerReader</c>.</param>
 /// <param name="vcfLineReader">Reader kept in <c>_reader</c>.</param>
 /// <param name="sequenceProvider">Kept in <c>_sequenceProvider</c>; also supplies <c>Sequence</c> and <c>RefNameToChromosome</c>.</param>
 /// <param name="refMinorProvider">Kept in <c>_refMinorProvider</c>.</param>
 /// <param name="vcfFilter">Kept in <c>_vcfFilter</c>.</param>
 /// <param name="vidCreator">Handed to the <c>VariantFactory</c>.</param>
 /// <param name="mitoHeteroplasmyProvider">Kept in <c>_mitoHeteroplasmyProvider</c>.</param>
 private VcfReader(
     StreamReader headerReader,
     StreamReader vcfLineReader,
     ISequenceProvider sequenceProvider,
     IRefMinorProvider refMinorProvider,
     IVcfFilter vcfFilter,
     IVariantIdCreator vidCreator,
     IMitoHeteroplasmyProvider mitoHeteroplasmyProvider)
 {
     // input readers
     _headerReader = headerReader;
     _reader = vcfLineReader;

     // injected collaborators
     _refMinorProvider = refMinorProvider;
     _vcfFilter = vcfFilter;
     _mitoHeteroplasmyProvider = mitoHeteroplasmyProvider;

     // the sequence provider and the members derived from it
     _variantFactory = new VariantFactory(sequenceProvider.Sequence, vidCreator);
     _sequenceProvider = sequenceProvider;
     _refNameToChromosome = sequenceProvider.RefNameToChromosome;
 }
Beispiel #3
0
        /// <summary>
        /// Wraps the VCF stream (and, when given, a separate header stream) in stream readers and
        /// creates a <c>VcfReader</c> from them and the annotation resources.
        /// </summary>
        /// <param name="headerStream">Optional separate header stream; when null the VCF reader is also used for the header.</param>
        /// <param name="vcfStream">Stream holding the VCF lines.</param>
        /// <param name="annotationResources">Supplies the providers, recomposer, VID creator and the input start virtual position.</param>
        /// <param name="vcfFilter">Filter forwarded to the reader.</param>
        /// <param name="mitoHeteroplasmyProvider">Provider forwarded to the reader.</param>
        /// <returns>The reader returned by <c>VcfReader.Create</c>.</returns>
        private static VcfReader GetVcfReader(Stream headerStream, Stream vcfStream, IAnnotationResources annotationResources,
                                              IVcfFilter vcfFilter, IMitoHeteroplasmyProvider mitoHeteroplasmyProvider)
        {
            var lineReader = FileUtilities.GetStreamReader(vcfStream);

            // by default the header is read from the same reader as the VCF lines
            StreamReader headerReader = lineReader;

            if (headerStream != null)
            {
                headerReader = FileUtilities.GetStreamReader(headerStream);

                // a separate header stream was supplied: move the VCF stream to the block offset
                // of the input start virtual position
                var startPosition = Tabix.VirtualPosition.From(annotationResources.InputStartVirtualPosition);
                vcfStream.Position = startPosition.BlockOffset;
            }

            return VcfReader.Create(
                headerReader,
                lineReader,
                annotationResources.SequenceProvider,
                annotationResources.RefMinorProvider,
                annotationResources.Recomposer,
                vcfFilter,
                annotationResources.VidCreator,
                mitoHeteroplasmyProvider);
        }
Beispiel #4
0
        /// <summary>
        /// Builds a <c>SimplePosition</c> from the split columns of a VCF line.
        /// </summary>
        /// <param name="vcfFields">Columns of one VCF line, indexed by the <c>VcfCommon</c> column indices.</param>
        /// <param name="vcfFilter">Filter asked whether the position is past the end of the region of interest.</param>
        /// <param name="refNameToChromosome">Lookup used to resolve the chromosome column.</param>
        /// <param name="isRecomposed">Value copied into <c>IsRecomposed</c>.</param>
        /// <returns>
        /// The populated position, or null when <c>vcfFilter.PassedTheEnd</c> reports that the
        /// position is past the end.
        /// </returns>
        public static SimplePosition GetSimplePosition(string[] vcfFields, IVcfFilter vcfFilter, IDictionary<string, IChromosome> refNameToChromosome, bool isRecomposed = false)
        {
            var simplePosition = new SimplePosition(
                ReferenceNameUtilities.GetChromosome(refNameToChromosome, vcfFields[VcfCommon.ChromIndex]),
                // the position column is machine-readable data, so it is parsed independently of the thread culture
                int.Parse(vcfFields[VcfCommon.PosIndex], System.Globalization.CultureInfo.InvariantCulture),
                vcfFields[VcfCommon.RefIndex],
                vcfFields[VcfCommon.AltIndex].OptimizedSplit(','));

            if (vcfFilter.PassedTheEnd(simplePosition.Chromosome, simplePosition.Start))
            {
                return null;
            }

            string altField = vcfFields[VcfCommon.AltIndex];

            // ALT fields starting with '<' or equal to "*" get an end of -1; otherwise the end
            // is derived from the start and the reference allele length
            bool useSentinelEnd = altField.OptimizedStartsWith('<') || altField == "*";
            int numAltAlleles = simplePosition.AltAlleles.Length;

            simplePosition.End          = useSentinelEnd ? -1 : simplePosition.Start + simplePosition.RefAllele.Length - 1;
            simplePosition.VcfFields    = vcfFields;
            simplePosition.IsRecomposed = isRecomposed;
            simplePosition.IsDecomposed = new bool[numAltAlleles]; // false by default
            simplePosition.Vids         = new string[numAltAlleles];
            simplePosition.LinkedVids   = new List<string>[numAltAlleles];
            return simplePosition;
        }
Beispiel #5
0
        /// <summary>
        /// Constructs a <c>VcfReader</c>, parses its header, and then sets the recomposer.
        /// </summary>
        /// <returns>The reader after <c>ParseHeader</c> and <c>SetRecomposer</c> have been called.</returns>
        public static VcfReader Create(StreamReader headerReader, StreamReader vcfLineReader, ISequenceProvider sequenceProvider,
                                       IRefMinorProvider refMinorProvider, IRecomposer recomposer, IVcfFilter vcfFilter, IVariantIdCreator vidCreator, IMitoHeteroplasmyProvider mitoHeteroplasmyProvider, bool enableDq = false)
        {
            var reader = new VcfReader(
                headerReader,
                vcfLineReader,
                sequenceProvider,
                refMinorProvider,
                vcfFilter,
                vidCreator,
                mitoHeteroplasmyProvider,
                enableDq);

            // the header is parsed before the recomposer is attached
            reader.ParseHeader();
            reader.SetRecomposer(recomposer);

            return reader;
        }
Beispiel #6
0
        /// <summary>
        /// Lambda entry point: annotates the VCF at <c>config.vcfUrl</c>, writes the JSON output and
        /// its index to encrypted temporary files, and uploads both through <c>DecryptUpload</c>.
        /// </summary>
        /// <param name="config">Annotation request (id, VCF URL, optional annotation range, output location).</param>
        /// <param name="context">Lambda context providing the logger and the remaining execution time.</param>
        /// <returns>
        /// The result of <c>GetSuccessOutput</c> on success, or the result of <c>HandleException</c>
        /// when any exception is thrown inside the try block.
        /// </returns>
        // ReSharper disable once UnusedMember.Global
        public AnnotationResult Run(AnnotationConfig config, ILambdaContext context)
        {
            var result = new AnnotationResult {
                id = config.id
            };
            // stays null until the environment variable has been read; passed to HandleException on failure
            string snsTopicArn = null;
            var    runLog      = new StringBuilder();

            try
            {
                LogUtilities.UpdateLogger(context.Logger, runLog);
                LogUtilities.LogLambdaInfo(context, CommandLineUtilities.InformationalVersion);
                LogUtilities.LogObject("Config", config);
                LogUtilities.Log(new[] { LambdaUrlHelper.UrlBaseEnvironmentVariableName, LambdaUtilities.SnsTopicKey });

                LambdaUtilities.GarbageCollect();
                LambdaUtilities.DeleteTempOutput();

                snsTopicArn = LambdaUtilities.GetEnvironmentVariable(LambdaUtilities.SnsTopicKey);

                string vcfUrl = config.vcfUrl;

                using (var annotationResources = GetAnnotationResources(config))
                {
                    // a start virtual position of -1 short-circuits to a success result without annotating
                    // (presumably "nothing to annotate in this range" - confirm against GetAnnotationResources)
                    if (annotationResources.InputStartVirtualPosition == -1)
                    {
                        return(GetSuccessOutput(result));
                    }

                    long fileOffset = VirtualPosition.From(annotationResources.InputStartVirtualPosition).FileOffset;

                    // first pass over the VCF (opened at fileOffset) to collect variant positions
                    // NOTE(review): config.annotationRange is used here without a null check, while the code
                    // further down treats a null annotationRange as valid - confirm ToGenomicRange tolerates null
                    using (var preloadVcfStream = PersistentStreamUtils.GetReadStream(vcfUrl, fileOffset))
                    {
                        annotationResources.GetVariantPositions(new BlockGZipStream(preloadVcfStream, CompressionMode.Decompress), config.annotationRange.ToGenomicRange(annotationResources.SequenceProvider.RefNameToChromosome));
                    }

                    Logger.WriteLine("Scan for positions to preload complete.");

                    // the same AES instance encrypts both temporary files and is later handed to DecryptUpload
                    using (var aes = new AesCryptoServiceProvider())
                    {
                        FileMetadata jsonMetadata, jasixMetadata;
                        string       jsonPath  = Path.GetTempPath() + LambdaUrlHelper.JsonSuffix;
                        string       jasixPath = jsonPath + LambdaUrlHelper.JsonIndexSuffix;

                        // second pass: the VCF is reopened at fileOffset; a separate header stream (opened
                        // without an offset) is only created when an annotation range was supplied
                        using (var inputVcfStream = new BlockGZipStream(PersistentStreamUtils.GetReadStream(vcfUrl, fileOffset), CompressionMode.Decompress))
                            using (var headerStream = config.annotationRange == null ? null : new BlockGZipStream(PersistentStreamUtils.GetReadStream(vcfUrl), CompressionMode.Decompress))
                                //
                                using (var jsonFileStream = FileUtilities.GetCreateStream(jsonPath))
                                    using (var jsonCryptoStream = new CryptoStream(jsonFileStream, aes.CreateEncryptor(), CryptoStreamMode.Write))
                                        using (var jsonMd5Stream = new MD5Stream(jsonCryptoStream))
                                            //
                                            using (var jasixFileStream = FileUtilities.GetCreateStream(jasixPath))
                                                using (var jasixCryptoStream = new CryptoStream(jasixFileStream, aes.CreateEncryptor(), CryptoStreamMode.Write))
                                                    using (var jasixMd5Stream = new MD5Stream(jasixCryptoStream))
                                                    {
                                                        // no annotation range: use the null filter; otherwise filter on the requested range
                                                        IVcfFilter vcfFilter = config.annotationRange == null
                                ? new NullVcfFilter() as IVcfFilter
                                : new VcfFilter(config.annotationRange.ToGenomicRange(annotationResources.SequenceProvider.RefNameToChromosome));

                                                        // the compression stream is disposed before the file metadata is read below
                                                        using (var jsonCompressStream = new BlockGZipStream(jsonMd5Stream, CompressionMode.Compress))
                                                        {
                                                            StreamAnnotation.Annotate(headerStream, inputVcfStream, jsonCompressStream, jasixMd5Stream, annotationResources, vcfFilter, true);
                                                        }

                                                        Logger.WriteLine("Annotation done.");

                                                        jsonMetadata  = jsonMd5Stream.GetFileMetadata();
                                                        jasixMetadata = jasixMd5Stream.GetFileMetadata();
                                                    }

                        result.filePath = S3Utilities.GetKey(config.outputDir.path, config.outputPrefix + LambdaUrlHelper.JsonSuffix);
                        string jasixKey = result.filePath + LambdaUrlHelper.JsonIndexSuffix;

                        // the index file is uploaded first, then the JSON file
                        var s3Client = config.outputDir.GetS3Client(context.RemainingTime);
                        s3Client.DecryptUpload(config.outputDir.bucketName, jasixKey, jasixPath, aes, jasixMetadata);
                        s3Client.DecryptUpload(config.outputDir.bucketName, result.filePath, jsonPath, aes, jsonMetadata);

                        Logger.WriteLine("Nirvana result files uploaded.");
                    }
                }

                LambdaUtilities.DeleteTempOutput();
                // an empty output key is treated as a failure; the exception is handled by the catch block below
                if (string.IsNullOrEmpty(result.filePath))
                {
                    throw new FileNotFoundException();
                }

                return(GetSuccessOutput(result));
            }
            catch (Exception exception)
            {
                // any failure: remove the temporary output and let HandleException build the returned result
                LambdaUtilities.DeleteTempOutput();
                return(HandleException(runLog, result, exception, snsTopicArn));
            }
        }
Beispiel #7
0
        /// <summary>
        /// Reads positions from the VCF stream, annotates each one that has variants, and writes
        /// the resulting JSON entries followed by the gene annotations.
        /// </summary>
        /// <param name="headerStream">Optional separate header stream, forwarded to <c>GetVcfReader</c>.</param>
        /// <param name="inputVcfStream">Stream holding the VCF lines.</param>
        /// <param name="outputJsonStream">Destination of the JSON output.</param>
        /// <param name="outputJsonIndexStream">Destination of the JSON index.</param>
        /// <param name="annotationResources">Supplies the metrics, the annotator and the preload hook.</param>
        /// <param name="vcfFilter">Filter forwarded to the VCF reader.</param>
        /// <param name="ignoreEmptyChromosome">When true, positions whose chromosome is empty are skipped.</param>
        /// <returns><c>ExitCodes.Success</c> when the loop completes without throwing.</returns>
        public static ExitCodes Annotate(Stream headerStream, Stream inputVcfStream, Stream outputJsonStream,
                                         Stream outputJsonIndexStream, AnnotationResources annotationResources, IVcfFilter vcfFilter,
                                         bool ignoreEmptyChromosome)
        {
            var metrics = annotationResources.Metrics;

            PerformanceMetrics.ShowAnnotationHeader();

            // placeholder that is replaced as soon as the first real chromosome is seen
            IChromosome activeChromosome = new EmptyChromosome("dummy");
            int variantCount = 0;
            IMitoHeteroplasmyProvider mitoHeteroplasmyProvider = MitoHeteroplasmyReader.GetProvider();

            using (var vcfReader = GetVcfReader(headerStream, inputVcfStream, annotationResources, vcfFilter, mitoHeteroplasmyProvider))
            using (var jsonWriter = new JsonWriter(outputJsonStream, outputJsonIndexStream, annotationResources, Date.CurrentTimeStamp, vcfReader.GetSampleNames(), false))
            {
                try
                {
                    CheckGenomeAssembly(annotationResources, vcfReader);
                    SetMitochondrialAnnotationBehavior(annotationResources, vcfReader);

                    for (IPosition position = vcfReader.GetNextPosition();
                         position != null;
                         position = vcfReader.GetNextPosition())
                    {
                        IChromosome chromosome = position.Chromosome;

                        if (ignoreEmptyChromosome && chromosome.IsEmpty())
                        {
                            continue;
                        }

                        bool chromosomeChanged = chromosome.Index != activeChromosome.Index;

                        if (chromosomeChanged)
                        {
                            // report the chromosome that just finished, unless it was the placeholder
                            if (!activeChromosome.IsEmpty())
                            {
                                metrics.ShowAnnotationEntry(activeChromosome, variantCount);
                            }

                            variantCount = 0;

                            metrics.Preload.Start();
                            annotationResources.PreLoad(chromosome);
                            metrics.Preload.Stop();

                            metrics.Annotation.Start();
                            activeChromosome = chromosome;
                        }

                        // positions without variants are counted but never annotated or written
                        if (position.Variants != null)
                        {
                            var annotated = annotationResources.Annotator.Annotate(position);
                            string json = annotated?.GetJsonString();

                            if (json != null)
                            {
                                jsonWriter.WritePosition(annotated.Position, json);
                            }
                        }

                        variantCount++;
                    }

                    jsonWriter.WriteGenes(annotationResources.Annotator.GetGeneAnnotations());
                }
                catch (Exception e)
                {
                    // attach the VCF line being processed so callers can report it
                    e.Data[ExitCodeUtilities.VcfLine] = vcfReader.VcfLine;
                    throw;
                }
            }

            // report the last chromosome processed
            if (!activeChromosome.IsEmpty())
            {
                metrics.ShowAnnotationEntry(activeChromosome, variantCount);
            }

            metrics.ShowSummaryTable();

            return ExitCodes.Success;
        }
Beispiel #8
0
 /// <summary>
 /// Splits a VCF line on tabs and delegates to the overload that takes the split columns.
 /// </summary>
 /// <param name="vcfLine">A raw VCF line; may be null.</param>
 /// <param name="vcfFilter">Filter forwarded to the column-based overload.</param>
 /// <param name="refNameToChromosome">Chromosome lookup forwarded to the column-based overload.</param>
 /// <returns>Null when <paramref name="vcfLine"/> is null; otherwise the result of the column-based overload.</returns>
 public static SimplePosition GetSimplePosition(string vcfLine, IVcfFilter vcfFilter,
                                                IDictionary<string, IChromosome> refNameToChromosome)
 {
     if (vcfLine == null)
     {
         return null;
     }

     var vcfFields = vcfLine.OptimizedSplit('\t');
     return GetSimplePosition(vcfFields, vcfFilter, refNameToChromosome);
 }
Beispiel #9
0
        /// <summary>
        /// Reads positions from the VCF stream, annotates each one that has variants, and writes
        /// the resulting JSON entries followed by the annotated genes. The supplied
        /// <paramref name="annotationResources"/> is disposed when annotation ends.
        /// </summary>
        /// <param name="headerStream">Optional separate header stream, forwarded to <c>GetVcfReader</c>.</param>
        /// <param name="inputVcfStream">Stream holding the VCF lines.</param>
        /// <param name="outputJsonStream">Destination of the JSON output; a <c>BlockGZipStream</c> selects the console logger.</param>
        /// <param name="outputJsonIndexStream">Destination of the JSON index.</param>
        /// <param name="annotationResources">Supplies the annotator and the preload hook.</param>
        /// <param name="vcfFilter">Filter forwarded to the VCF reader.</param>
        /// <param name="ignoreEmptyChromosome">When true, positions whose chromosome is empty are skipped.</param>
        /// <returns><c>ExitCodes.Success</c> when the loop completes without throwing.</returns>
        public static ExitCodes Annotate(Stream headerStream, Stream inputVcfStream, Stream outputJsonStream,
                                         Stream outputJsonIndexStream, AnnotationResources annotationResources, IVcfFilter vcfFilter, bool ignoreEmptyChromosome = false)
        {
            // metrics go to the console only when the JSON output is a BlockGZipStream
            ILogger logger;
            if (outputJsonStream is BlockGZipStream)
            {
                logger = new ConsoleLogger();
            }
            else
            {
                logger = new NullLogger();
            }

            var metrics = new PerformanceMetrics(logger);

            using (annotationResources)
            using (var vcfReader = GetVcfReader(headerStream, inputVcfStream, annotationResources, vcfFilter))
            using (var jsonWriter = new JsonWriter(outputJsonStream, outputJsonIndexStream, annotationResources, Date.CurrentTimeStamp, vcfReader.GetSampleNames(), false))
            {
                try
                {
                    CheckGenomeAssembly(annotationResources, vcfReader);
                    SetMitochondrialAnnotationBehavior(annotationResources, vcfReader);

                    int lastChromIndex = -1;

                    for (IPosition position = vcfReader.GetNextPosition();
                         position != null;
                         position = vcfReader.GetNextPosition())
                    {
                        if (ignoreEmptyChromosome && position.Chromosome.IsEmpty())
                        {
                            continue;
                        }

                        // preload whenever the chromosome index differs from the previous position's
                        bool chromosomeChanged = lastChromIndex != position.Chromosome.Index;
                        if (chromosomeChanged)
                        {
                            annotationResources.PreLoad(position.Chromosome);
                        }

                        lastChromIndex = UpdatePerformanceMetrics(lastChromIndex, position.Chromosome, metrics);

                        // positions without variants are counted but never annotated or written
                        if (position.Variants != null)
                        {
                            var annotated = annotationResources.Annotator.Annotate(position);
                            string json = annotated?.GetJsonString();

                            if (json != null)
                            {
                                jsonWriter.WriteJsonEntry(annotated.Position, json);
                            }
                        }

                        metrics.Increment();
                    }

                    jsonWriter.WriteAnnotatedGenes(annotationResources.Annotator.GetGeneAnnotations());
                }
                catch (Exception e)
                {
                    // attach the VCF line being processed so callers can report it
                    e.Data[ExitCodeUtilities.VcfLine] = vcfReader.VcfLine;
                    throw;
                }
            }

            metrics.ShowAnnotationTime();

            return ExitCodes.Success;
        }
Beispiel #10
0
        /// <summary>
        /// Builds a <c>SimplePosition</c> for an already resolved chromosome and position, using
        /// the REF and ALT columns of the supplied VCF fields.
        /// </summary>
        /// <param name="chromosome">Chromosome of the position.</param>
        /// <param name="position">Start coordinate of the position.</param>
        /// <param name="vcfFields">Columns of one VCF line, indexed by the <c>VcfCommon</c> column indices.</param>
        /// <param name="vcfFilter">Filter asked whether the position is past the end of the region of interest.</param>
        /// <param name="isRecomposed">Value copied into <c>IsRecomposed</c>.</param>
        /// <returns>
        /// The populated position, or null when <c>vcfFilter.PassedTheEnd</c> reports that the
        /// position is past the end.
        /// </returns>
        public static SimplePosition GetSimplePosition(IChromosome chromosome, int position, string[] vcfFields, IVcfFilter vcfFilter, bool isRecomposed = false)
        {
            bool pastTheEnd = vcfFilter.PassedTheEnd(chromosome, position);
            if (pastTheEnd)
            {
                return null;
            }

            string refAllele = vcfFields[VcfCommon.RefIndex];
            string altField  = vcfFields[VcfCommon.AltIndex];

            string[] altAlleles = altField.OptimizedSplit(',');
            int altCount = altAlleles.Length;

            // ALT fields starting with '<' or equal to "*" get an end of -1
            bool useSentinelEnd = altField.OptimizedStartsWith('<') || altField == "*";

            var simplePosition = new SimplePosition(chromosome, position, refAllele, altAlleles)
            {
                End          = useSentinelEnd ? -1 : position + refAllele.Length - 1,
                VcfFields    = vcfFields,
                IsRecomposed = isRecomposed,
                IsDecomposed = new bool[altCount],
                Vids         = new string[altCount],
                LinkedVids   = new List<string>[altCount]
            };

            return simplePosition;
        }
Beispiel #11
0
        /// <summary>
        /// Constructs a <c>VcfReader</c>, parses its header, and then sets the recomposer.
        /// </summary>
        /// <returns>The reader after <c>ParseHeader</c> and <c>SetRecomposer</c> have been called.</returns>
        public static VcfReader Create(StreamReader headerReader, StreamReader vcfLineReader, ISequenceProvider sequenceProvider,
                                       IRefMinorProvider refMinorProvider, IRecomposer recomposer, IVcfFilter vcfFilter, IVariantIdCreator vidCreator)
        {
            var reader = new VcfReader(
                headerReader,
                vcfLineReader,
                sequenceProvider,
                refMinorProvider,
                vcfFilter,
                vidCreator);

            // the header is parsed before the recomposer is attached
            reader.ParseHeader();
            reader.SetRecomposer(recomposer);

            return reader;
        }