/// <summary>
/// Builds a reader from a header reader and a VCF line reader. The variant factory is
/// created from <paramref name="sequenceProvider"/>, which also supplies the
/// reference-name-to-chromosome lookup stored on the instance.
/// </summary>
/// <param name="headerReader">Reader stored as the source of the header.</param>
/// <param name="vcfLineReader">Reader stored as the source of the VCF lines.</param>
/// <param name="sequenceProvider">Passed to the variant factory; its RefNameToChromosome is kept.</param>
/// <param name="refMinorProvider">Stored as-is.</param>
/// <param name="vcfFilter">Stored as-is.</param>
private VcfReader(
    StreamReader headerReader,
    StreamReader vcfLineReader,
    ISequenceProvider sequenceProvider,
    IRefMinorProvider refMinorProvider,
    IVcfFilter vcfFilter)
{
    // plain pass-through dependencies
    _vcfFilter        = vcfFilter;
    _refMinorProvider = refMinorProvider;
    _reader           = vcfLineReader;
    _headerReader     = headerReader;

    // everything derived from the sequence provider (factory first, then the lookup)
    _variantFactory      = new VariantFactory(sequenceProvider);
    _refNameToChromosome = sequenceProvider.RefNameToChromosome;
}
/// <summary>
/// Builds a reader from a header reader and a VCF line reader. The variant factory is
/// created from the provider's sequence together with <paramref name="vidCreator"/>; the
/// sequence provider itself, its reference-name-to-chromosome lookup and the remaining
/// collaborators are stored on the instance.
/// </summary>
/// <param name="headerReader">Reader stored as the source of the header.</param>
/// <param name="vcfLineReader">Reader stored as the source of the VCF lines.</param>
/// <param name="sequenceProvider">Stored; also supplies Sequence and RefNameToChromosome.</param>
/// <param name="refMinorProvider">Stored as-is.</param>
/// <param name="vcfFilter">Stored as-is.</param>
/// <param name="vidCreator">Passed to the variant factory.</param>
/// <param name="mitoHeteroplasmyProvider">Stored as-is.</param>
private VcfReader(
    StreamReader headerReader,
    StreamReader vcfLineReader,
    ISequenceProvider sequenceProvider,
    IRefMinorProvider refMinorProvider,
    IVcfFilter vcfFilter,
    IVariantIdCreator vidCreator,
    IMitoHeteroplasmyProvider mitoHeteroplasmyProvider)
{
    // plain pass-through dependencies
    _mitoHeteroplasmyProvider = mitoHeteroplasmyProvider;
    _vcfFilter                = vcfFilter;
    _refMinorProvider         = refMinorProvider;
    _reader                   = vcfLineReader;
    _headerReader             = headerReader;

    // everything derived from the sequence provider (factory first, then the lookup)
    _variantFactory      = new VariantFactory(sequenceProvider.Sequence, vidCreator);
    _sequenceProvider    = sequenceProvider;
    _refNameToChromosome = sequenceProvider.RefNameToChromosome;
}
/// <summary>
/// Wraps the supplied streams in readers and creates a <see cref="VcfReader"/> from them.
/// </summary>
/// <remarks>
/// Without a separate header stream, one reader over <paramref name="vcfStream"/> serves both
/// the header and the VCF lines. With a header stream, the header gets its own reader and
/// <paramref name="vcfStream"/> is repositioned to the block offset of the virtual position
/// held in <c>annotationResources.InputStartVirtualPosition</c>.
/// </remarks>
/// <param name="headerStream">Optional stream holding the header; may be null.</param>
/// <param name="vcfStream">Stream holding the VCF lines.</param>
/// <param name="annotationResources">Supplies the providers, recomposer, VID creator and start position.</param>
/// <param name="vcfFilter">Forwarded to the reader.</param>
/// <param name="mitoHeteroplasmyProvider">Forwarded to the reader.</param>
/// <returns>The reader returned by <c>VcfReader.Create</c>.</returns>
private static VcfReader GetVcfReader(
    Stream headerStream,
    Stream vcfStream,
    IAnnotationResources annotationResources,
    IVcfFilter vcfFilter,
    IMitoHeteroplasmyProvider mitoHeteroplasmyProvider)
{
    var lineReader = FileUtilities.GetStreamReader(vcfStream);

    // default: the header is read from the same reader as the VCF lines
    StreamReader headerReader = lineReader;

    if (headerStream != null)
    {
        headerReader       = FileUtilities.GetStreamReader(headerStream);
        vcfStream.Position = Tabix.VirtualPosition.From(annotationResources.InputStartVirtualPosition).BlockOffset;
    }

    return VcfReader.Create(
        headerReader,
        lineReader,
        annotationResources.SequenceProvider,
        annotationResources.RefMinorProvider,
        annotationResources.Recomposer,
        vcfFilter,
        annotationResources.VidCreator,
        mitoHeteroplasmyProvider);
}
/// <summary>
/// Builds a <see cref="SimplePosition"/> from the already-split columns of a VCF line.
/// </summary>
/// <param name="vcfFields">Columns of one VCF line, indexed by the <c>VcfCommon</c> constants.</param>
/// <param name="vcfFilter">Asked whether the position lies past the end; if so, null is returned.</param>
/// <param name="refNameToChromosome">Lookup used to resolve the chromosome column.</param>
/// <param name="isRecomposed">Copied onto the resulting position.</param>
/// <returns>
/// The populated position, or null when <paramref name="vcfFilter"/> reports that the position
/// has passed the end.
/// </returns>
public static SimplePosition GetSimplePosition(
    string[] vcfFields,
    IVcfFilter vcfFilter,
    IDictionary<string, IChromosome> refNameToChromosome,
    bool isRecomposed = false)
{
    // pull the columns out in the same order the constructor consumes them
    var chromosome  = ReferenceNameUtilities.GetChromosome(refNameToChromosome, vcfFields[VcfCommon.ChromIndex]);
    int start       = int.Parse(vcfFields[VcfCommon.PosIndex]);
    string refField = vcfFields[VcfCommon.RefIndex];
    string altField = vcfFields[VcfCommon.AltIndex];
    var altAlleles  = altField.OptimizedSplit(',');

    var simplePosition = new SimplePosition(chromosome, start, refField, altAlleles);

    if (vcfFilter.PassedTheEnd(simplePosition.Chromosome, simplePosition.Start))
    {
        return null;
    }

    // an ALT field that starts with '<' or is exactly "*" gets an end of -1;
    // otherwise the end is derived from the start and the reference allele length
    bool useSentinelEnd = altField.OptimizedStartsWith('<') || altField == "*";
    if (useSentinelEnd)
    {
        simplePosition.End = -1;
    }
    else
    {
        simplePosition.End = simplePosition.Start + simplePosition.RefAllele.Length - 1;
    }

    simplePosition.VcfFields    = vcfFields;
    simplePosition.IsRecomposed = isRecomposed;

    // per-ALT-allele bookkeeping arrays (bool entries are false by default)
    int numAltAlleles = simplePosition.AltAlleles.Length;
    simplePosition.IsDecomposed = new bool[numAltAlleles];
    simplePosition.Vids         = new string[numAltAlleles];
    simplePosition.LinkedVids   = new List<string>[numAltAlleles];

    return simplePosition;
}
/// <summary>
/// Factory: constructs a <see cref="VcfReader"/>, parses its header and attaches the recomposer.
/// </summary>
/// <param name="headerReader">Forwarded to the constructor.</param>
/// <param name="vcfLineReader">Forwarded to the constructor.</param>
/// <param name="sequenceProvider">Forwarded to the constructor.</param>
/// <param name="refMinorProvider">Forwarded to the constructor.</param>
/// <param name="recomposer">Set on the reader after the header has been parsed.</param>
/// <param name="vcfFilter">Forwarded to the constructor.</param>
/// <param name="vidCreator">Forwarded to the constructor.</param>
/// <param name="mitoHeteroplasmyProvider">Forwarded to the constructor.</param>
/// <param name="enableDq">Forwarded to the constructor; defaults to false.</param>
/// <returns>The initialized reader.</returns>
public static VcfReader Create(
    StreamReader headerReader,
    StreamReader vcfLineReader,
    ISequenceProvider sequenceProvider,
    IRefMinorProvider refMinorProvider,
    IRecomposer recomposer,
    IVcfFilter vcfFilter,
    IVariantIdCreator vidCreator,
    IMitoHeteroplasmyProvider mitoHeteroplasmyProvider,
    bool enableDq = false)
{
    var reader = new VcfReader(
        headerReader,
        vcfLineReader,
        sequenceProvider,
        refMinorProvider,
        vcfFilter,
        vidCreator,
        mitoHeteroplasmyProvider,
        enableDq);

    // the header is parsed before the recomposer is attached
    reader.ParseHeader();
    reader.SetRecomposer(recomposer);

    return reader;
}
/// <summary>
/// Lambda entry point: annotates the VCF at <c>config.vcfUrl</c>, writes the JSON output and its
/// index to encrypted temporary files, and uploads both through the S3 client obtained from
/// <c>config.outputDir</c>.
/// </summary>
/// <param name="config">Annotation request (id, VCF URL, optional annotation range, output location).</param>
/// <param name="context">Lambda context; supplies the logger and the remaining time.</param>
/// <returns>
/// The success output when annotation completes, or when the input start virtual position is -1;
/// otherwise the value produced by <c>HandleException</c>.
/// </returns>
// ReSharper disable once UnusedMember.Global
public AnnotationResult Run(AnnotationConfig config, ILambdaContext context)
{
    var result         = new AnnotationResult { id = config.id };
    string snsTopicArn = null;
    var logBuilder     = new StringBuilder();

    try
    {
        LogUtilities.UpdateLogger(context.Logger, logBuilder);
        LogUtilities.LogLambdaInfo(context, CommandLineUtilities.InformationalVersion);
        LogUtilities.LogObject("Config", config);
        LogUtilities.Log(new[] { LambdaUrlHelper.UrlBaseEnvironmentVariableName, LambdaUtilities.SnsTopicKey });

        LambdaUtilities.GarbageCollect();
        LambdaUtilities.DeleteTempOutput();

        snsTopicArn   = LambdaUtilities.GetEnvironmentVariable(LambdaUtilities.SnsTopicKey);
        string vcfUrl = config.vcfUrl;

        using (var annotationResources = GetAnnotationResources(config))
        {
            // a start virtual position of -1 short-circuits to a success result
            if (annotationResources.InputStartVirtualPosition == -1)
            {
                return GetSuccessOutput(result);
            }

            long inputFileOffset = VirtualPosition.From(annotationResources.InputStartVirtualPosition).FileOffset;

            // first pass over the input: collect the variant positions inside the annotation range
            using (var preloadVcfStream = PersistentStreamUtils.GetReadStream(vcfUrl, inputFileOffset))
            {
                annotationResources.GetVariantPositions(
                    new BlockGZipStream(preloadVcfStream, CompressionMode.Decompress),
                    config.annotationRange.ToGenomicRange(annotationResources.SequenceProvider.RefNameToChromosome));
            }

            Logger.WriteLine("Scan for positions to preload complete.");

            using (var aesProvider = new AesCryptoServiceProvider())
            {
                FileMetadata jsonMetadata, jasixMetadata;
                string jsonPath  = Path.GetTempPath() + LambdaUrlHelper.JsonSuffix;
                string jasixPath = jsonPath + LambdaUrlHelper.JsonIndexSuffix;

                using (var inputVcfStream = new BlockGZipStream(PersistentStreamUtils.GetReadStream(vcfUrl, inputFileOffset), CompressionMode.Decompress))
                using (var headerStream = config.annotationRange == null
                    ? null
                    : new BlockGZipStream(PersistentStreamUtils.GetReadStream(vcfUrl), CompressionMode.Decompress))
                // JSON output: temp file <- encrypting CryptoStream <- MD5Stream
                using (var jsonFileStream = FileUtilities.GetCreateStream(jsonPath))
                using (var jsonCryptoStream = new CryptoStream(jsonFileStream, aesProvider.CreateEncryptor(), CryptoStreamMode.Write))
                using (var jsonMd5Stream = new MD5Stream(jsonCryptoStream))
                // index output: temp file <- encrypting CryptoStream <- MD5Stream
                using (var jasixFileStream = FileUtilities.GetCreateStream(jasixPath))
                using (var jasixCryptoStream = new CryptoStream(jasixFileStream, aesProvider.CreateEncryptor(), CryptoStreamMode.Write))
                using (var jasixMd5Stream = new MD5Stream(jasixCryptoStream))
                {
                    // no annotation range means nothing is filtered out
                    IVcfFilter vcfFilter;
                    if (config.annotationRange == null)
                    {
                        vcfFilter = new NullVcfFilter();
                    }
                    else
                    {
                        vcfFilter = new VcfFilter(
                            config.annotationRange.ToGenomicRange(annotationResources.SequenceProvider.RefNameToChromosome));
                    }

                    using (var jsonCompressStream = new BlockGZipStream(jsonMd5Stream, CompressionMode.Compress))
                    {
                        StreamAnnotation.Annotate(headerStream, inputVcfStream, jsonCompressStream, jasixMd5Stream,
                            annotationResources, vcfFilter, true);
                    }

                    Logger.WriteLine("Annotation done.");

                    // metadata is read while the MD5 streams are still open
                    jsonMetadata  = jsonMd5Stream.GetFileMetadata();
                    jasixMetadata = jasixMd5Stream.GetFileMetadata();
                }

                result.filePath = S3Utilities.GetKey(config.outputDir.path, config.outputPrefix + LambdaUrlHelper.JsonSuffix);
                string jasixKey = result.filePath + LambdaUrlHelper.JsonIndexSuffix;

                // the index is uploaded before the JSON file
                var s3Client = config.outputDir.GetS3Client(context.RemainingTime);
                s3Client.DecryptUpload(config.outputDir.bucketName, jasixKey, jasixPath, aesProvider, jasixMetadata);
                s3Client.DecryptUpload(config.outputDir.bucketName, result.filePath, jsonPath, aesProvider, jsonMetadata);

                Logger.WriteLine("Nirvana result files uploaded.");
            }
        }

        LambdaUtilities.DeleteTempOutput();

        if (string.IsNullOrEmpty(result.filePath))
        {
            throw new FileNotFoundException();
        }

        return GetSuccessOutput(result);
    }
    catch (Exception exception)
    {
        // clean up the temp output before reporting the failure
        LambdaUtilities.DeleteTempOutput();
        return HandleException(logBuilder, result, exception, snsTopicArn);
    }
}
/// <summary>
/// Reads positions from the VCF stream, annotates each one that has variants and writes the
/// resulting JSON, followed by the gene annotations. Per-chromosome entries and a summary table
/// are reported through <c>annotationResources.Metrics</c>.
/// </summary>
/// <param name="headerStream">Optional separate header stream, forwarded to <c>GetVcfReader</c>.</param>
/// <param name="inputVcfStream">Stream with the VCF lines.</param>
/// <param name="outputJsonStream">Destination for the JSON output.</param>
/// <param name="outputJsonIndexStream">Destination for the JSON index.</param>
/// <param name="annotationResources">Supplies the metrics, the annotator and chromosome preloading.</param>
/// <param name="vcfFilter">Forwarded to the VCF reader.</param>
/// <param name="ignoreEmptyChromosome">When true, positions whose chromosome is empty are skipped.</param>
/// <returns><c>ExitCodes.Success</c>; failures propagate as exceptions tagged with the current VCF line.</returns>
public static ExitCodes Annotate(
    Stream headerStream,
    Stream inputVcfStream,
    Stream outputJsonStream,
    Stream outputJsonIndexStream,
    AnnotationResources annotationResources,
    IVcfFilter vcfFilter,
    bool ignoreEmptyChromosome)
{
    var metrics = annotationResources.Metrics;
    PerformanceMetrics.ShowAnnotationHeader();

    // placeholder that is replaced by the first chromosome encountered
    IChromosome currentChromosome = new EmptyChromosome("dummy");
    int numVariants = 0;
    IMitoHeteroplasmyProvider mitoHeteroplasmyProvider = MitoHeteroplasmyReader.GetProvider();

    using (var vcfReader = GetVcfReader(headerStream, inputVcfStream, annotationResources, vcfFilter, mitoHeteroplasmyProvider))
    using (var jsonWriter = new JsonWriter(outputJsonStream, outputJsonIndexStream, annotationResources,
        Date.CurrentTimeStamp, vcfReader.GetSampleNames(), false))
    {
        try
        {
            CheckGenomeAssembly(annotationResources, vcfReader);
            SetMitochondrialAnnotationBehavior(annotationResources, vcfReader);

            for (IPosition position = vcfReader.GetNextPosition();
                 position != null;
                 position = vcfReader.GetNextPosition())
            {
                IChromosome chromosome = position.Chromosome;

                if (ignoreEmptyChromosome && chromosome.IsEmpty())
                {
                    continue;
                }

                // chromosome switch: report the previous one, then preload the new one
                if (chromosome.Index != currentChromosome.Index)
                {
                    if (!currentChromosome.IsEmpty())
                    {
                        metrics.ShowAnnotationEntry(currentChromosome, numVariants);
                    }

                    numVariants = 0;

                    metrics.Preload.Start();
                    annotationResources.PreLoad(chromosome);
                    metrics.Preload.Stop();

                    metrics.Annotation.Start();
                    currentChromosome = chromosome;
                }

                // only positions with variants are annotated and written
                if (position.Variants != null)
                {
                    var annotated = annotationResources.Annotator.Annotate(position);
                    string json   = annotated?.GetJsonString();

                    if (json != null)
                    {
                        jsonWriter.WritePosition(annotated.Position, json);
                    }
                }

                // every position that was not skipped is counted
                numVariants++;
            }

            jsonWriter.WriteGenes(annotationResources.Annotator.GetGeneAnnotations());
        }
        catch (Exception e)
        {
            // attach the VCF line being processed, then rethrow preserving the stack trace
            e.Data[ExitCodeUtilities.VcfLine] = vcfReader.VcfLine;
            throw;
        }
    }

    // report the last chromosome processed
    if (!currentChromosome.IsEmpty())
    {
        metrics.ShowAnnotationEntry(currentChromosome, numVariants);
    }

    metrics.ShowSummaryTable();
    return ExitCodes.Success;
}
/// <summary>
/// Splits a raw VCF line on tabs and delegates to the column-based <c>GetSimplePosition</c> overload.
/// </summary>
/// <param name="vcfLine">One VCF line; may be null.</param>
/// <param name="vcfFilter">Forwarded to the column-based overload.</param>
/// <param name="refNameToChromosome">Forwarded to the column-based overload.</param>
/// <returns>Null when <paramref name="vcfLine"/> is null; otherwise the overload's result.</returns>
public static SimplePosition GetSimplePosition(
    string vcfLine,
    IVcfFilter vcfFilter,
    IDictionary<string, IChromosome> refNameToChromosome)
{
    if (vcfLine == null)
    {
        return null;
    }

    var vcfFields = vcfLine.OptimizedSplit('\t');
    return GetSimplePosition(vcfFields, vcfFilter, refNameToChromosome);
}
/// <summary>
/// Reads positions from the VCF stream, annotates each one that has variants and writes the
/// resulting JSON entries, followed by the annotated genes.
/// </summary>
/// <remarks>
/// <paramref name="annotationResources"/> is placed in a <c>using</c> statement here, so it is
/// disposed when this method finishes.
/// </remarks>
/// <param name="headerStream">Optional separate header stream, forwarded to <c>GetVcfReader</c>.</param>
/// <param name="inputVcfStream">Stream with the VCF lines.</param>
/// <param name="outputJsonStream">Destination for the JSON output; a console logger is used only when it is a <c>BlockGZipStream</c>.</param>
/// <param name="outputJsonIndexStream">Destination for the JSON index.</param>
/// <param name="annotationResources">Supplies the annotator and chromosome preloading.</param>
/// <param name="vcfFilter">Forwarded to the VCF reader.</param>
/// <param name="ignoreEmptyChromosome">When true, positions whose chromosome is empty are skipped.</param>
/// <returns><c>ExitCodes.Success</c>; failures propagate as exceptions tagged with the current VCF line.</returns>
public static ExitCodes Annotate(
    Stream headerStream,
    Stream inputVcfStream,
    Stream outputJsonStream,
    Stream outputJsonIndexStream,
    AnnotationResources annotationResources,
    IVcfFilter vcfFilter,
    bool ignoreEmptyChromosome = false)
{
    // metrics go to the console only when the JSON output is a BlockGZipStream
    ILogger logger;
    if (outputJsonStream is BlockGZipStream)
    {
        logger = new ConsoleLogger();
    }
    else
    {
        logger = new NullLogger();
    }

    var metrics = new PerformanceMetrics(logger);

    using (annotationResources)
    using (var vcfReader = GetVcfReader(headerStream, inputVcfStream, annotationResources, vcfFilter))
    using (var jsonWriter = new JsonWriter(outputJsonStream, outputJsonIndexStream, annotationResources,
        Date.CurrentTimeStamp, vcfReader.GetSampleNames(), false))
    {
        try
        {
            CheckGenomeAssembly(annotationResources, vcfReader);
            SetMitochondrialAnnotationBehavior(annotationResources, vcfReader);

            int previousChromIndex = -1;

            for (IPosition position = vcfReader.GetNextPosition();
                 position != null;
                 position = vcfReader.GetNextPosition())
            {
                var chromosome = position.Chromosome;

                if (ignoreEmptyChromosome && chromosome.IsEmpty())
                {
                    continue;
                }

                // preload whenever the chromosome index differs from the previous position's
                if (previousChromIndex != chromosome.Index)
                {
                    annotationResources.PreLoad(chromosome);
                }

                previousChromIndex = UpdatePerformanceMetrics(previousChromIndex, chromosome, metrics);

                // only positions with variants are annotated and written
                if (position.Variants != null)
                {
                    var annotated = annotationResources.Annotator.Annotate(position);
                    string json   = annotated?.GetJsonString();

                    if (json != null)
                    {
                        jsonWriter.WriteJsonEntry(annotated.Position, json);
                    }
                }

                // every position that was not skipped is counted
                metrics.Increment();
            }

            jsonWriter.WriteAnnotatedGenes(annotationResources.Annotator.GetGeneAnnotations());
        }
        catch (Exception e)
        {
            // attach the VCF line being processed, then rethrow preserving the stack trace
            e.Data[ExitCodeUtilities.VcfLine] = vcfReader.VcfLine;
            throw;
        }
    }

    metrics.ShowAnnotationTime();
    return ExitCodes.Success;
}
/// <summary>
/// Builds a <see cref="SimplePosition"/> for an already-resolved chromosome and position, taking
/// the REF and ALT columns from the split VCF line.
/// </summary>
/// <param name="chromosome">Chromosome of the position.</param>
/// <param name="position">Start position.</param>
/// <param name="vcfFields">Columns of one VCF line, indexed by the <c>VcfCommon</c> constants.</param>
/// <param name="vcfFilter">Asked first whether the position lies past the end; if so, null is returned.</param>
/// <param name="isRecomposed">Copied onto the resulting position.</param>
/// <returns>
/// The populated position, or null when <paramref name="vcfFilter"/> reports that the position
/// has passed the end.
/// </returns>
public static SimplePosition GetSimplePosition(
    IChromosome chromosome,
    int position,
    string[] vcfFields,
    IVcfFilter vcfFilter,
    bool isRecomposed = false)
{
    // filter first so that nothing is parsed for positions past the end
    if (vcfFilter.PassedTheEnd(chromosome, position))
    {
        return null;
    }

    string refField     = vcfFields[VcfCommon.RefIndex];
    string altField     = vcfFields[VcfCommon.AltIndex];
    string[] altAlleles = altField.OptimizedSplit(',');
    int altCount        = altAlleles.Length;

    var simplePosition = new SimplePosition(chromosome, position, refField, altAlleles);

    // an ALT field that starts with '<' or is exactly "*" gets an end of -1;
    // otherwise the end is derived from the start and the reference allele length
    bool useSentinelEnd = altField.OptimizedStartsWith('<') || altField == "*";
    if (useSentinelEnd)
    {
        simplePosition.End = -1;
    }
    else
    {
        simplePosition.End = position + refField.Length - 1;
    }

    simplePosition.VcfFields    = vcfFields;
    simplePosition.IsRecomposed = isRecomposed;

    // per-ALT-allele bookkeeping arrays
    simplePosition.IsDecomposed = new bool[altCount];
    simplePosition.Vids         = new string[altCount];
    simplePosition.LinkedVids   = new List<string>[altCount];

    return simplePosition;
}
/// <summary>
/// Factory: constructs a <see cref="VcfReader"/>, parses its header and attaches the recomposer.
/// </summary>
/// <param name="headerReader">Forwarded to the constructor.</param>
/// <param name="vcfLineReader">Forwarded to the constructor.</param>
/// <param name="sequenceProvider">Forwarded to the constructor.</param>
/// <param name="refMinorProvider">Forwarded to the constructor.</param>
/// <param name="recomposer">Set on the reader after the header has been parsed.</param>
/// <param name="vcfFilter">Forwarded to the constructor.</param>
/// <param name="vidCreator">Forwarded to the constructor.</param>
/// <returns>The initialized reader.</returns>
public static VcfReader Create(
    StreamReader headerReader,
    StreamReader vcfLineReader,
    ISequenceProvider sequenceProvider,
    IRefMinorProvider refMinorProvider,
    IRecomposer recomposer,
    IVcfFilter vcfFilter,
    IVariantIdCreator vidCreator)
{
    var reader = new VcfReader(
        headerReader,
        vcfLineReader,
        sequenceProvider,
        refMinorProvider,
        vcfFilter,
        vidCreator);

    // the header is parsed before the recomposer is attached
    reader.ParseHeader();
    reader.SetRecomposer(recomposer);

    return reader;
}