Example #1
0
        /// <summary>
        /// Builds a reduced set of cache files for a single reference interval. The data bundle is loaded
        /// for the requested chromosome; transcripts, SIFT/PolyPhen predictions, regulatory regions and
        /// reference bases for the interval are collected through the sibling helpers; the results are
        /// then written to the paths derived from the output stub.
        /// </summary>
        /// <returns><see cref="ExitCodes.Success"/> when every step completes without throwing.</returns>
        private static ExitCodes ProgramExecution()
        {
            var consoleLogger = new ConsoleLogger();
            var dataBundle    = DataBundle.GetDataBundle(_inputReferencePath, _inputPrefix);
            int refSeqCount   = dataBundle.SequenceReader.NumRefSeqs;
            var targetChrom   = ReferenceNameUtilities.GetChromosome(dataBundle.SequenceReader.RefNameToChromosome, _referenceName);

            dataBundle.Load(targetChrom);

            string stub             = GetOutputStub(targetChrom, dataBundle.Source);
            var    targetInterval   = new ChromosomeInterval(targetChrom, _referencePosition, _referenceEndPosition);
            var    pickedTranscripts = GetTranscripts(consoleLogger, dataBundle, targetInterval);

            // stage the SIFT and PolyPhen predictions referenced by the selected transcripts
            var siftResult = GetPredictionStaging(consoleLogger, "SIFT", pickedTranscripts, targetChrom,
                                                  dataBundle.SiftPredictions, dataBundle.SiftReader,
                                                  t => t.SiftIndex, refSeqCount);
            var polyphenResult = GetPredictionStaging(consoleLogger, "PolyPhen", pickedTranscripts, targetChrom,
                                                      dataBundle.PolyPhenPredictions, dataBundle.PolyPhenReader,
                                                      t => t.PolyPhenIndex, refSeqCount);
            string bases = GetReferenceBases(consoleLogger, dataBundle.SequenceReader, targetInterval);

            var regulatoryArrays = GetRegulatoryRegionIntervalArrays(consoleLogger, dataBundle.TranscriptCache,
                                                                     targetInterval, refSeqCount);
            var transcriptArrays = PredictionUtilities.UpdateTranscripts(pickedTranscripts,
                                                                         dataBundle.SiftPredictions, siftResult.Predictions,
                                                                         dataBundle.PolyPhenPredictions, polyphenResult.Predictions,
                                                                         refSeqCount);

            var stagedTranscripts = GetTranscriptStaging(dataBundle.TranscriptCacheData.Header, transcriptArrays, regulatoryArrays);

            // each output stream is created immediately before it is written
            var transcriptStream = FileUtilities.GetCreateStream(CacheConstants.TranscriptPath(stub));
            WriteCache(consoleLogger, transcriptStream, stagedTranscripts, "transcript");

            var siftStream = FileUtilities.GetCreateStream(CacheConstants.SiftPath(stub));
            WriteCache(consoleLogger, siftStream, siftResult.Staging, "SIFT");

            var polyphenStream = FileUtilities.GetCreateStream(CacheConstants.PolyPhenPath(stub));
            WriteCache(consoleLogger, polyphenStream, polyphenResult.Staging, "PolyPhen");

            WriteReference(consoleLogger, CacheConstants.BasesPath(stub), dataBundle.SequenceReader, targetChrom,
                           bases, targetInterval.Start);

            return ExitCodes.Success;
        }
Example #2
0
        /// <summary>
        /// Writes the previously loaded transcripts, regulatory elements, genes, introns, microRNAs and
        /// peptide sequences into the transcript cache file for <paramref name="outputPrefix"/>, and
        /// reports the elapsed time on the console.
        /// </summary>
        /// <param name="outputPrefix">prefix passed to CacheConstants.TranscriptPath to form the output path</param>
        /// <exception cref="GeneralException">thrown when the data has not been loaded yet</exception>
        public void CreateTranscriptCacheFile(string outputPrefix)
        {
            if (!_hasData)
            {
                throw new GeneralException("Data was not loaded before running CreateTranscriptCacheFile");
            }

            Console.Write("- creating transcript cache file... ");
            var stopwatch = new Benchmark();

            var outputPath   = CacheConstants.TranscriptPath(outputPrefix);
            var sourceHeader = _transcriptReader.Header;

            // the VEP-specific fields travel in the custom part of the file header
            var vepHeader  = new GlobalCustomHeader(sourceHeader.VepReleaseTicks, sourceHeader.VepVersion);
            var fileHeader = new FileHeader(CacheConstants.Identifier,
                                            CacheConstants.SchemaVersion,
                                            CacheConstants.DataVersion,
                                            sourceHeader.TranscriptSource,
                                            _currentTimeTicks,
                                            sourceHeader.GenomeAssembly,
                                            vepHeader);

            var convertedGenes = ConvertGenes();

            using (var cacheWriter = new GlobalCacheWriter(outputPath, fileHeader))
            {
                var globalCache = new VD.GlobalCache(fileHeader,
                                                     _transcripts.ToArray(),
                                                     _regulatoryElements.ToArray(),
                                                     convertedGenes,
                                                     _introns.ToArray(),
                                                     _microRnas.ToArray(),
                                                     _peptideSeqs.ToArray());
                cacheWriter.Write(globalCache);
            }

            Console.WriteLine("{0}", Benchmark.ToHumanReadable(stopwatch.GetElapsedTime()));
        }
        /// <summary>
        /// Reads the transcript cache for the configured input prefix and writes every regulatory
        /// region it contains to the output file through WriteRegulatoryFeature.
        /// </summary>
        /// <returns><see cref="ExitCodes.Success"/> once all entries have been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            using (var gffWriter = GZipUtilities.GetStreamWriter(_outputFileName))
            {
                string transcriptCachePath = CacheConstants.TranscriptPath(_inputPrefix);
                var    dictionaries        = SequenceHelper.GetDictionaries(_referencePath);

                // load the cache
                Console.Write("- reading {0}... ", Path.GetFileName(transcriptCachePath));
                var transcriptCache = TranscriptCacheHelper.GetCache(transcriptCachePath, dictionaries.refIndexToChromosome);
                Console.WriteLine("found {0:N0} reference sequences. ", transcriptCache.RegulatoryRegionIntervalArrays.Length);

                Console.Write("- writing GFF entries... ");
                foreach (var regionArray in transcriptCache.RegulatoryRegionIntervalArrays)
                {
                    // slots without an interval array are null and contribute nothing
                    if (regionArray != null)
                    {
                        foreach (var entry in regionArray.Array)
                        {
                            WriteRegulatoryFeature(gffWriter, entry.Value);
                        }
                    }
                }
                Console.WriteLine("finished.");
            }

            return ExitCodes.Success;
        }
        /// <summary>
        /// Creates the provider from the cache files that share <paramref name="pathPrefix"/>. The
        /// transcript cache is read immediately, while the SIFT and PolyPhen streams are kept in fields
        /// together with their readers.
        /// </summary>
        /// <param name="pathPrefix">prefix used to derive the transcript, SIFT and PolyPhen cache paths</param>
        /// <param name="sequenceProvider">supplies the sequence, chromosome dictionaries and assembly</param>
        /// <param name="conservationProvider">optional; when not null its version is appended to the data source versions</param>
        public TranscriptAnnotationProvider(string pathPrefix, ISequenceProvider sequenceProvider, ProteinConservationProvider conservationProvider)
        {
            Name                  = "Transcript annotation provider";
            _sequence             = sequenceProvider.Sequence;
            _refNameToChromosome  = sequenceProvider.RefNameToChromosome;
            _conservationProvider = conservationProvider;

            // the transcript stream is only needed while the cache is being initialized
            using (var transcriptStream = PersistentStreamUtils.GetReadStream(CacheConstants.TranscriptPath(pathPrefix)))
            {
                (_transcriptCache, TranscriptIntervalArrays, VepVersion) =
                    InitiateCache(transcriptStream, sequenceProvider.RefIndexToChromosome, sequenceProvider.Assembly);
            }

            Assembly           = _transcriptCache.Assembly;
            DataSourceVersions = _transcriptCache.DataSourceVersions;

            // TODO: this is not great. We should not be using IEnumerables if we have to resort to strange stuff like this
            if (conservationProvider != null)
            {
                var conservationVersion = new[] { conservationProvider.Version };
                DataSourceVersions = DataSourceVersions.Concat(conservationVersion);
            }

            // prediction streams stay open for the lifetime of the readers
            _siftStream = PersistentStreamUtils.GetReadStream(CacheConstants.SiftPath(pathPrefix));
            _siftReader = new PredictionCacheReader(_siftStream, PredictionCacheReader.SiftDescriptions);

            _polyphenStream = PersistentStreamUtils.GetReadStream(CacheConstants.PolyPhenPath(pathPrefix));
            _polyphenReader = new PredictionCacheReader(_polyphenStream, PredictionCacheReader.PolyphenDescriptions);
        }
Example #5
0
        // Verifies that TranscriptPath appends the ".transcripts.ndb" suffix to the prefix.
        public void TranscriptPath_NominalCase()
        {
            var actualPath = CacheConstants.TranscriptPath("bob");

            Assert.Equal("bob.transcripts.ndb", actualPath);
        }
        /// <summary>
        /// Combines two cache sets (transcripts, SIFT and PolyPhen predictions) reference by reference
        /// and writes the combined result under the output prefix.
        /// </summary>
        /// <returns><see cref="ExitCodes.Success"/> once the combined files have been written.</returns>
        /// <exception cref="InvalidDataException">thrown when the two transcript caches differ in their number of reference sequences</exception>
        private static ExitCodes ProgramExecution()
        {
            var sequenceData  = SequenceHelper.GetDictionaries(_refSequencePath);
            var consoleLogger = new ConsoleLogger();

            var caches = LoadTranscriptCaches(consoleLogger,
                                              CacheConstants.TranscriptPath(_inputPrefix),
                                              CacheConstants.TranscriptPath(_inputPrefix2),
                                              sequenceData.refIndexToChromosome);

            int refCount1 = caches.Cache.TranscriptIntervalArrays.Length;
            int refCount2 = caches.Cache2.TranscriptIntervalArrays.Length;

            if (refCount1 != refCount2)
            {
                throw new InvalidDataException($"Expected the number of reference sequences in cache 1 ({refCount1}) and cache 2 ({refCount2}) to be the same.");
            }

            int numRefSeqs         = refCount1;
            var mergedTranscripts  = new IntervalArray<ITranscript>[numRefSeqs];
            var mergedSiftByRef    = new Prediction[numRefSeqs][];
            var mergedPolyphenByRef = new Prediction[numRefSeqs][];

            PredictionHeader siftHeader;
            PredictionHeader polyphenHeader;

            using (var siftA = new PredictionCacheReader(FileUtilities.GetReadStream(CacheConstants.SiftPath(_inputPrefix)), PredictionCacheReader.SiftDescriptions))
            using (var siftB = new PredictionCacheReader(FileUtilities.GetReadStream(CacheConstants.SiftPath(_inputPrefix2)), PredictionCacheReader.SiftDescriptions))
            using (var polyphenA = new PredictionCacheReader(FileUtilities.GetReadStream(CacheConstants.PolyPhenPath(_inputPrefix)), PredictionCacheReader.PolyphenDescriptions))
            using (var polyphenB = new PredictionCacheReader(FileUtilities.GetReadStream(CacheConstants.PolyPhenPath(_inputPrefix2)), PredictionCacheReader.PolyphenDescriptions))
            {
                // the output prediction files reuse the headers of the first cache set
                siftHeader     = siftA.Header;
                polyphenHeader = polyphenA.Header;

                for (ushort refIndex = 0; refIndex < numRefSeqs; refIndex++)
                {
                    var chromosome = sequenceData.refIndexToChromosome[refIndex];

                    // chromosome banner in yellow
                    Console.ForegroundColor = ConsoleColor.Yellow;
                    consoleLogger.WriteLine($"\n{chromosome.UcscName}:");
                    Console.ResetColor();

                    var siftResult = CombinePredictions(consoleLogger, chromosome, "SIFT", siftA, siftB);
                    mergedSiftByRef[refIndex] = siftResult.Predictions;

                    var polyphenResult = CombinePredictions(consoleLogger, chromosome, "PolyPhen", polyphenA, polyphenB);
                    mergedPolyphenByRef[refIndex] = polyphenResult.Predictions;

                    // the offsets returned for the predictions are handed on to CombineTranscripts
                    mergedTranscripts[refIndex] = CombineTranscripts(consoleLogger,
                                                                     caches.Cache.TranscriptIntervalArrays[refIndex],
                                                                     caches.Cache2.TranscriptIntervalArrays[refIndex],
                                                                     siftResult.Offset, polyphenResult.Offset);
                }
            }

            consoleLogger.WriteLine();
            WritePredictions(consoleLogger, "SIFT", CacheConstants.SiftPath(_outputPrefix), siftHeader, mergedSiftByRef);
            WritePredictions(consoleLogger, "PolyPhen", CacheConstants.PolyPhenPath(_outputPrefix), polyphenHeader, mergedPolyphenByRef);
            WriteTranscripts(consoleLogger, CloneHeader(caches.Cache.Header), mergedTranscripts,
                             caches.Cache.RegulatoryRegionIntervalArrays);

            return ExitCodes.Success;
        }
Example #7
0
 /// <summary>
 /// Creates the provider by reading the transcript cache derived from <paramref name="pathPrefix"/>
 /// and copying the genome assembly and data source versions from it.
 /// </summary>
 /// <param name="pathPrefix">prefix used to derive the transcript cache path</param>
 /// <param name="sequenceProvider">supplies the sequence, chromosome index dictionary, assembly and reference count</param>
 public PianoAnnotationProvider(string pathPrefix, ISequenceProvider sequenceProvider)
 {
     Name      = "Transcript annotation provider";
     _sequence = sequenceProvider.Sequence;

     var cacheStream = FileUtilities.GetReadStream(CacheConstants.TranscriptPath(pathPrefix));
     _transcriptCache = InitiateCache(cacheStream,
                                      sequenceProvider.GetChromosomeIndexDictionary(),
                                      sequenceProvider.GenomeAssembly,
                                      sequenceProvider.NumRefSeqs);

     GenomeAssembly     = _transcriptCache.GenomeAssembly;
     DataSourceVersions = _transcriptCache.DataSourceVersions;
 }
Example #8
0
        /// <summary>
        /// Prints the schema, data and VEP versions found in the header of the transcript cache
        /// belonging to the configured input prefix.
        /// </summary>
        /// <returns><see cref="ExitCodes.Success"/> after the versions have been printed.</returns>
        private static ExitCodes ProgramExecution()
        {
            var versions = GetHeaderInformation(CacheConstants.TranscriptPath(_inputPrefix));

            Console.WriteLine($"Versions: Schema: {versions.Schema}, Data: {versions.Data}, VEP: {versions.Vep}");
            return ExitCodes.Success;
        }
Example #9
0
 /// <summary>
 /// Stores the two input prefixes and the output prefix, together with the transcript cache
 /// path derived from each of them.
 /// </summary>
 /// <param name="inputPrefix1">prefix of the first input cache</param>
 /// <param name="inputPrefix2">prefix of the second input cache</param>
 /// <param name="outputPrefix">prefix of the combined output cache</param>
 public CacheCombiner(string inputPrefix1, string inputPrefix2, string outputPrefix)
 {
     // first input
     _prefix1    = inputPrefix1;
     _cachePath1 = CacheConstants.TranscriptPath(inputPrefix1);

     // second input
     _prefix2    = inputPrefix2;
     _cachePath2 = CacheConstants.TranscriptPath(inputPrefix2);

     // combined output
     _outPrefix       = outputPrefix;
     _outputCachePath = CacheConstants.TranscriptPath(outputPrefix);
 }
        /// <summary>
        /// Stages the supplied transcript and regulatory region interval arrays and writes them to the
        /// transcript cache path derived from the output prefix.
        /// </summary>
        /// <param name="logger">receives the progress messages</param>
        /// <param name="header">cache header passed to the staging object</param>
        /// <param name="transcriptIntervalArrays">transcript interval arrays to write</param>
        /// <param name="regulatoryRegionIntervalArrays">regulatory region interval arrays to write</param>
        private static void WriteTranscripts(ILogger logger, CacheHeader header,
                                             IntervalArray <ITranscript>[] transcriptIntervalArrays,
                                             IntervalArray <IRegulatoryRegion>[] regulatoryRegionIntervalArrays)
        {
            var cacheStaging = TranscriptCacheStaging.GetStaging(header, transcriptIntervalArrays, regulatoryRegionIntervalArrays);

            logger.Write("- writing transcripts... ");

            var outputStream = FileUtilities.GetCreateStream(CacheConstants.TranscriptPath(_outputPrefix));
            cacheStaging.Write(outputStream);

            logger.WriteLine("finished.");
        }
Example #11
0
        /// <summary>
        /// Parses and validates the command-line options of the SpliceAI sub-command and, when they
        /// are valid, hands control to ProgramExecution.
        /// </summary>
        /// <param name="command">name of the sub-command, shown in the usage example</param>
        /// <param name="commandArgs">arguments that follow the sub-command</param>
        /// <returns>the exit code produced by the console application builder</returns>
        public static ExitCodes Run(string command, string[] commandArgs)
        {
            var ops = new OptionSet
            {
                {
                    "ref|r=",
                    "compressed reference sequence file",
                    v => _compressedReference = v
                },
                {
                    "cache|c=",
                    "Transcript cache prefix",
                    v => _transcriptCachePrefix = v
                },
                {
                    "gene|g=",
                    "Gene info data file from NCBI",
                    v => _geneInfoFile = v
                },
                {
                    "in|i=",
                    "input VCF file path",
                    v => _inputFile = v
                },
                {
                    "out|o=",
                    "output directory",
                    v => _outputDirectory = v
                }
            };

            string commandLineExample = $"{command} [options]";

            // --cache takes a prefix; the file that must exist is the transcript cache derived from it,
            // so the labels below name the prefix for the required check and the file for the existence check
            var exitCode = new ConsoleAppBuilder(commandArgs, ops)
                           .Parse()
                           .CheckInputFilenameExists(_compressedReference, "compressed reference sequence file name", "--ref")
                           .HasRequiredParameter(_transcriptCachePrefix, "transcript cache prefix", "--cache")
                           .CheckInputFilenameExists(CacheConstants.TranscriptPath(_transcriptCachePrefix), "transcript cache file", "--cache")
                           .HasRequiredParameter(_inputFile, "SpliceAI VCF file", "--in")
                           .CheckInputFilenameExists(_inputFile, "SpliceAI VCF file", "--in")
                           .HasRequiredParameter(_geneInfoFile, "Gene info data file from NCBI", "--gene")
                           .CheckInputFilenameExists(_geneInfoFile, "Gene info data file from NCBI", "--gene")
                           .HasRequiredParameter(_outputDirectory, "output directory", "--out")
                           .CheckDirectoryExists(_outputDirectory, "output directory", "--out")
                           .SkipBanner()
                           // the description previously mentioned 1000 Genomes, a copy-paste leftover
                           .ShowHelpMenu("Creates a supplementary database from a SpliceAI VCF file", commandLineExample)
                           .ShowErrors()
                           .Execute(ProgramExecution);

            return exitCode;
        }
Example #12
0
        /// <summary>
        /// Entry point of the peptide annotation tool: registers the command-line options, validates
        /// them and runs the annotator.
        /// </summary>
        /// <param name="args">raw command-line arguments</param>
        /// <returns>the resulting exit code as an integer</returns>
        static int Main(string[] args)
        {
            var ops = new OptionSet
            {
                {
                    "cache|c=",
                    "input cache {prefix}",
                    v => ConfigurationSettings.InputCachePrefix = v
                },
                {
                    "in|i=",
                    "input VCF {path}",
                    v => ConfigurationSettings.VcfPath = v
                },
                {
                    "out|o=",
                    "output {file path} ",
                    v => ConfigurationSettings.OutputFileName = v
                },
                {
                    "ref|r=",
                    "input compressed reference sequence {path}",
                    v => ConfigurationSettings.RefSequencePath = v
                },
                {
                    "force-mt",
                    "forces to annotate mitochondria variants",
                    v => ConfigurationSettings.ForceMitochondrialAnnotation = v != null
                }
            };

            // the example must use the options registered above: the cache is given with -c and is a
            // prefix (the previous text advertised a non-existent -d <cache dir> option)
            var commandLineExample = "-i <vcf path> -c <cache prefix> -r <ref path> -o <base output filename>";

            var piano    = new Piano();
            var exitCode = new ConsoleAppBuilder(args, ops)
                           .UseVersionProvider(new VersionProvider())
                           .Parse()
                           .CheckInputFilenameExists(ConfigurationSettings.VcfPath, "vcf", "--in", true, "-")
                           .CheckInputFilenameExists(ConfigurationSettings.RefSequencePath, "reference sequence", "--ref")
                           .CheckInputFilenameExists(CacheConstants.TranscriptPath(ConfigurationSettings.InputCachePrefix), "transcript cache", "--cache")
                           .HasRequiredParameter(ConfigurationSettings.OutputFileName, "output file stub", "--out")
                           .ShowBanner(Constants.Authors)
                           .ShowHelpMenu("peptide annotation", commandLineExample)
                           .ShowErrors()
                           .Execute(piano.ProgramExecution);

            return (int)exitCode;
        }
Example #13
0
        /// <summary>
        /// Loads the transcript cache for the configured input prefix and writes its transcripts as
        /// GFF to the output file.
        /// </summary>
        /// <returns><see cref="ExitCodes.Success"/> once the GFF file has been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            string transcriptCachePath = CacheConstants.TranscriptPath(_inputPrefix);

            // only the first element of the returned dictionaries is needed here
            var (chromosomesByIndex, _, _) = SequenceHelper.GetDictionaries(_compressedReferencePath);

            var transcriptCache = TranscriptCacheHelper.GetCache(transcriptCachePath, chromosomesByIndex);
            var internalGeneIds = InternalGenes.CreateDictionary(transcriptCache.Genes);

            using (var gffWriter = new GffWriter(GZipUtilities.GetStreamWriter(_outputFileName)))
            {
                new GffCreator(gffWriter, internalGeneIds).Create(transcriptCache.TranscriptIntervalArrays);
            }

            return ExitCodes.Success;
        }
Example #14
0
        /// <summary>
        /// Reads the global cache for the input prefix and writes a VCF header followed by the
        /// output of CreateVcf for every transcript.
        /// </summary>
        public void Create()
        {
            using (var cacheReader = new GlobalCacheReader(CacheConstants.TranscriptPath(_inputPrefix)))
            using (var vcfWriter = GZipUtilities.GetStreamWriter(_outPath))
            {
                WriteVcfHeader(vcfWriter);

                var globalCache = cacheReader.Read();
                Console.Write("- found {0} transcripts... ", globalCache.Transcripts.Length);

                foreach (var entry in globalCache.Transcripts)
                {
                    CreateVcf(vcfWriter, entry);
                }

                Console.WriteLine("finished.");
            }
        }
Example #15
0
        /// <summary>
        /// Loads the global transcript cache that belongs to <paramref name="cachePrefix"/>.
        /// </summary>
        /// <param name="cachePrefix">prefix used to derive the transcript cache path</param>
        /// <returns>the cache that was read, or null when the cache file does not exist</returns>
        public static GlobalCache LoadCache(string cachePrefix)
        {
            var transcriptCachePath = CacheConstants.TranscriptPath(cachePrefix);

            // a missing file is reported as null rather than as an exception
            if (!File.Exists(transcriptCachePath))
            {
                return null;
            }

            using (var cacheReader = new GlobalCacheReader(FileUtilities.GetReadStream(transcriptCachePath)))
            {
                return cacheReader.Read();
            }
        }
Example #16
0
        /// <summary>
        /// Reads the transcript cache for the stored cache prefix and writes one entry per transcript
        /// to <paramref name="outputPath"/>.
        /// </summary>
        /// <param name="outputPath">path of the output file to create</param>
        public void Create(string outputPath)
        {
            using (var gffWriter = GZipUtilities.GetStreamWriter(outputPath))
            {
                Console.Write("- reading {0}... ", Path.GetFileName(_cachePrefix));
                var transcriptCache = GetCache(CacheConstants.TranscriptPath(_cachePrefix));
                Console.WriteLine("found {0:N0} transcripts.", transcriptCache.Transcripts.Length);

                // genes are registered before any transcript is written
                AddGenesToDictionary(transcriptCache.Genes);

                Console.Write("- writing GFF entries... ");
                foreach (var entry in transcriptCache.Transcripts)
                {
                    var referenceName = _referenceNames[entry.ReferenceIndex];
                    Write(gffWriter, referenceName, entry);
                }
                Console.WriteLine("finished.");
            }
        }
Example #17
0
        /// <summary>
        /// Parses and validates the command-line options of the gnomAD gene sub-command and, when
        /// they are valid, hands control to ProgramExecution.
        /// </summary>
        /// <param name="command">name of the sub-command, shown in the usage example</param>
        /// <param name="commandArgs">arguments that follow the sub-command</param>
        /// <returns>the exit code produced by the console application builder</returns>
        public static ExitCodes Run(string command, string[] commandArgs)
        {
            var options = new OptionSet
            {
                { "cache|c=", "Cache prefix",            value => _cachePrefix          = value },
                { "ref|r=",   "Reference sequence path", value => _referenceSequncePath = value },
                { "in|i=",    "input tsv file",          value => _inputFile            = value },
                { "out|o=",   "output directory",        value => _outputDirectory      = value }
            };

            string usageExample = $"{command} [options]";

            // validation happens in the order listed; the program only executes after all checks
            return new ConsoleAppBuilder(commandArgs, options)
                   .Parse()
                   .HasRequiredParameter(_outputDirectory, "output directory", "--out")
                   .CheckDirectoryExists(_outputDirectory, "output directory", "--out")
                   .HasRequiredParameter(_cachePrefix, "transcript cache prefix", "--cache")
                   .CheckInputFilenameExists(CacheConstants.TranscriptPath(_cachePrefix), "transcript cache prefix", "--cache")
                   .HasRequiredParameter(_referenceSequncePath, "reference sequence path", "--ref")
                   .CheckInputFilenameExists(_referenceSequncePath, "reference sequence path", "--ref")
                   .CheckInputFilenameExists(_inputFile, "input TSV file", "--in")
                   .SkipBanner()
                   .ShowHelpMenu("Creates a gene annotation database from gnomAD data", usageExample)
                   .ShowErrors()
                   .Execute(ProgramExecution);
        }
Example #18
0
        /// <summary>
        /// Loads the transcript cache for the configured input prefix and writes its transcripts as
        /// GFF to the output file, passing the configured transcript source to the GFF creator.
        /// </summary>
        /// <returns><see cref="ExitCodes.Success"/> once the GFF file has been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            Source source              = ParseVepCacheDirectoryMain.GetSource(_transcriptSource);
            string transcriptCachePath = CacheConstants.TranscriptPath(_inputPrefix);

            IDictionary <ushort, IChromosome> chromosomesByIndex =
                SequenceHelper.GetDictionaries(_compressedReferencePath).refIndexToChromosome;

            TranscriptCacheData      transcriptCache = TranscriptCacheHelper.GetCache(transcriptCachePath, chromosomesByIndex);
            IDictionary <IGene, int> internalGeneIds = InternalGenes.CreateDictionary(transcriptCache.Genes);

            using (var gffWriter = new GffWriter(GZipUtilities.GetStreamWriter(_outputFileName)))
            {
                new GffCreator(gffWriter, internalGeneIds, source).Create(transcriptCache.TranscriptIntervalArrays);
            }

            return ExitCodes.Success;
        }
Example #19
0
        /// <summary>
        /// Creates the provider from the cache files that share <paramref name="pathPrefix"/>: the
        /// transcript cache is initialized first, then readers are opened over the SIFT and PolyPhen
        /// caches.
        /// </summary>
        /// <param name="pathPrefix">prefix used to derive the transcript, SIFT and PolyPhen cache paths</param>
        /// <param name="sequenceProvider">supplies the sequence, chromosome dictionary and assembly</param>
        public TranscriptAnnotationProvider(string pathPrefix, ISequenceProvider sequenceProvider)
        {
            Name      = "Transcript annotation provider";
            _sequence = sequenceProvider.Sequence;

            // transcript cache
            var cacheStream = PersistentStreamUtils.GetReadStream(CacheConstants.TranscriptPath(pathPrefix));
            (_transcriptCache, TranscriptIntervalArrays, VepVersion) =
                InitiateCache(cacheStream, sequenceProvider.RefIndexToChromosome, sequenceProvider.Assembly);

            Assembly           = _transcriptCache.Assembly;
            DataSourceVersions = _transcriptCache.DataSourceVersions;

            // prediction caches
            _siftReader = new PredictionCacheReader(
                PersistentStreamUtils.GetReadStream(CacheConstants.SiftPath(pathPrefix)),
                PredictionCacheReader.SiftDescriptions);

            _polyphenReader = new PredictionCacheReader(
                PersistentStreamUtils.GetReadStream(CacheConstants.PolyPhenPath(pathPrefix)),
                PredictionCacheReader.PolyphenDescriptions);
        }
Example #20
0
        /// <summary>
        /// Reads the transcript cache for the configured cache prefix and writes each of its
        /// regulatory elements to the output file through WriteRegulatoryFeature.
        /// </summary>
        protected override void ProgramExecution()
        {
            var ucscNames = GetUcscReferenceNames(ConfigurationSettings.CompressedReferencePath);

            using (var gffWriter = GZipUtilities.GetStreamWriter(ConfigurationSettings.OutputFileName))
            {
                var transcriptCachePath = CacheConstants.TranscriptPath(ConfigurationSettings.CachePrefix);

                // load the cache
                Console.Write("- reading {0}... ", Path.GetFileName(transcriptCachePath));
                var transcriptCache = GetCache(transcriptCachePath);
                Console.WriteLine("found {0:N0} regulatory regions. ", transcriptCache.RegulatoryElements.Length);

                Console.Write("- writing GFF entries... ");
                foreach (var element in transcriptCache.RegulatoryElements)
                {
                    WriteRegulatoryFeature(gffWriter, ucscNames, element);
                }
                Console.WriteLine("finished.");
            }
        }
        /// <summary>
        /// Builds a NirvanaAnnotationSource from the files described by <paramref name="annotatorPaths"/>
        /// and enables the optional features requested in <paramref name="annotatorInfo"/>.
        /// </summary>
        /// <param name="annotatorInfo">its boolean arguments select the optional features to enable</param>
        /// <param name="annotatorPaths">cache prefix, reference path and annotation directories</param>
        /// <returns>the configured annotation source</returns>
        public IAnnotationSource CreateAnnotationSource(IAnnotatorInfo annotatorInfo, IAnnotatorPaths annotatorPaths)
        {
            var phylopReader = new PhylopReader(annotatorPaths.SupplementaryAnnotation);

            // streams are opened in the order transcript, SIFT, PolyPhen, reference
            var sourceStreams = new AnnotationSourceStreams(
                FileUtilities.GetReadStream(CacheConstants.TranscriptPath(annotatorPaths.CachePrefix)),
                FileUtilities.GetReadStream(CacheConstants.SiftPath(annotatorPaths.CachePrefix)),
                FileUtilities.GetReadStream(CacheConstants.PolyPhenPath(annotatorPaths.CachePrefix)),
                FileUtilities.GetReadStream(annotatorPaths.CompressedReference));

            // each optional provider stays null when nothing was configured for it
            CustomAnnotationProvider customAnnotationProvider = null;
            if (annotatorPaths.CustomAnnotation.Any())
            {
                customAnnotationProvider = new CustomAnnotationProvider(annotatorPaths.CustomAnnotation);
            }

            CustomIntervalProvider customIntervalProvider = null;
            if (annotatorPaths.CustomIntervals.Any())
            {
                customIntervalProvider = new CustomIntervalProvider(annotatorPaths.CustomIntervals);
            }

            SupplementaryAnnotationProvider supplementaryProvider = null;
            if (annotatorPaths.SupplementaryAnnotation != null)
            {
                supplementaryProvider = new SupplementaryAnnotationProvider(annotatorPaths.SupplementaryAnnotation);
            }

            //adding the saPath because OMIM needs it
            var source = new NirvanaAnnotationSource(sourceStreams, supplementaryProvider, phylopReader,
                                                     customAnnotationProvider, customIntervalProvider,
                                                     annotatorPaths.SupplementaryAnnotation);

            if (annotatorInfo.BooleanArguments.Contains(AnnotatorInfoCommon.ReferenceNoCall))
            {
                bool transcriptOnly = annotatorInfo.BooleanArguments.Contains(AnnotatorInfoCommon.TranscriptOnlyRefNoCall);
                source.EnableReferenceNoCalls(transcriptOnly);
            }

            if (annotatorInfo.BooleanArguments.Contains(AnnotatorInfoCommon.EnableMitochondrialAnnotation))
            {
                source.EnableMitochondrialAnnotation();
            }

            if (annotatorInfo.BooleanArguments.Contains(AnnotatorInfoCommon.ReportAllSvOverlappingTranscripts))
            {
                source.EnableReportAllSvOverlappingTranscripts();
            }

            if (annotatorInfo.BooleanArguments.Contains(AnnotatorInfoCommon.EnableLoftee))
            {
                source.AddPlugin(new Loftee());
            }

            return source;
        }
Example #22
0
        /// <summary>
        /// Checks the command-line settings: input files and directories are checked through the
        /// base-class helpers, and dependent settings are adjusted for stdout output and for
        /// transcript-limited reference no-calls.
        /// </summary>
        protected override void ValidateCommandLine()
        {
            // "-" as the VCF path skips the file existence check
            if (ConfigurationSettings.VcfPath != "-")
            {
                CheckInputFilenameExists(ConfigurationSettings.VcfPath, "vcf", "--in");
            }

            CheckInputFilenameExists(ConfigurationSettings.CompressedReferencePath, "compressed reference sequence", "--ref");

            // all three cache files are derived from the same --cache prefix
            CheckInputFilenameExists(CacheConstants.TranscriptPath(ConfigurationSettings.InputCachePrefix), "transcript cache", "--cache");
            CheckInputFilenameExists(CacheConstants.SiftPath(ConfigurationSettings.InputCachePrefix), "SIFT cache", "--cache");
            CheckInputFilenameExists(CacheConstants.PolyPhenPath(ConfigurationSettings.InputCachePrefix), "PolyPhen cache", "--cache");

            CheckDirectoryExists(ConfigurationSettings.SupplementaryAnnotationDirectory, "supplementary annotation", "--sd", false);

            foreach (var annotationDir in ConfigurationSettings.CustomAnnotationDirectories)
            {
                CheckDirectoryExists(annotationDir, "custom annotation", "--ca", false);
            }

            foreach (var intervalDir in ConfigurationSettings.CustomIntervalDirectories)
            {
                CheckDirectoryExists(intervalDir, "custom interval", "--ci", false);
            }

            // if we're using stdout, it doesn't make sense to output the VCF and gVCF
            if (ConfigurationSettings.OutputFileName == "-")
            {
                ConfigurationSettings.Vcf        = false;
                ConfigurationSettings.Gvcf       = false;
                PerformanceMetrics.DisableOutput = true;
            }

            HasRequiredParameter(ConfigurationSettings.OutputFileName, "output file stub", "--out");

            // limiting reference no-calls to transcripts also switches reference no-calls on
            if (ConfigurationSettings.LimitReferenceNoCallsToTranscripts)
            {
                ConfigurationSettings.EnableReferenceNoCalls = true;
            }
        }
Example #23
0
        /// <summary>
        /// Loads gene symbols from the transcript cache, then parses the gnomAD gene input file and
        /// writes its items to an NGA file in the output directory. The output file name is built from
        /// the name and version read from the ".version" file next to the input.
        /// </summary>
        /// <returns><see cref="ExitCodes.Success"/> once the NGA file has been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            Dictionary <string, string> symbolsByGeneId;

            using (var transcriptStream = FileUtilities.GetReadStream(CacheConstants.TranscriptPath(_cachePrefix)))
            using (var cacheReader = new TranscriptCacheReader(transcriptStream))
            using (var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_referenceSequncePath)))
            {
                symbolsByGeneId = LoadGenesFromCache(referenceProvider, cacheReader);
                Console.WriteLine($"Loaded {symbolsByGeneId.Count} gene symbols from cache.");
            }

            var sourceVersion = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");
            var baseName      = $"{sourceVersion.Name}_{sourceVersion.Version}";

            using (var geneParser = new GnomadGeneParser(GZipUtilities.GetAppropriateStreamReader(_inputFile), symbolsByGeneId))
            using (var ngaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, baseName + SaCommon.NgaFileSuffix)))
            using (var writer = new NgaWriter(ngaStream, sourceVersion, SaCommon.GnomadGeneScoreTag, SaCommon.SchemaVersion, false))
            {
                writer.Write(geneParser.GetItems());
            }

            return ExitCodes.Success;
        }
Example #24
0
        /// <summary>
        /// Reads the <c>.transcripts.gz</c>, <c>.regulatory.gz</c>, <c>.sift.gz</c> and <c>.polyphen.gz</c>
        /// files that share <c>_inputPrefix</c>, together with the universal gene archive, and writes the
        /// SIFT, PolyPhen and transcript caches using <c>_outputCacheFilePrefix</c>.
        /// </summary>
        /// <returns><see cref="ExitCodes.Success"/> after the three caches have been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            string transcriptsFile = $"{_inputPrefix}.transcripts.gz";
            string siftFile        = $"{_inputPrefix}.sift.gz";
            string polyphenFile    = $"{_inputPrefix}.polyphen.gz";
            string regulatoryFile  = $"{_inputPrefix}.regulatory.gz";

            (var refIndexToChromosome, var refNameToChromosome, int numRefSeqs) = SequenceHelper.GetDictionaries(_inputReferencePath);

            // The readers are opened in the same order as before: transcript, regulatory, SIFT, PolyPhen, gene.
            using (var transcriptReader = new MutableTranscriptReader(GZipUtilities.GetAppropriateReadStream(transcriptsFile), refIndexToChromosome))
            using (var regulatoryReader = new RegulatoryRegionReader(GZipUtilities.GetAppropriateReadStream(regulatoryFile), refIndexToChromosome))
            using (var siftReader = new PredictionReader(GZipUtilities.GetAppropriateReadStream(siftFile), refIndexToChromosome, IntermediateIoCommon.FileType.Sift))
            using (var polyphenReader = new PredictionReader(GZipUtilities.GetAppropriateReadStream(polyphenFile), refIndexToChromosome, IntermediateIoCommon.FileType.Polyphen))
            using (var geneReader = new UgaGeneReader(GZipUtilities.GetAppropriateReadStream(ExternalFiles.UniversalGeneFilePath), refNameToChromosome))
            {
                // The transcript header supplies the metadata used by both cache builders.
                var    header       = transcriptReader.Header;
                var    assembly     = header.Assembly;
                var    dataSource   = header.Source;
                long   releaseTicks = header.VepReleaseTicks;
                ushort vepVersion   = header.VepVersion;

                Logger.Write("- loading universal gene archive file... ");
                var genes      = geneReader.GetGenes();
                var geneForest = CreateGeneForest(genes, numRefSeqs, assembly);
                Logger.WriteLine($"{genes.Length:N0} loaded.");

                Logger.Write("- loading regulatory region file... ");
                var regulatoryRegions = regulatoryReader.GetRegulatoryRegions();
                Logger.WriteLine($"{regulatoryRegions.Length:N0} loaded.");

                Logger.Write("- loading transcript file... ");
                var transcripts           = transcriptReader.GetTranscripts();
                var transcriptsByRefIndex = transcripts.GetMultiValueDict(x => x.Chromosome.Index);
                Logger.WriteLine($"{transcripts.Length:N0} loaded.");

                MarkCanonicalTranscripts(transcripts);

                var predictionCaches = new PredictionCacheBuilder(assembly)
                    .CreatePredictionCaches(transcriptsByRefIndex, siftReader, polyphenReader, numRefSeqs);

                Logger.Write("- writing SIFT prediction cache... ");
                var siftCachePath = CacheConstants.SiftPath(_outputCacheFilePrefix);
                predictionCaches.Sift.Write(FileUtilities.GetCreateStream(siftCachePath));
                Logger.WriteLine("finished.");

                Logger.Write("- writing PolyPhen prediction cache... ");
                var polyphenCachePath = CacheConstants.PolyPhenPath(_outputCacheFilePrefix);
                predictionCaches.PolyPhen.Write(FileUtilities.GetCreateStream(polyphenCachePath));
                Logger.WriteLine("finished.");

                var transcriptStaging = new TranscriptCacheBuilder(assembly, dataSource, releaseTicks, vepVersion)
                    .CreateTranscriptCache(transcripts, regulatoryRegions, geneForest, numRefSeqs);

                Logger.Write("- writing transcript cache... ");
                var transcriptCachePath = CacheConstants.TranscriptPath(_outputCacheFilePrefix);
                transcriptStaging.Write(FileUtilities.GetCreateStream(transcriptCachePath));
                Logger.WriteLine("finished.");
            }

            return ExitCodes.Success;
        }
 /// <summary>
 /// Opens the transcript cache file derived from <paramref name="transcriptCachePrefix"/> and reads its contents.
 /// </summary>
 /// <param name="refIndexToChromosome">Lookup passed through to the cache reader's <c>Read</c> call.</param>
 /// <param name="transcriptCachePrefix">Prefix handed to <c>CacheConstants.TranscriptPath</c> to obtain the file path.</param>
 /// <returns>The data returned by the transcript cache reader.</returns>
 public static TranscriptCacheData GetTranscriptData(IDictionary <ushort, IChromosome> refIndexToChromosome, string transcriptCachePrefix)
 {
     var cachePath = CacheConstants.TranscriptPath(transcriptCachePrefix);

     using (var reader = new TranscriptCacheReader(FileUtilities.GetReadStream(cachePath)))
     {
         return reader.Read(refIndexToChromosome);
     }
 }
Example #26
0
        /// <summary>
        /// Checks that <c>CacheConstants.TranscriptPath</c> yields null when it is given a null prefix.
        /// </summary>
        public void TranscriptPath_Null_WithNullPrefix()
        {
            Assert.Null(CacheConstants.TranscriptPath(null));
        }
Example #27
0
        /// <summary>
        /// Creates the SpliceAI supplementary annotation file and its index in <c>_outputDirectory</c>.
        /// Transcript data is read from the cache to derive splice intervals, and SpliceAI gene symbols
        /// are mapped onto the symbols obtained from the transcript data before the input is parsed.
        /// </summary>
        /// <returns><see cref="ExitCodes.Success"/> once the annotation file has been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            // The reference provider wraps an open read stream; the using block releases it on every
            // exit path (previously it was never disposed).
            using (var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference)))
            {
                TranscriptCacheData transcriptData;

                using (var transcriptCacheReader = new TranscriptCacheReader(FileUtilities.GetReadStream(CacheConstants.TranscriptPath(_transcriptCachePrefix))))
                {
                    transcriptData = transcriptCacheReader.Read(referenceProvider.RefIndexToChromosome);
                }

                var spliceIntervals      = SpliceUtilities.GetSpliceIntervals(referenceProvider, transcriptData);
                var nirEnstToGeneSymbols = SpliceUtilities.GetEnstToGeneSymbols(referenceProvider, transcriptData);

                Dictionary<string, string> spliceAiEnstToGeneSymbols;

                using (var reader = new StreamReader(GZipUtilities.GetAppropriateReadStream(_geneInfoFile)))
                {
                    spliceAiEnstToGeneSymbols = SpliceUtilities.GetSpliceAiGeneSymbols(reader);
                }

                var spliceAiToNirvanaGeneSymbols =
                    SpliceUtilities.GetSymbolMapping(spliceAiEnstToGeneSymbols, nirEnstToGeneSymbols);

                Console.WriteLine($"Mapped {spliceAiToNirvanaGeneSymbols.Count} spliceAI gene symbols to Nirvana gene symbols (out of {spliceAiEnstToGeneSymbols.Count})");

                // The output file name is derived from the data source version stored next to the input file.
                var    version     = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");
                string outFileName = $"{version.Name}_{version.Version}";

                using (var spliceAiParser = new SpliceAiParser(
                           GZipUtilities.GetAppropriateReadStream(_inputFile),
                           referenceProvider, spliceIntervals, spliceAiToNirvanaGeneSymbols))
                using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix)))
                using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSufix)))
                using (var nsaWriter = new NsaWriter(nsaStream, indexStream, version, referenceProvider, SaCommon.SpliceAiTag, true, true, SaCommon.SchemaVersion, false))
                {
                    nsaWriter.Write(spliceAiParser.GetItems());
                }

                Console.WriteLine($"Total number of entries from Splice AI: {SpliceAiParser.Count}");
                return ExitCodes.Success;
            }
        }
Example #28
0
        /// <summary>
        /// Creates the SpliceAI supplementary annotation file and its index in <c>_outputDirectory</c>.
        /// Transcript data is read from the cache to derive splice intervals and a gene forest, and gene
        /// symbol synonyms are loaded from the gene info file before the input is parsed.
        /// </summary>
        /// <returns><see cref="ExitCodes.Success"/> once the annotation file has been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            // The reference provider wraps an open read stream; the using block releases it on every
            // exit path (previously it was never disposed).
            using (var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference)))
            {
                TranscriptCacheData transcriptData;

                using (var transcriptCacheReader = new TranscriptCacheReader(FileUtilities.GetReadStream(CacheConstants.TranscriptPath(_transcriptCachePrefix))))
                {
                    transcriptData = transcriptCacheReader.Read(referenceProvider.RefIndexToChromosome);
                }

                var spliceIntervals   = SpliceUtilities.GetSpliceIntervals(referenceProvider, transcriptData);
                var nirvanaGeneForest = SpliceUtilities.GetGeneForest(transcriptData);

                Console.WriteLine("Loaded transcripts and generated splice intervals.");

                Dictionary<string, List<string>> geneSymbolSynonyms;

                using (var geneInfoParser = new GeneInfoParser(GZipUtilities.GetAppropriateStreamReader(_geneInfoFile)))
                {
                    geneSymbolSynonyms = geneInfoParser.GetGeneSymbolSynonyms();
                }

                Console.WriteLine("Loaded gene symbol synonyms");

                // The output file name is derived from the data source version stored next to the input file.
                var    version     = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");
                string outFileName = $"{version.Name}_{version.Version}";

                using (var spliceAiParser = new SpliceAiParser(
                           GZipUtilities.GetAppropriateReadStream(_inputFile),
                           referenceProvider, spliceIntervals, nirvanaGeneForest, geneSymbolSynonyms))
                using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix)))
                using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSufix)))
                // The writer is now disposed deterministically instead of being left to the garbage collector.
                using (var nsaWriter = new NsaWriter(new ExtendedBinaryWriter(nsaStream), new ExtendedBinaryWriter(indexStream), version, referenceProvider, SaCommon.SpliceAiTag, true, true, SaCommon.SchemaVersion, false))
                {
                    nsaWriter.Write(spliceAiParser.GetItems());
                }

                Console.WriteLine($"Total number of entries from Splice AI: {SpliceAiParser.Count}");
                return ExitCodes.Success;
            }
        }
Example #29
0
        /// <summary>
        /// Command-line entry point: declares the supported options, validates the supplied paths and
        /// required parameters, and then hands control to <c>ProgramExecution</c>.
        /// </summary>
        /// <param name="args">Raw command-line arguments.</param>
        /// <returns>The exit code produced by the console application builder, cast to <c>int</c>.</returns>
        public static int Main(string[] args)
        {
            // Each entry is: option prototype, help text, handler that stores the parsed value.
            var options = new OptionSet
            {
                { "cache|c=", "input cache {prefix}", v => _inputCachePrefix = v },
                { "in|i=", "input VCF {path}", v => _vcfPath = v },
                { "out|o=", "output {file path}", v => _outputFileName = v },
                { "ref|r=", "input compressed reference sequence {path}", v => _refSequencePath = v },
                { "sd=", "input supplementary annotation {directory}", v => SupplementaryAnnotationDirectories.Add(v) },
                { "force-mt", "forces to annotate mitochondrial variants", v => _forceMitochondrialAnnotation = v != null },
                { "disable-recomposition", "don't recompose function relevant variants", v => _disableRecomposition = v != null },
                { "legacy-vids", "enables support for legacy VIDs", v => _useLegacyVids = v != null },
                { "enable-dq", "report DQ from VCF samples field", v => _enableDq = v != null },
                { "str=", "user provided STR annotation TSV file", v => _customStrTsv = v }
            };

            // The order of the checks below is preserved from the original chain.
            var result = new ConsoleAppBuilder(args, options)
                .UseVersionProvider(new VersionProvider())
                .Parse()
                .CheckInputFilenameExists(_vcfPath, "vcf", "--in", true, "-")
                .CheckInputFilenameExists(_refSequencePath, "reference sequence", "--ref")
                .CheckInputFilenameExists(CacheConstants.TranscriptPath(_inputCachePrefix), "transcript cache", "--cache")
                .CheckInputFilenameExists(CacheConstants.SiftPath(_inputCachePrefix), "SIFT cache", "--cache")
                .CheckInputFilenameExists(CacheConstants.PolyPhenPath(_inputCachePrefix), "PolyPhen cache", "--cache")
                .CheckInputFilenameExists(_customStrTsv, "custom STR annotation TSV", "--str", false)
                .HasRequiredParameter(_outputFileName, "output file stub", "--out")
                .DisableOutput(_outputFileName == "-")
                .ShowBanner(Constants.Authors)
                .ShowHelpMenu("Annotates a set of variants", "-i <vcf path> -c <cache prefix> --sd <sa dir> -r <ref path> -o <base output filename>")
                .ShowErrors()
                .Execute(ProgramExecution);

            return (int)result;
        }
Example #30
0
        /// <summary>
        /// Builds a <c>Recomposer</c> from the transcript cache identified by <paramref name="inputCachePrefix"/>.
        /// </summary>
        /// <param name="sequenceProvider">Supplies <c>RefIndexToChromosome</c> for reading the cache and is passed on to the variant generator and recomposer.</param>
        /// <param name="inputCachePrefix">Prefix handed to <c>CacheConstants.TranscriptPath</c> to locate the transcript cache.</param>
        /// <returns>The newly constructed recomposer.</returns>
        public static IRecomposer Create(ISequenceProvider sequenceProvider,
                                         string inputCachePrefix)
        {
            var cacheStream = FileUtilities.GetReadStream(CacheConstants.TranscriptPath(inputCachePrefix));
            var intervalArrays = ReadWriteUtilities.ReadCache(cacheStream, sequenceProvider.RefIndexToChromosome);

            // Only the gene interval forest is needed; the second tuple element is discarded.
            var (geneForest, _) = ReadWriteUtilities.GetIntervalAndTranscriptsForeachGene(intervalArrays);

            var codonInfo     = CodonInfoProvider.CreateCodonInfoProvider(intervalArrays);
            var generator     = new VariantGenerator(sequenceProvider);
            var buffer        = new PositionBuffer(codonInfo, geneForest);
            var processor     = new PositionProcessor(buffer, codonInfo, generator);

            return new Recomposer(processor, sequenceProvider);
        }