Пример #1
0
        /// <summary>
        /// Merges the SIFT prediction caches of the two input prefixes and writes the result
        /// under the output prefix, then does the same for the PolyPhen caches.
        /// </summary>
        private void CombinePredictionsCaches()
        {
            // ---- SIFT ----
            Console.WriteLine("Writing combined Sift...");

            var siftCaches     = GetMergedPredictions(CacheConstants.SiftPath(_prefix1), CacheConstants.SiftPath(_prefix2));
            var siftOutputPath = CacheConstants.SiftPath(_outPrefix);
            var siftHeader     = PredictionCacheHeader.GetHeader(DateTime.Now.Ticks, _genomeAssembly, _numRefSeq);

            using (var siftWriter = new PredictionCacheWriter(siftOutputPath, siftHeader))
            {
                // the combined lookup table is the concatenation of every cache's lookup table
                var siftEntries = new List<Prediction.Entry>();
                foreach (var siftCache in siftCaches)
                {
                    siftEntries.AddRange(siftCache.LookupTable);
                }

                var siftPredictions = siftCaches.Select(cache => cache.Predictions).ToArray();
                siftWriter.Write(siftEntries.ToArray(), siftPredictions);
            }

            Console.WriteLine("Done.");

            // ---- PolyPhen ----
            Console.WriteLine("writing combined polyphen");

            var polyphenCaches     = GetMergedPredictions(CacheConstants.PolyPhenPath(_prefix1), CacheConstants.PolyPhenPath(_prefix2));
            var polyphenOutputPath = CacheConstants.PolyPhenPath(_outPrefix);
            var polyphenHeader     = PredictionCacheHeader.GetHeader(DateTime.Now.Ticks, _genomeAssembly, _numRefSeq);

            using (var polyphenWriter = new PredictionCacheWriter(polyphenOutputPath, polyphenHeader))
            {
                var polyphenEntries = new List<Prediction.Entry>();
                foreach (var polyphenCache in polyphenCaches)
                {
                    polyphenEntries.AddRange(polyphenCache.LookupTable);
                }

                var polyphenPredictions = polyphenCaches.Select(cache => cache.Predictions).ToArray();
                polyphenWriter.Write(polyphenEntries.ToArray(), polyphenPredictions);
            }

            Console.WriteLine("Done");
        }
        /// <summary>
        /// Builds the provider: loads the transcript cache located via <paramref name="pathPrefix"/>
        /// and opens readers over the SIFT and PolyPhen caches that share the same prefix.
        /// </summary>
        /// <param name="pathPrefix">prefix from which the transcript, SIFT and PolyPhen cache paths are derived</param>
        /// <param name="sequenceProvider">source of the sequence, the chromosome dictionaries and the assembly</param>
        /// <param name="conservationProvider">may be null; when present, its version is appended to the data source versions</param>
        public TranscriptAnnotationProvider(string pathPrefix, ISequenceProvider sequenceProvider, ProteinConservationProvider conservationProvider)
        {
            Name                  = "Transcript annotation provider";
            _sequence             = sequenceProvider.Sequence;
            _refNameToChromosome  = sequenceProvider.RefNameToChromosome;
            _conservationProvider = conservationProvider;

            // the transcript stream is only needed while the cache is being loaded
            var transcriptPath = CacheConstants.TranscriptPath(pathPrefix);
            using (var transcriptStream = PersistentStreamUtils.GetReadStream(transcriptPath))
            {
                (_transcriptCache, TranscriptIntervalArrays, VepVersion) = InitiateCache(
                    transcriptStream,
                    sequenceProvider.RefIndexToChromosome,
                    sequenceProvider.Assembly);
            }

            Assembly           = _transcriptCache.Assembly;
            DataSourceVersions = _transcriptCache.DataSourceVersions;

            // TODO: this is not great. We should not be using IEnumerables if we have to resort to strange stuff like this
            if (conservationProvider != null)
            {
                var conservationVersion = new[] { conservationProvider.Version };
                DataSourceVersions = DataSourceVersions.Concat(conservationVersion);
            }

            // the prediction streams are kept in fields alongside their readers
            var siftPath = CacheConstants.SiftPath(pathPrefix);
            _siftStream  = PersistentStreamUtils.GetReadStream(siftPath);
            _siftReader  = new PredictionCacheReader(_siftStream, PredictionCacheReader.SiftDescriptions);

            var polyphenPath = CacheConstants.PolyPhenPath(pathPrefix);
            _polyphenStream  = PersistentStreamUtils.GetReadStream(polyphenPath);
            _polyphenReader  = new PredictionCacheReader(_polyphenStream, PredictionCacheReader.PolyphenDescriptions);
        }
Пример #3
0
        /// <summary>
        /// The SIFT cache path for the prefix "bob" is expected to be "bob.sift.ndb".
        /// </summary>
        public void SiftPath_NominalCase()
        {
            var actualPath = CacheConstants.SiftPath("bob");

            Assert.Equal("bob.sift.ndb", actualPath);
        }
Пример #4
0
        /// <summary>
        /// Loads the data bundle for the configured reference, gathers the transcripts, SIFT and
        /// PolyPhen predictions, regulatory regions and reference bases for the configured interval,
        /// and writes them to the transcript, SIFT, PolyPhen and bases paths derived from the output stub.
        /// </summary>
        /// <returns><see cref="ExitCodes.Success"/> after all outputs have been written</returns>
        private static ExitCodes ProgramExecution()
        {
            var consoleLogger    = new ConsoleLogger();
            var dataBundle       = DataBundle.GetDataBundle(_inputReferencePath, _inputPrefix);
            int numRefSeqs       = dataBundle.SequenceReader.NumRefSeqs;
            var targetChromosome = ReferenceNameUtilities.GetChromosome(dataBundle.SequenceReader.RefNameToChromosome, _referenceName);

            dataBundle.Load(targetChromosome);

            string outputStub     = GetOutputStub(targetChromosome, dataBundle.Source);
            var    targetInterval = new ChromosomeInterval(targetChromosome, _referencePosition, _referenceEndPosition);
            var    transcripts    = GetTranscripts(consoleLogger, dataBundle, targetInterval);

            // stage the predictions that belong to the selected transcripts
            var siftResult = GetPredictionStaging(consoleLogger, "SIFT", transcripts, targetChromosome,
                                                  dataBundle.SiftPredictions, dataBundle.SiftReader, t => t.SiftIndex, numRefSeqs);
            var polyphenResult = GetPredictionStaging(consoleLogger, "PolyPhen", transcripts, targetChromosome,
                                                      dataBundle.PolyPhenPredictions, dataBundle.PolyPhenReader, t => t.PolyPhenIndex, numRefSeqs);

            string referenceBases = GetReferenceBases(consoleLogger, dataBundle.SequenceReader, targetInterval);

            var regulatoryArrays = GetRegulatoryRegionIntervalArrays(consoleLogger, dataBundle.TranscriptCache, targetInterval, numRefSeqs);
            var transcriptArrays = PredictionUtilities.UpdateTranscripts(
                transcripts,
                dataBundle.SiftPredictions, siftResult.Predictions,
                dataBundle.PolyPhenPredictions, polyphenResult.Predictions,
                numRefSeqs);

            var transcriptStaging = GetTranscriptStaging(dataBundle.TranscriptCacheData.Header, transcriptArrays, regulatoryArrays);

            // each output stream is created immediately before it is written
            var transcriptOutput = FileUtilities.GetCreateStream(CacheConstants.TranscriptPath(outputStub));
            WriteCache(consoleLogger, transcriptOutput, transcriptStaging, "transcript");

            var siftOutput = FileUtilities.GetCreateStream(CacheConstants.SiftPath(outputStub));
            WriteCache(consoleLogger, siftOutput, siftResult.Staging, "SIFT");

            var polyphenOutput = FileUtilities.GetCreateStream(CacheConstants.PolyPhenPath(outputStub));
            WriteCache(consoleLogger, polyphenOutput, polyphenResult.Staging, "PolyPhen");

            WriteReference(consoleLogger, CacheConstants.BasesPath(outputStub), dataBundle.SequenceReader,
                           targetChromosome, referenceBases, targetInterval.Start);

            return ExitCodes.Success;
        }
        /// <summary>
        /// Combines two caches: for every reference sequence the SIFT predictions, PolyPhen predictions
        /// and transcripts of both input prefixes are merged, after which the combined predictions and
        /// transcripts are written under the output prefix.
        /// </summary>
        /// <returns><see cref="ExitCodes.Success"/> once the combined output has been written</returns>
        /// <exception cref="InvalidDataException">the two transcript caches do not hold the same number of reference sequences</exception>
        private static ExitCodes ProgramExecution()
        {
            var sequenceData  = SequenceHelper.GetDictionaries(_refSequencePath);
            var consoleLogger = new ConsoleLogger();

            var caches = LoadTranscriptCaches(consoleLogger,
                                              CacheConstants.TranscriptPath(_inputPrefix),
                                              CacheConstants.TranscriptPath(_inputPrefix2),
                                              sequenceData.refIndexToChromosome);

            // both caches must describe the same set of reference sequences
            int numRefSeqs  = caches.Cache.TranscriptIntervalArrays.Length;
            int numRefSeqs2 = caches.Cache2.TranscriptIntervalArrays.Length;

            if (numRefSeqs != numRefSeqs2)
            {
                throw new InvalidDataException($"Expected the number of reference sequences in cache 1 ({numRefSeqs}) and cache 2 ({numRefSeqs2}) to be the same.");
            }

            var mergedIntervalArrays = new IntervalArray<ITranscript>[numRefSeqs];
            var siftByRef            = new Prediction[numRefSeqs][];
            var polyphenByRef        = new Prediction[numRefSeqs][];

            PredictionHeader siftHeader;
            PredictionHeader polyphenHeader;

            using (var siftReader      = new PredictionCacheReader(FileUtilities.GetReadStream(CacheConstants.SiftPath(_inputPrefix)), PredictionCacheReader.SiftDescriptions))
            using (var siftReader2     = new PredictionCacheReader(FileUtilities.GetReadStream(CacheConstants.SiftPath(_inputPrefix2)), PredictionCacheReader.SiftDescriptions))
            using (var polyphenReader  = new PredictionCacheReader(FileUtilities.GetReadStream(CacheConstants.PolyPhenPath(_inputPrefix)), PredictionCacheReader.PolyphenDescriptions))
            using (var polyphenReader2 = new PredictionCacheReader(FileUtilities.GetReadStream(CacheConstants.PolyPhenPath(_inputPrefix2)), PredictionCacheReader.PolyphenDescriptions))
            {
                // the output headers are taken from the first SIFT and PolyPhen readers
                siftHeader     = siftReader.Header;
                polyphenHeader = polyphenReader.Header;

                for (ushort refIndex = 0; refIndex < numRefSeqs; refIndex++)
                {
                    var chromosome = sequenceData.refIndexToChromosome[refIndex];

                    // chromosome banner in yellow
                    Console.ForegroundColor = ConsoleColor.Yellow;
                    consoleLogger.WriteLine($"\n{chromosome.UcscName}:");
                    Console.ResetColor();

                    var combinedSift = CombinePredictions(consoleLogger, chromosome, "SIFT", siftReader, siftReader2);
                    siftByRef[refIndex] = combinedSift.Predictions;

                    var combinedPolyphen = CombinePredictions(consoleLogger, chromosome, "PolyPhen", polyphenReader, polyphenReader2);
                    polyphenByRef[refIndex] = combinedPolyphen.Predictions;

                    var firstArray  = caches.Cache.TranscriptIntervalArrays[refIndex];
                    var secondArray = caches.Cache2.TranscriptIntervalArrays[refIndex];

                    mergedIntervalArrays[refIndex] = CombineTranscripts(consoleLogger, firstArray, secondArray,
                                                                        combinedSift.Offset, combinedPolyphen.Offset);
                }
            }

            consoleLogger.WriteLine();
            WritePredictions(consoleLogger, "SIFT", CacheConstants.SiftPath(_outputPrefix), siftHeader, siftByRef);
            WritePredictions(consoleLogger, "PolyPhen", CacheConstants.PolyPhenPath(_outputPrefix), polyphenHeader, polyphenByRef);
            WriteTranscripts(consoleLogger, CloneHeader(caches.Cache.Header), mergedIntervalArrays,
                             caches.Cache.RegulatoryRegionIntervalArrays);

            return ExitCodes.Success;
        }
        /// <summary>
        /// Builds the provider: loads the transcript cache located via <paramref name="pathPrefix"/>
        /// and creates readers over the SIFT and PolyPhen caches that share the same prefix.
        /// </summary>
        /// <param name="pathPrefix">prefix from which the transcript, SIFT and PolyPhen cache paths are derived</param>
        /// <param name="sequenceProvider">source of the sequence, the reference index dictionary and the genome assembly</param>
        public TranscriptAnnotationProvider(string pathPrefix, ISequenceProvider sequenceProvider)
        {
            Name      = "Transcript annotation provider";
            _sequence = sequenceProvider.Sequence;

            // transcript cache
            var transcriptStream = FileUtilities.GetReadStream(CacheConstants.TranscriptPath(pathPrefix));
            (_transcriptCache, VepVersion) = InitiateCache(transcriptStream, sequenceProvider.RefIndexToChromosome,
                                                           sequenceProvider.GenomeAssembly);

            GenomeAssembly     = _transcriptCache.GenomeAssembly;
            DataSourceVersions = _transcriptCache.DataSourceVersions;

            // prediction caches
            var siftStream = FileUtilities.GetReadStream(CacheConstants.SiftPath(pathPrefix));
            _siftReader    = new PredictionCacheReader(siftStream, PredictionCacheReader.SiftDescriptions);

            var polyphenStream = FileUtilities.GetReadStream(CacheConstants.PolyPhenPath(pathPrefix));
            _polyphenReader    = new PredictionCacheReader(polyphenStream, PredictionCacheReader.PolyphenDescriptions);
        }
Пример #7
0
        /// <summary>
        /// Loads the compressed reference sequence, the global cache and the SIFT/PolyPhen prediction
        /// readers for the given cache prefix, then builds the transcript interval forest.
        /// </summary>
        /// <param name="compressedSequencePath">path of the compressed reference sequence file</param>
        /// <param name="cachePrefix">prefix used to load the global cache and to derive the SIFT and PolyPhen paths</param>
        /// <returns>the populated bundle, or null when <c>CacheUtilities.LoadCache</c> returned null</returns>
        private static DataBundle GetDataBundle(string compressedSequencePath, string cachePrefix)
        {
            Console.Write("- loading global cache and reference sequence... ");
            var sequence = new CompressedSequence();

            var bundle = new DataBundle
            {
                Sequence       = sequence,
                SequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(compressedSequencePath), sequence),
                Cache          = CacheUtilities.LoadCache(cachePrefix),
                SiftReader     = CacheUtilities.GetPredictionReader(CacheConstants.SiftPath(cachePrefix)),
                PolyPhenReader = CacheUtilities.GetPredictionReader(CacheConstants.PolyPhenPath(cachePrefix))
            };

            // The forest construction below dereferences bundle.Cache, so the null check has to
            // come first; previously a missing cache raised a NullReferenceException instead of
            // reaching the "return null" path.
            if (bundle.Cache == null)
            {
                // terminate the progress line started above
                Console.WriteLine();
                return null;
            }

            bundle.TranscriptForest = CacheUtilities.GetIntervalForest(bundle.Cache.Transcripts, bundle.Sequence.Renamer.NumRefSeqs);

            Console.WriteLine("finished.");

            return bundle;
        }
Пример #8
0
        /// <summary>
        /// Builds the provider: loads the transcript cache located via <paramref name="pathPrefix"/>
        /// and creates readers over the SIFT and PolyPhen caches that share the same prefix.
        /// </summary>
        /// <param name="pathPrefix">prefix from which the transcript, SIFT and PolyPhen cache paths are derived</param>
        /// <param name="sequenceProvider">source of the sequence, the reference index dictionary and the assembly</param>
        public TranscriptAnnotationProvider(string pathPrefix, ISequenceProvider sequenceProvider)
        {
            Name      = "Transcript annotation provider";
            _sequence = sequenceProvider.Sequence;

            // transcript cache
            (_transcriptCache, TranscriptIntervalArrays, VepVersion) = InitiateCache(
                PersistentStreamUtils.GetReadStream(CacheConstants.TranscriptPath(pathPrefix)),
                sequenceProvider.RefIndexToChromosome,
                sequenceProvider.Assembly);

            Assembly           = _transcriptCache.Assembly;
            DataSourceVersions = _transcriptCache.DataSourceVersions;

            // prediction caches
            _siftReader = new PredictionCacheReader(
                PersistentStreamUtils.GetReadStream(CacheConstants.SiftPath(pathPrefix)),
                PredictionCacheReader.SiftDescriptions);

            _polyphenReader = new PredictionCacheReader(
                PersistentStreamUtils.GetReadStream(CacheConstants.PolyPhenPath(pathPrefix)),
                PredictionCacheReader.PolyphenDescriptions);
        }
Пример #9
0
        /// <summary>
        /// Opens the cache and reference streams named by <paramref name="annotatorPaths"/>, creates the
        /// optional custom/supplementary providers, and returns a <c>NirvanaAnnotationSource</c> with the
        /// features requested in <paramref name="annotatorInfo"/> switched on.
        /// </summary>
        /// <param name="annotatorInfo">its boolean arguments select which optional features are enabled</param>
        /// <param name="annotatorPaths">cache prefix, compressed reference path and annotation locations</param>
        /// <returns>the configured annotation source</returns>
        public IAnnotationSource CreateAnnotationSource(IAnnotatorInfo annotatorInfo, IAnnotatorPaths annotatorPaths)
        {
            var conservationScoreReader = new PhylopReader(annotatorPaths.SupplementaryAnnotation);

            var streams = new AnnotationSourceStreams(
                FileUtilities.GetReadStream(CacheConstants.TranscriptPath(annotatorPaths.CachePrefix)),
                FileUtilities.GetReadStream(CacheConstants.SiftPath(annotatorPaths.CachePrefix)),
                FileUtilities.GetReadStream(CacheConstants.PolyPhenPath(annotatorPaths.CachePrefix)),
                FileUtilities.GetReadStream(annotatorPaths.CompressedReference));

            // every provider stays null unless its input was supplied
            CustomAnnotationProvider caProvider = null;
            if (annotatorPaths.CustomAnnotation.Any())
            {
                caProvider = new CustomAnnotationProvider(annotatorPaths.CustomAnnotation);
            }

            CustomIntervalProvider ciProvider = null;
            if (annotatorPaths.CustomIntervals.Any())
            {
                ciProvider = new CustomIntervalProvider(annotatorPaths.CustomIntervals);
            }

            SupplementaryAnnotationProvider saProvider = null;
            if (annotatorPaths.SupplementaryAnnotation != null)
            {
                saProvider = new SupplementaryAnnotationProvider(annotatorPaths.SupplementaryAnnotation);
            }

            // the supplementary annotation path is passed as well because OMIM needs it
            var annotationSource = new NirvanaAnnotationSource(streams, saProvider, conservationScoreReader, caProvider, ciProvider, annotatorPaths.SupplementaryAnnotation);

            var flags = annotatorInfo.BooleanArguments;

            if (flags.Contains(AnnotatorInfoCommon.ReferenceNoCall))
            {
                bool transcriptOnly = flags.Contains(AnnotatorInfoCommon.TranscriptOnlyRefNoCall);
                annotationSource.EnableReferenceNoCalls(transcriptOnly);
            }

            if (flags.Contains(AnnotatorInfoCommon.EnableMitochondrialAnnotation))
            {
                annotationSource.EnableMitochondrialAnnotation();
            }

            if (flags.Contains(AnnotatorInfoCommon.ReportAllSvOverlappingTranscripts))
            {
                annotationSource.EnableReportAllSvOverlappingTranscripts();
            }

            if (flags.Contains(AnnotatorInfoCommon.EnableLoftee))
            {
                annotationSource.AddPlugin(new Loftee());
            }

            return annotationSource;
        }
Пример #10
0
        /// <summary>
        /// Validates the command-line configuration: checks the input files and directories, turns off
        /// VCF/gVCF and performance-metrics output when the output file name is "-", requires an output
        /// file name, and enables reference no-calls when they are limited to transcripts.
        /// </summary>
        protected override void ValidateCommandLine()
        {
            // a VCF path of "-" is not checked on disk
            bool vcfPathIsDash = ConfigurationSettings.VcfPath == "-";
            if (!vcfPathIsDash)
            {
                CheckInputFilenameExists(ConfigurationSettings.VcfPath, "vcf", "--in");
            }

            CheckInputFilenameExists(ConfigurationSettings.CompressedReferencePath, "compressed reference sequence", "--ref");

            var cachePrefix = ConfigurationSettings.InputCachePrefix;
            CheckInputFilenameExists(CacheConstants.TranscriptPath(cachePrefix), "transcript cache", "--cache");
            CheckInputFilenameExists(CacheConstants.SiftPath(cachePrefix), "SIFT cache", "--cache");
            CheckInputFilenameExists(CacheConstants.PolyPhenPath(cachePrefix), "PolyPhen cache", "--cache");

            CheckDirectoryExists(ConfigurationSettings.SupplementaryAnnotationDirectory, "supplementary annotation", "--sd", false);

            foreach (var annotationDir in ConfigurationSettings.CustomAnnotationDirectories)
            {
                CheckDirectoryExists(annotationDir, "custom annotation", "--ca", false);
            }

            foreach (var intervalDir in ConfigurationSettings.CustomIntervalDirectories)
            {
                CheckDirectoryExists(intervalDir, "custom interval", "--ci", false);
            }

            // when writing to stdout, emitting the VCF and gVCF as well makes no sense
            bool writesToStdout = ConfigurationSettings.OutputFileName == "-";
            if (writesToStdout)
            {
                ConfigurationSettings.Vcf        = false;
                ConfigurationSettings.Gvcf       = false;
                PerformanceMetrics.DisableOutput = true;
            }

            HasRequiredParameter(ConfigurationSettings.OutputFileName, "output file stub", "--out");

            // limiting reference no-calls to transcripts implies that reference no-calls are on
            if (ConfigurationSettings.LimitReferenceNoCallsToTranscripts)
            {
                ConfigurationSettings.EnableReferenceNoCalls = true;
            }
        }
Пример #11
0
        /// <summary>
        /// Resolves the reference and cache locations for the given assembly and transcript source,
        /// loads them into a bundle and builds the transcript interval forest.
        /// </summary>
        /// <param name="genomeAssembly">assembly used to resolve the compressed sequence path and the cache prefix</param>
        /// <param name="ds">transcript data source used to resolve the cache prefix</param>
        /// <returns>the populated bundle, or null when <c>CacheUtilities.LoadCache</c> returned null</returns>
        private DataBundle GetDataBundle(string genomeAssembly, TranscriptDataSource ds)
        {
            var referencePath = GetCompressedSequencePath(_referenceDir, genomeAssembly);
            var prefix        = GetCachePrefix(_cacheRoot, genomeAssembly, ds, _newVepVersion);

            var compressedSequence = new CompressedSequence();

            var dataBundle = new DataBundle();
            dataBundle.Sequence       = compressedSequence;
            dataBundle.SequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(referencePath), compressedSequence);
            dataBundle.Cache          = CacheUtilities.LoadCache(prefix);
            dataBundle.SiftReader     = CacheUtilities.GetPredictionReader(CacheConstants.SiftPath(prefix));
            dataBundle.PolyPhenReader = CacheUtilities.GetPredictionReader(CacheConstants.PolyPhenPath(prefix));

            // without a cache there are no transcripts to build the forest from
            if (dataBundle.Cache == null)
            {
                return null;
            }

            dataBundle.TranscriptForest = CacheUtilities.GetIntervalForest(
                dataBundle.Cache.Transcripts,
                dataBundle.Sequence.Renamer.NumRefSeqs);

            return dataBundle;
        }
Пример #12
0
        /// <summary>
        /// Returns the sorted union of the transcripts in both caches. Transcripts from
        /// <paramref name="cache2"/> are copied with their SIFT and PolyPhen indices shifted by the
        /// per-reference prediction matrix counts of the first prefix; an index of -1 is left as -1.
        /// </summary>
        /// <param name="cache1">cache whose transcripts are added unchanged</param>
        /// <param name="cache2">cache whose transcripts are re-created with shifted prediction indices</param>
        /// <returns>the combined, sorted transcript list</returns>
        private List<Transcript> CombinedTranscripts(GlobalCache cache1, GlobalCache cache2)
        {
            var siftOffsets     = GetPredictionMatrixCount(CacheConstants.SiftPath(_prefix1));
            var polyphenOffsets = GetPredictionMatrixCount(CacheConstants.PolyPhenPath(_prefix1));

            var merged = new List<Transcript>();
            merged.AddRange(cache1.Transcripts);

            foreach (var source in cache2.Transcripts)
            {
                // -1 is preserved as-is; every other index is offset by the first cache's count
                var shiftedSiftIndex = source.SiftIndex == -1
                    ? -1
                    : source.SiftIndex + siftOffsets[source.ReferenceIndex];

                var shiftedPolyPhenIndex = source.PolyPhenIndex == -1
                    ? -1
                    : source.PolyPhenIndex + polyphenOffsets[source.ReferenceIndex];

                var shifted = new Transcript(
                    source.ReferenceIndex, source.Start, source.End,
                    source.Id, source.Version, source.Translation, source.BioType,
                    source.Gene, source.TotalExonLength, source.StartExonPhase,
                    source.IsCanonical, source.Introns, source.MicroRnas, source.CdnaMaps,
                    shiftedSiftIndex,
                    shiftedPolyPhenIndex,
                    source.TranscriptSource);

                merged.Add(shifted);
            }

            merged.Sort();
            Console.WriteLine($"combined trascripts count:{merged.Count}");
            return merged;
        }
Пример #13
0
        /// <summary>
        /// Opens the VEP intermediate files that share the configured input prefix, loads them into a
        /// <c>NirvanaDatabaseCreator</c>, marks canonical transcripts, and writes the transcript cache
        /// plus copies of the SIFT and PolyPhen prediction files under the output cache prefix.
        /// </summary>
        protected override void ProgramExecution()
        {
            var inputPrefix = ConfigurationSettings.InputPrefix;

            // input files, all derived from the same prefix
            var transcriptPath = inputPrefix + ".transcripts.gz";
            var regulatoryPath = inputPrefix + ".regulatory.gz";
            var genePath       = inputPrefix + ".genes.gz";
            var intronPath     = inputPrefix + ".introns.gz";
            var mirnaPath      = inputPrefix + ".mirnas.gz";
            var siftPath       = inputPrefix + ".sift.dat";
            var polyphenPath   = inputPrefix + ".polyphen.dat";
            var peptidePath    = inputPrefix + ".peptides.gz";

            var renamer = ChromosomeRenamer.GetChromosomeRenamer(FileUtilities.GetReadStream(ConfigurationSettings.InputReferencePath));

            using (var transcriptReader = new VepTranscriptReader(transcriptPath))
            using (var regulatoryReader = new VepRegulatoryReader(regulatoryPath))
            using (var geneReader       = new VepGeneReader(genePath))
            using (var mergedGeneReader = new VepCombinedGeneReader(ConfigurationSettings.InputMergedGenesPath))
            using (var intronReader     = new VepSimpleIntervalReader(intronPath, "intron", GlobalImportCommon.FileType.Intron))
            using (var mirnaReader      = new VepSimpleIntervalReader(mirnaPath, "miRNA", GlobalImportCommon.FileType.MicroRna))
            using (var peptideReader    = new VepSequenceReader(peptidePath, "peptide", GlobalImportCommon.FileType.Peptide))
            {
                var creator = new NirvanaDatabaseCreator(transcriptReader, regulatoryReader, geneReader,
                                                         mergedGeneReader, intronReader, mirnaReader, peptideReader, renamer);

                creator.LoadData();
                creator.MarkCanonicalTranscripts(ConfigurationSettings.InputLrgPath);

                var outputPrefix = ConfigurationSettings.OutputCacheFilePrefix;
                creator.CreateTranscriptCacheFile(outputPrefix);
                creator.CopyPredictionCacheFile("SIFT", siftPath, CacheConstants.SiftPath(outputPrefix));
                creator.CopyPredictionCacheFile("PolyPhen", polyphenPath, CacheConstants.PolyPhenPath(outputPrefix));
            }
        }
Пример #14
0
        /// <summary>
        /// Creates the sequence, SIFT and PolyPhen readers, reads the transcript cache stored under
        /// <paramref name="cachePrefix"/>, and packages everything into a <c>DataBundle</c>.
        /// </summary>
        /// <param name="referencePath">path of the compressed reference sequence</param>
        /// <param name="cachePrefix">prefix from which the SIFT, PolyPhen and transcript cache paths are derived</param>
        /// <returns>a bundle holding the three readers, the transcript cache data, the cache and its source</returns>
        public static DataBundle GetDataBundle(string referencePath, string cachePrefix)
        {
            var referenceStream = FileUtilities.GetReadStream(referencePath);
            var sequenceReader  = new CompressedSequenceReader(referenceStream);

            var siftStream = FileUtilities.GetReadStream(CacheConstants.SiftPath(cachePrefix));
            var siftReader = new PredictionCacheReader(siftStream, PredictionCacheReader.SiftDescriptions);

            var polyPhenStream = FileUtilities.GetReadStream(CacheConstants.PolyPhenPath(cachePrefix));
            var polyPhenReader = new PredictionCacheReader(polyPhenStream, PredictionCacheReader.PolyphenDescriptions);

            VC.TranscriptCacheData transcriptData;
            VC.TranscriptCache     transcriptCache;
            Source                 transcriptSource;

            // the transcript reader is only needed while the cache data is being read
            var transcriptStream = FileUtilities.GetReadStream(CacheConstants.TranscriptPath(cachePrefix));
            using (var transcriptReader = new TranscriptCacheReader(transcriptStream))
            {
                transcriptData   = transcriptReader.Read(sequenceReader.RefIndexToChromosome);
                transcriptCache  = transcriptData.GetCache();
                transcriptSource = transcriptReader.Header.Source;
            }

            return new DataBundle(sequenceReader, siftReader, polyPhenReader, transcriptData, transcriptCache, transcriptSource);
        }
Пример #15
0
        /// <summary>
        /// Reads the intermediate transcript, regulatory, SIFT, PolyPhen and universal gene files,
        /// builds the prediction and transcript caches from them, and writes the three cache files
        /// under the output cache prefix.
        /// </summary>
        /// <returns><see cref="ExitCodes.Success"/> once all caches have been written</returns>
        private static ExitCodes ProgramExecution()
        {
            // intermediate inputs, all derived from the input prefix
            string transcriptPath = $"{_inputPrefix}.transcripts.gz";
            string siftPath       = $"{_inputPrefix}.sift.gz";
            string polyphenPath   = $"{_inputPrefix}.polyphen.gz";
            string regulatoryPath = $"{_inputPrefix}.regulatory.gz";

            (var refIndexToChromosome, var refNameToChromosome, int numRefSeqs) = SequenceHelper.GetDictionaries(_inputReferencePath);

            using (var transcriptReader = new MutableTranscriptReader(GZipUtilities.GetAppropriateReadStream(transcriptPath), refIndexToChromosome))
            using (var regulatoryReader = new RegulatoryRegionReader(GZipUtilities.GetAppropriateReadStream(regulatoryPath), refIndexToChromosome))
            using (var siftReader       = new PredictionReader(GZipUtilities.GetAppropriateReadStream(siftPath), refIndexToChromosome, IntermediateIoCommon.FileType.Sift))
            using (var polyphenReader   = new PredictionReader(GZipUtilities.GetAppropriateReadStream(polyphenPath), refIndexToChromosome, IntermediateIoCommon.FileType.Polyphen))
            using (var geneReader       = new UgaGeneReader(GZipUtilities.GetAppropriateReadStream(ExternalFiles.UniversalGeneFilePath), refNameToChromosome))
            {
                // header values from the transcript file feed both cache builders
                var    assembly       = transcriptReader.Header.Assembly;
                var    cacheSource    = transcriptReader.Header.Source;
                long   releaseTicks   = transcriptReader.Header.VepReleaseTicks;
                ushort releaseVersion = transcriptReader.Header.VepVersion;

                Logger.Write("- loading universal gene archive file... ");
                var universalGenes = geneReader.GetGenes();
                var geneForest     = CreateGeneForest(universalGenes, numRefSeqs, assembly);
                Logger.WriteLine($"{universalGenes.Length:N0} loaded.");

                Logger.Write("- loading regulatory region file... ");
                var regulatoryRegions = regulatoryReader.GetRegulatoryRegions();
                Logger.WriteLine($"{regulatoryRegions.Length:N0} loaded.");

                Logger.Write("- loading transcript file... ");
                var allTranscripts        = transcriptReader.GetTranscripts();
                var transcriptsByRefIndex = allTranscripts.GetMultiValueDict(x => x.Chromosome.Index);
                Logger.WriteLine($"{allTranscripts.Length:N0} loaded.");

                MarkCanonicalTranscripts(allTranscripts);

                var predictionBuilder = new PredictionCacheBuilder(assembly);
                var predictionCaches  = predictionBuilder.CreatePredictionCaches(transcriptsByRefIndex, siftReader, polyphenReader, numRefSeqs);

                Logger.Write("- writing SIFT prediction cache... ");
                predictionCaches.Sift.Write(
                    FileUtilities.GetCreateStream(CacheConstants.SiftPath(_outputCacheFilePrefix)));
                Logger.WriteLine("finished.");

                Logger.Write("- writing PolyPhen prediction cache... ");
                predictionCaches.PolyPhen.Write(
                    FileUtilities.GetCreateStream(CacheConstants.PolyPhenPath(_outputCacheFilePrefix)));
                Logger.WriteLine("finished.");

                var transcriptBuilder = new TranscriptCacheBuilder(assembly, cacheSource, releaseTicks, releaseVersion);
                var transcriptStaging = transcriptBuilder.CreateTranscriptCache(allTranscripts, regulatoryRegions, geneForest, numRefSeqs);

                Logger.Write("- writing transcript cache... ");
                transcriptStaging.Write(
                    FileUtilities.GetCreateStream(CacheConstants.TranscriptPath(_outputCacheFilePrefix)));
                Logger.WriteLine("finished.");
            }

            return ExitCodes.Success;
        }
Пример #16
0
        /// <summary>
        /// Program entry point: declares the command-line options, validates the parsed arguments
        /// through the <c>ConsoleAppBuilder</c> chain and runs <c>ProgramExecution</c> on a new
        /// <c>Nirvana</c> instance.
        /// </summary>
        /// <param name="args">raw command-line arguments</param>
        /// <returns>the resulting exit code converted to an int</returns>
        private static int Main(string[] args)
        {
            var nirvana = new Nirvana();

            var options = new OptionSet
            {
                { "cache|c=", "input cache {prefix}", value => _inputCachePrefix = value },
                { "in|i=", "input VCF {path}", value => _vcfPath = value },
                { "plugin|p=", "plugin {directory}", value => _pluginDirectory = value },
                { "gvcf", "enables genome vcf output", value => _gvcf = value != null },
                { "vcf", "enables vcf output", value => _vcf = value != null },
                { "out|o=", "output {file path}", value => _outputFileName = value },
                { "ref|r=", "input compressed reference sequence {path}", value => _refSequencePath = value },
                { "sd=", "input supplementary annotation {directory}", value => SupplementaryAnnotationDirectories.Add(value) },
                { "force-mt", "forces to annotate mitochondrial variants", value => _forceMitochondrialAnnotation = value != null },
                { "verbose-transcripts", "reports all overlapping transcripts for structural variants", value => _reportAllSvOverlappingTranscripts = value != null },
                { "disable-recomposition", "don't recompose function relevant variants", value => _disableRecomposition = value != null }
            };

            // Parse() fills the option fields; the checks below read those fields, so they must
            // only be evaluated once parsing has completed.
            var parsedApp = new ConsoleAppBuilder(args, options)
                            .UseVersionProvider(new VersionProvider())
                            .Parse();

            var exitCode = parsedApp
                           .CheckInputFilenameExists(_vcfPath, "vcf", "--in", true, "-")
                           .CheckInputFilenameExists(_refSequencePath, "reference sequence", "--ref")
                           .CheckInputFilenameExists(CacheConstants.TranscriptPath(_inputCachePrefix), "transcript cache", "--cache")
                           .CheckInputFilenameExists(CacheConstants.SiftPath(_inputCachePrefix), "SIFT cache", "--cache")
                           .CheckInputFilenameExists(CacheConstants.PolyPhenPath(_inputCachePrefix), "PolyPhen cache", "--cache")
                           .CheckEachDirectoryContainsFiles(SupplementaryAnnotationDirectories, "supplementary annotation", "--sd", "*.nsa")
                           .HasRequiredParameter(_outputFileName, "output file stub", "--out")
                           .Enable(_outputFileName == "-", () =>
                           {
                               // an output file name of "-" switches the VCF and gVCF outputs off
                               _vcf  = false;
                               _gvcf = false;
                           })
                           .DisableOutput(_outputFileName == "-")
                           .ShowBanner(Constants.Authors)
                           .ShowHelpMenu("Annotates a set of variants", "-i <vcf path> -c <cache prefix> --sd <sa dir> -r <ref path> -o <base output filename>")
                           .ShowErrors()
                           .Execute(nirvana.ProgramExecution);

            return (int)exitCode;
        }
Пример #17
0
        /// <summary>
        /// Program entry point: declares the command-line options, validates the parsed arguments
        /// through the <c>ConsoleAppBuilder</c> chain and runs <c>ProgramExecution</c>.
        /// </summary>
        /// <param name="args">raw command-line arguments</param>
        /// <returns>the resulting exit code converted to an int</returns>
        public static int Main(string[] args)
        {
            var options = new OptionSet
            {
                { "cache|c=", "input cache {prefix}", value => _inputCachePrefix = value },
                { "in|i=", "input VCF {path}", value => _vcfPath = value },
                { "out|o=", "output {file path}", value => _outputFileName = value },
                { "ref|r=", "input compressed reference sequence {path}", value => _refSequencePath = value },
                { "sd=", "input supplementary annotation {directory}", value => SupplementaryAnnotationDirectories.Add(value) },
                { "force-mt", "forces to annotate mitochondrial variants", value => _forceMitochondrialAnnotation = value != null },
                { "disable-recomposition", "don't recompose function relevant variants", value => _disableRecomposition = value != null },
                { "legacy-vids", "enables support for legacy VIDs", value => _useLegacyVids = value != null },
                { "enable-dq", "report DQ from VCF samples field", value => _enableDq = value != null },
                { "str=", "user provided STR annotation TSV file", value => _customStrTsv = value }
            };

            // Parse() fills the option fields; the checks below read those fields, so they must
            // only be evaluated once parsing has completed.
            var parsedApp = new ConsoleAppBuilder(args, options)
                            .UseVersionProvider(new VersionProvider())
                            .Parse();

            var exitCode = parsedApp
                           .CheckInputFilenameExists(_vcfPath, "vcf", "--in", true, "-")
                           .CheckInputFilenameExists(_refSequencePath, "reference sequence", "--ref")
                           .CheckInputFilenameExists(CacheConstants.TranscriptPath(_inputCachePrefix), "transcript cache", "--cache")
                           .CheckInputFilenameExists(CacheConstants.SiftPath(_inputCachePrefix), "SIFT cache", "--cache")
                           .CheckInputFilenameExists(CacheConstants.PolyPhenPath(_inputCachePrefix), "PolyPhen cache", "--cache")
                           .CheckInputFilenameExists(_customStrTsv, "custom STR annotation TSV", "--str", false)
                           .HasRequiredParameter(_outputFileName, "output file stub", "--out")
                           .DisableOutput(_outputFileName == "-")
                           .ShowBanner(Constants.Authors)
                           .ShowHelpMenu("Annotates a set of variants", "-i <vcf path> -c <cache prefix> --sd <sa dir> -r <ref path> -o <base output filename>")
                           .ShowErrors()
                           .Execute(ProgramExecution);

            return (int)exitCode;
        }