Пример #1
0
        /// <summary>
        /// Opens every interim TSV input, gathers the headers and reference names they
        /// expose, prints the data sources and checks that the assemblies agree.
        /// </summary>
        /// <param name="annotationFiles">files handed to ReaderUtilities.GetSaTsvReaders</param>
        /// <param name="intervalFiles">files handed to ReaderUtilities.GetIntervalReaders</param>
        /// <param name="miscFile">file handed to ReaderUtilities.GetMiscTsvReader</param>
        /// <param name="geneFiles">files handed to ReaderUtilities.GetGeneReaders</param>
        /// <param name="compressedReference">path of the compressed reference that is opened for reading</param>
        /// <param name="outputDirectory">stored for later use by the merger</param>
        public InterimTsvsMerger(IEnumerable<string> annotationFiles, IEnumerable<string> intervalFiles, string miscFile, IEnumerable<string> geneFiles, string compressedReference, string outputDirectory)
        {
            _outputDirectory = outputDirectory;

            // the reference supplies both the assembly and the name -> chromosome lookup
            var referenceStream  = FileUtilities.GetReadStream(compressedReference);
            var sequenceProvider = new ReferenceSequenceProvider(referenceStream);
            _genomeAssembly      = sequenceProvider.GenomeAssembly;
            _refNameToChromosome = sequenceProvider.RefNameToChromosome;

            // one reader (set) per input category
            _tsvReaders      = ReaderUtilities.GetSaTsvReaders(annotationFiles);
            _miscReader      = ReaderUtilities.GetMiscTsvReader(miscFile);
            _geneReaders     = ReaderUtilities.GetGeneReaders(geneFiles);
            _intervalReaders = ReaderUtilities.GetIntervalReaders(intervalFiles);

            // SA headers come from the small-variant readers first, then the interval readers
            var saHeaders = new List<SaHeader>();
            saHeaders.AddRange(ReaderUtilities.GetTsvHeaders(_tsvReaders));
            saHeaders.AddRange(ReaderUtilities.GetTsvHeaders(_intervalReaders));
            _saHeaders   = saHeaders;
            _geneHeaders = ReaderUtilities.GetTsvHeaders(_geneReaders)?.ToList();

            // union of the reference names seen by any reader
            var refNames = new HashSet<string>();
            refNames.UnionWith(ReaderUtilities.GetRefNames(_tsvReaders));
            refNames.UnionWith(ReaderUtilities.GetRefNames(_intervalReaders));
            if (_miscReader != null)
            {
                refNames.UnionWith(_miscReader.RefNames);
            }
            _refNames = refNames;

            DisplayDataSources(_saHeaders, _geneHeaders);
            MergeUtilities.CheckAssemblyConsistancy(_saHeaders);
        }
Пример #2
0
        /// <summary>
        /// Validates the input directory, then creates the gnomAD TSV files from every
        /// *.vcf.bgz file found in it.
        /// </summary>
        /// <returns>ExitCodes.Success when the TSV creation finished</returns>
        /// <exception cref="ArgumentException">the sequencing data type is not supported</exception>
        /// <exception cref="UserErrorException">the input directory holds no .vcf.bgz files</exception>
        /// <exception cref="InvalidDataException">the input directory does not hold exactly one .version file</exception>
        private ExitCodes ProgramExecution()
        {
            if (!_supportedSequencingDataType.Contains(_sequencingDataType))
            {
                // string.Join expects the separator first and the values second
                string supportedTypes = string.Join(", ", _supportedSequencingDataType);
                throw new ArgumentException($"Only the following sequencing data types are supported: {supportedTypes}");
            }

            // validate the directory content before opening anything, so that a failed
            // check does not leave opened readers behind
            var inputFiles = Directory.GetFiles(_inputDirectory, "*.vcf.bgz");

            if (inputFiles.Length == 0)
            {
                throw new UserErrorException("input directory does not contain any .vcf.bgz files");
            }

            var versionFiles = Directory.GetFiles(_inputDirectory, "*.version");

            if (versionFiles.Length != 1)
            {
                string problem = versionFiles.Length == 0
                    ? "no .version file found in input directory"
                    : "more than one .version file found in input directory";
                throw new InvalidDataException(problem);
            }

            // Directory.GetFiles already returns each match prefixed with _inputDirectory;
            // combining the directory again would duplicate it for relative paths
            var inputStreamReaders = inputFiles.Select(filePath => GZipUtilities.GetAppropriateStreamReader(filePath)).ToArray();
            var referenceProvider  = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));

            Console.WriteLine($"Creating gnomAD TSV file from {inputStreamReaders.Length} input files");

            var version          = DataSourceVersionReader.GetSourceVersion(versionFiles[0]);
            var gnomadTsvCreator = new GnomadTsvCreator(inputStreamReaders, referenceProvider, version, _outputDirectory, _sequencingDataType);

            gnomadTsvCreator.CreateTsvs();
            return ExitCodes.Success;
        }
Пример #3
0
        /// <summary>
        /// Merges the given MITOMAP variant files and writes them as one sorted TSV.
        /// Does nothing when the list is empty or contains a null/empty file name.
        /// </summary>
        /// <param name="mitoMapFileNames">MITOMAP variant input files; the version is read from
        /// "mitoMapVar" in the directory of the first file</param>
        private void CreateMitoMapVarTsv(IReadOnlyList<string> mitoMapFileNames)
        {
            bool nothingToDo = mitoMapFileNames.Count == 0 || mitoMapFileNames.Any(string.IsNullOrEmpty);
            if (nothingToDo)
            {
                return;
            }

            var stopwatch      = new Benchmark();
            var inputDirectory = new FileInfo(mitoMapFileNames[0]).Directory;
            var version        = DataSourceVersionReader.GetSourceVersion(Path.Combine(inputDirectory.ToString(), "mitoMapVar"));

            var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath));
            sequenceProvider.LoadChromosome(new Chromosome("chrM", "MT", 24));

            // one reader per input file, all sharing the same sequence provider
            var readers = mitoMapFileNames
                .Select(fileName => new MitoMapVariantReader(new FileInfo(fileName), sequenceProvider))
                .ToList();
            var mergedItems = MitoMapVariantReader.MergeAndSort(readers);

            using (var writer = new MitoMapVarTsvWriter(version, _outputDirectory, InterimSaCommon.MitoMapTag, sequenceProvider))
            {
                TsvWriterUtilities.WriteSortedItems(mergedItems, writer);
            }

            // the elapsed time is taken after the writer has been disposed
            var elapsed = Benchmark.ToHumanReadable(stopwatch.GetElapsedTime());
            TsvWriterUtilities.WriteCompleteInfo(InterimSaCommon.MitoMapTag, version.Version, elapsed);
        }
Пример #4
0
        /// <summary>
        /// Reads the transcript cache to obtain the gene id to symbol lookups, then
        /// parses the PrimateAI input and writes it as an NSA file plus index.
        /// </summary>
        /// <returns>ExitCodes.Success when the NSA file has been written</returns>
        private static ExitCodes ProgramExecution()
        {
            var    referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
            var    version           = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");
            string outFileName       = $"{version.Name}_{version.Version}";

            // the cache reader is only needed long enough to load the transcript data
            TranscriptCacheData transcriptData;
            using (var cacheReader = new TranscriptCacheReader(FileUtilities.GetReadStream(CacheConstants.TranscriptPath(_transcriptCachePrefix))))
            {
                transcriptData = cacheReader.Read(referenceProvider.RefIndexToChromosome);
            }

            var (entrezToHgnc, ensemblToHgnc) = PrimateAiUtilities.GetIdToSymbols(transcriptData);

            // the index file shares the NSA file name with an extra suffix
            string nsaFileName = outFileName + SaCommon.SaFileSuffix;

            using (var parser = new PrimateAiParser(GZipUtilities.GetAppropriateReadStream(_inputFile), referenceProvider, entrezToHgnc, ensemblToHgnc))
            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName)))
            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName + SaCommon.IndexSufix)))
            using (var nsaWriter = new NsaWriter(nsaStream, indexStream, version, referenceProvider, SaCommon.PrimateAiTag, true, true, SaCommon.SchemaVersion, false))
            {
                nsaWriter.Write(parser.GetItems());
            }

            return ExitCodes.Success;
        }
Пример #5
0
        /// <summary>
        /// Merges the MITOMAP variant files listed in MitoMapFileNames and writes the
        /// result as an NSA file plus index.
        /// </summary>
        /// <returns>
        /// ExitCodes.PathNotFound when the first input file has no directory,
        /// otherwise ExitCodes.Success
        /// </returns>
        private static ExitCodes ProgramExecution()
        {
            var inputDirectory = new FileInfo(MitoMapFileNames[0]).Directory;
            if (inputDirectory == null)
            {
                return ExitCodes.PathNotFound;
            }

            // the version is read from "mitoMapVar" next to the first input file
            var version          = DataSourceVersionReader.GetSourceVersion(Path.Combine(inputDirectory.ToString(), "mitoMapVar"));
            var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));

            // load the chromosome registered under "chrM" before any reader is created
            var mitochondrion = sequenceProvider.RefNameToChromosome["chrM"];
            sequenceProvider.LoadChromosome(mitochondrion);

            MitoMapInputDb inputDb = MitoMapDatabaseUtilities.Create(_mitoMapDatabase);
            var readers = MitoMapFileNames
                .Select(fileName => new MitoMapVariantReader(new FileInfo(fileName), inputDb, sequenceProvider))
                .ToList();
            var mergedItems = MitoMapVariantReader.GetMergeAndSortedItems(readers);

            string outFileName = $"{version.Name}_{version.Version}";
            string nsaFileName = outFileName + SaCommon.SaFileSuffix;

            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName)))
            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName + SaCommon.IndexSufix)))
            using (var nsaWriter = new NsaWriter(nsaStream, indexStream, version, sequenceProvider, SaCommon.MitoMapTag, false, true, SaCommon.SchemaVersion, false))
            {
                nsaWriter.Write(mergedItems);
            }

            return ExitCodes.Success;
        }
Пример #6
0
        /// <summary>
        /// Builds a lookup from Ensembl gene id (without version) to gene symbol from
        /// the genes stored in the transcript cache.
        /// </summary>
        /// <param name="refProvider">supplies the reference index to chromosome lookup used by the cache reader</param>
        /// <param name="cacheReader">reader positioned on the transcript cache</param>
        /// <returns>gene id to symbol dictionary; genes with a null or empty id are left out</returns>
        /// <exception cref="DataMisalignedException">the same gene id appears with two different symbols</exception>
        private static Dictionary<string, string> LoadGenesFromCache(ReferenceSequenceProvider refProvider, TranscriptCacheReader cacheReader)
        {
            var genes           = cacheReader.Read(refProvider.RefIndexToChromosome).Genes;
            var geneIdToSymbols = new Dictionary<string, string>(genes.Length);

            foreach (var gene in genes)
            {
                var geneId = gene.EnsemblId.WithoutVersion;
                if (string.IsNullOrEmpty(geneId))
                {
                    continue;
                }

                // a repeated id is tolerated only when it carries the same symbol
                if (geneIdToSymbols.TryGetValue(geneId, out var knownSymbol))
                {
                    if (knownSymbol != gene.Symbol)
                    {
                        throw new DataMisalignedException($"Multiple symbols found for {geneId}");
                    }

                    continue;
                }

                geneIdToSymbols.Add(geneId, gene.Symbol);
            }

            return geneIdToSymbols;
        }
Пример #7
0
        /// <summary>
        /// Splits the VCF referenced by the config into partitions and returns the
        /// annotation range for each one.
        /// </summary>
        /// <param name="config">provides the tabix index URL and the VCF URL</param>
        /// <param name="genomeAssembly">selects the cache path prefix and the reference URL</param>
        /// <returns>the ranges produced by PartitionUtilities.GenerateAnnotationRanges</returns>
        private static IEnumerable<AnnotationRange> GetAnnotationRanges(NirvanaConfig config, GenomeAssembly genomeAssembly)
        {
            string cachePathPrefix = LambdaUtilities.GetCachePathPrefix(genomeAssembly);

            // everything needed after the streams are closed is captured in these three locals
            List<long>                       fileOffsets;
            IntervalForest<IGene>            geneForest;
            IDictionary<string, IChromosome> chromosomesByName;

            using (var tabixStream = PersistentStreamUtils.GetReadStream(config.tabixUrl))
            using (var tabixReader = new BinaryReader(new BlockGZipStream(tabixStream, CompressionMode.Decompress)))
            using (var referenceStream = PersistentStreamUtils.GetReadStream(LambdaUrlHelper.GetRefUrl(genomeAssembly)))
            using (var sequenceProvider = new ReferenceSequenceProvider(referenceStream))
            using (var taProvider = new TranscriptAnnotationProvider(cachePathPrefix, sequenceProvider, null))
            {
                // ceil(vcfSize / MinPartitionSize), clamped to [MinNumPartitions, MaxNumPartitions]
                long vcfSize          = HttpUtilities.GetLength(config.vcfUrl);
                int  wantedPartitions = (int)((vcfSize - 1) / MinPartitionSize + 1);
                int  numPartitions    = Math.Max(Math.Min(wantedPartitions, MaxNumPartitions), MinNumPartitions);

                var tabixIndex = Reader.Read(tabixReader, sequenceProvider.RefNameToChromosome);
                fileOffsets = PartitionUtilities.GetFileOffsets(config.vcfUrl, numPartitions, tabixIndex);

                IntervalArray<ITranscript>[] transcriptArrays = taProvider.TranscriptIntervalArrays;
                geneForest        = GeneForestGenerator.GetGeneForest(transcriptArrays);
                chromosomesByName = sequenceProvider.RefNameToChromosome;
            }

            return PartitionUtilities.GenerateAnnotationRanges(fileOffsets, config.vcfUrl, geneForest, chromosomesByName);
        }
Пример #8
0
        /// <summary>
        /// Merges the given MITOMAP structural variant files and writes them as one
        /// interval TSV. Does nothing when the list is empty or contains a null/empty
        /// file name.
        /// </summary>
        /// <param name="mitoMapSvFileNames">MITOMAP SV input files; the version is read from
        /// "mitoMapSV" in the directory of the first file</param>
        private void CreateMitoMapSvTsv(IReadOnlyList<string> mitoMapSvFileNames)
        {
            bool nothingToDo = mitoMapSvFileNames.Count == 0 || mitoMapSvFileNames.Any(string.IsNullOrEmpty);
            if (nothingToDo)
            {
                return;
            }

            var stopwatch      = new Benchmark();
            var inputDirectory = new FileInfo(mitoMapSvFileNames[0]).Directory;
            var version        = DataSourceVersionReader.GetSourceVersion(Path.Combine(inputDirectory.ToString(), "mitoMapSV"));

            var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath));
            sequenceProvider.LoadChromosome(new Chromosome("chrM", "MT", 24));

            // one reader per input file, all sharing the same sequence provider
            var readers = mitoMapSvFileNames
                .Select(fileName => new MitoMapSvReader(new FileInfo(fileName), sequenceProvider))
                .ToList();
            var mergedItems = MitoMapSvReader.MergeAndSort(readers);

            string assemblyName = GenomeAssembly.rCRS.ToString();
            using (var writer = new IntervalTsvWriter(_outputDirectory, version, assemblyName,
                SaTsvCommon.MitoMapSchemaVersion, InterimSaCommon.MitoMapTag, ReportFor.StructuralVariants))
            {
                CreateSvTsv(mergedItems, writer);
            }

            // the elapsed time is taken after the writer has been disposed
            var elapsed = Benchmark.ToHumanReadable(stopwatch.GetElapsedTime());
            TsvWriterUtilities.WriteCompleteInfo(InterimSaCommon.MitoMapTag, version.Version, elapsed);
        }
Пример #9
0
        /// <summary>
        /// Merges the MITOMAP structural variant files listed in MitoMapFileNames and
        /// writes the result as an NSI file.
        /// </summary>
        /// <returns>
        /// ExitCodes.PathNotFound when the first input file has no directory,
        /// otherwise ExitCodes.Success
        /// </returns>
        private static ExitCodes ProgramExecution()
        {
            var rootDirectory = new FileInfo(MitoMapFileNames[0]).Directory;

            if (rootDirectory == null)
            {
                return ExitCodes.PathNotFound;
            }

            // the version is read from "mitoMapSv" next to the first input file
            var version          = DataSourceVersionReader.GetSourceVersion(Path.Combine(rootDirectory.ToString(), "mitoMapSv"));
            var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
            var chrom            = sequenceProvider.RefNameToChromosome["chrM"];

            sequenceProvider.LoadChromosome(chrom);
            var mitoMapSvReaders  = MitoMapFileNames.Select(mitoMapFileName => new MitoMapSvReader(new FileInfo(mitoMapFileName), sequenceProvider)).ToList();
            var mergedMitoMapSvItems = MitoMapSvReader.MergeAndSort(mitoMapSvReaders);

            string outFileName = $"{version.Name}_{version.Version}";

            // the writer is disposed together with its stream, matching how the other
            // NSI creators in this code base scope their NsiWriter
            using (var nsiStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SiFileSuffix)))
            using (var nsiWriter = new NsiWriter(new ExtendedBinaryWriter(nsiStream), version, GenomeAssembly.rCRS, SaCommon.MitoMapTag, ReportFor.StructuralVariants, SaCommon.SchemaVersion))
            {
                nsiWriter.Write(mergedMitoMapSvItems);
            }

            return ExitCodes.Success;
        }
Пример #10
0
 /// <summary>
 /// Creates a reader for one MITOMAP structural variant file.
 /// </summary>
 /// <param name="mitoMapFileInfo">the MITOMAP file to read</param>
 /// <param name="sequenceProvider">provider whose current sequence backs the genome model and the aligner</param>
 public MitoMapSvReader(FileInfo mitoMapFileInfo, ReferenceSequenceProvider sequenceProvider)
 {
     // the data type is determined right after the file info has been stored
     _mitoMapFileInfo = mitoMapFileInfo;
     _dataType        = GetDataType();

     _sequenceProvider = sequenceProvider;

     // both helpers are built on the provider's current sequence
     var sequence     = sequenceProvider.Sequence;
     _mitoGenomeModel = new CircularGenomeModel(sequence);
     _variantAligner  = new VariantAligner(sequence);
 }
Пример #11
0
 /// <summary>
 /// Stores the inputs needed to create the TOPMed TSV files.
 /// </summary>
 /// <param name="streamReader">reader over the TOPMed input</param>
 /// <param name="refProvider">reference sequence provider</param>
 /// <param name="version">data source version</param>
 /// <param name="outputDirName">output directory name</param>
 public TopMedTsvCreator(StreamReader streamReader, ReferenceSequenceProvider refProvider,
     DataSourceVersion version, string outputDirName)
 {
     // fields are assigned in parameter order
     _streamReader  = streamReader;
     _refProvider   = refProvider;
     _version       = version;
     _outputDirName = outputDirName;
 }
Пример #12
0
 /// <summary>
 /// Creates a reader for one MITOMAP structural variant file.
 /// </summary>
 /// <param name="mitoMapFileInfo">the MITOMAP file to read</param>
 /// <param name="sequenceProvider">provider supplying the "chrM" chromosome and the sequence for the aligner</param>
 public MitoMapSvReader(FileInfo mitoMapFileInfo, ReferenceSequenceProvider sequenceProvider)
 {
     // the data type is determined right after the file info has been stored
     _mitoMapFileInfo = mitoMapFileInfo;
     _dataType        = GetDataType();

     _sequenceProvider = sequenceProvider;

     // the chromosome is looked up under the name "chrM"
     var chromosomesByName = sequenceProvider.RefNameToChromosome;
     _chromosome           = chromosomesByName["chrM"];

     var sequence    = sequenceProvider.Sequence;
     _variantAligner = new VariantAligner(sequence);
 }
Пример #13
0
 /// <summary>
 /// Stores the inputs needed to create the gnomAD TSV files.
 /// </summary>
 /// <param name="streamReaders">one reader per gnomAD input file</param>
 /// <param name="refProvider">reference sequence provider</param>
 /// <param name="version">data source version</param>
 /// <param name="outputDirectory">output directory</param>
 /// <param name="sequencingDataType">sequencing data type of the input</param>
 public GnomadTsvCreator(StreamReader[] streamReaders, ReferenceSequenceProvider refProvider,
     DataSourceVersion version, string outputDirectory, string sequencingDataType)
 {
     // fields are assigned in parameter order
     _streamReaders      = streamReaders;
     _refProvider        = refProvider;
     _version            = version;
     _outputDirectory    = outputDirectory;
     _sequencingDataType = sequencingDataType;
 }
Пример #14
0
        /// <summary>
        /// Opens the TOPMed input and the reference, reads the version from the
        /// input's ".version" file and creates the TOPMed TSV files.
        /// </summary>
        /// <returns>ExitCodes.Success when the TSV creation finished</returns>
        private ExitCodes ProgramExecution()
        {
            var inputReader       = GZipUtilities.GetAppropriateStreamReader(_inputFileArg);
            var referenceStream   = FileUtilities.GetReadStream(_compressedReferenceArg);
            var referenceProvider = new ReferenceSequenceProvider(referenceStream);
            var version           = DataSourceVersionReader.GetSourceVersion(_inputFileArg + ".version");

            new TopMedTsvCreator(inputReader, referenceProvider, version, _outputDirArg).CreateTsvs();

            return ExitCodes.Success;
        }
Пример #15
0
        /// <summary>
        /// Stores the extraction range and SA path, opens the reference and prints the
        /// mini SA output path.
        /// </summary>
        /// <param name="compressedRefFile">path of the compressed reference that is opened for reading</param>
        /// <param name="saPath">path of the supplementary annotation to extract from</param>
        /// <param name="begin">range begin</param>
        /// <param name="end">range end</param>
        /// <param name="datasourceName">only referenced by the disabled code below</param>
        /// <param name="outputDir">only referenced by the disabled code below</param>
        public MiniSaExtractor(string compressedRefFile, string saPath, int begin, int end, string datasourceName = null,
            string outputDir = null)
        {
            _saPath = saPath;
            _begin  = begin;
            _end    = end;

            // NOTE(review): the lookup below is only consumed by the disabled code, and
            // _miniSaPath is not assigned anywhere in this constructor - confirm whether
            // the disabled lines should be restored
            var referenceStream   = FileUtilities.GetReadStream(compressedRefFile);
            var referenceProvider = new ReferenceSequenceProvider(referenceStream);
            var refChromDict      = referenceProvider.RefNameToChromosome;

            //string referenceName = GetReferenceName(saPath, refChromDict);
            //_miniSaPath = GetMiniSaPath(referenceName, begin, end, datasourceName, outputDir);

            Console.WriteLine($"MiniSA output to: {_miniSaPath}");
        }
Пример #16
0
        /// <summary>
        /// Parses the custom annotation input file, writes its small variants to an NSA
        /// file (with index and JSON schema) and, when the parser produced intervals,
        /// writes those to an interval file with its own JSON schema.
        /// </summary>
        /// <returns>ExitCodes.Success on every path visible in this method</returns>
        private static ExitCodes ProgramExecution()
        {
            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));

            // these locals are assigned inside the first using block and consumed after it
            List <CustomInterval> intervals;
            SaJsonSchema          intervalJsonSchema;
            string            jsonTag;
            DataSourceVersion version;
            // all three small-variant output names derive from the same NSA file name
            string            outputPrefix      = GetOutputPrefix(_inputFile);
            string            nsaFileName       = Path.Combine(_outputDirectory, outputPrefix + SaCommon.SaFileSuffix);
            string            nsaIndexFileName  = nsaFileName + SaCommon.IndexSufix;
            string            nsaSchemaFileName = nsaFileName + SaCommon.JsonSchemaSuffix;
            ReportFor         reportFor;

            var nsaItemCount = 0;

            // the version is handed back through the out parameter of GetNsaWriter
            using (var parser = VariantAnnotationsParser.Create(GZipUtilities.GetAppropriateStreamReader(_inputFile), referenceProvider))
                using (var nsaStream = FileUtilities.GetCreateStream(nsaFileName))
                    using (var indexStream = FileUtilities.GetCreateStream(nsaIndexFileName))
                        using (var nsaWriter = CaUtilities.GetNsaWriter(nsaStream, indexStream, parser, CaUtilities.GetInputFileName(_inputFile), referenceProvider, out version))
                            using (var saJsonSchemaStream = FileUtilities.GetCreateStream(nsaSchemaFileName))
                                using (var schemaWriter = new StreamWriter(saJsonSchemaStream))
                                {
                                    (jsonTag, nsaItemCount, intervalJsonSchema, intervals) = CaUtilities.WriteSmallVariants(parser, nsaWriter, schemaWriter);
                                    reportFor = parser.ReportFor;
                                    // no intervals: nothing is left to write, so stop here
                                    // NOTE(review): this return happens before the nsaItemCount == 0
                                    // cleanup below, so that cleanup is skipped on this path - confirm
                                    // this is intended
                                    if (intervals == null)
                                    {
                                        return(ExitCodes.Success);
                                    }
                                }

            // remove the small-variant outputs when no NSA item was written
            if (nsaItemCount == 0)
            {
                File.Delete(nsaFileName);
                File.Delete(nsaIndexFileName);
                File.Delete(nsaSchemaFileName);
            }

            // write the intervals and their JSON schema next to each other
            using (var nsiStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outputPrefix + SaCommon.IntervalFileSuffix)))
                using (var nsiWriter = CaUtilities.GetNsiWriter(nsiStream, version, referenceProvider.Assembly, jsonTag, reportFor))
                    using (var siJsonSchemaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outputPrefix + SaCommon.IntervalFileSuffix + SaCommon.JsonSchemaSuffix)))
                        using (var schemaWriter = new StreamWriter(siJsonSchemaStream))
                        {
                            nsiWriter.Write(intervals);
                            schemaWriter.Write(intervalJsonSchema);
                        }

            return(ExitCodes.Success);
        }
Пример #17
0
        /// <summary>
        /// Parses the dosage map region file and writes its items as an NSI file that
        /// reports for structural variants.
        /// </summary>
        /// <returns>ExitCodes.Success when the NSI file has been written</returns>
        private static ExitCodes ProgramExecution()
        {
            var version = DataSourceVersionReader.GetSourceVersion(_dosageMapRegionFile + ".version");

            // blanks in the data source name are replaced so the file name has no spaces from it
            string sanitizedName = version.Name.Replace(' ', '_');
            string outFileName   = $"{sanitizedName}_{version.Version}";

            var referenceProvider = new ReferenceSequenceProvider(GZipUtilities.GetAppropriateReadStream(_inputReferencePath));

            using (var parser = new DosageMapRegionParser(GZipUtilities.GetAppropriateReadStream(_dosageMapRegionFile), referenceProvider.RefNameToChromosome))
            using (var nsiStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SiFileSuffix)))
            using (var nsiWriter = new NsiWriter(nsiStream, version, referenceProvider.Assembly, SaCommon.DosageSensitivityTag, ReportFor.StructuralVariants, SaCommon.SchemaVersion))
            {
                nsiWriter.Write(parser.GetItems());
            }

            return ExitCodes.Success;
        }
Пример #18
0
        /// <summary>
        /// Reads the ClinGen input and writes its items as an NSI file that reports
        /// for structural variants.
        /// </summary>
        /// <returns>ExitCodes.Success when the NSI file has been written</returns>
        private static ExitCodes ProgramExecution()
        {
            var referenceStream   = FileUtilities.GetReadStream(_compressedReference);
            var referenceProvider = new ReferenceSequenceProvider(referenceStream);

            var    version     = DataSourceVersionReader.GetSourceVersion(_inputFileName + ".version");
            string outFileName = $"{version.Name}_{version.Version}";

            using (var reader = new ClinGenReader(GZipUtilities.GetAppropriateStreamReader(_inputFileName), referenceProvider.RefNameToChromosome))
            using (var nsiStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SiFileSuffix)))
            using (var nsiWriter = new NsiWriter(new ExtendedBinaryWriter(nsiStream), version, referenceProvider.Assembly, SaCommon.ClinGenTag, ReportFor.StructuralVariants, SaCommon.SchemaVersion))
            {
                nsiWriter.Write(reader.GetItems());
            }

            return ExitCodes.Success;
        }
Пример #19
0
        /// <summary>
        /// Parses the low complexity region input and writes its items as an NSI file
        /// that reports for all variants.
        /// </summary>
        /// <returns>ExitCodes.Success when the file has been written</returns>
        private static ExitCodes ProgramExecution()
        {
            var referenceStream   = FileUtilities.GetReadStream(_referenceSequencePath);
            var referenceProvider = new ReferenceSequenceProvider(referenceStream);

            var version     = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");
            var outFileName = $"{version.Name}_{version.Version}";

            using (var regionParser = new LcrRegionParser(GZipUtilities.GetAppropriateStreamReader(_inputFile), referenceProvider))
            using (var lcrStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.LcrFileSuffix)))
            using (var nsiWriter = new NsiWriter(lcrStream, version, referenceProvider.Assembly, SaCommon.LowComplexityRegionTag, ReportFor.AllVariants, SaCommon.NsiSchemaVersion))
            {
                nsiWriter.Write(regionParser.GetItems());
            }

            return ExitCodes.Success;
        }
Пример #20
0
        /// <summary>
        /// Creates the COSMIC structural variant TSV from the CNV and/or breakend
        /// input files.
        /// </summary>
        /// <returns>ExitCodes.Success when the TSV has been created</returns>
        private ExitCodes ProgramExecution()
        {
            // Either input may be missing (see the null checks below), so the version file
            // is taken from whichever input is present; the CNV file keeps priority.
            // NOTE(review): assumes a ".version" file also sits next to the breakend file
            // when no CNV file is supplied - confirm against the data layout.
            var versionSource     = _cnvTsv ?? _breakendTsv;
            var version           = DataSourceVersionReader.GetSourceVersion(versionSource + ".version");
            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));

            // a missing input is passed to the reader as a null stream
            var cnvStream      = _cnvTsv == null ? null : GZipUtilities.GetAppropriateReadStream(_cnvTsv);
            var breakendStream = _breakendTsv == null ? null : GZipUtilities.GetAppropriateReadStream(_breakendTsv);

            using (var cosmicSvExtractor = new CosmicSvReader(cnvStream, breakendStream, version, _outputDir,
                                                              referenceProvider.GenomeAssembly, referenceProvider.RefNameToChromosome))
            {
                cosmicSvExtractor.CreateTsv();
            }

            return ExitCodes.Success;
        }
Пример #21
0
        /// <summary>
        /// Parses the mitochondrial heteroplasmy input and writes its items as an NSA
        /// file plus index.
        /// </summary>
        /// <returns>ExitCodes.Success when the NSA file has been written</returns>
        private static ExitCodes ProgramExecution()
        {
            var referenceStream   = FileUtilities.GetReadStream(_compressedReference);
            var referenceProvider = new ReferenceSequenceProvider(referenceStream);

            var    version     = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");
            string outFileName = $"{version.Name}_{version.Version}";

            // the index file shares the NSA file name with an extra suffix
            string nsaFileName = outFileName + SaCommon.SaFileSuffix;

            using (var heteroplasmyParser = new MitoHeteroplasmyParser(GZipUtilities.GetAppropriateReadStream(_inputFile), referenceProvider))
            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName)))
            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName + SaCommon.IndexSufix)))
            using (var nsaWriter = new NsaWriter(nsaStream, indexStream, version, referenceProvider, SaCommon.MitoHeteroplasmyTag, true, false, SaCommon.SchemaVersion, false))
            {
                nsaWriter.Write(heteroplasmyParser.GetItems());
            }

            return ExitCodes.Success;
        }
Пример #22
0
        /// <summary>
        /// Reads the reference minor allele input and writes its items as a RefMinor
        /// database file plus index.
        /// </summary>
        /// <returns>ExitCodes.Success when the database has been written</returns>
        private static ExitCodes ProgramExecution()
        {
            var referenceStream   = FileUtilities.GetReadStream(_compressedReference);
            var referenceProvider = new ReferenceSequenceProvider(referenceStream);
            var version           = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

            // blanks anywhere in the composed name are replaced with underscores
            string rawName     = $"{version.Name}_{version.Version}_{SaCommon.RefMinorTag}";
            string outFileName = rawName.Replace(' ', '_');

            string dbFileName = outFileName + SaCommon.RefMinorFileSuffix;

            using (var reader = new RefMinorReader(GZipUtilities.GetAppropriateStreamReader(_inputFile), referenceProvider))
            using (var dbStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, dbFileName)))
            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, dbFileName + SaCommon.IndexSufix)))
            using (var dbWriter = new RefMinorDbWriter(new ExtendedBinaryWriter(dbStream), new ExtendedBinaryWriter(indexStream), version, referenceProvider, SaCommon.SchemaVersion))
            {
                dbWriter.Write(reader.GetItems());
            }

            return ExitCodes.Success;
        }
Пример #23
0
        /// <summary>
        /// Reads the global minor allele input and writes its items as an NSA file
        /// plus index.
        /// </summary>
        /// <returns>ExitCodes.Success when the NSA file has been written</returns>
        private static ExitCodes ProgramExecution()
        {
            var referenceStream   = FileUtilities.GetReadStream(_compressedReference);
            var referenceProvider = new ReferenceSequenceProvider(referenceStream);

            // the input is opened before its version file is read
            var inputStream = GZipUtilities.GetAppropriateReadStream(_inputFile);
            var reader      = new GlobalMinorReader(inputStream, referenceProvider.RefNameToChromosome);
            var version     = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

            string outFileName = $"{version.Name}_{version.Version}_globalMinor";
            string nsaFileName = outFileName + SaCommon.SaFileSuffix;

            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName)))
            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName + SaCommon.IndexSufix)))
            using (var nsaWriter = new NsaWriter(nsaStream, indexStream, version, referenceProvider, SaCommon.GlobalAlleleTag, true, false, SaCommon.SchemaVersion, true))
            {
                nsaWriter.Write(reader.GetItems());
            }

            return ExitCodes.Success;
        }
Пример #24
0
        /// <summary>
        /// Parses the PhyloP input and writes its items as an NPD file plus index.
        /// </summary>
        /// <returns>ExitCodes.Success when the file has been written</returns>
        private static ExitCodes ProgramExecution()
        {
            var referenceStream   = FileUtilities.GetReadStream(_compressedReference);
            var referenceProvider = new ReferenceSequenceProvider(referenceStream);

            var    version     = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");
            string outFileName = $"{version.Name}_{version.Version}";

            // the index file shares the PhyloP file name with an extra suffix
            string npdFileName = outFileName + SaCommon.PhylopFileSuffix;

            using (var parser = new PhylopParser(GZipUtilities.GetAppropriateReadStream(_inputFile), referenceProvider.Assembly, referenceProvider.RefNameToChromosome))
            using (var npdStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, npdFileName)))
            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, npdFileName + SaCommon.IndexSufix)))
            using (var npdWriter = new NpdWriter(npdStream, indexStream, version, referenceProvider.Assembly, SaCommon.PhylopTag, SaCommon.SchemaVersion))
            {
                npdWriter.Write(parser.GetItems());
            }

            return ExitCodes.Success;
        }
Пример #25
0
        /// <summary>
        /// Reads the merged COSMIC VCF/TSV input and writes its items as an NSA file
        /// plus index.
        /// </summary>
        /// <returns>ExitCodes.Success when the NSA file has been written</returns>
        private static ExitCodes ProgramExecution()
        {
            var referenceStream   = FileUtilities.GetReadStream(_compressedReference);
            var referenceProvider = new ReferenceSequenceProvider(referenceStream);

            // the reader is created before the version file next to the VCF is read
            var reader  = new MergedCosmicReader(_vcfFile, _tsvFile, referenceProvider);
            var version = DataSourceVersionReader.GetSourceVersion(_vcfFile + ".version");

            string outFileName = $"{version.Name}_{version.Version}";
            string nsaFileName = outFileName + SaCommon.SaFileSuffix;

            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName)))
            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName + SaCommon.IndexSufix)))
            using (var nsaWriter = new NsaWriter(new ExtendedBinaryWriter(nsaStream), new ExtendedBinaryWriter(indexStream), version, referenceProvider, SaCommon.CosmicTag, false, true, SaCommon.SchemaVersion, false))
            {
                nsaWriter.Write(reader.GetItems());
            }

            return ExitCodes.Success;
        }
Пример #26
0
        /// <summary>
        /// Combines one gnomAD genome file with its exome reader and writes the
        /// combined items to an NSA file plus index in the temp directory.
        /// </summary>
        /// <param name="exomeFiles">exome files from which the matching exome reader is chosen</param>
        /// <param name="genomeFile">genome file to process; its name without extension names the output</param>
        /// <param name="version">data source version stored in the NSA file</param>
        private static void CreateNsa(string[] exomeFiles, string genomeFile, DataSourceVersion version)
        {
            Console.WriteLine($"Processing file: {genomeFile}");

            var    outName     = Path.GetFileNameWithoutExtension(genomeFile);
            string nsaFileName = outName + SaCommon.SaFileSuffix;

            using (var exomeReader = GetExomeReader(exomeFiles, genomeFile))
            using (var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference)))
            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_tempDirectory, nsaFileName)))
            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_tempDirectory, nsaFileName + SaCommon.IndexSufix)))
            using (var nsaWriter = new NsaWriter(nsaStream, indexStream, version, referenceProvider, SaCommon.GnomadTag, true, false, SaCommon.SchemaVersion, false))
            using (var genomeReader = GZipUtilities.GetAppropriateStreamReader(genomeFile))
            {
                var snvReader    = new GnomadSnvReader(genomeReader, exomeReader, referenceProvider);
                var writtenCount = nsaWriter.Write(snvReader.GetCombinedItems());
                Console.WriteLine($"Wrote {writtenCount} items to NSA file.");
            }
        }
Пример #27
0
        /// <summary>
        /// Reads the TOPMed input and writes its items as an NSA file plus index.
        /// </summary>
        /// <returns>ExitCodes.Success when the NSA file has been written</returns>
        private static ExitCodes ProgramExecution()
        {
            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
            var topMedReader      = new TopMedReader(GZipUtilities.GetAppropriateStreamReader(_inputFile), referenceProvider);
            var version           = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

            string outFileName = $"{version.Name}_{version.Version}";

            // NOTE(review): jsonSchemaStream is created but nothing in this method writes
            // to it; it is kept so that the same file is still produced - confirm whether
            // a schema should be written here.
            // The writer is now disposed together with its streams, matching how the
            // other NSA creators in this code base scope their NsaWriter.
            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix)))
            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSufix)))
            using (var jsonSchemaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.JsonSchemaSuffix)))
            using (var nsaWriter = new NsaWriter(new ExtendedBinaryWriter(nsaStream), new ExtendedBinaryWriter(indexStream), version, referenceProvider, SaCommon.TopMedTag, true, false, SaCommon.SchemaVersion, false))
            {
                nsaWriter.Write(topMedReader.GetItems());
            }

            return ExitCodes.Success;
        }
Пример #28
0
        /// <summary>
        /// Reads the 1000 Genomes structural-variant input file and writes its items to an
        /// interval (NSI) file in the output directory.
        /// </summary>
        /// <remarks>
        /// The data source version is read from "<c>_inputFileName</c>.version". The output file name is
        /// "<c>{version.Name}_{version.Version}</c>" with spaces replaced by underscores, plus the interval suffix.
        /// </remarks>
        /// <returns><see cref="ExitCodes.Success"/> once the items have been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            // The reference provider wraps an open read stream; dispose it when done.
            using (var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference)))
            {
                var version = DataSourceVersionReader.GetSourceVersion(_inputFileName + ".version");

                string outFileName = $"{version.Name}_{version.Version}".Replace(' ', '_');

                using (var reader = GZipUtilities.GetAppropriateStreamReader(_inputFileName))
                using (var oneKGenSvReader = new OneKGenSvReader(reader, referenceProvider.RefNameToChromosome))
                using (var nsiStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.IntervalFileSuffix)))
                using (var nsiWriter = new NsiWriter(nsiStream, version, referenceProvider.Assembly,
                                                     SaCommon.OnekSvTag, ReportFor.StructuralVariants, SaCommon.SchemaVersion))
                {
                    nsiWriter.Write(oneKGenSvReader.GetItems());
                }
            }

            return ExitCodes.Success;
        }
Пример #29
0
        /// <summary>
        /// Creates a gnomAD NSA file and index in the output directory from the VCF files
        /// found in the input directory.
        /// </summary>
        /// <remarks>
        /// Input files are looked up as "*.vcf.bgz" first and "*.vcf.gz" as a fallback. Exactly one
        /// "*.version" file must be present in the input directory; it supplies the data source version
        /// and the output file name "<c>{version.Name}_{version.Version}</c>".
        /// </remarks>
        /// <returns><see cref="ExitCodes.Success"/> once the items have been written.</returns>
        /// <exception cref="ArgumentException">The configured sequencing data type is not supported.</exception>
        /// <exception cref="UserErrorException">The input directory contains no matching VCF files.</exception>
        /// <exception cref="InvalidDataException">The input directory does not contain exactly one .version file.</exception>
        private ExitCodes ProgramExecution()
        {
            if (!_supportedSequencingDataType.Contains(_sequencingDataType))
            {
                // string.Join takes the separator first and the values second; with the arguments
                // swapped the message never listed the supported types.
                string supportedTypes = string.Join(", ", _supportedSequencingDataType);
                throw new ArgumentException($"Only the following sequencing data types are supported: {supportedTypes}");
            }

            // The reference provider wraps an open read stream; dispose it when done.
            using (var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference)))
            {
                var inputFiles = Directory.GetFiles(_inputDirectory, "*.vcf.bgz");

                // Fall back to plain gzip naming when no block-gzipped files are present.
                if (inputFiles.Length == 0)
                {
                    inputFiles = Directory.GetFiles(_inputDirectory, "*.vcf.gz");
                }

                if (inputFiles.Length == 0)
                {
                    throw new UserErrorException("input directory does not contain any .vcf.bgz or .vcf.gz files");
                }

                var versionFiles = Directory.GetFiles(_inputDirectory, "*.version");

                if (versionFiles.Length != 1)
                {
                    // Distinguish "none" from "several" so the message matches the actual problem.
                    throw new InvalidDataException(versionFiles.Length == 0
                        ? "no .version file found in input directory"
                        : "more than one .version file found in input directory");
                }

                var version = DataSourceVersionReader.GetSourceVersion(versionFiles[0]);

                Console.WriteLine($"Creating gnomAD TSV file from {inputFiles.Length} input files");

                string outFileName = $"{version.Name}_{version.Version}";
                var    jsonTag     = _sequencingDataType == "genome" ? SaCommon.GnomadTag : SaCommon.GnomadExomeTag;

                using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix)))
                using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSufix)))
                using (var nsaWriter = new NsaWriter(new ExtendedBinaryWriter(nsaStream), new ExtendedBinaryWriter(indexStream), version, referenceProvider, jsonTag, true, false, SaCommon.SchemaVersion, false))
                {
                    nsaWriter.Write(GetItems(inputFiles, referenceProvider));
                }
            }

            return ExitCodes.Success;
        }
Пример #30
0
        /// <summary>
        /// Reads the ClinVar RCV and VCV inputs and writes an NSA file, its index and a JSON schema
        /// file into the output directory.
        /// </summary>
        /// <remarks>
        /// The data source version is read from "<c>_rcvFile</c>.version"; all outputs share the base name
        /// "<c>{version.Name}_{version.Version}</c>" followed by the SA file suffix.
        /// </remarks>
        /// <returns><see cref="ExitCodes.Success"/> once the items and the schema have been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            var sourceVersion = DataSourceVersionReader.GetSourceVersion(_rcvFile + ".version");

            string baseName    = $"{sourceVersion.Name}_{sourceVersion.Version}";
            string nsaFileName = baseName + SaCommon.SaFileSuffix;

            using (var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference)))
            using (var clinvarReader = new ClinVarReader(GZipUtilities.GetAppropriateReadStream(_rcvFile), GZipUtilities.GetAppropriateReadStream(_vcvFile), referenceProvider))
            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName)))
            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName + SaCommon.IndexSufix)))
            using (var nsaWriter = new NsaWriter(nsaStream, indexStream, sourceVersion, referenceProvider, SaCommon.ClinvarTag, false, true, SaCommon.SchemaVersion, false))
            using (var schemaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName + SaCommon.JsonSchemaSuffix)))
            using (var schemaWriter = new StreamWriter(schemaStream))
            {
                // Items are written first; the schema is read from the reader afterwards.
                nsaWriter.Write(clinvarReader.GetItems());
                schemaWriter.Write(clinvarReader.JsonSchema);
            }

            return ExitCodes.Success;
        }