Exemplo n.º 1
0
        /// <summary>
        /// Reads dbSNP items from <paramref name="fileName"/> and writes them through a combined
        /// dbSNP / global-allele TSV writer, then reports the elapsed time.
        /// </summary>
        /// <param name="fileName">Path of the dbSNP input file. A null or empty path makes this a no-op.</param>
        private void CreateDbsnpGaTsv(string fileName)
        {
            // An empty path is as unusable as a missing one; bail out before touching the file system.
            if (string.IsNullOrEmpty(fileName))
            {
                return;
            }

            var benchMark = new Benchmark();

            var version = DataSourceVersionReader.GetSourceVersion(fileName);

            // Each writer receives its own reference sequence provider over its own stream.
            var dbsnpWriter = new SaTsvWriter(_outputDirectory, version, _genomeAssembly.ToString(),
                                              SaTsvCommon.DbSnpSchemaVersion, InterimSaCommon.DbsnpTag, null, true,
                                              new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath)));

            var globalAlleleWriter = new SaTsvWriter(_outputDirectory, version, _genomeAssembly.ToString(),
                                                     SaTsvCommon.DbSnpSchemaVersion, InterimSaCommon.GlobalAlleleTag, "GMAF", false,
                                                     new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath)));

            // The input stream is now scoped as well, so it is released even when writing throws.
            using (var tsvWriter = new DbsnpGaTsvWriter(dbsnpWriter, globalAlleleWriter))
            using (var dbSnpStream = GZipUtilities.GetAppropriateReadStream(fileName))
            {
                var dbSnpReader = new DbSnpReader(dbSnpStream, _refNamesDictionary);
                TsvWriterUtilities.WriteSortedItems(dbSnpReader.GetDbSnpItems(), tsvWriter);
            }

            var timeSpan = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());

            TsvWriterUtilities.WriteCompleteInfo("DbSNP", version.Version, timeSpan);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Merges the variants read from every MITOMAP file in <paramref name="mitoMapFileNames"/>
        /// and writes them to a single MITOMAP variant TSV.
        /// </summary>
        /// <param name="mitoMapFileNames">
        /// Input file paths. Nothing is done when the list is empty or contains a null/empty entry.
        /// The data source version is looked up under the name "mitoMapVar" in the directory of the first file.
        /// </param>
        private void CreateMitoMapVarTsv(IReadOnlyList<string> mitoMapFileNames)
        {
            var hasUsableInput = mitoMapFileNames.Count > 0 && !mitoMapFileNames.Any(string.IsNullOrEmpty);
            if (!hasUsableInput)
            {
                return;
            }

            var stopwatch       = new Benchmark();
            var inputDirectory  = new FileInfo(mitoMapFileNames[0]).Directory;
            var versionPath     = Path.Combine(inputDirectory.ToString(), "mitoMapVar");
            var sourceVersion   = DataSourceVersionReader.GetSourceVersion(versionPath);
            var referenceStream = FileUtilities.GetReadStream(_compressedReferencePath);
            var refProvider     = new ReferenceSequenceProvider(referenceStream);

            // The chromosome is loaded once up front and the same provider is handed to every reader and the writer.
            refProvider.LoadChromosome(new Chromosome("chrM", "MT", 24));

            var variantReaders = mitoMapFileNames
                .Select(path => new MitoMapVariantReader(new FileInfo(path), refProvider))
                .ToList();

            var sortedItems = MitoMapVariantReader.MergeAndSort(variantReaders);

            using (var tsvWriter = new MitoMapVarTsvWriter(sourceVersion, _outputDirectory, InterimSaCommon.MitoMapTag, refProvider))
            {
                TsvWriterUtilities.WriteSortedItems(sortedItems, tsvWriter);
            }

            var elapsed = Benchmark.ToHumanReadable(stopwatch.GetElapsedTime());

            TsvWriterUtilities.WriteCompleteInfo(InterimSaCommon.MitoMapTag, sourceVersion.Version, elapsed);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Parses the custom intervals in <paramref name="fileName"/> and adds each one to an interval TSV
        /// keyed by the parser's <c>KeyName</c>.
        /// </summary>
        /// <param name="fileName">Path of the custom interval file. A null or empty path makes this a no-op.</param>
        private void CreateCustIntervalTsv(string fileName)
        {
            if (string.IsNullOrEmpty(fileName))
            {
                return;
            }

            var stopwatch      = new Benchmark();
            var sourceVersion  = DataSourceVersionReader.GetSourceVersion(fileName);
            var intervalParser = new CustomIntervalParser(new FileInfo(fileName), _refNamesDictionary);

            using (var intervalWriter = new IntervalTsvWriter(
                _outputDirectory,
                sourceVersion,
                _genomeAssembly.ToString(),
                SaTsvCommon.CustIntervalSchemaVersion,
                intervalParser.KeyName,
                ReportFor.AllVariants))
            {
                foreach (var interval in intervalParser.GetCustomIntervals())
                {
                    var chromosomeName = interval.Chromosome.EnsemblName;
                    intervalWriter.AddEntry(chromosomeName, interval.Start, interval.End, interval.GetJsonString());
                }
            }

            var elapsed = Benchmark.ToHumanReadable(stopwatch.GetElapsedTime());

            // The completion line reports the input file name rather than a version string.
            TsvWriterUtilities.WriteCompleteInfo("customInterval", fileName, elapsed);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Merges the structural variants read from every MITOMAP file in <paramref name="mitoMapSvFileNames"/>
        /// and writes them to an interval TSV for the rCRS assembly.
        /// </summary>
        /// <param name="mitoMapSvFileNames">
        /// Input file paths. Nothing is done when the list is empty or contains a null/empty entry.
        /// The data source version is looked up under the name "mitoMapSV" in the directory of the first file.
        /// </param>
        private void CreateMitoMapSvTsv(IReadOnlyList<string> mitoMapSvFileNames)
        {
            var hasUsableInput = mitoMapSvFileNames.Count > 0 && !mitoMapSvFileNames.Any(string.IsNullOrEmpty);
            if (!hasUsableInput)
            {
                return;
            }

            var stopwatch       = new Benchmark();
            var inputDirectory  = new FileInfo(mitoMapSvFileNames[0]).Directory;
            var versionPath     = Path.Combine(inputDirectory.ToString(), "mitoMapSV");
            var sourceVersion   = DataSourceVersionReader.GetSourceVersion(versionPath);
            var referenceStream = FileUtilities.GetReadStream(_compressedReferencePath);
            var refProvider     = new ReferenceSequenceProvider(referenceStream);

            // The chromosome is loaded once up front and the same provider is handed to every reader.
            refProvider.LoadChromosome(new Chromosome("chrM", "MT", 24));

            var svReaders = mitoMapSvFileNames
                .Select(path => new MitoMapSvReader(new FileInfo(path), refProvider))
                .ToList();

            var sortedItems = MitoMapSvReader.MergeAndSort(svReaders);

            using (var intervalWriter = new IntervalTsvWriter(
                _outputDirectory,
                sourceVersion,
                GenomeAssembly.rCRS.ToString(),
                SaTsvCommon.MitoMapSchemaVersion,
                InterimSaCommon.MitoMapTag,
                ReportFor.StructuralVariants))
            {
                CreateSvTsv(sortedItems, intervalWriter);
            }

            var elapsed = Benchmark.ToHumanReadable(stopwatch.GetElapsedTime());

            TsvWriterUtilities.WriteCompleteInfo(InterimSaCommon.MitoMapTag, sourceVersion.Version, elapsed);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Creates a structural-variant interval TSV from <paramref name="fileName"/>, choosing the reader
        /// and schema version according to <paramref name="sourceName"/>.
        /// </summary>
        /// <param name="sourceName">
        /// One of <c>InterimSaCommon.DgvTag</c>, <c>InterimSaCommon.ClinGenTag</c> or
        /// <c>InterimSaCommon.OnekSvTag</c>. Any other value is logged as invalid and no completion is reported.
        /// </param>
        /// <param name="fileName">Path of the input file. A null or empty path makes this a no-op.</param>
        private void CreateSvTsv(string sourceName, string fileName)
        {
            if (string.IsNullOrEmpty(fileName))
            {
                return;
            }

            var benchMark = new Benchmark();
            var version   = DataSourceVersionReader.GetSourceVersion(fileName);
            string dataSource;

            switch (sourceName)
            {
                case InterimSaCommon.DgvTag:
                    dataSource = "DGV";
                    using (var writer = new IntervalTsvWriter(_outputDirectory, version,
                                                              _genomeAssembly.ToString(), SaTsvCommon.DgvSchemaVersion, InterimSaCommon.DgvTag,
                                                              ReportFor.StructuralVariants))
                    {
                        var reader = new DgvReader(new FileInfo(fileName), _refNamesDictionary);
                        CreateSvTsv(reader.GetDgvItems(), writer);
                    }

                    break;

                case InterimSaCommon.ClinGenTag:
                    dataSource = "ClinGen";
                    using (var writer = new IntervalTsvWriter(_outputDirectory, version,
                                                              _genomeAssembly.ToString(), SaTsvCommon.ClinGenSchemaVersion, InterimSaCommon.ClinGenTag,
                                                              ReportFor.StructuralVariants))
                    {
                        var reader = new ClinGenReader(new FileInfo(fileName), _refNamesDictionary);
                        CreateSvTsv(reader.GetClinGenItems(), writer);
                    }

                    break;

                case InterimSaCommon.OnekSvTag:
                    dataSource = "OnekSv";
                    using (var writer = new IntervalTsvWriter(_outputDirectory, version,
                                                              _genomeAssembly.ToString(), SaTsvCommon.OneKgenSchemaVersion, InterimSaCommon.OnekSvTag,
                                                              ReportFor.StructuralVariants))
                    {
                        var reader = new OneKGenSvReader(new FileInfo(fileName), _refNamesDictionary);
                        CreateSvTsv(reader.GetOneKGenSvItems(), writer);
                    }

                    break;

                default:
                    // Nothing was written for an unrecognised source, so stop here instead of
                    // reporting a completed run with an empty data-source name.
                    Console.WriteLine("invalid source name");
                    return;
            }

            var timeSpan = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());

            TsvWriterUtilities.WriteCompleteInfo(dataSource, version.Version, timeSpan);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Streams the items produced by a <c>TopMedReader</c> over <c>_streamReader</c> into a
        /// <c>TopMedTsvWriter</c> and reports the elapsed time under the name "TOPMed".
        /// </summary>
        public void CreateTsvs()
        {
            var stopwatch = new Benchmark();

            using (var tsvWriter = new TopMedTsvWriter(_version, _outputDirName, _refProvider.GenomeAssembly, _refProvider))
            {
                using (var topMedReader = new TopMedReader(_streamReader, _refProvider.RefNameToChromosome))
                {
                    var items = topMedReader.GetGnomadItems();
                    TsvWriterUtilities.WriteSortedItems(items, tsvWriter);
                }
            }

            var elapsed = Benchmark.ToHumanReadable(stopwatch.GetElapsedTime());

            TsvWriterUtilities.WriteCompleteInfo("TOPMed", _version.Version, elapsed);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Reads the items in <paramref name="fileName"/> with a <c>OneKGenReader</c> and writes them
        /// through an <c>OnekgTsvWriter</c>, then reports the elapsed time under the name "OneKg".
        /// </summary>
        /// <param name="fileName">Path of the input file. A null or empty path makes this a no-op.</param>
        private void CreateOnekgTsv(string fileName)
        {
            // An empty path cannot be opened either, so it is skipped just like a missing one
            // instead of being passed on to the version reader and FileInfo.
            if (string.IsNullOrEmpty(fileName))
            {
                return;
            }

            var benchMark = new Benchmark();

            var version = DataSourceVersionReader.GetSourceVersion(fileName);

            using (var tsvWriter = new OnekgTsvWriter(version, _outputDirectory, _genomeAssembly,
                                                      new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath))))
            {
                var onekgReader = new OneKGenReader(new FileInfo(fileName), _refNamesDictionary);
                TsvWriterUtilities.WriteSortedItems(onekgReader.GetOneKGenItems(), tsvWriter);
            }

            var timeSpan = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());

            TsvWriterUtilities.WriteCompleteInfo("OneKg", version.Version, timeSpan);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Reads EVS items from <paramref name="fileName"/> and writes them through an <c>EvsTsvWriter</c>,
        /// then reports the elapsed time under the name "EVS".
        /// </summary>
        /// <param name="fileName">Path of the EVS input file. A null or empty path makes this a no-op.</param>
        private void CreateEvsTsv(string fileName)
        {
            if (string.IsNullOrEmpty(fileName))
            {
                return;
            }

            var benchMark = new Benchmark();

            var version = DataSourceVersionReader.GetSourceVersion(fileName);

            // The input reader is scoped alongside the writer so its file handle is released
            // even when reading or writing throws.
            using (var writer = new EvsTsvWriter(version, _outputDirectory, _genomeAssembly,
                                                 new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath))))
            using (var evsStreamReader = GZipUtilities.GetAppropriateStreamReader(fileName))
            {
                var evsReader = new EvsReader(evsStreamReader, _refNamesDictionary);
                TsvWriterUtilities.WriteSortedItems(evsReader.GetEvsItems(), writer);
            }

            var timeSpan = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());

            TsvWriterUtilities.WriteCompleteInfo("EVS", version.Version, timeSpan);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Reads custom annotation items from <paramref name="fileName"/> and writes them through a
        /// <c>CustomAnnoTsvWriter</c>, logging a start and a finish line to the console.
        /// </summary>
        /// <param name="fileName">Path of the custom annotation file. A null or empty path makes this a no-op.</param>
        private void CreateCutomAnnoTsv(string fileName)
        {
            if (string.IsNullOrEmpty(fileName))
            {
                return;
            }

            Console.WriteLine($"Creating TSV from {fileName}");

            var sourceVersion    = DataSourceVersionReader.GetSourceVersion(fileName);
            var annotationReader = new CustomAnnotationReader(new FileInfo(fileName), _refNamesDictionary);

            // The writer is told whether the annotations are positional, as reported by the reader.
            using (var tsvWriter = new CustomAnnoTsvWriter(
                sourceVersion,
                _outputDirectory,
                _genomeAssembly,
                annotationReader.IsPositional,
                new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath))))
            {
                var items = annotationReader.GetCustomItems();
                TsvWriterUtilities.WriteSortedItems(items, tsvWriter);
            }

            Console.WriteLine($"Finished {fileName}");
        }
Exemplo n.º 10
0
        /// <summary>
        /// Feeds every stream reader in <c>_streamReaders</c> through a <c>GnomadReader</c> into one shared
        /// <c>GnomadTsvWriter</c>, logging progress after each file, then reports the total elapsed time
        /// under the name "gnomAD".
        /// </summary>
        public void CreateTsvs()
        {
            var benchMark = new Benchmark();

            using (var writer = new GnomadTsvWriter(_version, _outputDirectory, _refProvider.GenomeAssembly, _refProvider, _sequencingDataType))
            {
                var count = 0;

                foreach (var fileStreamReader in _streamReaders)
                {
                    var reader = new GnomadReader(fileStreamReader, _refProvider.RefNameToChromosome);
                    TsvWriterUtilities.WriteSortedItems(reader.GetGnomadItems(), writer);

                    // Pre-increment so the message states how many files have been ingested so far
                    // (1 after the first file, not 0).
                    Console.WriteLine($"ingested {++count} file in " + benchMark.GetElapsedTime());
                }
            }

            var timeSpan = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());

            TsvWriterUtilities.WriteCompleteInfo("gnomAD", _version.Version, timeSpan);
        }
Exemplo n.º 11
0
        /// <summary>
        /// Reads ClinVar items from the XML file <paramref name="fileName"/> and writes them through a
        /// <c>ClinvarTsvWriter</c>, then reports the elapsed time under the name "ClinVar".
        /// </summary>
        /// <param name="fileName">Path of the ClinVar XML file. A null or empty path makes this a no-op.</param>
        private void CreateClinvarTsv(string fileName)
        {
            // An empty path cannot be opened either, so it is skipped just like a missing one
            // instead of being passed on to the version reader and FileInfo.
            if (string.IsNullOrEmpty(fileName))
            {
                return;
            }

            var benchMark = new Benchmark();

            var version = DataSourceVersionReader.GetSourceVersion(fileName);

            // ClinVar items do not come in sorted order, hence they have to be stored, sorted and then flushed out.
            // The writer and the reader each get their own reference sequence provider.
            using (var writer = new ClinvarTsvWriter(version, _outputDirectory, _genomeAssembly,
                                                     new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath))))
            {
                var clinvarReader = new ClinVarXmlReader(new FileInfo(fileName),
                                                         new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath)));
                TsvWriterUtilities.WriteSortedItems(clinvarReader.GetItems(), writer);
            }

            var timeSpan = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());

            TsvWriterUtilities.WriteCompleteInfo("ClinVar", version.Version, timeSpan);
        }
Exemplo n.º 12
0
        /// <summary>
        /// When a CNV stream is available, writes every entry from a <c>CosmicCnvReader</c> to an interval TSV.
        /// The completion line for "COSMIC" is reported whether or not a stream was present.
        /// </summary>
        public void CreateTsv()
        {
            var stopwatch = new Benchmark();

            if (_cnvStream != null)
            {
                using (var intervalWriter = new IntervalTsvWriter(
                    _outputDirectory,
                    _version,
                    _genomeAssembly.ToString(),
                    SaTsvCommon.CosmicSvSchemaVersion,
                    InterimSaCommon.CosmicCnvTag,
                    ReportFor.StructuralVariants))
                {
                    using (var cosmicCnvReader = new CosmicCnvReader(_cnvStream, _refNameToChorm, _genomeAssembly))
                    {
                        foreach (var entry in cosmicCnvReader.GetEntries())
                        {
                            var chromosomeName = entry.Chromosome.EnsemblName;
                            intervalWriter.AddEntry(chromosomeName, entry.Start, entry.End, entry.GetJsonString());
                        }
                    }
                }
            }

            var elapsed = Benchmark.ToHumanReadable(stopwatch.GetElapsedTime());

            TsvWriterUtilities.WriteCompleteInfo("COSMIC", _version.Version, elapsed);
        }
Exemplo n.º 13
0
        /// <summary>
        /// Merges the COSMIC VCF and TSV inputs with a <c>MergedCosmicReader</c> and writes the resulting
        /// items through a <c>CosmicTsvWriter</c>, then reports the elapsed time under the name "COSMIC".
        /// </summary>
        /// <param name="vcfFile">Path of the COSMIC VCF file; it is also used to look up the data source version.</param>
        /// <param name="tsvFile">Path of the COSMIC TSV file.</param>
        /// <remarks>Nothing is done when either path is null or empty.</remarks>
        private void CreateCosmicTsv(string vcfFile, string tsvFile)
        {
            if (string.IsNullOrEmpty(tsvFile) || string.IsNullOrEmpty(vcfFile))
            {
                return;
            }

            var benchMark = new Benchmark();

            var version = DataSourceVersionReader.GetSourceVersion(vcfFile);

            // Both input readers are scoped alongside the writer so their file handles are released
            // even when merging or writing throws. They are opened in the same order as before (TSV, then VCF).
            using (var writer = new CosmicTsvWriter(version, _outputDirectory, _genomeAssembly,
                                                    new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath))))
            using (var tsvReader = GZipUtilities.GetAppropriateStreamReader(tsvFile))
            using (var vcfReader = GZipUtilities.GetAppropriateStreamReader(vcfFile))
            {
                var reader = new MergedCosmicReader(vcfReader, tsvReader, _refNamesDictionary);

                TsvWriterUtilities.WriteSortedItems(reader.GetCosmicItems(), writer);
            }

            var timeSpan = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());

            TsvWriterUtilities.WriteCompleteInfo("COSMIC", version.Version, timeSpan);
        }