/// <summary>
/// Reads the given MITOMAP variant files, merges and sorts their items, and writes them
/// to a TSV file in the output directory.
/// </summary>
/// <param name="mitoMapFileNames">
/// Paths of the MITOMAP variant input files. Nothing is written when the list is null,
/// empty, or contains a null or empty entry.
/// </param>
private void CreateMitoMapVarTsv(IReadOnlyList<string> mitoMapFileNames)
{
    // A null list is treated like an empty one instead of failing on the Count access.
    if (mitoMapFileNames == null || mitoMapFileNames.Count == 0 || mitoMapFileNames.Any(string.IsNullOrEmpty))
    {
        return;
    }

    var benchMark = new Benchmark();

    // The version is looked up using the directory of the first input file plus the name "mitoMapVar".
    var rootDirectory = new FileInfo(mitoMapFileNames[0]).Directory;
    var version = DataSourceVersionReader.GetSourceVersion(Path.Combine(rootDirectory.ToString(), "mitoMapVar"));

    // The same sequence provider is shared by every reader and by the writer.
    var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath));
    sequenceProvider.LoadChromosome(new Chromosome("chrM", "MT", 24));

    var mitoMapVarReaders = mitoMapFileNames
        .Select(mitoMapFileName => new MitoMapVariantReader(new FileInfo(mitoMapFileName), sequenceProvider))
        .ToList();

    var mergedMitoMapVarItems = MitoMapVariantReader.MergeAndSort(mitoMapVarReaders);

    using (var writer = new MitoMapVarTsvWriter(version, _outputDirectory, InterimSaCommon.MitoMapTag, sequenceProvider))
    {
        TsvWriterUtilities.WriteSortedItems(mergedMitoMapVarItems, writer);
    }

    var timeSpan = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());
    TsvWriterUtilities.WriteCompleteInfo(InterimSaCommon.MitoMapTag, version.Version, timeSpan);
}
/// <summary>
/// Reads dbSNP items from the given file and writes them through a combined writer that
/// feeds both a dbSNP TSV and a global-allele TSV.
/// </summary>
/// <param name="fileName">Path of the dbSNP input file. Nothing is written when it is null or empty.</param>
private void CreateDbsnpGaTsv(string fileName)
{
    // Same guard as the other string-based creators in this file: skip null and empty paths alike.
    if (string.IsNullOrEmpty(fileName))
    {
        return;
    }

    var benchMark = new Benchmark();
    var version = DataSourceVersionReader.GetSourceVersion(fileName);
    var assembly = _genomeAssembly.ToString();

    // Each underlying writer gets its own reference sequence provider over its own stream.
    var dbsnpWriter = new SaTsvWriter(_outputDirectory, version, assembly,
        SaTsvCommon.DbSnpSchemaVersion, InterimSaCommon.DbsnpTag, null, true,
        new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath)));

    var globalAlleleWriter = new SaTsvWriter(_outputDirectory, version, assembly,
        SaTsvCommon.DbSnpSchemaVersion, InterimSaCommon.GlobalAlleleTag, "GMAF", false,
        new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath)));

    using (var tsvWriter = new DbsnpGaTsvWriter(dbsnpWriter, globalAlleleWriter))
    // Release the input stream once all items have been written.
    using (var dbSnpStream = GZipUtilities.GetAppropriateReadStream(fileName))
    {
        var dbSnpReader = new DbSnpReader(dbSnpStream, _refNamesDictionary);
        TsvWriterUtilities.WriteSortedItems(dbSnpReader.GetDbSnpItems(), tsvWriter);
    }

    var timeSpan = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());
    TsvWriterUtilities.WriteCompleteInfo("DbSNP", version.Version, timeSpan);
}
/// <summary>
/// Parses custom intervals from the given file and writes one interval TSV entry per interval.
/// </summary>
/// <param name="fileName">Path of the custom interval file. Nothing is written when it is null or empty.</param>
private void CreateCustIntervalTsv(string fileName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return;
    }

    var stopwatch = new Benchmark();
    var sourceVersion = DataSourceVersionReader.GetSourceVersion(fileName);
    var intervalParser = new CustomIntervalParser(new FileInfo(fileName), _refNamesDictionary);

    using (var intervalWriter = new IntervalTsvWriter(
        _outputDirectory,
        sourceVersion,
        _genomeAssembly.ToString(),
        SaTsvCommon.CustIntervalSchemaVersion,
        intervalParser.KeyName,
        ReportFor.AllVariants))
    {
        foreach (var interval in intervalParser.GetCustomIntervals())
        {
            intervalWriter.AddEntry(
                interval.Chromosome.EnsemblName,
                interval.Start,
                interval.End,
                interval.GetJsonString());
        }
    }

    // The completion message carries the input file name rather than the version string.
    var elapsed = Benchmark.ToHumanReadable(stopwatch.GetElapsedTime());
    TsvWriterUtilities.WriteCompleteInfo("customInterval", fileName, elapsed);
}
/// <summary>
/// Reads the given MITOMAP structural variant files, merges and sorts their items, and
/// writes them to an interval TSV in the output directory.
/// </summary>
/// <param name="mitoMapSvFileNames">
/// Paths of the MITOMAP structural variant input files. Nothing is written when the list
/// is null, empty, or contains a null or empty entry.
/// </param>
private void CreateMitoMapSvTsv(IReadOnlyList<string> mitoMapSvFileNames)
{
    // A null list is treated like an empty one instead of failing on the Count access.
    if (mitoMapSvFileNames == null || mitoMapSvFileNames.Count == 0 || mitoMapSvFileNames.Any(string.IsNullOrEmpty))
    {
        return;
    }

    var benchMark = new Benchmark();

    // The version is looked up using the directory of the first input file plus the name "mitoMapSV".
    var rootDirectory = new FileInfo(mitoMapSvFileNames[0]).Directory;
    var version = DataSourceVersionReader.GetSourceVersion(Path.Combine(rootDirectory.ToString(), "mitoMapSV"));

    // One sequence provider is shared by all readers.
    var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath));
    sequenceProvider.LoadChromosome(new Chromosome("chrM", "MT", 24));

    var mitoMapSvReaders = mitoMapSvFileNames
        .Select(mitoMapFileName => new MitoMapSvReader(new FileInfo(mitoMapFileName), sequenceProvider))
        .ToList();

    var mergedMitoMapItems = MitoMapSvReader.MergeAndSort(mitoMapSvReaders);

    // The writer is created with the rCRS assembly name rather than the instance's genome assembly.
    using (var writer = new IntervalTsvWriter(_outputDirectory, version, GenomeAssembly.rCRS.ToString(),
        SaTsvCommon.MitoMapSchemaVersion, InterimSaCommon.MitoMapTag, ReportFor.StructuralVariants))
    {
        CreateSvTsv(mergedMitoMapItems, writer);
    }

    var timeSpan = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());
    TsvWriterUtilities.WriteCompleteInfo(InterimSaCommon.MitoMapTag, version.Version, timeSpan);
}
/// <summary>
/// Creates a structural variant interval TSV from the given file, choosing the reader and
/// schema version according to the source name.
/// </summary>
/// <param name="sourceName">
/// One of <c>InterimSaCommon.DgvTag</c>, <c>InterimSaCommon.ClinGenTag</c> or
/// <c>InterimSaCommon.OnekSvTag</c>. Any other value is reported on the console and nothing is written.
/// </param>
/// <param name="fileName">Path of the input file. Nothing is written when it is null or empty.</param>
private void CreateSvTsv(string sourceName, string fileName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return;
    }

    var benchMark = new Benchmark();
    string dataSource;
    var version = DataSourceVersionReader.GetSourceVersion(fileName);

    switch (sourceName)
    {
        case InterimSaCommon.DgvTag:
            dataSource = "DGV";
            using (var writer = new IntervalTsvWriter(_outputDirectory, version, _genomeAssembly.ToString(),
                SaTsvCommon.DgvSchemaVersion, InterimSaCommon.DgvTag, ReportFor.StructuralVariants))
            {
                var reader = new DgvReader(new FileInfo(fileName), _refNamesDictionary);
                CreateSvTsv(reader.GetDgvItems(), writer);
            }
            break;

        case InterimSaCommon.ClinGenTag:
            dataSource = "ClinGen";
            using (var writer = new IntervalTsvWriter(_outputDirectory, version, _genomeAssembly.ToString(),
                SaTsvCommon.ClinGenSchemaVersion, InterimSaCommon.ClinGenTag, ReportFor.StructuralVariants))
            {
                var reader = new ClinGenReader(new FileInfo(fileName), _refNamesDictionary);
                CreateSvTsv(reader.GetClinGenItems(), writer);
            }
            break;

        case InterimSaCommon.OnekSvTag:
            dataSource = "OnekSv";
            using (var writer = new IntervalTsvWriter(_outputDirectory, version, _genomeAssembly.ToString(),
                SaTsvCommon.OneKgenSchemaVersion, InterimSaCommon.OnekSvTag, ReportFor.StructuralVariants))
            {
                var reader = new OneKGenSvReader(new FileInfo(fileName), _refNamesDictionary);
                CreateSvTsv(reader.GetOneKGenSvItems(), writer);
            }
            break;

        default:
            // Nothing was written, so do not report a completed (and unnamed) data source.
            Console.WriteLine("invalid source name");
            return;
    }

    var timeSpan = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());
    TsvWriterUtilities.WriteCompleteInfo(dataSource, version.Version, timeSpan);
}
/// <summary>
/// Writes the items produced by the TOPMed reader to a TSV file and reports the elapsed time.
/// </summary>
public void CreateTsvs()
{
    var stopwatch = new Benchmark();

    using (var tsvWriter = new TopMedTsvWriter(_version, _outputDirName, _refProvider.GenomeAssembly, _refProvider))
    {
        using (var topMedReader = new TopMedReader(_streamReader, _refProvider.RefNameToChromosome))
        {
            var items = topMedReader.GetGnomadItems();
            TsvWriterUtilities.WriteSortedItems(items, tsvWriter);
        }
    }

    var elapsed = Benchmark.ToHumanReadable(stopwatch.GetElapsedTime());
    TsvWriterUtilities.WriteCompleteInfo("TOPMed", _version.Version, elapsed);
}
/// <summary>
/// Reads 1000 Genomes items from the given file and writes them to a TSV file.
/// </summary>
/// <param name="fileName">Path of the input file. Nothing is written when it is null or empty.</param>
private void CreateOnekgTsv(string fileName)
{
    // Same guard as the other string-based creators in this file: skip null and empty paths alike.
    if (string.IsNullOrEmpty(fileName))
    {
        return;
    }

    var benchMark = new Benchmark();
    var version = DataSourceVersionReader.GetSourceVersion(fileName);
    var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath));

    using (var tsvWriter = new OnekgTsvWriter(version, _outputDirectory, _genomeAssembly, sequenceProvider))
    {
        var onekgReader = new OneKGenReader(new FileInfo(fileName), _refNamesDictionary);
        TsvWriterUtilities.WriteSortedItems(onekgReader.GetOneKGenItems(), tsvWriter);
    }

    var timeSpan = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());
    TsvWriterUtilities.WriteCompleteInfo("OneKg", version.Version, timeSpan);
}
/// <summary>
/// Reads EVS items from the given file and writes them to a TSV file.
/// </summary>
/// <param name="fileName">Path of the EVS input file. Nothing is written when it is null or empty.</param>
private void CreateEvsTsv(string fileName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return;
    }

    var benchMark = new Benchmark();
    var version = DataSourceVersionReader.GetSourceVersion(fileName);

    using (var writer = new EvsTsvWriter(version, _outputDirectory, _genomeAssembly,
        new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath))))
    // Release the input reader once all items have been written.
    using (var streamReader = GZipUtilities.GetAppropriateStreamReader(fileName))
    {
        var evsReader = new EvsReader(streamReader, _refNamesDictionary);
        TsvWriterUtilities.WriteSortedItems(evsReader.GetEvsItems(), writer);
    }

    var timeSpan = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());
    TsvWriterUtilities.WriteCompleteInfo("EVS", version.Version, timeSpan);
}
/// <summary>
/// Writes the gnomAD items from every stream reader in <c>_streamReaders</c> through a single
/// TSV writer, printing a progress line after each file.
/// </summary>
public void CreateTsvs()
{
    var benchMark = new Benchmark();

    using (var writer = new GnomadTsvWriter(_version, _outputDirectory, _refProvider.GenomeAssembly, _refProvider, _sequencingDataType))
    {
        var count = 0;
        foreach (var fileStreamReader in _streamReaders)
        {
            var reader = new GnomadReader(fileStreamReader, _refProvider.RefNameToChromosome);
            TsvWriterUtilities.WriteSortedItems(reader.GetGnomadItems(), writer);

            // Pre-increment so the message reports how many files are done (1 after the first file, not 0).
            Console.WriteLine($"ingested {++count} file in " + benchMark.GetElapsedTime());
        }
    }

    var timeSpan = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());
    TsvWriterUtilities.WriteCompleteInfo("gnomAD", _version.Version, timeSpan);
}
/// <summary>
/// Reads ClinVar items from the given XML file and writes them to a TSV file.
/// </summary>
/// <param name="fileName">Path of the ClinVar XML file. Nothing is written when it is null or empty.</param>
private void CreateClinvarTsv(string fileName)
{
    // Same guard as the other string-based creators in this file: skip null and empty paths alike.
    if (string.IsNullOrEmpty(fileName))
    {
        return;
    }

    var benchMark = new Benchmark();
    var version = DataSourceVersionReader.GetSourceVersion(fileName);

    // ClinVar items do not come in sorted order, hence we need to store them in an array,
    // sort them and then flush them out.
    // The writer and the reader each get their own reference sequence provider.
    using (var writer = new ClinvarTsvWriter(version, _outputDirectory, _genomeAssembly,
        new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath))))
    {
        var clinvarReader = new ClinVarXmlReader(new FileInfo(fileName),
            new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath)));
        TsvWriterUtilities.WriteSortedItems(clinvarReader.GetItems(), writer);
    }

    var timeSpan = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());
    TsvWriterUtilities.WriteCompleteInfo("ClinVar", version.Version, timeSpan);
}
/// <summary>
/// Writes COSMIC CNV entries to an interval TSV when a CNV stream is available, then
/// reports the elapsed time. The completion message is written even when there is no CNV stream.
/// </summary>
public void CreateTsv()
{
    var stopwatch = new Benchmark();

    if (_cnvStream != null)
    {
        using (var intervalWriter = new IntervalTsvWriter(
            _outputDirectory,
            _version,
            _genomeAssembly.ToString(),
            SaTsvCommon.CosmicSvSchemaVersion,
            InterimSaCommon.CosmicCnvTag,
            ReportFor.StructuralVariants))
        {
            using (var cosmicCnvReader = new CosmicCnvReader(_cnvStream, _refNameToChorm, _genomeAssembly))
            {
                foreach (var entry in cosmicCnvReader.GetEntries())
                {
                    intervalWriter.AddEntry(
                        entry.Chromosome.EnsemblName,
                        entry.Start,
                        entry.End,
                        entry.GetJsonString());
                }
            }
        }
    }

    var elapsed = Benchmark.ToHumanReadable(stopwatch.GetElapsedTime());
    TsvWriterUtilities.WriteCompleteInfo("COSMIC", _version.Version, elapsed);
}
/// <summary>
/// Merges COSMIC data from a VCF file and a TSV file and writes the resulting items to a TSV file.
/// </summary>
/// <param name="vcfFile">Path of the COSMIC VCF file; also used to look up the data source version.</param>
/// <param name="tsvFile">Path of the COSMIC TSV file.</param>
/// <remarks>Nothing is written when either path is null or empty.</remarks>
private void CreateCosmicTsv(string vcfFile, string tsvFile)
{
    if (string.IsNullOrEmpty(tsvFile) || string.IsNullOrEmpty(vcfFile))
    {
        return;
    }

    var benchMark = new Benchmark();
    var version = DataSourceVersionReader.GetSourceVersion(vcfFile);

    using (var writer = new CosmicTsvWriter(version, _outputDirectory, _genomeAssembly,
        new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath))))
    // Release both input readers once all items have been written.
    using (var tsvReader = GZipUtilities.GetAppropriateStreamReader(tsvFile))
    using (var vcfReader = GZipUtilities.GetAppropriateStreamReader(vcfFile))
    {
        var reader = new MergedCosmicReader(vcfReader, tsvReader, _refNamesDictionary);
        TsvWriterUtilities.WriteSortedItems(reader.GetCosmicItems(), writer);
    }

    var timeSpan = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());
    TsvWriterUtilities.WriteCompleteInfo("COSMIC", version.Version, timeSpan);
}