/// <summary>
/// Opens the interim TSV readers for every supplied file, gathers their headers and
/// reference names, prints the data sources and runs the assembly check on the SA headers.
/// </summary>
/// <param name="annotationFiles">Paths handed to <c>ReaderUtilities.GetSaTsvReaders</c>.</param>
/// <param name="intervalFiles">Paths handed to <c>ReaderUtilities.GetIntervalReaders</c>.</param>
/// <param name="miscFile">Path handed to <c>ReaderUtilities.GetMiscTsvReader</c>; the resulting reader may be null.</param>
/// <param name="geneFiles">Paths handed to <c>ReaderUtilities.GetGeneReaders</c>.</param>
/// <param name="compressedReference">Path of the compressed reference used to build the sequence provider.</param>
/// <param name="outputDirectory">Directory stored for later output.</param>
public InterimTsvsMerger(IEnumerable<string> annotationFiles, IEnumerable<string> intervalFiles, string miscFile, IEnumerable<string> geneFiles, string compressedReference, string outputDirectory)
{
    _outputDirectory = outputDirectory;

    // the reference supplies both the assembly and the name -> chromosome lookup
    var referenceStream  = FileUtilities.GetReadStream(compressedReference);
    var sequenceProvider = new ReferenceSequenceProvider(referenceStream);
    _genomeAssembly      = sequenceProvider.GenomeAssembly;
    _refNameToChromosome = sequenceProvider.RefNameToChromosome;

    // readers: annotations, misc, genes, intervals
    _tsvReaders      = ReaderUtilities.GetSaTsvReaders(annotationFiles);
    _miscReader      = ReaderUtilities.GetMiscTsvReader(miscFile);
    _geneReaders     = ReaderUtilities.GetGeneReaders(geneFiles);
    _intervalReaders = ReaderUtilities.GetIntervalReaders(intervalFiles);

    // SA headers come from the annotation readers first, then the interval readers
    _saHeaders = new List<SaHeader>(ReaderUtilities.GetTsvHeaders(_tsvReaders));
    _saHeaders.AddRange(ReaderUtilities.GetTsvHeaders(_intervalReaders));

    _geneHeaders = ReaderUtilities.GetTsvHeaders(_geneReaders)?.ToList();

    // reference names are the union over annotation, interval and (when present) misc readers
    _refNames = new HashSet<string>(ReaderUtilities.GetRefNames(_tsvReaders));
    _refNames.UnionWith(ReaderUtilities.GetRefNames(_intervalReaders));
    if (_miscReader != null)
    {
        _refNames.UnionWith(_miscReader.RefNames);
    }

    DisplayDataSources(_saHeaders, _geneHeaders);
    MergeUtilities.CheckAssemblyConsistancy(_saHeaders);
}
/// <summary>
/// Creates the gnomAD TSV files from every <c>*.vcf.bgz</c> file in the input directory.
/// </summary>
/// <returns><c>ExitCodes.Success</c> once <c>CreateTsvs</c> has returned.</returns>
/// <exception cref="ArgumentException">The configured sequencing data type is not supported.</exception>
/// <exception cref="UserErrorException">The input directory holds no <c>.vcf.bgz</c> files.</exception>
/// <exception cref="InvalidDataException">The input directory does not hold exactly one <c>.version</c> file.</exception>
private ExitCodes ProgramExecution()
{
    if (!_supportedSequencingDataType.Contains(_sequencingDataType))
    {
        // string.Join takes the separator first; the values follow
        throw new ArgumentException($"Only the following sequencing data types are supported: {string.Join(", ", _supportedSequencingDataType)}");
    }

    // validate the directory contents before opening anything, so a failed check leaks no streams
    var inputFiles = Directory.GetFiles(_inputDirectory, "*.vcf.bgz");
    if (inputFiles.Length == 0)
    {
        throw new UserErrorException("input directory does not contain any .vcf.bgz files");
    }

    var versionFiles = Directory.GetFiles(_inputDirectory, "*.version");
    if (versionFiles.Length != 1)
    {
        throw new InvalidDataException($"expected exactly one .version file in input directory, but found {versionFiles.Length}");
    }

    // Directory.GetFiles already returns paths prefixed with the directory that was searched,
    // so they are used as-is (re-combining with _inputDirectory breaks relative directories)
    var inputStreamReaders = inputFiles
        .Select(filePath => GZipUtilities.GetAppropriateStreamReader(filePath))
        .ToArray();

    var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));

    Console.WriteLine($"Creating gnomAD TSV file from {inputStreamReaders.Length} input files");

    var version          = DataSourceVersionReader.GetSourceVersion(versionFiles[0]);
    var gnomadTsvCreator = new GnomadTsvCreator(inputStreamReaders, referenceProvider, version, _outputDirectory, _sequencingDataType);
    gnomadTsvCreator.CreateTsvs();

    return ExitCodes.Success;
}
/// <summary>
/// Reads the given MITOMAP variant files, merges and sorts their items and writes them
/// through a <c>MitoMapVarTsvWriter</c>. Does nothing when the list is empty or contains
/// a null/empty file name.
/// </summary>
/// <param name="mitoMapFileNames">MITOMAP variant file paths; the first one determines the directory of the version file.</param>
private void CreateMitoMapVarTsv(IReadOnlyList<string> mitoMapFileNames)
{
    if (mitoMapFileNames.Count == 0 || mitoMapFileNames.Any(string.IsNullOrEmpty))
    {
        return;
    }

    var stopwatch = new Benchmark();

    // the version file ("mitoMapVar") sits next to the first input file
    var inputDirectory = new FileInfo(mitoMapFileNames[0]).Directory;
    var versionPath    = Path.Combine(inputDirectory.ToString(), "mitoMapVar");
    var version        = DataSourceVersionReader.GetSourceVersion(versionPath);

    var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath));
    sequenceProvider.LoadChromosome(new Chromosome("chrM", "MT", 24));

    // one reader per input file, in input order
    var readers = mitoMapFileNames
        .Select(fileName => new MitoMapVariantReader(new FileInfo(fileName), sequenceProvider))
        .ToList();

    var sortedItems = MitoMapVariantReader.MergeAndSort(readers);

    using (var writer = new MitoMapVarTsvWriter(version, _outputDirectory, InterimSaCommon.MitoMapTag, sequenceProvider))
    {
        TsvWriterUtilities.WriteSortedItems(sortedItems, writer);
    }

    var elapsed = Benchmark.ToHumanReadable(stopwatch.GetElapsedTime());
    TsvWriterUtilities.WriteCompleteInfo(InterimSaCommon.MitoMapTag, version.Version, elapsed);
}
/// <summary>
/// Builds the PrimateAI NSA file and its index: reads the transcript cache to obtain the
/// gene ID to symbol maps, then streams the parsed items into an <c>NsaWriter</c>.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
    var version          = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");
    string baseName      = $"{version.Name}_{version.Version}";

    // the transcript cache is only needed long enough to read the gene data
    TranscriptCacheData cacheData;
    var cacheStream = FileUtilities.GetReadStream(CacheConstants.TranscriptPath(_transcriptCachePrefix));
    using (var cacheReader = new TranscriptCacheReader(cacheStream))
    {
        cacheData = cacheReader.Read(sequenceProvider.RefIndexToChromosome);
    }

    var (entrezToHgnc, ensemblToHgnc) = PrimateAiUtilities.GetIdToSymbols(cacheData);

    string nsaPath   = Path.Combine(_outputDirectory, baseName + SaCommon.SaFileSuffix);
    string indexPath = Path.Combine(_outputDirectory, baseName + SaCommon.SaFileSuffix + SaCommon.IndexSufix);

    using (var parser = new PrimateAiParser(GZipUtilities.GetAppropriateReadStream(_inputFile), sequenceProvider, entrezToHgnc, ensemblToHgnc))
    using (var nsaStream = FileUtilities.GetCreateStream(nsaPath))
    using (var indexStream = FileUtilities.GetCreateStream(indexPath))
    using (var writer = new NsaWriter(nsaStream, indexStream, version, sequenceProvider, SaCommon.PrimateAiTag, true, true, SaCommon.SchemaVersion, false))
    {
        writer.Write(parser.GetItems());
    }

    return ExitCodes.Success;
}
/// <summary>
/// Builds the MITOMAP variant NSA file and its index from the configured MITOMAP files.
/// </summary>
/// <returns>
/// <c>ExitCodes.PathNotFound</c> when the first input file has no parent directory;
/// otherwise <c>ExitCodes.Success</c>.
/// </returns>
private static ExitCodes ProgramExecution()
{
    // the version file ("mitoMapVar") is looked up next to the first input file
    var inputDirectory = new FileInfo(MitoMapFileNames[0]).Directory;
    if (inputDirectory == null)
    {
        return ExitCodes.PathNotFound;
    }

    var versionPath = Path.Combine(inputDirectory.ToString(), "mitoMapVar");
    var version     = DataSourceVersionReader.GetSourceVersion(versionPath);

    var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
    sequenceProvider.LoadChromosome(sequenceProvider.RefNameToChromosome["chrM"]);

    MitoMapInputDb inputDb = MitoMapDatabaseUtilities.Create(_mitoMapDatabase);

    // one reader per input file, in input order
    var readers = new List<MitoMapVariantReader>();
    foreach (var fileName in MitoMapFileNames)
    {
        readers.Add(new MitoMapVariantReader(new FileInfo(fileName), inputDb, sequenceProvider));
    }

    var sortedItems = MitoMapVariantReader.GetMergeAndSortedItems(readers);

    string baseName  = $"{version.Name}_{version.Version}";
    string nsaPath   = Path.Combine(_outputDirectory, baseName + SaCommon.SaFileSuffix);
    string indexPath = Path.Combine(_outputDirectory, baseName + SaCommon.SaFileSuffix + SaCommon.IndexSufix);

    using (var nsaStream = FileUtilities.GetCreateStream(nsaPath))
    using (var indexStream = FileUtilities.GetCreateStream(indexPath))
    using (var writer = new NsaWriter(nsaStream, indexStream, version, sequenceProvider, SaCommon.MitoMapTag, false, true, SaCommon.SchemaVersion, false))
    {
        writer.Write(sortedItems);
    }

    return ExitCodes.Success;
}
/// <summary>
/// Reads the transcript cache and builds a lookup from each gene's
/// <c>EnsemblId.WithoutVersion</c> to its symbol.
/// </summary>
/// <param name="refProvider">Supplies the <c>RefIndexToChromosome</c> map that the cache reader needs.</param>
/// <param name="cacheReader">Reader positioned on the transcript cache.</param>
/// <returns>Gene ID to symbol dictionary; genes with a null or empty ID are skipped.</returns>
/// <exception cref="DataMisalignedException">The same gene ID appears with two different symbols.</exception>
private static Dictionary<string, string> LoadGenesFromCache(ReferenceSequenceProvider refProvider, TranscriptCacheReader cacheReader)
{
    var transcriptData  = cacheReader.Read(refProvider.RefIndexToChromosome);
    var geneIdToSymbols = new Dictionary<string, string>(transcriptData.Genes.Length);

    foreach (var gene in transcriptData.Genes)
    {
        var geneId = gene.EnsemblId.WithoutVersion;
        if (string.IsNullOrEmpty(geneId))
        {
            continue;
        }

        // single lookup: either register the new ID or verify the repeated one
        if (!geneIdToSymbols.TryGetValue(geneId, out var knownSymbol))
        {
            geneIdToSymbols.Add(geneId, gene.Symbol);
            continue;
        }

        // a repeated ID is fine only when it maps to the same symbol
        if (knownSymbol != gene.Symbol)
        {
            throw new DataMisalignedException($"Multiple symbols found for {geneId}");
        }
    }

    return geneIdToSymbols;
}
/// <summary>
/// Splits the VCF at <c>config.vcfUrl</c> into partitions and produces the annotation
/// ranges for them, using the tabix index, the reference and the transcript cache.
/// </summary>
/// <param name="config">Provides <c>tabixUrl</c> and <c>vcfUrl</c>.</param>
/// <param name="genomeAssembly">Selects the cache path prefix and the reference URL.</param>
/// <returns>The ranges produced by <c>PartitionUtilities.GenerateAnnotationRanges</c>.</returns>
private static IEnumerable<AnnotationRange> GetAnnotationRanges(NirvanaConfig config, GenomeAssembly genomeAssembly)
{
    string cachePrefix = LambdaUtilities.GetCachePathPrefix(genomeAssembly);

    // these three values outlive the streams opened below
    IntervalForest<IGene> geneForest;
    IDictionary<string, IChromosome> chromosomesByName;
    List<long> offsets;

    using (var tabixStream = PersistentStreamUtils.GetReadStream(config.tabixUrl))
    using (var tabixReader = new BinaryReader(new BlockGZipStream(tabixStream, CompressionMode.Decompress)))
    using (var referenceStream = PersistentStreamUtils.GetReadStream(LambdaUrlHelper.GetRefUrl(genomeAssembly)))
    using (var sequenceProvider = new ReferenceSequenceProvider(referenceStream))
    using (var taProvider = new TranscriptAnnotationProvider(cachePrefix, sequenceProvider, null))
    {
        long vcfSize = HttpUtilities.GetLength(config.vcfUrl);

        // ceiling of vcfSize / MinPartitionSize, capped at MaxNumPartitions, then raised to at least MinNumPartitions
        int wantedPartitions = (int)((vcfSize - 1) / MinPartitionSize + 1);
        int cappedPartitions = Math.Min(wantedPartitions, MaxNumPartitions);
        int numPartitions    = Math.Max(cappedPartitions, MinNumPartitions);

        var tabixIndex = Reader.Read(tabixReader, sequenceProvider.RefNameToChromosome);
        offsets = PartitionUtilities.GetFileOffsets(config.vcfUrl, numPartitions, tabixIndex);

        IntervalArray<ITranscript>[] transcriptArrays = taProvider.TranscriptIntervalArrays;
        geneForest        = GeneForestGenerator.GetGeneForest(transcriptArrays);
        chromosomesByName = sequenceProvider.RefNameToChromosome;
    }

    return PartitionUtilities.GenerateAnnotationRanges(offsets, config.vcfUrl, geneForest, chromosomesByName);
}
/// <summary>
/// Reads the given MITOMAP structural variant files, merges and sorts their items and
/// writes them through an <c>IntervalTsvWriter</c>. Does nothing when the list is empty
/// or contains a null/empty file name.
/// </summary>
/// <param name="mitoMapSvFileNames">MITOMAP SV file paths; the first one determines the directory of the version file.</param>
private void CreateMitoMapSvTsv(IReadOnlyList<string> mitoMapSvFileNames)
{
    if (mitoMapSvFileNames.Count == 0 || mitoMapSvFileNames.Any(string.IsNullOrEmpty))
    {
        return;
    }

    var stopwatch = new Benchmark();

    // the version file ("mitoMapSV") sits next to the first input file
    var inputDirectory = new FileInfo(mitoMapSvFileNames[0]).Directory;
    var versionPath    = Path.Combine(inputDirectory.ToString(), "mitoMapSV");
    var version        = DataSourceVersionReader.GetSourceVersion(versionPath);

    var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath));
    sequenceProvider.LoadChromosome(new Chromosome("chrM", "MT", 24));

    // one reader per input file, in input order
    var readers = mitoMapSvFileNames
        .Select(fileName => new MitoMapSvReader(new FileInfo(fileName), sequenceProvider))
        .ToList();

    var sortedItems = MitoMapSvReader.MergeAndSort(readers);

    using (var writer = new IntervalTsvWriter(_outputDirectory, version, GenomeAssembly.rCRS.ToString(), SaTsvCommon.MitoMapSchemaVersion, InterimSaCommon.MitoMapTag, ReportFor.StructuralVariants))
    {
        CreateSvTsv(sortedItems, writer);
    }

    var elapsed = Benchmark.ToHumanReadable(stopwatch.GetElapsedTime());
    TsvWriterUtilities.WriteCompleteInfo(InterimSaCommon.MitoMapTag, version.Version, elapsed);
}
/// <summary>
/// Builds the MITOMAP structural variant NSI file from the configured MITOMAP files.
/// </summary>
/// <returns>
/// <c>ExitCodes.PathNotFound</c> when the first input file has no parent directory;
/// otherwise <c>ExitCodes.Success</c>.
/// </returns>
private static ExitCodes ProgramExecution()
{
    // the version file ("mitoMapSv") is looked up next to the first input file
    var rootDirectory = new FileInfo(MitoMapFileNames[0]).Directory;
    if (rootDirectory == null)
    {
        return ExitCodes.PathNotFound;
    }

    var version = DataSourceVersionReader.GetSourceVersion(Path.Combine(rootDirectory.ToString(), "mitoMapSv"));

    var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
    var chrom            = sequenceProvider.RefNameToChromosome["chrM"];
    sequenceProvider.LoadChromosome(chrom);

    var mitoMapSvReaders = MitoMapFileNames
        .Select(mitoMapFileName => new MitoMapSvReader(new FileInfo(mitoMapFileName), sequenceProvider))
        .ToList();
    var mergedMitoMapSvItems = MitoMapSvReader.MergeAndSort(mitoMapSvReaders);

    string outFileName = $"{version.Name}_{version.Version}";

    // the writer is disposed along with the stream, matching the other NsiWriter call sites,
    // so anything it buffers is flushed before the file is closed
    using (var nsiStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SiFileSuffix)))
    using (var nsiWriter = new NsiWriter(new ExtendedBinaryWriter(nsiStream), version, GenomeAssembly.rCRS, SaCommon.MitoMapTag, ReportFor.StructuralVariants, SaCommon.SchemaVersion))
    {
        nsiWriter.Write(mergedMitoMapSvItems);
    }

    return ExitCodes.Success;
}
/// <summary>
/// Creates a reader for one MITOMAP structural variant file.
/// </summary>
/// <param name="mitoMapFileInfo">The MITOMAP file to read.</param>
/// <param name="sequenceProvider">Provider whose <c>Sequence</c> backs the genome model and the variant aligner.</param>
public MitoMapSvReader(FileInfo mitoMapFileInfo, ReferenceSequenceProvider sequenceProvider)
{
    // GetDataType() runs right after the file info is stored and before any other field is set
    _mitoMapFileInfo = mitoMapFileInfo;
    _dataType        = GetDataType();

    _sequenceProvider = sequenceProvider;

    var sequence     = sequenceProvider.Sequence;
    _mitoGenomeModel = new CircularGenomeModel(sequence);
    _variantAligner  = new VariantAligner(sequence);
}
/// <summary>
/// Stores the inputs needed to create the TOPMed TSV files.
/// </summary>
/// <param name="streamReader">Reader over the TOPMed input.</param>
/// <param name="refProvider">Reference sequence provider.</param>
/// <param name="version">Data source version.</param>
/// <param name="outputDirName">Output directory name.</param>
public TopMedTsvCreator(StreamReader streamReader, ReferenceSequenceProvider refProvider, DataSourceVersion version, string outputDirName)
{
    _streamReader  = streamReader;
    _refProvider   = refProvider;
    _version       = version;
    _outputDirName = outputDirName;
}
/// <summary>
/// Creates a reader for one MITOMAP structural variant file.
/// </summary>
/// <param name="mitoMapFileInfo">The MITOMAP file to read.</param>
/// <param name="sequenceProvider">Provider supplying the "chrM" chromosome and the sequence for the variant aligner.</param>
public MitoMapSvReader(FileInfo mitoMapFileInfo, ReferenceSequenceProvider sequenceProvider)
{
    // GetDataType() runs right after the file info is stored and before any other field is set
    _mitoMapFileInfo = mitoMapFileInfo;
    _dataType        = GetDataType();

    _sequenceProvider = sequenceProvider;

    var chromosomesByName = sequenceProvider.RefNameToChromosome;
    _chromosome           = chromosomesByName["chrM"];

    _variantAligner = new VariantAligner(sequenceProvider.Sequence);
}
/// <summary>
/// Stores the inputs needed to create the gnomAD TSV files.
/// </summary>
/// <param name="streamReaders">Readers over the gnomAD input files.</param>
/// <param name="refProvider">Reference sequence provider.</param>
/// <param name="version">Data source version.</param>
/// <param name="outputDirectory">Output directory.</param>
/// <param name="sequencingDataType">Sequencing data type string supplied by the caller.</param>
public GnomadTsvCreator(StreamReader[] streamReaders, ReferenceSequenceProvider refProvider, DataSourceVersion version, string outputDirectory, string sequencingDataType)
{
    _streamReaders      = streamReaders;
    _refProvider        = refProvider;
    _version            = version;
    _outputDirectory    = outputDirectory;
    _sequencingDataType = sequencingDataType;
}
/// <summary>
/// Creates the TOPMed TSV files from the input file given on the command line.
/// </summary>
/// <returns><c>ExitCodes.Success</c> once <c>CreateTsvs</c> has returned.</returns>
private ExitCodes ProgramExecution()
{
    // input first, then reference, then the ".version" file next to the input
    var inputReader      = GZipUtilities.GetAppropriateStreamReader(_inputFileArg);
    var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferenceArg));
    var sourceVersion    = DataSourceVersionReader.GetSourceVersion(_inputFileArg + ".version");

    new TopMedTsvCreator(inputReader, sequenceProvider, sourceVersion, _outputDirArg).CreateTsvs();

    return ExitCodes.Success;
}
/// <summary>
/// Stores the extraction range and SA path, opens the compressed reference and prints
/// the mini SA output path.
/// </summary>
/// <param name="compressedRefFile">Path of the compressed reference file.</param>
/// <param name="saPath">Path of the supplementary annotation source.</param>
/// <param name="begin">Start of the range.</param>
/// <param name="end">End of the range.</param>
/// <param name="datasourceName">Not read by the active code in this constructor.</param>
/// <param name="outputDir">Not read by the active code in this constructor.</param>
public MiniSaExtractor(string compressedRefFile, string saPath, int begin, int end, string datasourceName = null, string outputDir = null)
{
    _saPath = saPath;
    _begin  = begin;
    _end    = end;

    // the reference is still opened and its name -> chromosome map read, even though the
    // only consumer is the disabled code below
    var referenceStream = FileUtilities.GetReadStream(compressedRefFile);
    var refChromDict    = new ReferenceSequenceProvider(referenceStream).RefNameToChromosome;

    // NOTE(review): the output path computation is commented out, so this constructor never
    // assigns _miniSaPath before printing it — confirm whether it should be restored.
    //string referenceName = GetReferenceName(saPath, refChromDict);
    //_miniSaPath = GetMiniSaPath(referenceName, begin, end, datasourceName, outputDir);

    Console.WriteLine($"MiniSA output to: {_miniSaPath}");
}
/// <summary>
/// Converts a custom annotation input file into an NSA file (with index and JSON schema)
/// and, when the input also yields intervals, an NSI file with its own JSON schema.
/// </summary>
/// <returns><c>ExitCodes.Success</c> in every non-throwing path.</returns>
private static ExitCodes ProgramExecution()
{
    var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));

    string outputPrefix  = GetOutputPrefix(_inputFile);
    string nsaPath       = Path.Combine(_outputDirectory, outputPrefix + SaCommon.SaFileSuffix);
    string nsaIndexPath  = nsaPath + SaCommon.IndexSufix;
    string nsaSchemaPath = nsaPath + SaCommon.JsonSchemaSuffix;

    // results of the small-variant pass that are needed again for the interval pass
    List<CustomInterval> intervals;
    SaJsonSchema intervalSchema;
    string jsonTag;
    DataSourceVersion version;
    ReportFor reportFor;
    int nsaItemCount;

    using (var parser = VariantAnnotationsParser.Create(GZipUtilities.GetAppropriateStreamReader(_inputFile), sequenceProvider))
    using (var nsaStream = FileUtilities.GetCreateStream(nsaPath))
    using (var indexStream = FileUtilities.GetCreateStream(nsaIndexPath))
    using (var nsaWriter = CaUtilities.GetNsaWriter(nsaStream, indexStream, parser, CaUtilities.GetInputFileName(_inputFile), sequenceProvider, out version))
    using (var nsaSchemaStream = FileUtilities.GetCreateStream(nsaSchemaPath))
    using (var nsaSchemaWriter = new StreamWriter(nsaSchemaStream))
    {
        (jsonTag, nsaItemCount, intervalSchema, intervals) = CaUtilities.WriteSmallVariants(parser, nsaWriter, nsaSchemaWriter);
        reportFor = parser.ReportFor;

        // NOTE(review): this early exit also skips the empty-file cleanup below — confirm that is intended
        if (intervals == null)
        {
            return ExitCodes.Success;
        }
    }

    // no small variants were written: remove the NSA outputs created above
    if (nsaItemCount == 0)
    {
        File.Delete(nsaPath);
        File.Delete(nsaIndexPath);
        File.Delete(nsaSchemaPath);
    }

    string nsiPath       = Path.Combine(_outputDirectory, outputPrefix + SaCommon.IntervalFileSuffix);
    string nsiSchemaPath = Path.Combine(_outputDirectory, outputPrefix + SaCommon.IntervalFileSuffix + SaCommon.JsonSchemaSuffix);

    using (var nsiStream = FileUtilities.GetCreateStream(nsiPath))
    using (var nsiWriter = CaUtilities.GetNsiWriter(nsiStream, version, sequenceProvider.Assembly, jsonTag, reportFor))
    using (var nsiSchemaStream = FileUtilities.GetCreateStream(nsiSchemaPath))
    using (var nsiSchemaWriter = new StreamWriter(nsiSchemaStream))
    {
        nsiWriter.Write(intervals);
        nsiSchemaWriter.Write(intervalSchema);
    }

    return ExitCodes.Success;
}
/// <summary>
/// Builds the dosage sensitivity NSI file from the dosage map region file.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var version = DataSourceVersionReader.GetSourceVersion(_dosageMapRegionFile + ".version");

    // spaces in the data source name are replaced so the name is usable as a file name
    string baseName = $"{version.Name.Replace(' ', '_')}_{version.Version}";

    var sequenceProvider = new ReferenceSequenceProvider(GZipUtilities.GetAppropriateReadStream(_inputReferencePath));
    string nsiPath       = Path.Combine(_outputDirectory, baseName + SaCommon.SiFileSuffix);

    using (var parser = new DosageMapRegionParser(GZipUtilities.GetAppropriateReadStream(_dosageMapRegionFile), sequenceProvider.RefNameToChromosome))
    using (var nsiStream = FileUtilities.GetCreateStream(nsiPath))
    using (var writer = new NsiWriter(nsiStream, version, sequenceProvider.Assembly, SaCommon.DosageSensitivityTag, ReportFor.StructuralVariants, SaCommon.SchemaVersion))
    {
        writer.Write(parser.GetItems());
    }

    return ExitCodes.Success;
}
/// <summary>
/// Builds the ClinGen NSI file from the ClinGen input file.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
    var sourceVersion    = DataSourceVersionReader.GetSourceVersion(_inputFileName + ".version");

    string baseName = $"{sourceVersion.Name}_{sourceVersion.Version}";
    string nsiPath  = Path.Combine(_outputDirectory, baseName + SaCommon.SiFileSuffix);

    using (var reader = new ClinGenReader(GZipUtilities.GetAppropriateStreamReader(_inputFileName), sequenceProvider.RefNameToChromosome))
    using (var nsiStream = FileUtilities.GetCreateStream(nsiPath))
    using (var writer = new NsiWriter(new ExtendedBinaryWriter(nsiStream), sourceVersion, sequenceProvider.Assembly, SaCommon.ClinGenTag, ReportFor.StructuralVariants, SaCommon.SchemaVersion))
    {
        writer.Write(reader.GetItems());
    }

    return ExitCodes.Success;
}
/// <summary>
/// Builds the low complexity region file from the input region file.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_referenceSequencePath));
    var sourceVersion    = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

    var baseName   = $"{sourceVersion.Name}_{sourceVersion.Version}";
    var outputPath = Path.Combine(_outputDirectory, baseName + SaCommon.LcrFileSuffix);

    using (var regionParser = new LcrRegionParser(GZipUtilities.GetAppropriateStreamReader(_inputFile), sequenceProvider))
    using (var outputStream = FileUtilities.GetCreateStream(outputPath))
    using (var nsiWriter = new NsiWriter(outputStream, sourceVersion, sequenceProvider.Assembly, SaCommon.LowComplexityRegionTag, ReportFor.AllVariants, SaCommon.NsiSchemaVersion))
    {
        nsiWriter.Write(regionParser.GetItems());
    }

    return ExitCodes.Success;
}
/// <summary>
/// Creates the COSMIC structural variant TSV from the optional CNV and breakend inputs.
/// </summary>
/// <returns><c>ExitCodes.Success</c> once <c>CreateTsv</c> has returned.</returns>
private ExitCodes ProgramExecution()
{
    // NOTE(review): the version path is derived from _cnvTsv before the null check below,
    // so a null _cnvTsv resolves to a bare ".version" path — confirm this is intended.
    var sourceVersion    = DataSourceVersionReader.GetSourceVersion(_cnvTsv + ".version");
    var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));

    // either input may be absent; a missing file is passed on as a null stream
    var cnvStream      = _cnvTsv != null ? GZipUtilities.GetAppropriateReadStream(_cnvTsv) : null;
    var breakendStream = _breakendTsv != null ? GZipUtilities.GetAppropriateReadStream(_breakendTsv) : null;

    using (var svReader = new CosmicSvReader(cnvStream, breakendStream, sourceVersion, _outputDir, sequenceProvider.GenomeAssembly, sequenceProvider.RefNameToChromosome))
    {
        svReader.CreateTsv();
    }

    return ExitCodes.Success;
}
/// <summary>
/// Builds the mitochondrial heteroplasmy NSA file and its index from the input file.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
    var sourceVersion    = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

    string baseName  = $"{sourceVersion.Name}_{sourceVersion.Version}";
    string nsaPath   = Path.Combine(_outputDirectory, baseName + SaCommon.SaFileSuffix);
    string indexPath = Path.Combine(_outputDirectory, baseName + SaCommon.SaFileSuffix + SaCommon.IndexSufix);

    using (var heteroplasmyParser = new MitoHeteroplasmyParser(GZipUtilities.GetAppropriateReadStream(_inputFile), sequenceProvider))
    using (var nsaStream = FileUtilities.GetCreateStream(nsaPath))
    using (var indexStream = FileUtilities.GetCreateStream(indexPath))
    using (var writer = new NsaWriter(nsaStream, indexStream, sourceVersion, sequenceProvider, SaCommon.MitoHeteroplasmyTag, true, false, SaCommon.SchemaVersion, false))
    {
        writer.Write(heteroplasmyParser.GetItems());
    }

    return ExitCodes.Success;
}
/// <summary>
/// Builds the reference minor allele database file and its index from the input file.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
    var sourceVersion    = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

    // spaces anywhere in the combined name are replaced with underscores
    string baseName  = $"{sourceVersion.Name}_{sourceVersion.Version}_{SaCommon.RefMinorTag}".Replace(' ', '_');
    string dbPath    = Path.Combine(_outputDirectory, baseName + SaCommon.RefMinorFileSuffix);
    string indexPath = Path.Combine(_outputDirectory, baseName + SaCommon.RefMinorFileSuffix + SaCommon.IndexSufix);

    using (var reader = new RefMinorReader(GZipUtilities.GetAppropriateStreamReader(_inputFile), sequenceProvider))
    using (var dbStream = FileUtilities.GetCreateStream(dbPath))
    using (var indexStream = FileUtilities.GetCreateStream(indexPath))
    using (var dbWriter = new RefMinorDbWriter(new ExtendedBinaryWriter(dbStream), new ExtendedBinaryWriter(indexStream), sourceVersion, sequenceProvider, SaCommon.SchemaVersion))
    {
        dbWriter.Write(reader.GetItems());
    }

    return ExitCodes.Success;
}
/// <summary>
/// Builds the global minor allele NSA file and its index from the input file.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    // reference first, then the input reader, then the ".version" file next to the input
    var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
    var minorReader      = new GlobalMinorReader(GZipUtilities.GetAppropriateReadStream(_inputFile), sequenceProvider.RefNameToChromosome);
    var sourceVersion    = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

    string baseName  = $"{sourceVersion.Name}_{sourceVersion.Version}_globalMinor";
    string nsaPath   = Path.Combine(_outputDirectory, baseName + SaCommon.SaFileSuffix);
    string indexPath = Path.Combine(_outputDirectory, baseName + SaCommon.SaFileSuffix + SaCommon.IndexSufix);

    using (var nsaStream = FileUtilities.GetCreateStream(nsaPath))
    using (var indexStream = FileUtilities.GetCreateStream(indexPath))
    using (var writer = new NsaWriter(nsaStream, indexStream, sourceVersion, sequenceProvider, SaCommon.GlobalAlleleTag, true, false, SaCommon.SchemaVersion, true))
    {
        writer.Write(minorReader.GetItems());
    }

    return ExitCodes.Success;
}
/// <summary>
/// Builds the PhyloP score file and its index from the input file.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
    var sourceVersion    = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

    string baseName  = $"{sourceVersion.Name}_{sourceVersion.Version}";
    string npdPath   = Path.Combine(_outputDirectory, baseName + SaCommon.PhylopFileSuffix);
    string indexPath = Path.Combine(_outputDirectory, baseName + SaCommon.PhylopFileSuffix + SaCommon.IndexSufix);

    using (var parser = new PhylopParser(GZipUtilities.GetAppropriateReadStream(_inputFile), sequenceProvider.Assembly, sequenceProvider.RefNameToChromosome))
    using (var npdStream = FileUtilities.GetCreateStream(npdPath))
    using (var indexStream = FileUtilities.GetCreateStream(indexPath))
    using (var npdWriter = new NpdWriter(npdStream, indexStream, sourceVersion, sequenceProvider.Assembly, SaCommon.PhylopTag, SaCommon.SchemaVersion))
    {
        npdWriter.Write(parser.GetItems());
    }

    return ExitCodes.Success;
}
/// <summary>
/// Builds the COSMIC NSA file and its index from the VCF and TSV inputs.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    // reference first, then the merged reader, then the ".version" file next to the VCF
    var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
    var mergedReader     = new MergedCosmicReader(_vcfFile, _tsvFile, sequenceProvider);
    var sourceVersion    = DataSourceVersionReader.GetSourceVersion(_vcfFile + ".version");

    string baseName  = $"{sourceVersion.Name}_{sourceVersion.Version}";
    string nsaPath   = Path.Combine(_outputDirectory, baseName + SaCommon.SaFileSuffix);
    string indexPath = Path.Combine(_outputDirectory, baseName + SaCommon.SaFileSuffix + SaCommon.IndexSufix);

    using (var nsaStream = FileUtilities.GetCreateStream(nsaPath))
    using (var indexStream = FileUtilities.GetCreateStream(indexPath))
    using (var writer = new NsaWriter(new ExtendedBinaryWriter(nsaStream), new ExtendedBinaryWriter(indexStream), sourceVersion, sequenceProvider, SaCommon.CosmicTag, false, true, SaCommon.SchemaVersion, false))
    {
        writer.Write(mergedReader.GetItems());
    }

    return ExitCodes.Success;
}
/// <summary>
/// Writes one gnomAD NSA file (plus index) into the temp directory for a genome file,
/// combining it with the matching exome reader, and reports the number of items written.
/// </summary>
/// <param name="exomeFiles">Exome files passed to <c>GetExomeReader</c>.</param>
/// <param name="genomeFile">Genome file to process; its name without extension names the output.</param>
/// <param name="version">Data source version stored by the writer.</param>
private static void CreateNsa(string[] exomeFiles, string genomeFile, DataSourceVersion version)
{
    Console.WriteLine($"Processing file: {genomeFile}");

    var baseName  = Path.GetFileNameWithoutExtension(genomeFile);
    var nsaPath   = Path.Combine(_tempDirectory, baseName + SaCommon.SaFileSuffix);
    var indexPath = Path.Combine(_tempDirectory, baseName + SaCommon.SaFileSuffix + SaCommon.IndexSufix);

    using (var exomeReader = GetExomeReader(exomeFiles, genomeFile))
    using (var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference)))
    using (var nsaStream = FileUtilities.GetCreateStream(nsaPath))
    using (var indexStream = FileUtilities.GetCreateStream(indexPath))
    using (var writer = new NsaWriter(nsaStream, indexStream, version, sequenceProvider, SaCommon.GnomadTag, true, false, SaCommon.SchemaVersion, false))
    using (var genomeReader = GZipUtilities.GetAppropriateStreamReader(genomeFile))
    {
        var snvReader = new GnomadSnvReader(genomeReader, exomeReader, sequenceProvider);
        var written   = writer.Write(snvReader.GetCombinedItems());
        Console.WriteLine($"Wrote {written} items to NSA file.");
    }
}
/// <summary>
/// Builds the TOPMed NSA file and its index from the input file.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
    var topMedReader      = new TopMedReader(GZipUtilities.GetAppropriateStreamReader(_inputFile), referenceProvider);
    var version           = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

    string outFileName = $"{version.Name}_{version.Version}";

    // NOTE(review): the JSON schema file is created but nothing is written to it here — confirm whether
    // a schema should be emitted or the file dropped.
    using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix)))
    using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSufix)))
    using (var jsonSchemaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.JsonSchemaSuffix)))
    // the writer is disposed like the other NsaWriter call sites, so buffered output is flushed
    // before the underlying streams are closed
    using (var nsaWriter = new NsaWriter(new ExtendedBinaryWriter(nsaStream), new ExtendedBinaryWriter(indexStream), version, referenceProvider, SaCommon.TopMedTag, true, false, SaCommon.SchemaVersion, false))
    {
        nsaWriter.Write(topMedReader.GetItems());
    }

    return ExitCodes.Success;
}
/// <summary>
/// Builds the 1000 Genomes structural variant NSI file from the input file.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
    var sourceVersion    = DataSourceVersionReader.GetSourceVersion(_inputFileName + ".version");

    // spaces in the combined name are replaced with underscores
    string baseName = $"{sourceVersion.Name}_{sourceVersion.Version}".Replace(' ', '_');
    string nsiPath  = Path.Combine(_outputDirectory, baseName + SaCommon.IntervalFileSuffix);

    using (var inputReader = GZipUtilities.GetAppropriateStreamReader(_inputFileName))
    using (var svReader = new OneKGenSvReader(inputReader, sequenceProvider.RefNameToChromosome))
    using (var nsiStream = FileUtilities.GetCreateStream(nsiPath))
    using (var writer = new NsiWriter(nsiStream, sourceVersion, sequenceProvider.Assembly, SaCommon.OnekSvTag, ReportFor.StructuralVariants, SaCommon.SchemaVersion))
    {
        writer.Write(svReader.GetItems());
    }

    return ExitCodes.Success;
}
/// <summary>
/// Builds the gnomAD NSA file and its index from the <c>*.vcf.bgz</c> files in the input
/// directory, falling back to <c>*.vcf.gz</c> when no <c>.bgz</c> files are present.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
/// <exception cref="ArgumentException">The configured sequencing data type is not supported.</exception>
/// <exception cref="UserErrorException">The input directory holds neither <c>.vcf.bgz</c> nor <c>.vcf.gz</c> files.</exception>
/// <exception cref="InvalidDataException">The input directory does not hold exactly one <c>.version</c> file.</exception>
private ExitCodes ProgramExecution()
{
    if (!_supportedSequencingDataType.Contains(_sequencingDataType))
    {
        // string.Join takes the separator first; the values follow
        throw new ArgumentException($"Only the following sequencing data types are supported: {string.Join(", ", _supportedSequencingDataType)}");
    }

    var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));

    var inputFiles = Directory.GetFiles(_inputDirectory, "*.vcf.bgz");
    if (inputFiles.Length == 0)
    {
        inputFiles = Directory.GetFiles(_inputDirectory, "*.vcf.gz");
    }

    if (inputFiles.Length == 0)
    {
        throw new UserErrorException("input directory does not contain any .vcf.bgz or .vcf.gz files");
    }

    var versionFiles = Directory.GetFiles(_inputDirectory, "*.version");
    if (versionFiles.Length != 1)
    {
        // covers both "none" and "several", so report the actual count
        throw new InvalidDataException($"expected exactly one .version file in input directory, but found {versionFiles.Length}");
    }

    var version = DataSourceVersionReader.GetSourceVersion(versionFiles[0]);

    Console.WriteLine($"Creating gnomAD TSV file from {inputFiles.Length} input files");

    string outFileName = $"{version.Name}_{version.Version}";
    var jsonTag        = _sequencingDataType == "genome" ? SaCommon.GnomadTag : SaCommon.GnomadExomeTag;

    using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix)))
    using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSufix)))
    using (var nsaWriter = new NsaWriter(new ExtendedBinaryWriter(nsaStream), new ExtendedBinaryWriter(indexStream), version, referenceProvider, jsonTag, true, false, SaCommon.SchemaVersion, false))
    {
        nsaWriter.Write(GetItems(inputFiles, referenceProvider));
    }

    return ExitCodes.Success;
}
/// <summary>
/// Builds the ClinVar NSA file, its index and its JSON schema from the RCV and VCV inputs.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items and the schema have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var sourceVersion = DataSourceVersionReader.GetSourceVersion(_rcvFile + ".version");

    string baseName   = $"{sourceVersion.Name}_{sourceVersion.Version}";
    string nsaPath    = Path.Combine(_outputDirectory, baseName + SaCommon.SaFileSuffix);
    string indexPath  = Path.Combine(_outputDirectory, baseName + SaCommon.SaFileSuffix + SaCommon.IndexSufix);
    string schemaPath = Path.Combine(_outputDirectory, baseName + SaCommon.SaFileSuffix + SaCommon.JsonSchemaSuffix);

    using (var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference)))
    using (var reader = new ClinVarReader(GZipUtilities.GetAppropriateReadStream(_rcvFile), GZipUtilities.GetAppropriateReadStream(_vcvFile), sequenceProvider))
    using (var nsaStream = FileUtilities.GetCreateStream(nsaPath))
    using (var indexStream = FileUtilities.GetCreateStream(indexPath))
    using (var writer = new NsaWriter(nsaStream, indexStream, sourceVersion, sequenceProvider, SaCommon.ClinvarTag, false, true, SaCommon.SchemaVersion, false))
    using (var schemaStream = FileUtilities.GetCreateStream(schemaPath))
    using (var schemaWriter = new StreamWriter(schemaStream))
    {
        // items first; the schema is read from the reader only after the items were consumed
        writer.Write(reader.GetItems());
        schemaWriter.Write(reader.JsonSchema);
    }

    return ExitCodes.Success;
}