/// <summary>
/// Loads every gene from the universal gene archive and derives two symbol lookups from them:
/// one selected by <c>EntrezGeneId</c> and one selected by <c>EnsemblId</c>.
/// </summary>
/// <param name="inputReferencePath">
/// Path handed to <c>SequenceHelper.GetDictionaries</c>. When null, the gene reader is given a
/// null chromosome dictionary instead.
/// </param>
/// <param name="universalGeneArchivePath">Path of the universal gene archive to read.</param>
/// <returns>The Entrez-keyed and Ensembl-keyed lookups produced by <c>GetGeneIdToSymbol</c>.</returns>
public static (Dictionary<string, string> EntrezGeneIdToSymbol, Dictionary<string, string> EnsemblIdToSymbol) ParseUniversalGeneArchive(string inputReferencePath, string universalGeneArchivePath)
{
    // Only consult the reference when a path was actually supplied.
    IDictionary<string, IChromosome> chromosomesByName = null;
    if (inputReferencePath != null)
    {
        (_, chromosomesByName, _) = SequenceHelper.GetDictionaries(inputReferencePath);
    }

    UgaGene[] ugaGenes;
    using (var geneReader = new UgaGeneReader(GZipUtilities.GetAppropriateReadStream(universalGeneArchivePath), chromosomesByName))
    {
        ugaGenes = geneReader.GetGenes();
    }

    return (
        ugaGenes.GetGeneIdToSymbol(gene => gene.EntrezGeneId),
        ugaGenes.GetGeneIdToSymbol(gene => gene.EnsemblId));
}
/// <summary>
/// Parses the PrimateAI input file and writes its items through an <c>NsaWriter</c> into a data
/// file and an index file in the output directory. Gene symbols are resolved with lookups built
/// from the transcript cache.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
    var version           = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

    // Both output files share the "<name>_<version>" stem.
    string outFileName   = $"{version.Name}_{version.Version}";
    string nsaFileName   = outFileName + SaCommon.SaFileSuffix;
    string indexFileName = nsaFileName + SaCommon.IndexSufix;

    TranscriptCacheData transcriptData;
    var cachePath = CacheConstants.TranscriptPath(_transcriptCachePrefix);
    using (var cacheReader = new TranscriptCacheReader(FileUtilities.GetReadStream(cachePath)))
    {
        transcriptData = cacheReader.Read(referenceProvider.RefIndexToChromosome);
    }

    var (entrezToHgnc, ensemblToHgnc) = PrimateAiUtilities.GetIdToSymbols(transcriptData);

    using (var parser      = new PrimateAiParser(GZipUtilities.GetAppropriateReadStream(_inputFile), referenceProvider, entrezToHgnc, ensemblToHgnc))
    using (var dataStream  = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName)))
    using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, indexFileName)))
    using (var writer      = new NsaWriter(dataStream, indexStream, version, referenceProvider, SaCommon.PrimateAiTag, true, true, SaCommon.SchemaVersion, false))
    {
        writer.Write(parser.GetItems());
    }

    return ExitCodes.Success;
}
/// <summary>
/// Reads the tab-delimited file at <c>_ugaFile</c> and maps the integer in column 8 (HGNC id)
/// to the text in column 2 (gene symbol). Rows whose id is -1 are ignored. When an id appears
/// again with a different symbol, the first symbol is kept and a message is printed.
/// </summary>
/// <returns>The id-to-symbol dictionary.</returns>
private static Dictionary<int, string> GetHgncIdToGeneSymbols()
{
    var symbolsById = new Dictionary<int, string>();

    using (var ugaStream = GZipUtilities.GetAppropriateReadStream(_ugaFile))
    using (var reader = new StreamReader(ugaStream))
    {
        // The first line only carries the entry count, so it is skipped.
        reader.ReadLine();

        string row;
        while ((row = reader.ReadLine()) != null)
        {
            var columns    = row.OptimizedSplit('\t');
            var geneSymbol = columns[2];
            var id         = int.Parse(columns[8]);

            // Nothing more to do for a missing id or a first-time insertion.
            if (id == -1 || symbolsById.TryAdd(id, geneSymbol))
            {
                continue;
            }

            var existingSymbol = symbolsById[id];
            if (geneSymbol != existingSymbol)
            {
                Console.WriteLine($"Different symbol for the same id({id}). Existing: {existingSymbol}. New: {geneSymbol}");
            }
        }
    }

    return symbolsById;
}
/// <summary>
/// Reads a transcript dump file and appends every transcript that passes <c>_filter</c> to
/// <paramref name="transcripts"/>.
/// </summary>
/// <param name="chromosome">Chromosome passed to <c>ImportTranscript.Parse</c> for each transcript.</param>
/// <param name="filePath">Path of the dump file; only its file name is echoed to the console.</param>
/// <param name="transcripts">Collection that receives the accepted transcripts.</param>
/// <exception cref="InvalidOperationException">
/// A list entry is not an object value node, or its type is not <c>Bio::EnsEMBL::Transcript</c>.
/// </exception>
private void ParseTranscriptDumpFile(IChromosome chromosome, string filePath, ICollection<MutableTranscript> transcripts)
{
    Console.WriteLine("- processing {0}", Path.GetFileName(filePath));

    using (var dumpReader = new DataDumperReader(GZipUtilities.GetAppropriateReadStream(filePath)))
    {
        foreach (var rootChild in dumpReader.GetRootNode().Value.Values)
        {
            // Only list nodes hold transcripts; every other root child is ignored.
            if (rootChild is ListObjectKeyValueNode transcriptList)
            {
                foreach (var entry in transcriptList.Values)
                {
                    if (!(entry is ObjectValueNode transcriptNode))
                    {
                        throw new InvalidOperationException("Expected a transcript object value node, but the current node is not an object value.");
                    }

                    if (transcriptNode.Type != "Bio::EnsEMBL::Transcript")
                    {
                        throw new InvalidOperationException($"Expected a transcript node, but the current data type is: [{transcriptNode.Type}]");
                    }

                    var parsed = ImportTranscript.Parse(transcriptNode, chromosome, _source);
                    if (_filter.Pass(parsed))
                    {
                        transcripts.Add(parsed);
                    }
                }
            }
        }
    }
}
/// <summary>
/// Reads a regulatory dump file and forwards every <c>RegulatoryFeature</c> group to
/// <c>ParseRegulatoryRegions</c>. <c>MotifFeature</c> groups are skipped.
/// </summary>
/// <param name="chromosome">Chromosome passed through to <c>ParseRegulatoryRegions</c>.</param>
/// <param name="filePath">Path of the dump file; only its file name is echoed to the console.</param>
/// <param name="regulatoryRegions">Collection passed through to <c>ParseRegulatoryRegions</c>.</param>
/// <exception cref="InvalidDataException">A feature group has a key other than the two known ones.</exception>
private static void ParseRegulatoryDumpFile(IChromosome chromosome, string filePath, ICollection<IRegulatoryRegion> regulatoryRegions)
{
    Console.WriteLine("- processing {0}", Path.GetFileName(filePath));

    using (var dumpReader = new DataDumperReader(GZipUtilities.GetAppropriateReadStream(filePath)))
    {
        foreach (var rootChild in dumpReader.GetRootNode().Value.Values)
        {
            // Root children that are not object key/value nodes carry no feature groups.
            if (rootChild is ObjectKeyValueNode container)
            {
                foreach (var featureGroup in container.Value.Values)
                {
                    if (featureGroup.Key == "MotifFeature")
                    {
                        // not used
                    }
                    else if (featureGroup.Key == "RegulatoryFeature")
                    {
                        ParseRegulatoryRegions(chromosome, featureGroup, regulatoryRegions);
                    }
                    else
                    {
                        throw new InvalidDataException("Found an unexpected feature group (" + featureGroup.Key + ") in the regulatory regions file.");
                    }
                }
            }
        }
    }
}
/// <summary>
/// Converts a dbSNP input file into two interim TSV outputs (dbSNP and global allele) and
/// reports the elapsed time when done. Does nothing when <paramref name="fileName"/> is null.
/// </summary>
/// <param name="fileName">Path of the dbSNP input file, or null to skip this data source.</param>
private void CreateDbsnpGaTsv(string fileName)
{
    if (fileName == null)
    {
        return;
    }

    var benchMark = new Benchmark();
    var version = DataSourceVersionReader.GetSourceVersion(fileName);

    // Each writer gets its own reference provider, as in the original code.
    var dbsnpWriter = new SaTsvWriter(_outputDirectory, version, _genomeAssembly.ToString(), SaTsvCommon.DbSnpSchemaVersion, InterimSaCommon.DbsnpTag, null, true, new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath)));
    var globalAlleleWriter = new SaTsvWriter(_outputDirectory, version, _genomeAssembly.ToString(), SaTsvCommon.DbSnpSchemaVersion, InterimSaCommon.GlobalAlleleTag, "GMAF", false, new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath)));

    using (var tsvWriter = new DbsnpGaTsvWriter(dbsnpWriter, globalAlleleWriter))
    // The input stream is owned here so it is released even if sorting or writing throws.
    // Disposing a stream twice is harmless should the reader also close it.
    using (var inputStream = GZipUtilities.GetAppropriateReadStream(fileName))
    {
        var dbSnpReader = new DbSnpReader(inputStream, _refNamesDictionary);
        TsvWriterUtilities.WriteSortedItems(dbSnpReader.GetDbSnpItems(), tsvWriter);
    }

    var timeSpan = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());
    TsvWriterUtilities.WriteCompleteInfo("DbSNP", version.Version, timeSpan);
}
/// <summary>
/// Opens the OMIM file at <paramref name="omimPath"/> and lets an <c>OmimReader</c> add its
/// entries to <paramref name="mimIdToEntry"/>.
/// </summary>
/// <param name="mimIdToEntry">Dictionary handed to the reader to be populated.</param>
/// <param name="omimPath">Path of the OMIM input file.</param>
private void AddOmimEntries(Dictionary<int, OmimImportEntry> mimIdToEntry, string omimPath)
{
    using (var omimStream = GZipUtilities.GetAppropriateReadStream(omimPath))
    using (var omimReader = new OmimReader(omimStream))
    {
        omimReader.AddOmimEntries(mimIdToEntry);
    }
}
/// <summary>
/// Builds a <c>GlobalCache</c> from the Ensembl and RefSeq cache files. Each file is loaded
/// with <c>LoadGenes</c> and flattened with <c>FlattenGenes</c>, Ensembl first.
/// </summary>
/// <param name="refSeqCachePath">Path of the RefSeq cache file.</param>
/// <param name="ensemblCachePath">Path of the Ensembl cache file.</param>
/// <param name="refIndexToChromosome">Chromosome lookup by index, passed to <c>LoadGenes</c>.</param>
/// <param name="refNameToChromosome38">Chromosome lookup by name, passed to <c>LoadGenes</c>.</param>
/// <returns>A cache holding the flattened Ensembl and RefSeq genes.</returns>
public static GlobalCache Create(string refSeqCachePath, string ensemblCachePath, IDictionary<ushort, IChromosome> refIndexToChromosome, IDictionary<string, IChromosome> refNameToChromosome38)
{
    var ensemblStream = GZipUtilities.GetAppropriateReadStream(ensemblCachePath);
    var ensemblGenes  = FlattenGenes(LoadGenes(ensemblStream, refIndexToChromosome, refNameToChromosome38));

    var refSeqStream = GZipUtilities.GetAppropriateReadStream(refSeqCachePath);
    var refSeqGenes  = FlattenGenes(LoadGenes(refSeqStream, refIndexToChromosome, refNameToChromosome38));

    return new GlobalCache(ensemblGenes, refSeqGenes);
}
/// <summary>
/// Builds the SIFT, PolyPhen and transcript caches from the intermediate files that share
/// <c>_inputPrefix</c>, logging progress to the console as it goes.
/// </summary>
/// <returns><c>ExitCodes.Success</c> once all three caches have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var logger = new ConsoleLogger();

    string transcriptPath = _inputPrefix + ".transcripts.gz";
    string siftPath       = _inputPrefix + ".sift.gz";
    string polyphenPath   = _inputPrefix + ".polyphen.gz";
    string regulatoryPath = _inputPrefix + ".regulatory.gz";

    (var refIndexToChromosome, var refNameToChromosome, int numRefSeqs) = SequenceHelper.GetDictionaries(_inputReferencePath);

    using (var transcriptReader = new MutableTranscriptReader(GZipUtilities.GetAppropriateReadStream(transcriptPath), refIndexToChromosome))
    using (var regulatoryReader = new RegulatoryRegionReader(GZipUtilities.GetAppropriateReadStream(regulatoryPath), refIndexToChromosome))
    using (var siftReader = new PredictionReader(GZipUtilities.GetAppropriateReadStream(siftPath), refIndexToChromosome, IntermediateIoCommon.FileType.Sift))
    using (var polyphenReader = new PredictionReader(GZipUtilities.GetAppropriateReadStream(polyphenPath), refIndexToChromosome, IntermediateIoCommon.FileType.Polyphen))
    using (var geneReader = new UgaGeneReader(GZipUtilities.GetAppropriateReadStream(ExternalFiles.UniversalGeneFilePath), refNameToChromosome))
    {
        // The transcript file header supplies the metadata stamped onto the output caches.
        var genomeAssembly    = transcriptReader.Header.Assembly;
        var source            = transcriptReader.Header.Source;
        long vepReleaseTicks  = transcriptReader.Header.VepReleaseTicks;
        ushort vepVersion     = transcriptReader.Header.VepVersion;

        logger.Write("- loading universal gene archive file... ");
        var genes      = geneReader.GetGenes();
        var geneForest = CreateGeneForest(genes, numRefSeqs, genomeAssembly);
        logger.WriteLine($"{genes.Length:N0} loaded.");

        logger.Write("- loading regulatory region file... ");
        var regulatoryRegions = regulatoryReader.GetRegulatoryRegions();
        logger.WriteLine($"{regulatoryRegions.Length:N0} loaded.");

        // This literal used to contain a raw line break, which a regular string literal
        // cannot hold; it now matches the other single-line progress messages.
        logger.Write("- loading transcript file... ");
        var transcripts           = transcriptReader.GetTranscripts();
        var transcriptsByRefIndex = transcripts.GetMultiValueDict(x => x.Chromosome.Index);
        logger.WriteLine($"{transcripts.Length:N0} loaded.");

        MarkCanonicalTranscripts(logger, transcripts);

        var predictionBuilder = new PredictionCacheBuilder(logger, genomeAssembly);
        var predictionCaches  = predictionBuilder.CreatePredictionCaches(transcriptsByRefIndex, siftReader, polyphenReader, numRefSeqs);

        logger.Write("- writing SIFT prediction cache... ");
        predictionCaches.Sift.Write(FileUtilities.GetCreateStream(CacheConstants.SiftPath(_outputCacheFilePrefix)));
        logger.WriteLine("finished.");

        logger.Write("- writing PolyPhen prediction cache... ");
        predictionCaches.PolyPhen.Write(FileUtilities.GetCreateStream(CacheConstants.PolyPhenPath(_outputCacheFilePrefix)));
        logger.WriteLine("finished.");

        var transcriptBuilder = new TranscriptCacheBuilder(logger, genomeAssembly, source, vepReleaseTicks, vepVersion);
        var transcriptStaging = transcriptBuilder.CreateTranscriptCache(transcripts, regulatoryRegions, geneForest, numRefSeqs);

        logger.Write("- writing transcript cache... ");
        transcriptStaging.Write(FileUtilities.GetCreateStream(CacheConstants.TranscriptPath(_outputCacheFilePrefix)));
        logger.WriteLine("finished.");
    }

    return ExitCodes.Success;
}
/// <summary>
/// Creates a <c>VcfReader</c> over either standard input or a file, wrapped in a <c>PeekStream</c>.
/// </summary>
/// <param name="vcfPath">Path of the VCF file, or <c>"-"</c> to read from standard input.</param>
/// <param name="chromosomeDictionary">Chromosome lookup passed to the reader.</param>
/// <param name="refMinorProvider">Reference-minor provider passed to the reader.</param>
/// <param name="verboseTranscript">Flag passed to the reader unchanged.</param>
/// <param name="recomposer">Recomposer passed to the reader.</param>
/// <returns>The newly created reader.</returns>
public static IVcfReader GetVcfReader(string vcfPath, IDictionary<string, IChromosome> chromosomeDictionary, IRefMinorProvider refMinorProvider, bool verboseTranscript, IRecomposer recomposer)
{
    bool readFromStdin = vcfPath == "-";

    var rawStream = readFromStdin
        ? Console.OpenStandardInput()
        : GZipUtilities.GetAppropriateReadStream(vcfPath);

    return new VcfReader(new PeekStream(rawStream), chromosomeDictionary, refMinorProvider, verboseTranscript, recomposer);
}
/// <summary>
/// Writes <c>HeaderLine</c> followed by every line produced by the mitochondrial heteroplasmy
/// parser to the file <c>OutFileName</c> in the output directory.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after all lines have been written.</returns>
private static ExitCodes ProgramExecution()
{
    string outputPath = Path.Combine(_outputDirectory, OutFileName);

    using (var parser = new MitoHeteroplasmyParser(GZipUtilities.GetAppropriateReadStream(_inputFile)))
    using (var outputStream = FileUtilities.GetCreateStream(outputPath))
    using (var output = new StreamWriter(outputStream))
    {
        output.WriteLine(HeaderLine);

        foreach (var outputLine in parser.GetOutputLines())
        {
            output.WriteLine(outputLine);
        }
    }

    return ExitCodes.Success;
}
/// <summary>
/// Annotates every position of the configured VCF file and writes the results, preceded by
/// <c>OutHeader</c>, to the configured output file.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the last position has been written.</returns>
/// <remarks>
/// Any exception raised while annotating is rethrown after the current VCF line has been
/// attached to its <c>Data</c> dictionary under <c>ExitCodeUtilities.VcfLine</c>.
/// </remarks>
private ExitCodes ProgramExecution()
{
    var sequenceProvider   = ProviderUtilities.GetSequenceProvider(ConfigurationSettings.RefSequencePath);
    var transcriptProvider = ProviderUtilities.GetTranscriptAnnotationProvider(ConfigurationSettings.InputCachePrefix, sequenceProvider);
    var annotator          = ProviderUtilities.GetAnnotator(transcriptProvider, sequenceProvider);

    // Collected but not consumed anywhere else in this method.
    var dataSourceVersions = new List<IDataSourceVersion>();
    dataSourceVersions.AddRange(transcriptProvider.DataSourceVersions);

    using (var outputWriter = new StreamWriter(ConfigurationSettings.OutputFileName))
    using (var vcfReader = new VcfReader(GZipUtilities.GetAppropriateReadStream(ConfigurationSettings.VcfPath), sequenceProvider.GetChromosomeDictionary(), null, false))
    {
        try
        {
            // '&&' binds tighter than '||', so the rCRS check only qualifies the GRCh37 case.
            // GRCh38 and the force flag each enable mitochondrial annotation on their own.
            bool isRcrsOnGrch37 = vcfReader.IsRcrsMitochondrion && annotator.GenomeAssembly == GenomeAssembly.GRCh37;
            if (isRcrsOnGrch37
                || annotator.GenomeAssembly == GenomeAssembly.GRCh38
                || ConfigurationSettings.ForceMitochondrialAnnotation)
            {
                annotator.EnableMitochondrialAnnotation();
            }

            int lastChromIndex = -1;
            outputWriter.WriteLine(OutHeader);

            IPosition current;
            while ((current = vcfReader.GetNextPosition()) != null)
            {
                lastChromIndex = UpdatePerformanceMetrics(lastChromIndex, current.Chromosome);

                var annotated = annotator.Annotate(current);
                WriteAnnotatedPostion(annotated, outputWriter);
            }
        }
        catch (Exception e)
        {
            e.Data[ExitCodeUtilities.VcfLine] = vcfReader.VcfLine;
            throw;
        }
    }

    return ExitCodes.Success;
}
/// <summary>
/// Parses the dosage sensitivity file and writes its items through an <c>NgaWriter</c> to a
/// file in the output directory. The file name is built from the data source name (spaces
/// replaced by underscores) and version.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var version = DataSourceVersionReader.GetSourceVersion(_dosageSensitivityFile + ".version");

    string sourceName  = version.Name.Replace(' ', '_');
    string outFileName = $"{sourceName}_{version.Version}";
    string ngaFileName = outFileName + SaCommon.NgaFileSuffix;

    using (var parser       = new DosageSensitivityParser(GZipUtilities.GetAppropriateReadStream(_dosageSensitivityFile)))
    using (var outputStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, ngaFileName)))
    using (var writer       = new NgaWriter(outputStream, version, SaCommon.DosageSensitivityTag, SaCommon.SchemaVersion, false))
    {
        writer.Write(parser.GetItems());
    }

    return ExitCodes.Success;
}
/// <summary>
/// Parses the dosage map region file and writes its items through an <c>NsiWriter</c> to a
/// file in the output directory. The file name is built from the data source name (spaces
/// replaced by underscores) and version.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var version = DataSourceVersionReader.GetSourceVersion(_dosageMapRegionFile + ".version");

    string sourceName  = version.Name.Replace(' ', '_');
    string outFileName = $"{sourceName}_{version.Version}";
    string nsiFileName = outFileName + SaCommon.SiFileSuffix;

    var referenceProvider = new ReferenceSequenceProvider(GZipUtilities.GetAppropriateReadStream(_inputReferencePath));

    using (var parser       = new DosageMapRegionParser(GZipUtilities.GetAppropriateReadStream(_dosageMapRegionFile), referenceProvider.RefNameToChromosome))
    using (var outputStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsiFileName)))
    using (var writer       = new NsiWriter(outputStream, version, referenceProvider.Assembly, SaCommon.DosageSensitivityTag, ReportFor.StructuralVariants, SaCommon.SchemaVersion))
    {
        writer.Write(parser.GetItems());
    }

    return ExitCodes.Success;
}
/// <summary>
/// Parses the gene-disease validity file and writes its items through an <c>NgaWriter</c> to
/// a file in the output directory. The parser also receives the dictionary returned by
/// <c>GetHgncIdToGeneSymbols</c>.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var version = DataSourceVersionReader.GetSourceVersion(_diseaseValidityFile + ".version");

    string sourceName   = version.Name.Replace(' ', '_');
    string outFileName  = $"{sourceName}_{version.Version}";
    string geneFileName = outFileName + SaCommon.GeneFileSuffix;

    // The HGNC id to gene symbol dictionary is built after the input stream has been opened,
    // because it is evaluated as the parser's second constructor argument.
    using (var parser       = new GeneDiseaseValidityParser(GZipUtilities.GetAppropriateReadStream(_diseaseValidityFile), GetHgncIdToGeneSymbols()))
    using (var outputStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, geneFileName)))
    using (var writer       = new NgaWriter(outputStream, version, SaCommon.DiseaseValidityTag, SaCommon.SchemaVersion, true))
    {
        writer.Write(parser.GetItems());
    }

    return ExitCodes.Success;
}
/// <summary>
/// Creates the COSMIC structural variant TSV from the optional CNV and breakend input files.
/// A null path results in a null stream being handed to <c>CosmicSvReader</c>.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after <c>CreateTsv</c> has completed.</returns>
private ExitCodes ProgramExecution()
{
    // NOTE(review): the version path is derived from _cnvTsv before its null check below, so a
    // null _cnvTsv yields the path ".version" - confirm whether that is intended.
    var version           = DataSourceVersionReader.GetSourceVersion(_cnvTsv + ".version");
    var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));

    var cnvStream = _cnvTsv != null
        ? GZipUtilities.GetAppropriateReadStream(_cnvTsv)
        : null;

    var breakendStream = _breakendTsv != null
        ? GZipUtilities.GetAppropriateReadStream(_breakendTsv)
        : null;

    using (var svReader = new CosmicSvReader(cnvStream, breakendStream, version, _outputDir, referenceProvider.GenomeAssembly, referenceProvider.RefNameToChromosome))
    {
        svReader.CreateTsv();
    }

    return ExitCodes.Success;
}
/// <summary>
/// Parses the mitochondrial heteroplasmy input file and writes its items through an
/// <c>NsaWriter</c> into a data file and an index file in the output directory.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
    var version           = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

    // Both output files share the "<name>_<version>" stem.
    string outFileName   = $"{version.Name}_{version.Version}";
    string nsaFileName   = outFileName + SaCommon.SaFileSuffix;
    string indexFileName = nsaFileName + SaCommon.IndexSufix;

    using (var heteroplasmyParser = new MitoHeteroplasmyParser(GZipUtilities.GetAppropriateReadStream(_inputFile), referenceProvider))
    using (var dataStream         = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName)))
    using (var indexStream        = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, indexFileName)))
    using (var writer             = new NsaWriter(dataStream, indexStream, version, referenceProvider, SaCommon.MitoHeteroplasmyTag, true, false, SaCommon.SchemaVersion, false))
    {
        writer.Write(heteroplasmyParser.GetItems());
    }

    return ExitCodes.Success;
}
/// <summary>
/// Parses the PhyloP input file and writes its items through an <c>NpdWriter</c> into a data
/// file and an index file in the output directory.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
    var version           = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

    // Both output files share the "<name>_<version>" stem.
    string outFileName   = $"{version.Name}_{version.Version}";
    string dataFileName  = outFileName + SaCommon.PhylopFileSuffix;
    string indexFileName = dataFileName + SaCommon.IndexSufix;

    using (var parser      = new PhylopParser(GZipUtilities.GetAppropriateReadStream(_inputFile), referenceProvider.Assembly, referenceProvider.RefNameToChromosome))
    using (var dataStream  = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, dataFileName)))
    using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, indexFileName)))
    using (var npdWriter   = new NpdWriter(dataStream, indexStream, version, referenceProvider.Assembly, SaCommon.PhylopTag, SaCommon.SchemaVersion))
    {
        npdWriter.Write(parser.GetItems());
    }

    return ExitCodes.Success;
}
/// <summary>
/// Reads global minor allele items from the input file and writes them through an
/// <c>NsaWriter</c> into a data file and an index file in the output directory.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));

    // The input stream is owned here so it is released even when the version lookup, output
    // file creation or writing throws. It used to be opened without any disposal.
    using (var inputStream = GZipUtilities.GetAppropriateReadStream(_inputFile))
    {
        var globalMinorReader = new GlobalMinorReader(inputStream, referenceProvider.RefNameToChromosome);
        var version = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");
        string outFileName = $"{version.Name}_{version.Version}_globalMinor";

        using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix)))
        using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSufix)))
        using (var nsaWriter = new NsaWriter(nsaStream, indexStream, version, referenceProvider, SaCommon.GlobalAlleleTag, true, false, SaCommon.SchemaVersion, true))
        {
            nsaWriter.Write(globalMinorReader.GetItems());
        }
    }

    return ExitCodes.Success;
}
EnsemblIdToSymbol) ParseUniversalGeneArchive() { var(_, refNameToChromosome, _) = SequenceHelper.GetDictionaries(_inputReferencePath); UgaGene[] genes; using (var reader = new UgaGeneReader(GZipUtilities.GetAppropriateReadStream(_universalGeneArchivePath), refNameToChromosome)) { genes = reader.GetGenes(); } var entrezGeneIdToSymbol = genes.GetGeneIdToSymbol(x => x.EntrezGeneId); var ensemblIdToSymbol = genes.GetGeneIdToSymbol(x => x.EnsemblId); return(entrezGeneIdToSymbol, ensemblIdToSymbol); }
/// <summary>
/// Reads the 1000 Genomes input file and writes its items through an <c>NsaWriter</c> into a
/// data file and an index file in the output directory. Spaces in the output file stem are
/// replaced by underscores.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
    var version           = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

    string outFileName   = $"{version.Name}_{version.Version}".Replace(' ', '_');
    string nsaFileName   = outFileName + SaCommon.SaFileSuffix;
    string indexFileName = nsaFileName + SaCommon.IndexSufix;

    using (var itemReader  = new OneKGenReader(GZipUtilities.GetAppropriateReadStream(_inputFile), referenceProvider))
    using (var dataStream  = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName)))
    using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, indexFileName)))
    using (var nsaWriter   = new NsaWriter(new ExtendedBinaryWriter(dataStream), new ExtendedBinaryWriter(indexStream), version, referenceProvider, SaCommon.OneKgenTag, true, false, SaCommon.SchemaVersion, false))
    {
        nsaWriter.Write(itemReader.GetItems());
    }

    return ExitCodes.Success;
}
/// <summary>
/// Checks that <c>AnnotationLambda.GetTabixVirtualPosition</c> returns the same offset that a
/// directly parsed tabix index reports for the start of the annotation range.
/// </summary>
public void GetTabixVirtualPosition_AsExpected()
{
    var annotationConfig = new AnnotationConfig
    {
        vcfUrl = "anywhere/input.vcf.gz",
        tabixUrl = Resources.TopPath("Mother_chr22.genome.vcf.gz.tbi"),
        annotationRange = new AnnotationRange(new AnnotationPosition("chr22", 20_000_000), new AnnotationPosition("chr22", 30_000_000))
    };

    // Both handles on the index file are disposed so the test does not keep the resource
    // open after it finishes. They were previously left undisposed.
    using (var tabixStream = FileUtilities.GetReadStream(annotationConfig.tabixUrl))
    using (var indexReader = new BinaryReader(GZipUtilities.GetAppropriateReadStream(annotationConfig.tabixUrl)))
    {
        var expectedPosition = Reader.Read(indexReader, ChromosomeUtilities.RefNameToChromosome).GetOffset("chr22", annotationConfig.annotationRange.Start.Position);

        var virtualPosition = global::AnnotationLambda.AnnotationLambda.GetTabixVirtualPosition(annotationConfig.annotationRange, tabixStream, ChromosomeUtilities.RefNameToChromosome);

        Assert.Equal(expectedPosition, virtualPosition);
    }
}
/// <summary>
/// Loads the intermediate Genbank file into a dictionary, but only for the GRCh37 / RefSeq
/// combination.
/// </summary>
/// <param name="assembly">Genome assembly being processed.</param>
/// <param name="source">Transcript source being processed.</param>
/// <returns>
/// The dictionary returned by the Genbank reader, or null for any other assembly/source pair.
/// </returns>
private static Dictionary<string, GenbankEntry> GetIdToGenbank(GenomeAssembly assembly, Source source)
{
    bool isGrch37RefSeq = assembly == GenomeAssembly.GRCh37 && source == Source.RefSeq;
    if (!isGrch37RefSeq)
    {
        return null;
    }

    Logger.Write("- loading the intermediate Genbank file... ");

    Dictionary<string, GenbankEntry> idToGenbank;
    using (var genbankReader = new IntermediateIO.GenbankReader(GZipUtilities.GetAppropriateReadStream(ExternalFiles.GenbankFilePath)))
    {
        idToGenbank = genbankReader.GetIdToGenbank();
    }

    Logger.WriteLine($"{idToGenbank.Count} entries loaded.");
    return idToGenbank;
}
/// <summary>
/// Reads the ClinVar RCV and VCV files and writes three outputs to the output directory: the
/// items through an <c>NsaWriter</c> (data file plus index file) and the reader's JSON schema
/// to a separate schema file.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after items and schema have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var version = DataSourceVersionReader.GetSourceVersion(_rcvFile + ".version");

    // All three output files share the "<name>_<version>" stem.
    string outFileName    = $"{version.Name}_{version.Version}";
    string nsaFileName    = outFileName + SaCommon.SaFileSuffix;
    string indexFileName  = nsaFileName + SaCommon.IndexSufix;
    string schemaFileName = nsaFileName + SaCommon.JsonSchemaSuffix;

    using (var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference)))
    using (var clinvarReader     = new ClinVarReader(GZipUtilities.GetAppropriateReadStream(_rcvFile), GZipUtilities.GetAppropriateReadStream(_vcvFile), referenceProvider))
    using (var dataStream        = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName)))
    using (var indexStream       = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, indexFileName)))
    using (var nsaWriter         = new NsaWriter(dataStream, indexStream, version, referenceProvider, SaCommon.ClinvarTag, false, true, SaCommon.SchemaVersion, false))
    using (var schemaStream      = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, schemaFileName)))
    using (var schemaWriter      = new StreamWriter(schemaStream))
    {
        nsaWriter.Write(clinvarReader.GetItems());
        schemaWriter.Write(clinvarReader.JsonSchema);
    }

    return ExitCodes.Success;
}
/// <summary>
/// Parses the protein conservation scores file and writes its items through a
/// <c>ProteinConservationWriter</c>. The main output goes to the output directory; a second
/// stream is created for <c>transcriptGroups.txt</c> relative to the current directory.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    using (var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference)))
    {
        // Per the original author: the transcript data is used to validate the protein sequence.
        TranscriptCacheData transcriptData = AaConservationUtilities.GetTranscriptData(referenceProvider.RefIndexToChromosome, _transcriptCachePrefix);

        var version = DataSourceVersionReader.GetSourceVersion(_scoresFile + ".version");
        string outFileName    = $"{version.Name}_{version.Version}";
        string scoresFileName = outFileName + ProteinConservationCommon.FileSuffix;

        // Per the original author: the scores file holds the multi-alignments.
        using (var scoresStream = GZipUtilities.GetAppropriateReadStream(_scoresFile))
        using (var scoresParser = new ProteinConservationParser(scoresStream))
        using (var outputStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, scoresFileName)))
        using (var groupStream  = FileUtilities.GetCreateStream("transcriptGroups.txt"))
        using (var scoresWriter = new ProteinConservationWriter(outputStream, groupStream, transcriptData, version))
        {
            scoresWriter.Write(scoresParser.GetItems());
        }

        return ExitCodes.Success;
    }
}
/// <summary>
/// Parses the SpliceAI input file and writes its items through an <c>NsaWriter</c> into a
/// data file and an index file in the output directory. Before parsing, SpliceAI gene symbols
/// read from the gene info file are mapped to the symbols derived from the transcript cache.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));

    TranscriptCacheData transcriptData;
    var cachePath = CacheConstants.TranscriptPath(_transcriptCachePrefix);
    using (var cacheReader = new TranscriptCacheReader(FileUtilities.GetReadStream(cachePath)))
    {
        transcriptData = cacheReader.Read(referenceProvider.RefIndexToChromosome);
    }

    var spliceIntervals    = SpliceUtilities.GetSpliceIntervals(referenceProvider, transcriptData);
    var cacheEnstToSymbols = SpliceUtilities.GetEnstToGeneSymbols(referenceProvider, transcriptData);

    Dictionary<string, string> spliceAiEnstToSymbols;
    using (var geneInfoReader = new StreamReader(GZipUtilities.GetAppropriateReadStream(_geneInfoFile)))
    {
        spliceAiEnstToSymbols = SpliceUtilities.GetSpliceAiGeneSymbols(geneInfoReader);
    }

    var symbolMapping = SpliceUtilities.GetSymbolMapping(spliceAiEnstToSymbols, cacheEnstToSymbols);
    Console.WriteLine($"Mapped {symbolMapping.Count} spliceAI gene symbols to Nirvana gene symbols (out of {spliceAiEnstToSymbols.Count})");

    var version = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

    // Both output files share the "<name>_<version>" stem.
    string outFileName   = $"{version.Name}_{version.Version}";
    string nsaFileName   = outFileName + SaCommon.SaFileSuffix;
    string indexFileName = nsaFileName + SaCommon.IndexSufix;

    using (var parser      = new SpliceAiParser(GZipUtilities.GetAppropriateReadStream(_inputFile), referenceProvider, spliceIntervals, symbolMapping))
    using (var dataStream  = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName)))
    using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, indexFileName)))
    using (var writer      = new NsaWriter(dataStream, indexStream, version, referenceProvider, SaCommon.SpliceAiTag, true, true, SaCommon.SchemaVersion, false))
    {
        writer.Write(parser.GetItems());
    }

    Console.WriteLine($"Total number of entries from Splice AI: {SpliceAiParser.Count}");

    return ExitCodes.Success;
}
/// <summary>
/// Parses the SpliceAI input file and writes its items through an <c>NsaWriter</c> into a
/// data file and an index file in the output directory. The parser is given the splice
/// intervals, a gene forest built from the transcript cache, and gene symbol synonyms read
/// from the gene info file.
/// </summary>
/// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));

    TranscriptCacheData transcriptData;
    using (var transcriptCacheReader = new TranscriptCacheReader(FileUtilities.GetReadStream(CacheConstants.TranscriptPath(_transcriptCachePrefix))))
    {
        transcriptData = transcriptCacheReader.Read(referenceProvider.RefIndexToChromosome);
    }

    var spliceIntervals   = SpliceUtilities.GetSpliceIntervals(referenceProvider, transcriptData);
    var nirvanaGeneForest = SpliceUtilities.GetGeneForest(transcriptData);
    Console.WriteLine("Loaded transcripts and generated splice intervals.");

    Dictionary<string, List<string>> geneSymbolSynonyms;
    using (var geneInfoParser = new GeneInfoParser(GZipUtilities.GetAppropriateStreamReader(_geneInfoFile)))
    {
        geneSymbolSynonyms = geneInfoParser.GetGeneSymbolSynonyms();
    }

    Console.WriteLine("Loaded gene symbol synonyms");

    var version = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");
    string outFileName = $"{version.Name}_{version.Version}";

    using (var spliceAiParser = new SpliceAiParser(GZipUtilities.GetAppropriateReadStream(_inputFile), referenceProvider, spliceIntervals, nirvanaGeneForest, geneSymbolSynonyms))
    using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix)))
    using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSufix)))
    // The writer used to be a plain local that was never disposed; it is now released before
    // the streams it wraps, matching the other NsaWriter call sites.
    using (var nsaWriter = new NsaWriter(new ExtendedBinaryWriter(nsaStream), new ExtendedBinaryWriter(indexStream), version, referenceProvider, SaCommon.SpliceAiTag, true, true, SaCommon.SchemaVersion, false))
    {
        nsaWriter.Write(spliceAiParser.GetItems());
    }

    Console.WriteLine($"Total number of entries from Splice AI: {SpliceAiParser.Count}");
    return ExitCodes.Success;
}
/// <summary>
/// Runs <c>StreamAnnotation.Annotate</c> over the configured VCF input and JSON output.
/// A path of <c>"-"</c> selects standard input or standard output respectively; when writing
/// to standard output, no index stream is created.
/// </summary>
/// <returns>The exit code returned by <c>StreamAnnotation.Annotate</c>.</returns>
private static ExitCodes ProgramExecution()
{
    var annotationResources = GetAnnotationResources();

    // The index file only exists alongside a real output file.
    string jasixFileName = _outputFileName == "-"
        ? null
        : _outputFileName + ".json.gz" + JasixCommons.FileExt;

    using (var vcfStream = _vcfPath == "-"
        ? Console.OpenStandardInput()
        : GZipUtilities.GetAppropriateReadStream(_vcfPath))
    using (var jsonStream = _outputFileName == "-"
        ? Console.OpenStandardOutput()
        : new BlockGZipStream(FileUtilities.GetCreateStream(_outputFileName + ".json.gz"), CompressionMode.Compress))
    using (var jsonIndexStream = jasixFileName == null
        ? null
        : FileUtilities.GetCreateStream(jasixFileName))
    {
        return StreamAnnotation.Annotate(null, vcfStream, jsonStream, jsonIndexStream, annotationResources, new NullVcfFilter(), false, _enableDq);
    }
}