예제 #1
0
        /// <summary>
        /// Reads all genes from the universal gene archive and builds two lookups via
        /// <c>GetGeneIdToSymbol</c>: one keyed on the Entrez gene ID, one keyed on the Ensembl ID.
        /// </summary>
        /// <param name="inputReferencePath">Path passed to <c>SequenceHelper.GetDictionaries</c>; when null, the gene reader is given a null chromosome dictionary.</param>
        /// <param name="universalGeneArchivePath">Path of the universal gene archive to read.</param>
        /// <returns>The Entrez-keyed dictionary followed by the Ensembl-keyed dictionary.</returns>
        public static (Dictionary<string, string> EntrezGeneIdToSymbol, Dictionary<string, string> EnsemblIdToSymbol) ParseUniversalGeneArchive(string inputReferencePath, string universalGeneArchivePath)
        {
            // stays null when no reference path was supplied
            IDictionary<string, IChromosome> chromosomesByName = null;
            if (inputReferencePath != null)
            {
                (_, chromosomesByName, _) = SequenceHelper.GetDictionaries(inputReferencePath);
            }

            UgaGene[] ugaGenes;
            var archiveStream = GZipUtilities.GetAppropriateReadStream(universalGeneArchivePath);
            using (var geneReader = new UgaGeneReader(archiveStream, chromosomesByName))
            {
                ugaGenes = geneReader.GetGenes();
            }

            var symbolsByEntrezId  = ugaGenes.GetGeneIdToSymbol(gene => gene.EntrezGeneId);
            var symbolsByEnsemblId = ugaGenes.GetGeneIdToSymbol(gene => gene.EnsemblId);
            return (symbolsByEntrezId, symbolsByEnsemblId);
        }
예제 #2
0
        /// <summary>
        /// Parses the PrimateAI input file and writes its items, tagged with
        /// <c>SaCommon.PrimateAiTag</c>, to an NSA file and index in the output directory.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            var    sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
            var    sourceVersion    = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");
            string baseFileName     = $"{sourceVersion.Name}_{sourceVersion.Version}";

            // the transcript cache supplies the ID -> symbol mappings handed to the parser
            TranscriptCacheData cacheData;
            var cacheStream = FileUtilities.GetReadStream(CacheConstants.TranscriptPath(_transcriptCachePrefix));
            using (var cacheReader = new TranscriptCacheReader(cacheStream))
            {
                cacheData = cacheReader.Read(sequenceProvider.RefIndexToChromosome);
            }

            var (entrezToHgnc, ensemblToHgnc) = PrimateAiUtilities.GetIdToSymbols(cacheData);

            using (var parser = new PrimateAiParser(GZipUtilities.GetAppropriateReadStream(_inputFile), sequenceProvider, entrezToHgnc, ensemblToHgnc))
            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, baseFileName + SaCommon.SaFileSuffix)))
            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, baseFileName + SaCommon.SaFileSuffix + SaCommon.IndexSufix)))
            using (var nsaWriter = new NsaWriter(nsaStream, indexStream, sourceVersion, sequenceProvider, SaCommon.PrimateAiTag, true, true, SaCommon.SchemaVersion, false))
            {
                nsaWriter.Write(parser.GetItems());
            }

            return ExitCodes.Success;
        }
예제 #3
0
        /// <summary>
        /// Reads the tab-delimited file at <c>_ugaFile</c> and maps each HGNC ID (column index 8)
        /// to its gene symbol (column index 2).
        /// </summary>
        /// <remarks>
        /// Rows whose HGNC ID is -1 are skipped. When an ID appears more than once, the first symbol
        /// is kept and a message is printed if a later row carries a different symbol.
        /// </remarks>
        /// <returns>A dictionary from HGNC ID to gene symbol.</returns>
        private static Dictionary<int, string> GetHgncIdToGeneSymbols()
        {
            var idToSymbols = new Dictionary<int, string>();

            using (var ugaStream = GZipUtilities.GetAppropriateReadStream(_ugaFile))
            using (var reader = new StreamReader(ugaStream))
            {
                // the first line only has the count of entries, so it is discarded
                reader.ReadLine();

                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    var splits = line.OptimizedSplit('\t');
                    var symbol = splits[2];

                    // the file is machine-generated: parse with the invariant culture so that
                    // values such as "-1" are read the same way regardless of the host locale
                    var hgncId = int.Parse(splits[8], System.Globalization.CultureInfo.InvariantCulture);
                    if (hgncId == -1)
                    {
                        continue;
                    }

                    // single lookup: either report a conflicting duplicate or add the new entry
                    if (idToSymbols.TryGetValue(hgncId, out var existingSymbol))
                    {
                        if (symbol != existingSymbol)
                        {
                            Console.WriteLine($"Different symbol for the same id({hgncId}). Existing: {existingSymbol}. New: {symbol}");
                        }

                        continue;
                    }

                    idToSymbols.Add(hgncId, symbol);
                }
            }

            return idToSymbols;
        }
예제 #4
0
        /// <summary>
        /// Parses one transcript dump file and appends every transcript accepted by <c>_filter</c>
        /// to <paramref name="transcripts"/>.
        /// </summary>
        /// <param name="chromosome">Chromosome passed through to <c>ImportTranscript.Parse</c>.</param>
        /// <param name="filePath">Path of the dump file to read.</param>
        /// <param name="transcripts">Collection that receives the accepted transcripts.</param>
        /// <exception cref="InvalidOperationException">
        /// A list entry is not an object value node, or its type is not <c>Bio::EnsEMBL::Transcript</c>.
        /// </exception>
        private void ParseTranscriptDumpFile(IChromosome chromosome, string filePath,
                                             ICollection<MutableTranscript> transcripts)
        {
            Console.WriteLine($"- processing {Path.GetFileName(filePath)}");

            using (var dumpReader = new DataDumperReader(GZipUtilities.GetAppropriateReadStream(filePath)))
            {
                foreach (var rootChild in dumpReader.GetRootNode().Value.Values)
                {
                    // only list nodes are inspected; everything else under the root is ignored
                    if (rootChild is ListObjectKeyValueNode transcriptList)
                    {
                        foreach (var entry in transcriptList.Values)
                        {
                            if (entry is ObjectValueNode transcriptNode)
                            {
                                if (transcriptNode.Type != "Bio::EnsEMBL::Transcript")
                                {
                                    throw new InvalidOperationException($"Expected a transcript node, but the current data type is: [{transcriptNode.Type}]");
                                }

                                var parsedTranscript = ImportTranscript.Parse(transcriptNode, chromosome, _source);
                                if (_filter.Pass(parsedTranscript))
                                {
                                    transcripts.Add(parsedTranscript);
                                }
                            }
                            else
                            {
                                throw new InvalidOperationException("Expected a transcript object value node, but the current node is not an object value.");
                            }
                        }
                    }
                }
            }
        }
예제 #5
0
        /// <summary>
        /// Parses one regulatory dump file and forwards every "RegulatoryFeature" group to
        /// <c>ParseRegulatoryRegions</c>; "MotifFeature" groups are ignored.
        /// </summary>
        /// <param name="chromosome">Chromosome passed through to <c>ParseRegulatoryRegions</c>.</param>
        /// <param name="filePath">Path of the dump file to read.</param>
        /// <param name="regulatoryRegions">Collection passed through to <c>ParseRegulatoryRegions</c>.</param>
        /// <exception cref="InvalidDataException">A feature group has a key other than the two known ones.</exception>
        private static void ParseRegulatoryDumpFile(IChromosome chromosome, string filePath,
                                                    ICollection<IRegulatoryRegion> regulatoryRegions)
        {
            Console.WriteLine($"- processing {Path.GetFileName(filePath)}");

            using (var dumpReader = new DataDumperReader(GZipUtilities.GetAppropriateReadStream(filePath)))
            {
                foreach (var rootChild in dumpReader.GetRootNode().Value.Values)
                {
                    // only object key/value nodes carry feature groups
                    if (rootChild is ObjectKeyValueNode groupContainer)
                    {
                        foreach (var featureGroup in groupContainer.Value.Values)
                        {
                            switch (featureGroup.Key)
                            {
                            case "RegulatoryFeature":
                                ParseRegulatoryRegions(chromosome, featureGroup, regulatoryRegions);
                                break;

                            case "MotifFeature":
                                // motif features are not used
                                break;

                            default:
                                throw new InvalidDataException($"Found an unexpected feature group ({featureGroup.Key}) in the regulatory regions file.");
                            }
                        }
                    }
                }
            }
        }
예제 #6
0
        /// <summary>
        /// Reads the dbSNP file and writes its sorted items through a <c>DbsnpGaTsvWriter</c> that wraps
        /// two <c>SaTsvWriter</c> instances (dbSNP tag and global allele tag), then reports the elapsed time.
        /// </summary>
        /// <param name="fileName">Path of the dbSNP input file; the method does nothing when it is null.</param>
        private void CreateDbsnpGaTsv(string fileName)
        {
            if (fileName == null)
            {
                return;
            }

            var stopwatch     = new Benchmark();
            var sourceVersion = DataSourceVersionReader.GetSourceVersion(fileName);

            // each writer is given its own reference sequence provider
            var dbSnpTsvWriter = new SaTsvWriter(
                _outputDirectory, sourceVersion, _genomeAssembly.ToString(),
                SaTsvCommon.DbSnpSchemaVersion, InterimSaCommon.DbsnpTag, null, true,
                new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath)));

            var globalAlleleTsvWriter = new SaTsvWriter(
                _outputDirectory, sourceVersion, _genomeAssembly.ToString(),
                SaTsvCommon.DbSnpSchemaVersion, InterimSaCommon.GlobalAlleleTag, "GMAF", false,
                new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath)));

            using (var combinedWriter = new DbsnpGaTsvWriter(dbSnpTsvWriter, globalAlleleTsvWriter))
            {
                var itemReader = new DbSnpReader(GZipUtilities.GetAppropriateReadStream(fileName), _refNamesDictionary);
                TsvWriterUtilities.WriteSortedItems(itemReader.GetDbSnpItems(), combinedWriter);
            }

            var elapsed = Benchmark.ToHumanReadable(stopwatch.GetElapsedTime());
            TsvWriterUtilities.WriteCompleteInfo("DbSNP", sourceVersion.Version, elapsed);
        }
예제 #7
0
 /// <summary>
 /// Opens the OMIM file and lets an <c>OmimReader</c> add its entries to <paramref name="mimIdToEntry"/>.
 /// </summary>
 /// <param name="mimIdToEntry">Dictionary handed to <c>OmimReader.AddOmimEntries</c>.</param>
 /// <param name="omimPath">Path of the OMIM file to read.</param>
 private void AddOmimEntries(Dictionary<int, OmimImportEntry> mimIdToEntry, string omimPath)
 {
     using (var omimStream = GZipUtilities.GetAppropriateReadStream(omimPath))
     {
         using (var omimReader = new OmimReader(omimStream))
         {
             omimReader.AddOmimEntries(mimIdToEntry);
         }
     }
 }
예제 #8
0
        /// <summary>
        /// Loads and flattens the genes from the Ensembl cache and then from the RefSeq cache, and
        /// wraps both results in a <c>GlobalCache</c>.
        /// </summary>
        /// <param name="refSeqCachePath">Path of the RefSeq cache file.</param>
        /// <param name="ensemblCachePath">Path of the Ensembl cache file.</param>
        /// <param name="refIndexToChromosome">Chromosome lookup passed to <c>LoadGenes</c>.</param>
        /// <param name="refNameToChromosome38">Chromosome lookup passed to <c>LoadGenes</c>.</param>
        /// <returns>A <c>GlobalCache</c> built from the Ensembl and RefSeq genes.</returns>
        public static GlobalCache Create(string refSeqCachePath, string ensemblCachePath,
                                         IDictionary<ushort, IChromosome> refIndexToChromosome, IDictionary<string, IChromosome> refNameToChromosome38)
        {
            // Ensembl is loaded first, RefSeq second
            var ensemblStream = GZipUtilities.GetAppropriateReadStream(ensemblCachePath);
            var ensemblGenes  = LoadGenes(ensemblStream, refIndexToChromosome, refNameToChromosome38);
            var ensemblByRef  = FlattenGenes(ensemblGenes);

            var refSeqStream = GZipUtilities.GetAppropriateReadStream(refSeqCachePath);
            var refSeqGenes  = LoadGenes(refSeqStream, refIndexToChromosome, refNameToChromosome38);
            var refSeqByRef  = FlattenGenes(refSeqGenes);

            return new GlobalCache(ensemblByRef, refSeqByRef);
        }
예제 #9
0
        /// <summary>
        /// Builds the SIFT, PolyPhen and transcript caches from the intermediate files that share
        /// <c>_inputPrefix</c> plus the universal gene archive, and writes them under
        /// <c>_outputCacheFilePrefix</c>.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> once all three caches have been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            var logger = new ConsoleLogger();

            // all intermediate inputs share the same prefix
            string transcriptPath = $"{_inputPrefix}.transcripts.gz";
            string siftPath       = $"{_inputPrefix}.sift.gz";
            string polyphenPath   = $"{_inputPrefix}.polyphen.gz";
            string regulatoryPath = $"{_inputPrefix}.regulatory.gz";

            (var chromosomesByIndex, var chromosomesByName, int numRefSeqs) = SequenceHelper.GetDictionaries(_inputReferencePath);

            using (var transcriptReader = new MutableTranscriptReader(GZipUtilities.GetAppropriateReadStream(transcriptPath), chromosomesByIndex))
            using (var regulatoryReader = new RegulatoryRegionReader(GZipUtilities.GetAppropriateReadStream(regulatoryPath), chromosomesByIndex))
            using (var siftReader = new PredictionReader(GZipUtilities.GetAppropriateReadStream(siftPath), chromosomesByIndex, IntermediateIoCommon.FileType.Sift))
            using (var polyphenReader = new PredictionReader(GZipUtilities.GetAppropriateReadStream(polyphenPath), chromosomesByIndex, IntermediateIoCommon.FileType.Polyphen))
            using (var geneReader = new UgaGeneReader(GZipUtilities.GetAppropriateReadStream(ExternalFiles.UniversalGeneFilePath), chromosomesByName))
            {
                // the transcript header describes the assembly, source and VEP release of the inputs
                var    assembly     = transcriptReader.Header.Assembly;
                var    source       = transcriptReader.Header.Source;
                long   releaseTicks = transcriptReader.Header.VepReleaseTicks;
                ushort vepVersion   = transcriptReader.Header.VepVersion;

                logger.Write("- loading universal gene archive file... ");
                var ugaGenes   = geneReader.GetGenes();
                var geneForest = CreateGeneForest(ugaGenes, numRefSeqs, assembly);
                logger.WriteLine($"{ugaGenes.Length:N0} loaded.");

                logger.Write("- loading regulatory region file... ");
                var regions = regulatoryReader.GetRegulatoryRegions();
                logger.WriteLine($"{regions.Length:N0} loaded.");

                logger.Write("- loading transcript file... ");
                var allTranscripts        = transcriptReader.GetTranscripts();
                var transcriptsByRefIndex = allTranscripts.GetMultiValueDict(x => x.Chromosome.Index);
                logger.WriteLine($"{allTranscripts.Length:N0} loaded.");

                MarkCanonicalTranscripts(logger, allTranscripts);

                // prediction caches (SIFT and PolyPhen)
                var predictionBuilder = new PredictionCacheBuilder(logger, assembly);
                var predictions       = predictionBuilder.CreatePredictionCaches(transcriptsByRefIndex, siftReader, polyphenReader, numRefSeqs);

                logger.Write("- writing SIFT prediction cache... ");
                predictions.Sift.Write(FileUtilities.GetCreateStream(CacheConstants.SiftPath(_outputCacheFilePrefix)));
                logger.WriteLine("finished.");

                logger.Write("- writing PolyPhen prediction cache... ");
                predictions.PolyPhen.Write(FileUtilities.GetCreateStream(CacheConstants.PolyPhenPath(_outputCacheFilePrefix)));
                logger.WriteLine("finished.");

                // transcript cache
                var transcriptBuilder = new TranscriptCacheBuilder(logger, assembly, source, releaseTicks, vepVersion);
                var stagedTranscripts = transcriptBuilder.CreateTranscriptCache(allTranscripts, regions, geneForest, numRefSeqs);

                logger.Write("- writing transcript cache... ");
                stagedTranscripts.Write(FileUtilities.GetCreateStream(CacheConstants.TranscriptPath(_outputCacheFilePrefix)));
                logger.WriteLine("finished.");
            }

            return ExitCodes.Success;
        }
예제 #10
0
        /// <summary>
        /// Creates a <c>VcfReader</c> over a <c>PeekStream</c> that wraps either standard input
        /// (when <paramref name="vcfPath"/> is "-") or the file at <paramref name="vcfPath"/>.
        /// </summary>
        /// <param name="vcfPath">Path of the VCF file, or "-" to read from standard input.</param>
        /// <param name="chromosomeDictionary">Passed through to the <c>VcfReader</c> constructor.</param>
        /// <param name="refMinorProvider">Passed through to the <c>VcfReader</c> constructor.</param>
        /// <param name="verboseTranscript">Passed through to the <c>VcfReader</c> constructor.</param>
        /// <param name="recomposer">Passed through to the <c>VcfReader</c> constructor.</param>
        /// <returns>The newly created reader.</returns>
        public static IVcfReader GetVcfReader(string vcfPath, IDictionary<string, IChromosome> chromosomeDictionary,
                                              IRefMinorProvider refMinorProvider, bool verboseTranscript, IRecomposer recomposer)
        {
            // "-" selects standard input instead of a file
            var sourceStream = vcfPath == "-"
                ? Console.OpenStandardInput()
                : GZipUtilities.GetAppropriateReadStream(vcfPath);

            var peekableStream = new PeekStream(sourceStream);
            return new VcfReader(peekableStream, chromosomeDictionary, refMinorProvider, verboseTranscript, recomposer);
        }
예제 #11
0
        /// <summary>
        /// Writes <c>HeaderLine</c> followed by every output line produced by the
        /// <c>MitoHeteroplasmyParser</c> to <c>OutFileName</c> in the output directory.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> after all lines have been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            using (var parser = new MitoHeteroplasmyParser(GZipUtilities.GetAppropriateReadStream(_inputFile)))
            using (var outputStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, OutFileName)))
            using (var writer = new StreamWriter(outputStream))
            {
                writer.WriteLine(HeaderLine);

                foreach (var outputLine in parser.GetOutputLines())
                {
                    writer.WriteLine(outputLine);
                }
            }

            return ExitCodes.Success;
        }
예제 #12
0
        /// <summary>
        /// Annotates every position in the configured VCF file and writes the results, preceded by
        /// <c>OutHeader</c>, to the configured output file.
        /// </summary>
        /// <remarks>
        /// Any exception raised while annotating is rethrown after the current VCF line has been
        /// attached to its <c>Data</c> dictionary.
        /// </remarks>
        /// <returns><c>ExitCodes.Success</c> when the whole file has been processed.</returns>
        private ExitCodes ProgramExecution()
        {
            var sequenceProvider   = ProviderUtilities.GetSequenceProvider(ConfigurationSettings.RefSequencePath);
            var transcriptProvider = ProviderUtilities.GetTranscriptAnnotationProvider(ConfigurationSettings.InputCachePrefix, sequenceProvider);
            var annotator          = ProviderUtilities.GetAnnotator(transcriptProvider, sequenceProvider);

            var dataSourceVersions = new List<IDataSourceVersion>();
            dataSourceVersions.AddRange(transcriptProvider.DataSourceVersions);

            using (var outputWriter = new StreamWriter(ConfigurationSettings.OutputFileName))
            using (var vcfReader = new VcfReader(GZipUtilities.GetAppropriateReadStream(ConfigurationSettings.VcfPath), sequenceProvider.GetChromosomeDictionary(), null, false))
            {
                try
                {
                    // mitochondrial annotation: rCRS input on GRCh37, any GRCh38 run, or when forced
                    if ((vcfReader.IsRcrsMitochondrion && annotator.GenomeAssembly == GenomeAssembly.GRCh37) ||
                        annotator.GenomeAssembly == GenomeAssembly.GRCh38 ||
                        ConfigurationSettings.ForceMitochondrialAnnotation)
                    {
                        annotator.EnableMitochondrialAnnotation();
                    }

                    // NOTE: the sorted-VCF check (SortedVcfChecker) is currently disabled
                    int lastChromIndex = -1;
                    outputWriter.WriteLine(OutHeader);

                    while (true)
                    {
                        IPosition position = vcfReader.GetNextPosition();
                        if (position == null)
                        {
                            break;
                        }

                        lastChromIndex = UpdatePerformanceMetrics(lastChromIndex, position.Chromosome);

                        var annotated = annotator.Annotate(position);
                        WriteAnnotatedPostion(annotated, outputWriter);
                    }
                }
                catch (Exception e)
                {
                    // record the offending VCF line before letting the exception propagate
                    e.Data[ExitCodeUtilities.VcfLine] = vcfReader.VcfLine;
                    throw;
                }
            }

            return ExitCodes.Success;
        }
예제 #13
0
        /// <summary>
        /// Parses the dosage sensitivity file and writes its items, tagged with
        /// <c>SaCommon.DosageSensitivityTag</c>, to an NGA file in the output directory.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            var sourceVersion = DataSourceVersionReader.GetSourceVersion(_dosageSensitivityFile + ".version");

            // spaces in the data source name are replaced so the file name has none from that part
            string baseFileName = $"{sourceVersion.Name.Replace(' ','_')}_{sourceVersion.Version}";

            using (var parser = new DosageSensitivityParser(GZipUtilities.GetAppropriateReadStream(_dosageSensitivityFile)))
            using (var ngaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, baseFileName + SaCommon.NgaFileSuffix)))
            using (var ngaWriter = new NgaWriter(ngaStream, sourceVersion, SaCommon.DosageSensitivityTag, SaCommon.SchemaVersion, false))
            {
                ngaWriter.Write(parser.GetItems());
            }

            return ExitCodes.Success;
        }
예제 #14
0
        /// <summary>
        /// Parses the dosage map region file and writes its items, tagged with
        /// <c>SaCommon.DosageSensitivityTag</c> and reported for structural variants, to an NSI file
        /// in the output directory.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            var    sourceVersion    = DataSourceVersionReader.GetSourceVersion(_dosageMapRegionFile + ".version");
            string baseFileName     = $"{sourceVersion.Name.Replace(' ', '_')}_{sourceVersion.Version}";
            var    sequenceProvider = new ReferenceSequenceProvider(GZipUtilities.GetAppropriateReadStream(_inputReferencePath));

            using (var regionParser = new DosageMapRegionParser(GZipUtilities.GetAppropriateReadStream(_dosageMapRegionFile), sequenceProvider.RefNameToChromosome))
            using (var nsiStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, baseFileName + SaCommon.SiFileSuffix)))
            using (var nsiWriter = new NsiWriter(nsiStream, sourceVersion, sequenceProvider.Assembly, SaCommon.DosageSensitivityTag, ReportFor.StructuralVariants, SaCommon.SchemaVersion))
            {
                nsiWriter.Write(regionParser.GetItems());
            }

            return ExitCodes.Success;
        }
예제 #15
0
        /// <summary>
        /// Parses the gene-disease validity file and writes its items, tagged with
        /// <c>SaCommon.DiseaseValidityTag</c>, to a gene annotation file in the output directory.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            var    sourceVersion = DataSourceVersionReader.GetSourceVersion(_diseaseValidityFile + ".version");
            string baseFileName  = $"{sourceVersion.Name.Replace(' ', '_')}_{sourceVersion.Version}";

            // the parser is given the HGNC ID -> gene symbol dictionary from GetHgncIdToGeneSymbols
            using (var validityParser = new GeneDiseaseValidityParser(GZipUtilities.GetAppropriateReadStream(_diseaseValidityFile), GetHgncIdToGeneSymbols()))
            using (var ngaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, baseFileName + SaCommon.GeneFileSuffix)))
            using (var ngaWriter = new NgaWriter(ngaStream, sourceVersion, SaCommon.DiseaseValidityTag, SaCommon.SchemaVersion, true))
            {
                ngaWriter.Write(validityParser.GetItems());
            }

            return ExitCodes.Success;
        }
예제 #16
0
        /// <summary>
        /// Creates the COSMIC structural variant TSV output from the CNV and/or breakend input files.
        /// </summary>
        /// <remarks>
        /// Either input path may be null, in which case a null stream is handed to
        /// <c>CosmicSvReader</c>. The data source version is read from the ".version" file that sits
        /// next to the CNV file, or next to the breakend file when no CNV file was supplied.
        /// </remarks>
        /// <returns><c>ExitCodes.Success</c> after <c>CreateTsv</c> has completed.</returns>
        private ExitCodes ProgramExecution()
        {
            // _cnvTsv is optional (see the null check below); building the version path from it
            // unconditionally would look for a file literally named ".version" on breakend-only runs
            var versionBasePath   = _cnvTsv ?? _breakendTsv;
            var version           = DataSourceVersionReader.GetSourceVersion(versionBasePath + ".version");
            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));

            var cnvStream      = _cnvTsv == null ? null : GZipUtilities.GetAppropriateReadStream(_cnvTsv);
            var breakendStream = _breakendTsv == null ? null : GZipUtilities.GetAppropriateReadStream(_breakendTsv);

            using (var cosmicSvExtractor = new CosmicSvReader(cnvStream, breakendStream, version, _outputDir,
                                                              referenceProvider.GenomeAssembly, referenceProvider.RefNameToChromosome))
            {
                cosmicSvExtractor.CreateTsv();
            }

            return ExitCodes.Success;
        }
예제 #17
0
        /// <summary>
        /// Parses the mitochondrial heteroplasmy input file and writes its items, tagged with
        /// <c>SaCommon.MitoHeteroplasmyTag</c>, to an NSA file and index in the output directory.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            var    sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
            var    sourceVersion    = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");
            string baseFileName     = $"{sourceVersion.Name}_{sourceVersion.Version}";

            using (var heteroplasmyParser = new MitoHeteroplasmyParser(GZipUtilities.GetAppropriateReadStream(_inputFile), sequenceProvider))
            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, baseFileName + SaCommon.SaFileSuffix)))
            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, baseFileName + SaCommon.SaFileSuffix + SaCommon.IndexSufix)))
            using (var nsaWriter = new NsaWriter(nsaStream, indexStream, sourceVersion, sequenceProvider, SaCommon.MitoHeteroplasmyTag, true, false, SaCommon.SchemaVersion, false))
            {
                nsaWriter.Write(heteroplasmyParser.GetItems());
            }

            return ExitCodes.Success;
        }
예제 #18
0
파일: Main.cs 프로젝트: wangdi2014/Nirvana
        /// <summary>
        /// Parses the phyloP input file and writes its items, tagged with <c>SaCommon.PhylopTag</c>,
        /// through an <c>NpdWriter</c> to a data file and index in the output directory.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            var    sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
            var    sourceVersion    = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");
            string baseFileName     = $"{sourceVersion.Name}_{sourceVersion.Version}";

            using (var parser = new PhylopParser(GZipUtilities.GetAppropriateReadStream(_inputFile), sequenceProvider.Assembly, sequenceProvider.RefNameToChromosome))
            using (var dataStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, baseFileName + SaCommon.PhylopFileSuffix)))
            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, baseFileName + SaCommon.PhylopFileSuffix + SaCommon.IndexSufix)))
            using (var npdWriter = new NpdWriter(dataStream, indexStream, sourceVersion, sequenceProvider.Assembly, SaCommon.PhylopTag, SaCommon.SchemaVersion))
            {
                npdWriter.Write(parser.GetItems());
            }

            return ExitCodes.Success;
        }
예제 #19
0
        /// <summary>
        /// Reads the global minor allele input file and writes its items, tagged with
        /// <c>SaCommon.GlobalAlleleTag</c>, to an NSA file and index whose names end in "_globalMinor".
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
            var minorReader      = new GlobalMinorReader(GZipUtilities.GetAppropriateReadStream(_inputFile), sequenceProvider.RefNameToChromosome);
            var sourceVersion    = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

            string baseFileName = $"{sourceVersion.Name}_{sourceVersion.Version}_globalMinor";

            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, baseFileName + SaCommon.SaFileSuffix)))
            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, baseFileName + SaCommon.SaFileSuffix + SaCommon.IndexSufix)))
            using (var nsaWriter = new NsaWriter(nsaStream, indexStream, sourceVersion, sequenceProvider, SaCommon.GlobalAlleleTag, true, false, SaCommon.SchemaVersion, true))
            {
                nsaWriter.Write(minorReader.GetItems());
            }

            return ExitCodes.Success;
        }
예제 #20
0
                        EnsemblIdToSymbol) ParseUniversalGeneArchive()
        {
            var(_, refNameToChromosome, _) = SequenceHelper.GetDictionaries(_inputReferencePath);

            UgaGene[] genes;

            using (var reader = new UgaGeneReader(GZipUtilities.GetAppropriateReadStream(_universalGeneArchivePath),
                                                  refNameToChromosome))
            {
                genes = reader.GetGenes();
            }

            var entrezGeneIdToSymbol = genes.GetGeneIdToSymbol(x => x.EntrezGeneId);
            var ensemblIdToSymbol    = genes.GetGeneIdToSymbol(x => x.EnsemblId);

            return(entrezGeneIdToSymbol, ensemblIdToSymbol);
        }
예제 #21
0
        /// <summary>
        /// Reads the 1000 Genomes input file and writes its items, tagged with
        /// <c>SaCommon.OneKgenTag</c>, to an NSA file and index in the output directory.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
            var sourceVersion    = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

            // spaces anywhere in the generated name are replaced with underscores
            string baseFileName = $"{sourceVersion.Name}_{sourceVersion.Version}".Replace(' ', '_');

            using (var itemReader = new OneKGenReader(GZipUtilities.GetAppropriateReadStream(_inputFile), sequenceProvider))
            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, baseFileName + SaCommon.SaFileSuffix)))
            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, baseFileName + SaCommon.SaFileSuffix + SaCommon.IndexSufix)))
            using (var nsaWriter = new NsaWriter(new ExtendedBinaryWriter(nsaStream), new ExtendedBinaryWriter(indexStream), sourceVersion, sequenceProvider, SaCommon.OneKgenTag, true, false, SaCommon.SchemaVersion, false))
            {
                nsaWriter.Write(itemReader.GetItems());
            }

            return ExitCodes.Success;
        }
예제 #22
0
        /// <summary>
        /// Checks that <c>AnnotationLambda.GetTabixVirtualPosition</c> returns the same offset that the
        /// tabix index reader reports for the start of the requested chr22 range.
        /// </summary>
        public void GetTabixVirtualPosition_AsExpected()
        {
            var annotationConfig = new AnnotationConfig
            {
                vcfUrl          = "anywhere/input.vcf.gz",
                tabixUrl        = Resources.TopPath("Mother_chr22.genome.vcf.gz.tbi"),
                annotationRange = new AnnotationRange(new AnnotationPosition("chr22", 20_000_000),
                                                      new AnnotationPosition("chr22", 30_000_000))
            };

            // both handles on the tabix file are disposed when the test finishes, pass or fail
            using (var tabixStream = FileUtilities.GetReadStream(annotationConfig.tabixUrl))
            using (var indexReader = new BinaryReader(GZipUtilities.GetAppropriateReadStream(annotationConfig.tabixUrl)))
            {
                var expectedPosition = Reader.Read(indexReader, ChromosomeUtilities.RefNameToChromosome).GetOffset("chr22", annotationConfig.annotationRange.Start.Position);

                var virtualPosition = global::AnnotationLambda.AnnotationLambda.GetTabixVirtualPosition(annotationConfig.annotationRange, tabixStream, ChromosomeUtilities.RefNameToChromosome);

                Assert.Equal(expectedPosition, virtualPosition);
            }
        }
        /// <summary>
        /// Loads the intermediate Genbank file into a dictionary, but only for the
        /// GRCh37 / RefSeq combination.
        /// </summary>
        /// <param name="assembly">Genome assembly being processed.</param>
        /// <param name="source">Transcript source being processed.</param>
        /// <returns>
        /// The dictionary produced by <c>GenbankReader.GetIdToGenbank</c>, or null for any other
        /// assembly/source combination.
        /// </returns>
        private static Dictionary<string, GenbankEntry> GetIdToGenbank(GenomeAssembly assembly, Source source)
        {
            bool isGrch37RefSeq = assembly == GenomeAssembly.GRCh37 && source == Source.RefSeq;
            if (!isGrch37RefSeq)
            {
                return null;
            }

            Logger.Write("- loading the intermediate Genbank file... ");

            Dictionary<string, GenbankEntry> entriesById;
            var genbankStream = GZipUtilities.GetAppropriateReadStream(ExternalFiles.GenbankFilePath);
            using (var genbankReader = new IntermediateIO.GenbankReader(genbankStream))
            {
                entriesById = genbankReader.GetIdToGenbank();
            }

            Logger.WriteLine($"{entriesById.Count} entries loaded.");
            return entriesById;
        }
예제 #24
0
        /// <summary>
        /// Reads the ClinVar RCV and VCV files and writes the items, tagged with
        /// <c>SaCommon.ClinvarTag</c>, to an NSA file and index, plus the reader's JSON schema to a
        /// separate schema file, all in the output directory.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> after the items and schema have been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            var    sourceVersion = DataSourceVersionReader.GetSourceVersion(_rcvFile + ".version");
            string baseFileName  = $"{sourceVersion.Name}_{sourceVersion.Version}";

            using (var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference)))
            using (var clinvarReader = new ClinVarReader(GZipUtilities.GetAppropriateReadStream(_rcvFile), GZipUtilities.GetAppropriateReadStream(_vcvFile), sequenceProvider))
            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, baseFileName + SaCommon.SaFileSuffix)))
            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, baseFileName + SaCommon.SaFileSuffix + SaCommon.IndexSufix)))
            using (var nsaWriter = new NsaWriter(nsaStream, indexStream, sourceVersion, sequenceProvider, SaCommon.ClinvarTag, false, true, SaCommon.SchemaVersion, false))
            using (var schemaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, baseFileName + SaCommon.SaFileSuffix + SaCommon.JsonSchemaSuffix)))
            using (var schemaWriter = new StreamWriter(schemaStream))
            {
                // the items are written first; the schema is read from the reader afterwards
                nsaWriter.Write(clinvarReader.GetItems());
                schemaWriter.Write(clinvarReader.JsonSchema);
            }

            return ExitCodes.Success;
        }
예제 #25
0
        /// <summary>
        /// Parses the protein conservation scores file and writes its items through a
        /// <c>ProteinConservationWriter</c> to the output directory; the writer is also given a
        /// stream for "transcriptGroups.txt".
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> after the items have been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            using var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));

            // the transcript data is used to validate the protein sequence
            TranscriptCacheData cacheData = AaConservationUtilities.GetTranscriptData(sequenceProvider.RefIndexToChromosome, _transcriptCachePrefix);

            var    sourceVersion = DataSourceVersionReader.GetSourceVersion(_scoresFile + ".version");
            string baseFileName  = $"{sourceVersion.Name}_{sourceVersion.Version}";

            // read the multi-alignments
            using var scoresStream = GZipUtilities.GetAppropriateReadStream(_scoresFile);
            using var parser       = new ProteinConservationParser(scoresStream);
            using var outputStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, baseFileName + ProteinConservationCommon.FileSuffix));
            using var groupStream  = FileUtilities.GetCreateStream("transcriptGroups.txt");
            using var writer       = new ProteinConservationWriter(outputStream, groupStream, cacheData, sourceVersion);

            writer.Write(parser.GetItems());

            return ExitCodes.Success;
        }
예제 #26
0
        /// <summary>
        /// Creates the SpliceAI supplementary annotation file and its index in
        /// <c>_outputDirectory</c>. Transcript data is read from the transcript cache, splice
        /// intervals and ENST-to-gene-symbol tables are derived from it, and the SpliceAI gene
        /// symbols read from <c>_geneInfoFile</c> are mapped onto them before the input file
        /// is parsed and written.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> after all parsed items have been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));

            // Load the transcript cache, indexed by the reference's chromosomes.
            TranscriptCacheData cacheData;
            string transcriptCachePath = CacheConstants.TranscriptPath(_transcriptCachePrefix);

            using (var cacheReader = new TranscriptCacheReader(FileUtilities.GetReadStream(transcriptCachePath)))
            {
                cacheData = cacheReader.Read(sequenceProvider.RefIndexToChromosome);
            }

            var intervals              = SpliceUtilities.GetSpliceIntervals(sequenceProvider, cacheData);
            var nirvanaEnstToSymbols   = SpliceUtilities.GetEnstToGeneSymbols(sequenceProvider, cacheData);

            // Read the gene symbols used by the SpliceAI gene info file.
            Dictionary<string, string> spliceAiEnstToSymbols;

            using (var geneInfoReader = new StreamReader(GZipUtilities.GetAppropriateReadStream(_geneInfoFile)))
            {
                spliceAiEnstToSymbols = SpliceUtilities.GetSpliceAiGeneSymbols(geneInfoReader);
            }

            var symbolMapping = SpliceUtilities.GetSymbolMapping(spliceAiEnstToSymbols, nirvanaEnstToSymbols);

            Console.WriteLine($"Mapped {symbolMapping.Count} spliceAI gene symbols to Nirvana gene symbols (out of {spliceAiEnstToSymbols.Count})");

            // Output names are "<source name>_<source version>" followed by the SA (and index) suffixes.
            var    sourceVersion = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");
            string baseName      = $"{sourceVersion.Name}_{sourceVersion.Version}";
            string nsaFileName   = baseName + SaCommon.SaFileSuffix;
            string indexFileName = nsaFileName + SaCommon.IndexSufix;

            using (var parser = new SpliceAiParser(GZipUtilities.GetAppropriateReadStream(_inputFile), sequenceProvider, intervals, symbolMapping))
            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName)))
            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, indexFileName)))
            using (var writer = new NsaWriter(nsaStream, indexStream, sourceVersion, sequenceProvider, SaCommon.SpliceAiTag, true, true, SaCommon.SchemaVersion, false))
            {
                writer.Write(parser.GetItems());
            }

            Console.WriteLine($"Total number of entries from Splice AI: {SpliceAiParser.Count}");
            return ExitCodes.Success;
        }
예제 #27
0
        /// <summary>
        /// Creates the SpliceAI supplementary annotation file and its index in
        /// <c>_outputDirectory</c>. Splice intervals and a gene forest are derived from the
        /// transcript cache, gene symbol synonyms are read from <c>_geneInfoFile</c>, and all
        /// of these are passed to the <c>SpliceAiParser</c> whose items are then written.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> after all parsed items have been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));

            // Load the transcript cache, indexed by the reference's chromosomes.
            TranscriptCacheData cacheData;
            string transcriptCachePath = CacheConstants.TranscriptPath(_transcriptCachePrefix);

            using (var cacheReader = new TranscriptCacheReader(FileUtilities.GetReadStream(transcriptCachePath)))
            {
                cacheData = cacheReader.Read(sequenceProvider.RefIndexToChromosome);
            }

            var intervals  = SpliceUtilities.GetSpliceIntervals(sequenceProvider, cacheData);
            var geneForest = SpliceUtilities.GetGeneForest(cacheData);

            Console.WriteLine("Loaded transcripts and generated splice intervals.");

            Dictionary<string, List<string>> synonymsBySymbol;

            using (var synonymParser = new GeneInfoParser(GZipUtilities.GetAppropriateStreamReader(_geneInfoFile)))
            {
                synonymsBySymbol = synonymParser.GetGeneSymbolSynonyms();
            }

            Console.WriteLine("Loaded gene symbol synonyms");

            // Output names are "<source name>_<source version>" followed by the SA (and index) suffixes.
            var    sourceVersion = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");
            string baseName      = $"{sourceVersion.Name}_{sourceVersion.Version}";
            string nsaFileName   = baseName + SaCommon.SaFileSuffix;
            string indexFileName = nsaFileName + SaCommon.IndexSufix;

            using (var parser = new SpliceAiParser(GZipUtilities.GetAppropriateReadStream(_inputFile), sequenceProvider, intervals, geneForest, synonymsBySymbol))
            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName)))
            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, indexFileName)))
            {
                var nsaBinaryWriter   = new ExtendedBinaryWriter(nsaStream);
                var indexBinaryWriter = new ExtendedBinaryWriter(indexStream);

                // NOTE(review): the writer (and the binary writers it wraps) is never disposed
                // here; only the underlying streams are. Confirm whether NsaWriter should be
                // wrapped in a using statement.
                var writer = new NsaWriter(nsaBinaryWriter, indexBinaryWriter, sourceVersion, sequenceProvider, SaCommon.SpliceAiTag, true, true, SaCommon.SchemaVersion, false);
                writer.Write(parser.GetItems());
            }

            Console.WriteLine($"Total number of entries from Splice AI: {SpliceAiParser.Count}");
            return ExitCodes.Success;
        }
예제 #28
0
        /// <summary>
        /// Annotates the VCF named by <c>_vcfPath</c> and writes the JSON result to
        /// <c>_outputFileName</c> + ".json.gz". A value of "-" for either setting selects
        /// standard input / standard output instead of a file; when writing to standard
        /// output no index stream is created and <c>null</c> is passed in its place.
        /// </summary>
        /// <returns>The value returned by <c>StreamAnnotation.Annotate</c>.</returns>
        private static ExitCodes ProgramExecution()
        {
            var annotationResources = GetAnnotationResources();

            // "-" is the sentinel for the standard streams.
            bool readFromStdin = _vcfPath == "-";
            bool writeToStdout = _outputFileName == "-";

            string jasixFileName = writeToStdout ? null : _outputFileName + ".json.gz" + JasixCommons.FileExt;

            using (var inputVcfStream = readFromStdin ? Console.OpenStandardInput() : GZipUtilities.GetAppropriateReadStream(_vcfPath))
            using (var outputJsonStream = writeToStdout ? Console.OpenStandardOutput() : new BlockGZipStream(FileUtilities.GetCreateStream(_outputFileName + ".json.gz"), CompressionMode.Compress))
            using (var outputJsonIndexStream = jasixFileName != null ? FileUtilities.GetCreateStream(jasixFileName) : null)
            {
                // A using statement over a null resource is a no-op on dispose.
                return StreamAnnotation.Annotate(null, inputVcfStream, outputJsonStream, outputJsonIndexStream, annotationResources, new NullVcfFilter(), false, _enableDq);
            }
        }