Exemple #1
0
 /// <summary>
 /// Takes the renamer, sequence and reader exposed by the shared fixture and
 /// creates a OneKGenReader on top of the renamer.
 /// </summary>
 /// <param name="fixture">fixture exposing the Renamer, Sequence and Reader instances</param>
 public MergeSaTests(ChromosomeRenamerFixture fixture)
 {
     _renamer = fixture.Renamer;
     _sequence = fixture.Sequence;
     _reader = fixture.Reader;

     // the 1000 Genomes reader only needs the renamer
     _oneKGenReader = new OneKGenReader(_renamer);
 }
Exemple #2
0
 /// <summary>
 /// Remembers the ClinVar XML file and the compressed sequence, and creates the
 /// DataFileManager and VariantAligner that operate on that sequence.
 /// </summary>
 /// <param name="clinVarXmlFileInfo">the ClinVar XML file to read</param>
 /// <param name="reader">reader handed to the DataFileManager</param>
 /// <param name="compressedSequence">sequence shared by the manager and the aligner</param>
 public ClinVarXmlReader(FileInfo clinVarXmlFileInfo, CompressedSequenceReader reader,
     ICompressedSequence compressedSequence)
 {
     _clinVarXmlFileInfo = clinVarXmlFileInfo;
     _compressedSequence = compressedSequence;

     _dataFileManager = new DataFileManager(reader, compressedSequence);
     _aligner = new VariantAligner(compressedSequence);
 }
Exemple #3
0
        /// <summary>
        /// Returns the chromosome renamer attached to a new CompressedSequence after a
        /// CompressedSequenceReader has been constructed over the supplied stream.
        /// </summary>
        /// <param name="stream">stream handed to the CompressedSequenceReader</param>
        /// <returns>the Renamer of the newly created sequence</returns>
        public static ChromosomeRenamer GetChromosomeRenamer(Stream stream)
        {
            var compressedSequence = new CompressedSequence();

            // The reader is never used again; presumably its constructor fills in the
            // sequence (and therefore the renamer) - TODO confirm.
            // ReSharper disable once UnusedVariable
            var sequenceReader = new CompressedSequenceReader(stream, compressedSequence);

            return compressedSequence.Renamer;
        }
Exemple #4
0
        /// <summary>
        /// Opens the GRCh37 test cache bases resource and exposes the resulting
        /// sequence, reader and renamer through the fixture's properties.
        /// </summary>
        public ChromosomeRenamerFixture()
        {
            var basesPath   = Resources.CacheGRCh37("ENSR00001584270_chr1_Ensembl84_reg.bases");
            var basesStream = ResourceUtilities.GetReadStream(basesPath);

            Sequence = new CompressedSequence();
            Reader = new CompressedSequenceReader(basesStream, Sequence);

            // the renamer is taken from the sequence after the reader has been created
            Renamer = Sequence.Renamer;
        }
Exemple #5
0
        /// <summary>
        /// Loads the interval's chromosome into the reader and extracts the bases
        /// covered by the interval (End - Start + 1 bases, beginning at Start).
        /// </summary>
        /// <param name="logger">receives progress messages</param>
        /// <param name="reader">reader whose Sequence is queried</param>
        /// <param name="interval">chromosome and coordinates to extract</param>
        /// <returns>the extracted reference bases</returns>
        private static string GetReferenceBases(ILogger logger, CompressedSequenceReader reader, IChromosomeInterval interval)
        {
            logger.Write("- retrieving reference bases... ");

            reader.GetCompressedSequence(interval.Chromosome);

            var numBases = interval.End - interval.Start + 1;
            var bases    = reader.Sequence.Substring(interval.Start, numBases);

            logger.WriteLine($"{bases.Length} bases extracted.");
            return bases;
        }
Exemple #6
0
 /// <summary>
 /// Stores the already-opened readers and the transcript cache objects in the
 /// bundle's properties; no further work is done here.
 /// </summary>
 private DataBundle(CompressedSequenceReader sequenceReader, PredictionCacheReader siftReader,
     PredictionCacheReader polyPhenReader, VC.TranscriptCacheData cacheData, VC.TranscriptCache transcriptCache,
     Source source)
 {
     // readers
     SequenceReader = sequenceReader;
     SiftReader = siftReader;
     PolyPhenReader = polyPhenReader;

     // transcript cache and its origin
     TranscriptCacheData = cacheData;
     TranscriptCache = transcriptCache;
     Source = source;
 }
Exemple #7
0
        /// <summary>
        /// Opens "{cacheStub}.bases", asks the reader for the requested reference and
        /// returns the sequence object the reader was constructed with.
        /// </summary>
        /// <param name="cacheStub">path prefix of the cache files (without the ".bases" extension)</param>
        /// <param name="ensemblRefName">reference name passed to the reader</param>
        /// <returns>the compressed sequence associated with the reader</returns>
        public static ICompressedSequence GetCompressedSequence(string cacheStub, string ensemblRefName)
        {
            var stream             = ResourceUtilities.GetReadStream(cacheStub + ".bases");
            var compressedSequence = new CompressedSequence();

            // the reader is only needed while the reference is being loaded
            using (var sequenceReader = new CompressedSequenceReader(stream, compressedSequence))
            {
                sequenceReader.GetCompressedSequence(ensemblRefName);
            }

            return compressedSequence;
        }
Exemple #8
0
        /// <summary>
        /// Records the input prefix and output path, opens the compressed reference and
        /// prepares the renamer, data file manager and processed-gene set.
        /// </summary>
        /// <param name="inputPrefix">stored in _inputPrefix</param>
        /// <param name="refSeqPath">path of the compressed reference file to open</param>
        /// <param name="outPath">stored in _outPath</param>
        public OmimVcfCreator(string inputPrefix, string refSeqPath, string outPath)
        {
            _inputPrefix = inputPrefix;
            _outPath = outPath;
            _processedGeneSet = new HashSet<string>();

            _compressedSequence = new CompressedSequence();

            var referenceStream = FileUtilities.GetReadStream(refSeqPath);
            var sequenceReader  = new CompressedSequenceReader(referenceStream, _compressedSequence);

            // both are read/created only after the reader has been constructed
            _renamer = _compressedSequence.Renamer;
            _dataFileManager = new DataFileManager(sequenceReader, _compressedSequence);
        }
Exemple #9
0
        /// <summary>
        /// Writes the supplied reference bases for one chromosome to a new compressed
        /// sequence file, reusing the metadata, cytogenetic bands and assembly of the reader.
        /// </summary>
        /// <param name="logger">receives progress messages</param>
        /// <param name="outputPath">file that is created for the output</param>
        /// <param name="reader">source of the metadata list, cytogenetic bands and assembly</param>
        /// <param name="chromosome">chromosome whose Ensembl name labels the written sequence</param>
        /// <param name="referenceBases">bases to write</param>
        /// <param name="offset">offset passed through to the writer</param>
        private static void WriteReference(ILogger logger, string outputPath, CompressedSequenceReader reader,
            IChromosome chromosome, string referenceBases, int offset)
        {
            logger.Write("- writing reference bases... ");

            var bands        = new CytogeneticBands(reader.CytogeneticBands);
            var outputStream = FileUtilities.GetCreateStream(outputPath);

            using (var sequenceWriter = new CompressedSequenceWriter(outputStream, reader.ReferenceMetadataList, bands, reader.Assembly))
            {
                sequenceWriter.Write(chromosome.EnsemblName, referenceBases, offset);
            }

            logger.WriteLine("finished.");
        }
Exemple #10
0
        /// <summary>
        /// Prepares the expected data used by the read/write tests: a ClinVar data source
        /// version, three dbSNP-annotated positions (100, 101 and 102), one supplementary
        /// interval, and a supplementary annotation file written to a random temp path.
        /// </summary>
        /// <param name="fixture">fixture exposing the shared Renamer, Sequence and Reader</param>
        public SaReadWriteTests(ChromosomeRenamerFixture fixture)
        {
            _renamer = fixture.Renamer;
            _sequence = fixture.Sequence;
            _reader = fixture.Reader;

            // expected data source versions
            var clinVarReleaseTicks = DateTime.Parse("2015-01-19").Ticks;
            _expectedDataSourceVersion = new DataSourceVersion("ClinVar", "13.5", clinVarReleaseTicks);
            _expectedDataSourceVersions = new List<DataSourceVersion> { _expectedDataSourceVersion };

            // expected supplementary annotations: position 100 carries dbSNP id 1 ...
            var firstDbSnp = new DbSnpAnnotation { DbSnp = new List<long> { 1 } };
            _expectedAnnotation1 = new SupplementaryAnnotationPosition(100);
            var firstCreator = new SupplementaryPositionCreator(_expectedAnnotation1);
            firstCreator.AddExternalDataToAsa(DataSourceCommon.DataSource.DbSnp, AltAllele, firstDbSnp);

            // ... position 101 carries dbSNP id 2 ...
            var secondDbSnp = new DbSnpAnnotation { DbSnp = new List<long> { 2 } };
            _expectedAnnotation2 = new SupplementaryAnnotationPosition(101);
            var secondCreator = new SupplementaryPositionCreator(_expectedAnnotation2);
            secondCreator.AddExternalDataToAsa(DataSourceCommon.DataSource.DbSnp, AltAllele, secondDbSnp);

            // ... and position 102 carries dbSNP id 3
            var thirdDbSnp = new DbSnpAnnotation { DbSnp = new List<long> { 3 } };
            _expectedAnnotation3 = new SupplementaryAnnotationPosition(102);
            var thirdCreator = new SupplementaryPositionCreator(_expectedAnnotation3);
            thirdCreator.AddExternalDataToAsa(DataSourceCommon.DataSource.DbSnp, AltAllele, thirdDbSnp);

            // expected interval on chr1
            _expectedInterval = new SupplementaryInterval(1, 1000, "chr1", null, VariantType.copy_number_variation, null, _renamer);

            // write the supplementary annotation file to a random temp location
            var tempDirectory = Path.GetTempPath();
            _randomPath = Path.Combine(tempDirectory, Path.GetRandomFileName());
            WriteSupplementaryAnnotationFile(_randomPath);
        }
Exemple #11
0
                       refNameToChromosome, int numRefSeqs) GetDictionaries(string referencePath)
        {
            // NOTE(review): the start of this signature is not visible in this excerpt; the
            // comments below describe only the body.
            IDictionary <ushort, IChromosome> refIndexToChromosome;
            IDictionary <string, IChromosome> refNameToChromosome;
            int numRefSeqs;

            // The reader is only needed long enough to copy out its two chromosome lookups
            // and the reference sequence count; it is disposed before returning.
            using (var reader = new CompressedSequenceReader(PersistentStreamUtils.GetReadStream(referencePath)))
            {
                refIndexToChromosome = reader.RefIndexToChromosome;
                refNameToChromosome  = reader.RefNameToChromosome;
                numRefSeqs           = reader.NumRefSeqs;
            }

            // returned as a tuple: (index lookup, name lookup, number of reference sequences)
            return(refIndexToChromosome, refNameToChromosome, numRefSeqs);
        }
Exemple #12
0
        /// <summary>
        /// Creates the annotation source: sets up the helper objects, wires the data file
        /// manager's Changed event to LoadData and loads the transcript cache from the stream.
        /// </summary>
        /// <param name="transcriptCacheStream">stream the transcript cache is loaded from</param>
        /// <param name="compressedSequenceReader">reader handed to the DataFileManager</param>
        public PianoAnnotationSource(Stream transcriptCacheStream, CompressedSequenceReader compressedSequenceReader)
        {
            OverlappingTranscripts = new List<Transcript>();
            _performanceMetrics = PerformanceMetrics.Instance;

            // NOTE(review): this sequence is created here rather than taken from the caller,
            // so it may differ from the one the reader was built with - confirm that
            // _renamer is populated as expected.
            _compressedSequence = new CompressedSequence();

            var fileManager = new DataFileManager(compressedSequenceReader, _compressedSequence);
            fileManager.Changed += LoadData;
            _dataFileManager = fileManager;

            _renamer = _compressedSequence.Renamer;
            _aminoAcids = new AminoAcids();
            _vid = new VID();

            // must run last: it needs the renamer's reference sequence count
            LoadTranscriptCache(transcriptCacheStream, _renamer.NumRefSeqs, out _transcriptIntervalForest);
        }
Exemple #13
0
        /// <summary>
        /// Reads the reference metadata from a compressed reference file and returns the
        /// UCSC name of every entry, in metadata order.
        /// </summary>
        /// <param name="compressedReferencePath">path of the compressed reference file</param>
        /// <returns>one UCSC name per metadata entry</returns>
        private static string[] GetUcscReferenceNames(string compressedReferencePath)
        {
            var sequence = new CompressedSequence();
            string[] ucscNames;

            using (var sequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(compressedReferencePath), sequence))
            {
                ucscNames = new string[sequenceReader.Metadata.Count];

                var index = 0;
                while (index < sequenceReader.Metadata.Count)
                {
                    ucscNames[index] = sequenceReader.Metadata[index].UcscName;
                    index++;
                }
            }

            return ucscNames;
        }
Exemple #14
0
        /// <summary>
        /// Builds a PianoAnnotationSource from the embedded ".ndb" and ".bases" resources
        /// that share the given resource path.
        /// </summary>
        /// <param name="resourcePath">resource name without extension; may be null</param>
        /// <returns>the annotation source, or null when no resource path was given</returns>
        private static PianoAnnotationSource GetAnnotationSource(string resourcePath)
        {
            if (resourcePath == null) return null;

            const string resourceRoot = "UnitTests.Resources.";

            // the transcript cache stream is opened before the reference stream
            var cacheStream     = ResourceUtilities.GetResourceStream(resourceRoot + resourcePath + ".ndb");
            var referenceStream = ResourceUtilities.GetResourceStream(resourceRoot + resourcePath + ".bases");

            var sequence       = new CompressedSequence();
            var sequenceReader = new CompressedSequenceReader(referenceStream, sequence);

            return new PianoAnnotationSource(cacheStream, sequenceReader);
        }
Exemple #15
0
        /// <summary>
        /// Opens the reference, SIFT and PolyPhen readers, loads the transcript cache and
        /// packages everything into a DataBundle. The three readers stay open and are handed
        /// to the bundle; only the transcript cache reader is disposed here.
        /// </summary>
        /// <param name="referencePath">path of the compressed reference file</param>
        /// <param name="cachePrefix">prefix used to derive the SIFT, PolyPhen and transcript cache paths</param>
        /// <returns>the populated data bundle</returns>
        public static DataBundle GetDataBundle(string referencePath, string cachePrefix)
        {
            var sequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(referencePath));

            var siftStream = FileUtilities.GetReadStream(CacheConstants.SiftPath(cachePrefix));
            var siftReader = new PredictionCacheReader(siftStream, PredictionCacheReader.SiftDescriptions);

            var polyPhenStream = FileUtilities.GetReadStream(CacheConstants.PolyPhenPath(cachePrefix));
            var polyPhenReader = new PredictionCacheReader(polyPhenStream, PredictionCacheReader.PolyphenDescriptions);

            VC.TranscriptCacheData transcriptData;
            VC.TranscriptCache transcriptCache;
            Source transcriptSource;

            var transcriptStream = FileUtilities.GetReadStream(CacheConstants.TranscriptPath(cachePrefix));
            using (var transcriptReader = new TranscriptCacheReader(transcriptStream))
            {
                transcriptData   = transcriptReader.Read(sequenceReader.RefIndexToChromosome);
                transcriptCache  = transcriptData.GetCache();
                transcriptSource = transcriptReader.Header.Source;
            }

            return new DataBundle(sequenceReader, siftReader, polyPhenReader, transcriptData, transcriptCache, transcriptSource);
        }
Exemple #16
0
        /// <summary>
        /// Checks that requesting the "chrBob" chromosome leaves the sequence empty, and
        /// that requesting "chrTestSeq" yields 53 bases with GRCh37 assembly and
        /// cytogenetic bands available on the reader.
        /// </summary>
        public void GetCompressedSequence()
        {
            var referenceStream = ResourceUtilities.GetReadStream(Resources.TopPath("TestSeq_reference.dat"));

            using (var sequenceReader = new CompressedSequenceReader(referenceStream))
            {
                var loadedSequence = sequenceReader.Sequence;

                // first request: expected to leave the sequence empty
                var missingChromosome = new Chromosome("chrBob", "Bob", 0);
                sequenceReader.GetCompressedSequence(missingChromosome);

                Assert.Equal(0, loadedSequence.Length);

                // second request: expected to populate the sequence
                var testChromosome = new Chromosome("chrTestSeq", "TestSeq", 0);
                sequenceReader.GetCompressedSequence(testChromosome);

                Assert.NotNull(sequenceReader.CytogeneticBands);
                Assert.Equal(GenomeAssembly.GRCh37, sequenceReader.Assembly);
                Assert.Equal(53, loadedSequence.Length);
            }
        }
Exemple #17
0
        /// <summary>
        /// Builds the reference index from a compressed reference file: captures the
        /// chromosome renamer and registers both the UCSC and the Ensembl name of every
        /// reference sequence under its index.
        /// </summary>
        /// <param name="compressedReferencePath">path of the compressed reference file</param>
        public ReferenceIndex(string compressedReferencePath)
        {
            var compressedSequence = new CompressedSequence();

            _referenceSequenceIndices = new Dictionary<string, ushort>();

            // The reader (and the file stream it wraps) is only needed while the metadata is
            // copied out, so it is disposed here instead of being left open.
            using (var compressedSequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(compressedReferencePath), compressedSequence))
            {
                _renamer = compressedSequence.Renamer;

                var referenceMetadataList = compressedSequenceReader.Metadata;

                NumReferenceSeqs = (ushort)referenceMetadataList.Count;

                for (ushort refIndex = 0; refIndex < NumReferenceSeqs; refIndex++)
                {
                    var refMetadata = referenceMetadataList[refIndex];

                    // each sequence can be looked up by either naming convention
                    AddReferenceSequence(refMetadata.UcscName, refIndex);
                    AddReferenceSequence(refMetadata.EnsemblName, refIndex);
                }
            }
        }
Exemple #18
0
        /// <summary>
        /// Checks that the reader reports GRCh37, that requesting "chrBob" leaves the
        /// sequence empty without cytogenetic bands, and that requesting "chrTestSeq"
        /// yields the expected 53 bases together with cytogenetic bands.
        /// </summary>
        public void GetCompressedSequence()
        {
            var referenceStream = ResourceUtilities.GetReadStream(Resources.TopPath("TestSeq_reference.dat"));

            using (var sequenceReader = new CompressedSequenceReader(referenceStream))
            {
                Assert.Equal(GenomeAssembly.GRCh37, sequenceReader.Assembly);
                var loadedSequence = sequenceReader.Sequence;

                // first request: expected to leave the sequence empty
                var missingChromosome = new Chromosome("chrBob", "Bob", null, null, 1, 1);
                sequenceReader.GetCompressedSequence(missingChromosome);

                Assert.Null(loadedSequence.CytogeneticBands);
                Assert.Equal(0, loadedSequence.Length);

                // second request: expected to populate the sequence
                var testChromosome = new Chromosome("chrTestSeq", "TestSeq", null, null, 1, 0);
                sequenceReader.GetCompressedSequence(testChromosome);

                // asks for 100 bases although only 53 are expected back
                var observedBases = loadedSequence.Substring(0, 100);

                Assert.NotNull(loadedSequence.CytogeneticBands);
                Assert.Equal(53, loadedSequence.Length);
                Assert.Equal("NNATGTTTCCACTTTCTCCTCATTAGANNNTAACGAATGGGTGATTTCCCTAN", observedBases);
            }
        }
Exemple #19
0
 /// <summary>
 /// Creates the provider over a reference stream: starts with an empty current
 /// chromosome and wraps the reader's cytogenetic bands.
 /// </summary>
 /// <param name="stream">stream handed to the CompressedSequenceReader</param>
 public ReferenceSequenceProvider(Stream stream)
 {
     // no chromosome has been loaded yet
     _currentChromosome = new EmptyChromosome(string.Empty);

     var sequenceReader = new CompressedSequenceReader(stream);
     _sequenceReader = sequenceReader;
     _cytogeneticBands = new CytogeneticBands(sequenceReader.CytogeneticBands);
 }
 /// <summary>
 /// Takes the shared sequence and reader from the fixture.
 /// </summary>
 /// <param name="fixture">fixture exposing the Sequence and Reader instances</param>
 public ClinVarXmlReaderTests(ChromosomeRenamerFixture fixture)
 {
     var sharedSequence = fixture.Sequence;
     var sharedReader   = fixture.Reader;

     _sequence = sharedSequence;
     _reader = sharedReader;
 }
Exemple #21
0
        /// <summary>
        /// Prepares the creation of the supplementary annotation database: opens the
        /// compressed reference, applies the optional chromosome whitelist, and registers
        /// one item enumerator (plus a data source version) per supplied input file.
        /// Enumerators that contain no items are dropped before the constructor returns.
        /// </summary>
        /// <param name="compressedReferencePath">path of the compressed reference file</param>
        /// <param name="nsdBaseFileName">stored in _nsdBaseFileName</param>
        /// <param name="dbSnpFileName">optional dbSNP input file</param>
        /// <param name="cosmicVcfFile">optional COSMIC VCF file; only used when cosmicTsvFile is also given</param>
        /// <param name="cosmicTsvFile">optional COSMIC TSV file; only used when cosmicVcfFile is also given</param>
        /// <param name="clinVarFileName">optional ClinVar XML file; read completely and sorted up front</param>
        /// <param name="oneKGenomeAfFileName">optional input file for OneKGenReader</param>
        /// <param name="evsFileName">optional input file for EvsReader</param>
        /// <param name="exacFileName">optional input file for ExacReader</param>
        /// <param name="customFiles">optional list of custom annotation files</param>
        /// <param name="dgvFileName">optional input file for DgvReader</param>
        /// <param name="oneKSvFileName">optional input file for OneKGenSvReader</param>
        /// <param name="clinGenFileName">optional input file for ClinGenReader</param>
        /// <param name="chrWhiteList">optional comma-separated list of reference names to restrict the output to</param>
        public CreateSupplementaryDatabase(
            string compressedReferencePath,
            string nsdBaseFileName,
            string dbSnpFileName        = null,
            string cosmicVcfFile        = null,
            string cosmicTsvFile        = null,
            string clinVarFileName      = null,
            string oneKGenomeAfFileName = null,
            string evsFileName          = null,
            string exacFileName         = null,
            List<string> customFiles    = null,
            string dgvFileName          = null,
            string oneKSvFileName       = null,
            string clinGenFileName      = null,
            string chrWhiteList         = null)
        {
            _nsdBaseFileName = nsdBaseFileName;
            _dataSources     = new List<DataSourceVersion>();

            _iSupplementaryDataItemList = new List<IEnumerator<SupplementaryDataItem>>();
            _supplementaryIntervalList  = new List<SupplementaryInterval>();

            Console.WriteLine("Creating supplementary annotation files... Data version: {0}, schema version: {1}", SupplementaryAnnotationCommon.DataVersion, SupplementaryAnnotationCommon.SchemaVersion);

            _compressedSequence = new CompressedSequence();
            var compressedSequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(compressedReferencePath), _compressedSequence);

            _renamer         = _compressedSequence.Renamer;
            _dataFileManager = new DataFileManager(compressedSequenceReader, _compressedSequence);

            if (!string.IsNullOrEmpty(chrWhiteList))
            {
                Console.WriteLine("Creating SA for the following chromosomes only:");
                foreach (var refSeq in chrWhiteList.Split(','))
                {
                    // NOTE(review): assumes ChromosomeWhiteList is non-null here; the else
                    // branch below sets this static to null - confirm it is re-created elsewhere.
                    InputFileParserUtilities.ChromosomeWhiteList.Add(_renamer.GetEnsemblReferenceName(refSeq));
                    Console.Write(refSeq + ",");
                }
                Console.WriteLine();
            }
            else
            {
                InputFileParserUtilities.ChromosomeWhiteList = null;
            }

            if (dbSnpFileName != null)
            {
                AddSourceVersion(dbSnpFileName);

                var dbSnpReader     = new DbSnpReader(new FileInfo(dbSnpFileName), _renamer);
                var dbSnpEnumerator = dbSnpReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(dbSnpEnumerator);
            }

            // COSMIC needs both the VCF and the TSV file
            if (cosmicVcfFile != null && cosmicTsvFile != null)
            {
                AddSourceVersion(cosmicVcfFile);

                var cosmicReader     = new MergedCosmicReader(cosmicVcfFile, cosmicTsvFile, _renamer);
                var cosmicEnumerator = cosmicReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(cosmicEnumerator);
            }

            if (oneKGenomeAfFileName != null)
            {
                AddSourceVersion(oneKGenomeAfFileName);

                var oneKGenReader     = new OneKGenReader(new FileInfo(oneKGenomeAfFileName), _renamer);
                var oneKGenEnumerator = oneKGenReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(oneKGenEnumerator);
            }

            if (oneKSvFileName != null)
            {
                // the source version is only added here when the allele frequency file
                // above has not already added one
                if (oneKGenomeAfFileName == null)
                {
                    AddSourceVersion(oneKSvFileName);
                }

                var oneKGenSvReader     = new OneKGenSvReader(new FileInfo(oneKSvFileName), _renamer);
                var oneKGenSvEnumerator = oneKGenSvReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(oneKGenSvEnumerator);
            }

            if (evsFileName != null)
            {
                AddSourceVersion(evsFileName);

                var evsReader     = new EvsReader(new FileInfo(evsFileName), _renamer);
                var evsEnumerator = evsReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(evsEnumerator);
            }

            if (exacFileName != null)
            {
                AddSourceVersion(exacFileName);

                var exacReader     = new ExacReader(new FileInfo(exacFileName), _renamer);
                var exacEnumerator = exacReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(exacEnumerator);
            }

            if (clinVarFileName != null)
            {
                AddSourceVersion(clinVarFileName);

                var clinVarReader = new ClinVarXmlReader(new FileInfo(clinVarFileName), compressedSequenceReader, _compressedSequence);

                // ClinVar is materialized and sorted before it is enumerated
                var clinVarList = clinVarReader.ToList();

                clinVarList.Sort();
                Console.WriteLine($"{clinVarList.Count} clinvar items read form XML file");

                IEnumerator<ClinVarItem> clinVarEnumerator = clinVarList.GetEnumerator();
                _iSupplementaryDataItemList.Add(clinVarEnumerator);
            }

            if (dgvFileName != null)
            {
                AddSourceVersion(dgvFileName);

                var dgvReader     = new DgvReader(new FileInfo(dgvFileName), _renamer);
                var dgvEnumerator = dgvReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(dgvEnumerator);
            }

            if (clinGenFileName != null)
            {
                AddSourceVersion(clinGenFileName);

                var clinGenReader     = new ClinGenReader(new FileInfo(clinGenFileName), _renamer);
                var clinGenEnumerator = clinGenReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(clinGenEnumerator);
            }

            if (customFiles != null)
            {
                foreach (var customFile in customFiles)
                {
                    AddSourceVersion(customFile);

                    var customReader     = new CustomAnnotationReader(new FileInfo(customFile), _renamer);
                    var customEnumerator = customReader.GetEnumerator();
                    _iSupplementaryDataItemList.Add(customEnumerator);
                }
            }

            // Advance every enumerator to its first item and drop the ones that are empty.
            // The removals are deferred to a second pass: removing from the list while it is
            // being enumerated throws InvalidOperationException.
            var emptyEnumerators = new List<IEnumerator<SupplementaryDataItem>>();
            foreach (var iDataEnumerator in _iSupplementaryDataItemList)
            {
                if (!iDataEnumerator.MoveNext())
                {
                    emptyEnumerators.Add(iDataEnumerator);
                }
            }

            foreach (var emptyEnumerator in emptyEnumerators)
            {
                _iSupplementaryDataItemList.Remove(emptyEnumerator);
            }

            _additionalItemsList = new List<SupplementaryDataItem>();
        }
Exemple #22
0
 /// <summary>
 /// Creates the provider over a reference stream and exposes the reader's sequence
 /// through the Sequence property.
 /// </summary>
 /// <param name="stream">stream handed to the CompressedSequenceReader</param>
 public ReferenceSequenceProvider(Stream stream)
 {
     var sequenceReader = new CompressedSequenceReader(stream);

     _sequenceReader = sequenceReader;
     Sequence = sequenceReader.Sequence;
 }
Exemple #23
0
 /// <summary>
 /// Stores the reader and the compressed sequence for later use.
 /// </summary>
 /// <param name="reader">compressed sequence reader kept by the manager</param>
 /// <param name="compressedSequence">compressed sequence kept by the manager</param>
 public DataFileManager(CompressedSequenceReader reader, ICompressedSequence compressedSequence)
 {
     _compressedSequenceReader = reader;
     _compressedSequence = compressedSequence;
 }
Exemple #24
0
        /// <summary>
        /// Annotates every variant of the configured VCF file and writes the results to
        /// "{OutputFileName}.txt.gz". The VCF must be grouped by reference sequence: a
        /// reference name that reappears after another one has started raises a
        /// FileNotSortedException.
        /// </summary>
        /// <exception cref="FileNotSortedException">the VCF file is not sorted by reference name</exception>
        protected override void ProgramExecution()
        {
            var    processedReferences = new HashSet<string>();
            string previousReference   = null;

            Console.WriteLine("Running Nirvana on {0}:", Path.GetFileName(ConfigurationSettings.VcfPath));

            var outputFilePath         = ConfigurationSettings.OutputFileName + ".txt.gz";
            var annotationCreationTime = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss");

            // NOTE(review): LiteVcfReader is left undisposed because its definition is not
            // visible here - wrap it in a using statement if it implements IDisposable.
            var reader = new LiteVcfReader(ConfigurationSettings.VcfPath);

            var compressedSequence = new CompressedSequence();

            // Both the reference reader and the transcript cache stream hold file handles;
            // they are released once annotation finishes, even when it fails.
            using (var compressedSequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(ConfigurationSettings.CompressedReferencePath), compressedSequence))
            using (var transcriptCacheStream = new FileStream(CacheConstants.TranscriptPath(ConfigurationSettings.InputCachePrefix),
                                                              FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                // A failure to build the annotation source surfaces as an exception from the
                // constructor, so no null check is needed afterwards.
                var annotator = new PianoAnnotationSource(transcriptCacheStream, compressedSequenceReader);

                if (ConfigurationSettings.ForceMitochondrialAnnotation || reader.IsRcrsMitochondrion)
                {
                    annotator.EnableMitochondrialAnnotation();
                }

                using (var writer = GZipUtilities.GetStreamWriter(outputFilePath))
                {
                    WriteHeader(writer, annotationCreationTime);
                    string vcfLine = null;

                    try
                    {
                        while ((vcfLine = reader.ReadLine()) != null)
                        {
                            var vcfVariant = CreateVcfVariant(vcfLine, reader.IsGatkGenomeVcf);

                            // lines that do not produce a variant are skipped
                            if (vcfVariant == null)
                            {
                                continue;
                            }

                            // check if the vcf is sorted: a reference seen earlier must not
                            // come back after a different reference has been processed
                            var currentReference = vcfVariant.ReferenceName;
                            if (currentReference != previousReference && processedReferences.Contains(currentReference))
                            {
                                throw new FileNotSortedException(
                                          "The current input vcf file is not sorted. Please sort the vcf file before running variant annotation using a tool like vcf-sort in vcftools.");
                            }

                            // HashSet.Add is a no-op for references that are already present
                            processedReferences.Add(currentReference);
                            previousReference = currentReference;

                            var annotatedVariant = annotator.Annotate(vcfVariant);

                            writer.Write(annotatedVariant.ToString());
                        }
                    }
                    catch (Exception e)
                    {
                        // embed the vcf line so the failing record can be identified upstream
                        e.Data["VcfLine"] = vcfLine;
                        throw;
                    }
                }
            }
        }
        /// <summary>
        /// Converts the VEP cache directories into the intermediate pre-cache files
        /// (SIFT, PolyPhen, transcripts, regulatory regions and a transcript merge log),
        /// processing the reference sequences in index order.
        /// </summary>
        /// <returns>ExitCodes.Success once all reference sequences have been processed</returns>
        private static ExitCodes ProgramExecution()
        {
            var transcriptSource = GetSource(_transcriptSource);

            // the reference reader wraps a file stream and is released when the conversion ends
            using (var sequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(_inputReferencePath)))
            {
                var vepRootDirectory = new VepRootDirectory(sequenceReader.RefNameToChromosome);
                var refIndexToVepDir = vepRootDirectory.GetRefIndexToVepDir(_inputVepDirectory);

                var  genomeAssembly  = GenomeAssemblyHelper.Convert(_genomeAssembly);
                long vepReleaseTicks = DateTime.Parse(_vepReleaseDate).Ticks;
                var  idToGenbank     = GetIdToGenbank(genomeAssembly, transcriptSource);

                // =========================
                // create the pre-cache file
                // =========================

                // process each VEP directory
                int numRefSeqs = sequenceReader.NumRefSeqs;
                var header     = new IntermediateIoHeader(_vepVersion, vepReleaseTicks, transcriptSource, genomeAssembly, numRefSeqs);

                string siftPath       = _outputStub + ".sift.gz";
                string polyphenPath   = _outputStub + ".polyphen.gz";
                string transcriptPath = _outputStub + ".transcripts.gz";
                string regulatoryPath = _outputStub + ".regulatory.gz";

                using (var mergeLogger = new TranscriptMergerLogger(FileUtilities.GetCreateStream(_outputStub + ".merge_transcripts.log")))
                using (var siftWriter = new PredictionWriter(GZipUtilities.GetStreamWriter(siftPath), header, IntermediateIoCommon.FileType.Sift))
                using (var polyphenWriter = new PredictionWriter(GZipUtilities.GetStreamWriter(polyphenPath), header, IntermediateIoCommon.FileType.Polyphen))
                using (var transcriptWriter = new MutableTranscriptWriter(GZipUtilities.GetStreamWriter(transcriptPath), header))
                using (var regulatoryRegionWriter = new RegulatoryRegionWriter(GZipUtilities.GetStreamWriter(regulatoryPath), header))
                {
                    var converter           = new VepCacheParser(transcriptSource);
                    var emptyPredictionDict = new Dictionary<string, List<int>>();

                    for (ushort refIndex = 0; refIndex < numRefSeqs; refIndex++)
                    {
                        var chromosome = sequenceReader.RefIndexToChromosome[refIndex];

                        // reference sequences without a VEP directory still get an (empty)
                        // entry in both prediction files
                        if (!refIndexToVepDir.TryGetValue(refIndex, out string vepSubDir))
                        {
                            siftWriter.Write(chromosome, emptyPredictionDict);
                            polyphenWriter.Write(chromosome, emptyPredictionDict);
                            continue;
                        }

                        Console.WriteLine("Parsing reference sequence [{0}]:", chromosome.UcscName);

                        var rawData                 = converter.ParseDumpDirectory(chromosome, vepSubDir);
                        var mergedTranscripts       = TranscriptMerger.Merge(mergeLogger, rawData.Transcripts, idToGenbank);
                        var mergedRegulatoryRegions = RegulatoryRegionMerger.Merge(rawData.RegulatoryRegions);

                        int numRawTranscripts    = rawData.Transcripts.Count;
                        int numMergedTranscripts = mergedTranscripts.Count;
                        Console.WriteLine($"- # merged transcripts: {numMergedTranscripts}, # total transcripts: {numRawTranscripts}");

                        WriteTranscripts(transcriptWriter, mergedTranscripts);
                        WriteRegulatoryRegions(regulatoryRegionWriter, mergedRegulatoryRegions);
                        WritePredictions(siftWriter, mergedTranscripts, x => x.SiftData, chromosome);
                        WritePredictions(polyphenWriter, mergedTranscripts, x => x.PolyphenData, chromosome);
                    }
                }

                Console.WriteLine("\n{0} directories processed.", refIndexToVepDir.Count);
            }

            return ExitCodes.Success;
        }
Exemple #26
0
        /// <summary>
        /// Entry point of the ClinVar web verifier. Iterates over the entries of a ClinVar
        /// XML file, skipping everything up to and including the hard-coded resume accession.
        /// The comparison against the ClinVar web pages is currently disabled (see the
        /// commented-out section), so the pass and fail counts stay at zero.
        /// </summary>
        /// <param name="args">args[0] is the ClinVar XML file, args[1] the compressed reference file</param>
        public static void Main(string[] args)
        {
            if (args.Length < 2)
            {
                Console.WriteLine("Usage: ClinVarWebVarifier [clinvar xml] [compressed ref]");
                return;
            }

            int passCount = 0, failCount = 0;
            var compressedSequence = new CompressedSequence();

            // the reference reader wraps a file stream and is released when the run ends
            using (var compressedSequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(args[1]), compressedSequence))
            {
                // only referenced by the disabled web check below
                const string clinVarWebBase = "http://www.ncbi.nlm.nih.gov/clinvar/";

                var clinVarXmlReader = new ClinVarXmlReader(new FileInfo(args[0]), compressedSequenceReader,
                                                            compressedSequence);

                Console.WriteLine("Acc\tField\tWeb\tXML");

                // the resume point. We will skip entries before this.
                var  restartAfter       = "RCV000186558";
                int  restartCount       = 0;
                bool resumeVerificaiton = false;

                foreach (var clinVarItem in clinVarXmlReader)
                {
                    // the accession is the part of the ID before the first '.'
                    var rcv = clinVarItem.ID.Split('.')[0];

                    if (!resumeVerificaiton)
                    {
                        // the resume accession itself is skipped as well; verification
                        // starts with the entry after it
                        if (restartAfter == rcv)
                        {
                            resumeVerificaiton = true;
                            Console.WriteLine($"Restarting at {rcv}. Skipped {restartCount} entries.");
                        }
                        restartCount++;
                        continue;
                    }

                    // .NET Core doesn't support WebClient, we should change to HttpClient
                    //using (var client = new WebClient())
                    //{
                    //    string xmlString;
                    //    try
                    //    {
                    //        xmlString = client.DownloadString(clinVarWebBase + rcv);
                    //    }
                    //    catch (Exception e)
                    //    {
                    //        Console.WriteLine(e.ToString());
                    //        Console.WriteLine($"no of entries passed {passCount}. Failed {failCount}");
                    //        Task.Delay(11000);//wait 11 seconds
                    //        continue;//we resume from the next item
                    //    }


                    //    if (!HasSameVersions(clinVarItem.ID, xmlString)) continue;

                    //    if (!CheckPubmedIds(xmlString, clinVarItem))
                    //        failCount++;


                    //    if (!CheckDiseaseDbIds(xmlString, @"<a href=""https://www.ncbi.nlm.nih.gov/medgen/([A-Za-z0-9]+)", clinVarItem.MedGenIDs.First(), rcv, "MedGen"))
                    //        failCount++;

                    //    if (!CheckDiseaseDbIds(xmlString, @"Orphanet"">(\d+)", clinVarItem.OrphanetIDs.First(), rcv, "Orphanet"))
                    //        failCount++;

                    //    //if (!CheckDiseaseDbIds(xmlString, @"<a href=""http://www.omim.org/entry/(\d+)", clinVarItem.OmimID))
                    //    //{
                    //    //	Console.WriteLine("Missing omim ids for :" + rcv);
                    //    //	break;
                    //    //}

                    //    passCount++;
                    //}
                }
            }

            Console.WriteLine($"no of entries passed {passCount}. Failed {failCount}");
        }