Esempio n. 1
0
        /// <summary>
        /// Builds a CacheFile descriptor for the given path by trying each mini-cache
        /// filename matcher in turn: transcript, regulatory, position, position range.
        /// </summary>
        /// <param name="ndbPath">path of the cache file to classify</param>
        /// <param name="renamer">chromosome renamer forwarded to TryMatchFilename</param>
        /// <returns>
        /// the first non-null result of TryMatchFilename; otherwise a CacheFile of type
        /// MiniCacheType.Unknown
        /// </returns>
        public static CacheFile Create(string ndbPath, ChromosomeRenamer renamer)
        {
            // a matcher is only evaluated when every matcher before it returned null
            return TryMatchFilename(ndbPath, TranscriptUpdater.GetMatch, MiniCacheType.Transcript, renamer)
                   ?? TryMatchFilename(ndbPath, RegulatoryUpdater.GetMatch, MiniCacheType.Regulatory, renamer)
                   ?? TryMatchFilename(ndbPath, PositionUpdater.GetMatch, MiniCacheType.Position, renamer)
                   ?? TryMatchFilename(ndbPath, PositionRangeUpdater.GetMatch, MiniCacheType.PositionRange, renamer)
                   ?? new CacheFile(ndbPath, 0, TranscriptDataSource.None, MiniCacheType.Unknown, null);
        }
Esempio n. 2
0
        /// <summary>
        /// Parses one tab-delimited line into a DgvItem.
        /// </summary>
        /// <param name="line">tab-delimited line; columns 0-5 and 14-16 are read</param>
        /// <param name="renamer">chromosome renamer used by the desired-chromosome filter</param>
        /// <returns>
        /// the parsed DgvItem, or null when the line has fewer than 17 columns or the
        /// chromosome is rejected by InputFileParserUtilities.IsDesiredChromosome
        /// </returns>
        public static DgvItem ExtractDgvItem(string line, ChromosomeRenamer renamer)
        {
            // the highest column index read below is 16 (observed losses), so a shorter
            // row cannot be parsed and is skipped like any other malformed line
            const int minNumColumns = 17;

            var cols = line.Split('\t');

            if (cols.Length < minNumColumns)
            {
                return(null);
            }

            var id         = cols[0];
            var chromosome = cols[1];

            if (!InputFileParserUtilities.IsDesiredChromosome(chromosome, renamer))
            {
                return(null);
            }

            var start          = int.Parse(cols[2]);
            var end            = int.Parse(cols[3]);
            var variantType    = cols[4];
            var variantSubType = cols[5];
            var sampleSize     = int.Parse(cols[14]);

            // empty gain/loss columns are treated as zero observations
            var observedGains  = cols[15] == "" ? 0 : int.Parse(cols[15]);
            var observedLosses = cols[16] == "" ? 0 : int.Parse(cols[16]);

            var seqAltType = SequenceAlterationUtilities.GetSequenceAlteration(variantType, variantSubType);

            return(new DgvItem(id, chromosome, start, end, sampleSize, observedGains, observedLosses, seqAltType));
        }
        public void BeforeInitialization()
        {
            // a freshly constructed renamer (no reference metadata added) must reject
            // name lookups in both directions with an InvalidOperationException
            var uninitializedRenamer = new ChromosomeRenamer();

            Assert.Throws<InvalidOperationException>(
                () => uninitializedRenamer.GetUcscReferenceName("1"));

            Assert.Throws<InvalidOperationException>(
                () => uninitializedRenamer.GetEnsemblReferenceName("chr1"));
        }
Esempio n. 4
0
 /// <summary>
 /// Initializes the first end of the breakend: looks up the reference index for
 /// <paramref name="referenceName"/> and stores the position and the renamer.
 /// </summary>
 /// <param name="referenceName">reference name as supplied by the caller</param>
 /// <param name="position">position of the first end</param>
 /// <param name="renamer">chromosome renamer used for the index and Ensembl name lookups</param>
 private BreakEnd(string referenceName, int position, ChromosomeRenamer renamer)
 {
     _renamer        = renamer;
     Position        = position;
     _referenceIndex = renamer.GetReferenceIndex(referenceName);

     // the supplied name is kept as-is when the index is not below NumRefSeqs;
     // otherwise the Ensembl name stored at that index is used
     if (_referenceIndex >= renamer.NumRefSeqs)
     {
         _referenceName = referenceName;
     }
     else
     {
         _referenceName = renamer.EnsemblReferenceNames[_referenceIndex];
     }
 }
        /// <summary>
        /// Builds a chromosome renamer from the configured compressed reference and runs
        /// the custom interval database creator on the configured BED file.
        /// </summary>
        protected override void ProgramExecution()
        {
            var referenceStream = FileUtilities.GetReadStream(ConfigurationSettings.CompressedReference);
            var renamer         = ChromosomeRenamer.GetChromosomeRenamer(referenceStream);

            new CustomIntervalDbCreator(ConfigurationSettings.BedFile, ConfigurationSettings.OutputDirectory, renamer).Create();
        }
Esempio n. 6
0
        /// <summary>
        /// Reads the next line from the reader and converts it with GetVariant.
        /// </summary>
        /// <param name="reader">reader supplying the next vcf line</param>
        /// <param name="renamer">chromosome renamer forwarded to GetVariant</param>
        /// <param name="isGatkGenomeVcf">forwarded to GetVariant unchanged (defaults to false)</param>
        /// <returns>whatever GetVariant returns for the line that was read</returns>
        internal static VariantFeature GetNextVariant(LiteVcfReader reader, ChromosomeRenamer renamer,
                                                      bool isGatkGenomeVcf = false)
        {
            return GetVariant(reader.ReadLine(), renamer, isGatkGenomeVcf);
        }
Esempio n. 7
0
 /// <summary>
 /// Stores the two input file names and the renamer, and starts with an empty
 /// study table.
 /// </summary>
 /// <param name="vcfFileName">name of the vcf input file</param>
 /// <param name="tsvFileName">name of the tsv input file</param>
 /// <param name="renamer">chromosome renamer kept for later use</param>
 public MergedCosmicReader(string vcfFileName, string tsvFileName, ChromosomeRenamer renamer)
 {
     _studies = new Dictionary<string, HashSet<CosmicItem.CosmicStudy>>();
     _renamer = renamer;

     _tsvFileName = tsvFileName;
     _vcfFileName = vcfFileName;
 }
Esempio n. 8
0
 /// <summary>
 /// Wraps a sequence string together with an offset and an optional renamer.
 /// </summary>
 /// <param name="s">the sequence; its length becomes NumBases</param>
 /// <param name="offset">stored unchanged in _offset (defaults to 0)</param>
 /// <param name="renamer">optional chromosome renamer exposed through Renamer</param>
 public ReferenceSequence(string s, int offset = 0, ChromosomeRenamer renamer = null)
 {
     Renamer   = renamer;
     _offset   = offset;
     _sequence = s;
     NumBases  = s.Length;
 }
Esempio n. 9
0
        /// <summary>
        /// Derives the VEP input paths from the configured input prefix, opens one reader
        /// per input and hands them to a NirvanaDatabaseCreator, which loads the data, marks
        /// canonical transcripts, writes the transcript cache and copies the SIFT and
        /// PolyPhen prediction caches.
        /// </summary>
        protected override void ProgramExecution()
        {
            // every input file shares the same prefix and differs only in its suffix
            var prefix = ConfigurationSettings.InputPrefix;

            var transcriptPath = prefix + ".transcripts.gz";
            var regulatoryPath = prefix + ".regulatory.gz";
            var genePath       = prefix + ".genes.gz";
            var intronPath     = prefix + ".introns.gz";
            var mirnaPath      = prefix + ".mirnas.gz";
            var siftPath       = prefix + ".sift.dat";
            var polyphenPath   = prefix + ".polyphen.dat";
            var peptidePath    = prefix + ".peptides.gz";

            var referenceStream = FileUtilities.GetReadStream(ConfigurationSettings.InputReferencePath);
            var renamer         = ChromosomeRenamer.GetChromosomeRenamer(referenceStream);

            // readers are opened in this order and disposed in reverse order
            using (var transcriptReader = new VepTranscriptReader(transcriptPath))
            using (var regulatoryReader = new VepRegulatoryReader(regulatoryPath))
            using (var geneReader = new VepGeneReader(genePath))
            using (var mergedGeneReader = new VepCombinedGeneReader(ConfigurationSettings.InputMergedGenesPath))
            using (var intronReader = new VepSimpleIntervalReader(intronPath, "intron", GlobalImportCommon.FileType.Intron))
            using (var mirnaReader = new VepSimpleIntervalReader(mirnaPath, "miRNA", GlobalImportCommon.FileType.MicroRna))
            using (var peptideReader = new VepSequenceReader(peptidePath, "peptide", GlobalImportCommon.FileType.Peptide))
            {
                var converter = new NirvanaDatabaseCreator(
                    transcriptReader, regulatoryReader, geneReader, mergedGeneReader,
                    intronReader, mirnaReader, peptideReader, renamer);

                converter.LoadData();
                converter.MarkCanonicalTranscripts(ConfigurationSettings.InputLrgPath);
                converter.CreateTranscriptCacheFile(ConfigurationSettings.OutputCacheFilePrefix);

                converter.CopyPredictionCacheFile(
                    "SIFT", siftPath, CacheConstants.SiftPath(ConfigurationSettings.OutputCacheFilePrefix));
                converter.CopyPredictionCacheFile(
                    "PolyPhen", polyphenPath, CacheConstants.PolyPhenPath(ConfigurationSettings.OutputCacheFilePrefix));
            }
        }
Esempio n. 10
0
 /// <summary>
 /// Takes the shared renamer, sequence and reader from the fixture and creates a
 /// OneKGenReader on top of the renamer.
 /// </summary>
 /// <param name="fixture">fixture providing Renamer, Sequence and Reader</param>
 public MergeSaTests(ChromosomeRenamerFixture fixture)
 {
     _reader   = fixture.Reader;
     _sequence = fixture.Sequence;
     _renamer  = fixture.Renamer;

     _oneKGenReader = new OneKGenReader(_renamer);
 }
Esempio n. 11
0
        /// <summary>
        /// Opens the GRCh37 test reference resource and exposes the resulting sequence,
        /// reader and chromosome renamer through Sequence, Reader and Renamer.
        /// </summary>
        public ChromosomeRenamerFixture()
        {
            var resourcePath = Resources.CacheGRCh37("ENSR00001584270_chr1_Ensembl84_reg.bases");
            var basesStream  = ResourceUtilities.GetReadStream(resourcePath);

            Sequence = new CompressedSequence();
            Reader   = new CompressedSequenceReader(basesStream, Sequence);

            // read only after the reader has been constructed on top of the sequence
            Renamer = Sequence.Renamer;
        }
Esempio n. 12
0
        /// <summary>
        /// Converts a vcf line into a UnifiedJson object; no annotation is performed.
        /// </summary>
        /// <param name="vcfLine">the vcf line to convert</param>
        /// <param name="renamer">chromosome renamer forwarded to VcfUtilities.GetVariant</param>
        /// <returns>a UnifiedJson built from the parsed variant, with its variant data added</returns>
        internal static UnifiedJson GetJson(string vcfLine, ChromosomeRenamer renamer)
        {
            var parsedVariant = VcfUtilities.GetVariant(vcfLine, renamer);

            var unifiedJson = new UnifiedJson(parsedVariant);
            unifiedJson.AddVariantData(parsedVariant);

            return unifiedJson;
        }
Esempio n. 13
0
 /// <summary>
 /// Stores the input file and renamer and starts with empty field/value collections;
 /// the reader is initially marked as non-positional.
 /// </summary>
 /// <param name="customFileInfo">the custom annotation file to read</param>
 /// <param name="renamer">chromosome renamer kept for later use</param>
 public CustomAnnotationReader(FileInfo customFileInfo, ChromosomeRenamer renamer)
 {
     _renamer        = renamer;
     _customFileInfo = customFileInfo;
     _isPositional   = false;

     // string-typed fields and their values
     _stringFields = new Dictionary<string, string>();
     _stringValues = new Dictionary<string, string>();

     // boolean fields and values
     _boolFields = new Dictionary<string, string>();
     _boolValues = new List<string>();

     _customItemList = new List<CustomItem>();
 }
Esempio n. 14
0
 /// <summary>
 /// Stores the input file and renamer and starts with empty field/value collections
 /// and no interval type.
 /// </summary>
 /// <param name="customFileInfo">the custom interval file to parse</param>
 /// <param name="renamer">chromosome renamer kept for later use</param>
 public CustomIntervalParser(FileInfo customFileInfo, ChromosomeRenamer renamer)
 {
     _renamer        = renamer;
     _customFileInfo = customFileInfo;
     _type           = null;

     // string-typed fields and their values
     _stringFields = new List<string>();
     _stringValues = new Dictionary<string, string>();

     // every other field type
     _nonstringFields = new List<string>();
     _nonstringValues = new Dictionary<string, string>();

     _fieldIndex = new Dictionary<int, string>();
 }
Esempio n. 15
0
        /// <summary>
        /// Creates a breakend with two ends. The first end is initialized by the
        /// (referenceName, position, renamer) constructor; this constructor fills in the
        /// second end, the suffix flags and the two orientations.
        /// </summary>
        /// <param name="referenceName">reference name of the first end</param>
        /// <param name="referenceName2">reference name of the second end</param>
        /// <param name="position">position of the first end</param>
        /// <param name="position2">position of the second end</param>
        /// <param name="isSuffix">suffix flag of the first end, compared against '+'</param>
        /// <param name="isSuffix2">suffix flag of the second end, compared against '+'</param>
        /// <param name="renamer">chromosome renamer used for the index and Ensembl name lookups</param>
        public BreakEnd(string referenceName, string referenceName2, int position, int position2, char isSuffix,
                        char isSuffix2, ChromosomeRenamer renamer) : this(referenceName, position, renamer)
        {
            Position2 = position2;
            IsSuffix  = isSuffix;
            IsSuffix2 = isSuffix2;

            // the supplied name is kept as-is when the index is not below NumRefSeqs
            ReferenceIndex2 = renamer.GetReferenceIndex(referenceName2);
            if (ReferenceIndex2 >= renamer.NumRefSeqs)
            {
                _referenceName2 = referenceName2;
            }
            else
            {
                _referenceName2 = renamer.EnsemblReferenceNames[ReferenceIndex2];
            }

            // first end: '+' suffix maps to '-' orientation, anything else to '+'
            if (IsSuffix == '+')
            {
                _orientation = '-';
            }
            else
            {
                _orientation = '+';
            }

            // second end: '+' suffix maps to '+' orientation, anything else to '-'
            if (IsSuffix2 == '+')
            {
                _orientation2 = '+';
            }
            else
            {
                _orientation2 = '-';
            }
        }
Esempio n. 16
0
        /// <summary>
        /// Creates a variant feature from a vcf variant: stores the renamer and VID,
        /// starts with an empty alternate allele list and parses the variant's fields.
        /// </summary>
        /// <param name="variant">the vcf variant supplying the fields and the GATK genome vcf flag</param>
        /// <param name="renamer">chromosome renamer kept for later use</param>
        /// <param name="vid">VID instance kept for later use</param>
        public VariantFeature(VcfVariant variant, ChromosomeRenamer renamer, VID vid)
        {
            _vid             = vid;
            _renamer         = renamer;
            AlternateAlleles = new List<VariantAlternateAllele>();

            // the GATK genome vcf fix is switched on before the fields are parsed
            var needsGatkFix = variant.IsGatkGenomeVcf;
            if (needsGatkFix)
            {
                EnableGatkGenomeVcfFix();
            }

            ParseVcfLine(variant.Fields);
        }
Esempio n. 17
0
        /// <summary>
        /// Stores the input prefix and output path, opens the compressed reference and
        /// sets up the data file manager, the renamer and an empty processed-gene set.
        /// </summary>
        /// <param name="inputPrefix">prefix of the input files</param>
        /// <param name="refSeqPath">path of the compressed reference sequence file</param>
        /// <param name="outPath">output path</param>
        public OmimVcfCreator(string inputPrefix, string refSeqPath, string outPath)
        {
            _outPath          = outPath;
            _inputPrefix      = inputPrefix;
            _processedGeneSet = new HashSet<string>();

            _compressedSequence = new CompressedSequence();

            var referenceStream = FileUtilities.GetReadStream(refSeqPath);
            var sequenceReader  = new CompressedSequenceReader(referenceStream, _compressedSequence);

            // the renamer is read only after the reader has been constructed on the sequence
            _renamer         = _compressedSequence.Renamer;
            _dataFileManager = new DataFileManager(sequenceReader, _compressedSequence);
        }
Esempio n. 18
0
 /// <summary>
 /// Tells whether a chromosome passes the chromosome whitelist.
 /// </summary>
 /// <param name="chromosome">chromosome name to test</param>
 /// <param name="renamer">chromosome renamer used to obtain the Ensembl name before the lookup</param>
 /// <returns>
 /// true when ChromosomeWhiteList is null or empty, or when it contains the Ensembl
 /// reference name of <paramref name="chromosome"/>; false otherwise
 /// </returns>
 public static bool IsDesiredChromosome(string chromosome, ChromosomeRenamer renamer)
 {
     // a missing or empty whitelist accepts everything; the renamer is only
     // consulted when there is a non-empty whitelist to check against
     return ChromosomeWhiteList == null
            || ChromosomeWhiteList.Count == 0
            || ChromosomeWhiteList.Contains(renamer.GetEnsemblReferenceName(chromosome));
 }
Esempio n. 19
0
        /// <summary>
        /// Prepares the expected data (one ClinVar data source version, three dbSNP
        /// annotations at positions 100-102 and one supplementary interval) and writes
        /// the supplementary annotation file to a random path in the temp directory.
        /// </summary>
        /// <param name="fixture">fixture providing Renamer, Sequence and Reader</param>
        public SaReadWriteTests(ChromosomeRenamerFixture fixture)
        {
            _reader   = fixture.Reader;
            _sequence = fixture.Sequence;
            _renamer  = fixture.Renamer;

            // expected data source versions: a single ClinVar entry
            var clinVarTicks = DateTime.Parse("2015-01-19").Ticks;
            _expectedDataSourceVersion  = new DataSourceVersion("ClinVar", "13.5", clinVarTicks);
            _expectedDataSourceVersions = new List<DataSourceVersion> { _expectedDataSourceVersion };

            // expected supplementary annotations: position 100 carries dbSNP id 1
            var firstDbSnp = new DbSnpAnnotation { DbSnp = new List<long> { 1 } };
            _expectedAnnotation1 = new SupplementaryAnnotationPosition(100);
            new SupplementaryPositionCreator(_expectedAnnotation1)
                .AddExternalDataToAsa(DataSourceCommon.DataSource.DbSnp, AltAllele, firstDbSnp);

            // position 101 carries dbSNP id 2
            var secondDbSnp = new DbSnpAnnotation { DbSnp = new List<long> { 2 } };
            _expectedAnnotation2 = new SupplementaryAnnotationPosition(101);
            new SupplementaryPositionCreator(_expectedAnnotation2)
                .AddExternalDataToAsa(DataSourceCommon.DataSource.DbSnp, AltAllele, secondDbSnp);

            // position 102 carries dbSNP id 3
            var thirdDbSnp = new DbSnpAnnotation { DbSnp = new List<long> { 3 } };
            _expectedAnnotation3 = new SupplementaryAnnotationPosition(102);
            new SupplementaryPositionCreator(_expectedAnnotation3)
                .AddExternalDataToAsa(DataSourceCommon.DataSource.DbSnp, AltAllele, thirdDbSnp);

            _expectedInterval = new SupplementaryInterval(
                1, 1000, "chr1", null, VariantType.copy_number_variation, null, _renamer);

            // the file is written last, after all expected data has been prepared
            _randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());
            WriteSupplementaryAnnotationFile(_randomPath);
        }
Esempio n. 20
0
        /// <summary>
        /// Creates a new annotation source with data from the micro-cache file. The
        /// supplementary annotation reader is wrapped in a MockSupplementaryAnnotationProvider
        /// and performance metric output is disabled.
        /// </summary>
        /// <param name="cachePath">cache path; "{cachePath}.bases" is opened to build the renamer</param>
        /// <param name="saReader">supplementary annotation reader wrapped by the mock provider</param>
        /// <param name="conservationScoreReader">optional, forwarded to the annotation source</param>
        /// <param name="customAnnotationProvider">optional, forwarded to the annotation source</param>
        /// <param name="customIntervalProvider">optional, forwarded to the annotation source</param>
        /// <returns>a NirvanaAnnotationSource built from the cache streams and providers</returns>
        internal static IAnnotationSource GetAnnotationSource(string cachePath, ISupplementaryAnnotationReader saReader,
                                                              IConservationScoreReader conservationScoreReader          = null,
                                                              ISupplementaryAnnotationProvider customAnnotationProvider = null,
                                                              ISupplementaryAnnotationProvider customIntervalProvider   = null)
        {
            var sourceStreams = GetAnnotationSourceStreams(cachePath);

            var basesStream = GetReadStream($"{cachePath}.bases");
            var renamer     = ChromosomeRenamer.GetChromosomeRenamer(basesStream);
            var mockSaProvider = new MockSupplementaryAnnotationProvider(saReader, renamer);

            PerformanceMetrics.DisableOutput = true;

            var annotationSource = new NirvanaAnnotationSource(
                sourceStreams, mockSaProvider, conservationScoreReader,
                customAnnotationProvider, customIntervalProvider, null);

            return annotationSource;
        }
Esempio n. 21
0
        /// <summary>
        /// Prepares the creator for the given BED file: stores the renamer and output
        /// directory, creates the interval parser and records the source version.
        /// </summary>
        /// <param name="bedFile">path of the BED file; when null, nothing is initialized</param>
        /// <param name="outputDirectory">directory stored in _outputDirectory</param>
        /// <param name="renamer">chromosome renamer shared with the interval parser</param>
        public CustomIntervalDbCreator(string bedFile, string outputDirectory, ChromosomeRenamer renamer)
        {
            // without a BED file the constructor returns before assigning any field
            if (bedFile == null)
            {
                return;
            }

            _observedRefSeq  = new HashSet<string>();
            _outputDirectory = outputDirectory;
            _renamer         = renamer;

            var bedFileInfo = new FileInfo(bedFile);
            _intervalParser = new CustomIntervalParser(bedFileInfo, renamer);

            _dataVersion = AddSourceVersion(bedFile);
        }
Esempio n. 22
0
        /// <summary>
        /// Processes one genome assembly directory: builds the renamer from the matching
        /// compressed sequence, collects the cache files and processes every transcript
        /// data source found among them.
        /// </summary>
        /// <param name="gaDir">genome assembly directory; its last path component is used as the assembly name</param>
        private void ProcessGenomeAssemblyDir(string gaDir)
        {
            var assemblyName    = Path.GetFileName(gaDir);
            var referencePath   = GetCompressedSequencePath(_referenceDir, assemblyName);
            var referenceStream = FileUtilities.GetReadStream(referencePath);
            var renamer         = ChromosomeRenamer.GetChromosomeRenamer(referenceStream);

            var cacheFiles  = GetCacheFiles(gaDir, renamer);
            var dataSources = GetTranscriptDataSources(cacheFiles);

            Console.WriteLine("GenomeAssembly dir: {0}", gaDir);

            foreach (var dataSource in dataSources)
            {
                ProcessTranscriptDataSource(cacheFiles, assemblyName, dataSource);

                // blank line after each data source
                Console.WriteLine();
            }
        }
Esempio n. 23
0
        /// <summary>
        /// Parses a vcf line and returns the variant's reference index together with its
        /// reference begin and end, widened by the flanking length on both sides.
        /// </summary>
        /// <param name="vcfLine">tab-delimited vcf line</param>
        /// <param name="renamer">chromosome renamer forwarded to the VariantFeature</param>
        /// <param name="flankingLength">subtracted from the begin and added to the end (defaults to 0)</param>
        /// <returns>(reference index, begin - flankingLength, end + flankingLength)</returns>
        /// <exception cref="GeneralException">
        /// thrown when the line has fewer than VcfCommon.MinNumColumns tab-separated fields
        /// </exception>
        private static Tuple<ushort, int, int> GetTuple(string vcfLine, ChromosomeRenamer renamer, int flankingLength = 0)
        {
            var columns = vcfLine.Split('\t');

            var hasEnoughColumns = columns.Length >= VcfCommon.MinNumColumns;
            if (!hasEnoughColumns)
            {
                throw new GeneralException($"Expected at least {VcfCommon.MinNumColumns} fields in the vcf string: [{vcfLine}]");
            }

            var feature = new VariantFeature(new VcfVariant(columns, vcfLine, false), renamer, new VID());

            var referenceIndex = feature.ReferenceIndex;
            var flankedBegin   = feature.VcfReferenceBegin - flankingLength;
            var flankedEnd     = feature.VcfReferenceEnd + flankingLength;

            return new Tuple<ushort, int, int>(referenceIndex, flankedBegin, flankedEnd);
        }
Esempio n. 24
0
        /// <summary>
        /// Sets up the annotation source: creates the compressed sequence and its data file
        /// manager, subscribes LoadData to the manager's Changed event and loads the
        /// transcript cache from the supplied stream.
        /// </summary>
        /// <param name="transcriptCacheStream">stream the transcript cache is loaded from</param>
        /// <param name="compressedSequenceReader">reader handed to the data file manager</param>
        public PianoAnnotationSource(Stream transcriptCacheStream, CompressedSequenceReader compressedSequenceReader)
        {
            _performanceMetrics    = PerformanceMetrics.Instance;
            OverlappingTranscripts = new List<Transcript>();
            _aminoAcids            = new AminoAcids();
            _vid                   = new VID();

            _compressedSequence = new CompressedSequence();
            _dataFileManager    = new DataFileManager(compressedSequenceReader, _compressedSequence);

            _dataFileManager.Changed += LoadData;

            // the renamer is read only after the data file manager has been created on the sequence
            _renamer = _compressedSequence.Renamer;

            var numRefSeqs = _renamer.NumRefSeqs;
            LoadTranscriptCache(transcriptCacheStream, numRefSeqs, out _transcriptIntervalForest);
        }
Esempio n. 25
0
        /// <summary>
        /// Converts this item into a "ClinGen" supplementary interval.
        /// </summary>
        /// <param name="renamer">chromosome renamer passed to the SupplementaryInterval</param>
        /// <returns>
        /// null when IsInterval is false; otherwise an interval spanning Start..End on
        /// Chromosome that carries only the values that are actually set on this item
        /// </returns>
        public override SupplementaryInterval GetSupplementaryInterval(ChromosomeRenamer renamer)
        {
            if (!IsInterval)
            {
                return null;
            }

            // the interval starts with empty value containers; they are filled through
            // the Add* calls below
            var interval = new SupplementaryInterval(
                Start, End, Chromosome, null, VariantType, "ClinGen", renamer,
                new Dictionary<string, int>(),                   // int values
                new Dictionary<string, double>(),                // double values
                new Dictionary<string, double>(),                // frequency values
                new Dictionary<string, string>(),                // string values
                new List<string>(),                              // bool values
                new Dictionary<string, IEnumerable<string>>());  // string lists

            if (Id != null)
            {
                interval.AddStringValue("id", Id);
            }

            if (ClinicalInterpretation != ClinicalInterpretation.unknown)
            {
                interval.AddStringValue("clinicalInterpretation", GetClinicalDescription(ClinicalInterpretation));
            }

            if (Phenotypes != null)
            {
                interval.AddStringList("phenotypes", Phenotypes);
            }

            if (PhenotypeIds != null)
            {
                interval.AddStringList("phenotypeIds", PhenotypeIds);
            }

            // zero gains/losses are left out
            if (ObservedGains != 0)
            {
                interval.AddIntValue("observedGains", ObservedGains);
            }

            if (ObservedLosses != 0)
            {
                interval.AddIntValue("observedLosses", ObservedLosses);
            }

            if (Validated)
            {
                interval.AddBoolValue("validated");
            }

            return interval;
        }
Esempio n. 26
0
        /// <summary>
        /// Collects the recognized cache files in a genome assembly directory.
        /// </summary>
        /// <param name="gaDir">directory searched for "*.ndb" files</param>
        /// <param name="renamer">chromosome renamer forwarded to CacheFile.Create</param>
        /// <returns>one CacheFile per .ndb file whose type is not MiniCacheType.Unknown</returns>
        private static List<CacheFile> GetCacheFiles(string gaDir, ChromosomeRenamer renamer)
        {
            var recognized = new List<CacheFile>();

            foreach (var path in Directory.GetFiles(gaDir, "*.ndb"))
            {
                var candidate = CacheFile.Create(path, renamer);

                // files that no matcher recognized are left out
                if (candidate.Type != MiniCacheType.Unknown)
                {
                    recognized.Add(candidate);
                }
            }

            return recognized;
        }
Esempio n. 27
0
        /// <summary>
        /// Parses one tab-delimited structural variant line into a OneKGenItem.
        /// </summary>
        /// <param name="line">tab-delimited line; columns 0-8 and 61-70 are read</param>
        /// <param name="renamer">chromosome renamer used by the desired-chromosome filter</param>
        /// <returns>
        /// the parsed OneKGenItem, or null when the line has fewer than 71 columns or the
        /// chromosome is rejected by InputFileParserUtilities.IsDesiredChromosome
        /// </returns>
        private static OneKGenItem ExtractOneKGenSvItem(string line, ChromosomeRenamer renamer)
        {
            // the highest column index read below is 70 (SAS frequency), so a shorter
            // row cannot be parsed and is skipped like any other malformed line
            const int minNumColumns = 71;

            var cols = line.Split('\t');

            if (cols.Length < minNumColumns)
            {
                return(null);
            }

            var id         = cols[0];
            var chromosome = cols[1];

            if (!InputFileParserUtilities.IsDesiredChromosome(chromosome, renamer))
            {
                return(null);
            }

            var start       = int.Parse(cols[2]);
            var end         = int.Parse(cols[3]);
            var variantType = cols[4];

            var observedGains  = int.Parse(cols[6]);
            var observedLosses = int.Parse(cols[7]);

            // a frequency column that is exactly "0" is reported as null
            var allFrequency = cols[8].Equals("0") ? null : cols[8];
            var easFrequency = cols[62].Equals("0") ? null : cols[62];
            var eurFrequency = cols[64].Equals("0") ? null : cols[64];
            var afrFrequency = cols[66].Equals("0") ? null : cols[66];
            var amrFrequency = cols[68].Equals("0") ? null : cols[68];
            var sasFrequency = cols[70].Equals("0") ? null : cols[70];

            // each allele number sits in the column just before its frequency
            // (except the overall one, which is column 5)
            var allAlleleNumber = int.Parse(cols[5]);
            var easAlleleNumber = int.Parse(cols[61]);
            var eurAlleleNumber = int.Parse(cols[63]);
            var afrAlleleNumber = int.Parse(cols[65]);
            var amrAlleleNumber = int.Parse(cols[67]);
            var sasAlleleNumber = int.Parse(cols[69]);

            // NOTE(review): frequencies are passed as afr, all, amr, eas, eur, sas while allele
            // numbers are passed as all, afr, amr, eur, eas, sas - confirm against the
            // OneKGenItem constructor that both orders are intended
            return(new OneKGenItem(chromosome, start, id, null, null, null,
                                   afrFrequency, allFrequency, amrFrequency, easFrequency, eurFrequency, sasFrequency,
                                   null, null, null, null, null, null,
                                   allAlleleNumber, afrAlleleNumber, amrAlleleNumber, eurAlleleNumber, easAlleleNumber, sasAlleleNumber,
                                   variantType, end, null, null, observedGains, observedLosses));
        }
Esempio n. 28
0
        public void AddReferenceNameEnsemblEmpty()
        {
            // a single reference entry whose first name argument is null and whose
            // UCSC-style name is "chr1" (presumably the missing one is the Ensembl name,
            // going by the test name)
            const string ucscReferenceName = "chr1";

            var renamer = new ChromosomeRenamer();
            renamer.AddReferenceMetadata(new List<ReferenceMetadata>
            {
                new ReferenceMetadata(null, ucscReferenceName, true)
            });

            var ucscNameForNull    = renamer.GetUcscReferenceName(null);
            var ensemblNameForUcsc = renamer.GetEnsemblReferenceName(ucscReferenceName);

            // the UCSC name is handed back unchanged, and a null query yields null
            Assert.Equal(ucscReferenceName, ensemblNameForUcsc);
            Assert.Null(ucscNameForNull);
        }
Esempio n. 29
0
        public void TestClinGenUnifier()
        {
            // unifies the test ClinGen file, writes the result to a temp file, reads it
            // back and compares the items with the expected truth sequence
            ClinGenUnifier clinGenUnifier = new ClinGenUnifier(TestClinGenFile);

            clinGenUnifier.Unify();

            string randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            try
            {
                clinGenUnifier.Write(randomPath);

                var testUnifiedClinGenFile = new FileInfo(randomPath);
                ChromosomeRenamer renamer  = null; // need to fix
                var clinGenReader          = new ClinGenReader(testUnifiedClinGenFile, renamer);
                var expectedItems          = CreateTruthClinGenItemSequence();

                Assert.True(clinGenReader.SequenceEqual(expectedItems));
            }
            finally
            {
                // remove the temp file even when writing, reading or the assertion fails;
                // File.Delete does not throw when the file was never created
                File.Delete(randomPath);
            }
        }
Esempio n. 30
0
        /// <summary>
        /// Converts the FASTA file to a compressed reference file, reporting progress and
        /// the final file size on the console.
        /// </summary>
        /// <param name="inputFastaPath">FASTA file to read the reference sequences from</param>
        /// <param name="inputCytogeneticBandpath">input passed to GetCytogeneticBands</param>
        /// <param name="inputChromosomeNamesPath">input passed to GetReferenceMetadata</param>
        /// <param name="outputCompressedPath">path of the compressed reference file to write</param>
        /// <param name="genomeAssembly">genome assembly handed to the writer</param>
        public void Convert(string inputFastaPath, string inputCytogeneticBandpath, string inputChromosomeNamesPath,
                            string outputCompressedPath, GenomeAssembly genomeAssembly)
        {
            // step 1: reference metadata, which also feeds the chromosome renamer
            Console.Write("- getting reference metadata... ");
            var metadataList = GetReferenceMetadata(inputChromosomeNamesPath);
            Console.WriteLine("{0} references found.", metadataList.Count);

            var renamer = new ChromosomeRenamer();
            renamer.AddReferenceMetadata(metadataList);

            // step 2: cytogenetic bands
            Console.Write("- getting cytogenetic bands... ");
            var bands = GetCytogeneticBands(inputCytogeneticBandpath, renamer);
            Console.WriteLine("finished.\n");

            // step 3: copy every sequence from the FASTA reader to the compressed writer
            using (var fastaReader = new FastaReader(inputFastaPath))
            using (var writer = new CompressedSequenceWriter(outputCompressedPath, metadataList, bands, genomeAssembly))
            {
                Console.WriteLine("Converting the following reference sequences:");

                // GetReferenceSequence returning null ends the loop
                for (var sequence = fastaReader.GetReferenceSequence();
                     sequence != null;
                     sequence = fastaReader.GetReferenceSequence())
                {
                    Console.WriteLine("- {0} ({1:n0} bytes)", sequence.Name, sequence.Bases.Length);
                    writer.Write(sequence.Name, sequence.Bases);
                }
            }

            // the size is read after the writer has been disposed
            var outputSize = new FileInfo(outputCompressedPath).Length;
            Console.WriteLine("\nFile size: {0}", outputSize);
        }