/// <summary>
        /// Looks up a reference name the renamer does not know ("dummy") with the
        /// second argument set to false and expects null from both the UCSC and
        /// the Ensembl lookups.
        /// </summary>
        public void DisableOriginalOnFailedLookup()
        {
            // NOTE(review): judging by the test name, the false flag disables the
            // "return the original name" fallback - confirm against ChromosomeRenamer.
            var observedUcscReferenceName = _renamer.GetUcscReferenceName("dummy", false);
            Assert.Null(observedUcscReferenceName);

            var observedEnsemblReferenceName = _renamer.GetEnsemblReferenceName("dummy", false);
            Assert.Null(observedEnsemblReferenceName);
        }
        /// <summary>
        /// A freshly constructed renamer, with no reference metadata added, is
        /// expected to throw <see cref="InvalidOperationException"/> from both
        /// lookup methods.
        /// </summary>
        public void BeforeInitialization()
        {
            var uninitializedRenamer = new ChromosomeRenamer();

            Assert.Throws<InvalidOperationException>(
                () => uninitializedRenamer.GetUcscReferenceName("1"));

            Assert.Throws<InvalidOperationException>(
                () => uninitializedRenamer.GetEnsemblReferenceName("chr1"));
        }
Esempio n. 3
0
        /// <summary>
        /// Asks the shared renamer for the UCSC and Ensembl names of "O" using the
        /// single-argument overloads and expects the input to come back unchanged.
        /// </summary>
        public void GetUnknownDesiredReferenceName()
        {
            const string unknownName = "O";

            // UCSC lookup echoes the name
            var ucscResult = _renamer.GetUcscReferenceName(unknownName);
            Assert.Equal(unknownName, ucscResult);

            // Ensembl lookup echoes the name
            var ensemblResult = _renamer.GetEnsemblReferenceName(unknownName);
            Assert.Equal(unknownName, ensemblResult);
        }
Esempio n. 4
0
 /// <summary>
 /// Tells whether <paramref name="chromosome"/> passes the chromosome white list.
 /// </summary>
 /// <param name="chromosome">reference name to test</param>
 /// <param name="renamer">renamer used to obtain the Ensembl name that is looked up in the white list</param>
 /// <returns>
 /// true when the white list is null or empty (nothing is filtered), otherwise
 /// whether the white list contains the Ensembl name of the chromosome
 /// </returns>
 public static bool IsDesiredChromosome(string chromosome, ChromosomeRenamer renamer)
 {
     // short-circuit evaluation: the renamer is only consulted when a
     // non-empty white list actually exists
     return ChromosomeWhiteList == null
         || ChromosomeWhiteList.Count == 0
         || ChromosomeWhiteList.Contains(renamer.GetEnsemblReferenceName(chromosome));
 }
        /// <summary>
        /// Registers a single reference whose first constructor argument (the
        /// Ensembl-side name, judging by the test name) is null and whose UCSC name
        /// is "chr1". Expects the Ensembl lookup of "chr1" to return "chr1" and the
        /// UCSC lookup of null to return null.
        /// </summary>
        public void AddReferenceNameEnsemblEmpty()
        {
            const string ucscName = "chr1";

            var renamer = new ChromosomeRenamer();
            renamer.AddReferenceMetadata(new List<ReferenceMetadata>
            {
                new ReferenceMetadata(null, ucscName, true)
            });

            var ucscLookupOfNull = renamer.GetUcscReferenceName(null);
            var ensemblLookup    = renamer.GetEnsemblReferenceName(ucscName);

            Assert.Equal(ucscName, ensemblLookup);
            Assert.Null(ucscLookupOfNull);
        }
Esempio n. 6
0
        /// <summary>
        /// Opens a supplementary annotation writer for the reference currently held
        /// in _currentRefName and resets the per-reference counters.
        /// </summary>
        /// <remarks>
        /// The output file is named after the UCSC reference name with an ".nsa"
        /// extension and is placed under _nsdBaseFileName. The Ensembl reference
        /// name is recorded in InputFileParserUtilities.ProcessedReferences.
        /// </remarks>
        /// <exception cref="Exception">
        /// thrown when the Ensembl name of the current reference has already been
        /// processed, i.e. the input revisits a chromosome and its output file
        /// would be written a second time
        /// </exception>
        private void OpenNewSaWriter()
        {
            var currentEnsemblRefName = _renamer.GetEnsemblReferenceName(_currentRefName);
            var currentUcscRefName    = _renamer.GetUcscReferenceName(_currentRefName);

            // seeing the same reference twice means the input is not sorted by chromosome
            if (InputFileParserUtilities.ProcessedReferences.Contains(currentEnsemblRefName))
            {
                throw new Exception($"unsorted file, for chromosome {_currentRefName}, SA will be rewritten");
            }

            InputFileParserUtilities.ProcessedReferences.Add(currentEnsemblRefName);
            var saPath = Path.Combine(_nsdBaseFileName, currentUcscRefName + ".nsa");

            _saWriter = new SupplementaryAnnotationWriter(saPath, _currentRefName, _dataSources, _compressedSequence.GenomeAssembly);
            Console.WriteLine("Populating {0} data...", currentUcscRefName);

            // start timing and counting from scratch for the new reference
            _creationBench.Reset();
            _numSaWritten = 0;
        }
Esempio n. 7
0
        /// <summary>
        /// Lazily reads the file referenced by _clinGenFileInfo line by line and
        /// yields one <see cref="ClinGenItem"/> per tab-delimited data line.
        /// </summary>
        /// <remarks>
        /// Lines recognised by IsClinGenHeader are skipped, as are lines whose
        /// chromosome is rejected by InputFileParserUtilities.IsDesiredChromosome.
        /// Columns 0-10 are read in this order: id, chromosome, start, end,
        /// observed gains, observed losses, variant type, clinical interpretation,
        /// validated flag, phenotypes (comma separated), phenotype ids (comma separated).
        /// </remarks>
        private IEnumerable<ClinGenItem> GetClinGenItems()
        {
            using (var reader = GZipUtilities.GetAppropriateStreamReader(_clinGenFileInfo.FullName))
            {
                for (var line = reader.ReadLine(); line != null; line = reader.ReadLine())
                {
                    if (IsClinGenHeader(line))
                    {
                        continue;
                    }

                    var fields = line.Split('\t');

                    var itemId       = fields[0];
                    var ensemblChrom = _renamer.GetEnsemblReferenceName(fields[1]);

                    if (!InputFileParserUtilities.IsDesiredChromosome(ensemblChrom, _renamer))
                    {
                        continue;
                    }

                    // the columns are evaluated in file order so that a malformed
                    // line fails on the same column as before
                    var begin          = int.Parse(fields[2]);
                    var finish         = int.Parse(fields[3]);
                    var gains          = int.Parse(fields[4]);
                    var losses         = int.Parse(fields[5]);
                    var variantType    = GetVariantType(fields[6]);
                    var interpretation = GetClinInterpretation(fields[7]);
                    var isValidated    = fields[8] == "True";

                    // an empty column means "no values", represented as null rather than an empty set
                    HashSet<string> phenotypeSet = null;
                    if (fields[9].Length != 0)
                    {
                        phenotypeSet = new HashSet<string>(fields[9].Split(','));
                    }

                    HashSet<string> phenotypeIdSet = null;
                    if (fields[10].Length != 0)
                    {
                        phenotypeIdSet = new HashSet<string>(fields[10].Split(','));
                    }

                    yield return new ClinGenItem(itemId, ensemblChrom, begin, finish, variantType, gains, losses,
                                                 interpretation, isValidated, phenotypeSet, phenotypeIdSet);
                }
            }
        }
Esempio n. 8
0
        /// <summary>
        /// Builds the variant identifier (VID) for an alternate allele.
        /// </summary>
        /// <param name="renamer">renamer used to convert <paramref name="referenceName"/> to its Ensembl form</param>
        /// <param name="referenceName">reference name of the variant</param>
        /// <param name="altAllele">alternate allele whose variant type selects the VID layout</param>
        /// <returns>
        /// a colon-separated identifier that starts with the Ensembl reference name,
        /// except for translocation breakends, where the string form of the first
        /// break end is returned (null when the allele has no break ends)
        /// </returns>
        public string Create(ChromosomeRenamer renamer, string referenceName, VariantAlternateAllele altAllele)
        {
            var ensemblName = renamer.GetEnsemblReferenceName(referenceName);

            switch (altAllele.NirvanaVariantType)
            {
            case VariantType.SNV:
                // SNVs carry no end position, only the alternate base
                return $"{ensemblName}:{altAllele.Start}:{altAllele.AlternateAllele}";

            case VariantType.insertion:
                if (altAllele.IsStructuralVariant)
                {
                    return $"{ensemblName}:{altAllele.Start}:{altAllele.End}:INS";
                }

                return $"{ensemblName}:{altAllele.Start}:{altAllele.End}:{GetInsertedAltAllele(altAllele.AlternateAllele)}";

            case VariantType.MNV:
            case VariantType.indel:
                return $"{ensemblName}:{altAllele.Start}:{altAllele.End}:{GetInsertedAltAllele(altAllele.AlternateAllele)}";

            case VariantType.duplication:
                return $"{ensemblName}:{altAllele.Start}:{altAllele.End}:DUP";

            case VariantType.tandem_duplication:
                return $"{ensemblName}:{altAllele.Start}:{altAllele.End}:TDUP";

            case VariantType.translocation_breakend:
                return altAllele.BreakEnds?[0].ToString();

            case VariantType.inversion:
                return $"{ensemblName}:{altAllele.Start}:{altAllele.End}:Inverse";

            case VariantType.mobile_element_insertion:
                return $"{ensemblName}:{altAllele.Start}:{altAllele.End}:MEI";

            case VariantType.copy_number_gain:
            case VariantType.copy_number_loss:
            case VariantType.copy_number_variation:
                return $"{ensemblName}:{altAllele.Start}:{altAllele.End}:{altAllele.CopyNumber}";

            case VariantType.reference_no_call:
                return $"{ensemblName}:{altAllele.Start}:{altAllele.End}:NC";

            // deletions and every type not listed above share the plain
            // reference:start:end layout
            case VariantType.deletion:
            default:
                return $"{ensemblName}:{altAllele.Start}:{altAllele.End}";
            }
        }
Esempio n. 9
0
        /// <summary>
        /// Sets up the supplementary database creator: loads the compressed
        /// reference, configures the optional chromosome white list and opens one
        /// enumerator per supplied data source.
        /// </summary>
        /// <param name="compressedReferencePath">path of the compressed reference sequence file</param>
        /// <param name="nsdBaseFileName">base path under which the output files are created</param>
        /// <param name="dbSnpFileName">optional dbSNP file</param>
        /// <param name="cosmicVcfFile">optional COSMIC VCF file; only used when <paramref name="cosmicTsvFile"/> is also given</param>
        /// <param name="cosmicTsvFile">optional COSMIC TSV file; only used when <paramref name="cosmicVcfFile"/> is also given</param>
        /// <param name="clinVarFileName">optional ClinVar XML file; it is read completely and sorted in memory</param>
        /// <param name="oneKGenomeAfFileName">optional 1000 Genomes allele frequency file</param>
        /// <param name="evsFileName">optional EVS file</param>
        /// <param name="exacFileName">optional ExAC file</param>
        /// <param name="customFiles">optional custom annotation files</param>
        /// <param name="dgvFileName">optional DGV file</param>
        /// <param name="oneKSvFileName">optional 1000 Genomes structural variant file</param>
        /// <param name="clinGenFileName">optional ClinGen file</param>
        /// <param name="chrWhiteList">optional comma-separated list of chromosomes to restrict the output to</param>
        public CreateSupplementaryDatabase(
            string compressedReferencePath,
            string nsdBaseFileName,
            string dbSnpFileName        = null,
            string cosmicVcfFile        = null,
            string cosmicTsvFile        = null,
            string clinVarFileName      = null,
            string oneKGenomeAfFileName = null,
            string evsFileName          = null,
            string exacFileName         = null,
            List<string> customFiles    = null,
            string dgvFileName          = null,
            string oneKSvFileName       = null,
            string clinGenFileName      = null,
            string chrWhiteList         = null)
        {
            _nsdBaseFileName = nsdBaseFileName;
            _dataSources     = new List<DataSourceVersion>();

            _iSupplementaryDataItemList = new List<IEnumerator<SupplementaryDataItem>>();
            _supplementaryIntervalList  = new List<SupplementaryInterval>();

            Console.WriteLine("Creating supplementary annotation files... Data version: {0}, schema version: {1}", SupplementaryAnnotationCommon.DataVersion, SupplementaryAnnotationCommon.SchemaVersion);

            _compressedSequence = new CompressedSequence();
            var compressedSequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(compressedReferencePath), _compressedSequence);

            _renamer         = _compressedSequence.Renamer;
            _dataFileManager = new DataFileManager(compressedSequenceReader, _compressedSequence);

            if (!string.IsNullOrEmpty(chrWhiteList))
            {
                // NOTE(review): ChromosomeWhiteList is static and the else-branch below
                // sets it to null, so this Add would throw NullReferenceException if an
                // earlier instance was built without a white list - confirm how the
                // static is (re)initialised.
                Console.WriteLine("Creating SA for the following chromosomes only:");
                foreach (var refSeq in chrWhiteList.Split(','))
                {
                    InputFileParserUtilities.ChromosomeWhiteList.Add(_renamer.GetEnsemblReferenceName(refSeq));
                    Console.Write(refSeq + ",");
                }
                Console.WriteLine();
            }
            else
            {
                // a null white list disables chromosome filtering
                InputFileParserUtilities.ChromosomeWhiteList = null;
            }

            if (dbSnpFileName != null)
            {
                AddSourceVersion(dbSnpFileName);

                var dbSnpReader     = new DbSnpReader(new FileInfo(dbSnpFileName), _renamer);
                var dbSnpEnumerator = dbSnpReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(dbSnpEnumerator);
            }

            // COSMIC needs both of its files; the version is taken from the VCF
            if (cosmicVcfFile != null && cosmicTsvFile != null)
            {
                AddSourceVersion(cosmicVcfFile);

                var cosmicReader     = new MergedCosmicReader(cosmicVcfFile, cosmicTsvFile, _renamer);
                var cosmicEnumerator = cosmicReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(cosmicEnumerator);
            }

            if (oneKGenomeAfFileName != null)
            {
                AddSourceVersion(oneKGenomeAfFileName);

                var oneKGenReader     = new OneKGenReader(new FileInfo(oneKGenomeAfFileName), _renamer);
                var oneKGenEnumerator = oneKGenReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(oneKGenEnumerator);
            }

            if (oneKSvFileName != null)
            {
                // the source version is only registered here when the allele frequency
                // file above has not already registered one
                if (oneKGenomeAfFileName == null)
                {
                    AddSourceVersion(oneKSvFileName);
                }

                var oneKGenSvReader     = new OneKGenSvReader(new FileInfo(oneKSvFileName), _renamer);
                var oneKGenSvEnumerator = oneKGenSvReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(oneKGenSvEnumerator);
            }

            if (evsFileName != null)
            {
                AddSourceVersion(evsFileName);

                var evsReader     = new EvsReader(new FileInfo(evsFileName), _renamer);
                var evsEnumerator = evsReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(evsEnumerator);
            }

            if (exacFileName != null)
            {
                AddSourceVersion(exacFileName);

                var exacReader     = new ExacReader(new FileInfo(exacFileName), _renamer);
                var exacEnumerator = exacReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(exacEnumerator);
            }

            if (clinVarFileName != null)
            {
                AddSourceVersion(clinVarFileName);

                var clinVarReader = new ClinVarXmlReader(new FileInfo(clinVarFileName), compressedSequenceReader, _compressedSequence);

                // unlike the other sources, ClinVar is materialised and sorted
                // before it is enumerated
                var clinVarList = clinVarReader.ToList();

                clinVarList.Sort();
                Console.WriteLine($"{clinVarList.Count} clinvar items read from XML file");

                IEnumerator<ClinVarItem> clinVarEnumerator = clinVarList.GetEnumerator();
                _iSupplementaryDataItemList.Add(clinVarEnumerator);
            }

            if (dgvFileName != null)
            {
                AddSourceVersion(dgvFileName);

                var dgvReader     = new DgvReader(new FileInfo(dgvFileName), _renamer);
                var dgvEnumerator = dgvReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(dgvEnumerator);
            }

            if (clinGenFileName != null)
            {
                AddSourceVersion(clinGenFileName);
                var clinGenReader     = new ClinGenReader(new FileInfo(clinGenFileName), _renamer);
                var clinGenEnumerator = clinGenReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(clinGenEnumerator);
            }

            if (customFiles != null)
            {
                foreach (var customFile in customFiles)
                {
                    AddSourceVersion(customFile);

                    var customReader     = new CustomAnnotationReader(new FileInfo(customFile), _renamer);
                    var customEnumerator = customReader.GetEnumerator();
                    _iSupplementaryDataItemList.Add(customEnumerator);
                }
            }

            // Position every enumerator on its first item and keep only the sources
            // that actually have one. A new list is built because removing entries
            // from a list while a foreach is iterating it invalidates the enumerator
            // and throws InvalidOperationException.
            var nonEmptyEnumerators = new List<IEnumerator<SupplementaryDataItem>>();
            foreach (var iDataEnumerator in _iSupplementaryDataItemList)
            {
                if (iDataEnumerator.MoveNext())
                {
                    nonEmptyEnumerators.Add(iDataEnumerator);
                }
            }
            _iSupplementaryDataItemList = nonEmptyEnumerators;

            _additionalItemsList = new List<SupplementaryDataItem>();
        }