/// <summary>
/// Looks up a name the renamer does not know ("dummy") while passing <c>false</c> as the
/// second argument, and expects both the UCSC and the Ensembl lookup to return null.
/// </summary>
public void DisableOriginalOnFailedLookup()
{
    var observedUcscName = _renamer.GetUcscReferenceName("dummy", false);
    // Assert.Null states the intent directly and yields a clearer failure message
    // than Assert.Equal(null, ...)
    Assert.Null(observedUcscName);

    var observedEnsemblName = _renamer.GetEnsemblReferenceName("dummy", false);
    Assert.Null(observedEnsemblName);
}
/// <summary>
/// A freshly constructed renamer (nothing added to it) must answer both kinds of lookup
/// with an <see cref="InvalidOperationException"/>.
/// </summary>
public void BeforeInitialization()
{
    var uninitializedRenamer = new ChromosomeRenamer();

    // Ensembl-style input for the UCSC lookup
    Assert.Throws<InvalidOperationException>(
        () => uninitializedRenamer.GetUcscReferenceName("1"));

    // UCSC-style input for the Ensembl lookup
    Assert.Throws<InvalidOperationException>(
        () => uninitializedRenamer.GetEnsemblReferenceName("chr1"));
}
/// <summary>
/// With the default lookup overloads, a name the renamer does not know ("O") is expected
/// to be handed back unchanged by both the UCSC and the Ensembl lookup.
/// </summary>
public void GetUnknownDesiredReferenceName()
{
    const string unknownName = "O";

    Assert.Equal(unknownName, _renamer.GetUcscReferenceName(unknownName));
    Assert.Equal(unknownName, _renamer.GetEnsemblReferenceName(unknownName));
}
/// <summary>
/// Decides whether a chromosome should be processed.
/// </summary>
/// <param name="chromosome">reference name to test; it is converted to its Ensembl name before the whitelist lookup</param>
/// <param name="renamer">renamer used for the conversion</param>
/// <returns>
/// true when there is no whitelist, when the whitelist is empty, or when the whitelist
/// contains the Ensembl name of <paramref name="chromosome"/>; false otherwise
/// </returns>
public static bool IsDesiredChromosome(string chromosome, ChromosomeRenamer renamer)
{
    // a missing or empty whitelist filters nothing; the renamer is only consulted otherwise
    var whiteListIsActive = ChromosomeWhiteList != null && ChromosomeWhiteList.Count != 0;

    return !whiteListIsActive
        || ChromosomeWhiteList.Contains(renamer.GetEnsemblReferenceName(chromosome));
}
/// <summary>
/// Registers a single reference whose first constructor argument is null (presumably the
/// Ensembl name, going by the test name - confirm against ReferenceMetadata) and checks that
/// the Ensembl lookup of the UCSC name returns the UCSC name, while the UCSC lookup of null
/// returns null.
/// </summary>
public void AddReferenceNameEnsemblEmpty()
{
    const string ucscName = "chr1";

    var renamer = new ChromosomeRenamer();
    var metadata = new List<ReferenceMetadata>();
    metadata.Add(new ReferenceMetadata(null, ucscName, true));
    renamer.AddReferenceMetadata(metadata);

    var ucscLookupResult = renamer.GetUcscReferenceName(null);
    var ensemblLookupResult = renamer.GetEnsemblReferenceName(ucscName);

    Assert.Equal(ucscName, ensemblLookupResult);
    Assert.Null(ucscLookupResult);
}
/// <summary>
/// Starts a new supplementary annotation (.nsa) writer for the current reference sequence
/// and resets the per-reference benchmark and counter.
/// </summary>
/// <exception cref="Exception">
/// thrown when the current reference has already been recorded as processed, i.e. the input
/// is not grouped by reference and an existing output file would be overwritten
/// </exception>
private void OpenNewSaWriter()
{
    var ensemblRefName = _renamer.GetEnsemblReferenceName(_currentRefName);
    var ucscRefName = _renamer.GetUcscReferenceName(_currentRefName);

    // processed references are tracked by their Ensembl name
    if (InputFileParserUtilities.ProcessedReferences.Contains(ensemblRefName))
    {
        throw new Exception($"unsorted file, for chromosome {_currentRefName}, SA will be rewritten");
    }

    InputFileParserUtilities.ProcessedReferences.Add(ensemblRefName);

    // the output file is named after the UCSC reference name
    var saPath = Path.Combine(_nsdBaseFileName, ucscRefName + ".nsa");
    _saWriter = new SupplementaryAnnotationWriter(saPath, _currentRefName, _dataSources, _compressedSequence.GenomeAssembly);

    Console.WriteLine($"Populating {ucscRefName} data...");

    _creationBench.Reset();
    _numSaWritten = 0;
}
/// <summary>
/// Lazily reads the ClinGen file and yields one <see cref="ClinGenItem"/> per data line.
/// Header lines and lines on chromosomes rejected by
/// <c>InputFileParserUtilities.IsDesiredChromosome</c> are skipped.
/// </summary>
/// <remarks>
/// Tab-separated columns, as read below: 0 id, 1 chromosome, 2 start, 3 end,
/// 4 observed gains, 5 observed losses, 6 variant type, 7 clinical interpretation,
/// 8 validated ("True" means validated), 9 comma-separated phenotypes,
/// 10 comma-separated phenotype ids.
/// </remarks>
/// <returns>the parsed items, in file order</returns>
private IEnumerable<ClinGenItem> GetClinGenItems()
{
    // the file is machine-generated, so numbers must not be parsed with the user's culture
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    using (var reader = GZipUtilities.GetAppropriateStreamReader(_clinGenFileInfo.FullName))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            if (IsClinGenHeader(line))
            {
                continue;
            }

            var cols = line.Split('\t');

            string id = cols[0];
            string ucscChrom = cols[1];
            string chrom = _renamer.GetEnsemblReferenceName(ucscChrom);

            if (!InputFileParserUtilities.IsDesiredChromosome(chrom, _renamer))
            {
                continue;
            }

            int start = int.Parse(cols[2], invariant);
            int end = int.Parse(cols[3], invariant);
            int observedGains = int.Parse(cols[4], invariant);
            int observedLosses = int.Parse(cols[5], invariant);
            var variantType = GetVariantType(cols[6]);
            var clinInterpretation = GetClinInterpretation(cols[7]);
            bool validated = cols[8].Equals("True");

            // empty phenotype columns are represented as null rather than as empty sets
            var phenotypes = cols[9].Length == 0 ? null : new HashSet<string>(cols[9].Split(','));
            var phenotypeIds = cols[10].Length == 0 ? null : new HashSet<string>(cols[10].Split(','));

            yield return new ClinGenItem(id, chrom, start, end, variantType, observedGains,
                observedLosses, clinInterpretation, validated, phenotypes, phenotypeIds);
        }
    }
}
/// <summary>
/// builds the variant ID (VID) string for the supplied alternate allele
/// </summary>
/// <param name="renamer">used to convert <paramref name="referenceName"/> to its Ensembl name</param>
/// <param name="referenceName">reference name of the variant</param>
/// <param name="altAllele">alternate allele whose type, coordinates and allele determine the VID</param>
/// <returns>
/// the VID; for translocation breakends this is the string form of the first breakend,
/// or null when the allele has no breakends
/// </returns>
public string Create(ChromosomeRenamer renamer, string referenceName, VariantAlternateAllele altAllele)
{
    var ensemblName = renamer.GetEnsemblReferenceName(referenceName);

    switch (altAllele.NirvanaVariantType)
    {
        case VariantType.SNV:
            // SNVs carry only the position and the alternate base
            return $"{ensemblName}:{altAllele.Start}:{altAllele.AlternateAllele}";

        case VariantType.insertion:
            if (altAllele.IsStructuralVariant)
            {
                return $"{ensemblName}:{altAllele.Start}:{altAllele.End}:INS";
            }

            return $"{ensemblName}:{altAllele.Start}:{altAllele.End}:{GetInsertedAltAllele(altAllele.AlternateAllele)}";

        case VariantType.MNV:
        case VariantType.indel:
            return $"{ensemblName}:{altAllele.Start}:{altAllele.End}:{GetInsertedAltAllele(altAllele.AlternateAllele)}";

        case VariantType.duplication:
            return $"{ensemblName}:{altAllele.Start}:{altAllele.End}:DUP";

        case VariantType.tandem_duplication:
            return $"{ensemblName}:{altAllele.Start}:{altAllele.End}:TDUP";

        case VariantType.translocation_breakend:
            return altAllele.BreakEnds?[0].ToString();

        case VariantType.inversion:
            return $"{ensemblName}:{altAllele.Start}:{altAllele.End}:Inverse";

        case VariantType.mobile_element_insertion:
            return $"{ensemblName}:{altAllele.Start}:{altAllele.End}:MEI";

        case VariantType.copy_number_gain:
        case VariantType.copy_number_loss:
        case VariantType.copy_number_variation:
            return $"{ensemblName}:{altAllele.Start}:{altAllele.End}:{altAllele.CopyNumber}";

        case VariantType.reference_no_call:
            return $"{ensemblName}:{altAllele.Start}:{altAllele.End}:NC";

        // deletions and every type not listed above are identified by coordinates alone
        case VariantType.deletion:
        default:
            return $"{ensemblName}:{altAllele.Start}:{altAllele.End}";
    }
}
/// <summary>
/// Sets up the supplementary database builder: loads the compressed reference, applies the
/// optional chromosome whitelist, and registers one item enumerator for every data source
/// that was supplied. Each registered enumerator is advanced to its first item; sources
/// without any items are dropped.
/// </summary>
/// <param name="compressedReferencePath">path of the compressed reference sequence file</param>
/// <param name="nsdBaseFileName">base path for the generated annotation files</param>
/// <param name="dbSnpFileName">dbSNP file, or null to skip</param>
/// <param name="cosmicVcfFile">COSMIC VCF file; only used when <paramref name="cosmicTsvFile"/> is also given</param>
/// <param name="cosmicTsvFile">COSMIC TSV file; only used when <paramref name="cosmicVcfFile"/> is also given</param>
/// <param name="clinVarFileName">ClinVar XML file, or null to skip; its items are fully loaded and sorted up front</param>
/// <param name="oneKGenomeAfFileName">1000 Genomes allele frequency file, or null to skip</param>
/// <param name="evsFileName">EVS file, or null to skip</param>
/// <param name="exacFileName">ExAC file, or null to skip</param>
/// <param name="customFiles">custom annotation files, or null to skip</param>
/// <param name="dgvFileName">DGV file, or null to skip</param>
/// <param name="oneKSvFileName">1000 Genomes structural variant file, or null to skip</param>
/// <param name="clinGenFileName">ClinGen file, or null to skip</param>
/// <param name="chrWhiteList">comma-separated reference names to restrict processing to; null or empty disables the whitelist</param>
public CreateSupplementaryDatabase(
    string compressedReferencePath,
    string nsdBaseFileName,
    string dbSnpFileName = null,
    string cosmicVcfFile = null,
    string cosmicTsvFile = null,
    string clinVarFileName = null,
    string oneKGenomeAfFileName = null,
    string evsFileName = null,
    string exacFileName = null,
    List<string> customFiles = null,
    string dgvFileName = null,
    string oneKSvFileName = null,
    string clinGenFileName = null,
    string chrWhiteList = null)
{
    _nsdBaseFileName = nsdBaseFileName;
    _dataSources = new List<DataSourceVersion>();

    _iSupplementaryDataItemList = new List<IEnumerator<SupplementaryDataItem>>();
    _supplementaryIntervalList = new List<SupplementaryInterval>();

    Console.WriteLine("Creating supplementary annotation files... Data version: {0}, schema version: {1}",
        SupplementaryAnnotationCommon.DataVersion, SupplementaryAnnotationCommon.SchemaVersion);

    _compressedSequence = new CompressedSequence();
    var compressedSequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(compressedReferencePath), _compressedSequence);
    _renamer = _compressedSequence.Renamer;
    _dataFileManager = new DataFileManager(compressedSequenceReader, _compressedSequence);

    if (!string.IsNullOrEmpty(chrWhiteList))
    {
        Console.WriteLine("Creating SA for the following chromosomes only:");

        // NOTE(review): ChromosomeWhiteList is shared static state and is set to null in the
        // else branch below; this Add presumably relies on it being non-null here - confirm.
        foreach (var refSeq in chrWhiteList.Split(','))
        {
            InputFileParserUtilities.ChromosomeWhiteList.Add(_renamer.GetEnsemblReferenceName(refSeq));
            Console.Write(refSeq + ",");
        }

        Console.WriteLine();
    }
    else
    {
        InputFileParserUtilities.ChromosomeWhiteList = null;
    }

    if (dbSnpFileName != null)
    {
        AddSourceVersion(dbSnpFileName);

        var dbSnpReader = new DbSnpReader(new FileInfo(dbSnpFileName), _renamer);
        var dbSnpEnumerator = dbSnpReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(dbSnpEnumerator);
    }

    // COSMIC needs both of its files; either one alone is ignored
    if (cosmicVcfFile != null && cosmicTsvFile != null)
    {
        AddSourceVersion(cosmicVcfFile);

        var cosmicReader = new MergedCosmicReader(cosmicVcfFile, cosmicTsvFile, _renamer);
        var cosmicEnumerator = cosmicReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(cosmicEnumerator);
    }

    if (oneKGenomeAfFileName != null)
    {
        AddSourceVersion(oneKGenomeAfFileName);

        var oneKGenReader = new OneKGenReader(new FileInfo(oneKGenomeAfFileName), _renamer);
        var oneKGenEnumerator = oneKGenReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(oneKGenEnumerator);
    }

    if (oneKSvFileName != null)
    {
        // the SV file only contributes a source version when the allele frequency file did not
        if (oneKGenomeAfFileName == null)
        {
            AddSourceVersion(oneKSvFileName);
        }

        var oneKGenSvReader = new OneKGenSvReader(new FileInfo(oneKSvFileName), _renamer);
        var oneKGenSvEnumerator = oneKGenSvReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(oneKGenSvEnumerator);
    }

    if (evsFileName != null)
    {
        AddSourceVersion(evsFileName);

        var evsReader = new EvsReader(new FileInfo(evsFileName), _renamer);
        var evsEnumerator = evsReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(evsEnumerator);
    }

    if (exacFileName != null)
    {
        AddSourceVersion(exacFileName);

        var exacReader = new ExacReader(new FileInfo(exacFileName), _renamer);
        var exacEnumerator = exacReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(exacEnumerator);
    }

    if (clinVarFileName != null)
    {
        AddSourceVersion(clinVarFileName);

        var clinVarReader = new ClinVarXmlReader(new FileInfo(clinVarFileName), compressedSequenceReader, _compressedSequence);

        // ClinVar items are materialized and sorted before being enumerated
        var clinVarList = clinVarReader.ToList();
        clinVarList.Sort();

        Console.WriteLine($"{clinVarList.Count} clinvar items read from XML file");

        IEnumerator<ClinVarItem> clinVarEnumerator = clinVarList.GetEnumerator();
        _iSupplementaryDataItemList.Add(clinVarEnumerator);
    }

    if (dgvFileName != null)
    {
        AddSourceVersion(dgvFileName);

        var dgvReader = new DgvReader(new FileInfo(dgvFileName), _renamer);
        var dgvEnumerator = dgvReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(dgvEnumerator);
    }

    if (clinGenFileName != null)
    {
        AddSourceVersion(clinGenFileName);

        var clinGenReader = new ClinGenReader(new FileInfo(clinGenFileName), _renamer);
        var clinGenEnumerator = clinGenReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(clinGenEnumerator);
    }

    if (customFiles != null)
    {
        foreach (var customFile in customFiles)
        {
            AddSourceVersion(customFile);

            var customReader = new CustomAnnotationReader(new FileInfo(customFile), _renamer);
            var customEnumerator = customReader.GetEnumerator();
            _iSupplementaryDataItemList.Add(customEnumerator);
        }
    }

    // Advance every enumerator to its first item and drop the ones that have none.
    // RemoveAll evaluates the predicate once per element, so each enumerator is advanced
    // exactly once; removing entries from inside a foreach over the same list would throw
    // InvalidOperationException as soon as one source turned out to be empty.
    _iSupplementaryDataItemList.RemoveAll(dataEnumerator => !dataEnumerator.MoveNext());

    _additionalItemsList = new List<SupplementaryDataItem>();
}