public void MergeDbSnpClinVar()
{
    // Applies the dbSNP items extracted from one VCF line to a supplementary position
    // creator, merges in the ClinVar items read from RCV000087262.xml, and verifies that
    // exactly one ClinVar entry with the expected fields is present after the merge.
    const string vcfLine = "1 225592188 rs387906416 TAGAAGA CTTCTAG . . RS=387906416;RSPOS=225592188;RV;dbSNPBuildID=137;SSR=0;SAO=1;VP=0x050060000605000002110800;GENEINFO=LBR:3930;WGT=1;VC=MNV;PM;NSN;REF;ASP;LSD;OM";

    var dbsnpReader = new DbSnpReader(_renamer);
    var dbSnpItems = dbsnpReader.ExtractItem(vcfLine);

    var sa = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(225592188));
    foreach (var dbSnpItem in dbSnpItems)
    {
        dbSnpItem.SetSupplementaryAnnotations(sa);
    }

    var xmlReader = new ClinVarXmlReader(new FileInfo(Resources.TopPath("RCV000087262.xml")), _reader, _sequence);
    foreach (var clinVarItem in xmlReader)
    {
        // Each ClinVar item is written into its own creator, which is then merged into
        // the creator that already received the dbSNP items.
        var sa1 = new SupplementaryPositionCreator(new SupplementaryAnnotationPosition(225592188));
        clinVarItem.SetSupplementaryAnnotations(sa1);
        sa.MergeSaCreator(sa1);
    }

    Assert.Equal(1, sa.SaPosition.ClinVarItems.Count);
    foreach (var clinVarEntry in sa.SaPosition.ClinVarItems)
    {
        // Assert.Equal takes (expected, actual); the literal goes first so that a
        // failure message reports the expected and actual values correctly.
        Assert.Equal("RCV000087262.3", clinVarEntry.ID);
        Assert.Equal("C0030779", clinVarEntry.MedGenIDs.First());
        Assert.Equal("Pelger-Huët anomaly", clinVarEntry.Phenotypes.First());
    }
}
public void Skip_entries_with_inconsistant_start_end()
{
    // Reading RCV000342164.xml against a GRCh37 chr9 reference must yield no items.
    var chromosome = new Chromosome("chr9", "9", 1);
    var sequenceProvider = GetSequenceProvider(
        GenomeAssembly.GRCh37,
        chromosome,
        132903739,
        "AAACGCTCATAGAGTAACTGGTTGTGCAGTAAAAGCAACTGGTCTCAAACGCTCATAGAGTAACTGGTTGTGCAGTAAAAGCAACTGGTCTC");
    var reader = new ClinVarXmlReader(Resources.ClinvarXmlFiles("RCV000342164.xml"), sequenceProvider);

    var items = reader.GetItems();

    Assert.False(items.Any());
}
public void MultiVariantEntry()
{
    // RCV000007484.xml is read item by item; each item is validated according to its
    // start position, and any other start position is treated as an error.
    var xmlFile = new FileInfo(Resources.TopPath("RCV000007484.xml"));
    var reader = new ClinVarXmlReader(xmlFile, _reader, _sequence);

    foreach (var clinVarItem in reader)
    {
        var start = clinVarItem.Start;

        if (start == 8045031)
        {
            Assert.Equal("G", clinVarItem.ReferenceAllele);
            Assert.Equal("A", clinVarItem.AltAllele);
        }
        else if (start == 8021911)
        {
            Assert.Equal("GTGCTGGACGGTGTCCCT", clinVarItem.AltAllele);

            // SaAltAllele is checked only after the item has been applied to a creator.
            var sa = new SupplementaryAnnotationPosition(clinVarItem.Start);
            var saCreator = new SupplementaryPositionCreator(sa);
            clinVarItem.SetSupplementaryAnnotations(saCreator);

            Assert.Equal("iGTGCTGGACGGTGTCCCT", clinVarItem.SaAltAllele);
        }
        else
        {
            throw new InvalidDataException($"Unexpected clinvar item start point : {clinVarItem.Start}");
        }
    }
}
public void BasicReadTest()
{
    // Reads RCV000077146.xml and checks the parsed fields of every item it yields.
    var xmlFile = new FileInfo(Resources.TopPath("RCV000077146.xml"));
    var reader = new ClinVarXmlReader(xmlFile, _reader, _sequence);

    foreach (var item in reader)
    {
        // Every item must carry this ID; the remaining checks apply to that record.
        Assert.Equal("RCV000077146.3", item.ID);

        Assert.Equal("17", item.Chromosome);
        Assert.Equal(41234419, item.Start);
        Assert.Equal("A", item.ReferenceAllele);
        Assert.Equal("C", item.AltAllele);
        Assert.Equal(ClinVarXmlReader.ParseDate("2016-07-31"), item.LastUpdatedDate);

        var expectedOrigins = new List<string> { "germline" };
        Assert.True(item.AlleleOrigins.SequenceEqual(expectedOrigins));

        Assert.Equal("C2676676", item.MedGenIDs.First());
        Assert.Equal("145", item.OrphanetIDs.First());
        Assert.Equal("604370", item.OmimIDs.First());
        Assert.Equal("Breast-ovarian cancer, familial 1", item.Phenotypes.First());
        Assert.Null(item.PubmedIds);
    }
}
public void BasicReadTest()
{
    // Reads RCV000077146.xml against a GRCh37 chr17 reference and checks the parsed
    // fields of every item; at least one item must be produced.
    var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, new Chromosome("chr17", "17", 16), 41234419, "A");
    var reader = new ClinVarXmlReader(new FileInfo(Resources.ClinvarXmlFiles("RCV000077146.xml")), sequenceProvider);

    // Materialize once: calling GetItems() separately for the emptiness check and for
    // the loop would enumerate the reader twice.
    var clinVarItems = reader.GetItems().ToList();
    Assert.True(clinVarItems.Any());

    foreach (var clinVarItem in clinVarItems)
    {
        // Every item must carry this ID, so no further dispatch on the ID is needed.
        Assert.Equal("RCV000077146.3", clinVarItem.Id);

        Assert.Equal("17", clinVarItem.Chromosome.EnsemblName);
        Assert.Equal(41234419, clinVarItem.Start);
        Assert.Equal("A", clinVarItem.ReferenceAllele);
        Assert.Equal("C", clinVarItem.AlternateAllele);
        Assert.Equal(ClinVarXmlReader.ParseDate("2016-07-31"), clinVarItem.LastUpdatedDate);
        Assert.True(clinVarItem.AlleleOrigins.SequenceEqual(new List<string> { "germline" }));
        Assert.Equal("C2676676", clinVarItem.MedGenIDs.First());
        Assert.Equal("145", clinVarItem.OrphanetIDs.First());
        Assert.Equal("604370", clinVarItem.OmimIDs.First());
        Assert.Equal("Breast-ovarian cancer, familial 1", clinVarItem.Phenotypes.First());
        Assert.Null(clinVarItem.PubmedIds);
    }
}
public void SkipMicrosatellitesWithoutAltAllele()
{
    // Reading RCV000001054.xml against a GRCh37 chr22 reference must yield no items.
    var chromosome = new Chromosome("chr22", "22", 1);
    var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, chromosome, 46191240, "ATTCT");
    var reader = new ClinVarXmlReader(Resources.ClinvarXmlFiles("RCV000001054.xml"), sequenceProvider);

    var items = reader.GetItems();

    Assert.False(items.Any());
}
public void EmptyRefAndAlt()
{
    // Reading RCV000083638.xml against a GRCh37 chr2 reference must yield no items.
    var chromosome = new Chromosome("chr2", "2", 3);
    var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, chromosome, 31805881, "G");
    var reader = new ClinVarXmlReader(Resources.ClinvarXmlFiles("RCV000083638.xml"), sequenceProvider);

    var items = reader.GetItems();

    Assert.False(items.Any());
}
public void RemoveDuplicationWithWrongRefSequence()
{
    // Reading RCV000267121.xml against a GRCh37 chr3 reference must yield no items.
    var chromosome = new Chromosome("chr3", "3", 1);
    var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, chromosome, 10183702, "GCGGCCGCGGCCCG");
    var reader = new ClinVarXmlReader(Resources.ClinvarXmlFiles("RCV000267121.xml"), sequenceProvider);

    var items = reader.GetItems();

    Assert.False(items.Any());
}
public void Discard_entries_with_unknown_variant_type()
{
    // Reading RCV000485802.xml against a GRCh37 chrX reference must yield no items.
    var chromosome = new Chromosome("chrX", "X", 0);
    var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, chromosome, 66765160, "CAG");
    var reader = new ClinVarXmlReader(Resources.ClinvarXmlFiles("RCV000485802.xml"), sequenceProvider);

    var items = reader.GetItems();

    Assert.False(items.Any());
}
public void SkipMicrosattelite()
{
    // Reading RCV000005426.xml against a GRCh37 chr16 reference must yield no items.
    var chromosome = new Chromosome("chr16", "16", 15);
    var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, chromosome, 87637894, "CTG");
    var reader = new ClinVarXmlReader(Resources.ClinvarXmlFiles("RCV000005426.xml"), sequenceProvider);

    var items = reader.GetItems();

    Assert.False(items.Any());
}
public void RCV000435546_NotMissing()
{
    // Reading RCV000435546.xml against a GRCh37 chr12 reference must yield at least one item.
    var chromosome = new Chromosome("chr12", "12", 11);
    var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, chromosome, 110221557, "CGCGG");
    var reader = new ClinVarXmlReader(Resources.ClinvarXmlFiles("RCV000435546.xml"), sequenceProvider);

    Assert.True(reader.GetItems().Any());
}
public void MissingClinvarInsertionShift2()
{
    // Every item read from RCV000017510.xml must start at position 9324413.
    var xmlFile = new FileInfo(Resources.TopPath("RCV000017510.xml"));
    var reader = new ClinVarXmlReader(xmlFile, _reader, _sequence);

    foreach (var item in reader)
    {
        Assert.Equal(9324413, item.Start);
    }
}
public void NoClinVarItem_due_to_ref_mismatch()
{
    // Reading RCV000000101.xml against a GRCh37 chr10 reference must yield no items.
    var chromosome = new Chromosome("chr10", "10", 10);
    var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, chromosome, 90982267, "A");
    var reader = new ClinVarXmlReader(Resources.ClinvarXmlFiles("RCV000000101.xml"), sequenceProvider);

    var items = reader.GetItems();

    Assert.False(items.Any());
}
public void PubmedTest5()
{
    // Every item read from RCV000000734.xml must have a null PubmedIds value.
    var xmlFile = new FileInfo(Resources.TopPath("RCV000000734.xml"));
    var reader = new ClinVarXmlReader(xmlFile, _reader, _sequence);

    foreach (var item in reader)
    {
        Assert.Null(item.PubmedIds);
    }
}
public void ClinvarInsertion()
{
    // Every item read from RCV000153339.xml must start at position 122318387.
    var xmlFile = new FileInfo(Resources.TopPath("RCV000153339.xml"));
    var reader = new ClinVarXmlReader(xmlFile, _reader, _sequence);

    foreach (var item in reader)
    {
        Assert.Equal(122318387, item.Start);
    }
}
public void OmitOmimFromAltPhenotypes()
{
    // Every item read from RCV000030349.xml must carry exactly one OMIM ID.
    var reader = new ClinVarXmlReader(new FileInfo(Resources.TopPath("RCV000030349.xml")), _reader, _sequence);

    foreach (var clinVarItem in reader)
    {
        // Assert.Single states the "exactly one element" intent directly and reports
        // the collection contents on failure, unlike comparing Count() to 1.
        Assert.Single(clinVarItem.OmimIDs);
    }
}
public void AlternatePhenotype()
{
    // Every item read from RCV000032707.xml must have a non-null Phenotypes value.
    var xmlFile = new FileInfo(Resources.TopPath("RCV000032707.xml"));
    var reader = new ClinVarXmlReader(xmlFile, _reader, _sequence);

    foreach (var item in reader)
    {
        Assert.NotNull(item.Phenotypes);
    }
}
public void SkipMicrosattelite()
{
    // The reader over RCV000005426.xml must not produce any ClinVar item.
    var reader = new ClinVarXmlReader(new FileInfo(Resources.TopPath("RCV000005426.xml")), _reader, _sequence);

    // Current is only meaningful after MoveNext(); reading it on a fresh enumerator does
    // not advance the reader. Asserting that MoveNext() returns false actually checks
    // that no item is produced. The using block disposes the enumerator even when the
    // assertion throws.
    using (var clinvarItems = reader.GetEnumerator())
    {
        Assert.False(clinvarItems.MoveNext());
    }
}
public void EmptyRefAndAlt()
{
    // The reader over RCV000083638.xml must not produce any ClinVar item.
    var reader = new ClinVarXmlReader(new FileInfo(Resources.TopPath("RCV000083638.xml")), _reader, _sequence);

    // Current is only meaningful after MoveNext(); reading it on a fresh enumerator does
    // not advance the reader. Asserting that MoveNext() returns false actually checks
    // that no item is produced. The using block disposes the enumerator even when the
    // assertion throws.
    using (var clinvarItems = reader.GetEnumerator())
    {
        Assert.False(clinvarItems.MoveNext());
    }
}
public void Multiple_significance_from_explanation()
{
    // The first item read from RCV000001752.xml (GRCh38, chr19) must report both
    // significances listed below, in this order.
    var chromosome = new Chromosome("chr19", "19", 19);
    var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh38, chromosome, 12665750, "T");
    var reader = new ClinVarXmlReader(Resources.ClinvarXmlFiles("RCV000001752.xml"), sequenceProvider);

    var items = reader.GetItems().ToList();

    var expectedSignificances = new[] { "pathogenic", "uncertain significance" };
    Assert.Equal(expectedSignificances, items[0].Significances);
}
public void Mising_entry()
{
    // The first item read from RCV000171474.xml (GRCh37, chr13) must have an empty RefAllele.
    var chromosome = new Chromosome("chr13", "13", 12);
    var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, chromosome, 36888396, "C");
    var reader = new ClinVarXmlReader(Resources.ClinvarXmlFiles("RCV000171474.xml"), sequenceProvider);

    var items = reader.GetItems().ToList();
    var firstItem = items[0];

    Assert.Equal("", firstItem.RefAllele);
}
public void Multiple_significance()
{
    // The first item read from RCV000169296.xml (GRCh38, chr15) must report both
    // significances listed below, in this order.
    var chromosome = new Chromosome("chr15", "15", 15);
    var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh38, chromosome, 72349076, "T");
    var reader = new ClinVarXmlReader(Resources.ClinvarXmlFiles("RCV000169296.xml"), sequenceProvider);

    var items = reader.GetItems().ToList();

    var expectedSignificances = new[] { "pathogenic", "likely pathogenic" };
    Assert.Equal(expectedSignificances, items[0].Significances);
}
public void MissingClinvarInsertion2()
{
    // Reading RCV000342164.xml against a GRCh38 chr9 reference must yield exactly one item.
    var chromosome = new Chromosome("chr9", "9", 1);
    var sequenceProvider = GetSequenceProvider(
        GenomeAssembly.GRCh38,
        chromosome,
        132903739,
        "AAACGCTCATAGAGTAACTGGTTGTGCAGTAAAAGCAACTGGTCTCAAACGCTCATAGAGTAACTGGTTGTGCAGTAAAAGCAACTGGTCTC");
    var reader = new ClinVarXmlReader(Resources.ClinvarXmlFiles("RCV000342164.xml"), sequenceProvider);

    var items = reader.GetItems().ToList();

    Assert.Single(items);
}
public void Alternate_phenotypes()
{
    // The first item read from RCV000537563.xml (GRCh37, chr2) must have exactly one phenotype.
    var chromosome = new Chromosome("chr2", "2", 1);
    var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, chromosome, 204732740, "G");
    var reader = new ClinVarXmlReader(Resources.ClinvarXmlFiles("RCV000537563.xml"), sequenceProvider);

    var items = reader.GetItems().ToList();
    var firstItem = items[0];

    Assert.Single(firstItem.Phenotypes);
}
public void NonEnglishChars()
{
    // NIR-900
    // The first phenotype of every item read from RCV000087262.xml must keep its
    // non-ASCII character intact.
    var xmlFile = new FileInfo(Resources.TopPath("RCV000087262.xml"));
    var reader = new ClinVarXmlReader(xmlFile, _reader, _sequence);

    foreach (var item in reader)
    {
        var firstPhenotype = item.Phenotypes.First();
        Assert.Equal("Pelger-Huët anomaly", firstPhenotype);
    }
}
public void MultipleEntryRecordVariant2()
{
    // Reading RCV000007484.xml against a GRCh37 chr1 reference must yield exactly one item.
    var chromosome = new Chromosome("chr1", "1", 1);
    var sequenceProvider = GetSequenceProvider(
        GenomeAssembly.GRCh37,
        chromosome,
        8021910,
        "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT");
    var reader = new ClinVarXmlReader(Resources.ClinvarXmlFiles("RCV000007484.xml"), sequenceProvider);

    var items = reader.GetItems().ToList();

    Assert.Single(items);
}
public void MissingAltAllele()
{
    // Every item read from RCV000120902.xml must have reference allele C and alt allele G.
    var xmlFile = new FileInfo(Resources.TopPath("RCV000120902.xml"));
    var reader = new ClinVarXmlReader(xmlFile, _reader, _sequence);

    foreach (var item in reader)
    {
        Assert.Equal("C", item.ReferenceAllele);
        Assert.Equal("G", item.AltAllele);
    }
}
public void NonEnglishChars()
{
    // Reading RCV000087262.xml against a GRCh37 chr1 reference must yield at least one
    // item, and the first phenotype of every item must keep its non-ASCII character.
    var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, new Chromosome("chr1", "1", 0), 225592188, "TAGAAGA");
    var reader = new ClinVarXmlReader(Resources.ClinvarXmlFiles("RCV000087262.xml"), sequenceProvider);

    // Materialize once: calling GetItems() separately for the emptiness check and for
    // the loop would enumerate the reader twice.
    var clinVarItems = reader.GetItems().ToList();
    Assert.True(clinVarItems.Any());

    foreach (var clinVarItem in clinVarItems)
    {
        Assert.Equal("Pelger-Huët anomaly", clinVarItem.Phenotypes.First());
    }
}
public void PubmedTest4()
{
    // Every item read from RCV000021819.xml must list exactly the PubMed ID below.
    var xmlFile = new FileInfo(Resources.TopPath("RCV000021819.xml"));
    var reader = new ClinVarXmlReader(xmlFile, _reader, _sequence);

    foreach (var item in reader)
    {
        var expectedIds = new List<long> { 8099202 };
        Assert.True(item.PubmedIds.SequenceEqual(expectedIds));
    }
}
public void Remove9DigitsPubmedId()
{
    // Every item read from RCV000207504.xml must list exactly the PubMed IDs below,
    // in this order.
    var xmlFile = new FileInfo(Resources.TopPath("RCV000207504.xml"));
    var reader = new ClinVarXmlReader(xmlFile, _reader, _sequence);

    foreach (var item in reader)
    {
        var expectedIds = new List<long>
        {
            16329078,
            16372351,
            19213030,
            21438134,
            25741868
        };
        Assert.True(item.PubmedIds.SequenceEqual(expectedIds));
    }
}