/// <summary>
/// Builds the cDNA of a reverse-strand transcript (exon / gap / exon) with four RNA edits, the last
/// of which has an empty replacement, and compares the coding slice with the expected sequence.
/// </summary>
public void With_rnaEdits_reverse_deletion_utr()
{
    // NM_001317107.1 chr14:22138125-22139232
    var genomicSeq = new SimpleSequence(
        "ATATGGTATGTAACTTATTCTTTGCAAGGCGCTTCTTTAATTTGGAGCACCACGTATCCTAAGGACGTAGACATTTTCATTTTTCTTCTTTTCTCTCTTTTCTCCCCACTAACTTGTTTAAGGCACTCTTCATTTCTTCATTCCTAAGGGTATAGATAATGGGGTTCAGCAGGGGGGTGACTGCAGTGAAAAACACAGATACTGCCTTGTCCTCTGGGAGGCTGGTGGATGGGCGGGAATAGATGAAGATGCAGTGTCCCAGGAACAGTGTAACTACAGTGAGATGGGCTGCACAGGTGGACAGGGCCTTCCACTTGCCCTTGGAGATCTGCTGCCTCAGACTCACCAGGATGACTGCGTAGGACACCACCAGGACCACAAAACAGACCACGGAGATCAATCCACTGTTGGAGACAATGAGGATCTCAAGGACGTGGGTGTGTCAATGCAGGCCAGCTTGATCACCTGAGGTACATCACAGAAGAAGTTGTCAATCTCATCAGGACCACAGTAGGGCAGCTTGATGGTAAGGGAGGTGAGGGCTATGGAGTGGATGGTCCCTCCTGTCCAGAGGGCCACAGCCAGCAGCACACATACCTTCCAGTTCATCACTATCATGTACTGCAGGGGTTTACAGATGGCCACATACCGATCATAGGCCATGACGGTGAGGAGGAAGATCTCTGTGCAGGCAAAGAGGTGCAGGAAGAACATCTGGGTCACACAGGCATCAAAAGAGATGAGCTTTTCCTCTGACCACACGTCTCTCAGCATCTTGGGGACAGTGACAGTGGAGTGGCAGACATCAATAAAGGACAGGTTGCTGAGGAAGAAATACATGGGAGTATGGAGCCGGTGGTCATAGATAATAGTTATGACAATGAGAACATTCCCAATCAGTGTCAGGACATAAAAAATGAGGAACATGGAAAACATAGCTATCCGTGCCTTATGATTTACAGATAAACCTCTAAGCCGAAAATATGTCACTAAAGAAGTTTGATTGAGTAGGATGGCCTCTTCCATTCTCTTTGTTAGACAACCTGTAAAGAATTAGAAAAAAAGTCTAATATAACACAGTATCTGCATCAATCATTTGGTCATTTAA",
        22138125 - 1);

    var codingRegion = new CodingRegion(22138201, 22139150, 83, 1030, 948);

    var regions = new ITranscriptRegion[]
    {
        new TranscriptRegion(TranscriptRegionType.Exon, 1, 22138125, 22138561, 670, 1106),
        new TranscriptRegion(TranscriptRegionType.Gap, 1, 22138562, 22138563, 669, 670),
        new TranscriptRegion(TranscriptRegionType.Exon, 1, 22138564, 22139232, 1, 669)
    };

    var rnaEdits = new IRnaEdit[]
    {
        new RnaEdit(905, 905, "T"),
        new RnaEdit(796, 796, "C"),
        new RnaEdit(679, 679, "A"),
        new RnaEdit(670, 671, "")
    };

    var cdnaSequence = new CdnaSequence(genomicSeq, codingRegion, regions, true, rnaEdits);

    var expectedCodingSeq =
        "ATGGAAGAGGCCATCCTACTCAATCAAACTTCTTTAGTGACATATTTTCGGCTTAGAGGTTTATCTGTAAATCATAAGGCACGGATAGCTATGTTTTCCATGTTCCTCATTTTTTATGTCCTGACACTGATTGGGAATGTTCTCATTGTCATAACTATTATCTATGACCACCGGCTCCATACTCCCATGTATTTCTTCCTCAGCAACCTGTCCTTTATTGATGTCTGCCACTCCACTGTCACTGTCCCCAAGATGCTGAGAGACGTGTGGTCAGAGGAAAAGCTCATCTCTTTTGATGCCTGTGTGACCCAGATGTTCTTCCTGCACCTCTTTGCCTGCACAGAGATCTTCCTCCTCACCGTCATGGCCTATGATCGGTATGTGGCCATCTGTAAACCCCTGCAGTACATGATAGTGATGAACTGGAAGGTATGTGTGCTGCTGGCTGTGGCCCTCTGGACAGGAGGGACCATCCACTCCATAGCCCTCACCTCCCTTACCATCAAGCTGCCCTACTGTGGTCCTGATGAGATTGACAACTTCTTCTGTGATGTACCTCAGGTGATCAAGCTGGCCTGCATTGACACCCACGTCATTGAGATCCTCATTGTCTCCAACAGTGGATTGATCTCCGTGGTCTGTTTTGTGGTCCTGGTGGTGTCCTACGCAGTCATCCTGGTGAGTCTGAGGCAGCAGATCTCCAAGGGCAAGCGGAAGGCCCTGTCCACCTGTGCAGCCCATCTCACTGTAGTTACACTGTTCCTGGGACACTGCATCTTCATCTATTCCCGCCCATCCACCAGCCTCCCAGAGGACAAGGTAGTATCTGTGTTTTTCACTGCAGTCACCCCCCTGCTGAACCCCATTATCTATACCCTTAGGAATGAAGAAATGAAGAGTGCCTTAAACAAGTTAGTGGGGAGAAAAGAGAGAAAAGAAGAAAAATGA";

    // only the coding slice of the cDNA is compared; CdnaStart is 1-based, Substring is 0-based
    var observedCodingSeq = cdnaSequence.GetCdnaSequence()
        .Substring(codingRegion.CdnaStart - 1, codingRegion.Length);

    Assert.Equal(expectedCodingSeq, observedCodingSeq);
}
/// <summary>
/// Writes a translation to an in-memory stream, reads it back, and verifies that the cDNA start,
/// the versioned protein ID, and the peptide sequence are preserved.
/// </summary>
public void Translation_EndToEnd()
{
    const string proteinIdWithVersion = "ENSP00000446475.7";
    const string peptide = "VEIDSD";

    ICodingRegion codingRegion = new CodingRegion(100, 200, 300, 400, 101);
    var peptideSeqs = new[] { peptide };

    ITranslation expected = new Translation(codingRegion, CompactId.Convert(proteinIdWithVersion, 7), peptide);
    ITranslation observed;

    using (var stream = new MemoryStream())
    {
        // the writer is disposed before the stream is rewound so that everything is written out;
        // NOTE(review): the third argument presumably keeps the stream open - confirm
        using (var writer = new ExtendedBinaryWriter(stream, Encoding.UTF8, true))
        {
            expected.Write(writer, 0);
        }

        stream.Position = 0;

        using (var reader = new BufferedBinaryReader(stream))
        {
            observed = Translation.Read(reader, peptideSeqs);
        }
    }

    Assert.NotNull(observed);
    Assert.Equal(codingRegion.CdnaStart, observed.CodingRegion.CdnaStart);
    Assert.Equal(proteinIdWithVersion, observed.ProteinId.WithVersion);
    Assert.Equal(peptide, observed.PeptideSeq);
}
/// <summary>
/// Builds a coding sequence for a four-exon transcript (constructed with <c>true</c> as the fourth
/// argument and a start exon phase of 1) and compares its leading bases with the expected string.
/// </summary>
public void Create()
{
    // ENST00000374673.3
    var sequence = new SimpleSequence(
        "GGGGTGTGTCTCCAGGGCCTTCCGCACTCAGCCAGGGAGAGCAAACAAACAGGCTTGGGGGACTGGGGAGGGGGGAAAGCGGAGGGGCAGGGTAGGGGCGGGGCAGGAGTGGAAGGCGGGGCAGGAGCAAGCGGCCTGGGCAGGGCAAGGGGGCCTCAGCTGGACCCTCGGATACTCACGGCAGTTGGCTTCATCAGTTCGGTCCTCACAGTCAAAGTCACCATCGCAGCGCCACAGCTTGAGGGCACAATGTCCATTCCCGCAGGGGAACTCGTTGGGCTCACAGGGTGGCGGGGGGCCTAGGAGACCGGGCAGGGGTCAGCAGCATCCTCCCGGGCCAGCTTCCTGCTCCCCGCACCCACCTGCACCCCTGCCGGTGCGCACCACAGTCTAGCTCATCGCTGCCGTCCTCGCAGTCCTCCTGTCCGTCGCAGAGGTAGTCTCTGGGGATGCAGTGCCCATTGCGGCATGCGGCCTCCTGGGGCCCACAGGGCAGGGGCCTGACGGAACCGGGAAGCAGGGGCTGAGGAGCGTGGGTGACTGGTGGCTGTCGCATGATGGTTGTCTCTGGCCGGGGCGGTAAAGATGTCGTCTCCACAAGGAGAGAGAATGTGGGGCTGATACCCAGGACTGGCTCCTCTGTGGATAGATTCCGCTTGGCATTTGGCAGAAGCAGATGGCTCCTCACCTGCTCCTTGTCCCCAACCCTCCCCAGGCCCACCCTGTACTCCCCAACACCACTCCCTGCCACCCCCTGCCTGGCTCTGTCATCACCCTTCCTATGCCCCCATCCTCTGCCTGCACCAAACCCTCATAGTCCTTGATGGGCTCCAAGACCCAGGTGTAGGACCCTGGCCCTCCCCTGGCACCCAAACCACTCGTGGCCCCGGACATCCCCTCACCACAATTGAGCTCATCAGACATGTCCCTGCAGTCGGGCCGCCGGTCACAGCGATACTCCAGGGCCACACACTCATTGTAGCTGTGGCAGGCAAACTCGGCCTCCGTGCAGGCTCTTGGGAACTGGGGCACTGCAGGTGGAAAGGAAGCAGACTGGAGTCAGAGGCGGCAGGAGGCAGGTGCGGGAAGCTGTAGGTGCTGTGTGGCTGGAGTGGGCTCCAGGGCCCTGTGTCAGGCAGCTCGGTTTCTGGCAGGCACAACGAGGGCAAGCAGCACACACTAGACACATCCACAGCACACGTGGGGCATGGGACATGCGGCAGTGGCCTCCCCCATCTCTAAAACAGACCCCACACACAGTTGACATGCCACACGCATGCAACCACCACACCACACACATGCAGGCCACAGCCTGGCCCAGTGAGGACAAAGAAGGAGGGGAGAAGGGAGTGCCCAGCTGTCTTGGGCTGTGCCCAGCCAGCCATCTTGCCCACACCCTTCTTTCCTCTCCATCCTTTAAAAAATTTTTTTCTCTCTTCTTTTTTATTTTTTTAGAGACAGGGTCTTGCTACGTTGCCCAAGCTGGTCTCGAACTCTTTGCCTCAAGCAATCCTCCCGTCTTGGCCTCCCAAAGTGCTGGGGTTACAGGCGTGAGCCCCTGCACCCGGCCTCCTCTCCAACCTTAACTTCTCTAGGAACCTGGCTGGGCCTCGGCCTGGCTTACACTCTCACCTGGTGTCACTGCGACCGCCACAGCGGCCGGCGGGGGTGGGGGGGTCTGTGCTGGAAAGGAAGATGTGATCAGTGGCTGTTCCACCTGGGAGCCGGGAGCTGAGGGCTGCAGGGCTGGGCCACATTCCACCATCCCTAGCCAGGAGGACTTATTGAAAAGTGAGAGAGGAGGGCTGGACCCCCAGCAGTCTTTAGACCTGGGCCTGATGATGCAGAAGAGCAAGCTTGATCTCTGGGTGCAATAATTAAGGGTTTTTGTTTGTTTGTCTTGTTTTAGAGGCAGGGTTTTGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCATGATCCTAGCTCACTGCAGCCTCAAACTCCTGGGCTCCGGTGATCCTC",
        22213528);

    var codingRegion = new CodingRegion(22213728, 22215214, 1, 538, 538);

    var regions = new ITranscriptRegion[]
    {
        new TranscriptRegion(TranscriptRegionType.Exon, 4, 22213728, 22213827, 439, 538),
        new TranscriptRegion(TranscriptRegionType.Intron, 3, 22213828, 22213912, 438, 439),
        new TranscriptRegion(TranscriptRegionType.Exon, 3, 22213913, 22214167, 184, 438),
        // intron 2 starts right after exon 3 ends (22214167); it was previously listed as starting
        // at 22214166, which overlapped the exon, unlike every other intron in this fixture
        new TranscriptRegion(TranscriptRegionType.Intron, 2, 22214168, 22214430, 183, 184),
        new TranscriptRegion(TranscriptRegionType.Exon, 2, 22214431, 22214559, 55, 183),
        new TranscriptRegion(TranscriptRegionType.Intron, 1, 22214560, 22215160, 54, 55),
        new TranscriptRegion(TranscriptRegionType.Exon, 1, 22215161, 22215214, 1, 54)
    };

    const string expectedResults =
        "NCAGCACAGACCCCCCCACCCCCGCCGGCCGCTGTGGCGGTCGCAGTGACACCAGTGCCCCAGTTCCCAAGAGCCTGCACGGAGGCCGAGTTTGCCTGCCACAGCTACAATGAGTGTGTGGCCCTGGAGTATCGCTGTGACCGGCGGCCCGACTGCAGGGACATGTCTGATGAGCTCAATTGTGAGGAGCCAGTCCTGGGTATCAGCCCCACATTCTCTCTCCTTGTGGAGACGACATCTTTACCGCCCCGGCCAGAGACAACCATCATGCGACAGCCACCAGTCACCCACGCTCCTCAGCCCCTGCTTCCCGGTTCCGTCAGGCCCCTGCCCTGTGGGCCCCAGGAGGCCGCATGCCGCAATGGGCACTGCATCCCCAGAGACTACCTCTGCGACGGACAGGAGGACTGCGAGGACGGCAGCGATGAGCTAGACTGTGGCCCCCCGCCACCCTGTGAGCCCAACGAGTTCCCCTGCGGGAATGGACATTGTGCCCTCAAGCTGTGGCGCTGCGATGGTGACTTTGACTGTGAGGACCG";

    var codingSequence = new CodingSequence(sequence, codingRegion, regions, true, 1);

    // compare only as many leading bases as the expected string provides
    var observedResults = codingSequence.Substring(0, expectedResults.Length);

    Assert.Equal(expectedResults, observedResults);
}
/// <summary>
/// Builds the cDNA of a reverse-strand transcript whose single RNA edit carries nine bases at
/// cDNA position 999/998, and compares the coding slice (extended by the edit length) with the
/// expected sequence.
/// </summary>
public void RnaEdits_in_coding_sequence_reverse_insertion()
{
    // NM_000682.6, chrom: chr2:96778623-96781984
    var genomicSeq = new SimpleSequence(
        "CTTATTACAAAATATCCTTTATTGATAAAATAGCTCAGAGTTTAAAAAAAAAAAAAACACCACCTGCATGTCGCAATAAGAGGTCACAGGCAAGAACACTGGGGGTCCCATGGGGCGCACACAAGACCGGCCAGCAGAGGGTCACAGTCAGTCCCTCTCCTGGCCCAGCTCCCCACCACATCCCAGGGCGATACTCTGGCCTCAACAACCCACTGAGGACCAAGCTGGGAAGCCTCCCACACCCCAGGAAGGACTCTTTTTGGTCCCCTCCATTCTCTCTACACCCAGAAAACTCCCTCGGTGCCCTTCCAAATCTAGCAGGTCCATCTGGCCCATTCCCCCGACACCTGCCAAGCTAAGATGCCTACTGGCCCAATGTTGAAGCCAGGCCCTCTCCAAGGGAAGGCCGATAAACCTCCTTTCCACACTTCCAACTGTTCTGGGTGCCAGGTTTTGGGGTGGGACTGAGAACCAGGAAGCAGGGGTCCTCAATGCACAGCCCCATCAGCATTGCGGGGAGCAGCGTGGCTGGGTCCGAGGCAGTCCACAAGCACCCACCTGGGGGGATCAGTTGTGGTTCACAAGGACTCATTTGGGGCTTGGAGACCTGGCCGGGCACTCCAGTGGGAGGCTCCCCTAGGGGCGCACCAGGCTCTGATGCCAGTACCCCACCTGGGGGCGCTGCCACCTGTCACAGGCTCTCATCTTAGACTGTTGCCGAGGTGTGGATATTTTGAGCTGTCTTGGGGAGACAATTTGCCTCCTTGATGACAAAAGACTTATCCCCCACTGGGGAGACCCAAGCCACTAAAAACCCTCTTGGTGTTGCCGGTGAAATGTCGAAACGTTGTCATGTAGCGTAATAACTCAGACCTTTGCAGCCAGAAGAACACATTCTCAAAGAGATCCTTTAACTTGAAATAGTGATTCTGTCTGCCACTCCCGGCTTCCAGTTCGGGGTAGGAATTCACACACCCCAGGGACAGAACAAAAGTCTACAGGAAGACAGGTGGTGGTAAACACAGAGGAAAGGGATTTTTATATCACCATATAATCACATTTTTGGTTCTCTAGTGTGTTCCCCCACAGAGCTCAAAGCTTTCTGCAAAGCCTTTCATCTCCCTGCAGCAAGTAGGCAGTGAGCTATTGTCGCCCCGATTTTTGCAGGGGGTGAATGCCAGTGATCGGGGATCTCCCGTCGAGGCAGAGACCAGGCCTCCAAGACCGCCCCAGCGAGGCATCCACGTGGCCACCCACCTACCGGAGGGGTGCTGGGTAAGGAAGCCGATCCATTGTTCTGGCTTTCAAAGGAACCACAGATCCGAAAACAGGCAAAGGGGGAAAGGAGGGCCCAGAGACGATGCCACCCCATAAGCCCCCATCCCAGCGCCTGCCAGGGACCGCGAGTGCCTAGCGTGGGTGATCAGTCTTCGTTTCTTCCTCCCCCTCAGCAGCAGGCCCCACTGGGAAAAGTGGAAGGCTGGCTCCGTGCTCTTTGTGGGTGGGGGGGAGATGAAAAAGAAACGAAAACACCACAAGCAAGTGACCTGCCAGGAACACAAGGTCCTCAAGAAAGGGAAGCCCAGACATTGGTCTGGAGAGCATGGGGCTCTGGGAAGAAAGTGCTCTCTCTTCTCCTGGTCTTGGCTATGTTCCAGAGGATTTGAACCACCTCCATCGGCCTGTGCTCAGGGAGAGGGTGGAGAAGGGGTCCCCCACAGCTAAGCCGGCAAGGGGAAGCTTCACTGGGACCCTTGCTAGCAGCCCCCCTGCCCACCCCTCCCAAGGGGTTCCTAAGATGAGGCCTACAGGATCTGGGCAGGGAGCAGAAAGCCCAGGGGAGGCAGCCACACACAGCAGGGCAAGAAGCAGGGTGACCCCGGCGCCACCGCACCAACCCCACAGGGGCAGCGCAGGCGGGCTCACCAGGCCGTCTGGGTCCACGGGCGGCACAGGATCCTCCGGAAGGCACGGCGGAAGTCCTGGTTGAAGATGGTGTAGATAACAGGGTTCAGTGAGCTGTTGCAGTAGCCGATCCAGAAGAAGAACTGGAAGAGGCCATGGGGCACCTTGCAGTGCTTCGGGCAGATGGCTCCCAGGCTGTAGCTGAAGAAGAAGGGGAACCAGCAGAGCACAAAAACGCCAATGACCACAGCCAGCACGAAGGTGAAGCGCTTCTCCCGGGTCAGCTGCGCCCGTCGACGCCACCACTGCCCACCTATAGCACCCACGCCCCTGCCCAGGAGCACCTGGCCACGTAGGGTGGCCAGCACCCGGGAGCCCTGTGGCTGCTGCAGCGGGGGGCTGCAAGCTGAGGCCGGAGACACTGGCACTGCCTGGGGTTCACACTCTTCCTCCTCCTCCTCCTCCTCTTCAGCTTCATCCTCTGGAGATGCCCCACAAACACCCTCCTTCTGGCCCTGGCCTGAGTTGGGAAGGGCAGCCCAACTGGGTGGCAAGGCCCGGGTCCCAGTATCTTCAGGGGTCTCCCCCTCCTCCTTCTCCCCAGTGGACTTCGAGTGTCCGTTGACCTCTCTGGCAGAAGCCACAGAGGCCAGGGCTGGCAGTTTGGCTGAGGCCAAAGCCCCACCATGGTCGGGTCGGGGCTGCTTGGACTCACCCTGCCCAGGCCCCCCCTTGGCCCTGGGACCTCTGCGGTTGCTGCGTTTGGCGATCAGGTAGATGCGCAGGTAGACAAGGATCATGATGAGGCAAGGAGCAAAGAAAGATCCGATGCTGGAGGCCAGGATGTACCAGGCCTCCTGGTTGAGCTTGCACTGGGGGCGCCCGCGCGGCTGGGGGCCCTGGTCGCCCTTGTAGATGAGGGGCGGCAGCGAGATGACGGCGGCGATGAGCCACACAGTGAGGATGATGCACTTGATGCGGCGCGGGGTGCGCTTGGAGTTGTACTCCAGCGCGCGGCTCACGGCCCAGTAGCGGTCCAGGCTGATGGCGCACAGGTGCACGATGGACGAGGTGCAGAAGAGCACGTCGAGCGCCAGGTACACCTCGCACCACGTGCGCCGGAAGTACCAGTAGCCCAGCAGCTCGTTGGCCAGCGAGAAAGGGATGATGAGCGTGGCCACCAGGATGTCGGCGGCGGCCAGCGACACCAGGAACAGGTTCTGAGGGGCGCGCAGCGAGCGGCTGGTCAACACAGCCAGGATGACCAGAGCGTTGCCGAAGATGGTAAAGAGAATGAGGAAGGTGATGGCCGCCGCTATGGCCGCTGTGGCCTGCACGGAGTAGGGGTCCTGGTGGTCCATGACGGGGCGGGAGGTGGGCAGAGGGAGCGCTGCCCGCCCAGTGCGCACCGTGGACGACAGCGCTGCCCGGCTCGGCTAGACAAGAGCGTCGCCCCT",
        96778623 - 1);

    var codingRegion = new CodingRegion(96780545, 96781888, 97, 1449, 1344);

    // the cDNA coordinates of the two exon records jump from 998 to 1008
    var regions = new ITranscriptRegion[]
    {
        new TranscriptRegion(TranscriptRegionType.Exon, 1, 96778623, 96780986, 1008, 3371),
        new TranscriptRegion(TranscriptRegionType.Exon, 1, 96780987, 96781984, 1, 998)
    };

    var rnaEdits = new IRnaEdit[] { new RnaEdit(999, 998, "AGAGGAGGA") };

    const bool onReverseStrand = true;

    var cdnaSequence = new CdnaSequence(genomicSeq, codingRegion, regions, onReverseStrand, rnaEdits);

    var expectedCodingSeq =
        "ATGGACCACCAGGACCCCTACTCCGTGCAGGCCACAGCGGCCATAGCGGCGGCCATCACCTTCCTCATTCTCTTTACCATCTTCGGCAACGCTCTGGTCATCCTGGCTGTGTTGACCAGCCGCTCGCTGCGCGCCCCTCAGAACCTGTTCCTGGTGTCGCTGGCCGCCGCCGACATCCTGGTGGCCACGCTCATCATCCCTTTCTCGCTGGCCAACGAGCTGCTGGGCTACTGGTACTTCCGGCGCACGTGGTGCGAGGTGTACCTGGCGCTCGACGTGCTCTTCTGCACCTCGTCCATCGTGCACCTGTGCGCCATCAGCCTGGACCGCTACTGGGCCGTGAGCCGCGCGCTGGAGTACAACTCCAAGCGCACCCCGCGCCGCATCAAGTGCATCATCCTCACTGTGTGGCTCATCGCCGCCGTCATCTCGCTGCCGCCCCTCATCTACAAGGGCGACCAGGGCCCCCAGCCGCGCGGGCGCCCCCAGTGCAAGCTCAACCAGGAGGCCTGGTACATCCTGGCCTCCAGCATCGGATCTTTCTTTGCTCCTTGCCTCATCATGATCCTTGTCTACCTGCGCATCTACCTGATCGCCAAACGCAGCAACCGCAGAGGTCCCAGGGCCAAGGGGGGGCCTGGGCAGGGTGAGTCCAAGCAGCCCCGACCCGACCATGGTGGGGCTTTGGCCTCAGCCAAACTGCCAGCCCTGGCCTCTGTGGCTTCTGCCAGAGAGGTCAACGGACACTCGAAGTCCACTGGGGAGAAGGAGGAGGGGGAGACCCCTGAAGATACTGGGACCCGGGCCTTGCCACCCAGTTGGGCTGCCCTTCCCAACTCAGGCCAGGGCCAGAAGGAGGGTGTTTGTGGGGCATCTCCAGAGGATGAAGCTGAAGAGGAGGAAGAGGAGGAGGAGGAGGAGGAAGAGTGTGAACCCCAGGCAGTGCCAGTGTCTCCGGCCTCAGCTTGCAGCCCCCCGCTGCAGCAGCCACAGGGCTCCCGGGTGCTGGCCACCCTACGTGGCCAGGTGCTCCTGGGCAGGGGCGTGGGTGCTATAGGTGGGCAGTGGTGGCGTCGACGGGCGCAGCTGACCCGGGAGAAGCGCTTCACCTTCGTGCTGGCTGTGGTCATTGGCGTTTTTGTGCTCTGCTGGTTCCCCTTCTTCTTCAGCTACAGCCTGGGAGCCATCTGCCCGAAGCACTGCAAGGTGCCCCATGGCCTCTTCCAGTTCTTCTTCTGGATCGGCTACTGCAACAGCTCACTGAACCCTGTTATCTACACCATCTTCAACCAGGACTTCCGCCGTGCCTTCCGGAGGATCCTGTGCCGCCCGTGGACCCAGACGGCCTGGTGA";

    // the compared slice is longer than codingRegion.Length by the number of bases in the edit
    var rnaEditLength = rnaEdits[0].Bases.Length;
    var observedCodingSeq = cdnaSequence.GetCdnaSequence()
        .Substring(codingRegion.CdnaStart - 1, codingRegion.Length + rnaEditLength);

    Assert.Equal(expectedCodingSeq, observedCodingSeq);
}
/// <summary>
/// Creates a translation whose coding region copies the genomic and cDNA coordinates of
/// <paramref name="oldCodingRegion"/> but uses <paramref name="cdsLength"/> as its length.
/// </summary>
/// <param name="oldCodingRegion">coding region that supplies Start, End, CdnaStart and CdnaEnd</param>
/// <param name="cdsLength">value passed as the last argument of the new coding region</param>
/// <param name="proteinId">protein ID handed to the translation</param>
/// <param name="peptideSeq">peptide sequence handed to the translation</param>
/// <returns>the newly created translation</returns>
private static ITranslation GetTranslation(ICodingRegion oldCodingRegion, int cdsLength, CompactId proteinId,
    string peptideSeq)
{
    var resizedCodingRegion = new CodingRegion(
        oldCodingRegion.Start,
        oldCodingRegion.End,
        oldCodingRegion.CdnaStart,
        oldCodingRegion.CdnaEnd,
        cdsLength);

    return new Translation(resizedCodingRegion, proteinId, peptideSeq);
}
/// <summary>
/// cDNA position 681 in a coding region that starts at cDNA position 83 is expected to map to
/// CDS position 599.
/// </summary>
public void GetCdsPosition_Snv_AfterOutFrameRnaEditDeletion()
{
    // NM_001317107.1
    const byte phase = 0;
    const int cdnaPosition = 681;
    var codingRegion = new CodingRegion(22138201, 22139150, 83, 1030, 948);

    (int observedCdsStart, _) =
        MappedPositionUtilities.GetCdsPositions(codingRegion, cdnaPosition, cdnaPosition, phase, false);

    Assert.Equal(599, observedCdsStart);
}
/// <summary>
/// cDNA position 1010 in a coding region that starts at cDNA position 97 is expected to map to
/// CDS position 914.
/// </summary>
public void GetCdsPosition_Snv_AfterInframeRnaEditInsertion()
{
    // NM_000682.6
    const byte phase = 0;
    const int cdnaPosition = 1010;
    var codingRegion = new CodingRegion(96780545, 96781888, 97, 1449, 1344);

    (int observedCdsStart, _) =
        MappedPositionUtilities.GetCdsPositions(codingRegion, cdnaPosition, cdnaPosition, phase, false);

    Assert.Equal(914, observedCdsStart);
}
/// <summary>
/// cDNA position 1343 in a coding region that starts at cDNA position 1 is expected to map to
/// the same CDS position, 1343.
/// </summary>
public void GetCdsPosition_Snv_AfterOutframeRnaEditInsertion()
{
    // NM_033517.1
    const byte phase = 0;
    const int cdnaPosition = 1343;
    var codingRegion = new CodingRegion(51113070, 51169740, 1, 5196, 5157);

    (int observedCdsStart, _) =
        MappedPositionUtilities.GetCdsPositions(codingRegion, cdnaPosition, cdnaPosition, phase, false);

    Assert.Equal(1343, observedCdsStart);
}
/// <summary>
/// With a start exon phase of 1, cDNA positions 29/28 are expected to map to CDS positions 30/29.
/// </summary>
public void GetCdsPosition_Forward_Insertion_WithStartExonPhase()
{
    const byte phase = 1;
    var codingRegion = new CodingRegion(6413107, 6415837, 1, 953, 953);

    var (observedStart, observedEnd) =
        MappedPositionUtilities.GetCdsPositions(codingRegion, 29, 28, phase, true);

    Assert.Equal(30, observedStart);
    Assert.Equal(29, observedEnd);
}
/// <summary>
/// cDNA positions 486/485 in a coding region that starts at cDNA position 262 are expected to
/// map to CDS positions 225/224.
/// </summary>
public void GetCdsPosition_Forward_Insertion()
{
    const byte phase = 0;
    var codingRegion = new CodingRegion(78001559, 78024355, 262, 495, 234);

    var (observedStart, observedEnd) =
        MappedPositionUtilities.GetCdsPositions(codingRegion, 486, 485, phase, true);

    Assert.Equal(225, observedStart);
    Assert.Equal(224, observedEnd);
}
/// <summary>
/// Builds the shared fixtures: one MZT2B transcript stored in <c>_originTranscripts</c> and two
/// MZT2A transcripts stored in <c>_partnerTranscripts</c>.
/// </summary>
public GeneFusionUtilitiesTests()
{
    IGene mzt2B = new Gene(ChromosomeUtilities.Chr2, 0, 0, false, "MZT2B", 0, CompactId.Empty, CompactId.Empty);
    IGene mzt2A = new Gene(ChromosomeUtilities.Chr2, 0, 0, true, "MZT2A", 0, CompactId.Empty, CompactId.Empty);

    // ENST00000425361 (MZT2B)
    var regions425361 = new ITranscriptRegion[]
    {
        new TranscriptRegion(TranscriptRegionType.Exon, 1, 130181729, 130181797, 1, 61),
        new TranscriptRegion(TranscriptRegionType.Intron, 1, 130181798, 130182626, 61, 62),
        new TranscriptRegion(TranscriptRegionType.Exon, 2, 130182627, 130182775, 62, 210),
        new TranscriptRegion(TranscriptRegionType.Intron, 2, 130182776, 130190468, 210, 211),
        new TranscriptRegion(TranscriptRegionType.Exon, 3, 130190469, 130190713, 211, 455)
    };

    var codingRegion425361 = new CodingRegion(130181737, 130190626, 1, 368, 369);
    var translation425361 = new Translation(codingRegion425361, CompactId.Empty, null);

    _enst00000425361 = new Transcript(ChromosomeUtilities.Chr2, 130181737, 130190713,
        CompactId.Convert("ENST00000425361", 5), translation425361, BioType.other, mzt2B, 0, 0, false,
        regions425361, 0, null, 0, 0, Source.Ensembl, false, false, null, null);

    _originTranscripts = new[] { _enst00000425361 };

    // ENST00000427024 (MZT2A)
    var regions427024 = new ITranscriptRegion[]
    {
        new TranscriptRegion(TranscriptRegionType.Exon, 5, 131464900, 131465047, 532, 679),
        new TranscriptRegion(TranscriptRegionType.Intron, 4, 131465048, 131470205, 531, 532),
        new TranscriptRegion(TranscriptRegionType.Exon, 4, 131470206, 131470343, 394, 531),
        new TranscriptRegion(TranscriptRegionType.Intron, 3, 131470344, 131472067, 393, 394),
        new TranscriptRegion(TranscriptRegionType.Exon, 3, 131472068, 131472182, 279, 393),
        new TranscriptRegion(TranscriptRegionType.Intron, 2, 131472183, 131491875, 278, 279),
        new TranscriptRegion(TranscriptRegionType.Exon, 2, 131491876, 131492024, 130, 278),
        new TranscriptRegion(TranscriptRegionType.Intron, 1, 131492025, 131492206, 129, 130),
        new TranscriptRegion(TranscriptRegionType.Exon, 1, 131492207, 131492335, 1, 129),
        new TranscriptRegion(TranscriptRegionType.Intron, 0, 131492336, 131492341, 0, 0)
    };

    var codingRegion427024 = new CodingRegion(131470316, 131492335, 1, 421, 423);
    var translation427024 = new Translation(codingRegion427024, CompactId.Empty, null);

    _enst00000427024 = new Transcript(ChromosomeUtilities.Chr2, 131464900, 131492335,
        CompactId.Convert("ENST00000427024", 5), translation427024, BioType.other, mzt2A, 0, 0, false,
        regions427024, 0, null, 0, 0, Source.Ensembl, false, false, null, null);

    // ENST00000309451 (MZT2A)
    var regions309451 = new ITranscriptRegion[]
    {
        new TranscriptRegion(TranscriptRegionType.Exon, 3, 131483960, 131484218, 366, 624),
        new TranscriptRegion(TranscriptRegionType.Intron, 2, 131484219, 131491875, 365, 366),
        new TranscriptRegion(TranscriptRegionType.Exon, 2, 131491876, 131492024, 217, 365),
        new TranscriptRegion(TranscriptRegionType.Intron, 1, 131492025, 131492206, 216, 217),
        new TranscriptRegion(TranscriptRegionType.Exon, 1, 131492207, 131492422, 1, 216)
    };

    var codingRegion309451 = new CodingRegion(131484061, 131492376, 47, 523, 477);
    var translation309451 = new Translation(codingRegion309451, CompactId.Empty, null);

    var enst00000309451 = new Transcript(ChromosomeUtilities.Chr2, 131483960, 131492422,
        CompactId.Convert("ENST00000309451", 6), translation309451, BioType.other, mzt2A, 0, 0, false,
        regions309451, 0, null, 0, 0, Source.Ensembl, false, false, null, null);

    _partnerTranscripts = new[] { _enst00000427024, enst00000309451 };
}
/// <summary>
/// A cDNA range of 95-648 begins before the coding region's cDNA start (617): the CDS start is
/// expected to be -1 while the CDS end is still reported as 32.
/// </summary>
public void GetCdsPosition_DoNotSilenceOutput_Reverse()
{
    // variant: [179315139, 179315692]
    const byte phase = 0;
    var codingRegion = new CodingRegion(179308070, 179315170, 617, 942, 326);

    var (observedStart, observedEnd) =
        MappedPositionUtilities.GetCdsPositions(codingRegion, 95, 648, phase, false);

    Assert.Equal(-1, observedStart);
    Assert.Equal(32, observedEnd);
}
/// <summary>
/// cDNA positions 161/160 sit at the boundary of the coding region's cDNA start (161): both CDS
/// positions are expected to be -1.
/// </summary>
public void GetCdsPosition_SilenceOutput_InsertionAfterCodingRegion_Reverse()
{
    // variant: [103629803, 103629804] insertion after coding region
    const byte phase = 0;
    var codingRegion = new CodingRegion(103113259, 103629803, 161, 10543, 10383);

    var (observedStart, observedEnd) =
        MappedPositionUtilities.GetCdsPositions(codingRegion, 161, 160, phase, true);

    Assert.Equal(-1, observedStart);
    Assert.Equal(-1, observedEnd);
}
/// <summary>
/// cDNA positions 3229/3228 sit at the boundary of the coding region's cDNA end (3228): both CDS
/// positions are expected to be -1.
/// </summary>
public void GetCdsPosition_SilenceOutput_InsertionBeforeCodingRegion_Reverse()
{
    // variant: [37480320, 37480319] insertion at the genomic start of the coding region (37480320);
    // the cDNA positions supplied below lie at the coding region's cDNA end
    const byte phase = 0;
    var codingRegion = new CodingRegion(37480320, 37543667, 556, 3228, 2673);

    var (observedStart, observedEnd) =
        MappedPositionUtilities.GetCdsPositions(codingRegion, 3229, 3228, phase, true);

    Assert.Equal(-1, observedStart);
    Assert.Equal(-1, observedEnd);
}
/// <summary>
/// cDNA positions 1675/1674 sit at the boundary of the coding region's cDNA end (1674): both CDS
/// positions are expected to be -1.
/// </summary>
public void GetCdsPosition_SilenceOutput_InsertionAfterCodingRegion_Forward()
{
    // variant: [6647337, 6647336] insertion after coding region
    const byte phase = 0;
    var codingRegion = new CodingRegion(6643999, 6647336, 667, 1674, 1008);

    var (observedStart, observedEnd) =
        MappedPositionUtilities.GetCdsPositions(codingRegion, 1675, 1674, phase, true);

    Assert.Equal(-1, observedStart);
    Assert.Equal(-1, observedEnd);
}
/// <summary>
/// Builds the cDNA of a single-exon transcript (constructed with <c>false</c> as the strand
/// argument) whose coding region spans the whole exon, applies one single-base RNA edit at cDNA
/// position 107, and compares the entire cDNA with the expected sequence.
/// </summary>
public void RnaEdits_snv_forward_no_utr()
{
    // NR_002754.2
    var genomicSeq = new SimpleSequence(
        "actctggtttctcttcaaatcgtataaatctttcgccttttactaaagatttccgtggagagaaacgagtgtgagtctgaaaccaattttttgaggccttgcgtttattagcagggctt",
        11968210);

    var codingRegion = new CodingRegion(11968211, 11968329, 1, 119, 119);

    var regions = new ITranscriptRegion[]
    {
        new TranscriptRegion(TranscriptRegionType.Exon, 1, 11968211, 11968329, 1, 119)
    };

    var rnaEdits = new IRnaEdit[] { new RnaEdit(107, 107, "t") };

    var cdnaSequence = new CdnaSequence(genomicSeq, codingRegion, regions, false, rnaEdits);

    // identical to the genomic sequence except for the edited base
    const string expectedCdna =
        "actctggtttctcttcaaatcgtataaatctttcgccttttactaaagatttccgtggagagaaacgagtgtgagtctgaaaccaattttttgaggccttgcgttttttagcagggctt";

    Assert.Equal(expectedCdna, cdnaSequence.GetCdnaSequence());
}
/// <summary>
/// Verifies that the coding sequence reports a length of 720 and that this differs from the
/// genomic span of the coding region (End - Start + 1).
/// </summary>
public void Length_ReturnTrueLength_WhenGapsArePresent()
{
    const int expectedLength = 720;
    const byte phase = 1;

    var codingRegion = new CodingRegion(10051, 12770, 51, 800, 720);

    var regions = new ITranscriptRegion[]
    {
        new TranscriptRegion(TranscriptRegionType.Exon, 1, 10001, 10299, 1, 299),
        new TranscriptRegion(TranscriptRegionType.Intron, 1, 10300, 12300, 229, 331),
        new TranscriptRegion(TranscriptRegionType.Exon, 2, 12301, 12970, 331, 1000)
    };

    var codingSequence = new CodingSequence(new NSequence(), codingRegion, regions, false, phase);
    var observedLength = codingSequence.Length;

    Assert.Equal(expectedLength, observedLength);

    // the genomic span of the coding region must not be mistaken for its length
    var genomicSpan = codingRegion.End - codingRegion.Start + 1;
    Assert.NotEqual(expectedLength, genomicSpan);
}
/// <summary>
/// Builds the cDNA of a single-exon transcript (constructed with <c>false</c> as the strand
/// argument) with three single-base RNA edits and compares the coding slice (cDNA 76-570) with the
/// expected sequence.
/// </summary>
public void RnaEdits_snv_forward_with_utr()
{
    // NM_001144032.2 chr1:148644011-148644795
    var genomicSeq = new SimpleSequence(
        "ACTATAAAGACAGTGAAAAGATCAGTGGTTATCTTTGCAGACGCCACCATCGCTGTGAGCCCTGTACTATCAGCCATGGTCAACTCCGTCGTCTTTTTTGAAATCACCAGGGATGGCAAGCCCTTGGGCCGCATCTCCATCAAACTGTTTGCAGACAAGATTCCAAAGACAGCAGAAAACTTTCGTGCTCTGAGCACTGGAGAGAAAGGATTTCGTTATAAGGGTTCCTGCTTTCACAGAATTATTCCAGGGTTTATGTGTCAGGGTGGTGACTTCACACGCCCTAATGGCACCGGTGACAAGTCCATCTATGGGGAGAAATTTGATGATGAGAACCTCATCCGAAAGCATACAGGTTCTGGCATCTTGTCCATGGCAAATGCTGGACCCAACACAAATGGTTCCCAGTTTTTCATCTGTGCTGCCAAGACTGAGTGGTTGGATGGCAAGCATGTGGCGTTTGGCAAGGTGAAAGAACGTGTGAATATTGTGGAAGCCACGGAGCACTTTGGGTACAGGAATAGCAAGACCAGCAAGAAGATCACCATTGCTGACTGTGGACAATTCTAATGAGTTTGACTTGTGTTTTATTTTCACCACCAGACCCATTCCTTCTGTAGCTCAGGAGAGCACCCCTCCACCACATTTGCTTGCAATATCCTAGAATCTTTGTGCTCTTGCTGCAGTTCCCTTTGGGTTCCATGTTTTCCTTGTTCCCTTCCATGCCTAGCTGGATGGCAGAGTTGAGTTAAGTTTATGATTATGAAATAAAAACTAAGTAACAA",
        148644011 - 1);

    var codingRegion = new CodingRegion(148644086, 148644580, 76, 570, 495);

    var regions = new ITranscriptRegion[]
    {
        new TranscriptRegion(TranscriptRegionType.Exon, 1, 148644011, 148644795, 1, 785)
    };

    // the third edit (737) lies beyond the coding region's cDNA end (570)
    var rnaEdits = new IRnaEdit[]
    {
        new RnaEdit(420, 420, "C"),
        new RnaEdit(500, 500, "T"),
        new RnaEdit(737, 737, "T")
    };

    var cdnaSequence = new CdnaSequence(genomicSeq, codingRegion, regions, false, rnaEdits);

    const string expectedCodingSeq =
        "ATGGTCAACTCCGTCGTCTTTTTTGAAATCACCAGGGATGGCAAGCCCTTGGGCCGCATCTCCATCAAACTGTTTGCAGACAAGATTCCAAAGACAGCAGAAAACTTTCGTGCTCTGAGCACTGGAGAGAAAGGATTTCGTTATAAGGGTTCCTGCTTTCACAGAATTATTCCAGGGTTTATGTGTCAGGGTGGTGACTTCACACGCCCTAATGGCACCGGTGACAAGTCCATCTATGGGGAGAAATTTGATGATGAGAACCTCATCCGAAAGCATACAGGTTCTGGCATCTTGTCCATGGCAAATGCTGGACCCAACACAAATGGTTCCCAGTTTTTCATCTGCGCTGCCAAGACTGAGTGGTTGGATGGCAAGCATGTGGCGTTTGGCAAGGTGAAAGAACGTGTGAATATTGTGGAAGCCATGGAGCACTTTGGGTACAGGAATAGCAAGACCAGCAAGAAGATCACCATTGCTGACTGTGGACAATTCTAA";

    // only the coding slice of the cDNA is compared; CdnaStart is 1-based, Substring is 0-based
    var observedCodingSeq = cdnaSequence.GetCdnaSequence()
        .Substring(codingRegion.CdnaStart - 1, codingRegion.Length);

    Assert.Equal(expectedCodingSeq, observedCodingSeq);
}
/// <summary>
/// Writes a fully populated transcript to an in-memory stream, reads it back, and checks that
/// the individual fields are preserved.
/// </summary>
public void Transcript_EndToEnd()
{
    // scalar expectations
    const int expectedStart = int.MaxValue;
    const int expectedEnd = int.MinValue;
    const string expectedId = "ENST00000540021";
    const byte expectedVersion = 7;
    const BioType expectedBioType = BioType.IG_J_pseudogene;
    const bool expectedCanonical = true;
    const Source expectedSource = Source.BothRefSeqAndEnsembl;
    const bool expectedCdsStartNotFound = true;
    const bool expectedCdsEndNotFound = true;
    const byte expectedNumExons = 3;
    const int expectedTotalExonLength = 300;
    const byte expectedStartExonPhase = 3;
    const int expectedSiftIndex = 11;
    const int expectedPolyPhenIndex = 13;

    var expectedIdAndVersion = expectedId + "." + expectedVersion;

    // composite expectations
    IChromosome expectedChromosome = new Chromosome("chrBob", "Bob", 1);
    ICodingRegion expectedCodingRegion = new CodingRegion(10001, 10200, 1, 200, 200);
    ITranscriptRegion[] expectedTranscriptRegions = GetTranscriptRegions();
    IInterval[] expectedMicroRnas = GetMicroRnas();

    ITranslation expectedTranslation = new Translation(expectedCodingRegion,
        CompactId.Convert("ENSP00000446475", 17), "VEIDSD");

    IGene expectedGene = new Gene(expectedChromosome, 100, 200, true, "TP53", 300, CompactId.Convert("7157"),
        CompactId.Convert("ENSG00000141510"));

    // lookup tables handed to the writer and the reader
    var genes = new IGene[] { expectedGene };
    var peptideSeqs = new string[] { expectedTranslation.PeptideSeq };

    var geneIndices = CreateIndices(genes);
    var transcriptRegionIndices = CreateIndices(expectedTranscriptRegions);
    var microRnaIndices = CreateIndices(expectedMicroRnas);
    var peptideIndices = CreateIndices(peptideSeqs);

    var indexToChromosome = new Dictionary<ushort, IChromosome>
    {
        [expectedChromosome.Index] = expectedChromosome
    };

    // ReSharper disable ConditionIsAlwaysTrueOrFalse
    var transcript = new Transcript(expectedChromosome, expectedStart, expectedEnd,
        CompactId.Convert(expectedId, expectedVersion), expectedTranslation, expectedBioType, expectedGene,
        expectedTotalExonLength, expectedStartExonPhase, expectedCanonical, expectedTranscriptRegions,
        expectedNumExons, expectedMicroRnas, expectedSiftIndex, expectedPolyPhenIndex, expectedSource,
        expectedCdsStartNotFound, expectedCdsEndNotFound, null, null);
    // ReSharper restore ConditionIsAlwaysTrueOrFalse

    ITranscript observedTranscript;

    using (var stream = new MemoryStream())
    {
        // the writer is disposed before the stream is rewound and read back
        using (var writer = new ExtendedBinaryWriter(stream, Encoding.UTF8, true))
        {
            transcript.Write(writer, geneIndices, transcriptRegionIndices, microRnaIndices, peptideIndices);
        }

        stream.Position = 0;

        using (var reader = new ExtendedBinaryReader(stream))
        {
            observedTranscript = Transcript.Read(reader, indexToChromosome, genes, expectedTranscriptRegions,
                expectedMicroRnas, peptideSeqs);
        }
    }

    Assert.NotNull(observedTranscript);
    Assert.Equal(expectedStart, observedTranscript.Start);
    Assert.Equal(expectedEnd, observedTranscript.End);
    Assert.Equal(expectedIdAndVersion, observedTranscript.Id.WithVersion);
    Assert.Equal(expectedBioType, observedTranscript.BioType);
    Assert.Equal(expectedCanonical, observedTranscript.IsCanonical);
    Assert.Equal(expectedSource, observedTranscript.Source);
    Assert.Equal(expectedTotalExonLength, observedTranscript.TotalExonLength);
    Assert.Equal(expectedStartExonPhase, observedTranscript.StartExonPhase);
    Assert.Equal(expectedSiftIndex, observedTranscript.SiftIndex);
    Assert.Equal(expectedPolyPhenIndex, observedTranscript.PolyPhenIndex);
    Assert.Equal(expectedChromosome.Index, observedTranscript.Chromosome.Index);
    Assert.Equal(expectedGene.Symbol, observedTranscript.Gene.Symbol);
    Assert.Equal(expectedTranslation.PeptideSeq, observedTranscript.Translation.PeptideSeq);
    Assert.Equal(expectedTranscriptRegions.Length, observedTranscript.TranscriptRegions.Length);
    Assert.Equal(expectedMicroRnas.Length, observedTranscript.MicroRnas.Length);
}
/// <summary>
/// Parses the relevant data from a dumped transcript object and assembles a mutable transcript.
/// </summary>
/// <param name="objectValue">node whose child key/value entries describe the transcript</param>
/// <param name="chromosome">chromosome passed on to the exon and translation parsers and stored in the result</param>
/// <param name="source">source that is passed through unchanged to the resulting transcript</param>
/// <returns>a <see cref="MutableTranscript"/> built from the parsed values</returns>
/// <exception cref="InvalidDataException">thrown when a key is not in KnownKeys or has no handler in the switch</exception>
public static MutableTranscript Parse(ObjectValueNode objectValue, IChromosome chromosome, Source source)
{
    // transcript
    string transcriptId       = null;
    byte transcriptVersion    = 1;
    int start                 = -1;
    int end                   = -1;
    BioType bioType           = BioType.other;
    bool isCanonical          = false;
    string ccdsId             = null;
    string refSeqId           = null;
    string bamEditStatus      = null;
    MutableExon[] exons       = null;
    IInterval[] introns       = null;
    IInterval[] microRnas     = null;
    IRnaEdit[] rnaEdits       = null;
    MutableTranscriptRegion[] cdnaMaps = null;

    // gene
    string geneId                     = null;
    int hgncId                        = -1;
    int geneStart                     = -1;
    int geneEnd                       = -1;
    bool geneOnReverseStrand          = false;
    string geneSymbol                 = null;
    GeneSymbolSource geneSymbolSource = GeneSymbolSource.Unknown;

    // translation & coding region
    string proteinId                 = null;
    byte proteinVersion              = 0;
    int translationStart             = -1;
    int translationEnd               = -1;
    MutableExon translationStartExon = null;
    MutableExon translationEndExon   = null;
    int compDnaCodingStart           = -1;
    int compDnaCodingEnd             = -1;
    bool cdsStartNotFound            = false;
    bool cdsEndNotFound              = false;
    string peptideSequence           = null;
    string translateableSequence     = null;
    int[] selenocysteinePositions    = null;

    // predictions
    string siftData     = null;
    string polyphenData = null;

    foreach (var node in objectValue.Values)
    {
        // sanity check: make sure that every key we encounter is one we know about
        if (!KnownKeys.Contains(node.Key))
        {
            throw new InvalidDataException($"Encountered an unknown key in the dumper transcript object: {node.Key}");
        }

        switch (node.Key)
        {
            // known keys that are not used
            case ImportKeys.CodingRegionEnd:
            case ImportKeys.CodingRegionStart:
            case ImportKeys.CreatedDate:
            case ImportKeys.DbId:
            case ImportKeys.Description:
            case ImportKeys.DisplayXref:
            case ImportKeys.ExternalDb:
            case ImportKeys.ExternalDisplayName:
            case ImportKeys.ExternalName:
            case ImportKeys.ExternalStatus:
            case ImportKeys.GenePhenotype:
            case ImportKeys.GeneStableId:
            case ImportKeys.ModifiedDate:
            case ImportKeys.Protein:
            case ImportKeys.Slice:
            case ImportKeys.Source:
            case ImportKeys.Strand:
            case ImportKeys.SwissProt:
            case ImportKeys.Trembl:
            case ImportKeys.UniParc:
            case ImportKeys.VepLazyLoaded:
                break;

            case ImportKeys.BamEditStatus:
                bamEditStatus = node.GetString();
                break;

            case ImportKeys.Attributes:
                (microRnas, rnaEdits, cdsStartNotFound, cdsEndNotFound) = Attribute.ParseList(node);
                break;

            case ImportKeys.Biotype:
                bioType = TranscriptUtilities.GetBiotype(node);
                break;

            case ImportKeys.Ccds:
                ccdsId = node.GetString();
                break;

            case ImportKeys.CdnaCodingEnd:
                compDnaCodingEnd = node.GetInt32();
                break;

            case ImportKeys.CdnaCodingStart:
                compDnaCodingStart = node.GetInt32();
                break;

            case ImportKeys.End:
                end = node.GetInt32();
                break;

            case ImportKeys.GeneHgncId:
                hgncId = node.GetHgncId();
                break;

            case ImportKeys.GeneSymbol:
            case ImportKeys.GeneHgnc: // older key
                geneSymbol = node.GetString();
                break;

            case ImportKeys.GeneSymbolSource:
                geneSymbolSource = GeneSymbolSourceHelper.GetGeneSymbolSource(node.GetString());
                break;

            case ImportKeys.Gene:
                (geneStart, geneEnd, geneId, geneOnReverseStrand) = ImportGene.Parse(node);
                break;

            case ImportKeys.IsCanonical:
                isCanonical = node.GetBool();
                break;

            case ImportKeys.Refseq:
                refSeqId = node.GetString();
                break;

            case ImportKeys.StableId:
                transcriptId = node.GetString();
                break;

            case ImportKeys.Start:
                start = node.GetInt32();
                break;

            case ImportKeys.TransExonArray:
                exons = ImportExon.ParseList(node, chromosome);
                break;

            case ImportKeys.Translation:
                (translationStart, translationEnd, proteinId, proteinVersion, translationStartExon,
                    translationEndExon) = ImportTranslation.Parse(node, chromosome);
                break;

            case ImportKeys.VariationEffectFeatureCache:
                (cdnaMaps, introns, peptideSequence, translateableSequence, siftData, polyphenData,
                    selenocysteinePositions) = ImportVariantEffectFeatureCache.Parse(node);
                break;

            case ImportKeys.Version:
                transcriptVersion = (byte)node.GetInt32();
                break;

            default:
                // a key that passed the KnownKeys check but has no handler above
                throw new InvalidDataException($"Unknown key found: {node.Key}");
        }
    }

    var fixedTranscript = AccessionUtilities.GetMaxVersion(transcriptId, transcriptVersion);
    var fixedProtein    = AccessionUtilities.GetMaxVersion(proteinId, proteinVersion);

    var gene = new MutableGene(chromosome, geneStart, geneEnd, geneOnReverseStrand, geneSymbol, geneSymbolSource,
        geneId, hgncId);

    // the coding region is created with a length of 0 here
    var codingRegionStart = GetCodingRegionStart(geneOnReverseStrand, translationStartExon, translationEndExon,
        translationStart, translationEnd);
    var codingRegionEnd = GetCodingRegionEnd(geneOnReverseStrand, translationStartExon, translationEndExon,
        translationStart, translationEnd);
    var codingRegion = new CodingRegion(codingRegionStart, codingRegionEnd, compDnaCodingStart, compDnaCodingEnd, 0);

    int totalExonLength = GetTotalExonLength(exons);

    // int.MinValue marks a transcript for which no translation start exon was parsed
    int startExonPhase = translationStartExon?.Phase ?? int.MinValue;

    return new MutableTranscript(chromosome, start, end, fixedTranscript.Id, fixedTranscript.Version, ccdsId,
        refSeqId, bioType, isCanonical, codingRegion, fixedProtein.Id, fixedProtein.Version, peptideSequence,
        source, gene, exons, startExonPhase, totalExonLength, introns, cdnaMaps, siftData, polyphenData,
        translateableSequence, microRnas, cdsStartNotFound, cdsEndNotFound, selenocysteinePositions, rnaEdits,
        bamEditStatus);
}