public void ExonEqualityTests()
{
    // two maps built from the same four coordinates are expected to compare equal
    var expected = new CdnaCoordinateMap(100, 200, 1, 2);
    var observed = new CdnaCoordinateMap(100, 200, 1, 2);

    Assert.Equal(expected, observed);
}
/// <summary>
/// converts a genomic interval into cDNA endpoints using the first cDNA map (in array order)
/// that overlaps the interval [Transcript.pm:927 cdna_coding_start]
/// genomic2pep [TranscriptMapper:482]
/// </summary>
/// <param name="cdnaMaps">the cDNA coordinate maps to search</param>
/// <param name="genomicBegin">the first genomic position of the interval</param>
/// <param name="genomicEnd">the last genomic position of the interval</param>
/// <param name="cdnaBegin">receives the map's cDNA end minus the distance from the map's genomic start to genomicEnd</param>
/// <param name="cdnaEnd">receives the map's cDNA end minus the distance from the map's genomic start to genomicBegin</param>
/// <exception cref="GeneralException">thrown when none of the maps overlaps the interval</exception>
public static void GetCodingDnaEndpoints(CdnaCoordinateMap[] cdnaMaps, int genomicBegin, int genomicEnd, out int cdnaBegin, out int cdnaEnd)
{
    var overlappingMap = CdnaCoordinateMap.Null();
    bool hasOverlap    = false;

    foreach (var candidate in cdnaMaps)
    {
        // skip maps that lie entirely after or entirely before the interval
        if (genomicEnd < candidate.GenomicStart || genomicBegin > candidate.GenomicEnd)
        {
            continue;
        }

        overlappingMap = candidate;
        hasOverlap     = true;
        break;
    }

    if (!hasOverlap)
    {
        throw new GeneralException($"Unable to find an overlapping mapping pair for these genomic coordinates: ({genomicBegin}, {genomicEnd})");
    }

    // both endpoints are measured back from the map's cDNA end
    var mapGenomicStart = overlappingMap.GenomicStart;
    var mapCdnaEnd      = overlappingMap.CdnaEnd;

    cdnaBegin = mapCdnaEnd - (genomicEnd - mapGenomicStart);
    cdnaEnd   = mapCdnaEnd - (genomicBegin - mapGenomicStart);
}
/// <summary>
/// returns true if the exon extends before the coding region start or past the coding region end.
/// Always returns false when either coding region coordinate is -1.
/// </summary>
private static bool HasUtr(CdnaCoordinateMap exon, int codingRegionStart, int codingRegionEnd)
{
    bool hasCodingRegion = codingRegionStart != -1 && codingRegionEnd != -1;

    return hasCodingRegion &&
           (exon.GenomicStart < codingRegionStart || exon.GenomicEnd > codingRegionEnd);
}
/// <summary>
/// returns the result of Overlap.Partial for the exon's genomic interval and the coding region.
/// Always returns false when either coding region coordinate is -1.
/// </summary>
private static bool HasCds(CdnaCoordinateMap exon, int codingRegionStart, int codingRegionEnd)
{
    if (codingRegionStart != -1 && codingRegionEnd != -1)
    {
        return Overlap.Partial(exon.GenomicStart, exon.GenomicEnd, codingRegionStart, codingRegionEnd);
    }

    return false;
}
/// <summary>
/// writes the "exon" entry for the given exon and, when the transcript has a translation,
/// the "CDS" entry and up to two "UTR" entries (before and after the coding region)
/// </summary>
private void WriteExon(TextWriter writer, string ucscReferenceName, Transcript transcript, CdnaCoordinateMap exon, int exonIndex, byte exonPhase)
{
    char strand = transcript.Gene.OnReverseStrand ? '-' : '+';

    // the exon entry is always written
    WriteExonEntry(writer, ucscReferenceName, transcript, "exon", exon.GenomicStart, exon.GenomicEnd, strand, exonIndex, exonPhase);

    // without a translation there is no coding region, so no CDS or UTR entries
    var translation = transcript.Translation;
    if (translation == null)
    {
        return;
    }

    var codingRegion = translation.CodingRegion;
    var codingStart  = codingRegion.GenomicStart;
    var codingEnd    = codingRegion.GenomicEnd;

    // CDS entry: the exon interval clipped to the coding region
    if (HasCds(exon, codingStart, codingEnd))
    {
        int cdsStart;
        int cdsEnd;
        GetCdsCoordinates(exon, codingStart, codingEnd, out cdsStart, out cdsEnd);
        WriteExonEntry(writer, ucscReferenceName, transcript, "CDS", cdsStart, cdsEnd, strand, exonIndex, exonPhase);
    }

    if (!HasUtr(exon, codingStart, codingEnd))
    {
        return;
    }

    // UTR entry for the part of the exon that lies before the coding region
    if (exon.GenomicStart < codingStart)
    {
        int lastBaseBeforeCds = codingStart - 1;
        int utrEnd = lastBaseBeforeCds > exon.GenomicEnd ? exon.GenomicEnd : lastBaseBeforeCds;
        WriteExonEntry(writer, ucscReferenceName, transcript, "UTR", exon.GenomicStart, utrEnd, strand, exonIndex, exonPhase);
    }

    // UTR entry for the part of the exon that lies after the coding region
    if (exon.GenomicEnd > codingEnd)
    {
        int firstBaseAfterCds = codingEnd + 1;
        int utrStart = firstBaseAfterCds < exon.GenomicStart ? exon.GenomicStart : firstBaseAfterCds;
        WriteExonEntry(writer, ucscReferenceName, transcript, "UTR", utrStart, exon.GenomicEnd, strand, exonIndex, exonPhase);
    }
}
/// <summary>
/// clips the exon's genomic interval to the coding region: the start is raised to
/// codingRegionStart and the end is lowered to codingRegionEnd where the exon extends beyond them
/// </summary>
private static void GetCdsCoordinates(CdnaCoordinateMap exon, int codingRegionStart, int codingRegionEnd, out int cdsStart, out int cdsEnd)
{
    cdsStart = exon.GenomicStart < codingRegionStart ? codingRegionStart : exon.GenomicStart;
    cdsEnd   = exon.GenomicEnd > codingRegionEnd ? codingRegionEnd : exon.GenomicEnd;
}
/// <summary>
/// translates a genomic interval into a cDNA Coordinate using the supplied map.
/// On the forward strand the start is offset from the map's genomic/cDNA start and the end
/// from the map's genomic/cDNA end; on the reverse strand both are measured from the map's
/// genomic end and cDNA start, with the interval's endpoints swapped.
/// </summary>
private static Coordinate ConvertGenomicPosToCdnaPos(int start, int end, CdnaCoordinateMap map, bool onReverseStrand)
{
    if (!onReverseStrand)
    {
        int forwardStart = start - map.GenomicStart + map.CdnaStart;
        int forwardEnd   = end - map.GenomicEnd + map.CdnaEnd;
        return new Coordinate(forwardStart, forwardEnd, false);
    }

    int reverseStart = map.CdnaStart - end + map.GenomicEnd;
    int reverseEnd   = map.CdnaStart - start + map.GenomicEnd;
    return new Coordinate(reverseStart, reverseEnd, false);
}
/// <summary>
/// determines which exons and introns overlap the variant and formats them into the exon and
/// intron number strings [BaseTranscriptVariation.pm:474 _exon_intron_number]. Introns are taken
/// to be the gaps between consecutive cDNA maps. Sets ta.HasExonOverlap to true when at least one
/// exon overlaps; intronNumber is null when no introns array is supplied.
/// </summary>
public static void ExonIntronNumber(CdnaCoordinateMap[] cdnaMaps, SimpleInterval[] introns, bool onReverseStrand, TranscriptAnnotation ta, out string exonNumber, out string intronNumber)
{
    var allele  = ta.AlternateAllele;
    var variant = new AnnotationInterval(allele.Start, allele.End);

    var exonHits   = new List<int>();
    var intronHits = new List<int>();

    var previous = CdnaCoordinateMap.Null();

    for (int i = 0; i < cdnaMaps.Length; i++)
    {
        var current     = cdnaMaps[i];
        int exonOrdinal = i + 1; // exons are numbered from 1

        if (variant.Overlaps(current.GenomicStart, current.GenomicEnd))
        {
            exonHits.Add(exonOrdinal);
        }

        // the intron preceding this exon spans the gap after the previous exon
        if (!previous.IsNull && variant.Overlaps(previous.GenomicEnd + 1, current.GenomicStart - 1))
        {
            intronHits.Add(exonOrdinal - 1);
        }

        previous = current;
    }

    exonNumber = GetExonIntronNumber(exonHits, cdnaMaps.Length, onReverseStrand);

    if (introns == null)
    {
        intronNumber = null;
    }
    else
    {
        intronNumber = GetExonIntronNumber(intronHits, introns.Length, onReverseStrand);
    }

    // the flag is only ever raised here, never cleared
    if (exonHits.Count > 0)
    {
        ta.HasExonOverlap = true;
    }
}
/// <summary>
/// gets the shortest intron offset from the nearest exon. The position is compared against the
/// end of the previous exon and the start of the next exon; the closer exon supplies the cDNA
/// position and the distance becomes the (signed) offset. When both distances are equal, the
/// previous exon is used on the forward strand and the next exon on the reverse strand.
/// </summary>
private void GetIntronOffset(CdnaCoordinateMap prevExon, CdnaCoordinateMap exon, int? position, PositionOffset po)
{
    int? upDist   = position - prevExon.GenomicEnd;
    int? downDist = exon.GenomicStart - position;

    // ties go to the upstream exon only in the positive orientation
    bool useUpstreamExon = upDist < downDist ||
                           (upDist == downDist && !_transcript.Gene.OnReverseStrand);

    var nearestExon = useUpstreamExon ? prevExon : exon;

    int nearestCdnaBegin;
    int nearestCdnaEnd;
    TranscriptUtilities.GetCodingDnaEndpoints(_transcript.CdnaMaps, nearestExon.GenomicStart, nearestExon.GenomicEnd, out nearestCdnaBegin, out nearestCdnaEnd);

    bool onReverseStrand = _transcript.Gene.OnReverseStrand;

    if (useUpstreamExon)
    {
        // distance to upstream exon is the shortest (or equal and in the positive orientation)
        po.Position = onReverseStrand ? nearestCdnaBegin : nearestCdnaEnd;
        po.Offset   = onReverseStrand ? -upDist : upDist;
    }
    else
    {
        // distance to downstream exon is the shortest
        po.Position = onReverseStrand ? nearestCdnaEnd : nearestCdnaBegin;
        po.Offset   = onReverseStrand ? downDist : -downDist;
    }
}
public void ExonReadWriteTests()
{
    // writes three maps to a temporary file and checks that they are read back,
    // in order, as equal values
    var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

    var exon1 = new CdnaCoordinateMap(100, 200, 10, 20);
    var exon2 = new CdnaCoordinateMap(100, 200, 11, 21);
    var exon3 = new CdnaCoordinateMap(105, 201, 12, 15);

    try
    {
        using (var writer = new ExtendedBinaryWriter(FileUtilities.GetCreateStream(randomPath)))
        {
            exon1.Write(writer);
            exon2.Write(writer);
            exon3.Write(writer);
        }

        using (var reader = new ExtendedBinaryReader(FileUtilities.GetReadStream(randomPath)))
        {
            Assert.Equal(exon1, CdnaCoordinateMap.Read(reader));
            Assert.Equal(exon2, CdnaCoordinateMap.Read(reader));
            Assert.Equal(exon3, CdnaCoordinateMap.Read(reader));
        }
    }
    finally
    {
        // clean up even when an assertion or I/O call throws, so failed runs do not
        // leave files behind in the temp directory (File.Delete is a no-op if the
        // file was never created)
        File.Delete(randomPath);
    }
}
/// <summary>
/// reads the next transcript entry from the underlying reader and assembles a Transcript from it.
/// Returns null if the reader runs out of lines while reading the ID line, the transcript info
/// line, the internal indices line, or the cDNA maps header line.
/// </summary>
/// <returns>the parsed transcript, or null when no further lines are available</returns>
/// <exception cref="GeneralException">
/// thrown when no lists have been supplied, when a line has an unexpected number of columns or
/// an unexpected line type, or when the input ends in the middle of the cDNA map entries
/// </exception>
public Transcript GetLightTranscript()
{
    if (!_hasLists)
    {
        throw new GeneralException("No lists have been supplied to the transcript reader.");
    }

    // ================
    // read the ID line
    // ================

    string line = _reader.ReadLine();
    if (line == null)
    {
        return(null);
    }

    var cols = line.Split('\t');
    if (cols.Length != 7)
    {
        throw new GeneralException($"Expected 7 columns but found {cols.Length} when parsing the transcript entry: {line}");
    }

    // validate the line type before parsing the remaining fields so that a misplaced line is
    // reported as such rather than as a parse failure in one of its columns
    var lineType = cols[0];
    if (lineType != "Transcript")
    {
        throw new GeneralException($"Expected the Transcript lineType, but found: {line}");
    }

    var transcriptId      = CompactId.Convert(cols[1]);
    var transcriptVersion = GetMaxVersion(cols[2], cols[1]);
    var proteinId         = CompactId.Convert(cols[3]);
    var proteinVersion    = GetMaxVersion(cols[4], cols[3]);
    var geneId            = cols[5];
    var bioType           = (BioType)byte.Parse(cols[6]);

    // ========================
    // read the transcript info
    // ========================

    line = _reader.ReadLine();
    if (line == null)
    {
        return(null);
    }

    cols = line.Split('\t');
    if (cols.Length != 11)
    {
        throw new GeneralException($"Expected 11 columns but found {cols.Length} when parsing the transcript info entry: {line}");
    }

    // note: the last column (index 10) is required to be present but is not read here
    var referenceIndex    = ushort.Parse(cols[0]);
    var start             = int.Parse(cols[1]);
    var end               = int.Parse(cols[2]);
    var codingRegionStart = int.Parse(cols[3]);
    var codingRegionEnd   = int.Parse(cols[4]);
    var cdnaCodingStart   = int.Parse(cols[5]);
    var cdnaCodingEnd     = int.Parse(cols[6]);
    var totalExonLength   = int.Parse(cols[7]);
    var isCanonical       = cols[8] == "Y";
    var startExonPhase    = GetExonPhase(cols[9]);

    var gene = GetGene(referenceIndex, start, end, transcriptId.ToString(), geneId).ToGene();

    // =========================
    // read the internal indices
    // =========================

    line = _reader.ReadLine();
    if (line == null)
    {
        return(null);
    }

    cols = line.Split('\t');
    if (cols.Length != 4)
    {
        throw new GeneralException($"Expected 4 columns but found {cols.Length} when parsing the transcript internal indices: {line}");
    }

    // the cDNA sequence index is parsed (and thereby validated) but not used afterwards
    // ReSharper disable once UnusedVariable
    var cdnaSeqIndex    = int.Parse(cols[0]);
    var peptideSeqIndex = int.Parse(cols[1]);
    var siftIndex       = int.Parse(cols[2]);
    var polyPhenIndex   = int.Parse(cols[3]);

    // =================================
    // read the exons, introns, & miRNAs
    // =================================

    SkipItems("Exons");
    var introns = GetItems("Introns", _introns);

    // a peptide index of -1 means that there is no peptide sequence to look up
    var peptide = peptideSeqIndex != -1 ? _peptideSeqs[peptideSeqIndex] : null;

    // ==================
    // read the cDNA maps
    // ==================

    line = _reader.ReadLine();
    if (line == null)
    {
        return(null);
    }

    cols = line.Split('\t');

    // the header must provide at least the line type and the number of cDNA maps
    if (cols.Length < 2)
    {
        throw new GeneralException($"Expected at least 2 columns but found {cols.Length} when parsing the cDNA maps entry: {line}");
    }

    lineType = cols[0];
    if (lineType != "cDNA maps")
    {
        throw new GeneralException($"Expected the cDNA maps lineType, but found: {line}");
    }

    var numCdnaMaps = int.Parse(cols[1]);

    // the array stays null when the header announces no cDNA maps
    CdnaCoordinateMap[] cdnaMaps = null;

    if (numCdnaMaps > 0)
    {
        cdnaMaps = new CdnaCoordinateMap[numCdnaMaps];

        for (int i = 0; i < numCdnaMaps; i++)
        {
            line = _reader.ReadLine();
            if (line == null)
            {
                throw new GeneralException("Found null line while parsing CDNA maps.");
            }

            cols = line.Split('\t');
            if (cols.Length != 4)
            {
                throw new GeneralException($"Expected 4 columns but found {cols.Length} when parsing the cDNA map entry: {line}");
            }

            var genomicStart = int.Parse(cols[0]);
            var genomicEnd   = int.Parse(cols[1]);
            var cdnaStart    = int.Parse(cols[2]);
            var cdnaEnd      = int.Parse(cols[3]);

            cdnaMaps[i] = new CdnaCoordinateMap(genomicStart, genomicEnd, cdnaStart, cdnaEnd);
        }
    }

    // ===============
    // read the miRNAs
    // ===============

    var microRnas = GetItems("miRNAs", _microRnas);

    // ===================
    // put it all together
    // ===================

    var codingRegion = new CdnaCoordinateMap(codingRegionStart, codingRegionEnd, cdnaCodingStart, cdnaCodingEnd);

    // a coding region start of -1 yields a transcript without a translation
    var translation = codingRegionStart != -1
        ? new Translation(codingRegion, proteinId, proteinVersion, peptide)
        : null;

    return(new Transcript(referenceIndex, start, end, transcriptId, transcriptVersion, translation, bioType, gene,
        totalExonLength, startExonPhase, isCanonical, introns, microRnas, cdnaMaps, siftIndex, polyPhenIndex,
        Header.TranscriptSource));
}
/// <summary>
/// returns the number of genomic positions covered by the map (both endpoints are counted)
/// </summary>
private static int GetExonLength(CdnaCoordinateMap cdnaMap) => cdnaMap.GenomicEnd - cdnaMap.GenomicStart + 1;
public void ExonToStringTests()
{
    // only checks that ToString() yields a non-null value; the content is not inspected
    var map = new CdnaCoordinateMap(100, 200, 1, 2);
    var description = map.ToString();

    Assert.NotNull(description);
}