Пример #1
0
        public void ExonEqualityTests()
        {
            // two maps built from identical coordinates are expected to compare equal
            var expected = new CdnaCoordinateMap(100, 200, 1, 2);
            var actual   = new CdnaCoordinateMap(100, 200, 1, 2);

            Assert.Equal(expected, actual);
        }
Пример #2
0
        /// <summary>
        /// derives the cDNA endpoints for the specified genomic coordinates from the first entry in
        /// cdnaMaps whose genomic range overlaps them [Transcript.pm:927 cdna_coding_start],
        /// genomic2pep [TranscriptMapper:482]
        /// </summary>
        /// <param name="cdnaMaps">the cDNA coordinate maps, scanned in array order</param>
        /// <param name="genomicBegin">first genomic position of the query range</param>
        /// <param name="genomicEnd">last genomic position of the query range</param>
        /// <param name="cdnaBegin">set to CdnaEnd - (genomicEnd - GenomicStart) of the overlapping map</param>
        /// <param name="cdnaEnd">set to CdnaEnd - (genomicBegin - GenomicStart) of the overlapping map</param>
        /// <exception cref="GeneralException">thrown when no map overlaps the query range</exception>
        public static void GetCodingDnaEndpoints(CdnaCoordinateMap[] cdnaMaps, int genomicBegin, int genomicEnd, out int cdnaBegin, out int cdnaEnd)
        {
            foreach (var candidate in cdnaMaps)
            {
                // skip maps that lie entirely before or entirely after the query range
                if (genomicEnd < candidate.GenomicStart || genomicBegin > candidate.GenomicEnd)
                {
                    continue;
                }

                // first overlapping map wins: calculate the cDNA position from it
                cdnaBegin = candidate.CdnaEnd - (genomicEnd - candidate.GenomicStart);
                cdnaEnd   = candidate.CdnaEnd - (genomicBegin - candidate.GenomicStart);
                return;
            }

            throw new GeneralException($"Unable to find an overlapping mapping pair for these genomic coordinates: ({genomicBegin}, {genomicEnd})");
        }
Пример #3
0
 /// <summary>
 /// returns true if the exon extends beyond either end of the coding region.
 /// Always false when either coding region coordinate is -1.
 /// </summary>
 private static bool HasUtr(CdnaCoordinateMap exon, int codingRegionStart, int codingRegionEnd)
 {
     bool hasCodingRegion = codingRegionStart != -1 && codingRegionEnd != -1;

     return hasCodingRegion &&
            (exon.GenomicStart < codingRegionStart || exon.GenomicEnd > codingRegionEnd);
 }
Пример #4
0
 /// <summary>
 /// returns the result of Overlap.Partial for the exon's genomic range and the coding region.
 /// Always false when either coding region coordinate is -1.
 /// </summary>
 private static bool HasCds(CdnaCoordinateMap exon, int codingRegionStart, int codingRegionEnd)
 {
     if (codingRegionStart != -1 && codingRegionEnd != -1)
     {
         return Overlap.Partial(exon.GenomicStart, exon.GenomicEnd, codingRegionStart, codingRegionEnd);
     }

     // -1 marks a missing coding region coordinate
     return false;
 }
Пример #5
0
        /// <summary>
        /// writes the "exon" entry for the given exon and, when the transcript has a translation,
        /// the "CDS" entry for the part inside the coding region and the "UTR" entries for the
        /// parts before and after it
        /// </summary>
        private void WriteExon(TextWriter writer, string ucscReferenceName, Transcript transcript,
                               CdnaCoordinateMap exon, int exonIndex, byte exonPhase)
        {
            char strand = transcript.Gene.OnReverseStrand ? '-' : '+';

            // the exon entry is always written
            WriteExonEntry(writer, ucscReferenceName, transcript, "exon", exon.GenomicStart, exon.GenomicEnd, strand, exonIndex, exonPhase);

            // without a translation there is no coding region, so no CDS/UTR entries
            var translation = transcript.Translation;
            if (translation == null)
            {
                return;
            }

            var codingRegion = translation.CodingRegion;
            int codingStart  = codingRegion.GenomicStart;
            int codingEnd    = codingRegion.GenomicEnd;

            // CDS entry: the exon clipped to the coding region
            if (HasCds(exon, codingStart, codingEnd))
            {
                int cdsStart;
                int cdsEnd;
                GetCdsCoordinates(exon, codingStart, codingEnd, out cdsStart, out cdsEnd);
                WriteExonEntry(writer, ucscReferenceName, transcript, "CDS", cdsStart, cdsEnd, strand, exonIndex, exonPhase);
            }

            if (!HasUtr(exon, codingStart, codingEnd))
            {
                return;
            }

            // UTR before the coding region: ends just before the coding start, but never past the exon end
            if (exon.GenomicStart < codingStart)
            {
                int lastBaseBeforeCds = codingStart - 1;
                int utrEnd            = lastBaseBeforeCds > exon.GenomicEnd ? exon.GenomicEnd : lastBaseBeforeCds;

                WriteExonEntry(writer, ucscReferenceName, transcript, "UTR", exon.GenomicStart, utrEnd, strand,
                               exonIndex, exonPhase);
            }

            // UTR after the coding region: starts just after the coding end, but never before the exon start
            if (exon.GenomicEnd > codingEnd)
            {
                int firstBaseAfterCds = codingEnd + 1;
                int utrStart          = firstBaseAfterCds < exon.GenomicStart ? exon.GenomicStart : firstBaseAfterCds;

                WriteExonEntry(writer, ucscReferenceName, transcript, "UTR", utrStart, exon.GenomicEnd, strand,
                               exonIndex, exonPhase);
            }
        }
Пример #6
0
        /// <summary>
        /// clips the exon's genomic range to the coding region: the start is raised to
        /// codingRegionStart and the end is lowered to codingRegionEnd when the exon extends past them
        /// </summary>
        private static void GetCdsCoordinates(CdnaCoordinateMap exon, int codingRegionStart, int codingRegionEnd, out int cdsStart,
                                              out int cdsEnd)
        {
            int exonStart = exon.GenomicStart;
            int exonEnd   = exon.GenomicEnd;

            cdsStart = exonStart < codingRegionStart ? codingRegionStart : exonStart;
            cdsEnd   = exonEnd > codingRegionEnd ? codingRegionEnd : exonEnd;
        }
Пример #7
0
        /// <summary>
        /// translates a genomic start/end pair into cDNA coordinates using the supplied map.
        /// On the reverse strand both endpoints are measured back from the map's genomic end
        /// (so start and end swap roles); otherwise the start is offset from the map's
        /// genomic/cDNA start and the end from the map's genomic/cDNA end.
        /// </summary>
        private static Coordinate ConvertGenomicPosToCdnaPos(int start, int end, CdnaCoordinateMap map, bool onReverseStrand)
        {
            if (onReverseStrand)
            {
                return new Coordinate(map.CdnaStart - end + map.GenomicEnd,
                                      map.CdnaStart - start + map.GenomicEnd,
                                      false);
            }

            return new Coordinate(start - map.GenomicStart + map.CdnaStart,
                                  end - map.GenomicEnd + map.CdnaEnd,
                                  false);
        }
Пример #8
0
        /// <summary>
        /// determines which exons and introns overlap the variant's alternate allele and formats both
        /// as number strings [BaseTranscriptVariation.pm:474 _exon_intron_number]
        /// </summary>
        /// <param name="cdnaMaps">the exons, in the order used for numbering (1-based)</param>
        /// <param name="introns">used only for its length; when null, intronNumber is set to null</param>
        /// <param name="onReverseStrand">passed through to GetExonIntronNumber</param>
        /// <param name="ta">supplies the alternate allele; HasExonOverlap is set to true when any exon overlaps</param>
        /// <param name="exonNumber">receives the formatted exon number string</param>
        /// <param name="intronNumber">receives the formatted intron number string, or null</param>
        public static void ExonIntronNumber(CdnaCoordinateMap[] cdnaMaps, SimpleInterval[] introns, bool onReverseStrand,
                                            TranscriptAnnotation ta, out string exonNumber, out string intronNumber)
        {
            var allele  = ta.AlternateAllele;
            var variant = new AnnotationInterval(allele.Start, allele.End);

            var exonHits   = new List<int>();
            var intronHits = new List<int>();

            var previous = CdnaCoordinateMap.Null();

            for (int i = 0; i < cdnaMaps.Length; i++)
            {
                var current = cdnaMaps[i];

                // exons are numbered from 1
                if (variant.Overlaps(current.GenomicStart, current.GenomicEnd))
                {
                    exonHits.Add(i + 1);
                }

                // the intron is the gap between the previous exon and this one; it carries the
                // number of the previous exon
                if (!previous.IsNull && variant.Overlaps(previous.GenomicEnd + 1, current.GenomicStart - 1))
                {
                    intronHits.Add(i);
                }

                previous = current;
            }

            exonNumber = GetExonIntronNumber(exonHits, cdnaMaps.Length, onReverseStrand);

            if (introns == null)
            {
                intronNumber = null;
            }
            else
            {
                intronNumber = GetExonIntronNumber(intronHits, introns.Length, onReverseStrand);
            }

            // the flag is only ever raised here, never cleared
            if (exonHits.Count > 0)
            {
                ta.HasExonOverlap = true;
            }
        }
Пример #9
0
        /// <summary>
        /// get the shortest intron offset from the nearest exon: stores in po the cDNA position of
        /// the nearer flanking exon's boundary and the signed distance from it
        /// </summary>
        /// <param name="prevExon">the exon preceding the intron in genomic coordinates</param>
        /// <param name="exon">the exon following the intron in genomic coordinates</param>
        /// <param name="position">the genomic position inside the intron</param>
        /// <param name="po">receives Position and Offset</param>
        private void GetIntronOffset(CdnaCoordinateMap prevExon, CdnaCoordinateMap exon, int?position, PositionOffset po)
        {
            int?upDist   = position - prevExon.GenomicEnd;
            int?downDist = exon.GenomicStart - position;

            bool onReverseStrand = _transcript.Gene.OnReverseStrand;

            // the upstream exon wins when it is strictly closer, or on a tie in the positive orientation
            bool useUpstreamExon = upDist < downDist || (upDist == downDist && !onReverseStrand);
            var  nearestExon     = useUpstreamExon ? prevExon : exon;

            int tempCdnaBegin, tempCdnaEnd;
            TranscriptUtilities.GetCodingDnaEndpoints(_transcript.CdnaMaps, nearestExon.GenomicStart,
                                                      nearestExon.GenomicEnd, out tempCdnaBegin, out tempCdnaEnd);

            if (useUpstreamExon)
            {
                po.Position = onReverseStrand ? tempCdnaBegin : tempCdnaEnd;
                po.Offset   = onReverseStrand ? -upDist : upDist;
            }
            else
            {
                po.Position = onReverseStrand ? tempCdnaEnd : tempCdnaBegin;
                po.Offset   = onReverseStrand ? downDist : -downDist;
            }
        }
Пример #10
0
        public void ExonReadWriteTests()
        {
            // round-trips three maps through a temporary binary file and checks that each one
            // read back equals the one written, in the same order
            var randomPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            var exon1 = new CdnaCoordinateMap(100, 200, 10, 20);
            var exon2 = new CdnaCoordinateMap(100, 200, 11, 21);
            var exon3 = new CdnaCoordinateMap(105, 201, 12, 15);

            try
            {
                using (var writer = new ExtendedBinaryWriter(FileUtilities.GetCreateStream(randomPath)))
                {
                    exon1.Write(writer);
                    exon2.Write(writer);
                    exon3.Write(writer);
                }

                using (var reader = new ExtendedBinaryReader(FileUtilities.GetReadStream(randomPath)))
                {
                    Assert.Equal(exon1, CdnaCoordinateMap.Read(reader));
                    Assert.Equal(exon2, CdnaCoordinateMap.Read(reader));
                    Assert.Equal(exon3, CdnaCoordinateMap.Read(reader));
                }
            }
            finally
            {
                // clean up the temp file even when a write, read or assertion above throws;
                // File.Delete does not throw if the file was never created
                File.Delete(randomPath);
            }
        }
Пример #11
0
        /// <summary>
        /// reads the next transcript entry from the underlying reader (ID line, transcript info line,
        /// internal indices line, exons, introns, cDNA maps and miRNAs) and builds a Transcript from it.
        /// The exon section is skipped rather than parsed.
        /// </summary>
        /// <returns>
        /// the parsed transcript, or null if the reader has no more lines when the ID line, the
        /// transcript info line, the internal indices line or the cDNA maps header is expected
        /// </returns>
        /// <exception cref="GeneralException">
        /// thrown when no lists have been supplied, when a line has an unexpected column count or
        /// line type, or when the reader ends in the middle of the cDNA map entries
        /// </exception>
        public Transcript GetLightTranscript()
        {
            if (!_hasLists)
            {
                throw new GeneralException("No lists have been supplied to the transcript reader.");
            }

            // ================
            // read the ID line
            // ================

            string line = _reader.ReadLine();

            if (line == null)
            {
                return(null);
            }

            var cols = line.Split('\t');

            if (cols.Length != 7)
            {
                throw new GeneralException($"Expected 7 columns but found {cols.Length} when parsing the transcript entry: {line}");
            }

            // validate the line type before converting any column, so that a misplaced line is
            // reported as such rather than as a conversion failure in one of its columns
            var lineType = cols[0];

            if (lineType != "Transcript")
            {
                throw new GeneralException($"Expected the Transcript lineType, but found: {line}");
            }

            var transcriptId      = CompactId.Convert(cols[1]);
            var transcriptVersion = GetMaxVersion(cols[2], cols[1]);
            var proteinId         = CompactId.Convert(cols[3]);
            var proteinVersion    = GetMaxVersion(cols[4], cols[3]);
            var geneId            = cols[5];
            var bioType           = (BioType)byte.Parse(cols[6]);

            // ========================
            // read the transcript info
            // ========================

            line = _reader.ReadLine();
            if (line == null)
            {
                return(null);
            }

            cols = line.Split('\t');
            if (cols.Length != 11)
            {
                throw new GeneralException($"Expected 11 columns but found {cols.Length} when parsing the transcript info entry: {line}");
            }

            // NOTE: 11 columns are required, but only cols[0]..cols[9] are read here
            var referenceIndex    = ushort.Parse(cols[0]);
            var start             = int.Parse(cols[1]);
            var end               = int.Parse(cols[2]);
            var codingRegionStart = int.Parse(cols[3]);
            var codingRegionEnd   = int.Parse(cols[4]);
            var cdnaCodingStart   = int.Parse(cols[5]);
            var cdnaCodingEnd     = int.Parse(cols[6]);
            var totalExonLength   = int.Parse(cols[7]);
            var isCanonical       = cols[8] == "Y";
            var startExonPhase    = GetExonPhase(cols[9]);

            var gene = GetGene(referenceIndex, start, end, transcriptId.ToString(), geneId).ToGene();

            // read the internal indices
            line = _reader.ReadLine();
            if (line == null)
            {
                return(null);
            }

            cols = line.Split('\t');
            if (cols.Length != 4)
            {
                throw new GeneralException($"Expected 4 columns but found {cols.Length} when parsing the transcript internal indices: {line}");
            }

            // the cDNA sequence index is parsed (so a malformed value still fails) but not used
            // ReSharper disable once UnusedVariable
            var cdnaSeqIndex    = int.Parse(cols[0]);
            var peptideSeqIndex = int.Parse(cols[1]);
            var siftIndex       = int.Parse(cols[2]);
            var polyPhenIndex   = int.Parse(cols[3]);

            // =================================
            // read the exons, introns, & miRNAs
            // =================================

            SkipItems("Exons");
            var introns = GetItems("Introns", _introns);

            // -1 means the transcript has no peptide sequence
            var peptide = peptideSeqIndex != -1 ? _peptideSeqs[peptideSeqIndex] : null;

            // ==================
            // read the cDNA maps
            // ==================

            line = _reader.ReadLine();
            if (line == null)
            {
                return(null);
            }

            cols = line.Split('\t');

            // Split always yields at least one element, so cols[0] is safe to read
            lineType = cols[0];

            if (lineType != "cDNA maps")
            {
                throw new GeneralException($"Expected the cDNA maps lineType, but found: {line}");
            }

            // the count lives in the second column; guard against a header without it
            if (cols.Length < 2)
            {
                throw new GeneralException($"Expected at least 2 columns but found {cols.Length} when parsing the cDNA maps entry: {line}");
            }

            var numCdnaMaps = int.Parse(cols[1]);

            // stays null (not an empty array) when the transcript has no cDNA maps
            CdnaCoordinateMap[] cdnaMaps = null;

            if (numCdnaMaps > 0)
            {
                cdnaMaps = new CdnaCoordinateMap[numCdnaMaps];

                for (int i = 0; i < numCdnaMaps; i++)
                {
                    line = _reader.ReadLine();
                    if (line == null)
                    {
                        throw new GeneralException("Found null line while parsing CDNA maps.");
                    }

                    cols = line.Split('\t');

                    if (cols.Length != 4)
                    {
                        throw new GeneralException($"Expected 4 columns but found {cols.Length} when parsing the cDNA map entry: {line}");
                    }

                    var genomicStart = int.Parse(cols[0]);
                    var genomicEnd   = int.Parse(cols[1]);
                    var cdnaStart    = int.Parse(cols[2]);
                    var cdnaEnd      = int.Parse(cols[3]);

                    cdnaMaps[i] = new CdnaCoordinateMap(genomicStart, genomicEnd, cdnaStart, cdnaEnd);
                }
            }

            // ===============
            // read the miRNAs
            // ===============

            var microRnas = GetItems("miRNAs", _microRnas);

            // ===================
            // put it all together
            // ===================

            var codingRegion = new CdnaCoordinateMap(codingRegionStart, codingRegionEnd, cdnaCodingStart, cdnaCodingEnd);

            // a coding region start of -1 means there is no translation
            var translation = codingRegionStart != -1
                ? new Translation(codingRegion, proteinId, proteinVersion, peptide)
                : null;

            return(new Transcript(referenceIndex, start, end, transcriptId, transcriptVersion, translation, bioType,
                                  gene, totalExonLength, startExonPhase, isCanonical, introns, microRnas, cdnaMaps, siftIndex,
                                  polyPhenIndex, Header.TranscriptSource));
        }
Пример #12
0
 /// <summary>
 /// returns the number of genomic positions covered by the map, counting both endpoints
 /// </summary>
 private static int GetExonLength(CdnaCoordinateMap cdnaMap)
 {
     int lastBase  = cdnaMap.GenomicEnd;
     int firstBase = cdnaMap.GenomicStart;

     return lastBase - firstBase + 1;
 }
Пример #13
0
        public void ExonToStringTests()
        {
            // only checks that ToString() returns something; the text itself is not inspected
            var map = new CdnaCoordinateMap(100, 200, 1, 2);
            string description = map.ToString();

            Assert.NotNull(description);
        }