Ejemplo n.º 1
0
        /// <summary>
        /// Tests whether an inserted alternate allele is identical to the reference bases
        /// next to the insertion, provided the compared region stays inside the transcript.
        /// </summary>
        /// <param name="compressedSequence">reference sequence the comparison region is read from</param>
        /// <param name="transcript">transcript supplying the strand and the Start/End bounds</param>
        /// <returns>
        /// true when the reference region equals the alternate allele; false for non-insertions
        /// or when the bounds check against the transcript fails
        /// </returns>
        public bool CheckForDuplicationForAltAlleleWithinTranscript(ICompressedSequence compressedSequence, Transcript transcript)
        {
            if (VepVariantType != VariantType.insertion)
            {
                return false;
            }

            var insertedLength = AlternateAllele.Length;
            int regionOffset;

            if (transcript.Gene.OnReverseStrand)
            {
                // reverse strand: the region read begins at offset Start - 1
                if (End + insertedLength > transcript.End)
                {
                    return false;
                }

                regionOffset = Start - 1;
            }
            else
            {
                // forward strand: the region read begins at offset End - length
                if (Start - insertedLength < transcript.Start)
                {
                    return false;
                }

                regionOffset = End - insertedLength;
            }

            return compressedSequence.Substring(regionOffset, insertedLength) == AlternateAllele;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// For insertions, records whether the alternate allele equals the reference bases read
        /// at offset Start - 1 (forward-transcript flag) and at offset End - length
        /// (reverse-transcript flag). Any other variant type leaves both flags untouched.
        /// </summary>
        /// <param name="compressedSequence">reference sequence the two regions are read from</param>
        public void CheckForDuplicationForAltAllele(ICompressedSequence compressedSequence)
        {
            if (VepVariantType == VariantType.insertion)
            {
                var insertedLength = AlternateAllele.Length;

                // read both regions before updating either flag
                var regionAtStart   = compressedSequence.Substring(Start - 1, insertedLength);
                var regionBeforeEnd = compressedSequence.Substring(End - insertedLength, insertedLength);

                _isForwardTranscriptDuplicate = regionAtStart == AlternateAllele;
                _isReverseTranscriptDuplicate = regionBeforeEnd == AlternateAllele;
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Adds the non_can_splice filter when the first two and last two bases of the given
        /// intron are not the expected pair: GT..AG on the forward strand, CT..AC on the
        /// reverse strand.
        /// </summary>
        /// <param name="intronIdx">index into transcript.Introns</param>
        /// <param name="transcript">transcript owning the intron and defining the strand</param>
        /// <param name="filters">set that receives the filter</param>
        /// <param name="sequence">reference sequence the intron ends are read from</param>
        private void CheckNonCanonicalSplice(int intronIdx, Transcript transcript,
                                             HashSet<LofteeFilter.Filter> filters, ICompressedSequence sequence)
        {
            var intron        = transcript.Introns[intronIdx];
            var firstTwoBases = sequence.Substring(intron.Start - 1, 2);
            var lastTwoBases  = sequence.Substring(intron.End - 2, 2);

            string expectedFirst;
            string expectedLast;

            if (transcript.Gene.OnReverseStrand)
            {
                expectedFirst = "CT";
                expectedLast  = "AC";
            }
            else
            {
                expectedFirst = "GT";
                expectedLast  = "AG";
            }

            if (firstTwoBases == expectedFirst && lastTwoBases == expectedLast)
            {
                return;
            }

            filters.Add(LofteeFilter.Filter.non_can_splice);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Creates a copy of the variant whose reference allele is read from the reference
        /// sequence over [Start, Stop]; a null alternate allele is replaced by an empty string.
        /// </summary>
        /// <param name="variant">variant to rebuild; may be null</param>
        /// <param name="compressedSequence">reference sequence the allele is read from</param>
        /// <returns>the rebuilt variant, or null when <paramref name="variant"/> is null</returns>
        private static ClinvarVariant GenerateRefAllele(ClinvarVariant variant, ICompressedSequence compressedSequence)
        {
            if (variant == null)
            {
                return null;
            }

            var refLength = variant.Stop - variant.Start + 1;
            var refBases  = compressedSequence.Substring(variant.Start - 1, refLength);

            return new ClinvarVariant(variant.Chromosome, variant.Start, variant.Stop, refBases, variant.AltAllele ?? "");
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Checks that the reference sequence read at offset pos - 1 equals the supplied
        /// reference allele. The chromosome's reference is loaded before the comparison.
        /// </summary>
        /// <param name="chromosome">chromosome name to resolve to a reference index</param>
        /// <param name="pos">position of the allele (the sequence is read at pos - 1)</param>
        /// <param name="refAllele">expected reference bases</param>
        /// <returns>false when the chromosome is unknown or the bases differ; true otherwise</returns>
        private bool ValidateReference(string chromosome, int pos, string refAllele)
        {
            var refIndex         = _compressedSequence.Renamer.GetReferenceIndex(chromosome);
            var isKnownReference = refIndex != ChromosomeRenamer.UnknownReferenceIndex;

            if (!isKnownReference)
            {
                return false;
            }

            _dataFileManager.LoadReference(refIndex, () => { });

            var observedBases = _compressedSequence.Substring(pos - 1, refAllele.Length);
            return observedBases == refAllele;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Adds the non_can_splice_surr filter when a splice site flanking the affected exon
        /// is not canonical: the acceptor before the exon must read AG and the donor after it
        /// must read GT, both expressed on the transcript's strand.
        /// </summary>
        /// <param name="ta">annotated transcript whose Exons text identifies the affected exon</param>
        /// <param name="transcript">transcript supplying the introns, exon count and strand</param>
        /// <param name="filters">set that receives the filter</param>
        /// <param name="sequence">reference sequence the splice sites are read from</param>
        private void CheckNonCanonicalSpliceSurr(IAnnotatedTranscript ta, Transcript transcript,
                                                 HashSet<LofteeFilter.Filter> filters, ICompressedSequence sequence)
        {
            if (ta.Exons == null)
            {
                return;
            }

            // only the text before the first '/' and then before the first '-' is parsed
            var exonField  = ta.Exons.Split('/').First();
            int exonNumber = Convert.ToInt32(exonField.Split('-').First());
            var exonCount  = transcript.CdnaMaps.Length;

            if (exonCount <= 1)
            {
                return;
            }

            var onReverseStrand = transcript.Gene.OnReverseStrand;

            // acceptor site: exists only when the affected exon is not the first one
            string acceptor = null;
            if (exonNumber > 1)
            {
                if (onReverseStrand)
                {
                    var intron = transcript.Introns[exonCount - exonNumber];
                    acceptor = SequenceUtilities.GetReverseComplement(sequence.Substring(intron.Start - 1, 2));
                }
                else
                {
                    var intron = transcript.Introns[exonNumber - 2];
                    acceptor = sequence.Substring(intron.End - 2, 2);
                }
            }

            // donor site: exists only when the affected exon is not the last one
            string donor = null;
            if (exonNumber < exonCount)
            {
                if (onReverseStrand)
                {
                    var intron = transcript.Introns[exonCount - exonNumber - 1];
                    donor = SequenceUtilities.GetReverseComplement(sequence.Substring(intron.End - 2, 2));
                }
                else
                {
                    var intron = transcript.Introns[exonNumber - 1];
                    donor = sequence.Substring(intron.Start - 1, 2);
                }
            }

            var acceptorIsNonCanonical = acceptor != null && acceptor != "AG";
            var donorIsNonCanonical    = donor != null && donor != "GT";

            if (acceptorIsNonCanonical || donorIsNonCanonical)
            {
                filters.Add(LofteeFilter.Filter.non_can_splice_surr);
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Retrieves the bases of a sub-region that is addressed relative to an enclosing
        /// region. On the forward strand the read starts at seqStart + subStart; on the reverse
        /// strand it starts at seqEnd - subEnd and the bases are reverse complemented.
        /// </summary>
        /// <param name="seqStart">start of the enclosing region</param>
        /// <param name="seqEnd">end of the enclosing region</param>
        /// <param name="seqOnReverseStrand">true when the enclosing region is on the reverse strand</param>
        /// <param name="subStart">start of the sub-region, relative to the enclosing region</param>
        /// <param name="subEnd">end of the sub-region, relative to the enclosing region</param>
        /// <param name="cs">reference sequence the bases are read from</param>
        /// <returns>subEnd - subStart + 1 bases, reverse complemented on the reverse strand</returns>
        public static string GetSubSubstring(int seqStart, int seqEnd, bool seqOnReverseStrand, int subStart, int subEnd, ICompressedSequence cs)
        {
            var length = subEnd - subStart + 1;

            if (!seqOnReverseStrand)
            {
                return cs.Substring(seqStart + subStart - 1, length);
            }

            var reverseStrandBases = cs.Substring(seqEnd - subEnd - 1, length);
            return GetReverseComplement(reverseStrandBases);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Flags a NAGNAG site: for an allele covering a single reference position, the 11 bases
        /// around that position (6 read from offset pos - 6, 5 read from offset pos), taken on
        /// the transcript's strand, are searched for the motif NAGNAG where N is A, C, G or T.
        /// </summary>
        /// <param name="transcript">transcript defining the strand</param>
        /// <param name="allele">allele whose ReferenceBegin/ReferenceEnd locate the site</param>
        /// <param name="flags">set that receives the nagnag_site flag</param>
        /// <param name="sequence">reference sequence the flanking bases are read from</param>
        private void CheckNagnagSite(Transcript transcript, IAnnotatedAlternateAllele allele,
                                     HashSet<LofteeFilter.Flag> flags, ICompressedSequence sequence)
        {
            // only alleles with a known, single-position reference span are examined
            if (allele.ReferenceBegin == null || allele.ReferenceEnd == null ||
                allele.ReferenceBegin.Value != allele.ReferenceEnd.Value)
            {
                return;
            }

            int pos = allele.ReferenceBegin.Value;

            string upStreamSeq   = sequence.Substring(pos - 6, 6);
            string downStreamSeq = sequence.Substring(pos, 5);

            var combineSeq = transcript.Gene.OnReverseStrand
                ? SequenceUtilities.GetReverseComplement(upStreamSeq + downStreamSeq)
                : upStreamSeq + downStreamSeq;

            // inside a character class '|' is a literal pipe, not alternation, so the class
            // lists only the four nucleotides
            if (Regex.IsMatch(combineSeq, "[ACGT]AG[ACGT]AG"))
            {
                flags.Add(LofteeFilter.Flag.nagnag_site);
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Builds the coding sequence by concatenating the reference bases of every cDNA map
        /// that overlaps [_start, _end], clipping maps that contain _start or _end.
        /// A positive start exon phase adds that many 'N' bases: in front for forward-strand
        /// genes, at the back (before reverse complementing) for reverse-strand genes.
        /// </summary>
        /// <returns>the coding sequence, reverse complemented for reverse-strand genes</returns>
        public string Sequence()
        {
            _sb.Clear();

            var hasStartPhase = _startExonPhase > 0;

            // forward orientation: phase padding precedes the exon bases
            if (hasStartPhase && !_geneOnReverseStrand)
            {
                _sb.Append('N', _startExonPhase);
            }

            foreach (var map in _cdnaMaps)
            {
                // skip maps lying completely outside [_start, _end]
                var overlapsRegion = map.GenomicEnd >= _start && map.GenomicStart <= _end;
                if (!overlapsRegion)
                {
                    continue;
                }

                int begin = map.GenomicStart;
                int end   = map.GenomicEnd;

                // clip the maps that contain the region boundaries
                if (begin <= _start && _start <= end)
                {
                    begin = _start;
                }

                if (begin <= _end && _end <= end)
                {
                    end = _end;
                }

                _sb.Append(_sequence.Substring(begin - 1, end - begin + 1));
            }

            // reverse orientation: phase padding follows the exon bases
            if (hasStartPhase && _geneOnReverseStrand)
            {
                _sb.Append('N', _startExonPhase);
            }

            if (_geneOnReverseStrand)
            {
                return SequenceUtilities.GetReverseComplement(_sb.ToString());
            }

            return _sb.ToString();
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Concatenates the reference bases of every entry in <paramref name="cdnaMaps"/>
        /// (5' UTR + cDNA + 3' UTR) [Transcript.pm:862 spliced_seq]. When no reference is
        /// available, each entry contributes a run of 'N' of the same length instead.
        /// </summary>
        /// <param name="compressedSequence">reference sequence; may be null</param>
        /// <param name="cdnaMaps">exon coordinate maps, appended in array order</param>
        /// <param name="onReverseStrand">true to reverse complement the concatenated bases</param>
        /// <returns>the spliced sequence on the requested strand</returns>
        private static string GetSplicedSequence(ICompressedSequence compressedSequence, CdnaCoordinateMap[] cdnaMaps, bool onReverseStrand)
        {
            var spliced      = new StringBuilder();
            var hasReference = compressedSequence != null;

            foreach (var map in cdnaMaps)
            {
                var length = map.GenomicEnd - map.GenomicStart + 1;

                if (hasReference)
                {
                    spliced.Append(compressedSequence.Substring(map.GenomicStart - 1, length));
                }
                else
                {
                    // no reference provided: substitute unknown bases
                    spliced.Append(new string('N', length));
                }
            }

            var forwardBases = spliced.ToString();

            if (onReverseStrand)
            {
                return SequenceUtilities.GetReverseComplement(forwardBases);
            }

            return forwardBases;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Writes one VCF-style line for the transcript: a substitution at the midpoint of its
        /// coding region, using the first entry of _nucleotides that differs from the reference
        /// base. Transcripts without a translation are skipped, as are non-canonical
        /// transcripts of genes that have already been processed.
        /// </summary>
        /// <param name="writer">destination of the VCF line</param>
        /// <param name="transcript">transcript to create the variant for</param>
        private void CreateVcf(StreamWriter writer, Transcript transcript)
        {
            var geneSymbol = transcript.Gene.Symbol;

            var geneAlreadyCovered = !transcript.IsCanonical && _processedGeneSet.Contains(geneSymbol);
            if (geneAlreadyCovered || transcript.Translation == null)
            {
                return;
            }

            _processedGeneSet.Add(geneSymbol);
            _dataFileManager.LoadReference(transcript.ReferenceIndex, () => {});

            var codingRegion = transcript.Translation.CodingRegion;
            var position     = (codingRegion.GenomicStart + codingRegion.GenomicEnd) / 2;

            var refAllele = _compressedSequence.Substring(position - 1, 1);
            var altAllele = _nucleotides.First(nucleotide => nucleotide != refAllele);

            writer.WriteLine($"{_renamer.UcscReferenceNames[transcript.ReferenceIndex]}\t{position}\t.\t{refAllele}\t{altAllele}\t.\t.\t.");
        }
Ejemplo n.º 12
0
        public void Substring(int offset, int length, string expectedSubstring)
        {
            // the bases read at (offset, length) must equal the expected text
            Assert.Equal(expectedSubstring, _compressedSequence.Substring(offset, length));
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Builds the supplementary position creator for the item returned by
        /// CurrentMinSupplementaryDataItem(), folding in the current item of every enumerator
        /// that equals it and advancing those enumerators. When the item's chromosome differs
        /// from the current reference name, the current writer is closed, the new reference is
        /// loaded and a new writer is opened.
        /// </summary>
        /// <returns>the populated creator, or null when there is nothing left to return</returns>
        /// <exception cref="GeneralException">the reference index of the new chromosome is unknown</exception>
        private SupplementaryPositionCreator GetNextSupplementaryAnnotation()
        {
            // no more active iterators left
            if (_iSupplementaryDataItemList.Count == 0 && _additionalItemsList.Count == 0)
            {
                return(null);
            }

            var minSupplementaryDataItem = CurrentMinSupplementaryDataItem();

            if (minSupplementaryDataItem == null)
            {
                return(null);                                 // nothing more to return: all enumerators are empty
            }
            var sa = new SupplementaryAnnotationPosition(minSupplementaryDataItem.Start);


            var saCreator = new SupplementaryPositionCreator(sa)
            {
                RefSeqName = minSupplementaryDataItem.Chromosome
            };

            // stays null when no compressed sequence is available
            string refSequence = null;

            // the reference changed (or this is the first item): switch writers and load the new reference
            if (_currentRefName == null || !_currentRefName.Equals(saCreator.RefSeqName))
            {
                CloseCurrentSaWriter();

                _currentRefName = saCreator.RefSeqName;

                var refIndex = _renamer.GetReferenceIndex(_currentRefName);
                if (refIndex == ChromosomeRenamer.UnknownReferenceIndex)
                {
                    throw new GeneralException($"Could not find the reference index for: {_currentRefName}");
                }
                _dataFileManager.LoadReference(refIndex, () => {});

                OpenNewSaWriter();
            }

            // read ReferenceWindowSize bases starting at offset ReferencePosition - 1
            if (_compressedSequence != null)
            {
                refSequence = _compressedSequence.Substring(sa.ReferencePosition - 1, ReferenceWindowSize);
            }
            // enumerators that run out of items during this pass; removed after the loop
            var deleteList = new List <IEnumerator <SupplementaryDataItem> >();

            foreach (var iDataEnumerator in _iSupplementaryDataItemList)
            {
                // only use enumerators whose current item equals minSupplementaryDataItem
                if (!iDataEnumerator.Current.Equals(minSupplementaryDataItem))
                {
                    continue;
                }

                if (iDataEnumerator.Current.IsInterval)
                {
                    // interval items are collected in the supplementary interval list
                    var suppInterval = iDataEnumerator.Current.GetSupplementaryInterval(_renamer);

                    _supplementaryIntervalList.Add(suppInterval);
                }
                else
                {
                    // a non-null result is kept as an additional item for later processing
                    var additionalSuppData = iDataEnumerator.Current.SetSupplementaryAnnotations(saCreator, refSequence);

                    if (additionalSuppData != null)
                    {
                        _additionalItemsList.Add(additionalSuppData);
                    }
                }
                // advance the enumerator; remember it for removal if it has no more items
                if (!iDataEnumerator.MoveNext())
                {
                    deleteList.Add(iDataEnumerator);
                }
            }

            // add annotations from additional items if applicable.
            AddAdditionalItems(minSupplementaryDataItem, saCreator);

            // drop the exhausted enumerators from the list of active enumerators
            _iSupplementaryDataItemList.RemoveAll(x => deleteList.Contains(x));

            return(saCreator);
        }