/// <summary>
/// Determines whether an insertion repeats the reference bases next to it, choosing
/// the compared reference region according to the strand of the transcript's gene.
/// </summary>
/// <param name="compressedSequence">reference sequence the neighbouring bases are read from</param>
/// <param name="transcript">transcript supplying the strand and the start/end bounds that are checked</param>
/// <returns>
/// true when the variant is an insertion, the bounds check passes and the extracted
/// reference bases equal the alternate allele; false otherwise
/// </returns>
public bool CheckForDuplicationForAltAlleleWithinTranscript(ICompressedSequence compressedSequence, Transcript transcript)
{
    if (VepVariantType != VariantType.insertion)
    {
        return false;
    }

    int insertedLength = AlternateAllele.Length;

    if (transcript.Gene.OnReverseStrand)
    {
        // reverse strand: give up when End + length goes beyond the transcript end
        if (End + insertedLength > transcript.End)
        {
            return false;
        }

        return compressedSequence.Substring(Start - 1, insertedLength) == AlternateAllele;
    }

    // forward strand: give up when Start - length falls before the transcript start
    if (Start - insertedLength < transcript.Start)
    {
        return false;
    }

    return compressedSequence.Substring(End - insertedLength, insertedLength) == AlternateAllele;
}
/// <summary>
/// For insertions, records whether the alternate allele equals the reference bases
/// read at offset Start - 1 and at offset End - length. Does nothing for other variant types.
/// </summary>
/// <param name="compressedSequence">reference sequence the two candidate regions are read from</param>
public void CheckForDuplicationForAltAllele(ICompressedSequence compressedSequence)
{
    if (VepVariantType != VariantType.insertion)
    {
        return;
    }

    int insertedLength = AlternateAllele.Length;

    // read both regions before touching either flag so a failing read leaves both flags untouched
    string basesFromStart = compressedSequence.Substring(Start - 1, insertedLength);
    string basesBeforeEnd = compressedSequence.Substring(End - insertedLength, insertedLength);

    // NOTE(review): CheckForDuplicationForAltAlleleWithinTranscript uses the Start - 1 region
    // for reverse-strand genes, whereas here it feeds the "forward" flag - confirm the pairing is intended.
    _isForwardTranscriptDuplicate = basesFromStart == AlternateAllele;
    _isReverseTranscriptDuplicate = basesBeforeEnd == AlternateAllele;
}
/// <summary>
/// Adds the non_can_splice filter when the first two or the last two genomic bases of the
/// given intron differ from the dinucleotides expected for the gene's strand
/// (GT..AG on the forward strand, CT..AC on the reverse strand).
/// </summary>
/// <param name="intronIdx">index into transcript.Introns</param>
/// <param name="transcript">transcript owning the intron and the strand information</param>
/// <param name="filters">set that receives the filter</param>
/// <param name="sequence">reference sequence the intron boundaries are read from</param>
private void CheckNonCanonicalSplice(int intronIdx, Transcript transcript, HashSet<LofteeFilter.Filter> filters, ICompressedSequence sequence)
{
    var intron = transcript.Introns[intronIdx];

    var firstTwoBases = sequence.Substring(intron.Start - 1, 2);
    var lastTwoBases  = sequence.Substring(intron.End - 2, 2);

    string expectedFirst;
    string expectedLast;

    if (transcript.Gene.OnReverseStrand)
    {
        expectedFirst = "CT";
        expectedLast  = "AC";
    }
    else
    {
        expectedFirst = "GT";
        expectedLast  = "AG";
    }

    if (firstTwoBases != expectedFirst || lastTwoBases != expectedLast)
    {
        filters.Add(LofteeFilter.Filter.non_can_splice);
    }
}
/// <summary>
/// Builds a copy of the variant whose reference allele is read from the reference
/// sequence over [Start, Stop] and whose null alternate allele is replaced by an empty string.
/// </summary>
/// <param name="variant">variant to copy; may be null</param>
/// <param name="compressedSequence">reference sequence the reference allele is read from</param>
/// <returns>the new variant, or null when <paramref name="variant"/> is null</returns>
private static ClinvarVariant GenerateRefAllele(ClinvarVariant variant, ICompressedSequence compressedSequence)
{
    if (variant == null)
    {
        return null;
    }

    // Start is used as a 1-based position here, Substring takes a 0-based offset
    var referenceLength = variant.Stop - variant.Start + 1;
    var referenceAllele = compressedSequence.Substring(variant.Start - 1, referenceLength);
    var alternateAllele = variant.AltAllele ?? "";

    return new ClinvarVariant(variant.Chromosome, variant.Start, variant.Stop, referenceAllele, alternateAllele);
}
/// <summary>
/// Loads the reference for the given chromosome and checks that the bases starting
/// at <paramref name="pos"/> equal <paramref name="refAllele"/>.
/// </summary>
/// <param name="chromosome">chromosome name passed to the renamer to obtain a reference index</param>
/// <param name="pos">position of the first base; offset pos - 1 is passed to Substring</param>
/// <param name="refAllele">expected reference bases</param>
/// <returns>false when the chromosome has no known reference index or the bases differ; true otherwise</returns>
private bool ValidateReference(string chromosome, int pos, string refAllele)
{
    var referenceIndex = _compressedSequence.Renamer.GetReferenceIndex(chromosome);

    if (referenceIndex == ChromosomeRenamer.UnknownReferenceIndex)
    {
        return false;
    }

    _dataFileManager.LoadReference(referenceIndex, () => { });

    var observedBases = _compressedSequence.Substring(pos - 1, refAllele.Length);
    return observedBases == refAllele;
}
/// <summary>
/// Adds the non_can_splice_surr filter when a splice site of an intron flanking the
/// affected exon is not canonical: the acceptor (read from the intron before the exon,
/// in transcript order) must be AG and the donor (read from the intron after it) must be GT.
/// </summary>
/// <param name="ta">annotated transcript; its Exons text supplies the affected exon number</param>
/// <param name="transcript">transcript supplying the exon count, the introns and the strand</param>
/// <param name="filters">set that receives the filter</param>
/// <param name="sequence">reference sequence the splice-site bases are read from</param>
private void CheckNonCanonicalSpliceSurr(IAnnotatedTranscript ta, Transcript transcript, HashSet<LofteeFilter.Filter> filters, ICompressedSequence sequence)
{
    if (ta.Exons == null)
    {
        return;
    }

    // the exon number is the text before the first '/', and within that before the first '-'
    var exonField = ta.Exons.Split('/').First();
    int exonNumber = Convert.ToInt32(exonField.Split('-').First());

    var exonCount = transcript.CdnaMaps.Length;
    if (exonCount <= 1)
    {
        return;
    }

    bool reverse = transcript.Gene.OnReverseStrand;
    string acceptor = null;
    string donor = null;

    // acceptor site: only defined when an exon precedes the affected one
    if (exonNumber > 1)
    {
        int intronIndex = reverse ? exonCount - exonNumber : exonNumber - 2;
        var intron = transcript.Introns[intronIndex];

        int firstBase = reverse ? intron.Start : intron.End - 1;
        var bases = sequence.Substring(firstBase - 1, 2);

        acceptor = reverse ? SequenceUtilities.GetReverseComplement(bases) : bases;
    }

    // donor site: only defined when an exon follows the affected one
    if (exonNumber < exonCount)
    {
        int intronIndex = reverse ? exonCount - exonNumber - 1 : exonNumber - 1;
        var intron = transcript.Introns[intronIndex];

        int firstBase = reverse ? intron.End - 1 : intron.Start;
        var bases = sequence.Substring(firstBase - 1, 2);

        donor = reverse ? SequenceUtilities.GetReverseComplement(bases) : bases;
    }

    bool acceptorNonCanonical = acceptor != null && acceptor != "AG";
    bool donorNonCanonical = donor != null && donor != "GT";

    if (acceptorNonCanonical || donorNonCanonical)
    {
        filters.Add(LofteeFilter.Filter.non_can_splice_surr);
    }
}
/// <summary>
/// Retrieves the bases of a sub-range of a sequence region. The genomic start of the
/// sub-range is seqStart + subStart on the forward strand and seqEnd - subEnd on the
/// reverse strand; on the reverse strand the bases are returned reverse complemented.
/// </summary>
/// <param name="seqStart">start of the enclosing region</param>
/// <param name="seqEnd">end of the enclosing region</param>
/// <param name="seqOnReverseStrand">true when the enclosing region is on the reverse strand</param>
/// <param name="subStart">start of the sub-range relative to the enclosing region</param>
/// <param name="subEnd">end of the sub-range relative to the enclosing region</param>
/// <param name="cs">reference sequence the bases are read from</param>
/// <returns>subEnd - subStart + 1 bases, reverse complemented on the reverse strand</returns>
public static string GetSubSubstring(int seqStart, int seqEnd, bool seqOnReverseStrand, int subStart, int subEnd, ICompressedSequence cs)
{
    int length = subEnd - subStart + 1;

    if (!seqOnReverseStrand)
    {
        return cs.Substring(seqStart + subStart - 1, length);
    }

    var genomicBases = cs.Substring(seqEnd - subEnd - 1, length);
    return GetReverseComplement(genomicBases);
}
/// <summary>
/// Flags single-position alleles whose surrounding bases contain a NAGNAG motif
/// (any base, AG, any base, AG).
/// </summary>
/// <param name="transcript">transcript supplying the strand of the gene</param>
/// <param name="allele">allele to inspect; ignored unless ReferenceBegin and ReferenceEnd are set and equal</param>
/// <param name="flags">set that receives the nagnag_site flag</param>
/// <param name="sequence">reference sequence the surrounding bases are read from</param>
private void CheckNagnagSite(Transcript transcript, IAnnotatedAlternateAllele allele, HashSet<LofteeFilter.Flag> flags, ICompressedSequence sequence)
{
    if (allele.ReferenceBegin == null || allele.ReferenceEnd == null ||
        allele.ReferenceBegin.Value != allele.ReferenceEnd.Value)
    {
        return;
    }

    int pos = allele.ReferenceBegin.Value;

    // 11-base window: 6 bases read from offset pos - 6 followed by 5 bases read from offset pos
    string upstreamBases = sequence.Substring(pos - 6, 6);
    string downstreamBases = sequence.Substring(pos, 5);
    string window = upstreamBases + downstreamBases;

    // the motif is searched in transcript orientation
    if (transcript.Gene.OnReverseStrand)
    {
        window = SequenceUtilities.GetReverseComplement(window);
    }

    // a character class lists its members without separators; the previous pattern
    // "[A|T|C|G]" also accepted a literal '|' as a base
    if (Regex.IsMatch(window, "[ACGT]AG[ACGT]AG"))
    {
        flags.Add(LofteeFilter.Flag.nagnag_site);
    }
}
/// <summary>
/// Builds the coding sequence by concatenating, for every cDNA map that overlaps
/// [_start, _end], the reference bases of that map clipped to the interval.
/// When the start exon phase is positive, that many 'N' bases are added so that they
/// lead the returned sequence on either strand.
/// </summary>
/// <returns>the assembled sequence, reverse complemented when the gene is on the reverse strand</returns>
public string Sequence()
{
    _sb.Clear();

    bool needsPhasePadding = _startExonPhase > 0;

    // forward orientation: the padding goes in front
    if (needsPhasePadding && !_geneOnReverseStrand)
    {
        _sb.Append('N', _startExonPhase);
    }

    foreach (var map in _cdnaMaps)
    {
        // maps without any overlap with the coding interval lie entirely in the UTR
        bool overlapsCodingInterval = map.GenomicEnd >= _start && map.GenomicStart <= _end;
        if (!overlapsCodingInterval)
        {
            continue;
        }

        int begin = map.GenomicStart;
        int end   = map.GenomicEnd;

        // clip the first and last maps; the end test deliberately uses the already clipped begin
        if (_start >= begin && _start <= end)
        {
            begin = _start;
        }

        if (_end >= begin && _end <= end)
        {
            end = _end;
        }

        _sb.Append(_sequence.Substring(begin - 1, end - begin + 1));
    }

    // reverse orientation: padding appended here ends up in front after the reverse complement
    if (needsPhasePadding && _geneOnReverseStrand)
    {
        _sb.Append('N', _startExonPhase);
    }

    var genomicOrder = _sb.ToString();
    return _geneOnReverseStrand ? SequenceUtilities.GetReverseComplement(genomicOrder) : genomicOrder;
}
/// <summary>
/// Concatenates the reference bases of every exon in <paramref name="cdnaMaps"/>
/// (5' UTR + cDNA + 3' UTR) [Transcript.pm:862 spliced_seq].
/// </summary>
/// <param name="compressedSequence">reference sequence; when null each exon is emitted as a run of 'N'</param>
/// <param name="cdnaMaps">exons, visited in array order</param>
/// <param name="onReverseStrand">true to reverse complement the concatenated bases</param>
/// <returns>the spliced sequence</returns>
private static string GetSplicedSequence(ICompressedSequence compressedSequence, CdnaCoordinateMap[] cdnaMaps, bool onReverseStrand)
{
    var spliced = new StringBuilder();

    foreach (var exon in cdnaMaps)
    {
        var length = exon.GenomicEnd - exon.GenomicStart + 1;

        // without a reference the exon is represented by placeholder bases of the same length
        string exonBases = compressedSequence == null
            ? new string('N', length)
            : compressedSequence.Substring(exon.GenomicStart - 1, length);

        spliced.Append(exonBases);
    }

    var genomicOrder = spliced.ToString();
    return onReverseStrand ? SequenceUtilities.GetReverseComplement(genomicOrder) : genomicOrder;
}
/// <summary>
/// Writes one VCF line for the transcript: a single-base substitution at the midpoint
/// of the coding region, using the first entry of _nucleotides that differs from the
/// reference base as the alternate allele.
/// Skips non-canonical transcripts of genes that were already processed and
/// transcripts without a translation.
/// </summary>
/// <param name="writer">destination of the VCF line</param>
/// <param name="transcript">transcript to create the variant for</param>
private void CreateVcf(StreamWriter writer, Transcript transcript)
{
    var geneSymbol = transcript.Gene.Symbol;

    bool isRedundant = !transcript.IsCanonical && _processedGeneSet.Contains(geneSymbol);
    if (isRedundant)
    {
        return;
    }

    if (transcript.Translation == null)
    {
        return;
    }

    _processedGeneSet.Add(geneSymbol);
    _dataFileManager.LoadReference(transcript.ReferenceIndex, () => {});

    var codingRegion = transcript.Translation.CodingRegion;
    var position = (codingRegion.GenomicStart + codingRegion.GenomicEnd) / 2;

    var refAllele = _compressedSequence.Substring(position - 1, 1);
    var altAllele = _nucleotides.First(candidate => candidate != refAllele);

    writer.WriteLine($"{_renamer.UcscReferenceNames[transcript.ReferenceIndex]}\t{position}\t.\t{refAllele}\t{altAllele}\t.\t.\t.");
}
/// <summary>
/// Asserts that the compressed sequence yields <paramref name="expectedSubstring"/>
/// for the given offset and length.
/// </summary>
public void Substring(int offset, int length, string expectedSubstring)
    => Assert.Equal(expectedSubstring, _compressedSequence.Substring(offset, length));
/// <summary>
/// Builds the supplementary annotation for the smallest pending data item, merging
/// every enumerator whose current item equals it and advancing those enumerators.
/// Switches the reference (and the SA writer) when the item is on a new reference name.
/// </summary>
/// <returns>the populated creator, or null when there is nothing left to process</returns>
/// <exception cref="GeneralException">the reference name of the item has no known reference index</exception>
private SupplementaryPositionCreator GetNextSupplementaryAnnotation()
{
    // no active enumerators and no carried-over items
    bool nothingPending = _iSupplementaryDataItemList.Count == 0 && _additionalItemsList.Count == 0;
    if (nothingPending)
    {
        return null;
    }

    var minItem = CurrentMinSupplementaryDataItem();
    if (minItem == null)
    {
        // nothing more to return: all enumerators are empty
        return null;
    }

    var position = new SupplementaryAnnotationPosition(minItem.Start);
    var creator = new SupplementaryPositionCreator(position) { RefSeqName = minItem.Chromosome };

    bool sameReference = _currentRefName != null && _currentRefName.Equals(creator.RefSeqName);
    if (!sameReference)
    {
        // close the writer of the previous reference before loading the next one
        CloseCurrentSaWriter();
        _currentRefName = creator.RefSeqName;

        var refIndex = _renamer.GetReferenceIndex(_currentRefName);
        if (refIndex == ChromosomeRenamer.UnknownReferenceIndex)
        {
            throw new GeneralException($"Could not find the reference index for: {_currentRefName}");
        }

        _dataFileManager.LoadReference(refIndex, () => {});
        OpenNewSaWriter();
    }

    // reference bases starting at the annotated position; stays null without a reference sequence
    string refSequence = _compressedSequence != null
        ? _compressedSequence.Substring(position.ReferencePosition - 1, ReferenceWindowSize)
        : null;

    // enumerators that ran out of items while being advanced
    var exhausted = new List<IEnumerator<SupplementaryDataItem>>();

    foreach (var enumerator in _iSupplementaryDataItemList)
    {
        var item = enumerator.Current;

        // only items at the same location as the minimum item take part
        if (!item.Equals(minItem))
        {
            continue;
        }

        if (item.IsInterval)
        {
            _supplementaryIntervalList.Add(item.GetSupplementaryInterval(_renamer));
        }
        else
        {
            var carriedOver = item.SetSupplementaryAnnotations(creator, refSequence);
            if (carriedOver != null)
            {
                _additionalItemsList.Add(carriedOver);
            }
        }

        if (!enumerator.MoveNext())
        {
            exhausted.Add(enumerator);
        }
    }

    // add annotations from additional items if applicable
    AddAdditionalItems(minItem, creator);

    // drop the enumerators that have no items left
    _iSupplementaryDataItemList.RemoveAll(candidate => exhausted.Contains(candidate));

    return creator;
}