/// <summary>
/// Annotates a single alternate allele against a single transcript and stores the result
/// on the current Piano variant.
/// </summary>
/// <param name="variant">the variant that owns the allele (supplies the internal copy number type)</param>
/// <param name="altAllele">the alternate allele being annotated</param>
/// <param name="transcript">the transcript to annotate against</param>
private void AnnotateAltAllele(VariantFeature variant, VariantAlternateAllele altAllele, Transcript transcript)
{
    // transcripts that are only upstream or downstream of the allele (no overlap
    // according to Overlap.Partial) are not annotated here
    bool overlapsTranscript = Overlap.Partial(transcript.Start, transcript.End, altAllele.Start, altAllele.End);
    if (!overlapsTranscript)
    {
        return;
    }

    var annotation = new TranscriptAnnotation
    {
        AlternateAllele         = altAllele,
        HasValidCdnaCodingStart = false,
        HasValidCdsStart        = false
    };

    MapCdnaCoordinates(transcript, annotation, altAllele);
    _pianoVariant.CreateAnnotationObject(transcript, altAllele);
    GetCodingAnnotations(transcript, annotation, _compressedSequence);

    // derive the consequences from the variant effect
    var copyNumberType = variant.InternalCopyNumberType;
    var effect         = new VariantEffect(annotation, transcript, copyNumberType);
    var consequences   = new Consequences(effect);
    consequences.DetermineVariantEffects(copyNumberType);

    var consequenceStrings = consequences.GetConsequenceStrings();
    _pianoVariant.FinalizeAndAddAnnotationObject(transcript, annotation, consequenceStrings);
}
/// <summary>
/// Copies variant-level data onto this object and adds one JsonVariant per alternate allele.
/// </summary>
/// <param name="variant">the variant whose data is added</param>
public void AddVariantData(VariantFeature variant)
{
    // cytogenetic band
    CytogeneticBand = variant.CytogeneticBand;

    // fields that come from the supplementary intervals
    var supplementaryIntervals = variant.GetSupplementaryIntervals();
    PopulateSuppIntervalFields(supplementaryIntervals);

    foreach (var allele in variant.AlternateAlleles)
    {
        var annotatedAllele = new JsonVariant(allele, variant);
        AnnotatedAlternateAlleles.Add(annotatedAllele);

        // custom intervals are not part of SA as they are a separate data structure
        AddCustomIntervals(allele, annotatedAllele);

        // only alleles that carry a supplementary annotation position contribute SA data
        var saPosition = allele.SupplementaryAnnotationPosition;
        if (saPosition != null)
        {
            saPosition.AddSaPositionToVariant(annotatedAllele);
        }
    }
}
/// <summary>
/// Annotates every alternate allele of the variant against the supplied transcript.
/// </summary>
/// <param name="variant">the variant whose alternate alleles are annotated</param>
/// <param name="transcript">the transcript to annotate against</param>
private void AddTranscriptToVariant(VariantFeature variant, Transcript transcript)
{
    foreach (var allele in variant.AlternateAlleles)
    {
        AnnotateAltAllele(variant, allele, transcript);
    }
}
/// <summary>
/// constructor: stores the inputs, computes the HGVS reference coordinates and
/// builds the HGVS notation object for the transcript annotation's alternate allele
/// </summary>
public HgvsCodingNomenclature(TranscriptAnnotation ta, Transcript transcript, VariantFeature variant,
    ICompressedSequence compressedSequence, bool isGenomicDuplicate)
{
    _ta                 = ta;
    _transcript         = transcript;
    _variant            = variant;
    _compressedSequence = compressedSequence;
    _isGenomicDuplicate = isGenomicDuplicate;
    _sb                 = new StringBuilder();

    // strand of the gene this transcript belongs to
    var geneOnReverseStrand = transcript.Gene.OnReverseStrand;

    // this may be different to the input one for insertions/deletions
    var altAllele = ta.AlternateAllele;

    // use the reverse complement of the alternate allele for reverse-strand genes
    string alleleSequence = geneOnReverseStrand
        ? SequenceUtilities.GetReverseComplement(altAllele.AlternateAllele)
        : altAllele.AlternateAllele;

    // calculate the reference start and end
    GetReferenceCoordinates(transcript, altAllele, out _hgvsStart, out _hgvsEnd);

    // decide event type from HGVS nomenclature
    var transcriptAccession = FormatUtilities.CombineIdAndVersion(transcript.Id, transcript.Version);
    var hasTranslation      = _transcript.Translation != null;

    _hgvsNotation = new HgvsNotation(ta.TranscriptReferenceAllele, alleleSequence, transcriptAccession,
        _hgvsStart, _hgvsEnd, hasTranslation);
}
/// <summary>
/// Adds one PianoAllele to PianoAlleles for each alternate allele of the variant.
/// </summary>
/// <param name="variant">the variant whose alternate alleles are added</param>
public void AddVariantData(VariantFeature variant)
{
    foreach (var allele in variant.AlternateAlleles)
    {
        PianoAlleles.Add(new PianoAllele(allele));
    }
}
/// <summary>
/// constructor: captures the reference name, the VCF begin position and the REF/ALT
/// alleles from the VCF columns, and starts with an empty list of Piano alleles
/// </summary>
/// <param name="variant">the variant to copy the positional data from</param>
public PianoVariant(VariantFeature variant)
{
    ReferenceName   = variant.ReferenceName;
    ReferenceBegin  = variant.VcfReferenceBegin;
    ReferenceAllele = variant.VcfColumns[VcfCommon.RefIndex].ToUpperInvariant();

    // when the first alternate allele is a translocation breakend the ALT column is kept
    // verbatim; in every other case it is upper-cased before being split on commas
    bool firstAlleleIsBreakend =
        variant.AlternateAlleles[0].NirvanaVariantType == VariantType.translocation_breakend;

    string altColumn = variant.VcfColumns[VcfCommon.AltIndex];
    if (!firstAlleleIsBreakend)
    {
        altColumn = altColumn.ToUpperInvariant();
    }

    AlternateAlleles = altColumn.Split(',');
    PianoAlleles     = new List<PianoAllele>();
}
/// <summary>
/// constructor: runs the allele-only constructor first, then adds the variant-level flags,
/// the conservation score, the reference name and the reported ref/alt alleles
/// </summary>
/// <param name="altAllele">the alternate allele being described</param>
/// <param name="variant">the variant that owns the allele</param>
public JsonVariant(VariantAlternateAllele altAllele, VariantFeature variant) : this(altAllele)
{
    IsReferenceMinor  = variant.IsRefMinor;
    IsReference       = variant.IsReference;
    IsReferenceNoCall = variant.IsRefNoCall;
    PhylopScore       = altAllele.ConservationScore;
    ReferenceName     = variant.ReferenceName;

    if (variant.IsRefMinor)
    {
        // ref minor: the allele's alternate allele is reported as the reference allele
        // and no alternate allele is reported
        RefAllele = altAllele.AlternateAllele;
        AltAllele = null;
    }
    else
    {
        RefAllele = altAllele.ReferenceAllele;
        AltAllele = altAllele.AlternateAllele;
    }
}
/// <summary>
/// returns true if at least one alternate allele overlaps one of the overlapping
/// transcripts after the transcript has been extended by the flanking length on both sides
/// </summary>
private bool HasOverlap(VariantFeature variant)
{
    // ReSharper disable once LoopCanBeConvertedToQuery
    foreach (var transcript in OverlappingTranscripts)
    {
        var flankedInterval = new AnnotationInterval(transcript.Start - FlankingLength,
            transcript.End + FlankingLength);

        // ReSharper disable once LoopCanBeConvertedToQuery
        foreach (var allele in variant.AlternateAlleles)
        {
            if (flankedInterval.Overlaps(allele.Start, allele.End))
            {
                return true;
            }
        }
    }

    return false;
}
/// <summary>
/// Copies the SIFT and PolyPhen prediction/score pairs from the variant onto the current
/// transcript. Each pair is copied only when its prediction is not null.
/// </summary>
/// <param name="variant">the variant carrying the predictions</param>
public void AddProteinChangeEffect(VariantFeature variant)
{
    var siftPrediction = variant.SiftPrediction;
    if (siftPrediction != null)
    {
        _currTranscript.SiftPrediction = siftPrediction;
        _currTranscript.SiftScore      = variant.SiftScore;
    }

    var polyPhenPrediction = variant.PolyPhenPrediction;
    if (polyPhenPrediction != null)
    {
        _currTranscript.PolyPhenPrediction = polyPhenPrediction;
        _currTranscript.PolyPhenScore      = variant.PolyPhenScore;
    }
}
/// <summary>
/// constructor: stores the inputs and builds the HGVS notation object from the amino acids
/// and protein coordinates of the transcript annotation
/// </summary>
public HgvsProteinNomenclature(VariantEffect variantEffect, TranscriptAnnotation ta, Transcript transcript,
    VariantFeature variant, ICompressedSequence compressedSequence, AminoAcids aminoAcids)
{
    _variantEffect      = variantEffect;
    _ta                 = ta;
    _transcript         = transcript;
    _variant            = variant;
    _compressedSequence = compressedSequence;
    _aminoAcids         = aminoAcids;

    // NOTE(review): Translation is dereferenced without a null check
    var translation      = _transcript.Translation;
    var proteinAccession = FormatUtilities.CombineIdAndVersion(translation.ProteinId, translation.ProteinVersion);

    _hgvsNotation = new HgvsNotation(_ta.ReferenceAminoAcids, _ta.AlternateAminoAcids, proteinAccession,
        _ta.ProteinBegin, _ta.ProteinEnd);
}
/// <summary>
/// Parses a VCF line and returns (reference index, begin, end), where begin and end are the
/// VCF reference begin/end widened by the flanking length.
/// </summary>
/// <param name="vcfLine">a tab-separated VCF line</param>
/// <param name="renamer">the chromosome renamer handed to the VariantFeature constructor</param>
/// <param name="flankingLength">number of bases subtracted from the begin and added to the end</param>
/// <exception cref="GeneralException">the line has fewer than VcfCommon.MinNumColumns fields</exception>
private static Tuple<ushort, int, int> GetTuple(string vcfLine, ChromosomeRenamer renamer, int flankingLength = 0)
{
    var columns = vcfLine.Split('\t');

    bool hasEnoughColumns = columns.Length >= VcfCommon.MinNumColumns;
    if (!hasEnoughColumns)
    {
        throw new GeneralException($"Expected at least {VcfCommon.MinNumColumns} fields in the vcf string: [{vcfLine}]");
    }

    var variant = new VariantFeature(new VcfVariant(columns, vcfLine, false), renamer, new VID());

    var flankedBegin = variant.VcfReferenceBegin - flankingLength;
    var flankedEnd   = variant.VcfReferenceEnd + flankingLength;

    return new Tuple<ushort, int, int>(variant.ReferenceIndex, flankedBegin, flankedEnd);
}
/// <summary>
/// Builds a VariantFeature, with its alternate alleles assigned, from a VCF line.
/// </summary>
/// <param name="vcfLine">a tab-separated VCF line</param>
/// <param name="renamer">the chromosome renamer handed to the VariantFeature constructor</param>
/// <param name="isGatkGenomeVcf">forwarded to GetVcfVariant</param>
/// <returns>
/// the variant, or null when the line is null/empty or has fewer than
/// VcfCommon.MinNumColumns tab-separated fields
/// </returns>
internal static VariantFeature GetVariant(string vcfLine, ChromosomeRenamer renamer, bool isGatkGenomeVcf = false)
{
    // the split is only used to validate the column count
    if (string.IsNullOrEmpty(vcfLine) || vcfLine.Split('\t').Length < VcfCommon.MinNumColumns)
    {
        return null;
    }

    var vcfVariant = GetVcfVariant(vcfLine, isGatkGenomeVcf);
    var result     = new VariantFeature(vcfVariant, renamer, new VID());
    result.AssignAlternateAlleles();

    return result;
}
/// <summary>
/// Annotates the variant against every overlapping transcript. Nothing is done for reference
/// variants, structural variants, or chrM variants when mitochondrial annotation is disabled.
/// </summary>
/// <param name="variant">the variant to annotate</param>
private void Annotate(VariantFeature variant)
{
    // variants that are never annotated
    if (variant.IsReference
        || (variant.UcscReferenceName == "chrM" && !_enableMitochondrialAnnotation)
        || variant.IsStructuralVariant)
    {
        return;
    }

    _pianoVariant.AddVariantData(variant);

    // stop early when no allele is within flanking distance of a transcript
    GetOverlappingTranscripts(variant);
    if (!HasOverlap(variant))
    {
        return;
    }

    // check each allele to see if it is a genomic duplicate
    if (_compressedSequence != null)
    {
        variant.CheckForGenomicDuplicates(_compressedSequence);
    }

    // setting the protein coding scheme
    if (variant.UcscReferenceName == "chrM")
    {
        _aminoAcids.CodonConversionScheme = AminoAcids.CodonConversion.HumanMitochondria;
    }
    else
    {
        _aminoAcids.CodonConversionScheme = AminoAcids.CodonConversion.HumanChromosome;
    }

    foreach (var overlappingTranscript in OverlappingTranscripts)
    {
        AddTranscriptToVariant(variant, overlappingTranscript);
    }
}
/// <summary>
/// Annotates a variant and returns the resulting PianoVariant.
/// </summary>
/// <param name="variant">the variant to annotate</param>
/// <returns>the annotated Piano variant, or null when <paramref name="variant"/> is null</returns>
public PianoVariant Annotate(IVariant variant)
{
    if (variant == null)
    {
        return null;
    }

    // NOTE(review): the 'as' cast yields null for an IVariant that is not a VcfVariant,
    // and that null is passed on to the VariantFeature constructor unchanged
    var vcfVariant = variant as VcfVariant;
    var feature    = new VariantFeature(vcfVariant, _renamer, _vid);

    // load the reference sequence
    _dataFileManager.LoadReference(feature.ReferenceIndex, () => {});

    // handle ref no-calls and assign the alternate alleles
    feature.AssignAlternateAlleles();

    // annotate the variant
    _pianoVariant = new PianoVariant(feature);
    Annotate(feature);

    _performanceMetrics.Increment();

    return _pianoVariant;
}
/// <summary>
/// constructor: copies the positional fields, the raw VCF columns and the optional
/// per-variant values from the variant, and starts with an empty list of annotated alleles
/// </summary>
/// <param name="variant">the variant to copy the data from</param>
public UnifiedJson(VariantFeature variant)
{
    var vcfColumns = variant.VcfColumns;

    ReferenceName   = variant.ReferenceName;
    ReferenceBegin  = variant.VcfReferenceBegin;
    _referenceEnd   = variant.VcfReferenceEnd;
    ReferenceAllele = vcfColumns[VcfCommon.RefIndex].ToUpperInvariant();

    // when the first alternate allele is a translocation breakend the ALT column is kept
    // verbatim; in every other case it is upper-cased before being split on commas
    bool firstAlleleIsBreakend =
        variant.AlternateAlleles[0].NirvanaVariantType == VariantType.translocation_breakend;

    string altColumn = vcfColumns[VcfCommon.AltIndex];
    if (!firstAlleleIsBreakend)
    {
        altColumn = altColumn.ToUpperInvariant();
    }

    AlternateAlleles = altColumn.Split(',');

    // remaining VCF columns
    Quality = vcfColumns[VcfCommon.QualIndex];
    Filters = vcfColumns[VcfCommon.FilterIndex].Split(';');

    // optional values are left null when the variant does not provide them
    StrandBias                = variant.StrandBias?.ToString(CultureInfo.InvariantCulture);
    JointSomaticNormalQuality = variant.JointSomaticNormalQuality?.ToString();
    RecalibratedQuality       = variant.RecalibratedQuality?.ToString();
    CopyNumber                = variant.CopyNumber?.ToString();

    InfoFromVcf               = vcfColumns[VcfCommon.InfoIndex];
    AnnotatedAlternateAlleles = new List<IAnnotatedAlternateAllele>();
    AnnotatedSamples          = variant.ExtractSampleInfo();

    ColocalizedWithCnv = variant.ColocalizedWithCnv;
    CiPos              = variant.CiPos;
    CiEnd              = variant.CiEnd;
    SvLength           = variant.SvLength;
}
/// <summary>
/// Asks the transcript interval forest for all values overlapping the variant's overlap
/// interval, widened by the flanking length on both sides; OverlappingTranscripts is passed
/// as the last argument of the query.
/// </summary>
/// <param name="variant">the variant that defines the reference index and the search interval</param>
private void GetOverlappingTranscripts(VariantFeature variant)
{
    var searchBegin = variant.OverlapReferenceBegin - FlankingLength;
    var searchEnd   = variant.OverlapReferenceEnd + FlankingLength;

    _transcriptIntervalForest.GetAllOverlappingValues(variant.ReferenceIndex, searchBegin, searchEnd,
        OverlappingTranscripts);
}
/// <summary>
/// Checks VariantFeature.GetGenotypeIndices against a table of genotype strings and the
/// allele indices each one is expected to produce.
/// </summary>
public void GenotypeIndexes()
{
    // genotype string -> expected indices; an empty array means no indices are expected
    var expectations = new[]
    {
        new KeyValuePair<string, int[]>("0/0", new[] { 0, 0 }),
        new KeyValuePair<string, int[]>("0/1", new[] { 0, 1 }),
        new KeyValuePair<string, int[]>("1/1", new[] { 1, 1 }),
        new KeyValuePair<string, int[]>("1/2", new[] { 1, 2 }),
        new KeyValuePair<string, int[]>("0",   new[] { 0 }),
        new KeyValuePair<string, int[]>("1",   new[] { 1 }),
        new KeyValuePair<string, int[]>("2|3", new[] { 2, 3 }),
        new KeyValuePair<string, int[]>("./0", new int[0]),
        new KeyValuePair<string, int[]>("./.", new int[0]),
        new KeyValuePair<string, int[]>(".",   new int[0]),
        new KeyValuePair<string, int[]>("bob", new int[0])
    };

    // the same list is reused for every case, as the method fills a caller-supplied list
    var genotypeIndices = new List<int>();

    foreach (var expectation in expectations)
    {
        var expectedIndices = expectation.Value;

        genotypeIndices.Clear();
        VariantFeature.GetGenotypeIndices(expectation.Key, genotypeIndices);

        Assert.Equal(expectedIndices.Length, genotypeIndices.Count);

        for (int i = 0; i < expectedIndices.Length; i++)
        {
            Assert.Equal(expectedIndices[i], genotypeIndices[i]);
        }
    }
}
/// <summary>
/// Entry point. Reads a VCF file, counts the variant type of every alternate allele of every
/// non-reference variant, echoes each VCF line that contains an MNV, and finally prints a
/// table of counts per variant type. Exits with code 1 on a usage error, a missing input
/// file or a VCF line that cannot be parsed.
/// </summary>
/// <param name="args">args[0] is the reference path, args[1] is the input VCF path</param>
static void Main(string[] args)
{
    if (args.Length != 2)
    {
        Console.WriteLine("USAGE: {0} <reference path> <input vcf file>",
            Path.GetFileName(Environment.GetCommandLineArgs()[0]));
        Environment.Exit(1);
    }

    var referencePath = args[0];
    var vcfPath       = args[1];

    if (!File.Exists(vcfPath))
    {
        Console.WriteLine($"ERROR: {vcfPath} does not exist.");
        Environment.Exit(1);
    }

    if (!File.Exists(referencePath))
    {
        Console.WriteLine($"ERROR: {referencePath} does not exist.");
        Environment.Exit(1);
    }

    var renamer = ChromosomeRenamer.GetChromosomeRenamer(FileUtilities.GetReadStream(referencePath));
    var vid     = new VID();
    var counts  = new Dictionary<VariantType, int>();

    using (var reader = new LiteVcfReader(vcfPath))
    {
        while (true)
        {
            var vcfLine = reader.ReadLine();
            if (vcfLine == null)
            {
                break;
            }

            // skip header lines; ordinal comparison because '#' is a format marker, not linguistic text
            if (vcfLine.StartsWith("#", StringComparison.Ordinal))
            {
                continue;
            }

            VcfVariant vcfVariant = null;

            try
            {
                vcfVariant = CreateVcfVariant(vcfLine);
            }
            catch (Exception e)
            {
                Console.WriteLine($"ERROR: Could not parse the VCF line:\n{vcfLine}");
                Console.WriteLine(e.Message);
                Environment.Exit(1);
            }

            if (vcfVariant == null)
            {
                continue;
            }

            var variant = new VariantFeature(vcfVariant, renamer, vid);
            if (variant.IsReference)
            {
                continue;
            }

            variant.AssignAlternateAlleles();

            bool hasMnv = false;

            foreach (var altAllele in variant.AlternateAlleles)
            {
                AddVariantType(altAllele.NirvanaVariantType, counts);
                if (altAllele.NirvanaVariantType == VariantType.MNV)
                {
                    hasMnv = true;
                }
            }

            // echo every line that contains at least one MNV
            if (hasMnv)
            {
                Console.WriteLine(vcfLine);
            }
        }
    }

    const int keyFieldLength   = 15;
    const int valueFieldLength = 9;

    Console.WriteLine("VariantType counts:");
    Console.WriteLine($"{new string('-', keyFieldLength)} {new string('-', valueFieldLength)}");

    foreach (var kvp in counts.OrderBy(x => x.Key.ToString()))
    {
        // PadRight leaves labels longer than the column (e.g. translocation_breakend) untouched,
        // whereas building the filler from a negative length would throw ArgumentOutOfRangeException
        var label = (kvp.Key + ":").PadRight(keyFieldLength);
        Console.WriteLine($"{label} {kvp.Value,valueFieldLength:N0}");
    }
}