/// <summary>
/// Builds an annotation from a raw annotation string.
/// When the string starts with "ANN=" or "EFF=", that four-character prefix is
/// removed, the remainder is split on '|', and fields 0 through 15 are read into
/// the corresponding properties. Otherwise only <c>Annotation</c> is set and no
/// other property is populated.
/// </summary>
/// <param name="annotation">Raw annotation text, with or without the "ANN="/"EFF=" prefix.</param>
/// <remarks>
/// Fields 8, 11, 12 and 13 are read as "first/second" integer pairs; a part that is
/// missing or not an integer leaves its property untouched. A prefixed annotation
/// with fewer than 16 '|'-separated fields fails on the out-of-range field access.
/// </remarks>
public SnpEffAnnotation(string annotation)
{
    // The prefix is a fixed ASCII token, so compare ordinally rather than by culture.
    bool isSnpEffAnnotation = annotation.StartsWith("ANN=", System.StringComparison.Ordinal)
        || annotation.StartsWith("EFF=", System.StringComparison.Ordinal);
    Annotation = isSnpEffAnnotation ? annotation.Substring(4) : annotation;
    if (!isSnpEffAnnotation)
    {
        return;
    }

    // Annotation fields are machine-readable: parse integers with the invariant
    // culture so the result does not depend on the current thread's locale.
    bool TryParseInt(string text, out int value)
    {
        return int.TryParse(
            text,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out value);
    }

    string[] a = Annotation.Split('|');
    Allele = a[0];
    Effects = a[1].Split('&');
    PutativeImpact = a[2];
    GeneName = a[3];
    GeneID = a[4];
    FeatureType = a[5];
    FeatureID = a[6];
    TranscriptBiotype = a[7];

    // Split always yields at least one element, so index 0 needs no length check.
    string[] exonIntron = a[8].Split('/');
    if (TryParseInt(exonIntron[0], out int exonIntronRank))
    {
        ExonIntronRank = exonIntronRank;
    }
    if (exonIntron.Length > 1 && TryParseInt(exonIntron[1], out int exonIntronTotal))
    {
        ExonIntronTotal = exonIntronTotal;
    }

    HGVSNotationDnaLevel = a[9];
    HGVSNotationProteinLevel = a[10];

    string[] cdna = a[11].Split('/');
    if (TryParseInt(cdna[0], out int cdnaPosition))
    {
        OneBasedTranscriptCDNAPosition = cdnaPosition;
    }
    if (cdna.Length > 1 && TryParseInt(cdna[1], out int cdnaLength))
    {
        TranscriptCDNALength = cdnaLength;
    }

    string[] cds = a[12].Split('/');
    if (TryParseInt(cds[0], out int cdsPosition))
    {
        OneBasedCodingDomainSequencePosition = cdsPosition;
    }
    if (cds.Length > 1 && TryParseInt(cds[1], out int cdsLength))
    {
        CodingDomainSequenceLengthIncludingStopCodon = cdsLength;
    }

    string[] protein = a[13].Split('/');
    if (TryParseInt(protein[0], out int proteinPosition))
    {
        OneBasedProteinPosition = proteinPosition;
    }
    if (protein.Length > 1 && TryParseInt(protein[1], out int proteinLength))
    {
        ProteinLength = proteinLength;
    }

    if (TryParseInt(a[14], out int distance))
    {
        DistanceToFeature = distance;
    }
    Warnings = a[15].Split('&');

    // Flags derived from the parsed effects and warnings.
    Missense = Effects.Any(eff => eff == "missense_variant");
    Synonymous = !Effects.Any(eff => NonSynonymousVariations.Contains(eff));
    FrameshiftVariant = Effects.Contains("frameshift_variant");
    BadTranscript = Warnings.Any(w => BadTranscriptWarnings.Contains(w));
}
/// <summary>
/// Builds an annotation tied to a variant. The annotation text is stored as given
/// (no prefix is removed), split on '|', and fields 0 through 15 are read into the
/// corresponding properties. The protein-level HGVS field is then matched against
/// <c>HGVSProteinRegex</c> to fill in the reference/alternate amino acids and the
/// amino acid location.
/// </summary>
/// <param name="variant">The variant this annotation belongs to; stored in <c>Variant</c>.</param>
/// <param name="annotation">The '|'-separated annotation text.</param>
/// <remarks>
/// Fields 8, 11, 12 and 13 are read as "first/second" integer pairs; a part that is
/// missing or not an integer leaves its property untouched. An annotation with fewer
/// than 16 '|'-separated fields fails on the out-of-range field access.
/// </remarks>
public SnpEffAnnotation(Variant variant, string annotation)
{
    // Annotation fields are machine-readable: parse integers with the invariant
    // culture so the result does not depend on the current thread's locale.
    bool TryParseInt(string text, out int value)
    {
        return int.TryParse(
            text,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out value);
    }

    Variant = variant;
    Annotation = annotation;

    string[] a = annotation.Split('|');
    Allele = a[0];
    Effects = a[1].Split('&');
    PutativeImpact = a[2];
    GeneName = a[3];
    GeneID = a[4];
    FeatureType = a[5];
    FeatureID = a[6];
    TranscriptBiotype = a[7];

    // Split always yields at least one element, so index 0 needs no length check.
    string[] exonIntron = a[8].Split('/');
    if (TryParseInt(exonIntron[0], out int exonIntronRank))
    {
        ExonIntronRank = exonIntronRank;
    }
    if (exonIntron.Length > 1 && TryParseInt(exonIntron[1], out int exonIntronTotal))
    {
        ExonIntronTotal = exonIntronTotal;
    }

    HGVSNotationDnaLevel = a[9];
    HGVSNotationProteinLevel = a[10];

    string[] cdna = a[11].Split('/');
    if (TryParseInt(cdna[0], out int cdnaPosition))
    {
        OneBasedTranscriptCDNAPosition = cdnaPosition;
    }
    if (cdna.Length > 1 && TryParseInt(cdna[1], out int cdnaLength))
    {
        TranscriptCDNALength = cdnaLength;
    }

    string[] cds = a[12].Split('/');
    if (TryParseInt(cds[0], out int cdsPosition))
    {
        OneBasedCodingDomainSequencePosition = cdsPosition;
    }
    if (cds.Length > 1 && TryParseInt(cds[1], out int cdsLength))
    {
        CodingDomainSequenceLengthIncludingStopCodon = cdsLength;
    }

    string[] protein = a[13].Split('/');
    if (TryParseInt(protein[0], out int proteinPosition))
    {
        OneBasedProteinPosition = proteinPosition;
    }
    if (protein.Length > 1 && TryParseInt(protein[1], out int proteinLength))
    {
        ProteinLength = proteinLength;
    }

    if (TryParseInt(a[14], out int distance))
    {
        DistanceToFeature = distance;
    }
    Warnings = a[15].Split('&');

    if (HGVSNotationProteinLevel != null)
    {
        GroupCollection hgvsProteinMatch = HGVSProteinRegex.Match(HGVSNotationProteinLevel).Groups;

        // A group that did not take part in the match has an empty Value; skip the
        // amino-acid table lookup for it instead of indexing the table with "".
        // NOTE(review): assumes "" is never a valid key in amino_acids_3to1 — confirm.
        if (hgvsProteinMatch.Count > 2 && hgvsProteinMatch[2].Success)
        {
            ReferenceAminoAcid = ProteogenomicsUtility.amino_acids_3to1[hgvsProteinMatch[2].Value];
        }
        if (hgvsProteinMatch.Count > 3 && TryParseInt(hgvsProteinMatch[3].Value, out int location))
        {
            AminoAcidLocation = location;
        }
        if (hgvsProteinMatch.Count > 4 && hgvsProteinMatch[4].Success)
        {
            AlternateAminoAcid = ProteogenomicsUtility.amino_acids_3to1[hgvsProteinMatch[4].Value];
        }
    }

    // Flags derived from the parsed effects and warnings.
    Missense = Effects.Any(eff => eff == "missense_variant");
    Synonymous = !Effects.Any(eff => NonSynonymousVariations.Contains(eff));
    FrameshiftVariant = Effects.Contains("frameshift_variant");
    BadTranscript = Warnings.Any(w => BadTranscriptWarnings.Contains(w));
}