예제 #1
0
        /// <summary>
        /// Parses a SnpEff annotation string. When <paramref name="annotation"/> starts with
        /// "ANN=" or "EFF=", that four-character prefix is removed and the remainder is split on
        /// '|' into the individual annotation fields. Otherwise the text is stored unchanged in
        /// <see cref="Annotation"/> and no other member is populated.
        /// </summary>
        /// <param name="annotation">Raw annotation text, with or without an "ANN="/"EFF=" prefix.</param>
        /// <exception cref="System.IndexOutOfRangeException">
        /// The prefixed annotation has fewer than 16 '|'-separated fields.
        /// </exception>
        public SnpEffAnnotation(string annotation)
        {
            // Ordinal comparison: the prefixes are fixed ASCII tokens, not linguistic text.
            bool isSnpEffAnnotation =
                annotation.StartsWith("ANN=", System.StringComparison.Ordinal) ||
                annotation.StartsWith("EFF=", System.StringComparison.Ordinal);

            Annotation = isSnpEffAnnotation ? annotation.Substring(4) : annotation;
            if (!isSnpEffAnnotation)
            {
                return;
            }

            // Numeric fields are machine-generated, so parse them independently of the
            // current thread culture.
            bool TryParseInt(string text, out int value) =>
                int.TryParse(
                    text,
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out value);

            string[] fields = Annotation.Split('|');

            Allele = fields[0];
            Effects = fields[1].Split('&');
            PutativeImpact = fields[2];
            GeneName = fields[3];
            GeneID = fields[4];
            FeatureType = fields[5];
            FeatureID = fields[6];
            TranscriptBiotype = fields[7];

            // Fields 8 and 11-13 have the form "first/second". Split always yields at least
            // one element, so only the second part needs a length check. A part that does not
            // parse leaves the corresponding member at its default value.
            string[] exonIntron = fields[8].Split('/');
            if (TryParseInt(exonIntron[0], out int exonIntronRank))
            {
                ExonIntronRank = exonIntronRank;
            }
            if (exonIntron.Length > 1 && TryParseInt(exonIntron[1], out int exonIntronTotal))
            {
                ExonIntronTotal = exonIntronTotal;
            }

            HGVSNotationDnaLevel = fields[9];
            HGVSNotationProteinLevel = fields[10];

            string[] cdna = fields[11].Split('/');
            if (TryParseInt(cdna[0], out int cdnaPosition))
            {
                OneBasedTranscriptCDNAPosition = cdnaPosition;
            }
            if (cdna.Length > 1 && TryParseInt(cdna[1], out int cdnaLength))
            {
                TranscriptCDNALength = cdnaLength;
            }

            string[] cds = fields[12].Split('/');
            if (TryParseInt(cds[0], out int cdsPosition))
            {
                OneBasedCodingDomainSequencePosition = cdsPosition;
            }
            if (cds.Length > 1 && TryParseInt(cds[1], out int cdsLength))
            {
                CodingDomainSequenceLengthIncludingStopCodon = cdsLength;
            }

            string[] protein = fields[13].Split('/');
            if (TryParseInt(protein[0], out int proteinPosition))
            {
                OneBasedProteinPosition = proteinPosition;
            }
            if (protein.Length > 1 && TryParseInt(protein[1], out int proteinLength))
            {
                ProteinLength = proteinLength;
            }

            if (TryParseInt(fields[14], out int distance))
            {
                DistanceToFeature = distance;
            }
            Warnings = fields[15].Split('&');

            // Flags derived from the parsed effect and warning lists.
            Missense = Effects.Any(eff => eff == "missense_variant");
            Synonymous = !Effects.Any(eff => NonSynonymousVariations.Contains(eff));
            FrameshiftVariant = Effects.Contains("frameshift_variant");
            BadTranscript = Warnings.Any(w => BadTranscriptWarnings.Contains(w));
        }
        /// <summary>
        /// Parses a SnpEff annotation for <paramref name="variant"/>. The annotation text is
        /// split on '|' exactly as given (no "ANN="/"EFF=" prefix is stripped here), and the
        /// reference amino acid, location and alternate amino acid are additionally extracted
        /// from the protein-level HGVS field when it matches <c>HGVSProteinRegex</c>.
        /// </summary>
        /// <param name="variant">Variant this annotation belongs to; stored in <see cref="Variant"/>.</param>
        /// <param name="annotation">The '|'-separated annotation text.</param>
        /// <exception cref="System.IndexOutOfRangeException">
        /// The annotation has fewer than 16 '|'-separated fields.
        /// </exception>
        public SnpEffAnnotation(Variant variant, string annotation)
        {
            Variant = variant;
            Annotation = annotation;

            // Numeric fields are machine-generated, so parse them independently of the
            // current thread culture.
            bool TryParseInt(string text, out int value) =>
                int.TryParse(
                    text,
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out value);

            string[] fields = annotation.Split('|');

            Allele = fields[0];
            Effects = fields[1].Split('&');
            PutativeImpact = fields[2];
            GeneName = fields[3];
            GeneID = fields[4];
            FeatureType = fields[5];
            FeatureID = fields[6];
            TranscriptBiotype = fields[7];

            // Fields 8 and 11-13 have the form "first/second". Split always yields at least
            // one element, so only the second part needs a length check. A part that does not
            // parse leaves the corresponding member at its default value.
            string[] exonIntron = fields[8].Split('/');
            if (TryParseInt(exonIntron[0], out int exonIntronRank))
            {
                ExonIntronRank = exonIntronRank;
            }
            if (exonIntron.Length > 1 && TryParseInt(exonIntron[1], out int exonIntronTotal))
            {
                ExonIntronTotal = exonIntronTotal;
            }

            HGVSNotationDnaLevel = fields[9];
            HGVSNotationProteinLevel = fields[10];

            string[] cdna = fields[11].Split('/');
            if (TryParseInt(cdna[0], out int cdnaPosition))
            {
                OneBasedTranscriptCDNAPosition = cdnaPosition;
            }
            if (cdna.Length > 1 && TryParseInt(cdna[1], out int cdnaLength))
            {
                TranscriptCDNALength = cdnaLength;
            }

            string[] cds = fields[12].Split('/');
            if (TryParseInt(cds[0], out int cdsPosition))
            {
                OneBasedCodingDomainSequencePosition = cdsPosition;
            }
            if (cds.Length > 1 && TryParseInt(cds[1], out int cdsLength))
            {
                CodingDomainSequenceLengthIncludingStopCodon = cdsLength;
            }

            string[] protein = fields[13].Split('/');
            if (TryParseInt(protein[0], out int proteinPosition))
            {
                OneBasedProteinPosition = proteinPosition;
            }
            if (protein.Length > 1 && TryParseInt(protein[1], out int proteinLength))
            {
                ProteinLength = proteinLength;
            }

            if (TryParseInt(fields[14], out int distance))
            {
                DistanceToFeature = distance;
            }
            Warnings = fields[15].Split('&');

            if (HGVSNotationProteinLevel != null)
            {
                // NOTE(review): the Count checks presumably double as a "match succeeded" test,
                // since an unsuccessful Match exposes a single-member group collection.
                // Confirm against the definition of HGVSProteinRegex.
                GroupCollection hgvsProteinMatch = HGVSProteinRegex.Match(HGVSNotationProteinLevel).Groups;
                if (hgvsProteinMatch.Count > 2)
                {
                    // NOTE(review): the indexer presumably throws when the captured code is not
                    // a key of amino_acids_3to1 - confirm the lookup table covers every code
                    // the regex can capture.
                    ReferenceAminoAcid = ProteogenomicsUtility.amino_acids_3to1[hgvsProteinMatch[2].Value];
                }
                if (hgvsProteinMatch.Count > 3 && TryParseInt(hgvsProteinMatch[3].Value, out int location))
                {
                    AminoAcidLocation = location;
                }
                if (hgvsProteinMatch.Count > 4)
                {
                    AlternateAminoAcid = ProteogenomicsUtility.amino_acids_3to1[hgvsProteinMatch[4].Value];
                }
            }

            // Flags derived from the parsed effect and warning lists.
            Missense = Effects.Any(eff => eff == "missense_variant");
            Synonymous = !Effects.Any(eff => NonSynonymousVariations.Contains(eff));
            FrameshiftVariant = Effects.Contains("frameshift_variant");
            BadTranscript = Warnings.Any(w => BadTranscriptWarnings.Contains(w));
        }