예제 #1
0
 /// <summary>
 /// Appends one new <see cref="ClinvarVariant"/> to <paramref name="variantList"/> for every
 /// entry that <c>_iupacBases</c> holds for the first character of the variant's alt allele.
 /// </summary>
 /// <param name="variant">
 /// Source variant. Only the first character of its alt allele is used as the lookup key
 /// (presumably an IUPAC ambiguity code — confirm against how <c>_iupacBases</c> is populated).
 /// </param>
 /// <param name="variantList">List that receives the newly created variants.</param>
 private void AddIupacVariants(ClinvarVariant variant, List <ClinvarVariant> variantList)
 {
     // The lookup is done once, before iterating.
     var candidateBases = _iupacBases[variant.AltAllele[0]];

     foreach (char candidateBase in candidateBases)
     {
         // Chromosome, start, stop, id and ref allele come from the source variant;
         // only the alt allele is replaced by the single-character candidate.
         var expanded = new ClinvarVariant(variant.Chromosome, variant.Start, variant.Stop, variant.VariantId,
                                           variant.RefAllele, candidateBase.ToString());
         variantList.Add(expanded);
     }
 }
예제 #2
0
 /// <summary>
 /// Adds one new <see cref="ClinvarVariant"/> to <c>_variantList</c> for every entry that
 /// <c>_iupacBases</c> holds for the first character of the variant's alt allele.
 /// </summary>
 /// <param name="variant">
 /// Source variant. Only the first character of its alt allele is used as the lookup key
 /// (presumably an IUPAC ambiguity code — confirm against how <c>_iupacBases</c> is populated).
 /// </param>
 private void AddIupacVariants(ClinvarVariant variant)
 {
     // The lookup is done once, before iterating.
     var candidateBases = _iupacBases[variant.AltAllele[0]];

     foreach (var candidateBase in candidateBases)
     {
         // Chromosome, start, stop and reference allele come from the source variant;
         // only the alt allele is replaced by the candidate's string form.
         var expanded = new ClinvarVariant(variant.Chromosome, variant.Start, variant.Stop,
                                           variant.ReferenceAllele, candidateBase.ToString());
         _variantList.Add(expanded);
     }
 }
예제 #3
0
 /// <summary>
 /// Decides whether a variant should be treated as invalid.
 /// </summary>
 /// <param name="variant">Variant to inspect.</param>
 /// <returns>
 /// <c>true</c> when the variant type is "ALU", when the chromosome is null, or when the type is
 /// "Microsatellite" or "Variation" and the alt allele is null or empty; otherwise <c>false</c>.
 /// </returns>
 private static bool IsInvalidVariant(ClinvarVariant variant)
 {
     var type = variant.VariantType;

     if (type == "ALU")
     {
         return true;
     }

     if (variant.Chromosome == null)
     {
         return true;
     }

     // Only these two types are rejected for lacking an alt allele.
     var needsAltAllele = type == "Microsatellite" || type == "Variation";

     return needsAltAllele && string.IsNullOrEmpty(variant.AltAllele);
 }
예제 #4
0
        /// <summary>
        /// Creates a new variant whose reference allele is read from
        /// <paramref name="compressedSequence"/> over the variant's start..stop span.
        /// </summary>
        /// <param name="variant">Variant supplying chromosome, start, stop and alt allele; may be null.</param>
        /// <param name="compressedSequence">Sequence the reference bases are extracted from.</param>
        /// <returns>
        /// <c>null</c> when <paramref name="variant"/> is null; otherwise a new
        /// <see cref="ClinvarVariant"/> carrying the extracted reference allele and the original
        /// alt allele (an empty string when the original alt allele was null).
        /// </returns>
        private static ClinvarVariant GenerateRefAllele(ClinvarVariant variant, ICompressedSequence compressedSequence)
        {
            if (variant == null)
            {
                return null;
            }

            // Offset is Start - 1: presumably Start is 1-based and Substring is 0-based — TODO confirm.
            var offset = variant.Start - 1;
            // Span length, with both start and stop included.
            var length = variant.Stop - variant.Start + 1;

            var referenceBases = compressedSequence.Substring(offset, length);
            var altAllele      = variant.AltAllele ?? "";

            return new ClinvarVariant(variant.Chromosome, variant.Start, variant.Stop, referenceBases, altAllele);
        }
예제 #5
0
        /// <summary>
        /// Combines the ids in <c>_omimIDs</c> with the variant's allelic OMIM ids.
        /// </summary>
        /// <param name="variant">Variant whose <c>AllelicOmimIds</c> are merged in.</param>
        /// <returns>
        /// A new set holding every id from <c>_omimIDs</c> plus every id from the variant;
        /// <c>_omimIDs</c> itself is not modified.
        /// </returns>
        private HashSet <string> GetOmimIds(ClinvarVariant variant)
        {
            // Start from a copy so the field stays untouched.
            var combinedIds = new HashSet<string>(_omimIDs);

            foreach (var alleleSpecificId in variant.AllelicOmimIds)
            {
                // Duplicates are absorbed by the set.
                combinedIds.Add(alleleSpecificId);
            }

            return combinedIds;
        }
예제 #6
0
        /// <summary>
        /// Left-aligns a variant through <c>_aligner.LeftAlign</c>.
        /// </summary>
        /// <param name="variant">Variant to align.</param>
        /// <returns>
        /// The input variant unchanged when either allele is null or the aligner returns null;
        /// otherwise a new <see cref="ClinvarVariant"/> built from the aligner's result
        /// (Item1 as start, Item2 as reference allele, Item3 as alt allele).
        /// </returns>
        private ClinvarVariant LeftShift(ClinvarVariant variant)
        {
            var hasBothAlleles = variant.ReferenceAllele != null && variant.AltAllele != null;

            if (!hasBothAlleles)
            {
                return variant;
            }

            var aligned = _aligner.LeftAlign(variant.Start, variant.ReferenceAllele, variant.AltAllele);

            // NOTE(review): Stop is copied from the input even though the start comes from the
            // aligner — confirm that callers do not rely on Stop after alignment.
            return aligned == null
                ? variant
                : new ClinvarVariant(variant.Chromosome, aligned.Item1, variant.Stop, aligned.Item2, aligned.Item3);
        }
예제 #7
0
        /// <summary>
        /// Overwrites <c>variant.VariantType</c> with a label derived from the variant's alleles.
        /// </summary>
        /// <param name="variant">
        /// Variant to update in place. It is left untouched when either allele is null or when
        /// <c>SmallVariantCreator.GetVariantType</c> yields a type not listed below.
        /// </param>
        private static void UpdateVariantType(ClinvarVariant variant)
        {
            var reference = variant.RefAllele;
            var alternate = variant.AltAllele;

            if (reference == null || alternate == null)
            {
                return;
            }

            string label;

            switch (SmallVariantCreator.GetVariantType(reference, alternate))
            {
                case VariantType.deletion:    label = "Deletion";    break;
                case VariantType.insertion:   label = "Insertion";   break;
                case VariantType.indel:       label = "Indel";       break;
                case VariantType.duplication: label = "Duplication"; break;
                case VariantType.SNV:         label = "SNV";         break;
                case VariantType.MNV:         label = "MNV";         break;

                default:
                    // Any other type keeps whatever VariantType the variant already had.
                    return;
            }

            variant.VariantType = label;
        }
예제 #8
0
        /// <summary>
        /// Parses one ClinVar XML record and turns the variants collected in <c>_variantList</c>
        /// into <see cref="ClinVarItem"/> entries.
        /// </summary>
        /// <param name="xmlElement">
        /// Record element whose children named "ReferenceClinVarAssertion" and "ClinVarAssertion"
        /// are parsed; may be null.
        /// </param>
        /// <returns>
        /// The items built from the record, or <c>null</c> when the element is null or empty, the
        /// record status is not "current", or no variant survives the filters.
        /// </returns>
        /// <exception cref="GeneralException">
        /// Thrown when a variant's chromosome maps to the unknown reference index.
        /// </exception>
        private List <ClinVarItem> ExtractClinVarItems(LiteXmlElement xmlElement)
        {
            // The per-record fields are cleared first, even if the element turns out to be unusable.
            ClearClinvarFields();

            if (xmlElement == null || xmlElement.IsEmpty())
            {
                return null;
            }

            foreach (var child in xmlElement.Children)
            {
                var tag = child.Name;

                if (tag == "ReferenceClinVarAssertion")
                {
                    ParseRefClinVarAssertion(child);
                }
                else if (tag == "ClinVarAssertion")
                {
                    ParseScv(child);
                }
            }

            if (_recordStatus != "current")
            {
                Console.WriteLine($"record status not current: {_recordStatus} for {_id}");
                return null;
            }

            var items = new List<ClinVarItem>();

            foreach (var candidate in _variantList)
            {
                // In order to match the VCF, variants without a dbSNP id are left out.
                // Undesired chromosomes and microsatellites are skipped as well.
                var isSkipped = candidate.DbSnp == null
                                || !InputFileParserUtilities.IsDesiredChromosome(candidate.Chromosome, _compressedSequence.Renamer)
                                || candidate.VariantType == "Microsatellite";
                if (isSkipped)
                {
                    continue;
                }

                var referenceIndex = _compressedSequence.Renamer.GetReferenceIndex(candidate.Chromosome);
                if (referenceIndex == ChromosomeRenamer.UnknownReferenceIndex)
                {
                    throw new GeneralException($"Could not find the reference index for: {candidate.Chromosome}");
                }

                // Load the reference before any allele is extracted from the sequence.
                _dataFileManager.LoadReference(referenceIndex, () => {});

                ClinvarVariant normalized = candidate;

                // Some entries do not have a ref allele in the xml file; for those it is
                // extracted from our reference sequence.
                if (candidate.ReferenceAllele == null && candidate.VariantType == "Deletion")
                {
                    normalized = GenerateRefAllele(candidate, _compressedSequence);
                }

                if (candidate.AltAllele == null && candidate.VariantType == "Duplication")
                {
                    normalized = GenerateAltAllele(candidate, _compressedSequence);
                }

                // Left align the variant.
                normalized = LeftShift(normalized);

                // Indels lacking a ref allele get it generated after the left-shift step,
                // starting again from the original (unshifted) variant.
                if (candidate.ReferenceAllele == null && candidate.VariantType == "Indel" && candidate.AltAllele != null)
                {
                    normalized = GenerateRefAllele(candidate, _compressedSequence);
                }

                // Sorted in place on every iteration that gets this far.
                _pubMedIds.Sort();

                var hasNoAlleles = string.IsNullOrEmpty(normalized.ReferenceAllele)
                                   && string.IsNullOrEmpty(normalized.AltAllele);
                if (hasNoAlleles)
                {
                    continue;
                }

                // Preferred phenotypes win; the alternative ones are used only when there are none.
                var phenotypes = _prefPhenotypes.Count > 0
                    ? _prefPhenotypes.Distinct().ToList()
                    : _altPhenotypes.Distinct().ToList();

                var item = new ClinVarItem(
                    normalized.Chromosome,
                    normalized.Start,
                    _alleleOrigins.Distinct().ToList(),
                    normalized.AltAllele ?? "",
                    _id,
                    _reviewStatus,
                    _medGenIDs.Distinct().ToList(),
                    _omimIDs.Distinct().ToList(),
                    _orphanetIDs.Distinct().ToList(),
                    phenotypes,
                    normalized.ReferenceAllele ?? "",
                    _significance,
                    _pubMedIds.Distinct().ToList(),
                    _lastUpdatedDate);

                items.Add(item);
            }

            // Callers receive null rather than an empty list.
            if (items.Count == 0)
            {
                return null;
            }

            return items;
        }