/// <summary>
/// Expands a variant into one variant per entry that <c>_iupacBases</c> holds for the first
/// character of its alt allele, and appends each expansion to <paramref name="variantList"/>.
/// </summary>
/// <param name="variant">Variant whose alt allele's first character is used as the lookup key.</param>
/// <param name="variantList">List that receives the expanded variants.</param>
private void AddIupacVariants(ClinvarVariant variant, List<ClinvarVariant> variantList)
{
    // presumably the key is an IUPAC ambiguity code and the values are concrete bases - confirm against _iupacBases
    var expansions = _iupacBases[variant.AltAllele[0]];

    foreach (char expandedBase in expansions)
    {
        // everything except the alt allele is copied from the source variant
        var expanded = new ClinvarVariant(
            variant.Chromosome,
            variant.Start,
            variant.Stop,
            variant.VariantId,
            variant.RefAllele,
            expandedBase.ToString());

        variantList.Add(expanded);
    }
}
/// <summary>
/// Expands a variant into one variant per entry that <c>_iupacBases</c> holds for the first
/// character of its alt allele, and appends each expansion to <c>_variantList</c>.
/// </summary>
/// <param name="variant">Variant whose alt allele's first character is used as the lookup key.</param>
private void AddIupacVariants(ClinvarVariant variant)
{
    // presumably the key is an IUPAC ambiguity code and the values are concrete bases - confirm against _iupacBases
    var expansions = _iupacBases[variant.AltAllele[0]];

    foreach (var expandedBase in expansions)
    {
        // chromosome, coordinates and reference allele are copied; only the alt allele differs
        var expanded = new ClinvarVariant(
            variant.Chromosome,
            variant.Start,
            variant.Stop,
            variant.ReferenceAllele,
            expandedBase.ToString());

        _variantList.Add(expanded);
    }
}
/// <summary>
/// Decides whether a variant should be treated as invalid.
/// </summary>
/// <param name="variant">Variant to inspect.</param>
/// <returns>
/// <c>true</c> when the variant type is "ALU", when the chromosome is null, or when the type is
/// "Microsatellite" or "Variation" and the alt allele is null or empty; otherwise <c>false</c>.
/// </returns>
private static bool IsInvalidVariant(ClinvarVariant variant)
{
    if (variant.VariantType == "ALU")
    {
        return true;
    }

    if (variant.Chromosome == null)
    {
        return true;
    }

    // only these two types are rejected for lacking an alt allele
    var needsAltAllele = variant.VariantType == "Microsatellite"
                         || variant.VariantType == "Variation";

    return needsAltAllele && string.IsNullOrEmpty(variant.AltAllele);
}
/// <summary>
/// Creates a copy of a variant whose reference allele is read from the supplied sequence.
/// </summary>
/// <param name="variant">Variant providing chromosome, start, stop and alt allele; may be null.</param>
/// <param name="compressedSequence">Sequence the reference bases are extracted from.</param>
/// <returns>
/// <c>null</c> when <paramref name="variant"/> is null; otherwise a new variant with the extracted
/// reference allele and the original alt allele (an empty string when the alt allele is null).
/// </returns>
private static ClinvarVariant GenerateRefAllele(ClinvarVariant variant, ICompressedSequence compressedSequence)
{
    if (variant == null)
    {
        return null;
    }

    // NOTE(review): looks like a 1-based inclusive [Start, Stop] converted to a 0-based offset and length - confirm
    var offset = variant.Start - 1;
    var length = variant.Stop - variant.Start + 1;
    var referenceBases = compressedSequence.Substring(offset, length);

    return new ClinvarVariant(
        variant.Chromosome,
        variant.Start,
        variant.Stop,
        referenceBases,
        variant.AltAllele ?? "");
}
/// <summary>
/// Combines the record-level OMIM ids with the OMIM ids attached to one variant.
/// </summary>
/// <param name="variant">Variant whose <c>AllelicOmimIds</c> are merged in.</param>
/// <returns>A new set holding every id from <c>_omimIDs</c> plus the variant's allelic OMIM ids.</returns>
private HashSet<string> GetOmimIds(ClinvarVariant variant)
{
    // start from a copy so that _omimIDs itself is never modified
    var combinedIds = new HashSet<string>(_omimIDs);

    foreach (var allelicId in variant.AllelicOmimIds)
    {
        combinedIds.Add(allelicId);
    }

    return combinedIds;
}
/// <summary>
/// Left-aligns a variant through <c>_aligner</c>.
/// </summary>
/// <param name="variant">Variant to align.</param>
/// <returns>
/// The input variant unchanged when either allele is null or when the aligner returns null;
/// otherwise a new variant built from the aligner's position and alleles.
/// </returns>
private ClinvarVariant LeftShift(ClinvarVariant variant)
{
    var hasBothAlleles = variant.ReferenceAllele != null && variant.AltAllele != null;
    if (!hasBothAlleles)
    {
        return variant;
    }

    var aligned = _aligner.LeftAlign(variant.Start, variant.ReferenceAllele, variant.AltAllele);

    // Item1 = new start, Item2 = new reference allele, Item3 = new alt allele;
    // the stop position is carried over from the input variant as-is
    return aligned == null
        ? variant
        : new ClinvarVariant(variant.Chromosome, aligned.Item1, variant.Stop, aligned.Item2, aligned.Item3);
}
/// <summary>
/// Overwrites the variant's type label with the one derived from its alleles.
/// </summary>
/// <param name="variant">Variant to update in place.</param>
/// <remarks>
/// Nothing is changed when either allele is null, or when
/// <c>SmallVariantCreator.GetVariantType</c> returns a type other than deletion, insertion,
/// indel, duplication, SNV or MNV.
/// </remarks>
private static void UpdateVariantType(ClinvarVariant variant)
{
    var reference = variant.RefAllele;
    var alternate = variant.AltAllele;

    if (reference == null || alternate == null)
    {
        return;
    }

    string label;
    switch (SmallVariantCreator.GetVariantType(reference, alternate))
    {
        case VariantType.deletion:
            label = "Deletion";
            break;
        case VariantType.insertion:
            label = "Insertion";
            break;
        case VariantType.indel:
            label = "Indel";
            break;
        case VariantType.duplication:
            label = "Duplication";
            break;
        case VariantType.SNV:
            label = "SNV";
            break;
        case VariantType.MNV:
            label = "MNV";
            break;
        default:
            // any other type keeps whatever label the variant already has
            return;
    }

    variant.VariantType = label;
}
/// <summary>
/// Parses one ClinVar XML element and converts the variants collected from it into ClinVar items.
/// </summary>
/// <param name="xmlElement">Element whose "ReferenceClinVarAssertion" and "ClinVarAssertion" children are parsed.</param>
/// <returns>
/// The list of created items, or <c>null</c> when the element is null or empty, when the record
/// status is not "current", or when no item was produced.
/// </returns>
/// <exception cref="GeneralException">
/// Thrown when no reference index can be found for a variant's chromosome.
/// </exception>
private List<ClinVarItem> ExtractClinVarItems(LiteXmlElement xmlElement)
{
    // the per-record fields are reset even when the element turns out to be unusable
    ClearClinvarFields();

    if (xmlElement == null || xmlElement.IsEmpty())
    {
        return null;
    }

    foreach (var child in xmlElement.Children)
    {
        switch (child.Name)
        {
            case "ReferenceClinVarAssertion":
                ParseRefClinVarAssertion(child);
                break;
            case "ClinVarAssertion":
                ParseScv(child);
                break;
        }
    }

    if (_recordStatus != "current")
    {
        Console.WriteLine($"record status not current: {_recordStatus} for {_id}");
        return null;
    }

    var clinvarList = new List<ClinVarItem>();

    foreach (var variant in _variantList)
    {
        // in order to match the VCF, we leave out the ones that do not have dbsnp id
        if (variant.DbSnp == null)
        {
            continue;
        }

        if (!InputFileParserUtilities.IsDesiredChromosome(variant.Chromosome, _compressedSequence.Renamer))
        {
            continue;
        }

        if (variant.VariantType == "Microsatellite")
        {
            continue;
        }

        var refIndex = _compressedSequence.Renamer.GetReferenceIndex(variant.Chromosome);
        if (refIndex == ChromosomeRenamer.UnknownReferenceIndex)
        {
            throw new GeneralException($"Could not find the reference index for: {variant.Chromosome}");
        }

        _dataFileManager.LoadReference(refIndex, () => {});

        ClinvarVariant shiftedVariant = variant;

        // some entries do not have ref allele in the xml file.
        // For those, we extract them from our ref sequence
        if (variant.ReferenceAllele == null && variant.VariantType == "Deletion")
        {
            shiftedVariant = GenerateRefAllele(variant, _compressedSequence);
        }

        if (variant.AltAllele == null && variant.VariantType == "Duplication")
        {
            shiftedVariant = GenerateAltAllele(variant, _compressedSequence);
        }

        // left align the variant
        shiftedVariant = LeftShift(shiftedVariant);

        // indels without a ref allele get it from the reference sequence; this is rebuilt from the
        // original variant, so it replaces the result of the steps above
        if (variant.ReferenceAllele == null && variant.VariantType == "Indel" && variant.AltAllele != null)
        {
            shiftedVariant = GenerateRefAllele(variant, _compressedSequence);
        }

        // sorts the shared list in place; repeating it for later variants does not change the order
        _pubMedIds.Sort();

        // nothing to report when neither allele is known
        if (string.IsNullOrEmpty(shiftedVariant.ReferenceAllele) && string.IsNullOrEmpty(shiftedVariant.AltAllele))
        {
            continue;
        }

        // preferred phenotypes win; the alternate ones are only used when there are none
        var phenotypes = _prefPhenotypes.Count > 0
            ? _prefPhenotypes.Distinct().ToList()
            : _altPhenotypes.Distinct().ToList();

        clinvarList.Add(
            new ClinVarItem(
                shiftedVariant.Chromosome,
                shiftedVariant.Start,
                _alleleOrigins.Distinct().ToList(),
                shiftedVariant.AltAllele ?? "",
                _id,
                _reviewStatus,
                _medGenIDs.Distinct().ToList(),
                _omimIDs.Distinct().ToList(),
                _orphanetIDs.Distinct().ToList(),
                phenotypes,
                shiftedVariant.ReferenceAllele ?? "",
                _significance,
                _pubMedIds.Distinct().ToList(),
                _lastUpdatedDate));
    }

    // callers receive null rather than an empty list
    return clinvarList.Count > 0 ? clinvarList : null;
}