/// <summary>
/// Recursively converts the element the reader is currently positioned on into a
/// <see cref="LiteXmlElement"/>, collecting its attributes, text values and child elements.
/// </summary>
/// <param name="xmlReader">Reader positioned on the start tag of the element to parse.</param>
/// <returns>
/// The parsed element, or null when the element reports IsEmpty() or when the reader runs out
/// of input before an end tag with the element's name is seen.
/// </returns>
private static LiteXmlElement ParseXmlElement(XmlTextReader xmlReader)
{
    var element = new LiteXmlElement(xmlReader.Name);

    // Read this while the reader is still on the element node, i.e. before the attribute walk
    // below repositions it onto attribute nodes.
    var selfClosing = xmlReader.IsEmptyElement;

    if (xmlReader.HasAttributes)
    {
        while (xmlReader.MoveToNextAttribute())
        {
            element.Attributes[xmlReader.Name] = xmlReader.Value;
        }
    }

    // A self-closing tag has no content and no end tag to wait for.
    if (selfClosing)
    {
        if (element.IsEmpty())
        {
            return null;
        }

        return element;
    }

    // Consume nodes until the end tag that closes this element shows up.
    while (xmlReader.Read())
    {
        var nodeType = xmlReader.NodeType;

        if (nodeType == XmlNodeType.Element)
        {
            // Nested element: the recursive call consumes everything up to its own end tag.
            var nested = ParseXmlElement(xmlReader);
            if (nested != null)
            {
                element.Children.Add(nested);
            }
        }
        else if (nodeType == XmlNodeType.Text)
        {
            var text = xmlReader.Value;
            if (!string.IsNullOrEmpty(text))
            {
                element.StringValues.Add(text);
            }
        }
        else if (nodeType == XmlNodeType.EndElement)
        {
            if (xmlReader.Name == element.Name)
            {
                return element.IsEmpty() ? null : element;
            }

            // An end tag with a different name is reported and parsing simply continues.
            Console.WriteLine("WARNING!! encountered unexpected endElement tag:" + xmlReader.Name);
        }
    }

    // Input ended without the matching end tag.
    return null;
}
/// <summary>
/// Builds the <see cref="ClinVarItem"/> entries described by one parsed record element.
/// </summary>
/// <param name="xmlElement">
/// Parsed record. Its "ReferenceClinVarAssertion" children are passed to ParseRefClinVarAssertion
/// and its "ClinVarAssertion" children to ParseScv; other children are ignored.
/// </param>
/// <returns>
/// The items created from the entries of _variantList, or null when the element is null or
/// empty, the record status is not "current", or no variant produced an item.
/// </returns>
/// <exception cref="GeneralException">
/// Thrown when the reference index of a variant's chromosome is ChromosomeRenamer.UnknownReferenceIndex.
/// </exception>
private List<ClinVarItem> ExtractClinVarItems(LiteXmlElement xmlElement)
{
    // ClearClinvarFields() runs first, even when the element turns out to be null or empty.
    ClearClinvarFields();

    if (xmlElement == null || xmlElement.IsEmpty())
    {
        return null;
    }

    foreach (var node in xmlElement.Children)
    {
        var tag = node.Name;

        if (tag == "ReferenceClinVarAssertion")
        {
            ParseRefClinVarAssertion(node);
        }
        else if (tag == "ClinVarAssertion")
        {
            ParseScv(node);
        }
    }

    if (_recordStatus != "current")
    {
        Console.WriteLine($"record status not current: {_recordStatus} for {_id}");
        return null;
    }

    var items = new List<ClinVarItem>();

    foreach (var variant in _variantList)
    {
        // Skipped, in this order of evaluation:
        //  - variants without a dbSNP id (left out in order to match the VCF),
        //  - variants on chromosomes that IsDesiredChromosome rejects,
        //  - microsatellites.
        var skip = variant.DbSnp == null
                   || !InputFileParserUtilities.IsDesiredChromosome(variant.Chromosome, _compressedSequence.Renamer)
                   || variant.VariantType == "Microsatellite";
        if (skip)
        {
            continue;
        }

        var referenceIndex = _compressedSequence.Renamer.GetReferenceIndex(variant.Chromosome);
        if (referenceIndex == ChromosomeRenamer.UnknownReferenceIndex)
        {
            throw new GeneralException($"Could not find the reference index for: {variant.Chromosome}");
        }

        _dataFileManager.LoadReference(referenceIndex, () => { });

        ClinvarVariant candidate = variant;

        // Some entries do not have a ref (or alt) allele in the XML file.
        // For those, the allele is generated from our reference sequence.
        if (variant.ReferenceAllele == null && variant.VariantType == "Deletion")
        {
            candidate = GenerateRefAllele(variant, _compressedSequence);
        }

        if (variant.AltAllele == null && variant.VariantType == "Duplication")
        {
            candidate = GenerateAltAllele(variant, _compressedSequence);
        }

        // Left-align the variant.
        candidate = LeftShift(candidate);

        // NOTE(review): for indels lacking a ref allele the left-shifted result above is replaced
        // by one generated from the un-shifted variant. Kept exactly as in the original; confirm intent.
        if (variant.ReferenceAllele == null && variant.VariantType == "Indel" && variant.AltAllele != null)
        {
            candidate = GenerateRefAllele(variant, _compressedSequence);
        }

        _pubMedIds.Sort();

        // Nothing to report when neither allele is available.
        var hasNoAlleles = string.IsNullOrEmpty(candidate.ReferenceAllele)
                           && string.IsNullOrEmpty(candidate.AltAllele);
        if (hasNoAlleles)
        {
            continue;
        }

        var alleleOrigins = _alleleOrigins.Distinct().ToList();
        var altAllele = candidate.AltAllele ?? "";
        var medGenIds = _medGenIDs.Distinct().ToList();
        var omimIds = _omimIDs.Distinct().ToList();
        var orphanetIds = _orphanetIDs.Distinct().ToList();

        // Preferred phenotypes are used when there are any; otherwise the alternate ones.
        var phenotypes = _prefPhenotypes.Count > 0
            ? _prefPhenotypes.Distinct().ToList()
            : _altPhenotypes.Distinct().ToList();

        var refAllele = candidate.ReferenceAllele ?? "";
        var pubMedIds = _pubMedIds.Distinct().ToList();

        var item = new ClinVarItem(
            candidate.Chromosome,
            candidate.Start,
            alleleOrigins,
            altAllele,
            _id,
            _reviewStatus,
            medGenIds,
            omimIds,
            orphanetIds,
            phenotypes,
            refAllele,
            _significance,
            pubMedIds,
            _lastUpdatedDate);

        items.Add(item);
    }

    if (items.Count == 0)
    {
        return null;
    }

    return items;
}