Exemple #1
0
        /// <summary>
        /// Recursively builds a <see cref="LiteXmlElement"/> from the element the reader is
        /// currently positioned on, collecting its attributes, text nodes and child elements.
        /// </summary>
        /// <param name="xmlReader">Reader positioned on the start tag of the element to parse.</param>
        /// <returns>
        /// The parsed element, or null when the element reports itself as empty via IsEmpty(),
        /// or when the reader runs out of nodes before the matching end tag is seen.
        /// </returns>
        private static LiteXmlElement ParseXmlElement(XmlTextReader xmlReader)
        {
            var parsed = new LiteXmlElement(xmlReader.Name);

            // Captured up front: the attribute loop below repositions the reader onto attribute nodes.
            var selfClosing = xmlReader.IsEmptyElement;

            if (xmlReader.HasAttributes)
            {
                while (xmlReader.MoveToNextAttribute())
                {
                    parsed.Attributes[xmlReader.Name] = xmlReader.Value;
                }
            }

            if (selfClosing)
            {
                // A self-closing element has no content to read; only its attributes can make it non-empty.
                if (parsed.IsEmpty())
                {
                    return(null);
                }
                return(parsed);
            }

            // Consume nodes until the end tag carrying this element's name shows up.
            while (xmlReader.Read())
            {
                var nodeType = xmlReader.NodeType;

                if (nodeType == XmlNodeType.Element)
                {
                    // Nested element: parse it recursively and keep it unless it came back as null.
                    var nested = ParseXmlElement(xmlReader);
                    if (nested != null)
                    {
                        parsed.Children.Add(nested);
                    }
                }
                else if (nodeType == XmlNodeType.Text)
                {
                    var text = xmlReader.Value;
                    if (!string.IsNullOrEmpty(text))
                    {
                        parsed.StringValues.Add(text);
                    }
                }
                else if (nodeType == XmlNodeType.EndElement)
                {
                    if (xmlReader.Name == parsed.Name)
                    {
                        if (parsed.IsEmpty())
                        {
                            return(null);
                        }
                        return(parsed);
                    }

                    // An end tag with a different name is reported and then ignored.
                    Console.WriteLine("WARNING!! encountered unexpected endElement tag:" + xmlReader.Name);
                }
                // Every other node type is skipped.
            }

            // Input ended without a matching end tag.
            return(null);
        }
Exemple #2
0
        /// <summary>
        /// Turns one parsed record element into a list of <see cref="ClinVarItem"/> objects,
        /// one per entry of _variantList that passes the filters in the loop below.
        /// </summary>
        /// <param name="xmlElement">Parsed record element; a null or empty element yields null.</param>
        /// <returns>
        /// The non-empty list of items, or null when the input is null/empty, the record status
        /// is not "current", or no variant produced an item.
        /// </returns>
        /// <exception cref="GeneralException">
        /// Thrown when the renamer returns ChromosomeRenamer.UnknownReferenceIndex for a variant's chromosome.
        /// </exception>
        private List <ClinVarItem> ExtractClinVarItems(LiteXmlElement xmlElement)
        {
            // Runs before the input checks, so it happens even for null/empty input.
            // Presumably resets the per-record fields read further down - confirm against ClearClinvarFields.
            ClearClinvarFields();

            if (xmlElement == null || xmlElement.IsEmpty())
            {
                return(null);
            }

            // Hand each recognised child to its parser; any other child is ignored.
            foreach (var section in xmlElement.Children)
            {
                var sectionName = section.Name;
                if (sectionName == "ReferenceClinVarAssertion")
                {
                    ParseRefClinVarAssertion(section);
                }
                else if (sectionName == "ClinVarAssertion")
                {
                    ParseScv(section);
                }
            }

            if (_recordStatus != "current")
            {
                Console.WriteLine($"record status not current: {_recordStatus} for {_id}");
                return(null);
            }

            var items = new List <ClinVarItem>();

            foreach (var variant in _variantList)
            {
                // in order to match the VCF, variants without a dbsnp id are left out;
                // undesired chromosomes and microsatellites are skipped as well
                var skipVariant = variant.DbSnp == null
                                  || !InputFileParserUtilities.IsDesiredChromosome(variant.Chromosome, _compressedSequence.Renamer)
                                  || variant.VariantType == "Microsatellite";
                if (skipVariant)
                {
                    continue;
                }

                var referenceIndex = _compressedSequence.Renamer.GetReferenceIndex(variant.Chromosome);
                if (referenceIndex == ChromosomeRenamer.UnknownReferenceIndex)
                {
                    throw new GeneralException($"Could not find the reference index for: {variant.Chromosome}");
                }
                _dataFileManager.LoadReference(referenceIndex, () => {});

                ClinvarVariant aligned = variant;

                // some entries carry no ref allele in the xml file; for those it is derived from the reference sequence
                if (variant.ReferenceAllele == null && variant.VariantType == "Deletion")
                {
                    aligned = GenerateRefAllele(variant, _compressedSequence);
                }
                if (variant.AltAllele == null && variant.VariantType == "Duplication")
                {
                    aligned = GenerateAltAllele(variant, _compressedSequence);
                }

                // left align the variant
                aligned = LeftShift(aligned);

                // NOTE(review): this regenerates from the un-shifted `variant`, so the LeftShift result
                // above is discarded for such indels - confirm that this is intended.
                if (variant.ReferenceAllele == null && variant.VariantType == "Indel" && variant.AltAllele != null)
                {
                    aligned = GenerateRefAllele(variant, _compressedSequence);
                }

                _pubMedIds.Sort();

                // Only emit an item when at least one of the two alleles is non-empty.
                if (!string.IsNullOrEmpty(aligned.ReferenceAllele) || !string.IsNullOrEmpty(aligned.AltAllele))
                {
                    // Locals are evaluated in the same order as the constructor arguments they feed.
                    var chromosome    = aligned.Chromosome;
                    var start         = aligned.Start;
                    var alleleOrigins = _alleleOrigins.Distinct().ToList();
                    var altAllele     = aligned.AltAllele ?? "";
                    var medGenIds     = _medGenIDs.Distinct().ToList();
                    var omimIds       = _omimIDs.Distinct().ToList();
                    var orphanetIds   = _orphanetIDs.Distinct().ToList();

                    // preferred phenotypes win; alternate ones are used only when there are none
                    var phenotypes = _prefPhenotypes.Count > 0
                        ? _prefPhenotypes.Distinct().ToList()
                        : _altPhenotypes.Distinct().ToList();

                    var refAllele = aligned.ReferenceAllele ?? "";
                    var pubMedIds = _pubMedIds.Distinct().ToList();

                    var item = new ClinVarItem(chromosome,
                                               start,
                                               alleleOrigins,
                                               altAllele,
                                               _id,
                                               _reviewStatus,
                                               medGenIds,
                                               omimIds,
                                               orphanetIds,
                                               phenotypes,
                                               refAllele,
                                               _significance,
                                               pubMedIds,
                                               _lastUpdatedDate);
                    items.Add(item);
                }
            }

            if (items.Count > 0)
            {
                return(items);
            }
            return(null);
        }