/// <summary>
/// Checks whether the supplied reference allele equals the bases read from the
/// compressed sequence at the given position.
/// </summary>
/// <param name="chromosome">Chromosome name, resolved to a reference index through the sequence's renamer.</param>
/// <param name="pos">Position of the first base; one is subtracted before it is handed to Substring.</param>
/// <param name="refAllele">Allele to compare against the bases read from the sequence.</param>
/// <returns>
/// false when the renamer does not know the chromosome or when the bases differ;
/// true when the bases equal <paramref name="refAllele"/>.
/// </returns>
private bool ValidateReference(string chromosome, int pos, string refAllele)
{
    var referenceIndex = _compressedSequence.Renamer.GetReferenceIndex(chromosome);
    var isKnownReference = referenceIndex != ChromosomeRenamer.UnknownReferenceIndex;

    if (isKnownReference)
    {
        // the reference is loaded first, then the bases are read from _compressedSequence
        _dataFileManager.LoadReference(referenceIndex, () => { });

        var referenceBases = _compressedSequence.Substring(pos - 1, refAllele.Length);
        return referenceBases == refAllele;
    }

    return false;
}
/// <summary>
/// Annotates a single variant and returns the Piano variant built for it.
/// </summary>
/// <param name="variant">The variant to annotate; may be null.</param>
/// <returns>
/// null when <paramref name="variant"/> is null; otherwise the value held in
/// _pianoVariant once annotation has finished.
/// </returns>
public PianoVariant Annotate(IVariant variant)
{
    if (variant == null)
    {
        return null;
    }

    // NOTE(review): the 'as' cast yields null for any IVariant that is not a
    // VcfVariant - confirm that VariantFeature tolerates a null first argument
    var vcfVariant = variant as VcfVariant;
    var feature    = new VariantFeature(vcfVariant, _renamer, _vid);

    // load the reference sequence this variant sits on
    _dataFileManager.LoadReference(feature.ReferenceIndex, () => { });

    // handle ref no-calls and assign the alternate alleles
    feature.AssignAlternateAlleles();

    // _pianoVariant is assigned before the feature-level Annotate overload runs
    _pianoVariant = new PianoVariant(feature);
    Annotate(feature);

    _performanceMetrics.Increment();

    return _pianoVariant;
}
/// <summary>
/// Writes one tab-separated, VCF-style line for the given transcript: a single-base
/// substitution located halfway between the genomic start and end of its coding region.
/// </summary>
/// <param name="writer">Destination for the generated line.</param>
/// <param name="transcript">Transcript that supplies the gene symbol, reference index and coding region.</param>
private void CreateVcf(StreamWriter writer, Transcript transcript)
{
    var geneSymbol = transcript.Gene.Symbol;

    // a gene that was already processed is only revisited for canonical transcripts;
    // transcripts without a translation are skipped
    var alreadyCovered = !transcript.IsCanonical && _processedGeneSet.Contains(geneSymbol);
    if (alreadyCovered || transcript.Translation == null)
    {
        return;
    }

    _processedGeneSet.Add(geneSymbol);
    _dataFileManager.LoadReference(transcript.ReferenceIndex, () => { });

    // midpoint of the coding region
    var codingRegion = transcript.Translation.CodingRegion;
    var position     = (codingRegion.GenomicStart + codingRegion.GenomicEnd) / 2;

    // reference base at that position, and the first entry of _nucleotides that differs from it
    var refAllele = _compressedSequence.Substring(position - 1, 1);
    var altAllele = _nucleotides.First(candidate => candidate != refAllele);

    writer.WriteLine($"{_renamer.UcscReferenceNames[transcript.ReferenceIndex]}\t{position}\t.\t{refAllele}\t{altAllele}\t.\t.\t.");
}
/// <summary>
/// Parses one ClinVar record element and turns the variants collected from it
/// into ClinVar items.
/// </summary>
/// <param name="xmlElement">
/// Record element whose children are scanned for "ReferenceClinVarAssertion" and
/// "ClinVarAssertion" nodes.
/// </param>
/// <returns>
/// The extracted items, or null when the element is null or empty, the record
/// status is not "current", or no variant produced an item.
/// </returns>
/// <exception cref="GeneralException">
/// Thrown when the renamer has no reference index for a variant's chromosome.
/// </exception>
private List<ClinVarItem> ExtractClinVarItems(LiteXmlElement xmlElement)
{
    // the per-record fields are cleared first, even for null or empty input
    ClearClinvarFields();

    if (xmlElement == null || xmlElement.IsEmpty())
    {
        return null;
    }

    foreach (var child in xmlElement.Children)
    {
        switch (child.Name)
        {
            case "ReferenceClinVarAssertion":
                ParseRefClinVarAssertion(child);
                break;

            case "ClinVarAssertion":
                ParseScv(child);
                break;
        }
    }

    if (_recordStatus != "current")
    {
        Console.WriteLine($"record status not current: {_recordStatus} for {_id}");
        return null;
    }

    var items = new List<ClinVarItem>();

    foreach (var variant in _variantList)
    {
        // leave out variants without a dbSNP id (in order to match the VCF), variants on
        // chromosomes we do not want, and microsatellites
        var skipVariant = variant.DbSnp == null
                          || !InputFileParserUtilities.IsDesiredChromosome(variant.Chromosome, _compressedSequence.Renamer)
                          || variant.VariantType == "Microsatellite";
        if (skipVariant)
        {
            continue;
        }

        var referenceIndex = _compressedSequence.Renamer.GetReferenceIndex(variant.Chromosome);
        if (referenceIndex == ChromosomeRenamer.UnknownReferenceIndex)
        {
            throw new GeneralException($"Could not find the reference index for: {variant.Chromosome}");
        }

        _dataFileManager.LoadReference(referenceIndex, () => { });

        // some entries do not have a ref allele in the XML file; for those we extract
        // it from our reference sequence
        ClinvarVariant candidate = variant.ReferenceAllele == null && variant.VariantType == "Deletion"
            ? GenerateRefAllele(variant, _compressedSequence)
            : variant;

        // duplications without an alt allele get one generated the same way
        if (variant.AltAllele == null && variant.VariantType == "Duplication")
        {
            candidate = GenerateAltAllele(variant, _compressedSequence);
        }

        // left align the variant
        candidate = LeftShift(candidate);

        // NOTE(review): this replaces the left-shifted result with one generated from the
        // original variant - presumably intentional for indels; confirm
        if (variant.ReferenceAllele == null && variant.VariantType == "Indel" && variant.AltAllele != null)
        {
            candidate = GenerateRefAllele(variant, _compressedSequence);
        }

        _pubMedIds.Sort();

        // an entry with neither a ref nor an alt allele is dropped
        var hasAnyAllele = !string.IsNullOrEmpty(candidate.ReferenceAllele)
                           || !string.IsNullOrEmpty(candidate.AltAllele);
        if (!hasAnyAllele)
        {
            continue;
        }

        var item = new ClinVarItem(
            candidate.Chromosome,
            candidate.Start,
            _alleleOrigins.Distinct().ToList(),
            candidate.AltAllele ?? "",
            _id,
            _reviewStatus,
            _medGenIDs.Distinct().ToList(),
            _omimIDs.Distinct().ToList(),
            _orphanetIDs.Distinct().ToList(),
            // preferred phenotypes win; alternate ones are used only when there are none
            _prefPhenotypes.Count > 0 ? _prefPhenotypes.Distinct().ToList() : _altPhenotypes.Distinct().ToList(),
            candidate.ReferenceAllele ?? "",
            _significance,
            _pubMedIds.Distinct().ToList(),
            _lastUpdatedDate);

        items.Add(item);
    }

    // callers receive null rather than an empty list
    if (items.Count == 0)
    {
        return null;
    }

    return items;
}
/// <summary>
/// Builds the supplementary position creator for the item returned by
/// CurrentMinSupplementaryDataItem, folding in every enumerator whose current
/// item equals it and advancing those enumerators.
/// </summary>
/// <returns>
/// The creator for the next position, or null when there are no enumerators and
/// no additional items left, or when no current minimum item exists.
/// </returns>
/// <exception cref="GeneralException">
/// Thrown when the renamer has no reference index for the new reference name.
/// </exception>
private SupplementaryPositionCreator GetNextSupplementaryAnnotation()
{
    // no more active iterators and no additional items left
    var hasPendingItems = _iSupplementaryDataItemList.Count != 0 || _additionalItemsList.Count != 0;
    if (!hasPendingItems)
    {
        return null;
    }

    var minItem = CurrentMinSupplementaryDataItem();
    if (minItem == null)
    {
        // nothing more to return: all enumerators are empty
        return null;
    }

    var position  = new SupplementaryAnnotationPosition(minItem.Start);
    var saCreator = new SupplementaryPositionCreator(position);
    saCreator.RefSeqName = minItem.Chromosome;

    // when the reference name changes: close the current writer, load the new
    // reference and open a new writer
    var onSameReference = _currentRefName != null && _currentRefName.Equals(saCreator.RefSeqName);
    if (!onSameReference)
    {
        CloseCurrentSaWriter();

        _currentRefName = saCreator.RefSeqName;

        var referenceIndex = _renamer.GetReferenceIndex(_currentRefName);
        if (referenceIndex == ChromosomeRenamer.UnknownReferenceIndex)
        {
            throw new GeneralException($"Could not find the reference index for: {_currentRefName}");
        }

        _dataFileManager.LoadReference(referenceIndex, () => { });
        OpenNewSaWriter();
    }

    // window of reference bases starting at this position; stays null without a sequence
    string refSequence = _compressedSequence?.Substring(position.ReferencePosition - 1, ReferenceWindowSize);

    // enumerators that run dry during this pass; they are removed afterwards
    var exhausted = new List<IEnumerator<SupplementaryDataItem>>();

    foreach (var enumerator in _iSupplementaryDataItemList)
    {
        var current = enumerator.Current;

        // only items equal to the minimum item are consumed in this pass
        if (current.Equals(minItem))
        {
            if (current.IsInterval)
            {
                _supplementaryIntervalList.Add(current.GetSupplementaryInterval(_renamer));
            }
            else
            {
                // a non-null result is kept in the additional items list
                var additionalItem = current.SetSupplementaryAnnotations(saCreator, refSequence);
                if (additionalItem != null)
                {
                    _additionalItemsList.Add(additionalItem);
                }
            }

            // remember enumerators that have nothing left
            if (!enumerator.MoveNext())
            {
                exhausted.Add(enumerator);
            }
        }
    }

    // add annotations from additional items if applicable
    AddAdditionalItems(minItem, saCreator);

    // drop the enumerators that are empty and therefore no longer belong in the list
    _iSupplementaryDataItemList.RemoveAll(candidate => exhausted.Contains(candidate));

    return saCreator;
}