/// <summary>
/// Assembles the identifier string for a position: first the values returned by
/// <c>GetNonDbsnpIds</c> for the input ID column, then the VCF strings of every
/// supplementary annotation whose data source key equals <c>DbSnpKeyName</c>.
/// </summary>
/// <param name="annotatedPosition">The annotated position whose ID column and variants are read.</param>
/// <returns>The result of <c>VcfField.GetString("")</c> over the collected identifiers.</returns>
private static string ExtractDbId(IAnnotatedPosition annotatedPosition)
{
    var idField = new VcfField();

    // identifiers carried over from the input ID column (the helper may return null)
    var inputIds = GetNonDbsnpIds(annotatedPosition.Position.VcfFields[VcfCommon.IdIndex]);
    if (inputIds != null)
    {
        foreach (var inputId in inputIds)
        {
            idField.Add(inputId);
        }
    }

    // identifiers contributed by the dbSNP supplementary annotations
    foreach (var variant in annotatedPosition.AnnotatedVariants)
    {
        foreach (var annotation in variant.SupplementaryAnnotations)
        {
            if (annotation.SaDataSource.KeyName == DbSnpKeyName)
            {
                foreach (var dbSnpId in annotation.GetVcfStrings())
                {
                    idField.Add(dbSnpId);
                }
            }
        }
    }

    return idField.GetString("");
}
/// <summary>
/// Appends the INFO column for a position to <paramref name="sb"/>: the updated input
/// INFO text (if any) plus the entries added by <c>ExtractInfo</c>, followed by the CSQ
/// text produced by <c>GetCsqtAndCsqrVcfInfo</c>. A single "." is appended when there is
/// neither INFO text nor any CSQ entry.
/// </summary>
/// <param name="annotatedPosition">The annotated position being written.</param>
/// <param name="sb">The builder that receives the INFO column text.</param>
private void AddInfoField(IAnnotatedPosition annotatedPosition, StringBuilder sb)
{
    var infoEntries = new VcfField();

    var updatedInfo = annotatedPosition.Position.InfoData.UpdatedInfoField;
    if (!string.IsNullOrEmpty(updatedInfo))
    {
        infoEntries.Add(updatedInfo);
    }

    ExtractInfo(annotatedPosition, infoEntries);

    // a lone "." is dropped here; it is re-added at the end only if nothing else is written
    var rendered = infoEntries.GetString("");
    var infoText = rendered == "." ? "" : rendered;
    sb.Append(infoText);

    var csqs = new List<CsqEntry>();
    ExtractCsqs(annotatedPosition, csqs);

    // separate the INFO text from the CSQ tags only when both are present
    if (csqs.Count != 0 && infoText.Length > 0)
    {
        sb.Append(";");
    }

    // append CSQ tags using delegate from annotator
    sb.Append(GetCsqtAndCsqrVcfInfo(csqs));

    if (csqs.Count == 0 && infoText.Length == 0)
    {
        sb.Append(".");
    }
}
/// <summary>
/// Adds the annotation-derived INFO entries for a position to <paramref name="infoEntries"/>:
/// a "RefMinor" flag for each annotated variant whose variant is marked ref-minor, one entry
/// per supplementary annotation data source, and finally the "AA", "AF1000G" and "phyloP"
/// entries (in that order).
/// </summary>
/// <param name="annotatedPosition">The annotated position whose variants and supplementary annotations are read.</param>
/// <param name="infoEntries">The VCF field that receives the generated INFO entries.</param>
private static void ExtractInfo(IAnnotatedPosition annotatedPosition, VcfField infoEntries)
{
    var alleleFreq1000G = new VcfInfoKeyValue("AF1000G");
    var ancestralAllele = new VcfPositionalInfo("AA");
    var phyloP = new VcfInfoKeyValue("phyloP");

    var suppAnnotationSources = new Dictionary<string, VcfInfoKeyValue>();
    var isSaArrayInfo = new Dictionary<string, bool>();

    var numInputAltAlleles = annotatedPosition.Position.AltAlleles.Length;

    // first pass: register one INFO entry per distinct data source seen on any variant
    foreach (var alternateAllele in annotatedPosition.AnnotatedVariants)
    {
        foreach (var sa in alternateAllele.SupplementaryAnnotations)
        {
            var keyName = sa.SaDataSource.KeyName;
            if (suppAnnotationSources.ContainsKey(keyName))
            {
                continue;
            }

            suppAnnotationSources[keyName] = new VcfInfoKeyValue(sa.SaDataSource.VcfkeyName);
            isSaArrayInfo[keyName] = sa.SaDataSource.IsArray;
        }
    }

    // non-array sources get one null placeholder per input alternate allele
    foreach (var kvp in suppAnnotationSources)
    {
        if (isSaArrayInfo[kvp.Key])
        {
            continue;
        }

        for (var i = 0; i < numInputAltAlleles; i++)
        {
            kvp.Value.Add(null);
        }
    }

    for (var i = 0; i < numInputAltAlleles; i++)
    {
        alleleFreq1000G.Add(null);
    }

    var inputGenotypeIndex = GetInputGenotypeIndex(annotatedPosition.Position.AltAlleles, annotatedPosition.AnnotatedVariants);

    // second pass: fill in the values contributed by each annotated variant
    for (int i = 0; i < annotatedPosition.AnnotatedVariants.Length; i++)
    {
        var annotatedVariant = annotatedPosition.AnnotatedVariants[i];

        // the stored index is shifted by one before use
        // NOTE(review): presumably converts a 0-based alt allele index to a 1-based genotype index - confirm
        var genotypeIndex = inputGenotypeIndex[i] + 1;

        if (annotatedVariant.Variant.IsRefMinor)
        {
            infoEntries.Add("RefMinor");
        }

        phyloP.Add(annotatedVariant.PhylopScore?.ToString(CultureInfo.InvariantCulture));

        foreach (var sa in annotatedVariant.SupplementaryAnnotations)
        {
            var keyName = sa.SaDataSource.KeyName;

            // skip annotations that are neither allele-matched, allele-specific, nor the global allele source
            if (!sa.SaDataSource.MatchByAllele && !sa.IsAlleleSpecific && keyName != GlobalAlleleKeyName)
            {
                continue;
            }

            // these two sources are not written here (dbSNP values are collected by ExtractDbId)
            if (keyName == DbSnpKeyName || keyName == RefMinorKeyName)
            {
                continue;
            }

            foreach (var vcfAnnotation in sa.GetVcfStrings())
            {
                if (string.IsNullOrEmpty(vcfAnnotation))
                {
                    continue;
                }

                if (keyName == OneKgKeyName)
                {
                    // the string is split on ';': first field is the frequency, second the ancestral allele
                    var contents = vcfAnnotation.Split(';');
                    var freq = contents[0];

                    // a missing second field is treated like an empty one instead of
                    // throwing IndexOutOfRangeException
                    var ancestryAllele = contents.Length > 1 && !string.IsNullOrEmpty(contents[1])
                        ? contents[1]
                        : null;

                    alleleFreq1000G.Add(freq, genotypeIndex);
                    ancestralAllele.AddValue(ancestryAllele);
                    continue;
                }

                if (sa.SaDataSource.IsArray && sa.IsAlleleSpecific)
                {
                    // array sources carry the genotype index as a "<index>|" prefix
                    suppAnnotationSources[keyName].Add(
                        genotypeIndex.ToString(CultureInfo.InvariantCulture) + '|' + vcfAnnotation);
                }
                else if (!sa.SaDataSource.IsArray)
                {
                    suppAnnotationSources[keyName].Add(vcfAnnotation, genotypeIndex);
                }
            }
        }
    }

    foreach (var value in suppAnnotationSources.Values)
    {
        infoEntries.Add(value.GetString());
    }

    infoEntries.Add(ancestralAllele.GetString());
    infoEntries.Add(alleleFreq1000G.GetString());
    infoEntries.Add(phyloP.GetString());
}