Ejemplo n.º 1
0
        /// <summary>
        /// Builds the ID string for an annotated position.
        /// </summary>
        /// <remarks>
        /// The IDs returned by <c>GetNonDbsnpIds</c> for the input ID field
        /// (<c>VcfFields[VcfCommon.IdIndex]</c>) are added first, followed by the VCF strings of
        /// every supplementary annotation whose data source key equals <c>DbSnpKeyName</c>.
        /// </remarks>
        /// <param name="annotatedPosition">Position whose VCF fields and annotated variants are read.</param>
        /// <returns>The value of <c>VcfField.GetString("")</c> over the collected IDs.</returns>
        private static string ExtractDbId(IAnnotatedPosition annotatedPosition)
        {
            var idField = new VcfField();

            // IDs carried over from the input ID column; the helper may return null.
            var inputIds = GetNonDbsnpIds(annotatedPosition.Position.VcfFields[VcfCommon.IdIndex]);
            if (inputIds != null)
            {
                foreach (var inputId in inputIds)
                {
                    idField.Add(inputId);
                }
            }

            // Only annotations coming from the dbSNP data source contribute further IDs.
            foreach (var variant in annotatedPosition.AnnotatedVariants)
            {
                foreach (var annotation in variant.SupplementaryAnnotations)
                {
                    if (annotation.SaDataSource.KeyName == DbSnpKeyName)
                    {
                        foreach (var vcfString in annotation.GetVcfStrings())
                        {
                            idField.Add(vcfString);
                        }
                    }
                }
            }

            return idField.GetString("");
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Appends the INFO column for <paramref name="annotatedPosition"/> to <paramref name="sb"/>.
        /// </summary>
        /// <remarks>
        /// The output is the existing/updated INFO text plus the entries produced by
        /// <c>ExtractInfo</c>, followed by the CSQ text from <c>GetCsqtAndCsqrVcfInfo</c>. A ";" is
        /// written between the two parts only when both are present, and a single "." is written
        /// when there is neither INFO text nor any CSQ entry.
        /// </remarks>
        /// <param name="annotatedPosition">Position supplying the INFO data and annotations.</param>
        /// <param name="sb">Builder that receives the INFO column text.</param>
        private void AddInfoField(IAnnotatedPosition annotatedPosition, StringBuilder sb)
        {
            var entries     = new VcfField();
            var updatedInfo = annotatedPosition.Position.InfoData.UpdatedInfoField;

            if (!string.IsNullOrEmpty(updatedInfo))
            {
                entries.Add(updatedInfo);
            }

            ExtractInfo(annotatedPosition, entries);

            // A lone "." is the placeholder for "no entries"; treat it as empty for now
            // and re-emit it at the end only if nothing else gets written.
            var infoText = entries.GetString("");
            if (infoText == ".")
            {
                infoText = "";
            }

            sb.Append(infoText);

            var csqEntries = new List<CsqEntry>();
            ExtractCsqs(annotatedPosition, csqEntries);

            // Separator is needed only when CSQ text follows non-empty INFO text.
            if (csqEntries.Count != 0 && infoText.Length > 0)
            {
                sb.Append(";");
            }

            // append CSQ tags using delegate from annotator
            sb.Append(GetCsqtAndCsqrVcfInfo(csqEntries));

            if (csqEntries.Count == 0 && infoText.Length == 0)
            {
                sb.Append(".");
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Adds the INFO entries derived from the annotated variants of
        /// <paramref name="annotatedPosition"/> to <paramref name="infoEntries"/>.
        /// </summary>
        /// <remarks>
        /// One <c>VcfInfoKeyValue</c> is created per distinct supplementary data source key, plus the
        /// fixed "AF1000G", "AA" and "phyloP" entries. Non-array sources and "AF1000G" are pre-filled
        /// with one <c>null</c> per input alternate allele before values are added at a genotype index.
        /// "RefMinor" is added for each variant flagged as reference-minor. The dbSNP and RefMinor data
        /// sources are skipped here.
        /// </remarks>
        /// <param name="annotatedPosition">Position supplying alternate alleles and annotated variants.</param>
        /// <param name="infoEntries">Field collection that receives the generated INFO strings.</param>
        private static void ExtractInfo(IAnnotatedPosition annotatedPosition, VcfField infoEntries)
        {
            var alleleFreq1000G = new VcfInfoKeyValue("AF1000G");
            var ancestralAllele = new VcfPositionalInfo("AA");
            var phyloP          = new VcfInfoKeyValue("phyloP");

            var suppAnnotationSources = new Dictionary<string, VcfInfoKeyValue>();
            var isSaArrayInfo         = new Dictionary<string, bool>();
            var numInputAltAlleles    = annotatedPosition.Position.AltAlleles.Length;

            // Register every data source seen on any variant, remembering whether it is array-valued.
            foreach (var alternateAllele in annotatedPosition.AnnotatedVariants)
            {
                foreach (var sa in alternateAllele.SupplementaryAnnotations)
                {
                    var dataSource = sa.SaDataSource;
                    var keyName    = dataSource.KeyName;

                    if (!suppAnnotationSources.ContainsKey(keyName))
                    {
                        suppAnnotationSources[keyName] = new VcfInfoKeyValue(dataSource.VcfkeyName);
                        isSaArrayInfo[keyName]         = dataSource.IsArray;
                    }
                }
            }

            // Non-array sources get one null placeholder per input alternate allele.
            foreach (var kvp in suppAnnotationSources)
            {
                if (isSaArrayInfo[kvp.Key])
                {
                    continue;
                }

                for (var i = 0; i < numInputAltAlleles; i++)
                {
                    kvp.Value.Add(null);
                }
            }

            for (var i = 0; i < numInputAltAlleles; i++)
            {
                alleleFreq1000G.Add(null);
            }

            var inputGenotypeIndex = GetInputGenotypeIndex(annotatedPosition.Position.AltAlleles, annotatedPosition.AnnotatedVariants);

            for (var i = 0; i < annotatedPosition.AnnotatedVariants.Length; i++)
            {
                var annotatedVariant = annotatedPosition.AnnotatedVariants[i];

                // The looked-up index is shifted by one before being used as the genotype index.
                var genotypeIndex = inputGenotypeIndex[i] + 1;

                if (annotatedVariant.Variant.IsRefMinor)
                {
                    infoEntries.Add("RefMinor");
                }

                phyloP.Add(annotatedVariant.PhylopScore?.ToString(CultureInfo.InvariantCulture));

                foreach (var sa in annotatedVariant.SupplementaryAnnotations)
                {
                    var dataSource = sa.SaDataSource;
                    var keyName    = dataSource.KeyName;

                    // Positional (non allele-specific) annotations are skipped unless the source
                    // matches by allele or is the global-allele source.
                    if (!dataSource.MatchByAllele && !sa.IsAlleleSpecific && keyName != GlobalAlleleKeyName)
                    {
                        continue;
                    }

                    // dbSNP and RefMinor sources are not emitted by this method.
                    if (keyName == DbSnpKeyName || keyName == RefMinorKeyName)
                    {
                        continue;
                    }

                    foreach (var vcfAnnotation in sa.GetVcfStrings())
                    {
                        if (string.IsNullOrEmpty(vcfAnnotation))
                        {
                            continue;
                        }

                        if (keyName == OneKgKeyName)
                        {
                            // Expected layout is "<frequency>;<ancestral allele>". Split always yields
                            // at least one element, but the second one is absent when the annotation
                            // has no ';' - treat that the same as an empty ancestral allele instead of
                            // throwing IndexOutOfRangeException.
                            var contents       = vcfAnnotation.Split(';');
                            var freq           = contents[0];
                            var ancestryAllele = contents.Length > 1 && !string.IsNullOrEmpty(contents[1])
                                ? contents[1]
                                : null;

                            alleleFreq1000G.Add(freq, genotypeIndex);
                            ancestralAllele.AddValue(ancestryAllele);
                            continue;
                        }

                        if (dataSource.IsArray && sa.IsAlleleSpecific)
                        {
                            // Array-valued sources carry the genotype index as a "<index>|" prefix.
                            suppAnnotationSources[keyName].Add(
                                genotypeIndex.ToString(CultureInfo.InvariantCulture) + '|' + vcfAnnotation);
                        }
                        else if (!dataSource.IsArray)
                        {
                            suppAnnotationSources[keyName].Add(vcfAnnotation, genotypeIndex);
                        }
                    }
                }
            }

            foreach (var value in suppAnnotationSources.Values)
            {
                infoEntries.Add(value.GetString());
            }

            infoEntries.Add(ancestralAllele.GetString());
            infoEntries.Add(alleleFreq1000G.GetString());
            infoEntries.Add(phyloP.GetString());
        }