示例#1
0
        /// <summary>
        /// Lazily yields one VcfVariant per line obtained from Reader.
        /// Typical usage: foreach (VcfVariant variant in reader.GetVariants())
        /// </summary>
        /// <returns>
        /// A sequence of freshly allocated variants; the sequence is empty when the file is not open
        /// and ends as soon as Reader.ReadLine() returns null.
        /// </returns>
        /// <exception cref="InvalidDataException">
        /// RequireGenotypes is set and a line produced no genotype entries.
        /// </exception>
        public IEnumerable <VcfVariant> GetVariants()
        {
            // nothing to enumerate unless the file is open
            if (IsOpen)
            {
                string vcfLine;
                while ((vcfLine = Reader.ReadLine()) != null)
                {
                    // every line gets its own variant object, filled from the tab-separated columns
                    VcfVariant parsed = new VcfVariant();
                    ConvertColumnsToVariant(vcfLine.Split('\t'), parsed);

                    if (RequireGenotypes && !(parsed.Genotypes != null && parsed.Genotypes.Count != 0))
                    {
                        throw new InvalidDataException("Missing genotype columns in VCF file");
                    }

                    yield return parsed;
                }
            }
        }
示例#2
0
        /// <summary>
        ///     Retrieves the next available variant and returns false if no variants are available.
        /// </summary>
        /// <param name="variant">
        ///     Caller-supplied object that is overwritten in place with the fields parsed from the next line.
        /// </param>
        /// <returns>
        ///     true when a line was read and converted; false when the file is not open or
        ///     Reader.ReadLine() returned null.
        /// </returns>
        /// <exception cref="InvalidDataException">
        ///     RequireGenotypes is set and the line produced no genotype entries.
        /// </exception>
        public bool GetNextVariant(VcfVariant variant)
        {
            // sanity check: make sure the file is open
            if (!IsOpen)
            {
                return false;
            }

            // grab the next vcf line
            string line = Reader.ReadLine();
            if (line == null)
            {
                return false;
            }

            // split the columns and convert them to a variant
            string[] cols = line.Split('\t');
            ConvertColumnsToVariant(cols, variant);

            // ConvertColumnsToVariant only assigns Genotypes when genotype columns are present, so the
            // list may still be null here; treat null the same as empty (as GetVariants does) instead
            // of failing with a NullReferenceException.
            if (RequireGenotypes && (variant.Genotypes == null || variant.Genotypes.Count == 0))
            {
                throw new InvalidDataException("Missing genotype columns in VCF file");
            }

            return true;
        }
示例#3
0
        /// <summary>
        /// Assign a variant type to the allele selected by a genotype haplotype index.
        /// Index 0 is the reference allele, index -1 is a missing call, and any other index k selects
        /// VariantAlleles[k - 1], which is classified against the reference allele by the
        /// (reference, alternate) string overload.  The classification rules, as documented for that
        /// comparison, are:
        /// - If ref==alt, type is reference.
        /// - Otherwise, trim off any common prefix and any common suffix.  Let |ref| denote the length of the
        ///   reference allele after trimming, and |alt| denote the length of the alt allele after trimming.
        /// - If |ref|=0, it's an insertion
        /// - If |alt|=0, it's a deletion
        /// - If |ref|=|alt|=1, it's a SNV
        /// - If |ref| = |alt| > 1, it's a MNP
        /// - If |ref|>0 and |alt|>0 and |ref| != |alt|, it's a complex event
        /// </summary>
        private static VariantType GetAlleleVariantType(VcfVariant variant, int haplotype)
        {
            switch (haplotype)
            {
            case 0:
                // haplotype 0 always denotes the reference allele
                return VariantType.Reference;

            case -1:
                // -1 is the marker for an allele that could not be determined
                return VariantType.Missing;

            default:
                // haplotype indices are 1-based with respect to the ALT allele list
                string alternate = variant.VariantAlleles[haplotype - 1];
                return GetAlleleVariantType(variant.ReferenceAllele, alternate);
            }
        }
示例#4
0
        /// <summary>
        ///     populates a vcf variant object given an array of vcf columns
        /// </summary>
        /// <param name="cols">The tab-separated columns of a single VCF data line.</param>
        /// <param name="variant">The variant object whose fields are overwritten from <paramref name="cols"/>.</param>
        /// <remarks>
        ///     Numeric columns are parsed with the invariant culture so the result does not depend on the
        ///     machine's regional settings.  Genotypes and the variant type are only assigned when the line
        ///     contains genotype columns.
        ///     NOTE(review): when a reused variant object is given a line without genotype columns, its
        ///     Genotypes / VarType members keep whatever an earlier line stored - confirm whether callers
        ///     rely on that before changing it.
        /// </remarks>
        protected void ConvertColumnsToVariant(string[] cols, VcfVariant variant)
        {
            System.Globalization.CultureInfo culture = System.Globalization.CultureInfo.InvariantCulture;

            variant.ReferenceName     = cols[VcfCommon.ChromIndex];
            variant.ReferencePosition = int.Parse(cols[VcfCommon.PosIndex], culture);
            variant.Identifier        = cols[VcfCommon.IDIndex];
            variant.ReferenceAllele   = cols[VcfCommon.RefIndex];
            variant.Filters           = cols[VcfCommon.FilterIndex];

            // VCF allows the missing value "." for QUAL (Strelka uses it).  The flag is assigned on every
            // call so that a variant object reused across lines does not keep a stale "no quality" state.
            string qualityText = cols[VcfCommon.QualIndex];
            variant.HasQuality = qualityText != ".";

            // On failure (including ".") TryParse leaves Quality at 0.
            double.TryParse(
                qualityText,
                System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                culture,
                out variant.Quality);

            // parse the variant alleles
            variant.VariantAlleles = cols[VcfCommon.AltIndex].Split(',');

            // parse the info fields
            variant.InfoFields = new Dictionary <string, string>(StringComparer.OrdinalIgnoreCase);
            string infoData = cols[VcfCommon.InfoIndex];

            if (infoData == ".")
            {
                infoData = "";                              // Special case: a "." in the INFO field should be treated like an empty string.
            }
            string[] infoCols = infoData.Split(InfoSplitChars, StringSplitOptions.RemoveEmptyEntries);

            int numInfoCols = infoCols.Length;

            // reuse the existing tag-order array when it already has the right size
            if ((variant.InfoTagOrder == null) || (numInfoCols != variant.InfoTagOrder.Length))
            {
                variant.InfoTagOrder = new string[numInfoCols];
            }

            char[] keyValueSeparator = { '=' };
            for (int infoColIndex = 0; infoColIndex < numInfoCols; infoColIndex++)
            {
                // Split on the first '=' only, so a value that itself contains '=' is kept intact.
                // A flag entry without '=' is stored with a null value.
                string[] infoFieldKvp = infoCols[infoColIndex].Split(keyValueSeparator, 2);
                variant.InfoTagOrder[infoColIndex]  = infoFieldKvp[0];
                variant.InfoFields[infoFieldKvp[0]] = (infoFieldKvp.Length == 1 ? null : infoFieldKvp[1]);
            }

            if (cols.Length > VcfCommon.GenotypeIndex)             // Genotype columns present
            {
                // parse the genotype format field; the split result is cached on the reader and only
                // recomputed when the FORMAT string differs from the previous line's
                if (cols[VcfCommon.FormatIndex] != GenotypeTagString)
                {
                    GenotypeTagString = cols[VcfCommon.FormatIndex];
                    GenotypeTagOrder  = GenotypeTagString.Split(':');
                }
                variant.GenotypeTagOrder = GenotypeTagOrder;

                // parse the genotype data for each sample
                variant.Genotypes = new List <Dictionary <string, string> >();
                for (int sampleIndex = 0; sampleIndex < this.Samples.Count; sampleIndex++)
                {
                    string genotypeColumn = cols[VcfCommon.GenotypeIndex + sampleIndex];
                    if (genotypeColumn == ".")
                    {
                        // a sample column consisting only of "." is recorded as a null entry
                        variant.Genotypes.Add(null);
                    }
                    else
                    {
                        string[] genotypeCols = genotypeColumn.Split(':');
                        variant.Genotypes.Add(ParseGenotype(variant.GenotypeTagOrder, genotypeCols));
                    }
                }

                // specify the variant type:
                AssignVariantType(variant);
            }
        }
示例#5
0
        /// <summary>
        ///     Sets VarType1, VarType2 and the combined VarType of a variant from the "GT" entry of its
        ///     first genotype.
        /// </summary>
        /// <param name="variant">The variant to update; its Genotypes list must not be null.</param>
        /// <remarks>
        ///     The GT string is split on '/' (unphased) or '|' (phased) and the first two allele indices
        ///     are used, so multi-digit indices such as "10/2" are read correctly.  A genotype with a
        ///     single usable allele ("./1", "1/.", "1") is treated as homozygous for that allele.  When no
        ///     allele index can be read at all (no GT entry, ".", "./.", ".|.", empty string) all three
        ///     types are set to Missing.
        /// </remarks>
        private static void AssignVariantType(VcfVariant variant)
        {
            string genotype = null;

            // Only the first sample is consulted.  The list may be empty, and a sample whose column was
            // "." is stored as a null entry.
            if (variant.Genotypes.Count > 0 && variant.Genotypes[0] != null)
            {
                variant.Genotypes[0].TryGetValue("GT", out genotype);
            }

            int haplotypeA = -1;
            int haplotypeB = -1;

            if (genotype != null)
            {
                string[] alleles = genotype.Split('/', '|');
                haplotypeA = ParseHaplotypeIndex(alleles[0]);
                if (alleles.Length > 1)
                {
                    haplotypeB = ParseHaplotypeIndex(alleles[1]);
                }
            }

            // Treat things like ./1 or 0/. as homozygous:
            if (haplotypeA == -1)
            {
                haplotypeA = haplotypeB;
            }
            if (haplotypeB == -1)
            {
                haplotypeB = haplotypeA;
            }

            // sanity check: support missing genotypes (after the fill-in above, A is -1 only if both were)
            if (haplotypeA == -1)
            {
                variant.VarType1 = VariantType.Missing;
                variant.VarType2 = VariantType.Missing;
                variant.VarType  = VariantType.Missing;
                return;
            }

            variant.VarType1 = GetAlleleVariantType(variant, haplotypeA);
            variant.VarType2 = GetAlleleVariantType(variant, haplotypeB);
            variant.VarType  = MergeAlleleVariantTypes(variant.VarType1, variant.VarType2);
        }

        /// <summary>
        ///     Reads a non-negative allele index from one side of a GT string; returns -1 for "." or any
        ///     other text that is not purely decimal digits.
        /// </summary>
        private static int ParseHaplotypeIndex(string alleleText)
        {
            int index;
            bool parsed = int.TryParse(
                alleleText,
                System.Globalization.NumberStyles.None,
                System.Globalization.CultureInfo.InvariantCulture,
                out index);

            return parsed ? index : -1;
        }

        /// <summary>
        ///     Combines the types of the two alleles of a genotype into a single variant type.
        /// </summary>
        private static VariantType MergeAlleleVariantTypes(VariantType first, VariantType second)
        {
            switch (first)
            {
            case VariantType.Reference:
                // a reference allele contributes nothing; the other allele decides
                return second;

            case VariantType.SNV:
                switch (second)
                {
                case VariantType.Reference:
                case VariantType.SNV:
                    return VariantType.SNV;

                case VariantType.Insertion:
                    return VariantType.SNVInsertion;

                case VariantType.Deletion:
                    return VariantType.SNVDeletion;

                default:
                    return VariantType.Complex;
                }

            case VariantType.MNP:
                switch (second)
                {
                case VariantType.Reference:
                case VariantType.MNP:
                    return VariantType.MNP;

                default:
                    return VariantType.Complex;
                }

            case VariantType.Insertion:
                switch (second)
                {
                case VariantType.Reference:
                case VariantType.Insertion:
                    return VariantType.Insertion;

                case VariantType.SNV:
                    return VariantType.SNVInsertion;

                case VariantType.Deletion:
                    return VariantType.InsertionDeletion;

                default:
                    return VariantType.Complex;
                }

            case VariantType.Deletion:
                switch (second)
                {
                case VariantType.Reference:
                case VariantType.Deletion:
                    return VariantType.Deletion;

                case VariantType.SNV:
                    return VariantType.SNVDeletion;

                case VariantType.Insertion:
                    return VariantType.InsertionDeletion;

                default:
                    return VariantType.Complex;
                }

            default:
                return VariantType.Complex;
            }
        }