Beispiel #1
0
        /// <summary>
        /// Works out the genotype type by scanning the alleles of this genotype.
        /// Meant to be invoked only when the type has not been calculated yet.
        /// </summary>
        /// <returns>
        /// UNAVAILABLE when there are no alleles; NO_CALL when every allele is a no-call;
        /// MIXED when no-calls and called alleles are both present; HET when at least two
        /// different called alleles are present; otherwise HOM_REF or HOM_VAR depending on
        /// whether the single called allele is the reference.
        /// </returns>
        protected internal virtual GenotypeType determineType()
        {
            // TODO -- this code is slow and could be optimized for the diploid case
            IList <Allele> calls = Alleles;

            if (calls.Count == 0)
            {
                return GenotypeType.UNAVAILABLE;
            }

            bool   anyNoCall   = false;
            bool   anyMismatch = false;
            Allele firstCalled = null;

            foreach (Allele current in calls)
            {
                if (current.NoCall)
                {
                    anyNoCall = true;
                    continue;
                }

                if (firstCalled == null)
                {
                    // Remember the first called allele; every later one is compared to it.
                    firstCalled = current;
                    continue;
                }

                if (!current.Equals(firstCalled))
                {
                    anyMismatch = true;
                }
            }

            if (firstCalled == null)
            {
                if (anyNoCall)
                {
                    return GenotypeType.NO_CALL;
                }

                // A non-empty list that held neither a no-call nor a called allele.
                throw new Exception("BUG: there are no alleles present in this genotype but the alleles list is not null");
            }

            if (anyNoCall)
            {
                return GenotypeType.MIXED;
            }

            if (anyMismatch)
            {
                return GenotypeType.HET;
            }

            return firstCalled.Reference ? GenotypeType.HOM_REF : GenotypeType.HOM_VAR;
        }
Beispiel #2
0
        /// <summary>
        /// Classifies a single alternate allele against the reference allele.
        /// </summary>
        /// <param name="reference"> the reference allele; must not be symbolic </param>
        /// <param name="allele"> the alternate allele to classify </param>
        /// <returns> SYMBOLIC, SNP, MNP or INDEL </returns>
        private static VariantType typeOfBiallelicVariant(Allele reference, Allele allele)
        {
            if (reference.Symbolic)
            {
                throw new Exception("Unexpected error: encountered a record with a symbolic reference allele");
            }

            if (allele.Symbolic)
            {
                return VariantType.SYMBOLIC;
            }

            int refLength = reference.Length;
            int altLength = allele.Length;

            // Important note: an earlier version checked whether one allele was a prefix of the
            // other and returned MIXED when it was not. That check is not appropriate, e.g.
            // REF = CTTA and ALT = C,CT,CA
            // should be assigned the INDEL type but was being marked as MIXED. This method only
            // performs a pairwise comparison of one alternate allele against the reference, while
            // MIXED is reserved for multiple alternate alleles of different types, so MIXED must
            // never be returned from here. Anything that is not symbolic and not of equal length
            // (SNP / MNP) is therefore an INDEL.
            if (refLength != altLength)
            {
                return VariantType.INDEL;
            }

            return altLength == 1 ? VariantType.SNP : VariantType.MNP;
        }
Beispiel #3
0
        /// <summary>
        /// Compute the end position for this VariantContext from the alleles themselves
        ///
        /// In the trivial case this is a single BP event and end = start
        /// In general the end is start + ref length - 1, handling the case where ref length == 0
        /// However, if alleles contains a symbolic allele then we use endForSymbolicAlleles in all cases
        /// </summary>
        /// <param name="alleles"> the list of alleles to consider.  The reference allele must be the first one </param>
        /// <param name="start"> the known start position of this event </param>
        /// <param name="endForSymbolicAlleles"> the end position to use if any of the alleles is symbolic.  Can be -1
        ///                              if none is expected, but an error is thrown if one is found </param>
        /// <returns> the computed end position </returns>
        /// <exception cref="Exception"> if the first allele is not the reference, or if a symbolic allele is
        ///                              present while endForSymbolicAlleles is -1 </exception>
        public static int ComputeEndFromAlleles(IList <Allele> alleles, int start, int endForSymbolicAlleles)
        {
            Allele reference = alleles [0];

            if (reference.NonReference)
            {
                throw new Exception("computeEndFromAlleles requires first allele to be reference");
            }

            if (VariantContext.HasSymbolicAlleles(alleles))
            {
                // -1 is the sentinel for "no end supplied"; a symbolic allele cannot be
                // resolved to an end position without one.
                if (endForSymbolicAlleles == -1)
                {
                    throw new Exception("computeEndFromAlleles found a symbolic allele but endForSymbolicAlleles was not provided");
                }
                return endForSymbolicAlleles;
            }

            // Math.Max guards the ref length == 0 case so the end never precedes the start.
            return start + Math.Max(reference.Length - 1, 0);
        }
Beispiel #4
0
        /// <summary>
        /// Validates a raw allele string read from a VCF record, reporting problems through
        /// generateException (or a VCFParsingError for over-long alleles).
        /// </summary>
        /// <param name="allele"> the allele to check </param>
        /// <param name="isRef"> true when the allele is the reference allele </param>
        /// <param name="lineNo"> the line number for this record </param>
        private static void checkAllele(string allele, bool isRef, int lineNo)
        {
            bool isEmpty = allele == null || allele.Length == 0;
            if (isEmpty)
            {
                generateException("Empty alleles are not permitted in VCF records", lineNo);
            }

            // A limit of -1 switches the size check off.
            bool sizeCheckEnabled = MAX_ALLELE_SIZE_BEFORE_WARNING != -1;
            if (sizeCheckEnabled && allele.Length > MAX_ALLELE_SIZE_BEFORE_WARNING)
            {
                string message = string.Format("Allele detected with length {0:D} exceeding max size {1:D} at approximately line {2:D}, likely resulting in degraded VCF processing performance", allele.Length, MAX_ALLELE_SIZE_BEFORE_WARNING, lineNo);
                throw new VCFParsingError(message);
            }

            if (isSymbolicAllele(allele))
            {
                // Symbolic alleles only need the "not as reference" check.
                if (isRef)
                {
                    generateException("Symbolic alleles not allowed as reference allele: " + allele, lineNo);
                }
                return;
            }

            // check for VCF3 insertions or deletions
            char leading = allele [0];
            if (leading == VCFConstants.DELETION_ALLELE_v3 || leading == VCFConstants.INSERTION_ALLELE_v3)
            {
                generateException("Insertions/Deletions are not supported when reading 3.x VCF's. Please convert your file to VCF4 using VCFTools, available at http://vcftools.sourceforge.net/index.html", lineNo);
            }

            if (!Allele.AcceptableAlleleBases(allele))
            {
                generateException("Unparsable vcf record with allele " + allele, lineNo);
            }

            if (isRef && allele.Equals(VCFConstants.EMPTY_ALLELE))
            {
                generateException("The reference allele cannot be missing", lineNo);
            }
        }
Beispiel #5
0
        /// <summary>
        /// Parse the reference and alternate allele columns into a list of alleles. </summary>
        /// <param name="reference"> the reference base </param>
        /// <param name="alts"> a string of alternates to break into alleles </param>
        /// <param name="lineNo">  the line number for this record </param>
        /// <returns> a list of alleles with the reference allele first, followed by the alternates in
        ///           the order they appear in <paramref name="alts"/> </returns>
        protected internal static IList <Allele> parseAlleles(string reference, string alts, int lineNo)
        {
            IList <Allele> alleles = new List <Allele> (2);           // we are almost always biallelic

            // ref
            checkAllele(reference, true, lineNo);
            Allele refAllele = Allele.Create(reference, true);

            alleles.Add(refAllele);

            // Search for the delimiter as a char: this is an ordinal comparison, whereas the
            // string overload of IndexOf performs a culture-sensitive search.
            if (alts.IndexOf(',') < 0)                  // only 1 alternative, don't call string split
            {
                parseSingleAltAllele(alleles, alts, lineNo);
            }
            else
            {
                // Empty entries (e.g. from consecutive commas) are dropped by the split options.
                foreach (string alt in alts.Split(VCFConstants.COMMA_AS_CHAR_ARRAY, StringSplitOptions.RemoveEmptyEntries))
                {
                    parseSingleAltAllele(alleles, alt, lineNo);
                }
            }
            return alleles;
        }
Beispiel #6
0
        /// <summary>
        /// Builds the text line for a single record: CHROM, POS, ID, REF, ALT, QUAL, FILTER and
        /// INFO, followed by the FORMAT and genotype columns when genotype data is present.
        /// Columns are separated by VCFConstants.FIELD_SEPARATOR.
        /// </summary>
        /// <param name="vc">The Variant Context object </param>
        /// <returns> the formatted record, terminated by "\n" </returns>
        /// <exception cref="Exception"> wraps any IOException raised while the record is being formatted </exception>
        protected string getVariantLinetoWrite(VariantContext vc)
        {
            if (doNotWriteGenotypes)
            {
                vc = (new VariantContextBuilder(vc)).noGenotypes().make();
            }
            try
            {
                //Convert alleles to 1,2,3,etc. numbering
                IDictionary <Allele, string> alleleMap = buildAlleleMap(vc);
                StringBuilder lineToWrite = new StringBuilder();

                // CHROM, POS, ID, REF
                lineToWrite.Append(String.Join(VCFConstants.FIELD_SEPARATOR, vc.Chr, vc.Start.ToString(), vc.ID, vc.Reference.DisplayString));
                // String.Join puts separators only between its arguments, so REF needs an
                // explicit trailing separator; without it REF and ALT are fused into one column.
                lineToWrite.Append(VCFConstants.FIELD_SEPARATOR);

                // ALT
                if (vc.Variant)
                {
                    Allele altAllele = vc.GetAlternateAllele(0);
                    string alt       = altAllele.DisplayString;
                    lineToWrite.Append(alt);

                    // Remaining alternates are comma-separated.
                    for (int i = 1; i < vc.AlternateAlleles.Count; i++)
                    {
                        altAllele = vc.GetAlternateAllele(i);
                        alt       = altAllele.DisplayString;
                        lineToWrite.Append(",");
                        lineToWrite.Append(alt);
                    }
                }
                else
                {
                    lineToWrite.Append(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD);
                }
                lineToWrite.Append(VCFConstants.FIELD_SEPARATOR);

                // QUAL
                if (!vc.HasLog10PError)
                {
                    lineToWrite.Append(VCFConstants.MISSING_VALUE_v4);
                }
                else
                {
                    lineToWrite.Append(formatQualValue(vc.PhredScaledQual));
                }
                lineToWrite.Append(VCFConstants.FIELD_SEPARATOR);

                // FILTER
                string filters = getFilterString(vc);
                lineToWrite.Append(filters);
                lineToWrite.Append(VCFConstants.FIELD_SEPARATOR);

                // INFO -- collected into a SortedDictionary so the fields are ordered by key
                IDictionary <string, string> infoFields = new SortedDictionary <string, string>();
                foreach (KeyValuePair <string, object> field in vc.Attributes)
                {
                    string key = field.Key;
                    if (!mHeader.hasInfoLine(key))
                    {
                        fieldIsMissingFromHeaderError(vc, key, "INFO");
                    }
                    string outputValue = formatVCFField(field.Value);
                    if (outputValue != null)
                    {
                        infoFields[key] = outputValue;
                    }
                }
                lineToWrite.Append(getInfoString(infoFields));

                // FORMAT
                GenotypesContext gc = vc.Genotypes;
                if (gc.LazyWithData && ((LazyGenotypesContext)gc).UnparsedGenotypeData is string)
                {
                    // Unparsed genotype text is passed through unchanged.
                    lineToWrite.Append(VCFConstants.FIELD_SEPARATOR);
                    lineToWrite.Append(((LazyGenotypesContext)gc).UnparsedGenotypeData.ToString());
                }
                else
                {
                    IList <string> genotypeAttributeKeys = calcVCFGenotypeKeys(vc);
                    if (genotypeAttributeKeys.Count > 0)
                    {
                        foreach (String format in genotypeAttributeKeys)
                        {
                            if (!mHeader.hasFormatLine(format))
                            {
                                fieldIsMissingFromHeaderError(vc, format, "FORMAT");
                            }
                        }

                        string genotypeFormatString = String.Join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys);
                        lineToWrite.Append(VCFConstants.FIELD_SEPARATOR);
                        lineToWrite.Append(genotypeFormatString);
                        lineToWrite.Append(getGenotypeDataText(vc, alleleMap, genotypeAttributeKeys));
                    }
                }
                lineToWrite.Append("\n");
                return lineToWrite.ToString();
            }
            catch (IOException e)
            {
                throw new Exception("Unable to write the VCF object:\n " + vc.ToString() + "\n", e);
            }
        }
Beispiel #7
0
 /// <summary>
 /// Counts the occurrences of the given allele among the alleles of this genotype.
 /// </summary>
 /// <param name="allele"> the allele to look for; each stored allele is compared to it with Equals </param>
 /// <returns> a non-negative number of times the allele occurred in this sample's genotype </returns>
 public virtual int CountAlleles(Allele allele)
 {
     int occurrences = Alleles.Where(candidate => candidate.Equals(allele)).Count();

     return occurrences;
 }