Ejemplo n.º 1
0
        /// <summary>
        /// Calls the variants by walking the aligned reference and query column by column.
        ///
        /// A gap in the reference is reported as an insertion, a gap in the query as a deletion,
        /// and a column where both sequences hold a base but the bases differ as a SNP.
        ///
        /// Should only be used internally as assumptions are made that the alignments are left-aligned and fulfill certain criteria.
        /// </summary>
        /// <returns>The variants, in the order they are encountered along the alignment.</returns>
        /// <param name="refSeq">Reference seq. (aligned; gap symbols mark insertions in the query)</param>
        /// <param name="querySeq">Query seq. (aligned; indexed by the same alignment column as <paramref name="refSeq"/>)</param>
        /// <param name="originallyReverseComplemented">If set to <c>true</c> the query sequence was originally reverse complemented. (this affects QV value scoring)</param>
        internal static List <Variant> CallVariants(byte[] refSeq, BPandQV[] querySeq, bool originallyReverseComplemented)
        {
            if (originallyReverseComplemented)
            {
                AlignmentUtils.ReverseQVValuesForHomopolymers(querySeq);
            }
            List <Variant> variants = new List <Variant>();

            // Now call variants.
            var gap    = DnaAlphabet.Instance.Gap;
            int i      = 0; // current alignment column (indexes both refSeq and querySeq)
            int refPos = 0; // count of non-gap reference columns consumed so far

            while (i < refSeq.Length)
            {
                if (refSeq[i] == gap)
                {
                    // Gap in the reference: the query carries extra bases (insertion).
                    int len         = AlignmentUtils.GetGapLength(i, refSeq);
                    var nextBasePos = (i + len);
                    // Should always be true as we don't end in gaps
                    Debug.Assert(nextBasePos < refSeq.Length);
                    var hplenAndChar = determineHomoPolymerLength(nextBasePos, refSeq);
                    var bases        = getBases(querySeq, i, len);
                    // Last argument: true when the event starts in the first column or, together with
                    // the homopolymer run that follows it, reaches the end of the alignment.
                    var newVariant   = new IndelVariant(refPos - 1, len, bases, IndelType.Insertion,
                                                        hplenAndChar.Item2, hplenAndChar.Item1,
                                                        (i == 0 || (i + len + hplenAndChar.Item1) >= refSeq.Length));
                    // querySeq is indexed by alignment column, so the QV of the first inserted base
                    // lives at column i (an ungapped query offset would drift after any deletion).
                    newVariant.QV = querySeq[i].QV;
                    variants.Add(newVariant);
                    i += len;
                }
                else if (querySeq[i].BP == gap)
                {
                    // Gap in the query: reference bases are missing from the query (deletion).
                    int len          = AlignmentUtils.GetGapLength(i, querySeq);
                    var bases        = getBases(refSeq, i, len);
                    var hplenAndChar = determineHomoPolymerLength(i, refSeq);
                    var newVariant   = new IndelVariant(refPos - 1, len, bases,
                                                        IndelType.Deletion, hplenAndChar.Item2,
                                                        hplenAndChar.Item1, (i == 0 || (i + len + hplenAndChar.Item1) >= refSeq.Length));

                    /* An insertion mutation occurs BEFORE pos, so normally we get the next base
                     * or the last one if it's a reverse complemented alignment.  However, this is not true if
                     * it is a homopolymer because what would have been the previous position is the next position
                     * after left aligning and reversing the position of the QV value.
                     *
                     * Consider the following
                     * --*-       -*--
                     * A-TA   --> TA-T
                     * AGTA       TACT
                     *
                     * However,
                     * --*--         --*--
                     * A-TTA   ----> T-AAT
                     * ATTTA         TAAAT
                     *
                     */
                    if ((i + len) < querySeq.Length)
                    {
                        var qc_pos = originallyReverseComplemented ? i - 1 : i + len;
                        if (newVariant.InHomopolymer)
                        {
                            qc_pos = i + len;
                        }
                        // i - 1 is -1 when the deletion starts in the first column; there is no
                        // preceding base to borrow a QV from, so the QV is not assigned in that case.
                        if (qc_pos >= 0)
                        {
                            newVariant.QV = querySeq[qc_pos].QV;
                        }
                    }
                    variants.Add(newVariant);
                    i      += len;
                    refPos += len;
                }
                else
                {
                    // Both sequences hold a base in this column; a mismatch is a SNP.
                    if (querySeq[i].BP != refSeq[i])
                    {
                        var newVariant = new SNPVariant(refPos, (char)querySeq[i].BP, (char)refSeq[i], (i == 0 || i == (refSeq.Length - 1)));
                        // QV of the mismatching base itself, i.e. the same element the BP was read from.
                        newVariant.QV = querySeq[i].QV;
                        variants.Add(newVariant);
                    }
                    i++;
                    refPos++;
                }
            }
            return variants;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Calls the variants by walking the aligned reference and query column by column.
        ///
        /// A gap in the reference is reported as an insertion, a gap in the query as a deletion,
        /// and a column where both sequences hold a base but the bases differ as a SNP.
        ///
        /// Should only be used internally as assumptions are made that the alignments are left-aligned and fulfill certain criteria.
        /// </summary>
        /// <returns>The variants, in the order they are encountered along the alignment.</returns>
        /// <param name="refSeq">Reference seq. (aligned; gap symbols mark insertions in the query)</param>
        /// <param name="querySeq">Query seq. (aligned; indexed by the same alignment column as <paramref name="refSeq"/>)</param>
        /// <param name="originallyReverseComplemented">If set to <c>true</c> the query sequence was originally reverse complemented. (this affects QV value scoring)</param>
        internal static List<Variant> CallVariants(byte[] refSeq, BPandQV[] querySeq, bool originallyReverseComplemented)
        {
            if (originallyReverseComplemented) {
                AlignmentUtils.ReverseQVValuesForHomopolymers (querySeq);
            }
            List<Variant> variants = new List<Variant>();

            // Now call variants.
            var gap = DnaAlphabet.Instance.Gap;
            int i = 0;      // current alignment column (indexes both refSeq and querySeq)
            int refPos = 0; // count of non-gap reference columns consumed so far
            while (i < refSeq.Length)
            {
                if (refSeq[i] == gap)
                {
                    // Gap in the reference: the query carries extra bases (insertion).
                    int len = AlignmentUtils.GetGapLength(i, refSeq);
                    var nextBasePos = (i + len);
                    // Should always be true as we don't end in gaps
                    Debug.Assert (nextBasePos < refSeq.Length);
                    var hplenAndChar = determineHomoPolymerLength (nextBasePos, refSeq);
                    var bases = getBases(querySeq, i, len);
                    // Last argument: true when the event starts in the first column or, together with
                    // the homopolymer run that follows it, reaches the end of the alignment.
                    var newVariant = new IndelVariant(refPos - 1, len, bases, IndelType.Insertion,
                                                      hplenAndChar.Item2, hplenAndChar.Item1,
                                                      (i == 0 || (i + len + hplenAndChar.Item1) >= refSeq.Length));
                    // QV of the first inserted base.
                    newVariant.QV = querySeq[i].QV;
                    variants.Add(newVariant);
                    i += len;
                }
                else if (querySeq[i].BP == gap)
                {
                    // Gap in the query: reference bases are missing from the query (deletion).
                    int len = AlignmentUtils.GetGapLength(i, querySeq);
                    var bases = getBases(refSeq, i, len);
                    var hplenAndChar = determineHomoPolymerLength (i, refSeq);
                    var newVariant = new IndelVariant(refPos - 1, len, bases,
                                                      IndelType.Deletion, hplenAndChar.Item2,
                                                      hplenAndChar.Item1, (i == 0 || (i + len + hplenAndChar.Item1) >= refSeq.Length));
                    /* An insertion mutation occurs BEFORE pos, so normally we get the next base
                     * or the last one if it's a reverse complemented alignment.  However, this is not true if
                     * it is a homopolymer because what would have been the previous position is the next position
                     * after left aligning and reversing the position of the QV value.
                     *
                     * Consider the following
                     * --*-       -*--
                     * A-TA   --> TA-T
                     * AGTA       TACT
                     *
                     * However,
                     * --*--         --*--
                     * A-TTA   ----> T-AAT
                     * ATTTA         TAAAT
                     *
                     */
                    if ((i + len) < querySeq.Length) {
                        var qc_pos = originallyReverseComplemented ? i - 1 : i + len;
                        if (newVariant.InHomopolymer) {
                            qc_pos = i + len;
                        }
                        // i - 1 is -1 when the deletion starts in the first column; there is no
                        // preceding base to borrow a QV from, so the QV is not assigned in that case.
                        if (qc_pos >= 0) {
                            newVariant.QV = querySeq[qc_pos].QV;
                        }
                    }
                    variants.Add(newVariant);
                    i += len;
                    refPos += len;
                }
                else
                {
                    // Both sequences hold a base in this column; a mismatch is a SNP.
                    if (querySeq[i].BP != refSeq[i])
                    {
                        var newVariant = new SNPVariant(refPos, (char)querySeq[i].BP, (char)refSeq[i], (i == 0 || i == (refSeq.Length - 1)));
                        // QV of the mismatching base itself.
                        newVariant.QV = querySeq[i].QV;
                        variants.Add(newVariant);
                    }
                    i++;
                    refPos++;
                }
            }
            return variants;
        }