/// <summary>
/// Calls the variants by walking a pairwise alignment column by column.
///
/// Should only be used internally as assumptions are made that the alignments are
/// left-aligned and fulfill certain criteria (in particular, the reference row is
/// expected not to end in a gap run).
///
/// A run of gaps in <paramref name="refSeq"/> is reported as one insertion, a run of gaps in
/// <paramref name="querySeq"/> as one deletion, and a column where both rows hold different
/// non-gap symbols as a SNP.
/// </summary>
/// <returns>The variants, in the order their columns appear in the alignment.</returns>
/// <param name="refSeq">Reference row of the alignment, including gap symbols.</param>
/// <param name="querySeq">Query row of the alignment, including gap symbols. It is indexed with the
/// same alignment column index as <paramref name="refSeq"/>.</param>
/// <param name="originallyReverseComplemented">If set to <c>true</c> the query sequence was originally
/// reverse complemented. (this affects QV value scoring)</param>
internal static List<Variant> CallVariants(byte[] refSeq, BPandQV[] querySeq, bool originallyReverseComplemented)
{
    if (originallyReverseComplemented)
    {
        // Return value (if any) is ignored, so this presumably adjusts querySeq in place.
        AlignmentUtils.ReverseQVValuesForHomopolymers(querySeq);
    }

    List<Variant> variants = new List<Variant>();

    // Now call variants.
    var gap = DnaAlphabet.Instance.Gap;
    int i = 0;       // Current alignment column; indexes BOTH refSeq and querySeq.
    int refPos = 0;  // Number of non-gap reference symbols consumed before column i.
    while (i < refSeq.Length)
    {
        if (refSeq[i] == gap)
        {
            // Gap run in the reference: the query carries extra bases (insertion).
            int len = AlignmentUtils.GetGapLength(i, refSeq);
            var nextBasePos = (i + len);
            // Should always be true as we don't end in gaps
            Debug.Assert(nextBasePos < refSeq.Length);
            var hplenAndChar = determineHomoPolymerLength(nextBasePos, refSeq);
            var bases = getBases(querySeq, i, len);
            // refPos - 1 is the last reference base before the gap run (-1 if the run starts the alignment).
            var newVariant = new IndelVariant(refPos - 1, len, bases, IndelType.Insertion,
                                              hplenAndChar.Item2, hplenAndChar.Item1,
                                              (i == 0 || (i + len + hplenAndChar.Item1) >= refSeq.Length));
            // querySeq is a gapped alignment row, so it must be indexed by the alignment
            // column i. An ungapped query offset falls behind i after any deletion and
            // would pick up the QV of an unrelated column.
            newVariant.QV = querySeq[i].QV;
            variants.Add(newVariant);
            i += len;
        }
        else if (querySeq[i].BP == gap)
        {
            // Gap run in the query: reference bases are missing from the query (deletion).
            int len = AlignmentUtils.GetGapLength(i, querySeq);
            var bases = getBases(refSeq, i, len);
            var hplenAndChar = determineHomoPolymerLength(i, refSeq);
            var newVariant = new IndelVariant(refPos - 1, len, bases, IndelType.Deletion,
                                              hplenAndChar.Item2, hplenAndChar.Item1,
                                              (i == 0 || (i + len + hplenAndChar.Item1) >= refSeq.Length));
            /* The QV for the gap is taken from a neighbouring query base: normally the
             * next base, or the previous one if it's a reverse complemented alignment.
             * However, this is not true if it is a homopolymer, because what would have
             * been the previous position is the next position after left aligning and
             * reversing the position of the QV value.
             *
             * Consider the following
             *      --*-         -*--
             *      A-TA   -->   TA-T
             *      AGTA         TACT
             *
             * However,
             *      --*--        --*--
             *      A-TTA  --->  T-AAT
             *      ATTTA        TAAAT
             */
            // No QV is assigned when the gap run reaches the end of the query row.
            if ((i + len) < querySeq.Length)
            {
                // NOTE(review): i - 1 is -1 when a reverse-complemented alignment starts with a
                // non-homopolymer deletion; presumably such alignments never occur - confirm.
                var qc_pos = originallyReverseComplemented ? i - 1 : i + len;
                if (newVariant.InHomopolymer)
                {
                    qc_pos = i + len;
                }
                newVariant.QV = querySeq[qc_pos].QV;
            }
            variants.Add(newVariant);
            i += len;
            refPos += len;
        }
        else
        {
            // Both rows hold a base in this column; a mismatch is a SNP.
            if (querySeq[i].BP != refSeq[i])
            {
                var newVariant = new SNPVariant(refPos, (char)querySeq[i].BP, (char)refSeq[i],
                                                (i == 0 || i == (refSeq.Length - 1)));
                // Same reasoning as for insertions: index the gapped query row by column.
                newVariant.QV = querySeq[i].QV;
                variants.Add(newVariant);
            }
            i++;
            refPos++;
        }
    }
    return variants;
}
/// <summary>
/// Scans a pairwise alignment one column at a time and collects every difference
/// between the reference row and the query row as a variant.
///
/// Internal use only: the alignment is assumed to be left-aligned and to fulfill
/// certain criteria (for example, the reference row is asserted not to end in a gap run).
/// </summary>
/// <returns>The variants found, in alignment order.</returns>
/// <param name="refSeq">Reference row of the alignment (may contain gap symbols).</param>
/// <param name="querySeq">Query row of the alignment (may contain gap symbols); read with the same
/// column index as <paramref name="refSeq"/>.</param>
/// <param name="originallyReverseComplemented">If <c>true</c>, the query was originally reverse
/// complemented, which changes where quality values are read from.</param>
internal static List<Variant> CallVariants(byte[] refSeq, BPandQV[] querySeq, bool originallyReverseComplemented)
{
    if (originallyReverseComplemented)
    {
        AlignmentUtils.ReverseQVValuesForHomopolymers(querySeq);
    }

    var gapSymbol = DnaAlphabet.Instance.Gap;
    var found = new List<Variant>();

    int refCoord = 0; // reference bases consumed so far (gap columns in the reference are not counted)
    int column = 0;   // current alignment column, shared by both rows
    while (column < refSeq.Length)
    {
        // ---- Insertion: the reference row has a run of gaps. ----
        if (refSeq[column] == gapSymbol)
        {
            int runLength = AlignmentUtils.GetGapLength(column, refSeq);
            int firstBaseAfterRun = column + runLength;
            // The reference is not expected to end in gaps, so a base must follow the run.
            Debug.Assert(firstBaseAfterRun < refSeq.Length);
            var homopolymer = determineHomoPolymerLength(firstBaseAfterRun, refSeq);
            var insertedBases = getBases(querySeq, column, runLength);
            bool touchesEnd = column == 0 || (column + runLength + homopolymer.Item1) >= refSeq.Length;

            // The QV comes from the first query column of the inserted run.
            found.Add(new IndelVariant(refCoord - 1, runLength, insertedBases, IndelType.Insertion,
                                       homopolymer.Item2, homopolymer.Item1, touchesEnd)
            {
                QV = querySeq[column].QV
            });

            column += runLength;
            continue;
        }

        // ---- Deletion: the query row has a run of gaps. ----
        if (querySeq[column].BP == gapSymbol)
        {
            int runLength = AlignmentUtils.GetGapLength(column, querySeq);
            var deletedBases = getBases(refSeq, column, runLength);
            var homopolymer = determineHomoPolymerLength(column, refSeq);
            bool touchesEnd = column == 0 || (column + runLength + homopolymer.Item1) >= refSeq.Length;
            var deletion = new IndelVariant(refCoord - 1, runLength, deletedBases, IndelType.Deletion,
                                            homopolymer.Item2, homopolymer.Item1, touchesEnd);

            /* Which query base lends its QV to the deletion?
             *
             * Ordinarily it is the base just after the gap run, or the base just before
             * it when the alignment was originally reverse complemented. Inside a
             * homopolymer the base after the run is always used, because left-aligning
             * and reversing the QV positions turns the would-be previous position into
             * the next one.
             *
             * Non-homopolymer case:
             *      --*-         -*--
             *      A-TA   -->   TA-T
             *      AGTA         TACT
             *
             * Homopolymer case:
             *      --*--        --*--
             *      A-TTA  --->  T-AAT
             *      ATTTA        TAAAT
             */
            int columnAfterRun = column + runLength;
            if (columnAfterRun < querySeq.Length)
            {
                bool readFollowingBase = deletion.InHomopolymer || !originallyReverseComplemented;
                // NOTE(review): column - 1 is -1 if a reverse-complemented alignment begins with a
                // non-homopolymer deletion; presumably callers never produce that - confirm.
                int qvColumn = readFollowingBase ? columnAfterRun : column - 1;
                deletion.QV = querySeq[qvColumn].QV;
            }
            // When the run reaches the end of the query row the QV is left at its default.

            found.Add(deletion);
            column += runLength;
            refCoord += runLength;
            continue;
        }

        // ---- Both rows carry a base: report a SNP if they disagree. ----
        if (querySeq[column].BP != refSeq[column])
        {
            bool onEdge = column == 0 || column == (refSeq.Length - 1);
            found.Add(new SNPVariant(refCoord, (char)querySeq[column].BP, (char)refSeq[column], onEdge)
            {
                QV = querySeq[column].QV
            });
        }
        column++;
        refCoord++;
    }

    return found;
}