コード例 #1
0
        /// <summary>
        /// Adds the reference positions covered by one aligned read to the depth-of-coverage counts.
        /// </summary>
        /// <remarks>
        /// The read's CIGAR is walked from the read's start position (converted to a 0-based index).
        /// M, = and X operations increment <c>depthCounts</c> once per reference position;
        /// D advances along the reference without counting; I, S and H are ignored because they
        /// do not consume reference positions. Reads that are null, whose reference name differs from
        /// <c>StaticResources.MT_CHROMOSOME_NAME</c>, or whose CIGAR is reported as uninformative by
        /// <c>CigarUtils.NoInformationCigar</c> are skipped without error.
        /// </remarks>
        /// <param name="orgSeq">The aligned read to count; may be null.</param>
        /// <exception cref="NotSupportedException">
        /// Thrown when the CIGAR contains a padding (P) or skipped-region (N) operation.
        /// </exception>
        /// <exception cref="FormatException">Thrown when the CIGAR contains an unknown operation.</exception>
        public void ProcessCountCoverageFromSequence(CompactSAMSequence orgSeq)
        {
            if (orgSeq == null || orgSeq.RName != StaticResources.MT_CHROMOSOME_NAME)
            {
                return;
            }

            string CIGAR = orgSeq.CIGAR;
            if (CigarUtils.NoInformationCigar(CIGAR))
            {
                return;
            }

            // Pos is shifted down by one so it can index depthCounts directly.
            int curRef = orgSeq.Pos - 1;

            foreach (var element in CigarUtils.GetCigarElements(CIGAR))
            {
                var len = element.Length;
                switch (element.Operation)
                {
                case 'P':     // padding (silent deletion from padded reference)
                case 'N':     // skipped region from reference
                    // NotSupportedException derives from Exception, so existing catch blocks still apply.
                    throw new NotSupportedException("Coverage counting is not built to handle CIGAR padding (P) or skipped regions (N) yet");

                case 'M':     // match or mismatch
                case '=':     // match
                case 'X':     // mismatch
                    for (int k = 0; k < len; k++)
                    {
                        if (curRef >= StaticResources.CRS_LENGTH)
                        {
                            // The read runs off the end of the reference; the overhang is not counted.
                            Debug.WriteLine("Seq: " + orgSeq.ID + " is aligned past the MT DNA reference genome");
                            break;
                        }
                        depthCounts[curRef] += 1.0;
                        curRef++;
                    }
                    break;

                case 'D':     // deletion from the reference: consumes reference, adds no depth
                    curRef += len;
                    break;

                case 'I':     // insertion to the reference
                case 'S':     // soft clipped
                case 'H':     // hard clipped
                    // None of these consume reference positions.
                    break;

                default:
                    throw new FormatException("Unexpected SAM Cigar element found " + element.Operation.ToString());
                }
            }
        }
コード例 #2
0
 /// <summary>
 /// Checks whether a sequence satisfies the assumptions made throughout this class,
 /// so that individual methods do not each need their own checks.
 /// </summary>
 /// <param name="seq">The aligned read to check; must not be null.</param>
 /// <returns>
 /// <c>false</c> when the reference name is null or empty, when <c>RefEndPos</c> is not greater
 /// than <c>Pos</c>, or when the CIGAR is null, empty or "*"; otherwise <c>true</c>.
 /// </returns>
 /// <exception cref="ArgumentNullException">Thrown when <paramref name="seq"/> is null.</exception>
 private static bool validateSequence(CompactSAMSequence seq)
 {
     if (seq == null)
     {
         throw new ArgumentNullException("seq");
     }

     // The checks run in a fixed order and stop at the first one that fails.
     if (String.IsNullOrEmpty(seq.RName))
     {
         return false;
     }

     if (seq.RefEndPos <= seq.Pos)
     {
         return false;
     }

     if (String.IsNullOrEmpty(seq.CIGAR))
     {
         return false;
     }

     return seq.CIGAR != "*";
 }
コード例 #3
0
        /// <summary>
        /// Reads the next alignment record from the uncompressed BAM data and decodes it into a
        /// <see cref="CompactSAMSequence"/>.
        /// </summary>
        /// <remarks>
        /// Only the reference name, position, flags, read name, CIGAR, bases and base qualities are
        /// decoded; everything after the quality values in the record is ignored.
        /// </remarks>
        /// <returns>
        /// The decoded read. Its CIGAR is "*" when the record has no CIGAR operations, and its
        /// quality array is left zero-filled when the record stores no qualities (first quality byte 0xFF).
        /// </returns>
        /// <exception cref="FileFormatException">
        /// Thrown when a CIGAR operation code is outside the range 0-8.
        /// </exception>
        protected CompactSAMSequence GetAlignedSequence()
        {
            // CIGAR operation codes 0-8 map onto these characters, in this order.
            const string CigarOperationCodes = "MIDNSHP=X";

            // The record is preceded by its own length as a 4-byte integer.
            byte[] lengthBytes = new byte[4];
            ReadUnCompressedData(lengthBytes, 0, 4);
            int blockLen = Helper.GetInt32(lengthBytes, 0);

            byte[] alignmentBlock = new byte[blockLen];
            ReadUnCompressedData(alignmentBlock, 0, blockLen);

            // Bytes 0-4: reference sequence index, -1 meaning "no reference".
            int    refSeqIndex = Helper.GetInt32(alignmentBlock, 0);
            string RName       = refSeqIndex == -1 ? "*" : refSeqNames[refSeqIndex];

            // Bytes 4-8: position; one is added to the stored value.
            int Pos = Helper.GetInt32(alignmentBlock, 4) + 1;

            // Bytes 8-12: "bin<<16|mapQual<<8|read_name_len"; only the name length is needed.
            int queryNameLen = (int)(Helper.GetUInt32(alignmentBlock, 8) & 0x000000FF);

            // Bytes 12-16: flags in the upper 16 bits, CIGAR operation count in the lower 16.
            // Shift before casting so a flag with its top bit set is not sign-extended.
            UInt32 flagAndCigarCount = Helper.GetUInt32(alignmentBlock, 12);
            int    flagValue         = (int)(flagAndCigarCount >> 16);
            int    cigarLen          = (int)(flagAndCigarCount & 0x0000FFFF);

            // Bytes 16-20: number of bases in the read.
            int readLen = Helper.GetInt32(alignmentBlock, 16);

            // Bytes 32-(32+queryNameLen): read name; its final byte is excluded from the string.
            string name   = System.Text.ASCIIEncoding.ASCII.GetString(alignmentBlock, 32, queryNameLen - 1);
            int    offset = 32 + queryNameLen;

            // CIGAR: one 32-bit value per operation, length in the upper 28 bits, code in the lower 4.
            StringBuilder cigarBuilder = new StringBuilder();
            for (int i = 0; i < cigarLen; i++, offset += 4)
            {
                UInt32 cigarValue = Helper.GetUInt32(alignmentBlock, offset);
                int    opCode     = (int)(cigarValue & 0x0000000F);
                if (opCode >= CigarOperationCodes.Length)
                {
                    throw new FileFormatException(Properties.Resource.BAM_InvalidCIGAR);
                }

                cigarBuilder.Append((cigarValue >> 4).ToString(CultureInfo.InvariantCulture));
                cigarBuilder.Append(CigarOperationCodes[opCode]);
            }
            string cigar = cigarBuilder.Length == 0 ? "*" : cigarBuilder.ToString();

            // Bases: two 4-bit codes per byte, high nibble first. A single per-base loop handles
            // odd lengths and zero-length reads without touching bytes that are not sequence data.
            byte[] seqData = new byte[readLen];
            for (int i = 0; i < readLen; i++)
            {
                byte packed = alignmentBlock[offset + (i / 2)];
                int  code   = (i % 2 == 0) ? (packed & 0xF0) >> 4 : packed & 0x0F;
                seqData[i] = GetSeqCharAsByte(code);
            }
            offset += (readLen + 1) / 2;

            // Qualities: one byte per base, offset by 33 here; a leading 0xFF means none were stored.
            byte[] qualValues = new byte[readLen];
            if (readLen > 0 && alignmentBlock[offset] != 0xFF)
            {
                for (int i = 0; i < readLen; i++)
                {
                    qualValues[i] = (byte)(alignmentBlock[offset + i] + 33);
                }
            }

            // NOTE(review): confirm AutoDetectAlphabet and CompactSAMSequence accept an empty read.
            var alpha = Alphabets.AutoDetectAlphabet(seqData, 0, seqData.Length, null);
            //TODO: Possibly a bit unsafe here
            var toReturn = new CompactSAMSequence(alpha, FastQFormatType.GATK_Recalibrated, seqData, qualValues, false);

            toReturn.ID       = name;
            toReturn.Pos      = Pos;
            toReturn.CIGAR    = cigar;
            toReturn.RName    = RName;
            toReturn.SAMFlags = (SAMFlags)flagValue;
            return toReturn;
        }
コード例 #4
0
        /// <summary>
        /// Turns a <see cref="CompactSAMSequence"/> into a list of
        /// <see cref="BaseAndQualityAndPosition"/> objects, useful for adding to a pile-up.
        /// </summary>
        /// <remarks>
        /// The CIGAR is walked left to right. M, = and X emit one entry per base and advance both
        /// the read and the reference; I emits the inserted bases without advancing the reference;
        /// D emits '-' placeholders with the minimum quality while advancing the reference;
        /// S skips read bases; H is ignored.
        /// </remarks>
        /// <param name="seq">The aligned read to expand.</param>
        /// <returns>The emitted bases in CIGAR order.</returns>
        /// <exception cref="NotSupportedException">
        /// Thrown when the CIGAR contains a padding (P) or skipped-region (N) operation.
        /// </exception>
        /// <exception cref="FormatException">
        /// Thrown when the CIGAR contains an unknown operation or an operation without a valid length.
        /// </exception>
        static List<BaseAndQualityAndPosition> getBasesForSequence(CompactSAMSequence seq)
        {
            List<BaseAndQualityAndPosition> toReturn = new List<BaseAndQualityAndPosition>(seq.RefEndPos - seq.Pos + 10);
            string CIGAR = seq.CIGAR;

            // Get sequence bases and error probabilities
            var seq_phred_scores = seq.GetPhredQualityScores();
            var seq_bases        = seq.ToArray();

            int curRef      = seq.Pos;
            int curQuery    = 0;
            int lengthStart = 0;     // index of the first digit of the current operation's length

            // Single pass: digits accumulate until an operation character closes the token.
            for (int i = 0; i < CIGAR.Length; i++)
            {
                char op = CIGAR[i];
                if (Char.IsDigit(op))
                {
                    continue;
                }

                int opLength = int.Parse(CIGAR.Substring(lengthStart, i - lengthStart));
                lengthStart = i + 1;

                switch (op)
                {
                case 'P':     // padding (silent deletion from padded reference)
                case 'N':     // skipped region from reference
                    // NotSupportedException derives from Exception, so existing catch blocks still apply.
                    throw new NotSupportedException("Pile up methods not built to handle reference clipping (Cigar P or N) yet.");

                case 'M':     // match or mismatch
                case '=':     // match
                case 'X':     // mismatch
                    for (int k = 0; k < opLength; k++)
                    {
                        toReturn.Add(new BaseAndQualityAndPosition(curRef, 0, new BaseAndQuality(seq_bases[curQuery], (byte)seq_phred_scores[curQuery])));
                        curQuery++;
                        curRef++;
                    }
                    break;

                case 'I':     // insertion to the reference
                    // NOTE(review): inserted bases are recorded at the current reference position
                    // with k as the second argument, so the first inserted base shares (curRef, 0)
                    // with the aligned base that follows - confirm this is intended.
                    for (int k = 0; k < opLength; k++)
                    {
                        toReturn.Add(new BaseAndQualityAndPosition(curRef, k, new BaseAndQuality(seq_bases[curQuery], (byte)seq_phred_scores[curQuery])));
                        curQuery++;
                    }
                    break;

                case 'D':     // deletion from the reference
                    // NOTE(review): k is passed as the second argument even though curRef also
                    // advances for every deleted base - confirm this is intended.
                    for (int k = 0; k < opLength; k++)
                    {
                        toReturn.Add(new BaseAndQualityAndPosition(curRef, k, new BaseAndQuality((byte)'-', byte.MinValue)));
                        curRef++;
                    }
                    break;

                case 'S':     // soft clipped: the bases are present in the read but not aligned
                    curQuery += opLength;
                    break;

                case 'H':     // hard clipped
                    break;

                default:
                    throw new FormatException("Unexpected SAM Cigar element found " + op.ToString());
                }
            }
            return toReturn;
        }