Пример #1
0
        /// <summary>
        /// Calls SAMParser.ParseQualityNSequence with a null sequence argument and
        /// a quality string chosen by <paramref name="method"/>, and expects the
        /// call to throw an ArgumentException or a FormatException.
        /// </summary>
        /// <param name="method">Selects which invalid argument combination is passed to the parser.</param>
        private static void ValidateQualitySeqLength(ParseOrFormatQualLength method)
        {
            const string successMessage =
                "SAM Parser P2 : Successfully validated the exception";

            SAMAlignedSequence alignedSequence = new SAMAlignedSequence();

            try
            {
                // Every recognised mode calls the parser with a null sequence; the modes
                // differ only in whether QName is set and which quality string is passed.
                bool invokeParser = true;
                string qualityData = String.Empty;

                switch (method)
                {
                case ParseOrFormatQualLength.AlignedSeq:
                    break;

                case ParseOrFormatQualLength.Sequencedata:
                    alignedSequence.QName = "Quality Value";
                    break;

                case ParseOrFormatQualLength.Qualitydata:
                case ParseOrFormatQualLength.QualityLength:
                    alignedSequence.QName = "Quality Value";
                    qualityData = Constants.QualitySequence;
                    break;

                default:
                    // Unrecognised mode: nothing is parsed, so the failure below is reached.
                    invokeParser = false;
                    break;
                }

                if (invokeParser)
                {
                    SAMParser.ParseQualityNSequence(
                        alignedSequence,
                        Alphabets.DNA,
                        null,
                        qualityData);
                }

                // Reached only when the parser call did not throw.
                Assert.Fail();
            }
            catch (ArgumentException)
            {
                ApplicationLog.WriteLine(successMessage);
            }
            catch (FormatException)
            {
                ApplicationLog.WriteLine(successMessage);
            }
        }
        /// <summary>
        /// Reads the next alignment block from the uncompressed BAM data and parses it
        /// into an aligned sequence, returning early for records that fall outside the
        /// requested region.
        /// </summary>
        /// <param name="start">Start of the region of interest; a record whose (RefEndPos - 1) is below this value is skipped unless its RName is the no-reference indicator.</param>
        /// <param name="end">End of the region of interest; a record whose Pos is greater than this value is skipped.</param>
        /// <returns>The parsed aligned sequence, or null when the record does not overlap the region.</returns>
        /// <exception cref="FileFormatException">A CIGAR operation code is greater than 8.</exception>
        /// <exception cref="FormatException">An optional field carries a value type rejected by the optional-field pattern.</exception>
        private SAMAlignedSequence GetAlignedSequence(int start, int end)
        {
            // CIGAR operation characters indexed by their numeric operation code (0..8).
            const string cigarOperations = "MIDNSHP=X";

            byte[] array = new byte[4];

            ReadUnCompressedData(array, 0, 4);
            int blockLen = Helper.GetInt32(array, 0);

            byte[] alignmentBlock = new byte[blockLen];
            ReadUnCompressedData(alignmentBlock, 0, blockLen);
            SAMAlignedSequence alignedSeq = new SAMAlignedSequence();
            int    value;
            UInt32 unsignedValue;

            // bytes 0-3: reference sequence index, -1 when there is no reference
            int refSeqIndex = Helper.GetInt32(alignmentBlock, 0);

            if (refSeqIndex == -1)
            {
                alignedSeq.RName = "*";
            }
            else
            {
                alignedSeq.RName = refSeqNames[refSeqIndex];
            }

            // bytes 4-7: position; the stored value is incremented by one
            alignedSeq.Pos = Helper.GetInt32(alignmentBlock, 4) + 1;

            // if there is no overlap no need to parse further.
            //     BAMPos > closedEnd
            // => (alignedSeq.Pos - 1) > end - 1
            if (alignedSeq.Pos > end)
            {
                return null;
            }

            // bytes 8-11: "bin<<16|mapQual<<8|read_name_len"
            unsignedValue = Helper.GetUInt32(alignmentBlock, 8);

            // Shift while the value is still unsigned and cast afterwards. Casting first
            // turns a word with bit 31 set into a negative int, and the arithmetic right
            // shift then sign-extends it into a negative result.
            // upper 16 bits: bin
            alignedSeq.Bin = (int)((unsignedValue & 0xFFFF0000) >> 16);
            // bits 8-15: mapping quality
            alignedSeq.MapQ = (int)((unsignedValue & 0x0000FF00) >> 8);
            // low 8 bits: query name length (the code below drops its last byte when decoding)
            int queryNameLen = (int)(unsignedValue & 0x000000FF);

            // bytes 12-15: flag in the upper 16 bits, CIGAR operation count in the lower 16 bits
            unsignedValue = Helper.GetUInt32(alignmentBlock, 12);
            int flagValue = (int)((unsignedValue & 0xFFFF0000) >> 16);

            alignedSeq.Flag = (SAMFlags)flagValue;
            int cigarLen = (int)(unsignedValue & 0x0000FFFF);

            // bytes 16-19: read length
            int readLen = Helper.GetInt32(alignmentBlock, 16);

            // bytes 20-23: mate reference sequence index, -1 when there is none
            int mateRefSeqIndex = Helper.GetInt32(alignmentBlock, 20);

            if (mateRefSeqIndex != -1)
            {
                alignedSeq.MRNM = refSeqNames[mateRefSeqIndex];
            }
            else
            {
                alignedSeq.MRNM = "*";
            }

            // bytes 24-27: mate position; the stored value is incremented by one
            alignedSeq.MPos = Helper.GetInt32(alignmentBlock, 24) + 1;

            // bytes 28-31: insert size
            alignedSeq.ISize = Helper.GetInt32(alignmentBlock, 28);

            // bytes 32-(32+queryNameLen): query name; its final byte is not decoded
            alignedSeq.QName = System.Text.Encoding.ASCII.GetString(alignmentBlock, 32, queryNameLen - 1);
            StringBuilder strbuilder = new StringBuilder();
            int           startIndex = 32 + queryNameLen;

            for (int i = startIndex; i < (startIndex + cigarLen * 4); i += 4)
            {
                // Get the CIGAR operation length stored in first 28 bits.
                UInt32 cigarValue = Helper.GetUInt32(alignmentBlock, i);
                strbuilder.Append(((cigarValue & 0xFFFFFFF0) >> 4).ToString(CultureInfo.InvariantCulture));

                // Get the CIGAR operation stored in last 4 bits (masked before the cast).
                value = (int)(cigarValue & 0x0000000F);

                // MIDNSHP=X => 0..8; codes 9..15 are rejected.
                if (value >= cigarOperations.Length)
                {
                    throw new FileFormatException(Properties.Resource.BAM_InvalidCIGAR);
                }

                strbuilder.Append(cigarOperations[value]);
            }

            string cigar = strbuilder.ToString();

            if (string.IsNullOrWhiteSpace(cigar))
            {
                alignedSeq.CIGAR = "*";
            }
            else
            {
                alignedSeq.CIGAR = cigar;
            }

            // if there is no overlap no need to parse further.
            // ZeroBasedRefEnd < start
            // => (alignedSeq.RefEndPos - 1) < start
            if (alignedSeq.RefEndPos - 1 < start && alignedSeq.RName != Properties.Resource.SAM_NO_REFERENCE_DEFINED_INDICATOR)
            {
                return null;
            }

            startIndex += cigarLen * 4;
            strbuilder  = new StringBuilder();
            int index = startIndex;

            // The sequence is packed two symbols per byte, high nibble first. All bytes
            // except the last one always contribute two symbols.
            // NOTE(review): when readLen is 0 this loop is skipped, yet the code after it
            // still reads one byte and appends two symbols; confirm zero-length reads
            // cannot reach this point.
            for (; index < (startIndex + (readLen + 1) / 2) - 1; index++)
            {
                // Get first 4 bit value
                value = (alignmentBlock[index] & 0xF0) >> 4;
                strbuilder.Append(GetSeqChar(value));
                // Get last 4 bit value
                value = alignmentBlock[index] & 0x0F;
                strbuilder.Append(GetSeqChar(value));
            }

            // Last byte: the low nibble is used only when the read length is even.
            value = (alignmentBlock[index] & 0xF0) >> 4;
            strbuilder.Append(GetSeqChar(value));
            if (readLen % 2 == 0)
            {
                value = alignmentBlock[index] & 0x0F;
                strbuilder.Append(GetSeqChar(value));
            }

            startIndex = index + 1;
            string strSequence = strbuilder.ToString();

            byte[] qualValues    = new byte[readLen];
            string strQualValues = "*";

            // A leading 0xFF byte leaves the quality string as "*"; otherwise each
            // stored score is offset by 33 before being decoded as ASCII.
            if (alignmentBlock[startIndex] != 0xFF)
            {
                for (int i = startIndex; i < (startIndex + readLen); i++)
                {
                    qualValues[i - startIndex] = (byte)(alignmentBlock[i] + 33);
                }

                strQualValues = System.Text.Encoding.ASCII.GetString(qualValues);
            }

            SAMParser.ParseQualityNSequence(alignedSeq, Alphabet, strSequence, strQualValues);

            startIndex += readLen;

            // Whatever remains in the block is a run of optional fields: 2-byte tag,
            // 1-byte value type, then the value itself.
            if (alignmentBlock.Length > startIndex + 4 && alignmentBlock[startIndex] != 0x0 && alignmentBlock[startIndex + 1] != 0x0)
            {
                for (index = startIndex; index < alignmentBlock.Length;)
                {
                    SAMOptionalField optionalField = new SAMOptionalField();
                    optionalField.Tag = System.Text.Encoding.ASCII.GetString(alignmentBlock, index, 2);
                    index            += 2;
                    char   vType     = (char)alignmentBlock[index++];
                    string valueType = vType.ToString();

                    // SAM format supports [AifZH] for value type.
                    // In BAM, an integer may be stored as a signed 8-bit integer (c), unsigned 8-bit integer (C), signed short (s), unsigned
                    // short (S), signed 32-bit (i) or unsigned 32-bit integer (I), depending on the signed magnitude of the integer. However,
                    // in SAM, all types of integers are presented as type 'i'.
                    string message = Helper.IsValidPatternValue("VType", valueType, BAMOptionalFieldRegex);
                    if (!string.IsNullOrEmpty(message))
                    {
                        throw new FormatException(message);
                    }

                    // GetOptionalValue advances index past the value it consumed.
                    optionalField.Value = GetOptionalValue(vType, alignmentBlock, ref index).ToString();

                    // Convert to SAM format.
                    if ("cCsSI".IndexOf(vType) >= 0)
                    {
                        valueType = "i";
                    }

                    optionalField.VType = valueType;

                    alignedSeq.OptionalFields.Add(optionalField);
                }
            }

            return alignedSeq;
        }
        /// <summary>
        /// Reads the next alignment block from the uncompressed BAM data and parses it
        /// into an aligned sequence, unless the filter rejects the raw block.
        /// </summary>
        /// <returns>The parsed aligned sequence, or null when Filter.Accept returns false for the block.</returns>
        /// <exception cref="Exception">A CIGAR operation code is greater than 8.</exception>
        /// <exception cref="ArgumentOutOfRangeException">An encoded quality score fails QualitativeSequence.ValidateQualScores.</exception>
        protected override SAMAlignedSequence GetAlignedSequence()
        {
            // CIGAR operation characters indexed by their numeric operation code (0..8).
            const string cigarOperations = "MIDNSHP=X";

            byte[] array = new byte[4];

            ReadUnCompressedData(array, 0, 4);
            int blockLen = Helper.GetInt32(array, 0);

            byte[] alignmentBlock = new byte[blockLen];
            ReadUnCompressedData(alignmentBlock, 0, blockLen);

            // Let the filter reject the raw block before any field is decoded.
            if (!Filter.Accept(alignmentBlock))
            {
                return null;
            }

            SAMAlignedSequence alignedSeq = new SAMAlignedSequence();
            int    value;
            UInt32 unsignedValue;

            // bytes 0-3: reference sequence index, -1 when there is no reference
            int refSeqIndex = Helper.GetInt32(alignmentBlock, 0);

            if (refSeqIndex == -1)
            {
                alignedSeq.SetPreValidatedRName("*");
            }
            else
            {
                alignedSeq.SetPreValidatedRName(RefSeqNames[refSeqIndex]);
            }

            // bytes 4-7: position; the stored value is incremented by one
            alignedSeq.Pos = Helper.GetInt32(alignmentBlock, 4) + 1;

            // bytes 8-11: "bin<<16|mapQual<<8|read_name_len"
            unsignedValue = Helper.GetUInt32(alignmentBlock, 8);

            // The bin (upper 16 bits) is not populated by this overload.
            // bits 8-15: mapping quality
            alignedSeq.MapQ = (int)((unsignedValue & 0x0000FF00) >> 8);

            // low 8 bits: query name length (the code below drops its last byte when decoding)
            int queryNameLen = (int)(unsignedValue & 0x000000FF);

            // bytes 12-15: flag in the upper 16 bits, CIGAR operation count in the lower 16 bits
            unsignedValue = Helper.GetUInt32(alignmentBlock, 12);

            // Shift while the value is still unsigned and cast afterwards. Casting first
            // turns a word with bit 31 set into a negative int, and the arithmetic right
            // shift then sign-extends it into a negative flag value.
            int flagValue = (int)((unsignedValue & 0xFFFF0000) >> 16);

            alignedSeq.Flag = (SAMFlags)flagValue;

            int cigarLen = (int)(unsignedValue & 0x0000FFFF);

            // bytes 16-19: read length
            int readLen = Helper.GetInt32(alignmentBlock, 16);

            // bytes 20-23: mate reference sequence index, -1 when there is none
            int mateRefSeqIndex = Helper.GetInt32(alignmentBlock, 20);

            if (mateRefSeqIndex != -1)
            {
                alignedSeq.SetPreValidatedMRNM(RefSeqNames[mateRefSeqIndex]);
            }
            else
            {
                alignedSeq.SetPreValidatedMRNM("*");
            }

            // bytes 24-27: mate position; the stored value is incremented by one
            alignedSeq.MPos = Helper.GetInt32(alignmentBlock, 24) + 1;

            // bytes 28-31: insert size
            alignedSeq.ISize = Helper.GetInt32(alignmentBlock, 28);

            // bytes 32-(32+queryNameLen): query name; its final byte is not decoded
            alignedSeq.QName = System.Text.Encoding.ASCII.GetString(alignmentBlock, 32, queryNameLen - 1);
            StringBuilder strbuilder = new StringBuilder();
            int           startIndex = 32 + queryNameLen;

            for (int i = startIndex; i < (startIndex + cigarLen * 4); i += 4)
            {
                // Get the CIGAR operation length stored in first 28 bits.
                UInt32 cigarValue = Helper.GetUInt32(alignmentBlock, i);
                strbuilder.Append(((cigarValue & 0xFFFFFFF0) >> 4).ToString(CultureInfo.InvariantCulture));

                // Get the CIGAR operation stored in last 4 bits (masked before the cast).
                value = (int)(cigarValue & 0x0000000F);

                // MIDNSHP=X => 0..8; codes 9..15 are rejected.
                if (value >= cigarOperations.Length)
                {
                    throw new Exception("Invalid CIGAR of query " + alignedSeq.QName);
                }

                strbuilder.Append(cigarOperations[value]);
            }

            string cigar = strbuilder.ToString();

            if (string.IsNullOrWhiteSpace(cigar))
            {
                alignedSeq.SetPreValidatedCIGAR("*");
            }
            else
            {
                alignedSeq.SetPreValidatedCIGAR(cigar);
            }

            startIndex += cigarLen * 4;
            var sequence      = new byte[readLen];
            int sequenceIndex = 0;
            int index         = startIndex;

            // The sequence is packed two symbols per byte, high nibble first. All bytes
            // except the last one always contribute two symbols.
            // NOTE(review): when readLen is 0 this loop is skipped, yet the code after it
            // still writes into the zero-length sequence array; confirm zero-length reads
            // cannot reach this point.
            for (; index < (startIndex + (readLen + 1) / 2) - 1; index++)
            {
                // Get first 4 bit value
                value = (alignmentBlock[index] & 0xF0) >> 4;
                sequence[sequenceIndex++] = GetSeqCharAsByte(value);
                // Get last 4 bit value
                value = alignmentBlock[index] & 0x0F;
                sequence[sequenceIndex++] = GetSeqCharAsByte(value);
            }

            // Last byte: the low nibble is used only when the read length is even.
            value = (alignmentBlock[index] & 0xF0) >> 4;
            sequence[sequenceIndex++] = GetSeqCharAsByte(value);

            if (readLen % 2 == 0)
            {
                value = alignmentBlock[index] & 0x0F;
                sequence[sequenceIndex++] = GetSeqCharAsByte(value);
            }

            startIndex = index + 1;
            byte[] qualValues = new byte[readLen];

            // A leading 0xFF byte replaces the qualities with a single asterisk byte;
            // otherwise each stored score is offset by 33 and then validated.
            if (alignmentBlock[startIndex] != 0xFF)
            {
                for (int i = startIndex; i < (startIndex + readLen); i++)
                {
                    qualValues[i - startIndex] = (byte)(alignmentBlock[i] + 33);
                }

                // Validate the quality scores here, since the parse call below is told not to.
                byte badVal;
                bool ok = QualitativeSequence.ValidateQualScores(qualValues, SAMParser.QualityFormatType, out badVal);
                if (!ok)
                {
                    string message = string.Format("Invalid encoded quality score found: {0}", (char)badVal);
                    throw new ArgumentOutOfRangeException("encodedQualityScores", message);
                }
            }
            else
            {
                qualValues = new byte[] { SAMParser.AsteriskAsByte };
            }

            // Values have already been validated above, so the final argument is false.
            SAMParser.ParseQualityNSequence(alignedSeq, Alphabet, sequence, qualValues, false);

            startIndex += readLen;

            // Whatever remains in the block is a run of optional fields: 2-byte tag,
            // 1-byte value type, then the value itself.
            if (alignmentBlock.Length > startIndex + 4 && alignmentBlock[startIndex] != 0x0 && alignmentBlock[startIndex + 1] != 0x0)
            {
                for (index = startIndex; index < alignmentBlock.Length;)
                {
                    SAMOptionalField optionalField = new SAMOptionalField();
                    optionalField.Tag = System.Text.Encoding.ASCII.GetString(alignmentBlock, index, 2);
                    index            += 2;
                    char vType = (char)alignmentBlock[index++];

                    // SAM format supports [AifZH] for value type.
                    // In BAM, an integer may be stored as a signed 8-bit integer (c), unsigned 8-bit integer (C), signed short (s), unsigned
                    // short (S), signed 32-bit (i) or unsigned 32-bit integer (I), depending on the signed magnitude of the integer. However,
                    // in SAM, all types of integers are presented as type 'i'.

                    // NOTE: Code previously here checked for a valid value type and threw an exception; per the original
                    // author that validation also happens in GetOptionalValue below and when the value is set.

                    // GetOptionalValue advances index past the value it consumed.
                    optionalField.Value = GetOptionalValue(vType, alignmentBlock, ref index).ToString();

                    // Convert to SAM format, where all integers are represented the same way
                    if ("cCsSI".IndexOf(vType) >= 0)
                    {
                        vType = 'i';
                    }
                    optionalField.VType = vType.ToString();

                    alignedSeq.OptionalFields.Add(optionalField);
                }
            }

            return alignedSeq;
        }