示例#1
0
        /// <summary>
        /// Computes the size indicator of an optional field's value from its VType.
        /// </summary>
        /// <param name="optionalField">Optional field whose VType and Value are inspected.</param>
        /// <returns>
        /// -1 for "A" and "c", 1 for "C", 4 for "f", the value length plus one for "Z" and "H",
        /// and the result of GetOptionalFieldIntValueSize for "s", "S", "i" and "I".
        /// </returns>
        /// <exception cref="FileFormatException">The VType is not one of the types listed above.</exception>
        private static int GetOptionalFieldValueSize(SAMOptionalField optionalField)
        {
            string valueType = optionalField.VType;

            // Printable character and signed 8-bit integer share the negative one-byte marker.
            if (valueType == "A" || valueType == "c")
            {
                return -1;
            }

            // Unsigned 8-bit integer.
            if (valueType == "C")
            {
                return 1;
            }

            // 16 and 32 bit integers, signed or unsigned: the size is derived from the value text.
            if (valueType == "s" || valueType == "S" || valueType == "i" || valueType == "I")
            {
                return GetOptionalFieldIntValueSize(optionalField.Value);
            }

            // Float.
            if (valueType == "f")
            {
                return 4;
            }

            // Printable string and hex string: one extra byte on top of the characters.
            if (valueType == "Z" || valueType == "H")
            {
                return optionalField.Value.Length + 1;
            }

            throw new FileFormatException(Resource.BAM_InvalidOptValType);
        }
        /// <summary>
        /// Builds a SAM optional field from a tag name and its list of values.
        /// </summary>
        /// <param name="tag">Pair whose key is the tag name and whose value list must hold exactly one entry.</param>
        /// <returns>
        /// A field carrying the tag, its single value, and the value type found in tagTypesLookup,
        /// or "Z" when the tag is not present in the lookup.
        /// </returns>
        /// <exception cref="Exception">The value list is missing or does not contain exactly one value.</exception>
        private static SAMOptionalField CreateSamField(KeyValuePair <string, IList <string> > tag)
        {
            var field = new SAMOptionalField {
                Tag = tag.Key
            };

            IList<string> values = tag.Value;

            if (values == null || values.Count != 1)
            {
                // string.Join lists the offending values without a stray leading separator.
                string found = values == null ? string.Empty : string.Join(",", values);
                throw new Exception("Values count doesn't equal 1, unexpected:" + found);
            }

            field.Value = values[0];

            string valueType;
            if (!tagTypesLookup.TryGetValue(tag.Key, out valueType))
            {
                // Tags missing from the lookup fall back to the printable string type.
                valueType = "Z";
            }

            field.VType = valueType;

            return field;
        }
示例#3
0
        /// <summary>
        /// Computes the size indicator of an optional field's value from its VType, including "B" arrays.
        /// </summary>
        /// <param name="optionalField">Optional field whose VType and Value are inspected.</param>
        /// <returns>
        /// -1 for "A" and "c", 1 for "C", 4 for "f", the value length plus one for "Z" and "H",
        /// the result of GetOptionalFieldIntValueSize for "s", "S", "i" and "I", and for "B"
        /// the total of the element bytes plus five header bytes.
        /// </returns>
        /// <exception cref="Exception">The VType is not one of the types listed above.</exception>
        private static int GetOptionalFieldValueSize(SAMOptionalField optionalField)
        {
            string valueType = optionalField.VType;

            // Printable character and signed 8-bit integer.
            if (valueType == "A" || valueType == "c")
            {
                return -1;
            }

            // Unsigned 8-bit integer.
            if (valueType == "C")
            {
                return 1;
            }

            // 16 and 32 bit integers, signed or unsigned.
            if (valueType == "s" || valueType == "S" || valueType == "i" || valueType == "I")
            {
                return GetOptionalFieldIntValueSize(optionalField.Value);
            }

            // Float.
            if (valueType == "f")
            {
                return 4;
            }

            // Printable string and hex string.
            if (valueType == "Z" || valueType == "H")
            {
                return optionalField.Value.Length + 1;
            }

            // Integer or numeric array.
            if (valueType == "B")
            {
                // The first character of the value names the element type; the first
                // comma-separated piece is therefore not an element and is not counted.
                string arrayText       = optionalField.Value;
                int    bytesPerElement = GetSizeOfArrayType(arrayText[0]);
                int    elementCount    = arrayText.Split(DelimComma, StringSplitOptions.RemoveEmptyEntries).Length - 1;

                // One byte stores the array type and four bytes store the number of values.
                return (bytesPerElement * elementCount) + 1 + 4;
            }

            throw new Exception(Properties.Resource.BAM_InvalidOptValType);
        }
示例#4
0
        /// <summary>
        /// Serializes an optional field into a byte array: two tag bytes, one value type byte
        /// and then the encoded value.
        /// </summary>
        /// <param name="field">Optional field to encode; its Tag must have at least two characters.</param>
        /// <returns>The three header bytes followed by the value bytes.</returns>
        /// <exception cref="FormatException">GetOptionalFieldValueSize reported a size of zero for the field.</exception>
        /// <exception cref="FileFormatException">The field's VType is not handled here.</exception>
        private static byte[] GetOptioanField(SAMOptionalField field)
        {
            int valueSize = GetOptionalFieldValueSize(field);

            if (valueSize == 0)
            {
                throw new FormatException(
                    string.Format(CultureInfo.InvariantCulture, Resource.BAM_InvalidIntValueInOptField, field.Value, field.Tag));
            }

            // The sign of valueSize selects the signed or unsigned encoding below;
            // only its magnitude counts towards the buffer length.
            int payloadLength = valueSize;
            if (payloadLength < 0)
            {
                payloadLength = -payloadLength;
            }

            // Three leading bytes: two for the tag, one for the value type.
            byte[] array = new byte[payloadLength + 3];
            array[0] = (byte)field.Tag[0];
            array[1] = (byte)field.Tag[1];
            array[2] = (byte)field.VType[0];

            // NOTE(review): multi-byte values below are copied in reverse index order from the
            // result of Helper.GetLittleEndianByteArray - confirm this yields the intended byte order.
            switch (field.VType)
            {
                case "A":
                    // Printable character.
                    array[3] = (byte)field.Value[0];
                    break;

                case "c":
                case "C":
                case "s":
                case "S":
                case "i":
                case "I":
                {
                    // Integers: the type byte is rewritten to match the width chosen by valueSize.
                    byte[] raw;

                    switch (valueSize)
                    {
                        case 1:
                            array[2] = (byte)'C';
                            array[3] = byte.Parse(field.Value, CultureInfo.InvariantCulture);
                            break;

                        case -1:
                            sbyte signedByte = sbyte.Parse(field.Value, CultureInfo.InvariantCulture);
                            array[2] = (byte)'c';
                            array[3] = (byte)signedByte;
                            break;

                        case 2:
                            raw      = Helper.GetLittleEndianByteArray(UInt16.Parse(field.Value, CultureInfo.InvariantCulture));
                            array[2] = (byte)'S';
                            array[3] = raw[1];
                            array[4] = raw[0];
                            break;

                        case -2:
                            raw      = Helper.GetLittleEndianByteArray(Int16.Parse(field.Value, CultureInfo.InvariantCulture));
                            array[2] = (byte)'s';
                            array[3] = raw[1];
                            array[4] = raw[0];
                            break;

                        case 4:
                            raw      = Helper.GetLittleEndianByteArray(uint.Parse(field.Value, CultureInfo.InvariantCulture));
                            array[2] = (byte)'I';
                            for (int offset = 0; offset < 4; offset++)
                            {
                                array[3 + offset] = raw[3 - offset];
                            }

                            break;

                        default:
                            // Every remaining size is written as a signed 32-bit integer.
                            raw      = Helper.GetLittleEndianByteArray(int.Parse(field.Value, CultureInfo.InvariantCulture));
                            array[2] = (byte)'i';
                            for (int offset = 0; offset < 4; offset++)
                            {
                                array[3 + offset] = raw[3 - offset];
                            }

                            break;
                    }

                    break;
                }

                case "f":
                {
                    // Float.
                    byte[] floatBytes = Helper.GetLittleEndianByteArray(float.Parse(field.Value, CultureInfo.InvariantCulture));
                    for (int offset = 0; offset < 4; offset++)
                    {
                        array[3 + offset] = floatBytes[3 - offset];
                    }

                    break;
                }

                case "Z":
                case "H":
                {
                    // Printable string and hex string: ASCII bytes followed by a terminating zero byte.
                    byte[] textBytes = System.Text.ASCIIEncoding.ASCII.GetBytes(field.Value);
                    textBytes.CopyTo(array, 3);
                    array[3 + textBytes.Length] = (byte)'\0';
                    break;
                }

                default:
                    throw new FileFormatException(Resource.BAM_InvalidOptValType);
            }

            return array;
        }
        /// <summary>
        /// Reads the next alignment block from the BAM stream and parses it into an aligned sequence,
        /// stopping early when the alignment does not overlap the requested region.
        /// The whole block is always consumed from the stream, even when null is returned.
        /// </summary>
        /// <param name="start">
        /// Start of the requested region. Alignments whose RefEndPos - 1 is smaller are skipped,
        /// unless their RName is the "no reference defined" indicator.
        /// </param>
        /// <param name="end">End of the requested region. Alignments whose one-based Pos is greater are skipped.</param>
        /// <returns>The parsed alignment, or null when it lies outside the requested region.</returns>
        /// <exception cref="FileFormatException">A CIGAR operation code is greater than 8.</exception>
        /// <exception cref="FormatException">Helper.IsValidPatternValue reports a problem with an optional field's value type.</exception>
        private SAMAlignedSequence GetAlignedSequence(int start, int end)
        {
            // CIGAR operation codes 0..8 map onto these characters, in order.
            const string CigarOperations = "MIDNSHP=X";

            // The block is prefixed by its own length in four bytes.
            byte[] array = new byte[4];

            ReadUnCompressedData(array, 0, 4);
            int blockLen = Helper.GetInt32(array, 0);

            byte[] alignmentBlock = new byte[blockLen];
            ReadUnCompressedData(alignmentBlock, 0, blockLen);
            SAMAlignedSequence alignedSeq = new SAMAlignedSequence();
            int    value;
            UInt32 packedValue;

            // 0-4 bytes: reference sequence index, -1 when there is none.
            int refSeqIndex = Helper.GetInt32(alignmentBlock, 0);

            if (refSeqIndex == -1)
            {
                alignedSeq.RName = "*";
            }
            else
            {
                alignedSeq.RName = refSeqNames[refSeqIndex];
            }

            // 4-8 bytes: the stored position plus one.
            alignedSeq.Pos = Helper.GetInt32(alignmentBlock, 4) + 1;

            // If there is no overlap there is no need to parse further.
            //     BAMPos > closedEnd
            // => (alignedSeq.Pos - 1) > end - 1
            if (alignedSeq.Pos > end)
            {
                return null;
            }

            // 8-12 bytes: "bin<<16|mapQual<<8|read_name_len"
            packedValue = Helper.GetUInt32(alignmentBlock, 8);

            // Shift while the value is still unsigned. Casting to int first sign-extends any
            // bin with its top bit set (32768 and above) into a negative number.
            alignedSeq.Bin = (int)((packedValue & 0xFFFF0000) >> 16);

            // Mapping quality.
            alignedSeq.MapQ = (int)((packedValue & 0x0000FF00) >> 8);

            // Length of the query name, as used for the QName read below.
            int queryNameLen = (int)(packedValue & 0x000000FF);

            // 12-16 bytes: flag in the upper 16 bits, CIGAR operation count in the lower 16 bits.
            packedValue = Helper.GetUInt32(alignmentBlock, 12);

            // Same unsigned shift as for the bin, so a flag with bit 15 set stays positive.
            int flagValue = (int)((packedValue & 0xFFFF0000) >> 16);

            alignedSeq.Flag = (SAMFlags)flagValue;

            int cigarLen = (int)(packedValue & 0x0000FFFF);

            // 16-20 bytes
            int readLen = Helper.GetInt32(alignmentBlock, 16);

            // 20-24 bytes: mate reference sequence index, -1 when there is none.
            int mateRefSeqIndex = Helper.GetInt32(alignmentBlock, 20);

            if (mateRefSeqIndex != -1)
            {
                alignedSeq.MRNM = refSeqNames[mateRefSeqIndex];
            }
            else
            {
                alignedSeq.MRNM = "*";
            }

            // 24-28 bytes
            alignedSeq.MPos = Helper.GetInt32(alignmentBlock, 24) + 1;

            // 28-32 bytes
            alignedSeq.ISize = Helper.GetInt32(alignmentBlock, 28);

            // 32-(32+queryNameLen) bytes: query name; its last byte is left out of the string.
            alignedSeq.QName = System.Text.ASCIIEncoding.ASCII.GetString(alignmentBlock, 32, queryNameLen - 1);
            StringBuilder strbuilder = new StringBuilder();
            int           startIndex = 32 + queryNameLen;

            for (int i = startIndex; i < (startIndex + cigarLen * 4); i += 4)
            {
                // The CIGAR operation length is stored in the upper 28 bits.
                UInt32 cigarValue = Helper.GetUInt32(alignmentBlock, i);
                strbuilder.Append(((cigarValue & 0xFFFFFFF0) >> 4).ToString(CultureInfo.InvariantCulture));

                // The CIGAR operation is stored in the lower 4 bits.
                value = (int)(cigarValue & 0x0000000F);

                if (value >= CigarOperations.Length)
                {
                    throw new FileFormatException(Properties.Resource.BAM_InvalidCIGAR);
                }

                strbuilder.Append(CigarOperations[value]);
            }

            string cigar = strbuilder.ToString();

            if (string.IsNullOrWhiteSpace(cigar))
            {
                alignedSeq.CIGAR = "*";
            }
            else
            {
                alignedSeq.CIGAR = cigar;
            }

            // If there is no overlap there is no need to parse further.
            // ZeroBasedRefEnd < start
            // => (alignedSeq.RefEndPos - 1) < start
            if (alignedSeq.RefEndPos - 1 < start && alignedSeq.RName != Properties.Resource.SAM_NO_REFERENCE_DEFINED_INDICATOR)
            {
                return null;
            }

            // The sequence follows the CIGAR data, two bases per byte.
            startIndex += cigarLen * 4;
            strbuilder  = new StringBuilder();
            int index = startIndex;

            // NOTE(review): when readLen is 0 the code below still reads one byte and emits
            // two characters - confirm whether records without a sequence can reach this point.
            for (; index < (startIndex + (readLen + 1) / 2) - 1; index++)
            {
                // Upper 4 bits hold the first base of the pair.
                value = (alignmentBlock[index] & 0xF0) >> 4;
                strbuilder.Append(GetSeqChar(value));

                // Lower 4 bits hold the second base of the pair.
                value = alignmentBlock[index] & 0x0F;
                strbuilder.Append(GetSeqChar(value));
            }

            // The last byte carries two bases only when the read length is even.
            value = (alignmentBlock[index] & 0xF0) >> 4;
            strbuilder.Append(GetSeqChar(value));
            if (readLen % 2 == 0)
            {
                value = alignmentBlock[index] & 0x0F;
                strbuilder.Append(GetSeqChar(value));
            }

            startIndex = index + 1;
            string strSequence = strbuilder.ToString();

            byte[] qualValues    = new byte[readLen];
            string strQualValues = "*";

            // A leading 0xFF keeps the "*" placeholder; otherwise each stored value
            // is offset by 33 to obtain its character.
            if (alignmentBlock[startIndex] != 0xFF)
            {
                for (int i = startIndex; i < (startIndex + readLen); i++)
                {
                    qualValues[i - startIndex] = (byte)(alignmentBlock[i] + 33);
                }

                strQualValues = System.Text.ASCIIEncoding.ASCII.GetString(qualValues);
            }

            SAMParser.ParseQualityNSequence(alignedSeq, Alphabet, strSequence, strQualValues);

            startIndex += readLen;

            // Whatever remains in the block is a run of optional fields.
            if (alignmentBlock.Length > startIndex + 4 && alignmentBlock[startIndex] != 0x0 && alignmentBlock[startIndex + 1] != 0x0)
            {
                for (index = startIndex; index < alignmentBlock.Length;)
                {
                    SAMOptionalField optionalField = new SAMOptionalField();
                    optionalField.Tag = System.Text.ASCIIEncoding.ASCII.GetString(alignmentBlock, index, 2);
                    index            += 2;
                    char   vType     = (char)alignmentBlock[index++];
                    string valueType = vType.ToString();

                    // SAM format supports [AifZH] for value type.
                    // In BAM, an integer may be stored as a signed 8-bit integer (c), unsigned 8-bit integer (C), signed short (s), unsigned
                    // short (S), signed 32-bit (i) or unsigned 32-bit integer (I), depending on the signed magnitude of the integer. However,
                    // in SAM, all types of integers are presented as type 'i'.
                    string message = Helper.IsValidPatternValue("VType", valueType, BAMOptionalFieldRegex);
                    if (!string.IsNullOrEmpty(message))
                    {
                        throw new FormatException(message);
                    }

                    // GetOptionalValue moves index on via the ref argument.
                    optionalField.Value = GetOptionalValue(vType, alignmentBlock, ref index).ToString();

                    // Convert to SAM format.
                    if ("cCsSI".IndexOf(vType) >= 0)
                    {
                        valueType = "i";
                    }

                    optionalField.VType = valueType;

                    alignedSeq.OptionalFields.Add(optionalField);
                }
            }

            return alignedSeq;
        }
        /// <summary>
        /// Reads the next alignment block from the BAM stream and parses it into an aligned sequence,
        /// unless the filter rejects the raw block.
        /// </summary>
        /// <returns>The parsed alignment, or null when Filter.Accept rejects the block.</returns>
        /// <exception cref="Exception">A CIGAR operation code is greater than 8.</exception>
        /// <exception cref="ArgumentOutOfRangeException">QualitativeSequence.ValidateQualScores rejects a quality score.</exception>
        protected override SAMAlignedSequence GetAlignedSequence()
        {
            // CIGAR operation codes 0..8 map onto these characters, in order.
            const string CigarOperations = "MIDNSHP=X";

            // The block is prefixed by its own length in four bytes.
            byte[] array = new byte[4];

            ReadUnCompressedData(array, 0, 4);
            int blockLen = Helper.GetInt32(array, 0);

            byte[] alignmentBlock = new byte[blockLen];
            ReadUnCompressedData(alignmentBlock, 0, blockLen);

            // The filter sees the raw block, so rejected records are never parsed.
            if (!Filter.Accept(alignmentBlock))
            {
                return null;
            }

            SAMAlignedSequence alignedSeq = new SAMAlignedSequence();
            int    value;
            UInt32 packedValue;

            // 0-4 bytes: reference sequence index, -1 when there is none.
            int refSeqIndex = Helper.GetInt32(alignmentBlock, 0);

            if (refSeqIndex == -1)
            {
                alignedSeq.SetPreValidatedRName("*");
            }
            else
            {
                alignedSeq.SetPreValidatedRName(RefSeqNames[refSeqIndex]);
            }

            // 4-8 bytes: the stored position plus one.
            alignedSeq.Pos = Helper.GetInt32(alignmentBlock, 4) + 1;

            // 8-12 bytes: "bin<<16|mapQual<<8|read_name_len"
            packedValue = Helper.GetUInt32(alignmentBlock, 8);

            // The bin (upper 16 bits) is not stored by this reader.

            // Mapping quality; shifted while still unsigned.
            alignedSeq.MapQ = (int)((packedValue & 0x0000FF00) >> 8);

            // Length of the query name, as used for the QName read below.
            int queryNameLen = (int)(packedValue & 0x000000FF);

            // 12-16 bytes: flag in the upper 16 bits, CIGAR operation count in the lower 16 bits.
            packedValue = Helper.GetUInt32(alignmentBlock, 12);

            // Shift while the value is still unsigned. Casting to int first sign-extends a
            // flag with bit 15 set into a negative number.
            int flagValue = (int)((packedValue & 0xFFFF0000) >> 16);

            alignedSeq.Flag = (SAMFlags)flagValue;

            int cigarLen = (int)(packedValue & 0x0000FFFF);

            // 16-20 bytes
            int readLen = Helper.GetInt32(alignmentBlock, 16);

            // 20-24 bytes: mate reference sequence index, -1 when there is none.
            int mateRefSeqIndex = Helper.GetInt32(alignmentBlock, 20);

            if (mateRefSeqIndex != -1)
            {
                alignedSeq.SetPreValidatedMRNM(RefSeqNames[mateRefSeqIndex]);
            }
            else
            {
                alignedSeq.SetPreValidatedMRNM("*");
            }

            // 24-28 bytes
            alignedSeq.MPos = Helper.GetInt32(alignmentBlock, 24) + 1;

            // 28-32 bytes
            alignedSeq.ISize = Helper.GetInt32(alignmentBlock, 28);

            // 32-(32+queryNameLen) bytes: query name; its last byte is left out of the string.
            alignedSeq.QName = System.Text.ASCIIEncoding.ASCII.GetString(alignmentBlock, 32, queryNameLen - 1);
            StringBuilder strbuilder = new StringBuilder();
            int           startIndex = 32 + queryNameLen;

            for (int i = startIndex; i < (startIndex + cigarLen * 4); i += 4)
            {
                // The CIGAR operation length is stored in the upper 28 bits.
                UInt32 cigarValue = Helper.GetUInt32(alignmentBlock, i);
                strbuilder.Append(((cigarValue & 0xFFFFFFF0) >> 4).ToString(CultureInfo.InvariantCulture));

                // The CIGAR operation is stored in the lower 4 bits.
                value = (int)(cigarValue & 0x0000000F);

                if (value >= CigarOperations.Length)
                {
                    throw new Exception("Invalid CIGAR of query " + alignedSeq.QName);
                }

                strbuilder.Append(CigarOperations[value]);
            }

            string cigar = strbuilder.ToString();

            if (string.IsNullOrWhiteSpace(cigar))
            {
                alignedSeq.SetPreValidatedCIGAR("*");
            }
            else
            {
                alignedSeq.SetPreValidatedCIGAR(cigar);
            }

            // The sequence follows the CIGAR data, two bases per byte.
            startIndex += cigarLen * 4;
            var sequence      = new byte[readLen];
            int sequenceIndex = 0;
            int index         = startIndex;

            // NOTE(review): when readLen is 0 the code below still reads one byte and writes
            // into the empty sequence array - confirm whether such records can reach this point.
            for (; index < (startIndex + (readLen + 1) / 2) - 1; index++)
            {
                // Upper 4 bits hold the first base of the pair.
                value = (alignmentBlock[index] & 0xF0) >> 4;
                sequence[sequenceIndex++] = GetSeqCharAsByte(value);

                // Lower 4 bits hold the second base of the pair.
                value = alignmentBlock[index] & 0x0F;
                sequence[sequenceIndex++] = GetSeqCharAsByte(value);
            }

            // The last byte carries two bases only when the read length is even.
            value = (alignmentBlock[index] & 0xF0) >> 4;
            sequence[sequenceIndex++] = GetSeqCharAsByte(value);

            if (readLen % 2 == 0)
            {
                value = alignmentBlock[index] & 0x0F;
                sequence[sequenceIndex++] = GetSeqCharAsByte(value);
            }

            startIndex = index + 1;
            byte[] qualValues = new byte[readLen];

            // A leading 0xFF selects the asterisk placeholder; otherwise each stored value
            // is offset by 33 to obtain its encoded form.
            if (alignmentBlock[startIndex] != 0xFF)
            {
                for (int i = startIndex; i < (startIndex + readLen); i++)
                {
                    qualValues[i - startIndex] = (byte)(alignmentBlock[i] + 33);
                }

                // Validate the quality scores here, because validation is switched off in the parse call below.
                byte badVal;
                bool ok = QualitativeSequence.ValidateQualScores(qualValues, SAMParser.QualityFormatType, out badVal);
                if (!ok)
                {
                    string message = string.Format(CultureInfo.InvariantCulture, "Invalid encoded quality score found: {0}", (char)badVal);
                    throw new ArgumentOutOfRangeException("encodedQualityScores", message);
                }
            }
            else
            {
                qualValues = new byte[] { SAMParser.AsteriskAsByte };
            }

            // The values were validated above, so the final argument is passed as false.
            SAMParser.ParseQualityNSequence(alignedSeq, Alphabet, sequence, qualValues, false);

            startIndex += readLen;

            // Whatever remains in the block is a run of optional fields.
            if (alignmentBlock.Length > startIndex + 4 && alignmentBlock[startIndex] != 0x0 && alignmentBlock[startIndex + 1] != 0x0)
            {
                for (index = startIndex; index < alignmentBlock.Length;)
                {
                    SAMOptionalField optionalField = new SAMOptionalField();
                    optionalField.Tag = System.Text.ASCIIEncoding.ASCII.GetString(alignmentBlock, index, 2);
                    index            += 2;
                    char vType = (char)alignmentBlock[index++];

                    // SAM format supports [AifZH] for value type.
                    // In BAM, an integer may be stored as a signed 8-bit integer (c), unsigned 8-bit integer (C), signed short (s), unsigned
                    // short (S), signed 32-bit (i) or unsigned 32-bit integer (I), depending on the signed magnitude of the integer. However,
                    // in SAM, all types of integers are presented as type 'i'.

                    // NOTE: an explicit value type check used to sit here; according to the original
                    // author it is covered by the calls below and by the property setters.

                    // GetOptionalValue moves index on via the ref argument.
                    optionalField.Value = GetOptionalValue(vType, alignmentBlock, ref index).ToString();

                    // Convert to SAM format, where all integers are represented the same way.
                    if ("cCsSI".IndexOf(vType) >= 0)
                    {
                        vType = 'i';
                    }

                    optionalField.VType = vType.ToString();

                    alignedSeq.OptionalFields.Add(optionalField);
                }
            }

            return alignedSeq;
        }
示例#7
0
        /// <summary>
        /// Writes the payload of an array-typed optional field into <paramref name="array"/>, starting at index 3:
        /// the element type byte, four bytes holding the element count, then every element in turn.
        /// </summary>
        /// <param name="array">Destination buffer; indexes 0 to 2 are not touched here.</param>
        /// <param name="field">
        /// Field whose Value starts with the element type character, followed by the elements
        /// separated by the DelimComma delimiters.
        /// </param>
        /// <exception cref="Exception">An element is reached and the element type character is not handled.</exception>
        private static void UpdateArrayType(byte[] array, SAMOptionalField field)
        {
            string valueText    = field.Value;
            char   elementType  = valueText[0];
            int    elementWidth = GetSizeOfArrayType(elementType);

            // pieces[0] holds the element type; the real elements start at index 1.
            string[] pieces = valueText.Split(DelimComma, StringSplitOptions.RemoveEmptyEntries);
            array[3] = (byte)elementType;

            // The element count occupies indexes 4 to 7.
            byte[] scratch  = Helper.GetLittleEndianByteArray(pieces.Length - 1);
            int    writePos = 4;

            for (int k = 0; k < 4; k++)
            {
                array[writePos + k] = scratch[k];
            }

            writePos += 4;

            for (int pieceIndex = 1; pieceIndex < pieces.Length; pieceIndex++)
            {
                string piece = pieces[pieceIndex];

                switch (arrayKind(elementType))
                {
                    case 'A':
                        // Printable character. Single-byte kinds reuse slot 0 of the buffer already held.
                        scratch[0] = (byte)piece[0];
                        break;

                    case 'c':
                        // Signed 8-bit integer.
                        scratch[0] = (byte)sbyte.Parse(piece, CultureInfo.InvariantCulture);
                        break;

                    case 'C':
                        // Unsigned 8-bit integer.
                        scratch[0] = byte.Parse(piece, CultureInfo.InvariantCulture);
                        break;

                    case 's':
                        // Signed 16-bit integer.
                        scratch = Helper.GetLittleEndianByteArray(Int16.Parse(piece, CultureInfo.InvariantCulture));
                        break;

                    case 'S':
                        // Unsigned 16-bit integer.
                        scratch = Helper.GetLittleEndianByteArray(UInt16.Parse(piece, CultureInfo.InvariantCulture));
                        break;

                    case 'i':
                        // Signed 32-bit integer.
                        scratch = Helper.GetLittleEndianByteArray(int.Parse(piece, CultureInfo.InvariantCulture));
                        break;

                    case 'I':
                        // Unsigned 32-bit integer.
                        scratch = Helper.GetLittleEndianByteArray(uint.Parse(piece, CultureInfo.InvariantCulture));
                        break;

                    case 'f':
                        // Float.
                        scratch = Helper.GetLittleEndianByteArray(float.Parse(piece, CultureInfo.InvariantCulture));
                        break;

                    default:
                        throw new Exception(string.Format(Properties.Resource.BAM_InvalidOptValType, elementType));
                }

                // Only the first elementWidth bytes of the buffer belong to this element.
                for (int k = 0; k < elementWidth; k++)
                {
                    array[writePos + k] = scratch[k];
                }

                writePos += elementWidth;
            }
        }