/// <summary>
        /// Creates a copy of this aligned sequence. Only the members assigned below are copied:
        /// the header values, the query sequence (rebuilt from its alphabet and string form) and
        /// the optional fields (each duplicated into a new <see cref="SAMOptionalField"/>).
        /// </summary>
        /// <returns>A new <see cref="SAMAlignedSequence"/> populated from this instance.</returns>
        public SAMAlignedSequence Clone()
        {
            SAMAlignedSequence n = new SAMAlignedSequence();

            n.CIGAR         = this.CIGAR;
            n.Bin           = this.Bin;
            n.Flag          = this.Flag;
            n.ISize         = this.ISize;
            n.MapQ          = this.MapQ;
            n.MPos          = this.MPos;
            n.MRNM          = this.MRNM;
            n.Pos           = this.Pos;
            n.QName         = this.QName;

            // A record whose sequence has not been assigned cannot be rebuilt; leave the copy's
            // QuerySequence at its default instead of failing with a NullReferenceException.
            if (this.QuerySequence != null)
            {
                n.QuerySequence = new Bio.Sequence(this.QuerySequence.Alphabet, this.QuerySequence.ConvertToString());
            }

            n.RName = this.RName;

            // Copy field by field so the clone does not share SAMOptionalField instances with the original.
            foreach (SAMOptionalField Item in this.OptionalFields)
            {
                SAMOptionalField nItem = new SAMOptionalField();
                nItem.Tag   = Item.Tag;
                nItem.Value = Item.Value;
                nItem.VType = Item.VType;
                n.OptionalFields.Add(nItem);
            }

            return(n);
        }
Beispiel #2
0
        // Reads alignment lines from the reader until it runs out of lines or reaches a line that
        // starts with "@", and adds each parsed record to seqAlignment.QuerySequences.
        // Throws FormatException when a header or an optional field fails validation.
        private void ParseSequences(SequenceAlignmentMap seqAlignment, BioTextReader bioReader, bool isReadOnly)
        {
            while (true)
            {
                if (!bioReader.HasLines || bioReader.Line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
                {
                    break;
                }

                string[] columns = bioReader.Line.Split(tabDelim, StringSplitOptions.RemoveEmptyEntries);

                // Mandatory columns 0-8 make up the alignment header.
                SAMAlignedSequence record = new SAMAlignedSequence();
                record.QName = columns[0];
                record.Flag  = SAMAlignedSequenceHeader.GetFlag(columns[1]);
                record.RName = columns[2];
                record.Pos   = int.Parse(columns[3], CultureInfo.InvariantCulture);
                record.MapQ  = int.Parse(columns[4], CultureInfo.InvariantCulture);
                record.CIGAR = columns[5];

                // "=" in the mate reference column means "same as RName".
                if (columns[6].Equals("="))
                {
                    record.MRNM = record.RName;
                }
                else
                {
                    record.MRNM = columns[6];
                }

                record.MPos  = int.Parse(columns[7], CultureInfo.InvariantCulture);
                record.ISize = int.Parse(columns[8], CultureInfo.InvariantCulture);

                string headerError = record.IsValidHeader();
                if (!string.IsNullOrEmpty(headerError))
                {
                    throw new FormatException(headerError);
                }

                // Look up the reference sequence whose ID matches RName (case-insensitive), if any are available.
                ISequence reference = null;
                if (RefSequences != null && RefSequences.Count > 0)
                {
                    reference = RefSequences.FirstOrDefault(candidate => string.Equals(candidate.ID, record.RName, StringComparison.OrdinalIgnoreCase));
                }

                ParseQualityNSequence(record, Alphabet, Encoding, columns[9], columns[10], reference, isReadOnly);

                // Everything from column 11 onwards is an optional TAG:VTYPE:VALUE field.
                for (int column = 11; column < columns.Length; column++)
                {
                    SAMOptionalField optional = new SAMOptionalField();
                    string rawField = columns[column];

                    if (!Helper.IsValidRegexValue(OptionalFieldLinePattern, rawField))
                    {
                        throw new FormatException(string.Format(CultureInfo.CurrentCulture, Resource.InvalidOptionalField, rawField));
                    }

                    string[] parts = rawField.Split(colonDelim, StringSplitOptions.RemoveEmptyEntries);
                    optional.Tag   = parts[0];
                    optional.VType = parts[1];
                    optional.Value = parts[2];

                    string fieldError = optional.IsValid();
                    if (!string.IsNullOrEmpty(fieldError))
                    {
                        throw new FormatException(fieldError);
                    }

                    record.OptionalFields.Add(optional);
                }

                seqAlignment.QuerySequences.Add(record);
                bioReader.GoToNextLine();
            }
        }
Beispiel #3
0
 /// <summary>
 /// Stores an optional field in the editable collection under its tag, replacing any field
 /// already stored under that tag. A read-only collection is first converted to an editable one.
 /// </summary>
 /// <param name="sof">Field to store; its Tag is used as the dictionary key.</param>
 /// <exception cref="System.ArgumentNullException"><paramref name="sof"/> is null.</exception>
 public void AddOptionalField(SAMOptionalField sof)
 {
     // Validate before touching any state, so a bad argument cannot leave the collection
     // converted to editable with nothing added.
     if (sof == null)
     {
         throw new System.ArgumentNullException("sof");
     }

     if (UsingReadOnlyOptionFieldCollection)
     {
         changeFromReadOnlyToEditableCollection();
     }
     else if (editableOptionalFieldCollection == null)
     {
         editableOptionalFieldCollection = new Dictionary <string, SAMOptionalField>();
     }

     // Indexer assignment: adds the tag or overwrites an existing entry.
     editableOptionalFieldCollection[sof.Tag] = sof;
 }
Beispiel #4
0
        /// <summary>
        /// Builds a dictionary of optional fields, keyed by tag, from the entries of this collection.
        /// </summary>
        /// <returns>
        /// A dictionary holding one <see cref="SAMOptionalField"/> per tag; each field's value is the
        /// string form of the value this collection returns for that tag.
        /// </returns>
        public Dictionary <string, SAMOptionalField> ConvertToDictionary()
        {
            var fieldsByTag = new Dictionary <string, SAMOptionalField>(tagToDataTypeAndLocation.Count);

            foreach (var entry in tagToDataTypeAndLocation)
            {
                string tag      = entry.Key;
                char   dataType = entry.Value.Key;

                // The value is passed on in its string representation.
                object textValue = this[tag].ToString();

                fieldsByTag[tag] = new SAMOptionalField(tag, textValue, dataType);
            }

            return fieldsByTag;
        }
Beispiel #5
0
        /// <summary>
        /// Parses one alignment line into a <see cref="SAMAlignedSequence"/>.
        /// </summary>
        /// <param name="bioText">Alignment line; it is split on tabDelim into columns.</param>
        /// <param name="alphabet">Alphabet of the sequences.</param>
        /// <param name="referenceSequences">Reference sequences; the one whose ID matches RName (case-insensitive) is passed on, if present.</param>
        /// <returns>The aligned sequence populated from the line, including its optional fields.</returns>
        private static SAMAlignedSequence ParseSequence(string bioText, IAlphabet alphabet, IList <ISequence> referenceSequences)
        {
            const int firstOptionalColumn = 11;

            string[] columns = bioText.Split(tabDelim, StringSplitOptions.RemoveEmptyEntries);

            // Mandatory header columns 0-8.
            SAMAlignedSequence parsed = new SAMAlignedSequence();
            parsed.QName = columns[0];
            parsed.Flag  = SAMAlignedSequenceHeader.GetFlag(columns[1]);
            parsed.RName = columns[2];
            parsed.Pos   = int.Parse(columns[3], CultureInfo.InvariantCulture);
            parsed.MapQ  = int.Parse(columns[4], CultureInfo.InvariantCulture);
            parsed.CIGAR = columns[5];

            // "=" in the mate reference column means "same as RName".
            if (columns[6].Equals("="))
            {
                parsed.MRNM = parsed.RName;
            }
            else
            {
                parsed.MRNM = columns[6];
            }

            parsed.MPos  = int.Parse(columns[7], CultureInfo.InvariantCulture);
            parsed.ISize = int.Parse(columns[8], CultureInfo.InvariantCulture);

            ISequence reference = null;
            if (referenceSequences != null && referenceSequences.Count > 0)
            {
                reference = referenceSequences.FirstOrDefault(candidate => string.Equals(candidate.ID, parsed.RName, StringComparison.OrdinalIgnoreCase));
            }

            ParseQualityNSequence(parsed, alphabet, columns[9], columns[10], reference);

            // Remaining columns are optional TAG:VTYPE:VALUE fields.
            int column = firstOptionalColumn;
            while (column < columns.Length)
            {
                SAMOptionalField optional = new SAMOptionalField();
                string rawField = columns[column];

                if (!Helper.IsValidRegexValue(OptionalFieldRegex, rawField))
                {
                    throw new FormatException(string.Format(CultureInfo.CurrentCulture, Properties.Resource.InvalidOptionalField, rawField));
                }

                string[] parts = rawField.Split(colonDelim, StringSplitOptions.RemoveEmptyEntries);
                optional.Tag   = parts[0];
                optional.VType = parts[1];
                optional.Value = parts[2];

                parsed.OptionalFields.Add(optional);
                column++;
            }

            return parsed;
        }
Beispiel #6
0
        /// <summary>
        /// Parses one alignment line into a <see cref="SAMAlignedSequence"/>.
        /// </summary>
        /// <param name="bioText">Alignment line; it is split on TabDelim into columns.</param>
        /// <param name="alphabet">Alphabet of the sequences.</param>
        /// <returns>The aligned sequence populated from the line, including its optional fields.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bioText"/> is null.</exception>
        /// <exception cref="FormatException">
        /// An optional field does not match OptionalFieldRegex, or a numeric column is not a valid integer.
        /// </exception>
        public static SAMAlignedSequence ParseSequence(string bioText, IAlphabet alphabet)
        {
            const int optionalTokenStartingIndex = 11;

            if (bioText == null)
            {
                throw new ArgumentNullException("bioText");
            }

            // The numeric columns are machine-written, so parse them with the invariant culture;
            // otherwise the result (e.g. for negative ISize values) depends on the current culture.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            string[] tokens = bioText.Split(TabDelim, StringSplitOptions.RemoveEmptyEntries);

            SAMAlignedSequence alignedSeq = new SAMAlignedSequence
            {
                QName = tokens[0],
                Flag  = SAMAlignedSequenceHeader.GetFlag(tokens[1]),
                RName = tokens[2],
                Pos   = int.Parse(tokens[3], invariant),
                MapQ  = int.Parse(tokens[4], invariant),
                CIGAR = tokens[5]
            };

            // "=" in the mate reference column means "same as RName".
            alignedSeq.MRNM  = tokens[6].Equals("=") ? alignedSeq.RName : tokens[6];
            alignedSeq.MPos  = int.Parse(tokens[7], invariant);
            alignedSeq.ISize = int.Parse(tokens[8], invariant);

            ParseQualityNSequence(alignedSeq, alphabet, tokens[9], tokens[10]);

            // Remaining columns are optional TAG:VTYPE:VALUE fields.
            for (int i = optionalTokenStartingIndex; i < tokens.Length; i++)
            {
                SAMOptionalField optField = new SAMOptionalField();
                if (!Helper.IsValidRegexValue(OptionalFieldRegex, tokens[i]))
                {
                    throw new FormatException(string.Format(System.Globalization.CultureInfo.CurrentCulture, Properties.Resource.InvalidOptionalField, tokens[i]));
                }

                string[] opttokens = tokens[i].Split(ColonDelim, StringSplitOptions.RemoveEmptyEntries);
                optField.Tag   = opttokens[0];
                optField.VType = opttokens[1];
                optField.Value = opttokens[2];

                alignedSeq.OptionalFields.Add(optField);
            }

            return(alignedSeq);
        }
Beispiel #7
0
        // Serializes an optional field into a byte array: two tag bytes, one value-type byte,
        // then the encoded value. Throws FormatException when the value size is reported as 0.
        private static byte[] GetOptioanField(SAMOptionalField field)
        {
            int valueSize = GetOptionalFieldValueSize(field);
            if (valueSize == 0)
            {
                throw new FormatException(
                    string.Format(CultureInfo.InvariantCulture, Properties.Resource.BAM_InvalidIntValueInOptField, field.Value, field.Tag));
            }

            // The magnitude of valueSize is the payload length; for integers its sign selects the
            // signed or unsigned encoding below.
            int payloadLength = valueSize > 0 ? valueSize : -valueSize;
            byte[] array = new byte[payloadLength + 3];
            array[0] = (byte)field.Tag[0];
            array[1] = (byte)field.Tag[1];
            array[2] = (byte)field.VType[0];

            switch (field.VType)
            {
                case "A": // printable character
                    array[3] = (byte)field.Value[0];
                    break;

                case "c": // signed 8-bit integer
                case "C": // unsigned 8-bit integer
                case "s": // signed 16-bit integer
                case "S": // unsigned 16-bit integer
                case "i": // signed 32-bit integer
                case "I": // unsigned 32-bit integer
                    // The type byte is rewritten to match the width and signedness chosen via valueSize.
                    switch (valueSize)
                    {
                        case 1:
                            array[2] = (byte)'C';
                            array[3] = byte.Parse(field.Value, CultureInfo.InvariantCulture);
                            break;

                        case -1:
                            array[2] = (byte)'c';
                            array[3] = (byte)sbyte.Parse(field.Value, CultureInfo.InvariantCulture);
                            break;

                        case 2:
                        {
                            UInt16 unsignedShort = UInt16.Parse(field.Value, CultureInfo.InvariantCulture);
                            array[2] = (byte)'S';
                            CopyOptionalFieldBytes(Helper.GetLittleEndianByteArray(unsignedShort), array, 2);
                            break;
                        }

                        case -2:
                        {
                            Int16 signedShort = Int16.Parse(field.Value, CultureInfo.InvariantCulture);
                            array[2] = (byte)'s';
                            CopyOptionalFieldBytes(Helper.GetLittleEndianByteArray(signedShort), array, 2);
                            break;
                        }

                        case 4:
                        {
                            uint unsignedInt = uint.Parse(field.Value, CultureInfo.InvariantCulture);
                            array[2] = (byte)'I';
                            CopyOptionalFieldBytes(Helper.GetLittleEndianByteArray(unsignedInt), array, 4);
                            break;
                        }

                        default:
                        {
                            int signedInt = int.Parse(field.Value, CultureInfo.InvariantCulture);
                            array[2] = (byte)'i';
                            CopyOptionalFieldBytes(Helper.GetLittleEndianByteArray(signedInt), array, 4);
                            break;
                        }
                    }

                    break;

                case "f": // float
                    float single = float.Parse(field.Value, CultureInfo.InvariantCulture);
                    CopyOptionalFieldBytes(Helper.GetLittleEndianByteArray(single), array, 4);
                    break;

                case "Z": // printable string
                case "H": // hex string
                    // Both are written as the text bytes followed by a terminating zero byte.
                    byte[] text = Encoding.UTF8.GetBytes(field.Value);
                    text.CopyTo(array, 3);
                    array[3 + text.Length] = (byte)'\0';
                    break;

                case "B": // integer or numeric array
                    UpdateArrayType(array, field);
                    break;

                default:
                    throw new Exception(Properties.Resource.BAM_InvalidOptValType);
            }

            return array;
        }

        // Copies the first count bytes of source into destination, starting at destination index 3
        // (the first byte after the tag and value-type bytes).
        private static void CopyOptionalFieldBytes(byte[] source, byte[] destination, int count)
        {
            for (int offset = 0; offset < count; offset++)
            {
                destination[3 + offset] = source[offset];
            }
        }
Beispiel #8
0
 // Returns the size value for an optional field based on its VType:
 //   A, c        -> -1
 //   C           ->  1
 //   s, S, i, I  -> whatever GetOptionalFieldIntValueSize reports for the value
 //   f           ->  4
 //   Z, H        -> value length plus one
 //   B           -> element bytes + 1 (array type) + 4 (element count)
 // Any other VType throws.
 private static int GetOptionalFieldValueSize(SAMOptionalField optionalField)
 {
     string valueType = optionalField.VType;
     switch (valueType)
     {
         case "C": // unsigned 8-bit integer
             return 1;

         case "A": // printable character
         case "c": // signed 8-bit integer
             return -1;

         case "f": // float
             return 4;

         case "s": // signed 16-bit integer
         case "S": // unsigned 16-bit integer
         case "i": // signed 32-bit integer
         case "I": // unsigned 32-bit integer
             return GetOptionalFieldIntValueSize(optionalField.Value);

         case "Z": // printable string
         case "H": // hex string
             return optionalField.Value.Length + 1;

         case "B": // integer or numeric array
         {
             string arrayText = optionalField.Value;

             // The first character names the element type; the first comma-separated token
             // carries it, so it is not counted as an element.
             int elementSize = GetSizeOfArrayType(arrayText[0]);
             int elementCount = arrayText.Split(DelimComma, StringSplitOptions.RemoveEmptyEntries).Length - 1;

             // 1 byte for the array type and 4 bytes for the number of values in the array.
             return (elementSize * elementCount) + 1 + 4;
         }

         default:
             throw new Exception(Properties.Resource.BAM_InvalidOptValType);
     }
 }
Beispiel #9
0
        // Writes the value of an array-typed optional field into array, starting at index 3:
        // one byte for the element type, four bytes for the element count, then each element
        // encoded with the width reported by GetSizeOfArrayType.
        private static void UpdateArrayType(byte[] array, SAMOptionalField field)
        {
            char elementType = field.Value[0];
            int elementSize = GetSizeOfArrayType(elementType);
            string[] parts = field.Value.Split(DelimComma, StringSplitOptions.RemoveEmptyEntries);

            array[3] = (byte)elementType;

            // parts[0] carries the array type, so the element count is one less than the token count.
            byte[] scratch = Helper.GetLittleEndianByteArray(parts.Length - 1);
            int writeAt = 4;
            for (int b = 0; b < 4; b++)
            {
                array[writeAt++] = scratch[b];
            }

            for (int p = 1; p < parts.Length; p++)
            {
                string text = parts[p];

                switch (elementType)
                {
                    case 'A': // printable character
                        scratch[0] = (byte)text[0];
                        break;

                    case 'c': // signed 8-bit integer
                        scratch[0] = (byte)sbyte.Parse(text, CultureInfo.InvariantCulture);
                        break;

                    case 'C': // unsigned 8-bit integer
                        scratch[0] = byte.Parse(text, CultureInfo.InvariantCulture);
                        break;

                    case 's': // signed 16-bit integer
                        scratch = Helper.GetLittleEndianByteArray(Int16.Parse(text, CultureInfo.InvariantCulture));
                        break;

                    case 'S': // unsigned 16-bit integer
                        scratch = Helper.GetLittleEndianByteArray(UInt16.Parse(text, CultureInfo.InvariantCulture));
                        break;

                    case 'i': // signed 32-bit integer
                        scratch = Helper.GetLittleEndianByteArray(int.Parse(text, CultureInfo.InvariantCulture));
                        break;

                    case 'I': // unsigned 32-bit integer
                        scratch = Helper.GetLittleEndianByteArray(uint.Parse(text, CultureInfo.InvariantCulture));
                        break;

                    case 'f': // float
                        scratch = Helper.GetLittleEndianByteArray(float.Parse(text, CultureInfo.InvariantCulture));
                        break;

                    default:
                        throw new Exception(string.Format(Properties.Resource.BAM_InvalidOptValType, elementType));
                }

                // Append the encoded element.
                for (int b = 0; b < elementSize; b++)
                {
                    array[writeAt++] = scratch[b];
                }
            }
        }
Beispiel #10
0
        /// <summary>
        /// Reads the next alignment block from the uncompressed BAM data and converts it into a
        /// <see cref="SAMAlignedSequence"/>.
        /// </summary>
        /// <param name="start">Start of the region of interest; a record whose (RefEndPos - 1) is below it is skipped unless its reference name is the "no reference" indicator.</param>
        /// <param name="end">End of the region of interest; a record whose 1-based Pos is greater than this value is skipped.</param>
        /// <returns>The parsed aligned sequence, or null when the record does not overlap the requested region.</returns>
        private SAMAlignedSequence GetAlignedSequence(int start, int end)
        {
            // Characters for CIGAR operation codes 0..8.
            const string cigarOperations = "MIDNSHP=X";

            byte[] array = new byte[4];

            // The block is preceded by its length, stored as a 32-bit integer.
            ReadUnCompressedData(array, 0, 4);
            int blockLen = Helper.GetInt32(array, 0);
            byte[] alignmentBlock = new byte[blockLen];
            ReadUnCompressedData(alignmentBlock, 0, blockLen);
            SAMAlignedSequence alignedSeq = new SAMAlignedSequence();
            int value;
            UInt32 UnsignedValue;

            // 0-4 bytes: reference sequence index, -1 when there is none.
            int refSeqIndex = Helper.GetInt32(alignmentBlock, 0);

            if (refSeqIndex == -1)
            {
                alignedSeq.RName = "*";
            }
            else
            {
                alignedSeq.RName = refSeqNames[refSeqIndex];
            }

            // 4-8 bytes: stored position plus one.
            alignedSeq.Pos = Helper.GetInt32(alignmentBlock, 4) + 1;

            // if there is no overlap no need to parse further.
            //     BAMPos > closedEnd
            // => (alignedSeq.Pos - 1) > end -1
            if (alignedSeq.Pos > end)
            {
                return null;
            }

            // 8 - 12 bytes "bin<<16|mapQual<<8|read_name_len"
            UnsignedValue = Helper.GetUInt32(alignmentBlock, 8);

            // Upper 16 bits: bin. Shift BEFORE casting: casting first sign-extends any value with
            // bit 31 set, which produced a negative Bin for bins >= 32768.
            alignedSeq.Bin = (int)((UnsignedValue & 0xFFFF0000) >> 16);
            // Bits 8-15: mapping quality.
            alignedSeq.MapQ = (int)((UnsignedValue & 0x0000FF00) >> 8);
            // Lowest 8 bits: length of the query name, including its terminator (see QName below).
            int queryNameLen = (int)(UnsignedValue & 0x000000FF);

            // 12 - 16 bytes "flag<<16|cigar_len"
            UnsignedValue = Helper.GetUInt32(alignmentBlock, 12);
            // Upper 16 bits: flag (shifted before the cast, for the same reason as Bin).
            int flagValue = (int)((UnsignedValue & 0xFFFF0000) >> 16);
            alignedSeq.Flag = (SAMFlags)flagValue;
            // Lower 16 bits: number of CIGAR operations.
            int cigarLen = (int)(UnsignedValue & 0x0000FFFF);

            // 16-20 bytes: read length.
            int readLen = Helper.GetInt32(alignmentBlock, 16);

            // 20-24 bytes: mate reference sequence index, -1 when there is none.
            int mateRefSeqIndex = Helper.GetInt32(alignmentBlock, 20);
            if (mateRefSeqIndex != -1)
            {
                alignedSeq.MRNM = refSeqNames[mateRefSeqIndex];
            }
            else
            {
                alignedSeq.MRNM = "*";
            }

            // 24-28 bytes: stored mate position plus one.
            alignedSeq.MPos = Helper.GetInt32(alignmentBlock, 24) + 1;

            // 28-32 bytes
            alignedSeq.ISize = Helper.GetInt32(alignmentBlock, 28);

            // 32-(32+queryNameLen) bytes: query name; the last byte is not part of the name.
            alignedSeq.QName = System.Text.ASCIIEncoding.ASCII.GetString(alignmentBlock, 32, queryNameLen - 1);
            StringBuilder strbuilder = new StringBuilder();
            int startIndex = 32 + queryNameLen;

            // CIGAR: cigarLen 32-bit values, each "operation length << 4 | operation code".
            for (int i = startIndex; i < (startIndex + cigarLen * 4); i += 4)
            {
                // Get the CIGAR operation length stored in first 28 bits.
                UInt32 cigarValue = Helper.GetUInt32(alignmentBlock, i);
                strbuilder.Append(((cigarValue & 0xFFFFFFF0) >> 4).ToString(CultureInfo.InvariantCulture));

                // Get the CIGAR operation stored in last 4 bits.
                value = (int)(cigarValue & 0x0000000F);

                // MIDNSHP=X => 012345678
                if (value >= cigarOperations.Length)
                {
                    throw new FileFormatException(Properties.Resource.BAM_InvalidCIGAR);
                }

                strbuilder.Append(cigarOperations[value]);
            }

            string cigar = strbuilder.ToString();
            if (string.IsNullOrWhiteSpace(cigar))
            {
                alignedSeq.CIGAR = "*";
            }
            else
            {
                alignedSeq.CIGAR = cigar;
            }

            // if there is no overlap no need to parse further.
            // ZeroBasedRefEnd < start
            // => (alignedSeq.RefEndPos -1) < start
            if (alignedSeq.RefEndPos - 1 < start && alignedSeq.RName!=Properties.Resource.SAM_NO_REFERENCE_DEFINED_INDICATOR)
            {
                return null;
            }

            // Sequence: two bases per byte, high nibble first, (readLen + 1) / 2 bytes in total.
            // NOTE(review): when readLen is 0 the code below still decodes one byte into two bases
            // and advances past it - confirm whether zero-length reads can reach this point.
            startIndex += cigarLen * 4;
            strbuilder = new StringBuilder();
            int index = startIndex;
            for (; index < (startIndex + (readLen + 1) / 2) - 1; index++)
            {
                // Get first 4 bit value
                value = (alignmentBlock[index] & 0xF0) >> 4;
                strbuilder.Append(GetSeqChar(value));
                // Get last 4 bit value
                value = alignmentBlock[index] & 0x0F;
                strbuilder.Append(GetSeqChar(value));
            }

            // Last sequence byte: its low nibble holds a base only when the read length is even.
            value = (alignmentBlock[index] & 0xF0) >> 4;
            strbuilder.Append(GetSeqChar(value));
            if (readLen % 2 == 0)
            {
                value = alignmentBlock[index] & 0x0F;
                strbuilder.Append(GetSeqChar(value));
            }

            startIndex = index + 1;
            string strSequence = strbuilder.ToString();
            byte[] qualValues = new byte[readLen];
            string strQualValues = "*";

            // Qualities: one byte per base; a leading 0xFF leaves the quality string as "*".
            if (alignmentBlock[startIndex] != 0xFF)
            {
                for (int i = startIndex; i < (startIndex + readLen); i++)
                {
                    // Offset by 33 to obtain the ASCII quality character.
                    qualValues[i - startIndex] = (byte)(alignmentBlock[i] + 33);
                }

                strQualValues = System.Text.ASCIIEncoding.ASCII.GetString(qualValues);
            }

            SAMParser.ParseQualityNSequence(alignedSeq, Alphabet, strSequence, strQualValues);

            // Whatever remains in the block is a run of optional fields: 2-byte tag, 1-byte type, value.
            startIndex += readLen;
            if (alignmentBlock.Length > startIndex + 4 && alignmentBlock[startIndex] != 0x0 && alignmentBlock[startIndex + 1] != 0x0)
            {
                for (index = startIndex; index < alignmentBlock.Length; )
                {
                    SAMOptionalField optionalField = new SAMOptionalField();
                    optionalField.Tag = System.Text.ASCIIEncoding.ASCII.GetString(alignmentBlock, index, 2);
                    index += 2;
                    char vType = (char)alignmentBlock[index++];
                    string valueType = vType.ToString();

                    // SAM format supports [AifZH] for value type.
                    // In BAM, an integer may be stored as a signed 8-bit integer (c), unsigned 8-bit integer (C), signed short (s), unsigned
                    // short (S), signed 32-bit (i) or unsigned 32-bit integer (I), depending on the signed magnitude of the integer. However,
                    // in SAM, all types of integers are presented as type 'i'.
                    string message = Helper.IsValidPatternValue("VType", valueType, BAMOptionalFieldRegex);
                    if (!string.IsNullOrEmpty(message))
                    {
                        throw new FormatException(message);
                    }

                    // GetOptionalValue advances index past the value it reads.
                    optionalField.Value = GetOptionalValue(vType, alignmentBlock, ref index).ToString();

                    // Convert to SAM format.
                    if ("cCsSI".IndexOf(vType) >= 0)
                    {
                        valueType = "i";
                    }

                    optionalField.VType = valueType;

                    alignedSeq.OptionalFields.Add(optionalField);
                }
            }

            return alignedSeq;
        }