/// <summary>
/// Gets the size code of an optional field's value, based on its value type.
/// </summary>
/// <param name="optionalField">Optional field whose VType and Value are inspected.</param>
/// <returns>
/// -1 for "A" and "c"; 1 for "C"; whatever GetOptionalFieldIntValueSize reports for
/// "s", "S", "i" and "I"; 4 for "f"; the length of the value plus one for "Z" and "H".
/// </returns>
/// <exception cref="FileFormatException">The value type is none of the types listed above.</exception>
private static int GetOptionalFieldValueSize(SAMOptionalField optionalField)
{
    string valueType = optionalField.VType;

    // Printable character or signed 8-bit integer.
    if (valueType == "A" || valueType == "c")
    {
        return -1;
    }

    // Unsigned 8-bit integer.
    if (valueType == "C")
    {
        return 1;
    }

    // Signed/unsigned 16-bit and 32-bit integers: the size is derived from the value text.
    if (valueType == "s" || valueType == "S" || valueType == "i" || valueType == "I")
    {
        return GetOptionalFieldIntValueSize(optionalField.Value);
    }

    // Float.
    if (valueType == "f")
    {
        return 4;
    }

    // Printable string or hex string: one position more than the text itself.
    if (valueType == "Z" || valueType == "H")
    {
        return optionalField.Value.Length + 1;
    }

    throw new FileFormatException(Resource.BAM_InvalidOptValType);
}
/// <summary>
/// Builds a SAM optional field from a tag name and its single value.
/// </summary>
/// <param name="tag">Pair of tag name (key) and a value list that must hold exactly one entry.</param>
/// <returns>
/// A field carrying the tag name, the single value, and a value type taken from
/// tagTypesLookup, or "Z" when the tag is not present in that lookup.
/// </returns>
/// <exception cref="Exception">The value list does not contain exactly one entry.</exception>
private static SAMOptionalField CreateSamField(KeyValuePair<string, IList<string>> tag)
{
    var field = new SAMOptionalField { Tag = tag.Key };

    if (tag.Value.Count != 1)
    {
        // string.Join lists the offending values without the stray leading comma
        // (and repeated concatenation) that the previous Aggregate call produced.
        throw new Exception("Values count doesn't equal 1, unexpected:" + string.Join(",", tag.Value));
    }

    field.Value = tag.Value[0];

    // Tags missing from the lookup are not rejected; they fall back to type "Z".
    string knownType;
    field.VType = tagTypesLookup.TryGetValue(tag.Key, out knownType) ? knownType : "Z";

    return field;
}
/// <summary>
/// Gets the size code of an optional field's value, including array ("B") fields.
/// </summary>
/// <param name="optionalField">Optional field whose VType and Value are inspected.</param>
/// <returns>
/// -1 for "A" and "c"; 1 for "C"; whatever GetOptionalFieldIntValueSize reports for
/// "s", "S", "i" and "I"; 4 for "f"; the length of the value plus one for "Z" and "H";
/// for "B", element size times element count plus five.
/// </returns>
/// <exception cref="Exception">The value type is none of the types listed above.</exception>
private static int GetOptionalFieldValueSize(SAMOptionalField optionalField)
{
    int size;

    switch (optionalField.VType)
    {
        case "A": // printable character
        case "c": // signed 8-bit integer
            size = -1;
            break;

        case "C": // unsigned 8-bit integer
            size = 1;
            break;

        case "s": // signed 16-bit integer
        case "S": // unsigned 16-bit integer
        case "i": // signed 32-bit integer
        case "I": // unsigned 32-bit integer
            size = GetOptionalFieldIntValueSize(optionalField.Value);
            break;

        case "f": // float
            size = 4;
            break;

        case "Z": // printable string
        case "H": // hex string
            size = optionalField.Value.Length + 1;
            break;

        case "B": // integer or numeric array
            {
                // The first character of the value names the element type; the first
                // comma-separated entry is therefore not counted as an element.
                int elementWidth = GetSizeOfArrayType(optionalField.Value[0]);
                int elementCount = optionalField.Value.Split(DelimComma, StringSplitOptions.RemoveEmptyEntries).Length - 1;

                // 1 to store the array type and 4 to store the number of values in the array.
                size = (elementWidth * elementCount) + 1 + 4;
                break;
            }

        default:
            throw new Exception(Properties.Resource.BAM_InvalidOptValType);
    }

    return size;
}
/// <summary>
/// Encodes an optional field as a byte array: two tag bytes, one value-type byte,
/// then the value itself.
/// </summary>
/// <param name="field">Optional field to encode; Tag, VType and Value are read.</param>
/// <returns>The encoded bytes.</returns>
/// <exception cref="FormatException">GetOptionalFieldValueSize reported a size of 0 for the field.</exception>
/// <exception cref="FileFormatException">The value type is not one of A, c, C, s, S, i, I, f, Z, H.</exception>
private static byte[] GetOptioanField(SAMOptionalField field)
{
    int valueSize = GetOptionalFieldValueSize(field);
    if (valueSize == 0)
    {
        throw new FormatException(
            string.Format(
                CultureInfo.InvariantCulture,
                Resource.BAM_InvalidIntValueInOptField,
                field.Value,
                field.Tag));
    }

    // The magnitude of valueSize is the payload length; its sign only selects the
    // signed integer encodings below. Three more bytes hold the tag and value type.
    int payloadLength = valueSize < 0 ? -valueSize : valueSize;
    byte[] encoded = new byte[payloadLength + 3];
    encoded[0] = (byte)field.Tag[0];
    encoded[1] = (byte)field.Tag[1];
    encoded[2] = (byte)field.VType[0];

    // NOTE(review): multi-byte values are written in reverse index order of the array
    // returned by Helper.GetLittleEndianByteArray - confirm against that helper that
    // this produces the intended byte order.
    byte[] bytes;
    switch (field.VType)
    {
        case "A": // printable character
            encoded[3] = (byte)field.Value[0];
            break;

        case "c": // signed 8-bit integer
        case "C": // unsigned 8-bit integer
        case "s": // signed 16-bit integer
        case "S": // unsigned 16-bit integer
        case "i": // signed 32-bit integer
        case "I": // unsigned 32-bit integer
            // The stored type byte is rewritten to match the width chosen by valueSize.
            switch (valueSize)
            {
                case 1:
                    encoded[2] = (byte)'C';
                    encoded[3] = byte.Parse(field.Value, CultureInfo.InvariantCulture);
                    break;

                case -1:
                    {
                        sbyte signedByte = sbyte.Parse(field.Value, CultureInfo.InvariantCulture);
                        encoded[2] = (byte)'c';
                        encoded[3] = (byte)signedByte;
                        break;
                    }

                case 2:
                    {
                        UInt16 unsignedShort = UInt16.Parse(field.Value, CultureInfo.InvariantCulture);
                        bytes = Helper.GetLittleEndianByteArray(unsignedShort);
                        encoded[2] = (byte)'S';
                        encoded[3] = bytes[1];
                        encoded[4] = bytes[0];
                        break;
                    }

                case -2:
                    {
                        Int16 signedShort = Int16.Parse(field.Value, CultureInfo.InvariantCulture);
                        bytes = Helper.GetLittleEndianByteArray(signedShort);
                        encoded[2] = (byte)'s';
                        encoded[3] = bytes[1];
                        encoded[4] = bytes[0];
                        break;
                    }

                case 4:
                    {
                        uint unsignedInt = uint.Parse(field.Value, CultureInfo.InvariantCulture);
                        bytes = Helper.GetLittleEndianByteArray(unsignedInt);
                        encoded[2] = (byte)'I';
                        for (int k = 0; k < 4; k++)
                        {
                            encoded[3 + k] = bytes[3 - k];
                        }

                        break;
                    }

                default:
                    {
                        // Any other size is treated as a signed 32-bit integer.
                        int signedInt = int.Parse(field.Value, CultureInfo.InvariantCulture);
                        bytes = Helper.GetLittleEndianByteArray(signedInt);
                        encoded[2] = (byte)'i';
                        for (int k = 0; k < 4; k++)
                        {
                            encoded[3 + k] = bytes[3 - k];
                        }

                        break;
                    }
            }

            break;

        case "f": // float
            {
                float single = float.Parse(field.Value, CultureInfo.InvariantCulture);
                bytes = Helper.GetLittleEndianByteArray(single);
                for (int k = 0; k < 4; k++)
                {
                    encoded[3 + k] = bytes[3 - k];
                }

                break;
            }

        case "Z": // printable string
        case "H": // hex string
            // ASCII text followed by a terminating zero byte.
            bytes = System.Text.Encoding.ASCII.GetBytes(field.Value);
            bytes.CopyTo(encoded, 3);
            encoded[3 + bytes.Length] = (byte)'\0';
            break;

        default:
            throw new FileFormatException(Resource.BAM_InvalidOptValType);
    }

    return encoded;
}
/// <summary>
/// Returns an aligned sequence by parsing the next alignment block of the BAM file.
/// </summary>
/// <param name="start">
/// Start of the region of interest. A read that has a reference and whose
/// RefEndPos - 1 is less than this value is skipped.
/// </param>
/// <param name="end">
/// End of the region of interest. A read whose Pos (stored position + 1) is greater
/// than this value is skipped.
/// </param>
/// <returns>The parsed aligned sequence, or null when the read was skipped.</returns>
/// <exception cref="FileFormatException">A CIGAR operation code is outside 0-8.</exception>
/// <exception cref="FormatException">An optional field has an invalid value type.</exception>
private SAMAlignedSequence GetAlignedSequence(int start, int end)
{
    // The block is preceded by its own length, stored in 4 bytes.
    byte[] array = new byte[4];
    ReadUnCompressedData(array, 0, 4);
    int blockLen = Helper.GetInt32(array, 0);
    byte[] alignmentBlock = new byte[blockLen];
    ReadUnCompressedData(alignmentBlock, 0, blockLen);

    SAMAlignedSequence alignedSeq = new SAMAlignedSequence();
    int value;
    UInt32 unsignedValue;

    // 0-4 bytes: reference sequence index, -1 maps to "*".
    int refSeqIndex = Helper.GetInt32(alignmentBlock, 0);
    if (refSeqIndex == -1)
    {
        alignedSeq.RName = "*";
    }
    else
    {
        alignedSeq.RName = refSeqNames[refSeqIndex];
    }

    // 4-8 bytes
    alignedSeq.Pos = Helper.GetInt32(alignmentBlock, 4) + 1;

    // If there is no overlap there is no need to parse further.
    //     BAMPos > closedEnd
    // => (alignedSeq.Pos - 1) > end - 1
    if (alignedSeq.Pos > end)
    {
        return null;
    }

    // 8-12 bytes: "bin<<16|mapQual<<8|read_name_len"
    unsignedValue = Helper.GetUInt32(alignmentBlock, 8);

    // Mask and shift while the value is still unsigned, then convert. Casting to int
    // before shifting would sign-extend (or overflow in a checked build) whenever the
    // top bit of the 16-bit field is set, yielding a negative bin.
    // 10-12 bytes
    alignedSeq.Bin = (int)((unsignedValue & 0xFFFF0000) >> 16);

    // 9th byte
    alignedSeq.MapQ = (int)((unsignedValue & 0x0000FF00) >> 8);

    // 8th byte
    int queryNameLen = (int)(unsignedValue & 0x000000FF);

    // 12-16 bytes
    unsignedValue = Helper.GetUInt32(alignmentBlock, 12);

    // 14-16 bytes (same unsigned extraction as for the bin)
    int flagValue = (int)((unsignedValue & 0xFFFF0000) >> 16);
    alignedSeq.Flag = (SAMFlags)flagValue;

    // 12-14 bytes
    int cigarLen = (int)(unsignedValue & 0x0000FFFF);

    // 16-20 bytes
    int readLen = Helper.GetInt32(alignmentBlock, 16);

    // 20-24 bytes
    int mateRefSeqIndex = Helper.GetInt32(alignmentBlock, 20);
    if (mateRefSeqIndex != -1)
    {
        alignedSeq.MRNM = refSeqNames[mateRefSeqIndex];
    }
    else
    {
        alignedSeq.MRNM = "*";
    }

    // 24-28 bytes
    alignedSeq.MPos = Helper.GetInt32(alignmentBlock, 24) + 1;

    // 28-32 bytes
    alignedSeq.ISize = Helper.GetInt32(alignmentBlock, 28);

    // 32-(32+queryNameLen) bytes; the last byte of the name is not part of the text.
    alignedSeq.QName = System.Text.ASCIIEncoding.ASCII.GetString(alignmentBlock, 32, queryNameLen - 1);

    // CIGAR operation codes 0-8 map to these characters, in order.
    const string CigarOperations = "MIDNSHP=X";

    StringBuilder strbuilder = new StringBuilder();
    int startIndex = 32 + queryNameLen;

    for (int i = startIndex; i < (startIndex + cigarLen * 4); i += 4)
    {
        // Get the CIGAR operation length stored in the first 28 bits.
        UInt32 cigarValue = Helper.GetUInt32(alignmentBlock, i);
        strbuilder.Append(((cigarValue & 0xFFFFFFF0) >> 4).ToString(CultureInfo.InvariantCulture));

        // Get the CIGAR operation stored in the last 4 bits (masked before the cast
        // so a large operation length can never overflow the conversion).
        value = (int)(cigarValue & 0x0000000F);
        if (value >= CigarOperations.Length)
        {
            throw new FileFormatException(Properties.Resource.BAM_InvalidCIGAR);
        }

        strbuilder.Append(CigarOperations[value]);
    }

    string cigar = strbuilder.ToString();
    if (string.IsNullOrWhiteSpace(cigar))
    {
        alignedSeq.CIGAR = "*";
    }
    else
    {
        alignedSeq.CIGAR = cigar;
    }

    // If there is no overlap there is no need to parse further.
    //     ZeroBasedRefEnd < start
    // => (alignedSeq.RefEndPos - 1) < start
    if (alignedSeq.RefEndPos - 1 < start && alignedSeq.RName != Properties.Resource.SAM_NO_REFERENCE_DEFINED_INDICATOR)
    {
        return null;
    }

    startIndex += cigarLen * 4;
    strbuilder = new StringBuilder();
    int index = startIndex;

    // Each byte packs two bases (4 bits each). The last byte is handled after the loop
    // because its low nibble is only used when the read length is even.
    // NOTE(review): for readLen == 0 the code below still consumes one byte and emits
    // two characters - confirm whether zero-length reads can reach this method.
    for (; index < (startIndex + (readLen + 1) / 2) - 1; index++)
    {
        // Get first 4 bit value
        value = (alignmentBlock[index] & 0xF0) >> 4;
        strbuilder.Append(GetSeqChar(value));

        // Get last 4 bit value
        value = alignmentBlock[index] & 0x0F;
        strbuilder.Append(GetSeqChar(value));
    }

    value = (alignmentBlock[index] & 0xF0) >> 4;
    strbuilder.Append(GetSeqChar(value));
    if (readLen % 2 == 0)
    {
        value = alignmentBlock[index] & 0x0F;
        strbuilder.Append(GetSeqChar(value));
    }

    startIndex = index + 1;
    string strSequence = strbuilder.ToString();

    // A first quality byte of 0xFF means no qualities are stored; "*" is used instead.
    byte[] qualValues = new byte[readLen];
    string strQualValues = "*";

    if (alignmentBlock[startIndex] != 0xFF)
    {
        for (int i = startIndex; i < (startIndex + readLen); i++)
        {
            // Stored scores are shifted by 33 to obtain printable ASCII.
            qualValues[i - startIndex] = (byte)(alignmentBlock[i] + 33);
        }

        strQualValues = System.Text.ASCIIEncoding.ASCII.GetString(qualValues);
    }

    SAMParser.ParseQualityNSequence(alignedSeq, Alphabet, strSequence, strQualValues);

    startIndex += readLen;

    // Remaining bytes, if any, are optional fields: 2-byte tag, 1-byte type, value.
    if (alignmentBlock.Length > startIndex + 4 && alignmentBlock[startIndex] != 0x0 && alignmentBlock[startIndex + 1] != 0x0)
    {
        for (index = startIndex; index < alignmentBlock.Length;)
        {
            SAMOptionalField optionalField = new SAMOptionalField();
            optionalField.Tag = System.Text.ASCIIEncoding.ASCII.GetString(alignmentBlock, index, 2);
            index += 2;
            char vType = (char)alignmentBlock[index++];
            string valueType = vType.ToString();

            // SAM format supports [AifZH] for value type.
            // In BAM, an integer may be stored as a signed 8-bit integer (c), unsigned 8-bit integer (C),
            // signed short (s), unsigned short (S), signed 32-bit (i) or unsigned 32-bit integer (I),
            // depending on the signed magnitude of the integer. However, in SAM, all types of integers
            // are presented as type 'i'.
            string message = Helper.IsValidPatternValue("VType", valueType, BAMOptionalFieldRegex);
            if (!string.IsNullOrEmpty(message))
            {
                throw new FormatException(message);
            }

            // GetOptionalValue advances index past the value it reads.
            optionalField.Value = GetOptionalValue(vType, alignmentBlock, ref index).ToString();

            // Convert to SAM format.
            if ("cCsSI".IndexOf(vType) >= 0)
            {
                valueType = "i";
            }

            optionalField.VType = valueType;

            alignedSeq.OptionalFields.Add(optionalField);
        }
    }

    return alignedSeq;
}
/// <summary>
/// Reads the next alignment block and, if the filter accepts it, parses it into an
/// aligned sequence.
/// </summary>
/// <returns>The parsed aligned sequence, or null when Filter rejects the block.</returns>
/// <exception cref="Exception">A CIGAR operation code is outside 0-8.</exception>
/// <exception cref="ArgumentOutOfRangeException">An encoded quality score fails validation.</exception>
protected override SAMAlignedSequence GetAlignedSequence()
{
    // The block is preceded by its own length, stored in 4 bytes.
    byte[] array = new byte[4];
    ReadUnCompressedData(array, 0, 4);
    int blockLen = Helper.GetInt32(array, 0);
    byte[] alignmentBlock = new byte[blockLen];
    ReadUnCompressedData(alignmentBlock, 0, blockLen);

    // The whole block is always consumed, even when it is then filtered out.
    if (!Filter.Accept(alignmentBlock))
    {
        return null;
    }

    SAMAlignedSequence alignedSeq = new SAMAlignedSequence();
    int value;
    UInt32 unsignedValue;

    // 0-4 bytes: reference sequence index, -1 maps to "*".
    int refSeqIndex = Helper.GetInt32(alignmentBlock, 0);
    if (refSeqIndex == -1)
    {
        alignedSeq.SetPreValidatedRName("*");
    }
    else
    {
        alignedSeq.SetPreValidatedRName(RefSeqNames[refSeqIndex]);
    }

    // 4-8 bytes
    alignedSeq.Pos = Helper.GetInt32(alignmentBlock, 4) + 1;

    // 8-12 bytes: "bin<<16|mapQual<<8|read_name_len"
    unsignedValue = Helper.GetUInt32(alignmentBlock, 8);

    // 10-12 bytes hold the bin; this method does not store it.

    // 9th byte
    alignedSeq.MapQ = (int)((unsignedValue & 0x0000FF00) >> 8);

    // 8th byte
    int queryNameLen = (int)(unsignedValue & 0x000000FF);

    // 12-16 bytes
    unsignedValue = Helper.GetUInt32(alignmentBlock, 12);

    // 14-16 bytes. Mask and shift while still unsigned, then convert: casting to int
    // before shifting would sign-extend (or overflow in a checked build) whenever the
    // top bit of the 16-bit flag field is set.
    int flagValue = (int)((unsignedValue & 0xFFFF0000) >> 16);
    alignedSeq.Flag = (SAMFlags)flagValue;

    // 12-14 bytes
    int cigarLen = (int)(unsignedValue & 0x0000FFFF);

    // 16-20 bytes
    int readLen = Helper.GetInt32(alignmentBlock, 16);

    // 20-24 bytes
    int mateRefSeqIndex = Helper.GetInt32(alignmentBlock, 20);
    if (mateRefSeqIndex != -1)
    {
        alignedSeq.SetPreValidatedMRNM(RefSeqNames[mateRefSeqIndex]);
    }
    else
    {
        alignedSeq.SetPreValidatedMRNM("*");
    }

    // 24-28 bytes
    alignedSeq.MPos = Helper.GetInt32(alignmentBlock, 24) + 1;

    // 28-32 bytes
    alignedSeq.ISize = Helper.GetInt32(alignmentBlock, 28);

    // 32-(32+queryNameLen) bytes; the last byte of the name is not part of the text.
    alignedSeq.QName = System.Text.ASCIIEncoding.ASCII.GetString(alignmentBlock, 32, queryNameLen - 1);

    // CIGAR operation codes 0-8 map to these characters, in order.
    const string CigarOperations = "MIDNSHP=X";

    StringBuilder strbuilder = new StringBuilder();
    int startIndex = 32 + queryNameLen;

    for (int i = startIndex; i < (startIndex + cigarLen * 4); i += 4)
    {
        // Get the CIGAR operation length stored in the first 28 bits.
        UInt32 cigarValue = Helper.GetUInt32(alignmentBlock, i);
        strbuilder.Append(((cigarValue & 0xFFFFFFF0) >> 4).ToString(CultureInfo.InvariantCulture));

        // Get the CIGAR operation stored in the last 4 bits (masked before the cast
        // so a large operation length can never overflow the conversion).
        value = (int)(cigarValue & 0x0000000F);
        if (value >= CigarOperations.Length)
        {
            throw new Exception("Invalid CIGAR of query " + alignedSeq.QName);
        }

        strbuilder.Append(CigarOperations[value]);
    }

    string cigar = strbuilder.ToString();
    if (string.IsNullOrWhiteSpace(cigar))
    {
        alignedSeq.SetPreValidatedCIGAR("*");
    }
    else
    {
        alignedSeq.SetPreValidatedCIGAR(cigar);
    }

    startIndex += cigarLen * 4;
    var sequence = new byte[readLen];
    int sequenceIndex = 0;
    int index = startIndex;

    // Each byte packs two bases (4 bits each). The last byte is handled after the loop
    // because its low nibble is only used when the read length is even.
    // NOTE(review): for readLen == 0 the write after the loop indexes an empty array -
    // confirm whether zero-length reads can reach this method.
    for (; index < (startIndex + (readLen + 1) / 2) - 1; index++)
    {
        // Get first 4 bit value
        value = (alignmentBlock[index] & 0xF0) >> 4;
        sequence[sequenceIndex++] = GetSeqCharAsByte(value);

        // Get last 4 bit value
        value = alignmentBlock[index] & 0x0F;
        sequence[sequenceIndex++] = GetSeqCharAsByte(value);
    }

    value = (alignmentBlock[index] & 0xF0) >> 4;
    sequence[sequenceIndex++] = GetSeqCharAsByte(value);

    if (readLen % 2 == 0)
    {
        value = alignmentBlock[index] & 0x0F;
        sequence[sequenceIndex++] = GetSeqCharAsByte(value);
    }

    startIndex = index + 1;

    // A first quality byte of 0xFF means no qualities are stored; an asterisk is used instead.
    byte[] qualValues = new byte[readLen];
    if (alignmentBlock[startIndex] != 0xFF)
    {
        for (int i = startIndex; i < (startIndex + readLen); i++)
        {
            // Stored scores are shifted by 33 to obtain printable ASCII.
            qualValues[i - startIndex] = (byte)(alignmentBlock[i] + 33);
        }

        // Validate quality scores here.
        byte badVal;
        bool ok = QualitativeSequence.ValidateQualScores(qualValues, SAMParser.QualityFormatType, out badVal);
        if (!ok)
        {
            string message = string.Format(
                CultureInfo.InvariantCulture,
                "Invalid encoded quality score found: {0}",
                (char)badVal);
            throw new ArgumentOutOfRangeException("encodedQualityScores", message);
        }
    }
    else
    {
        qualValues = new byte[] { SAMParser.AsteriskAsByte };
    }

    // Values have already been validated above, so validation is not requested again.
    SAMParser.ParseQualityNSequence(alignedSeq, Alphabet, sequence, qualValues, false);

    startIndex += readLen;

    // Remaining bytes, if any, are optional fields: 2-byte tag, 1-byte type, value.
    if (alignmentBlock.Length > startIndex + 4 && alignmentBlock[startIndex] != 0x0 && alignmentBlock[startIndex + 1] != 0x0)
    {
        for (index = startIndex; index < alignmentBlock.Length;)
        {
            SAMOptionalField optionalField = new SAMOptionalField();
            optionalField.Tag = System.Text.ASCIIEncoding.ASCII.GetString(alignmentBlock, index, 2);
            index += 2;
            char vType = (char)alignmentBlock[index++];

            // SAM format supports [AifZH] for value type.
            // In BAM, an integer may be stored as a signed 8-bit integer (c), unsigned 8-bit integer (C),
            // signed short (s), unsigned short (S), signed 32-bit (i) or unsigned 32-bit integer (I),
            // depending on the signed magnitude of the integer. However, in SAM, all types of integers
            // are presented as type 'i'.
            // NOTE: code previously here validated the value type and threw; per the original
            // author that validation happens in the calls below and when the value is set.
            // GetOptionalValue advances index past the value it reads.
            optionalField.Value = GetOptionalValue(vType, alignmentBlock, ref index).ToString();

            // Convert to SAM format, where all integers are represented the same way.
            if ("cCsSI".IndexOf(vType) >= 0)
            {
                vType = 'i';
            }

            optionalField.VType = vType.ToString();

            alignedSeq.OptionalFields.Add(optionalField);
        }
    }

    return alignedSeq;
}
/// <summary>
/// Writes the body of an array-typed optional field into <paramref name="array"/>,
/// starting at index 3: the element type character, the element count (4 bytes),
/// then each element.
/// </summary>
/// <param name="array">Destination buffer; indexes 0-2 are not touched here.</param>
/// <param name="field">
/// Field whose Value starts with the element type character, followed by the
/// delimiter-separated elements.
/// </param>
/// <exception cref="Exception">The element type is not one of A, c, C, s, S, i, I, f.</exception>
private static void UpdateArrayType(byte[] array, SAMOptionalField field)
{
    string text = field.Value;
    char elementType = text[0];
    int elementWidth = GetSizeOfArrayType(elementType);
    string[] tokens = text.Split(DelimComma, StringSplitOptions.RemoveEmptyEntries);

    array[3] = (byte)elementType;
    int writePos = 4;

    // tokens[0] is the array type itself, so the element count is one less.
    byte[] scratch = Helper.GetLittleEndianByteArray(tokens.Length - 1);
    for (int k = 0; k < 4; k++)
    {
        array[writePos++] = scratch[k];
    }

    for (int tokenIndex = 1; tokenIndex < tokens.Length; tokenIndex++)
    {
        string token = tokens[tokenIndex];

        switch (elementType)
        {
            // Single-byte types overwrite the first byte of the current scratch buffer.
            case 'A': // printable character
                scratch[0] = (byte)token[0];
                break;

            case 'c': // signed 8-bit integer
                scratch[0] = (byte)sbyte.Parse(token, CultureInfo.InvariantCulture);
                break;

            case 'C': // unsigned 8-bit integer
                scratch[0] = byte.Parse(token, CultureInfo.InvariantCulture);
                break;

            // Wider types get a fresh buffer from the helper.
            case 's': // signed 16-bit integer
                scratch = Helper.GetLittleEndianByteArray(Int16.Parse(token, CultureInfo.InvariantCulture));
                break;

            case 'S': // unsigned 16-bit integer
                scratch = Helper.GetLittleEndianByteArray(UInt16.Parse(token, CultureInfo.InvariantCulture));
                break;

            case 'i': // signed 32-bit integer
                scratch = Helper.GetLittleEndianByteArray(int.Parse(token, CultureInfo.InvariantCulture));
                break;

            case 'I': // unsigned 32-bit integer
                scratch = Helper.GetLittleEndianByteArray(uint.Parse(token, CultureInfo.InvariantCulture));
                break;

            case 'f': // float
                scratch = Helper.GetLittleEndianByteArray(float.Parse(token, CultureInfo.InvariantCulture));
                break;

            default:
                throw new Exception(string.Format(Properties.Resource.BAM_InvalidOptValType, elementType));
        }

        // Copy exactly elementWidth bytes of the encoded element, in index order.
        int copied = 0;
        while (copied < elementWidth)
        {
            array[writePos++] = scratch[copied++];
        }
    }
}