Пример #1
0
 /// <summary>
 /// Records one aligned read: bumps the total read count, forwards the read to
 /// <c>ReadsByIndividuals</c>, and extends the end of <c>GenomeLocation</c> when the
 /// read reaches past it.
 /// </summary>
 /// <param name="seq">Aligned read to add.</param>
 public void AddRead(SAMAlignedSequence seq)
 {
     TotalReadCount++;
     ReadsByIndividuals.AddRead(seq);

     // The covered region only ever grows; a read ending earlier leaves it untouched.
     var readEnd = seq.RefEndPos;
     if (GenomeLocation.End < readEnd)
     {
         GenomeLocation.End = readEnd;
     }
 }
Пример #2
0
 /// <summary>
 /// Initializes a new instance of the <see cref="Bio.IO.PacBio.PacBioCCSRead"/> class
 /// from an aligned sequence that was parsed out of a BAM file.
 /// </summary>
 /// <param name="s">Parsed record whose optional fields and query sequence populate this read.</param>
 public PacBioCCSRead(SAMAlignedSequence s)
 {
     // Optional-field values are machine-written text, so they must be parsed with the
     // invariant culture. Parsing with the thread culture would misread "0.99" (or throw)
     // on systems whose decimal separator is a comma.
     var invariant = System.Globalization.CultureInfo.InvariantCulture;

     /* TODO: Converting from binary to string and back is beyond silly...
      * no performance hit worth worrying about at present, but in the future it might be worth
      * going directly from binary to the type rather than through string intermediates */
     foreach (var v in s.OptionalFields)
     {
         if (v.Tag == "sn")
         {
             // The first comma-separated element is skipped (presumably the array type code - confirm);
             // the remaining values are assigned in A, C, G, T order.
             var snrs = v.Value.Split(',').Skip(1).Select(x => Convert.ToSingle(x, invariant)).ToArray();
             SnrA = snrs [0];
             SnrC = snrs [1];
             SnrG = snrs [2];
             SnrT = snrs [3];
         }
         else if (v.Tag == "zm")
         {
             HoleNumber = Convert.ToInt32(v.Value, invariant);
         }
         else if (v.Tag == "pq")
         {
             // This tag is now deprecated by the rq tag
             ReadQuality = Convert.ToSingle(v.Value, invariant);
         }
         else if (v.Tag == "rq" && ReadQuality < 0)
         {
             // Only applied while ReadQuality is still negative, i.e. a value already taken
             // from "pq" is not overwritten (assumes the field starts out negative - confirm).
             ReadQuality = Convert.ToSingle(v.Value, invariant);
         }
         else if (v.Tag == "za")
         {
             AvgZscore = Convert.ToSingle(v.Value, invariant);
         }
         else if (v.Tag == "rs")
         {
             statusCounts = v.Value.Split(',').Skip(1).Select(x => Convert.ToInt32(x, invariant)).ToArray();
         }
         else if (v.Tag == "np")
         {
             NumPasses = Convert.ToInt32(v.Value, invariant);
         }
         else if (v.Tag == "RG")
         {
             ReadGroup = v.Value;
         }
         else if (v.Tag == "zs")
         {
             ZScores = v.Value.Split(',').Skip(1).Select(x => Convert.ToSingle(x, invariant)).ToArray();
         }
     }
     // TODO: We should use String.Intern here, but not available in PCL...
     // Movie = String.Intern(s.QuerySequence.ID.Split ('/') [0]);
     // The movie name is the part of the sequence ID before the first '/'.
     Movie    = s.QuerySequence.ID.Split('/') [0];
     Sequence = s.QuerySequence as QualitativeSequence;
 }
Пример #3
0
        // Validates the alignment header and returns the alignment as a SequenceAlignmentMap.
        // An input that already is a SequenceAlignmentMap is returned as-is; any other alignment is
        // converted using the SAM header objects stored in its metadata. In both cases the
        // reference sequence ranges are cached in refSequences (sorted only when a sorted BAM file
        // ordered by chromosome name and coordinates was requested).
        // Throws ArgumentException when a required SAM header is missing from the metadata.
        private SequenceAlignmentMap ValidateAlignment(ISequenceAlignment sequenceAlignment)
        {
            SequenceAlignmentMap samMap = sequenceAlignment as SequenceAlignmentMap;
            bool convertInput = samMap == null;

            if (convertInput)
            {
                SAMAlignmentHeader metadataHeader =
                    sequenceAlignment.Metadata[Helper.SAMAlignmentHeaderKey] as SAMAlignmentHeader;
                if (metadataHeader == null)
                {
                    throw new ArgumentException(Properties.Resource.SAMAlignmentHeaderNotFound);
                }

                ValidateAlignmentHeader(metadataHeader);
                samMap = new SequenceAlignmentMap(metadataHeader);
            }
            else
            {
                ValidateAlignmentHeader(samMap.Header);
            }

            // Cache the reference ranges before any per-sequence work is done.
            if (CreateSortedBAMFile && SortType == BAMSortByFields.ChromosomeNameAndCoordinates)
            {
                this.refSequences = SortSequenceRanges(samMap.Header.GetReferenceSequenceRanges());
            }
            else
            {
                this.refSequences = samMap.Header.GetReferenceSequenceRanges();
            }

            if (convertInput)
            {
                // Rebuild every aligned sequence as a SAMAlignedSequence inside the new map.
                foreach (IAlignedSequence source in sequenceAlignment.AlignedSequences)
                {
                    SAMAlignedSequenceHeader sourceHeader =
                        source.Metadata[Helper.SAMAlignedSequenceHeaderKey] as SAMAlignedSequenceHeader;
                    if (sourceHeader == null)
                    {
                        throw new ArgumentException(Properties.Resource.SAMAlignedSequenceHeaderNotFound);
                    }

                    SAMAlignedSequence converted = new SAMAlignedSequence(sourceHeader);
                    converted.QuerySequence = source.Sequences[0];
                    samMap.QuerySequences.Add(converted);
                }
            }

            return samMap;
        }
Пример #4
0
 /// <summary>
 /// Writes one aligned sequence in the configured output format: through the BAM formatter
 /// into the uncompressed BAM stream, or as a SAM text record through <c>writer</c>.
 /// </summary>
 /// <param name="header">Alignment header (used by the BAM path only).</param>
 /// <param name="alignedSequence">Aligned sequence to write.</param>
 private void WriteAlignedSequence(SAMAlignmentHeader header, SAMAlignedSequence alignedSequence)
 {
     bool writeAsSam = !(UnCompressedBAM || BAMOutput);
     if (writeAsSam)
     {
         SAMFormatter.WriteSAMAlignedSequence(alignedSequence, writer);
         return;
     }

     // For compressed BAM output the uncompressed stream is compressed later,
     // before it is sent to the output stream.
     bamformatter.WriteAlignedSequence(header, alignedSequence, bamUncompressedOutStream);
 }
Пример #5
0
        // Validates the alignment and returns it as a SequenceAlignmentMap.
        // A SequenceAlignmentMap input has its header and every query sequence validated and is
        // returned as-is. Any other alignment is converted: the SAM header objects are taken from
        // the metadata, validated, and each aligned sequence is rebuilt as a SAMAlignedSequence
        // inside a new map. The sorted reference ranges are cached in _refSequences either way.
        // Throws ArgumentException when a header is missing or a query sequence header is invalid.
        private SequenceAlignmentMap ValidateAlignment(ISequenceAlignment sequenceAlignment)
        {
            SequenceAlignmentMap seqAlignmentMap = sequenceAlignment as SequenceAlignmentMap;

            if (seqAlignmentMap != null)
            {
                ValidateAlignmentHeader(seqAlignmentMap.Header);
                _refSequences = SortSequenceRanges(seqAlignmentMap.Header.GetReferenceSequenceRanges());

                foreach (SAMAlignedSequence alignedSequence in seqAlignmentMap.QuerySequences)
                {
                    // IsValidHeader returns a message describing the problem; empty means valid.
                    string message = alignedSequence.IsValidHeader();
                    if (!string.IsNullOrEmpty(message))
                    {
                        throw new ArgumentException(message);
                    }

                    ValidateSQHeader(alignedSequence.RName);
                }

                return(seqAlignmentMap);
            }

            SAMAlignmentHeader header = sequenceAlignment.Metadata[Helper.SAMAlignmentHeaderKey] as SAMAlignmentHeader;

            if (header == null)
            {
                throw new ArgumentException(Resource.SAMAlignmentHeaderNotFound);
            }

            ValidateAlignmentHeader(header);

            seqAlignmentMap = new SequenceAlignmentMap(header);
            _refSequences   = SortSequenceRanges(seqAlignmentMap.Header.GetReferenceSequenceRanges());

            foreach (IAlignedSequence alignedSeq in sequenceAlignment.AlignedSequences)
            {
                SAMAlignedSequenceHeader alignedHeader = alignedSeq.Metadata[Helper.SAMAlignedSequenceHeaderKey] as SAMAlignedSequenceHeader;
                if (alignedHeader == null)
                {
                    throw new ArgumentException(Resource.SAMAlignedSequenceHeaderNotFound);
                }

                ValidateAlignedSequenceHeader(alignedHeader);
                ValidateSQHeader(alignedHeader.RName);

                SAMAlignedSequence samAlignedSeq = new SAMAlignedSequence(alignedHeader);
                samAlignedSeq.QuerySequence = alignedSeq.Sequences[0];

                // Store the converted read; without this the returned map carries no query
                // sequences and the conversion work above is thrown away.
                seqAlignmentMap.QuerySequences.Add(samAlignedSeq);
            }

            return(seqAlignmentMap);
        }
Пример #6
0
 /// <summary>
 /// Central guard for the assumptions this class makes about a read, so that individual
 /// methods do not need their own checks. A read is accepted only when it has a reference
 /// name, ends after its start position, has a CIGAR other than "*", and carries a
 /// <c>QualitativeSequence</c> as its query sequence.
 /// </summary>
 /// <param name="seq">Read to check.</param>
 /// <exception cref="ArgumentNullException">When <paramref name="seq"/> is null.</exception>
 /// <exception cref="ArgumentException">When any of the listed conditions is violated.</exception>
 private void validateSequence(SAMAlignedSequence seq)
 {
     if (seq == null)
     {
         throw new ArgumentNullException("seq");
     }

     bool usable = !String.IsNullOrEmpty(seq.RName)
                   && seq.RefEndPos > seq.Pos
                   && !String.IsNullOrEmpty(seq.CIGAR)
                   && seq.CIGAR != "*"
                   && seq.QuerySequence is QualitativeSequence;

     if (!usable)
     {
         string description = seq.ToString();
         throw new ArgumentException("Tried to build a pileup with an invalid sequence.  Sequence was:\n" + description);
     }
 }
Пример #7
0
        /// <summary>
        /// Writes one aligned sequence to <c>_write</c> as a tab-separated record: a leading
        /// newline, the eleven fixed columns, then one <c>tag:type:value</c> column per optional field.
        /// </summary>
        /// <param name="alignedSequence">Aligned sequence to display.</param>
        /// <param name="stream">Not used by this method.</param>
        private void DisplaySeqAlignments(SAMAlignedSequence alignedSequence, FileStream stream = null)
        {
            _write.Write("\n");

            // "*" stands in for an empty sequence and for missing quality scores.
            string bases = alignedSequence.QuerySequence.Count > 0
                ? alignedSequence.QuerySequence.ToString()
                : "*";

            string qualities = "*";
            QualitativeSequence scored = alignedSequence.QuerySequence as QualitativeSequence;
            if (scored != null)
            {
                byte[] rawScores = scored.Scores;
                qualities = System.Text.Encoding.ASCII.GetString(rawScores);
            }

            // The flag is rendered as hex, as a description, or as a plain integer.
            string flagText;
            if (FlagInHex)
            {
                flagText = String.Format("0x{0:x2}", (int)alignedSequence.Flag);
            }
            else if (FlagAsString)
            {
                flagText = GetFlagDesc(alignedSequence.Flag);
            }
            else
            {
                flagText = ((int)alignedSequence.Flag).ToString();
            }

            // A mate reference equal to the read's own reference is abbreviated to "=".
            string mateReference = alignedSequence.MRNM.Equals(alignedSequence.RName)
                ? "="
                : alignedSequence.MRNM;

            _write.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}",
                         alignedSequence.QName, flagText, alignedSequence.RName,
                         alignedSequence.Pos, alignedSequence.MapQ, alignedSequence.CIGAR,
                         mateReference,
                         alignedSequence.MPos, alignedSequence.ISize, bases, qualities);

            foreach (var optional in alignedSequence.OptionalFields)
            {
                _write.Write("\t{0}:{1}:{2}", optional.Tag, optional.VType, optional.Value);
            }
        }
Пример #8
0
        /// <summary>
        /// Lazily reads aligned sequences through <c>bamparser</c> until it reports end of file,
        /// yielding only the records that pass the active filter (all records when no filter is applied).
        /// </summary>
        /// <param name="bamStream">BAM file stream. Not read directly here; records come from <c>bamparser</c>.</param>
        /// <returns>The aligned sequences to display, in file order.</returns>
        private IEnumerable <SAMAlignedSequence> GetAlignedSequence(Stream bamStream)
        {
            bool isFilterRequired = IsFilterApplied();

            while (!bamparser.IsEOF())
            {
                SAMAlignedSequence alignedSequence = bamparser.GetAlignedSequence(false);

                // The parser can hand back a null record; skip it instead of passing null to
                // Filter or to the consumer, where it would be dereferenced.
                if (alignedSequence == null)
                {
                    continue;
                }

                if (!isFilterRequired || Filter(alignedSequence))
                {
                    yield return(alignedSequence);
                }
            }
        }
Пример #9
0
        // Computes the total size of the optional fields of an aligned sequence: 3 per field plus
        // the magnitude of the size reported by GetOptionalFieldValueSize for that field's value.
        // A reported size of 0 is treated as an invalid value and raises FormatException.
        // NOTE(review): a negative reported size is counted by its absolute value; what the sign
        // signals is decided by GetOptionalFieldValueSize - confirm there.
        private static int GetAuxiliaryDataLength(SAMAlignedSequence alignedSeq)
        {
            int totalLength = 0;

            foreach (SAMOptionalField optional in alignedSeq.OptionalFields)
            {
                totalLength += 3;

                int reportedSize = GetOptionalFieldValueSize(optional);
                if (reportedSize == 0)
                {
                    throw new FormatException(
                        string.Format(
                            CultureInfo.InvariantCulture,
                            Resource.BAM_InvalidIntValueInOptFieldOfAlignedSeq,
                            optional.Value,
                            optional.Tag,
                            alignedSeq.QName));
                }

                if (reportedSize < 0)
                {
                    reportedSize = -reportedSize;
                }

                totalLength += reportedSize;
            }

            return totalLength;
        }
Пример #10
0
        /// <summary>
        /// Parses a BAM file with data virtualization enforced, then re-parses a single aligned
        /// sequence through a sequence pointer and checks its query sequence against the
        /// expected value from the test configuration.
        /// </summary>
        public void ValidateBAMParseAlignedSeqWithSeqPointer()
        {
            // Expected values and input locations come from the test configuration XML.
            var xml = _utilityObj._xmlUtil;
            string expectedSequence = xml.GetTextValue(
                Constants.BAMToSAMConversionNode, Constants.ExpectedSeqWithPointersNode);
            string bamFilePath = xml.GetTextValue(
                Constants.BAMToSAMConversionNode, Constants.FilePathNode);
            string pointerLineText = xml.GetTextValue(
                Constants.BAMToSAMConversionNode, Constants.LineNumberToPointNode);
            string startIndexText = xml.GetTextValue(
                Constants.BAMToSAMConversionNode, Constants.StartIndexNode);
            string endIndexText = xml.GetTextValue(
                Constants.BAMToSAMConversionNode, Constants.EndIndexNode);

            using (BAMParser parser = new BAMParser())
            {
                parser.EnforceDataVirtualization = true;

                SequenceAlignmentMap parsedMap = parser.Parse(bamFilePath);
                Assert.IsNotNull(parsedMap);

                // Build the pointer that identifies the record to re-parse.
                int pointerLine = Int32.Parse(pointerLineText, (IFormatProvider)null);
                int startIndex  = Int32.Parse(startIndexText, (IFormatProvider)null);
                int endIndex    = Int32.Parse(endIndexText, (IFormatProvider)null);
                SequencePointer pointer = GetBAMSequencePointer(pointerLine, startIndex, endIndex);

                SAMAlignedSequence alignedSeq =
                    (SAMAlignedSequence)parser.ParseAlignedSequence(pointer);

                Assert.AreEqual(expectedSequence, alignedSeq.QuerySequence.ToString());

                string consoleMessage = string.Format(
                    (IFormatProvider)null,
                    "BAM Parser BVT : Sequence alignment aligned seq {0} validate successfully",
                    alignedSeq.Sequences[0].ToString());
                Console.WriteLine(consoleMessage);

                string logMessage = string.Format(
                    (IFormatProvider)null,
                    "BAM Parser BVT : Sequence alignment aligned seq validate successfully");
                ApplicationLog.WriteLine(logMessage);
            }
        }
Пример #11
0
        /// <summary>
        /// Writes a single SAMAlignedSequence to the given stream, after caching the sorted
        /// reference sequence ranges of <paramref name="header"/> in <c>_refSequences</c>.
        /// </summary>
        /// <param name="header">Header from SAM object.</param>
        /// <param name="alignedSeq">SAMAlignedSequence object.</param>
        /// <param name="writer">Stream to write.</param>
        /// <exception cref="ArgumentNullException">When any argument is null.</exception>
        public void WriteAlignedSequence(SAMAlignmentHeader header, SAMAlignedSequence alignedSeq, Stream writer)
        {
            // Arguments are checked in declaration order; the first null one is reported.
            string missingArgument =
                header == null ? "header"
                : alignedSeq == null ? "alignedSeq"
                : writer == null ? "writer"
                : null;

            if (missingArgument != null)
            {
                throw new ArgumentNullException(missingArgument);
            }

            var referenceRanges = header.GetReferenceSequenceRanges();
            _refSequences = SortSequenceRanges(referenceRanges);

            WriteAlignedSequence(alignedSeq, writer);
        }
Пример #12
0
            /// <summary>
            /// Updates the linear index array for an aligned read: every window the read spans
            /// receives <paramref name="offset"/> when the window has no offset yet or its stored
            /// offset is greater. Also tracks the largest window index seen so far.
            /// </summary>
            /// <param name="alignedSeq">Aligned read whose start and end positions select the windows.</param>
            /// <param name="offset">File offset to record for the read.</param>
            internal void UpdateLinearArrayIndex(SAMAlignedSequence alignedSeq, FileOffset offset)
            {
                // Positions become zero-based (clamped at 0), then map to a window index by
                // dropping the low 14 bits, i.e. windows of 16,384 positions.
                int zeroBasedStart = alignedSeq.Pos > 0 ? alignedSeq.Pos - 1 : 0;
                int zeroBasedEnd   = alignedSeq.RefEndPos > 0 ? alignedSeq.RefEndPos - 1 : 0;
                int firstWindow    = zeroBasedStart >> 14;
                int lastWindow     = zeroBasedEnd >> 14;

                if (largestBinSeen < lastWindow)
                {
                    largestBinSeen = lastWindow;
                }

                for (int window = firstWindow; window <= lastWindow; window++)
                {
                    var stored = offSetArray[window];

                    //TODO: Is second check necessary?  Seems to always be true as we are doing things in order
                    bool isUnset = stored.BothDataElements == 0;
                    if (isUnset || stored > offset)
                    {
                        offSetArray[window] = offset;
                    }
                }
            }
Пример #13
0
        /// <summary>
        /// Checks that SAMAlignedSequence.GetObjectData rejects a null SerializationInfo with
        /// ArgumentNullException and then runs with a valid SerializationInfo.
        /// </summary>
        public void ValidateSAMAlignedSequenceGetObjectData()
        {
            SerializationInfo info = null;
            StreamingContext streamingContext = new StreamingContext(StreamingContextStates.All);
            SAMAlignedSequence target = new SAMAlignedSequence();

            bool nullInfoRejected = false;
            try
            {
                target.GetObjectData(info, streamingContext);
            }
            catch (ArgumentNullException)
            {
                nullInfoRejected = true;
            }

            if (!nullInfoRejected)
            {
                Assert.Fail();
            }

            // With a real SerializationInfo the same call must go through.
            info = new SerializationInfo(typeof(SAMAlignedSequence), new FormatterConverter());
            target.GetObjectData(info, streamingContext);

            ApplicationLog.WriteLine("SAMAlignedSequence P1 : Successfully validated GetObjectData() method");
        }
Пример #14
0
        /// <summary>
        /// Sorts the indexes of the query sequences in <c>sequenceAlignMap</c> by QName.
        /// Each QName maps to a comma-separated list of the indexes of the reads that carry it.
        /// Whenever the sorted list reaches <c>SortedListMaxCount</c> entries it is written out
        /// with <c>WriteToFile</c> and cleared; any remainder is written at the end.
        /// </summary>
        /// <returns>The values returned by <c>WriteToFile</c>, one per chunk written, in write order.</returns>
        private IList <string> SortByReadNames()
        {
            IList <string> files = new List <string>();

            var sortedList = new System.Collections.Generic.SortedList <object, string>();

            for (int index = 0; index < sequenceAlignMap.QuerySequences.Count; index++)
            {
                SAMAlignedSequence alignedSeq = sequenceAlignMap.QuerySequences[index];
                string             indexText  = index.ToString(CultureInfo.InvariantCulture);
                string             indices;

                if (sortedList.TryGetValue(alignedSeq.QName, out indices))
                {
                    // QName already present in the current chunk: append this read's index.
                    sortedList[alignedSeq.QName] = string.Format(CultureInfo.InvariantCulture, "{0},{1}", indices, indexText);
                }
                else
                {
                    sortedList.Add(alignedSeq.QName, indexText);
                }

                // Flush a full chunk; "files" is always non-null here, so no re-initialisation is needed.
                if (sortedList.Count >= SortedListMaxCount)
                {
                    files.Add(WriteToFile(sortedList));
                    sortedList.Clear();
                }
            }

            if (sortedList.Count > 0)
            {
                files.Add(WriteToFile(sortedList));
                sortedList.Clear();
            }

            return(files);
        }
Пример #15
0
        // Consumes every consecutive read that is aligned to the given contig and starts exactly at
        // current_position (read positions are compared after subtracting 1).
        // Each such read updates the aligned read/base-pair counters; reads with MapQ > 0 also update
        // the "used" counters and are queued as Padded_Read. After each read the parser is advanced
        // to the next record that is non-null, not a dummy read and has RName other than "*";
        // next_alignment becomes null when the parser was already at end of file.
        private void Search_Reads(BAMParser parser, ref SAMAlignedSequence next_alignment, string contig_name,
                                  ref long number_of_aligned_reads, ref long number_of_aligned_base_pairs, ref long number_of_used_reads,
                                  ref long number_of_used_base_pairs, Queue <Padded_Read> read_queue, long current_position)
        {
            while (next_alignment != null &&
                   !next_alignment.IsDummyRead &&
                   next_alignment.RName == contig_name &&
                   (next_alignment.Pos - 1) == current_position)
            {
                // The read starts at the current position: count it.
                number_of_aligned_reads++;
                number_of_aligned_base_pairs += next_alignment.QuerySequence.Count;

                // Maybe we should let the minimum alignment quality be a parameter.
                // We currently leave it for the user to pre-filter the BAM file.
                bool isUsable = next_alignment.MapQ > 0;
                if (isUsable)
                {
                    number_of_used_reads++;
                    number_of_used_base_pairs += next_alignment.QuerySequence.Count;
                    read_queue.Enqueue(new Padded_Read(next_alignment));
                }

                // Nothing left to read: signal the end with a null alignment.
                if (parser.IsEOF())
                {
                    next_alignment = null;
                    break;
                }

                // Advance until a usable record is found or the file ends.
                do
                {
                    next_alignment = parser.GetAlignedSequence(true);
                }
                while ((next_alignment == null || next_alignment.RName == "*" || next_alignment.IsDummyRead) && !parser.IsEOF());
            }
        }
Пример #16
0
        /// <summary>
        /// Formats an aligned sequence as one tab-separated line: the eleven fixed columns
        /// followed by the optional fields, each rendered as <c>tag:type:value</c> and joined by tabs.
        /// </summary>
        /// <param name="sam">Aligned sequence to format.</param>
        /// <returns>The formatted line, or null when <paramref name="sam"/> is null.</returns>
        public string SAMToString(SAMAlignedSequence sam)
        {
            if (sam == null)
            {
                return null;
            }

            // Columns are evaluated in output order.
            object[] columns = new object[]
            {
                sam.QName,
                (int)sam.Flag,
                sam.RName,
                sam.Pos,
                sam.MapQ,
                sam.CIGAR,
                sam.MRNM,
                sam.MPos,
                sam.ISize,
                sam.GetQuerySequenceString(),
                sam.GetQualityScoresString(),
                sam.OptionalFields
                   .Select(of => string.Format("{0}:{1}:{2}", of.Tag, of.VType, of.Value))
                   .Merge("\t")
            };

            return string.Format("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11}", columns);
        }
Пример #17
0
        // Validates the alignment header and returns the alignment as a SequenceAlignmentMap.
        // A SequenceAlignmentMap input is returned as-is; any other alignment is converted using
        // the SAM header objects stored in its metadata. The sorted reference sequence ranges
        // are cached in _refSequences in both cases.
        // Throws ArgumentException when a required SAM header is missing from the metadata.
        private SequenceAlignmentMap ValidateAlignment(ISequenceAlignment sequenceAlignment)
        {
            SequenceAlignmentMap result = sequenceAlignment as SequenceAlignmentMap;
            bool mustConvert = result == null;

            if (mustConvert)
            {
                SAMAlignmentHeader metadataHeader =
                    sequenceAlignment.Metadata[Helper.SAMAlignmentHeaderKey] as SAMAlignmentHeader;
                if (metadataHeader == null)
                {
                    throw new ArgumentException(Resource.SAMAlignmentHeaderNotFound);
                }

                ValidateAlignmentHeader(metadataHeader);
                result = new SequenceAlignmentMap(metadataHeader);
            }
            else
            {
                ValidateAlignmentHeader(result.Header);
            }

            _refSequences = SortSequenceRanges(result.Header.GetReferenceSequenceRanges());

            if (!mustConvert)
            {
                return result;
            }

            // Rebuild every aligned sequence as a SAMAlignedSequence inside the new map.
            foreach (IAlignedSequence source in sequenceAlignment.AlignedSequences)
            {
                SAMAlignedSequenceHeader sourceHeader =
                    source.Metadata[Helper.SAMAlignedSequenceHeaderKey] as SAMAlignedSequenceHeader;
                if (sourceHeader == null)
                {
                    throw new ArgumentException(Resource.SAMAlignedSequenceHeaderNotFound);
                }

                SAMAlignedSequence converted = new SAMAlignedSequence(sourceHeader);
                converted.QuerySequence = source.Sequences[0];
                result.QuerySequences.Add(converted);
            }

            return result;
        }
Пример #18
0
        /// <summary>
        /// Parses a SAM file with data virtualization enforced, then re-parses a single aligned
        /// sequence through a sequence pointer and checks its query sequence against the
        /// expected value from the test configuration.
        /// </summary>
        public void ValidateSAMParseAlignedSeqWithSeqPointer()
        {
            // Expected values and input locations come from the test configuration XML.
            var xml = Utility._xmlUtil;
            string expectedSequence = xml.GetTextValue(
                Constants.SAMFileWithAllFieldsNode, Constants.ExpectedSeqWithPointersNode);
            string samFilePath = xml.GetTextValue(
                Constants.SAMFileWithAllFieldsNode, Constants.FilePathNode);
            string pointerLineText = xml.GetTextValue(
                Constants.SAMFileWithAllFieldsNode, Constants.LineNumberToPointNode);

            SAMParser parser = new SAMParser();
            parser.EnforceDataVirtualization = true;

            SequenceAlignmentMap parsedMap = parser.Parse(samFilePath);
            Assert.IsNotNull(parsedMap);

            // Build the pointer and set the two index offsets this test relies on.
            SequencePointer pointer = GetSequencePointer(Int32.Parse(pointerLineText));
            pointer.IndexOffsets[0] = 156;
            pointer.IndexOffsets[1] = 304;

            SAMAlignedSequence alignedSeq = (SAMAlignedSequence)parser.ParseAlignedSequence(pointer);

            Assert.AreEqual(expectedSequence, alignedSeq.QuerySequence.ToString());

            string consoleMessage = string.Format(
                null,
                "SAM Parser BVT : Sequence alignment aligned seq {0} validate successfully",
                alignedSeq.Sequences[0].ToString());
            Console.WriteLine(consoleMessage);

            string logMessage = string.Format(
                null,
                "SAM Parser BVT : Sequence alignment aligned seq validate successfully");
            ApplicationLog.WriteLine(logMessage);
        }
Пример #19
0
        /// <summary>
        /// Copies a virtual aligned sequence list into an array with CopyTo and checks that
        /// every copied entry has the same query sequence text as the list entry at that index.
        /// </summary>
        public void ValidateVirtualBAMAlignedSequenceListCopyTo()
        {
            VirtualAlignedSequenceList <SAMAlignedSequence> virtualList =
                GetBAMAlignedSequence(Constants.BAMFileWithMultipleAlignedSeqsNode);

            // Destination array sized to hold the whole list, filled from index 0.
            SAMAlignedSequence[] copied = new SAMAlignedSequence[virtualList.Count];
            virtualList.CopyTo(copied, 0);

            int position = 0;
            while (position < virtualList.Count)
            {
                string copiedText = copied[position].QuerySequence.ToString();
                string sourceText = virtualList[position].QuerySequence.ToString();
                Assert.AreEqual(copiedText, sourceText);
                position++;
            }

            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "Virtual AlignedSequenceList Bvt : Validated the VAS CopyTo"));
            Console.WriteLine(string.Format((IFormatProvider)null,
                                            "Virtual AlignedSequenceList Bvt : Validated the VAS CopyTo"));
        }
Пример #20
0
        /// <summary>
        /// Lazily parses aligned sequences from a SAM text reader, one per line, yielding only
        /// the records that pass the active filter (all records when no filter is applied).
        /// </summary>
        /// <param name="textReader">Reader positioned on the SAM records to parse.</param>
        /// <returns>The aligned sequences to display, in file order.</returns>
        private IEnumerable <SAMAlignedSequence> GetAlignedSequence(MBFTextReader textReader)
        {
            bool filtering = IsFilterApplied();

            for (; textReader.HasLines; textReader.GoToNextLine())
            {
                SAMAlignedSequence parsed = SAMParser.ParseSequence(textReader, false);

                // Without a filter every record is shown; otherwise Filter decides.
                if (!filtering || Filter(parsed))
                {
                    yield return parsed;
                }
            }
        }
Пример #21
0
        /// <summary>
        /// Lazily reads aligned sequences through <c>bamparser</c> until it reports end of file.
        /// Null records are skipped; the rest are yielded when they pass the active filter
        /// (all of them when no filter is applied).
        /// </summary>
        /// <param name="bamStream">BAM file stream. Not read directly here; records come from <c>bamparser</c>.</param>
        /// <returns>The aligned sequences to display, in file order.</returns>
        private IEnumerable <SAMAlignedSequence> GetAlignedSequence(Stream bamStream)
        {
            bool filtering = IsFilterApplied();

            while (!bamparser.IsEOF())
            {
                SAMAlignedSequence record = bamparser.GetAlignedSequence(false);

                //TODO: The parser should probably never return a null sequence
                //this may be a band aid over a lurking problem, fix in future
                if (record == null)
                {
                    continue;
                }

                if (filtering && !Filter(record))
                {
                    continue;
                }

                yield return record;
            }
        }
Пример #22
0
        /// <summary>
        /// General helper for the negative tests of SAMParser.ParseQualityNSequence: calls it
        /// with the invalid argument combination selected by <paramref name="method"/> and
        /// expects an ArgumentException or a FormatException. The test fails when no
        /// exception is thrown (including for an unrecognised <paramref name="method"/>).
        /// </summary>
        /// <param name="method">Selects which invalid argument combination to exercise.</param>
        private static void ValidateQualitySeqLength(ParseOrFormatQualLength method)
        {
            SAMAlignedSequence alignedSeq = new SAMAlignedSequence();

            try
            {
                if (method == ParseOrFormatQualLength.AlignedSeq)
                {
                    // No QName set on the aligned sequence.
                    SAMParser.ParseQualityNSequence(alignedSeq, Alphabets.DNA, null, String.Empty, null);
                }
                else if (method == ParseOrFormatQualLength.Sequencedata)
                {
                    alignedSeq.QName = "Quality Value";
                    SAMParser.ParseQualityNSequence(alignedSeq, Alphabets.DNA, null, String.Empty, null);
                }
                else if (method == ParseOrFormatQualLength.Qualitydata)
                {
                    alignedSeq.QName = "Quality Value";
                    SAMParser.ParseQualityNSequence(alignedSeq, Alphabets.DNA, null, Constants.QualitySequence, null);
                }
                else if (method == ParseOrFormatQualLength.QualityLength)
                {
                    alignedSeq.QName = "Quality Value";
                    SAMParser.ParseQualityNSequence(
                        alignedSeq,
                        Alphabets.DNA,
                        null,
                        Constants.QualitySequence,
                        new Sequence(Alphabets.DNA, Constants.QualityLength));
                }

                // Reaching this point means nothing was thrown.
                Assert.Fail();
            }
            catch (ArgumentException)
            {
                ApplicationLog.WriteLine("SAM Parser P2 : Successfully validated the exception");
                Console.WriteLine("SAM Parser P2 : Successfully validated the exception");
            }
            catch (FormatException)
            {
                ApplicationLog.WriteLine("SAM Parser P2 : Successfully validated the exception");
                Console.WriteLine("SAM Parser P2 : Successfully validated the exception");
            }
        }
Пример #23
0
        /// <summary>
        /// Packs the query sequence of an aligned sequence into 4-bit codes, two bases per byte,
        /// with the earlier base in the high-order 4 bits. Codes: '=' 0, 'A' 1, 'C' 2, 'G' 4,
        /// 'T' 8, anything else 15. Positions listed in <c>DotSymbolIndexes</c> are encoded as
        /// 'N' and positions in <c>EqualSymbolIndexes</c> as '=' (the latter wins when both apply).
        /// </summary>
        /// <param name="alignedSeq">Aligned sequence whose query sequence is encoded.</param>
        /// <returns>The packed bytes; an empty array when there is no query sequence.</returns>
        /// <exception cref="ArgumentException">When the query sequence alphabet is not DNA.</exception>
        private static byte[] GetEncodedSequence(SAMAlignedSequence alignedSeq)
        {
            List <byte> packed = new List <byte>();
            ISequence   query  = alignedSeq.QuerySequence;

            if (query == null)
            {
                return packed.ToArray();
            }

            if (query.Alphabet != Alphabets.DNA)
            {
                throw new ArgumentException(Resource.SAMFormatterSupportsDNAOnly);
            }

            for (int index = 0; index < query.Count; index++)
            {
                char symbol = query[index].Symbol;

                // NOTE(review): matched positions are removed from the index lists, so this method
                // mutates alignedSeq and a second encoding of the same object would no longer see
                // them - confirm this is intended.
                if (alignedSeq.DotSymbolIndexes.Count > 0 && alignedSeq.DotSymbolIndexes.Contains(index))
                {
                    symbol = 'N';
                    alignedSeq.DotSymbolIndexes.Remove(index);
                }

                if (alignedSeq.EqualSymbolIndexes.Count > 0 && alignedSeq.EqualSymbolIndexes.Contains(index))
                {
                    symbol = '=';
                    alignedSeq.EqualSymbolIndexes.Remove(index);
                }

                byte code;
                if (symbol == '=')
                {
                    code = 0;
                }
                else if (symbol == 'A')
                {
                    code = 1;
                }
                else if (symbol == 'C')
                {
                    code = 2;
                }
                else if (symbol == 'G')
                {
                    code = 4;
                }
                else if (symbol == 'T')
                {
                    code = 8;
                }
                else
                {
                    code = 15;
                }

                bool startsNewByte = index % 2 == 0;
                if (startsNewByte)
                {
                    // First base of a pair: occupy the high nibble of a fresh byte.
                    packed.Add((byte)(code << 4));
                }
                else
                {
                    // Second base of a pair: fill the low nibble of the last byte.
                    int last = packed.Count - 1;
                    packed[last] = (byte)(packed[last] | code);
                }
            }

            return packed.ToArray();
        }
Пример #24
0
        /// <summary>
        /// Writes one SAMAlignedSequence to the specified stream as a binary alignment record.
        /// </summary>
        /// <remarks>
        /// Fields are written in this order: block size, reference id, position,
        /// bin/mapping-quality/name-length word, flag/CIGAR-length word, read length,
        /// mate reference id, mate position, insert size, read name (null terminated),
        /// CIGAR operations, packed bases, quality values and optional fields.
        /// All integers are written little-endian.
        /// </remarks>
        /// <param name="alignedSeq">SAMAlignedSequence object.</param>
        /// <param name="writer">Stream to write.</param>
        private void WriteAlignedSequence(SAMAlignedSequence alignedSeq, Stream writer)
        {
            // Get the total block size required.
            int blocksize = GetBlockSize(alignedSeq);

            // Get Reference sequence index.
            int rid = GetRefSeqID(alignedSeq.RName);

            // bin<<16|mapQual<<8|read_name_len (including NULL)
            uint bin_mq_nl = (uint)alignedSeq.Bin << 16;

            bin_mq_nl = bin_mq_nl | (uint)alignedSeq.MapQ << 8;
            bin_mq_nl = bin_mq_nl | (uint)(alignedSeq.QName.Length + 1);

            // flag<<16|cigar_len
            uint flag_nc = (uint)alignedSeq.Flag << 16;

            flag_nc = flag_nc | (uint)GetCIGARLength(alignedSeq.CIGAR);

            int readLen = alignedSeq.QuerySequence.Count;

            int mateRefId = GetRefSeqID(alignedSeq.MRNM);

            byte[] readName = System.Text.ASCIIEncoding.ASCII.GetBytes(alignedSeq.QName);

            // Cigar: op_len<<4|op. Op: MIDNSHP=>0123456
            IList <uint> encodedCIGAR = GetEncodedCIGAR(alignedSeq.CIGAR);

            //block size
            writer.Write(Helper.GetLittleEndianByteArray(blocksize), 0, 4);

            // Reference sequence index.
            writer.Write(Helper.GetLittleEndianByteArray(rid), 0, 4);

            // Pos: the in-memory value is 1-based with 0 meaning "unknown"; the stored value is
            // 0-based with -1 meaning "unknown".
            writer.Write(Helper.GetLittleEndianByteArray(alignedSeq.Pos > 0 ? alignedSeq.Pos - 1 : -1), 0, 4);

            // bin<<16|mapQual<<8|read_name_len (including NULL)
            writer.Write(Helper.GetLittleEndianByteArray(bin_mq_nl), 0, 4);

            // flag<<16|cigar_len
            writer.Write(Helper.GetLittleEndianByteArray(flag_nc), 0, 4);

            // Length of the read
            writer.Write(Helper.GetLittleEndianByteArray(readLen), 0, 4);

            // Mate reference sequence index
            writer.Write(Helper.GetLittleEndianByteArray(mateRefId), 0, 4);

            // mate_pos - Leftmost coordinate of the mate. Converted exactly like Pos above so that
            // an unknown mate position (0) is stored as -1 instead of being confused with the
            // first base of the reference.
            writer.Write(Helper.GetLittleEndianByteArray(alignedSeq.MPos > 0 ? alignedSeq.MPos - 1 : -1), 0, 4);

            // Insert size of the read pair (if paired). This is a signed value: it is negative
            // for the rightmost read of a pair, so it must be written unchanged.
            writer.Write(Helper.GetLittleEndianByteArray(alignedSeq.ISize), 0, 4);

            // Read name, null terminated
            writer.Write(readName, 0, readName.Length);
            writer.WriteByte((byte)'\0');

            // Cigar: op_len<<4|op. Op: MIDNSHP=>0123456
            for (int i = 0; i < encodedCIGAR.Count; i++)
            {
                writer.Write(Helper.GetLittleEndianByteArray(encodedCIGAR[i]), 0, 4);
            }

            // 4-bit encoded read: =ACGTN=>0,1,2,4,8,15; the earlier base is stored in the high-order 4 bits of the byte.
            byte[] encodedValues = GetEncodedSequence(alignedSeq);
            writer.Write(encodedValues, 0, encodedValues.Length);

            // Phred base quality (0xFF if absent)
            encodedValues = GetQualityValue(alignedSeq.QuerySequence);
            writer.Write(encodedValues, 0, encodedValues.Length);

            // Optional fields
            foreach (SAMOptionalField field in alignedSeq.OptionalFields)
            {
                byte[] optionalArray = GetOptioanField(field);
                writer.Write(optionalArray, 0, optionalArray.Length);
            }
        }
        /// <summary>
        /// Lazily parses the file named by <c>_fileName</c> and yields its aligned sequences one at a time.
        /// </summary>
        /// <remarks>
        /// The file is opened when enumeration starts and the stream is stored in the
        /// <c>readStream</c> field for the duration of the enumeration.
        /// When <c>ChromosomeToGet</c> is set, the non-null results of
        /// <c>ParseRangeAsEnumerableSequences</c> are yielded; otherwise the reader is validated,
        /// the header is read and records are yielded until <c>IsEOF()</c> reports the end.
        /// </remarks>
        /// <returns>A sequence of the non-null alignments read from the file.</returns>
        /// <exception cref="ArgumentNullException"><c>_fileName</c> is null, empty or whitespace.</exception>
        /// <exception cref="FileFormatException">The opened stream is empty.</exception>
        public IEnumerable <CompactSAMSequence> Parse()
        {
            if (string.IsNullOrWhiteSpace(_fileName))
            {
                throw new ArgumentNullException("fileName");
            }
            // The stream is assigned to the readStream field, not a local, so the helper
            // methods called below read from it; it is disposed when enumeration ends.
            using (readStream = new FileStream(_fileName, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                Stream reader = readStream;
                if (reader == null || reader.Length == 0)
                {
                    throw new FileFormatException(Properties.Resource.BAM_InvalidBAMFile);
                }
                if (!String.IsNullOrEmpty(ChromosomeToGet))
                {
                    // Restricted mode: only alignments for the requested chromosome are produced.
                    foreach (var s in
                             ParseRangeAsEnumerableSequences(_fileName, ChromosomeToGet))
                    {
                        if (s != null)
                        {
                            yield return(s);
                        }
                        ////TODO: Super inefficient right now, am parsing the sequence multiple times,
                        ////fix this.
                        //var s2 = s.ToArray ();
                        //var alpha = Alphabets.AutoDetectAlphabet(s2, 0, s2.Length, null);


                        //var strippedOfInfo = new Sequence(alpha, s2);
                        //yield return strippedOfInfo;
                    }
                }
                else
                {
                    // Whole-file mode: validate, read the header, then stream every record.
                    readStream = reader;
                    ValidateReader();
                    SAMAlignmentHeader   header = GetHeader();
                    // NOTE(review): this map is constructed from the header but is not used
                    // again in this method.
                    SequenceAlignmentMap sequenceAlignmentMap = null;
                    if (sequenceAlignmentMap == null)
                    {
                        sequenceAlignmentMap = new SequenceAlignmentMap(header);
                    }

                    while (!IsEOF())
                    {
#if WANT_OLD_VERSION
                        SAMAlignedSequence alignedSeq = GetAlignedSequence(0, int.MaxValue);
#else
                        var alignedSeq = GetAlignedSequence();
#endif
                        // Null results are skipped rather than yielded.
                        if (alignedSeq != null)
                        {
#if WANT_OLD_VERSION
                            //make a new Sequence
                            ISequence strippedOfInfo = null;
                            try
                            {
                                var syms  = alignedSeq.QuerySequence.ToArray();
                                var alpha = Alphabets.AutoDetectAlphabet(syms, 0, syms.Length, null);
                                strippedOfInfo = new Sequence(alpha, alignedSeq.QuerySequence.ToArray());

                                strippedOfInfo = alignedSeq;
                            }
                            catch (ArgumentOutOfRangeException exception)
                            {
                                Debug.Write("Could not convert sequence: " + exception.Message);
                            }
                            if (strippedOfInfo != null)
                            {
                                yield return(strippedOfInfo);
                            }
#else
                            yield return(alignedSeq);
                                                        #endif
                        }
                        alignedSeq = null;
                    }
                }
            }
        }
        /// <summary>
        /// Reads the next alignment record from the BAM data and converts it into a
        /// <see cref="SAMAlignedSequence"/>.
        /// </summary>
        /// <param name="start">
        /// Zero-based start of the region of interest; a record on a defined reference whose
        /// zero-based reference end lies before this value is skipped.
        /// </param>
        /// <param name="end">
        /// End of the region of interest; a record whose one-based position is greater than this
        /// value is skipped.
        /// </param>
        /// <returns>The parsed alignment, or null when the record does not overlap the region.</returns>
        /// <exception cref="FileFormatException">A CIGAR operation code is not one of MIDNSHP=X.</exception>
        /// <exception cref="FormatException">An optional field has an unsupported value type.</exception>
        private SAMAlignedSequence GetAlignedSequence(int start, int end)
        {
            // CIGAR operation codes 0..8 in the order they are numbered in the record.
            const string cigarOperations = "MIDNSHP=X";

            byte[] array = new byte[4];

            ReadUnCompressedData(array, 0, 4);
            int blockLen = Helper.GetInt32(array, 0);

            byte[] alignmentBlock = new byte[blockLen];
            ReadUnCompressedData(alignmentBlock, 0, blockLen);
            SAMAlignedSequence alignedSeq = new SAMAlignedSequence();
            int    value;
            UInt32 UnsignedValue;
            // 0-4 bytes
            int refSeqIndex = Helper.GetInt32(alignmentBlock, 0);

            if (refSeqIndex == -1)
            {
                alignedSeq.RName = "*";
            }
            else
            {
                alignedSeq.RName = refSeqNames[refSeqIndex];
            }

            // 4-8 bytes (stored 0-based, exposed 1-based)
            alignedSeq.Pos = Helper.GetInt32(alignmentBlock, 4) + 1;

            // if there is no overlap no need to parse further.
            //     BAMPos > closedEnd
            // => (alignedSeq.Pos - 1) > end -1
            if (alignedSeq.Pos > end)
            {
                return(null);
            }

            // 8 - 12 bytes "bin<<16|mapQual<<8|read_name_len"
            UnsignedValue = Helper.GetUInt32(alignmentBlock, 8);

            // The shift must happen on the unsigned value BEFORE the cast to int: a cast binds
            // tighter than >>, so "(int)(x & mask) >> 16" sign-extends and yields a negative
            // number whenever the top bit is set (any bin or flag value >= 0x8000).
            // 10 -12 bytes
            alignedSeq.Bin = (int)((UnsignedValue & 0xFFFF0000) >> 16);
            // 9th bytes
            alignedSeq.MapQ = (int)((UnsignedValue & 0x0000FF00) >> 8);
            // 8th bytes
            int queryNameLen = (int)(UnsignedValue & 0x000000FF);

            // 12 - 16 bytes
            UnsignedValue = Helper.GetUInt32(alignmentBlock, 12);
            // 14-16 bytes
            int flagValue = (int)((UnsignedValue & 0xFFFF0000) >> 16);

            alignedSeq.Flag = (SAMFlags)flagValue;
            // 12-14 bytes
            int cigarLen = (int)(UnsignedValue & 0x0000FFFF);

            // 16-20 bytes
            int readLen = Helper.GetInt32(alignmentBlock, 16);

            // 20-24 bytes
            int mateRefSeqIndex = Helper.GetInt32(alignmentBlock, 20);

            if (mateRefSeqIndex != -1)
            {
                alignedSeq.MRNM = refSeqNames[mateRefSeqIndex];
            }
            else
            {
                alignedSeq.MRNM = "*";
            }

            // 24-28 bytes (stored 0-based, exposed 1-based)
            alignedSeq.MPos = Helper.GetInt32(alignmentBlock, 24) + 1;

            // 28-32 bytes
            alignedSeq.ISize = Helper.GetInt32(alignmentBlock, 28);

            // 32-(32+queryNameLen) bytes; the stored name is null terminated, hence the -1.
            alignedSeq.QName = System.Text.ASCIIEncoding.ASCII.GetString(alignmentBlock, 32, queryNameLen - 1);
            StringBuilder strbuilder = new StringBuilder();
            int           startIndex = 32 + queryNameLen;

            for (int i = startIndex; i < (startIndex + cigarLen * 4); i += 4)
            {
                // Get the CIGAR operation length stored in first 28 bits.
                UInt32 cigarValue = Helper.GetUInt32(alignmentBlock, i);
                strbuilder.Append(((cigarValue & 0xFFFFFFF0) >> 4).ToString(CultureInfo.InvariantCulture));

                // Get the CIGAR operation stored in last 4 bits.
                value = (int)(cigarValue & 0x0000000F);

                if (value >= cigarOperations.Length)
                {
                    throw new FileFormatException(Properties.Resource.BAM_InvalidCIGAR);
                }

                strbuilder.Append(cigarOperations[value]);
            }

            string cigar = strbuilder.ToString();

            if (string.IsNullOrWhiteSpace(cigar))
            {
                alignedSeq.CIGAR = "*";
            }
            else
            {
                alignedSeq.CIGAR = cigar;
            }

            // if there is no overlap no need to parse further.
            // ZeroBasedRefEnd < start
            // => (alignedSeq.RefEndPos -1) < start
            if (alignedSeq.RefEndPos - 1 < start && alignedSeq.RName != Properties.Resource.SAM_NO_REFERENCE_DEFINED_INDICATOR)
            {
                return(null);
            }

            startIndex += cigarLen * 4;
            strbuilder  = new StringBuilder();

            // Bases are packed two per byte, earlier base in the high nibble. Iterating per base
            // (rather than per byte plus a special-cased last byte) also handles readLen == 0,
            // where no sequence bytes are stored at all.
            for (int baseIndex = 0; baseIndex < readLen; baseIndex++)
            {
                byte packed = alignmentBlock[startIndex + (baseIndex / 2)];
                if (baseIndex % 2 == 0)
                {
                    value = (packed & 0xF0) >> 4;
                }
                else
                {
                    value = packed & 0x0F;
                }

                strbuilder.Append(GetSeqChar(value));
            }

            startIndex += (readLen + 1) / 2;

            // NOTE(review): "*" is used for an absent sequence, mirroring the "*" already passed
            // for absent quality values below; confirm ParseQualityNSequence treats it that way.
            string strSequence = readLen > 0 ? strbuilder.ToString() : "*";

            byte[] qualValues    = new byte[readLen];
            string strQualValues = "*";

            // A first quality byte of 0xFF marks the qualities as absent. With no bases there are
            // no quality bytes to inspect, so the probe is skipped.
            if (readLen > 0 && alignmentBlock[startIndex] != 0xFF)
            {
                for (int i = startIndex; i < (startIndex + readLen); i++)
                {
                    // Stored values are raw Phred scores; +33 converts them to printable characters.
                    qualValues[i - startIndex] = (byte)(alignmentBlock[i] + 33);
                }

                strQualValues = System.Text.ASCIIEncoding.ASCII.GetString(qualValues);
            }

            SAMParser.ParseQualityNSequence(alignedSeq, Alphabet, strSequence, strQualValues);

            startIndex += readLen;

            if (alignmentBlock.Length > startIndex + 4 && alignmentBlock[startIndex] != 0x0 && alignmentBlock[startIndex + 1] != 0x0)
            {
                for (int index = startIndex; index < alignmentBlock.Length;)
                {
                    SAMOptionalField optionalField = new SAMOptionalField();
                    optionalField.Tag = System.Text.ASCIIEncoding.ASCII.GetString(alignmentBlock, index, 2);
                    index            += 2;
                    char   vType     = (char)alignmentBlock[index++];
                    string valueType = vType.ToString();

                    // SAM format supports [AifZH] for value type.
                    // In BAM, an integer may be stored as a signed 8-bit integer (c), unsigned 8-bit integer (C), signed short (s), unsigned
                    // short (S), signed 32-bit (i) or unsigned 32-bit integer (I), depending on the signed magnitude of the integer. However,
                    // in SAM, all types of integers are presented as type 'i'.
                    string message = Helper.IsValidPatternValue("VType", valueType, BAMOptionalFieldRegex);
                    if (!string.IsNullOrEmpty(message))
                    {
                        throw new FormatException(message);
                    }

                    // GetOptionalValue advances index past the value it reads.
                    optionalField.Value = GetOptionalValue(vType, alignmentBlock, ref index).ToString();

                    // Convert to SAM format.
                    if ("cCsSI".IndexOf(vType) >= 0)
                    {
                        valueType = "i";
                    }

                    optionalField.VType = valueType;

                    alignedSeq.OptionalFields.Add(optionalField);
                }
            }

            return(alignedSeq);
        }
Пример #27
0
        /// <summary>
        /// Decides whether an aligned sequence passes the filters configured by the user.
        /// </summary>
        /// <remarks>
        /// The checks are applied in order and the first failing check rejects the read:
        /// required flag bits, excluded flag bits, minimum mapping quality, library,
        /// read group and region. A filter whose option is unset (0 or empty) is skipped.
        /// </remarks>
        /// <param name="alignedSequence">Aligned Sequence.</param>
        /// <returns>Whether aligned sequence matches user defined options.</returns>
        private bool Filter(SAMAlignedSequence alignedSequence)
        {
            int flag = (int)alignedSequence.Flag;

            // Every required bit must be set.
            if (FlagRequired != 0 && (flag & FlagRequired) != FlagRequired)
            {
                return(false);
            }

            // None of the filtered bits may be set.
            if (FilteringFlag != 0 && (flag & FilteringFlag) != 0)
            {
                return(false);
            }

            // The option is a minimum: reads at or above the threshold pass, not only
            // reads whose quality is exactly equal to it.
            if (QualityMinimumMapping != 0 && alignedSequence.MapQ < QualityMinimumMapping)
            {
                return(false);
            }

            if (!string.IsNullOrEmpty(Library))
            {
                // Resolve the read's RG tag to a header read-group record, then compare that
                // record's LB tag. A missing RG tag, read-group record or LB tag rejects the
                // read instead of throwing InvalidOperationException.
                string readGroupId = alignedSequence.OptionalFields
                                     .Where(c => c.Tag.Equals("RG"))
                                     .Select(c => c.Value)
                                     .FirstOrDefault();

                bool libraryMatches = readGroupId != null &&
                                      rgRecFields
                                      .Where(a => a.Tags.Where(b => b.Tag.Equals("ID")).Take(1).Any(b => b.Value.Equals(readGroupId)))
                                      .Take(1)
                                      .SelectMany(a => a.Tags)
                                      .Where(d => d.Tag.Equals("LB"))
                                      .Take(1)
                                      .Any(d => d.Value.Equals(Library));

                if (!libraryMatches)
                {
                    return(false);
                }
            }

            if (!string.IsNullOrEmpty(ReadGroup))
            {
                // The tag name is compared case-insensitively, as before, but without
                // allocating upper-cased copies or spinning up a parallel query.
                bool inReadGroup = alignedSequence.OptionalFields.Any(
                    o => string.Equals(o.Tag, "RG", System.StringComparison.OrdinalIgnoreCase) &&
                    o.Value.Equals(ReadGroup));

                if (!inReadGroup)
                {
                    return(false);
                }
            }

            if (!string.IsNullOrEmpty(Region))
            {
                if (!alignedSequence.RName.Equals(region.Chromosome))
                {
                    return(false);
                }

                // A negative start means the whole chromosome is accepted; the end bound is
                // only consulted when a start bound is present.
                if (region.Start > -1)
                {
                    if (alignedSequence.Pos < region.Start)
                    {
                        return(false);
                    }

                    if (region.End > -1 && alignedSequence.Pos > region.End)
                    {
                        return(false);
                    }
                }
            }

            return(true);
        }
Пример #28
0
        /// <summary>
        /// Merges the alignments of several SAM objects into one stream, repeatedly writing the
        /// pending alignment with the smallest reference name and position.
        /// </summary>
        /// <param name="sortedIndexes">Sorted Indexes of SAM object.</param>
        /// <param name="fstemp">Temporary stream to write alignments.</param>
        /// <param name="formatter">Format aligned sequences in BAM format.</param>
        /// <param name="sequenceAlignmentMaps">List of SAM objects to be merged.</param>
        private void WriteMergeFile(IList <IList <BAMSortedIndex> > sortedIndexes, FileStream fstemp, BAMFormatter formatter, IList <SequenceAlignmentMap> sequenceAlignmentMaps)
        {
            // heads[s] is the next unwritten alignment of source s, or null once s is exhausted.
            List <SAMAlignedSequence> heads = new List <SAMAlignedSequence>();

            // cursor[s] is the position of the sorted index currently being consumed for source s.
            int[] cursor = new int[sequenceAlignmentMaps.Count];

            // Prime every source with its first alignment.
            for (int source = 0; source < sortedIndexes.Count; source++)
            {
                BAMSortedIndex     firstIndex = sortedIndexes[source].ElementAt(cursor[source]);
                SAMAlignedSequence head       = null;

                if (firstIndex != null && firstIndex.MoveNext())
                {
                    head = sequenceAlignmentMaps[source].QuerySequences[firstIndex.Current];
                }

                heads.Add(head);
            }

            int best = -1;

            do
            {
                // Select the source whose pending alignment sorts first.
                for (int candidate = 0; candidate < heads.Count; candidate++)
                {
                    if (heads[candidate] == null)
                    {
                        continue;
                    }

                    if (best == -1)
                    {
                        best = candidate;
                        continue;
                    }

                    if (string.Compare(heads[best].RName, heads[candidate].RName, StringComparison.OrdinalIgnoreCase) > 0)
                    {
                        best = candidate;
                    }
                    else if (heads[best].RName.Equals(heads[candidate].RName) &&
                             heads[best].Pos > heads[candidate].Pos)
                    {
                        best = candidate;
                    }
                }

                if (best > -1)
                {
                    SAMAlignedSequence      pending = heads[best];
                    IList <BAMSortedIndex>  indexes = sortedIndexes[best];
                    BAMSortedIndex          active  = indexes.ElementAt(cursor[best]);

                    if (active.MoveNext())
                    {
                        heads[best] = sequenceAlignmentMaps[best].QuerySequences[active.Current];
                    }
                    else
                    {
                        // The current sorted index is used up; try the next one for this source.
                        cursor[best]++;

                        BAMSortedIndex following = null;
                        if (cursor[best] < indexes.Count)
                        {
                            following = indexes.ElementAt(cursor[best]);
                        }

                        if (cursor[best] < indexes.Count && following.MoveNext())
                        {
                            heads[best] = sequenceAlignmentMaps[best].QuerySequences[following.Current];
                        }
                        else
                        {
                            // Source exhausted: drop it and force a fresh selection next round.
                            heads[best] = null;
                            best        = -1;
                        }
                    }

                    formatter.WriteAlignedSequence(_header, pending, fstemp);
                }
            } while (heads.Any(a => a != null));
        }
Пример #29
0
        /// <summary>
        /// Packs the query sequence of an aligned read into 4-bit codes, two bases per byte,
        /// with the earlier base in the high-order nibble.
        /// </summary>
        /// <param name="alignedSeq">Aligned sequence whose query sequence is encoded.</param>
        /// <returns>The packed bases; an empty array when there is no query sequence.</returns>
        /// <exception cref="ArgumentException">The query sequence alphabet is not a DnaAlphabet.</exception>
        private static byte[] GetEncodedSequence(SAMAlignedSequence alignedSeq)
        {
            // The position of a symbol in this string is its 4-bit code: '=' is 0 ... 'N' is 15.
            const string bamSymbols = "=ACMGRSVTWYHKDBN";

            ISequence querySequence = alignedSeq.QuerySequence;

            if (querySequence == null)
            {
                return(new byte[0]);
            }

            if (!(querySequence.Alphabet is DnaAlphabet))
            {
                throw new ArgumentException(Properties.Resource.BAMFormatterSupportsDNAOnly);
            }

            byte[] valueMap = querySequence.Alphabet.GetSymbolValueMap();
            byte[] packed   = new byte[(querySequence.Count + 1) / 2];

            for (int position = 0; position < querySequence.Count; position++)
            {
                char mappedSymbol = (char)valueMap[querySequence[position]];

                // Symbols outside "=ACMGRSVTWYHKDBN" (for example '.') are written as 'N' (15).
                int  code   = bamSymbols.IndexOf(mappedSymbol);
                byte nibble = (byte)(code < 0 ? 15 : code);

                int slot = position / 2;
                if (position % 2 == 0)
                {
                    // Even positions occupy the high nibble of a fresh byte.
                    packed[slot] = (byte)(nibble << 4);
                }
                else
                {
                    // Odd positions fill the low nibble of the same byte.
                    packed[slot] = (byte)(packed[slot] | nibble);
                }
            }

            return(packed);
        }
Пример #30
0
        /// <summary>
        /// Walks the CIGAR string of an aligned read and emits one entry per aligned, inserted or
        /// deleted base, each tagged with the reference position it belongs to.
        /// </summary>
        /// <param name="seq">Aligned read; its query sequence must be a QualitativeSequence.</param>
        /// <returns>The emitted bases in CIGAR order. Soft- and hard-clipped bases are not emitted.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="seq"/> is null.</exception>
        /// <exception cref="ArgumentException">The query sequence carries no quality scores.</exception>
        /// <exception cref="NotSupportedException">The CIGAR contains a P or N operation.</exception>
        /// <exception cref="FormatException">The CIGAR contains an unknown operation or a malformed length.</exception>
        List<BaseAndQualityAndPosition> getBasesForSequence(SAMAlignedSequence seq)
        {
            if (seq == null)
            {
                throw new ArgumentNullException("seq");
            }

            // Fail with a descriptive error instead of a NullReferenceException when the read
            // has no quality information.
            var qseq = seq.QuerySequence as QualitativeSequence;
            if (qseq == null)
            {
                throw new ArgumentException("The query sequence must be a QualitativeSequence.", "seq");
            }

            // The capacity is only a hint; clamp it so an unexpected span cannot make the
            // List constructor throw.
            List<BaseAndQualityAndPosition> toReturn = new List<BaseAndQualityAndPosition>(Math.Max(0, seq.RefEndPos - seq.Pos + 10));

            // Get sequence bases and error probabilities
            var seq_log10ErrorProb = qseq.GetPhredQualityScores().Select(Utils.GetLog10ErrorProbability).ToArray();
            var seq_bases = qseq.ToArray();

            // Decode the cigar string and emit bases in a single pass.
            // TODO: This code is duplicated in many places
            string CIGAR = seq.CIGAR;
            int curRef = seq.Pos;
            int curQuery = 0;
            int cig_start = 0; // index of the first digit of the operation currently being read
            for (int i = 0; i < CIGAR.Length; i++)
            {
                char ch = CIGAR[i];
                if (Char.IsDigit(ch))
                {
                    continue;
                }

                // Every non-digit character closes one "<length><operation>" element.
                int cig_len = int.Parse(CIGAR.Substring(cig_start, i - cig_start));
                cig_start = i + 1;

                // Emit or advance based on cigar operation.
                switch (ch)
                {
                    case 'P': //padding (Silent deletions from padded reference)
                    case 'N': //skipped region from reference
                        throw new NotSupportedException("Pile up methods not built to handle reference clipping (Cigar P or N) yet.");
                    case 'M': //match or mismatch
                    case '=': //match
                    case 'X': //mismatch
                        // Consumes both the read and the reference.
                        for (int k = 0; k < cig_len; k++)
                        {
                            var bqp = new BaseAndQualityAndPosition(curRef, 0, new BaseAndQuality(seq_bases[curQuery], seq_log10ErrorProb[curQuery]));
                            toReturn.Add(bqp);
                            curQuery++;
                            curRef++;
                        }
                        break;
                    case 'I': //insertion to the reference
                        // Consumes the read only; k distinguishes bases at the same reference position.
                        for (int k = 0; k < cig_len; k++)
                        {
                            var bqp = new BaseAndQualityAndPosition(curRef, k, new BaseAndQuality(seq_bases[curQuery], seq_log10ErrorProb[curQuery]));
                            toReturn.Add(bqp);
                            curQuery++;
                        }
                        break;
                    case 'D': //Deletion from the reference
                        // Consumes the reference only; a '-' placeholder without a quality is emitted.
                        // NOTE(review): k is passed as the second argument while curRef also
                        // advances; confirm this is intended rather than a constant 0.
                        for (int k = 0; k < cig_len; k++)
                        {
                            var bqp = new BaseAndQualityAndPosition(curRef, k, new BaseAndQuality((byte)'-', Double.NaN));
                            toReturn.Add(bqp);
                            curRef++;
                        }
                        break;
                    case 'S': //soft clipped
                        // Clipped bases are present in the read but not emitted.
                        curQuery += cig_len;
                        break;
                    case 'H': //hard clipped
                        // Hard-clipped bases are absent from the read; nothing to skip.
                        break;
                    default:
                        throw new FormatException("Unexpected SAM Cigar element found " + ch.ToString());
                }
            }
            return toReturn;
        }