예제 #1
0
        /// <summary>
        /// Reads alignment lines from the reader and appends one SAMAlignedSequence per line
        /// to the alignment map. Reading stops when the reader has no more lines or the
        /// current line starts with "@".
        /// </summary>
        /// <param name="seqAlignment">Alignment map whose QuerySequences collection receives the parsed records.</param>
        /// <param name="bioReader">Reader positioned on the first line to parse.</param>
        /// <param name="isReadOnly">Passed through unchanged to ParseQualityNSequence.</param>
        private void ParseSequences(SequenceAlignmentMap seqAlignment, BioTextReader bioReader, bool isReadOnly)
        {
            while (bioReader.HasLines)
            {
                if (bioReader.Line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
                {
                    break;
                }

                string[] fields = bioReader.Line.Split(tabDelim, StringSplitOptions.RemoveEmptyEntries);

                // Fields 0-8 populate the alignment header, in file order.
                SAMAlignedSequence record = new SAMAlignedSequence
                {
                    QName = fields[0],
                    Flag  = SAMAlignedSequenceHeader.GetFlag(fields[1]),
                    RName = fields[2],
                    Pos   = int.Parse(fields[3], CultureInfo.InvariantCulture),
                    MapQ  = int.Parse(fields[4], CultureInfo.InvariantCulture),
                    CIGAR = fields[5]
                };

                // "=" in the mate reference column means "same as RName".
                if (fields[6].Equals("="))
                {
                    record.MRNM = record.RName;
                }
                else
                {
                    record.MRNM = fields[6];
                }

                record.MPos  = int.Parse(fields[7], CultureInfo.InvariantCulture);
                record.ISize = int.Parse(fields[8], CultureInfo.InvariantCulture);

                string headerError = record.IsValidHeader();
                if (!string.IsNullOrEmpty(headerError))
                {
                    throw new FormatException(headerError);
                }

                // Look up the reference sequence whose ID matches RName (case-insensitive), if any are known.
                ISequence reference = null;
                if (RefSequences != null && RefSequences.Count > 0)
                {
                    reference = RefSequences.FirstOrDefault(
                        candidate => string.Equals(candidate.ID, record.RName, StringComparison.OrdinalIgnoreCase));
                }

                ParseQualityNSequence(record, Alphabet, Encoding, fields[9], fields[10], reference, isReadOnly);

                // Everything from index 11 onwards is an optional TAG:VTYPE:VALUE field.
                for (int index = 11; index < fields.Length; index++)
                {
                    string rawField = fields[index];
                    if (!Helper.IsValidRegexValue(OptionalFieldLinePattern, rawField))
                    {
                        throw new FormatException(
                            string.Format(CultureInfo.CurrentCulture, Resource.InvalidOptionalField, rawField));
                    }

                    string[] parts = rawField.Split(colonDelim, StringSplitOptions.RemoveEmptyEntries);
                    SAMOptionalField optional = new SAMOptionalField
                    {
                        Tag   = parts[0],
                        VType = parts[1],
                        Value = parts[2]
                    };

                    string fieldError = optional.IsValid();
                    if (!string.IsNullOrEmpty(fieldError))
                    {
                        throw new FormatException(fieldError);
                    }

                    record.OptionalFields.Add(optional);
                }

                seqAlignment.QuerySequences.Add(record);
                bioReader.GoToNextLine();
            }
        }
예제 #2
0
 /// <summary>
 /// Initializes a SAMAlignedSequence around the given header. The header is registered in
 /// the metadata under Helper.SAMAlignedSequenceHeaderKey, and the sequence list starts
 /// with a single null entry.
 /// </summary>
 /// <param name="seqHeader">Header to associate with this aligned sequence.</param>
 public SAMAlignedSequence(SAMAlignedSequenceHeader seqHeader)
 {
     // One null placeholder entry, as a slot for the sequence.
     _sequences = new List<ISequence> { null };

     _seqHeader = seqHeader;

     // Metadata keys are matched case-insensitively.
     _metadata = new Dictionary<string, object>(StringComparer.OrdinalIgnoreCase)
     {
         { Helper.SAMAlignedSequenceHeaderKey, seqHeader }
     };
 }
        /// <summary>
        /// Deserialization constructor. Restores the header from the "header" entry and the
        /// query sequence from the "sequence" entry of the serialization info, and registers
        /// the header in the metadata under Helper.SAMAlignedSequenceHeaderKey.
        /// </summary>
        /// <param name="info">Serialization info holding the stored values; must not be null.</param>
        /// <param name="context">Streaming context (not used by this constructor).</param>
        protected SAMAlignedSequence(SerializationInfo info, StreamingContext context)
        {
            if (info == null)
            {
                throw new ArgumentNullException("info");
            }

            Type headerType = typeof(SAMAlignedSequenceHeader);
            object storedHeader = info.GetValue("header", headerType);
            seqHeader = (SAMAlignedSequenceHeader)storedHeader;

            // Metadata keys are matched case-insensitively.
            metadata = new Dictionary<string, object>(StringComparer.OrdinalIgnoreCase)
            {
                { Helper.SAMAlignedSequenceHeaderKey, seqHeader }
            };

            Type sequenceType = typeof(ISequence);
            QuerySequence = (ISequence)info.GetValue("sequence", sequenceType);
        }
예제 #4
0
        /// <summary>
        /// Parses a single alignment line into a SAMAlignedSequence.
        /// </summary>
        /// <param name="bioText">Tab-delimited sequence alignment text.</param>
        /// <param name="alphabet">Alphabet of the sequences; forwarded to ParseQualityNSequence.</param>
        /// <param name="referenceSequences">Reference sequences searched by ID for a match with RName; may be null or empty.</param>
        /// <returns>The parsed aligned sequence with its optional fields.</returns>
        private static SAMAlignedSequence ParseSequence(string bioText, IAlphabet alphabet, IList <ISequence> referenceSequences)
        {
            // Index of the first optional TAG:VTYPE:VALUE field; indices 0-10 are mandatory.
            const int firstOptionalIndex = 11;

            string[] fields = bioText.Split(tabDelim, StringSplitOptions.RemoveEmptyEntries);

            SAMAlignedSequence record = new SAMAlignedSequence
            {
                QName = fields[0],
                Flag  = SAMAlignedSequenceHeader.GetFlag(fields[1]),
                RName = fields[2],
                Pos   = int.Parse(fields[3], CultureInfo.InvariantCulture),
                MapQ  = int.Parse(fields[4], CultureInfo.InvariantCulture),
                CIGAR = fields[5]
            };

            // "=" in the mate reference column means "same as RName".
            if (fields[6].Equals("="))
            {
                record.MRNM = record.RName;
            }
            else
            {
                record.MRNM = fields[6];
            }

            record.MPos  = int.Parse(fields[7], CultureInfo.InvariantCulture);
            record.ISize = int.Parse(fields[8], CultureInfo.InvariantCulture);

            // Find the reference sequence whose ID matches RName (case-insensitive), if any were supplied.
            ISequence reference = null;
            if (referenceSequences != null && referenceSequences.Count > 0)
            {
                reference = referenceSequences.FirstOrDefault(
                    candidate => string.Equals(candidate.ID, record.RName, StringComparison.OrdinalIgnoreCase));
            }

            ParseQualityNSequence(record, alphabet, fields[9], fields[10], reference);

            for (int index = firstOptionalIndex; index < fields.Length; index++)
            {
                string rawField = fields[index];
                if (!Helper.IsValidRegexValue(OptionalFieldRegex, rawField))
                {
                    throw new FormatException(
                        string.Format(CultureInfo.CurrentCulture, Properties.Resource.InvalidOptionalField, rawField));
                }

                string[] parts = rawField.Split(colonDelim, StringSplitOptions.RemoveEmptyEntries);
                record.OptionalFields.Add(new SAMOptionalField
                {
                    Tag   = parts[0],
                    VType = parts[1],
                    Value = parts[2]
                });
            }

            return record;
        }
예제 #5
0
        /// <summary>
        /// Parses a single alignment line into a SAMAlignedSequence.
        /// </summary>
        /// <param name="bioText">Tab-delimited sequence alignment text. Indices 0-10 are read as mandatory fields; any further tokens are optional TAG:VTYPE:VALUE fields.</param>
        /// <param name="alphabet">Alphabet of the sequences; forwarded to ParseQualityNSequence.</param>
        /// <returns>The parsed aligned sequence with its optional fields.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bioText"/> is null.</exception>
        /// <exception cref="FormatException">A numeric field is not a valid integer, or an optional field does not match OptionalFieldRegex.</exception>
        public static SAMAlignedSequence ParseSequence(string bioText, IAlphabet alphabet)
        {
            const int optionalTokenStartingIndex = 11;

            if (bioText == null)
            {
                throw new ArgumentNullException("bioText");
            }

            // The alignment text is a machine-readable format, so numbers are parsed with the
            // invariant culture; the current culture may use a different negative sign or digits.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            string[] tokens = bioText.Split(TabDelim, StringSplitOptions.RemoveEmptyEntries);

            SAMAlignedSequence alignedSeq = new SAMAlignedSequence
            {
                QName = tokens[0],
                Flag  = SAMAlignedSequenceHeader.GetFlag(tokens[1]),
                RName = tokens[2],
                Pos   = int.Parse(tokens[3], invariant),
                MapQ  = int.Parse(tokens[4], invariant),
                CIGAR = tokens[5]
            };

            // "=" in the mate reference column means "same as RName".
            alignedSeq.MRNM  = tokens[6].Equals("=") ? alignedSeq.RName : tokens[6];
            alignedSeq.MPos  = int.Parse(tokens[7], invariant);
            alignedSeq.ISize = int.Parse(tokens[8], invariant);

            ParseQualityNSequence(alignedSeq, alphabet, tokens[9], tokens[10]);

            for (int i = optionalTokenStartingIndex; i < tokens.Length; i++)
            {
                if (!Helper.IsValidRegexValue(OptionalFieldRegex, tokens[i]))
                {
                    // The message is shown to users, so it is formatted with the current culture.
                    throw new FormatException(string.Format(
                        System.Globalization.CultureInfo.CurrentCulture,
                        Properties.Resource.InvalidOptionalField,
                        tokens[i]));
                }

                string[] opttokens = tokens[i].Split(ColonDelim, StringSplitOptions.RemoveEmptyEntries);
                SAMOptionalField optField = new SAMOptionalField
                {
                    Tag   = opttokens[0],
                    VType = opttokens[1],
                    Value = opttokens[2]
                };

                alignedSeq.OptionalFields.Add(optField);
            }

            return alignedSeq;
        }
예제 #6
0
        /// <summary>
        /// Formats one aligned sequence as a single alignment line (mandatory fields followed
        /// by any optional fields) and writes it to the writer, ending with a line terminator.
        /// </summary>
        /// <param name="alignedSequence">Aligned sequence whose metadata holds a SAMAlignedSequenceHeader.</param>
        /// <param name="writer">Text writer that receives the line.</param>
        public static void WriteSAMAlignedSequence(IAlignedSequence alignedSequence, TextWriter writer)
        {
            if (writer == null)
            {
                throw new ArgumentNullException("writer");
            }

            if (alignedSequence == null)
            {
                throw new ArgumentNullException("alignedSequence");
            }

            SAMAlignedSequenceHeader header =
                alignedSequence.Metadata[Helper.SAMAlignedSequenceHeaderKey] as SAMAlignedSequenceHeader;
            if (header == null)
            {
                throw new ArgumentException(Properties.Resource.SAM_AlignedSequenceHeaderMissing);
            }

            ISequence querySequence = alignedSequence.Sequences[0];
            bool isDna = querySequence.Alphabet is DnaAlphabet;
            if (!isDna)
            {
                throw new ArgumentException(Properties.Resource.SAMFormatterSupportsDNAOnly);
            }

            // An empty sequence is written as "*".
            string sequenceText;
            if (querySequence.Count > 0)
            {
                char[] buffer = new char[querySequence.Count];
                int position = 0;
                while (position < querySequence.Count)
                {
                    buffer[position] = (char)querySequence[position];
                    position++;
                }

                sequenceText = new string(buffer);
            }
            else
            {
                sequenceText = "*";
            }

            // Sequences without quality data are written with "*" in the quality column.
            string qualityText = "*";
            QualitativeSequence qualitative = querySequence as QualitativeSequence;
            if (qualitative != null)
            {
                byte[] scores = qualitative.GetEncodedQualityScores();

                // Scores in any other format are converted to Sanger before writing.
                if (qualitative.FormatType != FastQFormatType.Sanger)
                {
                    scores = QualitativeSequence.ConvertEncodedQualityScore(
                        qualitative.FormatType, FastQFormatType.Sanger, scores);
                }

                qualityText = System.Text.Encoding.ASCII.GetString(scores);
            }

            // A mate reference equal to RName is abbreviated to "=".
            string mateReference;
            if (header.MRNM.Equals(header.RName))
            {
                mateReference = "=";
            }
            else
            {
                mateReference = header.MRNM;
            }

            writer.Write(
                AlignedSequenceFormat,
                header.QName,
                (int)header.Flag,
                header.RName,
                header.Pos,
                header.MapQ,
                header.CIGAR,
                mateReference,
                header.MPos,
                header.ISize,
                sequenceText,
                qualityText);

            foreach (var optional in header.OptionalFields)
            {
                writer.Write(OptionalFieldFormat, optional.Tag, optional.VType, optional.Value);
            }

            writer.WriteLine();
        }
예제 #7
0
 /// <summary>
 /// Initializes a SAMAlignedSequence around the given header and registers that header
 /// in the metadata under Helper.SAMAlignedSequenceHeaderKey.
 /// </summary>
 /// <param name="seqHeader">Header to associate with this aligned sequence.</param>
 public SAMAlignedSequence(SAMAlignedSequenceHeader seqHeader)
 {
     this.seqHeader = seqHeader;

     // Metadata keys are matched case-insensitively.
     metadata = new Dictionary<string, object>(StringComparer.OrdinalIgnoreCase)
     {
         { Helper.SAMAlignedSequenceHeaderKey, seqHeader }
     };
 }
예제 #8
0
 /// <summary>
 /// Creates a SAMAlignedSequence for the supplied header. The header is kept on the
 /// instance and also stored in the metadata under Helper.SAMAlignedSequenceHeaderKey.
 /// </summary>
 /// <param name="seqHeader">Header describing this aligned sequence.</param>
 public SAMAlignedSequence(SAMAlignedSequenceHeader seqHeader)
 {
     // Case-insensitive keys for metadata lookups.
     Dictionary<string, object> entries = new Dictionary<string, object>(StringComparer.OrdinalIgnoreCase);
     entries.Add(Helper.SAMAlignedSequenceHeaderKey, seqHeader);

     this.seqHeader = seqHeader;
     metadata = entries;
 }
예제 #9
0
        /// <summary>
        /// Formats one aligned sequence as a single alignment line (mandatory fields followed
        /// by any optional fields) and writes it to the writer, ending with a line terminator.
        /// Positions listed in the header's DotSymbolIndices / EqualSymbolIndices are written
        /// as '.' / '=' instead of the stored symbol.
        /// </summary>
        /// <param name="alignedSequence">SAM aligned sequence to write; its metadata must hold a SAMAlignedSequenceHeader.</param>
        /// <param name="writer">Text writer.</param>
        /// <exception cref="ArgumentNullException"><paramref name="writer"/> or <paramref name="alignedSequence"/> is null.</exception>
        /// <exception cref="ArgumentException">The header is missing from the metadata, or the sequence alphabet is not Alphabets.DNA.</exception>
        public static void WriteSAMAlignedSequence(IAlignedSequence alignedSequence, TextWriter writer)
        {
            if (writer == null)
            {
                throw new ArgumentNullException("writer");
            }

            if (alignedSequence == null)
            {
                throw new ArgumentNullException("alignedSequence");
            }

            SAMAlignedSequenceHeader alignedHeader = alignedSequence.Metadata[Helper.SAMAlignedSequenceHeaderKey] as SAMAlignedSequenceHeader;

            if (alignedHeader == null)
            {
                throw new ArgumentException(Properties.Resource.SAM_AlignedSequenceHeaderMissing);
            }

            ISequence sequence = alignedSequence.Sequences[0];

            if (sequence.Alphabet != Alphabets.DNA)
            {
                throw new ArgumentException(Properties.Resource.SAMFormatterSupportsDNAOnly);
            }

            // Hash sets keep the per-symbol membership test O(1); scanning a list for every
            // symbol would make the loop below quadratic on long reads.
            HashSet<int> dotSymbolIndices   = new HashSet<int>(alignedHeader.DotSymbolIndices);
            HashSet<int> equalSymbolIndices = new HashSet<int>(alignedHeader.EqualSymbolIndices);

            // An empty sequence is written as "*".
            string seq = "*";

            if (sequence.Count > 0)
            {
                char[] symbols = new char[sequence.Count];
                for (int i = 0; i < sequence.Count; i++)
                {
                    char symbol = (char)sequence[i];

                    // '=' takes precedence when an index appears in both sets.
                    if (equalSymbolIndices.Contains(i))
                    {
                        symbol = '=';
                    }
                    else if (dotSymbolIndices.Contains(i))
                    {
                        symbol = '.';
                    }

                    symbols[i] = symbol;
                }

                seq = new string(symbols);
            }

            // Sequences without quality data are written with "*" in the quality column.
            string qualValues = "*";

            QualitativeSequence qualSeq = sequence as QualitativeSequence;

            if (qualSeq != null)
            {
                byte[] bytes = qualSeq.QualityScores.ToArray();
                qualValues = System.Text.ASCIIEncoding.ASCII.GetString(bytes);
            }

            // A mate reference equal to RName is abbreviated to "=".
            writer.Write(AlignedSequenceFormat,
                         alignedHeader.QName, (int)alignedHeader.Flag, alignedHeader.RName,
                         alignedHeader.Pos, alignedHeader.MapQ, alignedHeader.CIGAR,
                         alignedHeader.MRNM.Equals(alignedHeader.RName) ? "=" : alignedHeader.MRNM,
                         alignedHeader.MPos, alignedHeader.ISize, seq, qualValues);

            for (int j = 0; j < alignedHeader.OptionalFields.Count; j++)
            {
                writer.Write(OptionalFieldFormat, alignedHeader.OptionalFields[j].Tag,
                             alignedHeader.OptionalFields[j].VType, alignedHeader.OptionalFields[j].Value);
            }

            writer.WriteLine();
        }
예제 #10
0
        /// <summary>
        /// Writes an ISequenceAlignment to the location specified by the writer: the alignment
        /// header (when one is present in the metadata) followed by one tab-delimited line per
        /// aligned sequence. The writer is flushed at the end.
        /// </summary>
        /// <param name="sequenceAlignment">The sequence alignment to format.</param>
        /// <param name="writer">The TextWriter used to write the formatted sequence alignment text.</param>
        /// <exception cref="ArgumentNullException"><paramref name="sequenceAlignment"/> or <paramref name="writer"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// An aligned sequence has no SAMAlignedSequenceHeader in its metadata, its header fails
        /// IsValid(), or its sequence alphabet is not Alphabets.DNA.
        /// </exception>
        public void Format(ISequenceAlignment sequenceAlignment, TextWriter writer)
        {
            if (sequenceAlignment == null)
            {
                throw new ArgumentNullException(Resource.ParameterNameSequenceAlignment);
            }

            if (writer == null)
            {
                throw new ArgumentNullException(Resource.ParameterNameWriter);
            }

            // Write alignment header (optional).
            SAMAlignmentHeader header = sequenceAlignment.Metadata[Helper.SAMAlignmentHeaderKey] as SAMAlignmentHeader;
            if (header != null)
            {
                WriteHeader(header, writer);
            }

            // Write aligned sequences, one line each.
            foreach (IAlignedSequence alignedSequence in sequenceAlignment.AlignedSequences)
            {
                SAMAlignedSequenceHeader alignedHeader = alignedSequence.Metadata[Helper.SAMAlignedSequenceHeaderKey] as SAMAlignedSequenceHeader;
                if (alignedHeader == null)
                {
                    throw new ArgumentException(Resource.SAM_AlignedSequenceHeaderMissing);
                }

                StringBuilder alignmentLine = new StringBuilder();
                string message = alignedHeader.IsValid();
                if (!string.IsNullOrEmpty(message))
                {
                    throw new ArgumentException(message);
                }

                alignmentLine.Append(alignedHeader.QName).Append("\t");
                alignmentLine.Append((int)alignedHeader.Flag).Append("\t");
                alignmentLine.Append(alignedHeader.RName).Append("\t");
                alignmentLine.Append(alignedHeader.Pos).Append("\t");
                alignmentLine.Append(alignedHeader.MapQ).Append("\t");
                alignmentLine.Append(alignedHeader.CIGAR).Append("\t");

                // A mate reference equal to RName is abbreviated to "=". Reference names are
                // identifiers, so they are compared ordinally rather than with culture rules.
                if (string.Equals(alignedHeader.MRNM, alignedHeader.RName, StringComparison.OrdinalIgnoreCase))
                {
                    alignmentLine.Append("=");
                }
                else
                {
                    alignmentLine.Append(alignedHeader.MRNM);
                }

                alignmentLine.Append("\t");
                alignmentLine.Append(alignedHeader.MPos).Append("\t");
                alignmentLine.Append(alignedHeader.ISize).Append("\t");
                writer.Write(alignmentLine.ToString());

                // Hash sets keep the per-symbol membership test O(1); scanning a list for every
                // symbol would make the loop below quadratic on long reads.
                HashSet<int> dotSymbolIndices   = new HashSet<int>(alignedHeader.DotSymbolIndices);
                HashSet<int> equalSymbolIndices = new HashSet<int>(alignedHeader.EqualSymbolIndices);

                if (alignedSequence.Sequences.Count > 0 && alignedSequence.Sequences[0] != null)
                {
                    ISequence seq = alignedSequence.Sequences[0];

                    if (seq.Alphabet != Alphabets.DNA)
                    {
                        throw new ArgumentException(Resource.SAMFormatterSupportsDNAOnly);
                    }

                    for (int i = 0; i < seq.Count; i++)
                    {
                        char symbol = seq[i].Symbol;

                        // '=' takes precedence when an index appears in both sets.
                        if (equalSymbolIndices.Contains(i))
                        {
                            symbol = '=';
                        }
                        else if (dotSymbolIndices.Contains(i))
                        {
                            symbol = '.';
                        }

                        writer.Write(symbol);
                    }

                    writer.Write("\t");

                    // Sequences without quality scores get "*" in the quality column.
                    IQualitativeSequence qualSeq = seq as IQualitativeSequence;
                    if (qualSeq != null)
                    {
                        writer.Write(ASCIIEncoding.ASCII.GetString(qualSeq.Scores));
                    }
                    else
                    {
                        writer.Write("*");
                    }
                }
                else
                {
                    // No sequence available: both the sequence and quality columns are "*".
                    writer.Write("*");
                    writer.Write("\t");
                    writer.Write("*");
                }

                // Optional fields are appended as tab-separated TAG:VTYPE:VALUE triples.
                foreach (SAMOptionalField field in alignedHeader.OptionalFields)
                {
                    writer.Write("\t");
                    writer.Write(field.Tag);
                    writer.Write(":");
                    writer.Write(field.VType);
                    writer.Write(":");
                    writer.Write(field.Value);
                }

                writer.WriteLine();
            }

            writer.Flush();
        }