Пример #1
0
        /// <summary>
        /// Parses alignments in SAM format from a reader into a SequenceAlignmentMap object.
        /// The header is read first via ParserSAMHeader, then the aligned sequences are
        /// added to the resulting map via ParseSequences.
        /// </summary>
        /// <param name="mbfReader">A reader for a biological sequence alignment text.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether sequences in the resulting sequence alignment should be in readonly mode or not.
        /// The value is stored in the parser's _isReadOnly field and forwarded to ParseSequences.</param>
        /// <returns>A new SequenceAlignmentMap instance containing parsed data.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="mbfReader"/> is null.</exception>
        /// <exception cref="FormatException">Thrown when the reader has no lines to parse.</exception>
        protected SequenceAlignmentMap ParseOneWithSpecificFormat(MBFTextReader mbfReader, bool isReadOnly)
        {
            if (mbfReader == null)
            {
                throw new ArgumentNullException("mbfReader");
            }

            // Record the mode only once the argument is known to be usable, so a
            // rejected call does not alter the parser's state.
            _isReadOnly = isReadOnly;

            // no empty files allowed
            if (!mbfReader.HasLines)
            {
                throw new FormatException(Resource.Parser_NoTextErrorMessage);
            }

            // Parse the alignment header.
            SAMAlignmentHeader header = ParserSAMHeader(mbfReader);

            SequenceAlignmentMap sequenceAlignmentMap = new SequenceAlignmentMap(header);

            // Parse aligned sequences
            ParseSequences(sequenceAlignmentMap, mbfReader, isReadOnly);

            return sequenceAlignmentMap;
        }
Пример #2
0
        /// <summary>
        /// Writes an ISequenceAlignment to the location specified by the writer.
        /// The SAM header found in the alignment's metadata (if any) is written first,
        /// followed by every aligned sequence; the writer is flushed at the end.
        /// </summary>
        /// <param name="sequenceAlignment">The sequence alignment to format.</param>
        /// <param name="writer">The TextWriter used to write the formatted sequence alignment text.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="sequenceAlignment"/> or <paramref name="writer"/> is null.
        /// </exception>
        public void Format(ISequenceAlignment sequenceAlignment, TextWriter writer)
        {
            if (sequenceAlignment == null)
            {
                throw new ArgumentNullException(Resource.ParameterNameSequenceAlignment);
            }

            if (writer == null)
            {
                throw new ArgumentNullException(Resource.ParameterNameWriter);
            }

            #region Write alignment header
            // Look the header up with TryGetValue: the indexer would throw
            // KeyNotFoundException for an alignment that carries no SAM header,
            // whereas a missing header is meant to be skipped.
            object headerValue;
            if (sequenceAlignment.Metadata.TryGetValue(Helper.SAMAlignmentHeaderKey, out headerValue))
            {
                SAMAlignmentHeader header = headerValue as SAMAlignmentHeader;
                if (header != null)
                {
                    WriteHeader(header, writer);
                }
            }

            #endregion

            #region Write aligned sequences
            foreach (IAlignedSequence alignedSequence in sequenceAlignment.AlignedSequences)
            {
                WriteSAMAlignedSequence(alignedSequence, writer);
            }
            #endregion

            writer.Flush();
        }
Пример #3
0
        /// <summary>
        /// Initializes a SequenceAlignmentMap around the specified SAM header.
        /// The header is also registered in the metadata dictionary under
        /// Helper.SAMAlignmentHeaderKey, and the query sequence list starts out empty.
        /// </summary>
        /// <param name="header">SAM header; must not be null.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="header"/> is null.</exception>
        public SequenceAlignmentMap(SAMAlignmentHeader header)
        {
            if (header == null)
            {
                throw new ArgumentNullException("header");
            }

            _header = header;
            _querySequences = new List<SAMAlignedSequence>();

            // Expose the header through the metadata as well.
            _metadata = new Dictionary<string, object>
            {
                { Helper.SAMAlignmentHeaderKey, _header }
            };
        }
Пример #4
0
        /// <summary>
        /// Parses a SAM sequence alignment from a file.
        /// When data virtualization is enabled and the sidecar file is valid, the returned
        /// map is backed by a VirtualAlignedSequenceList; otherwise the file is parsed
        /// through an MBFTextReader.
        /// </summary>
        /// <param name="fileName">Name of the file to parse.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequences in the sequence alignment should be in
        /// readonly mode or not. It is forwarded to the reader-based Parse overload and is
        /// not used on the data-virtualization path.
        /// </param>
        /// <returns>SequenceAlignmentMap object.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="fileName"/> is null, empty or white space.
        /// </exception>
        public SequenceAlignmentMap Parse(string fileName, bool isReadOnly)
        {
            if (string.IsNullOrWhiteSpace(fileName))
            {
                throw new ArgumentNullException("fileName");
            }

            _fileName = fileName;

            // Work out whether data virtualization (DV) has to be switched on:
            // either the file reaches the configured size threshold or DV was enforced already.
            FileInfo sourceFile = new FileInfo(_fileName);
            _enforceDataVirtualizationByFileSize = EnforceDataVirtualizationByFileSize * FileLoadHelper.KBytes;

            bool reachesSizeThreshold = _enforceDataVirtualizationByFileSize != 0
                && sourceFile.Length >= _enforceDataVirtualizationByFileSize;
            if (reachesSizeThreshold || _isDataVirtualizationEnforced)
            {
                EnforceDataVirtualization = true;
            }

            if (IsDataVirtualizationEnabled)
            {
                using (MBFStreamReader streamReader = new MBFStreamReader(fileName))
                {
                    SAMAlignmentHeader samHeader = ParserSAMHeader(streamReader);

                    _sidecarFileProvider = new SidecarFileProvider(fileName);

                    // A sidecar file exists but is not valid: parse the sequences
                    // (presumably this rebuilds the sidecar - confirm against ParseSequences).
                    if (_sidecarFileProvider.SidecarFileExists && !_sidecarFileProvider.IsSidecarValid)
                    {
                        ParseSequences(streamReader);
                    }

                    if (_sidecarFileProvider.IsSidecarValid)
                    {
                        VirtualAlignedSequenceList<SAMAlignedSequence> virtualQueries =
                            new VirtualAlignedSequenceList<SAMAlignedSequence>(_sidecarFileProvider, this, _sidecarFileProvider.Count);
                        return new SequenceAlignmentMap(samHeader, virtualQueries);
                    }
                }
            }

            // DV is off or no valid sidecar is available: parse the file from a text reader.
            using (MBFTextReader textReader = new MBFTextReader(fileName))
            {
                return Parse(textReader, isReadOnly);
            }
        }
Пример #5
0
        /// <summary>
        /// Parses SAM alignment header from specified MBFTextReader.
        /// Consecutive lines starting with "@" are consumed from the reader's current
        /// position: "@CO" lines are stored as comments, every other line becomes a
        /// record field with its tags. The static _headerLength field is reset and then
        /// accumulates the length of each header line read.
        /// </summary>
        /// <param name="mbfReader">MBF text reader.</param>
        /// <returns>
        /// The parsed header; it has no record fields and no comments when the reader
        /// is not positioned on a header line.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="mbfReader"/> is null.</exception>
        /// <exception cref="FormatException">
        /// Thrown when header lines were read and the header's IsValid check returns a message.
        /// </exception>
        public static SAMAlignmentHeader ParseSAMHeader(MBFTextReader mbfReader)
        {
            if (mbfReader == null)
            {
                throw new ArgumentNullException("mbfReader");
            }

            // Offset at which the comment text starts in a "@CO<tab>text" line.
            const int commentTextStart = 4;

            _headerLength = 0;
            SAMAlignmentHeader samHeader = new SAMAlignmentHeader();
            bool headerLinesFound = false;

            while (mbfReader.HasLines && mbfReader.Line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
            {
                headerLinesFound = true;

                string line = mbfReader.Line;
                _headerLength += line.Length;

                // Validate the header format before slicing the line into tokens.
                // NOTE(review): the Substring calls below rely on this validation having
                // rejected tag tokens shorter than "XX:" - confirm.
                ValidateHeaderLineFormat(line);

                string[] tokens = line.Split(tabDelim, StringSplitOptions.RemoveEmptyEntries);
                string recordTypecode = tokens[0].Substring(1);

                if (string.Compare(recordTypecode, "CO", StringComparison.OrdinalIgnoreCase) != 0)
                {
                    SAMRecordField headerLine = new SAMRecordField(recordTypecode);
                    for (int i = 1; i < tokens.Length; i++)
                    {
                        // Each token has the form "TG:value": two-character tag name,
                        // a separator, then the value.
                        string tagToken = tokens[i];
                        headerLine.Tags.Add(new SAMRecordFieldTag(tagToken.Substring(0, 2), tagToken.Substring(3)));
                    }

                    samHeader.RecordFields.Add(headerLine);
                }
                else
                {
                    // A bare "@CO" line has no text after the type code; store an empty
                    // comment instead of letting Substring throw ArgumentOutOfRangeException.
                    string comment = line.Length > commentTextStart
                        ? line.Substring(commentTextStart)
                        : string.Empty;
                    samHeader.Comments.Add(comment);
                }

                mbfReader.GoToNextLine();
            }

            // Only a header that was actually present in the input is validated.
            if (headerLinesFound)
            {
                string message = samHeader.IsValid();
                if (!string.IsNullOrEmpty(message))
                {
                    throw new FormatException(message);
                }
            }

            return samHeader;
        }
Пример #6
0
        /// <summary>
        /// Writes the specified SAMAlignmentHeader to the specified text writer.
        /// Each record field is written as one line of the form "@TYPE" followed by
        /// tab-separated "TAG:VALUE" pairs, then each comment as an "@CO" line.
        /// The writer is flushed afterwards.
        /// </summary>
        /// <param name="header">Header to write; nothing is written when it is null.</param>
        /// <param name="writer">Text writer.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="writer"/> is null and a header was supplied.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// Thrown when the header's IsValid check returns a message.
        /// </exception>
        public static void WriteHeader(SAMAlignmentHeader header, TextWriter writer)
        {
            // A missing header is not an error; there is simply nothing to emit.
            if (header == null)
            {
                return;
            }

            if (writer == null)
            {
                throw new ArgumentNullException("writer");
            }

            string validationError = header.IsValid();
            if (!string.IsNullOrEmpty(validationError))
            {
                throw new ArgumentException(validationError);
            }

            // One line per record field.
            foreach (SAMRecordField recordField in header.RecordFields)
            {
                StringBuilder recordLine = new StringBuilder();
                recordLine.Append("@").Append(recordField.Typecode);

                foreach (SAMRecordFieldTag fieldTag in recordField.Tags)
                {
                    recordLine.Append("\t").Append(fieldTag.Tag).Append(":").Append(fieldTag.Value);
                }

                writer.WriteLine(recordLine.ToString());
            }

            // Comments follow the record fields.
            foreach (string comment in header.Comments)
            {
                writer.WriteLine("@CO" + "\t" + comment);
            }

            writer.Flush();
        }
Пример #7
0
        /// <summary>
        /// Deserialization constructor.
        /// Restores the header from the "header" entry and the query sequences from the
        /// "sequences" entry; an empty list is used when no sequences were stored.
        /// </summary>
        /// <param name="info">Serialization Info.</param>
        /// <param name="context">Streaming context.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="info"/> is null.</exception>
        protected SequenceAlignmentMap(SerializationInfo info, StreamingContext context)
        {
            if (info == null)
            {
                throw new ArgumentNullException("info");
            }

            _header = (SAMAlignmentHeader)info.GetValue("header", typeof(SAMAlignmentHeader));

            // Expose the restored header through the metadata as well.
            _metadata = new Dictionary<string, object>
            {
                { Helper.SAMAlignmentHeaderKey, _header }
            };

            IList<SAMAlignedSequence> storedSequences =
                (IList<SAMAlignedSequence>)info.GetValue("sequences", typeof(IList<SAMAlignedSequence>));
            _querySequences = storedSequences ?? new List<SAMAlignedSequence>();
        }
Пример #8
0
 /// <summary>
 /// Creates SequenceAlignmentMap instance backed by a list of virtual sequences.
 /// </summary>
 /// <param name="header">SAM header.</param>
 /// <param name="querySequences">
 /// A list of virtual sequences. When null, the empty list created by the
 /// single-argument constructor is kept.
 /// </param>
 public SequenceAlignmentMap(SAMAlignmentHeader header, IVirtualAlignedSequenceList <SAMAlignedSequence> querySequences) : this(header)
 {
     // this(header) has already installed an empty list; only replace it with a real
     // list so the map never ends up holding a null sequence collection (the
     // deserialization constructor normalizes null the same way).
     if (querySequences != null)
     {
         _querySequences = querySequences;
     }
 }
Пример #9
0
        /// <summary>
        /// Writes an ISequenceAlignment to the location specified by the writer.
        /// The SAM header found in the alignment's metadata (if any) is written first.
        /// Each aligned sequence is then written as one tab-separated line: the fields of
        /// its SAMAlignedSequenceHeader, the sequence symbols, the quality scores (or "*")
        /// and the optional fields. The writer is flushed at the end.
        /// </summary>
        /// <param name="sequenceAlignment">The sequence alignment to format.</param>
        /// <param name="writer">The TextWriter used to write the formatted sequence alignment text.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="sequenceAlignment"/> or <paramref name="writer"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// Thrown when an aligned sequence has no SAMAlignedSequenceHeader in its metadata,
        /// or when its first sequence does not use the DNA alphabet.
        /// </exception>
        public void Format(ISequenceAlignment sequenceAlignment, TextWriter writer)
        {
            if (sequenceAlignment == null)
            {
                throw new ArgumentNullException(Resource.ParameterNameSequenceAlignment);
            }

            if (writer == null)
            {
                throw new ArgumentNullException(Resource.ParameterNameWriter);
            }

            #region Write alignment header
            // TryGetValue rather than the indexer: a missing key must be treated like a
            // missing header instead of surfacing as KeyNotFoundException.
            object headerValue;
            if (sequenceAlignment.Metadata.TryGetValue(Helper.SAMAlignmentHeaderKey, out headerValue))
            {
                SAMAlignmentHeader header = headerValue as SAMAlignmentHeader;
                if (header != null)
                {
                    WriteHeader(header, writer);
                }
            }

            #endregion

            #region Write aligned sequences
            foreach (IAlignedSequence alignedSequence in sequenceAlignment.AlignedSequences)
            {
                // A missing key and a value of the wrong type are both reported through
                // the dedicated "header missing" error below.
                object alignedHeaderValue;
                alignedSequence.Metadata.TryGetValue(Helper.SAMAlignedSequenceHeaderKey, out alignedHeaderValue);
                SAMAlignedSequenceHeader alignedHeader = alignedHeaderValue as SAMAlignedSequenceHeader;
                if (alignedHeader == null)
                {
                    throw new ArgumentException(Resource.SAM_AlignedSequenceHeaderMissing);
                }

                // Mandatory fields that precede the sequence, each followed by a tab.
                StringBuilder alignmentLine = new StringBuilder();

                alignmentLine.Append(alignedHeader.QName);
                alignmentLine.Append("\t");
                alignmentLine.Append((int)alignedHeader.Flag);
                alignmentLine.Append("\t");
                alignmentLine.Append(alignedHeader.RName);
                alignmentLine.Append("\t");
                alignmentLine.Append(alignedHeader.Pos);
                alignmentLine.Append("\t");
                alignmentLine.Append(alignedHeader.MapQ);
                alignmentLine.Append("\t");
                alignmentLine.Append(alignedHeader.CIGAR);
                alignmentLine.Append("\t");

                // The mate reference name is abbreviated to "=" when it matches RName.
                if (string.Compare(alignedHeader.MRNM, alignedHeader.RName, StringComparison.InvariantCultureIgnoreCase) == 0)
                {
                    alignmentLine.Append("=");
                }
                else
                {
                    alignmentLine.Append(alignedHeader.MRNM);
                }

                alignmentLine.Append("\t");
                alignmentLine.Append(alignedHeader.MPos);
                alignmentLine.Append("\t");
                alignmentLine.Append(alignedHeader.ISize);
                alignmentLine.Append("\t");
                writer.Write(alignmentLine.ToString());

                // Hash sets give constant-time membership tests; the per-symbol list
                // search made long reads quadratic.
                HashSet <int> dotSymbolIndices   = new HashSet <int>(alignedHeader.DotSymbolIndices);
                HashSet <int> equalSymbolIndices = new HashSet <int>(alignedHeader.EqualSymbolIndices);

                if (alignedSequence.Sequences.Count > 0 && alignedSequence.Sequences[0] != null)
                {
                    ISequence seq = alignedSequence.Sequences[0];

                    if (seq.Alphabet != Alphabets.DNA)
                    {
                        throw new ArgumentException(Resource.SAMFormatterSupportsDNAOnly);
                    }

                    for (int i = 0; i < seq.Count; i++)
                    {
                        char symbol = seq[i].Symbol;

                        // Positions recorded in the header are written as '.' or '=';
                        // '=' wins when a position appears in both sets.
                        if (dotSymbolIndices.Contains(i))
                        {
                            symbol = '.';
                        }

                        if (equalSymbolIndices.Contains(i))
                        {
                            symbol = '=';
                        }

                        writer.Write(symbol);
                    }

                    writer.Write("\t");

                    // Quality scores are only available on qualitative sequences.
                    IQualitativeSequence qualSeq = seq as IQualitativeSequence;
                    if (qualSeq != null)
                    {
                        writer.Write(ASCIIEncoding.ASCII.GetString(qualSeq.Scores));
                    }
                    else
                    {
                        writer.Write("*");
                    }
                }
                else
                {
                    // No sequence available: both the sequence and the quality column are "*".
                    writer.Write("*");
                    writer.Write("\t");
                    writer.Write("*");
                }

                foreach (SAMOptionalField field in alignedHeader.OptionalFields)
                {
                    writer.Write("\t");
                    writer.Write(field.Tag);
                    writer.Write(":");
                    writer.Write(field.VType);
                    writer.Write(":");
                    writer.Write(field.Value);
                }

                writer.WriteLine();
            }
            #endregion

            writer.Flush();
        }
Пример #10
0
        /// <summary>
        /// Parses a SAM sequence alignment from a file.
        /// When data virtualization is enabled and the sidecar file is valid, the returned
        /// map is backed by a VirtualAlignedSequenceList; otherwise the file is parsed
        /// through an MBFTextReader.
        /// </summary>
        /// <param name="fileName">Name of the file to parse.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequences in the sequence alignment should be in
        /// readonly mode or not. It is forwarded to the reader-based Parse overload and is
        /// not used on the data-virtualization path.
        /// </param>
        /// <returns>SequenceAlignmentMap object.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="fileName"/> is null, empty or white space.
        /// </exception>
        /// <exception cref="FileFormatException">
        /// Thrown on the data-virtualization path when the file has no header and parsing
        /// its current line as a sequence raises IndexOutOfRangeException.
        /// </exception>
        public SequenceAlignmentMap Parse(string fileName, bool isReadOnly)
        {
            if (string.IsNullOrWhiteSpace(fileName))
            {
                throw new ArgumentNullException("fileName");
            }

            _fileName = fileName;

            // Work out whether data virtualization (DV) has to be switched on:
            // either the file reaches the configured size threshold or DV was enforced already.
            FileInfo sourceFile = new FileInfo(_fileName);
            _enforceDataVirtualizationByFileSize = EnforceDataVirtualizationByFileSize * FileLoadHelper.KBytes;

            bool reachesSizeThreshold = _enforceDataVirtualizationByFileSize != 0
                && sourceFile.Length >= _enforceDataVirtualizationByFileSize;
            if (reachesSizeThreshold || _isDataVirtualizationEnforced)
            {
                EnforceDataVirtualization = true;
            }

            if (IsDataVirtualizationEnabled)
            {
                using (MBFStreamReader streamReader = new MBFStreamReader(fileName))
                {
                    SAMAlignmentHeader samHeader = ParseSAMHeader(streamReader);

                    bool headerIsEmpty = samHeader.Comments.Count == 0 && samHeader.RecordFields.Count == 0;
                    if (headerIsEmpty)
                    {
                        try
                        {
                            // Without a header, check that this is a valid SAM file by
                            // parsing a single sequence from the current line.
                            ParseSequence(streamReader.Line, true, Alphabet, Encoding, RefSequences);
                        }
                        catch (IndexOutOfRangeException)
                        {
                            throw new FileFormatException(Resource.SAM_InvalidInputFile);
                        }
                    }

                    _sidecarFileProvider = new SidecarFileProvider(fileName);

                    // A sidecar file exists but is not valid: parse the sequences
                    // (presumably this rebuilds the sidecar - confirm against ParseSequences).
                    if (_sidecarFileProvider.SidecarFileExists && !_sidecarFileProvider.IsSidecarValid)
                    {
                        ParseSequences(streamReader);
                    }

                    if (_sidecarFileProvider.IsSidecarValid)
                    {
                        VirtualAlignedSequenceList <SAMAlignedSequence> virtualQueries =
                            new VirtualAlignedSequenceList <SAMAlignedSequence>(_sidecarFileProvider, this, _sidecarFileProvider.Count);
                        return new SequenceAlignmentMap(samHeader, virtualQueries);
                    }
                }
            }

            // DV is off or no valid sidecar is available: parse the file from a text reader.
            using (MBFTextReader textReader = new MBFTextReader(fileName))
            {
                return Parse(textReader, isReadOnly);
            }
        }