/// <summary>
        /// Writes an ISequenceAlignment to the location specified by the writer.
        /// The SAM header stored in the alignment metadata (when there is one) is written first,
        /// followed by every aligned sequence; the writer is flushed at the end.
        /// </summary>
        /// <param name="sequenceAlignment">The sequence alignment to format.</param>
        /// <param name="writer">The TextWriter used to write the formatted sequence alignment text.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="sequenceAlignment"/> or <paramref name="writer"/> is null.
        /// </exception>
        public void Format(ISequenceAlignment sequenceAlignment, TextWriter writer)
        {
            if (sequenceAlignment == null)
            {
                throw new ArgumentNullException(Properties.Resource.ParameterNameSequenceAlignment);
            }

            if (writer == null)
            {
                throw new ArgumentNullException(Properties.Resource.ParameterNameWriter);
            }

            // Look the header up with TryGetValue rather than the indexer: a dictionary indexer
            // throws KeyNotFoundException when the key is absent, whereas an alignment without
            // a SAM header should simply be written without one (see the null check below).
            object headerValue;
            sequenceAlignment.Metadata.TryGetValue(Helper.SAMAlignmentHeaderKey, out headerValue);

            SAMAlignmentHeader header = headerValue as SAMAlignmentHeader;
            if (header != null)
            {
                WriteHeader(header, writer);
            }

            // Write the aligned sequences in the order the alignment exposes them.
            foreach (IAlignedSequence alignedSequence in sequenceAlignment.AlignedSequences)
            {
                WriteSAMAlignedSequence(alignedSequence, writer);
            }

            writer.Flush();
        }
Exemple #2
0
        /// <summary>
        /// Writes an ISequenceAlignment to the location specified by the stream.
        /// The SAM header stored in the alignment metadata (when there is one) is written first,
        /// followed by every aligned sequence. The writer obtained from stream.OpenWrite()
        /// is disposed before the method returns.
        /// </summary>
        /// <param name="stream">The Stream used to write the formatted sequence alignment text.</param>
        /// <param name="sequenceAlignment">The sequence alignment to format.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="sequenceAlignment"/> or <paramref name="stream"/> is null.
        /// </exception>
        public void Format(Stream stream, ISequenceAlignment sequenceAlignment)
        {
            if (sequenceAlignment == null)
            {
                throw new ArgumentNullException(Properties.Resource.ParameterNameSequenceAlignment);
            }

            if (stream == null)
            {
                throw new ArgumentNullException("stream");
            }

            using (var writer = stream.OpenWrite())
            {
                // Look the header up with TryGetValue rather than the indexer: a dictionary indexer
                // throws KeyNotFoundException when the key is absent, whereas an alignment without
                // a SAM header should simply be written without one (see the null check below).
                object headerValue;
                sequenceAlignment.Metadata.TryGetValue(Helper.SAMAlignmentHeaderKey, out headerValue);

                SAMAlignmentHeader header = headerValue as SAMAlignmentHeader;
                if (header != null)
                {
                    WriteHeader(writer, header);
                }

                foreach (IAlignedSequence alignedSequence in sequenceAlignment.AlignedSequences)
                {
                    WriteSAMAlignedSequence(writer, alignedSequence);
                }
            }
        }
Exemple #3
0
        /// <summary>
        /// Reads SAM formatted text from the reader and builds a SequenceAlignmentMap from it.
        /// The leading lines that start with "@" are handed to the header parser; the lines
        /// after them are parsed as aligned sequences until the input ends or another "@" line shows up.
        /// </summary>
        /// <param name="reader">A reader for a biological sequence alignment text.</param>
        /// <returns>A new SequenceAlignmentMap instance containing parsed data.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="reader"/> is null.</exception>
        protected SequenceAlignmentMap ParseOneWithSpecificFormat(TextReader reader)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            // The header lines are buffered as strings first; the line that ends the header
            // is already consumed at that point and must be kept for the sequence loop below.
            var headerLines = new List<string>();
            string current;
            for (current = ReadNextLine(reader);
                 current != null && current.StartsWith(@"@", StringComparison.OrdinalIgnoreCase);
                 current = ReadNextLine(reader))
            {
                headerLines.Add(current);
            }

            SAMAlignmentHeader parsedHeader = ParseSamHeader(headerLines);
            var alignmentMap = new SequenceAlignmentMap(parsedHeader);

            // Every remaining non-header line describes one aligned sequence.
            for (;
                 current != null && !current.StartsWith(@"@", StringComparison.OrdinalIgnoreCase);
                 current = ReadNextLine(reader))
            {
                SAMAlignedSequence parsedSequence = ParseSequence(current, Alphabet, RefSequences);
                alignmentMap.QuerySequences.Add(parsedSequence);
            }

            return alignmentMap;
        }
Exemple #4
0
        /// <summary>
        /// Parses alignments in SAM format from a reader into a SequenceAlignmentMap object.
        /// When the parsed header lists no reference sequences, the distinct reference names found
        /// in the reads (ignoring "*", compared case-insensitively) are added to the header
        /// with a length of 0, in the order they were first seen.
        /// </summary>
        /// <param name="reader">A reader for a biological sequence alignment text.</param>
        /// <returns>A new SequenceAlignmentMap instance containing parsed data.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="reader"/> is null.</exception>
        protected SequenceAlignmentMap ParseOneWithSpecificFormat(TextReader reader)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            // Parse the header lines and store them in a string.
            // This is being done as parsing the header using the textreader is parsing an extra line.
            List<string> headerStrings = new List<string>();
            string line = ReadNextLine(reader);

            while (line != null && line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
            {
                headerStrings.Add(line);
                line = ReadNextLine(reader);
            }

            // Parse the alignment header strings.
            SAMAlignmentHeader header = ParseSamHeader(headerStrings);
            SequenceAlignmentMap sequenceAlignmentMap = new SequenceAlignmentMap(header);

            bool hasSQHeader = header.ReferenceSequences.Count > 0;

            // The list keeps first-seen order; the case-insensitive set makes the duplicate check
            // constant time per read instead of a linear scan over all names collected so far.
            List<string> refSeqNames = new List<string>();
            HashSet<string> seenRefSeqNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

            // Parse aligned sequences
            // If the SQ header is not present in header then get the reference sequences information from reads.
            while (line != null && !line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
            {
                SAMAlignedSequence alignedSeq = ParseSequence(line, this.Alphabet);

                // HashSet.Add returns false for a name that was already recorded.
                if (!hasSQHeader
                    && !alignedSeq.RName.Equals("*", StringComparison.OrdinalIgnoreCase)
                    && seenRefSeqNames.Add(alignedSeq.RName))
                {
                    refSeqNames.Add(alignedSeq.RName);
                }

                sequenceAlignmentMap.QuerySequences.Add(alignedSeq);
                line = ReadNextLine(reader);
            }

            // refSeqNames is only ever filled when there was no SQ header.
            foreach (string refname in refSeqNames)
            {
                header.ReferenceSequences.Add(new ReferenceSequenceInfo(refname, 0));
            }

            return sequenceAlignmentMap;
        }
        /// <summary>
        /// Initializes a SequenceAlignmentMap around the given SAM header.
        /// The header is also stored in the metadata under Helper.SAMAlignmentHeaderKey
        /// and the list of query sequences starts out empty.
        /// </summary>
        /// <param name="header">SAM header.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="header"/> is null.</exception>
        public SequenceAlignmentMap(SAMAlignmentHeader header)
        {
            if (header == null)
            {
                throw new ArgumentNullException("header");
            }

            this.header = header;

            // Build the metadata dictionary completely before publishing it through the property.
            var headerMetadata = new Dictionary<string, object>();
            headerMetadata.Add(Helper.SAMAlignmentHeaderKey, header);
            Metadata = headerMetadata;

            querySequences = new List<SAMAlignedSequence>();
        }
Exemple #6
0
        /// <summary>
        /// Lazily parses the aligned sequences of a SAM file, yielding one read at a time.
        /// The header lines at the top of the file are passed to the header parser but are not returned.
        /// Because this is an iterator, nothing is opened or validated until enumeration starts,
        /// and the file stays open until the enumeration finishes or the enumerator is disposed.
        /// </summary>
        /// <param name="fileName">Path of the SAM file to read.</param>
        /// <returns>An enumerable that yields each aligned sequence as it is parsed.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown (on first enumeration) when <paramref name="fileName"/> is null.
        /// </exception>
        public IEnumerable <SAMAlignedSequence> ParseSequencesAsEnumerable(string fileName)
        {
            if (fileName == null)
            {
                throw new ArgumentNullException("fileName");
            }

            using (StreamReader reader = new StreamReader(fileName))
            {
                // Parse the header lines and store them in a string.
                // This is being done as parsing the header using the textreader is parsing an extra line.
                List<string> headerStrings = new List<string>();
                string line = ReadNextLine(reader);
                while (line != null && line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
                {
                    headerStrings.Add(line);
                    line = ReadNextLine(reader);
                }

                // The parsed header itself is not needed here; the call is kept so that any error
                // the header parser raises for a malformed header still reaches the caller.
                ParseSamHeader(headerStrings);

                // Parse aligned sequences until the input ends or another "@" line is found.
                while (line != null && !line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
                {
                    yield return ParseSequence(line, this.Alphabet);

                    line = ReadNextLine(reader);
                }
            }
        }
        /// <summary>
        /// Initializes a SequenceAlignmentMap around the given SAM header.
        /// The header is also stored in the metadata under Helper.SAMAlignmentHeaderKey
        /// and the list of query sequences starts out empty.
        /// </summary>
        /// <param name="header">SAM header.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="header"/> is null.</exception>
        public SequenceAlignmentMap(SAMAlignmentHeader header)
        {
            if (header == null)
            {
                throw new ArgumentNullException("header");
            }

            this.header = header;

            // The header is the only metadata entry a freshly created map carries.
            metadata = new Dictionary<string, object>
            {
                { Helper.SAMAlignmentHeaderKey, header }
            };

            querySequences = new List<SAMAlignedSequence>();
        }
Exemple #8
0
        /// <summary>
        /// Writes the given SAM alignment header to the text writer, one line per record field
        /// ("@" + type code, then tab separated "tag:value" pairs) followed by one "@CO" line per comment.
        /// The writer is flushed afterwards.
        /// </summary>
        /// <param name="writer">Text Writer</param>
        /// <param name="header">Header to write. Nothing is written when it is null.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="writer"/> is null and a header was supplied.</exception>
        /// <exception cref="ArgumentException">Thrown when header.IsValid() reports a problem.</exception>
        public static void WriteHeader(TextWriter writer, SAMAlignmentHeader header)
        {
            // The header check comes first, so a null header returns silently
            // even when the writer is null as well.
            if (header == null)
            {
                return;
            }

            if (writer == null)
            {
                throw new ArgumentNullException("writer");
            }

            string validationError = header.IsValid();
            if (!string.IsNullOrEmpty(validationError))
            {
                throw new ArgumentException(validationError);
            }

            foreach (SAMRecordField field in header.RecordFields)
            {
                StringBuilder text = new StringBuilder("@");
                text.Append(field.Typecode);

                foreach (SAMRecordFieldTag fieldTag in field.Tags)
                {
                    text.Append("\t");
                    text.Append(fieldTag.Tag);
                    text.Append(":");
                    text.Append(fieldTag.Value);
                }

                writer.WriteLine(text.ToString());
            }

            foreach (string comment in header.Comments)
            {
                writer.WriteLine(string.Concat("@CO", "\t", comment));
            }

            writer.Flush();
        }
Exemple #9
0
        /// <summary>
        /// Builds a SAMAlignmentHeader from the raw header lines of a SAM file.
        /// "CO" lines become comments; every other line becomes a record field with its tags.
        /// The reference sequence information derived from the SQ records is then added to the header.
        /// </summary>
        /// <param name="headerStrings">Header lines; callers pass lines that start with "@".</param>
        /// <returns>The populated header.</returns>
        /// <exception cref="FormatException">Thrown when the assembled header's IsValid() reports a problem.</exception>
        private static SAMAlignmentHeader ParseSamHeader(List <string> headerStrings)
        {
            SAMAlignmentHeader samHeader = new SAMAlignmentHeader();

            foreach (string headerString in headerStrings)
            {
                // Validate before slicing the line, so a malformed line is reported by the
                // validator rather than by an index/substring failure further down.
                ValidateHeaderLineFormat(headerString);

                string[] tokens = headerString.Split(tabDelim, StringSplitOptions.RemoveEmptyEntries);

                // The first token is "@" followed by the record type code.
                string recordTypecode = tokens[0].Substring(1);

                if (string.Equals(recordTypecode, "CO", StringComparison.OrdinalIgnoreCase))
                {
                    // Skip the four leading characters ("@CO" plus the separator); the rest is the comment text.
                    samHeader.Comments.Add(headerString.Substring(4));
                    continue;
                }

                SAMRecordField headerLine = new SAMRecordField(recordTypecode);
                for (int i = 1; i < tokens.Length; i++)
                {
                    // Each token is a two character tag name, one separator character, then the value.
                    string tagToken = tokens[i];
                    string tagName = tagToken.Substring(0, 2);
                    headerLine.Tags.Add(new SAMRecordFieldTag(tagName, tagToken.Substring(3)));
                }

                samHeader.RecordFields.Add(headerLine);
            }

            IList<ReferenceSequenceInfo> referenceSeqsInfo = samHeader.GetReferenceSequencesInfoFromSQHeader();

            foreach (var item in referenceSeqsInfo)
            {
                samHeader.ReferenceSequences.Add(item);
            }

            string message = samHeader.IsValid();

            if (!string.IsNullOrEmpty(message))
            {
                throw new FormatException(message);
            }

            return samHeader;
        }
        /// <summary>
        /// Writes the given SAM alignment header to the text writer, one line per record field
        /// ("@" + type code, then tab separated "tag:value" pairs) followed by one "@CO" line per comment.
        /// The writer is flushed afterwards.
        /// </summary>
        /// <param name="header">Header to write. Nothing is written when it is null.</param>
        /// <param name="writer">Text writer.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="writer"/> is null and a header was supplied.</exception>
        /// <exception cref="ArgumentException">Thrown when header.IsValid() reports a problem.</exception>
        public static void WriteHeader(SAMAlignmentHeader header, TextWriter writer)
        {
            // The header check comes first, so a null header returns silently
            // even when the writer is null as well.
            if (header == null)
            {
                return;
            }

            if (writer == null)
            {
                throw new ArgumentNullException("writer");
            }

            string validationError = header.IsValid();
            if (!string.IsNullOrEmpty(validationError))
            {
                throw new ArgumentException(validationError);
            }

            for (int fieldIndex = 0; fieldIndex < header.RecordFields.Count; fieldIndex++)
            {
                var field = header.RecordFields[fieldIndex];

                StringBuilder text = new StringBuilder("@");
                text.Append(field.Typecode);

                for (int tagIndex = 0; tagIndex < field.Tags.Count; tagIndex++)
                {
                    var fieldTag = field.Tags[tagIndex];
                    text.Append("\t");
                    text.Append(fieldTag.Tag);
                    text.Append(":");
                    text.Append(fieldTag.Value);
                }

                writer.WriteLine(text.ToString());
            }

            foreach (string comment in header.Comments)
            {
                writer.WriteLine(string.Concat("@CO", "\t", comment));
            }

            writer.Flush();
        }
        /// <summary>
        /// Deserialization constructor: restores the header and the query sequences from the
        /// serialization info and rebuilds the metadata entry for the header.
        /// </summary>
        /// <param name="info">Serialization Info.</param>
        /// <param name="context">Streaming context.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="info"/> is null.</exception>
        protected SequenceAlignmentMap(SerializationInfo info, StreamingContext context)
        {
            if (info == null)
            {
                throw new ArgumentNullException("info");
            }

            header = (SAMAlignmentHeader)info.GetValue("header", typeof(SAMAlignmentHeader));

            // The metadata is not read from the serialized data; it is recreated from the header.
            metadata = new Dictionary<string, object>
            {
                { Helper.SAMAlignmentHeaderKey, header }
            };

            // Fall back to an empty list when no sequences were stored.
            var storedSequences = (IList<SAMAlignedSequence>)info.GetValue("sequences", typeof(IList<SAMAlignedSequence>));
            querySequences = storedSequences ?? new List<SAMAlignedSequence>();
        }
Exemple #12
0
        /// <summary>
        /// Parses SAM alignment header from specified BioTextReader.
        /// Consumes consecutive lines starting with "@": "CO" lines become comments, every other
        /// line becomes a record field with its tags. The reader is left on the first non-header line.
        /// </summary>
        /// <param name="bioReader">Bio text reader.</param>
        /// <returns>The parsed header; an empty header when the reader is not positioned on a header line.</returns>
        /// <exception cref="FormatException">
        /// Thrown when at least one header line was read and the header's IsValid() reports a problem.
        /// </exception>
        private static SAMAlignmentHeader ParserSAMHeader(BioTextReader bioReader)
        {
            SAMAlignmentHeader samHeader = new SAMAlignmentHeader();
            bool sawHeaderLine = false;

            while (bioReader.HasLines && bioReader.Line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
            {
                sawHeaderLine = true;
                string line = bioReader.Line;

                // Validate before slicing the line, so a malformed line is reported by the
                // validator rather than by an index/substring failure further down.
                ValidateHeaderLineFormat(line);

                string[] tokens = line.Split(tabDelim, StringSplitOptions.RemoveEmptyEntries);

                // The first token is "@" followed by the record type code.
                string recordTypecode = tokens[0].Substring(1);

                if (string.Equals(recordTypecode, "CO", StringComparison.OrdinalIgnoreCase))
                {
                    // Skip the four leading characters ("@CO" plus the separator); the rest is the comment text.
                    samHeader.Comments.Add(line.Substring(4));
                }
                else
                {
                    SAMRecordField headerLine = new SAMRecordField(recordTypecode);
                    for (int i = 1; i < tokens.Length; i++)
                    {
                        // Each token is a two character tag name, one separator character, then the value.
                        string tagToken = tokens[i];
                        string tagName = tagToken.Substring(0, 2);
                        headerLine.Tags.Add(new SAMRecordFieldTag(tagName, tagToken.Substring(3)));
                    }

                    samHeader.RecordFields.Add(headerLine);
                }

                bioReader.GoToNextLine();
            }

            // The header is only validated when there actually was one.
            if (sawHeaderLine)
            {
                string message = samHeader.IsValid();
                if (!string.IsNullOrEmpty(message))
                {
                    throw new FormatException(message);
                }
            }

            return samHeader;
        }
Exemple #13
0
        /// <summary>
        /// Builds a SequenceAlignmentMap from SAM formatted text: the header is parsed first,
        /// then the aligned sequences are parsed into the map created for that header.
        /// </summary>
        /// <param name="bioReader">A reader for a biological sequence alignment text.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether sequences in the resulting sequence alignment should be in readonly mode or not.
        /// The value is passed on unchanged to the sequence parsing step.
        /// </param>
        /// <returns>A new SequenceAlignmentMap instance containing parsed data.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="bioReader"/> is null.</exception>
        /// <exception cref="FormatException">Thrown when the reader has no lines.</exception>
        protected SequenceAlignmentMap ParseOneWithSpecificFormat(BioTextReader bioReader, bool isReadOnly)
        {
            if (bioReader == null)
            {
                throw new ArgumentNullException("bioReader");
            }

            // Empty input is rejected outright.
            if (!bioReader.HasLines)
            {
                throw new FormatException(Resource.Parser_NoTextErrorMessage);
            }

            SAMAlignmentHeader parsedHeader = ParserSAMHeader(bioReader);
            SequenceAlignmentMap alignmentMap = new SequenceAlignmentMap(parsedHeader);

            // The header parser has advanced the reader; what follows are the aligned sequences.
            ParseSequences(alignmentMap, bioReader, isReadOnly);

            return alignmentMap;
        }
Exemple #14
0
        /// <summary>
        /// Reads the leading "@" lines from the text reader and parses them into a SAM alignment header.
        /// Reading stops at the first line that does not start with "@" (that line is consumed).
        /// </summary>
        /// <param name="reader">Text reader.</param>
        /// <returns>The parsed header, or null when the input does not start with a header line.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="reader"/> is null.</exception>
        public static SAMAlignmentHeader ParseSAMHeader(TextReader reader)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            var headerLines = new List<string>();
            for (string current = ReadNextLine(reader);
                 current != null && current.StartsWith(@"@", StringComparison.OrdinalIgnoreCase);
                 current = ReadNextLine(reader))
            {
                headerLines.Add(current);
            }

            // No header lines at all means there is no header to report.
            if (headerLines.Count == 0)
            {
                return null;
            }

            return ParseSamHeader(headerLines);
        }
Exemple #15
0
        /// <summary>
        /// Updates the header with reference name from reads in input file.
        /// Every distinct reference name found in the reads (ignoring "*", compared case-insensitively)
        /// is added to the header with a length of 0, in the order it was first seen.
        /// </summary>
        /// <param name="header">SAM alignment header.</param>
        private void UpdateReferenceInformationFromReads(SAMAlignmentHeader header)
        {
            // If the ReferenceNamesAndLength file name is not specified and there is no @SQ header,
            // then get the reference names from read information.
            // The list keeps first-seen order; the case-insensitive set makes the duplicate check
            // constant time per read instead of a linear scan over all names collected so far.
            List<string> refSeqNames = new List<string>();
            HashSet<string> seenRefSeqNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

            using (StreamReader textReader = new StreamReader(InputFilePath))
            {
                foreach (SAMAlignedSequence alignedSeq in GetAlignedSequence(textReader))
                {
                    // HashSet.Add returns false for a name that was already recorded.
                    if (!alignedSeq.RName.Equals("*", StringComparison.OrdinalIgnoreCase)
                        && seenRefSeqNames.Add(alignedSeq.RName))
                    {
                        refSeqNames.Add(alignedSeq.RName);
                    }
                }
            }

            // The header is only touched after the whole file was read successfully.
            foreach (string refname in refSeqNames)
            {
                header.ReferenceSequences.Add(new ReferenceSequenceInfo(refname, 0));
            }
        }
Exemple #16
0
        /// <summary>
        /// Converts the input SAM to BAM file format.
        /// Reads the header from InputFilePath, writes it (completing the reference information
        /// first when a full conversion is requested), writes every aligned sequence unless
        /// HeaderOnly is set, and finally flushes / compresses the BAM output streams
        /// according to the UnCompressedBAM and BAMOutput options.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// Thrown when reading or parsing the header of the input file fails; the original error is the inner exception.
        /// </exception>
        private void ConvertFromSAMTOBAM()
        {
            SAMAlignmentHeader header = null;
            try
            {
                using (var reader = new StreamReader(InputFilePath))
                {
                    header = SAMParser.ParseSAMHeader(reader);
                }
            }
            catch (Exception ex)
            {
                throw new InvalidOperationException(Resources.InvalidSAMFile, ex);
            }

            if (header == null)
            {
                Console.Error.WriteLine("Warning: SAM file doesn't contain header");
            }

            if (HeaderOnly)
            {
                // Header-only output: without a header there is nothing to write.
                if (header != null)
                {
                    WriteHeader(header);
                }
            }
            else
            {
                // A full conversion always writes a header, so start from an empty one if needed.
                if (header == null)
                {
                    header = new SAMAlignmentHeader();
                }

                if (!string.IsNullOrEmpty(Library))
                {
                    // Ordinal, case-insensitive match on the type code: no culture-dependent
                    // upper-casing, no temporary string per record, and no failure on a null type code.
                    rgRecFields = header.RecordFields
                        .Where(field => string.Equals(field.Typecode, "RG", StringComparison.OrdinalIgnoreCase))
                        .ToList();
                }

                // Reference information comes from the names/length file when one is given;
                // otherwise, if the header lists no references, it is collected from the reads.
                if (!string.IsNullOrEmpty(ReferenceNamesAndLength))
                {
                    this.UpdateReferenceInformationFromFile(header);
                }
                else if (header.ReferenceSequences.Count == 0)
                {
                    this.UpdateReferenceInformationFromReads(header);
                }

                WriteHeader(header);
                using (StreamReader textReader = new StreamReader(InputFilePath))
                {
                    foreach (SAMAlignedSequence alignedSeq in GetAlignedSequence(textReader))
                    {
                        WriteAlignedSequence(header, alignedSeq);
                    }
                }
            }

            if (UnCompressedBAM)
            {
                bamUncompressedOutStream.Flush();
                if (writer != null)
                {
                    DisplayBAMContent(bamUncompressedOutStream);
                }
            }

            if (BAMOutput && !UnCompressedBAM)
            {
                // Compress the uncompressed BAM data from its start into the compressed output stream.
                bamUncompressedOutStream.Flush();
                bamUncompressedOutStream.Seek(0, SeekOrigin.Begin);
                bamformatter.CompressBAMFile(bamUncompressedOutStream, bamCompressedOutStream);
                bamCompressedOutStream.Flush();
                if (writer != null)
                {
                    DisplayBAMContent(bamCompressedOutStream);
                }
            }
        }
Exemple #17
0
        /// <summary>
        /// Merge multiple sorted alignments.
        /// SAMUtil.exe out.bam in1.bam in2.bam
        /// Every input file is parsed and sorted in parallel. The output header is taken from
        /// HeaderFile when one is given, otherwise from the first input file. The merged content is
        /// written to a temporary file and then compressed into OutputFilename ("out.bam" when not set).
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// Thrown when FilePaths is null or has fewer than two entries. Failures inside the parallel
        /// parse step (unreadable input, empty file, missing header) are raised as
        /// InvalidOperationException from within Parallel.For, with the original parse error
        /// kept as the inner exception.
        /// </exception>
        public void DoMerge()
        {
            if (FilePaths == null)
            {
                throw new InvalidOperationException("FilePath");
            }

            if (FilePaths.Length < 2)
            {
                throw new InvalidOperationException(Resources.MergeHelp);
            }

            IList<IList<BAMSortedIndex>> sortedIndexes = new List<IList<BAMSortedIndex>>();
            IList<SequenceAlignmentMap> sequenceAlignmentMaps = new List<SequenceAlignmentMap>();
            Parallel.For(0, FilePaths.Length, (int index) =>
            {
                BAMParser parser = new BAMParser();
                SequenceAlignmentMap map;
                try
                {
                    map = parser.ParseOne<SequenceAlignmentMap>(FilePaths[index]);
                }
                catch (Exception parseError)
                {
                    // Keep the original failure as inner exception so the cause is not lost.
                    throw new InvalidOperationException(Resources.InvalidBAMFile, parseError);
                }

                if (map == null)
                {
                    throw new InvalidOperationException(Resources.EmptyFile);
                }

                // Only the first input file takes part in choosing the output header.
                if (index == 0)
                {
                    if (string.IsNullOrEmpty(HeaderFile) && map.Header.RecordFields.Count == 0)
                    {
                        throw new InvalidOperationException(Resources.HeaderMissing);
                    }

                    if (!string.IsNullOrEmpty(HeaderFile))
                    {
                        SAMParser parse = new SAMParser();
                        SequenceAlignmentMap head;
                        try
                        {
                            head = parse.ParseOne<SequenceAlignmentMap>(HeaderFile);
                        }
                        catch (Exception headerError)
                        {
                            throw new InvalidOperationException(Resources.IncorrectHeaderFile, headerError);
                        }

                        if (head == null)
                        {
                            throw new InvalidOperationException(Resources.EmptyFile);
                        }

                        header = head.Header;
                    }
                    else
                    {
                        header = map.Header;
                    }
                }

                IList<BAMSortedIndex> sortedIndex = Sort(map, SortByReadName ? BAMSortByFields.ReadNames : BAMSortByFields.ChromosomeCoordinates);

                // Both lists are appended under the same lock so that entry i of one list
                // always belongs to entry i of the other.
                lock (sortedIndexes)
                {
                    sortedIndexes.Add(sortedIndex);
                    sequenceAlignmentMaps.Add(map);
                }
            });

            if (string.IsNullOrEmpty(OutputFilename))
            {
                OutputFilename = "out.bam";
                autoGeneratedOutputFilename = true;
            }

            string filePath = Path.GetTempFileName();
            try
            {
                using (FileStream fstemp = new FileStream(filePath, FileMode.Create, FileAccess.ReadWrite))
                {
                    BAMFormatter formatter = new BAMFormatter();
                    formatter.WriteHeader(header, fstemp);

                    if (SortByReadName)
                    {
                        IList<BAMSortedIndex> sortedIndex = sortedIndexes.Select(a => a.First()).ToList();
                        WriteMergeFileSortedByReadName(sortedIndex, fstemp, formatter, sequenceAlignmentMaps);
                    }
                    else
                    {
                        WriteMergeFile(sortedIndexes, fstemp, formatter, sequenceAlignmentMaps);
                    }

                    using (FileStream fsoutput = new FileStream(OutputFilename, FileMode.Create, FileAccess.Write))
                    {
                        // Compress the merged data from the start of the temporary file.
                        fstemp.Seek(0, SeekOrigin.Begin);
                        formatter.CompressBAMFile(fstemp, fsoutput);
                    }
                }
            }
            finally
            {
                // Remove the temporary file even when writing or compressing failed.
                File.Delete(filePath);
            }

            if (autoGeneratedOutputFilename)
            {
                Console.WriteLine(Properties.Resources.SuccessMessageWithOutputFileName, OutputFilename);
            }
        }
Exemple #18
0
        /// <summary>
        /// Yields the aligned sequences of one reference sequence, located through the BAM index.
        /// Because this is an iterator, the index is read and the arguments are checked only
        /// once enumeration starts.
        /// </summary>
        /// <param name="bamIndexFile">Index file that is read to locate the chunks.</param>
        /// <param name="refSeqIndex">Index of the reference sequence within the BAM index.</param>
        /// <param name="start">Start of the region; 0 together with an end of int.MaxValue selects all chunks of the reference.</param>
        /// <param name="end">End of the region.</param>
        /// <param name="header">SAM alignment header (not used by this method).</param>
        /// <returns>The aligned sequences found in the selected chunks.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when <paramref name="refSeqIndex"/> is not -1 and is not smaller than the number of reference indexes.
        /// </exception>
        private IEnumerable<SAMAlignedSequence> GetAlignmentWithIndexYield(BAMIndexFile bamIndexFile, int refSeqIndex, int start, int end,
            SAMAlignmentHeader header)
        {
            BAMIndex bamIndexInfo = bamIndexFile.Read();

            // NOTE(review): -1 passes this check but is then used as an index below — confirm intended handling.
            if (refSeqIndex != -1 && bamIndexInfo.RefIndexes.Count <= refSeqIndex)
            {
                throw new ArgumentOutOfRangeException("refSeqIndex");
            }

            BAMReferenceIndexes refIndex = bamIndexInfo.RefIndexes[refSeqIndex];

            IList<Chunk> chunks;
            if (start == 0 && end == int.MaxValue)
            {
                chunks = GetChunks(refIndex);
            }
            else
            {
                chunks = GetChunks(refIndex, start, end);
            }

            IList<SAMAlignedSequence> alignedSeqs = GetAlignedSequences(chunks, start, end);
            foreach (SAMAlignedSequence alignedSeq in alignedSeqs)
            {
                // Yield the current element; yielding alignedSeqs[0] here would return
                // the first read once per element instead of each read once.
                yield return alignedSeq;
            }

            readStream = null;
        }
Exemple #19
0
        /// <summary>
        /// Reads alignments sequentially until end of file, without using an existing index.
        /// Every read is passed through BamIndexing. When createBamIndex is set, a new bamIndex is
        /// prepared beforehand and the end offsets of the last chunk are recorded afterwards;
        /// otherwise the non-null reads are appended to <paramref name="seqMap"/>.
        /// </summary>
        /// <param name="header">Header used to create <paramref name="seqMap"/> when it is null and no index is being created.</param>
        /// <param name="seqMap">Alignment map that receives the reads; created here when needed.</param>
        private void GetAlignmentWithoutIndex(SAMAlignmentHeader header, ref SequenceAlignmentMap seqMap)
        {
            Chunk lastChunk = null;
            ulong lastcOffset = 0;
            ushort lastuOffset = 0;
            BAMReferenceIndexes refIndices = null;

            if (createBamIndex)
            {
                // One BAMReferenceIndexes entry per known reference sequence name.
                bamIndex = new BAMIndex();

                for (int i = 0; i < refSeqNames.Count; i++)
                {
                    bamIndex.RefIndexes.Add(new BAMReferenceIndexes());
                }

                refIndices = bamIndex.RefIndexes[0];
            }

            if (!createBamIndex && seqMap == null)
            {
                seqMap = new SequenceAlignmentMap(header);
            }

            while (!IsEOF())
            {
                if (createBamIndex)
                {
                    // Remember the stream positions before the next read is consumed.
                    lastcOffset = (ulong)currentCompressedBlockStartPos;
                    lastuOffset = (ushort)deCompressedStream.Position;
                }

                SAMAlignedSequence alignedSeq = GetAlignedSequence(0, int.MaxValue);
                alignedSeq = BamIndexing(alignedSeq, refIndices, bamIndex, lastcOffset, lastuOffset, ref lastChunk);

                if (!createBamIndex && alignedSeq != null)
                {
                    seqMap.QuerySequences.Add(alignedSeq);
                }
            }

            // Close the last chunk of the index. lastChunk is only ever assigned through the
            // BamIndexing call above, so it is still null when no read was processed;
            // in that case there is no chunk end to record.
            if (createBamIndex && lastChunk != null)
            {
                lastChunk.ChunkEnd.CompressedBlockOffset = (ulong)readStream.Position;

                if (deCompressedStream != null)
                {
                    lastChunk.ChunkEnd.UncompressedBlockOffset = (ushort)deCompressedStream.Position;
                }
                else
                {
                    lastChunk.ChunkEnd.UncompressedBlockOffset = 0;
                }
            }
        }
Exemple #20
0
 /// <summary>
 /// Sends one aligned sequence to the active output: the BAM formatter when
 /// uncompressed or compressed BAM output is selected, the SAM text writer otherwise.
 /// </summary>
 /// <param name="header">Alignment header passed to the BAM formatter.</param>
 /// <param name="alignedSequence">Aligned sequence to write.</param>
 private void WriteAlignedSequence(SAMAlignmentHeader header, SAMAlignedSequence alignedSequence)
 {
     bool writeAsSam = !UnCompressedBAM && !BAMOutput;
     if (writeAsSam)
     {
         SAMFormatter.WriteSAMAlignedSequence(writer, alignedSequence);
         return;
     }

     // For compressed BAM output the uncompressed file is compressed later,
     // before it is sent to the output stream.
     bamformatter.WriteAlignedSequence(header, alignedSequence, bamUncompressedOutStream);
 }
Exemple #21
0
        /// <summary>
        /// Fills a new SequenceAlignmentMap with the aligned sequences of one reference
        /// sequence, using the BAM index file to find the chunks to read.
        /// </summary>
        /// <param name="bamIndexFile">BAM index file the index information is read from.</param>
        /// <param name="refSeqIndex">Index of the reference sequence inside the BAM index.</param>
        /// <param name="start">Start position handed to the chunk lookup and the sequence reader.</param>
        /// <param name="end">End position handed to the chunk lookup and the sequence reader.</param>
        /// <param name="header">Alignment header used to create the map.</param>
        /// <param name="seqMap">Always replaced with a new map built from <paramref name="header"/>.</param>
        private void GetAlignmentWithIndex(BAMIndexFile bamIndexFile, int refSeqIndex, int start, int end, 
            SAMAlignmentHeader header, ref SequenceAlignmentMap seqMap)
        {
            seqMap = new SequenceAlignmentMap(header);

            BAMIndex index = bamIndexFile.Read();

            bool indexTooLarge = refSeqIndex != -1 && refSeqIndex >= index.RefIndexes.Count;
            if (indexTooLarge)
            {
                throw new ArgumentOutOfRangeException("refSeqIndex");
            }

            BAMReferenceIndexes referenceIndexes = index.RefIndexes[refSeqIndex];

            // The full range (0 .. int.MaxValue) uses the overload without positions.
            bool wholeReference = start == 0 && end == int.MaxValue;
            IList<Chunk> selectedChunks = wholeReference
                ? GetChunks(referenceIndexes)
                : GetChunks(referenceIndexes, start, end);

            IList<SAMAlignedSequence> found = GetAlignedSequences(selectedChunks, start, end);
            foreach (SAMAlignedSequence sequence in found)
            {
                seqMap.QuerySequences.Add(sequence);
            }

            readStream = null;
        }
Exemple #22
0
 // Checks the alignment header; throws ArgumentException carrying the
 // validation message when IsValid() reports a non-empty message.
 private static void ValidateAlignmentHeader(SAMAlignmentHeader header)
 {
     string validationError = header.IsValid();
     if (string.IsNullOrEmpty(validationError))
     {
         return;
     }

     throw new ArgumentException(validationError);
 }
Exemple #23
0
        /// <summary>
        /// Writes one SAMAlignedSequence to the given stream. The reference sequence
        /// ranges are taken from the header the first time they are needed.
        /// </summary>
        /// <param name="header">Header from SAM object.</param>
        /// <param name="alignedSeq">SAMAlignedSequence object.</param>
        /// <param name="writer">Stream to write.</param>
        /// <exception cref="ArgumentNullException">Thrown when any argument is null.</exception>
        public void WriteAlignedSequence(SAMAlignmentHeader header, SAMAlignedSequence alignedSeq, Stream writer)
        {
            if (header == null)
            {
                throw new ArgumentNullException("header");
            }

            if (alignedSeq == null)
            {
                throw new ArgumentNullException("alignedSeq");
            }

            if (writer == null)
            {
                throw new ArgumentNullException("writer");
            }

            // Keep already cached ranges; otherwise read them from the header.
            this.refSequences = this.refSequences ?? header.GetReferenceSequenceRanges();

            WriteAlignedSequence(alignedSeq, writer);
        }
Exemple #24
0
        /// <summary>
        /// Writes BAM header to the specified stream in BAM format: magic number,
        /// header text length, header text, number of reference sequences and, for
        /// each reference, the name length, the null-terminated name and the length.
        /// </summary>
        /// <param name="header">SAMAlignmentHeader object</param>
        /// <param name="writer">Stream to write.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="header"/> or <paramref name="writer"/> is null.
        /// </exception>
        public void WriteHeader(SAMAlignmentHeader header, Stream writer)
        {
            if (header == null)
            {
                throw new ArgumentNullException("header");
            }

            if (writer == null)
            {
                throw new ArgumentNullException("writer");
            }

            string samHeader;

            if (this.refSequences == null)
            {
                this.refSequences = header.GetReferenceSequenceRanges();
            }

            using (StringWriter strwriter = new StringWriter(CultureInfo.InvariantCulture))
            {
                SAMFormatter.WriteHeader(strwriter, header);
                samHeader = strwriter.ToString();
            }

            byte[] headerBytes = Encoding.UTF8.GetBytes(samHeader);
            byte[] bamMagicNumber = { 66, 65, 77, 1 };

            // write BAM magic number
            writer.Write(bamMagicNumber, 0, 4);

            // Length of the header text. This must be the encoded byte count, not the
            // character count: the two differ as soon as the text holds a non-ASCII character.
            writer.Write(Helper.GetLittleEndianByteArray(headerBytes.Length), 0, 4);

            // Plain header text in SAM
            writer.Write(headerBytes, 0, headerBytes.Length);

            // number of reference sequences
            writer.Write(Helper.GetLittleEndianByteArray(this.refSequences.Count), 0, 4);

            foreach (SequenceRange range in this.refSequences)
            {
                byte[] nameBytes = Encoding.UTF8.GetBytes(range.ID);

                // Name length counts the encoded bytes plus the terminating null byte.
                writer.Write(Helper.GetLittleEndianByteArray(nameBytes.Length + 1), 0, 4);
                writer.Write(nameBytes, 0, nameBytes.Length);
                writer.WriteByte((byte)'\0');
                writer.Write(Helper.GetLittleEndianByteArray((int)range.End), 0, 4);
            }
        }
Exemple #25
0
        // Gets new header with sorted SQ Fields.
        // If SQ fields are already sorted then returns the same header.
        // When canChangeOtherTagPos is true, all non-SQ records come first (in their
        // original order) followed by the sorted SQ records. Otherwise every non-SQ
        // record keeps its position and only the SQ records are rearranged among the
        // positions SQ records originally occupied. Comments are copied unchanged.
        private SAMAlignmentHeader GetHeaderWithSortedSQFields(SAMAlignmentHeader header, bool canChangeOtherTagPos)
        {
            if (IsSortedByChromosomeNames(GetSQHeaders(header.RecordFields)))
            {
                return header;
            }

            SAMAlignmentHeader newHeader = new SAMAlignmentHeader();
            if (canChangeOtherTagPos)
            {
                List<SAMRecordField> sqHeaders = new List<SAMRecordField>();
                for (int i = 0; i < header.RecordFields.Count; i++)
                {
                    SAMRecordField field = header.RecordFields[i];
                    if (field.Typecode.Equals("SQ"))
                    {
                        sqHeaders.Add(field);
                    }
                    else
                    {
                        newHeader.RecordFields.Add(field);
                    }
                }

                // Sort and append exactly once, after all records have been split.
                // Doing this inside the loop above duplicated the SQ records (and the
                // comments) once per record in the header.
                sqHeaders.Sort(CompareByChromosomeName);

                foreach (SAMRecordField sqfield in sqHeaders)
                {
                    newHeader.RecordFields.Add(sqfield);
                }
            }
            else
            {
                // Maps each SQ record (kept sorted by chromosome name) to its original position.
                Bio.Util.SortedList<SAMRecordField, int> map = new Bio.Util.SortedList<SAMRecordField, int>(new ComparisonWrapper<SAMRecordField>(CompareByChromosomeName));

                for (int i = 0; i < header.RecordFields.Count; i++)
                {
                    SAMRecordField field = header.RecordFields[i];
                    if (field.Typecode.Equals("SQ"))
                    {
                        map.Add(field, i);
                    }

                    newHeader.RecordFields.Add(field);
                }

                // Put the n-th SQ record in sorted order into the n-th position originally held by an SQ record.
                int sortedPos = 0;
                foreach (int index in map.Values.OrderBy(position => position))
                {
                    newHeader.RecordFields[index] = map.Keys[sortedPos++];
                }
            }

            foreach (string str in header.Comments)
            {
                newHeader.Comments.Add(str);
            }

            return newHeader;
        }
Exemple #26
0
        /// <summary>
        /// Updates the header with reference name and length from ReferenceNamesAndLength file.
        /// Existing reference sequences are removed first. Each line is expected to hold a
        /// name and a length separated by a tab; reading stops at the first empty line or
        /// at the end of the file.
        /// </summary>
        /// <param name="header">SAM alignment header.</param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when a line has fewer than two tab separated values.
        /// </exception>
        private void UpdateReferenceInformationFromFile(SAMAlignmentHeader header)
        {
            // Cleared once, before the file is opened (the second, redundant Clear was removed).
            header.ReferenceSequences.Clear();

            using (StreamReader reader = new StreamReader(ReferenceNamesAndLength))
            {
                string read = reader.ReadLine();
                while (!string.IsNullOrEmpty(read))
                {
                    string[] splitRegion = read.Split(new string[] { "\t" }, StringSplitOptions.RemoveEmptyEntries);
                    if (splitRegion.Length <= 1)
                    {
                        throw new InvalidOperationException("Invalid file for reference name and length");
                    }

                    string name = splitRegion[0];
                    long len = long.Parse(splitRegion[1], CultureInfo.InvariantCulture);
                    header.ReferenceSequences.Add(new ReferenceSequenceInfo(name, len));

                    read = reader.ReadLine();
                }
            }
        }
Exemple #27
0
        /// <summary>
        /// Writes the header to the active output, but only when the Header or the
        /// HeaderOnly option is set. BAM output goes through the BAM formatter,
        /// everything else through the SAM formatter.
        /// </summary>
        /// <param name="header">Alignment header to write.</param>
        private void WriteHeader(SAMAlignmentHeader header)
        {
            bool headerRequested = Header || HeaderOnly;
            if (!headerRequested)
            {
                return;
            }

            bool writeAsSam = !UnCompressedBAM && !BAMOutput;
            if (writeAsSam)
            {
                SAMFormatter.WriteHeader(writer, header);
                return;
            }

            // For compressed BAM output the uncompressed file is compressed later,
            // before it is sent to the output stream.
            bamformatter.WriteHeader(header, bamUncompressedOutStream);
        }
Exemple #28
0
        /// <summary>
        /// Writes the given SAMAlignmentHeader to the text writer: one "@" line per
        /// record field with its tab separated TAG:VALUE pairs, followed by one "@CO"
        /// line per comment. Nothing is written when the header is null.
        /// </summary>
        /// <param name="writer">Text Writer</param>
        /// <param name="header">Header to write.</param>
        /// <exception cref="ArgumentNullException">Thrown when the header is not null and the writer is null.</exception>
        /// <exception cref="ArgumentException">Thrown when the header reports a validation message.</exception>
        public static void WriteHeader(TextWriter writer, SAMAlignmentHeader header)
        {
            // A null header is checked first, so it returns even when the writer is null too.
            if (header == null)
            {
                return;
            }

            if (writer == null)
            {
                throw new ArgumentNullException("writer");
            }

            string validationError = header.IsValid();
            if (!string.IsNullOrEmpty(validationError))
            {
                throw new ArgumentException(validationError);
            }

            foreach (SAMRecordField record in header.RecordFields)
            {
                StringBuilder line = new StringBuilder();
                line.Append("@").Append(record.Typecode);

                foreach (SAMRecordFieldTag tag in record.Tags)
                {
                    line.Append("\t").Append(tag.Tag).Append(":").Append(tag.Value);
                }

                writer.WriteLine(line.ToString());
            }

            foreach (string comment in header.Comments)
            {
                StringBuilder line = new StringBuilder();
                line.Append("@CO").Append("\t").Append(comment);
                writer.WriteLine(line.ToString());
            }

            writer.Flush();
        }
Exemple #29
0
        /// <summary>
        /// Lazily reads every aligned sequence sequentially until end of file, without
        /// using a BAM index. When <c>createBamIndex</c> is set, a new BAM index is
        /// built while reading.
        /// </summary>
        /// <param name="header">Alignment header; not read by this method.</param>
        /// <returns>
        /// The value returned by BamIndexing for each record, one at a time.
        /// NOTE(review): unlike the non-lazy variant this does not filter null values - confirm callers expect that.
        /// </returns>
        private IEnumerable<SAMAlignedSequence> GetAlignmentWithoutIndexYield(SAMAlignmentHeader header)
        {
            Chunk lastChunk = null;
            ulong lastcOffset = 0;
            ushort lastuOffset = 0;
            BAMReferenceIndexes refIndices = null;

            if (createBamIndex)
            {
                bamIndex = new BAMIndex();

                for (int i = 0; i < refSeqNames.Count; i++)
                {
                    bamIndex.RefIndexes.Add(new BAMReferenceIndexes());
                }

                // NOTE(review): assumes at least one reference sequence name was read - confirm.
                refIndices = bamIndex.RefIndexes[0];
            }

            while (!IsEOF())
            {
                if (createBamIndex)
                {
                    // Remember where the record about to be read starts.
                    lastcOffset = (ulong)currentCompressedBlockStartPos;
                    lastuOffset = (ushort)deCompressedStream.Position;
                }

                SAMAlignedSequence alignedSeq = GetAlignedSequence(0, int.MaxValue);
                alignedSeq = BamIndexing(alignedSeq, refIndices, bamIndex, lastcOffset, lastuOffset, ref lastChunk);

                yield return alignedSeq;
            }

            #region BAM Indexing
            // lastChunk is still null when the loop never ran (no records in the file);
            // in that case there is no chunk end to close.
            if (createBamIndex && lastChunk != null)
            {
                lastChunk.ChunkEnd.CompressedBlockOffset = (ulong)readStream.Position;

                if (deCompressedStream != null)
                {
                    lastChunk.ChunkEnd.UncompressedBlockOffset = (ushort)deCompressedStream.Position;
                }
                else
                {
                    lastChunk.ChunkEnd.UncompressedBlockOffset = 0;
                }
            }
            #endregion
        }
        /// <summary>
        /// Prepares the BAM output for filtered sequences. Does nothing unless writing
        /// to a filtered BAM is enabled and neither the stream nor the formatter has
        /// been created yet.
        /// </summary>
        private void InitBamOutputFiles()
        {
            bool alreadyInitialised = bamStream != null || bamFormatter != null;
            if (!writeToFilteredBam || alreadyInitialised)
            {
                return;
            }

            bamFormatter = new BAMFormatter();
            newHeader = new SAMAlignmentHeader();
            bamOutputQueue = new Queue<Collection<SAMAlignedSequence>>();

            // Output file for the filtered sequences; an existing file is removed first.
            string outputPath = fileName + "\\sequences.bam";
            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            bamStream = File.Create(outputPath);
        }
Exemple #31
0
        /// <summary>
        /// Verifies that SAMFormatter.WriteHeader throws ArgumentNullException when it
        /// is given a null writer together with a non-null header.
        /// </summary>
        public void InvalidateSAMWriteTextWriter()
        {
            SAMAlignmentHeader emptyHeader = new SAMAlignmentHeader();
            bool exceptionSeen = false;

            try
            {
                SAMFormatter.WriteHeader(null, emptyHeader);
            }
            catch (ArgumentNullException)
            {
                exceptionSeen = true;
                ApplicationLog.WriteLine(
                    "SAM Formatter P2 : Successfully validated the exception");
            }

            // Reaching this point without the exception means the call wrongly succeeded.
            if (!exceptionSeen)
            {
                Assert.Fail();
            }
        }
Exemple #32
0
        /// <summary>
        /// Parses the BAM file and returns the Header.
        /// Reading starts at the beginning of the stream. The first four bytes are read
        /// but not checked; the header text length follows, then the header text, the
        /// number of reference sequences and, per reference, the name length, the name
        /// and the sequence length. Reference names and lengths are also stored in
        /// <c>refSeqNames</c> and <c>refSeqLengths</c>.
        /// </summary>
        /// <returns>
        /// The header parsed from the embedded SAM text (or an empty header when there
        /// is no text), with its reference sequences replaced by the ones read here.
        /// </returns>
        private SAMAlignmentHeader GetHeader()
        {
            SAMAlignmentHeader result = new SAMAlignmentHeader();
            refSeqNames = new List<string>();
            refSeqLengths = new List<int>();

            readStream.Seek(0, SeekOrigin.Begin);
            this.deCompressedStream = null;

            // 8 bytes: 4 unchecked leading bytes followed by the header text length.
            byte[] buffer = new byte[8];
            ReadUnCompressedData(buffer, 0, 8);
            int textLength = Helper.GetInt32(buffer, 4);

            byte[] headerText = new byte[textLength];
            if (textLength != 0)
            {
                ReadUnCompressedData(headerText, 0, textLength);
            }

            ReadUnCompressedData(buffer, 0, 4);
            int referenceCount = Helper.GetInt32(buffer, 0);

            for (int refNo = 0; refNo < referenceCount; refNo++)
            {
                ReadUnCompressedData(buffer, 0, 4);
                int nameLength = Helper.GetInt32(buffer, 0);

                byte[] nameBytes = new byte[nameLength];
                ReadUnCompressedData(nameBytes, 0, nameLength);

                ReadUnCompressedData(buffer, 0, 4);
                int sequenceLength = Helper.GetInt32(buffer, 0);

                // The last byte of the name is dropped when decoding.
                refSeqNames.Add(System.Text.Encoding.ASCII.GetString(nameBytes, 0, nameBytes.Length - 1));
                refSeqLengths.Add(sequenceLength);
            }

            if (headerText.Length != 0)
            {
                string samText = System.Text.Encoding.ASCII.GetString(headerText);
                using (StringReader textReader = new StringReader(samText))
                {
                    result = SAMParser.ParseSAMHeader(textReader);
                }
            }

            // The reference list of the header is rebuilt from the names and lengths read above.
            result.ReferenceSequences.Clear();

            for (int refNo = 0; refNo < refSeqNames.Count; refNo++)
            {
                result.ReferenceSequences.Add(new ReferenceSequenceInfo(refSeqNames[refNo], refSeqLengths[refNo]));
            }

            return result;
        }
Exemple #33
0
        /// <summary>
        /// Writes an ISequenceAlignment to the location specified by the writer.
        /// The alignment header (when present in the metadata) is written first,
        /// followed by one tab separated line per aligned sequence.
        /// </summary>
        /// <param name="sequenceAlignment">The sequence alignment to format.</param>
        /// <param name="writer">The TextWriter used to write the formatted sequence alignment text.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="sequenceAlignment"/> or <paramref name="writer"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// Thrown when an aligned sequence has no SAM aligned sequence header, when that
        /// header reports a validation message, or when the sequence alphabet is not DNA.
        /// </exception>
        public void Format(ISequenceAlignment sequenceAlignment, TextWriter writer)
        {
            if (sequenceAlignment == null)
            {
                throw new ArgumentNullException(Resource.ParameterNameSequenceAlignment);
            }

            if (writer == null)
            {
                throw new ArgumentNullException(Resource.ParameterNameWriter);
            }

            #region Write alignment header
            SAMAlignmentHeader header = sequenceAlignment.Metadata[Helper.SAMAlignmentHeaderKey] as SAMAlignmentHeader;
            if (header != null)
            {
                WriteHeader(header, writer);
            }

            #endregion

            #region Write aligned sequences
            foreach (IAlignedSequence alignedSequence in sequenceAlignment.AlignedSequences)
            {
                SAMAlignedSequenceHeader alignedHeader = alignedSequence.Metadata[Helper.SAMAlignedSequenceHeaderKey] as SAMAlignedSequenceHeader;
                if (alignedHeader == null)
                {
                    throw new ArgumentException(Resource.SAM_AlignedSequenceHeaderMissing);
                }

                string message = alignedHeader.IsValid();
                if (!string.IsNullOrEmpty(message))
                {
                    throw new ArgumentException(message);
                }

                StringBuilder alignmentLine = new StringBuilder();
                alignmentLine.Append(alignedHeader.QName);
                alignmentLine.Append("\t");
                alignmentLine.Append((int)alignedHeader.Flag);
                alignmentLine.Append("\t");
                alignmentLine.Append(alignedHeader.RName);
                alignmentLine.Append("\t");
                alignmentLine.Append(alignedHeader.Pos);
                alignmentLine.Append("\t");
                alignmentLine.Append(alignedHeader.MapQ);
                alignmentLine.Append("\t");
                alignmentLine.Append(alignedHeader.CIGAR);
                alignmentLine.Append("\t");

                // "=" is written when the mate reference name equals the reference name.
                if (string.Compare(alignedHeader.MRNM, alignedHeader.RName, StringComparison.InvariantCultureIgnoreCase) == 0)
                {
                    alignmentLine.Append("=");
                }
                else
                {
                    alignmentLine.Append(alignedHeader.MRNM);
                }

                alignmentLine.Append("\t");
                alignmentLine.Append(alignedHeader.MPos);
                alignmentLine.Append("\t");
                alignmentLine.Append(alignedHeader.ISize);
                alignmentLine.Append("\t");
                writer.Write(alignmentLine.ToString());

                // Hash sets give constant time lookups per symbol; the previous lists were
                // scanned (and shifted on removal) for every position of the sequence.
                HashSet<int> dotSymbolIndices = new HashSet<int>(alignedHeader.DotSymbolIndices);
                HashSet<int> equalSymbolIndices = new HashSet<int>(alignedHeader.EqualSymbolIndices);

                if (alignedSequence.Sequences.Count > 0 && alignedSequence.Sequences[0] != null)
                {
                    ISequence seq = alignedSequence.Sequences[0];

                    if (seq.Alphabet != Alphabets.DNA)
                    {
                        throw new ArgumentException(Resource.SAMFormatterSupportsDNAOnly);
                    }

                    for (int i = 0; i < seq.Count; i++)
                    {
                        char symbol = seq[i].Symbol;

                        // Remove returns true only when the index was recorded.
                        if (dotSymbolIndices.Remove(i))
                        {
                            symbol = '.';
                        }

                        // An "=" entry wins over a "." entry for the same position.
                        if (equalSymbolIndices.Remove(i))
                        {
                            symbol = '=';
                        }

                        writer.Write(symbol);
                    }

                    writer.Write("\t");

                    // Quality scores are written when available, "*" otherwise.
                    IQualitativeSequence qualSeq = seq as IQualitativeSequence;
                    if (qualSeq != null)
                    {
                        writer.Write(ASCIIEncoding.ASCII.GetString(qualSeq.Scores));
                    }
                    else
                    {
                        writer.Write("*");
                    }
                }
                else
                {
                    // No sequence: "*" for both the sequence and the quality column.
                    writer.Write("*");
                    writer.Write("\t");
                    writer.Write("*");
                }

                foreach (SAMOptionalField field in alignedHeader.OptionalFields)
                {
                    writer.Write("\t");
                    writer.Write(field.Tag);
                    writer.Write(":");
                    writer.Write(field.VType);
                    writer.Write(":");
                    writer.Write(field.Value);
                }

                writer.WriteLine();
            }
            #endregion

            writer.Flush();
        }
        /// <summary>
        /// Writes an optional header and a list of SAM aligned sequences to a stream.
        /// </summary>
        /// <param name="stream">The stream the formatted output is written to.</param>
        /// <param name="sequenceAlignments">The aligned sequences to write.</param>
        /// <param name="Header">The alignment header to write first; skipped when null.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="stream"/> or <paramref name="sequenceAlignments"/> is null.
        /// </exception>
        public void Format(Stream stream, List <SAMAlignedSequence> sequenceAlignments, SAMAlignmentHeader Header)
        {
            if (stream == null)
            {
                throw new ArgumentNullException("stream");
            }

            if (sequenceAlignments == null)
            {
                throw new ArgumentNullException("sequenceAlignments");
            }

            using (var writer = stream.OpenWrite())
            {
                bool hasHeader = Header != null;
                if (hasHeader)
                {
                    WriteHeader(writer, Header);
                }

                // Each element is handled through the IAlignedSequence interface.
                foreach (IAlignedSequence alignedSequence in sequenceAlignments)
                {
                    WriteSAMAlignedSequence(writer, alignedSequence);
                }
            }
        }