/// <summary>
/// Writes the SAM object to file in SAM/BAM format.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown when the formatter fails. The message is the WriteBAM/WriteSAM resource text
/// followed by the original error message; the original exception is attached as the
/// inner exception so its type and stack trace are not lost.
/// </exception>
private void PerformFormat()
{
    if (_isSAM)
    {
        // NOTE(review): when _isSAM is set the BAM formatter is used -- presumably the flag
        // describes the input and the output is the other format; confirm against the caller.
        BAMFormatter format = new BAMFormatter();
        try
        {
            format.Format(_sequenceAlignmentMap, OutputFilename);
        }
        catch (Exception ex)
        {
            throw new InvalidOperationException(
                Resources.WriteBAM + Environment.NewLine + ex.Message,
                ex);
        }
    }
    else
    {
        SAMFormatter format = new SAMFormatter();
        try
        {
            format.Format(_sequenceAlignmentMap, OutputFilename);
        }
        catch (Exception ex)
        {
            throw new InvalidOperationException(
                Resources.WriteSAM + Environment.NewLine + ex.Message,
                ex);
        }
    }
}
/// <summary>
/// Implements the Index command of the SAM tool: creates an index file for a BAM file.
/// SAMUtil.exe index in.bam (output file: in.bam.bai)
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown with the index help text when no input file name is set, or with the
/// invalid-BAM-file text (wrapping the original error) when anything in the indexing step fails.
/// </exception>
public void GenerateIndexFile()
{
    bool inputMissing = string.IsNullOrEmpty(InputFilename);
    if (inputMissing)
    {
        throw new InvalidOperationException(Resources.IndexHelp);
    }

    try
    {
        bool outputMissing = string.IsNullOrEmpty(OutputFilename);
        if (outputMissing)
        {
            // No output name supplied: derive it from the input name and remember that we did,
            // so the generated name can be reported afterwards.
            OutputFilename = InputFilename + Properties.Resources.BAM_INDEXFILEEXTENSION;
            autoGeneratedOutputFilename = true;
        }

        BAMFormatter.CreateBAMIndexFile(InputFilename, OutputFilename);

        if (autoGeneratedOutputFilename)
        {
            Console.WriteLine(Properties.Resources.SuccessMessageWithOutputFileName, OutputFilename);
        }
    }
    catch (Exception indexError)
    {
        throw new InvalidOperationException(Resources.InvalidBAMFile, indexError);
    }
}
public void TestFormatterWithSort()
{
    // Formats the same BAM input twice with a coordinate-sorting formatter -- once from a
    // plain parse and once with data virtualization enforced on the parser -- and expects
    // the two output files to compare equal.
    const string sourceBam = @"TestData\BAM\SeqAlignment.bam";
    const string firstOutput = "output1.bam";
    const string secondOutput = "output2.bam";

    var bamParser = new BAMParser();
    var bamFormatter = new BAMFormatter();

    SequenceAlignmentMap parsedMap = bamParser.Parse(sourceBam);
    Assert.IsTrue(parsedMap != null);
    Assert.AreEqual(parsedMap.Header.GetReferenceSequences().Count, 1);
    Assert.AreEqual(parsedMap.QuerySequences.Count, 2);

    // First pass: sorted output from the plain parse.
    bamFormatter.CreateSortedBAMFile = true;
    bamFormatter.SortType = BAMSortByFields.ChromosomeCoordinates;
    bamFormatter.Format(parsedMap, firstOutput);

    // Second pass: same input, parsed with data virtualization enforced.
    bamParser.EnforceDataVirtualization = true;
    parsedMap = bamParser.Parse(sourceBam);
    bamFormatter.Format(parsedMap, secondOutput);

    Assert.IsTrue(File.Exists(firstOutput));
    Assert.IsTrue(File.Exists(secondOutput));
    Assert.AreEqual(true, FileCompare(firstOutput, secondOutput));
}
/// <summary>
/// Write BAM file.
/// Builds the uncompressed BAM content in one temporary file, compresses it into a second
/// temporary file, then writes the compressed bytes (decoded as ASCII) to the output writer.
/// </summary>
/// <remarks>
/// When HeaderOnly is set only the header is written; otherwise the header is written only
/// when Header is set, followed by every aligned sequence accepted by Filter.
/// Both temporary files are deleted even when formatting or compression throws.
/// </remarks>
private void DisplayBAMFile()
{
    BAMFormatter format = new BAMFormatter();
    string tempFilename = Path.GetTempFileName();
    string tempFilename1 = Path.GetTempFileName();

    try
    {
        using (FileStream fstemp = new FileStream(tempFilename, FileMode.Create, FileAccess.ReadWrite))
        {
            if (HeaderOnly || Header)
            {
                format.WriteHeader(_seqAlignmentMap.Header, fstemp);
            }

            if (!HeaderOnly)
            {
                foreach (SAMAlignedSequence alignedSequence in _seqAlignmentMap.QuerySequences)
                {
                    if (Filter(alignedSequence))
                    {
                        format.WriteAlignedSequence(_seqAlignmentMap.Header, alignedSequence, fstemp);
                    }
                }
            }

            using (FileStream fstemp1 = new FileStream(tempFilename1, FileMode.Create, FileAccess.ReadWrite))
            {
                // Rewind before compressing on every path. The header-only path previously
                // skipped this, leaving the stream positioned at its end.
                // NOTE(review): harmless if CompressBAMFile rewinds its input itself -- confirm.
                fstemp.Seek(0, SeekOrigin.Begin);
                format.CompressBAMFile(fstemp, fstemp1);

                fstemp1.Seek(0, SeekOrigin.Begin);
                byte[] bytes = new byte[fstemp1.Length];

                // Stream.Read may return fewer bytes than requested, so loop until the
                // buffer is full or the stream reports end-of-file.
                int offset = 0;
                while (offset < bytes.Length)
                {
                    int read = fstemp1.Read(bytes, offset, bytes.Length - offset);
                    if (read == 0)
                    {
                        break;
                    }

                    offset += read;
                }

                string str = System.Text.ASCIIEncoding.ASCII.GetString(bytes);
                _write.Write(str);
                _write.Flush();
            }
        }
    }
    finally
    {
        // Streams are closed by now; remove the scratch files on success and on failure.
        File.Delete(tempFilename1);
        File.Delete(tempFilename);
    }
}
/// <summary>
/// Writes the SAM object to file in SAM/BAM format.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown with the WriteBAM/WriteSAM resource text when the formatter fails; the original
/// exception is attached as the inner exception instead of being discarded.
/// </exception>
private void PerformFormat()
{
    if (_isSAM)
    {
        // NOTE(review): when _isSAM is set the BAM formatter is used -- presumably the flag
        // describes the input and the output is the other format; confirm against the caller.
        BAMFormatter format = new BAMFormatter();
        try
        {
            format.Format(_sequenceAlignmentMap, _outputFile);
        }
        catch (Exception ex)
        {
            throw new InvalidOperationException(Resources.WriteBAM, ex);
        }
    }
    else
    {
        SAMFormatter format = new SAMFormatter();
        try
        {
            format.Format(_sequenceAlignmentMap, _outputFile);
        }
        catch (Exception ex)
        {
            throw new InvalidOperationException(Resources.WriteSAM, ex);
        }
    }
}
public void TestFormatter()
{
    // Parses a BAM file, formats it (plain, then sorted with an index file), and checks
    // that the output can be parsed back both completely and by sequence range.
    const string sourceBam = @"TestUtils\BAM\SeqAlignment.bam";
    const string bamOutput = "BAMTests123.bam";

    using (BAMParser bamParser = new BAMParser())
    {
        var bamFormatter = new BAMFormatter();

        SequenceAlignmentMap map = bamParser.Parse(sourceBam);
        Assert.IsTrue(map != null);
        Assert.AreEqual(map.Header.GetReferenceSequencesInfoFromSQHeader().Count, 1);
        Assert.AreEqual(map.Header.ReferenceSequences.Count, 1);
        Assert.AreEqual(map.QuerySequences.Count, 2);

        // Plain format first, then again sorted and with index creation switched on.
        bamFormatter.Format(map, bamOutput);
        bamFormatter.CreateSortedBAMFile = true;
        bamFormatter.CreateIndexFile = true;
        map = bamParser.Parse(sourceBam);
        bamFormatter.Format(map, bamOutput);
        Assert.IsTrue(File.Exists("BAMTests123.bam.bai"));

        // Full re-parse of the formatted file.
        map = bamParser.Parse(bamOutput);
        Assert.IsTrue(map != null);
        Assert.AreEqual(map.Header.GetReferenceSequencesInfoFromSQHeader().Count, 1);
        Assert.AreEqual(map.Header.ReferenceSequences.Count, 1);
        Assert.AreEqual(map.QuerySequences.Count, 2);

        // Range covering everything on chr20: both reads expected.
        map = bamParser.ParseRange("BAMTests123.bam", new SequenceRange("chr20", 0, int.MaxValue));
        Assert.IsTrue(map != null);
        Assert.AreEqual(map.Header.GetReferenceSequencesInfoFromSQHeader().Count, 1);
        Assert.AreEqual(map.Header.ReferenceSequences.Count, 1);
        Assert.AreEqual(map.QuerySequences.Count, 2);

        // Narrower range on chr20: only one read expected.
        map = bamParser.ParseRange("BAMTests123.bam", new SequenceRange("chr20", 0, 28833));
        Assert.IsTrue(map != null);
        Assert.AreEqual(map.Header.GetReferenceSequencesInfoFromSQHeader().Count, 1);
        Assert.AreEqual(map.Header.ReferenceSequences.Count, 1);
        Assert.AreEqual(map.QuerySequences.Count, 1);
    }
}
/// <summary>
/// Writes Uncompressed BAM file.
/// The header and each accepted aligned sequence are formatted into a reusable memory
/// buffer, decoded as ASCII and pushed to the output writer one piece at a time.
/// </summary>
private void DisplayUncompressedBAM()
{
    BAMFormatter bamFormatter = new BAMFormatter();
    bool headerOnly = HeaderOnly;

    using (MemoryStream buffer = new MemoryStream())
    {
        // The header goes out when only the header was requested, or when the full output
        // was requested together with the header.
        if (headerOnly || Header)
        {
            bamFormatter.WriteHeader(_seqAlignmentMap.Header, buffer);
            buffer.Seek(0, SeekOrigin.Begin);

            byte[] headerBytes = new byte[buffer.Length];
            buffer.Read(headerBytes, 0, (int)buffer.Length);

            // Rewind so the next record overwrites the buffer from the start.
            buffer.Seek(0, SeekOrigin.Begin);

            _write.Write(System.Text.ASCIIEncoding.ASCII.GetString(headerBytes));
            _write.Flush();
        }

        if (headerOnly)
        {
            return;
        }

        foreach (SAMAlignedSequence alignedSequence in _seqAlignmentMap.QuerySequences)
        {
            if (!Filter(alignedSequence))
            {
                continue;
            }

            bamFormatter.WriteAlignedSequence(_seqAlignmentMap.Header, alignedSequence, buffer);

            // The buffer is reused, so the record length is the write position rather than
            // the stream length (which may still reflect an earlier, longer record).
            long recordLength = buffer.Position;
            buffer.Seek(0, SeekOrigin.Begin);

            byte[] recordBytes = new byte[recordLength];
            buffer.Read(recordBytes, 0, (int)recordLength);
            buffer.Seek(0, SeekOrigin.Begin);

            _write.Write(System.Text.ASCIIEncoding.ASCII.GetString(recordBytes));
            _write.Flush();
        }
    }
}
public void ValidateSAMToBAMConversionWithDVEnabled()
{
    // Converts a SAM file (parsed with data virtualization enforced) to BAM and compares
    // the result with an expected BAM file. Both paths come from the xml config file.
    string expectedBamFilePath = _utilityObj._xmlUtil.GetTextValue(
        Constants.BAMToSAMConversionNode, Constants.FilePathNode);
    string samFilePath = _utilityObj._xmlUtil.GetTextValue(
        Constants.BAMToSAMConversionNode, Constants.FilePathNode1);

    using (BAMParser bamParser = new BAMParser())
    using (SAMParser samParser = new SAMParser())
    {
        BAMFormatter bamFormatter = new BAMFormatter();

        // Enforce DV on the SAM side only.
        samParser.EnforceDataVirtualization = true;

        // Reference result.
        SequenceAlignmentMap expectedAlignment = bamParser.Parse(expectedBamFilePath);

        // SAM -> BAM temp file -> parsed back.
        SequenceAlignmentMap samAlignment = samParser.Parse(samFilePath);
        bamFormatter.Format(samAlignment, Constants.BAMTempFileName);
        SequenceAlignmentMap convertedAlignment = bamParser.Parse(Constants.BAMTempFileName);

        // Header first, then the aligned sequences.
        Assert.IsTrue(CompareSequencedAlignmentHeader(convertedAlignment, expectedAlignment));
        Assert.IsTrue(CompareAlignedSequences(convertedAlignment, expectedAlignment));

        // Log message to NUnit GUI.
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
            "SAM Parser BVT : Validated the SAM->BAM conversion successfully"));
        Console.WriteLine(string.Format((IFormatProvider)null,
            "SAM Parser BVT : Validated the SAM->BAM conversion successfully"));

        // Remove the temporary BAM file.
        File.Delete(Constants.BAMTempFileName);
        ApplicationLog.WriteLine("Deleted the temp file created.");
    }
}
public void BAMProperties()
{
    // The parser must report the BAM name, its own description and the BAM file extension
    // exactly as stored in the resource table.
    var bamParser = new BAMParser();
    Assert.AreEqual(bamParser.Name, Properties.Resource.BAM_NAME);
    Assert.AreEqual(bamParser.Description, Properties.Resource.BAMPARSER_DESCRIPTION);
    Assert.AreEqual(bamParser.FileTypes, Properties.Resource.BAM_FILEEXTENSION);

    // Same checks for the formatter, with the formatter's description.
    var bamFormatter = new BAMFormatter();
    Assert.AreEqual(bamFormatter.Name, Properties.Resource.BAM_NAME);
    Assert.AreEqual(bamFormatter.Description, Properties.Resource.BAMFORMATTER_DESCRIPTION);
    Assert.AreEqual(bamFormatter.FileTypes, Properties.Resource.BAM_FILEEXTENSION);
}
public void TestFormatter()
{
    // Parses a BAM file, formats it (plain, then sorted with an index file), checks that the
    // output parses back completely and by range, then removes the generated files.
    string sourceBam = @"TestUtils\BAM\SeqAlignment.bam".TestDir();
    const string bamOutput = "BAMTests123.bam";
    string indexOutput = bamOutput + ".bai";

    // Shared shape check for every map parsed back from the formatted output.
    System.Action<SequenceAlignmentMap, int> assertParsedMap = (candidate, expectedQueryCount) =>
    {
        Assert.IsNotNull(candidate);
        Assert.IsNotNull(candidate.Header);
        Assert.AreEqual(candidate.Header.GetReferenceSequencesInfoFromSQHeader().Count, 1);
        Assert.AreEqual(candidate.Header.ReferenceSequences.Count, 1);
        Assert.AreEqual(candidate.QuerySequences.Count, expectedQueryCount);
    };

    var bamParser = new BAMParser();
    SequenceAlignmentMap map = bamParser.ParseOne<SequenceAlignmentMap>(sourceBam);
    Assert.IsNotNull(map);
    Assert.IsNotNull(map.Header);
    Assert.AreEqual(map.Header.GetReferenceSequencesInfoFromSQHeader().Count, 1);
    Assert.AreEqual(map.Header.ReferenceSequences.Count, 1);
    Assert.IsNotNull(map.QuerySequences);
    Assert.AreEqual(map.QuerySequences.Count, 2);

    // Plain format first, then again sorted and with index creation switched on.
    var bamFormatter = new BAMFormatter();
    bamFormatter.Format(map, bamOutput);
    bamFormatter.CreateSortedBAMFile = true;
    bamFormatter.CreateIndexFile = true;
    bamFormatter.Format(map, bamOutput);
    Assert.IsTrue(File.Exists(indexOutput));

    // Full re-parse of the formatted file.
    map = bamParser.ParseOne<SequenceAlignmentMap>(bamOutput);
    assertParsedMap(map, 2);

    // Range covering everything on chr20: both reads expected.
    map = bamParser.ParseRange(bamOutput, new SequenceRange("chr20", 0, int.MaxValue));
    assertParsedMap(map, 2);

    // Narrower range on chr20: only one read expected.
    map = bamParser.ParseRange(bamOutput, new SequenceRange("chr20", 0, 28833));
    assertParsedMap(map, 1);

    File.Delete(bamOutput);
    File.Delete(indexOutput);
}
/// <summary>
/// Public method implementing Index method of SAM tool.
/// SAMUtil.exe index in.bam (output file: in.bam.bai)
/// </summary>
/// <remarks>
/// FilePath must hold one entry (the BAM file) or two entries (the BAM file followed by
/// the index file name); both are passed straight to BAMFormatter.CreateBAMIndexFile.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// Thrown when FilePath is null, when it does not hold one or two entries (index help text),
/// or when index creation fails (invalid-BAM-file text, with the original exception attached
/// as the inner exception).
/// </exception>
public void GenerateIndexFile()
{
    if (FilePath == null)
    {
        throw new InvalidOperationException("FilePath");
    }

    // Validate the argument count outside the try block so the help text is never
    // mistaken for an indexing failure.
    if (FilePath.Length != 1 && FilePath.Length != 2)
    {
        throw new InvalidOperationException(Resources.IndexHelp);
    }

    try
    {
        if (FilePath.Length == 1)
        {
            BAMFormatter.CreateBAMIndexFile(FilePath[0]);
        }
        else
        {
            BAMFormatter.CreateBAMIndexFile(FilePath[0], FilePath[1]);
        }
    }
    catch (Exception ex)
    {
        // Keep the cause; it was previously dropped by a bare catch.
        throw new InvalidOperationException(Resources.InvalidBAMFile, ex);
    }
}
/// <summary>
/// Sorts a BAM file, by read name or by chromosome coordinates.
/// SAMUtil.exe in.bam out.bam
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown with the sort help text when no input file name is set, or with the
/// invalid-BAM-file text (wrapping the original error) when the input cannot be parsed.
/// </exception>
public void DoSort()
{
    const string sortedSuffix = ".sort";

    if (string.IsNullOrEmpty(InputFilename))
    {
        throw new InvalidOperationException(Resources.SortHelp);
    }

    BAMParser bamParser = new BAMParser();
    SequenceAlignmentMap alignments = null;
    try
    {
        alignments = bamParser.ParseOne<SequenceAlignmentMap>(InputFilename);
    }
    catch (Exception parseError)
    {
        throw new InvalidOperationException(Resources.InvalidBAMFile, parseError);
    }

    BAMFormatter bamFormatter = new BAMFormatter();
    bamFormatter.CreateSortedBAMFile = true;
    if (this.SortByReadName)
    {
        bamFormatter.SortType = BAMSortByFields.ReadNames;
    }
    else
    {
        bamFormatter.SortType = BAMSortByFields.ChromosomeCoordinates;
    }

    // No output name supplied: append the sort suffix to the input name and remember
    // that the name was generated so it can be reported afterwards.
    if (string.IsNullOrEmpty(OutputFilename))
    {
        OutputFilename = InputFilename + sortedSuffix;
        autoGeneratedOutputFilename = true;
    }

    bamFormatter.Format(alignments, OutputFilename);

    if (autoGeneratedOutputFilename)
    {
        Console.WriteLine(Resources.SuccessMessageWithOutputFileName, OutputFilename);
    }
}
public void TestFormatterWithSort()
{
    // Formats the same BAM input twice with a coordinate-sorting formatter and expects
    // the two output files to compare equal. Output files are removed in every case.
    string sourceBam = @"TestUtils\BAM\SeqAlignment.bam".TestDir();
    const string firstOutput = "output1.bam";
    const string secondOutput = "output2.bam";

    try
    {
        using (BAMParser bamParser = new BAMParser())
        {
            var bamFormatter = new BAMFormatter();

            SequenceAlignmentMap map = bamParser.ParseOne<SequenceAlignmentMap>(sourceBam);
            Assert.IsTrue(map != null);
            Assert.AreEqual(map.Header.GetReferenceSequencesInfoFromSQHeader().Count, 1);
            Assert.AreEqual(map.Header.ReferenceSequences.Count, 1);
            Assert.AreEqual(map.QuerySequences.Count, 2);

            bamFormatter.CreateSortedBAMFile = true;
            bamFormatter.SortType = BAMSortByFields.ChromosomeCoordinates;
            bamFormatter.Format(map, firstOutput);

            // Parse the input again and format it to the second file.
            map = bamParser.ParseOne<SequenceAlignmentMap>(sourceBam);
            bamFormatter.Format(map, secondOutput);

            Assert.IsTrue(File.Exists(firstOutput));
            Assert.IsTrue(File.Exists(secondOutput));
            Assert.AreEqual(true, FileCompare(firstOutput, secondOutput));
        }
    }
    finally
    {
        // Runs after the parser has been disposed, as in the original ordering.
        File.Delete(firstOutput);
        File.Delete(secondOutput);
    }
}
/// <summary>
/// Picks the sequence alignment formatter that matches the given file name.
/// </summary>
/// <param name="fileName">File name for which the formatter is required.</param>
/// <returns>
/// A new SAMFormatter when Helper.IsSAM accepts the name, a new BAMFormatter when
/// Helper.IsBAM accepts it, and null for a null/empty name or any other file.
/// </returns>
public static ISequenceAlignmentFormatter FindFormatterByFile(string fileName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return null;
    }

    // SAM is checked first; BAM is only consulted when the SAM check fails.
    if (Helper.IsSAM(fileName))
    {
        return new SAMFormatter();
    }

    if (Helper.IsBAM(fileName))
    {
        return new BAMFormatter();
    }

    return null;
}
public void TestFormatter()
{
    // Formats a parsed BAM file twice: plainly to one file, then sorted with an index to
    // another. The plain output is re-parsed in full; the indexed output is queried by range.
    const string sourceBam = @"TestData\BAM\SeqAlignment.bam";
    const string plainOutput = "bamtest1.bam";

    var bamParser = new BAMParser();
    var bamFormatter = new BAMFormatter();

    SequenceAlignmentMap map = bamParser.Parse(sourceBam);
    Assert.IsTrue(map != null);
    Assert.AreEqual(map.Header.GetReferenceSequences().Count, 1);
    Assert.AreEqual(map.QuerySequences.Count, 2);

    bamFormatter.Format(map, plainOutput);

    // Sorted + indexed output goes to a different file.
    bamFormatter.CreateSortedBAMFile = true;
    bamFormatter.CreateIndexFile = true;
    map = bamParser.Parse(sourceBam);
    bamFormatter.Format(map, "bamtest.bam");
    Assert.IsTrue(File.Exists("bamtest.bam.bai"));

    // Full re-parse of the plain output.
    map = bamParser.Parse(plainOutput);
    Assert.IsTrue(map != null);
    Assert.AreEqual(map.Header.GetReferenceSequences().Count, 1);
    Assert.AreEqual(map.QuerySequences.Count, 2);

    // Range covering everything on chr20: both reads expected.
    map = bamParser.ParseRange("bamtest.bam", new SequenceRange("chr20", 0, int.MaxValue));
    Assert.IsTrue(map != null);
    Assert.AreEqual(map.Header.GetReferenceSequences().Count, 1);
    Assert.AreEqual(map.QuerySequences.Count, 2);

    // Narrower range on chr20: only one read expected.
    map = bamParser.ParseRange("bamtest.bam", new SequenceRange("chr20", 0, 28833));
    Assert.IsTrue(map != null);
    Assert.AreEqual(map.Header.GetReferenceSequences().Count, 1);
    Assert.AreEqual(map.QuerySequences.Count, 1);
}
public void ValidateSeqFormatterProperties()
{
    // Expected formatter names come from the xml config file.
    string expectedSamName = _utilityObj._xmlUtil.GetTextValue(
        Constants.SamFileParserNode, Constants.ParserNameNode);
    string expectedBamName = _utilityObj._xmlUtil.GetTextValue(
        Constants.BamFileParserNode, Constants.ParserNameNode);

    // Read the static properties of SequenceAlignmentFormatters.
    SAMFormatter samFormatter = SequenceAlignmentFormatters.SAM;
    IList<ISequenceAlignmentFormatter> everyFormatter = SequenceAlignmentFormatters.All;
    BAMFormatter bamFormatter = SequenceAlignmentFormatters.BAM;

    // Names must match the configured values and the full list must exist.
    Assert.AreEqual(expectedSamName, samFormatter.Name);
    Assert.IsNotNull(everyFormatter);
    Assert.AreEqual(expectedBamName, bamFormatter.Name);

    Console.WriteLine(string.Format((IFormatProvider)null,
        "SequenceAlignmentFormatter : Type of the parser is validated successfully"));
    ApplicationLog.WriteLine("Type of the parser is validated successfully");
}
/// <summary>
/// Public method to sort BAM file.
/// SAMUtil.exe in.bam out.bam
/// </summary>
/// <remarks>
/// FilePaths[0] is the input BAM file. The sorted output goes to FilePaths[1] when it is
/// supplied, otherwise to the input name with ".sort" appended.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// Thrown when FilePaths is null, when it is empty (sort help text), or when the input
/// cannot be parsed (invalid-BAM-file text, with the original exception attached as the
/// inner exception).
/// </exception>
public void DoSort()
{
    string sortExtension = ".sort";

    if (FilePaths == null)
    {
        throw new InvalidOperationException("FilePaths");
    }

    if (FilePaths.Length < 1)
    {
        throw new InvalidOperationException(Resources.SortHelp);
    }

    BAMParser parse = new BAMParser();
    SequenceAlignmentMap map = null;
    try
    {
        map = parse.Parse(FilePaths[0]);
    }
    catch (Exception ex)
    {
        // Keep the cause; it was previously dropped by a bare catch.
        throw new InvalidOperationException(Resources.InvalidBAMFile, ex);
    }

    BAMFormatter format = new BAMFormatter();
    format.CreateSortedBAMFile = true;
    format.SortType = SortByReadName ? BAMSortByFields.ReadNames : BAMSortByFields.ChromosomeCoordinates;

    if (FilePaths.Length > 1)
    {
        format.Format(map, FilePaths[1]);
    }
    else
    {
        format.Format(map, FilePaths[0] + sortExtension);
    }
}
/// <summary>
/// Displays pending data and closes all streams.
/// Closes the writer and both BAM output streams, deletes the temporary files that were
/// created for them, and releases the formatter and parser.
/// </summary>
private void Close()
{
    if (_writer != null)
    {
        _writer.Close();
    }

    if (_bamCompressedOutStream != null)
    {
        _bamCompressedOutStream.Close();
        _bamCompressedOutStream = null;
    }

    if (_bamUncompressedOutStream != null)
    {
        _bamUncompressedOutStream.Close();
        _bamUncompressedOutStream = null;
    }

    // The streams are closed above, so the temp files can be removed now.
    // The check used to be "IsNullOrEmpty(path) && File.Exists(path)", which can never be
    // true (File.Exists returns false for a null or empty path), so the files were leaked.
    if (!string.IsNullOrEmpty(_uncompressedTempfile) && File.Exists(_uncompressedTempfile))
    {
        File.Delete(_uncompressedTempfile);
    }

    if (!string.IsNullOrEmpty(_compressedTempfile) && File.Exists(_compressedTempfile))
    {
        File.Delete(_compressedTempfile);
    }

    _bamformatter = null;

    if (_bamparser != null)
    {
        _bamparser.Dispose();
        _bamparser = null;
    }
}
/// <summary>
/// Displays pending data and closes all streams.
/// Closes the writer and both BAM output streams, deletes the temporary files that were
/// created for them, and releases the formatter and parser.
/// </summary>
private void Close()
{
    if (writer != null)
    {
        writer.Close();
    }

    if (bamCompressedOutStream != null)
    {
        bamCompressedOutStream.Close();
        bamCompressedOutStream = null;
    }

    if (bamUncompressedOutStream != null)
    {
        bamUncompressedOutStream.Close();
        bamUncompressedOutStream = null;
    }

    // The streams are closed above, so the temp files can be removed now.
    // The check used to be "IsNullOrEmpty(path) && File.Exists(path)", which can never be
    // true (File.Exists returns false for a null or empty path), so the files were leaked.
    if (!string.IsNullOrEmpty(uncompressedTempfile) && File.Exists(uncompressedTempfile))
    {
        File.Delete(uncompressedTempfile);
    }

    if (!string.IsNullOrEmpty(compressedTempfile) && File.Exists(compressedTempfile))
    {
        File.Delete(compressedTempfile);
    }

    bamformatter = null;

    if (bamparser != null)
    {
        bamparser.Dispose();
        bamparser = null;
    }
}
/// <summary>
/// Sets up the parser, the formatter and the output writer/streams according to the
/// user options, choosing between memory streams and temporary files for intermediate data.
/// </summary>
private void Initialize()
{
    bamparser = new BAMParser();
    bamformatter = new BAMFormatter();
    bamUncompressedOutStream = null;
    bamCompressedOutStream = null;

    // Output target: console when no path is given, a binary file stream for BAM output,
    // a text writer for everything else.
    if (string.IsNullOrEmpty(OutputFilePath))
    {
        writer = Console.Out;
    }
    else if (UnCompressedBAM || BAMOutput)
    {
        writer = null;
        if (UnCompressedBAM)
        {
            bamUncompressedOutStream = new FileStream(OutputFilePath, FileMode.Create, FileAccess.ReadWrite);
        }
        else
        {
            bamCompressedOutStream = new FileStream(OutputFilePath, FileMode.Create, FileAccess.ReadWrite);
        }
    }
    else
    {
        writer = new StreamWriter(OutputFilePath);
    }

    #region Initialize temp files

    long inputfileSize = (new FileInfo(InputFilePath)).Length;

    // An uncompressed BAM file is estimated at roughly four times the compressed size,
    // so non-SAM input is scaled up by that factor.
    long unCompressedSize = SAMInput ? inputfileSize : inputfileSize * 4;
    long compressedSize = unCompressedSize / 4;

    // An uncompressed stream is required for both uncompressed and compressed outputs.
    bool needsUncompressedBuffer = (UnCompressedBAM || BAMOutput) && bamUncompressedOutStream == null;
    if (needsUncompressedBuffer)
    {
        if (HeaderOnly || (MemStreamLimit >= unCompressedSize))
        {
            bamUncompressedOutStream = new MemoryStream();
        }
        else
        {
            uncompressedTempfile = Path.GetTempFileName();
            bamUncompressedOutStream = new FileStream(uncompressedTempfile, FileMode.Open, FileAccess.ReadWrite);
        }
    }

    bool needsCompressedBuffer = BAMOutput && !UnCompressedBAM && bamCompressedOutStream == null;
    if (needsCompressedBuffer)
    {
        if (HeaderOnly || (MemStreamLimit >= compressedSize))
        {
            bamCompressedOutStream = new MemoryStream((int)(inputfileSize));
        }
        else
        {
            compressedTempfile = Path.GetTempFileName();
            bamCompressedOutStream = new FileStream(compressedTempfile, FileMode.Open, FileAccess.ReadWrite);
        }
    }

    #endregion Initialize temp files
}
/// <summary>
/// Sort and merge multiple SAM objects.
/// Repeatedly picks, among the current record of every input, the one with the smallest
/// reference name (ties broken by position) and writes it to the stream, until every
/// input is exhausted.
/// </summary>
/// <param name="sortedIndexes">Sorted Indexes of SAM object.</param>
/// <param name="fstemp">Temporary stream to write alignments.</param>
/// <param name="formatter">Format aligned sequences in BAM format.</param>
/// <param name="sequenceAlignmentMaps">List of SAM objects to be merged.</param>
private void WriteMergeFile(IList <IList <BAMSortedIndex> > sortedIndexes, FileStream fstemp, BAMFormatter formatter, IList <SequenceAlignmentMap> sequenceAlignmentMaps)
{
    // alignedSeqs[i] is the current candidate record of input i, or null once input i is exhausted.
    List <SAMAlignedSequence> alignedSeqs = new List <SAMAlignedSequence>();

    // sortedIndex[i] is the position, within sortedIndexes[i], of the BAMSortedIndex currently being consumed.
    // NOTE(review): sized from sequenceAlignmentMaps but indexed alongside sortedIndexes -- presumably both
    // lists have the same length and entry i of one belongs to entry i of the other; confirm with the caller.
    int[] sortedIndex = new int[sequenceAlignmentMaps.Count];

    // Prime one candidate per input from its first BAMSortedIndex.
    // NOTE(review): ElementAt(0) throws if an input has an empty index list.
    for (int i = 0; i < sortedIndexes.Count; i++)
    {
        BAMSortedIndex bamSortedIndex = sortedIndexes[i].ElementAt(sortedIndex[i]);
        if (bamSortedIndex != null)
        {
            if (bamSortedIndex.MoveNext())
            {
                alignedSeqs.Add(sequenceAlignmentMaps[i].QuerySequences[bamSortedIndex.Current]);
            }
            else
            {
                alignedSeqs.Add(null);
            }
        }
        else
        {
            alignedSeqs.Add(null);
        }
    }

    // -1 means "no candidate chosen yet". The value is deliberately carried over between
    // iterations and only reset when the chosen input runs dry.
    int smallestIndex = -1;
    do
    {
        // Select the input whose current record sorts first.
        for (int index = 0; index < alignedSeqs.Count; index++)
        {
            if (alignedSeqs[index] != null)
            {
                if (smallestIndex == -1)
                {
                    smallestIndex = index;
                }
                else
                {
                    // Reference names are compared ignoring case; the equality test below is
                    // the default (case-sensitive) string comparison.
                    if (0 < string.Compare(alignedSeqs[smallestIndex].RName, alignedSeqs[index].RName, StringComparison.OrdinalIgnoreCase))
                    {
                        smallestIndex = index;
                    }
                    else if (alignedSeqs[smallestIndex].RName.Equals(alignedSeqs[index].RName))
                    {
                        // Same reference name: the lower position wins.
                        if (alignedSeqs[smallestIndex].Pos > alignedSeqs[index].Pos)
                        {
                            smallestIndex = index;
                        }
                    }
                }
            }
        }

        if (smallestIndex > -1)
        {
            // Remember the record to emit before the slot is refilled below.
            SAMAlignedSequence alignSeqTowrite = alignedSeqs[smallestIndex];

            // Refill the slot from the same BAMSortedIndex if it has more entries ...
            if (sortedIndexes[smallestIndex].ElementAt(sortedIndex[smallestIndex]).MoveNext())
            {
                int nextIndex = sortedIndexes[smallestIndex].ElementAt(sortedIndex[smallestIndex]).Current;
                alignedSeqs[smallestIndex] = sequenceAlignmentMaps[smallestIndex].QuerySequences[nextIndex];
            }
            else
            {
                // ... otherwise step to the next BAMSortedIndex of that input, if there is one.
                sortedIndex[smallestIndex]++;
                if (sortedIndex[smallestIndex] < sortedIndexes[smallestIndex].Count &&
                    sortedIndexes[smallestIndex].ElementAt(sortedIndex[smallestIndex]).MoveNext())
                {
                    int nextIndex = sortedIndexes[smallestIndex].ElementAt(sortedIndex[smallestIndex]).Current;
                    alignedSeqs[smallestIndex] = sequenceAlignmentMaps[smallestIndex].QuerySequences[nextIndex];
                }
                else
                {
                    // This input is exhausted: clear its slot and force a fresh selection.
                    alignedSeqs[smallestIndex] = null;
                    smallestIndex = -1;
                }
            }

            formatter.WriteAlignedSequence(_header, alignSeqTowrite, fstemp);
        }
    } while (!alignedSeqs.All(a => a == null));
}
/// <summary>
/// Merge multiple sorted alignments.
/// SAMUtil.exe out.bam in1.bam in2.bam
/// </summary>
/// <remarks>
/// FilePaths[0] is the output file and FilePaths[1..] are the inputs. The header written to
/// the output is taken from HeaderFile when one is set, otherwise from the first input.
/// The merged records are first written uncompressed to a temporary file, which is then
/// compressed into the output file and deleted (also when the merge fails).
/// NOTE(review): inputs are parsed inside Parallel.For, so the order in which they are
/// appended to the merge lists is not fixed, and an exception thrown for an input reaches
/// the caller wrapped in an AggregateException.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// Thrown when FilePaths is null or holds fewer than three entries.
/// </exception>
public void DoMerge()
{
    if (FilePaths == null)
    {
        throw new InvalidOperationException("FilePath");
    }

    if (FilePaths.Length < 3)
    {
        throw new InvalidOperationException(Resources.MergeHelp);
    }

    IList<IList<BAMSortedIndex>> sortedIndexes = new List<IList<BAMSortedIndex>>();
    IList<SequenceAlignmentMap> sequenceAlignmentMaps = new List<SequenceAlignmentMap>();

    Parallel.For(1, FilePaths.Length, (int index) =>
    {
        BAMParser parser = new BAMParser();
        SequenceAlignmentMap map;
        try
        {
            map = parser.Parse(FilePaths[index]);
        }
        catch (Exception ex)
        {
            // Keep the cause; it was previously dropped by a bare catch.
            throw new InvalidOperationException(Resources.InvalidBAMFile, ex);
        }

        if (map == null)
        {
            throw new InvalidOperationException(Resources.EmptyFile);
        }

        // Only the first input takes part in choosing the output header.
        if (index == 1)
        {
            if (string.IsNullOrEmpty(HeaderFile) && map.Header.RecordFields.Count == 0)
            {
                throw new InvalidOperationException(Resources.HeaderMissing);
            }

            if (!string.IsNullOrEmpty(HeaderFile))
            {
                SAMParser parse = new SAMParser();
                SequenceAlignmentMap head;
                try
                {
                    head = parse.Parse(HeaderFile);
                }
                catch (Exception ex)
                {
                    throw new InvalidOperationException(Resources.IncorrectHeaderFile, ex);
                }

                if (head == null)
                {
                    throw new InvalidOperationException(Resources.EmptyFile);
                }

                _header = head.Header;
            }
            else
            {
                _header = map.Header;
            }
        }

        IList<BAMSortedIndex> sortedIndex = Sort(
            map,
            SortByReadName ? BAMSortByFields.ReadNames : BAMSortByFields.ChromosomeCoordinates);

        // Both lists are appended under one lock so entry i of one always matches entry i of the other.
        lock (sortedIndexes)
        {
            sortedIndexes.Add(sortedIndex);
            sequenceAlignmentMaps.Add(map);
        }
    });

    string filePath = Path.GetTempFileName();
    try
    {
        using (FileStream fstemp = new FileStream(filePath, FileMode.Create, FileAccess.ReadWrite))
        {
            BAMFormatter formatter = new BAMFormatter();
            formatter.WriteHeader(_header, fstemp);

            if (SortByReadName)
            {
                // Read-name merging works on the first sorted index of every input.
                IList<BAMSortedIndex> sortedIndex = sortedIndexes.Select(a => a.First()).ToList();
                WriteMergeFileSortedByReadName(sortedIndex, fstemp, formatter, sequenceAlignmentMaps);
            }
            else
            {
                WriteMergeFile(sortedIndexes, fstemp, formatter, sequenceAlignmentMaps);
            }

            using (FileStream fsoutput = new FileStream(FilePaths[0], FileMode.Create, FileAccess.Write))
            {
                // Rewind the uncompressed data before compressing it into the output file.
                fstemp.Seek(0, SeekOrigin.Begin);
                formatter.CompressBAMFile(fstemp, fsoutput);
            }
        }
    }
    finally
    {
        // Remove the scratch file on success and on failure.
        File.Delete(filePath);
    }
}
/// <summary>
/// Validate formatted BAM file.
/// Parses a BAM file, formats it to a temporary BAM file through the requested Format
/// overload, parses the result back and compares header records, the aligned sequence
/// count and every aligned sequence with the expected values from the xml node.
/// </summary>
/// <param name="nodeName">Different xml nodes used for different test cases</param>
/// <param name="BAMParserPam">BAM Format method parameters</param>
void ValidateBAMFormatter(string nodeName, BAMParserParameters BAMParserPam)
{
    // Get input and output values from xml node.
    string bamFilePath = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedAlignedSeqFilePath = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);
    string alignedSeqCount = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.AlignedSeqCountNode);

    using (BAMParser bamParserObj = new BAMParser())
    {
        // Parse a BAM file.
        SequenceAlignmentMap seqAlignment = bamParserObj.Parse(bamFilePath);

        // Create a BAM formatter object.
        BAMFormatter formatterObj = new BAMFormatter();

        // Write/Format aligned sequences to BAM file.
        switch (BAMParserPam)
        {
            case BAMParserParameters.StreamWriter:
                // Declared as Stream so the same Format overload is selected as before.
                using (Stream stream = new FileStream(Constants.BAMTempFileName,
                    FileMode.Create, FileAccess.Write))
                {
                    formatterObj.Format(seqAlignment, stream);
                }
                break;
            case BAMParserParameters.FileName:
                formatterObj.Format(seqAlignment, Constants.BAMTempFileName);
                break;
            case BAMParserParameters.IndexFile:
                formatterObj.Format(seqAlignment, Constants.BAMTempFileName,
                    Constants.BAMTempIndexFile);
                // The result of File.Exists used to be discarded, so a missing index file
                // went unnoticed; assert it.
                Assert.IsTrue(File.Exists(Constants.BAMTempIndexFile));
                break;
            default:
                break;
        }

        // Parse formatted BAM file and validate aligned sequences.
        SequenceAlignmentMap expectedSeqAlignmentMap = bamParserObj.Parse(
            Constants.BAMTempFileName);

        // Validate Parsed BAM file Header record fields.
        ValidateBAMHeaderRecords(nodeName, expectedSeqAlignmentMap);

        IList<SAMAlignedSequence> alignedSeqs = expectedSeqAlignmentMap.QuerySequences;
        Assert.AreEqual(alignedSeqCount, alignedSeqs.Count.ToString((IFormatProvider)null));

        // Get expected sequences
        using (FastaParser parserObj = new FastaParser())
        {
            IList<ISequence> expectedSequences = parserObj.Parse(expectedAlignedSeqFilePath);

            // Validate aligned sequences from BAM file.
            for (int index = 0; index < alignedSeqs.Count; index++)
            {
                Assert.AreEqual(expectedSequences[index].ToString(),
                    alignedSeqs[index].QuerySequence.ToString());

                // Log to NUNIT GUI.
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                    "BAM Formatter BVT : Validated Aligned sequence :{0} successfully",
                    alignedSeqs[index].QuerySequence.ToString()));
                Console.WriteLine(string.Format((IFormatProvider)null,
                    "BAM Formatter BVT : Validated the aligned sequence :{0} successfully",
                    alignedSeqs[index].QuerySequence.ToString()));
            }
        }
    }

    File.Delete(Constants.BAMTempFileName);
    File.Delete(Constants.BAMTempIndexFile);
}