/// <summary>
/// Parses the sample BAM file, writes it back out with BAMFormatter (first plain,
/// then sorted with an index file) and verifies the re-parsed output, both as a
/// whole file and through range-restricted parsing.
/// </summary>
public void TestFormatter()
{
    string filePath = @"TestUtils\BAM\SeqAlignment.bam".TestDir();
    const string outputfilePath = "BAMTests123.bam";
    string outputIndexFile = outputfilePath + ".bai";

    BAMParser parser = null;
    try
    {
        parser = new BAMParser();

        // Baseline: the input file itself.
        SequenceAlignmentMap alignmentMap = parser.ParseOne<SequenceAlignmentMap>(filePath);
        Assert.IsNotNull(alignmentMap);
        Assert.IsNotNull(alignmentMap.Header);
        Assert.AreEqual(1, alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count);
        Assert.AreEqual(1, alignmentMap.Header.ReferenceSequences.Count);
        Assert.IsNotNull(alignmentMap.QuerySequences);
        Assert.AreEqual(2, alignmentMap.QuerySequences.Count);

        // Format once with default settings, then again sorted and with an index file.
        BAMFormatter formatter = new BAMFormatter();
        formatter.Format(alignmentMap, outputfilePath);

        formatter.CreateSortedBAMFile = true;
        formatter.CreateIndexFile = true;
        formatter.Format(alignmentMap, outputfilePath);
        Assert.IsTrue(File.Exists(outputIndexFile));

        // Whole-file parse of the formatted output.
        alignmentMap = parser.ParseOne<SequenceAlignmentMap>(outputfilePath);
        Assert.IsNotNull(alignmentMap);
        Assert.IsNotNull(alignmentMap.Header);
        Assert.AreEqual(1, alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count);
        Assert.AreEqual(1, alignmentMap.Header.ReferenceSequences.Count);
        Assert.AreEqual(2, alignmentMap.QuerySequences.Count);

        // Range covering everything on chr20: both reads are expected.
        alignmentMap = parser.ParseRange(outputfilePath, new SequenceRange("chr20", 0, int.MaxValue));
        Assert.IsNotNull(alignmentMap);
        Assert.IsNotNull(alignmentMap.Header);
        Assert.AreEqual(1, alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count);
        Assert.AreEqual(1, alignmentMap.Header.ReferenceSequences.Count);
        Assert.AreEqual(2, alignmentMap.QuerySequences.Count);

        // Narrower range on chr20: only one read is expected.
        alignmentMap = parser.ParseRange(outputfilePath, new SequenceRange("chr20", 0, 28833));
        Assert.IsNotNull(alignmentMap);
        Assert.IsNotNull(alignmentMap.Header);
        Assert.AreEqual(1, alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count);
        Assert.AreEqual(1, alignmentMap.Header.ReferenceSequences.Count);
        Assert.AreEqual(1, alignmentMap.QuerySequences.Count);
    }
    finally
    {
        // Release the parser and remove the generated files even when an assertion fails,
        // so a failed run does not leave artifacts behind for the next one.
        if (parser != null)
        {
            parser.Dispose();
        }

        File.Delete(outputfilePath);
        File.Delete(outputIndexFile);
    }
}
/// <summary>
/// Formats the sample alignment twice with coordinate sorting enabled and checks
/// that both output files exist and that FileCompare reports them as equal.
/// </summary>
public void TestFormatterWithSort()
{
    string inputFilePath = @"TestUtils\BAM\SeqAlignment.bam".TestDir();
    string outputFilePath1 = "output1.bam";
    string outputFilePath2 = "output2.bam";
    BAMParser parser = null;
    try
    {
        parser = new BAMParser();
        BAMFormatter formatter = new BAMFormatter();

        SequenceAlignmentMap alignmentMap = parser.ParseOne<SequenceAlignmentMap>(inputFilePath);
        Assert.IsNotNull(alignmentMap);

        // Expected value goes first so that failure messages read correctly.
        Assert.AreEqual(1, alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count);
        Assert.AreEqual(1, alignmentMap.Header.ReferenceSequences.Count);
        Assert.AreEqual(2, alignmentMap.QuerySequences.Count);

        formatter.CreateSortedBAMFile = true;
        formatter.SortType = BAMSortByFields.ChromosomeCoordinates;
        formatter.Format(alignmentMap, outputFilePath1);

        // Re-parse the input so the second output is produced from a fresh map.
        alignmentMap = parser.ParseOne<SequenceAlignmentMap>(inputFilePath);
        formatter.Format(alignmentMap, outputFilePath2);

        Assert.IsTrue(File.Exists(outputFilePath1));
        Assert.IsTrue(File.Exists(outputFilePath2));
        Assert.AreEqual(true, FileCompare(outputFilePath1, outputFilePath2));
    }
    finally
    {
        if (parser != null)
        {
            parser.Dispose();
        }

        File.Delete(outputFilePath1);
        File.Delete(outputFilePath2);
    }
}
/// <summary>
/// Counts chimeric paired reads per pair of reference sequences.
/// </summary>
/// <param name="filename">Path of the input file; parsed as SAM when SAMInput is set, otherwise as BAM.</param>
/// <returns>
/// A matrix whose row and column keys are the names returned by GetRefSequences().
/// Cell [a, b] holds, as a string, the number of chimera pairs whose first read has
/// RName a and whose second read has RName b. The matrix is created with "0" as the
/// third CreateEmptyInstance argument (presumably the value reported for unset cells).
/// </returns>
private Matrix<string, string, string> GetChimeraData(string filename)
{
    // Mean and standard deviation handed to GetPairedReads when classifying pairs.
    const int insertLengthMean = 200;
    const int insertLengthDeviation = 50;

    BAMParser bamParser = null;
    try
    {
        SequenceAlignmentMap alignmentMap;
        if (!SAMInput)
        {
            bamParser = new BAMParser();
            alignmentMap = bamParser.ParseOne<SequenceAlignmentMap>(filename);
        }
        else
        {
            SAMParser samParser = new SAMParser();
            alignmentMap = samParser.ParseOne<SequenceAlignmentMap>(filename);
        }

        // Get reads from the sequence alignment map and keep only the chimeras.
        IList<PairedRead> pairedReads = alignmentMap.GetPairedReads(insertLengthMean, insertLengthDeviation);
        var chimeras = pairedReads.Where(pair => pair.PairedType == PairedReadType.Chimera);

        // Group chimeras by the chromosome name of their first read.
        var chimerasByFirstRead = chimeras.GroupBy(pair => pair.Read1.RName);

        IList<string> chromosomes = alignmentMap.GetRefSequences();

        // Sparse matrix that stores the statistics.
        SparseMatrix<string, string, string> statistics =
            SparseMatrix<string, string, string>.CreateEmptyInstance(chromosomes, chromosomes, "0");

        // Within each group, sub-group by the chromosome name of the second read.
        foreach (var firstReadGroup in chimerasByFirstRead)
        {
            foreach (var secondReadGroup in firstReadGroup.GroupBy(pair => pair.Read2.RName))
            {
                statistics[firstReadGroup.Key, secondReadGroup.Key] = secondReadGroup.Count().ToString();
            }
        }

        return statistics;
    }
    finally
    {
        // Released only after all work on the parsed map is done, mirroring how the
        // parser is disposed elsewhere in this code base.
        if (bamParser != null)
        {
            bamParser.Dispose();
        }
    }
}
/// <summary>
/// Public method to sort BAM file.
/// SAMUtil.exe in.bam out.bam
/// </summary>
/// <remarks>
/// Sorts by read name when SortByReadName is set, otherwise by chromosome coordinates.
/// When OutputFilename is empty, the output is written next to the input with a
/// ".sort" suffix and the generated name is reported on the console.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// InputFilename is null or empty, or the input could not be parsed as BAM
/// (the parser's exception is attached as the inner exception).
/// </exception>
public void DoSort()
{
    const string sortExtension = ".sort";

    if (string.IsNullOrEmpty(InputFilename))
    {
        throw new InvalidOperationException(Resources.SortHelp);
    }

    BAMParser parse = new BAMParser();
    try
    {
        SequenceAlignmentMap map;
        try
        {
            map = parse.ParseOne<SequenceAlignmentMap>(InputFilename);
        }
        catch (Exception ex)
        {
            throw new InvalidOperationException(Resources.InvalidBAMFile, ex);
        }

        BAMFormatter format = new BAMFormatter
        {
            CreateSortedBAMFile = true,
            SortType = this.SortByReadName
                ? BAMSortByFields.ReadNames
                : BAMSortByFields.ChromosomeCoordinates
        };

        if (string.IsNullOrEmpty(OutputFilename))
        {
            OutputFilename = InputFilename + sortExtension;
            autoGeneratedOutputFilename = true;
        }

        format.Format(map, OutputFilename);
    }
    finally
    {
        // Release the parser once the map has been written out, on success or failure.
        parse.Dispose();
    }

    if (autoGeneratedOutputFilename)
    {
        Console.WriteLine(Resources.SuccessMessageWithOutputFileName, OutputFilename);
    }
}
/// <summary>
/// Writes a per-position A/T/G/C table to the console for every reference sequence
/// in the input file.
/// </summary>
/// <param name="inputFile">Path of the input file; parsed as SAM when SAMInput is set, otherwise as BAM.</param>
/// <param name="possibleOccurence">
/// True to print the nucleotide distribution (with the extra "Possibility Of Occurences"
/// column); false to print the coverage profile.
/// </param>
/// <exception cref="InvalidOperationException"><paramref name="inputFile"/> is null or empty.</exception>
public void DisplaySequenceItemOccurences(string inputFile, bool possibleOccurence)
{
    if (string.IsNullOrEmpty(inputFile))
    {
        throw new InvalidOperationException("Input File Not specified");
    }

    BAMParser bamParser = null;
    try
    {
        SequenceAlignmentMap alignmentMap;
        if (!SAMInput)
        {
            bamParser = new BAMParser();
            alignmentMap = bamParser.ParseOne<SequenceAlignmentMap>(inputFile);
        }
        else
        {
            SAMParser samParser = new SAMParser();
            alignmentMap = samParser.ParseOne<SequenceAlignmentMap>(inputFile);
        }

        IList<string> chromosomes = alignmentMap.GetRefSequences();

        // The two modes differ only in their headings and in the flag passed to
        // GetCoverage, which takes it as the string "true" or "false".
        string title;
        string columnHeader;
        string occurrenceFlag;
        if (possibleOccurence)
        {
            title = "Nucleotide Distribution:";
            columnHeader = "\r\nPosition\tA\tT\tG\tC\tPossibility Of Occurences";
            occurrenceFlag = "true";
        }
        else
        {
            title = "Coverage Profile:";
            columnHeader = "\r\nPosition\tA\tT\tG\tC";
            occurrenceFlag = "false";
        }

        Console.Write(title);
        Console.Write(columnHeader);
        foreach (string chromosome in chromosomes)
        {
            GetCoverage(chromosome, alignmentMap, occurrenceFlag);
        }
    }
    finally
    {
        // Released only after all work on the parsed map is done.
        if (bamParser != null)
        {
            bamParser.Dispose();
        }
    }
}
/// <summary>
/// Loads InputFilename into _sequenceAlignmentMap, choosing the BAM or SAM parser
/// according to Helper.IsBAM, and defaults OutputFilename to the input name with
/// the opposite format's extension when no output name was given.
/// </summary>
/// <exception cref="InvalidOperationException">
/// The selected parser failed; the original exception is attached as the inner exception.
/// </exception>
private void PerformParse()
{
    string defaultOutputExtension;

    if (Helper.IsBAM(InputFilename))
    {
        BAMParser bamParser = new BAMParser();
        try
        {
            _sequenceAlignmentMap = bamParser.ParseOne<SequenceAlignmentMap>(InputFilename);
        }
        catch (Exception parseError)
        {
            throw new InvalidOperationException(Resources.InvalidBAMFile, parseError);
        }

        // BAM input: the default output gets a SAM extension.
        defaultOutputExtension = ".sam";
    }
    else
    {
        SAMParser samParser = new SAMParser();
        try
        {
            _sequenceAlignmentMap = samParser.ParseOne<SequenceAlignmentMap>(InputFilename);
        }
        catch (Exception parseError)
        {
            throw new InvalidOperationException(Resources.InvalidSAMFile, parseError);
        }

        _isSAM = true;

        // SAM input: the default output gets a BAM extension.
        defaultOutputExtension = ".bam";
    }

    if (string.IsNullOrEmpty(OutputFilename))
    {
        OutputFilename = InputFilename + defaultOutputExtension;
    }
}
/// <summary>
/// Parses the sample BAM file and checks that it yields one reference sequence
/// in the SQ header and two query sequences.
/// </summary>
public void TestParser()
{
    string filePath = @"TestUtils\BAM\SeqAlignment.bam".TestDir();
    BAMParser parser = null;
    try
    {
        parser = new BAMParser();
        SequenceAlignmentMap alignmentMap = parser.ParseOne<SequenceAlignmentMap>(filePath);

        Assert.IsNotNull(alignmentMap);

        // Expected value goes first so that failure messages read correctly.
        Assert.AreEqual(1, alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count);
        Assert.AreEqual(2, alignmentMap.QuerySequences.Count);
    }
    finally
    {
        if (parser != null)
        {
            parser.Dispose();
        }
    }
}
/// <summary>
/// Prints the regions covered by orphan reads, followed by those regions with
/// overlaps merged (reported as chromosomal hot spots).
/// </summary>
/// <param name="filename">Path of the input file; parsed as SAM when SAMInput is set, otherwise as BAM.</param>
private void DisplayOrphans(string filename)
{
    BAMParser bamParser = null;
    try
    {
        SequenceAlignmentMap alignmentMap;
        if (!SAMInput)
        {
            bamParser = new BAMParser();
            alignmentMap = bamParser.ParseOne<SequenceAlignmentMap>(filename);
        }
        else
        {
            SAMParser samParser = new SAMParser();
            alignmentMap = samParser.ParseOne<SequenceAlignmentMap>(filename);
        }

        // Get reads from the sequence alignment map object.
        IList<PairedRead> pairedReads = alignmentMap.GetPairedReads(0, 0);

        // Materialize the orphan pairs once; the list is both counted and projected below,
        // and a deferred query would re-filter pairedReads each time.
        List<PairedRead> orphans = pairedReads
            .Where(pair => pair.PairedType == PairedReadType.Orphan)
            .ToList();

        if (orphans.Count == 0)
        {
            Console.WriteLine("No Orphans to display");
        }

        var orphanRegions = new List<ISequenceRange>(orphans.Count);
        orphanRegions.AddRange(orphans.Select(orphanRead => GetRegion(orphanRead.Read1)));

        // Group the orphan regions.
        SequenceRangeGrouping rangeGroup = new SequenceRangeGrouping(orphanRegions);
        if (!rangeGroup.GroupIDs.Any())
        {
            Console.Write("\r\nNo Orphan reads to display");
        }
        else
        {
            Console.Write("Region of Orphan reads:");
            DisplaySequenceRange(rangeGroup);
        }

        // Overlapping orphan regions merged together are reported as hot spots.
        SequenceRangeGrouping mergedRegions = rangeGroup.MergeOverlaps();
        if (!mergedRegions.GroupIDs.Any())
        {
            Console.Write("\r\nNo hot spots to display");
        }
        else
        {
            Console.Write("\r\nChromosomal hot spot:");
            DisplaySequenceRange(mergedRegions);
        }
    }
    finally
    {
        // Released only after all work on the parsed map is done.
        if (bamParser != null)
        {
            bamParser.Dispose();
        }
    }
}
/// <summary>
/// Identifies hot spot chromosomes for length anomaly regions: prints the length
/// anomaly regions, the orphan regions, and the intersection of the two.
/// </summary>
/// <param name="filename">Path of the input file; parsed as SAM when SAMInput is set, otherwise as BAM.</param>
/// <param name="mean">Mean value passed to GetPairedReads; -1 (the default) means not supplied.</param>
/// <param name="deviation">Standard deviation passed to GetPairedReads; -1 (the default) means not supplied.</param>
/// <remarks>
/// When either <paramref name="mean"/> or <paramref name="deviation"/> is -1, the
/// parameterless GetPairedReads() overload is used instead of the supplied values.
/// </remarks>
private void IdentifyLentghAnamolies(string filename, float mean = -1, float deviation = -1)
{
    // -1 is the "not supplied" sentinel for both optional arguments.
    bool calculateMeanNdeviation = mean == -1 || deviation == -1;

    BAMParser bamParser = null;
    try
    {
        SequenceAlignmentMap alignmentMap;
        if (!SAMInput)
        {
            bamParser = new BAMParser();
            alignmentMap = bamParser.ParseOne<SequenceAlignmentMap>(filename);
        }
        else
        {
            SAMParser samParser = new SAMParser();
            alignmentMap = samParser.ParseOne<SequenceAlignmentMap>(filename);
        }

        // Get reads from the sequence alignment map object.
        IList<PairedRead> pairedReads = calculateMeanNdeviation
            ? alignmentMap.GetPairedReads()
            : alignmentMap.GetPairedReads(mean, deviation);

        // Orphan regions. Materialized once: the result is counted and iterated, and a
        // deferred query would re-filter pairedReads on every use.
        List<PairedRead> orphans = pairedReads
            .Where(pair => pair.PairedType == PairedReadType.Orphan)
            .ToList();
        if (orphans.Count == 0)
        {
            Console.WriteLine("No Orphans to display");
        }

        List<ISequenceRange> orphanRegions = new List<ISequenceRange>(orphans.Count);
        foreach (PairedRead orphanRead in orphans)
        {
            orphanRegions.Add(GetRegion(orphanRead.Read1));
        }

        // Sequence range grouping for orphan regions.
        SequenceRangeGrouping orphanRangegroup = new SequenceRangeGrouping(orphanRegions);

        // Length anomaly regions, materialized once for the same reason as above.
        List<PairedRead> lengthAnomalies = pairedReads
            .Where(pair => pair.PairedType == PairedReadType.LengthAnomaly)
            .ToList();
        if (lengthAnomalies.Count == 0)
        {
            Console.WriteLine("No Anomalies to display");
        }

        List<ISequenceRange> lengthAnomalyRegions = new List<ISequenceRange>(lengthAnomalies.Count);
        foreach (PairedRead laRead in lengthAnomalies)
        {
            // The region starts at the first read's position and extends by the insert length.
            SequenceRange range = new SequenceRange();
            range.ID = laRead.Read1.RName;
            range.Start = laRead.Read1.Pos;
            range.End = laRead.Read1.Pos + laRead.InsertLength;
            lengthAnomalyRegions.Add(range);
        }

        // Sequence range grouping for length anomaly regions.
        SequenceRangeGrouping lengthAnomalyRangegroup = new SequenceRangeGrouping(lengthAnomalyRegions);
        if (!lengthAnomalyRangegroup.GroupIDs.Any())
        {
            Console.Write("\r\nNo Length anomalies reads to display");
        }
        else
        {
            Console.Write("Region of length anomaly:");
            DisplaySequenceRange(lengthAnomalyRangegroup);
        }

        if (!orphanRangegroup.GroupIDs.Any())
        {
            Console.Write("\r\nNo Orphan reads to display");
        }
        else
        {
            Console.Write("\r\nRegion of Orphan reads:");
            DisplaySequenceRange(orphanRangegroup);
        }

        // Hot spots are where length anomaly regions and orphan regions intersect.
        SequenceRangeGrouping intersectedRegions = lengthAnomalyRangegroup.Intersect(orphanRangegroup);
        if (!intersectedRegions.GroupIDs.Any())
        {
            Console.Write("\r\nNo Hot spots found");
        }
        else
        {
            Console.Write("\r\nChromosomal Hot spot of length anomaly and Orphan region:");
            DisplaySequenceRange(intersectedRegions);
        }
    }
    finally
    {
        // Released only after all work on the parsed map is done.
        if (bamParser != null)
        {
            bamParser.Dispose();
        }
    }
}
/// <summary>
/// Merge multiple sorted alignments.
/// SAMUtil.exe out.bam in1.bam in2.bam
/// </summary>
/// <remarks>
/// Every file in FilePaths is parsed and sorted in parallel. The output header comes
/// from HeaderFile when one is given, otherwise from the first input file. The merged
/// records are first written uncompressed to a temporary file, which is then compressed
/// into OutputFilename ("out.bam" when none was specified) and deleted.
/// Failures raised while parsing inside the parallel loop reach the caller wrapped in
/// an AggregateException, as is standard for Parallel.For.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// FilePaths is null or contains fewer than two entries.
/// </exception>
public void DoMerge()
{
    if (FilePaths == null)
    {
        throw new InvalidOperationException("FilePath");
    }

    if (FilePaths.Length < 2)
    {
        throw new InvalidOperationException(Resources.MergeHelp);
    }

    IList<IList<BAMSortedIndex>> sortedIndexes = new List<IList<BAMSortedIndex>>();
    IList<SequenceAlignmentMap> sequenceAlignmentMaps = new List<SequenceAlignmentMap>();

    Parallel.For(0, FilePaths.Length, (int index) =>
    {
        BAMParser parser = new BAMParser();
        SequenceAlignmentMap map;
        try
        {
            map = parser.ParseOne<SequenceAlignmentMap>(FilePaths[index]);
        }
        catch (Exception ex)
        {
            // Keep the parser's exception as the inner exception so the cause is not lost.
            throw new InvalidOperationException(Resources.InvalidBAMFile, ex);
        }

        if (map == null)
        {
            throw new InvalidOperationException(Resources.EmptyFile);
        }

        // Only the first input decides the output header.
        if (index == 0)
        {
            if (string.IsNullOrEmpty(HeaderFile))
            {
                if (map.Header.RecordFields.Count == 0)
                {
                    throw new InvalidOperationException(Resources.HeaderMissing);
                }

                header = map.Header;
            }
            else
            {
                SAMParser headerParser = new SAMParser();
                SequenceAlignmentMap head;
                try
                {
                    head = headerParser.ParseOne<SequenceAlignmentMap>(HeaderFile);
                }
                catch (Exception ex)
                {
                    throw new InvalidOperationException(Resources.IncorrectHeaderFile, ex);
                }

                if (head == null)
                {
                    throw new InvalidOperationException(Resources.EmptyFile);
                }

                header = head.Header;
            }
        }

        IList<BAMSortedIndex> sortedIndex = Sort(
            map,
            SortByReadName ? BAMSortByFields.ReadNames : BAMSortByFields.ChromosomeCoordinates);

        // Both lists are appended under the same lock so that entry i of one
        // always corresponds to entry i of the other.
        lock (sortedIndexes)
        {
            sortedIndexes.Add(sortedIndex);
            sequenceAlignmentMaps.Add(map);
        }
    });

    if (string.IsNullOrEmpty(OutputFilename))
    {
        OutputFilename = "out.bam";
        autoGeneratedOutputFilename = true;
    }

    string filePath = Path.GetTempFileName();
    try
    {
        using (FileStream fstemp = new FileStream(filePath, FileMode.Create, FileAccess.ReadWrite))
        {
            BAMFormatter formatter = new BAMFormatter();
            formatter.WriteHeader(header, fstemp);

            if (SortByReadName)
            {
                IList<BAMSortedIndex> sortedIndex = sortedIndexes.Select(a => a.First()).ToList();
                WriteMergeFileSortedByReadName(sortedIndex, fstemp, formatter, sequenceAlignmentMaps);
            }
            else
            {
                WriteMergeFile(sortedIndexes, fstemp, formatter, sequenceAlignmentMaps);
            }

            using (FileStream fsoutput = new FileStream(OutputFilename, FileMode.Create, FileAccess.Write))
            {
                // Rewind the uncompressed temp data before compressing it into the output.
                fstemp.Seek(0, SeekOrigin.Begin);
                formatter.CompressBAMFile(fstemp, fsoutput);
            }
        }
    }
    finally
    {
        // Remove the temporary file even when writing or compression fails.
        File.Delete(filePath);
    }

    if (autoGeneratedOutputFilename)
    {
        Console.WriteLine(Properties.Resources.SuccessMessageWithOutputFileName, OutputFilename);
    }
}