/// <summary>
/// Writes the given SequenceAlignmentMap to a newly created file. When the
/// formatter's CreateIndexFile flag is set, a second file whose name is
/// <paramref name="filename"/> plus the BAM index extension is created and
/// handed to the formatter as index storage.
/// </summary>
/// <param name="formatter">BAMFormatter that performs the write.</param>
/// <param name="sam">SequenceAlignmentMap to write.</param>
/// <param name="filename">File to write to.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when formatter or sam is null, or when filename is null, empty or whitespace.
/// </exception>
public static void Format(this BAMFormatter formatter, SequenceAlignmentMap sam, string filename)
{
    if (formatter == null)
    {
        throw new ArgumentNullException("formatter");
    }

    if (sam == null)
    {
        throw new ArgumentNullException("sam");
    }

    if (string.IsNullOrWhiteSpace(filename))
    {
        throw new ArgumentNullException("filename");
    }

    using (FileStream bamStream = File.Create(filename))
    {
        // No index requested: write the alignment data only.
        if (!formatter.CreateIndexFile)
        {
            formatter.Format(bamStream, sam);
            return;
        }

        // Index requested: the index file sits next to the BAM file.
        string indexPath = filename + Properties.Resource.BAM_INDEXFILEEXTENSION;
        using (BAMIndexStorage indexStorage = new BAMIndexStorage(File.Create(indexPath)))
        {
            formatter.Format(bamStream, indexStorage, sam);
        }
    }
}
/// <summary>
/// Compares the query sequences of two alignment maps index by index.
/// The comparison is case-insensitive and uses the current culture.
/// </summary>
/// <param name="expectedAlignment">Expected sequence alignment object</param>
/// <param name="actualAlignment">Actual sequence alignment object</param>
/// <returns>
/// True when every expected query sequence matches the actual query sequence
/// at the same index; false (after logging) on the first mismatch or when the
/// actual alignment holds fewer sequences than the expected one.
/// </returns>
private static bool CompareAlignedSequences(SequenceAlignmentMap expectedAlignment,
    SequenceAlignmentMap actualAlignment)
{
    IList<SAMAlignedSequence> actualAlignedSeqs = actualAlignment.QuerySequences;
    IList<SAMAlignedSequence> expectedAlignedSeqs = expectedAlignment.QuerySequences;

    // A shorter actual list can never match; report it as a mismatch instead of
    // failing with an index-out-of-range exception inside the loop.
    bool matches = actualAlignedSeqs.Count >= expectedAlignedSeqs.Count;

    for (int i = 0; matches && i < expectedAlignedSeqs.Count; i++)
    {
        string expected = new string(expectedAlignedSeqs[i].QuerySequence.Select(a => (char)a).ToArray());
        string actual = new string(actualAlignedSeqs[i].QuerySequence.Select(a => (char)a).ToArray());

        matches = 0 == string.Compare(expected, actual, true, CultureInfo.CurrentCulture);
    }

    if (!matches)
    {
        // The message previously read "does match", which contradicted the failure it reports.
        ApplicationLog.WriteLine(string.Format(null,
            "BAM Parser BVT : Sequence alignment aligned seq does not match"));
        return false;
    }

    return true;
}
/// <summary>
/// Asserts that the header record fields of an alignment match the expected
/// type codes, tag keys and tag values read from the test configuration.
/// Forward slashes are stripped from both sides before comparing; carriage
/// returns and line feeds are additionally stripped from actual tag values.
/// </summary>
/// <param name="nodeName">XML nodename used for different test cases</param>
/// <param name="seqAlignment">seqAlignment object</param>
private void ValidateBAMHeaderRecords(string nodeName, SequenceAlignmentMap seqAlignment)
{
    // Expected values are stored as comma-separated lists.
    string rawTagValues = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RecordTagValuesNode);
    string rawTagKeys = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RecordTagKeysNode);
    string rawTypeCodes = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.HeaderTyepsNodes);

    string[] wantedTagValues = rawTagValues.Split(',');
    string[] wantedTagKeys = rawTagKeys.Split(',');
    string[] wantedTypeCodes = rawTypeCodes.Split(',');

    SAMAlignmentHeader header = seqAlignment.Header;
    IList<SAMRecordField> recordFields = header.RecordFields;

    int fieldCursor = 0;

    // Expected keys and values are flat lists spanning all record fields,
    // so one running cursor addresses both.
    int tagCursor = 0;

    foreach (SAMRecordField field in recordFields)
    {
        Assert.AreEqual(
            wantedTypeCodes[fieldCursor].Replace("/", ""),
            field.Typecode.ToString(null).Replace("/", ""));

        foreach (var tag in field.Tags)
        {
            Assert.AreEqual(
                wantedTagKeys[tagCursor].Replace("/", ""),
                tag.Tag.ToString(null).Replace("/", ""));

            Assert.AreEqual(
                wantedTagValues[tagCursor].Replace("/", ""),
                tag.Value.ToString(null)
                    .Replace("/", "")
                    .Replace("\r", "")
                    .Replace("\n", ""));

            tagCursor++;
        }

        fieldCursor++;
    }
}
/// <summary>
/// Compares the header record fields of two sequence alignments: the type
/// code of each record and the key and value of each tag, using a
/// current-culture, case-sensitive comparison.
/// </summary>
/// <param name="actualAlignment">Actual sequence alignment object</param>
/// <param name="expectedAlignment">Expected sequence alignment object</param>
/// <returns>
/// True when every expected record field and tag is matched by the actual
/// header at the same index; otherwise false. A mismatch is logged. An actual
/// header with fewer records or tags than expected is treated as a mismatch.
/// </returns>
private static bool CompareSequencedAlignmentHeader(SequenceAlignmentMap actualAlignment,
    SequenceAlignmentMap expectedAlignment)
{
    IList<SAMRecordField> actualFields = actualAlignment.Header.RecordFields;
    IList<SAMRecordField> expectedFields = expectedAlignment.Header.RecordFields;

    // Fewer actual records than expected cannot match; avoid indexing past the end.
    bool matches = actualFields.Count >= expectedFields.Count;

    for (int index = 0; matches && index < expectedFields.Count; index++)
    {
        SAMRecordField expectedField = expectedFields[index];
        SAMRecordField actualField = actualFields[index];

        matches =
            0 == string.Compare(
                expectedField.Typecode.ToString(null),
                actualField.Typecode.ToString(null),
                StringComparison.CurrentCulture)
            && actualField.Tags.Count >= expectedField.Tags.Count;

        for (int tags = 0; matches && tags < expectedField.Tags.Count; tags++)
        {
            bool sameKey = 0 == string.Compare(
                expectedField.Tags[tags].Tag.ToString(null),
                actualField.Tags[tags].Tag.ToString(null),
                StringComparison.CurrentCulture);

            bool sameValue = 0 == string.Compare(
                expectedField.Tags[tags].Value.ToString(null),
                actualField.Tags[tags].Value.ToString(null),
                StringComparison.CurrentCulture);

            matches = sameKey && sameValue;
        }
    }

    if (!matches)
    {
        ApplicationLog.WriteLine(string.Format(null,
            "BAM Parser BVT : Sequence alignment header does not match"));
    }

    return matches;
}
/// <summary>
/// Writes the aligned sequences of the map to the stream in sorted order.
/// When SortType is ReadNames the sequences are ordered by QName. Otherwise
/// they are grouped by RName, the groups are visited in the order of
/// refSequences, and each group is ordered by Pos.
/// The stream is flushed after every sequence.
/// </summary>
/// <param name="sequenceAlignmentMap">SequenceAlignmentMap object.</param>
/// <param name="writer">Stream to write.</param>
private void WriteUncompressedSortedBAM(SequenceAlignmentMap sequenceAlignmentMap, Stream writer)
{
    if (SortType == BAMSortByFields.ReadNames)
    {
        List<SAMAlignedSequence> byReadName =
            sequenceAlignmentMap.QuerySequences.OrderBy(seq => seq.QName).ToList();

        foreach (SAMAlignedSequence current in byReadName)
        {
            ValidateSQHeader(current.RName);
            WriteAlignedSequence(current, writer);
            writer.Flush();
        }

        return;
    }

    List<IGrouping<string, SAMAlignedSequence>> byReference =
        sequenceAlignmentMap.QuerySequences
            .GroupBy(seq => seq.RName)
            .OrderBy(grp => grp.Key)
            .ToList();

    foreach (SequenceRange range in this.refSequences)
    {
        IGrouping<string, SAMAlignedSequence> matching =
            byReference.FirstOrDefault(grp => grp.Key.Equals(range.ID));

        if (matching != null)
        {
            // Within one reference, order by left co-ordinate.
            List<SAMAlignedSequence> byPosition = matching.OrderBy(seq => seq.Pos).ToList();

            foreach (SAMAlignedSequence current in byPosition)
            {
                ValidateSQHeader(current.RName);
                WriteAlignedSequence(current, writer);
                writer.Flush();
            }
        }
    }
}
/// <summary>
/// Sorts the alignment with BAMSort and asserts that a non-null result is
/// returned and that IsSortedIndex accepts it for the given sort type.
/// </summary>
/// <param name="seqAlignment">SequenceAlignmentMap to be sorted</param>
/// <param name="sortType">The sort type to be used when sorting using BAMSort</param>
private void ValidateSort(SequenceAlignmentMap seqAlignment, BAMSortByFields sortType)
{
    IList<BAMSortedIndex> indices = new BAMSort(seqAlignment, sortType).Sort();

    Assert.IsNotNull(indices);
    Assert.IsTrue(this.IsSortedIndex(indices, sortType));
}
/// <summary>
/// Validates the alignment and returns it as a SequenceAlignmentMap.
/// If the input already is a SequenceAlignmentMap, its header is validated
/// and the same instance is returned. Otherwise a new map is built from the
/// SAM header and the per-sequence SAM headers found in the metadata.
/// In both cases refSequences is refreshed from the header, passed through
/// SortSequenceRanges when a coordinate-sorted BAM file is requested.
/// </summary>
/// <param name="sequenceAlignment">Alignment to validate.</param>
/// <returns>The validated (or newly built) SequenceAlignmentMap.</returns>
/// <exception cref="ArgumentException">
/// Thrown when the alignment header or an aligned sequence header is missing from the metadata.
/// </exception>
private SequenceAlignmentMap ValidateAlignment(ISequenceAlignment sequenceAlignment)
{
    SequenceAlignmentMap result = sequenceAlignment as SequenceAlignmentMap;
    bool needsConversion = result == null;

    if (needsConversion)
    {
        SAMAlignmentHeader samHeader =
            sequenceAlignment.Metadata[Helper.SAMAlignmentHeaderKey] as SAMAlignmentHeader;
        if (samHeader == null)
        {
            throw new ArgumentException(Properties.Resource.SAMAlignmentHeaderNotFound);
        }

        ValidateAlignmentHeader(samHeader);
        result = new SequenceAlignmentMap(samHeader);
    }
    else
    {
        ValidateAlignmentHeader(result.Header);
    }

    // Reference ranges come from the header of whichever map is returned.
    if (CreateSortedBAMFile && SortType == BAMSortByFields.ChromosomeNameAndCoordinates)
    {
        this.refSequences = SortSequenceRanges(result.Header.GetReferenceSequenceRanges());
    }
    else
    {
        this.refSequences = result.Header.GetReferenceSequenceRanges();
    }

    if (needsConversion)
    {
        foreach (IAlignedSequence source in sequenceAlignment.AlignedSequences)
        {
            SAMAlignedSequenceHeader sequenceHeader =
                source.Metadata[Helper.SAMAlignedSequenceHeaderKey] as SAMAlignedSequenceHeader;
            if (sequenceHeader == null)
            {
                throw new ArgumentException(Properties.Resource.SAMAlignedSequenceHeaderNotFound);
            }

            SAMAlignedSequence converted = new SAMAlignedSequence(sequenceHeader);
            converted.QuerySequence = source.Sequences[0];
            result.QuerySequences.Add(converted);
        }
    }

    return result;
}
/// <summary>
/// Reads aligned sequences sequentially until IsEOF() reports the end.
/// When createBamIndex is false, the sequences are added to
/// <paramref name="seqMap"/> (created from <paramref name="header"/> if null).
/// When createBamIndex is true, a new BAMIndex is built through BamIndexing
/// and nothing is added to the map.
/// </summary>
/// <param name="header">Header used to create the map when one is needed.</param>
/// <param name="seqMap">Map receiving the sequences; may be assigned here.</param>
private void GetAlignmentWithoutIndex(SAMAlignmentHeader header, ref SequenceAlignmentMap seqMap)
{
    Chunk lastChunk = null;
    ulong lastcOffset = 0;
    ushort lastuOffset = 0;
    BAMReferenceIndexes refIndices = null;

    if (createBamIndex)
    {
        bamIndex = new BAMIndex();

        for (int i = 0; i < refSeqNames.Count; i++)
        {
            bamIndex.RefIndexes.Add(new BAMReferenceIndexes());
        }

        // NOTE(review): assumes at least one reference sequence name exists — confirm.
        refIndices = bamIndex.RefIndexes[0];
    }

    if (!createBamIndex && seqMap == null)
    {
        seqMap = new SequenceAlignmentMap(header);
    }

    while (!IsEOF())
    {
        if (createBamIndex)
        {
            // Remember where this record starts before it is read.
            lastcOffset = (ulong)currentCompressedBlockStartPos;
            lastuOffset = (ushort)deCompressedStream.Position;
        }

        SAMAlignedSequence alignedSeq = GetAlignedSequence(0, int.MaxValue);
        alignedSeq = BamIndexing(alignedSeq, refIndices, bamIndex, lastcOffset, lastuOffset, ref lastChunk);

        if (!createBamIndex && alignedSeq != null)
        {
            seqMap.QuerySequences.Add(alignedSeq);
        }
    }

    #region BAM Indexing

    // lastChunk is only ever assigned through BamIndexing above. If no record
    // was read it is still null, so there is no chunk to close and
    // dereferencing it would throw a NullReferenceException.
    if (createBamIndex && lastChunk != null)
    {
        lastChunk.ChunkEnd.CompressedBlockOffset = (ulong)readStream.Position;

        if (deCompressedStream != null)
        {
            lastChunk.ChunkEnd.UncompressedBlockOffset = (ushort)deCompressedStream.Position;
        }
        else
        {
            lastChunk.ChunkEnd.UncompressedBlockOffset = 0;
        }
    }

    #endregion
}
/// <summary>
/// Initializes a new instance of the BAMSort class, storing the alignment
/// map to sort and the field to sort by.
/// </summary>
/// <param name="seqAlignMap">SequenceAlignmentMap object to be sorted.</param>
/// <param name="sortType">Type of sort required.</param>
public BAMSort(SequenceAlignmentMap seqAlignMap, BAMSortByFields sortType)
{
    this.sortField = sortType;
    this.sequenceAlignMap = seqAlignMap;
}
/// <summary>
/// Sorts the given sequence alignment map by delegating to a new BAMSort.
/// </summary>
/// <param name="map">SAM object to be sorted.</param>
/// <param name="bAMSortType">Sort based on genomic coordinates or read name.</param>
/// <returns>The sorted indices produced by BAMSort.Sort().</returns>
private IList<BAMSortedIndex> Sort(SequenceAlignmentMap map, BAMSortByFields bAMSortType)
{
    return new BAMSort(map, bAMSortType).Sort();
}
/// <summary>
/// Computes and prints, for every position covered by a read, the fraction
/// of A, T, G and C observed at that position.
/// </summary>
/// <param name="readname">Chromosome name. Not used by this method.</param>
/// <param name="alignmentMapobj">Alignment object whose query sequences are profiled.</param>
/// <param name="possibility">
/// When equal to "true", values are printed as percentages together with the
/// result of GetMoreOccurences; otherwise the raw fractions are printed.
/// </param>
private void GetCoverage(string readname, SequenceAlignmentMap alignmentMapobj, string possibility)
{
    // Column order of every profile row: A, T, G, C.
    List<byte> distinctChars = new List<byte>
    {
        Alphabets.DNA.A, Alphabets.DNA.T, Alphabets.DNA.G, Alphabets.DNA.C
    };

    // Dictionary to hold coverage profile.
    ConcurrentDictionary<long, double[]> coverageProfile = new ConcurrentDictionary<long, double[]>();

    // Get the position specific alphabet count.
    foreach (SAMAlignedSequence read in alignmentMapobj.QuerySequences)
    {
        for (int i = 0; i < read.QuerySequence.Count; i++)
        {
            long position = read.Pos + i;

            double[] counts;
            coverageProfile.TryGetValue(position, out counts);
            if (counts == null)
            {
                counts = new double[distinctChars.Count];
                coverageProfile[position] = counts;
            }

            // The previous condition was inverted: every valid DNA symbol
            // incremented all four counters, and any other symbol indexed the
            // row with IndexOf(...) == -1 and threw. Count a known base in its
            // own column; spread any other symbol over all columns.
            // NOTE(review): spreading non-ATGC symbols evenly is the assumed
            // intent for ambiguous symbols — confirm.
            int column = distinctChars.IndexOf(read.QuerySequence[i]);
            if (column >= 0)
            {
                counts[column]++;
            }
            else
            {
                for (int k = 0; k < counts.Length; k++)
                {
                    counts[k]++;
                }
            }
        }
    }

    // Get the position specific alphabet coverage.
    foreach (long key in coverageProfile.Keys)
    {
        double[] counts = coverageProfile[key];
        double total = counts.Sum();

        for (int j = 0; j < distinctChars.Count; j++)
        {
            counts[j] = counts[j] / total;
        }
    }

    // Display
    foreach (long pos in coverageProfile.Keys)
    {
        double[] values = coverageProfile[pos];

        if (possibility == "true")
        {
            string possibleOccurence = GetMoreOccurences(values[0], values[1], values[2], values[3]);
            Console.Write("\r\n{0}\t\t{1:0.00}%\t{2:0.00}%\t{3:0.00}%\t{4:0.00}%\t\t{5}",
                pos.ToString(), values[0] * 100, values[1] * 100, values[2] * 100, values[3] * 100,
                possibleOccurence);
        }
        else
        {
            Console.Write("\r\n{0}\t\t{1:0.00}\t{2:0.00}\t{3:0.00}\t{4:0.00}",
                pos.ToString(), values[0], values[1], values[2], values[3]);
        }
    }
}
/// <summary>
/// Compares the query sequences of two alignment maps index by index using a
/// current-culture, case-sensitive comparison.
/// </summary>
/// <param name="expectedAlignment">Expected sequence alignment object</param>
/// <param name="actualAlignment">Actual sequence alignment object</param>
/// <returns>
/// True when every expected query sequence matches the actual query sequence
/// at the same index; false (after logging) on the first mismatch or when the
/// actual alignment holds fewer sequences than the expected one.
/// </returns>
private static bool CompareAlignedSequences(SequenceAlignmentMap expectedAlignment,
    SequenceAlignmentMap actualAlignment)
{
    IList<SAMAlignedSequence> actualAlignedSeqs = actualAlignment.QuerySequences;
    IList<SAMAlignedSequence> expectedAlignedSeqs = expectedAlignment.QuerySequences;

    // A shorter actual list can never match; report it as a mismatch instead of
    // failing with an index-out-of-range exception inside the loop.
    bool matches = actualAlignedSeqs.Count >= expectedAlignedSeqs.Count;

    for (int i = 0; matches && i < expectedAlignedSeqs.Count; i++)
    {
        string expected = new string(expectedAlignedSeqs[i].QuerySequence.Select(a => (char)a).ToArray());
        string actual = new string(actualAlignedSeqs[i].QuerySequence.Select(a => (char)a).ToArray());

        matches = 0 == string.Compare(expected, actual, StringComparison.CurrentCulture);
    }

    if (!matches)
    {
        // The message previously read "does match", which contradicted the failure it reports.
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
            "BAM Parser P1 : Sequence alignment aligned seq does not match"));
        return false;
    }

    return true;
}
/// <summary>
/// Asserts that the CIGAR, QName and Bin values of each query sequence equal
/// the expected comma-separated values read from the test configuration.
/// </summary>
/// <param name="nodeName">XML nodename used for different test cases</param>
/// <param name="seqAlignment">seqAlignment object</param>
void ValidateAlignedSeqValues(string nodeName, SequenceAlignmentMap seqAlignment)
{
    string rawCigars = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.CigarsNode);
    string rawQNames = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.QNamesNode);
    string rawBins = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BinsNode);

    string[] wantedCigars = rawCigars.Split(',');
    string[] wantedQNames = rawQNames.Split(',');
    string[] wantedBins = rawBins.Split(',');

    // The loop bound is the AlignedSequences count; values are read from QuerySequences.
    int sequenceCount = seqAlignment.AlignedSequences.Count;

    for (int position = 0; position < sequenceCount; position++)
    {
        SAMAlignedSequence current = seqAlignment.QuerySequences[position];

        Assert.AreEqual(wantedCigars[position], current.CIGAR.ToString((IFormatProvider)null));
        Assert.AreEqual(wantedQNames[position], current.QName.ToString((IFormatProvider)null));
        Assert.AreEqual(wantedBins[position], current.Bin.ToString((IFormatProvider)null));
    }
}
/// <summary>
/// General method for invalid-input checks of the SAM formatter. Calls the
/// Format overload selected by <paramref name="method"/> with null or empty
/// arguments and logs when an ArgumentNullException or NotSupportedException
/// is raised.
/// </summary>
/// <param name="method">enum type to execute different overload</param>
void ValidateSamFormatter(ParseOrFormatTypes method)
{
    const string validatedMessage = "SAM Formatter P2 : Successfully validated the exception";

    string samFilePath = utilityObj.xmlUtil.GetTextValue(
        Constants.SmallSAMFileNode, Constants.FilePathNode);
    ISequenceAlignmentParser samParser = new SAMParser();
    ISequenceAlignment parsedAlignment = null;

    try
    {
        switch (method)
        {
            case ParseOrFormatTypes.ParseOrFormatSeqText:
                new SAMFormatter().Format(null, null as ISequenceAlignment);
                break;

            case ParseOrFormatTypes.ParseOrFormatSeqTextWithFlag:
                parsedAlignment = samParser.ParseOne(samFilePath);
                new SAMFormatter().Format(null, parsedAlignment);
                break;

            case ParseOrFormatTypes.ParseOrFormatIseq:
                new SAMFormatter().Format(null as ISequenceAlignment, null as string);
                break;

            case ParseOrFormatTypes.ParseOrFormatIseqFile:
                parsedAlignment = samParser.ParseOne(samFilePath);
                new SAMFormatter().Format(parsedAlignment, null as string);
                break;

            case ParseOrFormatTypes.ParseOrFormatCollString:
                new SAMFormatter().Format(null as ICollection<ISequenceAlignment>, null as string);
                break;

            case ParseOrFormatTypes.ParseOrFormatCollection:
                new SAMFormatter().Format(null, null as ICollection<ISequenceAlignment>);
                break;

            // Both options format an empty map to a null file name.
            case ParseOrFormatTypes.ParseOneOrFormatSeq:
            case ParseOrFormatTypes.ParseOrFormatIseqT:
                SequenceAlignmentMap emptyMap = new SequenceAlignmentMap();
                new SAMFormatter().Format(emptyMap, null as string);
                break;

            // Both options format a null map to a null file name.
            case ParseOrFormatTypes.ParseOneOrFormatSeqFile:
            case ParseOrFormatTypes.ParseOrFormatIseqText:
                new SAMFormatter().Format(null as SequenceAlignmentMap, null as string);
                break;

            // ParseOrFormatFormatString and any other value: nothing to do.
            default:
                break;
        }
    }
    catch (ArgumentNullException)
    {
        ApplicationLog.WriteLine(validatedMessage);
    }
    catch (NotSupportedException)
    {
        ApplicationLog.WriteLine(validatedMessage);
    }
}
/// <summary>
/// Writes the header and then the aligned sequences of the map to the stream.
/// For a coordinate-sorted file the header is first replaced by the result of
/// GetHeaderWithSortedSQFields and refSequences is refreshed from it.
/// The stream is flushed after the header, after each sequence and at the end.
/// </summary>
/// <param name="sequenceAlignmentMap">SequenceAlignmentMap object.</param>
/// <param name="writer">Stream to write.</param>
/// <param name="createSortedFile">If this flag is true output file will be sorted.</param>
private void WriteUncompressed(SequenceAlignmentMap sequenceAlignmentMap, Stream writer, bool createSortedFile)
{
    SAMAlignmentHeader headerToWrite = sequenceAlignmentMap.Header;

    bool sortByCoordinates =
        createSortedFile && SortType == BAMSortByFields.ChromosomeNameAndCoordinates;
    if (sortByCoordinates)
    {
        headerToWrite = GetHeaderWithSortedSQFields(headerToWrite, true);
        this.refSequences = headerToWrite.GetReferenceSequenceRanges();
    }

    // Fall back to the header's ranges when none have been set yet.
    if (this.refSequences == null)
    {
        this.refSequences = headerToWrite.GetReferenceSequenceRanges();
    }

    WriteHeader(headerToWrite, writer);
    writer.Flush();

    if (!createSortedFile)
    {
        foreach (SAMAlignedSequence current in sequenceAlignmentMap.QuerySequences)
        {
            this.ValidateSQHeader(current.RName);
            this.WriteAlignedSequence(current, writer);
            writer.Flush();
        }
    }
    else
    {
        WriteUncompressedSortedBAM(sequenceAlignmentMap, writer);
    }

    writer.Flush();
}
/// <summary>
/// Parses the input file as BAM or SAM, depending on Helper.IsBAM, into
/// _sequenceAlignmentMap. When no output file name has been set, one is
/// derived by appending the opposite format's extension to the input name.
/// _isSAM is set to true after a SAM input has been parsed.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown, wrapping the original exception, when parsing fails.
/// </exception>
private void PerformParse()
{
    const string samSuffix = ".sam";
    const string bamSuffix = ".bam";

    if (!Helper.IsBAM(InputFilename))
    {
        SAMParser samParser = new SAMParser();
        try
        {
            _sequenceAlignmentMap = samParser.ParseOne<SequenceAlignmentMap>(InputFilename);
        }
        catch (Exception ex)
        {
            throw new InvalidOperationException(Resources.InvalidSAMFile, ex);
        }

        _isSAM = true;

        // SAM input defaults to a BAM output name.
        if (string.IsNullOrEmpty(OutputFilename))
        {
            OutputFilename = InputFilename + bamSuffix;
        }

        return;
    }

    BAMParser bamParser = new BAMParser();
    try
    {
        _sequenceAlignmentMap = bamParser.ParseOne<SequenceAlignmentMap>(InputFilename);
    }
    catch (Exception ex)
    {
        throw new InvalidOperationException(Resources.InvalidBAMFile, ex);
    }

    // BAM input defaults to a SAM output name.
    if (string.IsNullOrEmpty(OutputFilename))
    {
        OutputFilename = InputFilename + samSuffix;
    }
}
/// <summary>
/// Builds a new SequenceAlignmentMap for one reference sequence, using the
/// BAM index to locate the chunks to read. When <paramref name="start"/> is 0
/// and <paramref name="end"/> is int.MaxValue all chunks of the reference are
/// used; otherwise only the chunks for the given region are.
/// The readStream field is set to null when the method completes.
/// </summary>
/// <param name="bamIndexFile">Index file to read the BAM index from.</param>
/// <param name="refSeqIndex">Index of the reference sequence within the BAM index.</param>
/// <param name="start">Start of the region.</param>
/// <param name="end">End of the region.</param>
/// <param name="header">Header used to create the resulting map.</param>
/// <param name="seqMap">Receives the newly created map; any incoming value is replaced.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="refSeqIndex"/> is negative or not less than the
/// number of reference indexes in the BAM index.
/// </exception>
private void GetAlignmentWithIndex(BAMIndexFile bamIndexFile, int refSeqIndex, int start, int end,
    SAMAlignmentHeader header, ref SequenceAlignmentMap seqMap)
{
    seqMap = new SequenceAlignmentMap(header);

    BAMIndex bamIndexInfo = bamIndexFile.Read();

    // The previous check exempted -1 and ignored other negative values, but the
    // value is used as a list index right below, so any negative value is
    // invalid here. Reject it explicitly with the correct parameter name.
    if (refSeqIndex < 0 || refSeqIndex >= bamIndexInfo.RefIndexes.Count)
    {
        throw new ArgumentOutOfRangeException("refSeqIndex");
    }

    BAMReferenceIndexes refIndex = bamIndexInfo.RefIndexes[refSeqIndex];

    IList<Chunk> chunks;
    if (start == 0 && end == int.MaxValue)
    {
        chunks = GetChunks(refIndex);
    }
    else
    {
        chunks = GetChunks(refIndex, start, end);
    }

    IList<SAMAlignedSequence> alignedSeqs = GetAlignedSequences(chunks, start, end);
    foreach (SAMAlignedSequence alignedSeq in alignedSeqs)
    {
        seqMap.QuerySequences.Add(alignedSeq);
    }

    readStream = null;
}