/// <summary>
/// Sorts the SequenceAlignmentMap according to the configured sort field and
/// returns the resulting list of sorted BAM indexers.
/// </summary>
/// <example>
/// 1. Sort by chromosome coordinates.
/// BAMSort sorter = new BAMSort([SequenceAlignmentMap], BAMSortByFields.ChromosomeCoordinates);
/// IList&lt;BAMSortedIndex&gt; sortedGroups = sorter.Sort();
/// foreach (BAMSortedIndex sortedGroup in sortedGroups)
/// {
///     sortedGroup.GroupName // Contains the RName
///     foreach (int index in sortedGroup)
///     {
///         index // index of SequenceAlignmentMap.QuerySequences
///     }
/// }
/// 2. Sort by read name.
/// BAMSort sorter = new BAMSort([SequenceAlignmentMap], BAMSortByFields.ReadNames);
/// IList&lt;BAMSortedIndex&gt; sortedGroups = sorter.Sort();
/// foreach (int index in sortedGroups[0]) // There will be only one group in the list.
/// {
///     index // index of SequenceAlignmentMap.QuerySequences
/// }
/// </example>
/// <returns>
/// One BAMSortedIndex per group. The list is empty when the sort field is
/// none of the values handled below.
/// </returns>
public IList<BAMSortedIndex> Sort()
{
    List<BAMSortedIndex> result = new List<BAMSortedIndex>();

    if (sortField == BAMSortByFields.ChromosomeNameAndCoordinates)
    {
        // Groups are emitted in the key order of the sorted dictionary,
        // i.e. ordered by reference name (RName).
        foreach (KeyValuePair<string, IList<string>> group in SortByChromosomeCoordinates())
        {
            result.Add(new BAMSortedIndex(group.Value, sortField) { GroupName = group.Key });
        }
    }
    else if (sortField == BAMSortByFields.ChromosomeCoordinates)
    {
        // Groups are emitted in the order the reference sequences are reported
        // by the alignment map; references without a sorted group are skipped.
        SortedDictionary<string, IList<string>> groupsByReference = SortByChromosomeCoordinates();
        foreach (string refName in sequenceAlignMap.GetRefSequences())
        {
            IList<string> groupFiles = null;
            if (!groupsByReference.TryGetValue(refName, out groupFiles))
            {
                continue;
            }

            result.Add(new BAMSortedIndex(groupFiles, sortField) { GroupName = refName });
        }
    }
    else if (sortField == BAMSortByFields.ReadNames)
    {
        // Sorting by read name (QName) yields a single, unnamed group.
        result.Add(new BAMSortedIndex(SortByReadNames(), sortField));
    }

    return result;
}
/// <summary>
/// Merges several already-sorted SAM objects into one stream of alignments.
/// One pending alignment is held per input; on every pass the smallest pending
/// alignment (by reference name, then by position) is written and replaced by
/// the next alignment from the same input.
/// </summary>
/// <param name="sortedIndexes">Sorted indexes (one list of groups per SAM object).</param>
/// <param name="fstemp">Temporary stream to write alignments.</param>
/// <param name="formatter">Formats aligned sequences in BAM format.</param>
/// <param name="sequenceAlignmentMaps">List of SAM objects to be merged.</param>
private void WriteMergeFile(IList<IList<BAMSortedIndex>> sortedIndexes, FileStream fstemp, BAMFormatter formatter, IList<SequenceAlignmentMap> sequenceAlignmentMaps)
{
    // Pending (read but not yet written) alignment of each input; null once the input is exhausted.
    List<SAMAlignedSequence> alignedSeqs = new List<SAMAlignedSequence>(sortedIndexes.Count);

    // Position of the group currently being read, per input.
    int[] sortedIndex = new int[sequenceAlignmentMaps.Count];

    // Prime the merge with the first alignment of every input. An input without
    // any group (or with only empty groups) simply starts out exhausted.
    for (int i = 0; i < sortedIndexes.Count; i++)
    {
        alignedSeqs.Add(ReadNextMergeCandidate(sortedIndexes[i], sortedIndex, i, sequenceAlignmentMaps[i]));
    }

    // Deliberately kept across passes: the input written last stays the initial
    // candidate, so ties keep being resolved in favour of that input.
    int smallestIndex = -1;
    do
    {
        for (int index = 0; index < alignedSeqs.Count; index++)
        {
            SAMAlignedSequence candidate = alignedSeqs[index];
            if (candidate == null)
            {
                continue;
            }

            if (smallestIndex == -1)
            {
                smallestIndex = index;
                continue;
            }

            // Use one comparison for both the ordering and the tie test so that
            // names equal under OrdinalIgnoreCase always fall through to Pos.
            SAMAlignedSequence smallest = alignedSeqs[smallestIndex];
            int nameOrder = string.Compare(smallest.RName, candidate.RName, StringComparison.OrdinalIgnoreCase);
            if (nameOrder > 0 || (nameOrder == 0 && smallest.Pos > candidate.Pos))
            {
                smallestIndex = index;
            }
        }

        if (smallestIndex > -1)
        {
            SAMAlignedSequence alignSeqTowrite = alignedSeqs[smallestIndex];
            int writtenInput = smallestIndex;

            // Refill the slot before writing, as the original did.
            alignedSeqs[writtenInput] = ReadNextMergeCandidate(
                sortedIndexes[writtenInput],
                sortedIndex,
                writtenInput,
                sequenceAlignmentMaps[writtenInput]);

            if (alignedSeqs[writtenInput] == null)
            {
                // Input exhausted: the next pass must pick a fresh candidate.
                smallestIndex = -1;
            }

            formatter.WriteAlignedSequence(_header, alignSeqTowrite, fstemp);
        }
    }
    while (alignedSeqs.Any(a => a != null));
}

/// <summary>
/// Returns the next alignment of one merge input, moving on to the following
/// group whenever the current group is null or has no more entries.
/// </summary>
/// <param name="groups">Sorted groups of the input.</param>
/// <param name="groupPositions">Current group position of every input; updated in place.</param>
/// <param name="inputIndex">Index of the input inside <paramref name="groupPositions"/>.</param>
/// <param name="map">SAM object the group entries point into.</param>
/// <returns>The next alignment, or null when the input is exhausted.</returns>
private static SAMAlignedSequence ReadNextMergeCandidate(IList<BAMSortedIndex> groups, int[] groupPositions, int inputIndex, SequenceAlignmentMap map)
{
    while (groupPositions[inputIndex] < groups.Count)
    {
        BAMSortedIndex group = groups[groupPositions[inputIndex]];
        if (group != null && group.MoveNext())
        {
            return map.QuerySequences[group.Current];
        }

        groupPositions[inputIndex]++;
    }

    return null;
}