Example #1
0
        /// <summary>
        /// Sorts the SequenceAlignmentMap by the configured sort field
        /// (chromosome coordinates or read names) and returns the sorted BAM indexers.
        /// </summary>
        /// <example>
        /// 1. Sort by chromosome name.
        /// BAMSort sorter = new BAMSort([SequenceAlignmentMap], BAMSortByFields.ChromosomeCoordinates);
        /// IList&lt;BAMSortedIndex&gt; sortedGroups = sorter.Sort();
        /// foreach (BAMSortedIndex sortedGroup in sortedGroups)
        /// {
        ///     sortedGroup.GroupName // Contains the RName
        ///     foreach (int index in sortedGroup)
        ///     {
        ///         index // index of SequenceAlignmentMap.QuerySequences 
        ///     }
        /// }
        /// 2. Sort by read name.
        /// BAMSort sorter = new BAMSort([SequenceAlignmentMap], BAMSortByFields.ReadNames);
        /// IList&lt;BAMSortedIndex&gt; sortedGroups = sorter.Sort();
        /// foreach (int index in sortedGroups[0]) // There will be only one group in the list.
        /// {
        ///     index // index of SequenceAlignmentMap.QuerySequences 
        /// }
        /// </example>
        /// <returns>sorted BAM indexer</returns>
        public IList<BAMSortedIndex> Sort()
        {
            // Stays empty when sortField matches none of the cases handled below.
            IList<BAMSortedIndex> groups = new List<BAMSortedIndex>();

            switch (sortField)
            {
                case BAMSortByFields.ChromosomeNameAndCoordinates:
                    {
                        // One group per dictionary entry, emitted in the enumeration
                        // order of the sorted dictionary; the key becomes the group name.
                        SortedDictionary<string, IList<string>> filesByName = SortByChromosomeCoordinates();

                        foreach (KeyValuePair<string, IList<string>> entry in filesByName)
                        {
                            groups.Add(new BAMSortedIndex(entry.Value, sortField) { GroupName = entry.Key });
                        }

                        break;
                    }

                case BAMSortByFields.ChromosomeCoordinates:
                    {
                        // Same grouping as above, but groups are emitted in the order
                        // that GetRefSequences() yields the reference names. Names with
                        // no entry in the dictionary are skipped.
                        SortedDictionary<string, IList<string>> filesByReference = SortByChromosomeCoordinates();

                        foreach (string referenceName in sequenceAlignMap.GetRefSequences())
                        {
                            IList<string> groupFiles = null;
                            if (!filesByReference.TryGetValue(referenceName, out groupFiles))
                            {
                                continue;
                            }

                            groups.Add(new BAMSortedIndex(groupFiles, sortField) { GroupName = referenceName });
                        }

                        break;
                    }

                case BAMSortByFields.ReadNames:
                    {
                        // Read-name ordering produces exactly one group covering everything.
                        BAMSortedIndex readNameGroup = new BAMSortedIndex(SortByReadNames(), sortField);
                        groups.Add(readNameGroup);
                        break;
                    }
            }

            return groups;
        }
Example #2
0
        /// <summary>
        /// Sort and merge multiple SAM objects
        /// </summary>
        /// <param name="sortedIndexes">Sorted Indexes of SAM object.</param>
        /// <param name="fstemp">Temporary stream to write alignments.</param>
        /// <param name="formatter">Format aligned sequences in BAM format.</param>
        /// <param name="sequenceAlignmentMaps">List of SAM objects to be merged.</param>
        private void WriteMergeFile(IList <IList <BAMSortedIndex> > sortedIndexes, FileStream fstemp, BAMFormatter formatter, IList <SequenceAlignmentMap> sequenceAlignmentMaps)
        {
            // K-way merge: alignedSeqs[i] holds the next not-yet-written alignment of
            // input i, or null once that input has nothing left.
            List<SAMAlignedSequence> alignedSeqs = new List<SAMAlignedSequence>(sortedIndexes.Count);

            // sortedIndex[i] is the position, inside sortedIndexes[i], of the group
            // that input i is currently reading from.
            int[] sortedIndex = new int[sequenceAlignmentMaps.Count];

            // Prime every input with its first alignment. The helper tolerates an
            // empty group list and empty/null groups, which previously either threw
            // from ElementAt or marked the input as finished too early.
            for (int i = 0; i < sortedIndexes.Count; i++)
            {
                alignedSeqs.Add(ReadNextAlignedSequence(sortedIndexes[i], sortedIndex, i, sequenceAlignmentMaps[i]));
            }

            // Intentionally NOT reset on every pass: the previous winner stays the
            // incumbent, so on ties the merge keeps draining the same input. It is
            // only cleared when that input runs out.
            int smallestIndex = -1;

            do
            {
                for (int index = 0; index < alignedSeqs.Count; index++)
                {
                    SAMAlignedSequence candidate = alignedSeqs[index];
                    if (candidate == null)
                    {
                        continue;
                    }

                    if (smallestIndex == -1)
                    {
                        smallestIndex = index;
                        continue;
                    }

                    SAMAlignedSequence smallest = alignedSeqs[smallestIndex];

                    // Use ONE comparison result for both the ordering and the equality
                    // test, so names that differ only by case still fall through to
                    // the position tie-break (and a null RName cannot throw).
                    int nameOrder = string.Compare(smallest.RName, candidate.RName, StringComparison.OrdinalIgnoreCase);
                    if (nameOrder > 0 || (nameOrder == 0 && smallest.Pos > candidate.Pos))
                    {
                        smallestIndex = index;
                    }
                }

                if (smallestIndex > -1)
                {
                    SAMAlignedSequence alignSeqToWrite = alignedSeqs[smallestIndex];
                    int writtenFrom = smallestIndex;

                    // Refill the slot before writing, matching the original ordering
                    // of "advance, then write".
                    alignedSeqs[writtenFrom] = ReadNextAlignedSequence(
                        sortedIndexes[writtenFrom],
                        sortedIndex,
                        writtenFrom,
                        sequenceAlignmentMaps[writtenFrom]);

                    if (alignedSeqs[writtenFrom] == null)
                    {
                        // This input is exhausted; pick a fresh candidate next pass.
                        smallestIndex = -1;
                    }

                    formatter.WriteAlignedSequence(_header, alignSeqToWrite, fstemp);
                }
            } while (!alignedSeqs.All(a => a == null));
        }

        /// <summary>
        /// Advances one merge input to its next alignment, moving on to later
        /// groups whenever the current group is null or has no more indices.
        /// </summary>
        /// <param name="groups">Sorted index groups of the input; null is treated as empty.</param>
        /// <param name="groupCursors">Per-input position of the group being read; updated in place.</param>
        /// <param name="inputIndex">Index of the input inside <paramref name="groupCursors"/>.</param>
        /// <param name="alignmentMap">Alignment map whose QuerySequences the indices refer to.</param>
        /// <returns>The next aligned sequence, or null when the input is exhausted.</returns>
        private static SAMAlignedSequence ReadNextAlignedSequence(IList<BAMSortedIndex> groups, int[] groupCursors, int inputIndex, SequenceAlignmentMap alignmentMap)
        {
            if (groups == null)
            {
                return null;
            }

            while (groupCursors[inputIndex] < groups.Count)
            {
                BAMSortedIndex currentGroup = groups[groupCursors[inputIndex]];
                if (currentGroup != null && currentGroup.MoveNext())
                {
                    return alignmentMap.QuerySequences[currentGroup.Current];
                }

                // Current group is null or used up: continue with the next one.
                groupCursors[inputIndex]++;
            }

            return null;
        }