/// <summary>
/// Sorts the SequenceAlignmentMap according to the configured sort field and
/// returns the resulting list of sorted BAM index groups.
/// </summary>
/// <remarks>
/// <list type="bullet">
/// <item>ChromosomeNameAndCoordinates: one group per entry returned by
/// SortByChromosomeCoordinates(), emitted in the sorted dictionary's key order.</item>
/// <item>ChromosomeCoordinates: one group per reference sequence returned by
/// GetRefSequences() that has an entry in the sorted dictionary, emitted in
/// reference-sequence order.</item>
/// <item>ReadNames: a single group built from SortByReadNames().</item>
/// </list>
/// Any other sort field produces an empty list.
/// </remarks>
/// <example>
/// 1. Sort by chromosome coordinates.
/// <code>
/// BAMSort sorter = new BAMSort([SequenceAlignmentMap], BAMSortByFields.ChromosomeCoordinates);
/// IList&lt;BAMSortedIndex&gt; sortedGroups = sorter.Sort();
/// foreach (BAMSortedIndex sortedGroup in sortedGroups)
/// {
///     sortedGroup.GroupName // Contains the RName
///     foreach (int index in sortedGroup)
///     {
///         index // index of SequenceAlignmentMap.QuerySequences
///     }
/// }
/// </code>
/// 2. Sort by read name.
/// <code>
/// BAMSort sorter = new BAMSort([SequenceAlignmentMap], BAMSortByFields.ReadNames);
/// IList&lt;BAMSortedIndex&gt; sortedGroups = sorter.Sort();
/// foreach (int index in sortedGroups[0]) // There will be only one group in the list.
/// {
///     index // index of SequenceAlignmentMap.QuerySequences
/// }
/// </code>
/// </example>
/// <returns>The sorted BAM index groups; empty when the sort field is not handled.</returns>
public IList<BAMSortedIndex> Sort()
{
    List<BAMSortedIndex> result = new List<BAMSortedIndex>();

    switch (_sortType)
    {
        case BAMSortByFields.ChromosomeNameAndCoordinates:
        {
            // Groups come out in the key order of the sorted dictionary.
            SortedDictionary<string, IList<string>> byChromosome = SortByChromosomeCoordinates();
            foreach (KeyValuePair<string, IList<string>> entry in byChromosome)
            {
                result.Add(new BAMSortedIndex(entry.Value, _sortType) { GroupName = entry.Key });
            }

            break;
        }

        case BAMSortByFields.ChromosomeCoordinates:
        {
            // Groups come out in the order of the reference sequences;
            // references without an entry in the dictionary are skipped.
            SortedDictionary<string, IList<string>> byChromosome = SortByChromosomeCoordinates();
            foreach (string referenceName in _seqAlignMap.GetRefSequences())
            {
                IList<string> groupValues;
                if (!byChromosome.TryGetValue(referenceName, out groupValues))
                {
                    continue;
                }

                result.Add(new BAMSortedIndex(groupValues, _sortType) { GroupName = referenceName });
            }

            break;
        }

        case BAMSortByFields.ReadNames:
        {
            // Read-name sorting yields exactly one group.
            BAMSortedIndex readNameIndex = new BAMSortedIndex(SortByReadNames(), _sortType);
            result.Add(readNameIndex);
            break;
        }
    }

    return result;
}
/// <summary>
/// Parses the given alignment file and builds a matrix counting chimeric
/// paired reads, keyed by the reference name of the first read (row) and the
/// reference name of the second read (column).
/// </summary>
/// <remarks>
/// The file is parsed with SAMParser when SAMInput is set, otherwise with
/// BAMParser. Cells with no chimeras keep the default value "0".
/// </remarks>
/// <param name="filename">Path of the SAM/BAM file to parse.</param>
/// <returns>
/// A sparse matrix over the alignment map's reference sequences whose cells
/// hold the chimera count as an invariant-culture string.
/// </returns>
private Matrix<string, string, string> GetChimeraData(string filename)
{
    // Arguments handed to GetPairedReads.
    // NOTE(review): presumably the mean and standard deviation used to
    // classify paired reads - confirm against GetPairedReads.
    const int Mean = 200;
    const int Deviation = 50;

    SequenceAlignmentMap alignmentMap;
    if (SAMInput)
    {
        SAMParser samParser = new SAMParser();
        alignmentMap = samParser.Parse(filename);
    }
    else
    {
        BAMParser bamParser = new BAMParser();
        alignmentMap = bamParser.Parse(filename);
    }

    // Get the paired reads from the sequence alignment map object.
    IList<PairedRead> pairedReads = alignmentMap.GetPairedReads(Mean, Deviation);

    // Keep only chimeras and group them by the first read's chromosome name.
    var groupedChimeras = pairedReads
        .Where(pair => pair.PairedType == PairedReadType.Chimera)
        .GroupBy(pair => pair.Read1.RName);

    IList<string> chromosomes = alignmentMap.GetRefSequences();

    // Sparse matrix holding the statistics; every cell defaults to "0".
    SparseMatrix<string, string, string> statistics =
        SparseMatrix<string, string, string>.CreateEmptyInstance(chromosomes, chromosomes, "0");

    // Within each first-read group, sub-group by the second read's chromosome
    // and store the number of pairs in the corresponding cell.
    foreach (var group in groupedChimeras)
    {
        foreach (var subgroup in group.GroupBy(pair => pair.Read2.RName))
        {
            // Format with the invariant culture so the stored text does not
            // depend on the current thread culture.
            statistics[group.Key, subgroup.Key] =
                subgroup.Count().ToString(System.Globalization.CultureInfo.InvariantCulture);
        }
    }

    return statistics;
}
/// <summary>
/// Parses the input alignment file and writes a per-chromosome report to the
/// console: either the nucleotide distribution or the coverage profile.
/// </summary>
/// <remarks>
/// The file is parsed with SAMParser when SAMInput is set, otherwise with
/// BAMParser. After the report title and column header are written,
/// GetCoverage is invoked once for every reference sequence of the parsed map.
/// </remarks>
/// <param name="inputFile">Path of the input file.</param>
/// <param name="possibleOccurence">
/// True to write the "Nucleotide Distribution" report (GetCoverage receives
/// "true"); false to write the "Coverage Profile" report (GetCoverage
/// receives "false").
/// </param>
/// <exception cref="InvalidOperationException">
/// Thrown when <paramref name="inputFile"/> is null or empty.
/// </exception>
public void DisplaySequenceItemOccurences(string inputFile, bool possibleOccurence)
{
    if (string.IsNullOrEmpty(inputFile))
    {
        throw new InvalidOperationException("Input File Not specified");
    }

    SequenceAlignmentMap alignmentMap;
    if (SAMInput)
    {
        SAMParser samParser = new SAMParser();
        alignmentMap = samParser.Parse(inputFile);
    }
    else
    {
        BAMParser bamParser = new BAMParser();
        alignmentMap = bamParser.Parse(inputFile);
    }

    IList<string> chromosomes = alignmentMap.GetRefSequences();

    // Pick the report title, column header and GetCoverage flag up front so
    // both report modes share a single write/loop sequence.
    string title;
    string columnHeader;
    string occurenceFlag;
    if (possibleOccurence)
    {
        title = "Nucleotide Distribution:";
        columnHeader = "\r\nPosition\tA\tT\tG\tC\tPossibility Of Occurences";
        occurenceFlag = "true";
    }
    else
    {
        title = "Coverage Profile:";
        columnHeader = "\r\nPosition\tA\tT\tG\tC";
        occurenceFlag = "false";
    }

    Console.Write(title);
    Console.Write(columnHeader);

    foreach (string chromosome in chromosomes)
    {
        GetCoverage(chromosome, alignmentMap, occurenceFlag);
    }
}
/// <summary>
/// Verifies the chimera statistics computed from the given alignment map.
/// </summary>
/// <remarks>
/// Builds a matrix of chimera counts keyed by the first read's reference name
/// (row) and the second read's reference name (column), flattens its 2D text
/// form by stripping new lines and tabs, and asserts that it equals the
/// expected string.
/// </remarks>
/// <param name="alignmentMapobj">Sequence alignment map to compute the statistics from.</param>
private static void TestChimeraData(SequenceAlignmentMap alignmentMapobj)
{
    const string expected = "varchr1chr2chr3chr4chr10320chr22040chr33100chr40000";

    // Fetch the paired reads and narrow them down to chimeras, grouped by the
    // chromosome name of the first read.
    IList<PairedRead> reads = alignmentMapobj.GetPairedReads(200, 50);
    var chimerasByFirstRead = reads
        .Where(pair => pair.PairedType == PairedReadType.Chimera)
        .GroupBy(pair => pair.Read1.RName);

    IList<string> chromosomeNames = alignmentMapobj.GetRefSequences();

    // Sparse matrix for the statistics; every cell starts as "0".
    SparseMatrix<string, string, string> stats =
        SparseMatrix<string, string, string>.CreateEmptyInstance(chromosomeNames, chromosomeNames, "0");

    // Split each first-read group by the second read's chromosome and record
    // the size of every sub-group.
    foreach (var firstReadGroup in chimerasByFirstRead)
    {
        var bySecondRead = firstReadGroup.GroupBy(pair => pair.Read2.RName);
        foreach (var secondReadGroup in bySecondRead)
        {
            int pairCount = secondReadGroup.Count();
            stats[firstReadGroup.Key, secondReadGroup.Key] = pairCount.ToString(CultureInfo.InvariantCulture);
        }
    }

    // Flatten the 2D text by removing line breaks and tabs before comparing.
    string flattened = stats.ToString2D();
    flattened = flattened.Replace(Environment.NewLine, "");
    flattened = flattened.Replace("\t", "");

    Assert.AreEqual(expected, flattened);
}