Exemple #1
0
        /// <summary>
        /// Sorts the SequenceAlignmentMap according to the sort field this sorter was
        /// configured with and returns the resulting list of sorted BAM index groups.
        /// </summary>
        /// <remarks>
        /// The shape of the result depends on the configured sort field:
        /// ChromosomeNameAndCoordinates yields one group per entry returned by
        /// SortByChromosomeCoordinates(), in the key order of that sorted dictionary;
        /// ChromosomeCoordinates yields one group per reference name returned by
        /// GetRefSequences() (in that order), skipping names that have no entry;
        /// ReadNames yields a single group built from SortByReadNames().
        /// Any other value yields an empty list.
        /// </remarks>
        /// <example>
        /// 1. Sort by chromosome coordinates.
        /// BAMSort sorter = new BAMSort([SequenceAlignmentMap], BAMSortByFields.ChromosomeCoordinates);
        /// IList&lt;BAMSortedIndex&gt; sortedGroups = sorter.Sort();
        /// foreach (BAMSortedIndex sortedGroup in sortedGroups)
        /// {
        ///     sortedGroup.GroupName // Contains the RName
        ///     foreach (int index in sortedGroup)
        ///     {
        ///         index // index of SequenceAlignmentMap.QuerySequences
        ///     }
        /// }
        /// 2. Sort by read name.
        /// BAMSort sorter = new BAMSort([SequenceAlignmentMap], BAMSortByFields.ReadNames);
        /// IList&lt;BAMSortedIndex&gt; sortedGroups = sorter.Sort();
        /// foreach (int index in sortedGroups[0]) // There will be only one group in the list.
        /// {
        ///     index // index of SequenceAlignmentMap.QuerySequences
        /// }
        /// </example>
        /// <returns>List of sorted BAM index groups; empty when the sort field is not handled.</returns>
        public IList<BAMSortedIndex> Sort()
        {
            IList<BAMSortedIndex> result = new List<BAMSortedIndex>();

            switch (_sortType)
            {
                case BAMSortByFields.ReadNames:
                {
                    // Read-name (QName) ordering produces exactly one, unnamed group.
                    result.Add(new BAMSortedIndex(SortByReadNames(), _sortType));
                    break;
                }

                case BAMSortByFields.ChromosomeNameAndCoordinates:
                {
                    // Groups follow the key order of the sorted dictionary, i.e. they are
                    // ordered by chromosome name (RName).
                    SortedDictionary<string, IList<string>> byChromosome = SortByChromosomeCoordinates();

                    foreach (KeyValuePair<string, IList<string>> entry in byChromosome)
                    {
                        result.Add(new BAMSortedIndex(entry.Value, _sortType) { GroupName = entry.Key });
                    }

                    break;
                }

                case BAMSortByFields.ChromosomeCoordinates:
                {
                    // Groups follow the order of the reference sequences reported by the
                    // alignment map rather than the dictionary's key order.
                    SortedDictionary<string, IList<string>> byChromosome = SortByChromosomeCoordinates();

                    foreach (string referenceName in _seqAlignMap.GetRefSequences())
                    {
                        IList<string> groupItems;
                        if (!byChromosome.TryGetValue(referenceName, out groupItems))
                        {
                            // No entry was produced for this reference; emit no group for it.
                            continue;
                        }

                        result.Add(new BAMSortedIndex(groupItems, _sortType) { GroupName = referenceName });
                    }

                    break;
                }
            }

            return result;
        }
Exemple #2
0
        /// <summary>
        /// Parses the given alignment file and builds a matrix that counts its chimeric
        /// paired reads per (first read reference, second read reference) combination.
        /// </summary>
        /// <param name="filename">
        /// Path of the alignment file. It is parsed with SAMParser when SAMInput is set,
        /// otherwise with BAMParser.
        /// </param>
        /// <returns>
        /// Matrix whose row and column keys are the reference sequence names of the
        /// alignment map. Cell [r1, r2] holds, as a string, the number of chimeric pairs
        /// whose first read has RName r1 and whose second read has RName r2; the matrix is
        /// created with "0" as the value for every combination that is not assigned.
        /// </returns>
        private Matrix<string, string, string> GetChimeraData(string filename)
        {
            SequenceAlignmentMap alignmentMapobj;

            if (SAMInput)
            {
                SAMParser samParser = new SAMParser();
                alignmentMapobj = samParser.Parse(filename);
            }
            else
            {
                BAMParser bamParser = new BAMParser();
                alignmentMapobj = bamParser.Parse(filename);
            }

            // Get paired reads from the sequence alignment map object.
            // NOTE(review): 200 and 50 are presumably the mean and standard deviation that an
            // earlier revision took as parameters - confirm against GetPairedReads.
            IList<PairedRead> pairedReads = alignmentMapobj.GetPairedReads(200, 50);

            // Keep only the chimeras.
            var chimeras = pairedReads.Where(PE => PE.PairedType == PairedReadType.Chimera);

            // Group chimeras by the chromosome name of their first read.
            var groupedChimeras = chimeras.GroupBy(PR => PR.Read1.RName);

            IList<string> chrs = alignmentMapobj.GetRefSequences();

            // Sparse matrix keyed by reference name on both axes.
            SparseMatrix<string, string, string> statistics =
                SparseMatrix<string, string, string>.CreateEmptyInstance(
                    chrs, chrs, "0");

            // Within each first-read group, sub-group by the second read's chromosome.
            foreach (var group in groupedChimeras)
            {
                foreach (var subgroup in group.GroupBy(PE => PE.Read2.RName))
                {
                    // Format the count culture-independently: the value is data, not UI text.
                    // Fully qualified so no additional using directive is required.
                    statistics[group.Key, subgroup.Key] =
                        subgroup.Count().ToString(System.Globalization.CultureInfo.InvariantCulture);
                }
            }

            return statistics;
        }
Exemple #3
0
        /// <summary>
        /// Parses the input alignment file and writes a per-reference coverage report to the
        /// console, preceded by a title and a tab-separated column header.
        /// </summary>
        /// <param name="inputFile">
        /// Path of the input file. It is parsed with SAMParser when SAMInput is set,
        /// otherwise with BAMParser.
        /// </param>
        /// <param name="possibleOccurence">
        /// True to display the nucleotide distribution (the header then includes a
        /// "Possibility Of Occurences" column); false to display the coverage profile.
        /// </param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <paramref name="inputFile"/> is null or empty.
        /// </exception>
        public void DisplaySequenceItemOccurences(string inputFile,
                                                  bool possibleOccurence)
        {
            if (string.IsNullOrEmpty(inputFile))
            {
                throw new InvalidOperationException("Input File Not specified");
            }

            SequenceAlignmentMap alignmentMap;
            if (SAMInput)
            {
                SAMParser samParser = new SAMParser();
                alignmentMap = samParser.Parse(inputFile);
            }
            else
            {
                BAMParser bamParser = new BAMParser();
                alignmentMap = bamParser.Parse(inputFile);
            }

            IList<string> referenceNames = alignmentMap.GetRefSequences();

            // The two report modes differ only in their title, their header and the flag
            // handed to GetCoverage, so pick those up front and share the loop.
            string title = possibleOccurence
                ? "Nucleotide Distribution:"
                : "Coverage Profile:";
            string columnHeader = possibleOccurence
                ? "\r\nPosition\tA\tT\tG\tC\tPossibility Of Occurences"
                : "\r\nPosition\tA\tT\tG\tC";
            string occurrenceFlag = possibleOccurence ? "true" : "false";

            Console.Write(title);
            Console.Write(columnHeader);

            foreach (string referenceName in referenceNames)
            {
                GetCoverage(referenceName, alignmentMap, occurrenceFlag);
            }
        }
Exemple #4
0
        /// <summary>
        /// Verifies the chimera statistics computed from an alignment map: counts chimeric
        /// paired reads per (first read reference, second read reference) combination and
        /// compares the flattened matrix text with the expected value.
        /// </summary>
        /// <param name="alignmentMapobj">Alignment map whose paired reads are examined.</param>
        private static void TestChimeraData(SequenceAlignmentMap alignmentMapobj)
        {
            string expected = "varchr1chr2chr3chr4chr10320chr22040chr33100chr40000";

            // Read pairs, restricted to chimeras and bucketed by the first read's chromosome.
            IList<PairedRead> readPairs = alignmentMapobj.GetPairedReads(200, 50);
            var chimerasByFirstRead = readPairs
                .Where(pair => pair.PairedType == PairedReadType.Chimera)
                .GroupBy(pair => pair.Read1.RName);

            // Matrix keyed by reference name on both axes, created with "0" as its third argument.
            IList<string> referenceNames = alignmentMapobj.GetRefSequences();
            SparseMatrix<string, string, string> counts =
                SparseMatrix<string, string, string>.CreateEmptyInstance(
                    referenceNames, referenceNames, "0");

            foreach (var firstReadGroup in chimerasByFirstRead)
            {
                // Within one first-read chromosome, bucket by the second read's chromosome.
                var bySecondRead = firstReadGroup.GroupBy(pair => pair.Read2.RName);
                foreach (var secondReadGroup in bySecondRead)
                {
                    int pairCount = secondReadGroup.Count();
                    counts[firstReadGroup.Key, secondReadGroup.Key] =
                        pairCount.ToString(CultureInfo.InvariantCulture);
                }
            }

            // Strip line breaks and tabs so the comparison ignores the 2D layout.
            string actual = counts.ToString2D()
                .Replace(Environment.NewLine, "")
                .Replace("\t", "");

            Assert.AreEqual(expected, actual);
        }