Beispiel #1
0
        public void TestFormatter()
        {
            string               filePath        = @"TestUtils\BAM\SeqAlignment.bam".TestDir();
            const string         outputfilePath  = "BAMTests123.bam";
            string               outputIndexFile = outputfilePath + ".bai";
            BAMParser            parser          = new BAMParser();
            SequenceAlignmentMap alignmentMap    = parser.ParseOne <SequenceAlignmentMap>(filePath);

            Assert.IsNotNull(alignmentMap);
            Assert.IsNotNull(alignmentMap.Header);
            Assert.AreEqual(alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count, 1);
            Assert.AreEqual(alignmentMap.Header.ReferenceSequences.Count, 1);
            Assert.IsNotNull(alignmentMap.QuerySequences);
            Assert.AreEqual(alignmentMap.QuerySequences.Count, 2);

            BAMFormatter formatter = new BAMFormatter();

            formatter.Format(alignmentMap, outputfilePath);

            formatter.CreateSortedBAMFile = true;
            formatter.CreateIndexFile     = true;
            formatter.Format(alignmentMap, outputfilePath);
            Assert.IsTrue(File.Exists("BAMTests123.bam.bai"));

            alignmentMap = parser.ParseOne <SequenceAlignmentMap>(outputfilePath);
            Assert.IsNotNull(alignmentMap);
            Assert.IsNotNull(alignmentMap.Header);
            Assert.AreEqual(alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count, 1);
            Assert.AreEqual(alignmentMap.Header.ReferenceSequences.Count, 1);
            Assert.AreEqual(alignmentMap.QuerySequences.Count, 2);

            alignmentMap = parser.ParseRange("BAMTests123.bam", new SequenceRange("chr20", 0, int.MaxValue));
            Assert.IsNotNull(alignmentMap);
            Assert.IsNotNull(alignmentMap.Header);
            Assert.AreEqual(alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count, 1);
            Assert.AreEqual(alignmentMap.Header.ReferenceSequences.Count, 1);
            Assert.AreEqual(alignmentMap.QuerySequences.Count, 2);

            alignmentMap = parser.ParseRange("BAMTests123.bam", new SequenceRange("chr20", 0, 28833));
            Assert.IsNotNull(alignmentMap);
            Assert.IsNotNull(alignmentMap.Header);
            Assert.AreEqual(alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count, 1);
            Assert.AreEqual(alignmentMap.Header.ReferenceSequences.Count, 1);
            Assert.AreEqual(alignmentMap.QuerySequences.Count, 1);

            File.Delete(outputfilePath);
            File.Delete(outputIndexFile);
        }
Beispiel #2
0
        public void TestFormatterWithSort()
        {
            string    inputFilePath   = @"TestUtils\BAM\SeqAlignment.bam".TestDir();
            string    outputFilePath1 = "output1.bam";
            string    outputFilePath2 = "output2.bam";
            BAMParser parser          = null;

            try
            {
                parser = new BAMParser();
                BAMFormatter         formatter    = new BAMFormatter();
                SequenceAlignmentMap alignmentMap = parser.ParseOne <SequenceAlignmentMap>(inputFilePath);

                Assert.IsTrue(alignmentMap != null);
                Assert.AreEqual(alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count, 1);
                Assert.AreEqual(alignmentMap.Header.ReferenceSequences.Count, 1);
                Assert.AreEqual(alignmentMap.QuerySequences.Count, 2);

                formatter.CreateSortedBAMFile = true;
                formatter.SortType            = BAMSortByFields.ChromosomeCoordinates;
                formatter.Format(alignmentMap, outputFilePath1);

                alignmentMap = parser.ParseOne <SequenceAlignmentMap>(inputFilePath);
                formatter.Format(alignmentMap, outputFilePath2);

                Assert.IsTrue(File.Exists(outputFilePath1));
                Assert.IsTrue(File.Exists(outputFilePath2));

                Assert.AreEqual(true, FileCompare(outputFilePath1, outputFilePath2));
            }
            finally
            {
                if (parser != null)
                {
                    parser.Dispose();
                }
                File.Delete(outputFilePath1);
                File.Delete(outputFilePath2);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Get Chimera data
        /// </summary>
        /// <param name="filename">Path of the BAM file</param>
        /// <param name="mean">Mean value</param>
        /// <param name="deviation">Standard deviation</param>
        /// <returns></returns>
        private Matrix <string, string, string> GetChimeraData(string filename)
        {
            SequenceAlignmentMap alignmentMapobj = null;

            if (!SAMInput)
            {
                BAMParser bamParser = new BAMParser();
                alignmentMapobj = bamParser.ParseOne <SequenceAlignmentMap>(filename);
            }
            else
            {
                SAMParser samParser = new SAMParser();
                alignmentMapobj = samParser.ParseOne <SequenceAlignmentMap>(filename);
            }

            // get reads from sequence alignment map object.
            IList <PairedRead> pairedReads = null;

            pairedReads = alignmentMapobj.GetPairedReads(200, 50);

            // select chimeras from reads.
            var chimeras = pairedReads.Where(PE => PE.PairedType == PairedReadType.Chimera);

            // Group chimeras based on first reads chromosomes name.
            var groupedChimeras =
                chimeras.GroupBy(PR => PR.Read1.RName);

            IList <string> chrs = alignmentMapobj.GetRefSequences();

            // Declare sparse matrix to store statistics.
            SparseMatrix <string, string, string> statistics =
                SparseMatrix <string, string, string> .CreateEmptyInstance(
                    chrs, chrs, "0");

            // For each group create sub group depending on the second reads chromosomes.
            foreach (var group in groupedChimeras)
            {
                foreach (var subgroup in group.GroupBy(PE => PE.Read2.RName))
                {
                    // store the count to stats
                    statistics[group.Key, subgroup.Key] = subgroup.Count().ToString();
                }
            }

            return(statistics);
        }
Beispiel #4
0
        /// <summary>
        /// Public method to sort BAM file.
        /// SAMUtil.exe in.bam out.bam
        /// </summary>
        public void DoSort()
        {
            string sortExtension = ".sort";

            if (string.IsNullOrEmpty(InputFilename))
            {
                throw new InvalidOperationException(Resources.SortHelp);
            }

            BAMParser            parse = new BAMParser();
            SequenceAlignmentMap map   = null;

            try
            {
                map = parse.ParseOne <SequenceAlignmentMap>(InputFilename);
            }
            catch (Exception ex)
            {
                throw new InvalidOperationException(Resources.InvalidBAMFile, ex);
            }

            BAMFormatter format = new BAMFormatter
            {
                CreateSortedBAMFile = true,
                SortType            =
                    this.SortByReadName
                                              ? BAMSortByFields.ReadNames
                                              : BAMSortByFields.ChromosomeCoordinates
            };

            if (string.IsNullOrEmpty(OutputFilename))
            {
                OutputFilename = InputFilename + sortExtension;
                autoGeneratedOutputFilename = true;
            }

            format.Format(map, OutputFilename);

            if (autoGeneratedOutputFilename)
            {
                Console.WriteLine(Resources.SuccessMessageWithOutputFileName, OutputFilename);
            }
        }
Beispiel #5
0
        /// <summary>
        /// Display Sequence Item occurences percentage
        /// </summary>
        /// <param name="inputFile">Path of the input file</param>
        /// <param name="possibleOccurence">True to display Nculeaotide distribution</param>
        public void DisplaySequenceItemOccurences(string inputFile,
                                                  bool possibleOccurence)
        {
            if (string.IsNullOrEmpty(inputFile))
            {
                throw new InvalidOperationException("Input File Not specified");
            }

            SequenceAlignmentMap alignmentMapobj = null;

            if (!SAMInput)
            {
                BAMParser bamParser = new BAMParser();
                alignmentMapobj = bamParser.ParseOne <SequenceAlignmentMap>(inputFile);
            }
            else
            {
                SAMParser samParser = new SAMParser();
                alignmentMapobj = samParser.ParseOne <SequenceAlignmentMap>(inputFile);
            }

            IList <string> chromosomes = alignmentMapobj.GetRefSequences();

            if (possibleOccurence)
            {
                Console.Write("Nucleotide Distribution:");
                Console.Write("\r\nPosition\tA\tT\tG\tC\tPossibility Of Occurences");
                foreach (string str in chromosomes)
                {
                    GetCoverage(str, alignmentMapobj, "true");
                }
            }
            else
            {
                Console.Write("Coverage Profile:");
                Console.Write("\r\nPosition\tA\tT\tG\tC");
                foreach (string str in chromosomes)
                {
                    GetCoverage(str, alignmentMapobj, "false");
                }
            }
        }
Beispiel #6
0
        /// <summary>
        /// Parses SAM/BAm file based on input file.
        /// </summary>
        private void PerformParse()
        {
            string samExtension = ".sam";
            string bamExtension = ".bam";

            if (Helper.IsBAM(InputFilename))
            {
                BAMParser parser = new BAMParser();
                try
                {
                    _sequenceAlignmentMap = parser.ParseOne <SequenceAlignmentMap>(InputFilename);
                }
                catch (Exception ex)
                {
                    throw new InvalidOperationException(Resources.InvalidBAMFile, ex);
                }

                if (string.IsNullOrEmpty(OutputFilename))
                {
                    OutputFilename = InputFilename + samExtension;
                }
            }
            else
            {
                SAMParser parser = new SAMParser();
                try
                {
                    _sequenceAlignmentMap = parser.ParseOne <SequenceAlignmentMap>(InputFilename);
                }
                catch (Exception ex)
                {
                    throw new InvalidOperationException(Resources.InvalidSAMFile, ex);
                }

                _isSAM = true;
                if (string.IsNullOrEmpty(OutputFilename))
                {
                    OutputFilename = InputFilename + bamExtension;
                }
            }
        }
Beispiel #7
0
        public void TestParser()
        {
            string    FilePath = @"TestUtils\BAM\SeqAlignment.bam".TestDir();
            BAMParser parser   = null;

            try
            {
                parser = new BAMParser();
                SequenceAlignmentMap alignmentMap = parser.ParseOne <SequenceAlignmentMap>(FilePath);
                Assert.IsTrue(alignmentMap != null);
                Assert.AreEqual(alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count, 1);
                Assert.AreEqual(alignmentMap.QuerySequences.Count, 2);
            }
            finally
            {
                if (parser != null)
                {
                    parser.Dispose();
                }
            }
        }
Beispiel #8
0
        /// <summary>
        /// Get chromoses with orphan regions
        /// </summary>
        /// <param name="filename">Path of the BAM file</param>
        /// <param name="mean">Mean value</param>
        /// <param name="deviation">Standard deviation</param>
        /// <returns></returns>
        private void DisplayOrphans(string filename)
        {
            SequenceAlignmentMap alignmentMapobj = null;

            if (!SAMInput)
            {
                BAMParser bamParser = new BAMParser();
                alignmentMapobj = bamParser.ParseOne <SequenceAlignmentMap>(filename);
            }
            else
            {
                SAMParser samParser = new SAMParser();
                alignmentMapobj = samParser.ParseOne <SequenceAlignmentMap>(filename);
            }

            // get reads from sequence alignment map object.
            IList <PairedRead> pairedReads = null;

            // Get Aligned sequences
            IList <SAMAlignedSequence> alignedSeqs = alignmentMapobj.QuerySequences;

            pairedReads = alignmentMapobj.GetPairedReads(0, 0);


            // Get the orphan regions.
            var orphans = pairedReads.Where(PR => PR.PairedType == PairedReadType.Orphan);
            int count   = orphans.Count();

            if (count == 0)
            {
                Console.WriteLine("No Orphans to display");
            }

            var orphanRegions = new List <ISequenceRange>(count);

            orphanRegions.AddRange(orphans.Select(orphanRead => GetRegion(orphanRead.Read1)));

            // Get sequence range grouping object.
            SequenceRangeGrouping rangeGroup = new SequenceRangeGrouping(orphanRegions);

            if (!rangeGroup.GroupIDs.Any())
            {
                Console.Write("\r\nNo Orphan reads to display");
            }
            else
            {
                Console.Write("Region of Orphan reads:");
                DisplaySequenceRange(rangeGroup);
            }

            SequenceRangeGrouping mergedRegions = rangeGroup.MergeOverlaps();

            if (!mergedRegions.GroupIDs.Any())
            {
                Console.Write("\r\nNo hot spots to display");
            }
            else
            {
                Console.Write("\r\nChromosomal hot spot:");
                DisplaySequenceRange(mergedRegions);
            }
        }
Beispiel #9
0
        /// <summary>
        /// Indentify hot spot chromosomes for length anamoly regions.
        /// </summary>
        /// <param name="inputFile"> Input file</param>
        /// <param name="mean">Mean value</param>
        /// <param name="standardDeviation">Standard deviation</param>
        private void IdentifyLentghAnamolies(string filename,
                                             float mean = -1, float deviation = -1)
        {
            bool calculateMeanNdeviation = false;

            if (mean == -1 || deviation == -1)
            {
                calculateMeanNdeviation = true;
            }

            SequenceAlignmentMap alignmentMapobj = null;

            if (!SAMInput)
            {
                BAMParser bamParser = new BAMParser();
                alignmentMapobj = bamParser.ParseOne <SequenceAlignmentMap>(filename);
            }
            else
            {
                SAMParser samParser = new SAMParser();
                alignmentMapobj = samParser.ParseOne <SequenceAlignmentMap>(filename);
            }

            // get reads from sequence alignment map object.
            IList <PairedRead> pairedReads = null;

            if (calculateMeanNdeviation)
            {
                pairedReads = alignmentMapobj.GetPairedReads();
            }
            else
            {
                pairedReads = alignmentMapobj.GetPairedReads(mean, deviation);
            }

            // Get the orphan regions.
            var orphans = pairedReads.Where(PR => PR.PairedType == PairedReadType.Orphan);


            if (orphans.Count() == 0)
            {
                Console.WriteLine("No Orphans to display");
            }

            List <ISequenceRange> orphanRegions = new List <ISequenceRange>(orphans.Count());

            foreach (PairedRead orphanRead in orphans)
            {
                orphanRegions.Add(GetRegion(orphanRead.Read1));
            }

            // Get sequence range grouping for Orphan regions.
            SequenceRangeGrouping orphanRangegroup = new SequenceRangeGrouping(orphanRegions);

            // Get the Length anomalies regions.
            var lengthAnomalies = pairedReads.Where(PE => PE.PairedType == PairedReadType.LengthAnomaly);

            if (lengthAnomalies.Count() == 0)
            {
                Console.WriteLine("No Anomalies to display");
            }

            List <ISequenceRange> lengthAnomalyRegions = new List <ISequenceRange>(lengthAnomalies.Count());

            foreach (PairedRead laRead in lengthAnomalies)
            {
                SequenceRange range = new SequenceRange();
                range.ID    = laRead.Read1.RName;
                range.Start = laRead.Read1.Pos;
                range.End   = laRead.Read1.Pos + laRead.InsertLength;
                lengthAnomalyRegions.Add(range);
            }

            // Get sequence range grouping for length anomaly regions.
            SequenceRangeGrouping lengthAnomalyRangegroup =
                new SequenceRangeGrouping(lengthAnomalyRegions);

            if (lengthAnomalyRangegroup.GroupIDs.Count() == 0)
            {
                Console.Write("\r\nNo Length anomalies reads to display");
            }
            else
            {
                Console.Write("Region of length anomaly:");
                DisplaySequenceRange(lengthAnomalyRangegroup);
            }

            if (orphanRangegroup.GroupIDs.Count() == 0)
            {
                Console.Write("\r\nNo Orphan reads to display");
            }
            else
            {
                Console.Write("\r\nRegion of Orphan reads:");
                DisplaySequenceRange(orphanRangegroup);
            }

            SequenceRangeGrouping intersectedRegions =
                lengthAnomalyRangegroup.Intersect(orphanRangegroup);

            if (intersectedRegions.GroupIDs.Count() == 0)
            {
                Console.Write("\r\nNo Hot spots found");
            }
            else
            {
                Console.Write("\r\nChromosomal Hot spot of length anomaly and Orphan region:");
                DisplaySequenceRange(intersectedRegions);
            }
        }
Beispiel #10
0
        /// <summary>
        /// Merge multiple sorted alignments.
        /// SAMUtil.exe out.bam in1.bam in2.bam
        /// </summary>
        public void DoMerge()
        {
            if (FilePaths == null)
            {
                throw new InvalidOperationException("FilePath");
            }

            if (FilePaths.Length < 2)
            {
                throw new InvalidOperationException(Resources.MergeHelp);
            }

            IList <IList <BAMSortedIndex> > sortedIndexes         = new List <IList <BAMSortedIndex> >();
            IList <SequenceAlignmentMap>    sequenceAlignmentMaps = new List <SequenceAlignmentMap>();
            IList <int> help = new List <int>();

            Parallel.For(0, FilePaths.Length, (int index) =>
            {
                IList <BAMSortedIndex> sortedIndex;
                BAMParser parser = new BAMParser();;
                SequenceAlignmentMap map;
                if (index == 0)
                {
                    try
                    {
                        map = parser.ParseOne <SequenceAlignmentMap>(FilePaths[0]);
                    }
                    catch
                    {
                        throw new InvalidOperationException(Resources.InvalidBAMFile);
                    }

                    if (map == null)
                    {
                        throw new InvalidOperationException(Resources.EmptyFile);
                    }

                    if (string.IsNullOrEmpty(HeaderFile) && map.Header.RecordFields.Count == 0)
                    {
                        throw new InvalidOperationException(Resources.HeaderMissing);
                    }

                    if (!string.IsNullOrEmpty(HeaderFile))
                    {
                        SAMParser parse = new SAMParser();
                        SequenceAlignmentMap head;
                        try
                        {
                            head = parse.ParseOne <SequenceAlignmentMap>(HeaderFile);
                        }
                        catch
                        {
                            throw new InvalidOperationException(Resources.IncorrectHeaderFile);
                        }

                        if (head == null)
                        {
                            throw new InvalidOperationException(Resources.EmptyFile);
                        }

                        header = head.Header;
                    }
                    else
                    {
                        header = map.Header;
                    }

                    sortedIndex = Sort(map, SortByReadName ? BAMSortByFields.ReadNames : BAMSortByFields.ChromosomeCoordinates);
                }
                else
                {
                    try
                    {
                        map = parser.ParseOne <SequenceAlignmentMap>(FilePaths[index]);
                    }
                    catch
                    {
                        throw new InvalidOperationException(Resources.InvalidBAMFile);
                    }

                    if (map == null)
                    {
                        throw new InvalidOperationException(Resources.EmptyFile);
                    }

                    sortedIndex = Sort(map, SortByReadName ? BAMSortByFields.ReadNames : BAMSortByFields.ChromosomeCoordinates);
                }

                lock (sortedIndexes)
                {
                    sortedIndexes.Add(sortedIndex);
                    sequenceAlignmentMaps.Add(map);
                }
            });

            if (string.IsNullOrEmpty(OutputFilename))
            {
                OutputFilename = "out.bam";
                autoGeneratedOutputFilename = true;
            }

            string filePath = Path.GetTempFileName();

            using (FileStream fstemp = new FileStream(filePath, FileMode.Create, FileAccess.ReadWrite))
            {
                BAMFormatter formatter = new BAMFormatter();
                formatter.WriteHeader(header, fstemp);
                int[] indexes = new int[sortedIndexes.Count];

                if (SortByReadName)
                {
                    IList <BAMSortedIndex> sortedIndex = sortedIndexes.Select(a => a.First()).ToList();
                    WriteMergeFileSortedByReadName(sortedIndex, fstemp, formatter, sequenceAlignmentMaps);
                }
                else
                {
                    WriteMergeFile(sortedIndexes, fstemp, formatter, sequenceAlignmentMaps);
                }

                using (FileStream fsoutput = new FileStream(OutputFilename, FileMode.Create, FileAccess.Write))
                {
                    fstemp.Seek(0, SeekOrigin.Begin);
                    formatter.CompressBAMFile(fstemp, fsoutput);
                }
            }

            File.Delete(filePath);

            if (autoGeneratedOutputFilename)
            {
                Console.WriteLine(Properties.Resources.SuccessMessageWithOutputFileName, OutputFilename);
            }
        }