Exemplo n.º 1
0
        /// <summary>
        /// Parses a BAM file, writes it back out with <see cref="BAMFormatter"/> (first with the
        /// default settings, then sorted and with an index file) and verifies that the written
        /// file parses to the expected counts, both as a whole and through range queries.
        /// </summary>
        public void TestFormatter()
        {
            string filePath       = @"TestUtils\BAM\SeqAlignment.bam";
            string outputfilePath = "BAMTests123.bam";

            using (BAMParser parser = new BAMParser())
            {
                BAMFormatter         formatter    = new BAMFormatter();
                SequenceAlignmentMap alignmentMap = parser.Parse(filePath);
                AssertAlignmentCounts(alignmentMap, 2, "input file");

                // First pass: write with the formatter's default settings.
                formatter.Format(alignmentMap, outputfilePath);

                // Second pass: write to the same path again, this time sorted and with an index file.
                formatter.CreateSortedBAMFile = true;
                formatter.CreateIndexFile     = true;
                alignmentMap = parser.Parse(filePath);
                formatter.Format(alignmentMap, outputfilePath);

                Assert.IsTrue(File.Exists(outputfilePath + ".bai"));

                // The formatted file must parse back to the same counts as the input.
                alignmentMap = parser.Parse(outputfilePath);
                AssertAlignmentCounts(alignmentMap, 2, "formatted file");

                // A range spanning the whole of chr20 returns both reads.
                alignmentMap = parser.ParseRange(outputfilePath, new SequenceRange("chr20", 0, int.MaxValue));
                AssertAlignmentCounts(alignmentMap, 2, "full chr20 range");

                // A range limited to [0, 28833] returns only one of them.
                alignmentMap = parser.ParseRange(outputfilePath, new SequenceRange("chr20", 0, 28833));
                AssertAlignmentCounts(alignmentMap, 1, "chr20 range 0-28833");
            }
        }

        /// <summary>
        /// Asserts that an alignment map has exactly one reference sequence (both in the @SQ header
        /// information and in the header's reference sequence list) and the expected number of
        /// query sequences.
        /// </summary>
        /// <param name="alignmentMap">Alignment map to check; must not be null.</param>
        /// <param name="expectedQueryCount">Expected number of query sequences.</param>
        /// <param name="stage">Short label included in failure messages to identify the check.</param>
        private static void AssertAlignmentCounts(SequenceAlignmentMap alignmentMap, int expectedQueryCount, string stage)
        {
            Assert.IsTrue(alignmentMap != null, stage);

            // Expected value goes first so that failure messages read correctly.
            Assert.AreEqual(1, alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count, stage);
            Assert.AreEqual(1, alignmentMap.Header.ReferenceSequences.Count, stage);
            Assert.AreEqual(expectedQueryCount, alignmentMap.QuerySequences.Count, stage);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds a BAM index from the given BAM stream and writes it to the given index file.
        /// </summary>
        /// <param name="compressedBAMStream">Stream handed to the parser to build the index from.</param>
        /// <param name="indexFile">Index file that receives the resulting index.</param>
        private static void CreateIndexFile(Stream compressedBAMStream, BAMIndexFile indexFile)
        {
            BAMIndex index;

            // The parser is only needed to build the index; it is released before the index is written.
            using (BAMParser indexBuilder = new BAMParser())
            {
                index = indexBuilder.GetIndexFromBAMFile(compressedBAMStream);
            }

            indexFile.Write(index);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Parses the sample BAM file and verifies the number of reference sequences listed in the
        /// @SQ header information and the number of query sequences.
        /// </summary>
        public void TestParser()
        {
            string filePath = @"TestUtils\BAM\SeqAlignment.bam";

            using (BAMParser parser = new BAMParser())
            {
                SequenceAlignmentMap alignmentMap = parser.Parse(filePath);

                Assert.IsTrue(alignmentMap != null);

                // Expected value goes first so that failure messages read correctly.
                Assert.AreEqual(1, alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count);
                Assert.AreEqual(2, alignmentMap.QuerySequences.Count);
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Formats the sample BAM file twice with sorting by chromosome coordinates enabled and
        /// verifies that both output files exist and compare equal. The output files are deleted
        /// afterwards, whether or not the test passes.
        /// </summary>
        public void TestFormatterWithSort()
        {
            string inputFilePath   = @"TestUtils\BAM\SeqAlignment.bam".TestDir();
            string outputFilePath1 = "output1.bam";
            string outputFilePath2 = "output2.bam";

            try
            {
                using (BAMParser parser = new BAMParser())
                {
                    BAMFormatter         formatter    = new BAMFormatter();
                    SequenceAlignmentMap alignmentMap = parser.ParseOne <SequenceAlignmentMap>(inputFilePath);

                    Assert.IsTrue(alignmentMap != null);

                    // Expected value goes first so that failure messages read correctly.
                    Assert.AreEqual(1, alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count);
                    Assert.AreEqual(1, alignmentMap.Header.ReferenceSequences.Count);
                    Assert.AreEqual(2, alignmentMap.QuerySequences.Count);

                    formatter.CreateSortedBAMFile = true;
                    formatter.SortType            = BAMSortByFields.ChromosomeCoordinates;
                    formatter.Format(alignmentMap, outputFilePath1);

                    // NOTE(review): the formatter settings are not reset here, so the second file is
                    // written with sorting enabled as well; the comparison below therefore checks that
                    // two sorted writes agree, not sorted against unsorted output. Confirm this is intended.
                    alignmentMap = parser.ParseOne <SequenceAlignmentMap>(inputFilePath);
                    formatter.Format(alignmentMap, outputFilePath2);

                    Assert.IsTrue(File.Exists(outputFilePath1));
                    Assert.IsTrue(File.Exists(outputFilePath2));

                    Assert.IsTrue(FileCompare(outputFilePath1, outputFilePath2));
                }
            }
            finally
            {
                // The parser has already been disposed by the using block at this point.
                File.Delete(outputFilePath1);
                File.Delete(outputFilePath2);
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Closes the writer and the BAM output streams, releases the formatter and parser,
        /// and deletes the temporary files if they exist.
        /// </summary>
        private void Close()
        {
            if (writer != null)
            {
                writer.Close();
            }

            if (bamCompressedOutStream != null)
            {
                bamCompressedOutStream.Close();
                bamCompressedOutStream = null;
            }

            if (bamUncompressedOutStream != null)
            {
                bamUncompressedOutStream.Close();
                bamUncompressedOutStream = null;
            }

            // Release the parser before touching the temp files.
            // NOTE(review): this assumes the parser may still hold a handle on one of them; confirm.
            bamformatter = null;
            if (bamparser != null)
            {
                bamparser.Dispose();
                bamparser = null;
            }

            // The original condition lacked the negation, so it could never be true
            // (File.Exists returns false for a null or empty path) and the temp files were never removed.
            if (!string.IsNullOrEmpty(uncompressedTempfile) && File.Exists(uncompressedTempfile))
            {
                File.Delete(uncompressedTempfile);
            }

            if (!string.IsNullOrEmpty(compressedTempfile) && File.Exists(compressedTempfile))
            {
                File.Delete(compressedTempfile);
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Parses the sample BAM file and checks that paired reads are returned both by the
        /// parameterless GetPairedReads overload and by the overload taking the values 250 and 50.
        /// </summary>
        public void TestGettingPairedReads()
        {
            const string SampleBamPath = @"TestUtils\BAM\SeqAlignment.bam";

            using (BAMParser bamReader = new BAMParser())
            {
                SequenceAlignmentMap map = bamReader.Parse(SampleBamPath);
                Assert.IsTrue(map != null);

                // Default overload.
                IList <PairedRead> pairs = map.GetPairedReads();
                Assert.IsTrue(pairs.Count > 0);

                // Overload with explicit numeric arguments.
                pairs = map.GetPairedReads(250, 50);
                Assert.IsTrue(pairs.Count > 0);
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Closes the writer and the BAM output streams, releases the formatter and parser,
        /// and deletes the temporary files if they exist.
        /// </summary>
        private void Close()
        {
            if (_writer != null)
            {
                _writer.Close();
            }

            if (_bamCompressedOutStream != null)
            {
                _bamCompressedOutStream.Close();
                _bamCompressedOutStream = null;
            }

            if (_bamUncompressedOutStream != null)
            {
                _bamUncompressedOutStream.Close();
                _bamUncompressedOutStream = null;
            }

            // Release the parser before touching the temp files.
            // NOTE(review): this assumes the parser may still hold a handle on one of them; confirm.
            _bamformatter = null;
            if (_bamparser != null)
            {
                _bamparser.Dispose();
                _bamparser = null;
            }

            // The original condition lacked the negation, so it could never be true
            // (File.Exists returns false for a null or empty path) and the temp files were never removed.
            if (!string.IsNullOrEmpty(_uncompressedTempfile) && File.Exists(_uncompressedTempfile))
            {
                File.Delete(_uncompressedTempfile);
            }

            if (!string.IsNullOrEmpty(_compressedTempfile) && File.Exists(_compressedTempfile))
            {
                File.Delete(_compressedTempfile);
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Run ConPADE on each contig of the input BAM file.
        /// </summary>
        /// <remarks>
        /// Results are written to text files whose names start with the BAM file name without its
        /// extension. When <c>splitContigs</c> is set, a separate set of files is written per contig;
        /// otherwise one global set of tab-separated files covers all contigs. Progress and timing are
        /// reported on the console. The input stream, the parser and every output writer are released
        /// even when processing fails part-way through.
        /// </remarks>
        /// <param name="bamName">Name of the input BAM file.</param>
        public void RunFile(string bamName)
        {
            // Current implementation requires that minimum ploidy be 1
            int min_ploidy         = 1;
            int number_of_ploidies = max_ploidy - min_ploidy + 1;

            // Set nucleotide proportions (genotypes)
            double[][][] nuc_props = Nuc_Props(min_ploidy, number_of_ploidies);

            // Set dosage probabilities
            double SNP_density = (double)1 / snpDens;
            double no_SNP_prob = Math.Log((1 - SNP_density) / 2);

            double[][] dose_probs = Dose_Probs(min_ploidy, number_of_ploidies, SNP_density, no_SNP_prob);

            // Set HiSeq error model
            double[, , , ,] log_probs = Error_Probs();

            // Set substitution model
            double[, ,] log_subst_probs = Subst_Probs();

            // Set SNP calling probability: SNPthres is scaled by ln(10) / -10, the inverse of the
            // -10 * x / ln(10) conversion used below when the "PhredQuality" column is written.
            double log_SNP_thres = SNPthres * Math.Log(10) / -10;

            Stopwatch clock = new Stopwatch();

            Console.WriteLine("Program started at {0}\n", DateTime.Now);

            TextWriter writer_log_like = null;
            TextWriter writer_SNP      = null;
            TextWriter writer_ploidy   = null;
            TextWriter writer_reads    = null;

            char[] nuc_chars = new char[4] {
                'A', 'C', 'G', 'T'
            };

            using (Stream bam_stream = new FileStream(bamName, FileMode.Open, FileAccess.Read))
            using (BAMParser parser = new BAMParser())
            {
                try
                {
                    SAMAlignmentHeader header = parser.GetHeader(bam_stream);
                    string             temp   = Path.GetFileNameWithoutExtension(bamName);

                    // Find first valid alignment in BAM file
                    // NOTE(review): the per-contig loop below treats a null alignment as "no more reads".
                    // If GetAlignedSequence also returns null at end of file, this loop never terminates
                    // for a BAM file without any usable alignment - confirm against the parser.
                    SAMAlignedSequence next_alignment = parser.GetAlignedSequence(true);

                    while (next_alignment == null || next_alignment.RName == "*" || next_alignment.IsDummyRead)
                    {
                        next_alignment = parser.GetAlignedSequence(true);
                    }

                    // Create global output files and write headers.
                    if (!splitContigs)
                    {
                        string SNP_file = temp + "_SNP.txt";
                        writer_SNP = new StreamWriter(SNP_file);
                        writer_SNP.WriteLine("Contig\tPosition\tAlleles\tCounts\tDosage\tPhredQuality");

                        string ploidy_file = temp + "_ploidy.txt";
                        writer_ploidy = new StreamWriter(ploidy_file);
                        writer_ploidy.Write("Contig\tBestPloidy");
                        for (int i = 0; i < number_of_ploidies; i++)
                        {
                            writer_ploidy.Write("\tlogLike_M{0}", i + min_ploidy);
                        }
                        writer_ploidy.WriteLine("");

                        string reads_file = temp + "_readStats.txt";
                        writer_reads = new StreamWriter(reads_file);
                        writer_reads.WriteLine("Contig\tAlignedReads\tAlignedBases\tUsedReads\tUsedBases");
                    }

                    // Run over each contig in input BAM file.
                    int contig_ind = -1;

                    while (next_alignment != null && next_alignment.RName != "*" && !next_alignment.IsDummyRead)
                    {
                        string contig_name = next_alignment.RName;

                        Console.WriteLine("Started contig {0} at {1}",
                                          contig_name, DateTime.Now);

                        clock.Restart();

                        #region Variables and file handles for current contig
                        long number_of_aligned_reads      = 0;
                        long number_of_aligned_base_pairs = 0;
                        long number_of_used_reads         = 0;
                        long number_of_used_base_pairs    = 0;

                        // Create individual output files for the current contig.
                        if (splitContigs)
                        {
                            string name          = temp + "_" + contig_name;
                            string log_like_file = name + "_log_likelihoods.txt";
                            writer_log_like = new StreamWriter(log_like_file);

                            string SNP_file = name + "_SNP.txt";
                            writer_SNP = new StreamWriter(SNP_file);

                            string ploidy_file = name + "_ploidy.txt";
                            writer_ploidy = new StreamWriter(ploidy_file);

                            string reads_file = name + "_readStats.txt";
                            writer_reads = new StreamWriter(reads_file);
                        }

                        double[] global_log_like = new double[number_of_ploidies];

                        // Advance through the header's reference sequences until the current contig is
                        // found. The index only moves forward, so contigs are expected to appear in the
                        // reads in the same order as in the header; otherwise the indexer throws.
                        while (header.ReferenceSequences[++contig_ind].Name != contig_name)
                        {
                            ;
                        }
                        long contig_length = header.ReferenceSequences[contig_ind].Length;

                        // Create a queue to include all reads that overlap with a given position.
                        Queue <Padded_Read> read_queue = new Queue <Padded_Read>();

                        // Create a queue to include best doses for each tested position.
                        Queue <Best_Dose> dose_queue = new Queue <Best_Dose>((int)contig_length);
                        #endregion Variables and file handles  for current contig

                        int  positions_to_compute = 0;
                        long current_position     = 0;

                        #region Run over every position in contig
                        while (current_position < contig_length)
                        {
                            if ((current_position % 1000000) == 0 && current_position != 0)
                            {
                                Console.WriteLine("At position {0} of {1}", current_position + 1, contig_length);
                            }

                            // Search for reads starting at current position.
                            Search_Reads(parser, ref next_alignment, contig_name, ref number_of_aligned_reads,
                                         ref number_of_aligned_base_pairs, ref number_of_used_reads, ref number_of_used_base_pairs,
                                         read_queue, current_position);

                            if (read_queue.Count > 0)
                            {
                                positions_to_compute++;

                                // Extract information from each read in queue.
                                byte[] obs_nucs;
                                byte[] is_GG;
                                bool[] reverse;
                                int[]  quality_scores;
                                int[]  neigh_quality_scores;
                                int[]  scores;
                                int[]  counts;
                                int    k;

                                Extract_Read_Info(read_queue, current_position, out obs_nucs, out is_GG, out reverse,
                                                  out quality_scores, out neigh_quality_scores, out scores, out counts, out k);

                                // Find two most abundant nucleotides for this position.
                                byte nuc_one;
                                byte nuc_two;
                                Get_Two_Nucs(scores, out nuc_one, out nuc_two);

                                // Calculate Pr(obs|allele1) and Pr(obs|allele2).
                                double[][] log_nuc_probs = Obs_Probs(log_probs, log_subst_probs, obs_nucs, is_GG, reverse,
                                                                     quality_scores, neigh_quality_scores, counts, k, nuc_one, nuc_two);

                                // Calculate log_likelihoods of genotypes for current position.
                                double[][] log_likelihoods = Log_Likelihoods(min_ploidy, max_ploidy, log_nuc_probs, nuc_props);

                                // Calculate log_likelihood of each ploidy and keep most likely allele dosage.
                                Global_Likelihood_Keep_Dose(min_ploidy, number_of_ploidies, dose_probs, global_log_like,
                                                            dose_queue, current_position, counts, nuc_one, nuc_two, log_likelihoods);
                            }

                            // Remove finished reads from queue. Finished reads no longer overlap with current position.
                            while (read_queue.Count > 0)
                            {
                                Padded_Read oldest_read = read_queue.Peek();
                                if (oldest_read == null ||
                                    (oldest_read.alignment.Pos + oldest_read.alignment_length - 2) >= current_position)
                                {
                                    break;
                                }

                                read_queue.Dequeue();
                            }

                            ++current_position;
                        }
                        #endregion Run over every position in contig

                        // Output log_likelihoods.
                        int best_log_like = 0;
                        for (int i = 0; i < number_of_ploidies; i++)
                        {
                            if (global_log_like[i] > global_log_like[best_log_like])
                            {
                                best_log_like = i;
                            }

                            if (splitContigs)
                            {
                                writer_log_like.WriteLine("Ploidy {0} - log_likelihood {1}", i + min_ploidy, global_log_like[i]);
                            }
                        }

                        // Output most likely ploidy.
                        int best_ploidy = best_log_like + min_ploidy;
                        if (splitContigs)
                        {
                            writer_ploidy.WriteLine(best_ploidy);
                        }
                        else
                        {
                            writer_ploidy.Write("{0}\t{1}", contig_name, best_ploidy);
                            for (int i = 0; i < number_of_ploidies; i++)
                            {
                                writer_ploidy.Write("\t{0}", global_log_like[i]);
                            }
                            writer_ploidy.WriteLine("");
                        }

                        // Output SNPs.
                        if (splitContigs)
                        {
                            writer_SNP.WriteLine("Position\tAlleles\tCounts\tDosage\tPhredQuality");
                        }
                        foreach (Best_Dose cur_doses in dose_queue)
                        {
                            double cur_SNP_posterior = cur_doses.SNP_posterior[best_log_like];
                            if (cur_SNP_posterior <= log_SNP_thres)
                            {
                                // Only positions whose best dose is neither 0 nor the full ploidy are reported.
                                int cur_best_dose = cur_doses.best_dose[best_log_like];
                                if (cur_best_dose != best_ploidy && cur_best_dose != 0)
                                {
                                    if (splitContigs)
                                    {
                                        writer_SNP.WriteLine("{0}\t{1}|{2}\t{3}|{4}\t{5}\t{6}", cur_doses.position + 1,
                                                             nuc_chars[cur_doses.nuc_one], nuc_chars[cur_doses.nuc_two], cur_doses.count_one,
                                                             cur_doses.count_two, cur_best_dose, -10 * cur_SNP_posterior / Math.Log(10));
                                    }
                                    else
                                    {
                                        writer_SNP.WriteLine("{0}\t{1}\t{2}|{3}\t{4}|{5}\t{6}\t{7}", contig_name,
                                                             cur_doses.position + 1, nuc_chars[cur_doses.nuc_one], nuc_chars[cur_doses.nuc_two],
                                                             cur_doses.count_one, cur_doses.count_two, cur_best_dose,
                                                             -10 * cur_SNP_posterior / Math.Log(10));
                                    }
                                }
                            }
                        }

                        // Output read statistics.
                        if (splitContigs)
                        {
                            writer_reads.WriteLine("\nNumber of aligned reads: {0}", number_of_aligned_reads);
                            writer_reads.WriteLine("Number of aligned base pairs: {0}", number_of_aligned_base_pairs);
                            writer_reads.WriteLine("\nNumber of used reads: {0}", number_of_used_reads);
                            writer_reads.WriteLine("Number of used base pairs: {0}", number_of_used_base_pairs);
                        }
                        else
                        {
                            writer_reads.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", contig_name, number_of_aligned_reads,
                                                   number_of_aligned_base_pairs, number_of_used_reads, number_of_used_base_pairs);
                        }

                        // Per-contig files are finished here; clear the references so that the
                        // cleanup in the finally block does not touch them again.
                        if (splitContigs)
                        {
                            writer_log_like.Close();
                            writer_log_like = null;
                            writer_SNP.Close();
                            writer_SNP = null;
                            writer_ploidy.Close();
                            writer_ploidy = null;
                            writer_reads.Close();
                            writer_reads = null;
                        }

                        clock.Stop();
                        Console.WriteLine("Time to run contig: {0} s\n", (double)clock.ElapsedMilliseconds / 1000);
                    }
                }
                finally
                {
                    // Closes the global writers on normal completion and any writer still open
                    // after a failure, so that no output file is left open.
                    if (writer_log_like != null)
                    {
                        writer_log_like.Close();
                    }

                    if (writer_SNP != null)
                    {
                        writer_SNP.Close();
                    }

                    if (writer_ploidy != null)
                    {
                        writer_ploidy.Close();
                    }

                    if (writer_reads != null)
                    {
                        writer_reads.Close();
                    }
                }
            }

            Console.WriteLine("Finished at {0}\n", DateTime.Now);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Validate different paired read types
        /// </summary>
        /// <remarks>
        /// Reads the BAM file path, mean, deviation, library name and the comma-separated expected
        /// paired read types and insert lengths from the given XML node, parses the BAM file, and
        /// checks each paired read against the expected value at the same position in the list.
        /// </remarks>
        /// <param name="nodeName">XML node name</param>
        /// <param name="pams">GetPairedReadTypes method parameters</param>
        void ValidatePairedReadTypes(string nodeName, GetPairedReadTypeParameters pams)
        {
            // Get input and output values from xml node.
            string bamFilePath = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                 Constants.FilePathNode);
            string mean = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.MeanNode);
            string deviation = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.DeviationValueNode);
            string library = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.LibraryNameNode);

            string[] pairedReadType = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.PairedReadTypeNode).Split(',');
            string[] insertLength = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.InsertLengthNode).Split(',');

            // Parsing happens inside the using block so the parser is released even if Parse throws.
            using (BAMParser bamParser = new BAMParser())
            {
                SequenceAlignmentMap seqAlignmentMapObj = bamParser.Parse(bamFilePath);

                // Every branch below uses the same mean and deviation, so convert them once.
                float meanValue      = float.Parse(mean, (IFormatProvider)null);
                float deviationValue = float.Parse(deviation, (IFormatProvider)null);

                IList <PairedRead>      pairedReads = null;
                CloneLibraryInformation libraryInfo;
                int i = 0;

                switch (pams)
                {
                case GetPairedReadTypeParameters.PaireReadTypeUsingLibraryName:
                    pairedReads = seqAlignmentMapObj.GetPairedReads(meanValue, deviationValue);
                    foreach (PairedRead read in pairedReads)
                    {
                        PairedReadType type = PairedRead.GetPairedReadType(read, library);
                        Assert.AreEqual(pairedReadType[i], type.ToString());
                        i++;
                    }
                    break;

                case GetPairedReadTypeParameters.PaireReadTypeUsingCloneLibraryInfo:
                    pairedReads = seqAlignmentMapObj.GetPairedReads(meanValue, deviationValue);
                    libraryInfo = CloneLibrary.Instance.GetLibraryInformation(library);
                    foreach (PairedRead read in pairedReads)
                    {
                        PairedReadType type = PairedRead.GetPairedReadType(read, libraryInfo);
                        Assert.AreEqual(pairedReadType[i], type.ToString());
                        i++;
                    }
                    break;

                case GetPairedReadTypeParameters.PaireReadTypeUsingMeanAndDeviation:
                    pairedReads = seqAlignmentMapObj.GetPairedReads(meanValue, deviationValue);
                    foreach (PairedRead read in pairedReads)
                    {
                        PairedReadType type = PairedRead.GetPairedReadType(read, meanValue, deviationValue);
                        Assert.AreEqual(pairedReadType[i], type.ToString());
                        i++;
                    }
                    break;

                case GetPairedReadTypeParameters.PaireReadTypeUsingReadsAndLibrary:
                    pairedReads = seqAlignmentMapObj.GetPairedReads(meanValue, deviationValue);
                    foreach (PairedRead read in pairedReads)
                    {
                        PairedReadType type = PairedRead.GetPairedReadType(read.Read1,
                                                                           read.Read2, library);
                        Assert.AreEqual(pairedReadType[i], type.ToString());
                        i++;
                    }
                    break;

                case GetPairedReadTypeParameters.PaireReadTypeUsingReadsAndLibraryInfo:
                    pairedReads = seqAlignmentMapObj.GetPairedReads(meanValue, deviationValue);
                    libraryInfo = CloneLibrary.Instance.GetLibraryInformation(library);
                    foreach (PairedRead read in pairedReads)
                    {
                        PairedReadType type = PairedRead.GetPairedReadType(read.Read1,
                                                                           read.Read2, libraryInfo);
                        Assert.AreEqual(pairedReadType[i], type.ToString());
                        i++;
                    }
                    break;

                case GetPairedReadTypeParameters.GetInsertLength:
                    pairedReads = seqAlignmentMapObj.GetPairedReads(meanValue, deviationValue);

                    // NOTE(review): the library information is looked up but not used in this branch;
                    // the call is kept in case the lookup itself is part of what is being exercised.
                    libraryInfo = CloneLibrary.Instance.GetLibraryInformation(library);
                    foreach (PairedRead read in pairedReads)
                    {
                        int length = PairedRead.GetInsertLength(read.Read1, read.Read2);
                        Assert.AreEqual(insertLength[i], length.ToString((IFormatProvider)null));
                        i++;
                    }
                    break;
                }

                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "BAM Parser BVT : Validated Paired read Type Successfully"));
            }
        }
Exemplo n.º 10
0
        /// <summary>
        /// Validate GetPaired method
        /// </summary>
        /// <remarks>
        /// Reads the BAM file path, the expected-sequence FASTA path, mean, deviation, library name,
        /// expected paired read count and the comma-separated expected insert lengths and paired read
        /// types from the given XML node. The paired reads obtained through the overload selected by
        /// <paramref name="pams"/> are then compared against those expectations.
        /// </remarks>
        /// <param name="nodeName">XML node name</param>
        /// <param name="pams">GetPairedReads method parameters</param>
        void ValidatePairedReads(string nodeName, GetPairedReadParameters pams)
        {
            // Get input and output values from xml node.
            string bamFilePath = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                 Constants.FilePathNode);
            string expectedAlignedSeqFilePath = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ExpectedSequence);
            string mean = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.MeanNode);
            string deviation = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.DeviationValueNode);
            string library = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.LibraryNameNode);
            string pairedReadsCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.PairedReadsNode);

            string[] insertLength = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.InsertLengthNode).Split(',');
            string[] pairedReadType = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.PairedReadTypeNode).Split(',');

            // The FASTA parser is created inside the using block so that the BAM parser is
            // released even if constructing the FASTA parser fails.
            using (BAMParser bamParser = new BAMParser())
            {
                // NOTE(review): the FASTA parser is never closed here; confirm whether it holds a
                // file handle that should be released.
                FastAParser parserObj = new FastAParser(expectedAlignedSeqFilePath);

                SequenceAlignmentMap seqAlignment = bamParser.Parse(bamFilePath);

                // Materialise once: the sequences are looked up by index for every paired read,
                // which would otherwise re-enumerate the parser output each time.
                IList <ISequence> expectedSequences = parserObj.Parse().ToList();

                IList <PairedRead> pairedReads = null;

                switch (pams)
                {
                case GetPairedReadParameters.GetPairedReadWithParameters:
                    pairedReads = seqAlignment.GetPairedReads(float.Parse(mean, (IFormatProvider)null),
                                                              float.Parse(deviation, (IFormatProvider)null));
                    break;

                case GetPairedReadParameters.GetPairedReadWithLibraryName:
                    pairedReads = seqAlignment.GetPairedReads(library);
                    break;

                case GetPairedReadParameters.GetPairedReadWithCloneLibraryInfo:
                    CloneLibraryInformation libraryInfo =
                        CloneLibrary.Instance.GetLibraryInformation(library);
                    pairedReads = seqAlignment.GetPairedReads(libraryInfo);
                    break;

                case GetPairedReadParameters.Default:
                    pairedReads = seqAlignment.GetPairedReads();
                    break;
                }

                // Fail with a clear message instead of a NullReferenceException when pams matched no case.
                Assert.IsNotNull(pairedReads, "No paired reads were retrieved for the given GetPairedReadParameters value.");

                Assert.AreEqual(pairedReadsCount, pairedReads.Count.ToString((IFormatProvider)null));

                int i = 0;
                foreach (PairedRead read in pairedReads)
                {
                    Assert.AreEqual(insertLength[i], read.InsertLength.ToString((IFormatProvider)null));
                    Assert.AreEqual(pairedReadType[i], read.PairedType.ToString());

                    // Every read of the i-th pair is compared against the i-th expected sequence.
                    string expectedSequence = new string(expectedSequences[i].Select(a => (char)a).ToArray());

                    foreach (SAMAlignedSequence seq in read.Reads)
                    {
                        Assert.AreEqual(expectedSequence,
                                        new string(seq.QuerySequence.Select(a => (char)a).ToArray()));

                        // Log to NUNIT GUI.
                        ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                               "BAM Parser BVT : Validated Paired read :{0} successfully",
                                                               seq.QuerySequence.ToString()));
                    }
                    i++;
                }
            }
        }
Exemplo n.º 11
0
        /// <summary>
        /// Parse BAM and validate parsed aligned sequences and its properties.
        /// </summary>
        /// <param name="nodeName">Different xml nodes used for different test cases</param>
        /// <param name="BAMParserPam">BAM Parse method parameters</param>
        /// <param name="IsReferenceIndex">True to skip validation of the BAM header records;
        /// false to validate them before the aligned sequences are compared.</param>
        void ValidateBAMParser(string nodeName,
                               BAMParserParameters BAMParserPam,
                               bool IsReferenceIndex)
        {
            // Get input and output values from xml node.
            string bamFilePath = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                 Constants.FilePathNode);
            string expectedAlignedSeqFilePath = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ExpectedSequence);
            string refIndexValue = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.RefIndexNode);
            string startIndexValue = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.StartIndexNode);
            string endIndexValue = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.EndIndexNode);
            string alignedSeqCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.AlignedSeqCountNode);
            string refSeqName = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ChromosomeNameNode);

            SequenceAlignmentMap seqAlignment = null;

            // A using statement disposes the parser on every exit path and, unlike an
            // unconditional Dispose() in a finally block, cannot throw a
            // NullReferenceException that hides a failure of the constructor itself.
            using (BAMParser bamParser = new BAMParser())
            {
                // Parse a BAM file with different parameters.
                // The numeric index values are converted inside each case so that
                // only the values a given parse mode actually needs are required
                // to be valid integers.
                switch (BAMParserPam)
                {
                case BAMParserParameters.StreamReader:
                    using (Stream stream = new FileStream(bamFilePath, FileMode.Open,
                                                          FileAccess.Read))
                    {
                        seqAlignment = bamParser.Parse(stream);
                    }
                    break;

                case BAMParserParameters.FileName:
                    seqAlignment = bamParser.Parse(bamFilePath);
                    break;

                case BAMParserParameters.ParseRangeFileName:
                    seqAlignment = bamParser.ParseRange(bamFilePath,
                                                        Convert.ToInt32(refIndexValue, (IFormatProvider)null));
                    break;

                case BAMParserParameters.ParseRangeWithIndex:
                    seqAlignment = bamParser.ParseRange(bamFilePath,
                                                        Convert.ToInt32(refIndexValue, (IFormatProvider)null),
                                                        Convert.ToInt32(startIndexValue, (IFormatProvider)null),
                                                        Convert.ToInt32(endIndexValue, (IFormatProvider)null));
                    break;

                // Both modes issue the same ParseRange call.
                case BAMParserParameters.ParseRangeUsingRefSeq:
                case BAMParserParameters.ParseRangeUsingRefSeqAndFlag:
                    seqAlignment = bamParser.ParseRange(bamFilePath, refSeqName);
                    break;

                // Both modes issue the same ParseRange call.
                case BAMParserParameters.ParseRangeUsingRefSeqUsingIndex:
                case BAMParserParameters.ParseRangeUsingIndexesAndFlag:
                    seqAlignment = bamParser.ParseRange(bamFilePath, refSeqName,
                                                        Convert.ToInt32(startIndexValue, (IFormatProvider)null),
                                                        Convert.ToInt32(endIndexValue, (IFormatProvider)null));
                    break;
                }

                // An enum value not handled above leaves seqAlignment unset; fail with a
                // clear message instead of a NullReferenceException further down.
                Assert.IsTrue(seqAlignment != null,
                              "BAM Parser BVT : No alignment map was produced for the requested parse mode");

                // Validate BAM Header record fields.
                if (!IsReferenceIndex)
                {
                    ValidateBAMHeaderRecords(nodeName, seqAlignment);
                }

                IList<SAMAlignedSequence> alignedSeqs = seqAlignment.QuerySequences;

                Assert.AreEqual(alignedSeqCount, alignedSeqs.Count.ToString((IFormatProvider)null));

                // Get expected sequences
                using (FastAParser parserObj = new FastAParser(expectedAlignedSeqFilePath))
                {
                    // Materialise once so the expected sequences can be indexed
                    // alongside the aligned sequences.
                    IList<ISequence> expectedSequencesList = parserObj.Parse().ToList();

                    // Validate aligned sequences from BAM file.
                    for (int index = 0; index < alignedSeqs.Count; index++)
                    {
                        Assert.AreEqual(
                            new string(expectedSequencesList[index].Select(a => (char)a).ToArray()),
                            new string(alignedSeqs[index].QuerySequence.Select(a => (char)a).ToArray()));

                        // Log to NUNIT GUI.
                        ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                               "BAM Parser BVT : Validated Aligned sequence :{0} successfully",
                                                               alignedSeqs[index].QuerySequence.ToString()));
                        Console.WriteLine(string.Format((IFormatProvider)null,
                                                        "BAM Parser BVT : Validated the aligned sequence :{0} successfully",
                                                        alignedSeqs[index].QuerySequence.ToString()));
                    }
                }
            }
        }