Example #1
0
        /// <summary>
        /// Runs Import with a temporary file as the first path and the sample BAM file as
        /// the second, then checks that the temporary file parsed with SAMParser compares
        /// equal (via CompareSAM) to the BAM file parsed with BAMParser.
        /// </summary>
        public void ImportTestWithSAM()
        {
            const string bamFilePath = @"TestUtils\SAM\SeqAlignment.bam";
            string tempFilename = Path.GetTempFileName();

            try
            {
                Import options = new Import();
                options.FilePath = new string[] { tempFilename, bamFilePath };
                options.DoImport();

                using (BAMParser parser = new BAMParser())
                {
                    SequenceAlignmentMap map = parser.Parse(bamFilePath);
                    using (SAMParser parse = new SAMParser())
                    {
                        SequenceAlignmentMap map1 = parse.Parse(tempFilename);
                        Assert.IsTrue(CompareSAM(map, map1));
                    }
                }
            }
            finally
            {
                // Clean up the temporary file even when the import or the assertion fails.
                File.Delete(tempFilename);
            }
        }
Example #2
0
        /// <summary>
        /// Parses the sample BAM file and checks that paired reads are returned both by
        /// the parameterless GetPairedReads() and by GetPairedReads(250, 50).
        /// </summary>
        public void TestGettingPairedReads()
        {
            const string bamfilePath = @"TestUtils\BAM\SeqAlignment.bam";

            using (BAMParser parser = new BAMParser())
            {
                SequenceAlignmentMap alignmentMap = parser.Parse(bamfilePath);
                Assert.IsTrue(alignmentMap != null);

                IList<PairedRead> defaultPairs = alignmentMap.GetPairedReads();
                Assert.IsTrue(defaultPairs.Count > 0);

                IList<PairedRead> explicitPairs = alignmentMap.GetPairedReads(250, 50);
                Assert.IsTrue(explicitPairs.Count > 0);
            }
        }
Example #3
0
        /// <summary>
        /// Validate formatted BAM file: parses the input BAM file, writes it out with
        /// BAMFormatter using the requested overload, parses the written file again and
        /// validates its header records and aligned sequences against the expected values.
        /// </summary>
        /// <param name="nodeName">Different xml nodes used for different test cases</param>
        /// <param name="BAMParserPam">BAM Format method parameters</param>
        void ValidateBAMFormatter(string nodeName,
                                  BAMParserParameters BAMParserPam)
        {
            // Get input and output values from xml node.
            string bamFilePath = _utilityObj._xmlUtil.GetTextValue(nodeName,
                                                                   Constants.FilePathNode);
            string expectedAlignedSeqFilePath = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.ExpectedSequence);
            string alignedSeqCount = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.AlignedSeqCountNode);

            try
            {
                using (BAMParser bamParserObj = new BAMParser())
                {
                    // Parse a BAM file.
                    SequenceAlignmentMap seqAlignment = bamParserObj.Parse(bamFilePath);

                    // Create a BAM formatter object.
                    BAMFormatter formatterObj = new BAMFormatter();

                    // Write/Format aligned sequences to BAM file.
                    switch (BAMParserPam)
                    {
                        case BAMParserParameters.StreamWriter:
                            using (Stream stream = new FileStream(Constants.BAMTempFileName,
                                                                  FileMode.Create, FileAccess.Write))
                            {
                                formatterObj.Format(seqAlignment, stream);
                            }
                            break;

                        case BAMParserParameters.FileName:
                            formatterObj.Format(seqAlignment, Constants.BAMTempFileName);
                            break;

                        case BAMParserParameters.IndexFile:
                            formatterObj.Format(seqAlignment, Constants.BAMTempFileName,
                                                Constants.BAMTempIndexFile);

                            // The original code called File.Exists and discarded the result,
                            // so a missing index file was never detected.
                            Assert.IsTrue(File.Exists(Constants.BAMTempIndexFile));
                            break;

                        default:
                            break;
                    }

                    // Parse formatted BAM file and validate aligned sequences.
                    SequenceAlignmentMap expectedSeqAlignmentMap = bamParserObj.Parse(
                        Constants.BAMTempFileName);

                    // Validate parsed BAM file header record fields.
                    ValidateBAMHeaderRecords(nodeName, expectedSeqAlignmentMap);

                    IList<SAMAlignedSequence> alignedSeqs = expectedSeqAlignmentMap.QuerySequences;

                    Assert.AreEqual(alignedSeqCount, alignedSeqs.Count.ToString((IFormatProvider)null));

                    // Get expected sequences
                    using (FastaParser parserObj = new FastaParser())
                    {
                        IList<ISequence> expectedSequences = parserObj.Parse(expectedAlignedSeqFilePath);

                        // Validate aligned sequences from BAM file.
                        for (int index = 0; index < alignedSeqs.Count; index++)
                        {
                            string actualSequence = alignedSeqs[index].QuerySequence.ToString();

                            Assert.AreEqual(expectedSequences[index].ToString(), actualSequence);

                            // Log to NUNIT GUI.
                            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                                   "BAM Formatter BVT : Validated Aligned sequence :{0} successfully",
                                                                   actualSequence));
                            Console.WriteLine(string.Format((IFormatProvider)null,
                                                            "BAM Formatter BVT : Validated the aligned sequence :{0} successfully",
                                                            actualSequence));
                        }
                    }
                }
            }
            finally
            {
                // Remove the temporary output files even when a validation step fails.
                File.Delete(Constants.BAMTempFileName);
                File.Delete(Constants.BAMTempIndexFile);
            }
        }
Example #4
0
        /// <summary>
        /// Merge multiple sorted alignments.
        /// SAMUtil.exe out.bam in1.bam in2.bam
        /// </summary>
        /// <remarks>
        /// FilePaths[0] is the output file and FilePaths[1..] are parsed as BAM inputs.
        /// The header written to the output comes from HeaderFile when it is set,
        /// otherwise from the first input (FilePaths[1]).
        /// Failures raised while reading the inputs are thrown inside Parallel.For and
        /// therefore reach the caller wrapped in an AggregateException.
        /// </remarks>
        /// <exception cref="InvalidOperationException">
        /// FilePaths is null or contains fewer than three entries.
        /// </exception>
        public void DoMerge()
        {
            if (FilePaths == null)
            {
                throw new InvalidOperationException("FilePath");
            }

            if (FilePaths.Length < 3)
            {
                throw new InvalidOperationException(Resources.MergeHelp);
            }

            IList<IList<BAMSortedIndex>> sortedIndexes = new List<IList<BAMSortedIndex>>();
            IList<SequenceAlignmentMap> sequenceAlignmentMaps = new List<SequenceAlignmentMap>();
            BAMSortByFields sortField = SortByReadName
                ? BAMSortByFields.ReadNames
                : BAMSortByFields.ChromosomeCoordinates;

            // NOTE(review): the BAMParser/SAMParser instances created below are never disposed.
            // If they are IDisposable in this code base, dispose them once the parsed maps are
            // no longer needed - confirm whether the maps depend on a live parser first.
            Parallel.For(1, FilePaths.Length, (int index) =>
            {
                BAMParser parser = new BAMParser();
                SequenceAlignmentMap map;

                try
                {
                    map = parser.Parse(FilePaths[index]);
                }
                catch (Exception ex)
                {
                    // Keep the underlying failure as the inner exception for diagnosis.
                    throw new InvalidOperationException(Resources.InvalidBAMFile, ex);
                }

                if (map == null)
                {
                    throw new InvalidOperationException(Resources.EmptyFile);
                }

                // Only the first input decides which header is written to the output.
                if (index == 1)
                {
                    if (string.IsNullOrEmpty(HeaderFile))
                    {
                        if (map.Header.RecordFields.Count == 0)
                        {
                            throw new InvalidOperationException(Resources.HeaderMissing);
                        }

                        _header = map.Header;
                    }
                    else
                    {
                        SAMParser parse = new SAMParser();
                        SequenceAlignmentMap head;

                        try
                        {
                            head = parse.Parse(HeaderFile);
                        }
                        catch (Exception ex)
                        {
                            throw new InvalidOperationException(Resources.IncorrectHeaderFile, ex);
                        }

                        if (head == null)
                        {
                            throw new InvalidOperationException(Resources.EmptyFile);
                        }

                        _header = head.Header;
                    }
                }

                IList<BAMSortedIndex> sortedIndex = Sort(map, sortField);

                // Entries are appended in completion order, not argument order; both lists
                // are updated under the same lock so they stay index-aligned.
                lock (sortedIndexes)
                {
                    sortedIndexes.Add(sortedIndex);
                    sequenceAlignmentMaps.Add(map);
                }
            });

            string filePath = Path.GetTempFileName();

            try
            {
                using (FileStream fstemp = new FileStream(filePath, FileMode.Create, FileAccess.ReadWrite))
                {
                    BAMFormatter formatter = new BAMFormatter();
                    formatter.WriteHeader(_header, fstemp);

                    if (SortByReadName)
                    {
                        IList<BAMSortedIndex> sortedIndex = sortedIndexes.Select(a => a.First()).ToList();
                        WriteMergeFileSortedByReadName(sortedIndex, fstemp, formatter, sequenceAlignmentMaps);
                    }
                    else
                    {
                        WriteMergeFile(sortedIndexes, fstemp, formatter, sequenceAlignmentMaps);
                    }

                    using (FileStream fsoutput = new FileStream(FilePaths[0], FileMode.Create, FileAccess.Write))
                    {
                        // Rewind the uncompressed temp stream before compressing it to the output.
                        fstemp.Seek(0, SeekOrigin.Begin);
                        formatter.CompressBAMFile(fstemp, fsoutput);
                    }
                }
            }
            finally
            {
                // Remove the intermediate file even when writing or compressing fails.
                File.Delete(filePath);
            }
        }
Example #5
0
        /// <summary>
        /// Identify hot spot chromosomes for length anomaly regions: prints the regions
        /// of length-anomaly reads, the regions of orphan reads, and their intersection.
        /// </summary>
        /// <param name="filename">Input file; parsed as SAM when SAMInput is set, otherwise as BAM.</param>
        /// <param name="mean">Mean value. When this or <paramref name="deviation"/> is -1,
        /// the parameterless GetPairedReads() is used instead.</param>
        /// <param name="deviation">Standard deviation. See <paramref name="mean"/>.</param>
        private void IdentifyLentghAnamolies(string filename,
                                             float mean = -1, float deviation = -1)
        {
            // -1 is the "not supplied" sentinel for both optional arguments.
            bool calculateMeanNdeviation = mean == -1 || deviation == -1;

            SequenceAlignmentMap alignmentMapobj;

            if (!SAMInput)
            {
                BAMParser bamParser = new BAMParser();
                alignmentMapobj = bamParser.Parse(filename);
            }
            else
            {
                SAMParser samParser = new SAMParser();
                alignmentMapobj = samParser.Parse(filename);
            }

            // get reads from sequence alignment map object.
            IList<PairedRead> pairedReads = calculateMeanNdeviation
                ? alignmentMapobj.GetPairedReads()
                : alignmentMapobj.GetPairedReads(mean, deviation);

            // Get the orphan regions. Materialized once so the filter is not re-run
            // for every Count()/foreach below.
            List<PairedRead> orphans = pairedReads
                .Where(PR => PR.PairedType == PairedReadType.Orphan)
                .ToList();

            if (orphans.Count == 0)
            {
                Console.WriteLine("No Orphans to display");
            }

            List<ISequenceRange> orphanRegions = new List<ISequenceRange>(orphans.Count);

            foreach (PairedRead orphanRead in orphans)
            {
                orphanRegions.Add(GetRegion(orphanRead.Read1));
            }

            // Get sequence range grouping for Orphan regions.
            SequenceRangeGrouping orphanRangegroup = new SequenceRangeGrouping(orphanRegions);

            // Get the Length anomalies regions.
            List<PairedRead> lengthAnomalies = pairedReads
                .Where(PE => PE.PairedType == PairedReadType.LengthAnomaly)
                .ToList();

            if (lengthAnomalies.Count == 0)
            {
                Console.WriteLine("No Anomalies to display");
            }

            List<ISequenceRange> lengthAnomalyRegions = new List<ISequenceRange>(lengthAnomalies.Count);

            foreach (PairedRead laRead in lengthAnomalies)
            {
                // The region spans from the first read's position over the insert length.
                SequenceRange range = new SequenceRange();
                range.ID    = laRead.Read1.RName;
                range.Start = laRead.Read1.Pos;
                range.End   = laRead.Read1.Pos + laRead.InsertLength;
                lengthAnomalyRegions.Add(range);
            }

            // Get sequence range grouping for length anomaly regions.
            SequenceRangeGrouping lengthAnomalyRangegroup =
                new SequenceRangeGrouping(lengthAnomalyRegions);

            if (!lengthAnomalyRangegroup.GroupIDs.Any())
            {
                Console.Write("\r\nNo Length anomalies reads to display");
            }
            else
            {
                Console.Write("Region of length anomaly:");
                DisplaySequenceRange(lengthAnomalyRangegroup);
            }

            if (!orphanRangegroup.GroupIDs.Any())
            {
                Console.Write("\r\nNo Orphan reads to display");
            }
            else
            {
                Console.Write("\r\nRegion of Orphan reads:");
                DisplaySequenceRange(orphanRangegroup);
            }

            // Hot spots are the regions present in both groupings.
            SequenceRangeGrouping intersectedRegions =
                lengthAnomalyRangegroup.Intersect(orphanRangegroup);

            if (!intersectedRegions.GroupIDs.Any())
            {
                Console.Write("\r\nNo Hot spots found");
            }
            else
            {
                Console.Write("\r\nChromosomal Hot spot of length anomaly and Orphan region:");
                DisplaySequenceRange(intersectedRegions);
            }
        }
Example #6
0
        /// <summary>
        /// Validate different paired read types: parses the BAM file of the given node,
        /// gets its paired reads for the configured mean and deviation, and compares the
        /// paired read type (or insert length) of each read with the expected values.
        /// </summary>
        /// <param name="nodeName">XML node name</param>
        /// <param name="pams">GetPairedReadTypes method parameters</param>
        void ValidatePairedReadTypes(string nodeName, GetPairedReadTypeParameters pams)
        {
            // Get input and output values from xml node.
            string bamFilePath = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                 Constants.FilePathNode);
            string mean = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.MeanNode);
            string deviation = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.DeviationValueNode);
            string library = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.LibraryNameNode);

            // Expected values, one comma separated entry per paired read.
            string[] pairedReadType = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.PairedReadTypeNode).Split(',');
            string[] insertLength = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.InsertLengthNode).Split(',');

            // Parsing happens inside the using block so the parser is released even
            // when Parse itself throws.
            using (BAMParser bamParser = new BAMParser())
            {
                SequenceAlignmentMap seqAlignmentMapObj = bamParser.Parse(bamFilePath);

                // Every case below works on the same set of paired reads.
                float meanValue = float.Parse(mean, (IFormatProvider)null);
                float deviationValue = float.Parse(deviation, (IFormatProvider)null);
                IList<PairedRead> pairedReads =
                    seqAlignmentMapObj.GetPairedReads(meanValue, deviationValue);

                CloneLibraryInformation libraryInfo;
                int i = 0;

                switch (pams)
                {
                    case GetPairedReadTypeParameters.PaireReadTypeUsingLibraryName:
                        foreach (PairedRead read in pairedReads)
                        {
                            PairedReadType type = PairedRead.GetPairedReadType(read, library);
                            Assert.AreEqual(pairedReadType[i], type.ToString());
                            i++;
                        }
                        break;

                    case GetPairedReadTypeParameters.PaireReadTypeUsingCloneLibraryInfo:
                        libraryInfo = CloneLibrary.Instance.GetLibraryInformation(library);
                        foreach (PairedRead read in pairedReads)
                        {
                            PairedReadType type = PairedRead.GetPairedReadType(read, libraryInfo);
                            Assert.AreEqual(pairedReadType[i], type.ToString());
                            i++;
                        }
                        break;

                    case GetPairedReadTypeParameters.PaireReadTypeUsingMeanAndDeviation:
                        foreach (PairedRead read in pairedReads)
                        {
                            PairedReadType type = PairedRead.GetPairedReadType(
                                read, meanValue, deviationValue);
                            Assert.AreEqual(pairedReadType[i], type.ToString());
                            i++;
                        }
                        break;

                    case GetPairedReadTypeParameters.PaireReadTypeUsingReadsAndLibrary:
                        foreach (PairedRead read in pairedReads)
                        {
                            PairedReadType type = PairedRead.GetPairedReadType(
                                read.Read1, read.Read2, library);
                            Assert.AreEqual(pairedReadType[i], type.ToString());
                            i++;
                        }
                        break;

                    case GetPairedReadTypeParameters.PaireReadTypeUsingReadsAndLibraryInfo:
                        libraryInfo = CloneLibrary.Instance.GetLibraryInformation(library);
                        foreach (PairedRead read in pairedReads)
                        {
                            PairedReadType type = PairedRead.GetPairedReadType(
                                read.Read1, read.Read2, libraryInfo);
                            Assert.AreEqual(pairedReadType[i], type.ToString());
                            i++;
                        }
                        break;

                    case GetPairedReadTypeParameters.GetInsertLength:
                        foreach (PairedRead read in pairedReads)
                        {
                            int length = PairedRead.GetInsertLength(read.Read1, read.Read2);
                            Assert.AreEqual(insertLength[i], length.ToString((IFormatProvider)null));
                            i++;
                        }
                        break;
                }

                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "BAM Parser BVT : Validated Paired read Type Successfully"));
            }
        }
Example #7
0
        /// <summary>
        /// Validate GetPaired method: parses the BAM file of the given node, gets its
        /// paired reads with the requested GetPairedReads overload and compares count,
        /// insert length, paired read type and sequences with the expected values.
        /// </summary>
        /// <param name="nodeName">XML node name</param>
        /// <param name="pams">GetPairedReads method parameters</param>
        void ValidatePairedReads(string nodeName, GetPairedReadParameters pams)
        {
            // Get input and output values from xml node.
            string bamFilePath = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                 Constants.FilePathNode);
            string expectedAlignedSeqFilePath = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ExpectedSequence);
            string mean = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.MeanNode);
            string deviation = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.DeviationValueNode);
            string library = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.LibraryNameNode);
            string pairedReadsCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.PairedReadsNode);

            // Expected values, one comma separated entry per paired read.
            string[] insertLength = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.InsertLengthNode).Split(',');
            string[] pairedReadType = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.PairedReadTypeNode).Split(',');

            IList<PairedRead> pairedReads = null;

            // Both parsers are released on every exit path, including a failing assertion.
            using (BAMParser bamParser = new BAMParser())
            using (FastAParser parserObj = new FastAParser(expectedAlignedSeqFilePath))
            {
                SequenceAlignmentMap seqAlignment = bamParser.Parse(bamFilePath);

                // Materialize once: the expected sequences are indexed repeatedly below.
                IList<ISequence> expectedSequences = parserObj.Parse().ToList();

                switch (pams)
                {
                    case GetPairedReadParameters.GetPairedReadWithParameters:
                        pairedReads = seqAlignment.GetPairedReads(float.Parse(mean, (IFormatProvider)null),
                                                                  float.Parse(deviation, (IFormatProvider)null));
                        break;

                    case GetPairedReadParameters.GetPairedReadWithLibraryName:
                        pairedReads = seqAlignment.GetPairedReads(library);
                        break;

                    case GetPairedReadParameters.GetPairedReadWithCloneLibraryInfo:
                        CloneLibraryInformation libraryInfo =
                            CloneLibrary.Instance.GetLibraryInformation(library);
                        pairedReads = seqAlignment.GetPairedReads(libraryInfo);
                        break;

                    case GetPairedReadParameters.Default:
                        pairedReads = seqAlignment.GetPairedReads();
                        break;
                }

                Assert.AreEqual(pairedReadsCount, pairedReads.Count.ToString((IFormatProvider)null));

                int i = 0;
                foreach (PairedRead read in pairedReads)
                {
                    Assert.AreEqual(insertLength[i], read.InsertLength.ToString((IFormatProvider)null));
                    Assert.AreEqual(pairedReadType[i], read.PairedType.ToString());

                    // Every read of pair i is compared with expected sequence i.
                    string expectedSequence =
                        new string(expectedSequences[i].Select(a => (char)a).ToArray());

                    foreach (SAMAlignedSequence seq in read.Reads)
                    {
                        Assert.AreEqual(expectedSequence,
                                        new string(seq.QuerySequence.Select(a => (char)a).ToArray()));

                        // Log to NUNIT GUI.
                        ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                               "BAM Parser BVT : Validated Paired read :{0} successfully",
                                                               seq.QuerySequence.ToString()));
                    }
                    i++;
                }
            }
        }
Example #8
0
        /// <summary>
        /// Parse BAM and validate parsed aligned sequences and its properties.
        /// </summary>
        /// <param name="nodeName">Different xml nodes used for different test cases</param>
        /// <param name="BAMParserPam">BAM Parse method parameters</param>
        /// <param name="IsReferenceIndex">When true, validation of the BAM header
        /// records is skipped; when false, the header records are validated.</param>
        void ValidateBAMParser(string nodeName,
                               BAMParserParameters BAMParserPam,
                               bool IsReferenceIndex)
        {
            // Get input and output values from xml node.
            string bamFilePath = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                 Constants.FilePathNode);
            string expectedAlignedSeqFilePath = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ExpectedSequence);
            string refIndexValue = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.RefIndexNode);
            string startIndexValue = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.StartIndexNode);
            string endIndexValue = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.EndIndexNode);
            string alignedSeqCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.AlignedSeqCountNode);
            string refSeqName = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ChromosomeNameNode);

            SequenceAlignmentMap seqAlignment = null;

            // using (rather than a finally that dereferences the parser) cannot throw a
            // NullReferenceException when the constructor itself fails.
            using (BAMParser bamParser = new BAMParser())
            {
                // Parse a BAM file with different parameters.
                switch (BAMParserPam)
                {
                    case BAMParserParameters.StreamReader:
                        using (Stream stream = new FileStream(bamFilePath, FileMode.Open,
                                                              FileAccess.Read))
                        {
                            seqAlignment = bamParser.Parse(stream);
                        }
                        break;

                    case BAMParserParameters.FileName:
                        seqAlignment = bamParser.Parse(bamFilePath);
                        break;

                    case BAMParserParameters.ParseRangeFileName:
                        seqAlignment = bamParser.ParseRange(bamFilePath,
                                                            Convert.ToInt32(refIndexValue, (IFormatProvider)null));
                        break;

                    case BAMParserParameters.ParseRangeWithIndex:
                        seqAlignment = bamParser.ParseRange(bamFilePath,
                                                            Convert.ToInt32(refIndexValue, (IFormatProvider)null),
                                                            Convert.ToInt32(startIndexValue, (IFormatProvider)null),
                                                            Convert.ToInt32(endIndexValue, (IFormatProvider)null));
                        break;

                    // Both values use the same ParseRange(fileName, refSeqName) call.
                    case BAMParserParameters.ParseRangeUsingRefSeq:
                    case BAMParserParameters.ParseRangeUsingRefSeqAndFlag:
                        seqAlignment = bamParser.ParseRange(bamFilePath, refSeqName);
                        break;

                    // Both values use the same ParseRange(fileName, refSeqName, start, end) call.
                    case BAMParserParameters.ParseRangeUsingRefSeqUsingIndex:
                    case BAMParserParameters.ParseRangeUsingIndexesAndFlag:
                        seqAlignment = bamParser.ParseRange(bamFilePath, refSeqName,
                                                            Convert.ToInt32(startIndexValue, (IFormatProvider)null),
                                                            Convert.ToInt32(endIndexValue, (IFormatProvider)null));
                        break;
                }

                // Validate BAM Header record fields.
                if (!IsReferenceIndex)
                {
                    ValidateBAMHeaderRecords(nodeName, seqAlignment);
                }

                IList<SAMAlignedSequence> alignedSeqs = seqAlignment.QuerySequences;

                Assert.AreEqual(alignedSeqCount, alignedSeqs.Count.ToString((IFormatProvider)null));

                // Get expected sequences
                using (FastAParser parserObj = new FastAParser(expectedAlignedSeqFilePath))
                {
                    IList<ISequence> expectedSequencesList = parserObj.Parse().ToList();

                    // Validate aligned sequences from BAM file.
                    for (int index = 0; index < alignedSeqs.Count; index++)
                    {
                        Assert.AreEqual(
                            new string(expectedSequencesList[index].Select(a => (char)a).ToArray()),
                            new string(alignedSeqs[index].QuerySequence.Select(a => (char)a).ToArray()));

                        string loggedSequence = alignedSeqs[index].QuerySequence.ToString();

                        // Log to NUNIT GUI.
                        ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                               "BAM Parser BVT : Validated Aligned sequence :{0} successfully",
                                                               loggedSequence));
                        Console.WriteLine(string.Format((IFormatProvider)null,
                                                        "BAM Parser BVT : Validated the aligned sequence :{0} successfully",
                                                        loggedSequence));
                    }
                }
            }
        }
Example #9
0
        /// <summary>
        /// Displays the chromosome regions that contain orphan reads, followed by the
        /// hot spots obtained by merging the overlapping orphan regions.
        /// </summary>
        /// <param name="filename">Path of the input file; parsed as SAM when SAMInput
        /// is set, otherwise as BAM.</param>
        private void DisplayOrphans(string filename)
        {
            SequenceAlignmentMap alignmentMapobj;

            if (!SAMInput)
            {
                BAMParser bamParser = new BAMParser();
                alignmentMapobj = bamParser.Parse(filename);
            }
            else
            {
                SAMParser samParser = new SAMParser();
                alignmentMapobj = samParser.Parse(filename);
            }

            // get reads from sequence alignment map object.
            IList<PairedRead> pairedReads = alignmentMapobj.GetPairedReads(0, 0);

            // Get the orphan regions. Materialized once so the filter is not re-run
            // for every Count()/foreach below.
            List<PairedRead> orphans = pairedReads
                .Where(PR => PR.PairedType == PairedReadType.Orphan)
                .ToList();

            if (orphans.Count == 0)
            {
                Console.WriteLine("No Orphans to display");
            }

            List<ISequenceRange> orphanRegions = new List<ISequenceRange>(orphans.Count);

            foreach (PairedRead orphanRead in orphans)
            {
                orphanRegions.Add(GetRegion(orphanRead.Read1));
            }

            // Get sequence range grouping object.
            SequenceRangeGrouping rangeGroup = new SequenceRangeGrouping(orphanRegions);

            if (!rangeGroup.GroupIDs.Any())
            {
                Console.Write("\r\nNo Orphan reads to display");
            }
            else
            {
                Console.Write("Region of Orphan reads:");
                DisplaySequenceRange(rangeGroup);
            }

            // Overlapping orphan regions are merged and reported as hot spots.
            SequenceRangeGrouping mergedRegions = rangeGroup.MergeOverlaps();

            if (!mergedRegions.GroupIDs.Any())
            {
                Console.Write("\r\nNo hot spots to display");
            }
            else
            {
                Console.Write("\r\nChromosomal hot spot:");
                DisplaySequenceRange(mergedRegions);
            }
        }
        } = new Dictionary <string, int>();                                                          // key: mappingStrand + strandFromGene

        /// <summary>
        /// Given a BAM file, try to guess the RNA-Seq experiment:
        ///	1) single-end or pair-end
        ///	2) strand_specific or not
        ///	3) if it is strand-specific, what's the strand_ness of the protocol
        /// The outcome is stored in Protocol, Strandedness, FractionForwardStranded,
        /// FractionReverseStranded and FractionUndetermined; the per-key tallies are
        /// accumulated in PairedStrandedness and SingleStrandedness.
        /// </summary>
        /// <param name="bamPath">Path of the BAM file to read</param>
        /// <param name="geneModelPath">Path passed, together with the genome, to the GeneModel constructor</param>
        /// <param name="genome">Genome passed to the GeneModel constructor</param>
        /// <param name="minFractionStrandSpecific">Minimum forward (or reverse) fraction for the protocol to be called forward (or reverse) stranded</param>
        /// <exception cref="ArgumentException">Thrown when the undetermined fraction is greater than 0.5</exception>
        private void CheckProperties(string bamPath, string geneModelPath, Genome genome, double minFractionStrandSpecific)
        {
            GeneModel gm = new GeneModel(genome, geneModelPath);

            using (var reader = File.OpenRead(bamPath))
            {
                Console.WriteLine("Reading BAM file.");

                // read bam, and filter out reads that are QC failures, unmapped, duplicates, or secondary
                BAMParser bam = new BAMParser();
                var reads = bam.Parse(reader)
                               .Where(read =>
                                      !read.Flag.HasFlag(SAMFlags.QualityCheckFailure) && !read.Flag.HasFlag(SAMFlags.UnmappedQuery) &&
                                      !read.Flag.HasFlag(SAMFlags.Duplicate) && !read.Flag.HasFlag(SAMFlags.NonPrimeAlignment))
                               .ToList();

                Console.WriteLine("Evaluating reads.");

                Parallel.ForEach(reads, read =>
                {
                    // set the interval contained by this read, and get the gene regions nearby
                    bool isReversed       = read.Flag.HasFlag(SAMFlags.QueryOnReverseStrand);
                    string mapStrand      = isReversed ? "-" : "+";
                    Interval readInterval = new Interval(null, read.RName, "source", mapStrand, read.Pos, read.RefEndPos, null);
                    if (!gm.GenomeForest.Forest.TryGetValue(readInterval.ChromosomeID, out IntervalTree nearbyGeneTree))
                    {
                        return;
                    }

                    List <Interval> nearbyGeneRegions = nearbyGeneTree.Query(readInterval);
                    if (nearbyGeneRegions.Count == 0)
                    {
                        return;
                    }

                    // count up paired-end or single-end read properties
                    bool isPaired = read.Flag.HasFlag(SAMFlags.PairedRead);
                    bool isRead1  = read.Flag.HasFlag(SAMFlags.FirstReadInPair);
                    bool isRead2  = read.Flag.HasFlag(SAMFlags.SecondReadInPair);
                    string readId = isRead1 ? "1" : isRead2 ? "2" : null;
                    Dictionary <string, int> dict   = isPaired ? PairedStrandedness : SingleStrandedness;
                    HashSet <string> strandFromGene = new HashSet <string>(nearbyGeneRegions.Select(x => x.Strand));
                    foreach (string strand in strandFromGene)
                    {
                        string key = isPaired ?
                                     readId + mapStrand + strand :
                                     mapStrand + strand;

                        // The dictionaries are shared by all worker threads.
                        lock (dict)
                        {
                            // TryGetValue leaves count at 0 for a new key, so this covers both
                            // the first occurrence and every later one. The incremented value
                            // has to be stored back; bumping the out local alone loses it.
                            dict.TryGetValue(key, out int count);
                            dict[key] = count + 1;
                        }
                    }
                });

                // From RSeQC:
                //      Not strand specific:
                // This is PairEnd Data
                // Fraction of reads failed to determine: 0.0172
                // Fraction of reads explained by "1++,1--,2+-,2-+": 0.4903
                // Fraction of reads explained by "1+-,1-+,2++,2--": 0.4925
                //      Strand specific:
                // This is PairEnd Data
                // Fraction of reads failed to determine: 0.0072
                // Fraction of reads explained by "1++,1--,2+-,2-+": 0.9441
                // Fraction of reads explained by "1+-,1-+,2++,2--": 0.0487
                SingleStrandedness.TryGetValue("++", out int sForward1);
                SingleStrandedness.TryGetValue("--", out int sForward2);

                SingleStrandedness.TryGetValue("+-", out int sReverse1);
                SingleStrandedness.TryGetValue("-+", out int sReverse2);

                PairedStrandedness.TryGetValue("1++", out int pForward1);
                PairedStrandedness.TryGetValue("1--", out int pForward2);
                PairedStrandedness.TryGetValue("2+-", out int pForward3);
                PairedStrandedness.TryGetValue("2-+", out int pForward4);

                PairedStrandedness.TryGetValue("1+-", out int pReverse1);
                PairedStrandedness.TryGetValue("1-+", out int pReverse2);
                PairedStrandedness.TryGetValue("2++", out int pReverse3);
                PairedStrandedness.TryGetValue("2--", out int pReverse4);

                int singleForward = sForward1 + sForward2;
                int singleReverse = sReverse1 + sReverse2;
                int pairedForward = pForward1 + pForward2 + pForward3 + pForward4;
                int pairedReverse = pReverse1 + pReverse2 + pReverse3 + pReverse4;
                int singleTotal   = SingleStrandedness.Values.Sum();
                int pairedTotal   = PairedStrandedness.Values.Sum();

                int forwardCount;
                int reverseCount;
                int totalCount;

                if (PairedStrandedness.Count > 0 && SingleStrandedness.Count == 0)
                {
                    Protocol     = RnaSeqProtocol.PairedEnd;
                    forwardCount = pairedForward;
                    reverseCount = pairedReverse;
                    totalCount   = pairedTotal;
                }
                else if (SingleStrandedness.Count > 0 && PairedStrandedness.Count == 0)
                {
                    Protocol     = RnaSeqProtocol.SingleEnd;
                    forwardCount = singleForward;
                    reverseCount = singleReverse;
                    totalCount   = singleTotal;
                }
                else
                {
                    Protocol     = RnaSeqProtocol.Mixture;
                    Strandedness = Strandedness.None;
                    forwardCount = singleForward + pairedForward;
                    reverseCount = singleReverse + pairedReverse;

                    // The numerators combine single and paired counts, so the denominator
                    // must combine both totals as well.
                    totalCount = singleTotal + pairedTotal;
                }

                // NOTE: when no read was counted at all, totalCount is 0 and the
                // floating-point divisions below yield NaN rather than throwing.
                FractionForwardStranded = (double)forwardCount / totalCount;
                FractionReverseStranded = (double)reverseCount / totalCount;
                FractionUndetermined    = 1 - FractionForwardStranded - FractionReverseStranded;
                if (FractionUndetermined > 0.5)
                {
                    throw new ArgumentException("A large number of reads failed to determine the standedness of the protocol within " + bamPath);
                }
                Strandedness = FractionForwardStranded >= minFractionStrandSpecific ? Strandedness.Forward :
                               FractionReverseStranded >= minFractionStrandSpecific ? Strandedness.Reverse :
                               Strandedness.None;
            }
        }