// Runs Import.DoImport with a temporary file as FilePath[0] and the sample BAM
// file as FilePath[1], then parses the BAM file with BAMParser and the temporary
// file with SAMParser and asserts that CompareSAM reports the two maps as equal.
public void ImportTestWithSAM()
{
    const string bamFilePath = @"TestUtils\SAM\SeqAlignment.bam";
    string tempFilename = Path.GetTempFileName();

    try
    {
        Import options = new Import();
        options.FilePath = new string[2];
        options.FilePath[0] = tempFilename;
        options.FilePath[1] = bamFilePath;
        options.DoImport();

        using (BAMParser parser = new BAMParser())
        {
            SequenceAlignmentMap map = parser.Parse(bamFilePath);

            using (SAMParser parse = new SAMParser())
            {
                SequenceAlignmentMap map1 = parse.Parse(tempFilename);
                Assert.IsTrue(CompareSAM(map, map1));
            }
        }
    }
    finally
    {
        // Remove the scratch file even when the import, a parse or the
        // assertion throws; previously it was only deleted on success.
        File.Delete(tempFilename);
    }
}
// Parses the sample BAM file and checks that paired reads come back from both
// the parameterless GetPairedReads overload and GetPairedReads(250, 50).
public void TestGettingPairedReads()
{
    const string samplePath = @"TestUtils\BAM\SeqAlignment.bam";

    using (BAMParser bamParser = new BAMParser())
    {
        SequenceAlignmentMap parsedMap = bamParser.Parse(samplePath);
        Assert.IsTrue(parsedMap != null);

        // Overload without arguments.
        IList<PairedRead> defaultPairs = parsedMap.GetPairedReads();
        Assert.IsTrue(defaultPairs.Count > 0);

        // Overload taking two numeric arguments.
        IList<PairedRead> boundedPairs = parsedMap.GetPairedReads(250, 50);
        Assert.IsTrue(boundedPairs.Count > 0);
    }
}
/// <summary>
/// Validate formatted BAM file.
/// Parses the BAM file named by the xml node, writes it back out through
/// BAMFormatter using the overload selected by <paramref name="BAMParserPam"/>,
/// re-parses the written file and compares its header records, sequence count
/// and aligned sequences with the expected values from the xml node.
/// </summary>
/// <param name="nodeName">Different xml nodes used for different test cases</param>
/// <param name="BAMParserPam">BAM Format method parameters</param>
void ValidateBAMFormatter(string nodeName, BAMParserParameters BAMParserPam)
{
    // Get input and output values from xml node.
    string bamFilePath = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedAlignedSeqFilePath = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);
    string alignedSeqCount = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.AlignedSeqCountNode);

    try
    {
        using (BAMParser bamParserObj = new BAMParser())
        {
            // Parse a BAM file.
            SequenceAlignmentMap seqAlignment = bamParserObj.Parse(bamFilePath);

            // Create a BAM formatter object.
            BAMFormatter formatterObj = new BAMFormatter();

            // Write/Format aligned sequences to BAM file.
            switch (BAMParserPam)
            {
                case BAMParserParameters.StreamWriter:
                    using (Stream stream = new FileStream(
                        Constants.BAMTempFileName, FileMode.Create, FileAccess.Write))
                    {
                        formatterObj.Format(seqAlignment, stream);
                    }
                    break;

                case BAMParserParameters.FileName:
                    formatterObj.Format(seqAlignment, Constants.BAMTempFileName);
                    break;

                case BAMParserParameters.IndexFile:
                    formatterObj.Format(
                        seqAlignment, Constants.BAMTempFileName, Constants.BAMTempIndexFile);

                    // The existence check used to be evaluated and thrown away;
                    // assert on it so a missing index file fails the test.
                    Assert.IsTrue(File.Exists(Constants.BAMTempIndexFile));
                    break;

                default:
                    break;
            }

            // Parse formatted BAM file and validate aligned sequences.
            SequenceAlignmentMap expectedSeqAlignmentMap = bamParserObj.Parse(
                Constants.BAMTempFileName);

            // Validate Parsed BAM file Header record fields.
            ValidateBAMHeaderRecords(nodeName, expectedSeqAlignmentMap);

            IList<SAMAlignedSequence> alignedSeqs = expectedSeqAlignmentMap.QuerySequences;
            Assert.AreEqual(alignedSeqCount, alignedSeqs.Count.ToString((IFormatProvider)null));

            // Get expected sequences
            using (FastaParser parserObj = new FastaParser())
            {
                IList<ISequence> expectedSequences = parserObj.Parse(expectedAlignedSeqFilePath);

                // Validate aligned sequences from BAM file.
                for (int index = 0; index < alignedSeqs.Count; index++)
                {
                    string actualSequence = alignedSeqs[index].QuerySequence.ToString();
                    Assert.AreEqual(expectedSequences[index].ToString(), actualSequence);

                    // Log to NUNIT GUI.
                    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                        "BAM Formatter BVT : Validated Aligned sequence :{0} successfully",
                        actualSequence));
                    Console.WriteLine(string.Format((IFormatProvider)null,
                        "BAM Formatter BVT : Validated the aligned sequence :{0} successfully",
                        actualSequence));
                }
            }
        }
    }
    finally
    {
        // Clean up the scratch files even when a parse or an assertion fails,
        // so one failing case does not leave stale output for the next one.
        File.Delete(Constants.BAMTempFileName);
        File.Delete(Constants.BAMTempIndexFile);
    }
}
/// <summary>
/// Merge multiple sorted alignments.
/// SAMUtil.exe out.bam in1.bam in2.bam
/// </summary>
/// <remarks>
/// FilePaths[0] is the output file and FilePaths[1..] are the input BAM files.
/// The header written to the output comes from HeaderFile when it is set,
/// otherwise from the first input file. Inputs are parsed and sorted in
/// parallel and appended to the working lists in the order they finish.
/// The merged data is written to a temporary file and then compressed into
/// the output file.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// FilePaths is null or holds fewer than three entries.
/// </exception>
/// <exception cref="AggregateException">
/// An input or header file could not be parsed, was empty, or carried no
/// header; the InvalidOperationException raised inside the parallel loop is
/// delivered wrapped by Parallel.For.
/// </exception>
public void DoMerge()
{
    if (FilePaths == null)
    {
        throw new InvalidOperationException("FilePath");
    }

    if (FilePaths.Length < 3)
    {
        throw new InvalidOperationException(Resources.MergeHelp);
    }

    IList<IList<BAMSortedIndex>> sortedIndexes = new List<IList<BAMSortedIndex>>();
    IList<SequenceAlignmentMap> sequenceAlignmentMaps = new List<SequenceAlignmentMap>();

    // Every parser created below is tracked here and disposed only after the
    // merged output has been written, so the parsed maps are never used after
    // their parser has been released.
    List<IDisposable> parsers = new List<IDisposable>();
    string filePath = null;

    try
    {
        Parallel.For(1, FilePaths.Length, (int index) =>
        {
            BAMParser parser = new BAMParser();
            lock (parsers)
            {
                parsers.Add(parser);
            }

            SequenceAlignmentMap map;
            try
            {
                map = parser.Parse(FilePaths[index]);
            }
            catch (Exception ex)
            {
                // Keep the original failure as the inner exception.
                throw new InvalidOperationException(Resources.InvalidBAMFile, ex);
            }

            if (map == null)
            {
                throw new InvalidOperationException(Resources.EmptyFile);
            }

            // Only the first input takes part in choosing the output header.
            if (index == 1)
            {
                if (string.IsNullOrEmpty(HeaderFile))
                {
                    if (map.Header.RecordFields.Count == 0)
                    {
                        throw new InvalidOperationException(Resources.HeaderMissing);
                    }

                    _header = map.Header;
                }
                else
                {
                    SAMParser parse = new SAMParser();
                    lock (parsers)
                    {
                        parsers.Add(parse);
                    }

                    SequenceAlignmentMap head;
                    try
                    {
                        head = parse.Parse(HeaderFile);
                    }
                    catch (Exception ex)
                    {
                        throw new InvalidOperationException(Resources.IncorrectHeaderFile, ex);
                    }

                    if (head == null)
                    {
                        throw new InvalidOperationException(Resources.EmptyFile);
                    }

                    _header = head.Header;
                }
            }

            IList<BAMSortedIndex> sortedIndex = Sort(
                map,
                SortByReadName ? BAMSortByFields.ReadNames : BAMSortByFields.ChromosomeCoordinates);

            // Both lists are guarded by the same lock so that position i in one
            // always corresponds to position i in the other.
            lock (sortedIndexes)
            {
                sortedIndexes.Add(sortedIndex);
                sequenceAlignmentMaps.Add(map);
            }
        });

        filePath = Path.GetTempFileName();
        using (FileStream fstemp = new FileStream(filePath, FileMode.Create, FileAccess.ReadWrite))
        {
            BAMFormatter formatter = new BAMFormatter();
            formatter.WriteHeader(_header, fstemp);

            if (SortByReadName)
            {
                IList<BAMSortedIndex> sortedIndex = sortedIndexes.Select(a => a.First()).ToList();
                WriteMergeFileSortedByReadName(sortedIndex, fstemp, formatter, sequenceAlignmentMaps);
            }
            else
            {
                WriteMergeFile(sortedIndexes, fstemp, formatter, sequenceAlignmentMaps);
            }

            using (FileStream fsoutput = new FileStream(FilePaths[0], FileMode.Create, FileAccess.Write))
            {
                // Rewind the uncompressed temp data before compressing it.
                fstemp.Seek(0, SeekOrigin.Begin);
                formatter.CompressBAMFile(fstemp, fsoutput);
            }
        }
    }
    finally
    {
        foreach (IDisposable tracked in parsers)
        {
            tracked.Dispose();
        }

        // Remove the scratch file on failure as well as on success.
        if (filePath != null)
        {
            File.Delete(filePath);
        }
    }
}
/// <summary>
/// Identify hot spot chromosomes for length anomaly regions.
/// Prints the length anomaly regions, the orphan regions and their
/// intersection to the console.
/// </summary>
/// <param name="filename">Input file; parsed as SAM when SAMInput is set, otherwise as BAM</param>
/// <param name="mean">Mean value; -1 means "not supplied"</param>
/// <param name="deviation">Standard deviation; -1 means "not supplied"</param>
private void IdentifyLentghAnamolies(string filename, float mean = -1, float deviation = -1)
{
    // When either value is left at its -1 default, the parameterless
    // GetPairedReads overload is used instead of the (mean, deviation) one.
    bool calculateMeanNdeviation = mean == -1 || deviation == -1;

    // Whichever parser is created is released once all output is written.
    IDisposable parser = null;
    try
    {
        SequenceAlignmentMap alignmentMapobj;
        if (!SAMInput)
        {
            BAMParser bamParser = new BAMParser();
            parser = bamParser;
            alignmentMapobj = bamParser.Parse(filename);
        }
        else
        {
            SAMParser samParser = new SAMParser();
            parser = samParser;
            alignmentMapobj = samParser.Parse(filename);
        }

        // get reads from sequence alignment map object.
        IList<PairedRead> pairedReads = calculateMeanNdeviation
            ? alignmentMapobj.GetPairedReads()
            : alignmentMapobj.GetPairedReads(mean, deviation);

        // Get the orphan regions. Materialised once so the filter is not
        // re-run for every Count/foreach below.
        List<PairedRead> orphans = pairedReads
            .Where(PR => PR.PairedType == PairedReadType.Orphan)
            .ToList();
        if (orphans.Count == 0)
        {
            Console.WriteLine("No Orphans to display");
        }

        List<ISequenceRange> orphanRegions = new List<ISequenceRange>(orphans.Count);
        foreach (PairedRead orphanRead in orphans)
        {
            orphanRegions.Add(GetRegion(orphanRead.Read1));
        }

        // Get sequence range grouping for Orphan regions.
        SequenceRangeGrouping orphanRangegroup = new SequenceRangeGrouping(orphanRegions);

        // Get the Length anomalies regions.
        List<PairedRead> lengthAnomalies = pairedReads
            .Where(PE => PE.PairedType == PairedReadType.LengthAnomaly)
            .ToList();
        if (lengthAnomalies.Count == 0)
        {
            Console.WriteLine("No Anomalies to display");
        }

        List<ISequenceRange> lengthAnomalyRegions = new List<ISequenceRange>(lengthAnomalies.Count);
        foreach (PairedRead laRead in lengthAnomalies)
        {
            // Range runs from the first read's position over the insert length.
            SequenceRange range = new SequenceRange();
            range.ID = laRead.Read1.RName;
            range.Start = laRead.Read1.Pos;
            range.End = laRead.Read1.Pos + laRead.InsertLength;
            lengthAnomalyRegions.Add(range);
        }

        // Get sequence range grouping for length anomaly regions.
        SequenceRangeGrouping lengthAnomalyRangegroup = new SequenceRangeGrouping(lengthAnomalyRegions);
        if (!lengthAnomalyRangegroup.GroupIDs.Any())
        {
            Console.Write("\r\nNo Length anomalies reads to display");
        }
        else
        {
            Console.Write("Region of length anomaly:");
            DisplaySequenceRange(lengthAnomalyRangegroup);
        }

        if (!orphanRangegroup.GroupIDs.Any())
        {
            Console.Write("\r\nNo Orphan reads to display");
        }
        else
        {
            Console.Write("\r\nRegion of Orphan reads:");
            DisplaySequenceRange(orphanRangegroup);
        }

        // Hot spots are the regions present in both groupings.
        SequenceRangeGrouping intersectedRegions = lengthAnomalyRangegroup.Intersect(orphanRangegroup);
        if (!intersectedRegions.GroupIDs.Any())
        {
            Console.Write("\r\nNo Hot spots found");
        }
        else
        {
            Console.Write("\r\nChromosomal Hot spot of length anomaly and Orphan region:");
            DisplaySequenceRange(intersectedRegions);
        }
    }
    finally
    {
        if (parser != null)
        {
            parser.Dispose();
        }
    }
}
/// <summary>
/// Validate different paired read types.
/// Loads paired reads from the BAM file named by the xml node using the mean
/// and deviation from that node, then checks each read, in order, against the
/// comma-separated expected values using the API selected by
/// <paramref name="pams"/>.
/// </summary>
/// <param name="nodeName">XML node name</param>
/// <param name="pams">GetPairedReadTypes method parameters</param>
void ValidatePairedReadTypes(string nodeName, GetPairedReadTypeParameters pams)
{
    // Get input and output values from xml node.
    string bamFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MeanNode);
    string deviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DeviationValueNode);
    string library = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryNameNode);
    string[] pairedReadType = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.PairedReadTypeNode).Split(',');
    string[] insertLength = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.InsertLengthNode).Split(',');

    // The parser is now disposed even when Parse itself throws.
    using (BAMParser bamParser = new BAMParser())
    {
        SequenceAlignmentMap seqAlignmentMapObj = bamParser.Parse(bamFilePath);

        // Every case loads its reads the same way; kept as a delegate so that
        // nothing is parsed or loaded when no case matches.
        Func<IList<PairedRead>> loadPairedReads = () => seqAlignmentMapObj.GetPairedReads(
            float.Parse(mean, (IFormatProvider)null),
            float.Parse(deviation, (IFormatProvider)null));

        // Compares the value produced for the i-th read with the i-th expected
        // entry. Expected is passed first so failure messages read correctly.
        Action<IList<PairedRead>, string[], Func<PairedRead, string>> assertEachRead =
            (reads, expectedValues, describe) =>
            {
                int i = 0;
                foreach (PairedRead read in reads)
                {
                    Assert.AreEqual(expectedValues[i], describe(read));
                    i++;
                }
            };

        switch (pams)
        {
            case GetPairedReadTypeParameters.PaireReadTypeUsingLibraryName:
                assertEachRead(
                    loadPairedReads(),
                    pairedReadType,
                    read => PairedRead.GetPairedReadType(read, library).ToString());
                break;

            case GetPairedReadTypeParameters.PaireReadTypeUsingCloneLibraryInfo:
                {
                    IList<PairedRead> pairedReads = loadPairedReads();
                    CloneLibraryInformation libraryInfo =
                        CloneLibrary.Instance.GetLibraryInformation(library);
                    assertEachRead(
                        pairedReads,
                        pairedReadType,
                        read => PairedRead.GetPairedReadType(read, libraryInfo).ToString());
                    break;
                }

            case GetPairedReadTypeParameters.PaireReadTypeUsingMeanAndDeviation:
                {
                    IList<PairedRead> pairedReads = loadPairedReads();

                    // Parsed once here instead of once per read.
                    float meanValue = float.Parse(mean, (IFormatProvider)null);
                    float deviationValue = float.Parse(deviation, (IFormatProvider)null);
                    assertEachRead(
                        pairedReads,
                        pairedReadType,
                        read => PairedRead.GetPairedReadType(read, meanValue, deviationValue).ToString());
                    break;
                }

            case GetPairedReadTypeParameters.PaireReadTypeUsingReadsAndLibrary:
                assertEachRead(
                    loadPairedReads(),
                    pairedReadType,
                    read => PairedRead.GetPairedReadType(read.Read1, read.Read2, library).ToString());
                break;

            case GetPairedReadTypeParameters.PaireReadTypeUsingReadsAndLibraryInfo:
                {
                    IList<PairedRead> pairedReads = loadPairedReads();
                    CloneLibraryInformation libraryInfo =
                        CloneLibrary.Instance.GetLibraryInformation(library);
                    assertEachRead(
                        pairedReads,
                        pairedReadType,
                        read => PairedRead.GetPairedReadType(read.Read1, read.Read2, libraryInfo).ToString());
                    break;
                }

            case GetPairedReadTypeParameters.GetInsertLength:
                // The library information previously looked up here was never used.
                assertEachRead(
                    loadPairedReads(),
                    insertLength,
                    read => PairedRead.GetInsertLength(read.Read1, read.Read2)
                        .ToString((IFormatProvider)null));
                break;
        }

        ApplicationLog.WriteLine(
            "BAM Parser BVT : Validated Paired read Type Successfully");
    }
}
/// <summary>
/// Validate GetPaired method.
/// Loads paired reads from the BAM file named by the xml node through the
/// GetPairedReads overload selected by <paramref name="pams"/>, then checks
/// the read count, and per pair the insert length, the paired type and the
/// query sequence of each contained read against the expected values.
/// </summary>
/// <param name="nodeName">XML node name</param>
/// <param name="pams">GetPairedReads method parameters</param>
void ValidatePairedReads(string nodeName, GetPairedReadParameters pams)
{
    // Get input and output values from xml node.
    string bamFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedAlignedSeqFilePath = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);
    string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MeanNode);
    string deviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DeviationValueNode);
    string library = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryNameNode);
    string pairedReadsCount = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.PairedReadsNode);
    string[] insertLength = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.InsertLengthNode).Split(',');
    string[] pairedReadType = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.PairedReadTypeNode).Split(',');

    // Both parsers are disposed on every exit path; the FastA parser used to
    // be left undisposed.
    using (BAMParser bamParser = new BAMParser())
    using (FastAParser parserObj = new FastAParser(expectedAlignedSeqFilePath))
    {
        SequenceAlignmentMap seqAlignment = bamParser.Parse(bamFilePath);
        IList<PairedRead> pairedReads = null;

        switch (pams)
        {
            case GetPairedReadParameters.GetPairedReadWithParameters:
                pairedReads = seqAlignment.GetPairedReads(
                    float.Parse(mean, (IFormatProvider)null),
                    float.Parse(deviation, (IFormatProvider)null));
                break;

            case GetPairedReadParameters.GetPairedReadWithLibraryName:
                pairedReads = seqAlignment.GetPairedReads(library);
                break;

            case GetPairedReadParameters.GetPairedReadWithCloneLibraryInfo:
                CloneLibraryInformation libraryInfo =
                    CloneLibrary.Instance.GetLibraryInformation(library);
                pairedReads = seqAlignment.GetPairedReads(libraryInfo);
                break;

            case GetPairedReadParameters.Default:
                pairedReads = seqAlignment.GetPairedReads();
                break;
        }

        Assert.AreEqual(pairedReadsCount, pairedReads.Count.ToString((IFormatProvider)null));

        // Read the expected sequences once; indexing the parser's enumerable
        // with ElementAt walked it from the start for every single read.
        IList<ISequence> expectedSequences = parserObj.Parse().ToList();

        int i = 0;
        foreach (PairedRead read in pairedReads)
        {
            Assert.AreEqual(insertLength[i], read.InsertLength.ToString((IFormatProvider)null));
            Assert.AreEqual(pairedReadType[i], read.PairedType.ToString());

            // Every read of pair i is compared with expected sequence i.
            foreach (SAMAlignedSequence seq in read.Reads)
            {
                Assert.AreEqual(
                    new string(expectedSequences[i].Select(a => (char)a).ToArray()),
                    new string(seq.QuerySequence.Select(a => (char)a).ToArray()));

                // Log to NUNIT GUI.
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                    "BAM Parser BVT : Validated Paired read :{0} successfully",
                    seq.QuerySequence.ToString()));
            }

            i++;
        }
    }
}
/// <summary>
/// Parse BAM and validate parsed aligned sequences and its properties.
/// </summary>
/// <param name="nodeName">Different xml nodes used for different test cases</param>
/// <param name="BAMParserPam">BAM Parse method parameters</param>
/// <param name="IsReferenceIndex">True to skip validation of the BAM header
/// records. False to validate them.</param>
void ValidateBAMParser(string nodeName, BAMParserParameters BAMParserPam, bool IsReferenceIndex)
{
    // Get input and output values from xml node.
    string bamFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedAlignedSeqFilePath = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);
    string refIndexValue = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RefIndexNode);
    string startIndexValue = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StartIndexNode);
    string endIndexValue = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.EndIndexNode);
    string alignedSeqCount = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.AlignedSeqCountNode);
    string refSeqName = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ChromosomeNameNode);

    // 'using' replaces the old try/finally, whose unconditional Dispose call
    // dereferenced a null parser whenever the constructor itself threw.
    using (BAMParser bamParser = new BAMParser())
    {
        SequenceAlignmentMap seqAlignment = null;

        // Parse a BAM file with different parameters.
        switch (BAMParserPam)
        {
            case BAMParserParameters.StreamReader:
                using (Stream stream = new FileStream(bamFilePath, FileMode.Open, FileAccess.Read))
                {
                    seqAlignment = bamParser.Parse(stream);
                }
                break;

            case BAMParserParameters.FileName:
                seqAlignment = bamParser.Parse(bamFilePath);
                break;

            case BAMParserParameters.ParseRangeFileName:
                seqAlignment = bamParser.ParseRange(
                    bamFilePath,
                    Convert.ToInt32(refIndexValue, (IFormatProvider)null));
                break;

            case BAMParserParameters.ParseRangeWithIndex:
                seqAlignment = bamParser.ParseRange(
                    bamFilePath,
                    Convert.ToInt32(refIndexValue, (IFormatProvider)null),
                    Convert.ToInt32(startIndexValue, (IFormatProvider)null),
                    Convert.ToInt32(endIndexValue, (IFormatProvider)null));
                break;

            // The "...AndFlag" variants make exactly the same call as their
            // plain counterparts, so they share a body.
            case BAMParserParameters.ParseRangeUsingRefSeq:
            case BAMParserParameters.ParseRangeUsingRefSeqAndFlag:
                seqAlignment = bamParser.ParseRange(bamFilePath, refSeqName);
                break;

            case BAMParserParameters.ParseRangeUsingRefSeqUsingIndex:
            case BAMParserParameters.ParseRangeUsingIndexesAndFlag:
                seqAlignment = bamParser.ParseRange(
                    bamFilePath,
                    refSeqName,
                    Convert.ToInt32(startIndexValue, (IFormatProvider)null),
                    Convert.ToInt32(endIndexValue, (IFormatProvider)null));
                break;
        }

        // Validate BAM Header record fields.
        if (!IsReferenceIndex)
        {
            ValidateBAMHeaderRecords(nodeName, seqAlignment);
        }

        IList<SAMAlignedSequence> alignedSeqs = seqAlignment.QuerySequences;
        Assert.AreEqual(alignedSeqCount, alignedSeqs.Count.ToString((IFormatProvider)null));

        // Get expected sequences
        using (FastAParser parserObj = new FastAParser(expectedAlignedSeqFilePath))
        {
            IList<ISequence> expectedSequencesList = parserObj.Parse().ToList();

            // Validate aligned sequences from BAM file.
            for (int index = 0; index < alignedSeqs.Count; index++)
            {
                Assert.AreEqual(
                    new string(expectedSequencesList[index].Select(a => (char)a).ToArray()),
                    new string(alignedSeqs[index].QuerySequence.Select(a => (char)a).ToArray()));

                // Log to NUNIT GUI.
                string loggedSequence = alignedSeqs[index].QuerySequence.ToString();
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                    "BAM Parser BVT : Validated Aligned sequence :{0} successfully",
                    loggedSequence));
                Console.WriteLine(string.Format((IFormatProvider)null,
                    "BAM Parser BVT : Validated the aligned sequence :{0} successfully",
                    loggedSequence));
            }
        }
    }
}
/// <summary>
/// Get chromosomes with orphan regions.
/// Prints the orphan read regions and the result of merging their overlaps
/// to the console.
/// </summary>
/// <param name="filename">Path of the input file; parsed as SAM when SAMInput is set, otherwise as BAM</param>
private void DisplayOrphans(string filename)
{
    // Whichever parser is created is released once all output is written.
    IDisposable parser = null;
    try
    {
        SequenceAlignmentMap alignmentMapobj;
        if (!SAMInput)
        {
            BAMParser bamParser = new BAMParser();
            parser = bamParser;
            alignmentMapobj = bamParser.Parse(filename);
        }
        else
        {
            SAMParser samParser = new SAMParser();
            parser = samParser;
            alignmentMapobj = samParser.Parse(filename);
        }

        // get reads from sequence alignment map object.
        IList<PairedRead> pairedReads = alignmentMapobj.GetPairedReads(0, 0);

        // Get the orphan regions. Materialised once so the filter is not
        // re-run for every Count/foreach below.
        List<PairedRead> orphans = pairedReads
            .Where(PR => PR.PairedType == PairedReadType.Orphan)
            .ToList();
        if (orphans.Count == 0)
        {
            Console.WriteLine("No Orphans to display");
        }

        List<ISequenceRange> orphanRegions = new List<ISequenceRange>(orphans.Count);
        foreach (PairedRead orphanRead in orphans)
        {
            orphanRegions.Add(GetRegion(orphanRead.Read1));
        }

        // Get sequence range grouping object.
        SequenceRangeGrouping rangeGroup = new SequenceRangeGrouping(orphanRegions);
        if (!rangeGroup.GroupIDs.Any())
        {
            Console.Write("\r\nNo Orphan reads to display");
        }
        else
        {
            Console.Write("Region of Orphan reads:");
            DisplaySequenceRange(rangeGroup);
        }

        // Overlapping orphan regions merged together are reported as hot spots.
        SequenceRangeGrouping mergedRegions = rangeGroup.MergeOverlaps();
        if (!mergedRegions.GroupIDs.Any())
        {
            Console.Write("\r\nNo hot spots to display");
        }
        else
        {
            Console.Write("\r\nChromosomal hot spot:");
            DisplaySequenceRange(mergedRegions);
        }
    }
    finally
    {
        if (parser != null)
        {
            parser.Dispose();
        }
    }
}
} = new Dictionary <string, int>(); // key: mappingStrand + strandFromGene

    /// <summary>
    /// Given a BAM file, try to guess the RNA-Seq experiment:
    /// 1) single-end or pair-end
    /// 2) strand_specific or not
    /// 3) if it is strand-specific, what's the strand_ness of the protocol
    /// The results are stored in Protocol, Strandedness, FractionForwardStranded,
    /// FractionReverseStranded and FractionUndetermined.
    /// </summary>
    /// <param name="bamPath">Path of the BAM file to read</param>
    /// <param name="geneModelPath">Path passed to the GeneModel constructor</param>
    /// <param name="genome">Genome passed to the GeneModel constructor</param>
    /// <param name="minFractionStrandSpecific">Smallest forward (or reverse)
    /// fraction at which the protocol is reported as forward (or reverse) stranded</param>
    /// <exception cref="ArgumentException">More than half of the counted
    /// read/strand combinations are neither forward nor reverse.</exception>
    private void CheckProperties(string bamPath, string geneModelPath, Genome genome, double minFractionStrandSpecific)
    {
        GeneModel gm = new GeneModel(genome, geneModelPath);

        List<SAMAlignedSequence> reads;
        using (var reader = File.OpenRead(bamPath))
        {
            Console.WriteLine("Reading BAM file.");

            // read bam, and filter out reads that are QC failures, unmapped, duplicates, or secondary
            BAMParser bam = new BAMParser();
            var unfilteredReads = bam.Parse(reader).ToList();
            reads = unfilteredReads
                .Where(read =>
                    !read.Flag.HasFlag(SAMFlags.QualityCheckFailure) &&
                    !read.Flag.HasFlag(SAMFlags.UnmappedQuery) &&
                    !read.Flag.HasFlag(SAMFlags.Duplicate) &&
                    !read.Flag.HasFlag(SAMFlags.NonPrimeAlignment))
                .ToList();
        }

        Console.WriteLine("Evaluating reads.");
        Parallel.ForEach(reads, read =>
        {
            // set the interval contained by this read, and get the gene regions nearby
            bool isReversed = read.Flag.HasFlag(SAMFlags.QueryOnReverseStrand);
            string mapStrand = isReversed ? "-" : "+";
            Interval readInterval = new Interval(null, read.RName, "source", mapStrand, read.Pos, read.RefEndPos, null);

            if (!gm.GenomeForest.Forest.TryGetValue(readInterval.ChromosomeID, out IntervalTree nearbyGeneTree))
            {
                return;
            }

            List<Interval> nearbyGeneRegions = nearbyGeneTree.Query(readInterval);
            if (nearbyGeneRegions.Count == 0)
            {
                return;
            }

            // count up paired-end or single-end read properties
            bool isPaired = read.Flag.HasFlag(SAMFlags.PairedRead);
            bool isRead1 = read.Flag.HasFlag(SAMFlags.FirstReadInPair);
            bool isRead2 = read.Flag.HasFlag(SAMFlags.SecondReadInPair);
            string readId = isRead1 ? "1" : isRead2 ? "2" : null;

            Dictionary<string, int> dict = isPaired ? PairedStrandedness : SingleStrandedness;
            HashSet<string> strandFromGene = new HashSet<string>(nearbyGeneRegions.Select(x => x.Strand));
            foreach (string strand in strandFromGene)
            {
                string key = isPaired ? readId + mapStrand + strand : mapStrand + strand;
                lock (dict)
                {
                    // TryGetValue leaves count at 0 for a new key. The old code
                    // incremented only the local copy, so no stored count ever
                    // rose above 1; write the incremented value back.
                    dict.TryGetValue(key, out int count);
                    dict[key] = count + 1;
                }
            }
        });

        // From RSeQC:
        // Not strand specific:
        // This is PairEnd Data
        // Fraction of reads failed to determine: 0.0172
        // Fraction of reads explained by "1++,1--,2+-,2-+": 0.4903
        // Fraction of reads explained by "1+-,1-+,2++,2--": 0.4925
        // Strand specific:
        // This is PairEnd Data
        // Fraction of reads failed to determine: 0.0072
        // Fraction of reads explained by "1++,1--,2+-,2-+": 0.9441
        // Fraction of reads explained by "1+-,1-+,2++,2--": 0.0487
        SingleStrandedness.TryGetValue("++", out int sForward1);
        SingleStrandedness.TryGetValue("--", out int sForward2);
        SingleStrandedness.TryGetValue("+-", out int sReverse1);
        SingleStrandedness.TryGetValue("-+", out int sReverse2);
        PairedStrandedness.TryGetValue("1++", out int pForward1);
        PairedStrandedness.TryGetValue("1--", out int pForward2);
        PairedStrandedness.TryGetValue("2+-", out int pForward3);
        PairedStrandedness.TryGetValue("2-+", out int pForward4);
        PairedStrandedness.TryGetValue("1+-", out int pReverse1);
        PairedStrandedness.TryGetValue("1-+", out int pReverse2);
        PairedStrandedness.TryGetValue("2++", out int pReverse3);
        PairedStrandedness.TryGetValue("2--", out int pReverse4);

        int singleForward = sForward1 + sForward2;
        int singleReverse = sReverse1 + sReverse2;
        int pairedForward = pForward1 + pForward2 + pForward3 + pForward4;
        int pairedReverse = pReverse1 + pReverse2 + pReverse3 + pReverse4;
        int singleTotal = SingleStrandedness.Values.Sum();
        int pairedTotal = PairedStrandedness.Values.Sum();

        int forward;
        int reverse;
        int total;
        if (PairedStrandedness.Count > 0 && SingleStrandedness.Count == 0)
        {
            Protocol = RnaSeqProtocol.PairedEnd;
            forward = pairedForward;
            reverse = pairedReverse;
            total = pairedTotal;
        }
        else if (SingleStrandedness.Count > 0 && PairedStrandedness.Count == 0)
        {
            Protocol = RnaSeqProtocol.SingleEnd;
            forward = singleForward;
            reverse = singleReverse;
            total = singleTotal;
        }
        else
        {
            // Both kinds of counts are present (or neither). The numerators
            // combine single and paired counts, so the denominator must too;
            // it used to be the paired total alone.
            Protocol = RnaSeqProtocol.Mixture;
            forward = singleForward + pairedForward;
            reverse = singleReverse + pairedReverse;
            total = singleTotal + pairedTotal;
        }

        // With no counted reads at all this is 0/0 = NaN; every comparison
        // below is then false and Strandedness ends up as None.
        FractionForwardStranded = (double)forward / (double)total;
        FractionReverseStranded = (double)reverse / (double)total;
        FractionUndetermined = 1 - FractionForwardStranded - FractionReverseStranded;
        if (FractionUndetermined > 0.5)
        {
            throw new ArgumentException("A large number of reads failed to determine the standedness of the protocol within " + bamPath);
        }

        Strandedness = FractionForwardStranded >= minFractionStrandSpecific ? Strandedness.Forward :
            FractionReverseStranded >= minFractionStrandSpecific ? Strandedness.Reverse :
            Strandedness.None;
    }
}