/// <summary>
/// Prints the regions covered by orphan reads found in the given alignment file,
/// followed by the regions obtained after merging overlaps ("hot spots").
/// </summary>
/// <param name="filename">
/// Path of the alignment file. It is parsed as SAM when SAMInput is set, otherwise as BAM.
/// </param>
private void DisplayOrphans(string filename)
{
    // A null resource is legal in a using statement, so the SAM path simply has nothing to dispose.
    using (BAMParser bamParser = SAMInput ? null : new BAMParser())
    {
        SequenceAlignmentMap alignmentMapobj = bamParser != null
            ? bamParser.ParseOne<SequenceAlignmentMap>(filename)
            : new SAMParser().ParseOne<SequenceAlignmentMap>(filename);

        // Get reads from the sequence alignment map object (both arguments are passed as 0).
        IList<PairedRead> pairedReads = alignmentMapobj.GetPairedReads(0, 0);

        // Materialise the orphan reads once; the query was previously enumerated twice.
        List<PairedRead> orphans = pairedReads
            .Where(pairedRead => pairedRead.PairedType == PairedReadType.Orphan)
            .ToList();

        if (orphans.Count == 0)
        {
            // Nothing to group or merge, so stop instead of also printing the
            // "No Orphan reads" / "No hot spots" messages for the same condition.
            Console.WriteLine("No Orphans to display");
            return;
        }

        var orphanRegions = new List<ISequenceRange>(orphans.Count);
        orphanRegions.AddRange(orphans.Select(orphanRead => GetRegion(orphanRead.Read1)));

        // Group the orphan regions.
        SequenceRangeGrouping rangeGroup = new SequenceRangeGrouping(orphanRegions);
        if (!rangeGroup.GroupIDs.Any())
        {
            Console.Write("\r\nNo Orphan reads to display");
        }
        else
        {
            Console.Write("Region of Orphan reads:");
            DisplaySequenceRange(rangeGroup);
        }

        // Merge overlapping regions and report them as hot spots.
        SequenceRangeGrouping mergedRegions = rangeGroup.MergeOverlaps();
        if (!mergedRegions.GroupIDs.Any())
        {
            Console.Write("\r\nNo hot spots to display");
        }
        else
        {
            Console.Write("\r\nChromosomal hot spot:");
            DisplaySequenceRange(mergedRegions);
        }
    }
}
/// <summary>
/// Builds a BAM index from the given BAM file and writes it to the given index file.
/// </summary>
/// <param name="bamFileName">Path of the BAM file to read.</param>
/// <param name="indexFileName">Path of the index file to create; File.Create overwrites an existing file.</param>
static void CreateBAMIndexFile(string bamFileName, string indexFileName)
{
    using (var bamStream = File.OpenRead(bamFileName))
    using (var indexStream = File.Create(indexFileName))
    using (var indexStorage = new BAMIndexStorage(indexStream))
    using (var parser = new BAMParser())
    {
        // The parser is disposable, so it is released together with the streams.
        BAMIndex indexFromBamStorage = parser.GetIndexFromBAMStorage(bamStream);
        indexStorage.Write(indexFromBamStorage);
    }
}
/// <summary>
/// Formats a parsed BAM file (first plainly, then sorted with an index file),
/// re-parses the output and checks header and query sequence counts,
/// including two range queries on "chr20".
/// </summary>
public void TestFormatter()
{
    string filePath = @"TestUtils\BAM\SeqAlignment.bam";
    string outputfilePath = "BAMTests123.bam";
    string outputIndexFile = outputfilePath + ".bai";

    try
    {
        using (BAMParser parser = new BAMParser())
        {
            BAMFormatter formatter = new BAMFormatter();

            SequenceAlignmentMap alignmentMap = parser.Parse(filePath);
            Assert.IsTrue(alignmentMap != null);
            Assert.AreEqual(1, alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count);
            Assert.AreEqual(1, alignmentMap.Header.ReferenceSequences.Count);
            Assert.AreEqual(2, alignmentMap.QuerySequences.Count);

            // First pass: plain output.
            formatter.Format(alignmentMap, outputfilePath);

            // Second pass: sorted output plus index file.
            formatter.CreateSortedBAMFile = true;
            formatter.CreateIndexFile = true;
            alignmentMap = parser.Parse(filePath);
            formatter.Format(alignmentMap, outputfilePath);
            Assert.IsTrue(File.Exists(outputIndexFile));

            alignmentMap = parser.Parse(outputfilePath);
            Assert.IsTrue(alignmentMap != null);
            Assert.AreEqual(1, alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count);
            Assert.AreEqual(1, alignmentMap.Header.ReferenceSequences.Count);
            Assert.AreEqual(2, alignmentMap.QuerySequences.Count);

            alignmentMap = parser.ParseRange(outputfilePath, new SequenceRange("chr20", 0, int.MaxValue));
            Assert.IsTrue(alignmentMap != null);
            Assert.AreEqual(1, alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count);
            Assert.AreEqual(1, alignmentMap.Header.ReferenceSequences.Count);
            Assert.AreEqual(2, alignmentMap.QuerySequences.Count);

            alignmentMap = parser.ParseRange(outputfilePath, new SequenceRange("chr20", 0, 28833));
            Assert.IsTrue(alignmentMap != null);
            Assert.AreEqual(1, alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count);
            Assert.AreEqual(1, alignmentMap.Header.ReferenceSequences.Count);
            Assert.AreEqual(1, alignmentMap.QuerySequences.Count);
        }
    }
    finally
    {
        // Remove the generated files whether or not the assertions passed.
        File.Delete(outputfilePath);
        File.Delete(outputIndexFile);
    }
}
/// <summary>
/// Checks that the Name, Description and SupportedFileTypes properties of
/// BAMParser and BAMFormatter match the corresponding resource strings.
/// </summary>
public void BAMProperties()
{
    // Expected value goes first so that failure messages label the values correctly.
    using (BAMParser parser = new BAMParser())
    {
        Assert.AreEqual(Properties.Resource.BAM_NAME, parser.Name);
        Assert.AreEqual(Properties.Resource.BAMPARSER_DESCRIPTION, parser.Description);
        Assert.AreEqual(Properties.Resource.BAM_FILEEXTENSION, parser.SupportedFileTypes);
    }

    BAMFormatter formatter = new BAMFormatter();
    Assert.AreEqual(Properties.Resource.BAM_NAME, formatter.Name);
    Assert.AreEqual(Properties.Resource.BAMFORMATTER_DESCRIPTION, formatter.Description);
    Assert.AreEqual(Properties.Resource.BAM_FILEEXTENSION, formatter.SupportedFileTypes);
}
/// <summary>
/// Formats a parsed BAM file (first plainly, then sorted with an index file),
/// re-parses the output and checks header and query sequence counts,
/// including two range queries on "chr20". Generated files are always removed.
/// </summary>
public void TestFormatter()
{
    const string filePath = @"TestUtils\BAM\SeqAlignment.bam";
    const string outputfilePath = "BAMTests123.bam";
    string outputIndexFile = outputfilePath + ".bai";

    try
    {
        using (BAMParser parser = new BAMParser())
        {
            SequenceAlignmentMap alignmentMap = parser.ParseOne<SequenceAlignmentMap>(filePath);
            Assert.IsNotNull(alignmentMap);
            Assert.IsNotNull(alignmentMap.Header);
            Assert.AreEqual(1, alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count);
            Assert.AreEqual(1, alignmentMap.Header.ReferenceSequences.Count);
            Assert.IsNotNull(alignmentMap.QuerySequences);
            Assert.AreEqual(2, alignmentMap.QuerySequences.Count);

            // First pass: plain output. Second pass: sorted output plus index file.
            BAMFormatter formatter = new BAMFormatter();
            formatter.Format(alignmentMap, outputfilePath);
            formatter.CreateSortedBAMFile = true;
            formatter.CreateIndexFile = true;
            formatter.Format(alignmentMap, outputfilePath);
            Assert.IsTrue(File.Exists(outputIndexFile));

            alignmentMap = parser.ParseOne<SequenceAlignmentMap>(outputfilePath);
            Assert.IsNotNull(alignmentMap);
            Assert.IsNotNull(alignmentMap.Header);
            Assert.AreEqual(1, alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count);
            Assert.AreEqual(1, alignmentMap.Header.ReferenceSequences.Count);
            Assert.AreEqual(2, alignmentMap.QuerySequences.Count);

            alignmentMap = parser.ParseRange(outputfilePath, new SequenceRange("chr20", 0, int.MaxValue));
            Assert.IsNotNull(alignmentMap);
            Assert.IsNotNull(alignmentMap.Header);
            Assert.AreEqual(1, alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count);
            Assert.AreEqual(1, alignmentMap.Header.ReferenceSequences.Count);
            Assert.AreEqual(2, alignmentMap.QuerySequences.Count);

            alignmentMap = parser.ParseRange(outputfilePath, new SequenceRange("chr20", 0, 28833));
            Assert.IsNotNull(alignmentMap);
            Assert.IsNotNull(alignmentMap.Header);
            Assert.AreEqual(1, alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count);
            Assert.AreEqual(1, alignmentMap.Header.ReferenceSequences.Count);
            Assert.AreEqual(1, alignmentMap.QuerySequences.Count);
        }
    }
    finally
    {
        // Clean up even when an assertion fails, so a failed run leaves nothing behind.
        File.Delete(outputfilePath);
        File.Delete(outputIndexFile);
    }
}
/// <summary>
/// Counts chimeric read pairs per pair of reference sequences.
/// </summary>
/// <param name="filename">
/// Path of the alignment file. It is parsed as SAM when SAMInput is set, otherwise as BAM.
/// </param>
/// <returns>
/// A matrix keyed by the reference name of the first read (row) and of the second read (column).
/// Every cell starts as "0"; cells with chimeras hold the pair count as a string.
/// </returns>
private Matrix<string, string, string> GetChimeraData(string filename)
{
    // A null resource is legal in a using statement, so the SAM path simply has nothing to dispose.
    using (BAMParser bamParser = SAMInput ? null : new BAMParser())
    {
        SequenceAlignmentMap alignmentMapobj = bamParser != null
            ? bamParser.ParseOne<SequenceAlignmentMap>(filename)
            : new SAMParser().ParseOne<SequenceAlignmentMap>(filename);

        // Get reads from the sequence alignment map object.
        IList<PairedRead> pairedReads = alignmentMapobj.GetPairedReads(200, 50);

        // Select the chimeras and group them by the first read's reference name.
        var groupedChimeras = pairedReads
            .Where(pairedRead => pairedRead.PairedType == PairedReadType.Chimera)
            .GroupBy(pairedRead => pairedRead.Read1.RName);

        IList<string> chrs = alignmentMapobj.GetRefSequences();

        // Sparse matrix holding the statistics; missing cells read as "0".
        SparseMatrix<string, string, string> statistics =
            SparseMatrix<string, string, string>.CreateEmptyInstance(chrs, chrs, "0");

        // Within each group, sub-group by the second read's reference name and store the count.
        foreach (var group in groupedChimeras)
        {
            foreach (var subgroup in group.GroupBy(pairedRead => pairedRead.Read2.RName))
            {
                statistics[group.Key, subgroup.Key] = subgroup.Count().ToString();
            }
        }

        return statistics;
    }
}
/// <summary>
/// Prints per-position A/T/G/C figures for every reference sequence in the input file,
/// either as a coverage profile or as a nucleotide distribution.
/// </summary>
/// <param name="inputFile">
/// Path of the input file. It is parsed as SAM when SAMInput is set, otherwise as BAM.
/// </param>
/// <param name="possibleOccurence">
/// True to print the nucleotide distribution (with the extra "Possibility Of Occurences" column);
/// false to print the coverage profile.
/// </param>
/// <exception cref="InvalidOperationException">Thrown when <paramref name="inputFile"/> is null or empty.</exception>
public void DisplaySequenceItemOccurences(string inputFile, bool possibleOccurence)
{
    if (string.IsNullOrEmpty(inputFile))
    {
        throw new InvalidOperationException("Input File Not specified");
    }

    // A null resource is legal in a using statement, so the SAM path simply has nothing to dispose.
    using (BAMParser bamParser = SAMInput ? null : new BAMParser())
    {
        SequenceAlignmentMap alignmentMapobj = bamParser != null
            ? bamParser.ParseOne<SequenceAlignmentMap>(inputFile)
            : new SAMParser().ParseOne<SequenceAlignmentMap>(inputFile);

        IList<string> chromosomes = alignmentMapobj.GetRefSequences();

        if (possibleOccurence)
        {
            Console.Write("Nucleotide Distribution:");
            Console.Write("\r\nPosition\tA\tT\tG\tC\tPossibility Of Occurences");
        }
        else
        {
            Console.Write("Coverage Profile:");
            Console.Write("\r\nPosition\tA\tT\tG\tC");
        }

        // GetCoverage takes the mode as the string "true" or "false".
        string occurrenceFlag = possibleOccurence ? "true" : "false";
        foreach (string chromosome in chromosomes)
        {
            GetCoverage(chromosome, alignmentMapobj, occurrenceFlag);
        }
    }
}
/// <summary>
/// Parses the sample BAM file and checks the number of reference sequences
/// in the SQ header and the number of query sequences.
/// </summary>
public void TestParser()
{
    const string FilePath = @"TestUtils\BAM\SeqAlignment.bam";

    using (BAMParser parser = new BAMParser())
    {
        SequenceAlignmentMap alignmentMap = parser.ParseOne<SequenceAlignmentMap>(FilePath);

        // Expected value goes first so that failure messages label the values correctly.
        Assert.IsTrue(alignmentMap != null);
        Assert.AreEqual(1, alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count);
        Assert.AreEqual(2, alignmentMap.QuerySequences.Count);
    }
}
/// <summary>
/// Public method to sort BAM file.
/// SAMUtil.exe in.bam out.bam
/// The sort key is read names when SortByReadName is set, otherwise chromosome coordinates.
/// When no output file name is given, the input name plus ".sort" is used and reported on the console.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown when InputFilename is null or empty, or when the input file cannot be parsed
/// (the parser's exception is attached as the inner exception).
/// </exception>
public void DoSort()
{
    const string sortExtension = ".sort";

    if (string.IsNullOrEmpty(InputFilename))
    {
        throw new InvalidOperationException(Resources.SortHelp);
    }

    // The parser is disposable; keep it alive until the map has been written out.
    using (BAMParser parse = new BAMParser())
    {
        SequenceAlignmentMap map;
        try
        {
            map = parse.ParseOne<SequenceAlignmentMap>(InputFilename);
        }
        catch (Exception ex)
        {
            throw new InvalidOperationException(Resources.InvalidBAMFile, ex);
        }

        BAMFormatter format = new BAMFormatter
        {
            CreateSortedBAMFile = true,
            SortType = this.SortByReadName ? BAMSortByFields.ReadNames : BAMSortByFields.ChromosomeCoordinates
        };

        if (string.IsNullOrEmpty(OutputFilename))
        {
            OutputFilename = InputFilename + sortExtension;
            autoGeneratedOutputFilename = true;
        }

        format.Format(map, OutputFilename);
    }

    if (autoGeneratedOutputFilename)
    {
        Console.WriteLine(Resources.SuccessMessageWithOutputFileName, OutputFilename);
    }
}
/// <summary>
/// Runs a set of range queries against the large human BAM file and checks
/// that each query returns the expected number of aligned sequences.
/// </summary>
public void ValidateBAMRangeQuery()
{
    // Get input and output values from xml node.
    string BAMStoragePath = this.utilityObj.xmlUtil.GetTextValue(
        Constants.BAMHumanLargeNode, Constants.FilePathNode);

    using (var bp = new BAMParser())
    {
        // Exercise the name-only overload; the projections previously built from
        // this result were never read, so only the parse itself is kept.
        var wholeReference = bp.ParseRange(BAMStoragePath, "MT");
        Assert.IsNotNull(wholeReference);
        Assert.IsNotNull(wholeReference.QuerySequences);

        var toTest = new[]
        {
            new RangeQuery("MT", 1, 100, 48),
            new RangeQuery("1", 0, Int32.MaxValue, 45),
            new RangeQuery("11", 0, Int32.MaxValue, 2),
            new RangeQuery("MT", 0, Int32.MaxValue, 4335),
            new RangeQuery("MT", 16300, 16500, 92) // Last one is on a 2^14 boundary
        };

        foreach (var r in toTest)
        {
            var res = bp.ParseRange(BAMStoragePath, r.RName, r.start, r.End);
            Assert.AreEqual(r.ExpectedReturns, res.QuerySequences.Count);
        }
    }
}
/// <summary>
/// Formats a parsed SAM file as BAM, re-parses the result and compares its
/// header and aligned sequences with the expected BAM file.
/// </summary>
public void ValidateSAMToBAMConversion()
{
    // Get values from xml config file.
    string expectedBamFilePath = this.utilityObj.xmlUtil.GetTextValue(
        Constants.BAMToSAMConversionNode, Constants.FilePathNode);
    string samFilePath = this.utilityObj.xmlUtil.GetTextValue(
        Constants.BAMToSAMConversionNode, Constants.FilePathNode1);

    // The BAM parser is disposable and is used for both the expected and the converted file.
    using (var bamParserObj = new BAMParser())
    {
        // Parse expected BAM file.
        SequenceAlignmentMap expextedBamAlignmentObj =
            bamParserObj.ParseOne<SequenceAlignmentMap>(expectedBamFilePath);

        // Parse a SAM file.
        var samParserObj = new SAMParser();
        SequenceAlignmentMap samSeqAlignment = samParserObj.ParseOne<SequenceAlignmentMap>(samFilePath);

        try
        {
            // Format SAM sequenceAlignment object to BAM file.
            var bamFormatterObj = new BAMFormatter();
            bamFormatterObj.Format(samSeqAlignment, Constants.BAMTempFileName);

            // Parse a formatted BAM file.
            SequenceAlignmentMap bamSeqAlignment =
                bamParserObj.ParseOne<SequenceAlignmentMap>(Constants.BAMTempFileName);

            // Validate converted BAM file with expected BAM file.
            Assert.IsTrue(CompareSequencedAlignmentHeader(bamSeqAlignment, expextedBamAlignmentObj));

            // Validate BAM file aligned sequences.
            Assert.IsTrue(CompareAlignedSequences(bamSeqAlignment, expextedBamAlignmentObj));
        }
        finally
        {
            // Delete temporary file.
            File.Delete(Constants.BAMTempFileName);
        }
    }
}
/// <summary>
/// Hands every aligned sequence produced by the parser to the metric handler and queues
/// a stats-panel refresh on the dispatcher whenever the cluster count has changed.
/// Stops early when the handler rejects a sequence, then marks the handler as complete.
/// </summary>
/// <param name="filename">Path of the file passed to the parser.</param>
/// <param name="parser">Parser used to enumerate the aligned sequences.</param>
private void ProcessSequences(string filename, BAMParser parser)
{
    // Both step sizes are currently 1, so the GUI is refreshed for every new cluster count.
    double stepBeforeThreshold = 1;
    double stepAfterThreshold = 1;
    int stepSwitchThreshold = 100;

    // Cluster count for which the GUI was last refreshed.
    int lastShownClusterCount = -1;

    foreach (SAMAlignedSequence alignedRead in parser.ParseSequence(filename))
    {
        if (!metricHandler.Add(alignedRead))
        {
            Console.WriteLine(Properties.Resources.HANDLER_FINISHED);
            break;
        }

        int clusters = metricHandler.ClusterCount;
        double step = clusters < stepSwitchThreshold ? stepBeforeThreshold : stepAfterThreshold;
        bool isOnStep = clusters == 1 || clusters % step == 0;

        if (!isOnStep || clusters == lastShownClusterCount)
        {
            continue;
        }

        lastShownClusterCount = clusters;
        Dispatcher.BeginInvoke(
            System.Windows.Threading.DispatcherPriority.Normal,
            new StatsDelegate(UpdateStatsPanel),
            metricHandler.GoodCount,
            clusters,
            metricHandler.MaxSampleCount,
            metricHandler.MaxAlignmentQuality,
            metricHandler.MaxReadQuality,
            metricHandler.AverageDirt,
            metricHandler.AverageMapQ,
            metricHandler.AverageReadQ,
            metricHandler.AverageDirtGood,
            metricHandler.AverageMapQGood,
            metricHandler.AverageReadQGood);
    }

    // Tell the handler that there are no more sequences to receive.
    metricHandler.SetComplete();
}
/// <summary>
/// Reads the header of the input file into the metric handler, prints the number of
/// reference sequences it lists and queues a progress-bar update with that number.
/// </summary>
/// <param name="filename">Path of the input file.</param>
/// <param name="parser">Parser used to read the header.</param>
/// <param name="numClustersInInputFile">
/// Not read: the count is taken from the header. Because the parameter is passed by value,
/// the caller's variable is not updated.
/// </param>
private void ReadHeader(string filename, BAMParser parser, int numClustersInInputFile)
{
    int referenceCount;

    using (Stream input = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        metricHandler.InputHeader = parser.GetHeader(input);
        referenceCount = metricHandler.InputHeader.ReferenceSequences.Count;
        Console.WriteLine(Properties.Resources.CLUSTER_COUNT_DISPLAY + referenceCount);
    }

    Dispatcher.BeginInvoke(
        System.Windows.Threading.DispatcherPriority.Normal,
        new IntDelegate(SetProgressBar),
        referenceCount);
}
/// <summary>
/// Builds a BAMIndex from the given BAM stream and writes it to the given index storage.
/// </summary>
/// <param name="compressedBAMStream">Stream the index is built from.</param>
/// <param name="indexStorage">Storage the resulting index is written to.</param>
private static void CreateBAMIndexFile(Stream compressedBAMStream, BAMIndexStorage indexStorage)
{
    // NOTE(review): the parser is not disposed here; confirm whether disposing it would
    // also close compressedBAMStream (which the caller owns) before changing that.
    indexStorage.Write(new BAMParser().GetIndexFromBAMStorage(compressedBAMStream));
}
/// <summary>
/// Initialises the metric handler with the given settings, reads the input header,
/// processes every sequence in the file and disposes the handler.
/// GUI updates for the start and end of parsing are queued on the dispatcher.
/// </summary>
/// <returns>Always null.</returns>
private System.Delegate ParseBAMMetric(string filename, string ploidy, string dirtCutoff, string ploidyDisagreement, string alignQualCutoff, string readQualCutoff, string popPercent, string hapMaxCutoff, bool? onlyHaplotypeGood, string numSamples, bool? outputToFile, bool? metricFileParent, bool? metricFileChild, bool? outputOverviewParent, bool? outputOverviewChild, bool? genotypesToFile, bool? haplotypesToFile)
{
    using (BAMParser bamParser = new BAMParser())
    {
        // Initialise the metric handler.
        InitHandler(
            filename,
            ploidy,
            dirtCutoff,
            ploidyDisagreement,
            alignQualCutoff,
            readQualCutoff,
            popPercent,
            hapMaxCutoff,
            onlyHaplotypeGood,
            numSamples,
            outputToFile,
            metricFileParent,
            metricFileChild,
            outputOverviewParent,
            outputOverviewChild,
            genotypesToFile,
            haplotypesToFile);

        // The initial cluster count handed to ReadHeader is zero.
        ReadHeader(filename, bamParser, 0);

        // Update the GUI to indicate that parsing has begun.
        Dispatcher.BeginInvoke(
            System.Windows.Threading.DispatcherPriority.Normal,
            new QuickDelegate(UpdateGui_BeganParsing));

        ProcessSequences(filename, bamParser);

        // Finished sequences for this input file.
        Dispatcher.BeginInvoke(
            System.Windows.Threading.DispatcherPriority.Normal,
            new QuickDelegate(UpdateGui_FinishedParsing));

        // Close handler.
        Console.WriteLine(Properties.Resources.FINISHED);
        metricHandler.Dispose();
    }

    return null;
}
/// <summary>
/// Validate GetPairedReads: checks the number of paired reads, each pair's insert length
/// and paired type, and the query sequence of every read against the expected sequences.
/// </summary>
/// <param name="nodeName">XML node name</param>
/// <param name="pams">Selects which GetPairedReads overload is exercised</param>
private void ValidatePairedReads(string nodeName, GetPairedReadParameters pams)
{
    // Get input and output values from xml node.
    string bamFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedAlignedSeqFilePath = this.utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);
    string mean = this.utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.MeanNode);
    string deviation = this.utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.DeviationValueNode);
    string library = this.utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.LibraryNameNode);
    string pairedReadsCount = this.utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.PairedReadsNode);
    string[] insertLength = this.utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.InsertLengthNode).Split(',');
    string[] pairedReadType = this.utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.PairedReadTypeNode).Split(',');

    using (var bamParser = new BAMParser())
    {
        var parserObj = new FastAParser();
        SequenceAlignmentMap seqAlignment = bamParser.ParseOne<SequenceAlignmentMap>(bamFilePath);

        // Materialise the expected sequences once instead of re-enumerating
        // the parse result with ElementAt for every read.
        IList<ISequence> expectedSequences = parserObj.Parse(expectedAlignedSeqFilePath).ToList();

        IList<PairedRead> pairedReads = null;
        switch (pams)
        {
            case GetPairedReadParameters.GetPairedReadWithParameters:
                pairedReads = seqAlignment.GetPairedReads(float.Parse(mean, null), float.Parse(deviation, null));
                break;
            case GetPairedReadParameters.GetPairedReadWithLibraryName:
                pairedReads = seqAlignment.GetPairedReads(library);
                break;
            case GetPairedReadParameters.GetPairedReadWithCloneLibraryInfo:
                CloneLibraryInformation libraryInfo = CloneLibrary.Instance.GetLibraryInformation(library);
                pairedReads = seqAlignment.GetPairedReads(libraryInfo);
                break;
            case GetPairedReadParameters.Default:
                pairedReads = seqAlignment.GetPairedReads();
                break;
        }

        // Fail with a clear message rather than a NullReferenceException when no case matched.
        Assert.IsNotNull(pairedReads, "No paired reads were retrieved for the given parameter kind.");
        Assert.AreEqual(pairedReadsCount, pairedReads.Count.ToString((IFormatProvider) null));

        int i = 0;
        foreach (PairedRead read in pairedReads)
        {
            Assert.AreEqual(insertLength[i], read.InsertLength.ToString((IFormatProvider) null));
            Assert.AreEqual(pairedReadType[i], read.PairedType.ToString());

            // Every read of the pair is compared with the expected sequence at the pair's index.
            string expectedText = new string(expectedSequences[i].Select(a => (char) a).ToArray());
            foreach (SAMAlignedSequence seq in read.Reads)
            {
                Assert.AreEqual(
                    expectedText,
                    new string(seq.QuerySequence.Select(a => (char) a).ToArray()));

                // Log to VSTest GUI.
                ApplicationLog.WriteLine(string.Format(null,
                    "BAM Parser BVT : Validated Paired read :{0} successfully",
                    seq.QuerySequence));
            }

            i++;
        }
    }
}
/// <summary>
/// Opens the given file for reading (other handles may still read and write it)
/// and creates the BAM parser used by this reader.
/// </summary>
/// <param name="filename">Path of the file to open.</param>
public BAMWindowReader(string filename)
{
    _disposed = false;

    _stream = new FileStream(
        filename,
        FileMode.Open,
        FileAccess.Read,
        FileShare.ReadWrite);

    _parser = new BAMParser();
}
/// <summary>
/// Validate formatted BAM file: parses a BAM file, writes it back with the selected
/// Format overload, re-parses the output and checks header records and aligned sequences.
/// Temporary files are removed even when validation fails.
/// </summary>
/// <param name="nodeName">Different xml nodes used for different test cases</param>
/// <param name="BAMParserPam">BAM Format method parameters</param>
private void ValidateBAMFormatter(string nodeName, BAMParserParameters BAMParserPam)
{
    // Get input and output values from xml node.
    string bamFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedAlignedSeqFilePath = this.utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);
    string alignedSeqCount = this.utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.AlignedSeqCountNode);

    try
    {
        using (var bamParserObj = new BAMParser())
        {
            // Parse a BAM file.
            var seqAlignment = bamParserObj.ParseOne<SequenceAlignmentMap>(bamFilePath);

            // Create a BAM formatter object.
            var formatterObj = new BAMFormatter();

            // Write/Format aligned sequences to BAM file.
            switch (BAMParserPam)
            {
                case BAMParserParameters.StreamWriter:
                    using (Stream stream = new FileStream(Constants.BAMTempFileName, FileMode.Create, FileAccess.Write))
                    {
                        formatterObj.Format(stream, seqAlignment);
                    }
                    break;
                case BAMParserParameters.FileName:
                    formatterObj.Format(seqAlignment, Constants.BAMTempFileName);
                    break;
                case BAMParserParameters.IndexFile:
                    formatterObj.Format(seqAlignment, Constants.BAMTempFileName, Constants.BAMTempIndexFile);
                    // The existence check used to be evaluated and discarded; assert it.
                    Assert.IsTrue(File.Exists(Constants.BAMTempIndexFile));
                    break;
                default:
                    break;
            }

            // Parse formatted BAM file and validate aligned sequences.
            SequenceAlignmentMap expectedSeqAlignmentMap =
                bamParserObj.ParseOne<SequenceAlignmentMap>(Constants.BAMTempFileName);

            // Validate parsed BAM file header record fields.
            this.ValidateBAMHeaderRecords(nodeName, expectedSeqAlignmentMap);

            IList<SAMAlignedSequence> alignedSeqs = expectedSeqAlignmentMap.QuerySequences;
            Assert.AreEqual(alignedSeqCount, alignedSeqs.Count.ToString((IFormatProvider) null));

            // Get expected sequences.
            var parserObj = new FastAParser();
            IList<ISequence> expectedSequencesList = parserObj.Parse(expectedAlignedSeqFilePath).ToList();

            // Validate aligned sequences from BAM file.
            for (int index = 0; index < alignedSeqs.Count; index++)
            {
                Assert.AreEqual(
                    new string(expectedSequencesList[index].Select(a => (char) a).ToArray()),
                    new string(alignedSeqs[index].QuerySequence.Select(a => (char) a).ToArray()));

                // Log to VSTest GUI.
                ApplicationLog.WriteLine(string.Format("BAM Formatter BVT : Validated Aligned sequence :{0} successfully",
                    alignedSeqs[index].QuerySequence));
            }
        }
    }
    finally
    {
        // Runs after the parser is disposed; File.Delete does nothing for a missing file.
        File.Delete(Constants.BAMTempFileName);
        File.Delete(Constants.BAMTempIndexFile);
    }
}
/// <summary>
/// Formats the sample BAM file twice with sorting by chromosome coordinates enabled
/// and checks that both output files exist and compare equal.
/// </summary>
public void TestFormatterWithSort()
{
    string inputFilePath = @"TestUtils\BAM\SeqAlignment.bam";
    string outputFilePath1 = "output1.bam";
    string outputFilePath2 = "output2.bam";

    try
    {
        using (BAMParser parser = new BAMParser())
        {
            BAMFormatter formatter = new BAMFormatter();

            SequenceAlignmentMap alignmentMap = parser.Parse(inputFilePath);
            Assert.IsTrue(alignmentMap != null);
            Assert.AreEqual(1, alignmentMap.Header.GetReferenceSequencesInfoFromSQHeader().Count);
            Assert.AreEqual(1, alignmentMap.Header.ReferenceSequences.Count);
            Assert.AreEqual(2, alignmentMap.QuerySequences.Count);

            formatter.CreateSortedBAMFile = true;
            formatter.SortType = BAMSortByFields.ChromosomeCoordinates;
            formatter.Format(alignmentMap, outputFilePath1);

            // Parse the input again so the second output comes from a fresh map.
            alignmentMap = parser.Parse(inputFilePath);
            formatter.Format(alignmentMap, outputFilePath2);

            Assert.IsTrue(File.Exists(outputFilePath1));
            Assert.IsTrue(File.Exists(outputFilePath2));
            Assert.IsTrue(FileCompare(outputFilePath1, outputFilePath2));
        }
    }
    finally
    {
        // Remove the generated files whether or not the assertions passed.
        File.Delete(outputFilePath1);
        File.Delete(outputFilePath2);
    }
}
/// <summary>
/// Validate different paired read types: for every paired read obtained with the configured
/// mean and deviation, computes a value with the selected PairedRead method and compares it
/// with the expected value from the xml node (paired read type or insert length).
/// </summary>
/// <param name="nodeName">XML node name</param>
/// <param name="pams">Selects which PairedRead method is exercised</param>
private void ValidatePairedReadTypes(string nodeName, GetPairedReadTypeParameters pams)
{
    // Get input and output values from xml node.
    string bamFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string mean = this.utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.MeanNode);
    string deviation = this.utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.DeviationValueNode);
    string library = this.utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.LibraryNameNode);
    string[] pairedReadType = this.utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.PairedReadTypeNode).Split(',');
    string[] insertLength = this.utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.InsertLengthNode).Split(',');

    using (var bamParser = new BAMParser())
    {
        SequenceAlignmentMap seqAlignmentMapObj = bamParser.ParseOne<SequenceAlignmentMap>(bamFilePath);

        // Each case only chooses how the actual value is computed and which expected
        // values it is compared with; the loop over the reads is shared below.
        Func<PairedRead, string> actualValueOf = null;
        string[] expectedValues = pairedReadType;
        CloneLibraryInformation libraryInfo;

        switch (pams)
        {
            case GetPairedReadTypeParameters.PaireReadTypeUsingLibraryName:
                actualValueOf = read => PairedRead.GetPairedReadType(read, library).ToString();
                break;
            case GetPairedReadTypeParameters.PaireReadTypeUsingCloneLibraryInfo:
                libraryInfo = CloneLibrary.Instance.GetLibraryInformation(library);
                actualValueOf = read => PairedRead.GetPairedReadType(read, libraryInfo).ToString();
                break;
            case GetPairedReadTypeParameters.PaireReadTypeUsingMeanAndDeviation:
                actualValueOf = read => PairedRead.GetPairedReadType(
                    read, float.Parse(mean, null), float.Parse(deviation, null)).ToString();
                break;
            case GetPairedReadTypeParameters.PaireReadTypeUsingReadsAndLibrary:
                actualValueOf = read => PairedRead.GetPairedReadType(read.Read1, read.Read2, library).ToString();
                break;
            case GetPairedReadTypeParameters.PaireReadTypeUsingReadsAndLibraryInfo:
                libraryInfo = CloneLibrary.Instance.GetLibraryInformation(library);
                actualValueOf = read => PairedRead.GetPairedReadType(read.Read1, read.Read2, libraryInfo).ToString();
                break;
            case GetPairedReadTypeParameters.GetInsertLength:
                expectedValues = insertLength;
                actualValueOf = read =>
                    PairedRead.GetInsertLength(read.Read1, read.Read2).ToString((IFormatProvider) null);
                break;
        }

        // An unrecognised parameter kind validates nothing, as before.
        if (actualValueOf != null)
        {
            IList<PairedRead> pairedReads = seqAlignmentMapObj.GetPairedReads(
                float.Parse(mean, null), float.Parse(deviation, null));

            int i = 0;
            foreach (PairedRead read in pairedReads)
            {
                // Expected value first so failure messages label the values correctly.
                Assert.AreEqual(expectedValues[i], actualValueOf(read));
                i++;
            }
        }

        ApplicationLog.WriteLine(string.Format(null,
            "BAM Parser BVT : Validated Paired read Type Successfully"));
    }
}
/// <summary>
/// Parse BAM and validate parsed aligned sequences and its properties.
/// </summary>
/// <param name="nodeName">Different xml nodes used for different test cases</param>
/// <param name="BAMParserPam">Selects which Parse/ParseRange overload is exercised</param>
/// <param name="IsReferenceIndex">
/// When true, validation of the BAM header records is skipped.
/// </param>
private void ValidateBAMParser(string nodeName,
                               BAMParserParameters BAMParserPam,
                               bool IsReferenceIndex)
{
    // Get input and output values from xml node.
    string bamFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedAlignedSeqFilePath = this.utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);
    string refIndexValue = this.utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.RefIndexNode);
    string startIndexValue = this.utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.StartIndexNode);
    string endIndexValue = this.utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.EndIndexNode);
    string alignedSeqCount = this.utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.AlignedSeqCountNode);
    string refSeqName = this.utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ChromosomeNameNode);

    // A using block replaces the finally that dereferenced the parser without a null check.
    using (var bamParser = new BAMParser())
    {
        SequenceAlignmentMap seqAlignment = null;

        // Parse a BAM file with different parameters.
        switch (BAMParserPam)
        {
            case BAMParserParameters.StreamReader:
                using (Stream stream = new FileStream(bamFilePath, FileMode.Open, FileAccess.Read))
                {
                    seqAlignment = bamParser.ParseOne(stream);
                }
                break;
            case BAMParserParameters.FileName:
                seqAlignment = bamParser.ParseOne<SequenceAlignmentMap>(bamFilePath);
                break;
            case BAMParserParameters.ParseRangeFileName:
                seqAlignment = bamParser.ParseRange(bamFilePath,
                    Convert.ToInt32(refIndexValue, null));
                break;
            case BAMParserParameters.ParseRangeWithIndex:
                seqAlignment = bamParser.ParseRange(bamFilePath,
                    Convert.ToInt32(refIndexValue, null),
                    Convert.ToInt32(startIndexValue, null),
                    Convert.ToInt32(endIndexValue, null));
                break;
            // These two kinds call the same overload.
            case BAMParserParameters.ParseRangeUsingRefSeq:
            case BAMParserParameters.ParseRangeUsingRefSeqAndFlag:
                seqAlignment = bamParser.ParseRange(bamFilePath, refSeqName);
                break;
            // These two kinds call the same overload.
            case BAMParserParameters.ParseRangeUsingRefSeqUsingIndex:
            case BAMParserParameters.ParseRangeUsingIndexesAndFlag:
                seqAlignment = bamParser.ParseRange(bamFilePath, refSeqName,
                    Convert.ToInt32(startIndexValue, null),
                    Convert.ToInt32(endIndexValue, null));
                break;
        }

        // Fail with a clear message rather than a NullReferenceException when no case matched.
        Assert.IsNotNull(seqAlignment, "No alignment was parsed for the given parameter kind.");

        // Validate BAM header record fields.
        if (!IsReferenceIndex)
        {
            this.ValidateBAMHeaderRecords(nodeName, seqAlignment);
        }

        IList<SAMAlignedSequence> alignedSeqs = seqAlignment.QuerySequences;
        Assert.AreEqual(alignedSeqCount, alignedSeqs.Count.ToString((IFormatProvider) null));

        // Get expected sequences.
        var parserObj = new FastAParser();
        IList<ISequence> expectedSequencesList = parserObj.Parse(expectedAlignedSeqFilePath).ToList();

        // Validate aligned sequences from BAM file.
        for (int index = 0; index < alignedSeqs.Count; index++)
        {
            Assert.IsFalse(alignedSeqs[index].IsDummyRead);
            Assert.AreEqual(
                new string(expectedSequencesList[index].Select(a => (char) a).ToArray()),
                new string(alignedSeqs[index].QuerySequence.Select(a => (char) a).ToArray()));

            // Log to VSTest GUI.
            ApplicationLog.WriteLine(string.Format(null,
                "BAM Parser BVT : Validated Aligned sequence :{0} successfully",
                alignedSeqs[index].QuerySequence));
        }
    }
}
/// <summary>
/// Parses the BAM file containing dummy reads and checks the fields of the first
/// aligned sequence, its first optional field and its IsDummyRead flag.
/// </summary>
public void ValidateBAMParserWithDummyReads()
{
    string bamFilePath = utilityObj.xmlUtil.GetTextValue(Constants.BAMFileWithDummyReads, Constants.FilePathNode);

    using (var bamParser = new BAMParser())
    {
        SequenceAlignmentMap seqAlignment = bamParser.ParseOne<SequenceAlignmentMap>(bamFilePath);

        var seq = seqAlignment.QuerySequences.First();
        Assert.AreEqual("fakeref", seq.RName);
        Assert.AreEqual("1M", seq.CIGAR);
        Assert.AreEqual(10, seq.Pos);
        Assert.IsNull(seq.QuerySequence);

        var optField = seq.OptionalFields.First();
        Assert.AreEqual("CT", optField.Tag);
        Assert.AreEqual("Z", optField.VType);
        Assert.AreEqual(".;ESDN;", optField.Value);

        Assert.IsTrue(seq.IsDummyRead);
    }
}
/// <summary>
/// Initializes required parsers, formatters, input and output files based on user option.
/// Output goes to the console when no output file name is set, to a file stream for BAM
/// output, or to a text writer otherwise. Intermediate BAM streams are kept in memory when
/// only the header is requested or the estimated size fits MemStreamLimit, and in temp files otherwise.
/// </summary>
private void Initialize()
{
    bamparser = new BAMParser();
    bamformatter = new BAMFormatter();

    bamUncompressedOutStream = null;
    bamCompressedOutStream = null;

    if (string.IsNullOrEmpty(OutputFilename))
    {
        writer = Console.Out;
    }
    else if (UnCompressedBAM || BAMOutput)
    {
        // BAM output is written through a stream, not the text writer.
        writer = null;

        if (UnCompressedBAM)
        {
            bamUncompressedOutStream = new FileStream(OutputFilename, FileMode.Create, FileAccess.ReadWrite);
        }
        else
        {
            bamCompressedOutStream = new FileStream(OutputFilename, FileMode.Create, FileAccess.ReadWrite);
        }
    }
    else
    {
        writer = new StreamWriter(OutputFilename);
    }

    #region Initialize temp files

    long inputfileSize = (new FileInfo(InputFilePath)).Length;
    long unCompressedSize = inputfileSize;
    if (!SAMInput)
    {
        // An uncompressed BAM file is approximately 4 times the size of the compressed file.
        unCompressedSize = inputfileSize * 4;
    }

    long compressedSize = unCompressedSize / 4;

    // The uncompressed stream is required for both uncompressed and compressed outputs.
    if ((UnCompressedBAM || BAMOutput) && bamUncompressedOutStream == null)
    {
        if (HeaderOnly || (MemStreamLimit >= unCompressedSize))
        {
            bamUncompressedOutStream = new MemoryStream();
        }
        else
        {
            uncompressedTempfile = Path.GetTempFileName();
            bamUncompressedOutStream = new FileStream(uncompressedTempfile, FileMode.Open, FileAccess.ReadWrite);
        }
    }

    if (BAMOutput && !UnCompressedBAM && bamCompressedOutStream == null)
    {
        if (HeaderOnly || (MemStreamLimit >= compressedSize))
        {
            // The input size is only a capacity hint. Casting a length above int.MaxValue
            // would wrap (possibly to a negative capacity), so fall back to the default
            // capacity in that case.
            bamCompressedOutStream = inputfileSize <= int.MaxValue
                ? new MemoryStream((int)inputfileSize)
                : new MemoryStream();
        }
        else
        {
            compressedTempfile = Path.GetTempFileName();
            bamCompressedOutStream = new FileStream(compressedTempfile, FileMode.Open, FileAccess.ReadWrite);
        }
    }

    #endregion Initialize temp files
}
/// <summary>
/// Parses the medium-size BAM file from the test configuration and validates sorting
/// by chromosome name and coordinates.
/// </summary>
public void ValidateSortByChromosomeNameAndCoordinates()
{
    using (BAMParser bamParser = new BAMParser())
    {
        // Resolve the input file from the XML configuration; a missing entry fails the test early.
        string configuredPath = utilityObj.xmlUtil.GetTextValue(
            Constants.MediumSizeBAMFileNode,
            Constants.FilePathNode);
        Assert.IsNotNull(configuredPath);

        var alignmentMap = bamParser.ParseOne<SequenceAlignmentMap>(configuredPath);
        ValidateSort(alignmentMap, BAMSortByFields.ChromosomeNameAndCoordinates);
    }
}
/// <summary>
/// Parses the medium-size BAM file from the test configuration and validates sorting
/// by read names.
/// </summary>
public void ValidateSortByReadNames()
{
    using (BAMParser bamParser = new BAMParser())
    {
        // Resolve the input file from the XML configuration.
        string configuredPath = utilityObj.xmlUtil.GetTextValue(
            Constants.MediumSizeBAMFileNode,
            Constants.FilePathNode);

        var alignmentMap = bamParser.ParseOne<SequenceAlignmentMap>(configuredPath);
        ValidateSort(alignmentMap, BAMSortByFields.ReadNames);
    }
}
/// <summary>
/// Validates <c>PairedRead.GetInsertLength</c> (with its validate flag set) for every
/// paired read of the configured BAM file against the comma-separated expected insert
/// lengths from the XML configuration.
/// </summary>
public void ValidateGetInsertLengthWithValidate()
{
    // Get input and output values from xml node.
    string bamFilePath = this.utilityObj.xmlUtil.GetTextValue(
        Constants.PairedReadTypesForLibraryInfoNode, Constants.FilePathNode);
    string mean = this.utilityObj.xmlUtil.GetTextValue(
        Constants.PairedReadTypesForLibraryInfoNode, Constants.MeanNode);
    string deviation = this.utilityObj.xmlUtil.GetTextValue(
        Constants.PairedReadTypesForLibraryInfoNode, Constants.DeviationValueNode);
    string[] insertLength = this.utilityObj.xmlUtil.GetTextValue(
        Constants.PairedReadTypesForLibraryInfoNode, Constants.InsertLengthNode).Split(',');

    // The using block alone disposes the parser; no additional explicit Dispose() is needed.
    using (var bamParser = new BAMParser())
    {
        SequenceAlignmentMap seqAlignmentMapObj = bamParser.ParseOne<SequenceAlignmentMap>(bamFilePath);

        IList<PairedRead> pairedReads = seqAlignmentMapObj.GetPairedReads(
            float.Parse(mean, null), float.Parse(deviation, null));

        // Expected values are matched to reads by position.
        int i = 0;
        foreach (PairedRead read in pairedReads)
        {
            // Pass true for the validate parameter.
            int length = PairedRead.GetInsertLength(read.Read1, read.Read2, true);

            // Expected value first, actual value second, so failure messages read correctly.
            Assert.AreEqual(insertLength[i], length.ToString((IFormatProvider)null));
            i++;
        }

        ApplicationLog.WriteLine("BAM Parser BVT : Validated GetInsertLength Successfully");
    }
}
/// <summary>
/// Displays pending data and closes all streams.
/// Also deletes the temporary files recorded in <c>uncompressedTempfile</c> and
/// <c>compressedTempfile</c> (when present) and disposes the BAM parser.
/// </summary>
private void Close()
{
    if (writer != null)
    {
        writer.Close();
    }

    if (bamCompressedOutStream != null)
    {
        bamCompressedOutStream.Close();
        bamCompressedOutStream = null;
    }

    if (bamUncompressedOutStream != null)
    {
        bamUncompressedOutStream.Close();
        bamUncompressedOutStream = null;
    }

    // The streams are closed above before any file deletion is attempted.
    // A temp file is deleted only when a path was actually recorded and the file still exists.
    if (!string.IsNullOrEmpty(uncompressedTempfile) && File.Exists(uncompressedTempfile))
    {
        File.Delete(uncompressedTempfile);
    }

    if (!string.IsNullOrEmpty(compressedTempfile) && File.Exists(compressedTempfile))
    {
        File.Delete(compressedTempfile);
    }

    bamformatter = null;

    if (bamparser != null)
    {
        bamparser.Dispose();
        bamparser = null;
    }
}
/// <summary>
/// Validates that building an index from an unsorted BAM file fails with an
/// <see cref="InvalidDataException"/>.
/// </summary>
public void ValidateErrorIndexingUnsortedBAM()
{
    // samtools throws the following error, we should do the same
    // [bam_index_core] the alignment is not sorted (H0KTMADXX130517:2:1111:17648:28366): 12370 > 12324 in 25-th chr

    // Get filepath from xml config file.
    string bamFilePath = this.utilityObj.xmlUtil.GetTextValue(
        Constants.BAMUnsortedFilePath, Constants.FilePathNode);

    using (Stream bamStream = new FileStream(bamFilePath, FileMode.Open, FileAccess.Read))
    using (var parser = new BAMParser())
    {
        bool invalidDataThrown = false;
        try
        {
            parser.GetIndexFromBAMStorage(bamStream);
        }
        catch (InvalidDataException)
        {
            invalidDataThrown = true;
        }

        // Any other exception type is left to propagate so the test fails with the
        // original exception details instead of a bare assertion failure.
        Assert.IsTrue(
            invalidDataThrown,
            "Expected InvalidDataException when indexing an unsorted BAM file.");
    }

    // Log message to VSTest GUI.
    ApplicationLog.WriteLine(string.Format(null,
        "BAM Parser BVT : Validated error is thrown on indexing unsorted file"));
}
/// <summary>
/// Parses the SAM/BAM input file into <c>_sequenceAlignmentMap</c>.
/// When no output file name was supplied, one is derived from the input name by
/// appending the extension of the opposite format (".sam" for BAM input, ".bam" for SAM input).
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown when the input file cannot be parsed; the original exception is attached as the inner exception.
/// </exception>
private void PerformParse()
{
    const string samExtension = ".sam";
    const string bamExtension = ".bam";

    if (Helper.IsBAM(InputFilename))
    {
        // BAMParser is disposable; release it as soon as parsing has finished or failed.
        using (BAMParser parser = new BAMParser())
        {
            try
            {
                _sequenceAlignmentMap = parser.ParseOne<SequenceAlignmentMap>(InputFilename);
            }
            catch (Exception ex)
            {
                throw new InvalidOperationException(Resources.InvalidBAMFile, ex);
            }
        }

        if (string.IsNullOrEmpty(OutputFilename))
        {
            OutputFilename = InputFilename + samExtension;
        }
    }
    else
    {
        // NOTE(review): if SAMParser is also disposable it should be scoped the same way — confirm.
        SAMParser parser = new SAMParser();
        try
        {
            _sequenceAlignmentMap = parser.ParseOne<SequenceAlignmentMap>(InputFilename);
        }
        catch (Exception ex)
        {
            throw new InvalidOperationException(Resources.InvalidSAMFile, ex);
        }

        _isSAM = true;

        if (string.IsNullOrEmpty(OutputFilename))
        {
            OutputFilename = InputFilename + bamExtension;
        }
    }
}
/// <summary>
/// Builds a '|'-separated string from the values enumerated by the first sorted index
/// in the list.
/// </summary>
/// <param name="sortedIndex">Sorted indexes; only the element at position 0 is read.</param>
/// <returns>The enumerated values joined with '|'.</returns>
private string getSortedOutput(IList<BAMSortedIndex> sortedIndex)
{
    BAMSortedIndex index = sortedIndex.ElementAt(0);
    var temp = new StringBuilder();

    // The using block disposes the enumerator even if enumeration throws.
    using (IEnumerator<int> sortedIndexList = index.GetEnumerator())
    {
        // NOTE(review): Current is read once before the first MoveNext(), so the output
        // begins with whatever the enumerator reports in its initial state. This is kept
        // unchanged because callers may depend on the exact output — confirm intent.
        temp.Append(sortedIndexList.Current.ToString((IFormatProvider)null));

        while (sortedIndexList.MoveNext())
        {
            temp.Append("|" + sortedIndexList.Current.ToString((IFormatProvider)null));
        }
    }

    Assert.AreNotEqual(0, temp.Length);
    return temp.ToString();
}