public void ValidateSAMParserHeader()
{
    // Parses the header of the small SAM file and compares each record field's
    // type code, tag keys and tag values with the comma-separated expectations
    // read from the XML configuration.
    string samPath = utilityObj.xmlUtil.GetTextValue(
        Constants.SmallSAMFileNode, Constants.FilePathNode);
    string[] wantedTagValues = utilityObj.xmlUtil.GetTextValue(
        Constants.SmallSAMFileNode, Constants.RecordTagValuesNode).Split(',');
    string[] wantedTagKeys = utilityObj.xmlUtil.GetTextValue(
        Constants.SmallSAMFileNode, Constants.RecordTagKeysNode).Split(',');
    string[] wantedTypes = utilityObj.xmlUtil.GetTextValue(
        Constants.SmallSAMFileNode, Constants.HeaderTyepsNodes).Split(',');

    SAMAlignmentHeader parsedHeader = SAMParser.ParseSAMHeader(samPath);

    // The expected keys and values are flat lists covering all record fields,
    // so one running position is shared by both and never reset per field.
    int flatTagPosition = 0;

    for (int fieldIndex = 0; fieldIndex < parsedHeader.RecordFields.Count; fieldIndex++)
    {
        string wantedType = wantedTypes[fieldIndex].Replace("/", "");
        string actualType = parsedHeader.RecordFields[fieldIndex].Typecode
            .ToString((IFormatProvider)null).Replace("/", "");
        Assert.AreEqual(wantedType, actualType);

        for (int tagIndex = 0; tagIndex < parsedHeader.RecordFields[fieldIndex].Tags.Count; tagIndex++)
        {
            string wantedKey = wantedTagKeys[flatTagPosition].Replace("/", "");
            string actualKey = parsedHeader.RecordFields[fieldIndex].Tags[tagIndex].Tag
                .ToString((IFormatProvider)null).Replace("/", "");
            Assert.AreEqual(wantedKey, actualKey);

            // Line breaks are stripped from the parsed value before comparing.
            string wantedValue = wantedTagValues[flatTagPosition].Replace("/", "");
            string actualValue = parsedHeader.RecordFields[fieldIndex].Tags[tagIndex].Value
                .ToString((IFormatProvider)null).Replace("/", "").Replace("\r", "").Replace("\n", "");
            Assert.AreEqual(wantedValue, actualValue);

            flatTagPosition++;
        }
    }
}
/// <summary>
/// Lazily yields the aligned sequences found in the specified SAM stream.
/// Lines starting with '@' (headers) are skipped; when <c>IsFilterApplied</c>
/// reports true, only sequences accepted by <c>Filter</c> are returned.
/// </summary>
/// <param name="textReader">SAM file stream.</param>
/// <returns>The aligned sequences in the order their lines are read.</returns>
private IEnumerable<SAMAlignedSequence> GetAlignedSequence(TextReader textReader)
{
    // Evaluated once, before the first line is read.
    bool filterActive = IsFilterApplied();

    // Iteration ends when ReadNextLine returns null.
    for (string current = ReadNextLine(textReader);
         current != null;
         current = ReadNextLine(textReader))
    {
        // Header lines carry no alignment record.
        if (current.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        SAMAlignedSequence parsed = SAMParser.ParseSequence(current);

        // Filter is consulted only when filtering was requested.
        if (!filterActive || Filter(parsed))
        {
            yield return parsed;
        }
    }
}
/// <summary>
/// Parse SAM or BAM file based on user input.
/// <c>SAMInput</c> selects the parser; the file at <c>InputFilePath</c> is parsed
/// with data virtualization enforced and the result is stored in <c>_seqAlignmentMap</c>.
/// </summary>
/// <exception cref="InvalidOperationException">
/// The file could not be parsed. The parser's own exception is attached as
/// <see cref="Exception.InnerException"/> so the root cause is not lost.
/// </exception>
private void DoParse()
{
    if (SAMInput)
    {
        SAMParser parse = new SAMParser();
        parse.EnforceDataVirtualization = true;
        try
        {
            _seqAlignmentMap = parse.Parse(InputFilePath);
        }
        catch (Exception ex)
        {
            // Same exception type and message as before; the cause is kept for diagnostics.
            throw new InvalidOperationException(Resources.InvalidSAMFile, ex);
        }
    }
    else
    {
        BAMParser parse = new BAMParser();
        parse.EnforceDataVirtualization = true;
        try
        {
            _seqAlignmentMap = parse.Parse(InputFilePath);
        }
        catch (Exception ex)
        {
            // Same exception type and message as before; the cause is kept for diagnostics.
            throw new InvalidOperationException(Resources.InvalidBAMFile, ex);
        }
    }
}
public void TestFormatter()
{
    // Round trip: parse the sample SAM file, write it back out with SAMFormatter,
    // parse the written file again and expect the same alignment/sequence counts.
    string filePath = @"TestUtils\SAM\SeqAlignment1.sam";
    string outputfilePath = "samtest.sam";
    ISequenceAlignmentParser parser = new SAMParser();
    IList<ISequenceAlignment> alignments = parser.Parse(filePath).ToList();

    // Expected value comes first so a failure message reports the right "expected"/"actual".
    Assert.IsNotNull(alignments);
    Assert.AreEqual(1, alignments.Count);
    Assert.AreEqual(2, alignments[0].AlignedSequences.Count);

    try
    {
        SAMFormatter formatter = new SAMFormatter();
        formatter.Format(alignments[0], outputfilePath);

        alignments = parser.Parse(outputfilePath).ToList();
        Assert.IsNotNull(alignments);
        Assert.AreEqual(1, alignments.Count);
        Assert.AreEqual(2, alignments[0].AlignedSequences.Count);
    }
    finally
    {
        // Remove the formatted file whether or not the assertions passed.
        File.Delete(outputfilePath);
    }
}
/// <summary>
/// Calls <c>SAMParser.ParseQualityNSequence</c> with a null third argument and the
/// input combination selected by <paramref name="method"/>, and expects the call
/// to throw <see cref="ArgumentException"/> or <see cref="FormatException"/>.
/// <c>Assert.Fail</c> is reached when no exception is thrown.
/// </summary>
/// <param name="method">enum type to execute different overload</param>
private static void ValidateQualitySeqLength(ParseOrFormatQualLength method)
{
    const string successMessage = "SAM Parser P2 : Successfully validated the exception";
    SAMAlignedSequence alignedSequence = new SAMAlignedSequence();

    try
    {
        switch (method)
        {
            case ParseOrFormatQualLength.AlignedSeq:
                // QName is deliberately left unset for this case.
                SAMParser.ParseQualityNSequence(
                    alignedSequence, Alphabets.DNA, null, String.Empty);
                break;

            case ParseOrFormatQualLength.Sequencedata:
                alignedSequence.QName = "Quality Value";
                SAMParser.ParseQualityNSequence(
                    alignedSequence, Alphabets.DNA, null, String.Empty);
                break;

            // Both cases run the same call with the configured quality string.
            case ParseOrFormatQualLength.Qualitydata:
            case ParseOrFormatQualLength.QualityLength:
                alignedSequence.QName = "Quality Value";
                SAMParser.ParseQualityNSequence(
                    alignedSequence, Alphabets.DNA, null, Constants.QualitySequence);
                break;
        }

        Assert.Fail();
    }
    catch (ArgumentException)
    {
        ApplicationLog.WriteLine(successMessage);
    }
    catch (FormatException)
    {
        ApplicationLog.WriteLine(successMessage);
    }
}
public void ValidateVirtualSAMAlignedSequenceListContains()
{
    // Parses the SAM file with data virtualization enforced, then checks that the
    // virtual aligned-sequence list reports Contains() == true for the element
    // whose query sequence matches the first parsed query sequence.
    string samPath = _utilityObj._xmlUtil.GetTextValue(Constants.SAMFileWithAllFieldsNode,
        Constants.FilePathNode1);

    using (SAMParser virtualizingParser = new SAMParser())
    {
        virtualizingParser.EnforceDataVirtualization = true;
        SequenceAlignmentMap parsedMap = virtualizingParser.Parse(samPath);
        IList<SAMAlignedSequence> parsedSequences = parsedMap.QuerySequences;

        VirtualAlignedSequenceList<SAMAlignedSequence> virtualList =
            GetSAMAlignedSequence(Constants.SAMFileWithAllFieldsNode);

        // Validate contains.
        var matchingEntry = virtualList.First(
            candidate => candidate.QuerySequence.ToString().Equals(
                parsedSequences[0].QuerySequence.ToString()));
        Assert.IsTrue(virtualList.Contains(matchingEntry));

        // Log to Nunit GUI. The message shows the element at index 10.
        const string presentFormat =
            "Virtual AlignedSequenceList P1 : VAS {0} is present in the virtualAlignedSequence List";
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, presentFormat, virtualList[10]));
        Console.WriteLine(string.Format((IFormatProvider)null, presentFormat, virtualList[10]));
    }
}
public void TestFormatter()
{
    // Round trip: parse the sample SAM file, write it back out with SAMFormatter,
    // parse the written file again and expect the same alignment/sequence counts.
    string filePath = @"TestUtils\SAM\SeqAlignment1.sam".TestDir();
    string outputfilePath = "samtest.sam";
    ISequenceAlignmentParser parser = new SAMParser();
    IList<ISequenceAlignment> alignments = parser.Parse(filePath).ToList();

    // Expected value comes first so a failure message reports the right "expected"/"actual".
    Assert.IsNotNull(alignments);
    Assert.AreEqual(1, alignments.Count);
    Assert.AreEqual(2, alignments[0].AlignedSequences.Count);

    try
    {
        SAMFormatter formatter = new SAMFormatter();
        formatter.Format(alignments[0], outputfilePath);

        alignments = parser.Parse(outputfilePath).ToList();
        Assert.IsNotNull(alignments);
        Assert.AreEqual(1, alignments.Count);
        Assert.AreEqual(2, alignments[0].AlignedSequences.Count);
    }
    finally
    {
        // Remove the formatted file whether or not the assertions passed.
        File.Delete(outputfilePath);
    }
}
/// <summary>
/// General method to validate SAM parser method.
/// Parses the SAM file configured for the node through a <see cref="TextReader"/>
/// and compares every sequence of each parsed query sequence with the expected
/// sequence at the same query index in the expected FASTA file.
/// </summary>
/// <param name="nodeName">xml node name</param>
void ValidateSAMParser(string nodeName)
{
    // Gets the input and expected-output file paths from the Xml
    string samPath = Utility._xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string expectedFastaPath = Utility._xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);

    SAMParser samParser = new SAMParser();
    SequenceAlignmentMap parsedMap = null;

    // Parse SAM File; the reader is closed as soon as parsing returns.
    using (TextReader samReader = new StreamReader(samPath))
    {
        parsedMap = samParser.Parse(samReader);
    }

    // Get expected sequences
    FastaParser fastaParser = new FastaParser();
    IList<ISequence> wantedSequences = fastaParser.Parse(expectedFastaPath);

    // Validate parsed output with expected output
    for (int queryIndex = 0; queryIndex < parsedMap.QuerySequences.Count; queryIndex++)
    {
        var query = parsedMap.QuerySequences[queryIndex];
        for (int sequenceIndex = 0; sequenceIndex < query.Sequences.Count; sequenceIndex++)
        {
            // The expected list is indexed by query, not by inner sequence.
            Assert.AreEqual(wantedSequences[queryIndex].ToString(),
                query.Sequences[sequenceIndex].ToString());
        }
    }
}
/// <summary>
/// Writes to the console the regions covered by orphan reads of the given
/// alignment file, followed by the "hot spots" obtained by merging the
/// overlapping orphan regions.
/// </summary>
/// <param name="filename">Path of the alignment file; parsed as SAM when
/// <c>SAMInput</c> is true, otherwise as BAM.</param>
private void DisplayOrphans(string filename)
{
    SequenceAlignmentMap alignmentMapobj;

    if (SAMInput)
    {
        SAMParser samParser = new SAMParser();
        alignmentMapobj = samParser.ParseOne<SequenceAlignmentMap>(filename);
    }
    else
    {
        BAMParser bamParser = new BAMParser();
        alignmentMapobj = bamParser.ParseOne<SequenceAlignmentMap>(filename);
    }

    // Get reads from the sequence alignment map object.
    IList<PairedRead> pairedReads = alignmentMapobj.GetPairedReads(0, 0);

    // Get the orphan reads. Materialized once so the filter is not re-run
    // for the count and again for the projection below.
    List<PairedRead> orphans = pairedReads
        .Where(PR => PR.PairedType == PairedReadType.Orphan)
        .ToList();

    if (orphans.Count == 0)
    {
        Console.WriteLine("No Orphans to display");
    }

    var orphanRegions = new List<ISequenceRange>(orphans.Count);
    orphanRegions.AddRange(orphans.Select(orphanRead => GetRegion(orphanRead.Read1)));

    // Get sequence range grouping object.
    SequenceRangeGrouping rangeGroup = new SequenceRangeGrouping(orphanRegions);

    if (!rangeGroup.GroupIDs.Any())
    {
        Console.Write("\r\nNo Orphan reads to display");
    }
    else
    {
        Console.Write("Region of Orphan reads:");
        DisplaySequenceRange(rangeGroup);
    }

    // Overlapping orphan regions are merged and reported as hot spots.
    SequenceRangeGrouping mergedRegions = rangeGroup.MergeOverlaps();

    if (!mergedRegions.GroupIDs.Any())
    {
        Console.Write("\r\nNo hot spots to display");
    }
    else
    {
        Console.Write("\r\nChromosomal hot spot:");
        DisplaySequenceRange(mergedRegions);
    }
}
public void ValidateSAMFormatterWithTextWriterAndAlignments()
{
    // Formatting a whole list of alignments through a TextWriter is expected to
    // throw NotSupportedException; reaching Assert.Fail means it did not.
    const string successMessage = "SAM Parser BVT : Validated the exception successfully";

    // Gets the SAM file path from the Xml
    string samPath = utilityObj.xmlUtil.GetTextValue(
        Constants.SmallSAMFileNode, Constants.FilePathNode);

    // The using block disposes the parser on every exit path.
    using (SAMParser samParser = new SAMParser())
    {
        // The parser is driven through its interface type.
        ISequenceAlignmentParser parser = samParser;
        IList<ISequenceAlignment> alignments = parser.Parse(samPath);
        SAMFormatter formatter = new SAMFormatter();

        try
        {
            using (TextWriter writer = new StreamWriter(Constants.SAMTempFileName))
            {
                formatter.Format(alignments, writer);
            }

            Assert.Fail();
        }
        catch (NotSupportedException)
        {
            ApplicationLog.WriteLine(successMessage);
            Console.WriteLine(successMessage);
        }
    }
}
/// <summary>
/// Validate formatter all format method overloads with filePath\textwriter.
/// The first parsed alignment is written to the temp SAM file, the temp file is
/// parsed again, and every sequence is compared with the expected FASTA file.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="formatTypes">enum type to execute different overload</param>
void ValidateSAMFormatter(string nodeName, ParseOrFormatTypes formatTypes)
{
    // Gets the input and expected-output file paths from the Xml
    string samPath = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string expectedFastaPath = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);

    // The using block disposes the parser on every exit path.
    using (SAMParser samParser = new SAMParser())
    {
        ISequenceAlignmentParser parser = samParser;
        IList<ISequenceAlignment> alignments = parser.Parse(samPath);
        SAMFormatter formatter = new SAMFormatter();

        // Any other enum value writes nothing before the temp file is parsed.
        if (formatTypes == ParseOrFormatTypes.ParseOrFormatText)
        {
            using (TextWriter writer = new StreamWriter(Constants.SAMTempFileName))
            {
                formatter.Format(alignments[0], writer);
            }
        }
        else if (formatTypes == ParseOrFormatTypes.ParseOrFormatFileName)
        {
            formatter.Format(alignments[0], Constants.SAMTempFileName);
        }

        alignments = parser.Parse(Constants.SAMTempFileName);

        // Get expected sequences
        using (FastAParser fastaParser = new FastAParser(expectedFastaPath))
        {
            IList<ISequence> wantedSequences = fastaParser.Parse().ToList();

            // The expected list is flat, so one position runs across all alignments.
            int flatPosition = 0;
            for (int alignmentIndex = 0; alignmentIndex < alignments.Count; alignmentIndex++)
            {
                var alignedSequences = alignments[alignmentIndex].AlignedSequences;
                for (int alignedIndex = 0; alignedIndex < alignedSequences.Count; alignedIndex++)
                {
                    var sequences = alignedSequences[alignedIndex].Sequences;
                    for (int sequenceIndex = 0; sequenceIndex < sequences.Count; sequenceIndex++)
                    {
                        string wantedText = new string(
                            wantedSequences[flatPosition].Select(symbol => (char)symbol).ToArray());
                        string actualText = new string(
                            sequences[sequenceIndex].Select(symbol => (char)symbol).ToArray());
                        Assert.AreEqual(wantedText, actualText);
                        flatPosition++;
                    }
                }
            }
        }
    }
}
public void TestSNPDetectionUsingSAMFile()
{
    // Parses the paired-reads SAM fixture and runs the shared TestCoverage
    // check for reference "chr1".
    const string samPath = @"TestUtils\SAM\PairedReadsTest.sam";

    using (SAMParser samParser = new SAMParser())
    {
        SequenceAlignmentMap alignmentMap = samParser.Parse(samPath);
        TestCoverage("chr1", alignmentMap, "true");
    }
}
public void ValidateSAMParserWithEmptyAlignmentMap()
{
    // ParseOne on the file configured under EmptySamFileNode must return a
    // non-null alignment map.
    SAMParser samParser = new SAMParser();
    string emptySamPath = utilityObj.xmlUtil.GetTextValue(
        Constants.EmptySamFileNode, Constants.FilePathNode);

    SequenceAlignmentMap parsedMap = samParser.ParseOne<SequenceAlignmentMap>(emptySamPath);

    Assert.IsNotNull(parsedMap);
}
public void ValidateSAMParserWithEmptyAlignmentMap()
{
    // The parser is expected to hand back an alignment map object (not null)
    // for the file configured under EmptySamFileNode.
    SAMParser emptyFileParser = new SAMParser();
    string configuredPath = utilityObj.xmlUtil.GetTextValue(
        Constants.EmptySamFileNode, Constants.FilePathNode);

    SequenceAlignmentMap result = emptyFileParser.ParseOne<SequenceAlignmentMap>(configuredPath);

    Assert.IsNotNull(result);
}
/// <summary>
/// Validate parser parse method overloads with filePath\textreader.
/// Every parsed sequence is compared, in traversal order, with the sequences
/// of the expected FASTA file.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="parseTypes">enum type to execute different overload</param>
void ValidateSAMParser(string nodeName, ParseOrFormatTypes parseTypes)
{
    // Gets the input and expected-output file paths from the Xml
    string samPath = Utility._xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string expectedFastaPath = Utility._xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);

    ISequenceAlignmentParser samParser = new SAMParser();
    IList<ISequenceAlignment> parsed = null;

    // Parse SAM File with the overload selected by the caller.
    // An unlisted enum value leaves the result null.
    if (parseTypes == ParseOrFormatTypes.ParseOrFormatText)
    {
        using (TextReader samReader = new StreamReader(samPath))
        {
            parsed = samParser.Parse(samReader);
        }
    }
    else if (parseTypes == ParseOrFormatTypes.ParseOrFormatTextWithFlag)
    {
        using (TextReader samReader = new StreamReader(samPath))
        {
            parsed = samParser.Parse(samReader, true);
        }
    }
    else if (parseTypes == ParseOrFormatTypes.ParseOrFormatFileName)
    {
        parsed = samParser.Parse(samPath);
    }
    else if (parseTypes == ParseOrFormatTypes.ParseOrFormatFileNameWithFlag)
    {
        parsed = samParser.Parse(samPath, true);
    }

    // Get expected sequences
    FastaParser fastaParser = new FastaParser();
    IList<ISequence> wantedSequences = fastaParser.Parse(expectedFastaPath);

    // The expected list is flat, so one position runs across all alignments.
    int flatPosition = 0;
    for (int alignmentIndex = 0; alignmentIndex < parsed.Count; alignmentIndex++)
    {
        var alignedSequences = parsed[alignmentIndex].AlignedSequences;
        for (int alignedIndex = 0; alignedIndex < alignedSequences.Count; alignedIndex++)
        {
            var sequences = alignedSequences[alignedIndex].Sequences;
            for (int sequenceIndex = 0; sequenceIndex < sequences.Count; sequenceIndex++)
            {
                Assert.AreEqual(wantedSequences[flatPosition].ToString(),
                    sequences[sequenceIndex].ToString());
                flatPosition++;
            }
        }
    }
}
public void TestParser()
{
    // The sample SAM file is expected to parse into exactly one alignment
    // holding two aligned sequences.
    string filePath = @"TestUtils\SAM\SeqAlignment1.sam";
    ISequenceAlignmentParser parser = new SAMParser();
    IList<ISequenceAlignment> alignments = parser.Parse(filePath).ToList();

    // Expected value comes first so a failure message reports the right "expected"/"actual".
    Assert.IsNotNull(alignments);
    Assert.AreEqual(1, alignments.Count);
    Assert.AreEqual(2, alignments[0].AlignedSequences.Count);
}
/// <summary>
/// General method to validate SAM Formatter method.
/// The parsed alignment map is written to the temp SAM file, the temp file is
/// parsed again, and each query sequence is compared with the expected FASTA
/// sequence at the same query index.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="parseTypes">enum type to execute different overload</param>
void ValidateSAMFormatterSeqAlign(
    string nodeName,
    ParseOrFormatTypes parseTypes)
{
    // Gets the input and expected-output file paths from the Xml
    string samPath = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string expectedFastaPath = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);

    using (SAMParser samParser = new SAMParser())
    {
        SequenceAlignmentMap alignmentMap = samParser.Parse(samPath);
        SAMFormatter formatter = new SAMFormatter();

        switch (parseTypes)
        {
            case ParseOrFormatTypes.ParseOrFormatText:
                using (TextWriter writer = new StreamWriter(Constants.SAMTempFileName))
                {
                    formatter.Format(alignmentMap, writer);
                }
                break;

            // Both file-name variants go through the same file-name overload.
            case ParseOrFormatTypes.ParseOrFormatFileName:
            case ParseOrFormatTypes.ParseOrFormatFileNameWithFlag:
                formatter.Format(alignmentMap, Constants.SAMTempFileName);
                break;
        }

        alignmentMap = samParser.Parse(Constants.SAMTempFileName);

        // Get expected sequences
        using (FastaParser fastaParser = new FastaParser())
        {
            IList<ISequence> wantedSequences = fastaParser.Parse(expectedFastaPath);

            // Validate parsed output with expected output
            for (int queryIndex = 0; queryIndex < alignmentMap.QuerySequences.Count; queryIndex++)
            {
                var query = alignmentMap.QuerySequences[queryIndex];
                for (int sequenceIndex = 0; sequenceIndex < query.Sequences.Count; sequenceIndex++)
                {
                    // The expected list is indexed by query, not by inner sequence.
                    Assert.AreEqual(wantedSequences[queryIndex].ToString(),
                        query.Sequences[sequenceIndex].ToString());
                }
            }
        }
    }
}
public void ValidateSAMProperties()
{
    // Compares the parser's Description, SupportedFileTypes and Name properties
    // with the expected constants, then logs success.
    using (SAMParser samParser = new SAMParser())
    {
        var actualDescription = samParser.Description;
        Assert.AreEqual(Constants.SAMParserDescription, actualDescription);

        var actualFileTypes = samParser.SupportedFileTypes;
        Assert.AreEqual(Constants.SAMFileType, actualFileTypes);

        var actualName = samParser.Name;
        Assert.AreEqual(Constants.SAMName, actualName);
    }

    ApplicationLog.WriteLine("Successfully validated all the properties of SAM Parser class.");
}
public void ValidateSAMParserWithEmptyAlignmentMap()
{
    // Parsing the file configured under EmptySamFileNode is expected to
    // return null in this version of the parser.
    SAMParser samParser = new SAMParser();
    string emptySamPath = Utility._xmlUtil.GetTextValue(
        Constants.EmptySamFileNode, Constants.FilePathNode);

    SequenceAlignmentMap parsedMap = samParser.Parse(emptySamPath);

    Assert.IsNull(parsedMap);
}
public void TestOrphanRegionssUsingSAMFile()
{
    // Parses the paired-reads SAM fixture and hands the alignment map to the
    // shared TestOrphanRegions check.
    const string samPath = @"TestUtils\SAM\PairedReadsTest.sam";

    using (SAMParser samParser = new SAMParser())
    {
        SequenceAlignmentMap parsedMap = samParser.Parse(samPath);
        TestOrphanRegions(parsedMap);
    }
}
public void TestLengthAnomaliesUsingSAMfile()
{
    // Parses the paired-reads SAM fixture and hands the alignment map to the
    // shared TestLengthAnomalies check.
    const string samPath = @"TestUtils\SAM\PairedReadsTest.sam";

    using (SAMParser samParser = new SAMParser())
    {
        SequenceAlignmentMap parsedMap = samParser.Parse(samPath);
        TestLengthAnomalies(parsedMap);
    }
}
public void TestParser()
{
    // The sample SAM file is expected to parse into exactly one alignment
    // holding two aligned sequences.
    string filePath = @"TestData\SAM\SeqAlignment1.sam";
    ISequenceAlignmentParser parser = new SAMParser();
    IList<ISequenceAlignment> alignments = parser.Parse(filePath);

    // Expected value comes first so a failure message reports the right "expected"/"actual".
    Assert.IsNotNull(alignments);
    Assert.AreEqual(1, alignments.Count);
    Assert.AreEqual(2, alignments[0].AlignedSequences.Count);
}
/// <summary>
/// Validate parser parse one method overloads with filePath\textreader.
/// Every sequence of the single parsed alignment is compared, in traversal
/// order, with the sequences of the expected FASTA file.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="parseTypes">enum type to execute different overload</param>
void ValidateSAMParserWithParseOne(string nodeName, ParseOrFormatTypes parseTypes)
{
    // Gets the input and expected-output file paths from the Xml
    string samPath = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string expectedFastaPath = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);

    // The using block disposes the parser on every exit path.
    using (SAMParser samParser = new SAMParser())
    {
        ISequenceAlignmentParser parser = samParser;
        ISequenceAlignment parsed = null;

        // Parse SAM File. An unlisted enum value leaves the result null.
        if (parseTypes == ParseOrFormatTypes.ParseOrFormatText)
        {
            using (TextReader samReader = new StreamReader(samPath))
            {
                parsed = parser.ParseOne(samReader);
            }
        }
        else if (parseTypes == ParseOrFormatTypes.ParseOrFormatFileName)
        {
            parsed = parser.ParseOne(samPath);
        }

        // Get expected sequences
        using (FastAParser fastaParser = new FastAParser(expectedFastaPath))
        {
            IList<ISequence> wantedSequences = fastaParser.Parse().ToList();

            // The expected list is flat, so one position runs across all aligned sequences.
            int flatPosition = 0;
            for (int alignedIndex = 0; alignedIndex < parsed.AlignedSequences.Count; alignedIndex++)
            {
                var sequences = parsed.AlignedSequences[alignedIndex].Sequences;
                for (int sequenceIndex = 0; sequenceIndex < sequences.Count; sequenceIndex++)
                {
                    string wantedText = new string(
                        wantedSequences[flatPosition].Select(symbol => (char)symbol).ToArray());
                    string actualText = new string(
                        sequences[sequenceIndex].Select(symbol => (char)symbol).ToArray());
                    Assert.AreEqual(wantedText, actualText);
                    flatPosition++;
                }
            }
        }
    }
}
public void TestALLtypePairedReadsInSAMFile()
{
    // Parses the paired-reads SAM fixture, classifies its reads with
    // GetPairedReads(200, 50) and checks the pair and read counts of every
    // paired-read type against the numbers known for the fixture.
    const string samPath = @"TestUtils\SAM\PairedReadsTest.sam";

    using (SAMParser samParser = new SAMParser())
    {
        SequenceAlignmentMap alignmentMap = samParser.Parse(samPath);

        // Expected counts; reads are two per pair unless listed explicitly.
        const int expectedTotalReads = 77;
        const int expectedUnpairedReads = 2;
        const int expectedMultipleHitPairs = 2;
        const int expectedMultipleHitReads = 8;
        const int expectedNormalPairs = 4;
        const int expectedNormalReads = 2 * expectedNormalPairs;
        const int expectedOrphanPairs = 3;
        const int expectedOrphanReads = 5;
        const int expectedChimeraPairs = 15;
        const int expectedChimeraReads = 2 * expectedChimeraPairs;
        const int expectedStructuralPairs = 3;
        const int expectedStructuralReads = 2 * expectedStructuralPairs;
        const int expectedLengthPairs = 9;
        const int expectedLengthReads = 2 * expectedLengthPairs;

        // Sanity check: the per-type read counts must add up to the total.
        int summedReads = expectedUnpairedReads + expectedMultipleHitReads + expectedNormalReads
            + expectedOrphanReads + expectedChimeraReads + expectedStructuralReads
            + expectedLengthReads;
        Assert.AreEqual(expectedTotalReads, summedReads);

        IList<PairedRead> reads = alignmentMap.GetPairedReads(200, 50);

        // Selects the paired reads of one type as a materialized list.
        Func<PairedReadType, IList<PairedRead>> ofType =
            wantedType => reads.Where(PE => PE.PairedType == wantedType).ToList();

        IList<PairedRead> multipleHits = ofType(PairedReadType.MultipleHits);
        IList<PairedRead> normal = ofType(PairedReadType.Normal);
        IList<PairedRead> orphan = ofType(PairedReadType.Orphan);
        IList<PairedRead> chimera = ofType(PairedReadType.Chimera);
        IList<PairedRead> structural = ofType(PairedReadType.StructuralAnomaly);
        IList<PairedRead> lengthAnomaly = ofType(PairedReadType.LengthAnomaly);

        Assert.AreEqual(expectedTotalReads, alignmentMap.QuerySequences.Count);

        Assert.AreEqual(expectedMultipleHitPairs, multipleHits.Count);
        Assert.AreEqual(expectedMultipleHitReads, multipleHits.Sum(PE => PE.Reads.Count));

        Assert.AreEqual(expectedNormalPairs, normal.Count);
        Assert.AreEqual(expectedNormalReads, normal.Sum(PE => PE.Reads.Count));

        Assert.AreEqual(expectedOrphanPairs, orphan.Count);
        Assert.AreEqual(expectedOrphanReads, orphan.Sum(PE => PE.Reads.Count));

        Assert.AreEqual(expectedChimeraPairs, chimera.Count);
        Assert.AreEqual(expectedChimeraReads, chimera.Sum(PE => PE.Reads.Count));

        Assert.AreEqual(expectedStructuralPairs, structural.Count);
        Assert.AreEqual(expectedStructuralReads, structural.Sum(PE => PE.Reads.Count));

        Assert.AreEqual(expectedLengthPairs, lengthAnomaly.Count);
        Assert.AreEqual(expectedLengthReads, lengthAnomaly.Sum(PE => PE.Reads.Count));
    }
}
/// <summary>
/// Parses the BAM file and returns the Header.
/// Rewinds <c>ReadStream</c>, reads the header text and the reference sequence
/// table, and rebuilds <c>RefSeqNames</c> and <c>_refSeqLengths</c> as a side effect.
/// </summary>
/// <returns>
/// The header parsed from the embedded header text (a new, empty header when the
/// text length is zero), with its reference sequences replaced by the entries
/// read from the reference table.
/// </returns>
private SAMAlignmentHeader GetHeader()
{
    var result = new SAMAlignmentHeader();
    RefSeqNames = new RegexValidatedStringList(SAMAlignedSequenceHeader.RNameRegxExprPattern);
    _refSeqLengths = new List<int>();

    // Start over from the beginning of the stream with no decompressed block cached.
    ReadStream.Seek(0, SeekOrigin.Begin);
    _deCompressedStream = null;

    // First 8 bytes: the header text length is the Int32 at offset 4
    // (the first 4 bytes are read but not inspected here, presumably the BAM magic).
    var scratch = new byte[8];
    ReadUnCompressedData(scratch, 0, 8);
    var headerTextLength = Helper.GetInt32(scratch, 4);

    var headerTextBytes = new byte[headerTextLength];
    if (headerTextLength != 0)
    {
        ReadUnCompressedData(headerTextBytes, 0, headerTextLength);
    }

    // Reference table: a count, then (name length, name bytes, sequence length) per entry.
    ReadUnCompressedData(scratch, 0, 4);
    var referenceCount = Helper.GetInt32(scratch, 0);

    for (var entry = 0; entry < referenceCount; entry++)
    {
        ReadUnCompressedData(scratch, 0, 4);
        var nameLength = Helper.GetInt32(scratch, 0);

        var nameBytes = new byte[nameLength];
        ReadUnCompressedData(nameBytes, 0, nameLength);

        ReadUnCompressedData(scratch, 0, 4);
        var sequenceLength = Helper.GetInt32(scratch, 0);

        // The last name byte is dropped (presumably a NUL terminator).
        RefSeqNames.Add(Encoding.ASCII.GetString(nameBytes, 0, nameBytes.Length - 1));
        _refSeqLengths.Add(sequenceLength);
    }

    if (headerTextBytes.Length > 0)
    {
        var headerText = Encoding.ASCII.GetString(headerTextBytes);
        using (var headerReader = new StringReader(headerText))
        {
            result = SAMParser.ParseSAMHeader(headerReader);
        }
    }

    // The reference table read above replaces whatever the header text declared.
    result.ReferenceSequences.Clear();
    for (var position = 0; position < RefSeqNames.Count; position++)
    {
        var name = RefSeqNames[position];
        var length = _refSeqLengths[position];
        result.ReferenceSequences.Add(new ReferenceSequenceInfo(name, length));
    }

    return result;
}
/// <summary>
/// General method to validate SAM parser method.
/// Parses the SAM file with the selected overload and compares each query
/// sequence with the expected FASTA sequence at the same query index.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="method">enum type to execute different overload</param>
void ValidateSAMParserSeqAlign(
    string nodeName,
    ParseOrFormatTypes method)
{
    // Gets the input and expected-output file paths from the Xml
    string samPath = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string expectedFastaPath = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);

    using (SAMParser samParser = new SAMParser())
    {
        SequenceAlignmentMap alignmentMap = null;

        // Parse SAM File. An unlisted enum value leaves the result null.
        if (method == ParseOrFormatTypes.ParseOrFormatText)
        {
            using (TextReader samReader = new StreamReader(samPath))
            {
                alignmentMap = samParser.Parse(samReader);
            }
        }
        else if (method == ParseOrFormatTypes.ParseOrFormatFileName)
        {
            alignmentMap = samParser.Parse(samPath);
        }

        // Get expected sequences
        using (FastAParser fastaParser = new FastAParser(expectedFastaPath))
        {
            IList<ISequence> wantedSequences = fastaParser.Parse().ToList();

            // Validate parsed output with expected output
            for (int queryIndex = 0; queryIndex < alignmentMap.QuerySequences.Count; queryIndex++)
            {
                var query = alignmentMap.QuerySequences[queryIndex];
                for (int sequenceIndex = 0; sequenceIndex < query.Sequences.Count; sequenceIndex++)
                {
                    // The expected list is indexed by query, not by inner sequence.
                    string wantedText = new string(
                        wantedSequences[queryIndex].Select(symbol => (char)symbol).ToArray());
                    string actualText = new string(
                        query.Sequences[sequenceIndex].Select(symbol => (char)symbol).ToArray());
                    Assert.AreEqual(wantedText, actualText);
                }
            }
        }
    }
}
/// <summary>
/// General method to validate SAM Formatter method.
/// The parsed alignment map is written to the temp SAM file, the temp file is
/// parsed again, and each query sequence is compared with the expected FASTA
/// sequence at the same query index.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="parseTypes">enum type to execute different overload</param>
void ValidateSAMFormatterSeqAlign(
    string nodeName,
    ParseOrFormatTypes parseTypes)
{
    // Gets the input and expected-output file paths from the Xml
    string samPath = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode).TestDir();
    string expectedFastaPath = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence).TestDir();

    SAMParser samParser = new SAMParser();
    SequenceAlignmentMap alignmentMap = samParser.ParseOne<SequenceAlignmentMap>(samPath);
    SAMFormatter formatter = new SAMFormatter();

    // Any other enum value writes nothing before the temp file is parsed.
    if (parseTypes == ParseOrFormatTypes.ParseOrFormatText)
    {
        using (var output = File.Create(Constants.SAMTempFileName))
        {
            formatter.Format(output, alignmentMap);
        }
    }
    else if (parseTypes == ParseOrFormatTypes.ParseOrFormatFileName)
    {
        formatter.Format(alignmentMap, Constants.SAMTempFileName);
    }

    alignmentMap = samParser.ParseOne<SequenceAlignmentMap>(Constants.SAMTempFileName);

    // Get expected sequences
    FastAParser fastaParser = new FastAParser();
    IList<ISequence> wantedSequences = fastaParser.Parse(expectedFastaPath).ToList();

    // Validate parsed output with expected output
    for (int queryIndex = 0; queryIndex < alignmentMap.QuerySequences.Count; queryIndex++)
    {
        var query = alignmentMap.QuerySequences[queryIndex];
        for (int sequenceIndex = 0; sequenceIndex < query.Sequences.Count; sequenceIndex++)
        {
            // The expected list is indexed by query, not by inner sequence.
            string wantedText = new string(
                wantedSequences[queryIndex].Select(symbol => (char)symbol).ToArray());
            string actualText = new string(
                query.Sequences[sequenceIndex].Select(symbol => (char)symbol).ToArray());
            Assert.AreEqual(wantedText, actualText);
        }
    }
}
/// <summary>
/// Validate parser and formatter by parsing the same file which contains
/// extended CIGAR string. Validate the CIGAR property in aligned sequence
/// metadata information is updated as expected.
/// </summary>
/// <param name="nodeName">xml node name</param>
void ValidateSAMParseAndFormatWithCIGARFormat(string nodeName)
{
    // Gets the input path, expected sequences and expected CIGAR from the Xml
    string samPath = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string expectedFastaPath = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);
    string wantedCigar = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.CIGARNode);

    // The using block disposes the parser on every exit path.
    using (SAMParser samParser = new SAMParser())
    {
        ISequenceAlignmentParser parser = samParser;
        IList<ISequenceAlignment> alignments = parser.Parse(samPath);

        // Get expected sequences
        using (FastAParser fastaParser = new FastAParser(expectedFastaPath))
        {
            IList<ISequence> wantedSequences = fastaParser.Parse().ToList();

            // The expected list is flat, so one position runs across all alignments.
            int flatPosition = 0;
            for (int alignmentIndex = 0; alignmentIndex < alignments.Count; alignmentIndex++)
            {
                var alignedSequences = alignments[alignmentIndex].AlignedSequences;
                for (int alignedIndex = 0; alignedIndex < alignedSequences.Count; alignedIndex++)
                {
                    var sequences = alignedSequences[alignedIndex].Sequences;
                    for (int sequenceIndex = 0; sequenceIndex < sequences.Count; sequenceIndex++)
                    {
                        string wantedText = new string(
                            wantedSequences[flatPosition].Select(symbol => (char)symbol).ToArray());
                        string actualText = new string(
                            sequences[sequenceIndex].Select(symbol => (char)symbol).ToArray());
                        Assert.AreEqual(wantedText, actualText);

                        // Every metadata entry of the aligned sequence is read as a
                        // SAM header and must carry the expected CIGAR.
                        foreach (string metadataKey in alignedSequences[alignedIndex].Metadata.Keys)
                        {
                            SAMAlignedSequenceHeader sequenceHeader = (SAMAlignedSequenceHeader)
                                alignedSequences[alignedIndex].Metadata[metadataKey];
                            Assert.AreEqual(wantedCigar, sequenceHeader.CIGAR);
                        }

                        flatPosition++;
                    }
                }
            }
        }
    }
}
public void ValidateSAMFormatterFormatString()
{
    // Formats the first parsed alignment to a string and compares it with
    // the expected text held in Constants.FormatterString.
    string filePath = Utility._xmlUtil.GetTextValue(
        Constants.SamFormatterFileNode, Constants.FilePathNode);
    ISequenceAlignmentParser parser = new SAMParser();
    IList<ISequenceAlignment> alignment = parser.Parse(filePath);

    SAMFormatter formatter = new SAMFormatter();
    string formatted = formatter.FormatString(alignment[0]);

    // Expected value comes first so a failure message reports the right "expected"/"actual".
    Assert.AreEqual(Constants.FormatterString, formatted);
}
public void ValidateSAMFormatterFormatString()
{
    // Formats the first parsed alignment to a string and compares it with the
    // expected text, whose "\r\n" line breaks are replaced by the platform's.
    string filePath = utilityObj.xmlUtil.GetTextValue(
        Constants.SamFormatterFileNode, Constants.FilePathNode).TestDir();
    ISequenceAlignmentParser parser = new SAMParser();
    IList<ISequenceAlignment> alignment = parser.Parse(filePath).ToList();

    SAMFormatter formatter = new SAMFormatter();
    string formatted = formatter.FormatString(alignment[0]);
    string expected = Constants.FormatterString.Replace("\r\n", Environment.NewLine);

    // Expected value comes first so a failure message reports the right "expected"/"actual".
    Assert.AreEqual(expected, formatted);
}
public void SAMProperties()
{
    // Checks Name, Description and SupportedFileTypes of the SAM parser and
    // the SAM formatter against the resource strings.
    // Expected values come first so failure messages report the right "expected"/"actual".
    ISequenceAlignmentParser parser = new SAMParser();
    Assert.AreEqual(Properties.Resource.SAM_NAME, parser.Name);
    Assert.AreEqual(Properties.Resource.SAMPARSER_DESCRIPTION, parser.Description);
    Assert.AreEqual(Properties.Resource.SAM_FILEEXTENSION, parser.SupportedFileTypes);

    ISequenceAlignmentFormatter formatter = new SAMFormatter();
    Assert.AreEqual(Properties.Resource.SAM_NAME, formatter.Name);
    Assert.AreEqual(Properties.Resource.SAMFORMATTER_DESCRIPTION, formatter.Description);
    Assert.AreEqual(Properties.Resource.SAM_FILEEXTENSION, formatter.SupportedFileTypes);
}
public void ValidateSAMToBAMConversionWithDVEnabled()
{
    // Converts a SAM file (parsed with data virtualization enforced) to a
    // temporary BAM file and compares the header and aligned sequences of the
    // result with an expected BAM file.
    const string successMessage =
        "SAM Parser BVT : Validated the SAM->BAM conversion successfully";

    // Get values from xml config file.
    string expectedBamFilePath = _utilityObj._xmlUtil.GetTextValue(
        Constants.BAMToSAMConversionNode, Constants.FilePathNode);
    string samFilePath = _utilityObj._xmlUtil.GetTextValue(
        Constants.BAMToSAMConversionNode, Constants.FilePathNode1);

    using (BAMParser bamParserObj = new BAMParser())
    using (SAMParser samParserObj = new SAMParser())
    {
        BAMFormatter bamFormatterObj = new BAMFormatter();

        // Enforce DV
        samParserObj.EnforceDataVirtualization = true;

        try
        {
            // Parse expected BAM file.
            SequenceAlignmentMap expextedBamAlignmentObj = bamParserObj.Parse(
                expectedBamFilePath);

            // Parse a SAM file.
            SequenceAlignmentMap samSeqAlignment = samParserObj.Parse(samFilePath);

            // Format SAM sequenceAlignment object to BAM file.
            bamFormatterObj.Format(samSeqAlignment, Constants.BAMTempFileName);

            // Parse a formatted BAM file.
            SequenceAlignmentMap bamSeqAlignment = bamParserObj.Parse(Constants.BAMTempFileName);

            // Validate converted BAM file with expected BAM file.
            Assert.IsTrue(CompareSequencedAlignmentHeader(bamSeqAlignment,
                expextedBamAlignmentObj));

            // Validate BAM file aligned sequences.
            Assert.IsTrue(CompareAlignedSequences(bamSeqAlignment,
                expextedBamAlignmentObj));

            // Log message to NUnit GUI.
            ApplicationLog.WriteLine(successMessage);
            Console.WriteLine(successMessage);
        }
        finally
        {
            // Delete the temporary file even when a step above fails, so a
            // failed run does not leave it behind. File.Delete is a no-op
            // when the file was never created.
            File.Delete(Constants.BAMTempFileName);
            ApplicationLog.WriteLine("Deleted the temp file created.");
        }
    }
}
public void SAMProperties()
{
    // Checks Name, Description and FileTypes of the SAM parser and the SAM
    // formatter against the resource strings.
    // Expected values come first so failure messages report the right "expected"/"actual".
    ISequenceAlignmentParser parser = new SAMParser();
    Assert.AreEqual(Properties.Resource.SAM_NAME, parser.Name);
    Assert.AreEqual(Properties.Resource.SAMPARSER_DESCRIPTION, parser.Description);
    Assert.AreEqual(Properties.Resource.SAM_FILEEXTENSION, parser.FileTypes);

    ISequenceAlignmentFormatter formatter = new SAMFormatter();
    Assert.AreEqual(Properties.Resource.SAM_NAME, formatter.Name);
    Assert.AreEqual(Properties.Resource.SAMFORMATTER_DESCRIPTION, formatter.Description);
    Assert.AreEqual(Properties.Resource.SAM_FILEEXTENSION, formatter.FileTypes);
}
/// <summary>
/// Parses a SAM file that carries quality values and checks every parsed sequence:
/// it must be a QualitativeSequence, its score count must match the configured value,
/// and its text must match the corresponding expected FASTA sequence.
/// </summary>
/// <param name="nodeName">xml node name holding the file paths and score count</param>
void ValidateSAMParseAndFormatWithQualityValues(string nodeName)
{
    // Read the test inputs from the xml configuration.
    string filePath = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedSequenceFile = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence);
    string scoreCount = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.ScoresCount);

    using (SAMParser samParser = new SAMParser())
    {
        // Parse through the interface so the interface overload of Parse is used.
        ISequenceAlignmentParser parser = samParser;
        IList<ISequenceAlignment> alignments = parser.Parse(filePath);

        using (FastaParser parserObj = new FastaParser())
        {
            IList<ISequence> expectedSequences = parserObj.Parse(expectedSequenceFile);

            // Expected sequences are consumed in the order the parsed sequences are visited.
            int count = 0;
            foreach (ISequenceAlignment alignment in alignments)
            {
                foreach (var aligned in alignment.AlignedSequences)
                {
                    foreach (ISequence sequence in aligned.Sequences)
                    {
                        Assert.IsInstanceOfType(sequence, typeof(QualitativeSequence));
                        QualitativeSequence qualSequence = (QualitativeSequence)sequence;

                        Assert.AreEqual(scoreCount,
                            qualSequence.Scores.Length.ToString((IFormatProvider)null));
                        Assert.AreEqual(expectedSequences[count].ToString(),
                            qualSequence.ToString());
                        count++;
                    }
                }
            }
        }
    }
}
/// <summary>
/// Parses a SAM file that carries quality values and checks that every parsed sequence
/// is a QualitativeSequence whose symbols match the corresponding expected FASTA sequence.
/// </summary>
/// <param name="nodeName">xml node name holding the SAM and expected-sequence file paths</param>
void ValidateSAMParseAndFormatWithQualityValues(string nodeName)
{
    // Read the test inputs from the xml configuration.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedSequenceFile = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence);

    // Keep a typed reference for disposal; parse through the interface.
    SAMParser samParser = new SAMParser();
    ISequenceAlignmentParser parser = samParser;
    try
    {
        IList<ISequenceAlignment> alignments = parser.Parse(filePath);

        using (FastAParser parserObj = new FastAParser(expectedSequenceFile))
        {
            IList<ISequence> expectedSequencesList = parserObj.Parse().ToList();

            // Expected sequences are consumed in the order the parsed sequences are visited.
            int count = 0;
            foreach (ISequenceAlignment alignment in alignments)
            {
                foreach (var aligned in alignment.AlignedSequences)
                {
                    foreach (ISequence sequence in aligned.Sequences)
                    {
                        Assert.IsInstanceOfType(sequence, typeof(QualitativeSequence));
                        QualitativeSequence qualSequence = (QualitativeSequence)sequence;

                        string expectedText = new string(
                            expectedSequencesList[count].Select(a => (char)a).ToArray());
                        string actualText = new string(
                            qualSequence.Select(a => (char)a).ToArray());
                        Assert.AreEqual(expectedText, actualText);
                        count++;
                    }
                }
            }
        }
    }
    finally
    {
        samParser.Dispose();
    }
}
/// <summary>
/// Builds chimera statistics for an alignment file: paired reads classified as
/// <c>PairedReadType.Chimera</c> are grouped by the reference name of read 1 and then
/// by the reference name of read 2, and the size of each sub-group is stored in a matrix.
/// </summary>
/// <param name="filename">Path of the alignment file; parsed as SAM when SAMInput is set, otherwise as BAM.</param>
/// <param name="mean">Mean value passed to GetPairedReads. Defaults to 200, the value previously hard-coded here.</param>
/// <param name="deviation">Standard deviation passed to GetPairedReads. Defaults to 50, the value previously hard-coded here.</param>
/// <returns>
/// Matrix whose rows and columns are the reference sequence names of the file; it is created
/// with "0" via CreateEmptyInstance and cells for observed (read 1, read 2) pairs hold the
/// chimera count as a string.
/// </returns>
private Matrix<string, string, string> GetChimeraData(string filename, float mean = 200, float deviation = 50)
{
    SequenceAlignmentMap alignmentMapobj = null;

    if (!SAMInput)
    {
        BAMParser bamParser = new BAMParser();
        alignmentMapobj = bamParser.ParseOne<SequenceAlignmentMap>(filename);
    }
    else
    {
        SAMParser samParser = new SAMParser();
        alignmentMapobj = samParser.ParseOne<SequenceAlignmentMap>(filename);
    }

    // get reads from sequence alignment map object.
    IList<PairedRead> pairedReads = alignmentMapobj.GetPairedReads(mean, deviation);

    // select chimeras from reads.
    var chimeras = pairedReads.Where(PE => PE.PairedType == PairedReadType.Chimera);

    // Group chimeras based on first reads chromosomes name.
    var groupedChimeras = chimeras.GroupBy(PR => PR.Read1.RName);

    IList<string> chrs = alignmentMapobj.GetRefSequences();

    // Declare sparse matrix to store statistics.
    SparseMatrix<string, string, string> statistics =
        SparseMatrix<string, string, string>.CreateEmptyInstance(chrs, chrs, "0");

    // For each group create sub group depending on the second reads chromosomes.
    foreach (var group in groupedChimeras)
    {
        foreach (var subgroup in group.GroupBy(PE => PE.Read2.RName))
        {
            // store the count to stats
            statistics[group.Key, subgroup.Key] = subgroup.Count().ToString();
        }
    }

    return statistics;
}
/// <summary>
/// Writes a per-reference coverage table to the console for the given alignment file.
/// </summary>
/// <param name="inputFile">Path of the input file; parsed as SAM when SAMInput is set, otherwise as BAM.</param>
/// <param name="possibleOccurence">
/// True to print the "Nucleotide Distribution" table (with the possibility-of-occurrence column),
/// false to print the plain "Coverage Profile" table.
/// </param>
/// <exception cref="InvalidOperationException">Thrown when <paramref name="inputFile"/> is null or empty.</exception>
public void DisplaySequenceItemOccurences(string inputFile, bool possibleOccurence)
{
    if (string.IsNullOrEmpty(inputFile))
    {
        throw new InvalidOperationException("Input File Not specified");
    }

    SequenceAlignmentMap alignmentMapobj;
    if (SAMInput)
    {
        SAMParser samParser = new SAMParser();
        alignmentMapobj = samParser.ParseOne<SequenceAlignmentMap>(inputFile);
    }
    else
    {
        BAMParser bamParser = new BAMParser();
        alignmentMapobj = bamParser.ParseOne<SequenceAlignmentMap>(inputFile);
    }

    IList<string> chromosomes = alignmentMapobj.GetRefSequences();

    // Both modes differ only in the heading text and in the flag handed to GetCoverage.
    string title;
    string columnHeader;
    string occurrenceFlag;
    if (possibleOccurence)
    {
        title = "Nucleotide Distribution:";
        columnHeader = "\r\nPosition\tA\tT\tG\tC\tPossibility Of Occurences";
        occurrenceFlag = "true";
    }
    else
    {
        title = "Coverage Profile:";
        columnHeader = "\r\nPosition\tA\tT\tG\tC";
        occurrenceFlag = "false";
    }

    Console.Write(title);
    Console.Write(columnHeader);

    foreach (string chromosome in chromosomes)
    {
        GetCoverage(chromosome, alignmentMapobj, occurrenceFlag);
    }
}
/// <summary>
/// General method to invalidate the SAM formatter: calls one Format overload with
/// null/empty arguments and logs when ArgumentNullException or NotSupportedException
/// is raised.
/// </summary>
/// <param name="method">enum type selecting which overload is exercised</param>
void ValidateSamFormatter(ParseOrFormatTypes method)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(
        Constants.SmallSAMFileNode, Constants.FilePathNode);
    ISequenceAlignmentParser parser = new SAMParser();

    try
    {
        switch (method)
        {
            case ParseOrFormatTypes.ParseOrFormatSeqText:
                new SAMFormatter().Format(null, null as ISequenceAlignment);
                break;

            case ParseOrFormatTypes.ParseOrFormatSeqTextWithFlag:
                {
                    ISequenceAlignment parsed = parser.ParseOne(filePath);
                    new SAMFormatter().Format(null, parsed);
                    break;
                }

            case ParseOrFormatTypes.ParseOrFormatIseq:
                new SAMFormatter().Format(null as ISequenceAlignment, null as string);
                break;

            case ParseOrFormatTypes.ParseOrFormatIseqFile:
                {
                    ISequenceAlignment parsed = parser.ParseOne(filePath);
                    new SAMFormatter().Format(parsed, null as string);
                    break;
                }

            case ParseOrFormatTypes.ParseOrFormatCollString:
                new SAMFormatter().Format(
                    null as ICollection<ISequenceAlignment>, null as string);
                break;

            case ParseOrFormatTypes.ParseOrFormatCollection:
                new SAMFormatter().Format(null, null as ICollection<ISequenceAlignment>);
                break;

            // Both values format an empty alignment map to a null file name.
            case ParseOrFormatTypes.ParseOneOrFormatSeq:
            case ParseOrFormatTypes.ParseOrFormatIseqT:
                {
                    SequenceAlignmentMap emptyMap = new SequenceAlignmentMap();
                    new SAMFormatter().Format(emptyMap, null as string);
                    break;
                }

            // Both values format a null alignment map to a null file name.
            case ParseOrFormatTypes.ParseOneOrFormatSeqFile:
            case ParseOrFormatTypes.ParseOrFormatIseqText:
                new SAMFormatter().Format(null as SequenceAlignmentMap, null as string);
                break;

            // ParseOrFormatFormatString and every other value: nothing to exercise.
            default:
                break;
        }
    }
    catch (ArgumentNullException)
    {
        ApplicationLog.WriteLine(
            "SAM Formatter P2 : Successfully validated the exception");
    }
    catch (NotSupportedException)
    {
        ApplicationLog.WriteLine(
            "SAM Formatter P2 : Successfully validated the exception");
    }
}
/// <summary>
/// General method to invalidate the SAM parser through ISequenceAlignmentParser:
/// calls the selected parse entry point with a null argument and fails the test
/// unless an ArgumentNullException is raised.
/// </summary>
/// <param name="method">enum type selecting which overload is exercised</param>
private static void ValidateISeqAlignParser(ParseOrFormatTypes method)
{
    ISequenceAlignmentParser parser = new SAMParser();

    try
    {
        if (method == ParseOrFormatTypes.ParseOrFormatText)
        {
            parser.Parse(null).First();
        }
        else if (method == ParseOrFormatTypes.ParseOrFormatFileName)
        {
            parser.Parse(null as string).First();
        }
        else if (method == ParseOrFormatTypes.ParseOneOrFormatHeader)
        {
            SAMParser.ParseSAMHeader(null as TextReader);
        }
        else if (method == ParseOrFormatTypes.ParseOneOrFormatHeaderFn)
        {
            SAMParser.ParseSAMHeader(null as Stream);
        }

        // Reaching this point means no exception was thrown.
        Assert.Fail();
    }
    catch (ArgumentNullException)
    {
        ApplicationLog.WriteLine(
            "SAM Parser P2 : Successfully validated the exception");
    }
}
/// <summary>
/// Assigns the Alphabet property of a SAM parser and expects the assignment to raise
/// NotSupportedException; the test fails when no exception is thrown.
/// </summary>
public void InvalidateSAMParseAlhabetProp()
{
    try
    {
        SAMParser samParser = new SAMParser();
        samParser.Alphabet = Alphabets.DNA;

        // The setter returned normally, which is not the expected outcome.
        Assert.Fail();
    }
    catch (NotSupportedException)
    {
        ApplicationLog.WriteLine(
            "SAM Parser P2 : Successfully validated the exception");
    }
}
/// <summary>
/// Parses the SAM formatter test file, formats its first alignment to a string with
/// SAMFormatter.FormatString and compares it with Constants.FormatterString
/// (with "\r\n" normalised to the platform line ending).
/// </summary>
public void ValidateSAMFormatterFormatString()
{
    string filePath = utilityObj.xmlUtil.GetTextValue(
        Constants.SamFormatterFileNode, Constants.FilePathNode);

    ISequenceAlignmentParser parser = new SAMParser();
    IList<ISequenceAlignment> alignment = parser.Parse(filePath).ToList();

    SAMFormatter formatter = new SAMFormatter();
    string formatted = formatter.FormatString(alignment[0]);
    string expected = Constants.FormatterString.Replace("\r\n", Environment.NewLine);

    // Assert.AreEqual takes (expected, actual); keeping that order makes the
    // failure message label the two strings correctly.
    Assert.AreEqual(expected, formatted);
}
/// <summary>
/// Merge multiple sorted alignments.
/// SAMUtil.exe out.bam in1.bam in2.bam
/// </summary>
/// <remarks>
/// Every file in FilePaths is parsed and sorted in parallel. The header comes from HeaderFile
/// when it is set, otherwise from the first input file. The merged records are written to a
/// temporary file, compressed into OutputFilename ("out.bam" when none was given), and the
/// temporary file is removed afterwards, also when writing fails.
/// NOTE(review): exceptions thrown inside the Parallel.For body reach the caller wrapped in
/// an AggregateException, as documented for Parallel.For.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// Thrown when FilePaths is null or holds fewer than two entries.
/// </exception>
public void DoMerge()
{
    if (FilePaths == null)
    {
        throw new InvalidOperationException("FilePath");
    }

    if (FilePaths.Length < 2)
    {
        throw new InvalidOperationException(Resources.MergeHelp);
    }

    IList<IList<BAMSortedIndex>> sortedIndexes = new List<IList<BAMSortedIndex>>();
    IList<SequenceAlignmentMap> sequenceAlignmentMaps = new List<SequenceAlignmentMap>();

    // Loop-invariant: every input is sorted by the same field.
    BAMSortByFields sortField = SortByReadName
        ? BAMSortByFields.ReadNames
        : BAMSortByFields.ChromosomeCoordinates;

    Parallel.For(0, FilePaths.Length, (int index) =>
    {
        BAMParser parser = new BAMParser();
        SequenceAlignmentMap map;
        try
        {
            map = parser.ParseOne<SequenceAlignmentMap>(FilePaths[index]);
        }
        catch (Exception ex)
        {
            // Keep the original failure as inner exception for diagnosis.
            throw new InvalidOperationException(Resources.InvalidBAMFile, ex);
        }

        if (map == null)
        {
            throw new InvalidOperationException(Resources.EmptyFile);
        }

        // Only the first input contributes the header of the merged file.
        if (index == 0)
        {
            if (string.IsNullOrEmpty(HeaderFile))
            {
                if (map.Header.RecordFields.Count == 0)
                {
                    throw new InvalidOperationException(Resources.HeaderMissing);
                }

                header = map.Header;
            }
            else
            {
                SAMParser parse = new SAMParser();
                SequenceAlignmentMap head;
                try
                {
                    head = parse.ParseOne<SequenceAlignmentMap>(HeaderFile);
                }
                catch (Exception ex)
                {
                    throw new InvalidOperationException(Resources.IncorrectHeaderFile, ex);
                }

                if (head == null)
                {
                    throw new InvalidOperationException(Resources.EmptyFile);
                }

                header = head.Header;
            }
        }

        IList<BAMSortedIndex> sortedIndex = Sort(map, sortField);

        // Both lists are appended under one lock so that entry i of one list
        // always belongs to entry i of the other.
        lock (sortedIndexes)
        {
            sortedIndexes.Add(sortedIndex);
            sequenceAlignmentMaps.Add(map);
        }
    });

    if (string.IsNullOrEmpty(OutputFilename))
    {
        OutputFilename = "out.bam";
        autoGeneratedOutputFilename = true;
    }

    string filePath = Path.GetTempFileName();
    try
    {
        using (FileStream fstemp = new FileStream(filePath, FileMode.Create, FileAccess.ReadWrite))
        {
            BAMFormatter formatter = new BAMFormatter();
            formatter.WriteHeader(header, fstemp);

            if (SortByReadName)
            {
                IList<BAMSortedIndex> sortedIndex = sortedIndexes.Select(a => a.First()).ToList();
                WriteMergeFileSortedByReadName(sortedIndex, fstemp, formatter, sequenceAlignmentMaps);
            }
            else
            {
                WriteMergeFile(sortedIndexes, fstemp, formatter, sequenceAlignmentMaps);
            }

            using (FileStream fsoutput = new FileStream(OutputFilename, FileMode.Create, FileAccess.Write))
            {
                fstemp.Seek(0, SeekOrigin.Begin);
                formatter.CompressBAMFile(fstemp, fsoutput);
            }
        }
    }
    finally
    {
        // Remove the uncompressed intermediate file even when merging failed.
        File.Delete(filePath);
    }

    if (autoGeneratedOutputFilename)
    {
        Console.WriteLine(Properties.Resources.SuccessMessageWithOutputFileName, OutputFilename);
    }
}
/// <summary>
/// Expects SAMFormatter.Format(stream, collection of alignments) to raise
/// NotSupportedException; the test fails when the call returns normally.
/// The temporary output file created for the stream is removed afterwards.
/// </summary>
public void ValidateSAMFormatterWithTextWriterAndAlignments()
{
    // Gets the expected sequence from the Xml
    string filePath = utilityObj.xmlUtil.GetTextValue(
        Constants.SmallSAMFileNode, Constants.FilePathNode);

    ISequenceAlignmentParser parser = new SAMParser();
    IEnumerable<ISequenceAlignment> alignments = parser.Parse(filePath);
    SAMFormatter formatter = new SAMFormatter();

    try
    {
        using (var writer = File.Create(Constants.SAMTempFileName))
        {
            formatter.Format(writer, alignments);
        }

        Assert.Fail();
    }
    catch (NotSupportedException)
    {
        ApplicationLog.WriteLine("SAM Parser BVT : Validated the exception successfully");
    }
    finally
    {
        // File.Create has already put the file on disk before Format throws;
        // the stream is closed by now, so the file can be removed.
        File.Delete(Constants.SAMTempFileName);
    }
}
/// <summary>
/// Validate parser parse one method overloads with filePath\stream and compare every
/// parsed sequence with the expected FASTA sequences.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="parseTypes">
/// enum type to execute different overload; only ParseOrFormatText (stream) and
/// ParseOrFormatFileName (file path) are supported, any other value fails the test.
/// </param>
void ValidateSAMParserWithParseOne(string nodeName, ParseOrFormatTypes parseTypes)
{
    // Gets the expected sequence from the Xml
    string filePath = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string expectedSequenceFile = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);

    ISequenceAlignmentParser parser = new SAMParser();
    ISequenceAlignment alignment = null;

    // Parse SAM File
    switch (parseTypes)
    {
        case ParseOrFormatTypes.ParseOrFormatText:
            using (var reader = File.OpenRead(filePath))
            {
                alignment = parser.ParseOne(reader);
            }
            break;

        case ParseOrFormatTypes.ParseOrFormatFileName:
            alignment = parser.ParseOne(filePath);
            break;

        default:
            // Without this the method would continue with a null alignment and
            // die with a NullReferenceException that hides the real cause.
            Assert.Fail("Unsupported parse type: " + parseTypes);
            break;
    }

    // Get expected sequences
    FastAParser parserObj = new FastAParser();
    IList<ISequence> expectedSequencesList = parserObj.Parse(expectedSequenceFile).ToList();

    // Validate parsed output with expected output
    int count = 0;
    foreach (IAlignedSequence alignedSequence in alignment.AlignedSequences)
    {
        foreach (ISequence sequence in alignedSequence.Sequences)
        {
            Assert.AreEqual(expectedSequencesList[count].ConvertToString(),
                sequence.ConvertToString());
            count++;
        }
    }
}
/// <summary>
/// Converts a SAM file to a sorted, indexed temporary BAM file, parses that file back and
/// compares its header and aligned sequences with the expected BAM file.
/// The temporary BAM file is deleted in all cases.
/// </summary>
/// <param name="nodeName">Different xml node name used for different test cases</param>
void ValidateSAMToBAMConversion(string nodeName)
{
    // Read the input locations from the xml configuration.
    string expectedBAMStoragePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string samFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);

    BAMParser bamParserObj = new BAMParser();
    SAMParser samParserObj = new SAMParser();
    BAMFormatter bamFormatterObj = new BAMFormatter
    {
        CreateSortedBAMFile = true,
        CreateIndexFile = true
    };

    // Reference result first, then the SAM input to convert.
    SequenceAlignmentMap expextedBamAlignmentObj =
        bamParserObj.ParseOne<SequenceAlignmentMap>(expectedBAMStoragePath);
    SequenceAlignmentMap samSeqAlignment =
        samParserObj.ParseOne<SequenceAlignmentMap>(samFilePath);

    try
    {
        // SAM -> temporary BAM -> parsed again.
        bamFormatterObj.Format(samSeqAlignment, Constants.BAMTempFileName);
        SequenceAlignmentMap bamSeqAlignment =
            bamParserObj.ParseOne<SequenceAlignmentMap>(Constants.BAMTempFileName);

        // Header and aligned sequences must both match the expected BAM file.
        Assert.IsTrue(CompareSequencedAlignmentHeader(bamSeqAlignment, expextedBamAlignmentObj));
        Assert.IsTrue(CompareAlignedSequences(bamSeqAlignment, expextedBamAlignmentObj));

        ApplicationLog.WriteLine(
            "BAM Parser P1 : Validated the SAM->BAM conversion successfully");
    }
    finally
    {
        File.Delete(Constants.BAMTempFileName);
        ApplicationLog.WriteLine("Deleted the temp file created.");
    }
}
/// <summary>
/// Checks the Description, SupportedFileTypes and Name properties of the SAM parser
/// against the expected constants.
/// </summary>
public void ValidateSAMProperties()
{
    SAMParser samParser = new SAMParser();

    Assert.AreEqual(Constants.SAMParserDescription, samParser.Description);
    Assert.AreEqual(Constants.SAMFileType, samParser.SupportedFileTypes);
    Assert.AreEqual(Constants.SAMName, samParser.Name);

    ApplicationLog.WriteLine("Successfully validated all the properties of SAM Parser class.");
}
/// <summary>
/// Parses the SAM file configured under OneEmptySequenceSamFileNode and checks that the
/// first query sequence matches the expected text while the second query entry carries
/// no sequences.
/// </summary>
public void ValidateSAMParserQualityNSeq()
{
    // Read the test inputs from the xml configuration.
    string filePath = utilityObj.xmlUtil.GetTextValue(
        Constants.OneEmptySequenceSamFileNode, Constants.FilePathNode);
    string expectedSequence = utilityObj.xmlUtil.GetTextValue(
        Constants.OneEmptySequenceSamFileNode, Constants.ExpectedSequence);

    SAMParser samParser = new SAMParser();
    SequenceAlignmentMap alignmentMap;
    using (FileStream reader = File.OpenRead(filePath))
    {
        alignmentMap = samParser.Parse(reader);
    }

    string firstSequenceText = alignmentMap.QuerySequences[0].Sequences[0].ConvertToString();
    Assert.AreEqual(expectedSequence, firstSequenceText);

    int secondSequenceCount = alignmentMap.QuerySequences[1].Sequences.Count;
    Assert.AreEqual(0, secondSequenceCount);
}
/// <summary>
/// Parses a SAM file and checks, for every parsed sequence, that its symbols match the
/// corresponding expected FASTA sequence and that each metadata entry of the owning
/// aligned sequence is a SAMAlignedSequenceHeader whose CIGAR equals the expected string.
/// </summary>
/// <param name="nodeName">xml node name holding the file paths and expected CIGAR</param>
void ValidateSAMParseAndFormatWithCIGARFormat(string nodeName)
{
    // Read the test inputs from the xml configuration.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedSequenceFile = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence);
    string expectedCIGARString = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.CIGARNode);

    ISequenceAlignmentParser parser = new SAMParser();
    var alignments = parser.Parse(filePath).ToList();

    FastAParser parserObj = new FastAParser();
    IList<ISequence> expectedSequencesList = parserObj.Parse(expectedSequenceFile).ToList();

    // Expected sequences are consumed in the order the parsed sequences are visited.
    int count = 0;
    foreach (var alignment in alignments)
    {
        foreach (var aligned in alignment.AlignedSequences)
        {
            foreach (var sequence in aligned.Sequences)
            {
                string expectedText = new string(
                    expectedSequencesList[count].Select(a => (char)a).ToArray());
                string actualText = new string(
                    sequence.Select(a => (char)a).ToArray());
                Assert.AreEqual(expectedText, actualText);

                foreach (string key in aligned.Metadata.Keys)
                {
                    SAMAlignedSequenceHeader header =
                        (SAMAlignedSequenceHeader)aligned.Metadata[key];
                    Assert.AreEqual(expectedCIGARString, header.CIGAR);
                }

                count++;
            }
        }
    }
}
/// <summary>
/// Parses a SAM file that carries quality values and checks that every parsed sequence
/// is a QualitativeSequence whose symbols match the corresponding expected FASTA sequence.
/// </summary>
/// <param name="nodeName">xml node name holding the SAM and expected-sequence file paths</param>
void ValidateSAMParseAndFormatWithQualityValues(string nodeName)
{
    // Read the test inputs from the xml configuration.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedSequenceFile = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence);

    ISequenceAlignmentParser parser = new SAMParser();
    var alignments = parser.Parse(filePath).ToList();

    FastAParser parserObj = new FastAParser();
    var expectedSequencesList = parserObj.Parse(expectedSequenceFile).ToList();

    // Expected sequences are consumed in the order the parsed sequences are visited.
    int count = 0;
    foreach (var alignment in alignments)
    {
        foreach (var aligned in alignment.AlignedSequences)
        {
            foreach (var sequence in aligned.Sequences)
            {
                Assert.IsInstanceOf<QualitativeSequence>(sequence);
                QualitativeSequence qualSequence = (QualitativeSequence)sequence;

                string expectedText = new string(
                    expectedSequencesList[count].Select(a => (char)a).ToArray());
                string actualText = new string(
                    qualSequence.Select(a => (char)a).ToArray());
                Assert.AreEqual(expectedText, actualText);
                count++;
            }
        }
    }
}
/// <summary>
/// Round-trips the first alignment of a SAM file through SAMFormatter (stream or file-name
/// overload), parses the written temporary file again and compares every sequence with the
/// expected FASTA sequences.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="formatTypes">
/// enum type to execute different overload: ParseOrFormatText formats to a stream,
/// ParseOrFormatFileName formats to a file name; other values write nothing.
/// </param>
void ValidateSAMFormatter(string nodeName, ParseOrFormatTypes formatTypes)
{
    // Read the test inputs from the xml configuration.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedSequenceFile = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence);

    ISequenceAlignmentParser parser = new SAMParser();
    IList<ISequenceAlignment> alignments = parser.Parse(filePath).ToList();
    SAMFormatter formatter = new SAMFormatter();

    if (formatTypes == ParseOrFormatTypes.ParseOrFormatText)
    {
        using (var writer = File.Create(Constants.SAMTempFileName))
        {
            formatter.Format(writer, alignments[0]);
        }
    }
    else if (formatTypes == ParseOrFormatTypes.ParseOrFormatFileName)
    {
        formatter.Format(alignments[0], Constants.SAMTempFileName);
    }

    // Read back what the formatter wrote.
    alignments = parser.Parse(Constants.SAMTempFileName).ToList();

    FastAParser parserObj = new FastAParser();
    IList<ISequence> expectedSequencesList = parserObj.Parse(expectedSequenceFile).ToList();

    // Expected sequences are consumed in the order the parsed sequences are visited.
    int count = 0;
    foreach (ISequenceAlignment alignment in alignments)
    {
        foreach (var aligned in alignment.AlignedSequences)
        {
            foreach (var sequence in aligned.Sequences)
            {
                string expectedText = new string(
                    expectedSequencesList[count].Select(a => (char)a).ToArray());
                string actualText = new string(
                    sequence.Select(a => (char)a).ToArray());
                Assert.AreEqual(expectedText, actualText);
                count++;
            }
        }
    }
}
/// <summary>
/// Round-trips a SAM file through SAMFormatter.Format(stream, SequenceAlignmentMap),
/// parses the written temporary file again and compares the sequences of every query
/// entry with the expected FASTA sequence at the same index.
/// </summary>
/// <param name="nodeName">xml node name</param>
void ValidateSAMFormatter(string nodeName)
{
    // Read the test inputs from the xml configuration.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedSequenceFile = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence);

    SAMParser samParser = new SAMParser();
    SequenceAlignmentMap alignmentMap = (SequenceAlignmentMap)samParser.ParseOne(filePath);

    SAMFormatter formatter = new SAMFormatter();
    using (var writer = File.Create(Constants.SAMTempFileName))
    {
        formatter.Format(writer, alignmentMap);
    }

    // Read back what the formatter wrote.
    alignmentMap = samParser.ParseOne<SequenceAlignmentMap>(Constants.SAMTempFileName);

    FastAParser parserObj = new FastAParser();
    IList<ISequence> expectedSequencesList = parserObj.Parse(expectedSequenceFile).ToList();

    for (int queryIndex = 0; queryIndex < alignmentMap.QuerySequences.Count; queryIndex++)
    {
        var querySequences = alignmentMap.QuerySequences[queryIndex].Sequences;
        for (int seqIndex = 0; seqIndex < querySequences.Count; seqIndex++)
        {
            // The expected entry is looked up only when the query actually has a sequence.
            string expectedText = new string(
                expectedSequencesList[queryIndex].Select(a => (char)a).ToArray());
            string actualText = new string(
                querySequences[seqIndex].Select(a => (char)a).ToArray());
            Assert.AreEqual(expectedText, actualText);
        }
    }
}
/// <summary>
/// Parses InputFilename as BAM or SAM (decided by Helper.IsBAM) into _sequenceAlignmentMap.
/// When OutputFilename is not set it defaults to the input name with the extension of the
/// opposite format appended (".sam" for BAM input, ".bam" for SAM input).
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown, with the parser failure as inner exception, when the input cannot be parsed.
/// </exception>
private void PerformParse()
{
    const string SamExtension = ".sam";
    const string BamExtension = ".bam";

    string defaultOutputExtension;

    if (Helper.IsBAM(InputFilename))
    {
        BAMParser bamParser = new BAMParser();
        try
        {
            _sequenceAlignmentMap = bamParser.ParseOne<SequenceAlignmentMap>(InputFilename);
        }
        catch (Exception ex)
        {
            throw new InvalidOperationException(Resources.InvalidBAMFile, ex);
        }

        defaultOutputExtension = SamExtension;
    }
    else
    {
        SAMParser samParser = new SAMParser();
        try
        {
            _sequenceAlignmentMap = samParser.ParseOne<SequenceAlignmentMap>(InputFilename);
        }
        catch (Exception ex)
        {
            throw new InvalidOperationException(Resources.InvalidSAMFile, ex);
        }

        _isSAM = true;
        defaultOutputExtension = BamExtension;
    }

    if (string.IsNullOrEmpty(OutputFilename))
    {
        OutputFilename = InputFilename + defaultOutputExtension;
    }
}
/// <summary>
/// Validate parser parse method overloads with filePath\stream and compare every
/// parsed sequence with the expected FASTA sequences.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="parseTypes">
/// enum type to execute different overload; only ParseOrFormatText (stream) and
/// ParseOrFormatFileName (file path) are supported, any other value fails the test.
/// </param>
void ValidateSAMParser(string nodeName, ParseOrFormatTypes parseTypes)
{
    // Gets the expected sequence from the Xml
    string filePath = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string expectedSequenceFile = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);

    ISequenceAlignmentParser parser = new SAMParser();
    IList<ISequenceAlignment> alignments = null;

    // Parse SAM File
    switch (parseTypes)
    {
        case ParseOrFormatTypes.ParseOrFormatText:
            using (var reader = File.OpenRead(filePath))
            {
                alignments = parser.Parse(reader).ToList();
            }
            break;

        case ParseOrFormatTypes.ParseOrFormatFileName:
            alignments = parser.Parse(filePath).ToList();
            break;

        default:
            // Without this the method would continue with a null list and
            // die with a NullReferenceException that hides the real cause.
            Assert.Fail("Unsupported parse type: " + parseTypes);
            break;
    }

    // Get expected sequences
    FastAParser parserObj = new FastAParser();
    var expectedSequencesList = parserObj.Parse(expectedSequenceFile).ToList();

    // Validate parsed output with expected output
    int count = 0;
    for (int index = 0; index < alignments.Count; index++)
    {
        for (int ialigned = 0; ialigned < alignments[index].AlignedSequences.Count; ialigned++)
        {
            for (int iseq = 0; iseq < alignments[index].AlignedSequences[ialigned].Sequences.Count; iseq++)
            {
                Assert.AreEqual(
                    new string(expectedSequencesList[count].Select(a => (char)a).ToArray()),
                    new string(alignments[index].AlignedSequences[ialigned].Sequences[iseq].Select(a => (char)a).ToArray()));
                count++;
            }
        }
    }
}
/// <summary>
/// Parses a SAM file from a stream with SAMParser.Parse and compares the sequences of
/// every query entry with the expected FASTA sequence at the same index.
/// </summary>
/// <param name="nodeName">xml node name</param>
void ValidateSAMParser(string nodeName)
{
    // Read the test inputs from the xml configuration.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedSequenceFile = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence);

    var samParser = new SAMParser();
    SequenceAlignmentMap alignmentMap;
    using (var reader = File.OpenRead(filePath))
    {
        alignmentMap = samParser.Parse(reader);
    }

    FastAParser parserObj = new FastAParser();
    IList<ISequence> expectedSequencesList = parserObj.Parse(expectedSequenceFile).ToList();

    for (int queryIndex = 0; queryIndex < alignmentMap.QuerySequences.Count; queryIndex++)
    {
        var querySequences = alignmentMap.QuerySequences[queryIndex].Sequences;
        for (int seqIndex = 0; seqIndex < querySequences.Count; seqIndex++)
        {
            // The expected entry is looked up only when the query actually has a sequence.
            string expectedText = new string(
                expectedSequencesList[queryIndex].Select(a => (char)a).ToArray());
            string actualText = new string(
                querySequences[seqIndex].Select(a => (char)a).ToArray());
            Assert.AreEqual(expectedText, actualText);
        }
    }
}
/// <summary>
/// Converts the configured SAM file to a temporary BAM file, parses that file back and
/// compares its header and aligned sequences with the expected BAM file.
/// The temporary BAM file is deleted in all cases.
/// </summary>
public void ValidateSAMToBAMConversion()
{
    // Read the input locations from the xml configuration.
    string expectedBamFilePath = this.utilityObj.xmlUtil.GetTextValue(
        Constants.BAMToSAMConversionNode, Constants.FilePathNode);
    string samFilePath = this.utilityObj.xmlUtil.GetTextValue(
        Constants.BAMToSAMConversionNode, Constants.FilePathNode1);

    // Reference result.
    BAMParser bamParserObj = new BAMParser();
    SequenceAlignmentMap expextedBamAlignmentObj =
        bamParserObj.ParseOne<SequenceAlignmentMap>(expectedBamFilePath);

    // SAM input to convert.
    SAMParser samParserObj = new SAMParser();
    SequenceAlignmentMap samSeqAlignment =
        samParserObj.ParseOne<SequenceAlignmentMap>(samFilePath);

    try
    {
        // SAM -> temporary BAM -> parsed again.
        BAMFormatter bamFormatterObj = new BAMFormatter();
        bamFormatterObj.Format(samSeqAlignment, Constants.BAMTempFileName);
        SequenceAlignmentMap bamSeqAlignment =
            bamParserObj.ParseOne<SequenceAlignmentMap>(Constants.BAMTempFileName);

        // Header and aligned sequences must both match the expected BAM file.
        bool headersMatch = CompareSequencedAlignmentHeader(bamSeqAlignment, expextedBamAlignmentObj);
        Assert.IsTrue(headersMatch);

        bool sequencesMatch = CompareAlignedSequences(bamSeqAlignment, expextedBamAlignmentObj);
        Assert.IsTrue(sequencesMatch);
    }
    finally
    {
        File.Delete(Constants.BAMTempFileName);
    }
}
/// <summary>
/// Identifies hot-spot regions where length-anomaly reads and orphan reads intersect and
/// writes the length-anomaly regions, the orphan regions and their intersection to the console.
/// </summary>
/// <param name="filename">Path of the input file; parsed as SAM when SAMInput is set, otherwise as BAM.</param>
/// <param name="mean">
/// Mean value passed to GetPairedReads. When this or <paramref name="deviation"/> is -1,
/// the parameterless GetPairedReads() is used instead.
/// </param>
/// <param name="deviation">Standard deviation passed to GetPairedReads; -1 has the same effect as for <paramref name="mean"/>.</param>
private void IdentifyLentghAnamolies(string filename, float mean = -1, float deviation = -1)
{
    // -1 is the "not supplied" sentinel for either argument.
    bool calculateMeanNdeviation = mean == -1 || deviation == -1;

    SequenceAlignmentMap alignmentMapobj = null;
    if (!SAMInput)
    {
        BAMParser bamParser = new BAMParser();
        alignmentMapobj = bamParser.ParseOne<SequenceAlignmentMap>(filename);
    }
    else
    {
        SAMParser samParser = new SAMParser();
        alignmentMapobj = samParser.ParseOne<SequenceAlignmentMap>(filename);
    }

    // get reads from sequence alignment map object.
    IList<PairedRead> pairedReads = null;
    if (calculateMeanNdeviation)
    {
        pairedReads = alignmentMapobj.GetPairedReads();
    }
    else
    {
        pairedReads = alignmentMapobj.GetPairedReads(mean, deviation);
    }

    // Get the orphan regions. Materialised once: the filtered reads are counted
    // and iterated below, and a deferred query would rescan pairedReads each time.
    List<PairedRead> orphans = pairedReads
        .Where(PR => PR.PairedType == PairedReadType.Orphan)
        .ToList();
    if (orphans.Count == 0)
    {
        Console.WriteLine("No Orphans to display");
    }

    List<ISequenceRange> orphanRegions = new List<ISequenceRange>(orphans.Count);
    foreach (PairedRead orphanRead in orphans)
    {
        orphanRegions.Add(GetRegion(orphanRead.Read1));
    }

    // Get sequence range grouping for Orphan regions.
    SequenceRangeGrouping orphanRangegroup = new SequenceRangeGrouping(orphanRegions);

    // Get the Length anomalies regions (materialised once, as above).
    List<PairedRead> lengthAnomalies = pairedReads
        .Where(PE => PE.PairedType == PairedReadType.LengthAnomaly)
        .ToList();
    if (lengthAnomalies.Count == 0)
    {
        Console.WriteLine("No Anomalies to display");
    }

    List<ISequenceRange> lengthAnomalyRegions = new List<ISequenceRange>(lengthAnomalies.Count);
    foreach (PairedRead laRead in lengthAnomalies)
    {
        // The region starts at read 1 and spans the insert length of the pair.
        SequenceRange range = new SequenceRange();
        range.ID = laRead.Read1.RName;
        range.Start = laRead.Read1.Pos;
        range.End = laRead.Read1.Pos + laRead.InsertLength;
        lengthAnomalyRegions.Add(range);
    }

    // Get sequence range grouping for length anomaly regions.
    SequenceRangeGrouping lengthAnomalyRangegroup = new SequenceRangeGrouping(lengthAnomalyRegions);
    if (!lengthAnomalyRangegroup.GroupIDs.Any())
    {
        Console.Write("\r\nNo Length anomalies reads to display");
    }
    else
    {
        Console.Write("Region of length anomaly:");
        DisplaySequenceRange(lengthAnomalyRangegroup);
    }

    if (!orphanRangegroup.GroupIDs.Any())
    {
        Console.Write("\r\nNo Orphan reads to display");
    }
    else
    {
        Console.Write("\r\nRegion of Orphan reads:");
        DisplaySequenceRange(orphanRangegroup);
    }

    SequenceRangeGrouping intersectedRegions = lengthAnomalyRangegroup.Intersect(orphanRangegroup);
    if (!intersectedRegions.GroupIDs.Any())
    {
        Console.Write("\r\nNo Hot spots found");
    }
    else
    {
        Console.Write("\r\nChromosomal Hot spot of length anomaly and Orphan region:");
        DisplaySequenceRange(intersectedRegions);
    }
}
/// <summary>
/// Expects SAMFormatter.Format(collection of alignments, file name) to raise
/// NotSupportedException; the test fails when the call returns normally.
/// </summary>
public void ValidateSAMFormatterWithFileNameAndAlignments()
{
    // Look up the SAM input path in the xml configuration.
    string samNode = Constants.SmallSAMFileNode.Replace("\r\n", System.Environment.NewLine);
    string filePath = utilityObj.xmlUtil.GetTextValue(samNode, Constants.FilePathNode);

    ISequenceAlignmentParser parser = new SAMParser();
    IEnumerable<ISequenceAlignment> parsedAlignments = parser.Parse(filePath);
    SAMFormatter samFormatter = new SAMFormatter();

    try
    {
        samFormatter.Format(parsedAlignments, Constants.SAMTempFileName);

        // The call returned normally, which is not the expected outcome.
        Assert.Fail();
    }
    catch (NotSupportedException)
    {
        ApplicationLog.WriteLine("SAM Parser BVT : Validated the exception successfully");
    }
}