public void TestFormatter()
{
    // Round-trips a SAM alignment: parse the sample file, format the first alignment
    // to a temporary file, parse that file again and check the counts are unchanged.
    string filePath = @"TestUtils\SAM\SeqAlignment1.sam";
    string outputfilePath = "samtest.sam";
    ISequenceAlignmentParser parser = new SAMParser();

    IList<ISequenceAlignment> alignments = parser.Parse(filePath).ToList();
    Assert.IsNotNull(alignments);

    // Expected value goes first so a failure reads "expected 1 but was N".
    Assert.AreEqual(1, alignments.Count);
    Assert.AreEqual(2, alignments[0].AlignedSequences.Count);

    try
    {
        SAMFormatter formatter = new SAMFormatter();
        formatter.Format(alignments[0], outputfilePath);

        alignments = parser.Parse(outputfilePath).ToList();
        Assert.IsNotNull(alignments);
        Assert.AreEqual(1, alignments.Count);
        Assert.AreEqual(2, alignments[0].AlignedSequences.Count);
    }
    finally
    {
        // Remove the temporary output even when an assertion above fails.
        File.Delete(outputfilePath);
    }
}
public void TestParser()
{
    // Parses the sample SAM file and checks that it yields exactly one alignment
    // containing two aligned sequences.
    string filePath = @"TestUtils\SAM\SeqAlignment1.sam";
    ISequenceAlignmentParser parser = new SAMParser();

    IList<ISequenceAlignment> alignments = parser.Parse(filePath).ToList();
    Assert.IsNotNull(alignments);

    // Expected value goes first so a failure reads "expected 1 but was N".
    Assert.AreEqual(1, alignments.Count);
    Assert.AreEqual(2, alignments[0].AlignedSequences.Count);
}
/// <summary>
/// General method to validate the SAM parser stream overload: parses the SAM file
/// configured for the node and compares every sequence of each query sequence with
/// the expected FASTA sequence at the same query index.
/// </summary>
/// <param name="nodeName">xml node name</param>
void ValidateSAMParser(string nodeName)
{
    // Look up the input and expected-output file paths in the Xml
    string samPath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string fastaPath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence);

    // Parse the SAM file from a stream
    var samParser = new SAMParser();
    SequenceAlignmentMap alignmentMap;
    using (var samStream = File.OpenRead(samPath))
    {
        alignmentMap = samParser.Parse(samStream);
    }

    // Load the expected sequences
    FastAParser fastaParser = new FastAParser();
    IList<ISequence> expected = fastaParser.Parse(fastaPath).ToList();

    // Compare parsed output with expected output
    for (int queryIndex = 0; queryIndex < alignmentMap.QuerySequences.Count; queryIndex++)
    {
        var query = alignmentMap.QuerySequences[queryIndex];
        foreach (var parsed in query.Sequences)
        {
            // The expected entry is only read when the query actually has a sequence.
            string expectedText = new string(expected[queryIndex].Select(symbol => (char)symbol).ToArray());
            string parsedText = new string(parsed.Select(symbol => (char)symbol).ToArray());
            Assert.AreEqual(expectedText, parsedText);
        }
    }
}
/// <summary>
/// General method to invalidate the ISequenceAlignment parser: passes null to the
/// selected SAM parser entry point and expects an <see cref="ArgumentNullException"/>.
/// The test fails when no such exception is raised, including when
/// <paramref name="method"/> matches none of the handled cases.
/// </summary>
/// <param name="method">enum type to execute different overload</param>
private static void ValidateISeqAlignParser(ParseOrFormatTypes method)
{
    ISequenceAlignmentParser samParser = new SAMParser();
    bool nullRejected = false;

    try
    {
        switch (method)
        {
            case ParseOrFormatTypes.ParseOrFormatText:
                samParser.Parse(null).First();
                break;

            case ParseOrFormatTypes.ParseOrFormatFileName:
                samParser.Parse(null as string).First();
                break;

            case ParseOrFormatTypes.ParseOneOrFormatHeader:
                SAMParser.ParseSAMHeader(null as TextReader);
                break;

            case ParseOrFormatTypes.ParseOneOrFormatHeaderFn:
                SAMParser.ParseSAMHeader(null as Stream);
                break;
        }
    }
    catch (ArgumentNullException)
    {
        nullRejected = true;
    }

    if (nullRejected)
    {
        ApplicationLog.WriteLine(
            "SAM Parser P2 : Successfully validated the exception");
    }
    else
    {
        // Nothing was thrown for the null argument.
        Assert.Fail();
    }
}
/// <summary>
/// Validate the parser on a file which contains an extended CIGAR string. Every
/// parsed sequence is compared with the expected FASTA sequence at the same running
/// position, and the CIGAR property of every metadata header of its aligned sequence
/// is compared with the expected CIGAR string.
/// </summary>
/// <param name="nodeName">xml node name</param>
void ValidateSAMParseAndFormatWithCIGARFormat(string nodeName)
{
    // Look up the input path, expected sequences and expected CIGAR in the Xml
    string samPath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string fastaPath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence);
    string cigarText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.CIGARNode);

    // Parse the SAM file
    ISequenceAlignmentParser samParser = new SAMParser();
    var parsedAlignments = samParser.Parse(samPath).ToList();

    // Load the expected sequences
    FastAParser fastaParser = new FastAParser();
    IList<ISequence> expected = fastaParser.Parse(fastaPath).ToList();

    // Walk every parsed sequence in order; expectedIndex tracks the matching FASTA entry
    int expectedIndex = 0;
    foreach (var alignment in parsedAlignments)
    {
        foreach (var aligned in alignment.AlignedSequences)
        {
            foreach (var parsed in aligned.Sequences)
            {
                string expectedText = new string(expected[expectedIndex].Select(symbol => (char)symbol).ToArray());
                string parsedText = new string(parsed.Select(symbol => (char)symbol).ToArray());
                Assert.AreEqual(expectedText, parsedText);

                foreach (string key in aligned.Metadata.Keys)
                {
                    SAMAlignedSequenceHeader header = (SAMAlignedSequenceHeader)aligned.Metadata[key];
                    Assert.AreEqual(cigarText, header.CIGAR);
                }

                expectedIndex++;
            }
        }
    }
}
/// <summary>
/// Validate the parser on a sam file with quality values: every parsed sequence must
/// be a QualitativeSequence whose symbols match the expected FASTA sequence at the
/// same running position.
/// </summary>
/// <param name="nodeName">xml node name</param>
void ValidateSAMParseAndFormatWithQualityValues(string nodeName)
{
    // Look up the input and expected-output file paths in the Xml
    string samPath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string fastaPath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence);

    // Parse the SAM file
    ISequenceAlignmentParser samParser = new SAMParser();
    var parsedAlignments = samParser.Parse(samPath).ToList();

    // Load the expected sequences
    FastAParser fastaParser = new FastAParser();
    var expected = fastaParser.Parse(fastaPath).ToList();

    // Walk every parsed sequence in order; expectedIndex tracks the matching FASTA entry
    int expectedIndex = 0;
    foreach (var alignment in parsedAlignments)
    {
        foreach (var aligned in alignment.AlignedSequences)
        {
            foreach (var parsed in aligned.Sequences)
            {
                Assert.IsInstanceOf<QualitativeSequence>(parsed);
                QualitativeSequence qualitative = (QualitativeSequence)parsed;

                string expectedText = new string(expected[expectedIndex].Select(symbol => (char)symbol).ToArray());
                string parsedText = new string(qualitative.Select(symbol => (char)symbol).ToArray());
                Assert.AreEqual(expectedText, parsedText);

                expectedIndex++;
            }
        }
    }
}
/// <summary>
/// Validate the formatter's single-alignment Format overloads (stream and file name):
/// the first parsed alignment is written to the temporary SAM file, that file is
/// parsed again and every sequence is compared with the expected FASTA sequence at
/// the same running position. The temporary file is deleted afterwards.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="formatTypes">enum type to execute different overload</param>
void ValidateSAMFormatter(string nodeName, ParseOrFormatTypes formatTypes)
{
    // Gets the input file and the expected sequence file from the Xml
    string filePath = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string expectedSequenceFile = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);

    ISequenceAlignmentParser parser = new SAMParser();
    try
    {
        IList<ISequenceAlignment> alignments = parser.Parse(filePath).ToList();
        SAMFormatter formatter = new SAMFormatter();

        switch (formatTypes)
        {
            case ParseOrFormatTypes.ParseOrFormatText:
                using (var writer = File.Create(Constants.SAMTempFileName))
                {
                    formatter.Format(writer, alignments[0]);
                }
                break;

            case ParseOrFormatTypes.ParseOrFormatFileName:
                formatter.Format(alignments[0], Constants.SAMTempFileName);
                break;

            default:
                // Without this guard nothing is written and the code below would
                // parse whatever temp file an earlier run happened to leave behind.
                Assert.Fail("Unsupported format type: " + formatTypes);
                break;
        }

        // Read back what the formatter wrote
        alignments = parser.Parse(Constants.SAMTempFileName).ToList();

        // Get expected sequences
        FastAParser parserObj = new FastAParser();
        IList<ISequence> expectedSequencesList = parserObj.Parse(expectedSequenceFile).ToList();

        // Validate parsed output with expected output
        int count = 0;
        for (int index = 0; index < alignments.Count; index++)
        {
            for (int ialigned = 0; ialigned < alignments[index].AlignedSequences.Count; ialigned++)
            {
                for (int iseq = 0; iseq < alignments[index].AlignedSequences[ialigned].Sequences.Count; iseq++)
                {
                    Assert.AreEqual(
                        new string(expectedSequencesList[count].Select(a => (char)a).ToArray()),
                        new string(alignments[index].AlignedSequences[ialigned].Sequences[iseq].Select(a => (char)a).ToArray()));
                    count++;
                }
            }
        }
    }
    finally
    {
        // Do not leave the temporary file behind for later tests to pick up.
        File.Delete(Constants.SAMTempFileName);
    }
}
/// <summary>
/// Validate the parser's Parse overloads (stream and file name): parses the SAM file
/// configured for the node and compares every sequence with the expected FASTA
/// sequence at the same running position. Fails the test when
/// <paramref name="parseTypes"/> selects neither overload.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="parseTypes">enum type to execute different overload</param>
void ValidateSAMParser(string nodeName, ParseOrFormatTypes parseTypes)
{
    // Gets the input file and the expected sequence file from the Xml
    string filePath = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string expectedSequenceFile = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);

    ISequenceAlignmentParser parser = new SAMParser();
    IList<ISequenceAlignment> alignments = null;

    // Parse SAM File
    switch (parseTypes)
    {
        case ParseOrFormatTypes.ParseOrFormatText:
            using (var reader = File.OpenRead(filePath))
            {
                alignments = parser.Parse(reader).ToList();
            }
            break;

        case ParseOrFormatTypes.ParseOrFormatFileName:
            alignments = parser.Parse(filePath).ToList();
            break;

        default:
            // Report the real problem instead of a NullReferenceException on
            // the still-null alignments list further down.
            Assert.Fail("Unsupported parse type: " + parseTypes);
            break;
    }

    // Get expected sequences
    FastAParser parserObj = new FastAParser();
    var expectedSequencesList = parserObj.Parse(expectedSequenceFile).ToList();

    // Validate parsed output with expected output
    int count = 0;
    for (int index = 0; index < alignments.Count; index++)
    {
        for (int ialigned = 0; ialigned < alignments[index].AlignedSequences.Count; ialigned++)
        {
            for (int iseq = 0; iseq < alignments[index].AlignedSequences[ialigned].Sequences.Count; iseq++)
            {
                Assert.AreEqual(
                    new string(expectedSequencesList[count].Select(a => (char)a).ToArray()),
                    new string(alignments[index].AlignedSequences[ialigned].Sequences[iseq].Select(a => (char)a).ToArray()));
                count++;
            }
        }
    }
}
public void ValidateSAMFormatterFormatString()
{
    // Formats the first alignment of the configured SAM file to a string and compares
    // it with the expected text, after adapting the expected line endings to the
    // current platform.
    string filePath = utilityObj.xmlUtil.GetTextValue(
        Constants.SamFormatterFileNode, Constants.FilePathNode);

    ISequenceAlignmentParser parser = new SAMParser();
    IList<ISequenceAlignment> alignment = parser.Parse(filePath).ToList();

    SAMFormatter formatter = new SAMFormatter();
    string formatted = formatter.FormatString(alignment[0]);

    // Expected value goes first so a failure message labels the two strings correctly.
    Assert.AreEqual(
        Constants.FormatterString.Replace("\r\n", Environment.NewLine),
        formatted);
}
public void ValidateSAMFormatterWithTextWriterAndAlignments()
{
    // Checks that formatting a collection of alignments to a stream is rejected
    // with NotSupportedException.

    // Gets the SAM file path from the Xml
    string filePath = utilityObj.xmlUtil.GetTextValue(
        Constants.SmallSAMFileNode, Constants.FilePathNode);

    ISequenceAlignmentParser parser = new SAMParser();
    IEnumerable<ISequenceAlignment> alignments = parser.Parse(filePath);
    SAMFormatter formatter = new SAMFormatter();

    try
    {
        using (var writer = File.Create(Constants.SAMTempFileName))
        {
            formatter.Format(writer, alignments);
        }

        // Reaching this point means the expected exception was not thrown.
        Assert.Fail();
    }
    catch (NotSupportedException)
    {
        ApplicationLog.WriteLine("SAM Parser BVT : Validated the exception successfully");
    }
    finally
    {
        // File.Create above always leaves a file on disk; remove it again.
        File.Delete(Constants.SAMTempFileName);
    }
}
public void ValidateSAMFormatterWithFileNameAndAlignments()
{
    // Checks that formatting a collection of alignments to a file name is rejected
    // with NotSupportedException.

    // Gets the SAM file path from the Xml. The node name is passed through as-is:
    // it is a lookup key, so rewriting line endings in it served no purpose.
    string filePath = utilityObj.xmlUtil.GetTextValue(
        Constants.SmallSAMFileNode, Constants.FilePathNode);

    ISequenceAlignmentParser parser = new SAMParser();
    IEnumerable<ISequenceAlignment> alignments = parser.Parse(filePath);
    SAMFormatter formatter = new SAMFormatter();

    try
    {
        formatter.Format(alignments, Constants.SAMTempFileName);

        // Reaching this point means the expected exception was not thrown.
        Assert.Fail();
    }
    catch (NotSupportedException)
    {
        ApplicationLog.WriteLine("SAM Parser BVT : Validated the exception successfully");
    }
}
public void ValidateSAMParserQualityNSeq()
{
    // Parses a SAM file from a stream and checks that the first query sequence
    // carries the expected sequence text while the second one has no sequences.

    // Look up the input path and the expected sequence text in the Xml
    string samPath = utilityObj.xmlUtil.GetTextValue(
        Constants.OneEmptySequenceSamFileNode, Constants.FilePathNode);
    string expectedText = utilityObj.xmlUtil.GetTextValue(
        Constants.OneEmptySequenceSamFileNode, Constants.ExpectedSequence);

    SAMParser samParser = new SAMParser();
    SequenceAlignmentMap alignmentMap;
    using (var samStream = File.OpenRead(samPath))
    {
        alignmentMap = samParser.Parse(samStream);
    }

    string firstSequenceText = alignmentMap.QuerySequences[0].Sequences[0].ConvertToString();
    Assert.AreEqual(expectedText, firstSequenceText);

    int secondSequenceCount = alignmentMap.QuerySequences[1].Sequences.Count;
    Assert.AreEqual(0, secondSequenceCount);
}
/// <summary>
/// Identify hot spot chromosomes for length anomaly regions.
/// Parses the input file (SAM when <c>SAMInput</c> is set, BAM otherwise), collects
/// the regions of orphan reads and of length-anomaly reads, prints both groupings
/// and finally prints their intersection as the hot spots.
/// </summary>
/// <param name="filename">Input file</param>
/// <param name="mean">
/// Mean value. When this or <paramref name="deviation"/> is left at -1 the paired
/// reads are requested without explicit statistics.
/// </param>
/// <param name="deviation">Standard deviation; -1 has the same meaning as for <paramref name="mean"/>.</param>
private void IdentifyLentghAnamolies(string filename, float mean = -1, float deviation = -1)
{
    // -1 is the "not supplied" sentinel for both statistics.
    bool calculateMeanNdeviation = mean == -1 || deviation == -1;

    SequenceAlignmentMap alignmentMapobj = null;
    if (!SAMInput)
    {
        BAMParser bamParser = new BAMParser();
        alignmentMapobj = bamParser.Parse(filename);
    }
    else
    {
        SAMParser samParser = new SAMParser();
        alignmentMapobj = samParser.Parse(filename);
    }

    // Get reads from sequence alignment map object.
    IList<PairedRead> pairedReads = null;
    if (calculateMeanNdeviation)
    {
        pairedReads = alignmentMapobj.GetPairedReads();
    }
    else
    {
        pairedReads = alignmentMapobj.GetPairedReads(mean, deviation);
    }

    // Get the orphan regions. The filter is materialized once; the deferred query
    // would otherwise re-scan pairedReads for every Count() call and the loop.
    List<PairedRead> orphans = pairedReads
        .Where(read => read.PairedType == PairedReadType.Orphan)
        .ToList();
    if (orphans.Count == 0)
    {
        Console.WriteLine("No Orphans to display");
    }

    List<ISequenceRange> orphanRegions = new List<ISequenceRange>(orphans.Count);
    foreach (PairedRead orphanRead in orphans)
    {
        orphanRegions.Add(GetRegion(orphanRead.Read1));
    }

    // Get sequence range grouping for Orphan regions.
    SequenceRangeGrouping orphanRangegroup = new SequenceRangeGrouping(orphanRegions);

    // Get the Length anomalies regions (materialized once, as above).
    List<PairedRead> lengthAnomalies = pairedReads
        .Where(read => read.PairedType == PairedReadType.LengthAnomaly)
        .ToList();
    if (lengthAnomalies.Count == 0)
    {
        Console.WriteLine("No Anomalies to display");
    }

    List<ISequenceRange> lengthAnomalyRegions = new List<ISequenceRange>(lengthAnomalies.Count);
    foreach (PairedRead laRead in lengthAnomalies)
    {
        // The region starts at the first read and extends by the insert length.
        SequenceRange range = new SequenceRange();
        range.ID = laRead.Read1.RName;
        range.Start = laRead.Read1.Pos;
        range.End = laRead.Read1.Pos + laRead.InsertLength;
        lengthAnomalyRegions.Add(range);
    }

    // Get sequence range grouping for length anomaly regions.
    SequenceRangeGrouping lengthAnomalyRangegroup = new SequenceRangeGrouping(lengthAnomalyRegions);

    if (!lengthAnomalyRangegroup.GroupIDs.Any())
    {
        Console.Write("\r\nNo Length anomalies reads to display");
    }
    else
    {
        Console.Write("Region of length anomaly:");
        DisplaySequenceRange(lengthAnomalyRangegroup);
    }

    if (!orphanRangegroup.GroupIDs.Any())
    {
        Console.Write("\r\nNo Orphan reads to display");
    }
    else
    {
        Console.Write("\r\nRegion of Orphan reads:");
        DisplaySequenceRange(orphanRangegroup);
    }

    // Hot spots are the regions shared by both groupings.
    SequenceRangeGrouping intersectedRegions = lengthAnomalyRangegroup.Intersect(orphanRangegroup);
    if (!intersectedRegions.GroupIDs.Any())
    {
        Console.Write("\r\nNo Hot spots found");
    }
    else
    {
        Console.Write("\r\nChromosomal Hot spot of length anomaly and Orphan region:");
        DisplaySequenceRange(intersectedRegions);
    }
}
/// <summary>
/// Display chromosomes with orphan regions.
/// Parses the input file (SAM when <c>SAMInput</c> is set, BAM otherwise), collects
/// the regions of the orphan reads, prints that grouping and then prints the
/// grouping with overlapping regions merged as the hot spots.
/// </summary>
/// <param name="filename">Path of the input file</param>
private void DisplayOrphans(string filename)
{
    SequenceAlignmentMap alignmentMapobj = null;
    if (!SAMInput)
    {
        BAMParser bamParser = new BAMParser();
        alignmentMapobj = bamParser.Parse(filename);
    }
    else
    {
        SAMParser samParser = new SAMParser();
        alignmentMapobj = samParser.Parse(filename);
    }

    // Get reads from sequence alignment map object.
    IList<PairedRead> pairedReads = alignmentMapobj.GetPairedReads(0, 0);

    // Get the orphan regions. The filter is materialized once; the deferred query
    // would otherwise re-scan pairedReads for every Count() call and the loop.
    List<PairedRead> orphans = pairedReads
        .Where(read => read.PairedType == PairedReadType.Orphan)
        .ToList();
    if (orphans.Count == 0)
    {
        Console.WriteLine("No Orphans to display");
    }

    List<ISequenceRange> orphanRegions = new List<ISequenceRange>(orphans.Count);
    foreach (PairedRead orphanRead in orphans)
    {
        orphanRegions.Add(GetRegion(orphanRead.Read1));
    }

    // Get sequence range grouping object.
    SequenceRangeGrouping rangeGroup = new SequenceRangeGrouping(orphanRegions);
    if (!rangeGroup.GroupIDs.Any())
    {
        Console.Write("\r\nNo Orphan reads to display");
    }
    else
    {
        Console.Write("Region of Orphan reads:");
        DisplaySequenceRange(rangeGroup);
    }

    // Overlapping orphan regions are merged and reported as hot spots.
    SequenceRangeGrouping mergedRegions = rangeGroup.MergeOverlaps();
    if (!mergedRegions.GroupIDs.Any())
    {
        Console.Write("\r\nNo hot spots to display");
    }
    else
    {
        Console.Write("\r\nChromosomal hot spot:");
        DisplaySequenceRange(mergedRegions);
    }
}
/// <summary>
/// General method to validate the SAM parser Parse overloads (text reader or file
/// name, each with and without the boolean flag). Every sequence of each query
/// sequence is compared with the expected FASTA sequence at the same query index.
/// Fails the test when <paramref name="method"/> selects none of the overloads.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="method">enum type to execute different overload</param>
void ValidateSAMParserSeqAlign(
    string nodeName,
    ParseOrFormatTypes method)
{
    // Gets the input file and the expected sequence file from the Xml
    string filePath = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string expectedSequenceFile = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);

    using (SAMParser parser = new SAMParser())
    {
        SequenceAlignmentMap alignments = null;

        // Parse SAM File
        switch (method)
        {
            case ParseOrFormatTypes.ParseOrFormatText:
                using (TextReader reader = new StreamReader(filePath))
                {
                    alignments = parser.Parse(reader);
                }
                break;

            case ParseOrFormatTypes.ParseOrFormatTextWithFlag:
                using (TextReader reader = new StreamReader(filePath))
                {
                    alignments = parser.Parse(reader, true);
                }
                break;

            case ParseOrFormatTypes.ParseOrFormatFileName:
                alignments = parser.Parse(filePath);
                break;

            case ParseOrFormatTypes.ParseOrFormatFileNameWithFlag:
                alignments = parser.Parse(filePath, true);
                break;

            default:
                // Report the real problem instead of a NullReferenceException on
                // the still-null alignment map further down.
                Assert.Fail("Unsupported parse type: " + method);
                break;
        }

        // Get expected sequences
        using (FastaParser parserObj = new FastaParser())
        {
            IList<ISequence> expectedSequences = parserObj.Parse(expectedSequenceFile);

            // Validate parsed output with expected output
            for (int index = 0; index < alignments.QuerySequences.Count; index++)
            {
                for (int count = 0; count < alignments.QuerySequences[index].Sequences.Count; count++)
                {
                    Assert.AreEqual(
                        expectedSequences[index].ToString(),
                        alignments.QuerySequences[index].Sequences[count].ToString());
                }
            }
        }
    }
}
/// <summary>
/// Validate the formatter's Format overloads (text writer, file name, and alignment
/// list with file name): the parsed alignments are written to the temporary SAM file,
/// that file is parsed again and every sequence is compared with the expected FASTA
/// sequence at the same running position. The parser is disposed and the temporary
/// file deleted afterwards.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="formatTypes">enum type to execute different overload</param>
void ValidateSAMFormatter(string nodeName, ParseOrFormatTypes formatTypes)
{
    // Gets the input file and the expected sequence file from the Xml
    string filePath = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string expectedSequenceFile = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);

    // The concrete instance is owned by the using statement, which replaces the
    // unchecked "(parser as SAMParser).Dispose()" cast.
    using (SAMParser samParser = new SAMParser())
    {
        // Calls still go through the interface so the same Parse overloads are used.
        ISequenceAlignmentParser parser = samParser;
        try
        {
            IList<ISequenceAlignment> alignments = parser.Parse(filePath);
            SAMFormatter formatter = new SAMFormatter();

            switch (formatTypes)
            {
                case ParseOrFormatTypes.ParseOrFormatText:
                    using (TextWriter writer = new StreamWriter(Constants.SAMTempFileName))
                    {
                        formatter.Format(alignments[0], writer);
                    }
                    break;

                case ParseOrFormatTypes.ParseOrFormatFileName:
                    formatter.Format(alignments[0], Constants.SAMTempFileName);
                    break;

                case ParseOrFormatTypes.ParseOrFormatFileNameWithFlag:
                    formatter.Format(alignments, Constants.SAMTempFileName);
                    break;

                default:
                    // Without this guard nothing is written and the code below would
                    // parse whatever temp file an earlier run happened to leave behind.
                    Assert.Fail("Unsupported format type: " + formatTypes);
                    break;
            }

            // Read back what the formatter wrote
            alignments = parser.Parse(Constants.SAMTempFileName);

            // Get expected sequences
            using (FastaParser parserObj = new FastaParser())
            {
                IList<ISequence> expectedSequences = parserObj.Parse(expectedSequenceFile);

                // Validate parsed output with expected output
                int count = 0;
                for (int index = 0; index < alignments.Count; index++)
                {
                    for (int ialigned = 0; ialigned < alignments[index].AlignedSequences.Count; ialigned++)
                    {
                        for (int iseq = 0; iseq < alignments[index].AlignedSequences[ialigned].Sequences.Count; iseq++)
                        {
                            Assert.AreEqual(
                                expectedSequences[count].ToString(),
                                alignments[index].AlignedSequences[ialigned].Sequences[iseq].ToString());
                            count++;
                        }
                    }
                }
            }
        }
        finally
        {
            // Do not leave the temporary file behind for later tests to pick up.
            File.Delete(Constants.SAMTempFileName);
        }
    }
}