// Parses TestUtils/Fasta/186972391.fasta with the protein alphabet and checks the
// length, residues, alphabet name and ID of every sequence the parser yields.
public void TestFastaFor186972391()
{
    string expectedSequence =
        "IFYEPVEILGYDNKSSLVLVKRLITRMYQQKSLISSLNDSNQNEFWGHKNSFSSHFSSQMVSEGFGVILE" +
        "IPFSSRLVSSLEEKRIPKSQNLRSIHSIFPFLEDKLSHLNYVSDLLIPHPIHLEILVQILQCWIKDVPSL" +
        "HLLRLFFHEYHNLNSLITLNKSIYVFSKRKKRFFGFLHNSYVYECEYLFLFIRKKSSYLRSISSGVFLER" +
        "THFYGKIKYLLVVCCNSFQRILWFLKDTFIHYVRYQGKAIMASKGTLILMKKWKFHLVNFWQSYFHFWFQ" +
        "PYRINIKQLPNYSFSFLGYFSSVRKNPLVVRNQMLENSFLINTLTQKLDTIVPAISLIGSLSKAQFCTVL" +
        "GHPISKPIWTDLSDSDILDRFCRICRNLCRYHSGSSKKQVLYRIKYIFRLSCARTLARKHKSTVRTFMRR" +
        "LGSGFLEEFFLEEE";

    // parse
    string filepath = System.IO.Path.Combine("TestUtils", "Fasta", "186972391.fasta");
    Assert.IsTrue(File.Exists(filepath));

    FastAParser parser = new FastAParser { Alphabet = Alphabets.Protein };

    // Counted so that an empty parse result fails the test instead of skipping every assertion.
    int parsedCount = 0;
    foreach (ISequence seq in parser.Parse(filepath))
    {
        parsedCount++;

        Assert.IsNotNull(seq);
        Assert.AreEqual(434, seq.Count);

        // Build the residue string in one pass rather than by repeated concatenation.
        string actual = new string(seq.Select(b => (char)b).ToArray());
        Assert.AreEqual(expectedSequence, actual);

        // Expected value goes first so a failure message reports the right sides.
        Assert.AreEqual("Protein", seq.Alphabet.Name);
        Assert.AreEqual("gi|186972391|gb|ACC99454.1| maturase K [Scaphosepalum rapax]", seq.ID);
    }

    Assert.IsTrue(parsedCount > 0, "No sequences were parsed from " + filepath);
}
// Builds the shared Mismatcher (Mismatch_Test) from the first sequence in Reference.txt.
public MismatchTest()
{
    // Dispose the parser as soon as the first sequence has been read.
    // NOTE(review): assumes the path-based FastAParser is IDisposable, as it is
    // wherever it is wrapped in a using statement - confirm for this project version.
    using (FastAParser referenceParser = new FastAParser(@"Reference.txt"))
    {
        Mismatch_Test = new Mismatcher(referenceParser.Parse().First());
    }
}
private void ValidateMUMmerAlignGeneralTestCases(string nodeName)
{
    // Aligns the first reference sequence against the query sequences with MUMmerAligner
    // and compares the result with the expected alignment described by the XML node.

    // Reference sequence: path comes from the configuration file.
    string referencePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    Assert.IsNotNull(referencePath);
    ApplicationLog.WriteLine(string.Format(null,
        "MUMmer P2 : Successfully validated the File Path '{0}'.", referencePath));

    var referenceParser = new FastAParser();
    ISequence referenceSeq = referenceParser.Parse(referencePath).ElementAt(0);

    // Query sequences: path comes from the configuration file as well.
    string queryPath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);
    Assert.IsNotNull(queryPath);
    ApplicationLog.WriteLine(string.Format(null,
        "MUMmer P2 : Successfully validated the Search File Path '{0}'.", queryPath));

    var queryParser = new FastAParser();
    IEnumerable<ISequence> querySeqs = queryParser.Parse(queryPath);

    string mumLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);

    // Configure the aligner property by property, in the same order as before.
    var mum = new MUMmerAligner();
    mum.LengthOfMUM = long.Parse(mumLength, null);
    mum.StoreMUMs = true;
    mum.PairWiseAlgorithm = new NeedlemanWunschAligner();
    mum.GapOpenCost = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);

    IList<IPairwiseSequenceAlignment> align = mum.Align(referenceSeq, querySeqs);

    // StoreMUMs was requested, so the MUMs property must be populated.
    Assert.IsNotNull(mum.MUMs);

    // Build the expected alignment from the XML values.
    string expectedScore = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ScoreNodeName);
    string[] expectedSequences = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ExpectedSequencesNode);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();

    var expectedPair = new PairwiseAlignedSequence();
    expectedPair.FirstSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[0]);
    expectedPair.SecondSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[1]);
    expectedPair.Score = Convert.ToInt32(expectedScore, null);
    expectedPair.FirstOffset = int.MinValue;
    expectedPair.SecondOffset = int.MinValue;

    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);
    expectedOutput.Add(expectedAlignment);

    Assert.IsTrue(AlignmentHelpers.CompareAlignment(align, expectedOutput));
    ApplicationLog.WriteLine("MUMmer P2 : Successfully validated the aligned sequences.");
}
/// <summary>
/// Creates a FastASequencePositionParser that works on the supplied stream.
/// </summary>
/// <param name="stream">Stream to load; must not be null.</param>
/// <param name="reverseReversePairedRead">Flag to indicate to get the forward strand sequence of a reverse paired read.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="stream"/> is null.</exception>
public FastASequencePositionParser(Stream stream, bool reverseReversePairedRead = false)
{
    // Reject a missing stream before any state is set up.
    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }

    this.reverseReversePairedRead = reverseReversePairedRead;
    this.stream = stream;
    this.fastaParser = new FastAParser();
}
// Checks that the first sequence of Query.txt produces exactly two mismatches against the
// reference held by Mismatch_Test, at query offsets 0 and 179.
public void TestMethod1()
{
    // NOTE(review): assumes the path-based FastAParser is IDisposable - confirm for this project version.
    using (FastAParser queryParser = new FastAParser(@"Query.txt"))
    {
        // Materialize once: the result is inspected three times below and must not be
        // recomputed per call if GetMismatches is lazily evaluated.
        var mismatches = Mismatch_Test.GetMismatches(queryParser.Parse().First()).ToList();

        Assert.AreEqual(2, mismatches.Count);
        Assert.AreEqual(0, mismatches.First().QuerySequenceOffset);
        Assert.AreEqual(179, mismatches.Last().QuerySequenceOffset);
    }
}
/// <summary>
/// Executes the cross-link search for LC-IMS-TOF data, reading all inputs from files
/// and delegating to the overload that works on the loaded objects.
/// </summary>
/// <param name="settings">Settings object to control parameters for cross-linking.</param>
/// <param name="fastAFile">The FileInfo object for the FASTA file containing all protein sequences you want to search.</param>
/// <param name="featureFile">The FileInfo object for the LC-IMS-MS features file, created by the LC-IMS-MS Feature Finder.</param>
/// <param name="peaksFile">The FileInfo object for the Isotopic Peaks file, created by DeconTools.</param>
/// <returns>A list of CrossLinkResult objects.</returns>
/// <remarks>
/// A failure while loading any input is reported on the console and then rethrown.
/// NOTE(review): if FastAParser.Parse() is lazily evaluated, FASTA read errors would surface
/// later, outside the first try block - confirm.
/// </remarks>
public static IList<CrossLinkResult> Execute(CrossLinkSettings settings, FileInfo fastAFile, FileInfo featureFile, FileInfo peaksFile)
{
    IEnumerable<ISequence> proteinSequences;
    List<LcImsMsFeature> features;
    List<IsotopicPeak> peaks;

    Console.WriteLine();

    // FASTA file
    try
    {
        string fastaPath = fastAFile.FullName;
        proteinSequences = new FastAParser(fastaPath).Parse();
        Console.WriteLine(string.Concat("FASTA file: ", GetRelativePath(fastaPath)));
    }
    catch (Exception ex)
    {
        Console.WriteLine(string.Concat("Error reading the FASTA file: ", ex.Message));
        throw;
    }

    // LC-IMS-MS features
    try
    {
        features = LcImsMsFeatureReader.ReadFile(featureFile);
        Console.WriteLine(string.Concat("Features file: ", GetRelativePath(featureFile.FullName)));
    }
    catch (Exception ex)
    {
        Console.WriteLine(string.Concat("Error reading the LCMSFeatures file: ", ex.Message));
        throw;
    }

    // Isotopic peaks (not isotopic profile)
    try
    {
        peaks = IsotopicPeakReader.ReadFile(peaksFile);
        Console.WriteLine(string.Concat("Peaks file: ", GetRelativePath(peaksFile.FullName)));
    }
    catch (Exception ex)
    {
        Console.WriteLine(string.Concat("Error reading the Isotopic Peaks file: ", ex.Message));
        throw;
    }

    // Hand over to the overload that expects the objects instead of the file locations.
    return Execute(settings, proteinSequences, features, peaks);
}
// Parses TestUtils/Fasta/5_sequences.fasta with the protein alphabet and checks the
// length of each sequence, in file order, against the expected lengths.
public void TestFastaWhenParsingOneOfMany()
{
    // parse
    string filepath = System.IO.Path.Combine("TestUtils", "Fasta", "5_sequences.fasta");
    FastAParser parser = new FastAParser { Alphabet = Alphabets.Protein };

    using (parser.Open(filepath))
    {
        int[] expectedCounts = { 27, 29, 30, 35, 32 };
        int parsed = 0;

        foreach (ISequence seq in parser.Parse())
        {
            Assert.IsNotNull(seq);

            // Expected value goes first so a failure message reports the right sides.
            Assert.AreEqual(expectedCounts[parsed], seq.Count);
            parsed++;
        }

        // Without this check a file yielding fewer sequences would still pass.
        Assert.AreEqual(expectedCounts.Length, parsed);
    }
}
// Checks Sequence.ToString() for a short sequence, a long sequence (expected text is a
// format string read from XML), an empty sequence, and the first sequence of a FASTA file.
public void ValidateSequenceToString()
{
    ISequence shortSeq = new Sequence(Alphabets.DNA, "ATCG");
    string longSeqText = this.utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName,
        Constants.seqLargeStringNode);
    ISequence longSeq = new Sequence(Alphabets.DNA, longSeqText);

    string actualShort = shortSeq.ToString();
    string actualLong = longSeq.ToString();

    // The expected long text is a format string taking the number of symbols not shown.
    string longFormat = this.utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName,
        Constants.seqLargeExpected2Node);
    string expectedLong = string.Format(CultureInfo.CurrentCulture, longFormat,
        (longSeq.Count - Helper.AlphabetsToShowInToString));

    Assert.AreEqual("ATCG", actualShort);
    Assert.AreEqual(expectedLong, actualLong);

    // A blank sequence renders as an empty string.
    var emptySeq = new Sequence(Alphabets.DNA, "");
    Assert.AreEqual(string.Empty, emptySeq.ToString());

    // Read the first sequence of the FASTA file named in the XML.
    string filePath = this.utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName,
        Constants.FilePathNode);
    var parser = new FastAParser { Alphabet = Alphabets.Protein };
    List<ISequence> parsedSeqs = parser.Parse(filePath).ToList();

    ISequence firstParsed = parsedSeqs[0];
    string fullText = new string(firstParsed.Select(symbol => (char)symbol).ToArray());
    string shownText = firstParsed.ToString();

    if (fullText.Length <= Helper.AlphabetsToShowInToString)
    {
        // Short enough to be shown completely.
        Assert.AreEqual(fullText, shownText);
    }
    else
    {
        // The shown prefix must come from the full sequence, followed by the truncation marker.
        Assert.IsTrue(fullText.Contains(shownText.Substring(0, Helper.AlphabetsToShowInToString)));
        Assert.IsTrue(shownText.Contains("... +["));
    }
}
/// <summary>
/// Parses a SAM file, writes it back out with SAMFormatter to a temporary file, re-parses
/// that file and compares every query sequence with the expected FASTA sequences.
/// </summary>
/// <param name="nodeName">xml node name</param>
void ValidateSAMFormatter(string nodeName)
{
    // Input and expected-sequence file paths come from the Xml.
    string samFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedSequenceFile = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence);

    SAMParser samParser = new SAMParser();
    SequenceAlignmentMap parsedMap = (SequenceAlignmentMap)samParser.ParseOne(samFilePath);

    // Round-trip the alignment map through the formatter.
    SAMFormatter formatter = new SAMFormatter();
    using (var output = File.Create(Constants.SAMTempFileName))
    {
        formatter.Format(output, parsedMap);
    }

    SequenceAlignmentMap roundTripped = samParser.ParseOne<SequenceAlignmentMap>(Constants.SAMTempFileName);

    // Get expected sequences
    FastAParser expectedParser = new FastAParser();
    IList<ISequence> expectedList = expectedParser.Parse(expectedSequenceFile).ToList();

    // Every sequence of the index-th query must equal the index-th expected sequence.
    for (int index = 0; index < roundTripped.QuerySequences.Count; index++)
    {
        foreach (var actualSeq in roundTripped.QuerySequences[index].Sequences)
        {
            string expectedText = new string(expectedList[index].Select(a => (char)a).ToArray());
            string actualText = new string(actualSeq.Select(a => (char)a).ToArray());
            Assert.AreEqual(expectedText, actualText);
        }
    }
}
/// <summary>
/// Parses a SAM file from a stream and compares every query sequence with the
/// expected FASTA sequences.
/// </summary>
/// <param name="nodeName">xml node name</param>
void ValidateSAMParser(string nodeName)
{
    // Input and expected-sequence file paths come from the Xml.
    string samFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedSequenceFile = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence);

    var samParser = new SAMParser();

    // Parse SAM File
    SequenceAlignmentMap alignments;
    using (var reader = File.OpenRead(samFilePath))
    {
        alignments = samParser.Parse(reader);
    }

    // Get expected sequences
    FastAParser expectedParser = new FastAParser();
    IList<ISequence> expectedList = expectedParser.Parse(expectedSequenceFile).ToList();

    // Every sequence of the index-th query must equal the index-th expected sequence.
    for (int index = 0; index < alignments.QuerySequences.Count; index++)
    {
        foreach (var actualSeq in alignments.QuerySequences[index].Sequences)
        {
            string expectedText = new string(expectedList[index].Select(a => (char)a).ToArray());
            string actualText = new string(actualSeq.Select(a => (char)a).ToArray());
            Assert.AreEqual(expectedText, actualText);
        }
    }
}
/// <summary>
/// Loads every read from the FastA file whose path is stored under
/// Constants.FilePathNode2 of the given Xml node.
/// </summary>
/// <param name="nodeName">Parent node in Xml</param>
/// <returns>All sequences parsed from the read file, in file order.</returns>
public List<ISequence> GetReads(string nodeName)
{
    var reads = new List<ISequence>();

    // Resolve and validate the read file path.
    string readFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);
    Assert.IsNotNull(readFilePath);
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
        "Comparative P1 : Successfully validated the File Path '{0}'.", readFilePath));

    // Copy the reads out while the parser is still open.
    using (FastAParser queryParser = new FastAParser(readFilePath))
    {
        reads.AddRange(queryParser.Parse());
    }

    return reads;
}
/// <summary>
/// Runs ComparativeGenomeAssembler.Assemble with the settings of the given Xml node and
/// compares the sorted, concatenated output with the expected sequence (case-insensitive).
/// </summary>
/// <param name="nodeName">Parent Node name in Xml</param>
/// <param name="isEcOli">When true the expected sequence is read with GetFileTextValue,
/// otherwise with GetTextValue.</param>
public void ValidateComparativeAssembleMethod(string nodeName, bool isEcOli)
{
    ComparativeGenomeAssembler assemble = new ComparativeGenomeAssembler();
    List<ISequence> references = new List<ISequence>();

    // Assembler settings, as text, from the Xml node.
    string mumLengthText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);
    string kmerLengthText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string fixedSeparationText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FixedSeparationNode);
    string minimumScoreText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MinimumScoreNode);
    string separationFactorText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SeparationFactorNode);
    string maximumSeparationText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MaximumSeparationNode);
    string breakLengthText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BreakLengthNode);

    // Reference sequences come from the FastA file.
    string referencePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
    Assert.IsNotNull(referencePath);
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
        "Comparative P1 : Successfully validated the File Path '{0}'.", referencePath));

    using (FastAParser parser = new FastAParser(referencePath))
    {
        references.AddRange(parser.Parse());
    }

    // Path of the reads file, from the configuration file.
    string readFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

    assemble.LengthOfMum = int.Parse(mumLengthText, CultureInfo.InvariantCulture);
    assemble.KmerLength = int.Parse(kmerLengthText, CultureInfo.InvariantCulture);
    assemble.FixedSeparation = int.Parse(fixedSeparationText, CultureInfo.InvariantCulture);
    assemble.MinimumScore = int.Parse(minimumScoreText, CultureInfo.InvariantCulture);
    assemble.SeparationFactor = float.Parse(separationFactorText, CultureInfo.InvariantCulture);
    assemble.MaximumSeparation = int.Parse(maximumSeparationText, CultureInfo.InvariantCulture);
    assemble.BreakLength = int.Parse(breakLengthText, CultureInfo.InvariantCulture);

    using (FastASequencePositionParser queryparser = new FastASequencePositionParser(readFilePath))
    {
        IEnumerable<ISequence> assembled = assemble.Assemble(references, queryparser);

        string expectedSequence = isEcOli
            ? utilityObj.xmlUtil.GetFileTextValue(nodeName, Constants.ExpectedSequenceNode)
            : utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode);

        // Sort so that the comparison does not depend on the order the assembler emits sequences.
        List<string> outputStrings = assembled.Select(seq => seq.ConvertToString()).ToList();
        outputStrings.Sort();

        Assert.AreEqual(expectedSequence.ToUpperInvariant(),
            String.Join("", outputStrings).ToUpperInvariant());
    }
}
/// <summary>
/// Parses a SAM file that contains extended CIGAR strings and checks, for every aligned
/// sequence, both the sequence text (against the expected FASTA file) and the CIGAR value
/// stored in each metadata header.
/// </summary>
/// <param name="nodeName">xml node name</param>
void ValidateSAMParseAndFormatWithCIGARFormat(string nodeName)
{
    // Input path, expected-sequence path and expected CIGAR string come from the Xml.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedSequenceFile = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence);
    string expectedCIGARString = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.CIGARNode);

    ISequenceAlignmentParser parser = new SAMParser();
    var alignments = parser.Parse(filePath).ToList();

    // Get expected sequences
    FastAParser expectedParser = new FastAParser();
    IList<ISequence> expectedList = expectedParser.Parse(expectedSequenceFile).ToList();

    // Running index into the expected list across all alignments.
    int expectedIndex = 0;
    foreach (var alignment in alignments)
    {
        foreach (var aligned in alignment.AlignedSequences)
        {
            foreach (var actualSeq in aligned.Sequences)
            {
                string expectedText = new string(expectedList[expectedIndex].Select(a => (char)a).ToArray());
                string actualText = new string(actualSeq.Select(a => (char)a).ToArray());
                Assert.AreEqual(expectedText, actualText);

                // Every metadata entry is treated as a SAM header carrying the expected CIGAR.
                foreach (string key in aligned.Metadata.Keys)
                {
                    var header = (SAMAlignedSequenceHeader)aligned.Metadata[key];
                    Assert.AreEqual(expectedCIGARString, header.CIGAR);
                }

                expectedIndex++;
            }
        }
    }
}
/// <summary>
/// Parses a SAM file with quality values and checks that every aligned sequence is a
/// QualitativeSequence whose text matches the expected FASTA sequences.
/// </summary>
/// <param name="nodeName">xml node name</param>
void ValidateSAMParseAndFormatWithQualityValues(string nodeName)
{
    // Input path and expected-sequence path come from the Xml.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedSequenceFile = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence);

    ISequenceAlignmentParser parser = new SAMParser();
    var alignments = parser.Parse(filePath).ToList();

    // Get expected sequences
    FastAParser expectedParser = new FastAParser();
    var expectedList = expectedParser.Parse(expectedSequenceFile).ToList();

    // Running index into the expected list across all alignments.
    int expectedIndex = 0;
    foreach (var alignment in alignments)
    {
        foreach (var aligned in alignment.AlignedSequences)
        {
            foreach (var actualSeq in aligned.Sequences)
            {
                Assert.IsInstanceOf<QualitativeSequence>(actualSeq);
                QualitativeSequence qualSequence = (QualitativeSequence)actualSeq;

                string expectedText = new string(expectedList[expectedIndex].Select(a => (char)a).ToArray());
                string actualText = new string(qualSequence.Select(a => (char)a).ToArray());
                Assert.AreEqual(expectedText, actualText);

                expectedIndex++;
            }
        }
    }
}
// Parses the FastA file configured under Constants.SimpleFastaNodeName and checks the first
// sequence's symbols, length, ID and alphabet name against the values stored in the Xml.
public void ValidateFastaAFileSequence()
{
    // Gets the expected sequence from the Xml
    string expectedSequence = this.utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleFastaNodeName, Constants.ExpectedSequenceNode);
    string fastAFilePath = this.utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleFastaNodeName, Constants.FilePathNode);
    string alphabet = this.utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleFastaNodeName, Constants.AlphabetNameNode);

    Assert.IsTrue(File.Exists(fastAFilePath));

    // Logs information to the log file
    ApplicationLog.WriteLine(string.Concat(
        "Sequence BVT: The File exist in the Path ", fastAFilePath));

    // Parse a FastA file using the Parse method and convert the same to sequence.
    FastAParser parser = new FastAParser();
    parser.Alphabet = Utility.GetAlphabet(alphabet);

    IEnumerable<ISequence> sequences = parser.Parse(fastAFilePath);
    Assert.IsNotNull(sequences);

    // Take the first sequence once; asking the parse result for ElementAt(0) repeatedly
    // would enumerate it again each time.
    ISequence firstSequence = sequences.ElementAt(0);
    Sequence fastASequence = (Sequence)firstSequence;
    Assert.IsNotNull(fastASequence);

    string newSequence = new string(firstSequence.Select(a => (char)a).ToArray());
    Assert.AreEqual(expectedSequence, newSequence);
    ApplicationLog.WriteLine("Sequence BVT: The Sequence is as expected.");

    byte[] tmpEncodedSeq = new byte[fastASequence.Count];
    (fastASequence as IEnumerable<byte>).ToArray().CopyTo(tmpEncodedSeq, 0);
    Assert.AreEqual(expectedSequence.Length, tmpEncodedSeq.Length);
    ApplicationLog.WriteLine("Sequence BVT: Sequence Length is as expected.");

    Assert.AreEqual(
        this.utilityObj.xmlUtil.GetTextValue(Constants.SimpleProteinAlphabetNode, Constants.SequenceIdNode),
        fastASequence.ID);
    ApplicationLog.WriteLine("Sequence BVT: SequenceID is as expected.");

    // Expected value (the alphabet name read from the Xml above) goes first.
    Assert.AreEqual(alphabet, fastASequence.Alphabet.Name);
    ApplicationLog.WriteLine("Sequence BVT: Sequence Alphabet is as expected.");
}
private void ValidateNUCmerAlignSimpleGeneralTestCases(string nodeName, bool isFilePath, bool isAlignList)
{
    // Loads reference and search sequences (from FastA files or from the Xml), aligns the
    // first of each with NucmerPairwiseAligner.AlignSimple and compares the aligned
    // sequences with the comma-separated expected list.
    IList<ISequence> refSeqList = new List<ISequence>();
    IList<ISequence> searchSeqList = new List<ISequence>();

    if (!isFilePath)
    {
        // Reference and search sequences are given inline in the configuration file.
        string[] referenceSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ReferenceSequencesNode);
        string[] searchSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.SearchSequencesNode);
        IAlphabet seqAlphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

        foreach (string referenceText in referenceSequences)
        {
            refSeqList.Add(new Sequence(seqAlphabet, Encoding.ASCII.GetBytes(referenceText)));
        }

        foreach (string searchText in searchSequences)
        {
            searchSeqList.Add(new Sequence(seqAlphabet, Encoding.ASCII.GetBytes(searchText)));
        }
    }
    else
    {
        // Reference sequences from the FastA file.
        string referencePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
        Assert.IsNotNull(referencePath);
        ApplicationLog.WriteLine(string.Format(null,
            "NUCmer P1 : Successfully validated the File Path '{0}'.", referencePath));

        var referenceParser = new FastAParser();
        foreach (ISequence seq in referenceParser.Parse(referencePath))
        {
            refSeqList.Add(seq);
        }

        // Query sequences from the FastA file.
        string queryPath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);
        Assert.IsNotNull(queryPath);
        ApplicationLog.WriteLine(string.Format(null,
            "NUCmer P1 : Successfully validated the File Path '{0}'.", queryPath));

        var queryParser = new FastAParser();
        foreach (ISequence seq in queryParser.Parse(queryPath))
        {
            searchSeqList.Add(seq);
        }
    }

    // Gets the mum length from the xml
    string mumLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);

    // NOTE(review): every setting below is read from MUMAlignLengthNode - confirm that is intended.
    var nucmerObj = new NucmerPairwiseAligner();
    nucmerObj.MaximumSeparation = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode), null);
    nucmerObj.MinimumScore = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode), null);
    nucmerObj.SeparationFactor = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode), null);
    nucmerObj.BreakLength = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode), null);
    nucmerObj.LengthOfMUM = long.Parse(mumLength, null);

    // NOTE(review): when isAlignList is false no alignment is performed and alignSimple stays
    // null, so the Cast call below throws - confirm whether another overload was intended.
    IList<ISequenceAlignment> alignSimple = null;
    if (isAlignList)
    {
        var listOfSeq = new List<ISequence>();
        listOfSeq.Add(refSeqList[0]);
        listOfSeq.Add(searchSeqList[0]);
        alignSimple = nucmerObj.AlignSimple(listOfSeq);
    }

    string expectedSequences;
    if (isFilePath)
    {
        expectedSequences = this.utilityObj.xmlUtil.GetFileTextValue(nodeName, Constants.ExpectedSequencesNode);
    }
    else
    {
        expectedSequences = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequencesNode);
    }

    string[] expSeqArray = expectedSequences.Split(',');

    // The expected list alternates first/second sequence for each aligned pair.
    int j = 0;
    foreach (IPairwiseSequenceAlignment pairwise in alignSimple.Cast<IPairwiseSequenceAlignment>())
    {
        foreach (PairwiseAlignedSequence alignedSeq in pairwise)
        {
            Assert.AreEqual(expSeqArray[j++], alignedSeq.FirstSequence.ConvertToString());
            Assert.AreEqual(expSeqArray[j++], alignedSeq.SecondSequence.ConvertToString());
        }
    }

    ApplicationLog.WriteLine("NUCmer P1 : Successfully validated all the aligned sequences.");
}
private void ValidateFindMatchSuffixGeneralTestCases(string nodeName, bool isFilePath, AdditionalParameters additionalParam, PropertyParameters propParam)
{
    // Concatenates all reference sequences (each followed by '+') into one sequence, builds a
    // MultiWaySuffixTree over it, collects the matches for every search sequence and validates
    // them according to additionalParam.
    ISequence referenceSeq;
    var searchSeqList = new List<ISequence>();

    if (isFilePath)
    {
        // Reference sequences from the FastA file.
        string referencePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
        Assert.IsNotNull(referencePath);
        ApplicationLog.WriteLine(string.Format(null,
            "NUCmer P1 : Successfully validated the File Path '{0}'.", referencePath));

        var referenceParser = new FastAParser();
        IEnumerable<ISequence> parsedReferences = referenceParser.Parse(referencePath);

        var joinedSymbols = new List<Byte>();
        foreach (ISequence parsed in parsedReferences)
        {
            joinedSymbols.AddRange(parsed);
            joinedSymbols.Add((byte)'+');
        }

        referenceSeq = new Sequence(parsedReferences.First().Alphabet.GetMummerAlphabet(), joinedSymbols.ToArray());

        // Query sequences from the FastA file.
        string queryPath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);
        Assert.IsNotNull(queryPath);
        ApplicationLog.WriteLine(string.Format(null,
            "NUCmer P1 : Successfully validated the File Path '{0}'.", queryPath));

        var queryParser = new FastAParser();
        searchSeqList.AddRange(queryParser.Parse(queryPath));
    }
    else
    {
        // Reference and search sequences are given inline in the configuration file.
        string[] referenceSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ReferenceSequencesNode);
        string[] searchSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.SearchSequencesNode);
        IAlphabet seqAlphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

        var inlineReferences = referenceSequences
            .Select(t => new Sequence(seqAlphabet, Encoding.ASCII.GetBytes(t)))
            .Cast<ISequence>()
            .ToList();

        var joinedSymbols = new List<Byte>();
        foreach (ISequence inlineRef in inlineReferences)
        {
            joinedSymbols.AddRange(inlineRef);
            joinedSymbols.Add((byte)'+');
        }

        referenceSeq = new Sequence(inlineReferences.First().Alphabet.GetMummerAlphabet(), joinedSymbols.ToArray());

        searchSeqList.AddRange(searchSequences.Select(t => new Sequence(seqAlphabet, Encoding.ASCII.GetBytes(t))));
    }

    string mumLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

    // Builds the suffix tree for the concatenated reference sequence.
    var suffixTreeBuilder = new MultiWaySuffixTree(referenceSeq as Sequence);
    suffixTreeBuilder.MinLengthOfMatch = long.Parse(mumLength, null);

    // One entry per search sequence; the values are flattened into a single match list.
    var matches = searchSeqList.ToDictionary(t => t, suffixTreeBuilder.SearchMatchesUniqueInReference);

    var mums = new List<Match>();
    foreach (var found in matches.Values)
    {
        mums.AddRange(found);
    }

    if (additionalParam == AdditionalParameters.FindUniqueMatches)
    {
        // Validates the Unique Matches.
        ApplicationLog.WriteLine("NUCmer P1 : Validating the Unique Matches");
        Assert.IsTrue(this.ValidateUniqueMatches(mums, nodeName, isFilePath));
        ApplicationLog.WriteLine("NUCmer P1 : Successfully validated the all the unique matches for the sequences.");
    }
    else if (additionalParam == AdditionalParameters.PerformClusterBuilder)
    {
        // Validates the matches through the cluster builder.
        ApplicationLog.WriteLine(
            "NUCmer P1 : Validating the Unique Matches using Cluster Builder");
        Assert.IsTrue(this.ValidateClusterBuilderMatches(mums, nodeName, propParam));
        ApplicationLog.WriteLine("NUCmer P1 : Successfully validated the all the cluster builder matches for the sequences.");
    }

    ApplicationLog.WriteLine("NUCmer P1 : Successfully validated the all the unique matches for the sequences.");
}
/// <summary>
/// Assembles the reads of the configured FastA file with ParallelDeNovoAssembler and checks
/// the number of assembled sequences and that each one (or its reverse complement) appears
/// in the expected sequences from the Xml.
/// </summary>
/// <param name="nodeName">XML node used to validate different test scenarios</param>
internal void ValidatePadenaAssembledSeqs(string nodeName)
{
    // Get values from XML node.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string danglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
    string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
    string libraryName = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
    string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
    string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);
    string assembledSequences = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequencePathNode);
    string assembledSeqCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AssembledSeqCountNode);
    string[] updatedAssembledSeqs = assembledSequences.Split(',');

    using (FastAParser parser = new FastAParser(filePath))
    {
        // Get the input reads
        IEnumerable<ISequence> sequenceReads = parser.Parse();

        // The assembler is disposed when the block ends, whether or not an assertion fails.
        using (ParallelDeNovoAssembler denovoObj = new ParallelDeNovoAssembler())
        {
            denovoObj.KmerLength = Int32.Parse(kmerLength, (IFormatProvider)null);
            denovoObj.DanglingLinksThreshold = Int32.Parse(danglingThreshold, (IFormatProvider)null);
            denovoObj.RedundantPathLengthThreshold = Int32.Parse(redundantThreshold, (IFormatProvider)null);

            CloneLibrary.Instance.AddLibrary(libraryName,
                float.Parse(mean, (IFormatProvider)null),
                float.Parse(stdDeviation, (IFormatProvider)null));

            // Each read is re-created as a DNA sequence with its symbols mapped through the
            // alphabet's symbol value map, keeping the original ID.
            byte[] symbols = sequenceReads.ElementAt(0).Alphabet.GetSymbolValueMap();
            IDeNovoAssembly assembly = denovoObj.Assemble(
                sequenceReads.Select(a => new Sequence(Alphabets.DNA, a.Select(b => symbols[b]).ToArray()) { ID = a.ID }),
                true);

            IList<ISequence> assembledList = assembly.AssembledSequences.ToList();

            // Validate assembled sequences.
            Assert.AreEqual(assembledSeqCount, assembledList.Count.ToString((IFormatProvider)null));

            foreach (ISequence assembled in assembledList)
            {
                // Accept the sequence as text inside the expected string, or its reverse
                // complement as one of the comma-separated expected entries.
                Assert.IsTrue(
                    assembledSequences.Contains(
                        new string(assembled.Select(a => (char)a).ToArray()))
                    || updatedAssembledSeqs.Contains(
                        new string(assembled.GetReverseComplementedSequence().Select(a => (char)a).ToArray())));
            }
        }
    }

    ApplicationLog.WriteLine("Padena P1 : Assemble() validation for Padena step6:step7 completed successfully");
}
private void InValidateSmithWatermanAlignmentWithInvalidSimilarityMatrix(string nodeName, bool isTextFile, SimilarityMatrixInvalidTypes invalidType, AlignParameters additionalParameter, AlignmentType alignType)
{
    // Negative test: builds two lower-case input sequences, tries to load an invalid similarity
    // matrix and/or align with it, and checks that the captured exception carries the expected
    // error message for the given invalid type.
    Sequence aInput = null;
    Sequence bInput = null;

    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    if (isTextFile)
    {
        // Read the xml file for getting both the files for aligning.
        string firstInputFilepath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
        string secondInputFilepath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

        // Parse the files and get the first sequence of each.
        var parser = new FastAParser { Alphabet = alphabet };
        ISequence inputSequence1 = parser.Parse(firstInputFilepath).ElementAt(0);
        ISequence inputSequence2 = parser.Parse(secondInputFilepath).ElementAt(0);

        // Create input sequence for sequence string in different cases.
        GetSequenceWithCaseType(
            new string(inputSequence1.Select(a => (char)a).ToArray()),
            new string(inputSequence2.Select(a => (char)a).ToArray()),
            alphabet, SequenceCaseType.LowerCase, out aInput, out bInput);
    }
    else
    {
        string firstInputSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
        string secondInputSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

        // Create input sequence for sequence string in different cases.
        GetSequenceWithCaseType(firstInputSequence, secondInputSequence, alphabet,
            SequenceCaseType.LowerCase, out aInput, out bInput);
    }

    // string.Format (not string.Concat) so that the '{0}' placeholder is actually substituted.
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
        "SmithWatermanAligner P2 : First sequence used is '{0}'.",
        new string(aInput.Select(a => (char)a).ToArray())));
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
        "SmithWatermanAligner P2 : Second sequence used is '{0}'.",
        new string(bInput.Select(a => (char)a).ToArray())));

    // Create similarity matrix object for a invalid file.
    string blosumFilePath = this.GetSimilarityMatrixFileWithInvalidType(nodeName, invalidType);
    Exception actualException = null;

    // For invalid similarity matrix data format; exception will be thrown while instantiating
    SimilarityMatrix sm = null;
    try
    {
        if (invalidType != SimilarityMatrixInvalidTypes.NullSimilarityMatrix)
        {
            // Dispose the reader even when the constructor rejects the file.
            // NOTE(review): assumes SimilarityMatrix reads everything inside its constructor - confirm.
            using (var reader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(reader);
            }
        }
    }
    catch (InvalidDataException ex)
    {
        actualException = ex;
    }

    // For non matching similarity matrix exception will be thrown while alignment
    if (actualException == null)
    {
        int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
        int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), null);

        // Create SmithWatermanAligner instance and set its values.
        var smithWatermanObj = new SmithWatermanAligner();
        if (additionalParameter != AlignParameters.AllParam)
        {
            smithWatermanObj.SimilarityMatrix = sm;
            smithWatermanObj.GapOpenCost = gapOpenCost;
            smithWatermanObj.GapExtensionCost = gapExtensionCost;
        }

        // Align the input sequences and capture the ArgumentException, whichever overload is used.
        bool useAlign = alignType == AlignmentType.Align;
        try
        {
            switch (additionalParameter)
            {
                case AlignParameters.AlignList:
                    {
                        var inputs = new List<ISequence> { aInput, bInput };
                        if (useAlign)
                        {
                            smithWatermanObj.Align(inputs);
                        }
                        else
                        {
                            smithWatermanObj.AlignSimple(inputs);
                        }

                        break;
                    }

                case AlignParameters.AlignTwo:
                    if (useAlign)
                    {
                        smithWatermanObj.Align(aInput, bInput);
                    }
                    else
                    {
                        smithWatermanObj.AlignSimple(aInput, bInput);
                    }

                    break;

                case AlignParameters.AllParam:
                    if (useAlign)
                    {
                        smithWatermanObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                    }
                    else
                    {
                        smithWatermanObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    }

                    break;

                default:
                    break;
            }
        }
        catch (ArgumentException ex)
        {
            actualException = ex;
        }
    }

    // Validate that expected exception is thrown using error message.
    string expectedErrorMessage = this.GetExpectedErrorMeesageWithInvalidSimilarityMatrixType(nodeName, invalidType);

    // Fail with a clear message instead of a NullReferenceException when nothing was thrown.
    Assert.IsNotNull(actualException, "Expected an exception for the invalid similarity matrix, but none was thrown.");
    Assert.AreEqual(expectedErrorMessage, actualException.Message);

    ApplicationLog.WriteLine(string.Concat(
        "SmithWatermanAligner P2 : Expected Error message is thrown ", expectedErrorMessage));
}
/// <summary>
/// Validates that an invalid input sequence makes SmithWatermanAligner (or the sequence
/// construction that precedes it) fail with the expected error message.
/// The sequence is built either from a FastA file or from the xml configuration; when it can
/// be built, it is aligned against itself using the requested Align/AlignSimple overload.
/// </summary>
/// <param name="nodeName">Node name in the xml configuration.</param>
/// <param name="isTextFile">True to read the sequence from a FastA file, false to read it from the xml node.</param>
/// <param name="invalidSequenceType">Invalid sequence type used to select the input file.</param>
/// <param name="additionalParameter">Selects which overload (list, two sequences, all parameters) is called.</param>
/// <param name="alignType">AlignmentType.Align calls Align; any other value calls AlignSimple.</param>
/// <param name="sequenceType">Invalid sequence type used to look up the expected error message.</param>
private void InValidateSmithWatermanAlignmentWithInvalidSequence(
    string nodeName,
    bool isTextFile,
    InvalidSequenceType invalidSequenceType,
    AlignParameters additionalParameter,
    AlignmentType alignType,
    InvalidSequenceType sequenceType)
{
    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));
    Exception actualException = null;
    Sequence aInput = null;
    Sequence bInput = null;

    if (isTextFile)
    {
        // Read the xml file for getting the file that holds the invalid sequence.
        string filepath = this.GetInputFileNameWithInvalidType(nodeName, invalidSequenceType);

        try
        {
            // Parse the file and rebuild the first sequence with the configured alphabet.
            var parser = new FastAParser();
            IEnumerable<ISequence> seqs = parser.Parse(filepath);
            aInput = new Sequence(alphabet, new string(seqs.ElementAt(0).Select(a => (char) a).ToArray()));
        }
        catch (Exception ex)
        {
            // Any parsing or construction failure is the candidate "expected" exception.
            actualException = ex;
        }
    }
    else
    {
        string originalSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.InvalidSequence1);

        try
        {
            aInput = new Sequence(alphabet, originalSequence);
        }
        catch (ArgumentException ex)
        {
            actualException = ex;
        }
    }

    // Only attempt the alignment when building the sequence did not already fail.
    if (actualException == null)
    {
        bInput = aInput;

        // Create similarity matrix object for a given file. The matrix is built inside the
        // constructor, so the reader is released as soon as construction finishes.
        string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);
        SimilarityMatrix sm;
        using (var reader = new StreamReader(blosumFilePath))
        {
            sm = new SimilarityMatrix(reader);
        }

        int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
        int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), null);

        // Create SmithWatermanAligner instance and set its values. For AllParam the values
        // are passed directly to the Align/AlignSimple call instead.
        var smithWatermanObj = new SmithWatermanAligner();
        if (additionalParameter != AlignParameters.AllParam)
        {
            smithWatermanObj.SimilarityMatrix = sm;
            smithWatermanObj.GapOpenCost = gapOpenCost;
            smithWatermanObj.GapExtensionCost = gapExtensionCost;
        }

        bool useAlign = alignType == AlignmentType.Align;

        // Align the input sequences and catch the exception.
        try
        {
            switch (additionalParameter)
            {
                case AlignParameters.AlignList:
                {
                    var sequences = new List<ISequence> {aInput, bInput};
                    if (useAlign)
                    {
                        smithWatermanObj.Align(sequences);
                    }
                    else
                    {
                        smithWatermanObj.AlignSimple(sequences);
                    }

                    break;
                }

                case AlignParameters.AlignTwo:
                    if (useAlign)
                    {
                        smithWatermanObj.Align(aInput, bInput);
                    }
                    else
                    {
                        smithWatermanObj.AlignSimple(aInput, bInput);
                    }

                    break;

                case AlignParameters.AllParam:
                    if (useAlign)
                    {
                        smithWatermanObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                    }
                    else
                    {
                        smithWatermanObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    }

                    break;

                default:
                    break;
            }
        }
        catch (ArgumentException ex)
        {
            actualException = ex;
        }
    }

    // Validate Error messages for Invalid Sequence types.
    string expectedErrorMessage = this.GetExpectedErrorMeesageWithInvalidSequenceType(nodeName, sequenceType);

    // Fail with a clear message, rather than a NullReferenceException, when nothing was thrown.
    Assert.IsNotNull(actualException, "Expected an exception for the invalid sequence, but none was thrown.");
    Assert.AreEqual(expectedErrorMessage, actualException.Message);
    ApplicationLog.WriteLine(string.Concat(
        "SmithWatermanAligner P2 : Expected Error message is thrown ", expectedErrorMessage));
}
/// <summary>
/// Runs SmithWatermanAligner on two input sequences and compares the alignment with the
/// expected sequences and score read from the xml configuration.
/// </summary>
/// <param name="nodeName">Node name in the xml configuration.</param>
/// <param name="isTextFile">True to read the inputs from FastA files, false to read them from xml nodes.</param>
/// <param name="caseType">Case in which the input sequences are created; also selects the expected sequences.</param>
/// <param name="additionalParameter">Selects which overload (list, two sequences, all parameters) is called.</param>
/// <param name="alignType">AlignmentType.Align calls Align; any other value calls AlignSimple.</param>
/// <param name="similarityMatrixParam">Selects how the similarity matrix is created.</param>
private void ValidateSmithWatermanAlignment(
    string nodeName,
    bool isTextFile,
    SequenceCaseType caseType,
    AlignParameters additionalParameter,
    AlignmentType alignType,
    SimilarityMatrixParameters similarityMatrixParam)
{
    Sequence aInput, bInput;
    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    if (isTextFile)
    {
        // Read the xml file for getting both the files for aligning.
        string filePath1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
        string filePath2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

        // Parse the files and get the first sequence of each.
        var parseObjectForFile1 = new FastAParser { Alphabet = alphabet };
        ISequence originalSequence1 = parseObjectForFile1.Parse(filePath1).ElementAt(0);
        ISequence originalSequence2 = parseObjectForFile1.Parse(filePath2).ElementAt(0);

        // Create input sequence for sequence string in different cases.
        GetSequenceWithCaseType(
            originalSequence1.ConvertToString(),
            originalSequence2.ConvertToString(),
            alphabet,
            caseType,
            out aInput,
            out bInput);
    }
    else
    {
        string originalSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
        string originalSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

        // Create input sequence for sequence string in different cases.
        GetSequenceWithCaseType(
            originalSequence1,
            originalSequence2,
            alphabet,
            caseType,
            out aInput,
            out bInput);
    }

    ApplicationLog.WriteLine(string.Format("SmithWatermanAligner P2 : First sequence used is '{0}'.",
        aInput.ConvertToString()));
    ApplicationLog.WriteLine(string.Format("SmithWatermanAligner P2 : Second sequence used is '{0}'.",
        bInput.ConvertToString()));

    // Create similarity matrix object for a given file.
    string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);
    SimilarityMatrix sm;
    switch (similarityMatrixParam)
    {
        case SimilarityMatrixParameters.TextReader:
            using (TextReader reader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(reader);
            }

            break;

        case SimilarityMatrixParameters.DiagonalMatrix:
            string matchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode);
            string misMatchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MisMatchScoreNode);
            sm = new DiagonalSimilarityMatrix(int.Parse(matchValue, null), int.Parse(misMatchValue, null));
            break;

        default:
            // Dispose the reader once the matrix is built, as the TextReader case does.
            using (var reader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(reader);
            }

            break;
    }

    int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
    int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), null);

    // Create SmithWatermanAligner instance and set its values. For AllParam the values are
    // passed directly to the Align/AlignSimple call instead.
    var smithWatermanObj = new SmithWatermanAligner();
    if (additionalParameter != AlignParameters.AllParam)
    {
        smithWatermanObj.SimilarityMatrix = sm;
        smithWatermanObj.GapOpenCost = gapOpenCost;
        smithWatermanObj.GapExtensionCost = gapExtensionCost;
    }

    IList<IPairwiseSequenceAlignment> result = null;
    bool useAlign = alignType == AlignmentType.Align;

    // Align the input sequences.
    switch (additionalParameter)
    {
        case AlignParameters.AlignList:
        {
            var sequences = new List<ISequence> {aInput, bInput};
            result = useAlign
                ? smithWatermanObj.Align(sequences)
                : smithWatermanObj.AlignSimple(sequences);
            break;
        }

        case AlignParameters.AlignTwo:
            result = useAlign
                ? smithWatermanObj.Align(aInput, bInput)
                : smithWatermanObj.AlignSimple(aInput, bInput);
            break;

        case AlignParameters.AllParam:
            result = useAlign
                ? smithWatermanObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput)
                : smithWatermanObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
            break;

        default:
            break;
    }

    // An unsupported parameter combination must fail clearly instead of comparing null.
    Assert.IsNotNull(result, "No alignment was produced for the requested align parameters.");

    // Get the expected sequence and score from xml config.
    string expectedSequence1, expectedSequence2, expectedScore;
    switch (alignType)
    {
        case AlignmentType.Align:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionScoreNode);
            switch (caseType)
            {
                case SequenceCaseType.LowerCase:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence1InLower);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence2InLower);
                    break;

                default:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence1Node);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence2Node);
                    break;
            }

            break;

        default:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedScoreNode);
            switch (caseType)
            {
                case SequenceCaseType.LowerCase:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence1inLowerNode);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence2inLowerNode);
                    break;

                case SequenceCaseType.LowerUpperCase:
                    // Mixed case: first expected sequence is read from the lower-case node,
                    // second from the regular node.
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence1inLowerNode);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
                    break;

                default:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode1);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
                    break;
            }

            break;
    }

    // Match the alignment result with expected result.
    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    var alignedSeq = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(alphabet, expectedSequence1),
        SecondSequence = new Sequence(alphabet, expectedSequence2),
        Score = Convert.ToInt32(expectedScore, null),
        FirstOffset = Int32.MinValue,
        SecondOffset = Int32.MinValue,
    };
    align.PairwiseAlignedSequences.Add(alignedSeq);
    expectedOutput.Add(align);

    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Final Score '{0}'.", expectedScore));
    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Aligned First Sequence is '{0}'.",
        expectedSequence1));
    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Aligned Second Sequence is '{0}'.",
        expectedSequence2));

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Verifies that parsing the EmptySequenceWithID.fasta test file throws an exception.
/// </summary>
public void TestFastaWhenParsingSequenceWithEmptyData()
{
    // Locate the test file next to the executing assembly.
    const string relativepath = @"\TestUtils\Fasta\EmptySequenceWithID.fasta";

    // NOTE(review): Substring(6) presumably strips the "file:\" scheme prefix left over from
    // the CodeBase URI - confirm this holds on every platform the tests run on.
    string assemblypath = Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().GetName().CodeBase).Substring(6);
    string filepath = assemblypath + relativepath;

    // Without this check a missing file would also throw and let the test pass for the wrong reason.
    Assert.IsTrue(File.Exists(filepath));

    FastAParser parser = new FastAParser();
    bool parseFailed = false;
    try
    {
        parser.Parse(filepath).First();
    }
    catch (Exception)
    {
        // OK - the parser rejected the file.
        parseFailed = true;
    }

    // Asserting outside the try block keeps the assertion failure from being swallowed by
    // the catch-all handler above.
    Assert.IsTrue(parseFailed, "Expected the parser to throw for a sequence with empty data.");
}
/// <summary>
/// Reads the sequences of a FastA file using a freshly created FastAParser.
/// </summary>
/// <param name="filename">Path of the FastA file to read.</param>
/// <returns>The sequences returned by the parser for the given file.</returns>
private static IEnumerable<ISequence> ParseFastA(string filename)
{
    return new FastAParser().Parse(filename);
}
/// <summary>
/// Validates PairwiseOverlapAlignment algorithm for the parameters passed.
/// The inputs are read from the xml nodes when the align parameter name contains "Code",
/// otherwise from the configured FastA files; the result is compared with the expected
/// sequences (';' separated) and score from the xml configuration.
/// </summary>
/// <param name="nodeName">Node Name in the xml.</param>
/// <param name="alignParam">parameter based on which certain validations are done.</param>
/// <param name="similarityMatrixParam">Similarity Matrix Parameter.</param>
/// <param name="alignType">Alignment Type</param>
private void ValidatePairwiseOverlapAlignment(
    string nodeName,
    AlignParameters alignParam,
    SimilarityMatrixParameters similarityMatrixParam,
    AlignmentType alignType)
{
    ISequence aInput;
    ISequence bInput;

    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    if (alignParam.ToString().Contains("Code"))
    {
        // "...Code" parameters take the sequences directly from the xml configuration.
        string sequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
        string sequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

        aInput = new Sequence(alphabet, sequence1);
        bInput = new Sequence(alphabet, sequence2);
    }
    else
    {
        // Read the xml file for getting both the files for aligning.
        string filePath1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
        string filePath2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

        var parser1 = new FastAParser { Alphabet = alphabet };
        aInput = parser1.Parse(filePath1).ElementAt(0);
        bInput = parser1.Parse(filePath2).ElementAt(0);
    }

    string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);
    SimilarityMatrix sm;

    switch (similarityMatrixParam)
    {
        case SimilarityMatrixParameters.TextReader:
            using (TextReader reader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(reader);
            }

            break;

        case SimilarityMatrixParameters.DiagonalMatrix:
            string matchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode);
            string misMatchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MisMatchScoreNode);
            sm = new DiagonalSimilarityMatrix(int.Parse(matchValue, null), int.Parse(misMatchValue, null));
            break;

        default:
            // Dispose the reader once the matrix is built, as the TextReader case does.
            using (var reader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(reader);
            }

            break;
    }

    int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
    int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), null);

    var pairwiseOverlapObj = new PairwiseOverlapAligner();
    if (AlignParameters.AllParam != alignParam)
    {
        pairwiseOverlapObj.SimilarityMatrix = sm;
        pairwiseOverlapObj.GapOpenCost = gapOpenCost;
    }

    IList<IPairwiseSequenceAlignment> result = null;
    bool useAlign = alignType == AlignmentType.Align;

    // The gap extension cost is only set on the aligner for the Align overloads.
    switch (alignParam)
    {
        case AlignParameters.AlignList:
        case AlignParameters.AlignListCode:
        {
            var sequences = new List<ISequence> {aInput, bInput};
            if (useAlign)
            {
                pairwiseOverlapObj.GapExtensionCost = gapExtensionCost;
                result = pairwiseOverlapObj.Align(sequences);
            }
            else
            {
                result = pairwiseOverlapObj.AlignSimple(sequences);
            }

            break;
        }

        case AlignParameters.AllParam:
        case AlignParameters.AllParamCode:
            if (useAlign)
            {
                pairwiseOverlapObj.GapExtensionCost = gapExtensionCost;
                result = pairwiseOverlapObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
            }
            else
            {
                result = pairwiseOverlapObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
            }

            break;

        case AlignParameters.AlignTwo:
        case AlignParameters.AlignTwoCode:
            if (useAlign)
            {
                pairwiseOverlapObj.GapExtensionCost = gapExtensionCost;
                result = pairwiseOverlapObj.Align(aInput, bInput);
            }
            else
            {
                result = pairwiseOverlapObj.AlignSimple(aInput, bInput);
            }

            break;

        default:
            break;
    }

    // An unsupported parameter combination must fail clearly instead of comparing null.
    Assert.IsNotNull(result, "No alignment was produced for the requested align parameters.");

    // Read the expected score and aligned sequences from the xml configuration.
    string expectedSequence1;
    string expectedSequence2;
    string expectedScore;

    switch (alignType)
    {
        case AlignmentType.Align:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionScoreNode);
            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence1Node);
            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence2Node);
            break;

        default:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedScoreNode);
            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode1);
            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
            break;
    }

    // Each ';' separated entry becomes one expected aligned pair, all sharing the same score.
    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    string[] expectedSequences1 = expectedSequence1.Split(';');
    string[] expectedSequences2 = expectedSequence2.Split(';');

    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    for (int i = 0; i < expectedSequences1.Length; i++)
    {
        PairwiseAlignedSequence alignedSeq = new PairwiseAlignedSequence
        {
            FirstSequence = new Sequence(alphabet, expectedSequences1[i]),
            SecondSequence = new Sequence(alphabet, expectedSequences2[i]),
            Score = Convert.ToInt32(expectedScore, null),
            FirstOffset = Int32.MinValue,
            SecondOffset = Int32.MinValue,
        };
        align.PairwiseAlignedSequences.Add(alignedSeq);
    }

    expectedOutput.Add(align);
    Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput, true));

    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P1 : Final Score '{0}'.", expectedScore));
    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P1 : Aligned First Sequence is '{0}'.",
        expectedSequence1));
    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P1 : Aligned Second Sequence is '{0}'.",
        expectedSequence2));
}
/// <summary>
/// Parses a generated 300 MB protein FastA file and checks that exactly one sequence of the
/// requested length is returned. The test body only runs in a 64-bit process.
/// </summary>
public void TestLargeFasta()
{
    // Nothing is checked outside a 64-bit process.
    if (!Environment.Is64BitProcess)
    {
        return;
    }

    int sequenceCount = 300 * 1024 * 1024; // 300 MB of data
    string filePath = CreateData(sequenceCount);
    Assert.IsTrue(File.Exists(filePath));

    try
    {
        var parser = new FastAParser { Alphabet = Alphabets.Protein };
        int count = 0;
        foreach (ISequence seq in parser.Parse(filePath))
        {
            Assert.IsNotNull(seq);
            Assert.AreEqual(sequenceCount, seq.Count);

            // Expected value goes first so a failure message reports the values the right way round.
            Assert.AreEqual("Protein", seq.Alphabet.Name);
            count++;
        }

        Assert.AreEqual(1, count);
    }
    finally
    {
        // Always remove the large generated file, even when an assertion fails.
        File.Delete(filePath);
    }
}
/// <summary>
/// Parses a SAM file through the requested Parse overload (stream or file name) and checks
/// every parsed sequence, in order, against the sequences of the expected FastA file.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="parseTypes">enum type to execute different overload</param>
void ValidateSAMParser(string nodeName, ParseOrFormatTypes parseTypes)
{
    // Input SAM file and the FastA file holding the expected sequences.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedSequenceFile = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence);

    ISequenceAlignmentParser samParser = new SAMParser();
    IList<ISequenceAlignment> parsedAlignments = null;

    // Parse SAM File
    if (parseTypes == ParseOrFormatTypes.ParseOrFormatText)
    {
        using (var samStream = File.OpenRead(filePath))
        {
            parsedAlignments = samParser.Parse(samStream).ToList();
        }
    }
    else if (parseTypes == ParseOrFormatTypes.ParseOrFormatFileName)
    {
        parsedAlignments = samParser.Parse(filePath).ToList();
    }

    // Get expected sequences
    var fastaParser = new FastAParser();
    var expected = fastaParser.Parse(expectedSequenceFile).ToList();

    // Renders a sequence as text by casting each symbol to a char.
    Func<ISequence, string> asText = s => new string(s.Select(symbol => (char)symbol).ToArray());

    // Validate parsed output with expected output; 'position' walks the expected list
    // in the same order the parsed sequences are visited.
    int position = 0;
    foreach (ISequenceAlignment alignment in parsedAlignments)
    {
        foreach (IAlignedSequence aligned in alignment.AlignedSequences)
        {
            foreach (ISequence actual in aligned.Sequences)
            {
                Assert.AreEqual(asText(expected[position]), asText(actual));
                position++;
            }
        }
    }
}
/// <summary>
/// Runs NucmerPairwiseAligner on reference and query sequences and checks that every aligned
/// sequence appears in the comma separated list of expected sequences.
/// </summary>
/// <param name="nodeName">Node name in the xml configuration.</param>
/// <param name="isFilePath">True to read the sequences from FastA files, false to read them from xml nodes.</param>
/// <param name="isAlignList">True to align the first reference and first query sequence as one list;
/// false to align the full reference list against the full query list.</param>
/// <param name="addParam">Additional parameter; AlignSimilarityMatrix sets a Blosum50 similarity matrix.</param>
/// <param name="propParam">Selects which aligner properties are overridden from the xml configuration.</param>
/// <param name="isAmbiguous">True to build xml-provided sequences with the ambiguous variant of the alphabet.</param>
private void ValidateNUCmerAlignGeneralTestCases(
    string nodeName,
    bool isFilePath,
    bool isAlignList,
    AdditionalParameters addParam,
    PropertyParameters propParam,
    bool isAmbiguous)
{
    IList<ISequence> refSeqList = new List<ISequence>();
    IList<ISequence> searchSeqList = new List<ISequence>();

    if (isFilePath)
    {
        // Gets the reference sequence from the FastA file
        string filePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);

        Assert.IsNotNull(filePath);
        ApplicationLog.WriteLine(string.Format(null, "NUCmer P1 : Successfully validated the File Path '{0}'.",
            filePath));

        var fastaparserobj = new FastAParser();
        foreach (ISequence seq in fastaparserobj.Parse(filePath))
        {
            refSeqList.Add(seq);
        }

        // Gets the query sequence from the FastA file
        string queryFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);

        Assert.IsNotNull(queryFilePath);
        ApplicationLog.WriteLine(string.Format(null, "NUCmer P1 : Successfully validated the File Path '{0}'.",
            queryFilePath));

        var queryParserobj = new FastAParser();
        foreach (ISequence seq in queryParserobj.Parse(queryFilePath))
        {
            searchSeqList.Add(seq);
        }
    }
    else
    {
        // Gets the reference & search sequences from the configuration file
        string[] referenceSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ReferenceSequencesNode);
        string[] searchSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.SearchSequencesNode);

        IAlphabet seqAlphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

        IAlphabet ambAlphabet = null;
        if (isAmbiguous)
        {
            // The name is matched against fixed keys, so lower-case it culture-independently.
            switch (seqAlphabet.Name.ToLowerInvariant())
            {
                case "dna":
                case "ambiguousdna":
                    ambAlphabet = AmbiguousDnaAlphabet.Instance;
                    break;
                case "rna":
                case "ambiguousrna":
                    ambAlphabet = AmbiguousRnaAlphabet.Instance;
                    break;
                case "protein":
                case "ambiguousprotein":
                    ambAlphabet = AmbiguousProteinAlphabet.Instance;
                    break;
                default:
                    break;
            }
        }
        else
        {
            ambAlphabet = seqAlphabet;
        }

        for (int i = 0; i < referenceSequences.Length; i++)
        {
            ISequence referSeq = new Sequence(ambAlphabet, Encoding.ASCII.GetBytes(referenceSequences[i]));
            referSeq.ID = "ref " + i;
            refSeqList.Add(referSeq);
        }

        for (int i = 0; i < searchSequences.Length; i++)
        {
            ISequence searchSeq = new Sequence(ambAlphabet, Encoding.ASCII.GetBytes(searchSequences[i]));
            searchSeq.ID = "qry " + i;
            searchSeqList.Add(searchSeq);
        }
    }

    // Gets the mum length from the xml
    string mumLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);

    var nucmerObj = new NucmerPairwiseAligner();

    // Check for additional parameters and update the object accordingly
    switch (addParam)
    {
        case AdditionalParameters.AlignSimilarityMatrix:
            nucmerObj.SimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum50);
            break;
        default:
            break;
    }

    // Update other values for NUCmer object
    nucmerObj.MaximumSeparation = 0;
    nucmerObj.MinimumScore = 2;
    nucmerObj.SeparationFactor = 0.12f;
    nucmerObj.BreakLength = 2;
    nucmerObj.LengthOfMUM = long.Parse(mumLength, null);

    // SeparationFactor is a fractional value (see the 0.12f default above), so it is parsed
    // as a float with the invariant culture; whole-number configuration values still parse.
    switch (propParam)
    {
        case PropertyParameters.MinimumScore:
            nucmerObj.MinimumScore = int.Parse(
                this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MinimumScoreNode), null);
            break;

        case PropertyParameters.MaximumSeparation:
            nucmerObj.MaximumSeparation = int.Parse(
                this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MaximumSeparationNode), null);
            break;

        case PropertyParameters.FixedSeparation:
            nucmerObj.FixedSeparation = int.Parse(
                this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FixedSeparationNode), null);
            break;

        case PropertyParameters.SeparationFactor:
            nucmerObj.SeparationFactor = float.Parse(
                this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SeparationFactorNode),
                CultureInfo.InvariantCulture);
            break;

        case PropertyParameters.FixedSeparationAndSeparationFactor:
            nucmerObj.SeparationFactor = float.Parse(
                this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SeparationFactorNode),
                CultureInfo.InvariantCulture);
            nucmerObj.FixedSeparation = int.Parse(
                this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FixedSeparationNode), null);
            break;

        case PropertyParameters.MaximumFixedAndSeparationFactor:
            nucmerObj.MaximumSeparation = int.Parse(
                this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MaximumSeparationNode), null);
            nucmerObj.SeparationFactor = float.Parse(
                this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SeparationFactorNode),
                CultureInfo.InvariantCulture);
            nucmerObj.FixedSeparation = int.Parse(
                this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FixedSeparationNode), null);
            break;

        default:
            break;
    }

    IList<ISequenceAlignment> align = null;

    if (isAlignList)
    {
        var listOfSeq = new List<ISequence> {refSeqList.ElementAt(0), searchSeqList.ElementAt(0)};
        align = nucmerObj.Align(listOfSeq);
    }
    else
    {
        align = nucmerObj.Align(refSeqList, searchSeqList);
    }

    // Expected sequences come from a file when the inputs came from files, otherwise from the xml node.
    string expectedSequences = isFilePath
        ? this.utilityObj.xmlUtil.GetFileTextValue(nodeName, Constants.ExpectedSequencesNode)
        : this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequencesNode);

    string[] expSeqArray = expectedSequences.Split(',');

    // Every aligned first/second sequence must be one of the comma separated expected sequences.
    foreach (IPairwiseSequenceAlignment seqAlignment in align)
    {
        foreach (PairwiseAlignedSequence alignedSeq in seqAlignment)
        {
            var actualStr = alignedSeq.FirstSequence.ConvertToString();
            Assert.IsTrue(expSeqArray.Contains(actualStr));

            actualStr = alignedSeq.SecondSequence.ConvertToString();
            Assert.IsTrue(expSeqArray.Contains(actualStr));
        }
    }

    ApplicationLog.WriteLine("NUCmer P1 : Successfully validated all the aligned sequences.");
}
/// <summary>
/// Parses a single alignment from a SAM file through the requested ParseOne overload
/// (stream or file name) and checks its sequences, in order, against the expected FastA file.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="parseTypes">enum type to execute different overload</param>
void ValidateSAMParserWithParseOne(string nodeName, ParseOrFormatTypes parseTypes)
{
    // Input SAM file and the FastA file holding the expected sequences.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedSequenceFile = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence);

    ISequenceAlignmentParser samParser = new SAMParser();
    ISequenceAlignment parsedAlignment = null;

    // Parse SAM File
    if (parseTypes == ParseOrFormatTypes.ParseOrFormatText)
    {
        using (var samStream = File.OpenRead(filePath))
        {
            parsedAlignment = samParser.ParseOne(samStream);
        }
    }
    else if (parseTypes == ParseOrFormatTypes.ParseOrFormatFileName)
    {
        parsedAlignment = samParser.ParseOne(filePath);
    }

    // Get expected sequences
    FastAParser fastaParser = new FastAParser();
    IList<ISequence> expected = fastaParser.Parse(expectedSequenceFile).ToList();

    // Validate parsed output with expected output; the parsed sequences are flattened in
    // aligned-sequence order and compared with the expected list position by position.
    int position = 0;
    foreach (ISequence actual in parsedAlignment.AlignedSequences.SelectMany(aligned => aligned.Sequences))
    {
        Assert.AreEqual(expected[position].ConvertToString(), actual.ConvertToString());
        position++;
    }
}
/// <summary>
/// Parses a FastA file, choosing the parser based on Helper.IsZippedFasta.
/// </summary>
/// <param name="fileName">Name of the file to parse.</param>
/// <returns>The sequences returned by the selected parser.</returns>
private static IEnumerable<ISequence> Parse(string fileName)
{
    // Regular FastA files go straight to the plain parser.
    if (!Helper.IsZippedFasta(fileName))
    {
        return new FastAParser().Parse(fileName);
    }

    return new GZipFastAParser().Parse(fileName);
}
/// <summary>
/// Formats the first alignment of a SAM file to Constants.SAMTempFileName through the
/// requested Format overload (stream or file name), parses the written file back and checks
/// its sequences, in order, against the expected FastA file.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="formatTypes">enum type to execute different overload</param>
void ValidateSAMFormatter(string nodeName, ParseOrFormatTypes formatTypes)
{
    // Input SAM file and the FastA file holding the expected sequences.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedSequenceFile = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence);

    ISequenceAlignmentParser samParser = new SAMParser();
    IList<ISequenceAlignment> alignments = samParser.Parse(filePath).ToList();
    SAMFormatter samFormatter = new SAMFormatter();

    // Write the first alignment with the requested overload.
    // NOTE(review): the temp file is left on disk afterwards - confirm whether callers rely on it.
    if (formatTypes == ParseOrFormatTypes.ParseOrFormatText)
    {
        using (var output = File.Create(Constants.SAMTempFileName))
        {
            samFormatter.Format(output, alignments[0]);
        }
    }
    else if (formatTypes == ParseOrFormatTypes.ParseOrFormatFileName)
    {
        samFormatter.Format(alignments[0], Constants.SAMTempFileName);
    }

    // Read back what was just written.
    alignments = samParser.Parse(Constants.SAMTempFileName).ToList();

    // Get expected sequences
    FastAParser fastaParser = new FastAParser();
    IList<ISequence> expected = fastaParser.Parse(expectedSequenceFile).ToList();

    // Renders a sequence as text by casting each symbol to a char.
    Func<ISequence, string> asText = s => new string(s.Select(symbol => (char)symbol).ToArray());

    // Validate parsed output with expected output; 'position' walks the expected list
    // in the same order the round-tripped sequences are visited.
    int position = 0;
    foreach (ISequenceAlignment alignment in alignments)
    {
        foreach (IAlignedSequence aligned in alignment.AlignedSequences)
        {
            foreach (ISequence actual in aligned.Sequences)
            {
                Assert.AreEqual(asText(expected[position]), asText(actual));
                position++;
            }
        }
    }
}