/// <summary>
/// Parses FASTA-formatted text and converts every parsed sequence to a BioPatML sequence.
/// </summary>
/// <param name="reader">Text reader supplying the FASTA content to parse.</param>
/// <returns>The converted sequences, in the order the parser returned them.</returns>
public override SequenceList Read(TextReader reader)
{
    // Let the MBF FASTA parser do the actual reading.
    ISequenceParser parser = new FastaParser();
    List<ISequence> parsed = parser.Parse(reader);

    SequenceList converted = new SequenceList();
    for (int index = 0; index < parsed.Count; index++)
    {
        // Each parsed entry is cast to the concrete Sequence type before conversion.
        Sequence current = (Sequence)parsed[index];
        converted.Add(ConvertToBioPatMLSeq(current));
    }

    return converted;
}
// Benchmark: aligns every FASTA file found in the sub-directories of
// testData\FASTA\RNA\k10 with the PAMSAM aligner, scores each result (Q and TC)
// against the sequences as originally parsed from the file, and prints per-file,
// periodic (every 1000 datasets) and final average scores to the console.
public void TestMsaBenchMarkOnBralibase()
{
    List<float> qScores = new List<float>();
    List<float> tcScores = new List<float>();

    DirectoryInfo benchmarkRoot = new DirectoryInfo(@"testData\FASTA\RNA\k10");

    // Aligner-wide static switches.
    PAMSAMMultipleSequenceAligner.FasterVersion = false;
    PAMSAMMultipleSequenceAligner.UseWeights = false;
    PAMSAMMultipleSequenceAligner.UseStageB = false;
    PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

    // Alignment parameters.
    MoleculeType moleculeType = MoleculeType.RNA;
    int gapOpen = -20;
    int gapExtend = -5;
    int kmer = 4;
    int degrees = 2;      // alternative considered: Environment.ProcessorCount
    int partitions = 16;  // alternative considered: Environment.ProcessorCount * 2
    DistanceFunctionTypes distanceFunction = DistanceFunctionTypes.EuclideanDistance;
    UpdateDistanceMethodsTypes clusteringMethod = UpdateDistanceMethodsTypes.Average;
    ProfileAlignerNames profileAligner = ProfileAlignerNames.NeedlemanWunschProfileAligner;
    ProfileScoreFunctionNames profileScoreFunction = ProfileScoreFunctionNames.WeightedInnerProductCached;

    // Pick the similarity matrix that matches the molecule type.
    SimilarityMatrix matrix;
    if (moleculeType == MoleculeType.DNA)
    {
        matrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
    }
    else if (moleculeType == MoleculeType.RNA)
    {
        matrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
    }
    else if (moleculeType == MoleculeType.Protein)
    {
        matrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
    }
    else
    {
        throw new Exception("Invalid molecular type");
    }

    foreach (DirectoryInfo datasetDir in benchmarkRoot.GetDirectories())
    {
        foreach (FileInfo datasetFile in datasetDir.GetFiles())
        {
            String path = datasetFile.FullName;
            Console.WriteLine(path);

            // Parse the file, then strip the alignment to obtain the aligner input.
            ISequenceParser fastaParser = new FastaParser();
            IList<ISequence> reference = fastaParser.Parse(path);
            List<ISequence> unaligned = MsaUtils.UnAlign(reference);

            int sequenceCount = reference.Count;
            Console.WriteLine("The number of sequences is: {0}", sequenceCount);
            Console.WriteLine("Original unaligned sequences are:");

            PAMSAMMultipleSequenceAligner aligner = new PAMSAMMultipleSequenceAligner(
                unaligned,
                moleculeType,
                kmer,
                distanceFunction,
                clusteringMethod,
                profileAligner,
                profileScoreFunction,
                matrix,
                gapOpen,
                gapExtend,
                partitions,
                degrees);

            Console.WriteLine("Aligned sequences final: {0}", aligner.AlignmentScore);

            // Score the produced alignment against the parsed sequences.
            float q = MsaUtils.CalculateAlignmentScoreQ(aligner.AlignedSequences, reference);
            float tc = MsaUtils.CalculateAlignmentScoreTC(aligner.AlignedSequences, reference);
            qScores.Add(q);
            tcScores.Add(tc);

            Console.WriteLine("Alignment score Q is: {0}", q);
            Console.WriteLine("Alignment score TC is: {0}", tc);

            // Periodic progress report.
            bool reportNow = (qScores.Count % 1000) == 0;
            if (reportNow)
            {
                Console.WriteLine(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
                Console.WriteLine("average Q score is: {0}", MsaUtils.Mean(qScores.ToArray()));
                Console.WriteLine("average TC score is: {0}", MsaUtils.Mean(tcScores.ToArray()));
            }
        }
    }

    // Final summary over all datasets.
    Console.WriteLine("number of datasets is: {0}", qScores.Count);
    Console.WriteLine("average Q score is: {0}", MsaUtils.Mean(qScores.ToArray()));
    Console.WriteLine("average TC score is: {0}", MsaUtils.Mean(tcScores.ToArray()));
}
// Parses the single-record protein FASTA file 186972391.fasta three ways
// (Parse from a reader, ParseOne from a reader, ParseOne from a file name)
// and checks that each yields the same sequence text, length, alphabet and ID.
public void FastaFor186972391()
{
    string expectedSequence =
        "IFYEPVEILGYDNKSSLVLVKRLITRMYQQKSLISSLNDSNQNEFWGHKNSFSSHFSSQMVSEGFGVILE" +
        "IPFSSRLVSSLEEKRIPKSQNLRSIHSIFPFLEDKLSHLNYVSDLLIPHPIHLEILVQILQCWIKDVPSL" +
        "HLLRLFFHEYHNLNSLITLNKSIYVFSKRKKRFFGFLHNSYVYECEYLFLFIRKKSSYLRSISSGVFLER" +
        "THFYGKIKYLLVVCCNSFQRILWFLKDTFIHYVRYQGKAIMASKGTLILMKKWKFHLVNFWQSYFHFWFQ" +
        "PYRINIKQLPNYSFSFLGYFSSVRKNPLVVRNQMLENSFLINTLTQKLDTIVPAISLIGSLSKAQFCTVL" +
        "GHPISKPIWTDLSDSDILDRFCRICRNLCRYHSGSSKKQVLYRIKYIFRLSCARTLARKHKSTVRTFMRR" +
        "LGSGFLEEFFLEEE";
    const string expectedId = "gi|186972391|gb|ACC99454.1| maturase K [Scaphosepalum rapax]";

    string filepath = @"TestUtils\FASTA\186972391.fasta";
    Assert.IsTrue(File.Exists(filepath));

    // 'using' guarantees the parser is disposed even when an assertion fails.
    using (FastaParser parser = new FastaParser())
    {
        // Parse() from a reader: exactly one sequence is expected.
        IList<ISequence> seqs = null;
        using (StreamReader reader = File.OpenText(filepath))
        {
            seqs = parser.Parse(reader);
        }

        Assert.IsNotNull(seqs);
        Assert.AreEqual(1, seqs.Count);
        AssertIsExpectedProteinSequence((Sequence)seqs[0], expectedSequence, expectedId);

        // Try it again with ParseOne, from reader and from filename
        Sequence seq;
        using (StreamReader reader = File.OpenText(filepath))
        {
            seq = (Sequence)parser.ParseOne(reader);
        }

        AssertIsExpectedProteinSequence(seq, expectedSequence, expectedId);

        seq = (Sequence)parser.ParseOne(filepath);
        AssertIsExpectedProteinSequence(seq, expectedSequence, expectedId);
    }
}

// Checks that a parsed sequence has the expected text, encoded length, "Protein" alphabet and ID.
private static void AssertIsExpectedProteinSequence(Sequence seq, string expectedSequence, string expectedId)
{
    Assert.IsNotNull(seq);
    Assert.AreEqual(expectedSequence, seq.ToString());
    Assert.AreEqual(expectedSequence.Length, seq.EncodedValues.Length);
    Assert.IsNotNull(seq.Alphabet);

    // Expected value goes first so a failure message reports the values the right way round.
    Assert.AreEqual("Protein", seq.Alphabet.Name);
    Assert.AreEqual(expectedId, seq.ID);
}
// Round-trip test: parses NC_005213.ffn, writes the sequences to a temp file with
// the FASTA formatter, parses that temp file again and checks that IDs and
// sequence text are unchanged.
public void FastaFormatter()
{
    // Test with FASTA file from Simon
    string filepathOriginal = @"TestUtils\FASTA\NC_005213.ffn";
    Assert.IsTrue(File.Exists(filepathOriginal));

    FastaFormatter formatter = new FastaFormatter();

    // One parser per file. Both stay alive until all comparisons are done and
    // are then disposed, including when an assertion fails.
    using (FastaParser originalParser = new FastaParser())
    using (FastaParser roundTripParser = new FastaParser())
    {
        // Read the original file
        IList<ISequence> seqsOriginal = originalParser.Parse(filepathOriginal);
        Assert.IsNotNull(seqsOriginal);

        // Use the formatter to write the original sequences to a temp file
        string filepathTmp = Path.GetTempFileName();
        using (TextWriter writer = new StreamWriter(filepathTmp))
        {
            foreach (Sequence s in seqsOriginal)
            {
                formatter.Format(s, writer);
            }
        }

        // Read the new file, then compare the sequences
        IList<ISequence> seqsNew = roundTripParser.Parse(filepathTmp);
        Assert.IsNotNull(seqsNew);

        // Now compare the sequences.
        int countOriginal = seqsOriginal.Count;
        int countNew = seqsNew.Count;
        Assert.AreEqual(countOriginal, countNew);

        for (int i = 0; i < countOriginal; i++)
        {
            Assert.AreEqual(seqsOriginal[i].ID, seqsNew[i].ID);

            string orgSeq = seqsOriginal[i].ToString();
            string newSeq = seqsNew[i].ToString();
            Assert.AreEqual(orgSeq, newSeq);
        }

        // Passed all the tests, delete the tmp file. If we failed an Assert,
        // the tmp file will still be there in case we need it for debugging.
        File.Delete(filepathTmp);
    }
}
/// <summary>
/// Validate submit job and FetchResultAsync() using multiple input sequences.
/// Submits the sequences parsed from the configured FASTA file, polls the request
/// status (at most 10 checks) until it is ready, then validates the fetched alignment.
/// </summary>
/// <param name="nodeName">xml node name</param>
void ValidateFetchResultAsync(string nodeName)
{
    // Read input from config file
    string filepath = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string emailId = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.EmailIDNode);
    string clusterOption = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.ClusterOptionNode);
    string actionAlign = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.ActionAlignNode);

    ConfigParameters configparams = new ConfigParameters();
    ClustalWParser clustalparser = new ClustalWParser();
    configparams.UseBrowserProxy = true;
    TestIClustalWServiceHandler handler =
        new TestIClustalWServiceHandler(clustalparser, configparams);

    ClustalWParameters parameters = new ClustalWParameters();
    parameters.Values[ClustalWParameters.Email] = emailId;
    parameters.Values[ClustalWParameters.ClusterOption] = clusterOption;
    parameters.Values[ClustalWParameters.ActionAlign] = actionAlign;

    IList<ISequence> sequence = null;

    // Get input sequences
    using (FastaParser parser = new FastaParser())
    {
        sequence = parser.Parse(filepath);
    }

    // Submit job and validate it returned valid job id and control id
    ServiceParameters svcparameters = handler.SubmitRequest(sequence, parameters);

    // A valid submission must carry a non-empty job id ...
    Assert.IsFalse(string.IsNullOrEmpty(svcparameters.JobId));
    Console.WriteLine(string.Concat("JobId:", svcparameters.JobId));

    // ... and every returned service parameter must have a non-empty value.
    foreach (string key in svcparameters.Parameters.Keys)
    {
        Assert.IsFalse(string.IsNullOrEmpty(svcparameters.Parameters[key].ToString()));
        Console.WriteLine(string.Format((IFormatProvider)null, "{0}:{1}",
            key, svcparameters.Parameters[key].ToString()));
    }

    // Get the results and validate it is not null.
    ClustalWResult result = null;
    int retrycount = 0;
    ServiceRequestInformation info;
    do
    {
        info = handler.GetRequestStatus(svcparameters);
        if (info.Status == ServiceRequestStatus.Ready)
        {
            break;
        }

        // Back off only while the job is waiting or queued; the delay grows
        // with the retry count (the first wait is therefore zero).
        Thread.Sleep(
            info.Status == ServiceRequestStatus.Waiting
            || info.Status == ServiceRequestStatus.Queued
                ? Constants.ClusterRetryInterval * retrycount
                : 0);

        retrycount++;
    }
    while (retrycount < 10);

    if (info.Status == ServiceRequestStatus.Ready)
    {
        result = handler.FetchResultsAsync(svcparameters);
    }

    Assert.IsNotNull(result);
    Assert.IsNotNull(result.SequenceAlignment);

    foreach (IAlignedSequence alignSeq in result.SequenceAlignment.AlignedSequences)
    {
        Console.WriteLine("Aligned Sequence Sequences : ");
        ApplicationLog.WriteLine("Aligned Sequence Sequences : ");
        foreach (ISequence seq in alignSeq.Sequences)
        {
            Console.WriteLine(string.Concat("Sequence:", seq.ToString()));
            ApplicationLog.WriteLine(string.Concat("Sequence:", seq.ToString()));
        }
    }

    Console.WriteLine(@"ClustalWServiceHandler BVT : Submit job and Get Results is successfully completed using FetchResultAsync()");
    ApplicationLog.WriteLine(@"ClustalWServiceHandler BVT : Submit job and Get Results is successfully completed using FetchResultAsync()");
}
/// <summary>
/// Parse General test cases for Data Virtualization.
/// Parses the configured FASTA file with data virtualization enforced, using the
/// parser overload selected by <paramref name="addParam"/>, and validates either the
/// MaxNumberOfBlocks property or the sequence text, length, alphabet and ID.
/// </summary>
/// <param name="nodeName">Xml node name</param>
/// <param name="addParam">Additional parameter</param>
static void ParseGeneralTestCases(string nodeName, AdditionalParameters addParam)
{
    // Gets the expected sequence from the Xml
    string filePath = Utility._xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    Assert.IsTrue(File.Exists(filePath));

    // Logs information to the log file
    ApplicationLog.WriteLine(string.Format(null,
        "FastA Parser BVT: File Exists in the Path '{0}'.", filePath));
    Console.WriteLine(string.Format(null,
        "FastA Parser BVT: File Exists in the Path '{0}'.", filePath));

    IList<ISequence> seqsList = null;
    FastaParser parserObj = new FastaParser();
    parserObj.EnforceDataVirtualization = true;
    Sequence parseOneSeq = null;

    string expectedSequence = Utility._xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode);
    string[] expectedSequences = expectedSequence.Split(',');

    // Gets the SequenceAlignment list based on the parameters.
    switch (addParam)
    {
        case AdditionalParameters.Parse:
        case AdditionalParameters.Properties:
            seqsList = parserObj.Parse(filePath);
            break;
        case AdditionalParameters.ParseOne:
            parseOneSeq = (Sequence)parserObj.ParseOne(filePath);
            break;
        case AdditionalParameters.ParseReadOnly:
            seqsList = parserObj.Parse(filePath, false);
            break;
        case AdditionalParameters.ParseOneReadOnly:
            parseOneSeq = (Sequence)parserObj.ParseOne(filePath, false);
            break;
        default:
            break;
    }

    // Check if ParseOne or Parse was used for parsing
    if (null == seqsList)
    {
        // Fail with a clear assertion instead of a NullReferenceException later
        // when ParseOne produced nothing (or addParam selected no parser call).
        Assert.IsNotNull(parseOneSeq);
        seqsList = new List<ISequence>();
        seqsList.Add(parseOneSeq);
    }
    else
    {
        Assert.AreEqual(2, seqsList.Count);
    }

    ApplicationLog.WriteLine(string.Format(null,
        "FastA Parser BVT: Number of Sequences found are '{0}'.",
        seqsList.Count.ToString((IFormatProvider)null)));
    Console.WriteLine(string.Format(null,
        "FastA Parser BVT: Number of Sequences found are '{0}'.",
        seqsList.Count.ToString((IFormatProvider)null)));

    // Validating by setting the MaxNumberOfBlocks
    int seqNumber = 0;
    foreach (Sequence seq in seqsList)
    {
        seq.MaxNumberOfBlocks = 5;

        // Read the sequence symbol by symbol through the indexer.
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < seq.Count; i++)
        {
            sb.Append(seq[i].Symbol.ToString());
        }

        switch (addParam)
        {
            case AdditionalParameters.Properties:
                Assert.AreEqual(5, seq.MaxNumberOfBlocks);
                ApplicationLog.WriteLine("FastA Parser BVT: The Properties are as expected.");
                Console.WriteLine("FastA Parser BVT: The Properties are as expected.");
                break;
            default:
                Assert.AreEqual(expectedSequences[seqNumber], sb.ToString());
                ApplicationLog.WriteLine(string.Format(null,
                    "FastA Parser BVT: The FASTA sequence '{0}' validation after Parse() is found to be as expected.",
                    expectedSequences[seqNumber]));

                // Logs to the NUnit GUI (Console.Out) window
                Console.WriteLine(string.Format(null,
                    "FastA Parser BVT: The FASTA sequence '{0}' validation after Parse() is found to be as expected.",
                    expectedSequences[seqNumber]));

                Assert.AreEqual(expectedSequences[seqNumber].Length, seq.Count);
                ApplicationLog.WriteLine(string.Format(null,
                    "FastA Parser BVT: The FASTA Length sequence '{0}' is as expected.",
                    expectedSequences[seqNumber].Length));

                string[] alphabets = Utility._xmlUtil.GetTextValue(nodeName,
                    Constants.AlphabetNameNode).ToLower(CultureInfo.CurrentCulture).Split(',');

                Assert.IsNotNull(seq.Alphabet);

                // Expected value first, actual value second.
                Assert.AreEqual(alphabets[seqNumber],
                    seq.Alphabet.Name.ToLower(CultureInfo.CurrentCulture));
                ApplicationLog.WriteLine(string.Format(null,
                    "FastA Parser BVT: The Sequence Alphabet is '{0}' and is as expected.",
                    seq.Alphabet.Name));

                string[] seqIDs = Utility._xmlUtil.GetTextValue(nodeName,
                    Constants.SequenceIdNode).ToLower(CultureInfo.CurrentCulture).Split('/');

                Assert.AreEqual(seqIDs[seqNumber].ToLower(CultureInfo.CurrentCulture),
                    seq.ID.ToLower(CultureInfo.CurrentCulture));
                ApplicationLog.WriteLine(string.Format(null,
                    "FastA Parser BVT: The Sequence ID is '{0}' and is as expected.", seq.ID));

                // Logs to the NUnit GUI (Console.Out) window
                Console.WriteLine(string.Format(null,
                    "FastA Parser BVT: The Sequence ID is '{0}' and is as expected.", seq.ID));
                break;
        }

        seqNumber++;
    }
}
// Performance test: times NeedlemanWunschAligner.AlignSimple() on the first
// sequence of the configured reference and query FASTA files, records the change
// in managed memory around the call, and prints the details and alignment score.
public void PerformNeedlemanWunschPerf()
{
    // Input locations come from the test configuration.
    string refPath = Utility._xmlUtil.GetTextValue(
        Constants.AlignmentAlgorithmNodeName, Constants.RefFilePathNode);
    string queryPath = Utility._xmlUtil.GetTextValue(
        Constants.AlignmentAlgorithmNodeName, Constants.QueryFilePathNode);
    string smFilePath = Utility._xmlUtil.GetTextValue(
        Constants.AlignmentAlgorithmNodeName, Constants.SMFilePathNode);

    // Files reported in the test-case header.
    List<string> lstInputFiles = new List<string> { refPath, queryPath };

    // Parse both inputs, each with its own parser instance.
    FastaParser fastaParser = new FastaParser();
    IList<ISequence> referenceSeqs = fastaParser.Parse(refPath);
    fastaParser = new FastaParser();
    IList<ISequence> querySeqs = fastaParser.Parse(queryPath);

    // Rebuild the first sequence of each file as a DNA sequence.
    IAlphabet dna = Alphabets.DNA;
    ISequence firstReference = referenceSeqs[0];
    ISequence firstQuery = querySeqs[0];
    ISequence aInput = new Sequence(dna, firstReference.ToString());
    ISequence bInput = new Sequence(dna, firstQuery.ToString());

    SimilarityMatrix matrix = new SimilarityMatrix(smFilePath);

    nwObj = new NeedlemanWunschAligner();
    nwObj.GapOpenCost = -10;
    nwObj.GapExtensionCost = -10;
    nwObj.SimilarityMatrix = matrix;

    // Start timing, then sample memory immediately before the alignment.
    _watchObj = Stopwatch.StartNew();
    long memoryBefore = GC.GetTotalMemory(false);

    // Align the two sequences with the Needleman-Wunsch aligner.
    IList<IPairwiseSequenceAlignment> alignment = nwObj.AlignSimple(aInput, bInput);

    _watchObj.Stop();
    long memoryAfter = GC.GetTotalMemory(false);
    long memoryDelta = memoryAfter - memoryBefore;
    string memoryUsed = memoryDelta.ToString();

    // Display Needlemanwunsch perf test case execution details.
    DisplayTestCaseHeader(lstInputFiles, _watchObj, memoryUsed, "NeedlemanWunsch");

    string score = alignment[0].PairwiseAlignedSequences[0].Score.ToString();
    Console.WriteLine(string.Format(
        "Needleman Wunsch AlignSimple() method, Alignment Score is : {0}",
        score));

    // Release the aligner reference.
    nwObj = null;
}
/// <summary>
/// Validate formatted BAM file.
/// Parses the configured BAM file, writes it back out through the BAM formatter
/// overload selected by <paramref name="BAMParserPam"/>, re-parses the written file
/// and compares its header and aligned sequences with the expected values.
/// The temporary BAM and index files are removed whether or not validation succeeds.
/// </summary>
/// <param name="nodeName">Different xml nodes used for different test cases</param>
/// <param name="BAMParserPam">BAM Format method parameters</param>
void ValidateBAMFormatter(string nodeName, BAMParserParameters BAMParserPam)
{
    // Get input and output values from xml node.
    string bamFilePath = _utilityObj._xmlUtil.GetTextValue(nodeName,
        Constants.FilePathNode);
    string expectedAlignedSeqFilePath = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);
    string alignedSeqCount = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.AlignedSeqCountNode);

    try
    {
        using (BAMParser bamParserObj = new BAMParser())
        {
            // Parse a BAM file.
            SequenceAlignmentMap seqAlignment = bamParserObj.Parse(bamFilePath);

            // Create a BAM formatter object.
            BAMFormatter formatterObj = new BAMFormatter();

            // Write/Format aligned sequences to BAM file.
            switch (BAMParserPam)
            {
                case BAMParserParameters.StreamWriter:
                    using (Stream stream = new FileStream(Constants.BAMTempFileName,
                        FileMode.Create, FileAccess.Write))
                    {
                        formatterObj.Format(seqAlignment, stream);
                    }
                    break;
                case BAMParserParameters.FileName:
                    formatterObj.Format(seqAlignment, Constants.BAMTempFileName);
                    break;
                case BAMParserParameters.IndexFile:
                    formatterObj.Format(seqAlignment, Constants.BAMTempFileName,
                        Constants.BAMTempIndexFile);

                    // The index-file overload must actually produce the index file.
                    Assert.IsTrue(File.Exists(Constants.BAMTempIndexFile));
                    break;
                default:
                    break;
            }

            // Parse formatted BAM file and validate aligned sequences.
            SequenceAlignmentMap expectedSeqAlignmentMap = bamParserObj.Parse(
                Constants.BAMTempFileName);

            // Validate Parsed BAM file Header record fields.
            ValidateBAMHeaderRecords(nodeName, expectedSeqAlignmentMap);

            IList<SAMAlignedSequence> alignedSeqs = expectedSeqAlignmentMap.QuerySequences;

            // The expected count is configured as text, so compare as strings.
            Assert.AreEqual(alignedSeqCount, alignedSeqs.Count.ToString((IFormatProvider)null));

            // Get expected sequences
            using (FastaParser parserObj = new FastaParser())
            {
                IList<ISequence> expectedSequences = parserObj.Parse(expectedAlignedSeqFilePath);

                // Validate aligned sequences from BAM file.
                for (int index = 0; index < alignedSeqs.Count; index++)
                {
                    Assert.AreEqual(expectedSequences[index].ToString(),
                        alignedSeqs[index].QuerySequence.ToString());

                    // Log to NUNIT GUI.
                    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                        "BAM Formatter BVT : Validated Aligned sequence :{0} successfully",
                        alignedSeqs[index].QuerySequence.ToString()));
                    Console.WriteLine(string.Format((IFormatProvider)null,
                        "BAM Formatter BVT : Validated the aligned sequence :{0} successfully",
                        alignedSeqs[index].QuerySequence.ToString()));
                }
            }
        }
    }
    finally
    {
        // Clean up after the parser has been disposed, even when an assertion failed,
        // so a failed run does not leave stale temp files for the next test.
        File.Delete(Constants.BAMTempFileName);
        File.Delete(Constants.BAMTempIndexFile);
    }
}
/// <summary>
/// Parses the configured SAM file through the parser overload chosen by
/// <paramref name="method"/> and checks every parsed query sequence against the
/// expected sequences read from a FASTA file.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="method">enum type to execute different overload</param>
void ValidateSAMParserSeqAlign(
    string nodeName,
    ParseOrFormatTypes method)
{
    // Input file and expected-result file come from the test configuration.
    string samPath = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string expectedPath = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);

    using (SAMParser samParser = new SAMParser())
    {
        SequenceAlignmentMap parsedMap = null;

        // Select the Parse overload under test.
        if (method == ParseOrFormatTypes.ParseOrFormatText)
        {
            using (TextReader textReader = new StreamReader(samPath))
            {
                parsedMap = samParser.Parse(textReader);
            }
        }
        else if (method == ParseOrFormatTypes.ParseOrFormatTextWithFlag)
        {
            using (TextReader textReader = new StreamReader(samPath))
            {
                parsedMap = samParser.Parse(textReader, true);
            }
        }
        else if (method == ParseOrFormatTypes.ParseOrFormatFileName)
        {
            parsedMap = samParser.Parse(samPath);
        }
        else if (method == ParseOrFormatTypes.ParseOrFormatFileNameWithFlag)
        {
            parsedMap = samParser.Parse(samPath, true);
        }

        // Load the expected sequences and compare.
        using (FastaParser fastaParser = new FastaParser())
        {
            IList<ISequence> expected = fastaParser.Parse(expectedPath);

            // Every sequence held by the i-th aligned entry must equal the i-th expected sequence.
            int alignedIndex = 0;
            while (alignedIndex < parsedMap.QuerySequences.Count)
            {
                int sequenceIndex = 0;
                while (sequenceIndex < parsedMap.QuerySequences[alignedIndex].Sequences.Count)
                {
                    Assert.AreEqual(
                        expected[alignedIndex].ToString(),
                        parsedMap.QuerySequences[alignedIndex].Sequences[sequenceIndex].ToString());
                    sequenceIndex++;
                }

                alignedIndex++;
            }
        }
    }
}
/// <summary>
/// Validate formatter all format method overloads with filePath\textwriter.
/// Parses the configured SAM file, writes it to the temp SAM file through the
/// formatter overload selected by <paramref name="formatTypes"/>, re-parses the temp
/// file and compares every sequence with the expected sequences from a FASTA file.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="formatTypes">enum type to execute different overload</param>
void ValidateSAMFormatter(string nodeName, ParseOrFormatTypes formatTypes)
{
    // Gets the expected sequence from the Xml
    string filePath = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string expectedSequenceFile = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);

    // Own the parser through its concrete type so it can be disposed without an
    // unchecked 'as' cast; the test itself still goes through the interface.
    using (SAMParser samParser = new SAMParser())
    {
        ISequenceAlignmentParser parser = samParser;
        IList<ISequenceAlignment> alignments = parser.Parse(filePath);
        SAMFormatter formatter = new SAMFormatter();

        switch (formatTypes)
        {
            case ParseOrFormatTypes.ParseOrFormatText:
                using (TextWriter writer = new StreamWriter(Constants.SAMTempFileName))
                {
                    formatter.Format(alignments[0], writer);
                }
                break;
            case ParseOrFormatTypes.ParseOrFormatFileName:
                formatter.Format(alignments[0], Constants.SAMTempFileName);
                break;
            case ParseOrFormatTypes.ParseOrFormatFileNameWithFlag:
                formatter.Format(alignments, Constants.SAMTempFileName);
                break;
        }

        // Read back what the formatter wrote.
        alignments = parser.Parse(Constants.SAMTempFileName);

        // Get expected sequences
        using (FastaParser parserObj = new FastaParser())
        {
            IList<ISequence> expectedSequences = parserObj.Parse(expectedSequenceFile);

            // Validate parsed output with expected output; 'count' walks the flat
            // expected list while the loops walk the nested alignment structure.
            int count = 0;
            for (int index = 0; index < alignments.Count; index++)
            {
                for (int ialigned = 0; ialigned <
                    alignments[index].AlignedSequences.Count; ialigned++)
                {
                    for (int iseq = 0; iseq <
                        alignments[index].AlignedSequences[ialigned].Sequences.Count; iseq++)
                    {
                        Assert.AreEqual(expectedSequences[count].ToString(),
                            alignments[index].AlignedSequences[ialigned].Sequences[iseq].ToString());
                        count++;
                    }
                }
            }
        }
    }
}
/// <summary>
/// Validate parser parse one method overloads with filePath\textreader.
/// Parses the configured SAM file through the ParseOne overload selected by
/// <paramref name="parseTypes"/> and compares every parsed sequence with the
/// expected sequences read from a FASTA file.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="parseTypes">enum type to execute different overload</param>
void ValidateSAMParserWithParseOne(string nodeName, ParseOrFormatTypes parseTypes)
{
    // Gets the expected sequence from the Xml
    string filePath = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string expectedSequenceFile = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);

    // Own the parser through its concrete type so it can be disposed without an
    // unchecked 'as' cast; the test itself still goes through the interface.
    using (SAMParser samParser = new SAMParser())
    {
        ISequenceAlignmentParser parser = samParser;
        ISequenceAlignment alignment = null;

        // Parse SAM File
        switch (parseTypes)
        {
            case ParseOrFormatTypes.ParseOrFormatText:
                using (TextReader reader = new StreamReader(filePath))
                {
                    alignment = parser.ParseOne(reader);
                }
                break;
            case ParseOrFormatTypes.ParseOrFormatTextWithFlag:
                using (TextReader reader = new StreamReader(filePath))
                {
                    alignment = parser.ParseOne(reader, true);
                }
                break;
            case ParseOrFormatTypes.ParseOrFormatFileName:
                alignment = parser.ParseOne(filePath);
                break;
            case ParseOrFormatTypes.ParseOrFormatFileNameWithFlag:
                alignment = parser.ParseOne(filePath, true);
                break;
        }

        // Get expected sequences
        using (FastaParser parserObj = new FastaParser())
        {
            IList<ISequence> expectedSequences = parserObj.Parse(expectedSequenceFile);

            // Validate parsed output with expected output; 'count' walks the flat
            // expected list while the loops walk the nested alignment structure.
            int count = 0;
            for (int ialigned = 0; ialigned <
                alignment.AlignedSequences.Count; ialigned++)
            {
                for (int iseq = 0; iseq <
                    alignment.AlignedSequences[ialigned].Sequences.Count; iseq++)
                {
                    Assert.AreEqual(expectedSequences[count].ToString(),
                        alignment.AlignedSequences[ialigned].Sequences[iseq].ToString());
                    count++;
                }
            }
        }
    }
}
/// <summary>
/// Runs BLAST for every sequence in a FASTA file and stores each result as
/// &lt;ProjectDir&gt;\xml\&lt;position&gt;.xml, where position is the zero-based order of
/// the sequence in the input file.
/// Sequences whose result file already exists and parses are skipped; all others
/// are queued and submitted through the slots of a BlastQueue, which is polled
/// until the query queue is empty and no slot is busy.
/// </summary>
/// <param name="filename">Path of the FASTA file containing the query sequences.</param>
private void DoBLAST(string filename)
{
    // Load protein sequences
    FastaParser parser = new FastaParser();
    IList<ISequence> queryList = parser.Parse(filename).ToList();

    // Initialize and populate queue of query sequences
    Queue<QueueSequence> queryQueue = new Queue<QueueSequence>();
    int j = 0; // Input-order position of the current sequence (also used for debugging)
    int progValue = 0;
    int currentProgress = 0;
    foreach (ISequence protein in queryList)
    {
        QueueSequence qp = new QueueSequence();
        qp.Sequence = protein;
        qp.Position = j;
        string name = j.ToString();
        j++;

        // A result file from an earlier run counts as done only if it parses.
        if (File.Exists(Up.ProjectDir + "\\xml\\" + name + ".xml"))
        {
            IList<BlastResult> blastResults;
            BlastXmlParser parser2 = new BlastXmlParser();
            try
            {
                blastResults = parser2.Parse(Up.ProjectDir + "\\xml\\" + name + ".xml");
                progValue = Convert.ToInt32(Math.Round((double)currentProgress / queryList.Count() * 100, 0));
                UpdateProgressBar(progValue, "Validating BLAST results.");
                currentProgress++;
            }
            catch
            {
                // Any failure while validating the existing file means the
                // sequence has to be submitted again.
                queryQueue.Enqueue(qp);
            }
        }
        else
        {
            queryQueue.Enqueue(qp);
        }
    }

    // Initialize BLAST queue positions to having no jobs (EMPTY)
    BlastQueue blastQueue = new BlastQueue();

    UpdateProgressBar(progValue, "Starting up BLAST service, please wait.");

    // While there are proteins left to submit to BLAST, or there are
    // busy jobs still on the queue
    while (queryQueue.Count > 0 || blastQueue.isBlastQueueBusy())
    {
        // Iterate over blastQueue
        for (int i = 0; i < BlastQueue.Length; i++)
        {
            // Get blastJob from array and update status
            BlastJob blastJob = blastQueue[i];

            // Rebuild the queue entry for the job in this slot so it can be
            // re-queued if the job turns out to have failed.
            QueueSequence qp = new QueueSequence();
            qp.Sequence = blastJob.Query;
            qp.Position = blastJob.Position;

            // if queue position is AVAILABLE
            if (blastJob.JobStatus == BlastJob.AVAILABLE)
            {
                if (queryQueue.Count > 0)
                {
                    QueueSequence qp2 = queryQueue.Dequeue();

                    // try to submit job, enqueue back the protein if submission failed.
                    try
                    {
                        blastQueue[i] = submit(qp2);
                        if (blastQueue[i].JobStatus == BlastJob.FAILED)
                        {
                            blastQueue[i].JobStatus = BlastJob.AVAILABLE;
                            queryQueue.Enqueue(qp2);
                        }
                        else
                        {
                            UpdateProgressBar(progValue, "Submitting sequences to NCBI BLAST");
                        }
                    }
                    catch (Exception eee)
                    {
                        // NOTE(review): when submit() throws, qp2 has already been
                        // dequeued and is not put back, so this sequence appears to
                        // be dropped from the run — confirm whether that is intended.
                        MessageBox.Show(eee.Message);
                    }
                }
            }
            else
            {
                // The slot holds a submitted job: poll its status.
                string jobId = blastJob.JobId;
                NCBIBlastHandler blastService = blastJob.BlastService;
                ServiceRequestInformation info = blastService.GetRequestStatus(jobId);

                // Pause after every status request.
                Thread.Sleep(BlastQueue.RequestDelay);

                switch (info.Status)
                {
                    case ServiceRequestStatus.Error:
                        // Free the slot and retry the sequence later.
                        blastQueue[i].JobStatus = BlastJob.AVAILABLE;
                        queryQueue.Enqueue(qp);
                        break;
                    case ServiceRequestStatus.Canceled:
                        // Same handling as Error: free the slot and retry.
                        blastQueue[i].JobStatus = BlastJob.AVAILABLE;
                        queryQueue.Enqueue(qp);
                        break;
                    case ServiceRequestStatus.Ready:
                        // Download the result and save it under the sequence's input position.
                        string result = blastService.GetResult(jobId, blastJob.SearchParams);
                        string name = blastJob.Position.ToString();

                        // NOTE(review): the writer is closed explicitly rather than
                        // through 'using'; an exception in Write() would skip Close().
                        TextWriter tw = new StreamWriter(Up.ProjectDir + "\\xml\\" + name + ".xml");
                        tw.Write(result);
                        tw.Close();
                        Debug.WriteLine("BLAST JOB: " + jobId + " , " + name + " , " + info.StatusInformation);

                        // Added by VF on Jan, 22, 2013. Catches invalid BLAST records
                        IList<BlastResult> blastResults;
                        BlastXmlParser parser2 = new BlastXmlParser();
                        bool parsePassed = false;
                        int fetchAttempts = 0;

                        // Verify the saved file parses; on failure fetch and save the
                        // result again, giving up after 3 failed attempts.
                        while (!parsePassed && fetchAttempts < 3)
                        {
                            try
                            {
                                blastResults = parser2.Parse(Up.ProjectDir + "\\xml\\" + name + ".xml");
                                parsePassed = true;
                                Debug.WriteLine("FETCH OK JobId: " + jobId + " InputOrder: " + name + ". This is attempt:" + fetchAttempts.ToString());
                            }
                            catch (Exception eee)
                            {
                                Debug.WriteLine("Trying to fetch JobId: " + jobId + " InputOrder: " + name + ". This is attempt: " + fetchAttempts.ToString());
                                parsePassed = false;
                                result = blastService.GetResult(jobId, blastJob.SearchParams);
                                TextWriter tw2 = new StreamWriter(Up.ProjectDir + "\\xml\\" + name + ".xml");
                                tw2.Write(result);
                                tw2.Close();
                                fetchAttempts += 1;
                            }

                            // One-second pause after every attempt, successful or not.
                            Thread.Sleep(1000);
                        }

                        // Final check of the file on disk (it may have been rewritten by
                        // the last failed attempt above); if it still does not parse,
                        // free the slot and re-queue the sequence.
                        try
                        {
                            blastResults = parser2.Parse(Up.ProjectDir + "\\xml\\" + name + ".xml");
                        }
                        catch (Exception eee)
                        {
                            blastQueue[i].JobStatus = BlastJob.AVAILABLE;
                            queryQueue.Enqueue(qp);
                            Debug.WriteLine("REQUEUE of JobId: " + " " + jobId + " InputOrder: " + name + " because max fetch is " + fetchAttempts.ToString());
                            break;
                        }

                        // The result is saved and valid: advance progress and free the slot.
                        currentProgress += 1;
                        progValue = Convert.ToInt32(Math.Round((double)currentProgress / queryList.Count() * 100, 0));
                        UpdateProgressBar(progValue, "Saving");
                        blastQueue[i].JobStatus = BlastJob.AVAILABLE;

                        // Immediately reuse the freed slot for the next waiting sequence.
                        if (queryQueue.Count > 0)
                        {
                            QueueSequence qp3 = queryQueue.Dequeue();
                            try
                            {
                                blastQueue[i] = submit(qp3);
                                if (blastQueue[i].JobStatus == BlastJob.FAILED)
                                {
                                    blastQueue[i].JobStatus = BlastJob.AVAILABLE;
                                    queryQueue.Enqueue(qp3);
                                }
                                else
                                {
                                    UpdateProgressBar(progValue, "Submitting sequences to NCBI BLAST");
                                }
                            }
                            catch (Exception eee)
                            {
                                // Unlike the submission path for an AVAILABLE slot, a
                                // failure here aborts the whole run by rethrowing.
                                MessageBox.Show(eee.Message);
                                MessageBox.Show("Error creating a jobId for sequence " + qp3.Position);
                                throw new Exception("Error creating a jobId for sequence" + qp3.Position);
                            }
                        }
                        break;
                    case ServiceRequestStatus.Queued:
                        // Still pending: check again on the next pass.
                        break;
                    case ServiceRequestStatus.Waiting:
                        // Still pending: check again on the next pass.
                        break;
                    default:
                        // NOTE(review): the slot is neither freed nor re-queued for an
                        // unrecognized status — confirm the loop can still terminate.
                        MessageBox.Show("BLAST error " + info.Status + " " + blastJob.JobStatus + " for " + qp.Position);
                        break;
                }
            }
        }
    }
}
/// <summary>
/// Performs the work attached to a wizard step. Steps without any associated
/// work ("UserControl0", "UserControl3", "UserControl7" and unknown names) are ignored.
/// </summary>
/// <param name="task">Name of the user control whose step has just been completed.</param>
private void RunTask(string task)
{
    switch (task)
    {
        case "UserControl1":
        {
            Util.SetupDirectories(Up.ProjectDir);
            break;
        }

        case "UserControl2":
        {
            // (Re)load the FASTA file when the project has no genes.fasta yet,
            // or when the user picked a file to replace the existing one.
            bool genesFileExists = File.Exists(Up.ProjectDir + "\\genes.fasta");
            if (!genesFileExists || Up.FastaFile != "")
            {
                FastaParser fastaParser = new FastaParser();
                try
                {
                    Up.QuerySequences = fastaParser.Parse(Up.FastaFile).ToList();
                }
                catch
                {
                    FatalErrorDialog("Error parsing FASTA file. Please confirm that the input file is in FASTA format. If the problem persists file a bug report at blip.codeplex.com. The application will now be closed.");
                }

                File.Copy(Up.FastaFile, Up.ProjectDir + "\\genes.fasta", true);
            }

            break;
        }

        case "UserControl4":
        {
            // Record the BLAST settings chosen on the form, then start the pipeline.
            UserControl4 blastForm = CurrentControl as UserControl4;
            string program = ((ComboBoxItem)blastForm.BlastProgram.SelectedItem).Content.ToString();
            string database = blastForm.BlastDatabase.SelectedItem.ToString();
            string algorithm = blastForm.BlastAlgorithm.SelectedItem.ToString();
            BlastUtil.RecordBlastThresholds(Up, blastForm, program, database, algorithm);

            LaunchBlastPipeline(Up.ProjectDir + "\\genes.fasta");
            break;
        }

        case "UserControl5":
        {
            LogMessage("Set Pivot parameters.\r\n");
            break;
        }

        case "UserControl6":
        {
            // The collection name is fixed to "blip"; only the title comes from the form.
            UserControl6 pivotForm = CurrentControl as UserControl6;
            Pivot.RecordPivotParameters(Up, "blip", pivotForm.CollectionTitleBox.Text);
            break;
        }

        case "UserControl8":
        {
            UserControl8 previewForm = CurrentControl as UserControl8;
            previewForm.SaveImagePreviewState();
            WriteCollection();
            break;
        }

        case "UserControl9":
        {
            UserControl9 projectChoice = CurrentControl as UserControl9;

            // New project: rebuild the full wizard sequence.
            if (projectChoice.createProject.IsChecked == true)
            {
                Debug.WriteLine("NEW");
                UserControls.Clear();
                UserControls.Add(new UserControl0(Up));
                UserControls.Add(new UserControl9(Up));
                UserControls.Add(new UserControl1(Up));
                UserControls.Add(new UserControl2(Up));
                UserControls.Add(new UserControl6(Up));
                UserControls.Add(new UserControl4(Up));
                UserControls.Add(new UserControl5(Up));
                UserControls.Add(new UserControl8(Up));
                UserControls.Add(new UserControl7(Up));
            }

            // Existing project: only the load-related steps are needed.
            if (projectChoice.loadProject.IsChecked == true)
            {
                UserControls.Clear();
                UserControls.Add(new UserControl0(Up));
                UserControls.Add(new UserControl9(Up));
                UserControls.Add(new UserControl10(Up));
                UserControls.Add(new UserControl11(Up));
            }

            break;
        }

        case "UserControl10":
        {
            Debug.WriteLine("UC10");

            // Start the web server on a background task.
            Action<object> startServer = (object state) =>
            {
                StartWebServer("/", Up.CxmlDir, Up.WebServerPort);
            };
            Task serverTask = new Task(startServer, "BLiP_WS");
            serverTask.Start();

            // Only Finish remains available from here on.
            Previous_Button.IsEnabled = false;
            Next_Button.IsEnabled = false;
            Finish_Button.IsEnabled = true;
            break;
        }

        default:
        {
            // No work for this step.
            break;
        }
    }
}
/// <summary>
/// Handles a click on the Next button: runs the task of the step being left,
/// moves to the following step and shows that step's control.
/// </summary>
/// <param name="sender">Source of the click event.</param>
/// <param name="e">Routed event data.</param>
private void Next_Click(object sender, RoutedEventArgs e)
{
    // After moving forward there is always a step to go back to.
    Previous_Button.IsEnabled = true;

    // Run the task associated with the current step, then take its control off the UI.
    RunTask(CurrentControl.Name);
    MainGrid.Children.Remove(CurrentControl);

    // Advance unless already on the last step.
    if (CurrentStep < (UserControls.Count() - 1))
    {
        CurrentStep += 1;
    }

    // On the last step the Next/Finish button states are left untouched here.
    if (CurrentStep != (UserControls.Count() - 1))
    {
        Next_Button.IsEnabled = true;
        Finish_Button.IsEnabled = false;
    }

    // Show the control that belongs to the (possibly updated) current step.
    UserControl nextControl = UserControls[CurrentStep];
    nextControl.VerticalAlignment = VerticalAlignment.Top;
    MainGrid.Children.Add(nextControl);
    Grid.SetRow(nextControl, 0);
    Grid.SetColumn(nextControl, 1);
    CurrentControl = nextControl;

    // Refresh the log view.
    LogText.Text = Up.Log;

    // SPECIAL CASE: on the FASTA upload step, Next stays disabled until a file
    // has been selected or genes.fasta already exists in the project directory.
    if (nextControl.Name == "UserControl2")
    {
        Next_Button.IsEnabled = false;

        string projectFasta = Up.ProjectDir + "\\genes.fasta";
        if (File.Exists(projectFasta))
        {
            UserControl2 fastaStep = CurrentControl as UserControl2;
            fastaStep.LoadFastaFileGrid.Children.Clear();

            FastaParser parser = new FastaParser();
            try
            {
                Up.QuerySequences = parser.Parse(projectFasta).ToList();
            }
            catch
            {
                FatalErrorDialog("Error parsing FASTA file. Please confirm that the input file is in FASTA format. If the problem persists file a bug report at blip.codeplex.com. The application will now be closed.");
            }

            // Replace the upload UI with a note about the file that is already present.
            TextBlock notice = new TextBlock
            {
                Text = "A file with " + Up.QuerySequences.Count() + " gene sequences exists in this folder.\n\nIf you want to load a new file select an empty project folder instead.\r\n",
                Margin = new Thickness(55)
            };
            fastaStep.LoadFastaFileGrid.Children.Add(notice);
            Next_Button.IsEnabled = true;
        }
        else
        {
            // No file yet: swap in a fresh upload control for this step.
            MainGrid.Children.Remove(CurrentControl);
            UserControls[CurrentStep] = new UserControl2(Up);
            nextControl = UserControls[CurrentStep];
            nextControl.VerticalAlignment = VerticalAlignment.Top;
            MainGrid.Children.Add(nextControl);
            Grid.SetRow(nextControl, 0);
            Grid.SetColumn(nextControl, 1);
            CurrentControl = nextControl;
        }

        // Re-enable navigation once the upload control raises RunCompleted.
        UserControl2 uploadStep = nextControl as UserControl2;
        uploadStep.RunCompleted += (source, args) =>
        {
            Next_Button.IsEnabled = true;
            Previous_Button.IsEnabled = true;
        };
    }

    if (nextControl.Name == "UserControl7")
    {
        UserControl7 collectionStep = CurrentControl as UserControl7;
        collectionStep.CollectionUrlBox.Text = Up.CollectionUrl;
    }
}
/// <summary>
/// Submits a job for the sequences parsed from a FASTA file and validates the
/// result returned by FetchResultsSync().
/// </summary>
/// <param name="nodeName">xml node name</param>
void ValidateFetchResultSync(string nodeName)
{
    // Input values from the config file.
    string filepath = Utility._xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string emailId = Utility._xmlUtil.GetTextValue(nodeName, Constants.EmailIDNode);
    string clusterOption = Utility._xmlUtil.GetTextValue(nodeName, Constants.ClusterOptionNode);
    string actionAlign = Utility._xmlUtil.GetTextValue(nodeName, Constants.ActionAlignNode);

    // Service handler built from a parser and the config parameters.
    ConfigParameters configparams = new ConfigParameters { UseBrowserProxy = true };
    ClustalWParser clustalparser = new ClustalWParser();
    TestIClustalWServiceHandler handler =
        new TestIClustalWServiceHandler(clustalparser, configparams);

    // Request parameters.
    ClustalWParameters parameters = new ClustalWParameters();
    parameters.Values[ClustalWParameters.Email] = emailId;
    parameters.Values[ClustalWParameters.ClusterOption] = clusterOption;
    parameters.Values[ClustalWParameters.ActionAlign] = actionAlign;

    // Input sequences.
    FastaParser parser = new FastaParser();
    IList<ISequence> sequence = parser.Parse(filepath);

    // Submit the job; the job id and every returned parameter must be non-empty.
    ServiceParameters svcparameters = handler.SubmitRequest(sequence, parameters);
    Assert.IsNotEmpty(svcparameters.JobId);

    string jobLine = string.Concat("JobId", svcparameters.JobId);
    Console.WriteLine(jobLine);
    ApplicationLog.WriteLine(jobLine);

    foreach (string key in svcparameters.Parameters.Keys)
    {
        string value = svcparameters.Parameters[key].ToString();
        Assert.IsNotEmpty(value);

        string parameterLine = string.Format("{0}:{1}", key, value);
        Console.WriteLine(parameterLine);
        ApplicationLog.WriteLine(parameterLine);
    }

    // Fetch the results and check that an alignment came back.
    ClustalWResult result = handler.FetchResultsSync(svcparameters);
    Assert.IsNotNull(result);
    Assert.IsNotNull(result.SequenceAlignment);

    const string header = "Aligned Sequence Sequences :";
    foreach (IAlignedSequence alignSeq in result.SequenceAlignment.AlignedSequences)
    {
        Console.WriteLine(header);
        ApplicationLog.WriteLine(header);

        foreach (ISequence seq in alignSeq.Sequences)
        {
            string sequenceLine = string.Concat("Sequence:", seq.ToString());
            Console.WriteLine(sequenceLine);
            ApplicationLog.WriteLine(sequenceLine);
        }
    }

    const string completed = "ClustalWServiceHandler BVT : Submit job and Get Results is successfully completed using FetchResultSync()";
    Console.WriteLine(completed);
    ApplicationLog.WriteLine(completed);
}
/// <summary>
/// Builds a suffix tree for a reference sequence, finds the matches of a query
/// sequence in it and validates them for the requested LIS action.
/// </summary>
/// <param name="nodeName">Node name which needs to be read for execution.</param>
/// <param name="isFilePath">
/// True to read both sequences from FASTA files named in the configuration;
/// false to read the sequence text from the configuration itself.
/// </param>
/// <param name="LISActionType">LIS action type enum</param>
static void ValidateFindMatchSuffixGeneralTestCases(string nodeName,
    bool isFilePath, LISParameters LISActionType)
{
    ISequence referenceSeq;
    ISequence querySeq;
    string referenceSequence;
    string querySequence;

    if (!isFilePath)
    {
        // Reference sequence text and alphabet come straight from the configuration.
        referenceSequence = Utility._xmlUtil.GetTextValue(nodeName, Constants.SequenceNode);
        string referenceAlphabet = Utility._xmlUtil.GetTextValue(nodeName,
            Constants.AlphabetNameNode);
        referenceSeq = new Sequence(Utility.GetAlphabet(referenceAlphabet), referenceSequence);

        // Same for the query sequence.
        querySequence = Utility._xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceNode);
        string queryAlphabet = Utility._xmlUtil.GetTextValue(nodeName,
            Constants.SearchSequenceAlphabetNode);
        querySeq = new Sequence(Utility.GetAlphabet(queryAlphabet), querySequence);
    }
    else
    {
        // Reference sequence: first entry of the FASTA file named in the configuration.
        string filePath = Utility._xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
        Assert.IsNotNull(filePath);
        ApplicationLog.WriteLine(string.Format(null,
            "MUMmer BVT : Successfully validated the File Path '{0}'.", filePath));

        FastaParser parser = new FastaParser();
        IList<ISequence> referenceSeqs = parser.Parse(filePath);
        referenceSeq = referenceSeqs[0];
        referenceSequence = referenceSeq.ToString();

        // Query sequence: first entry of the search FASTA file.
        string queryFilePath = Utility._xmlUtil.GetTextValue(nodeName,
            Constants.SearchSequenceFilePathNode);
        Assert.IsNotNull(queryFilePath);
        ApplicationLog.WriteLine(string.Format(null,
            "MUMmer BVT : Successfully validated the Search File Path '{0}'.", queryFilePath));

        FastaParser queryParser = new FastaParser();
        IList<ISequence> querySeqs = queryParser.Parse(queryFilePath);
        querySeq = querySeqs[0];
        querySequence = querySeq.ToString();
    }

    string mumLength = Utility._xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

    // Build the suffix tree for the reference sequence and look up the query in it.
    ISuffixTreeBuilder suffixTreeBuilder = new KurtzSuffixTreeBuilder();
    SequenceSuffixTree suffixTree = suffixTreeBuilder.BuildSuffixTree(referenceSeq);
    IList<MaxUniqueMatch> matches = suffixTreeBuilder.FindMatches(suffixTree, querySeq,
        long.Parse(mumLength, null));

    if (LISActionType == LISParameters.FindUniqueMatches)
    {
        // Validate the matches as found.
        ApplicationLog.WriteLine("MUMmer BVT : Validating the Unique Matches");
        Assert.IsTrue(ValidateUniqueMatches(matches, nodeName, LISActionType));
    }
    else if (LISActionType == LISParameters.PerformLIS)
    {
        // Validate the matches after GetLongestSequence has been applied.
        ApplicationLog.WriteLine("MUMmer BVT : Validating the Unique Matches using LIS");
        LongestIncreasingSubsequence lisObj = new LongestIncreasingSubsequence();
        IList<MaxUniqueMatch> lisMatches = lisObj.GetLongestSequence(matches);
        Assert.IsTrue(ValidateUniqueMatches(lisMatches, nodeName, LISActionType));
    }

    Console.WriteLine(string.Format(null,
        "MUMmer BVT : Successfully validated the all the unique matches for the sequence '{0}' and '{1}'.",
        referenceSequence, querySequence));
    ApplicationLog.WriteLine(string.Format(null,
        "MUMmer BVT : Successfully validated the all the unique matches for the sequence '{0}' and '{1}'.",
        referenceSequence, querySequence));
}
/// <summary>
/// Parse BAM and validate parsed aligned sequences and its properties.
/// </summary>
/// <param name="nodeName">Different xml nodes used for different test cases</param>
/// <param name="BAMParserPam">BAM Parse method parameters</param>
/// <param name="IsEncoding">
/// When true the parameterless BAMParser constructor is used; when false the
/// parser is constructed with Encodings.IupacNA.
/// NOTE(review): this is the reverse of what the parameter name suggests -
/// confirm the intent against the callers before changing it.
/// </param>
/// <param name="IsReferenceIndex">
/// When false the BAM header records are validated as well; when true that
/// validation is skipped.
/// </param>
void ValidateBAMParser(string nodeName,
    BAMParserParameters BAMParserPam, bool IsEncoding,
    bool IsReferenceIndex)
{
    // Get input and output values from xml node.
    string bamFilePath = _utilityObj._xmlUtil.GetTextValue(nodeName,
        Constants.FilePathNode);
    string expectedAlignedSeqFilePath = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.ExpectedSequence);
    string refIndexValue = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.RefIndexNode);
    string startIndexValue = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.StartIndexNode);
    string endIndexValue = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.EndIndexNode);
    string alignedSeqCount = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.AlignedSeqCountNode);

    SequenceAlignmentMap seqAlignment = null;

    // Construct the parser before entering the try block: if a constructor
    // throws there is nothing to dispose, and the finally block can no longer
    // hide that exception behind a NullReferenceException.
    BAMParser bamParser;
    if (IsEncoding)
    {
        bamParser = new BAMParser();
    }
    else
    {
        bamParser = new BAMParser(Encodings.IupacNA);
    }

    try
    {
        // Parse a BAM file with different parameters.
        switch (BAMParserPam)
        {
            case BAMParserParameters.StreamReader:
                using (Stream stream = new FileStream(bamFilePath,
                    FileMode.Open, FileAccess.Read))
                {
                    seqAlignment = bamParser.Parse(stream);
                }
                break;
            case BAMParserParameters.StreamReaderWithReadOnly:
                using (Stream stream = new FileStream(bamFilePath,
                    FileMode.Open, FileAccess.Read))
                {
                    seqAlignment = bamParser.Parse(stream, false);
                }
                break;
            case BAMParserParameters.FileName:
                seqAlignment = bamParser.Parse(bamFilePath);
                break;
            case BAMParserParameters.FileNameWithReadOnly:
                seqAlignment = bamParser.Parse(bamFilePath, false);
                break;
            case BAMParserParameters.ParseRangeFileName:
                seqAlignment = bamParser.ParseRange(bamFilePath,
                    Convert.ToInt32(refIndexValue, (IFormatProvider)null));
                break;
            case BAMParserParameters.ParseRangeFileNameWithReadOnly:
                seqAlignment = bamParser.ParseRange(bamFilePath,
                    Convert.ToInt32(refIndexValue, (IFormatProvider)null), false);
                break;
            case BAMParserParameters.ParseRangeWithIndex:
                seqAlignment = bamParser.ParseRange(bamFilePath,
                    Convert.ToInt32(refIndexValue, (IFormatProvider)null),
                    Convert.ToInt32(startIndexValue, (IFormatProvider)null),
                    Convert.ToInt32(endIndexValue, (IFormatProvider)null),
                    false);
                break;
        }

        // Validate BAM Header record fields.
        if (!IsReferenceIndex)
        {
            ValidateBAMHeaderRecords(nodeName, seqAlignment);
        }

        // The number of aligned sequences is compared as text with the configured count.
        IList<SAMAlignedSequence> alignedSeqs = seqAlignment.QuerySequences;
        Assert.AreEqual(alignedSeqCount,
            alignedSeqs.Count.ToString((IFormatProvider)null));

        // Get expected sequences
        using (FastaParser parserObj = new FastaParser())
        {
            IList<ISequence> expectedSequences =
                parserObj.Parse(expectedAlignedSeqFilePath);

            // Validate aligned sequences from BAM file, position by position.
            for (int index = 0; index < alignedSeqs.Count; index++)
            {
                Assert.AreEqual(expectedSequences[index].ToString(),
                    alignedSeqs[index].QuerySequence.ToString());

                // Log to NUNIT GUI.
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                    "BAM Parser BVT : Validated Aligned sequence :{0} successfully",
                    alignedSeqs[index].QuerySequence.ToString()));
                Console.WriteLine(string.Format((IFormatProvider)null,
                    "BAM Parser BVT : Validated the aligned sequence :{0} successfully",
                    alignedSeqs[index].QuerySequence.ToString()));
            }
        }
    }
    finally
    {
        bamParser.Dispose();
    }
}
/// <summary>
/// Runs a MUMmer alignment for the configured reference and query sequences and
/// validates the score and the aligned sequences against the expected values.
/// </summary>
/// <param name="nodeName">Node name to be read from xml</param>
/// <param name="isFilePath">
/// True to read both sequences from FASTA files named in the configuration;
/// false to read the sequence text from the configuration itself.
/// </param>
/// <param name="isSeqList">Is MUMmer alignment with List of sequences</param>
static void ValidateMUMmerAlignGeneralTestCases(string nodeName,
    bool isFilePath, bool isSeqList)
{
    ISequence referenceSeq;
    ISequence querySeq;
    IList<ISequence> querySeqs;
    string referenceSequence;
    string querySequence;

    if (!isFilePath)
    {
        // Reference sequence text and alphabet come straight from the configuration.
        referenceSequence = Utility._xmlUtil.GetTextValue(nodeName, Constants.SequenceNode);
        string referenceAlphabet = Utility._xmlUtil.GetTextValue(nodeName,
            Constants.AlphabetNameNode);
        referenceSeq = new Sequence(Utility.GetAlphabet(referenceAlphabet), referenceSequence);

        // Same for the query sequence, which becomes the only entry of the query list.
        querySequence = Utility._xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceNode);
        string queryAlphabet = Utility._xmlUtil.GetTextValue(nodeName,
            Constants.SearchSequenceAlphabetNode);
        querySeq = new Sequence(Utility.GetAlphabet(queryAlphabet), querySequence);
        querySeqs = new List<ISequence> { querySeq };
    }
    else
    {
        // Reference sequence: first entry of the FASTA file named in the configuration.
        string filePath = Utility._xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
        Assert.IsNotNull(filePath);
        ApplicationLog.WriteLine(string.Format(null,
            "MUMmer BVT : Successfully validated the File Path '{0}'.", filePath));

        FastaParser parser = new FastaParser();
        IList<ISequence> referenceSeqs = parser.Parse(filePath);
        referenceSeq = referenceSeqs[0];
        referenceSequence = referenceSeq.ToString();

        // Query sequences: every entry of the search FASTA file; the first one is used for logging.
        string queryFilePath = Utility._xmlUtil.GetTextValue(nodeName,
            Constants.SearchSequenceFilePathNode);
        Assert.IsNotNull(queryFilePath);
        ApplicationLog.WriteLine(string.Format(null,
            "MUMmer BVT : Successfully validated the Search File Path '{0}'.", queryFilePath));

        FastaParser queryParser = new FastaParser();
        querySeqs = queryParser.Parse(queryFilePath);
        querySeq = querySeqs[0];
        querySequence = querySeq.ToString();
    }

    string mumLength = Utility._xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);

    // Configure the aligner.
    MUMmer mum = new MUMmer3();
    mum.LengthOfMUM = long.Parse(mumLength, null);
    mum.PairWiseAlgorithm = new NeedlemanWunschAligner();
    mum.GapOpenCost = int.Parse(
        Utility._xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode),
        (IFormatProvider)null);

    IList<IPairwiseSequenceAlignment> align;
    if (!isSeqList)
    {
        align = mum.AlignSimple(referenceSeq, querySeqs);
    }
    else
    {
        // List overload: the reference sequence is appended to the query list.
        querySeqs.Add(referenceSeq);
        align = mum.Align(querySeqs);
    }

    // The score of the first aligned pair is compared as text with the configured score.
    string expectedScore = Utility._xmlUtil.GetTextValue(nodeName, Constants.ScoreNodeName);
    Assert.AreEqual(expectedScore,
        align[0].PairwiseAlignedSequences[0].Score.ToString((IFormatProvider)null));

    Console.WriteLine(string.Format(null,
        "MUMmer BVT : Successfully validated the score for the sequence '{0}' and '{1}'.",
        referenceSequence, querySequence));
    ApplicationLog.WriteLine(string.Format(null,
        "MUMmer BVT : Successfully validated the score for the sequence '{0}' and '{1}'.",
        referenceSequence, querySequence));

    // Build the expected alignment from the configured sequences and compare.
    string[] expectedSequences = Utility._xmlUtil.GetTextValues(nodeName,
        Constants.ExpectedSequencesNode);

    PairwiseAlignedSequence expectedPair = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[0]),
        SecondSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[1]),
        Score = int.Parse(expectedScore)
    };

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);

    IList<IPairwiseSequenceAlignment> expectedOutput =
        new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(CompareAlignment(align, expectedOutput));

    Console.WriteLine("MUMmer BVT : Successfully validated the aligned sequences.");
    ApplicationLog.WriteLine("MUMmer BVT : Successfully validated the aligned sequences.");
}
/// <summary>
/// Measures the elapsed time and the change in GC-reported memory while
/// constructing a PAMSAMMultipleSequenceAligner for the configured input, then
/// prints the alignment score and the aligned sequences.
/// </summary>
public void PerformPAMSAMPerf()
{
    Stopwatch stopwatch = new Stopwatch();

    // Input file paths from the XML configuration.
    string refPath = Utility._xmlUtil.GetTextValue(Constants.PamsamNode,
        Constants.RefFilePathNode);
    string queryPath = Utility._xmlUtil.GetTextValue(Constants.PamsamNode,
        Constants.QueryFilePathNode);

    // Both files are reported in the test header.
    List<string> lstInputFiles = new List<string> { refPath, queryPath };

    // Parse the query and reference files. The query file is parsed, but its
    // sequences are not used below.
    ISequenceParser parser = new FastaParser();
    parser.Parse(queryPath);
    IList<ISequence> orgSequences = parser.Parse(refPath);

    // Pass the reference sequences through UnAlign before aligning them.
    List<ISequence> sequences = MsaUtils.UnAlign(orgSequences);

    // Static aligner settings.
    PAMSAMMultipleSequenceAligner.FasterVersion = true;
    PAMSAMMultipleSequenceAligner.UseWeights = false;
    PAMSAMMultipleSequenceAligner.UseStageB = false;
    PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

    // Alignment parameters.
    const int gapOpenPenalty = -13;
    const int gapExtendPenalty = -5;
    const int kmerLength = 2;
    const int numberOfDegrees = 2;
    const int numberOfPartitions = 4;

    DistanceFunctionTypes distanceFunctionName = DistanceFunctionTypes.EuclideanDistance;
    UpdateDistanceMethodsTypes hierarchicalClusteringMethodName =
        UpdateDistanceMethodsTypes.Average;
    ProfileAlignerNames profileAlignerName =
        ProfileAlignerNames.NeedlemanWunschProfileAligner;
    ProfileScoreFunctionNames profileProfileFunctionName =
        ProfileScoreFunctionNames.InnerProduct;

    SimilarityMatrix similarityMatrix = new SimilarityMatrix(
        SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);

    // Start timing; the memory baseline is sampled after the timer has started.
    stopwatch.Reset();
    stopwatch.Start();
    long memoryStart = GC.GetTotalMemory(true);

    // The measured work is the construction of the aligner.
    msa = new PAMSAMMultipleSequenceAligner(
        sequences,
        MoleculeType.DNA,
        kmerLength,
        distanceFunctionName,
        hierarchicalClusteringMethodName,
        profileAlignerName,
        profileProfileFunctionName,
        similarityMatrix,
        gapOpenPenalty,
        gapExtendPenalty,
        numberOfPartitions,
        numberOfDegrees);

    stopwatch.Stop();
    long memoryEnd = GC.GetTotalMemory(true);
    string memoryUsed = (memoryEnd - memoryStart).ToString();

    // Report timing, memory, score and every aligned sequence.
    DisplayTestCaseHeader(lstInputFiles, stopwatch, memoryUsed, "PAMSAM");

    Console.WriteLine(string.Format(
        "PAMSAM SequenceAligner method, Alignment Score is : {0}",
        msa.AlignmentScore.ToString()));

    int position = 0;
    foreach (ISequence seq in msa.AlignedSequences)
    {
        Console.WriteLine(string.Format(
            "PAMSAM Aligned Seq {0}:{1}", position, seq.ToString()));
        position++;
    }
}
/// <summary>
/// Calls MUMmer GetMUMs for the configured reference and query sequences and
/// validates the returned matches.
/// </summary>
/// <param name="nodeName">Name of the XML node to be read.</param>
/// <param name="isFilePath">Is Sequence saved in File</param>
/// <param name="isAfterLIS">Is Mummer execution after LIS</param>
/// <param name="isLIS">Is Mummer execution with LIS option</param>
static void ValidateMUMsGeneralTestCases(string nodeName, bool isFilePath,
    bool isAfterLIS, bool isLIS)
{
    ISequence referenceSeq;
    ISequence querySeq;
    IList<ISequence> querySeqs;

    if (!isFilePath)
    {
        // Reference sequence text and alphabet come straight from the configuration.
        string referenceText = Utility._xmlUtil.GetTextValue(nodeName, Constants.SequenceNode);
        string referenceAlphabet = Utility._xmlUtil.GetTextValue(nodeName,
            Constants.AlphabetNameNode);
        referenceSeq = new Sequence(Utility.GetAlphabet(referenceAlphabet), referenceText);

        // Same for the query sequence, which becomes the only entry of the query list.
        string queryText = Utility._xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceNode);
        string queryAlphabet = Utility._xmlUtil.GetTextValue(nodeName,
            Constants.SearchSequenceAlphabetNode);
        querySeq = new Sequence(Utility.GetAlphabet(queryAlphabet), queryText);
        querySeqs = new List<ISequence> { querySeq };
    }
    else
    {
        // Reference sequence: first entry of the FASTA file named in the configuration.
        string filePath = Utility._xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
        Assert.IsNotNull(filePath);
        ApplicationLog.WriteLine(string.Format(null,
            "MUMmer BVT : Successfully validated the File Path '{0}'.", filePath));

        FastaParser parser = new FastaParser();
        IList<ISequence> referenceSeqs = parser.Parse(filePath);
        referenceSeq = referenceSeqs[0];
        referenceSeq.ToString();

        // Query sequences: every entry of the search FASTA file.
        string queryFilePath = Utility._xmlUtil.GetTextValue(nodeName,
            Constants.SearchSequenceFilePathNode);
        Assert.IsNotNull(queryFilePath);
        ApplicationLog.WriteLine(string.Format(null,
            "MUMmer BVT : Successfully validated the Search File Path '{0}'.", queryFilePath));

        FastaParser queryParser = new FastaParser();
        querySeqs = queryParser.Parse(queryFilePath);
        querySeq = querySeqs[0];
        querySeq.ToString();
    }

    string mumLength = Utility._xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

    MUMmer mum = new MUMmer3();
    mum.LengthOfMUM = long.Parse(mumLength, null);

    // The three-argument overload is only used when the LIS option is requested.
    IDictionary<ISequence, IList<MaxUniqueMatch>> actualResult = isLIS
        ? mum.GetMUMs(referenceSeq, querySeqs, isAfterLIS)
        : mum.GetMUMs(referenceSeq, querySeqs);

    // Validate MUMs output.
    Assert.IsTrue(ValidateMums(nodeName, actualResult, querySeq));

    Console.WriteLine("MUMmer BVT : Successfully validated the Mumms");
    ApplicationLog.WriteLine("MUMmer BVT : Successfully validated the Mumms.");
}
/// <summary>
/// Round-trip test: parses the configured FASTA file, writes the sequences to a
/// temp file with FastaFormatter, parses the temp file again and checks that
/// the sequence count, the IDs and the sequence text are unchanged.
/// </summary>
public void FastaFormatterWithParseValidateFormat()
{
    // Gets the input file path from the Xml
    string filePath = Utility._xmlUtil.GetTextValue(Constants.SimpleFastaNodeName,
        Constants.FilePathNode);
    Assert.IsTrue(File.Exists(filePath));

    // Logs information to the log file
    ApplicationLog.WriteLine(string.Format(null,
        "FastA Formatter BVT: File Exists in the Path '{0}'.", filePath));

    FastaFormatter formatter = new FastaFormatter();

    // Read the original file
    FastaParser parser = new FastaParser();
    IList<ISequence> seqsOriginal = parser.Parse(filePath);
    Assert.IsNotNull(seqsOriginal);

    // Use the formatter to write the original sequences to a temp file
    ApplicationLog.WriteLine(string.Format(null,
        "FastA Formatter BVT: Creating the Temp file '{0}'.", Constants.FastaTempFileName));
    using (TextWriter writer = new StreamWriter(Constants.FastaTempFileName))
    {
        foreach (Sequence s in seqsOriginal)
        {
            formatter.Format(s, writer);
        }
    }

    // Read the new file with a fresh parser, then compare the sequences
    parser = new FastaParser();
    IList<ISequence> seqsNew = parser.Parse(Constants.FastaTempFileName);
    Assert.IsNotNull(seqsNew);
    ApplicationLog.WriteLine(string.Format(null,
        "FastA Formatter BVT: New Sequence is '{0}'.", seqsNew[0].ToString()));

    // Now compare the sequences.
    int countOriginal = seqsOriginal.Count;
    int countNew = seqsNew.Count;
    Assert.AreEqual(countOriginal, countNew);
    ApplicationLog.WriteLine("The Number of sequences are matching.");

    for (int i = 0; i < countOriginal; i++)
    {
        Assert.AreEqual(seqsOriginal[i].ID, seqsNew[i].ID);

        string orgSeq = seqsOriginal[i].ToString();
        string newSeq = seqsNew[i].ToString();
        Assert.AreEqual(orgSeq, newSeq);

        Console.WriteLine(string.Format(null,
            "FastA Formatter BVT: The FASTA sequences '{0}' are matching with Format() method and is as expected.",
            seqsNew[i].ID));
        ApplicationLog.WriteLine(string.Format(null,
            "FastA Formatter BVT: The FASTA sequences '{0}' are matching with Format() method.",
            seqsNew[i].ID));
    }

    // Passed all the tests, delete the tmp file. If we failed an Assert,
    // the tmp file will still be there in case we need it for debugging.
    File.Delete(Constants.FastaTempFileName);
    ApplicationLog.WriteLine("Deleted the temp file created.");
}
/// <summary>
/// Builds a suffix tree over the segmented reference sequences, finds the
/// matches of the segmented search sequences in it and validates them.
/// </summary>
/// <param name="nodeName">Node name which needs to be read for execution.</param>
/// <param name="isFilePath">
/// True to read the sequences from FASTA files named in the configuration;
/// false to read the sequence texts from the configuration itself.
/// </param>
/// <param name="additionalParam">Selects which validation is logged and performed.</param>
static void ValidateFindMatchSuffixGeneralTestCases(string nodeName,
    bool isFilePath, AdditionalParameters additionalParam)
{
    ISequence referenceSeqs;
    ISequence searchSeqs;

    if (!isFilePath)
    {
        // Reference and search sequence texts come from the configuration and share one alphabet.
        string[] referenceSequences = Utility._xmlUtil.GetTextValues(nodeName,
            Constants.ReferenceSequencesNode);
        string[] searchSequences = Utility._xmlUtil.GetTextValues(nodeName,
            Constants.SearchSequencesNode);
        IAlphabet seqAlphabet = Utility.GetAlphabet(
            Utility._xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

        List<ISequence> refSeqList = new List<ISequence>();
        List<ISequence> searchSeqList = new List<ISequence>();

        foreach (string referenceText in referenceSequences)
        {
            refSeqList.Add(new Sequence(seqAlphabet, referenceText));
        }
        referenceSeqs = new SegmentedSequence(refSeqList);

        foreach (string searchText in searchSequences)
        {
            searchSeqList.Add(new Sequence(seqAlphabet, searchText));
        }
        searchSeqs = new SegmentedSequence(searchSeqList);
    }
    else
    {
        // Gets the reference sequences from the FastA file
        string filePath = Utility._xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
        Assert.IsNotNull(filePath);
        ApplicationLog.WriteLine(string.Format(null,
            "NUCmer BVT : Successfully validated the File Path '{0}'.", filePath));

        FastaParser parser = new FastaParser();
        IList<ISequence> referenceSeqList = parser.Parse(filePath);
        referenceSeqs = new SegmentedSequence(referenceSeqList);

        // Gets the query sequences from the FastA file
        string queryFilePath = Utility._xmlUtil.GetTextValue(nodeName,
            Constants.SearchSequenceFilePathNode);
        Assert.IsNotNull(queryFilePath);
        ApplicationLog.WriteLine(string.Format(null,
            "NUCmer BVT : Successfully validated the File Path '{0}'.", queryFilePath));

        FastaParser queryParser = new FastaParser();
        IList<ISequence> querySeqList = queryParser.Parse(queryFilePath);
        searchSeqs = new SegmentedSequence(querySeqList);
    }

    string mumLength = Utility._xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

    // Build the suffix tree for the reference sequences and look up the search sequences in it.
    ISuffixTreeBuilder suffixTreeBuilder = new KurtzSuffixTreeBuilder();
    SequenceSuffixTree suffixTree = suffixTreeBuilder.BuildSuffixTree(referenceSeqs);
    IList<MaxUniqueMatch> matches = suffixTreeBuilder.FindMatches(suffixTree, searchSeqs,
        long.Parse(mumLength, null));

    // Both supported modes run the same validation; only the log messages differ.
    if (additionalParam == AdditionalParameters.FindUniqueMatches)
    {
        ApplicationLog.WriteLine("NUCmer BVT : Validating the Unique Matches");
        Assert.IsTrue(ValidateUniqueMatches(matches, nodeName, additionalParam, isFilePath));
        Console.WriteLine(
            "NUCmer BVT : Successfully validated the all the unique matches for the sequences.");
    }
    else if (additionalParam == AdditionalParameters.PerformClusterBuilder)
    {
        ApplicationLog.WriteLine(
            "NUCmer BVT : Validating the Unique Matches using Cluster Builder");
        Assert.IsTrue(ValidateUniqueMatches(matches, nodeName, additionalParam, isFilePath));
        Console.WriteLine(
            "NUCmer BVT : Successfully validated the all the cluster builder matches for the sequences.");
    }

    ApplicationLog.WriteLine(
        "NUCmer BVT : Successfully validated the all the unique matches for the sequences.");
}
/// <summary>
/// Parses the first sequence of a FASTA file through one of the TextReader
/// based parser methods and validates its text, alphabet and ID.
/// </summary>
/// <param name="nodeName">Xml node name holding the FASTA file path.</param>
/// <param name="addParam">Selects which parser method is exercised.</param>
static void ParseReaderGeneralTestCases(string nodeName,
    AdditionalParameters addParam)
{
    // Gets the input file path from the Xml
    string filePath = Utility._xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    Assert.IsTrue(File.Exists(filePath));

    // Logs information to the log file
    ApplicationLog.WriteLine(string.Format(null,
        "FastA Parser BVT: File Exists in the Path '{0}'.", filePath));

    ISequence seqsObj = null;
    FastaParser parserObj = new FastaParser();

    switch (addParam)
    {
        case AdditionalParameters.ParseOne:
            using (TextReader reader = new StreamReader(filePath))
            {
                seqsObj = parserObj.ParseOne(reader);
            }
            break;
        case AdditionalParameters.ParseOneReadOnly:
            using (TextReader reader = new StreamReader(filePath))
            {
                seqsObj = parserObj.ParseOne(reader, true);
            }
            break;
        case AdditionalParameters.ParseReader:
            using (TextReader reader = new StreamReader(filePath))
            {
                seqsObj = (Sequence)parserObj.Parse(reader)[0];
            }
            break;
        case AdditionalParameters.ParseReaderReadOnly:
            using (TextReader reader = new StreamReader(filePath))
            {
                seqsObj = (Sequence)parserObj.Parse(reader, true)[0];
            }
            break;
        default:
            // Unsupported values leave seqsObj null, which fails the assertion below.
            break;
    }

    Assert.IsNotNull(seqsObj);
    ApplicationLog.WriteLine(string.Format(null,
        "FastA Parser BVT: Number of Sequences found are '{0}'.",
        seqsObj.Count.ToString((IFormatProvider)null)));

    // NOTE(review): the expected sequence, alphabet and ID below are read from
    // Constants.SimpleFastaDnaNodeName rather than from nodeName - confirm that
    // every caller is meant to be validated against that node.
    string expectedSequence = Utility._xmlUtil.GetTextValue(
        Constants.SimpleFastaDnaNodeName, Constants.ExpectedSequenceNode);
    Assert.AreEqual(expectedSequence, seqsObj.ToString());

    ApplicationLog.WriteLine(string.Format(null,
        "FastA Parser BVT: The FASTA sequence '{0}' validation after Parse() is found to be as expected.",
        seqsObj.ToString()));

    // Logs to the NUnit GUI (Console.Out) window
    Console.WriteLine(string.Format(null,
        "FastA Parser BVT: The FASTA sequence '{0}' validation after Parse() is found to be as expected.",
        seqsObj.ToString()));

    // Alphabet names are compared after lower-casing both sides. The expected
    // value goes first so a failure message labels the two values correctly.
    Assert.IsNotNull(seqsObj.Alphabet);
    string expectedAlphabet = Utility._xmlUtil.GetTextValue(
        Constants.SimpleFastaDnaNodeName,
        Constants.AlphabetNameNode).ToLower(CultureInfo.CurrentCulture);
    Assert.AreEqual(expectedAlphabet,
        seqsObj.Alphabet.Name.ToLower(CultureInfo.CurrentCulture));
    ApplicationLog.WriteLine(string.Format(null,
        "FastA Parser BVT: The Sequence Alphabet is '{0}' and is as expected.",
        seqsObj.Alphabet.Name));

    Assert.AreEqual(Utility._xmlUtil.GetTextValue(
        Constants.SimpleFastaDnaNodeName, Constants.SequenceIdNode), seqsObj.ID);
    ApplicationLog.WriteLine(string.Format(null,
        "FastA Parser BVT: The Sequence ID is '{0}' and is as expected.", seqsObj.ID));

    // Logs to the NUnit GUI (Console.Out) window
    Console.WriteLine(string.Format(null,
        "FastA Parser BVT: The Sequence ID is '{0}' and is as expected.", seqsObj.ID));
}
/// <summary>
/// Validates the NUCmer align method for several test cases for the parameters passed.
/// The reference and query sequences are read either from FastA files or from the
/// configuration Xml, aligned with NUCmer3, and the aligned sequences (joined with
/// commas) are compared with the expected value from the Xml.
/// </summary>
/// <param name="nodeName">Node name to be read from xml</param>
/// <param name="isFilePath">Is Sequence saved in File</param>
static void ValidateNUCmerAlignGeneralTestCases(string nodeName, bool isFilePath)
{
    IList<ISequence> refSeqList = new List<ISequence>();
    IList<ISequence> searchSeqList = new List<ISequence>();

    if (isFilePath)
    {
        // Gets the reference sequence from the FastA file
        string filePath = Utility._xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
        Assert.IsNotNull(filePath);
        ApplicationLog.WriteLine(string.Format(null,
            "NUCmer BVT : Successfully validated the File Path '{0}'.", filePath));

        FastaParser parser = new FastaParser();
        refSeqList = parser.Parse(filePath);

        // Gets the query sequence from the FastA file
        string queryFilePath = Utility._xmlUtil.GetTextValue(nodeName,
            Constants.SearchSequenceFilePathNode);
        Assert.IsNotNull(queryFilePath);
        ApplicationLog.WriteLine(string.Format(null,
            "NUCmer BVT : Successfully validated the File Path '{0}'.", queryFilePath));

        FastaParser queryParser = new FastaParser();
        searchSeqList = queryParser.Parse(queryFilePath);
    }
    else
    {
        // Gets the reference & search sequences from the configuration file
        string[] referenceSequences = Utility._xmlUtil.GetTextValues(nodeName,
            Constants.ReferenceSequencesNode);
        string[] searchSequences = Utility._xmlUtil.GetTextValues(nodeName,
            Constants.SearchSequencesNode);

        IAlphabet seqAlphabet = Utility.GetAlphabet(
            Utility._xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

        foreach (string referenceSequence in referenceSequences)
        {
            refSeqList.Add(new Sequence(seqAlphabet, referenceSequence));
        }

        foreach (string searchSequence in searchSequences)
        {
            searchSeqList.Add(new Sequence(seqAlphabet, searchSequence));
        }
    }

    string mumLength = Utility._xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);

    NUCmer nucmerObj = new NUCmer3();
    nucmerObj.MaximumSeparation = 0;
    nucmerObj.MinimumScore = 2;
    nucmerObj.SeparationFactor = 0.12f;
    nucmerObj.BreakLength = 2;
    nucmerObj.LengthOfMUM = long.Parse(mumLength, null);

    IList<IPairwiseSequenceAlignment> align = nucmerObj.Align(refSeqList, searchSeqList);

    // File-based cases keep the expected output in a file referenced by the Xml node.
    string expectedSequences = isFilePath
        ? Utility._xmlUtil.GetFileTextValue(nodeName, Constants.ExpectedSequencesNode)
        : Utility._xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequencesNode);

    // Gets all the aligned sequences in comma separated format. Joining the parts
    // (rather than appending "," and trimming the last character) keeps the result
    // well-defined when no alignment was produced, so the test then fails on the
    // comparison below instead of throwing ArgumentOutOfRangeException.
    List<string> alignedParts = new List<string>();
    foreach (IPairwiseSequenceAlignment seqAlignment in align)
    {
        foreach (PairwiseAlignedSequence alignedSeq in seqAlignment)
        {
            alignedParts.Add(alignedSeq.FirstSequence.ToString());
            alignedParts.Add(alignedSeq.SecondSequence.ToString());
        }
    }

    string actualSequences = string.Join(",", alignedParts.ToArray());

    Assert.AreEqual(expectedSequences, actualSequences);

    Console.WriteLine("NUCmer BVT : Successfully validated all the aligned sequences.");
    ApplicationLog.WriteLine("NUCmer BVT : Successfully validated all the aligned sequences.");
}
/// <summary>
/// Reads uniprot-dutpase.fasta line by line to collect the header lines and the
/// concatenated sequence text, then parses the same file with FastaParser and checks
/// that every parsed sequence matches the manually collected text, length and ID.
/// </summary>
public void FastaForUniprotDutpase()
{
    int expectedSequenceCount = 2015;
    string filepath = @"TestUtils\FASTA\uniprot-dutpase.fasta";
    Assert.IsTrue(File.Exists(filepath));

    List<string> headers = new List<string>();
    List<string> sequences = new List<string>();

    // Independent, minimal FASTA reader used as the reference result.
    using (StreamReader reader = File.OpenText(filepath))
    {
        StringBuilder current = null;
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // '>' is a file-format marker, so compare ordinally.
            if (line.StartsWith(">", StringComparison.Ordinal))
            {
                // A header closes the sequence collected so far (if any).
                if (current != null)
                {
                    sequences.Add(current.ToString());
                    current = null;
                }

                headers.Add(line);
            }
            else
            {
                if (current == null)
                {
                    current = new StringBuilder();
                }

                current.Append(line);
            }
        }

        // Flush the last sequence of the file.
        if (current != null)
        {
            sequences.Add(current.ToString());
        }
    }

    Assert.AreEqual(expectedSequenceCount, headers.Count);
    Assert.AreEqual(expectedSequenceCount, sequences.Count);

    // The parser stays alive while the parsed sequences are inspected and is
    // disposed even when one of the assertions below throws.
    using (FastaParser parser = new FastaParser())
    {
        IList<ISequence> seqs;
        using (StreamReader reader = File.OpenText(filepath))
        {
            seqs = parser.Parse(reader);
        }

        Assert.IsNotNull(seqs);
        Assert.AreEqual(expectedSequenceCount, seqs.Count);

        for (int i = 0; i < expectedSequenceCount; i++)
        {
            Sequence seq = (Sequence)seqs[i];
            Assert.IsNotNull(seq);
            Assert.AreEqual(sequences[i], seq.ToString());
            Assert.AreEqual(sequences[i].Length, seq.EncodedValues.Length);
            // The ID is the header line without the leading '>'.
            Assert.AreEqual(headers[i].Substring(1), seq.ID);
        }
    }
}
/// <summary>
/// Exercises HierarchicalClusteringParallel on three inputs: a hand-filled 4x4
/// symmetric distance matrix, a k-mer distance matrix built from ten short DNA
/// sequences, and a k-mer distance matrix built from RV11_BBS_all.afa.
/// </summary>
public void TestHierarchicalClusteringSerial()
{
    // --- Part 1: hand-filled symmetric matrix where d(i, j) = i + j ---
    int dimension = 4;
    IDistanceMatrix manualMatrix = new SymmetricDistanceMatrix(dimension);
    for (int row = 0; row < manualMatrix.Dimension - 1; ++row)
    {
        for (int col = row + 1; col < manualMatrix.Dimension; ++col)
        {
            manualMatrix[row, col] = row + col;
            manualMatrix[col, row] = row + col;
        }
    }

    PAMSAMMultipleSequenceAligner.parallelOption = new ParallelOptions { MaxDegreeOfParallelism = 2 };

    IHierarchicalClustering clustering = new HierarchicalClusteringParallel(manualMatrix);

    Assert.AreEqual(7, clustering.Nodes.Count);

    // Node IDs are expected to equal their position in the node list.
    for (int nodeIndex = 0; nodeIndex < dimension * 2 - 1; ++nodeIndex)
    {
        Assert.AreEqual(nodeIndex, clustering.Nodes[nodeIndex].ID);
    }

    // Nodes from index 'dimension' onwards are printed with their children.
    for (int nodeIndex = dimension; nodeIndex < clustering.Nodes.Count; ++nodeIndex)
    {
        Console.WriteLine(clustering.Nodes[nodeIndex].LeftChildren.ID);
        Console.WriteLine(clustering.Nodes[nodeIndex].RightChildren.ID);
    }

    // --- Part 2: Test on sequences ---
    ISequence seqA = new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT");
    ISequence seqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG");
    ISequence seqC = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG");

    List<ISequence> sequences = new List<ISequence>
    {
        seqA,
        seqB,
        seqC,
        new Sequence(Alphabets.DNA, "GGGAAATCG"),
        new Sequence(Alphabets.DNA, "GGGAATCAATCAG"),
        new Sequence(Alphabets.DNA, "GGGAATCTTATCAG"),
        new Sequence(Alphabets.DNA, "GGGACAAAATCAG"),
        new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
        new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
        new Sequence(Alphabets.DNA, "GGGACAAAATCAG")
    };

    int kmerLength = 4;
    KmerDistanceMatrixGenerator distanceGenerator =
        new KmerDistanceMatrixGenerator(sequences, kmerLength, MoleculeType.DNA);
    distanceGenerator.GenerateDistanceMatrix(sequences);

    // Dump the upper triangle of the k-mer distance matrix.
    for (int row = 0; row < distanceGenerator.DistanceMatrix.Dimension - 1; ++row)
    {
        for (int col = row + 1; col < distanceGenerator.DistanceMatrix.Dimension; ++col)
        {
            Console.WriteLine("{0}-{1}: {2}", row, col, distanceGenerator.DistanceMatrix[row, col]);
        }
    }

    clustering = new HierarchicalClusteringParallel(distanceGenerator.DistanceMatrix);

    for (int nodeIndex = 0; nodeIndex < clustering.Nodes.Count; ++nodeIndex)
    {
        Assert.AreEqual(true, clustering.Nodes[nodeIndex].NeedReAlignment);
    }

    BinaryGuideTree guideTree = new BinaryGuideTree(clustering);
    for (int nodeIndex = 0; nodeIndex < guideTree.Nodes.Count; ++nodeIndex)
    {
        Assert.AreEqual(true, guideTree.Nodes[nodeIndex].NeedReAlignment);
    }

    // Constructed but not used further in this test.
    SimilarityMatrix similarityMatrix =
        new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);

    // --- Part 3: Test on larger dataset ---
    ISequenceParser parser = new FastaParser();
    string filepath = @"testdata\FASTA\RV11_BBS_all.afa";
    IList<ISequence> orgSequences = parser.Parse(filepath);

    sequences = MsaUtils.UnAlign(orgSequences);
    int numberOfSequences = orgSequences.Count;

    distanceGenerator = new KmerDistanceMatrixGenerator(sequences, kmerLength, MoleculeType.DNA);
    distanceGenerator.GenerateDistanceMatrix(sequences);

    clustering = new HierarchicalClusteringParallel(distanceGenerator.DistanceMatrix);

    // Nodes after the first sequences.Count entries are printed with their children.
    for (int nodeIndex = sequences.Count; nodeIndex < clustering.Nodes.Count; ++nodeIndex)
    {
        Console.WriteLine(
            "Node {0}: leftchildren-{1}, rightChildren-{2}",
            nodeIndex,
            clustering.Nodes[nodeIndex].LeftChildren.ID,
            clustering.Nodes[nodeIndex].RightChildren.ID);
    }
}
/// <summary>
/// Parses 5_sequences.fasta with data virtualization enforced, makes every parsed
/// sequence editable and applies each editing operation (Add, Remove, RemoveAt,
/// RemoveRange, Replace, ReplaceRange, Insert, InsertRange, Contains), mirroring
/// the same edit on a plain string and comparing after every step.
/// </summary>
public void AllEditableScenarios()
{
    string filepathOriginal = @"TestUtils\Fasta\5_sequences.fasta";
    Assert.IsTrue(File.Exists(filepathOriginal));

    string[] expectedSequences = new string[]
    {
        "KRIPKSQNLRSIHSIFPFLEDKLSHLN",
        "LNIPSLITLNKSIYVFSKRKKRLSGFLHN",
        "HEAGAWGHEEHEAGAWGHEEHEAGAWGHEE",
        "PAWHEAEPAWHEAEPAWHEAEPAWHEAEPAWHEAE",
        "CGGUCCCGCGGUCCCGCGGUCCCGCGGUCCCG"
    };

    // The parser must outlive the sequences it produced and is disposed even
    // when an assertion fails.
    using (FastaParser fastaParser = new FastaParser())
    {
        fastaParser.EnforceDataVirtualization = true;
        IList<ISequence> sequences = fastaParser.Parse(filepathOriginal, true);

        int sequenceCount = sequences.Count;
        for (int i = 0; i < sequenceCount; i++)
        {
            Sequence actualSequence = sequences[i] as Sequence;
            // Fail with a clear assertion rather than a NullReferenceException
            // when the parser returns a different ISequence implementation.
            Assert.IsNotNull(actualSequence);

            // Parsed read-only; switch to editable for the scenarios below.
            actualSequence.IsReadOnly = false;

            string expected = expectedSequences[i];
            ISequenceItem item = actualSequence[1];

            // Add
            actualSequence.Add(item);
            expected += item.Symbol;
            Assert.AreEqual(expected, actualSequence.ToString());

            // Remove: mirrored on the string by removing the first occurrence
            // of the item's symbol.
            actualSequence.Remove(item);
            int indexOfItem = expected.IndexOf(item.Symbol);
            expected = expected.Remove(indexOfItem, 1);
            Assert.AreEqual(expected, actualSequence.ToString());

            // RemoveAt
            actualSequence.RemoveAt(0);
            expected = expected.Remove(0, 1);
            Assert.AreEqual(expected, actualSequence.ToString());

            // RemoveRange
            actualSequence.RemoveRange(2, 5);
            expected = expected.Remove(2, 5);
            Assert.AreEqual(expected, actualSequence.ToString());

            // Replace
            actualSequence.Replace(0, 'C');
            expected = expected.Remove(0, 1).Insert(0, "C");
            Assert.AreEqual(expected, actualSequence.ToString());

            // ReplaceRange
            actualSequence.ReplaceRange(3, "GG");
            expected = expected.Remove(3, 2).Insert(3, "GG");
            Assert.AreEqual(expected, actualSequence.ToString());

            // Insert
            actualSequence.Insert(3, item);
            expected = expected.Insert(3, item.Symbol.ToString());
            Assert.AreEqual(expected, actualSequence.ToString());

            // InsertRange
            actualSequence.InsertRange(2, "CC");
            expected = expected.Insert(2, "CC");
            Assert.AreEqual(expected, actualSequence.ToString());

            // Contains
            bool actualContainsValue = actualSequence.Contains(actualSequence[3]);
            bool expectedContainsValue = expected.Contains(actualSequence[3].Symbol.ToString());
            Assert.AreEqual(expectedContainsValue, actualContainsValue);

            expectedSequences[i] = expected;
        }
    }
}
/// <summary>
/// Aligns pairs of single-sequence profiles with NeedlemanWunschProfileAlignerSerial:
/// first two hand-written DNA sequences, then pairs of the sequences read from
/// RV11_BBS_all.afa. E-strings, aligned sequences and alignment scores are written
/// to the console.
/// </summary>
public void TestNeedlemanWunschProfileAligner()
{
    Console.WriteLine("Number of logical processors: {0}", Environment.ProcessorCount);

    // Map every symbol of the template sequence to its position; used as the profile item set.
    ISequence templateSequence = new Sequence(Alphabets.DNA, "ATGCSWRYKMBVHDN-");
    Dictionary<ISequenceItem, int> itemSet = new Dictionary<ISequenceItem, int>();
    for (int i = 0; i < templateSequence.Count; ++i)
    {
        itemSet.Add(templateSequence[i], i);
    }
    Profiles.ItemSet = itemSet;

    SimilarityMatrix similarityMatrix =
        new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
    int gapOpenPenalty = -3;
    int gapExtendPenalty = -1;

    IProfileAligner profileAligner = new NeedlemanWunschProfileAlignerSerial(
        similarityMatrix, ProfileScoreFunctionNames.WeightedInnerProduct,
        gapOpenPenalty, gapExtendPenalty, Environment.ProcessorCount);

    ISequence seqA = new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT");
    ISequence seqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG");

    List<ISequence> sequences = new List<ISequence>();
    sequences.Add(seqA);
    sequences.Add(seqB);

    IProfileAlignment profileAlignmentA = ProfileAlignment.GenerateProfileAlignment(sequences[0]);
    IProfileAlignment profileAlignmentB = ProfileAlignment.GenerateProfileAlignment(sequences[1]);

    profileAligner.Align(profileAlignmentA, profileAlignmentB);

    List<int> eStringSubtree = profileAligner.GenerateEString(profileAligner.AlignedA);
    List<int> eStringSubtreeB = profileAligner.GenerateEString(profileAligner.AlignedB);

    List<ISequence> alignedSequences = new List<ISequence>();

    ISequence seq = profileAligner.GenerateSequenceFromEString(eStringSubtree, sequences[0]);
    alignedSequences.Add(seq);
    seq = profileAligner.GenerateSequenceFromEString(eStringSubtreeB, sequences[1]);
    alignedSequences.Add(seq);

    float profileScore = MsaUtils.MultipleAlignmentScoreFunction(
        alignedSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty);

    Console.WriteLine("alignment score is: {0}", profileScore);
    Console.WriteLine("the aligned sequences are:");
    for (int i = 0; i < alignedSequences.Count; ++i)
    {
        Console.WriteLine(alignedSequences[i].ToString());
    }

    // NOTE(review): these expected alignments are constructed but never asserted
    // against alignedSequences - confirm whether assertions were intended.
    ISequence expectedSeqA = new Sequence(Alphabets.DNA, "GGGAA---AAATCAGATT");
    ISequence expectedSeqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG---");

    // Test on case 3: 36 sequences
    // The parser is disposed exactly once, after all pairs have been processed
    // (and also when an exception escapes), instead of inside the outer loop.
    using (FastaParser parser = new FastaParser())
    {
        string filepath = @"TestUtils\FASTA\RV11_BBS_all.afa";
        IList<ISequence> orgSequences = parser.Parse(filepath);

        sequences = MsaUtils.UnAlign(orgSequences);

        int numberOfSequences = orgSequences.Count;

        Console.WriteLine("Original unaligned sequences are:");
        for (int i = 0; i < numberOfSequences; ++i)
        {
            Console.WriteLine(">");
            Console.WriteLine(sequences[i].ToString());
        }

        // NOTE(review): the outer loop starts at 1, so sequence 0 is never
        // paired - kept as in the original; confirm this is intentional.
        for (int i = 1; i < numberOfSequences - 1; ++i)
        {
            for (int j = i + 1; j < numberOfSequences; ++j)
            {
                profileAlignmentA = ProfileAlignment.GenerateProfileAlignment(sequences[i]);
                profileAlignmentB = ProfileAlignment.GenerateProfileAlignment(sequences[j]);

                profileAligner = new NeedlemanWunschProfileAlignerSerial(
                    similarityMatrix, ProfileScoreFunctionNames.WeightedInnerProduct,
                    gapOpenPenalty, gapExtendPenalty, Environment.ProcessorCount);
                profileAligner.Align(profileAlignmentA, profileAlignmentB);

                eStringSubtree = profileAligner.GenerateEString(profileAligner.AlignedA);
                eStringSubtreeB = profileAligner.GenerateEString(profileAligner.AlignedB);

                Console.WriteLine("Sequences lengths are: {0}-{1}", sequences[i].Count, sequences[j].Count);
                Console.WriteLine("estring 1:");
                for (int k = 0; k < eStringSubtree.Count; ++k)
                {
                    Console.Write("{0}\t", eStringSubtree[k]);
                }
                Console.WriteLine("\nestring 2:");
                for (int k = 0; k < eStringSubtreeB.Count; ++k)
                {
                    Console.Write("{0}\t", eStringSubtreeB[k]);
                }

                alignedSequences = new List<ISequence>();

                seq = profileAligner.GenerateSequenceFromEString(eStringSubtree, sequences[i]);
                alignedSequences.Add(seq);
                seq = profileAligner.GenerateSequenceFromEString(eStringSubtreeB, sequences[j]);
                alignedSequences.Add(seq);

                profileScore = MsaUtils.MultipleAlignmentScoreFunction(
                    alignedSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty);

                Console.WriteLine("\nalignment score is: {0}", profileScore);
                Console.WriteLine("the aligned sequences are:");
                for (int k = 0; k < alignedSequences.Count; ++k)
                {
                    Console.WriteLine(alignedSequences[k].ToString());
                }
            }
        }
    }
}
/// <summary>
/// Runs PAMSAMMultipleSequenceAligner on the protein benchmark file BOX032.xml.afa.
/// The benchmark alignment is un-aligned, written to a temporary FASTA file
/// (60 symbols per line), parsed back and aligned; the scores of every stage are
/// written to the console together with the Q and TC scores against the benchmark.
/// </summary>
public void TestMsaBenchMarkLargeDataset()
{
    // Test on a protein benchmark dataset
    ISequenceParser parser = new FastaParser();
    string filepath = @"testdata\FASTA\Protein\Balibase\RV913\BOX032.xml.afa";

    IList<ISequence> orgSequences = parser.Parse(filepath);

    IList<ISequence> sequences = MsaUtils.UnAlign(orgSequences);

    int numberOfSequences = orgSequences.Count;

    string outputFilePath = @"tempBOX032.xml.afa";

    try
    {
        // Overwrite (append: false) so that a file left behind by an aborted run
        // cannot leak its sequences into this run; 'using' closes the writer even
        // when writing fails.
        using (StreamWriter writer = new StreamWriter(outputFilePath, false))
        {
            foreach (ISequence sequence in sequences)
            {
                writer.WriteLine(">" + sequence.ID);

                // write sequence, 60 symbols per line
                BasicDerivedSequence derivedSeq = new BasicDerivedSequence(sequence, false, false, 0, 0);
                for (int lineStart = 0; lineStart < sequence.Count; lineStart += 60)
                {
                    derivedSeq.RangeStart = lineStart;
                    derivedSeq.RangeLength = Math.Min(60, sequence.Count - lineStart);
                    writer.WriteLine(derivedSeq.ToString());
                }
            }
        }

        sequences = parser.Parse(outputFilePath);

        Console.WriteLine("Original sequences are:");
        for (int i = 0; i < numberOfSequences; ++i)
        {
            Console.WriteLine(sequences[i].ToString());
        }

        Console.WriteLine("Benchmark sequences are:");
        for (int i = 0; i < numberOfSequences; ++i)
        {
            Console.WriteLine(orgSequences[i].ToString());
        }

        PAMSAMMultipleSequenceAligner.FasterVersion = false;
        PAMSAMMultipleSequenceAligner.UseWeights = false;
        PAMSAMMultipleSequenceAligner.UseStageB = true;
        PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

        int gapOpenPenalty = -13;
        int gapExtendPenalty = -5;
        int kmerLength = 3;

        int numberOfDegrees = 2; //Environment.ProcessorCount;
        int numberOfPartitions = 16; // Environment.ProcessorCount * 2;

        SimilarityMatrix similarityMatrix =
            new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);

        DistanceFunctionTypes distanceFunctionName = DistanceFunctionTypes.EuclideanDistance;
        UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
        ProfileAlignerNames profileAlignerName = ProfileAlignerNames.NeedlemanWunschProfileAligner;
        ProfileScoreFunctionNames profileProfileFunctionName = ProfileScoreFunctionNames.WeightedInnerProduct;

        PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner(
            sequences, MoleculeType.Protein, kmerLength, distanceFunctionName,
            hierarchicalClusteringMethodName, profileAlignerName, profileProfileFunctionName,
            similarityMatrix, gapOpenPenalty, gapExtendPenalty, numberOfPartitions, numberOfDegrees);

        Console.WriteLine("Benchmark SPS score is: {0}",
            MsaUtils.MultipleAlignmentScoreFunction(orgSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty));

        Console.WriteLine("Aligned sequences in stage 1: {0}", msa.AlignmentScoreA);
        for (int i = 0; i < msa.AlignedSequencesA.Count; ++i)
        {
            Console.WriteLine(msa.AlignedSequencesA[i].ToString());
        }
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesA, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesA, orgSequences));

        Console.WriteLine("Aligned sequences in stage 2: {0}", msa.AlignmentScoreB);
        for (int i = 0; i < msa.AlignedSequencesB.Count; ++i)
        {
            Console.WriteLine(msa.AlignedSequencesB[i].ToString());
        }
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesB, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesB, orgSequences));

        Console.WriteLine("Aligned sequences in stage 3: {0}", msa.AlignmentScoreC);
        for (int i = 0; i < msa.AlignedSequencesC.Count; ++i)
        {
            Console.WriteLine(msa.AlignedSequencesC[i].ToString());
        }
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesC, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesC, orgSequences));

        Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);
        for (int i = 0; i < msa.AlignedSequences.Count; ++i)
        {
            Console.WriteLine(msa.AlignedSequences[i].ToString());
        }
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences));
    }
    finally
    {
        // Remove the temporary file whether or not the test succeeded.
        if (File.Exists(outputFilePath))
        {
            File.Delete(outputFilePath);
        }
    }
}
/// <summary>
/// Runs PAMSAMMultipleSequenceAligner (faster version, stage B disabled) on the
/// protein benchmark file BOX246.xml.afa and writes the aligned sequences and the
/// Q / TC / SPS scores of every stage to the console.
/// </summary>
public void TestMuscleMultipleSequenceAlignmentRunningTime()
{
    // Test on a protein benchmark dataset.
    // The parser is disposed when the test ends, including when it ends with an exception.
    using (FastaParser parser = new FastaParser())
    {
        string filepath = @"TestUtils\FASTA\RunningTime\BOX246.xml.afa";
        MoleculeType mt = MoleculeType.Protein;

        IList<ISequence> orgSequences = parser.Parse(filepath);

        List<ISequence> sequences = MsaUtils.UnAlign(orgSequences);

        int numberOfSequences = orgSequences.Count;

        Console.WriteLine("Original sequences are:");
        for (int i = 0; i < numberOfSequences; ++i)
        {
            Console.WriteLine(sequences[i].ToString());
        }

        Console.WriteLine("Benchmark sequences are:");
        for (int i = 0; i < numberOfSequences; ++i)
        {
            Console.WriteLine(orgSequences[i].ToString());
        }

        PAMSAMMultipleSequenceAligner.FasterVersion = true;
        PAMSAMMultipleSequenceAligner.UseWeights = false;
        PAMSAMMultipleSequenceAligner.UseStageB = false;
        PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

        int gapOpenPenalty = -13;
        int gapExtendPenalty = -5;
        int kmerLength = 2;

        int numberOfDegrees = 2; //Environment.ProcessorCount;
        int numberOfPartitions = 16; // Environment.ProcessorCount * 2;

        DistanceFunctionTypes distanceFunctionName = DistanceFunctionTypes.EuclideanDistance;
        UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
        ProfileAlignerNames profileAlignerName = ProfileAlignerNames.NeedlemanWunschProfileAligner;
        ProfileScoreFunctionNames profileProfileFunctionName = ProfileScoreFunctionNames.InnerProductFast;

        // Pick the similarity matrix that matches the molecule type.
        SimilarityMatrix similarityMatrix = null;
        switch (mt)
        {
            case MoleculeType.DNA:
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
                break;
            case MoleculeType.RNA:
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
                break;
            case MoleculeType.Protein:
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
                break;
            default:
                throw new InvalidOperationException("Invalid molecular type");
        }

        PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner(
            sequences, mt, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
            profileAlignerName, profileProfileFunctionName, similarityMatrix,
            gapOpenPenalty, gapExtendPenalty, numberOfPartitions, numberOfDegrees);

        Console.WriteLine("The number of partitions is: {0}", numberOfPartitions);
        Console.WriteLine("The number of degrees is: {0}", numberOfDegrees);
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences));

        Console.WriteLine("Benchmark SPS score is: {0}",
            MsaUtils.MultipleAlignmentScoreFunction(orgSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty));

        Console.WriteLine("Aligned sequences in stage 1: {0}", msa.AlignmentScoreA);
        for (int i = 0; i < msa.AlignedSequencesA.Count; ++i)
        {
            Console.WriteLine(msa.AlignedSequencesA[i].ToString());
        }
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesA, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesA, orgSequences));

        Console.WriteLine("Aligned sequences in stage 2: {0}", msa.AlignmentScoreB);
        for (int i = 0; i < msa.AlignedSequencesB.Count; ++i)
        {
            Console.WriteLine(msa.AlignedSequencesB[i].ToString());
        }
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesB, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesB, orgSequences));

        Console.WriteLine("Aligned sequences in stage 3: {0}", msa.AlignmentScoreC);
        for (int i = 0; i < msa.AlignedSequencesC.Count; ++i)
        {
            Console.WriteLine(msa.AlignedSequencesC[i].ToString());
        }
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesC, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesC, orgSequences));

        Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);
        for (int i = 0; i < msa.AlignedSequences.Count; ++i)
        {
            Console.WriteLine(msa.AlignedSequences[i].ToString());
        }
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences));
    }
}
/// <summary>
/// Runs ProgressiveAligner on three inputs: three short DNA sequences (with the
/// aligned output asserted), ten short DNA sequences (guide tree, edge lengths,
/// sequence weights and alignment printed), and the sequences of BB11001.tfa
/// (guide tree and alignment printed).
/// </summary>
public void TestProgressiveAligner()
{
    MsaUtils.SetProfileItemSets(MoleculeType.DNA);

    SimilarityMatrix similarityMatrix =
        new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
    int gapOpenPenalty = -8;
    int gapExtendPenalty = -1;
    int kmerLength = 4;

    PAMSAMMultipleSequenceAligner.parallelOption = new ParallelOptions { MaxDegreeOfParallelism = 2 };

    // --- Part 1: three sequences with a known expected alignment ---
    ISequence seqA = new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT");
    ISequence seqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG");
    ISequence seqC = new Sequence(Alphabets.DNA, "GGGACAAAATCAG");

    List<ISequence> sequences = new List<ISequence> { seqA, seqB, seqC };

    KmerDistanceMatrixGenerator distanceGenerator =
        new KmerDistanceMatrixGenerator(sequences, kmerLength, MoleculeType.DNA);
    distanceGenerator.GenerateDistanceMatrix(sequences);

    IHierarchicalClustering clustering =
        new HierarchicalClusteringParallel(distanceGenerator.DistanceMatrix);

    BinaryGuideTree guideTree = new BinaryGuideTree(clustering);

    IProgressiveAligner aligner = new ProgressiveAligner(
        ProfileAlignerNames.NeedlemanWunschProfileAligner, similarityMatrix, gapOpenPenalty, gapExtendPenalty);
    aligner.Align(sequences, guideTree);

    ISequence expectedSeqA = new Sequence(Alphabets.DNA, "GGGA---AAAATCAGATT");
    ISequence expectedSeqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG---");
    ISequence expectedSeqC = new Sequence(Alphabets.DNA, "GGGA--CAAAATCAG---");

    Assert.AreEqual(expectedSeqA.ToString(), aligner.AlignedSequences[0].ToString());
    Assert.AreEqual(expectedSeqB.ToString(), aligner.AlignedSequences[1].ToString());
    Assert.AreEqual(expectedSeqC.ToString(), aligner.AlignedSequences[2].ToString());

    // --- Part 2: ten sequences; the existing generator instance is reused ---
    sequences = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
        new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
        new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
        new Sequence(Alphabets.DNA, "GGGAAATCG"),
        new Sequence(Alphabets.DNA, "GGGAATCAATCAG"),
        new Sequence(Alphabets.DNA, "GGGAATCTTATCAG"),
        new Sequence(Alphabets.DNA, "GGGACAAAATCAG"),
        new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
        new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
        new Sequence(Alphabets.DNA, "GGGACAAAATCAG")
    };

    distanceGenerator.GenerateDistanceMatrix(sequences);

    clustering = new HierarchicalClusteringParallel(distanceGenerator.DistanceMatrix);

    guideTree = new BinaryGuideTree(clustering);

    for (int nodeIndex = 0; nodeIndex < guideTree.NumberOfNodes; ++nodeIndex)
    {
        Console.WriteLine("Node {0} ID: {1}", nodeIndex, guideTree.Nodes[nodeIndex].ID);
    }

    for (int edgeIndex = 0; edgeIndex < guideTree.NumberOfEdges; ++edgeIndex)
    {
        Console.WriteLine(
            "Edge {0} ID: {1}, length: {2}",
            edgeIndex,
            guideTree.Edges[edgeIndex].ID,
            guideTree.Edges[edgeIndex].Length);
    }

    SequenceWeighting weighting = new SequenceWeighting(guideTree);

    for (int weightIndex = 0; weightIndex < weighting.Weights.Length; ++weightIndex)
    {
        Console.WriteLine("weights {0} is {1}", weightIndex, weighting.Weights[weightIndex]);
    }

    aligner = new ProgressiveAligner(
        ProfileAlignerNames.NeedlemanWunschProfileAligner, similarityMatrix, gapOpenPenalty, gapExtendPenalty);
    aligner.Align(sequences, guideTree);

    for (int seqIndex = 0; seqIndex < aligner.AlignedSequences.Count; ++seqIndex)
    {
        Console.WriteLine(aligner.AlignedSequences[seqIndex].ToString());
    }

    // --- Part 3: sequences read from a FASTA file ---
    MsaUtils.SetProfileItemSets(MoleculeType.Protein);

    ISequenceParser parser = new FastaParser();
    string filepath = @"testdata\FASTA\Protein\BB11001.tfa";
    IList<ISequence> orgSequences = parser.Parse(filepath);

    sequences = MsaUtils.UnAlign(orgSequences);

    similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
    kmerLength = 4;

    int numberOfSequences = orgSequences.Count;

    gapOpenPenalty = -13;
    gapExtendPenalty = -5;

    // NOTE(review): the item sets above are switched to Protein, yet the distance
    // generator is still built with MoleculeType.DNA - confirm this is intended.
    distanceGenerator = new KmerDistanceMatrixGenerator(sequences, kmerLength, MoleculeType.DNA);
    distanceGenerator.GenerateDistanceMatrix(sequences);

    clustering = new HierarchicalClusteringParallel(distanceGenerator.DistanceMatrix);

    guideTree = new BinaryGuideTree(clustering);

    // Nodes from index NumberOfLeaves onwards are printed with their children.
    for (int nodeIndex = guideTree.NumberOfLeaves; nodeIndex < guideTree.Nodes.Count; ++nodeIndex)
    {
        Console.WriteLine(
            "Node {0}: leftchildren-{1}, rightChildren-{2}",
            nodeIndex,
            guideTree.Nodes[nodeIndex].LeftChildren.ID,
            guideTree.Nodes[nodeIndex].RightChildren.ID);
    }

    aligner = new ProgressiveAligner(
        ProfileAlignerNames.NeedlemanWunschProfileAligner, similarityMatrix, gapOpenPenalty, gapExtendPenalty);
    aligner.Align(sequences, guideTree);

    for (int seqIndex = 0; seqIndex < aligner.AlignedSequences.Count; ++seqIndex)
    {
        Console.WriteLine(aligner.AlignedSequences[seqIndex].ToString());
    }
}
/// <summary>
/// Parses the FASTA file configured under Constants.SimpleFastaNodeName with
/// FastaParser.Parse(filePath) and validates the single resulting sequence:
/// its text, encoded length, alphabet name and ID must match the Xml values.
/// </summary>
public void FastaParserValidateParse()
{
    // Gets the FASTA file path from the Xml
    string fastaPath = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleFastaNodeName, Constants.FilePathNode);
    Assert.IsTrue(File.Exists(fastaPath));

    // Logs information to the log file
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
        "FastA Parser BVT: File Exists in the Path '{0}'.", fastaPath));

    using (FastaParser fastaParser = new FastaParser())
    {
        IList<ISequence> parsedSequences = fastaParser.Parse(fastaPath);

        Assert.IsNotNull(parsedSequences);
        Assert.AreEqual(1, parsedSequences.Count);
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
            "FastA Parser BVT: Number of Sequences found are '{0}'.",
            parsedSequences.Count.ToString((IFormatProvider)null)));

        // Sequence text
        string expectedText = _utilityObj._xmlUtil.GetTextValue(
            Constants.SimpleFastaNodeName, Constants.ExpectedSequenceNode);

        Sequence parsed = (Sequence)parsedSequences[0];
        Assert.IsNotNull(parsed);
        Assert.AreEqual(expectedText, parsed.ToString());

        ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
            "FastA Parser BVT: The FASTA sequence '{0}' validation after Parse() is found to be as expected.",
            parsed.ToString()));
        // Logs to the NUnit GUI (Console.Out) window
        Console.WriteLine(string.Format((IFormatProvider)null,
            "FastA Parser BVT: The FASTA sequence '{0}' validation after Parse() is found to be as expected.",
            parsed.ToString()));

        // Encoded length: copy the sequence through its IList<byte> view
        byte[] encodedCopy = new byte[parsed.Count];
        IList<byte> byteView = parsed as IList<byte>;
        byteView.CopyTo(encodedCopy, 0);
        Assert.AreEqual(expectedText.Length, encodedCopy.Length);
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
            "FastA Parser BVT: The FASTA Length sequence '{0}' is as expected.",
            expectedText.Length));

        // Alphabet name, compared case-insensitively
        Assert.IsNotNull(parsed.Alphabet);
        string parsedAlphabetLower = parsed.Alphabet.Name.ToLower(CultureInfo.CurrentCulture);
        string configuredAlphabetLower = _utilityObj._xmlUtil
            .GetTextValue(Constants.SimpleFastaNodeName, Constants.AlphabetNameNode)
            .ToLower(CultureInfo.CurrentCulture);
        Assert.AreEqual(parsedAlphabetLower, configuredAlphabetLower);
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
            "FastA Parser BVT: The Sequence Alphabet is '{0}' and is as expected.",
            parsed.Alphabet.Name));

        // Sequence ID
        string configuredId = _utilityObj._xmlUtil.GetTextValue(
            Constants.SimpleFastaNodeName, Constants.SequenceIdNode);
        Assert.AreEqual(configuredId, parsed.ID);
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
            "FastA Parser BVT: The Sequence ID is '{0}' and is as expected.", parsed.ID));
        // Logs to the NUnit GUI (Console.Out) window
        Console.WriteLine(string.Format((IFormatProvider)null,
            "FastA Parser BVT: The Sequence ID is '{0}' and is as expected.", parsed.ID));
    }
}