// Runs the PAMSAM aligner over the sequences parsed from TestUtils\122_raw.afa
// (after MsaUtils.UnAlign) and asserts that AlignedSequences is not null.
public void testBug3()
{
    // Test on DNA benchmark dataset
    ISequenceParser parser = new FastaParser();
    try
    {
        string filepath = @"TestUtils\122_raw.afa";
        MoleculeType mt = MoleculeType.DNA;

        IList<ISequence> orgSequences = parser.Parse(filepath);
        List<ISequence> sequences = MsaUtils.UnAlign(orgSequences);

        // Static aligner switches; they stay set after this test returns.
        PAMSAMMultipleSequenceAligner.FasterVersion = false;
        PAMSAMMultipleSequenceAligner.UseWeights = false;
        PAMSAMMultipleSequenceAligner.UseStageB = false;
        PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

        int gapOpenPenalty = -13;
        int gapExtendPenalty = -5;
        int kmerLength = 2;
        int numberOfDegrees = 2;     // Environment.ProcessorCount;
        int numberOfPartitions = 16; // Environment.ProcessorCount * 2;

        DistanceFunctionTypes distanceFunctionName = DistanceFunctionTypes.EuclideanDistance;
        UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
        ProfileAlignerNames profileAlignerName = ProfileAlignerNames.NeedlemanWunschProfileAligner;
        ProfileScoreFunctionNames profileProfileFunctionName = ProfileScoreFunctionNames.InnerProductFast;

        // Pick the similarity matrix that matches the molecule type.
        SimilarityMatrix similarityMatrix;
        switch (mt)
        {
            case MoleculeType.DNA:
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
                break;
            case MoleculeType.RNA:
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
                break;
            case MoleculeType.Protein:
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
                break;
            default:
                throw new InvalidDataException("Invalid molecular type");
        }

        PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner(
            sequences, mt, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
            profileAlignerName, profileProfileFunctionName, similarityMatrix,
            gapOpenPenalty, gapExtendPenalty, numberOfPartitions, numberOfDegrees);

        Assert.IsNotNull(msa.AlignedSequences);
    }
    finally
    {
        // Release the parser even when parsing, alignment or the assertion throws.
        ((FastaParser)parser).Dispose();
    }
}
// Aligns seven hard-coded protein sequences with PAMSAM (Blosum50 matrix) and
// asserts that AlignedSequences is not null.
public void testBug()
{
    string[] proteinResidues =
    {
        "MQEPQSELNIDPPLSQETFSELWNLLPENNVLSSELCPAVDELLLPESVVNWLDEDSDDAPRMPATSAP",
        "PLSQETFSDLWNLLPENNLLSSELSAPVDDLLPYTDVATWLDECPNEAPQMPEPSAPAAPPPATPAPATSWPLSSFVPSQKTYPGNYGFRLGF",
        "MEPSSETGMDPPLSQETFEDLWSLLPDPLQTVTCRLDNLSEFPDYPLAADMSVLQEGLMGNAVPTVTSCAPSTDDYAGKYGLQLDFQQNGTAKS",
        "MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPRVAPAPAAPTPAAPAPAPSWPLS",
        "MEESQAELGVEPPLSQETFSDLWKLLPENNLLSSELSPAVDDLLLSPEDVANWLDERPDEAPQMPEPPAPAAPTPAAPAPATSWPLSSFVPSQK",
        "MTAMEESQSDISLELPLSQETFSGLWKLLPPEDILPSPHCMDDLLLPQDVEEFFEGPSEALRVSGAPAAQDPVTETPGPVAPAPATPWPLSSFVPSQKTYQGNYGFHLGFLQ",
        "FRLGFLHSGTAKSVTWTYSPLLNKLFCQLAKTCPVQLWVSSPPPPNTCVRAMAIYKKSEFVTEVVRRCPHHERCSDSSDGLAPPQHLIRVEGNLRAKYLDDRNTFRHSVV"
    };

    // Wrap every residue string in a protein Sequence, keeping the listed order.
    List<ISequence> inputs = new List<ISequence>();
    foreach (string residues in proteinResidues)
    {
        ISequence protein = new Sequence(Alphabets.Protein, residues);
        inputs.Add(protein);
    }

    SimilarityMatrix blosum50 = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum50);

    PAMSAMMultipleSequenceAligner aligner = new PAMSAMMultipleSequenceAligner(
        inputs,
        2,
        DistanceFunctionTypes.EuclideanDistance,
        UpdateDistanceMethodsTypes.Average,
        ProfileAlignerNames.NeedlemanWunschProfileAligner,
        ProfileScoreFunctionNames.WeightedEuclideanDistance,
        blosum50,
        -8,
        -1,
        2,
        16);

    Assert.IsNotNull(aligner.AlignedSequences);
}
// Parses TestUtils\122_raw.afa, strips the alignment with MsaUtils.UnAlign,
// re-aligns the sequences with PAMSAM and asserts that AlignedSequences is not null.
public void testBug2()
{
    // Test on DNA benchmark dataset
    string inputPath = @"TestUtils\122_raw.afa".TestDir();
    FastAParser fastaParser = new FastAParser();
    IList<ISequence> alignedInput = fastaParser.Parse(inputPath).ToList();
    List<ISequence> unaligned = MsaUtils.UnAlign(alignedInput);

    // Static aligner switches; they stay set after this test returns.
    PAMSAMMultipleSequenceAligner.FasterVersion = false;
    PAMSAMMultipleSequenceAligner.UseWeights = false;
    PAMSAMMultipleSequenceAligner.UseStageB = false;
    PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

    const int gapOpen = -13;
    const int gapExtend = -5;
    const int kmer = 2;
    const int degrees = 2;     // Environment.ProcessorCount;
    const int partitions = 16; // Environment.ProcessorCount * 2;

    SimilarityMatrix dnaMatrix =
        new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);

    PAMSAMMultipleSequenceAligner aligner = new PAMSAMMultipleSequenceAligner(
        unaligned,
        kmer,
        DistanceFunctionTypes.EuclideanDistance,
        UpdateDistanceMethodsTypes.Average,
        ProfileAlignerNames.NeedlemanWunschProfileAligner,
        ProfileScoreFunctionNames.InnerProductFast,
        dnaMatrix,
        gapOpen,
        gapExtend,
        partitions,
        degrees);

    Assert.IsNotNull(aligner.AlignedSequences);
}
// $TODO: Change the above namespace after PhaseOne changes

/// <summary>
/// Aligns multiple sequences using a multiple sequence aligner.
/// This sample uses PAMSAM with a fixed set of default parameters
/// (AmbiguousDna similarity matrix, k-mer length 3, gap open -4, gap extend -1).
/// </summary>
/// <param name="sequences">List of sequences to align.</param>
/// <returns>The aligned sequences exposed by the aligner's AlignedSequences property.</returns>
static IList<ISequence> DoMultipleSequenceAlignment(List<ISequence> sequences)
{
    // $TODO: Change the signature after PAMSAM PhaseOne is checked in
    // $TODO: Change this after PAMSAM PhaseOne is checked in
    SimilarityMatrix dnaMatrix =
        new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);

    const int kmer = 3;
    const int gapOpen = -4;
    const int gapExtend = -1;

    // The last two arguments are derived from the processor count of the machine.
    PAMSAMMultipleSequenceAligner aligner = new PAMSAMMultipleSequenceAligner(
        sequences,
        kmer,
        DistanceFunctionTypes.EuclideanDistance,
        UpdateDistanceMethodsTypes.Average,
        ProfileAlignerNames.NeedlemanWunschProfileAligner,
        ProfileScoreFunctionNames.WeightedInnerProduct,
        dnaMatrix,
        gapOpen,
        gapExtend,
        Environment.ProcessorCount * 2,
        Environment.ProcessorCount);

    return aligner.AlignedSequences;
}
// Runs PAMSAM on three small hard-coded DNA data sets and writes the stage 1,
// stage 3 and final alignments of each run to the console.
public void TestMuscleMultipleSequenceAlignment()
{
    SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
    int gapOpenPenalty = -4;
    int gapExtendPenalty = -1;
    int kmerLength = 3;

    DistanceFunctionTypes distanceFunctionName = DistanceFunctionTypes.EuclideanDistance;
    UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
    ProfileAlignerNames profileAlignerName = ProfileAlignerNames.NeedlemanWunschProfileAligner;
    ProfileScoreFunctionNames profileProfileFunctionName = ProfileScoreFunctionNames.WeightedInnerProduct;

    // All three examples share the same aligner settings; only the input differs.
    Func<List<ISequence>, PAMSAMMultipleSequenceAligner> align = input =>
        new PAMSAMMultipleSequenceAligner(
            input, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
            profileAlignerName, profileProfileFunctionName, similarityMatrix,
            gapOpenPenalty, gapExtendPenalty,
            Environment.ProcessorCount * 2, Environment.ProcessorCount);

    // Test case 1
    List<ISequence> sequences = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
        new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
        new Sequence(Alphabets.DNA, "GGGACAAAATCAG")
    };
    WriteMuscleAlignmentReport(align(sequences));

    // Test case 2
    Console.WriteLine("Example 2");
    sequences = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
        new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
        new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
        new Sequence(Alphabets.DNA, "GGGAAATCG"),
        new Sequence(Alphabets.DNA, "GGGAATCAATCAG"),
        new Sequence(Alphabets.DNA, "GGGAATCTTATCAG"),
        new Sequence(Alphabets.DNA, "GGGACAAAATCAG"),
        new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
        new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
        new Sequence(Alphabets.DNA, "GGGACAAAATCAG")
    };
    WriteMuscleAlignmentReport(align(sequences));

    // Test case 3 (previously mislabelled "Example 2" in the console output)
    Console.WriteLine("Example 3");
    sequences = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
        new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
        new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
        new Sequence(Alphabets.DNA, "GGGAAATCG"),
        new Sequence(Alphabets.DNA, "GGGAATCAATCAG"),
        new Sequence(Alphabets.DNA, "GGGACAAAATCAG"),
        new Sequence(Alphabets.DNA, "GGGAATCTTATCAG")
    };
    WriteMuscleAlignmentReport(align(sequences));
}

// Writes the stage 1, stage 3 and final alignment scores of the given aligner
// to the console, each followed by its aligned sequences (one per line).
private static void WriteMuscleAlignmentReport(PAMSAMMultipleSequenceAligner msa)
{
    Console.WriteLine("Aligned sequences in stage 1: {0}", msa.AlignmentScoreA);
    WriteMuscleAlignedSequences(msa.AlignedSequencesA);

    Console.WriteLine("Aligned sequences in stage 3: {0}", msa.AlignmentScoreC);
    WriteMuscleAlignedSequences(msa.AlignedSequencesC);

    Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);
    WriteMuscleAlignedSequences(msa.AlignedSequences);
}

// Writes each sequence as text by casting every symbol to a char.
private static void WriteMuscleAlignedSequences(IEnumerable<ISequence> alignedSequences)
{
    foreach (ISequence aligned in alignedSequences)
    {
        Console.WriteLine(new string(aligned.Select(a => (char)a).ToArray()));
    }
}
// Parses the BOX246 benchmark alignment, re-aligns its unaligned sequences with
// PAMSAM and writes the per-stage alignments together with their Q / TC scores
// (computed against the parsed benchmark alignment) to the console.
public void TestMuscleMultipleSequenceAlignmentRunningTime()
{
    string filepath = @"TestUtils\FASTA\RunningTime\BOX246.xml.afa";

    // The original note called this a "DNA benchmark dataset".
    // NOTE(review): the similarity matrix used below is Blosum62 - confirm the dataset type.
    FastAParser parser = new FastAParser(filepath);
    try
    {
        IList<ISequence> orgSequences = parser.Parse().ToList();
        List<ISequence> sequences = MsaUtils.UnAlign(orgSequences);
        int numberOfSequences = orgSequences.Count;

        // Renders a sequence as text by casting every symbol to a char.
        Func<ISequence, string> render = s => new string(s.Select(a => (char)a).ToArray());
        Action<IEnumerable<ISequence>> writeAll = list =>
        {
            foreach (ISequence s in list)
            {
                Console.WriteLine(render(s));
            }
        };

        Console.WriteLine("Original sequences are:");
        for (int i = 0; i < numberOfSequences; ++i)
        {
            Console.WriteLine(render(sequences[i]));
        }

        Console.WriteLine("Benchmark sequences are:");
        for (int i = 0; i < numberOfSequences; ++i)
        {
            Console.WriteLine(render(orgSequences[i]));
        }

        // Static aligner switches; they stay set after this test returns.
        PAMSAMMultipleSequenceAligner.FasterVersion = true;
        PAMSAMMultipleSequenceAligner.UseWeights = false;
        PAMSAMMultipleSequenceAligner.UseStageB = false;
        PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

        int gapOpenPenalty = -13;
        int gapExtendPenalty = -5;
        int kmerLength = 2;
        int numberOfDegrees = 2;     // Environment.ProcessorCount;
        int numberOfPartitions = 16; // Environment.ProcessorCount * 2;

        DistanceFunctionTypes distanceFunctionName = DistanceFunctionTypes.EuclideanDistance;
        UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
        ProfileAlignerNames profileAlignerName = ProfileAlignerNames.NeedlemanWunschProfileAligner;
        ProfileScoreFunctionNames profileProfileFunctionName = ProfileScoreFunctionNames.InnerProductFast;
        SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);

        PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner(
            sequences, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
            profileAlignerName, profileProfileFunctionName, similarityMatrix,
            gapOpenPenalty, gapExtendPenalty, numberOfPartitions, numberOfDegrees);

        Console.WriteLine("The number of partitions is: {0}", numberOfPartitions);
        Console.WriteLine("The number of degrees is: {0}", numberOfDegrees);
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences));
        Console.WriteLine("Benchmark SPS score is: {0}", MsaUtils.MultipleAlignmentScoreFunction(orgSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty));

        Console.WriteLine("Aligned sequences in stage 1: {0}", msa.AlignmentScoreA);
        writeAll(msa.AlignedSequencesA);
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesA, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesA, orgSequences));

        Console.WriteLine("Aligned sequences in stage 2: {0}", msa.AlignmentScoreB);
        writeAll(msa.AlignedSequencesB);
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesB, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesB, orgSequences));

        Console.WriteLine("Aligned sequences in stage 3: {0}", msa.AlignmentScoreC);
        writeAll(msa.AlignedSequencesC);
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesC, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesC, orgSequences));

        Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);
        writeAll(msa.AlignedSequences);
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences));
    }
    finally
    {
        // Release the parser even when parsing or alignment throws.
        parser.Dispose();
    }
}
// Aligns every file found in the sub-directories of testData\FASTA\RNA\k10 with
// PAMSAM, scores each result against the parsed input alignment (Q and TC) and
// writes the individual and average scores to the console.
public void TestMsaBenchMarkOnBralibase()
{
    List<float> allQ = new List<float>();
    List<float> allTC = new List<float>();

    string fileDirectory = @"testData\FASTA\RNA\k10";
    DirectoryInfo iD = new DirectoryInfo(fileDirectory);

    // Static aligner switches; they stay set after this test returns.
    PAMSAMMultipleSequenceAligner.FasterVersion = false;
    PAMSAMMultipleSequenceAligner.UseWeights = false;
    PAMSAMMultipleSequenceAligner.UseStageB = false;
    PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

    MoleculeType mt = MoleculeType.RNA;

    int gapOpenPenalty = -20;
    int gapExtendPenalty = -5;
    int kmerLength = 4;
    int numberOfDegrees = 2;     // Environment.ProcessorCount;
    int numberOfPartitions = 16; // Environment.ProcessorCount * 2;

    DistanceFunctionTypes distanceFunctionName = DistanceFunctionTypes.EuclideanDistance;
    UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
    ProfileAlignerNames profileAlignerName = ProfileAlignerNames.NeedlemanWunschProfileAligner;
    ProfileScoreFunctionNames profileProfileFunctionName = ProfileScoreFunctionNames.WeightedInnerProductCached;

    // Pick the similarity matrix that matches the molecule type.
    SimilarityMatrix similarityMatrix;
    switch (mt)
    {
        case MoleculeType.DNA:
            similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            break;
        case MoleculeType.RNA:
            similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
            break;
        case MoleculeType.Protein:
            similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
            break;
        default:
            // A specific exception type instead of the bare System.Exception.
            throw new InvalidDataException("Invalid molecular type");
    }

    foreach (DirectoryInfo datasetDirectory in iD.GetDirectories())
    {
        foreach (FileInfo datasetFile in datasetDirectory.GetFiles())
        {
            String filePath = datasetFile.FullName;
            Console.WriteLine(filePath);

            ISequenceParser parser = new FastaParser();
            IList<ISequence> orgSequences = parser.Parse(filePath);
            List<ISequence> sequences = MsaUtils.UnAlign(orgSequences);

            int numberOfSequences = orgSequences.Count;
            Console.WriteLine("The number of sequences is: {0}", numberOfSequences);
            Console.WriteLine("Original unaligned sequences are:");

            PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner(
                sequences, mt, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
                profileAlignerName, profileProfileFunctionName, similarityMatrix,
                gapOpenPenalty, gapExtendPenalty, numberOfPartitions, numberOfDegrees);

            Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);

            float scoreQ = MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences);
            float scoreTC = MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences);
            allQ.Add(scoreQ);
            allTC.Add(scoreTC);

            Console.WriteLine("Alignment score Q is: {0}", scoreQ);
            Console.WriteLine("Alignment score TC is: {0}", scoreTC);

            // Progress report with the running averages after every 1000 datasets.
            if (allQ.Count % 1000 == 0)
            {
                Console.WriteLine(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
                Console.WriteLine("average Q score is: {0}", MsaUtils.Mean(allQ.ToArray()));
                Console.WriteLine("average TC score is: {0}", MsaUtils.Mean(allTC.ToArray()));
            }
        }
    }

    Console.WriteLine("number of datasets is: {0}", allQ.Count);
    Console.WriteLine("average Q score is: {0}", MsaUtils.Mean(allQ.ToArray()));
    Console.WriteLine("average TC score is: {0}", MsaUtils.Mean(allTC.ToArray()));
}
// Parses the BOX032 benchmark alignment, writes its unaligned sequences to a
// temporary FASTA file, parses that file back, aligns the result with PAMSAM and
// writes the per-stage alignments with their Q / TC scores to the console.
// The temporary file is removed even when the test fails.
public void TestMsaBenchMarkLargeDataset()
{
    // Benchmark input; the path points into the Protein\Balibase test data.
    ISequenceParser parser = new FastaParser();
    string filepath = @"testdata\FASTA\Protein\Balibase\RV913\BOX032.xml.afa";
    IList<ISequence> orgSequences = parser.Parse(filepath);
    IList<ISequence> sequences = MsaUtils.UnAlign(orgSequences);
    int numberOfSequences = orgSequences.Count;

    String outputFilePath = @"tempBOX032.xml.afa";

    try
    {
        // Overwrite rather than append, so a file left behind by an aborted run
        // cannot leak extra sequences into this one.
        using (StreamWriter writer = new StreamWriter(outputFilePath, false))
        {
            foreach (ISequence sequence in sequences)
            {
                writer.WriteLine(">" + sequence.ID);

                // Write the sequence in lines of at most 60 symbols.
                BasicDerivedSequence derivedSeq = new BasicDerivedSequence(sequence, false, false, 0, 0);
                for (int lineStart = 0; lineStart < sequence.Count; lineStart += 60)
                {
                    derivedSeq.RangeStart = lineStart;
                    derivedSeq.RangeLength = Math.Min(60, sequence.Count - lineStart);
                    writer.WriteLine(derivedSeq.ToString());
                }
            }
        }

        sequences.Clear();
        sequences = parser.Parse(outputFilePath);

        Action<IEnumerable<ISequence>> writeAll = list =>
        {
            foreach (ISequence s in list)
            {
                Console.WriteLine(s.ToString());
            }
        };

        Console.WriteLine("Original sequences are:");
        for (int i = 0; i < numberOfSequences; ++i)
        {
            Console.WriteLine(sequences[i].ToString());
        }

        Console.WriteLine("Benchmark sequences are:");
        for (int i = 0; i < numberOfSequences; ++i)
        {
            Console.WriteLine(orgSequences[i].ToString());
        }

        // Static aligner switches; they stay set after this test returns.
        PAMSAMMultipleSequenceAligner.FasterVersion = false;
        PAMSAMMultipleSequenceAligner.UseWeights = false;
        PAMSAMMultipleSequenceAligner.UseStageB = true;
        PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

        int gapOpenPenalty = -13;
        int gapExtendPenalty = -5;
        int kmerLength = 3;
        int numberOfDegrees = 2;     // Environment.ProcessorCount;
        int numberOfPartitions = 16; // Environment.ProcessorCount * 2;

        SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
        DistanceFunctionTypes distanceFunctionName = DistanceFunctionTypes.EuclideanDistance;
        UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
        ProfileAlignerNames profileAlignerName = ProfileAlignerNames.NeedlemanWunschProfileAligner;
        ProfileScoreFunctionNames profileProfileFunctionName = ProfileScoreFunctionNames.WeightedInnerProduct;

        PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner(
            sequences, MoleculeType.Protein, kmerLength, distanceFunctionName,
            hierarchicalClusteringMethodName, profileAlignerName, profileProfileFunctionName,
            similarityMatrix, gapOpenPenalty, gapExtendPenalty, numberOfPartitions, numberOfDegrees);

        Console.WriteLine("Benchmark SPS score is: {0}", MsaUtils.MultipleAlignmentScoreFunction(orgSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty));

        Console.WriteLine("Aligned sequences in stage 1: {0}", msa.AlignmentScoreA);
        writeAll(msa.AlignedSequencesA);
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesA, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesA, orgSequences));

        Console.WriteLine("Aligned sequences in stage 2: {0}", msa.AlignmentScoreB);
        writeAll(msa.AlignedSequencesB);
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesB, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesB, orgSequences));

        Console.WriteLine("Aligned sequences in stage 3: {0}", msa.AlignmentScoreC);
        writeAll(msa.AlignedSequencesC);
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesC, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesC, orgSequences));

        Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);
        writeAll(msa.AlignedSequences);
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences));
    }
    finally
    {
        // Clean up the temporary file on success and on failure alike.
        if (File.Exists(outputFilePath))
        {
            File.Delete(outputFilePath);
        }
    }
}
// Measures the elapsed time and the change in GC.GetTotalMemory around one
// PAMSAM alignment of the reference file's sequences, then writes the summary
// header, the alignment score and every aligned sequence to the console.
public void PerformPAMSAMPerf()
{
    // Input file paths come from the XML test configuration.
    string refPath = Utility._xmlUtil.GetTextValue(Constants.PamsamNode, Constants.RefFilePathNode);
    string queryPath = Utility._xmlUtil.GetTextValue(Constants.PamsamNode, Constants.QueryFilePathNode);

    List<string> lstInputFiles = new List<string> { refPath, queryPath };

    // Parse the query file first, then the reference file.
    // NOTE(review): the parsed query sequences are never used afterwards - confirm whether that is intended.
    ISequenceParser fastaParser = new FastaParser();
    IList<ISequence> queryFileSequences = fastaParser.Parse(queryPath);
    IList<ISequence> referenceSequences = fastaParser.Parse(refPath);

    // Run the reference sequences through MsaUtils.UnAlign before aligning them.
    List<ISequence> unaligned = MsaUtils.UnAlign(referenceSequences);

    // Static aligner switches; they stay set after this method returns.
    PAMSAMMultipleSequenceAligner.FasterVersion = true;
    PAMSAMMultipleSequenceAligner.UseWeights = false;
    PAMSAMMultipleSequenceAligner.UseStageB = false;
    PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

    const int gapOpen = -13;
    const int gapExtend = -5;
    const int kmer = 2;
    const int degrees = 2;
    const int partitions = 4;

    SimilarityMatrix dnaMatrix =
        new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);

    // Start the timer, then take the memory baseline.
    Stopwatch timer = new Stopwatch();
    timer.Reset();
    timer.Start();
    long memoryBefore = GC.GetTotalMemory(true);

    // Parallel Option will only get set if the PAMSAMMultipleSequenceAligner is getting called
    // To test separately distance matrix, binary tree etc..
    // Set the parallel option using below ctor.
    msa = new PAMSAMMultipleSequenceAligner(
        unaligned,
        MoleculeType.DNA,
        kmer,
        DistanceFunctionTypes.EuclideanDistance,
        UpdateDistanceMethodsTypes.Average,
        ProfileAlignerNames.NeedlemanWunschProfileAligner,
        ProfileScoreFunctionNames.InnerProduct,
        dnaMatrix,
        gapOpen,
        gapExtend,
        partitions,
        degrees);

    timer.Stop();
    long memoryAfter = GC.GetTotalMemory(true);
    string memoryUsed = (memoryAfter - memoryBefore).ToString();

    // Report timing, memory delta, score and the aligned sequences.
    DisplayTestCaseHeader(lstInputFiles, timer, memoryUsed, "PAMSAM");

    Console.WriteLine(
        "PAMSAM SequenceAligner method, Alignment Score is : {0}",
        msa.AlignmentScore.ToString());

    int position = 0;
    foreach (ISequence aligned in msa.AlignedSequences)
    {
        Console.WriteLine("PAMSAM Aligned Seq {0}:{1}", position, aligned.ToString());
        position++;
    }
}
// Aligns every file in TestUtils\FASTA\Protein\Balibase\RV911\ with PAMSAM,
// scores each result against the parsed input alignment (Q and TC) and writes
// the individual and average scores to the console.
public void TestMsaBenchMark()
{
    string fileDirectory = @"TestUtils\FASTA\Protein\Balibase\RV911\";
    DirectoryInfo iD = new DirectoryInfo(fileDirectory);

    // Static aligner switches; they stay set after this test returns.
    PAMSAMMultipleSequenceAligner.FasterVersion = false;
    PAMSAMMultipleSequenceAligner.UseWeights = false;
    PAMSAMMultipleSequenceAligner.UseStageB = true;
    PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

    int gapOpenPenalty = -20;
    int gapExtendPenalty = -5;
    int kmerLength = 4;
    int numberOfDegrees = 2;     // Environment.ProcessorCount;
    int numberOfPartitions = 16; // Environment.ProcessorCount * 2;

    DistanceFunctionTypes distanceFunctionName = DistanceFunctionTypes.EuclideanDistance;
    UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
    ProfileAlignerNames profileAlignerName = ProfileAlignerNames.NeedlemanWunschProfileAligner;
    ProfileScoreFunctionNames profileProfileFunctionName = ProfileScoreFunctionNames.WeightedInnerProductCached;
    SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);

    List<float> allQ = new List<float>();
    List<float> allTC = new List<float>();

    foreach (FileInfo fi in iD.GetFiles())
    {
        String filePath = fi.FullName;
        Console.WriteLine(filePath);

        FastAParser parser = new FastAParser(filePath);
        try
        {
            parser.Alphabet = AmbiguousProteinAlphabet.Instance;
            IList<ISequence> orgSequences = parser.Parse().ToList();
            List<ISequence> sequences = MsaUtils.UnAlign(orgSequences);

            int numberOfSequences = orgSequences.Count;
            Console.WriteLine("The number of sequences is: {0}", numberOfSequences);

            // Only the section headers are written; the per-sequence dumps were disabled.
            Console.WriteLine("Original unaligned sequences are:");
            Console.WriteLine("Original aligned sequences are:");

            PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner(
                sequences, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
                profileAlignerName, profileProfileFunctionName, similarityMatrix,
                gapOpenPenalty, gapExtendPenalty, numberOfPartitions, numberOfDegrees);

            Console.WriteLine("Aligned sequences in stage 1: {0}", msa.AlignmentScoreA);
            Console.WriteLine("Aligned sequences in stage 2: {0}", msa.AlignmentScoreB);
            Console.WriteLine("Aligned sequences in stage 3: {0}", msa.AlignmentScoreC);
            Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);

            float scoreQ = MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences);
            float scoreTC = MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences);
            allQ.Add(scoreQ);
            allTC.Add(scoreTC);

            Console.WriteLine("Alignment score Q is: {0}", scoreQ);
            Console.WriteLine("Alignment score TC is: {0}", scoreTC);
        }
        finally
        {
            // Release the parser for this file even when parsing or alignment throws.
            parser.Dispose();
        }
    }

    Console.WriteLine("Number of datasets is: {0}", allQ.Count);
    Console.WriteLine("average Q score is: {0}", MsaUtils.Mean(allQ.ToArray()));
    Console.WriteLine("average TC score is: {0}", MsaUtils.Mean(allTC.ToArray()));
}
/// <summary>
/// Parses <c>TestUtils\BOX032Small.xml.afa</c> from the current directory, writes the
/// un-aligned sequences to a scratch FASTA file in 60-symbol lines, re-parses that file,
/// aligns the result with <see cref="PAMSAMMultipleSequenceAligner"/>, and prints every
/// alignment stage together with its Q and TC scores measured against the sequences
/// parsed from the original file.
/// </summary>
public void TestMsaBenchMarkLargeDataset()
{
    string filePathObj = Path.Combine(Directory.GetCurrentDirectory(), @"TestUtils\BOX032Small.xml.afa");
    String outputFilePath = @"tempBOX032.xml.afa";

    // Renders a sequence as text, one character per symbol.
    Func<ISequence, string> toText = s => new string(s.Select(a => (char)a).ToArray());

    FastAParser benchmarkParser = new FastAParser(filePathObj);
    FastAParser roundTripParser = null;
    try
    {
        IList<ISequence> orgSequences = benchmarkParser.Parse().ToList();
        IList<ISequence> sequences = MsaUtils.UnAlign(orgSequences);

        // Overwrite rather than append: a scratch file left behind by an earlier failed run
        // must not leak its sequences into this run.
        using (StreamWriter writer = new StreamWriter(outputFilePath, false))
        {
            foreach (ISequence sequence in sequences)
            {
                writer.WriteLine(">" + sequence.ID);
                for (int lineStart = 0; lineStart < sequence.Count; lineStart += 60)
                {
                    int lineLength = (int)Math.Min(60, sequence.Count - lineStart);
                    writer.WriteLine(new String(sequence.Skip(lineStart).Take(lineLength).Select(a => (char)a).ToArray()));
                }
            }
        }

        roundTripParser = new FastAParser(outputFilePath);
        sequences = roundTripParser.Parse().ToList();

        Console.WriteLine("Original sequences are:");
        foreach (ISequence sequence in sequences)
        {
            Console.WriteLine(toText(sequence));
        }

        Console.WriteLine("Benchmark sequences are:");
        foreach (ISequence sequence in orgSequences)
        {
            Console.WriteLine(toText(sequence));
        }

        PAMSAMMultipleSequenceAligner.FasterVersion = false;
        PAMSAMMultipleSequenceAligner.UseWeights = false;
        PAMSAMMultipleSequenceAligner.UseStageB = true;
        PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

        int gapOpenPenalty = -13;
        int gapExtendPenalty = -5;
        int kmerLength = 3;
        int numberOfDegrees = 2;
        int numberOfPartitions = 16;

        SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
        DistanceFunctionTypes distanceFunctionName = DistanceFunctionTypes.EuclideanDistance;
        UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
        ProfileAlignerNames profileAlignerName = ProfileAlignerNames.NeedlemanWunschProfileAligner;
        ProfileScoreFunctionNames profileProfileFunctionName = ProfileScoreFunctionNames.WeightedInnerProduct;

        PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner(
            sequences,
            kmerLength,
            distanceFunctionName,
            hierarchicalClusteringMethodName,
            profileAlignerName,
            profileProfileFunctionName,
            similarityMatrix,
            gapOpenPenalty,
            gapExtendPenalty,
            numberOfPartitions,
            numberOfDegrees);

        Console.WriteLine("Benchmark SPS score is: {0}", MsaUtils.MultipleAlignmentScoreFunction(orgSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty));

        Console.WriteLine("Aligned sequences in stage 1: {0}", msa.AlignmentScoreA);
        foreach (ISequence aligned in msa.AlignedSequencesA)
        {
            Console.WriteLine(toText(aligned));
        }
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesA, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesA, orgSequences));

        Console.WriteLine("Aligned sequences in stage 2: {0}", msa.AlignmentScoreB);
        foreach (ISequence aligned in msa.AlignedSequencesB)
        {
            Console.WriteLine(toText(aligned));
        }
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesB, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesB, orgSequences));

        Console.WriteLine("Aligned sequences in stage 3: {0}", msa.AlignmentScoreC);
        foreach (ISequence aligned in msa.AlignedSequencesC)
        {
            Console.WriteLine(toText(aligned));
        }
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesC, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesC, orgSequences));

        Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);
        foreach (ISequence aligned in msa.AlignedSequences)
        {
            Console.WriteLine(toText(aligned));
        }
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences));
    }
    finally
    {
        // Both parsers are released and the scratch file removed on every exit path.
        // File.Delete does not throw when the file was never created.
        if (roundTripParser != null)
        {
            roundTripParser.Dispose();
        }

        benchmarkParser.Dispose();
        File.Delete(outputFilePath);
    }
}
/// <summary>
/// Checks that a default-constructed <see cref="PAMSAMMultipleSequenceAligner"/>
/// reports "PAMSAM (MUSCLE)" as its name.
/// </summary>
public void TestName()
{
    var actualName = new PAMSAMMultipleSequenceAligner().Name;

    Assert.AreEqual("PAMSAM (MUSCLE)", actualName);
}
/// <summary>
/// Aligns <c>testdata\FASTA\RunningTime\BOX246.xml.afa</c> as
/// <see cref="MoleculeType.Protein"/> with the faster PAMSAM variant and prints the
/// input sequences, every alignment stage, and the Q / TC scores of each stage measured
/// against the sequences as parsed from the file.
/// </summary>
/// <exception cref="InvalidDataException">
/// The molecule type has no similarity matrix assigned in this test.
/// </exception>
public void TestMuscleMultipleSequenceAlignmentRunningTime()
{
    string filepath = @"testdata\FASTA\RunningTime\BOX246.xml.afa";
    MoleculeType mt = MoleculeType.Protein;

    ISequenceParser parser = new FastaParser();
    try
    {
        IList<ISequence> orgSequences = parser.Parse(filepath);
        List<ISequence> sequences = MsaUtils.UnAlign(orgSequences);

        Console.WriteLine("Original sequences are:");
        foreach (ISequence sequence in sequences)
        {
            Console.WriteLine(sequence.ToString());
        }

        Console.WriteLine("Benchmark sequences are:");
        foreach (ISequence sequence in orgSequences)
        {
            Console.WriteLine(sequence.ToString());
        }

        PAMSAMMultipleSequenceAligner.FasterVersion = true;
        PAMSAMMultipleSequenceAligner.UseWeights = false;
        PAMSAMMultipleSequenceAligner.UseStageB = false;
        PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

        int gapOpenPenalty = -13;
        int gapExtendPenalty = -5;
        int kmerLength = 2;
        int numberOfDegrees = 2;
        int numberOfPartitions = 16;

        DistanceFunctionTypes distanceFunctionName = DistanceFunctionTypes.EuclideanDistance;
        UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
        ProfileAlignerNames profileAlignerName = ProfileAlignerNames.NeedlemanWunschProfileAligner;
        ProfileScoreFunctionNames profileProfileFunctionName = ProfileScoreFunctionNames.InnerProductFast;

        // The substitution matrix follows the molecule type chosen above.
        SimilarityMatrix similarityMatrix;
        switch (mt)
        {
            case MoleculeType.DNA:
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
                break;
            case MoleculeType.RNA:
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
                break;
            case MoleculeType.Protein:
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
                break;
            default:
                throw new InvalidDataException("Invalid molecular type");
        }

        PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner(
            sequences,
            mt,
            kmerLength,
            distanceFunctionName,
            hierarchicalClusteringMethodName,
            profileAlignerName,
            profileProfileFunctionName,
            similarityMatrix,
            gapOpenPenalty,
            gapExtendPenalty,
            numberOfPartitions,
            numberOfDegrees);

        Console.WriteLine("The number of partitions is: {0}", numberOfPartitions);
        Console.WriteLine("The number of degrees is: {0}", numberOfDegrees);
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences));
        Console.WriteLine("Benchmark SPS score is: {0}", MsaUtils.MultipleAlignmentScoreFunction(orgSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty));

        Console.WriteLine("Aligned sequences in stage 1: {0}", msa.AlignmentScoreA);
        foreach (ISequence aligned in msa.AlignedSequencesA)
        {
            Console.WriteLine(aligned.ToString());
        }
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesA, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesA, orgSequences));

        Console.WriteLine("Aligned sequences in stage 2: {0}", msa.AlignmentScoreB);
        foreach (ISequence aligned in msa.AlignedSequencesB)
        {
            Console.WriteLine(aligned.ToString());
        }
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesB, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesB, orgSequences));

        Console.WriteLine("Aligned sequences in stage 3: {0}", msa.AlignmentScoreC);
        foreach (ISequence aligned in msa.AlignedSequencesC)
        {
            Console.WriteLine(aligned.ToString());
        }
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesC, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesC, orgSequences));

        Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);
        foreach (ISequence aligned in msa.AlignedSequences)
        {
            Console.WriteLine(aligned.ToString());
        }
        Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences));
        Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences));
    }
    finally
    {
        // Disposed last, after every use of the parsed sequences, and on every exit path.
        ((FastaParser)parser).Dispose();
    }
}
/// <summary>
/// Walks every file in each sub-directory of <c>TestUtils\Fasta\RNA\k10</c>, aligns its
/// un-aligned sequences with <see cref="PAMSAMMultipleSequenceAligner"/>, and prints the
/// Q and TC scores measured against the sequences as parsed from the file. Running
/// averages are printed after every 1000 datasets and once more at the end.
/// </summary>
public void TestMsaBenchMarkOnBralibase()
{
    var qScores = new List<float>();
    var tcScores = new List<float>();

    var rootDirectory = new DirectoryInfo(@"TestUtils\Fasta\RNA\k10".TestDir());

    PAMSAMMultipleSequenceAligner.FasterVersion = false;
    PAMSAMMultipleSequenceAligner.UseWeights = false;
    PAMSAMMultipleSequenceAligner.UseStageB = false;
    PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

    var similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);

    const int gapOpenPenalty = -20;
    const int gapExtendPenalty = -5;
    const int kmerLength = 4;
    const int numberOfDegrees = 2;
    const int numberOfPartitions = 16;

    var distanceFunctionName = DistanceFunctionTypes.EuclideanDistance;
    var hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
    var profileAlignerName = ProfileAlignerNames.NeedlemanWunschProfileAligner;
    var profileProfileFunctionName = ProfileScoreFunctionNames.WeightedInnerProductCached;

    // Lazily flattened, so each sub-directory is listed only when the walk reaches it.
    IEnumerable<FileInfo> datasetFiles = rootDirectory
        .GetDirectories()
        .SelectMany(subDirectory => subDirectory.GetFiles());

    foreach (FileInfo datasetFile in datasetFiles)
    {
        String filePath = datasetFile.FullName;
        Console.WriteLine($"Loading: {filePath}");

        var parser = new FastAParser();
        parser.Alphabet = AmbiguousRnaAlphabet.Instance;
        var orgSequences = parser.Parse(filePath).ToList();
        var sequences = MsaUtils.UnAlign(orgSequences);

        Console.WriteLine("The number of sequences is: {0}", orgSequences.Count);

        var msa = new PAMSAMMultipleSequenceAligner(
            sequences,
            kmerLength,
            distanceFunctionName,
            hierarchicalClusteringMethodName,
            profileAlignerName,
            profileProfileFunctionName,
            similarityMatrix,
            gapOpenPenalty,
            gapExtendPenalty,
            numberOfPartitions,
            numberOfDegrees);

        Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);

        float scoreQ = MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences);
        float scoreTC = MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences);
        Console.WriteLine("Alignment score Q is: {0}", scoreQ);
        Console.WriteLine("Alignment score TC is: {0}", scoreTC);

        qScores.Add(scoreQ);
        tcScores.Add(scoreTC);

        bool atCheckpoint = qScores.Count % 1000 == 0;
        if (atCheckpoint)
        {
            Console.WriteLine(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
            Console.WriteLine("average Q score is: {0}", MsaUtils.Mean(qScores.ToArray()));
            Console.WriteLine("average TC score is: {0}", MsaUtils.Mean(tcScores.ToArray()));
        }
    }

    Console.WriteLine("number of datasets is: {0}", qScores.Count);
    Console.WriteLine("average Q score is: {0}", MsaUtils.Mean(qScores.ToArray()));
    Console.WriteLine("average TC score is: {0}", MsaUtils.Mean(tcScores.ToArray()));
}
/// <summary>
/// Walks every file two directory levels below <c>TestUtils\Fasta\Protein\SABmark</c>,
/// aligns its un-aligned sequences with <see cref="PAMSAMMultipleSequenceAligner"/>, and
/// prints the Q and TC scores measured against the sequences as parsed from the file.
/// Running averages are printed after every 1000 datasets and once more at the end.
/// </summary>
public void TestMsaBenchMarkOnSABmark()
{
    List<float> allQ = new List<float>();
    List<float> allTC = new List<float>();

    string fileDirectory = @"TestUtils\Fasta\Protein\SABmark".TestDir();
    DirectoryInfo iD = new DirectoryInfo(fileDirectory);

    PAMSAMMultipleSequenceAligner.FasterVersion = false;
    PAMSAMMultipleSequenceAligner.UseWeights = false;
    PAMSAMMultipleSequenceAligner.UseStageB = true;
    PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

    int gapOpenPenalty = -13;
    int gapExtendPenalty = -5;
    int kmerLength = 3;
    int numberOfDegrees = 2;
    int numberOfPartitions = 16;

    DistanceFunctionTypes distanceFunctionName = DistanceFunctionTypes.EuclideanDistance;
    UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
    ProfileAlignerNames profileAlignerName = ProfileAlignerNames.NeedlemanWunschProfileAligner;
    ProfileScoreFunctionNames profileProfileFunctionName = ProfileScoreFunctionNames.WeightedInnerProduct;
    SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);

    // Dataset files sit two directory levels below the root. The sequence is lazy, so
    // each directory is listed only when the walk reaches it.
    IEnumerable<FileInfo> datasetFiles = iD
        .GetDirectories()
        .SelectMany(group => group.GetDirectories())
        .SelectMany(dataset => dataset.GetFiles());

    foreach (FileInfo datasetFile in datasetFiles)
    {
        String filePath = datasetFile.FullName;
        Console.WriteLine(filePath);

        FastAParser parser = new FastAParser();
        IList<ISequence> orgSequences = parser.Parse(filePath).ToList();
        List<ISequence> sequences = MsaUtils.UnAlign(orgSequences);
        Console.WriteLine("The number of sequences is: {0}", orgSequences.Count);

        PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner(
            sequences,
            kmerLength,
            distanceFunctionName,
            hierarchicalClusteringMethodName,
            profileAlignerName,
            profileProfileFunctionName,
            similarityMatrix,
            gapOpenPenalty,
            gapExtendPenalty,
            numberOfPartitions,
            numberOfDegrees);

        Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);

        float scoreQ = MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences);
        float scoreTC = MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences);
        allQ.Add(scoreQ);
        allTC.Add(scoreTC);

        Console.WriteLine("Alignment score Q is: {0}", scoreQ);
        Console.WriteLine("Alignment score TC is: {0}", scoreTC);

        if (allQ.Count % 1000 == 0)
        {
            Console.WriteLine(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
            Console.WriteLine("average Q score is: {0}", MsaUtils.Mean(allQ.ToArray()));
            Console.WriteLine("average TC score is: {0}", MsaUtils.Mean(allTC.ToArray()));
        }
    }

    Console.WriteLine("average Q score is: {0}", MsaUtils.Mean(allQ.ToArray()));
    Console.WriteLine("average TC score is: {0}", MsaUtils.Mean(allTC.ToArray()));
}
/// <summary>
/// Parses <c>TestUtils\BOX032Small.xml.afa</c>, writes the un-aligned sequences to a
/// temporary FASTA file in 60-symbol lines, re-parses that file, aligns the result with
/// <see cref="PAMSAMMultipleSequenceAligner"/>, and prints every alignment stage together
/// with its Q and TC scores measured against the sequences parsed from the original file.
/// </summary>
public void TestMsaBenchMarkLargeDataset()
{
    const int SymbolsPerLine = 60;

    string benchmarkPath = @"TestUtils\BOX032Small.xml.afa".TestDir();
    var orgSequences = new FastAParser().Parse(benchmarkPath).ToList();
    var sequences = MsaUtils.UnAlign(orgSequences);

    Assert.AreEqual(orgSequences.Count, sequences.Count);

    // Round-trip the un-aligned sequences through a temporary FASTA file.
    string tempFastaPath = Path.GetTempFileName();
    try
    {
        using (var fastaWriter = new StreamWriter(tempFastaPath, true))
        {
            foreach (ISequence entry in sequences)
            {
                fastaWriter.WriteLine(">" + entry.ID);

                int offset = 0;
                while (offset < entry.Count)
                {
                    int chunkLength = (int)Math.Min(SymbolsPerLine, entry.Count - offset);
                    char[] chunk = entry
                        .Skip(offset)
                        .Take(chunkLength)
                        .Select(symbol => (char)symbol)
                        .ToArray();
                    fastaWriter.WriteLine(new String(chunk));
                    offset += SymbolsPerLine;
                }

                fastaWriter.Flush();
            }
        }

        sequences = new FastAParser().Parse(tempFastaPath).ToList();
    }
    finally
    {
        File.Delete(tempFastaPath);
    }

    Console.WriteLine("Original sequences are:");
    foreach (var reparsed in sequences)
    {
        Console.WriteLine(reparsed);
    }

    Console.WriteLine("Benchmark sequences are:");
    foreach (var benchmark in orgSequences)
    {
        Console.WriteLine(benchmark);
    }

    // Aligner configuration.
    PAMSAMMultipleSequenceAligner.FasterVersion = false;
    PAMSAMMultipleSequenceAligner.UseWeights = false;
    PAMSAMMultipleSequenceAligner.UseStageB = true;
    PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

    int gapOpenPenalty = -13;
    int gapExtendPenalty = -5;
    int kmerLength = 3;
    int numberOfDegrees = 2;
    int numberOfPartitions = 16;

    var similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
    var distanceFunctionName = DistanceFunctionTypes.EuclideanDistance;
    var hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
    var profileAlignerName = ProfileAlignerNames.NeedlemanWunschProfileAligner;
    var profileProfileFunctionName = ProfileScoreFunctionNames.WeightedInnerProduct;

    var msa = new PAMSAMMultipleSequenceAligner(
        sequences,
        kmerLength,
        distanceFunctionName,
        hierarchicalClusteringMethodName,
        profileAlignerName,
        profileProfileFunctionName,
        similarityMatrix,
        gapOpenPenalty,
        gapExtendPenalty,
        numberOfPartitions,
        numberOfDegrees);

    Console.WriteLine("Benchmark SPS score is: {0}", MsaUtils.MultipleAlignmentScoreFunction(orgSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty));

    // Stage 1
    Console.WriteLine("Aligned sequences in stage 1: {0}", msa.AlignmentScoreA);
    foreach (var aligned in msa.AlignedSequencesA)
    {
        Console.WriteLine(aligned);
    }
    Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesA, orgSequences));
    Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesA, orgSequences));

    // Stage 2
    Console.WriteLine("Aligned sequences in stage 2: {0}", msa.AlignmentScoreB);
    foreach (var aligned in msa.AlignedSequencesB)
    {
        Console.WriteLine(aligned);
    }
    Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesB, orgSequences));
    Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesB, orgSequences));

    // Stage 3
    Console.WriteLine("Aligned sequences in stage 3: {0}", msa.AlignmentScoreC);
    foreach (var aligned in msa.AlignedSequencesC)
    {
        Console.WriteLine(aligned);
    }
    Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesC, orgSequences));
    Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesC, orgSequences));

    // Final alignment
    Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);
    foreach (var aligned in msa.AlignedSequences)
    {
        Console.WriteLine(aligned);
    }
    Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences));
    Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences));
}