/// <summary>
/// Initializes a new instance of the SmithWatermanProfileAlignerParallel class.
/// All arguments are handed to the base-class constructor unchanged; this
/// constructor does no further work of its own.
/// </summary>
/// <param name="similarityMatrix">Similarity matrix forwarded to the base class.</param>
/// <param name="profileScoreFunctionName">Member of ProfileScoreFunctionNames forwarded to the base class.</param>
/// <param name="gapOpenPenalty">Gap open penalty (documented convention: a negative integer).</param>
/// <param name="gapExtensionPenalty">Gap extension penalty (documented convention: a negative integer).</param>
/// <param name="numberOfPartitions">Number of partitions (documented convention: a positive integer).</param>
public SmithWatermanProfileAlignerParallel(
    SimilarityMatrix similarityMatrix,
    ProfileScoreFunctionNames profileScoreFunctionName,
    int gapOpenPenalty,
    int gapExtensionPenalty,
    int numberOfPartitions)
    : base(
        similarityMatrix,
        profileScoreFunctionName,
        gapOpenPenalty,
        gapExtensionPenalty,
        numberOfPartitions)
{
}
/// <summary>
/// Initializes a new instance of the NeedlemanWunschProfileAlignerSerial class.
/// All arguments are handed to the base-class constructor unchanged; this
/// constructor does no further work of its own.
/// </summary>
/// <param name="similarityMatrix">Similarity matrix forwarded to the base class.</param>
/// <param name="profileScoreFunctionName">Member of ProfileScoreFunctionNames forwarded to the base class.</param>
/// <param name="gapOpenPenalty">Gap open penalty (documented convention: a negative integer).</param>
/// <param name="gapExtensionPenalty">Gap extension penalty (documented convention: a negative integer).</param>
/// <param name="numberOfPartitions">Number of partitions (documented convention: a positive integer).</param>
public NeedlemanWunschProfileAlignerSerial(
    SimilarityMatrix similarityMatrix,
    ProfileScoreFunctionNames profileScoreFunctionName,
    int gapOpenPenalty,
    int gapExtensionPenalty,
    int numberOfPartitions)
    : base(
        similarityMatrix,
        profileScoreFunctionName,
        gapOpenPenalty,
        gapExtensionPenalty,
        numberOfPartitions)
{
}
/// <summary>
/// Scores two already-aligned sequences column by column with an affine gap penalty.
/// Columns where both sequences hold a gap contribute nothing and do not interrupt a
/// running gap. A column with a gap in exactly one sequence costs
/// <paramref name="gapOpenPenalty"/> when it starts a gap run in that sequence and
/// <paramref name="gapExtensionPenalty"/> when it continues one. A column with a residue
/// in both sequences adds the similarity-matrix entry for the pair and ends any gap run.
/// </summary>
/// <param name="sequenceA">First aligned sequence.</param>
/// <param name="sequenceB">Second aligned sequence; must have the same Count as <paramref name="sequenceA"/>.</param>
/// <param name="similarityMatrix">Matrix indexed by the two residues of a column.</param>
/// <param name="gapOpenPenalty">Added once for the first column of each gap run.</param>
/// <param name="gapExtensionPenalty">Added for every further column of a gap run.</param>
/// <returns>The accumulated score over all columns.</returns>
/// <exception cref="Exception">The two sequences differ in length.</exception>
public static float PairWiseScoreFunction(ISequence sequenceA, ISequence sequenceB, SimilarityMatrix similarityMatrix, int gapOpenPenalty, int gapExtensionPenalty)
{
    if (sequenceA.Count != sequenceB.Count)
    {
        // Exception type and message are kept as-is so callers that expect exactly
        // this exception keep working.
        throw new Exception("Unaligned sequences");
    }

    float result = 0;

    // True while the previous scored column was part of a gap run in A / in B.
    bool isGapA = false;
    bool isGapB = false;

    for (int i = 0; i < sequenceA.Count; ++i)
    {
        // Evaluate each gap test once per column instead of up to three times.
        bool gapInA = sequenceA.Alphabet.CheckIsGap(sequenceA[i]);
        bool gapInB = sequenceB.Alphabet.CheckIsGap(sequenceB[i]);

        if (gapInA && gapInB)
        {
            // Gap/gap columns are ignored and leave the gap-run state untouched.
            continue;
        }

        if (gapInA)
        {
            // A residue in B terminates any gap run in B.
            isGapB = false;

            if (isGapA)
            {
                result += gapExtensionPenalty;
            }
            else
            {
                result += gapOpenPenalty;
                isGapA = true;
            }

            continue;
        }

        if (gapInB)
        {
            // A residue in A terminates any gap run in A.
            isGapA = false;

            if (isGapB)
            {
                result += gapExtensionPenalty;
            }
            else
            {
                result += gapOpenPenalty;
                isGapB = true;
            }

            continue;
        }

        // Residue/residue column: both gap runs end here. Without this reset a gap that
        // follows a matched column was charged as an extension instead of a new opening.
        isGapA = false;
        isGapB = false;
        result += similarityMatrix[sequenceA[i], sequenceB[i]];
    }

    return result;
}
/// <summary>
/// Initializes a new instance of the DynamicProgrammingPairwiseAligner class.
/// Installs a diagonal similarity matrix built from the values (2, -2), a gap open
/// cost of -8 and a gap extension cost of -1. Callers are expected to replace these
/// defaults with values suited to their own sequences.
/// </summary>
protected DynamicProgrammingPairwiseAligner()
{
    // Default scoring: a diagonal matrix, which per the original author's note is
    // usable for DNA and RNA as well as protein input.
    const int diagonalValue = 2;
    const int offDiagonalValue = -2;
    this.InternalSimilarityMatrix = new DiagonalSimilarityMatrix(diagonalValue, offDiagonalValue);

    // Default gap costs.
    this.GapOpenCost = -8;
    this.GapExtensionCost = -1;
}
/// <summary>
/// Aligns two short protein sequences with PairwiseOverlapAligner (Blosum50,
/// gap open -8, gap extension -1), logs the outcome and compares it with the
/// hard-coded expected alignment.
/// </summary>
public void PairwiseOverlapProteinSeqAffineGap()
{
    // Inputs.
    var firstInput = new Sequence(Alphabets.Protein, "HEAGAWGHEE");
    var secondInput = new Sequence(Alphabets.Protein, "PAWHEAE");
    var blosum50 = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum50);

    // Aligner configured for affine gaps.
    var aligner = new PairwiseOverlapAligner
    {
        SimilarityMatrix = blosum50,
        GapOpenCost = -8,
        GapExtensionCost = -1
    };

    IList<IPairwiseSequenceAlignment> result = aligner.Align(firstInput, secondInput);

    // Log the configuration that was used and every alignment produced.
    ApplicationLog.WriteLine(string.Format(
        (IFormatProvider)null,
        "{0}, Affine; Matrix {1}; GapOpenCost {2}; GapExtenstionCost {3}",
        aligner.Name,
        aligner.SimilarityMatrix.Name,
        aligner.GapOpenCost,
        aligner.GapExtensionCost));

    foreach (IPairwiseSequenceAlignment alignment in result)
    {
        var best = alignment.PairwiseAlignedSequences[0];

        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "score {0}", best.Score));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 0 {0}", alignment.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 1 {0}", alignment.SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 0 {0}", best.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 1 {0}", best.SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "consesus {0}", best.Consensus.ToString()));
    }

    // Expected alignment.
    var expectedAligned = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.Protein, "GAWGHEE"),
        SecondSequence = new Sequence(Alphabets.Protein, "PAW-HEA"),
        Consensus = new Sequence(Alphabets.AmbiguousProtein, "XAWGHEX"),
        Score = 25,
        FirstOffset = 0,
        SecondOffset = 3
    };

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(expectedAligned);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Initializes a new instance of the NucmerPairwiseAligner class with default settings:
/// the standard diagonal score matrix, the default gap costs and MUM length, the
/// ClusterBuilder defaults for the clustering properties, and the
/// ModifiedSmithWaterman default break length.
/// </summary>
public NucmerPairwiseAligner()
{
    // Scoring matrix.
    this.SimilarityMatrix = new SimilarityMatrix(
        SimilarityMatrix.StandardSimilarityMatrix.DiagonalScoreMatrix);

    // Gap costs and minimum MUM length.
    this.GapOpenCost = DefaultGapOpenCost;
    this.GapExtensionCost = DefaultGapExtensionCost;
    this.LengthOfMUM = DefaultLengthOfMUM;

    // Clustering parameters, taken from ClusterBuilder's defaults.
    this.FixedSeparation = ClusterBuilder.DefaultFixedSeparation;
    this.MaximumSeparation = ClusterBuilder.DefaultMaximumSeparation;
    this.MinimumScore = ClusterBuilder.DefaultMinimumScore;
    this.SeparationFactor = ClusterBuilder.DefaultSeparationFactor;

    // Break length, taken from ModifiedSmithWaterman's default.
    this.BreakLength = ModifiedSmithWaterman.DefaultBreakLength;
}
/// <summary>
/// Runs a multiple sequence alignment over <paramref name="sequences"/> with
/// PAMSAMMultipleSequenceAligner, using a fixed parameter set: AmbiguousDna
/// similarity matrix, gap open -4, gap extension -1, k-mer length 3, Euclidean
/// distance, average-linkage update, Needleman-Wunsch profile aligner and
/// weighted-inner-product profile scoring.
/// </summary>
/// <param name="sequences">List of sequences to align.</param>
/// <returns>The aligner's AlignedSequences.</returns>
public static IList<ISequence> DoMultipleSequenceAlignment(List<ISequence> sequences)
{
    const int kmerLength = 3;
    const int gapOpenPenalty = -4;
    const int gapExtendPenalty = -1;

    // Partition count and degree of parallelism both scale with the processor count.
    int processorCount = Environment.ProcessorCount;

    var aligner = new PAMSAMMultipleSequenceAligner(
        sequences,
        kmerLength,
        DistanceFunctionTypes.EuclideanDistance,
        UpdateDistanceMethodsTypes.Average,
        ProfileAlignerNames.NeedlemanWunschProfileAligner,
        ProfileScoreFunctionNames.WeightedInnerProduct,
        new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna),
        gapOpenPenalty,
        gapExtendPenalty,
        processorCount * 2,
        processorCount);

    return aligner.AlignedSequences;
}
/// <summary>
/// Initializes a new instance of the ProgressiveAligner class: creates the serial
/// profile aligner selected by <paramref name="profileAlignerName"/>, configures it
/// with the given scoring parameters and starts with an empty list of aligned sequences.
/// </summary>
/// <param name="profileAlignerName">ProfileAlignerNames member selecting the profile aligner.</param>
/// <param name="similarityMatrix">Similarity matrix assigned to the profile aligner.</param>
/// <param name="gapOpenPenalty">Gap open cost assigned to the profile aligner (documented convention: negative).</param>
/// <param name="gapExtendPenalty">Gap extension cost assigned to the profile aligner (documented convention: negative).</param>
/// <exception cref="Exception">
/// <paramref name="profileAlignerName"/> is neither NeedlemanWunschProfileAligner nor SmithWatermanProfileAligner.
/// </exception>
public ProgressiveAligner(
    ProfileAlignerNames profileAlignerName,
    SimilarityMatrix similarityMatrix,
    int gapOpenPenalty,
    int gapExtendPenalty)
{
    // Pick the profile aligner implementation.
    if (profileAlignerName == ProfileAlignerNames.NeedlemanWunschProfileAligner)
    {
        _profileAligner = new NeedlemanWunschProfileAlignerSerial();
    }
    else if (profileAlignerName == ProfileAlignerNames.SmithWatermanProfileAligner)
    {
        _profileAligner = new SmithWatermanProfileAlignerSerial();
    }
    else
    {
        throw new Exception("Invalid profile aligner name");
    }

    // Apply the scoring parameters to it.
    _profileAligner.SimilarityMatrix = similarityMatrix;
    _profileAligner.GapOpenCost = gapOpenPenalty;
    _profileAligner.GapExtensionCost = gapExtendPenalty;

    _alignedSequences = new List<ISequence>();
}
/// <summary>
/// Returns MsaUtils.SymmetrizedEntropy for one profile column of alignment A
/// and one profile column of alignment B.
/// </summary>
/// <param name="similarityMatrix">Not used by this method.</param>
/// <param name="profileIndexA">Index into the ProfilesMatrix of the first profile alignment.</param>
/// <param name="profileIndexB">Index into the ProfilesMatrix of the second profile alignment.</param>
/// <returns>The value computed by MsaUtils.SymmetrizedEntropy for the two columns.</returns>
protected float SymmetrizedEntropy(SimilarityMatrix similarityMatrix, int profileIndexA, int profileIndexB)
{
    var columnA = _profileAlignmentA.ProfilesMatrix[profileIndexA];
    var columnB = _profileAlignmentB.ProfilesMatrix[profileIndexB];

    return MsaUtils.SymmetrizedEntropy(columnA, columnB);
}
/// <summary>
/// Checks the reference sequence and the query sequences and throws on the first
/// problem found. When no similarity matrix has been set, one is chosen from the
/// reference alphabet (AmbiguousRna for RNA, AmbiguousDna for DNA) as a side effect.
/// </summary>
/// <param name="referenceSequence">Reference sequence.</param>
/// <param name="querySequenceList">List of input sequences.</param>
/// <returns>Always true; every failed check throws rather than returning false.</returns>
/// <exception cref="ArgumentNullException">
/// The reference sequence, the query list, or one of the query sequences is null.
/// </exception>
/// <exception cref="ArgumentException">
/// The reference alphabet is neither DNA nor RNA, a sequence does not validate against the
/// similarity matrix, a query alphabet differs from the reference alphabet, the reference
/// or every query is shorter than LengthOfMUM, or LengthOfMUM is less than 1.
/// </exception>
private bool Validate(
    ISequence referenceSequence,
    IEnumerable<ISequence> querySequenceList)
{
    string message;

    if (referenceSequence == null)
    {
        message = Properties.Resource.ReferenceSequenceCannotBeNull;
        Debug.WriteLine(message);
        throw new ArgumentNullException("referenceSequence");
    }

    if (querySequenceList == null)
    {
        message = Properties.Resource.QueryListCannotBeNull;
        Debug.WriteLine(message);
        throw new ArgumentNullException("querySequenceList");
    }

    // Only DNA and RNA references are accepted.
    var referenceAlphabet = referenceSequence.Alphabet;
    if (referenceAlphabet != Alphabets.DNA && referenceAlphabet != Alphabets.RNA)
    {
        message = string.Format(
            CultureInfo.CurrentCulture,
            Properties.Resource.OnlyDNAOrRNAInput,
            "MUMmer");
        Debug.WriteLine(message);
        throw new ArgumentException(message, "referenceSequence");
    }

    // Fall back to a default similarity matrix that matches the reference alphabet.
    if (SimilarityMatrix == null)
    {
        if (referenceAlphabet == Alphabets.RNA)
        {
            SimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
        }
        else if (referenceAlphabet == Alphabets.DNA)
        {
            SimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
        }
    }

    if (!SimilarityMatrix.ValidateSequence(referenceSequence))
    {
        message = Properties.Resource.FirstInputSequenceMismatchSimilarityMatrix;
        Debug.WriteLine(message);
        throw new ArgumentException(message, "referenceSequence");
    }

    var minimumLength = this.LengthOfMUM;

    if (referenceSequence.Count < minimumLength)
    {
        message = String.Format(
            CultureInfo.CurrentCulture,
            Properties.Resource.InputSequenceMustBeGreaterThanMUM,
            minimumLength);
        Debug.WriteLine(message);
        throw new ArgumentException(message, "referenceSequence");
    }

    // Each query must be non-null, share the reference alphabet and fit the matrix;
    // at least one of them must be long enough to contain a MUM.
    bool anyQueryLongEnough = false;
    foreach (ISequence query in querySequenceList)
    {
        if (query == null)
        {
            message = Properties.Resource.QuerySequenceCannotBeNull;
            Debug.WriteLine(message);
            throw new ArgumentNullException("querySequenceList", message);
        }

        if (referenceAlphabet != query.Alphabet)
        {
            message = Properties.Resource.InputAlphabetsMismatch;
            Debug.WriteLine(message);
            throw new ArgumentException(message);
        }

        if (!SimilarityMatrix.ValidateSequence(query))
        {
            message = Properties.Resource.SecondInputSequenceMismatchSimilarityMatrix;
            Debug.WriteLine(message);
            throw new ArgumentException(message, "querySequenceList");
        }

        if (query.Count >= minimumLength)
        {
            anyQueryLongEnough = true;
        }
    }

    if (!anyQueryLongEnough)
    {
        message = String.Format(
            CultureInfo.CurrentCulture,
            Properties.Resource.InputSequenceMustBeGreaterThanMUM,
            minimumLength);
        Debug.WriteLine(message);
        throw new ArgumentException(message, "querySequenceList");
    }

    if (minimumLength < 1)
    {
        message = Properties.Resource.MUMLengthTooSmall;
        Debug.WriteLine(message);
        throw new ArgumentException(message);
    }

    return true;
}
/// <summary>
/// Validates PairwiseOverlapAlignment algorithm for the parameters passed.
/// Builds the two input sequences (from FASTA files or from inline xml text), builds the
/// similarity matrix, runs the requested Align/AlignSimple overload and compares the
/// result with the expected sequences and score read from the xml configuration.
/// </summary>
/// <param name="nodeName">Xml node name</param>
/// <param name="isTextFile">Is text file an input.</param>
/// <param name="caseType">Case Type</param>
/// <param name="additionalParameter">parameter based on which certain validations are done.</param>
/// <param name="alignType">Is the Align type Simple or Align with Gap Extension cost?</param>
/// <param name="similarityMatrixParam">Similarity Matrix</param>
private void ValidatePairwiseOverlapAlignment(string nodeName, bool isTextFile, SequenceCaseType caseType, AlignParameters additionalParameter, AlignmentType alignType, SimilarityMatrixParameters similarityMatrixParam)
{
    Sequence aInput = null;
    Sequence bInput = null;

    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    if (isTextFile)
    {
        // Read the xml file for getting both the files for aligning.
        string filePath1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
        string filePath2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

        var parser1 = new FastAParser();
        ISequence originalSequence1 = parser1.Parse(filePath1).ElementAt(0);
        ISequence originalSequence2 = parser1.Parse(filePath2).ElementAt(0);

        // Create input sequence for sequence string in different cases.
        GetSequenceWithCaseType(
            new string(originalSequence1.Select(a => (char) a).ToArray()),
            new string(originalSequence2.Select(a => (char) a).ToArray()),
            alphabet,
            caseType,
            out aInput,
            out bInput);
    }
    else
    {
        string originalSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
        string originalSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

        // Create input sequence for sequence string in different cases.
        GetSequenceWithCaseType(
            originalSequence1,
            originalSequence2,
            alphabet,
            caseType,
            out aInput,
            out bInput);
    }

    var aInputString = new string(aInput.Select(a => (char) a).ToArray());
    var bInputString = new string(bInput.Select(a => (char) a).ToArray());

    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P2 : First sequence used is '{0}'.", aInputString));
    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P2 : Second sequence used is '{0}'.", bInputString));

    // Create similarity matrix object for a given file.
    string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);

    SimilarityMatrix sm = null;

    switch (similarityMatrixParam)
    {
        case SimilarityMatrixParameters.TextReader:
            using (TextReader reader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(reader);
            }

            break;
        case SimilarityMatrixParameters.DiagonalMatrix:
            string matchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode);
            string misMatchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MisMatchScoreNode);
            sm = new DiagonalSimilarityMatrix(int.Parse(matchValue, null), int.Parse(misMatchValue, null));
            break;
        default:
            // Dispose the reader once the matrix is built, as the TextReader case does;
            // previously this reader was never closed and kept the file handle open.
            using (var blosumReader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(blosumReader);
            }

            break;
    }

    int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
    int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), null);

    // Create PairwiseOverlapAligner instance and set its values.
    // For AllParam the values are passed to the Align/AlignSimple call instead.
    var pairwiseOverlapObj = new PairwiseOverlapAligner();
    if (additionalParameter != AlignParameters.AllParam)
    {
        pairwiseOverlapObj.SimilarityMatrix = sm;
        pairwiseOverlapObj.GapOpenCost = gapOpenCost;
        pairwiseOverlapObj.GapExtensionCost = gapExtensionCost;
    }

    IList<IPairwiseSequenceAlignment> result = null;

    // Align the input sequences.
    switch (additionalParameter)
    {
        case AlignParameters.AlignList:
            var sequences = new List<ISequence>();
            sequences.Add(aInput);
            sequences.Add(bInput);
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = pairwiseOverlapObj.Align(sequences);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(sequences);
                    break;
            }

            break;
        case AlignParameters.AlignTwo:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = pairwiseOverlapObj.Align(aInput, bInput);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(aInput, bInput);
                    break;
            }

            break;
        case AlignParameters.AllParam:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = pairwiseOverlapObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    break;
            }

            break;
        default:
            break;
    }

    // Get the expected sequence and score from xml config.
    string expectedSequence1 = string.Empty;
    string expectedSequence2 = string.Empty;
    string expectedScore = string.Empty;

    switch (alignType)
    {
        case AlignmentType.Align:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionScoreNode);
            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence1Node);
            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence2Node);
            break;
        default:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedScoreNode);
            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode1);
            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
            break;
    }

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();

    // Several expected alignments may be packed into one value, separated by ';'.
    string[] expectedSequences1, expectedSequences2;
    var seperators = new char[1] {';'};
    expectedSequences1 = expectedSequence1.Split(seperators);
    expectedSequences2 = expectedSequence2.Split(seperators);

    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    PairwiseAlignedSequence alignedSeq;
    for (int i = 0; i < expectedSequences1.Length; i++)
    {
        alignedSeq = new PairwiseAlignedSequence
        {
            FirstSequence = new Sequence(alphabet, expectedSequences1[i]),
            SecondSequence = new Sequence(alphabet, expectedSequences2[i]),
            Score = Convert.ToInt32(expectedScore, null),
            FirstOffset = Int32.MinValue,
            SecondOffset = Int32.MinValue,
        };
        align.PairwiseAlignedSequences.Add(alignedSeq);
    }

    expectedOutput.Add(align);
    Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput,true));

    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P2 : Final Score '{0}'.", expectedScore));
    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P2 : Aligned First Sequence is '{0}'.", expectedSequence1));
    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P2 : Aligned Second Sequence is '{0}'.", expectedSequence2));
}
/// <summary>
/// Pairwise alignment of two sequences using an affine gap penalty. Stores the supplied
/// similarity matrix and gap costs on this aligner (overwriting its current settings)
/// and then delegates to DoAlign.
/// </summary>
/// <param name="localSimilarityMatrix">Scoring matrix.</param>
/// <param name="gapOpenPenalty">Gap open penalty (by convention, use a negative number for this.).</param>
/// <param name="gapExtensionPenalty">Gap extension penalty (by convention, use a negative number for this.).</param>
/// <param name="inputA">First input sequence.</param>
/// <param name="inputB">Second input sequence.</param>
/// <returns>A list of sequence alignments.</returns>
public IList<IPairwiseSequenceAlignment> Align(
    SimilarityMatrix localSimilarityMatrix,
    int gapOpenPenalty,
    int gapExtensionPenalty,
    ISequence inputA,
    ISequence inputB)
{
    // The per-call parameters become the aligner's current settings.
    SimilarityMatrix = localSimilarityMatrix;
    GapOpenCost = gapOpenPenalty;
    GapExtensionCost = gapExtensionPenalty;

    // NOTE(review): the third argument presumably selects the affine gap model - confirm against DoAlign.
    IList<IPairwiseSequenceAlignment> alignments = DoAlign(inputA, inputB, true);
    return alignments;
}
/// <summary>
/// Initializes a new alignment job. All arguments are forwarded to the base-class
/// constructor, with a literal 0 inserted as its third argument.
/// </summary>
/// <param name="similarityMatrix">Similarity matrix forwarded to the base class.</param>
/// <param name="gapOpenCost">Gap open cost forwarded to the base class.</param>
/// <param name="aInput">First input sequence.</param>
/// <param name="bInput">Second input sequence.</param>
public PairwiseOverlapSimpleAlignmentJob(
    SimilarityMatrix similarityMatrix,
    int gapOpenCost,
    ISequence aInput,
    ISequence bInput)
    // NOTE(review): the 0 is presumably the gap extension cost, unused by the simple gap model - confirm against the base constructor.
    : base(similarityMatrix, gapOpenCost, 0, aInput, bInput)
{
}
/// <summary>
/// Verifies that SmithWatermanAligner (or building its input) fails with the expected
/// error message for an invalid sequence. The invalid sequence is read from a FASTA file
/// or from the xml node; if it can be constructed, it is aligned against itself with the
/// requested Align/AlignSimple overload and the resulting exception is captured.
/// </summary>
/// <param name="nodeName">Xml node name.</param>
/// <param name="isTextFile">True to read the sequence from a file, false to read it from the xml node.</param>
/// <param name="invalidSequenceType">Invalid sequence type used to select the input file.</param>
/// <param name="additionalParameter">Selects which Align/AlignSimple overload is exercised.</param>
/// <param name="alignType">Align (with gap extension cost) or simple alignment.</param>
/// <param name="sequenceType">Invalid sequence type used to look up the expected error message.</param>
private void InValidateSmithWatermanAlignmentWithInvalidSequence(string nodeName, bool isTextFile, InvalidSequenceType invalidSequenceType, AlignParameters additionalParameter, AlignmentType alignType, InvalidSequenceType sequenceType)
{
    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));
    Exception actualException = null;
    Sequence aInput = null;
    Sequence bInput = null;

    if (isTextFile)
    {
        // Read the xml file for getting both the files for aligning.
        string filepath = this.GetInputFileNameWithInvalidType(nodeName, invalidSequenceType);

        // Create input sequence for sequence string in different cases.
        try
        {
            // Parse the files and get the sequence.
            var parser = new FastAParser();
            IEnumerable<ISequence> seqs = parser.Parse(filepath);
            aInput = new Sequence(alphabet, new string(seqs.ElementAt(0).Select(a => (char) a).ToArray()));
        }
        catch (Exception ex)
        {
            actualException = ex;
        }
    }
    else
    {
        string originalSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.InvalidSequence1);

        // Create input sequence for sequence string in different cases.
        try
        {
            aInput = new Sequence(alphabet, originalSequence);
        }
        catch (ArgumentException ex)
        {
            actualException = ex;
        }
    }

    if (actualException == null)
    {
        bInput = aInput;

        // Create similarity matrix object for a given file.
        // The reader is disposed once the matrix is built; it used to be left open.
        string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);
        SimilarityMatrix sm;
        using (var blosumReader = new StreamReader(blosumFilePath))
        {
            sm = new SimilarityMatrix(blosumReader);
        }

        int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
        int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), null);

        // Create SmithWatermanAligner instance and set its values.
        // For AllParam the values are passed to the Align/AlignSimple call instead.
        var smithWatermanObj = new SmithWatermanAligner();
        if (additionalParameter != AlignParameters.AllParam)
        {
            smithWatermanObj.SimilarityMatrix = sm;
            smithWatermanObj.GapOpenCost = gapOpenCost;
            smithWatermanObj.GapExtensionCost = gapExtensionCost;
        }

        // Align the input sequences and catch the exception. Every branch captures
        // ArgumentException the same way, so one try/catch covers the whole dispatch.
        try
        {
            switch (additionalParameter)
            {
                case AlignParameters.AlignList:
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            smithWatermanObj.Align(new List<ISequence> {aInput, bInput});
                            break;
                        default:
                            smithWatermanObj.AlignSimple(new List<ISequence> {aInput, bInput});
                            break;
                    }

                    break;
                case AlignParameters.AlignTwo:
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            smithWatermanObj.Align(aInput, bInput);
                            break;
                        default:
                            smithWatermanObj.AlignSimple(aInput, bInput);
                            break;
                    }

                    break;
                case AlignParameters.AllParam:
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            smithWatermanObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                            break;
                        default:
                            smithWatermanObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                            break;
                    }

                    break;
                default:
                    break;
            }
        }
        catch (ArgumentException ex)
        {
            actualException = ex;
        }
    }

    // Validate Error messages for Invalid Sequence types.
    string expectedErrorMessage = this.GetExpectedErrorMeesageWithInvalidSequenceType(nodeName, sequenceType);

    // Fail with a clear assertion, not a NullReferenceException, when nothing was thrown.
    Assert.IsNotNull(actualException, "Expected an exception for the invalid sequence, but none was thrown.");
    Assert.AreEqual(expectedErrorMessage, actualException.Message);

    ApplicationLog.WriteLine(string.Concat(
        "SmithWatermanAligner P2 : Expected Error message is thrown ",
        expectedErrorMessage));
}
/// <summary>
/// Validates PairwiseOverlapAlignment algorithm for the parameters passed.
/// Builds the two input sequences (inline from the xml node when the parameter name
/// contains "Code", otherwise from FASTA files), builds the similarity matrix, runs the
/// requested Align/AlignSimple overload and compares the result with the expected
/// sequences and score read from the xml configuration.
/// </summary>
/// <param name="nodeName">Node Name in the xml.</param>
/// <param name="alignParam">parameter based on which certain validations are done.</param>
/// <param name="similarityMatrixParam">Similarity Matrix Parameter.</param>
/// <param name="alignType">Alignment Type</param>
private void ValidatePairwiseOverlapAlignment(string nodeName, AlignParameters alignParam, SimilarityMatrixParameters similarityMatrixParam, AlignmentType alignType)
{
    ISequence aInput;
    ISequence bInput;

    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    if (alignParam.ToString().Contains("Code"))
    {
        string sequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
        string sequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

        aInput = new Sequence(alphabet, sequence1);
        bInput = new Sequence(alphabet, sequence2);
    }
    else
    {
        // Read the xml file for getting both the files for aligning.
        string filePath1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
        string filePath2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

        var parser1 = new FastAParser { Alphabet = alphabet };
        aInput = parser1.Parse(filePath1).ElementAt(0);
        bInput = parser1.Parse(filePath2).ElementAt(0);
    }

    string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);

    SimilarityMatrix sm;
    switch (similarityMatrixParam)
    {
        case SimilarityMatrixParameters.TextReader:
            using (TextReader reader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(reader);
            }

            break;
        case SimilarityMatrixParameters.DiagonalMatrix:
            string matchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode);
            string misMatchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MisMatchScoreNode);
            sm = new DiagonalSimilarityMatrix(int.Parse(matchValue, null), int.Parse(misMatchValue, null));
            break;
        default:
            // Dispose the reader once the matrix is built, as the TextReader case does;
            // previously this reader was never closed and kept the file handle open.
            using (var blosumReader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(blosumReader);
            }

            break;
    }

    int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
    int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), null);

    // For AllParam the matrix and gap open cost are passed to the Align/AlignSimple call instead.
    var pairwiseOverlapObj = new PairwiseOverlapAligner();
    if (AlignParameters.AllParam != alignParam)
    {
        pairwiseOverlapObj.SimilarityMatrix = sm;
        pairwiseOverlapObj.GapOpenCost = gapOpenCost;
    }

    IList<IPairwiseSequenceAlignment> result = null;

    // Run the requested overload; the gap extension cost is only set for AlignmentType.Align.
    switch (alignParam)
    {
        case AlignParameters.AlignList:
        case AlignParameters.AlignListCode:
            var sequences = new List<ISequence> {aInput, bInput};
            switch (alignType)
            {
                case AlignmentType.Align:
                    pairwiseOverlapObj.GapExtensionCost = gapExtensionCost;
                    result = pairwiseOverlapObj.Align(sequences);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(sequences);
                    break;
            }

            break;
        case AlignParameters.AllParam:
        case AlignParameters.AllParamCode:
            switch (alignType)
            {
                case AlignmentType.Align:
                    pairwiseOverlapObj.GapExtensionCost = gapExtensionCost;
                    result = pairwiseOverlapObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    break;
            }

            break;
        case AlignParameters.AlignTwo:
        case AlignParameters.AlignTwoCode:
            switch (alignType)
            {
                case AlignmentType.Align:
                    pairwiseOverlapObj.GapExtensionCost = gapExtensionCost;
                    result = pairwiseOverlapObj.Align(aInput, bInput);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(aInput, bInput);
                    break;
            }

            break;
        default:
            break;
    }

    // Read the expected sequences and score from the xml configuration.
    string expectedSequence1;
    string expectedSequence2;
    string expectedScore;

    switch (alignType)
    {
        case AlignmentType.Align:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionScoreNode);
            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence1Node);
            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence2Node);
            break;
        default:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedScoreNode);
            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode1);
            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
            break;
    }

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();

    // Several expected alignments may be packed into one value, separated by ';'.
    var seperators = new [] {';'};
    string[] expectedSequences1 = expectedSequence1.Split(seperators);
    string[] expectedSequences2 = expectedSequence2.Split(seperators);

    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    for (int i = 0; i < expectedSequences1.Length; i++)
    {
        PairwiseAlignedSequence alignedSeq = new PairwiseAlignedSequence
        {
            FirstSequence = new Sequence(alphabet, expectedSequences1[i]),
            SecondSequence = new Sequence(alphabet, expectedSequences2[i]),
            Score = Convert.ToInt32(expectedScore, null),
            FirstOffset = Int32.MinValue,
            SecondOffset = Int32.MinValue,
        };
        align.PairwiseAlignedSequences.Add(alignedSeq);
    }

    expectedOutput.Add(align);
    Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput, true));

    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P1 : Final Score '{0}'.", expectedScore));
    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P1 : Aligned First Sequence is '{0}'.", expectedSequence1));
    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P1 : Aligned Second Sequence is '{0}'.", expectedSequence2));
}
/// <summary>
/// Construct an aligner and run the alignment.
/// All arguments are validated, the alphabet is derived from the input sequences,
/// the configuration properties are stored, and <c>DoAlignment</c> is invoked
/// before the constructor returns.
/// </summary>
/// <param name="sequences">input sequences</param>
/// <param name="kmerLength">positive integer of kmer length</param>
/// <param name="distanceFunctionName">enum: distance function name</param>
/// <param name="hierarchicalClusteringMethodName">enum: cluster update method</param>
/// <param name="profileAlignerMethodName">enum: profile-profile aligner name</param>
/// <param name="profileFunctionName">enum: profile-profile distance function</param>
/// <param name="similarityMatrix">
/// similarity matrix; when null, the matrix chosen by <c>SetAlphabet</c> for the
/// detected alphabet is kept
/// </param>
/// <param name="gapOpenPenalty">negative gapOpenPenalty</param>
/// <param name="gapExtendPenalty">negative gapExtendPenalty</param>
/// <param name="numberOfPartitions">the number of partitions in dynamic programming</param>
/// <param name="degreeOfParallelism">degree of parallelism option for parallel extension</param>
/// <exception cref="ArgumentNullException"><paramref name="sequences"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="sequences"/> is empty, or <paramref name="degreeOfParallelism"/> or
/// <paramref name="numberOfPartitions"/> is not positive.
/// </exception>
public PAMSAMMultipleSequenceAligner(
    IList<ISequence> sequences,
    int kmerLength,
    DistanceFunctionTypes distanceFunctionName,
    UpdateDistanceMethodsTypes hierarchicalClusteringMethodName,
    ProfileAlignerNames profileAlignerMethodName,
    ProfileScoreFunctionNames profileFunctionName,
    SimilarityMatrix similarityMatrix,
    int gapOpenPenalty,
    int gapExtendPenalty,
    int numberOfPartitions,
    int degreeOfParallelism)
{
    AlignmentScoreC = float.MinValue;
    AlignmentScoreB = float.MinValue;
    AlignmentScoreA = float.MinValue;
    AlignmentScore = float.MinValue;

    StartLog();

    if (null == sequences)
    {
        throw new ArgumentNullException("sequences");
    }

    if (sequences.Count == 0)
    {
        throw new ArgumentException("Empty input sequences");
    }

    // Set parallel extension option
    if (degreeOfParallelism <= 0)
    {
        throw new ArgumentException("Invalid parallel degree parameter");
    }

    this.degreeOfParallelism = degreeOfParallelism;
    ParallelOption = new ParallelOptions { MaxDegreeOfParallelism = degreeOfParallelism };

    if (numberOfPartitions <= 0)
    {
        throw new ArgumentException("Invalid number of partition parameter");
    }

    this.numberOfPartitions = numberOfPartitions;

    // Assign the alphabet (SetAlphabet reads ParallelOption, so it must run after it is set).
    SetAlphabet(sequences, similarityMatrix, false);

    // Initialize parameters
    KmerLength = kmerLength;
    DistanceFunctionName = distanceFunctionName;
    HierarchicalClusteringMethodName = hierarchicalClusteringMethodName;
    ProfileAlignerName = profileAlignerMethodName;
    ProfileProfileFunctionName = profileFunctionName;

    // When the caller passes null, SetAlphabet has already stored a matrix suited to the
    // alphabet; assigning the null argument here would discard that choice.
    if (similarityMatrix != null)
    {
        SimilarityMatrix = similarityMatrix;
    }

    GapOpenCost = gapOpenPenalty;
    GapExtensionCost = gapExtendPenalty;

    MsaUtils.SetProfileItemSets(this.alphabet);

    ReportLog("Start Aligning");

    // Work...
    DoAlignment(sequences);
}
/// <summary>
/// Base-2 log of the weighted inner product of two profile vectors, where the weight of
/// each pair (i, j) is 2^(similarityMatrix[i, j] + 0.5). The loops stop early once a
/// zero entry is seen through the sorted index arrays.
/// </summary>
/// <param name="similarityMatrix">similarity matrix</param>
/// <param name="profileIndexA">row index of the first profile vector in _profileAlignmentA.ProfilesMatrix</param>
/// <param name="profileIndexB">row index of the second profile vector in _profileAlignmentB.ProfilesMatrix</param>
/// <returns>log2 of the scaled sum</returns>
/// <exception cref="ArgumentException">The two profile vectors have different lengths.</exception>
protected float LogExponentialInnerProductShiftedFast(SimilarityMatrix similarityMatrix, int profileIndexA, int profileIndexB)
{
    float[] profileA = _profileAlignmentA.ProfilesMatrix[profileIndexA];
    float[] profileB = _profileAlignmentB.ProfilesMatrix[profileIndexB];

    if (profileA.Length != profileB.Length)
    {
        throw new ArgumentException("Unequal length profiles");
    }

    // The last element of each vector is excluded from the sums below and is only used
    // in the final (1 - x) scale factor (presumably the gap frequency - TODO confirm).
    int dimension = profileA.Length - 1;

    float result = 0;

    // Member index arrays are rebuilt on every call and then passed to QuickSort
    // together with the profile vectors.
    _indexA = MsaUtils.CreateIndexArray(dimension);
    _indexB = MsaUtils.CreateIndexArray(dimension);
    //MsaUtils.QuickSort(a, aIndex, 0, a.Length - 1);
    MsaUtils.QuickSort(profileA, _indexA, 0, dimension - 1);
    MsaUtils.QuickSort(profileB, _indexB, 0, dimension - 1);

    // NOTE(review): the early-exit tests read the profiles through _indexA/_indexB, but the
    // accumulation below uses the raw loop counters i and j (for both the profile values and
    // the similarity matrix lookup). Whether that is consistent depends on what
    // MsaUtils.QuickSort does to its arguments - confirm against its implementation.
    for (int i = 0; i < dimension; ++i)
    {
        if (profileA[_indexA[i]] == 0)
        {
            break;
        }
        for (int j = 0; j < dimension; ++j)
        {
            if (profileB[_indexB[j]] == 0)
            {
                break;
            }
            result += profileB[j] * profileA[i] * (float)Math.Pow(2, similarityMatrix[i, j] + 0.5);
        }
    }

    // Scale by the complement of the last element of each vector.
    result *= (1 - profileA[dimension]) * (1 - profileB[dimension]);

    return (float)Math.Log(result, 2);
}
/// <summary>
/// Base-2 log of the weighted inner product of two profile vectors, where the weight of
/// each pair (i, j) is 2^(similarityMatrix[i, j] + 0.5). The sum is scaled by
/// (1 - last element) of each vector before the logarithm is taken.
/// </summary>
/// <param name="similarityMatrix">similarity matrix</param>
/// <param name="profileIndexA">row index of the first profile vector in _profileAlignmentA.ProfilesMatrix</param>
/// <param name="profileIndexB">row index of the second profile vector in _profileAlignmentB.ProfilesMatrix</param>
/// <returns>log2 of the scaled sum</returns>
/// <exception cref="ArgumentException">The two profile vectors have different lengths.</exception>
protected float LogExponentialInnerProductShifted(SimilarityMatrix similarityMatrix, int profileIndexA, int profileIndexB)
{
    float[] profileA = _profileAlignmentA.ProfilesMatrix[profileIndexA];
    float[] profileB = _profileAlignmentB.ProfilesMatrix[profileIndexB];

    if (profileA.Length != profileB.Length)
    {
        throw new ArgumentException("Unequal length profiles");
    }

    // The last element of each vector is excluded from the double sum and only
    // contributes through the scale factor applied afterwards.
    int dimension = profileA.Length - 1;

    float result = 0;

    for (int i = 0; i < dimension; ++i)
    {
        for (int j = 0; j < dimension; ++j)
        {
            result += profileA[i] * profileB[j] * (float)Math.Pow(2, similarityMatrix[i, j] + 0.5);
        }
    }

    // Both vectors must be scaled by the complement of their last element. The previous
    // code used profileA[dimension] itself for A, which made the score asymmetric and
    // disagreed with the other "shifted" score functions in this class.
    result *= (1 - profileA[dimension]) * (1 - profileB[dimension]);

    return (float)Math.Log(result, 2);
}
/// <summary>
/// Scores two profile vectors by their correlation, as computed by MsaUtils.Correlation.
/// </summary>
/// <param name="similarityMatrix">similarity matrix (not read by this score function)</param>
/// <param name="profileIndexA">row index of the first profile vector in _profileAlignmentA.ProfilesMatrix</param>
/// <param name="profileIndexB">row index of the second profile vector in _profileAlignmentB.ProfilesMatrix</param>
/// <returns>the correlation of the two vectors</returns>
/// <exception cref="ArgumentException">The two profile vectors have different lengths.</exception>
protected float PearsonCorrelation(SimilarityMatrix similarityMatrix, int profileIndexA, int profileIndexB)
{
    float[] vectorA = _profileAlignmentA.ProfilesMatrix[profileIndexA];
    float[] vectorB = _profileAlignmentB.ProfilesMatrix[profileIndexB];

    bool sameLength = vectorA.Length == vectorB.Length;
    if (sameLength)
    {
        return MsaUtils.Correlation(vectorA, vectorB);
    }

    throw new ArgumentException("Unequal length profiles");
}
/// <summary>
/// Weighted inner product of two profile vectors, where each similarity matrix entry is
/// shifted by +0.5 before weighting. The sum is scaled by (1 - last element) of each vector.
/// </summary>
/// <param name="similarityMatrix">similarity matrix</param>
/// <param name="profileIndexA">row index of the first profile vector in _profileAlignmentA.ProfilesMatrix</param>
/// <param name="profileIndexB">row index of the second profile vector in _profileAlignmentB.ProfilesMatrix</param>
/// <returns>the scaled weighted inner product</returns>
/// <exception cref="ArgumentException">The two profile vectors have different lengths.</exception>
protected float WeightedInnerProductShifted(SimilarityMatrix similarityMatrix, int profileIndexA, int profileIndexB)
{
    float[] vectorA = _profileAlignmentA.ProfilesMatrix[profileIndexA];
    float[] vectorB = _profileAlignmentB.ProfilesMatrix[profileIndexB];

    if (vectorA.Length != vectorB.Length)
    {
        throw new ArgumentException("Unequal length profiles");
    }

    // Index of the trailing element, which is kept out of the sums.
    int last = vectorA.Length - 1;

    // rowWeights[row] = sum over col of (matrix[row, col] + 0.5) * B[col]
    float[] rowWeights = new float[last];
    float total = 0;

    for (int row = 0; row < last; ++row)
    {
        for (int col = 0; col < last; ++col)
        {
            rowWeights[row] += (similarityMatrix[row, col] + 0.5f) * vectorB[col];
        }

        total += vectorA[row] * rowWeights[row];
    }

    total *= (1 - vectorA[last]) * (1 - vectorB[last]);

    return total;
}
/// <summary>
/// Derives the working alphabet from the first input sequence, verifies that every other
/// sequence shares the same base alphabet, and checks (or chooses) the similarity matrix.
/// </summary>
/// <param name="sequences">Input sequences</param>
/// <param name="similarityMatrix">Matrix to use for similarity comparisons; null selects a default for the alphabet</param>
/// <param name="fixSimilarityMatrixErrors">
/// True to replace a matrix that does not suit the alphabet with the default one;
/// false to throw instead.
/// </param>
private void SetAlphabet(IList<ISequence> sequences, SimilarityMatrix similarityMatrix, bool fixSimilarityMatrixErrors)
{
    if (sequences.Count == 0)
    {
        throw new ArgumentException("Empty input sequences");
    }

    // The ambiguous form of the first sequence's alphabet is the reference for all others.
    this.alphabet = Alphabets.GetAmbiguousAlphabet(sequences[0].Alphabet);
    Parallel.For(1, sequences.Count, ParallelOption, index =>
    {
        bool sameBase = Alphabets.CheckIsFromSameBase(sequences[index].Alphabet, this.alphabet);
        if (!sameBase)
        {
            throw new ArgumentException("Inconsistent sequence alphabet");
        }
    });

    // Default matrix for the detected alphabet (stays null for an unrecognised alphabet).
    SimilarityMatrix fallbackMatrix = null;
    if (this.alphabet is DnaAlphabet)
    {
        fallbackMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
    }
    else if (this.alphabet is RnaAlphabet)
    {
        fallbackMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
    }
    else if (this.alphabet is ProteinAlphabet)
    {
        fallbackMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum50);
    }

    // No matrix supplied: adopt the default, or fail when there is none.
    if (similarityMatrix == null)
    {
        SimilarityMatrix = fallbackMatrix;
        if (SimilarityMatrix == null)
        {
            throw new ArgumentException("Unknown alphabet - could not choose SimilarityMatrix.");
        }

        return;
    }

    // Matrix supplied: look up which matrix names are acceptable for this alphabet.
    List<string> acceptedNames;
    string rejectionMessage;
    if (this.alphabet is DnaAlphabet)
    {
        acceptedNames = new List<string> { "AmbiguousDNA" };
        rejectionMessage = "Inappropriate Similarity Matrix for DNA.";
    }
    else if (this.alphabet is ProteinAlphabet)
    {
        acceptedNames = new List<string>
        {
            "BLOSUM45", "BLOSUM50", "BLOSUM62", "BLOSUM80", "BLOSUM90", "PAM250", "PAM30", "PAM70"
        };
        rejectionMessage = "Inappropriate Similarity Matrix for Protein.";
    }
    else if (this.alphabet is RnaAlphabet)
    {
        acceptedNames = new List<string> { "AmbiguousRNA" };
        rejectionMessage = "Inappropriate Similarity Matrix for RNA.";
    }
    else
    {
        throw new ArgumentException("Invalid alphabet");
    }

    if (acceptedNames.Contains(similarityMatrix.Name))
    {
        return;
    }

    if (!fixSimilarityMatrixErrors)
    {
        throw new ArgumentException(rejectionMessage);
    }

    SimilarityMatrix = fallbackMatrix;
}
/// <summary>
/// Scores two profile vectors as one minus the value returned by
/// MsaUtils.JensenShannonDivergence for them.
/// </summary>
/// <param name="similarityMatrix">similarity matrix (not read by this score function)</param>
/// <param name="profileIndexA">row index of the first profile vector in _profileAlignmentA.ProfilesMatrix</param>
/// <param name="profileIndexB">row index of the second profile vector in _profileAlignmentB.ProfilesMatrix</param>
/// <returns>1 minus the divergence of the two vectors</returns>
protected float JensenShannonDivergence(SimilarityMatrix similarityMatrix, int profileIndexA, int profileIndexB)
{
    float[] vectorA = _profileAlignmentA.ProfilesMatrix[profileIndexA];
    float[] vectorB = _profileAlignmentB.ProfilesMatrix[profileIndexB];

    return 1 - MsaUtils.JensenShannonDivergence(vectorA, vectorB);
}
/// <summary>
/// Runs a SmithWatermanAligner alignment for the inputs described by an xml config node and
/// asserts that the result matches the expected aligned sequences and score from that node.
/// </summary>
/// <param name="nodeName">xml config node holding inputs and expected values</param>
/// <param name="isTextFile">true to read the input sequences from FastA files, false to read them from the node text</param>
/// <param name="caseType">letter case applied to the input sequences; also selects the expected-sequence nodes</param>
/// <param name="additionalParameter">which aligner overload to call</param>
/// <param name="alignType">Align (uses gap extension) or the simple alignment</param>
/// <param name="similarityMatrixParam">how the similarity matrix is constructed</param>
private void ValidateSmithWatermanAlignment(string nodeName, bool isTextFile, SequenceCaseType caseType,
    AlignParameters additionalParameter, AlignmentType alignType,
    SimilarityMatrixParameters similarityMatrixParam)
{
    Sequence aInput, bInput;
    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    if (isTextFile)
    {
        // Read the xml file for getting both the files for aligning.
        string filePath1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
        string filePath2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

        // Parse the files and get the sequence.
        var parseObjectForFile1 = new FastAParser { Alphabet = alphabet };
        ISequence originalSequence1 = parseObjectForFile1.Parse(filePath1).ElementAt(0);
        ISequence originalSequence2 = parseObjectForFile1.Parse(filePath2).ElementAt(0);

        // Create input sequence for sequence string in different cases.
        GetSequenceWithCaseType(originalSequence1.ConvertToString(), originalSequence2.ConvertToString(),
            alphabet, caseType, out aInput, out bInput);
    }
    else
    {
        string originalSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
        string originalSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

        // Create input sequence for sequence string in different cases.
        GetSequenceWithCaseType(originalSequence1, originalSequence2, alphabet, caseType, out aInput, out bInput);
    }

    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : First sequence used is '{0}'.", aInput.ConvertToString()));
    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Second sequence used is '{0}'.", bInput.ConvertToString()));

    // Create similarity matrix object for a given file.
    string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);

    SimilarityMatrix sm;
    switch (similarityMatrixParam)
    {
        case SimilarityMatrixParameters.TextReader:
            using (TextReader reader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(reader);
            }
            break;
        case SimilarityMatrixParameters.DiagonalMatrix:
            string matchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode);
            string misMatchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MisMatchScoreNode);
            sm = new DiagonalSimilarityMatrix(int.Parse(matchValue, null), int.Parse(misMatchValue, null));
            break;
        default:
            // Dispose the reader once the matrix is built, as the TextReader case does;
            // previously the file handle was left open here.
            using (var streamReader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(streamReader);
            }
            break;
    }

    int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
    int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), null);

    // Create SmithWatermanAligner instance and set its values.
    // For AllParam the values are passed to the Align call itself instead.
    var smithWatermanObj = new SmithWatermanAligner();
    if (additionalParameter != AlignParameters.AllParam)
    {
        smithWatermanObj.SimilarityMatrix = sm;
        smithWatermanObj.GapOpenCost = gapOpenCost;
        smithWatermanObj.GapExtensionCost = gapExtensionCost;
    }

    IList<IPairwiseSequenceAlignment> result = null;

    // Align the input sequences.
    switch (additionalParameter)
    {
        case AlignParameters.AlignList:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = smithWatermanObj.Align(new List<ISequence> { aInput, bInput });
                    break;
                default:
                    result = smithWatermanObj.AlignSimple(new List<ISequence> { aInput, bInput });
                    break;
            }
            break;
        case AlignParameters.AlignTwo:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = smithWatermanObj.Align(aInput, bInput);
                    break;
                default:
                    result = smithWatermanObj.AlignSimple(aInput, bInput);
                    break;
            }
            break;
        case AlignParameters.AllParam:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = smithWatermanObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                    break;
                default:
                    result = smithWatermanObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    break;
            }
            break;
        default:
            break;
    }

    // Get the expected sequence and score from xml config.
    string expectedSequence1, expectedSequence2, expectedScore;

    switch (alignType)
    {
        case AlignmentType.Align:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionScoreNode);

            switch (caseType)
            {
                case SequenceCaseType.LowerCase:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence1InLower);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence2InLower);
                    break;
                default:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence1Node);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence2Node);
                    break;
            }
            break;
        default:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedScoreNode);

            switch (caseType)
            {
                case SequenceCaseType.LowerCase:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence1inLowerNode);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence2inLowerNode);
                    break;
                case SequenceCaseType.LowerUpperCase:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence1inLowerNode);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
                    break;
                default:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode1);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
                    break;
            }
            break;
    }

    // Match the alignment result with expected result.
    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();

    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    var alignedSeq = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(alphabet, expectedSequence1),
        SecondSequence = new Sequence(alphabet, expectedSequence2),
        Score = Convert.ToInt32(expectedScore, null),
        FirstOffset = Int32.MinValue,
        SecondOffset = Int32.MinValue,
    };
    align.PairwiseAlignedSequences.Add(alignedSeq);
    expectedOutput.Add(align);

    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Final Score '{0}'.", expectedScore));
    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Aligned First Sequence is '{0}'.", expectedSequence1));
    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Aligned Second Sequence is '{0}'.", expectedSequence2));

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Initializes the aligner with the given scoring parameters by delegating to the
/// five-argument constructor with its last argument fixed at 1.
///
/// This constructor is for non-parallel version.
/// </summary>
/// <param name="similarityMatrix">similarity matrix</param>
/// <param name="profileScoreFunctionName">enum: profile score function to use</param>
/// <param name="gapOpenPenalty">gap open penalty</param>
/// <param name="gapExtensionPenalty">gap extension penalty</param>
protected DynamicProgrammingProfileAlignerSerial(
    SimilarityMatrix similarityMatrix,
    ProfileScoreFunctionNames profileScoreFunctionName,
    int gapOpenPenalty,
    int gapExtensionPenalty)
    : this(similarityMatrix, profileScoreFunctionName, gapOpenPenalty, gapExtensionPenalty, 1)
{ }
/// <summary>
/// Negative test helper: builds (or omits) a similarity matrix of the requested invalid kind,
/// runs the selected SmithWatermanAligner overload, and asserts that the message of the
/// exception raised matches the expected error message for that kind.
/// </summary>
/// <param name="nodeName">xml config node holding inputs and expected values</param>
/// <param name="isTextFile">true to read the input sequences from FastA files, false to read them from the node text</param>
/// <param name="invalidType">kind of invalid similarity matrix to use</param>
/// <param name="additionalParameter">which aligner overload to call</param>
/// <param name="alignType">Align (uses gap extension) or the simple alignment</param>
private void InValidateSmithWatermanAlignmentWithInvalidSimilarityMatrix(string nodeName, bool isTextFile,
    SimilarityMatrixInvalidTypes invalidType,
    AlignParameters additionalParameter,
    AlignmentType alignType)
{
    Sequence aInput = null;
    Sequence bInput = null;

    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    if (isTextFile)
    {
        // Read the xml file for getting both the files for aligning.
        string firstInputFilepath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
        string secondInputFilepath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

        // Parse the files and get the sequence.
        var parseObjectForFile1 = new FastAParser { Alphabet = alphabet };
        ISequence inputSequence1 = parseObjectForFile1.Parse(firstInputFilepath).ElementAt(0);
        ISequence inputSequence2 = parseObjectForFile1.Parse(secondInputFilepath).ElementAt(0);

        // Create input sequence for sequence string in different cases.
        GetSequenceWithCaseType(new string(inputSequence1.Select(a => (char) a).ToArray()),
            new string(inputSequence2.Select(a => (char) a).ToArray()),
            alphabet, SequenceCaseType.LowerCase, out aInput, out bInput);
    }
    else
    {
        string firstInputSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
        string secondInputSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

        // Create input sequence for sequence string in different cases.
        GetSequenceWithCaseType(firstInputSequence, secondInputSequence, alphabet,
            SequenceCaseType.LowerCase, out aInput, out bInput);
    }

    // These messages contain a {0} placeholder, so they must go through Format;
    // Concat logged the literal "{0}" followed by the sequence.
    ApplicationLog.WriteLine(string.Format(null,
        "SmithWatermanAligner P2 : First sequence used is '{0}'.",
        new string(aInput.Select(a => (char) a).ToArray())));
    ApplicationLog.WriteLine(string.Format(null,
        "SmithWatermanAligner P2 : Second sequence used is '{0}'.",
        new string(bInput.Select(a => (char) a).ToArray())));

    // Create similarity matrix object for a invalid file.
    string blosumFilePath = this.GetSimilarityMatrixFileWithInvalidType(nodeName, invalidType);
    Exception actualExpection = null;

    // For invalid similarity matrix data format; exception will be thrown while instantiating
    SimilarityMatrix sm = null;
    try
    {
        if (invalidType != SimilarityMatrixInvalidTypes.NullSimilarityMatrix)
        {
            // The reader is disposed whether or not the matrix constructor throws.
            using (var reader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(reader);
            }
        }
    }
    catch (InvalidDataException ex)
    {
        actualExpection = ex;
    }

    // For non matching similarity matrix exception will be thrown while alignment
    if (actualExpection == null)
    {
        int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
        int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), null);

        // Create SmithWatermanAligner instance and set its values.
        // For AllParam the values are passed to the Align call itself instead.
        var smithWatermanObj = new SmithWatermanAligner();
        if (additionalParameter != AlignParameters.AllParam)
        {
            smithWatermanObj.SimilarityMatrix = sm;
            smithWatermanObj.GapOpenCost = gapOpenCost;
            smithWatermanObj.GapExtensionCost = gapExtensionCost;
        }

        // Align the input sequences and catch the exception. Only the aligner calls live
        // inside the try, so a single handler covers every overload.
        try
        {
            bool useAlign = alignType == AlignmentType.Align;
            switch (additionalParameter)
            {
                case AlignParameters.AlignList:
                    if (useAlign)
                    {
                        smithWatermanObj.Align(new List<ISequence> { aInput, bInput });
                    }
                    else
                    {
                        smithWatermanObj.AlignSimple(new List<ISequence> { aInput, bInput });
                    }
                    break;
                case AlignParameters.AlignTwo:
                    if (useAlign)
                    {
                        smithWatermanObj.Align(aInput, bInput);
                    }
                    else
                    {
                        smithWatermanObj.AlignSimple(aInput, bInput);
                    }
                    break;
                case AlignParameters.AllParam:
                    if (useAlign)
                    {
                        smithWatermanObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                    }
                    else
                    {
                        smithWatermanObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    }
                    break;
                default:
                    break;
            }
        }
        catch (ArgumentException ex)
        {
            actualExpection = ex;
        }
    }

    // Validate that expected exception is thrown using error message.
    string expectedErrorMessage = this.GetExpectedErrorMeesageWithInvalidSimilarityMatrixType(nodeName, invalidType);

    // Fail with an assertion (not a NullReferenceException) when nothing was thrown.
    Assert.IsTrue(actualExpection != null);
    Assert.AreEqual(expectedErrorMessage, actualExpection.Message);

    ApplicationLog.WriteLine(string.Concat(
        "SmithWatermanAligner P2 : Expected Error message is thrown ",
        expectedErrorMessage));
}
/// <summary>
/// Stores the scoring parameters, selects the profile-profile score function that matches
/// <paramref name="profileScoreFunctionName"/>, and records the partition count.
/// </summary>
/// <param name="similarityMatrix">similarity matrix</param>
/// <param name="profileScoreFunctionName">enum: profile score function to use</param>
/// <param name="gapOpenPenalty">gap open penalty</param>
/// <param name="gapExtensionPenalty">gap extension penalty</param>
/// <param name="numberOfCores">positive integer, stored as the number of partitions</param>
/// <exception cref="Exception"><paramref name="profileScoreFunctionName"/> is not a handled value.</exception>
/// <exception cref="ArgumentException"><paramref name="numberOfCores"/> is not positive.</exception>
protected DynamicProgrammingProfileAlignerSerial(
    SimilarityMatrix similarityMatrix,
    ProfileScoreFunctionNames profileScoreFunctionName,
    int gapOpenPenalty,
    int gapExtensionPenalty,
    int numberOfCores)
{
    _similarityMatrix = similarityMatrix;
    _gapOpenPenalty = gapOpenPenalty;
    _gapExtensionPenalty = gapExtensionPenalty;

    // Resolve the enum value to the matching score method; the name check runs
    // before the core-count check.
    ProfileScoreFunctionSelector scoreFunction;
    switch (profileScoreFunctionName)
    {
        case ProfileScoreFunctionNames.InnerProduct:
            scoreFunction = InnerProduct;
            break;
        case ProfileScoreFunctionNames.WeightedInnerProduct:
            scoreFunction = WeightedInnerProduct;
            break;
        case ProfileScoreFunctionNames.WeightedInnerProductShifted:
            scoreFunction = WeightedInnerProductShifted;
            break;
        case ProfileScoreFunctionNames.InnerProductFast:
            scoreFunction = InnerProductFast;
            break;
        case ProfileScoreFunctionNames.WeightedInnerProductFast:
            scoreFunction = WeightedInnerProductFast;
            break;
        case ProfileScoreFunctionNames.WeightedInnerProductShiftedFast:
            scoreFunction = WeightedInnerProductShiftedFast;
            break;
        case ProfileScoreFunctionNames.PearsonCorrelation:
            scoreFunction = PearsonCorrelation;
            break;
        case ProfileScoreFunctionNames.WeightedEuclideanDistance:
            scoreFunction = WeightedEuclideanDistance;
            break;
        case ProfileScoreFunctionNames.LogExponentialInnerProduct:
            scoreFunction = LogExponentialInnerProduct;
            break;
        case ProfileScoreFunctionNames.LogExponentialInnerProductShifted:
            scoreFunction = LogExponentialInnerProductShifted;
            break;
        case ProfileScoreFunctionNames.WeightedEuclideanDistanceFast:
            scoreFunction = WeightedEuclideanDistanceFast;
            break;
        case ProfileScoreFunctionNames.LogExponentialInnerProductFast:
            scoreFunction = LogExponentialInnerProductFast;
            break;
        case ProfileScoreFunctionNames.LogExponentialInnerProductShiftedFast:
            scoreFunction = LogExponentialInnerProductShiftedFast;
            break;
        case ProfileScoreFunctionNames.SymmetrizedEntropy:
            scoreFunction = SymmetrizedEntropy;
            break;
        case ProfileScoreFunctionNames.JensenShannonDivergence:
            scoreFunction = JensenShannonDivergence;
            break;
        case ProfileScoreFunctionNames.WeightedInnerProductCached:
            // The cached variant is the only one that also installs a caching function.
            scoreFunction = WeightedInnerProductCached;
            _cachingFunction = new CachingFunctionSelector(CachingWeightedInnerProduct);
            _doCaching = true;
            break;
        default:
            throw new Exception("Invalid profile function name");
    }

    _profileProfileScoreFunction = scoreFunction;

    if (numberOfCores <= 0)
    {
        throw new ArgumentException("Invalid number of cores parameter");
    }

    _numberOfPartitions = numberOfCores;
}
/// <summary>
/// Initializes a new alignment job. All arguments are forwarded unchanged to the
/// base class constructor; this constructor does no work of its own.
/// </summary>
/// <param name="similarityMatrix">Similarity matrix passed to the base job.</param>
/// <param name="gapOpenCost">Gap open cost passed to the base job.</param>
/// <param name="gapExtensionCost">Gap extension cost passed to the base job.</param>
/// <param name="aInput">First input sequence.</param>
/// <param name="bInput">Second input sequence.</param>
protected PairwiseOverlapSimpleAlignmentJob(SimilarityMatrix similarityMatrix, int gapOpenCost, int gapExtensionCost, ISequence aInput, ISequence bInput)
    : base(similarityMatrix, gapOpenCost, gapExtensionCost, aInput, bInput) { }
/// <summary>
/// Aligns two profile alignments with an affine gap penalty. The aligner state is reset,
/// the scoring parameters are stored, the dynamic programming matrix is filled, and the
/// traceback result is turned into a combined profile alignment carrying the optimal score.
/// </summary>
/// <param name="similarityMatrix">Scoring matrix.</param>
/// <param name="gapOpenPenalty">Gap open penalty (by convention, use a negative number for this.)</param>
/// <param name="gapExtensionPenalty">Gap extension penalty (by convention, use a negative number for this.)</param>
/// <param name="profileAlignmentA">First input profileAlignment</param>
/// <param name="profileAlignmentB">Second input profileAlignment</param>
/// <returns>The profile alignment generated from the two inputs, with Score set to the traceback score.</returns>
public IProfileAlignment Align(
    SimilarityMatrix similarityMatrix,
    int gapOpenPenalty,
    int gapExtensionPenalty,
    IProfileAlignment profileAlignmentA,
    IProfileAlignment profileAlignmentB)
{
    // Remember the inputs and clear per-run state.
    _profileAlignmentA = profileAlignmentA;
    _profileAlignmentB = profileAlignmentB;
    ResetSpecificAlgorithmMemberVariables();

    // Scoring parameters for this run.
    GapOpenCost = gapOpenPenalty;
    GapExtensionCost = gapExtensionPenalty;
    SimilarityMatrix = similarityMatrix;

    // Throws when the inputs are not valid.
    ValidateAlignInput(profileAlignmentA, profileAlignmentB);

    // One index entry per row of each profile matrix.
    _a = MsaUtils.CreateIndexArray(profileAlignmentA.ProfilesMatrix.RowSize);
    _b = MsaUtils.CreateIndexArray(profileAlignmentB.ProfilesMatrix.RowSize);

    // Only score functions that registered a caching function need this step.
    if (_doCaching)
    {
        _cachingFunction(similarityMatrix, _profileAlignmentA, _profileAlignmentB);
    }

    _indexAs = CachingIndex(_profileAlignmentA);

    FillMatrixAffine();

    float bestScore = Traceback(out _alignedA, out _alignedB);

    // Build the merged profile alignment, passing the weights only when weighting is enabled.
    IProfileAlignment merged = PAMSAMMultipleSequenceAligner.UseWeights
        ? ProfileAlignment.GenerateProfileAlignment(_profileAlignmentA, _profileAlignmentB, _alignedA, _alignedB, _gapCode, _weights)
        : ProfileAlignment.GenerateProfileAlignment(_profileAlignmentA, _profileAlignmentB, _alignedA, _alignedB, _gapCode);

    merged.Score = bestScore;
    return merged;
}
/// <summary>
/// Resets the aligner to its default configuration: a fresh ModifiedSmithWaterman
/// aligner, the diagonal score matrix, and the default gap, MUM and cluster-builder values.
/// Callers are expected to override these with values suited to their data.
/// </summary>
private void SetDefaults()
{
    this._nucmerAligner = new ModifiedSmithWaterman();

    // Scoring: diagonal similarity matrix.
    this.SimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.DiagonalScoreMatrix);

    // Gap costs and MUM length.
    this.GapOpenCost = DefaultGapOpenCost;
    this.GapExtensionCost = DefaultGapExtensionCost;
    this.LengthOfMUM = DefaultLengthOfMUM;

    // Clustering parameters come from ClusterBuilder's defaults.
    this.FixedSeparation = ClusterBuilder.DefaultFixedSeparation;
    this.MaximumSeparation = ClusterBuilder.DefaultMaximumSeparation;
    this.MinimumScore = ClusterBuilder.DefaultMinimumScore;
    this.SeparationFactor = ClusterBuilder.DefaultSeparationFactor;

    // Break length comes from ModifiedSmithWaterman's default.
    this.BreakLength = ModifiedSmithWaterman.DefaultBreakLength;
    this.ScoreMethod = ClusterScoreMethod.MatchLength;
}
/// <summary>
/// Aligns two sequences with a linear gap penalty. The supplied matrix and gap penalty
/// are stored on this aligner (the penalty as GapOpenCost) before the alignment runs.
/// </summary>
/// <param name="localSimilarityMatrix">Scoring matrix.</param>
/// <param name="gapPenalty">Gap penalty (by convention, use a negative number for this.).</param>
/// <param name="inputA">First input sequence.</param>
/// <param name="inputB">Second input sequence.</param>
/// <returns>A list of sequence alignments.</returns>
public IList<IPairwiseSequenceAlignment> AlignSimple(SimilarityMatrix localSimilarityMatrix, int gapPenalty, ISequence inputA, ISequence inputB)
{
    SimilarityMatrix = localSimilarityMatrix;
    GapOpenCost = gapPenalty;

    IList<IPairwiseSequenceAlignment> alignments = DoAlign(inputA, inputB, false);
    return alignments;
}