/// <summary>
/// Runs PairwiseOverlapAligner.Align on the protein sequences "ACDEF" and "TUVWY",
/// which have no residue in common, logs every alignment that comes back, and
/// asserts that the result compares equal to an empty expected list.
/// </summary>
public void PairwiseOverlapProteinSeqWithZeroOverlap()
{
    // A null provider is passed to every string.Format call below.
    IFormatProvider noProvider = null;

    var firstInput = new Sequence(Alphabets.Protein, "ACDEF");
    var secondInput = new Sequence(Alphabets.Protein, "TUVWY");
    SimilarityMatrix matrix = new DiagonalSimilarityMatrix(5, -5);

    var aligner = new PairwiseOverlapAligner
    {
        SimilarityMatrix = matrix,
        GapOpenCost = -10,
        GapExtensionCost = -1
    };

    IList<IPairwiseSequenceAlignment> alignments = aligner.Align(firstInput, secondInput);

    ApplicationLog.WriteLine(string.Format(
        noProvider,
        "{0}, Simple; Matrix {1}; GapOpenCost {2}",
        aligner.Name,
        aligner.SimilarityMatrix.Name,
        aligner.GapOpenCost));

    foreach (IPairwiseSequenceAlignment alignment in alignments)
    {
        // Only the first aligned sequence of each alignment is written to the log.
        var firstAligned = alignment.PairwiseAlignedSequences[0];

        ApplicationLog.WriteLine(string.Format(noProvider, "score {0}", firstAligned.Score));
        ApplicationLog.WriteLine(string.Format(noProvider, "input 0     {0}", alignment.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format(noProvider, "input 1     {0}", alignment.SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format(noProvider, "result 0    {0}", firstAligned.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format(noProvider, "result 1    {0}", firstAligned.SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format(noProvider, "consesus    {0}", firstAligned.Consensus.ToString()));
    }

    // No overlap is expected, so the expected output stays empty.
    IList<IPairwiseSequenceAlignment> expected = new List<IPairwiseSequenceAlignment>();
    Assert.IsTrue(CompareAlignment(alignments, expected));
}
/// <summary>
/// Runs PairwiseOverlapAligner.AlignSimple on the DNA sequences "CCCAACCC" and "CCC"
/// and asserts that the result compares equal to a single alignment holding two
/// aligned sequences ("CCC" against "CCC", score 15 each, second offsets 0 and 5).
/// </summary>
public void PairwiseOverlapMultipleAlignments()
{
    Sequence sequence1 = new Sequence(Alphabets.DNA, "CCCAACCC");
    Sequence sequence2 = new Sequence(Alphabets.DNA, "CCC");
    SimilarityMatrix sm = new DiagonalSimilarityMatrix(5, -20);
    int gapPenalty = -10;

    PairwiseOverlapAligner overlap = new PairwiseOverlapAligner
    {
        SimilarityMatrix = sm,
        GapOpenCost = gapPenalty
    };

    IList<IPairwiseSequenceAlignment> result = overlap.AlignSimple(sequence1, sequence2);

    ApplicationLog.WriteLine(string.Format(
        (IFormatProvider)null,
        "{0}, Simple; Matrix {1}; GapOpenCost {2}",
        overlap.Name,
        overlap.SimilarityMatrix.Name,
        overlap.GapOpenCost));

    foreach (IPairwiseSequenceAlignment sequenceResult in result)
    {
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 0     {0}", sequenceResult.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 1     {0}", sequenceResult.SecondSequence.ToString()));

        // Log every aligned sequence, not just index 0: this test is about an
        // alignment that carries more than one aligned sequence, and indexing
        // [0] would throw on an alignment that has none.
        foreach (var aligned in sequenceResult.PairwiseAlignedSequences)
        {
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "score {0}", aligned.Score));
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 0    {0}", aligned.FirstSequence.ToString()));
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 1    {0}", aligned.SecondSequence.ToString()));
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "consesus    {0}", aligned.Consensus.ToString()));
        }
    }

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();

    // First alignment
    PairwiseAlignedSequence alignedSeq = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "CCC"),
        SecondSequence = new Sequence(Alphabets.DNA, "CCC"),
        Consensus = new Sequence(Alphabets.DNA, "CCC"),
        Score = 15,
        FirstOffset = 0,
        SecondOffset = 0
    };
    align.PairwiseAlignedSequences.Add(alignedSeq);

    // Second alignment
    alignedSeq = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "CCC"),
        SecondSequence = new Sequence(Alphabets.DNA, "CCC"),
        Consensus = new Sequence(Alphabets.DNA, "CCC"),
        Score = 15,
        FirstOffset = 0,
        SecondOffset = 5
    };
    align.PairwiseAlignedSequences.Add(alignedSeq);

    expectedOutput.Add(align);
    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Runs PairwiseOverlapAligner.Align on the protein sequences "HEAGAWGHEE" and "PAWHEAE"
/// with the Blosum50 matrix, gap open cost -8, gap extension cost -1 and
/// UseEARTHToFillMatrix switched on, then asserts that the result compares equal
/// to one expected aligned pair ("GAWGHEE" / "PAW-HEA", score 25).
/// </summary>
public void PairwiseOverlapProteinSeqAffineGapUseEarth()
{
    // A null provider is passed to every string.Format call below.
    IFormatProvider noProvider = null;

    string firstText = "HEAGAWGHEE";
    string secondText = "PAWHEAE";
    var firstInput = new Sequence(Alphabets.Protein, firstText);
    var secondInput = new Sequence(Alphabets.Protein, secondText);
    var matrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum50);

    var aligner = new PairwiseOverlapAligner
    {
        SimilarityMatrix = matrix,
        GapOpenCost = -8,
        UseEARTHToFillMatrix = true,
        GapExtensionCost = -1
    };

    IList<IPairwiseSequenceAlignment> alignments = aligner.Align(firstInput, secondInput);

    ApplicationLog.WriteLine(string.Format(
        noProvider,
        "{0}, Affine; Matrix {1}; GapOpenCost {2}; GapExtenstionCost {3}",
        aligner.Name,
        aligner.SimilarityMatrix.Name,
        aligner.GapOpenCost,
        aligner.GapExtensionCost));

    foreach (IPairwiseSequenceAlignment alignment in alignments)
    {
        // Only the first aligned sequence of each alignment is written to the log.
        var firstAligned = alignment.PairwiseAlignedSequences[0];

        ApplicationLog.WriteLine(string.Format(noProvider, "score {0}", firstAligned.Score));
        ApplicationLog.WriteLine(string.Format(noProvider, "input 0     {0}", alignment.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format(noProvider, "input 1     {0}", alignment.SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format(noProvider, "result 0    {0}", firstAligned.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format(noProvider, "result 1    {0}", firstAligned.SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format(noProvider, "consesus    {0}", firstAligned.Consensus.ToString()));
    }

    IList<IPairwiseSequenceAlignment> expected = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();

    // The expected consensus is built over the ambiguous protein alphabet
    // because it contains 'X' at the two mismatching columns.
    var expectedPair = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.Protein, "GAWGHEE"),
        SecondSequence = new Sequence(Alphabets.Protein, "PAW-HEA"),
        Consensus = new Sequence(Alphabets.AmbiguousProtein, "XAWGHEX"),
        Score = 25,
        FirstOffset = 0,
        SecondOffset = 3
    };

    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);
    expected.Add(expectedAlignment);

    Assert.IsTrue(CompareAlignment(alignments, expected));
}
/// <summary>
/// Initializes a new instance of the OverlapDeNovoAssembler class with its
/// default configuration: a merge threshold of 3, a PairwiseOverlapAligner as
/// the overlap algorithm, and AssumeStandardOrientation set to true.
/// Users will typically replace these defaults with values suited to their
/// own sequences and needs.
/// </summary>
public OverlapDeNovoAssembler()
{
    // Each default below may itself rely on the defaults chosen by the
    // constructor it invokes (for example, the aligner's own parameters).

    // Default merge threshold.
    this.MergeThreshold = 3;

    // Default overlap algorithm.
    this.OverlapAlgorithm = new PairwiseOverlapAligner();

    // Default orientation assumption.
    this.AssumeStandardOrientation = true;
}
/// <summary>
/// Creates one instance of each aligner type and asserts that its ToString()
/// equals the ToString() of the matching SequenceAligners entry, then asserts
/// that SequenceAligners.All is not null and reports success.
/// </summary>
public void ValidateSequenceAlignersAll()
{
    const string successMessage = "Successfully created all the objects in Sequence Aligners";

    // MUMmer and NUCmer are instantiated through their "3" implementations
    // but compared through the base-typed reference, as before.
    MUMmer mummer = new MUMmer3();
    Assert.AreEqual(mummer.ToString(), SequenceAligners.MUMmer.ToString());

    var needlemanWunsch = new NeedlemanWunschAligner();
    Assert.AreEqual(needlemanWunsch.ToString(), SequenceAligners.NeedlemanWunsch.ToString());

    NUCmer nucmer = new NUCmer3();
    Assert.AreEqual(nucmer.ToString(), SequenceAligners.NUCmer.ToString());

    var pairwiseOverlap = new PairwiseOverlapAligner();
    Assert.AreEqual(pairwiseOverlap.ToString(), SequenceAligners.PairwiseOverlap.ToString());

    var smithWaterman = new SmithWatermanAligner();
    Assert.AreEqual(smithWaterman.ToString(), SequenceAligners.SmithWaterman.ToString());

    Assert.IsNotNull(SequenceAligners.All);

    Console.Write(successMessage);
    ApplicationLog.Write(successMessage);
}
/// <summary>
/// Validates PairwiseOverlapAligner against the expected values stored under the
/// PairwiseOverlapAlignAlgorithm node of the XML configuration.
/// </summary>
/// <param name="isTextFile">
/// True to parse both inputs from the FASTA files named in the configuration;
/// false to build them from the sequence strings stored there.
/// </param>
/// <param name="alignParam">
/// Selects the aligner overload to exercise: a list of sequences, two sequences,
/// or all parameters (matrix and gap costs) passed explicitly.
/// </param>
/// <param name="alignType">
/// AlignmentType.Align calls Align and compares against the gap-extension
/// expectations; any other value calls AlignSimple.
/// </param>
void ValidatePairwiseOverlapAlignment(bool isTextFile, AlignmentParamType alignParam, AlignmentType alignType)
{
    string configNode = Constants.PairwiseOverlapAlignAlgorithmNodeName;

    IAlphabet alphabet = Utility.GetAlphabet(
        utilityObj.xmlUtil.GetTextValue(configNode, Constants.AlphabetNameNode));

    ISequence aInput;
    ISequence bInput;

    if (isTextFile)
    {
        // Read the xml file for getting both the files for aligning.
        string filePath1 = utilityObj.xmlUtil.GetTextValue(configNode, Constants.FilePathNode1);
        string filePath2 = utilityObj.xmlUtil.GetTextValue(configNode, Constants.FilePathNode2);

        // Parse the files and take the first sequence of each.
        using (FastAParser parser1 = new FastAParser(filePath1))
        {
            parser1.Alphabet = alphabet;
            aInput = parser1.Parse().ElementAt(0);
        }

        using (FastAParser parser2 = new FastAParser(filePath2))
        {
            parser2.Alphabet = alphabet;
            bInput = parser2.Parse().ElementAt(0);
        }
    }
    else
    {
        // Read the xml file for getting both the sequences for aligning.
        string origSequence1 = utilityObj.xmlUtil.GetTextValue(configNode, Constants.SequenceNode1);
        string origSequence2 = utilityObj.xmlUtil.GetTextValue(configNode, Constants.SequenceNode2);
        aInput = new Sequence(alphabet, origSequence1);
        bInput = new Sequence(alphabet, origSequence2);
    }

    string aInputString = new string(aInput.Select(a => (char)a).ToArray());
    string bInputString = new string(bInput.Select(a => (char)a).ToArray());

    // Each message is formatted once and sent to both the log and the console.
    string firstUsedMessage = string.Format(
        (IFormatProvider)null,
        "PairwiseOverlapAligner BVT : First sequence used is '{0}'.",
        aInputString);
    string secondUsedMessage = string.Format(
        (IFormatProvider)null,
        "PairwiseOverlapAligner BVT : Second sequence used is '{0}'.",
        bInputString);

    ApplicationLog.WriteLine(firstUsedMessage);
    ApplicationLog.WriteLine(secondUsedMessage);
    Console.WriteLine(firstUsedMessage);
    Console.WriteLine(secondUsedMessage);

    string blosumFilePath = utilityObj.xmlUtil.GetTextValue(configNode, Constants.BlosumFilePathNode);
    SimilarityMatrix sm = new SimilarityMatrix(blosumFilePath);

    int gapOpenCost = int.Parse(
        utilityObj.xmlUtil.GetTextValue(configNode, Constants.GapOpenCostNode),
        (IFormatProvider)null);
    int gapExtensionCost = int.Parse(
        utilityObj.xmlUtil.GetTextValue(configNode, Constants.GapExtensionCostNode),
        (IFormatProvider)null);

    PairwiseOverlapAligner pairwiseOverlapObj = new PairwiseOverlapAligner();

    // In the AllParam case the matrix and gap costs are handed to the
    // Align/AlignSimple call itself, so the aligner's properties stay untouched.
    if (AlignmentParamType.AllParam != alignParam)
    {
        pairwiseOverlapObj.SimilarityMatrix = sm;
        pairwiseOverlapObj.GapOpenCost = gapOpenCost;
    }

    bool useAlign = alignType == AlignmentType.Align;
    IList<IPairwiseSequenceAlignment> result = null;

    switch (alignParam)
    {
        case AlignmentParamType.AlignList:
            List<ISequence> sequences = new List<ISequence> { aInput, bInput };
            if (useAlign)
            {
                result = pairwiseOverlapObj.Align(sequences);
            }
            else
            {
                result = pairwiseOverlapObj.AlignSimple(sequences);
            }

            break;

        case AlignmentParamType.AlignTwo:
            if (useAlign)
            {
                result = pairwiseOverlapObj.Align(aInput, bInput);
            }
            else
            {
                result = pairwiseOverlapObj.AlignSimple(aInput, bInput);
            }

            break;

        case AlignmentParamType.AllParam:
            if (useAlign)
            {
                result = pairwiseOverlapObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
            }
            else
            {
                result = pairwiseOverlapObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
            }

            break;

        default:
            // Previously this fell through with a null result and only surfaced
            // later as an unexplained comparison failure.
            Assert.Fail("Unsupported AlignmentParamType: " + alignParam);
            break;
    }

    // Read the expected score and aligned sequences from the xml file.
    string expectedScore;
    string expectedSequence1;
    string expectedSequence2;

    if (useAlign)
    {
        expectedScore = utilityObj.xmlUtil.GetTextValue(configNode, Constants.ExpectedGapExtensionScoreNode);
        expectedSequence1 = utilityObj.xmlUtil.GetTextValue(configNode, Constants.ExpectedGapExtensionSequence1Node);
        expectedSequence2 = utilityObj.xmlUtil.GetTextValue(configNode, Constants.ExpectedGapExtensionSequence2Node);
    }
    else
    {
        expectedScore = utilityObj.xmlUtil.GetTextValue(configNode, Constants.ExpectedScoreNode);
        expectedSequence1 = utilityObj.xmlUtil.GetTextValue(configNode, Constants.ExpectedSequenceNode1);
        expectedSequence2 = utilityObj.xmlUtil.GetTextValue(configNode, Constants.ExpectedSequenceNode2);
    }

    // Multiple expected alignments are stored as ';'-separated lists.
    // NOTE(review): the loop indexes both lists by the first list's length, so
    // it assumes both contain the same number of entries - confirm against the data.
    string[] expectedSequences1 = expectedSequence1.Split(';');
    string[] expectedSequences2 = expectedSequence2.Split(';');

    // Every expected aligned sequence carries the same score; parse it once.
    int expectedScoreValue = Convert.ToInt32(expectedScore, (IFormatProvider)null);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();

    for (int i = 0; i < expectedSequences1.Length; i++)
    {
        PairwiseAlignedSequence alignedSeq = new PairwiseAlignedSequence
        {
            FirstSequence = new Sequence(alphabet, expectedSequences1[i]),
            SecondSequence = new Sequence(alphabet, expectedSequences2[i]),
            Score = expectedScoreValue
        };
        align.PairwiseAlignedSequences.Add(alignedSeq);
    }

    expectedOutput.Add(align);
    Assert.IsTrue(CompareAlignment(result, expectedOutput));

    string scoreMessage = string.Format(
        (IFormatProvider)null,
        "PairwiseOverlapAligner BVT : Final Score '{0}'.",
        expectedScore);
    string firstAlignedMessage = string.Format(
        (IFormatProvider)null,
        "PairwiseOverlapAligner BVT : Aligned First Sequence is '{0}'.",
        expectedSequence1);
    string secondAlignedMessage = string.Format(
        (IFormatProvider)null,
        "PairwiseOverlapAligner BVT : Aligned Second Sequence is '{0}'.",
        expectedSequence2);

    ApplicationLog.WriteLine(scoreMessage);
    ApplicationLog.WriteLine(firstAlignedMessage);
    ApplicationLog.WriteLine(secondAlignedMessage);
    Console.WriteLine(scoreMessage);
    Console.WriteLine(firstAlignedMessage);
    Console.WriteLine(secondAlignedMessage);
}
/// <summary>
/// Validates PairwiseOverlapAlignment algorithm for the parameters passed,
/// comparing the aligner output against the expected values stored under the
/// PairwiseOverlapAlignAlgorithm node of the XML configuration.
/// </summary>
/// <param name="isTextFile">
/// True to parse both inputs from the FASTA files named in the configuration;
/// false to build them from the sequence strings stored there.
/// </param>
/// <param name="alignParam">
/// Selects the aligner overload to exercise: a list of sequences, two sequences,
/// or all parameters (matrix and gap costs) passed explicitly.
/// </param>
/// <param name="alignType">
/// AlignmentType.Align calls Align and compares against the gap-extension
/// expectations; any other value calls AlignSimple.
/// </param>
private void ValidatePairwiseOverlapAlignment(bool isTextFile, AlignmentParamType alignParam, AlignmentType alignType)
{
    string configNode = Constants.PairwiseOverlapAlignAlgorithmNodeName;

    IAlphabet alphabet = Utility.GetAlphabet(
        this.utilityObj.xmlUtil.GetTextValue(configNode, Constants.AlphabetNameNode));

    ISequence aInput;
    ISequence bInput;

    if (isTextFile)
    {
        // Read the xml file for getting both the files for aligning.
        string filePath1 = this.utilityObj.xmlUtil.GetTextValue(configNode, Constants.FilePathNode1).TestDir();
        string filePath2 = this.utilityObj.xmlUtil.GetTextValue(configNode, Constants.FilePathNode2).TestDir();

        // Parse the files and take the first sequence of each.
        var parser = new FastAParser { Alphabet = alphabet };
        aInput = parser.Parse(filePath1).ElementAt(0);
        bInput = parser.Parse(filePath2).ElementAt(0);
    }
    else
    {
        // Read the xml file for getting both the sequences for aligning.
        string origSequence1 = this.utilityObj.xmlUtil.GetTextValue(configNode, Constants.SequenceNode1);
        string origSequence2 = this.utilityObj.xmlUtil.GetTextValue(configNode, Constants.SequenceNode2);
        aInput = new Sequence(alphabet, origSequence1);
        bInput = new Sequence(alphabet, origSequence2);
    }

    var aInputString = aInput.ConvertToString();
    var bInputString = bInput.ConvertToString();

    ApplicationLog.WriteLine($"PairwiseOverlapAligner BVT : First sequence used is '{aInputString}'.");
    ApplicationLog.WriteLine($"PairwiseOverlapAligner BVT : Second sequence used is '{bInputString}'.");

    string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(configNode, Constants.BlosumFilePathNode).TestDir();

    // Dispose the reader once the matrix has been built so the file handle is
    // not left open for the remainder of the test run.
    // NOTE(review): assumes the SimilarityMatrix constructor finishes reading
    // before it returns - confirm against the SimilarityMatrix implementation.
    SimilarityMatrix sm;
    using (var matrixReader = new StreamReader(blosumFilePath))
    {
        sm = new SimilarityMatrix(matrixReader);
    }

    int gapOpenCost = int.Parse(
        this.utilityObj.xmlUtil.GetTextValue(configNode, Constants.GapOpenCostNode),
        null);
    int gapExtensionCost = int.Parse(
        this.utilityObj.xmlUtil.GetTextValue(configNode, Constants.GapExtensionCostNode),
        null);

    var pairwiseOverlapObj = new PairwiseOverlapAligner();

    // In the AllParam case the matrix and gap costs are handed to the
    // Align/AlignSimple call itself, so the aligner's properties stay untouched.
    if (AlignmentParamType.AllParam != alignParam)
    {
        pairwiseOverlapObj.SimilarityMatrix = sm;
        pairwiseOverlapObj.GapOpenCost = gapOpenCost;
    }

    bool useAlign = alignType == AlignmentType.Align;
    IList<IPairwiseSequenceAlignment> result = null;

    switch (alignParam)
    {
        case AlignmentParamType.AlignList:
            var sequences = new List<ISequence> { aInput, bInput };
            if (useAlign)
            {
                result = pairwiseOverlapObj.Align(sequences);
            }
            else
            {
                result = pairwiseOverlapObj.AlignSimple(sequences);
            }

            break;

        case AlignmentParamType.AlignTwo:
            if (useAlign)
            {
                result = pairwiseOverlapObj.Align(aInput, bInput);
            }
            else
            {
                result = pairwiseOverlapObj.AlignSimple(aInput, bInput);
            }

            break;

        case AlignmentParamType.AllParam:
            if (useAlign)
            {
                result = pairwiseOverlapObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
            }
            else
            {
                result = pairwiseOverlapObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
            }

            break;

        default:
            // Previously this fell through with a null result and only surfaced
            // later as an unexplained comparison failure.
            Assert.Fail("Unsupported AlignmentParamType: " + alignParam);
            break;
    }

    // Read the expected score and aligned sequences from the xml file.
    string expectedScore;
    string expectedSequence1;
    string expectedSequence2;

    if (useAlign)
    {
        expectedScore = this.utilityObj.xmlUtil.GetTextValue(configNode, Constants.ExpectedGapExtensionScoreNode);
        expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(configNode, Constants.ExpectedGapExtensionSequence1Node);
        expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(configNode, Constants.ExpectedGapExtensionSequence2Node);
    }
    else
    {
        expectedScore = this.utilityObj.xmlUtil.GetTextValue(configNode, Constants.ExpectedScoreNode);
        expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(configNode, Constants.ExpectedSequenceNode1);
        expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(configNode, Constants.ExpectedSequenceNode2);
    }

    // Multiple expected alignments are stored as ';'-separated lists.
    // NOTE(review): the loop indexes both lists by the first list's length, so
    // it assumes both contain the same number of entries - confirm against the data.
    string[] expectedSequences1 = expectedSequence1.Split(';');
    string[] expectedSequences2 = expectedSequence2.Split(';');

    // Every expected aligned sequence carries the same score; parse it once.
    int expectedScoreValue = Convert.ToInt32(expectedScore, null);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();

    for (int i = 0; i < expectedSequences1.Length; i++)
    {
        var alignedSeq = new PairwiseAlignedSequence
        {
            FirstSequence = new Sequence(alphabet, expectedSequences1[i]),
            SecondSequence = new Sequence(alphabet, expectedSequences2[i]),
            Score = expectedScoreValue
        };
        align.PairwiseAlignedSequences.Add(alignedSeq);
    }

    expectedOutput.Add(align);
    Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput));

    ApplicationLog.WriteLine($"PairwiseOverlapAligner BVT : Final Score '{expectedScore}'.");
    ApplicationLog.WriteLine($"PairwiseOverlapAligner BVT : Aligned First Sequence is '{expectedSequence1}'.");
    ApplicationLog.WriteLine($"PairwiseOverlapAligner BVT : Aligned Second Sequence is '{expectedSequence2}'.");
}