/// <summary>
/// Aligns two protein sequences with the Smith-Waterman aligner
/// (BLOSUM62, gap open -8, gap extension -1) and compares the result
/// with the expected alignment.
/// </summary>
public void SmithWatermanProteinSeqAffineGap()
{
    IPairwiseSequenceAligner sw = new SmithWatermanAligner
    {
        SimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62),
        GapOpenCost = -8,
        GapExtensionCost = -1,
    };

    ISequence firstInput = new Sequence(Alphabets.Protein, "HEAGAWGHEE");
    ISequence secondInput = new Sequence(Alphabets.Protein, "PAWHEAE");

    IList<IPairwiseSequenceAlignment> actual = sw.Align(firstInput, secondInput);
    AlignmentHelpers.LogResult(sw, actual);

    // Build the single aligned pair we expect the aligner to report.
    var expectedPair = new PairwiseAlignedSequence();
    expectedPair.FirstSequence = new Sequence(Alphabets.Protein, "AWGHE");
    expectedPair.SecondSequence = new Sequence(Alphabets.Protein, "AW-HE");
    expectedPair.Consensus = new Sequence(Alphabets.AmbiguousProtein, "AWGHE");
    expectedPair.Score = 20;
    expectedPair.FirstOffset = 0;
    expectedPair.SecondOffset = 3;

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);

    IList<IPairwiseSequenceAlignment> expected = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(AlignmentHelpers.CompareAlignment(actual, expected));
}
/// <summary>
/// Runs the pairwise overlap aligner in simple-gap mode (BLOSUM50, gap cost -8)
/// on two protein sequences, logs every reported alignment and compares the
/// result with the expected alignment.
/// </summary>
public void PairwiseOverlapProteinSeqSimpleGap()
{
    var sequence1 = new Sequence(Alphabets.Protein, "HEAGAWGHEE");
    var sequence2 = new Sequence(Alphabets.Protein, "PAWHEAE");

    var overlap = new PairwiseOverlapAligner
    {
        SimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum50),
        GapOpenCost = -8
    };

    IList<IPairwiseSequenceAlignment> result = overlap.AlignSimple(sequence1, sequence2);

    ApplicationLog.WriteLine(string.Format(
        (IFormatProvider)null,
        "{0}, Simple; Matrix {1}; GapOpenCost {2}",
        overlap.Name,
        overlap.SimilarityMatrix.Name,
        overlap.GapOpenCost));

    foreach (IPairwiseSequenceAlignment alignment in result)
    {
        // Only the first aligned pair of each alignment is logged.
        var firstPair = alignment.PairwiseAlignedSequences[0];

        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "score {0}", firstPair.Score));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 0     {0}", alignment.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 1     {0}", alignment.SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 0    {0}", firstPair.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 1    {0}", firstPair.SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "consesus    {0}", firstPair.Consensus.ToString()));
    }

    var expectedPair = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.Protein, "GAWGHEE"),
        SecondSequence = new Sequence(Alphabets.Protein, "PAW-HEA"),
        Consensus = new Sequence(Alphabets.AmbiguousProtein, "XAWGHEX"),
        Score = 25,
        FirstOffset = 0,
        SecondOffset = 3
    };

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Aligns one DNA query against one DNA reference with the MUMmer aligner
/// (MUM length 3, Needleman-Wunsch for the gaps, gap extension -2) and
/// compares the result with the expected alignment.
/// </summary>
public void TestMUMmerAlignerSingleMum()
{
    ISequence referenceSeq = new Sequence(Alphabets.DNA, "TTAATTTTAG");
    ISequence searchSeq = new Sequence(Alphabets.DNA, "AGTTTAGAG");

    var searchSeqs = new List<ISequence>();
    searchSeqs.Add(searchSeq);

    var mummer = new MUMmerAligner();
    mummer.LengthOfMUM = 3;
    mummer.PairWiseAlgorithm = new NeedlemanWunschAligner();
    mummer.GapExtensionCost = -2;

    IList<IPairwiseSequenceAlignment> result = mummer.Align(referenceSeq, searchSeqs);

    // The aligner must hand back a result object.
    Assert.AreNotEqual(null, result);

    var expectedPair = new PairwiseAlignedSequence();
    expectedPair.FirstSequence = new Sequence(Alphabets.DNA, "TTAATTTTAG--");
    expectedPair.SecondSequence = new Sequence(Alphabets.DNA, "---AGTTTAGAG");
    expectedPair.Consensus = new Sequence(AmbiguousDnaAlphabet.Instance, "TTAAKTTTAGAG");
    expectedPair.Score = -6;
    expectedPair.FirstOffset = 0;
    expectedPair.SecondOffset = 3;

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Aligns two DNA queries against two DNA references with the NUCmer
/// pairwise aligner (forward strand only) and compares the result with
/// the two expected alignments.
/// </summary>
public void TestNUCmer3MultipleReferencesAndQueries()
{
    var referenceSeqs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "ATGCGCATCCCC") { ID = "R1" },
        new Sequence(Alphabets.DNA, "TAGCT") { ID = "R11" }
    };

    var searchSeqs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "CCGCGCCCCCTC") { ID = "Q1" },
        new Sequence(Alphabets.DNA, "AGCT") { ID = "Q11" }
    };

    var nucmer = new NucmerPairwiseAligner
    {
        FixedSeparation = 0,
        MinimumScore = 2,
        SeparationFactor = -1,
        LengthOfMUM = 3,
        ForwardOnly = true
    };

    IList<IPairwiseSequenceAlignment> result = nucmer
        .Align(referenceSeqs, searchSeqs)
        .Select(a => a as IPairwiseSequenceAlignment)
        .ToList();

    // The aligner must hand back a result object.
    Assert.AreNotEqual(null, result);

    // Expected alignment for the first query.
    IPairwiseSequenceAlignment firstExpected = new PairwiseSequenceAlignment();
    firstExpected.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "GCGCATCCCC"),
        SecondSequence = new Sequence(Alphabets.DNA, "GCGC--CCCC"),
        Consensus = new Sequence(Alphabets.DNA, "GCGCATCCCC"),
        Score = -5,
        FirstOffset = 0,
        SecondOffset = 0
    });

    // Expected alignment for the second query.
    IPairwiseSequenceAlignment secondExpected = new PairwiseSequenceAlignment();
    secondExpected.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "AGCT"),
        SecondSequence = new Sequence(Alphabets.DNA, "AGCT"),
        Consensus = new Sequence(Alphabets.DNA, "AGCT"),
        Score = 12,
        FirstOffset = 0,
        SecondOffset = 1
    });

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>
    {
        firstExpected,
        secondExpected
    };

    Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Aligns two protein sequences with the Needleman-Wunsch aligner
/// (BLOSUM62, gap open -8, gap extension -1) and compares the result
/// with the expected alignment.
/// </summary>
public void NeedlemanWunschProteinSeqAffineGap()
{
    IPairwiseSequenceAligner nw = new NeedlemanWunschAligner
    {
        SimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62),
        GapOpenCost = -8,
        GapExtensionCost = -1
    };

    ISequence firstInput = new Sequence(Alphabets.Protein, "HEAGAWGHEE");
    ISequence secondInput = new Sequence(Alphabets.Protein, "PAWHEAE");

    IList<IPairwiseSequenceAlignment> actual = nw.Align(firstInput, secondInput);
    AlignmentHelpers.LogResult(nw, actual);

    var expectedPair = new PairwiseAlignedSequence();
    expectedPair.FirstSequence = new Sequence(Alphabets.Protein, "HEAGAWGHE-E");
    expectedPair.SecondSequence = new Sequence(Alphabets.Protein, "P---AW-HEAE");
    expectedPair.Consensus = new Sequence(AmbiguousProteinAlphabet.Instance, "XEAGAWGHEAE");
    expectedPair.Score = 5;
    expectedPair.FirstOffset = 0;
    expectedPair.SecondOffset = 0;

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);

    IList<IPairwiseSequenceAlignment> expected = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(AlignmentHelpers.CompareAlignment(actual, expected));
}
/// <summary>
/// Given a pairwise alignment, shift its gaps so that all deletions start
/// as early as possible. For example:
///
/// <code>
/// TTTTAAAATTTT  -> Converts to ->  TTTTAAAATTTT
/// TTTTAA--TTTT                     TTTT--AATTTT
/// </code>
///
/// This function takes an IPairwiseSequenceAlignment and assumes that the first sequence is the reference and the second
/// sequence is the query. It returns a new pairwise sequence alignment with all of the indels left aligned, as well as a list of variants.
/// </summary>
/// <param name="aln">The alignment to process. It must contain exactly one aligned pair, and the second (query) sequence of that pair must be a QualitativeSequence or a Sequence.</param>
/// <param name="callVariants">If true, variants are called; otherwise the second item of the returned tuple is null.</param>
/// <returns>A tuple holding the left-aligned alignment and the called variants (null when <paramref name="callVariants"/> is false).</returns>
/// <exception cref="NullReferenceException">Thrown when <paramref name="aln"/>, or either sequence of its aligned pair, is null.</exception>
/// <exception cref="ArgumentException">Thrown when the alignment does not contain exactly one aligned pair, or when the query is neither a Sequence nor a QualitativeSequence.</exception>
public static Tuple<IPairwiseSequenceAlignment, List<Variant>> LeftAlignIndelsAndCallVariants(IPairwiseSequenceAlignment aln, bool callVariants = true)
{
    // NOTE: the argument guards deliberately keep throwing NullReferenceException
    // so that the exception types seen by existing callers do not change.
    if (aln == null)
    {
        throw new NullReferenceException("aln");
    }
    if (aln.PairwiseAlignedSequences == null || aln.PairwiseAlignedSequences.Count != 1)
    {
        throw new ArgumentException("The pairwise aligned sequence should only have one alignment");
    }

    var frstAln = aln.PairwiseAlignedSequences.First();
    var seq1 = frstAln.FirstSequence;
    var seq2 = frstAln.SecondSequence;
    if (seq1 == null)
    {
        throw new NullReferenceException("seq1");
    }
    if (seq2 == null)
    {
        throw new NullReferenceException("seq2");
    }

    //TODO: Might implement an ambiguity check later.
#if FALSE
    if (seq1.Alphabet.HasAmbiguity || seq2.Alphabet.HasAmbiguity)
    {
        throw new ArgumentException ("Cannot left align sequences with ambiguous symbols.");
    }
#endif

    // Note we have to copy unless we can guarantee the array will not be mutated.
    byte[] refseq = seq1.ToArray();
    ISequence newQuery;
    List<Variant> variants = null;

    var qualitativeQuery = seq2 as QualitativeSequence;
    var plainQuery = seq2 as Sequence;

    if (qualitativeQuery != null)
    {
        // Call variants for a qualitative sequence: pair every base with its quality value.
        var query = Enumerable.Zip(
            qualitativeQuery,
            qualitativeQuery.GetQualityScores(),
            (bp, qv) => new BPandQV(bp, (byte)qv, false)).ToArray();

        AlignmentUtils.LeftAlignIndels(refseq, query);
        AlignmentUtils.VerifyNoGapsOnEnds(refseq, query);
        if (callVariants)
        {
            variants = VariantCaller.CallVariants(refseq, query, seq2.IsMarkedAsReverseComplement());
        }

        var newQueryQS = new QualitativeSequence(
            qualitativeQuery.Alphabet,
            qualitativeQuery.FormatType,
            query.Select(z => z.BP).ToArray(),
            query.Select(p => p.QV).ToArray(),
            false);
        newQueryQS.Metadata = seq2.Metadata;
        newQuery = newQueryQS;
    }
    else if (plainQuery != null)
    {
        // For a sequence with no QV values every base gets a quality of 0.
        var query = plainQuery.Select(v => new BPandQV(v, 0, false)).ToArray();

        AlignmentUtils.LeftAlignIndels(refseq, query);
        AlignmentUtils.VerifyNoGapsOnEnds(refseq, query);

        // ISequence does not have a setable metadata, so build the concrete type.
        var newQueryS = new Sequence(plainQuery.Alphabet, query.Select(z => z.BP).ToArray(), false);
        newQueryS.Metadata = seq2.Metadata;
        if (callVariants)
        {
            variants = VariantCaller.CallVariants(refseq, query, seq2.IsMarkedAsReverseComplement());
        }
        newQuery = newQueryS;
    }
    else
    {
        throw new ArgumentException("Can only left align indels if the query sequence is of type Sequence or QualitativeSequence.");
    }

    // variants is null when callVariants is false; without the null check the
    // loop below would dereference null whenever the reference carries an ID.
    if (variants != null && aln.FirstSequence != null && aln.FirstSequence.ID != null)
    {
        foreach (var v in variants)
        {
            v.RefName = aln.FirstSequence.ID;
        }
    }

    var newRef = new Sequence(seq1.Alphabet, refseq, false);
    newRef.ID = seq1.ID;
    newRef.Metadata = seq1.Metadata;

    newQuery.ID = seq2.ID;

    var newaln = new PairwiseSequenceAlignment(aln.FirstSequence, aln.SecondSequence);
    var pas = new PairwiseAlignedSequence();
    pas.FirstSequence = newRef;
    pas.SecondSequence = newQuery;
    newaln.Add(pas);

    return new Tuple<IPairwiseSequenceAlignment, List<Variant>>(newaln, variants);
}
/// <summary>
/// Main execution method that defines the step by step algorithm.
/// For every query sequence the MUMs are computed and stored in the
/// mums dictionary; the gaps between the selected MUMs are then aligned
/// with the pairwise algorithm. Queries that yield no sorted MUMs are
/// aligned with the pairwise algorithm directly.
/// </summary>
/// <param name="referenceSequence">Reference sequence.</param>
/// <param name="querySequenceList">List of input sequences.</param>
/// <returns>A list of sequence alignments.</returns>
private IList<IPairwiseSequenceAlignment> AlignmentWithAccumulatedMUMs(
    ISequence referenceSequence,
    IEnumerable<ISequence> querySequenceList)
{
    // Reset the MUM store for this run.
    this.mums = new Dictionary<ISequence, IEnumerable<Match>>();
    IList<IPairwiseSequenceAlignment> results = new List<IPairwiseSequenceAlignment>();

    if (!this.Validate(referenceSequence, querySequenceList))
    {
        return results;
    }

    // Safety check for public methods to ensure that null
    // inputs are handled.
    if (referenceSequence == null || querySequenceList == null)
    {
        return null;
    }

    Sequence typedReference = referenceSequence as Sequence;
    if (typedReference == null)
    {
        throw new ArgumentException(Properties.Resource.OnlySequenceClassSupported);
    }

    MUMmer mummer = new MUMmer(typedReference);
    mummer.LengthOfMUM = this.LengthOfMUM;
    mummer.NoAmbiguity = this.AmbigiousMatchesAllowed;

    foreach (ISequence query in querySequenceList)
    {
        // The reference is never aligned against itself.
        if (query.Equals(referenceSequence))
        {
            continue;
        }

        IPairwiseSequenceAlignment alignment = new PairwiseSequenceAlignment(referenceSequence, query);

        // Step2 : streaming process is performed with the query sequence
        IEnumerable<Match> matches;
        if (this.MaximumMatchEnabled)
        {
            matches = mummer.GetMatches(query);
        }
        else
        {
            matches = mummer.GetMatchesUniqueInReference(query);
        }

        this.mums.Add(query, matches);

        // Step3(a) : sorted mum list based on reference sequence
        LongestIncreasingSubsequence lis = new LongestIncreasingSubsequence();
        IList<Match> sortedMatches = lis.SortMum(GetMumsForLIS(matches));

        if (sortedMatches.Count <= 0)
        {
            // No MUMs to anchor on: fall back to the plain pairwise aligner.
            IList<IPairwiseSequenceAlignment> fallback = this.RunPairWise(referenceSequence, query);
            foreach (IPairwiseSequenceAlignment item in fallback)
            {
                results.Add(item);
            }

            continue;
        }

        // Step3(b) : LIS using greedy cover algorithm
        IList<Match> selectedMatches = lis.GetLongestSequence(sortedMatches);
        if (selectedMatches.Count > 0)
        {
            // Step 4 : get all the gaps in each sequence and call
            // pairwise alignment
            alignment.PairwiseAlignedSequences.Add(
                this.ProcessGaps(referenceSequence, query, selectedMatches));
        }

        results.Add(alignment);
    }

    return results;
}
/// <summary>
/// Checks that PairwiseSequenceAlignment.ToString() renders the consensus,
/// the first sequence and the second sequence on separate lines, followed
/// by an empty line.
/// </summary>
public void ValidatePairwiseSequenceAlignmentToString()
{
    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    var alignedSeq = new PairwiseAlignedSequence();
    alignedSeq.FirstSequence = new Sequence(Alphabets.Protein, "AWGHE");
    alignedSeq.SecondSequence = new Sequence(Alphabets.Protein, "AW-HE");
    alignedSeq.Consensus = new Sequence(Alphabets.Protein, "AWGHE");
    alignedSeq.Score = 28;
    alignedSeq.FirstOffset = 0;
    alignedSeq.SecondOffset = 3;
    align.PairwiseAlignedSequences.Add(alignedSeq);

    string actualString = align.ToString();

    // The expectation is written with "\r\n" and normalised to the platform newline.
    string expectedString = "AWGHE\r\nAWGHE\r\nAW-HE\r\n\r\n".Replace("\r\n", System.Environment.NewLine);

    // Expected value goes first so that a failure message labels the values correctly.
    Assert.AreEqual(expectedString, actualString);
}
/// <summary>
/// Main execution method that defines the step by step algorithm:
/// delta alignments are collected for every reference/query pair and are
/// then converted, per query, into scored pairwise alignments with a consensus.
/// </summary>
/// <param name="referenceSequenceList">Reference sequences.</param>
/// <param name="originalQuerySequences">List of input sequences.</param>
/// <returns>A list of sequence alignment.</returns>
private IEnumerable<IPairwiseSequenceAlignment> Alignment(IEnumerable<ISequence> referenceSequenceList, IEnumerable<ISequence> originalQuerySequences)
{
    ConsensusResolver = new SimpleConsensusResolver(referenceSequenceList.ElementAt(0).Alphabet);

    // Choose which strands of the queries take part in the alignment.
    IEnumerable<ISequence> querySequenceList;
    if (ForwardOnly)
    {
        querySequenceList = originalQuerySequences;
    }
    else if (ReverseOnly)
    {
        querySequenceList = ReverseComplementSequenceList(originalQuerySequences);
    }
    else
    {
        querySequenceList = AddReverseComplementsToSequenceList(originalQuerySequences);
    }

    IList<IPairwiseSequenceAlignment> results = new List<IPairwiseSequenceAlignment>();
    var deltas = new List<DeltaAlignment>();

    foreach (ISequence refSequence in referenceSequenceList)
    {
        this.nucmerAlgo = new NUCmer(refSequence);

        // Only values that differ from the defaults are pushed to the algorithm.
        if (GapOpenCost != DefaultGapOpenCost)
        {
            this.nucmerAlgo.GapOpenCost = GapOpenCost;
        }

        if (GapExtensionCost != DefaultGapExtensionCost)
        {
            this.nucmerAlgo.GapExtensionCost = GapExtensionCost;
        }

        if (LengthOfMUM != DefaultLengthOfMUM)
        {
            this.nucmerAlgo.LengthOfMUM = LengthOfMUM;
        }

        // ClusterBuilder related settings.
        if (FixedSeparation != ClusterBuilder.DefaultFixedSeparation)
        {
            this.nucmerAlgo.FixedSeparation = FixedSeparation;
        }

        if (MaximumSeparation != ClusterBuilder.DefaultMaximumSeparation)
        {
            this.nucmerAlgo.MaximumSeparation = MaximumSeparation;
        }

        if (MinimumScore != ClusterBuilder.DefaultMinimumScore)
        {
            this.nucmerAlgo.MinimumScore = MinimumScore;
        }

        if (SeparationFactor != ClusterBuilder.DefaultSeparationFactor)
        {
            this.nucmerAlgo.SeparationFactor = SeparationFactor;
        }

        if (BreakLength != ModifiedSmithWaterman.DefaultBreakLength)
        {
            this.nucmerAlgo.BreakLength = BreakLength;
        }

        this.nucmerAlgo.ConsensusResolver = ConsensusResolver;

        if (SimilarityMatrix != null)
        {
            this.nucmerAlgo.SimilarityMatrix = SimilarityMatrix;
        }

        foreach (ISequence query in querySequenceList)
        {
            // Check for parameters that would prevent an alignment from being returned.
            if (Math.Min(query.Count, refSequence.Count) < MinimumScore)
            {
                var msg = "Bad parameter settings for NucmerPairwiseAligner. "
                    + "Tried to align a reference of length " + refSequence.Count.ToString()
                    + " to a sequence of length " + query.Count.ToString()
                    + " while requiring a minimum score of MinimumScore = " + MinimumScore
                    + ". This will prevent any alignments from being returned.";
                throw new ArgumentException(msg);
            }

            IEnumerable<DeltaAlignment> found = this.nucmerAlgo.GetDeltaAlignments(
                query,
                !MaxMatch,
                query.IsMarkedAsReverseComplement());
            deltas.AddRange(found);
        }
    }

    if (deltas.Count <= 0)
    {
        return results;
    }

    ISequence concatReference = referenceSequenceList.ElementAt(0);

    // With several references, all of them are concatenated into one sequence.
    if (referenceSequenceList.Count() > 1)
    {
        concatReference = ConcatSequence(referenceSequenceList);
    }

    foreach (ISequence query in querySequenceList)
    {
        List<DeltaAlignment> queryDeltas = deltas.Where(d => d.QuerySequence.Equals(query)).ToList();
        IPairwiseSequenceAlignment sequenceAlignment = new PairwiseSequenceAlignment(concatReference, query);

        // Convert delta alignments to sequence alignments
        IList<PairwiseAlignedSequence> converted = ConvertDeltaToAlignment(queryDeltas);
        foreach (PairwiseAlignedSequence pair in converted)
        {
            // Calculate the score of alignment
            pair.Score = CalculateScore(pair.FirstSequence, pair.SecondSequence);

            // Make Consensus
            pair.Consensus = MakeConsensus(pair.FirstSequence, pair.SecondSequence);

            sequenceAlignment.PairwiseAlignedSequences.Add(pair);
        }

        results.Add(sequenceAlignment);
    }

    return results;
}
/// <summary>
/// Reads the reference and query FASTA file paths, the MUM length, the gap
/// open cost and the expected result from the XML node <paramref name="nodeName"/>,
/// runs the MUMmer aligner and compares the produced alignment with the expectation.
/// </summary>
/// <param name="nodeName">Name of the XML node holding the test data.</param>
private void ValidateMUMmerAlignGeneralTestCases(string nodeName)
{
    // Gets the reference sequence file path from the configuration file
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    Assert.IsNotNull(filePath);
    ApplicationLog.WriteLine(string.Format(null, "MUMmer P2 : Successfully validated the File Path '{0}'.", filePath));

    var referenceParser = new FastAParser();
    IEnumerable<ISequence> referenceSeqs = referenceParser.Parse(filePath);
    ISequence referenceSeq = referenceSeqs.ElementAt(0);

    // Gets the query sequence file path from the configuration file
    string queryFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);
    Assert.IsNotNull(queryFilePath);
    ApplicationLog.WriteLine(string.Format(null, "MUMmer P2 : Successfully validated the Search File Path '{0}'.", queryFilePath));

    var queryParser = new FastAParser();
    IEnumerable<ISequence> querySeqs = queryParser.Parse(queryFilePath);

    string mumLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);

    var aligner = new MUMmerAligner();
    aligner.LengthOfMUM = long.Parse(mumLength, null);
    aligner.StoreMUMs = true;
    aligner.PairWiseAlgorithm = new NeedlemanWunschAligner();
    aligner.GapOpenCost = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);

    IList<IPairwiseSequenceAlignment> actual = aligner.Align(referenceSeq, querySeqs);

    // Validate the MUMs property.
    Assert.IsNotNull(aligner.MUMs);

    string expectedScore = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ScoreNodeName);
    string[] expectedSequences = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ExpectedSequencesNode);

    var expectedPair = new PairwiseAlignedSequence();
    expectedPair.FirstSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[0]);
    expectedPair.SecondSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[1]);
    expectedPair.Score = Convert.ToInt32(expectedScore, null);
    expectedPair.FirstOffset = Int32.MinValue;
    expectedPair.SecondOffset = Int32.MinValue;

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(AlignmentHelpers.CompareAlignment(actual, expectedOutput));
    ApplicationLog.WriteLine("MUMmer P2 : Successfully validated the aligned sequences.");
}
/// <summary>
/// Aligns two DNA queries against two DNA references with the NUCmer
/// pairwise aligner (forward strand only) and validates the two resulting
/// pairwise aligned sequences.
/// </summary>
public void ValidatePairwiseAlignedSequenceMultipleRefQuery()
{
    // References.
    var firstReference = new Sequence(Alphabets.DNA, "ATGCGCATCCCC");
    firstReference.ID = "R1";
    var secondReference = new Sequence(Alphabets.DNA, "TAGCT");
    secondReference.ID = "R2";

    var referenceSeqs = new List<ISequence>();
    referenceSeqs.Add(firstReference);
    referenceSeqs.Add(secondReference);

    // Queries.
    var firstQuery = new Sequence(Alphabets.DNA, "CCGCGCCCCCTC");
    firstQuery.ID = "Q1";
    var secondQuery = new Sequence(Alphabets.DNA, "AGCT");
    secondQuery.ID = "Q2";

    var searchSeqs = new List<ISequence>();
    searchSeqs.Add(firstQuery);
    searchSeqs.Add(secondQuery);

    var nucmer = new NucmerPairwiseAligner();
    nucmer.FixedSeparation = 0;
    nucmer.MinimumScore = 2;
    nucmer.SeparationFactor = -1;
    nucmer.LengthOfMUM = 3;
    nucmer.ForwardOnly = true;

    IList<IPairwiseSequenceAlignment> result = nucmer
        .Align(referenceSeqs, searchSeqs)
        .Select(a => a as IPairwiseSequenceAlignment)
        .ToList();

    // The aligner must hand back a result object.
    Assert.AreNotEqual(null, result);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();

    // Expected alignment for the first query.
    var firstPair = new PairwiseAlignedSequence();
    firstPair.FirstSequence = new Sequence(Alphabets.DNA, "GCGCATCCCC");
    firstPair.SecondSequence = new Sequence(Alphabets.DNA, "GCGC--CCCC");
    firstPair.Consensus = new Sequence(Alphabets.DNA, "GCGCATCCCC");
    firstPair.Score = -5;
    firstPair.FirstOffset = 0;
    firstPair.SecondOffset = 0;

    IPairwiseSequenceAlignment firstExpected = new PairwiseSequenceAlignment();
    firstExpected.PairwiseAlignedSequences.Add(firstPair);
    expectedOutput.Add(firstExpected);

    // Expected alignment for the second query.
    var secondPair = new PairwiseAlignedSequence();
    secondPair.FirstSequence = new Sequence(Alphabets.DNA, "AGCT");
    secondPair.SecondSequence = new Sequence(Alphabets.DNA, "AGCT");
    secondPair.Consensus = new Sequence(Alphabets.DNA, "AGCT");
    secondPair.Score = 12;
    secondPair.FirstOffset = 0;
    secondPair.SecondOffset = 1;

    IPairwiseSequenceAlignment secondExpected = new PairwiseSequenceAlignment();
    secondExpected.PairwiseAlignedSequences.Add(secondPair);
    expectedOutput.Add(secondExpected);

    Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput));
    ApplicationLog.WriteLine("PairwiseAlignedSequence P1: Successfully validated Sequence with Multiple Reference.");
}
/// <summary>
/// Checks that LeftAlignIndelsAndCallVariants throws a FormatException for
/// two alignments: one whose query ends in a gap, and one whose query has a
/// gap near the start.
/// </summary>
public static void TestExceptionThrownForUnclippedAlignment()
{
    // Each entry is { aligned reference, aligned query }.
    string[][] cases =
    {
        new[] { "ACAATATA", "ACAATAT-" },
        new[] { "AAACAATATA", "AA-CAATATA" }
    };

    foreach (string[] pair in cases)
    {
        var reference = new Sequence(DnaAlphabet.Instance, pair[0]);
        var query = new Sequence(DnaAlphabet.Instance, pair[1]);

        var alignment = new PairwiseSequenceAlignment(reference, query);
        var alignedPair = new PairwiseAlignedSequence();
        alignedPair.FirstSequence = reference;
        alignedPair.SecondSequence = query;
        alignment.Add(alignedPair);

        Assert.Throws<FormatException>(() => VariantCaller.LeftAlignIndelsAndCallVariants(alignment, true));
    }
}
/// <summary>
/// Left-aligns an alignment containing three deletions and one insertion and
/// checks both the shifted sequences and the three indel variants called from them.
/// </summary>
public static void TestLeftAlignmentStep()
{
    var reference = new Sequence(DnaAlphabet.Instance, "ACAATAAAAGCGCGCGCGCGTTACGTATAT--ATGGATAT");
    var query = new Sequence(DnaAlphabet.Instance, "ACAATAA-AGC--GCGC--GTTACGTATATATATGGATAT");

    var input = new PairwiseSequenceAlignment(reference, query);
    var inputPair = new PairwiseAlignedSequence();
    inputPair.FirstSequence = reference;
    inputPair.SecondSequence = query;
    input.Add(inputPair);

    var outcome = VariantCaller.LeftAlignIndelsAndCallVariants(input, true);

    // Check the left alignment
    var leftAligned = outcome.Item1 as PairwiseSequenceAlignment;
    var shiftedReference = leftAligned.PairwiseAlignedSequences[0].FirstSequence.ConvertToString();
    var shiftedQuery = leftAligned.PairwiseAlignedSequences[0].SecondSequence.ConvertToString();

    Assert.AreEqual("ACAATAAAAGCGCGCGCGCGTTACG--TATATATGGATAT", shiftedReference);
    Assert.AreEqual("ACAAT-AAA----GCGCGCGTTACGTATATATATGGATAT", shiftedQuery);

    // And it's hard, so we might as well check the variants
    var variants = outcome.Item2;
    Assert.AreEqual(3, variants.Count);

    // Expected properties of the three indels, in call order.
    string[] expectedBases = { "A", "GCGC", "TA" };
    char[] expectedHpBases = { 'A', 'G', 'T' };
    bool[] expectedInHp = { true, false, false };
    int[] expectedLengths = { 1, 4, 2 };
    int[] expectedStarts = { 4, 8, 24 };
    IndelType[] expectedTypes = { IndelType.Deletion, IndelType.Deletion, IndelType.Insertion };

    for (int index = 0; index < expectedBases.Length; index++)
    {
        Assert.AreEqual(VariantType.INDEL, variants[index].Type);

        var indel = variants[index] as IndelVariant;
        Assert.AreEqual(expectedHpBases[index], indel.HomopolymerBase);
        Assert.AreEqual(expectedStarts[index], indel.StartPosition);
        Assert.AreEqual(expectedLengths[index], indel.Length);
        Assert.AreEqual(expectedBases[index], indel.InsertedOrDeletedBases);
        Assert.AreEqual(expectedInHp[index], indel.InHomopolymer);
        Assert.AreEqual(expectedTypes[index], indel.InsertionOrDeletion);
    }
}
/// <summary>
/// Builds a pairwise sequence alignment from the two sequences stored in the
/// XML node <paramref name="nodeName"/> and validates either its properties
/// (count, score, read-only flag) or the sequences read back from it.
/// </summary>
/// <param name="nodeName">Name of the XML node holding the test data.</param>
/// <param name="validateProperty">
/// True to validate the alignment properties; false to validate that the
/// sequences read back equal the input sequences.
/// </param>
private void ValidateGeneralSequenceAlignment(string nodeName, bool validateProperty)
{
    // Read the xml file for getting both the sequences for aligning.
    string origSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
    string origSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);
    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    ApplicationLog.WriteLine(string.Format("SequenceAlignment P1 : First sequence used is '{0}'.", origSequence1));
    ApplicationLog.WriteLine(string.Format("SequenceAlignment P1 : Second sequence used is '{0}'.", origSequence2));

    // Create two sequences
    ISequence aInput = new Sequence(alphabet, origSequence1);
    ISequence bInput = new Sequence(alphabet, origSequence2);

    // Add the sequences to the sequence alignment object.
    IList<IPairwiseSequenceAlignment> sequenceAlignmentObj = new List<IPairwiseSequenceAlignment>();
    var alignSeq = new PairwiseAlignedSequence {FirstSequence = aInput, SecondSequence = bInput};
    IPairwiseSequenceAlignment seqAlignObj = new PairwiseSequenceAlignment();
    seqAlignObj.Add(alignSeq);
    sequenceAlignmentObj.Add(seqAlignObj);

    // Read the output back and validate the same.
    IList<PairwiseAlignedSequence> newAlignedSequences = sequenceAlignmentObj[0].PairwiseAlignedSequences;
    string readSequence1 = new String(newAlignedSequences[0].FirstSequence.Select(a => (char) a).ToArray());
    string readSequence2 = new String(newAlignedSequences[0].SecondSequence.Select(a => (char) a).ToArray());

    // Log what was actually read back, not the input strings.
    ApplicationLog.WriteLine(string.Format("SequenceAlignment P1 : First sequence read is '{0}'.", readSequence1));
    ApplicationLog.WriteLine(string.Format("SequenceAlignment P1 : Second sequence read is '{0}'.", readSequence2));

    if (validateProperty)
    {
        string score = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode);
        string seqCount = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceCountNode);

        // Expected values go first so that failure messages label them correctly.
        Assert.IsFalse(sequenceAlignmentObj.IsReadOnly);
        Assert.AreEqual(seqCount, sequenceAlignmentObj.Count.ToString((IFormatProvider) null));
        Assert.AreEqual(
            score,
            sequenceAlignmentObj[0].PairwiseAlignedSequences[0].Score.ToString((IFormatProvider) null));

        ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the IsRead Property");
        ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the Count Property");
        ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the Sequences Property");
    }
    else
    {
        Assert.AreEqual(origSequence1, readSequence1);
        Assert.AreEqual(origSequence2, readSequence2);
    }
}
/// <summary>
/// Runs the Needleman-Wunsch aligner on the two sequences described by the XML
/// node <paramref name="nodeName"/> and compares the result with the expected
/// alignment stored in the same node.
/// </summary>
/// <param name="nodeName">Name of the XML node holding the test data.</param>
/// <param name="alignParam">
/// Selects which Align/AlignSimple overload is exercised. Values whose name
/// contains "Code" take the sequences from the XML text; all others parse FASTA files.
/// </param>
/// <param name="similarityMatrixParam">Selects how the similarity matrix is created.</param>
/// <param name="alignType">
/// AlignmentType.Align runs Align() with a gap extension cost; any other value runs AlignSimple().
/// </param>
private void ValidateNeedlemanWunschAlignment(string nodeName, AlignParameters alignParam, SimilarityMatrixParameters similarityMatrixParam, AlignmentType alignType)
{
    ISequence aInput, bInput;
    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    // Get the input sequences, either from the XML text or from FASTA files.
    if (alignParam.ToString().Contains("Code"))
    {
        string sequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
        string sequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

        aInput = new Sequence(alphabet, sequence1);
        bInput = new Sequence(alphabet, sequence2);
    }
    else
    {
        // Read the xml file for getting both the files for aligning.
        string filePath1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
        string filePath2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

        var parseObjectForFile1 = new FastAParser { Alphabet = alphabet };
        ISequence originalSequence1 = parseObjectForFile1.Parse(filePath1).FirstOrDefault();
        Assert.IsNotNull(originalSequence1);
        aInput = new Sequence(alphabet, originalSequence1.ConvertToString());

        var parseObjectForFile2 = new FastAParser { Alphabet = alphabet };
        ISequence originalSequence2 = parseObjectForFile2.Parse(filePath2).FirstOrDefault();
        Assert.IsNotNull(originalSequence2);
        bInput = new Sequence(alphabet, originalSequence2.ConvertToString());
    }

    string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);

    SimilarityMatrix sm;
    switch (similarityMatrixParam)
    {
        case SimilarityMatrixParameters.DiagonalMatrix:
            string matchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode);
            string misMatchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MisMatchScoreNode);
            sm = new DiagonalSimilarityMatrix(int.Parse(matchValue, null), int.Parse(misMatchValue, null));
            break;

        case SimilarityMatrixParameters.TextReader:
        default:
            // Both paths load the matrix from the BLOSUM file; the reader is
            // always disposed so the file handle is not leaked.
            using (TextReader reader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(reader);
            }

            break;
    }

    int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
    int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), null);

    var needlemanWunschObj = new NeedlemanWunschAligner();
    if (AlignParameters.AllParam != alignParam)
    {
        needlemanWunschObj.SimilarityMatrix = sm;
        needlemanWunschObj.GapOpenCost = gapOpenCost;
    }

    bool useGapExtension = alignType == AlignmentType.Align;
    IList<IPairwiseSequenceAlignment> result = null;

    switch (alignParam)
    {
        case AlignParameters.AlignList:
        case AlignParameters.AlignListCode:
            var sequences = new List<ISequence> {aInput, bInput};
            if (useGapExtension)
            {
                needlemanWunschObj.GapExtensionCost = gapExtensionCost;
                result = needlemanWunschObj.Align(sequences);
            }
            else
            {
                result = needlemanWunschObj.AlignSimple(sequences);
            }

            break;

        case AlignParameters.AllParam:
        case AlignParameters.AllParamCode:
            if (useGapExtension)
            {
                needlemanWunschObj.GapExtensionCost = gapExtensionCost;
                result = needlemanWunschObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
            }
            else
            {
                result = needlemanWunschObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
            }

            break;

        case AlignParameters.AlignTwo:
        case AlignParameters.AlignTwoCode:
            if (useGapExtension)
            {
                needlemanWunschObj.GapExtensionCost = gapExtensionCost;
                result = needlemanWunschObj.Align(aInput, bInput);
            }
            else
            {
                result = needlemanWunschObj.AlignSimple(aInput, bInput);
            }

            break;

        default:
            break;
    }

    // Read the expected score and aligned sequences for the chosen alignment type.
    string expectedSequence1, expectedSequence2, expectedScore;
    if (useGapExtension)
    {
        expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionScoreNode);
        expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence1Node);
        expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence2Node);
    }
    else
    {
        expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedScoreNode);
        expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode1);
        expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
    }

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment(aInput, bInput);
    var alignedSeq = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(alphabet, expectedSequence1),
        SecondSequence = new Sequence(alphabet, expectedSequence2),
        Score = Convert.ToInt32(expectedScore, null)
    };
    align.PairwiseAlignedSequences.Add(alignedSeq);
    expectedOutput.Add(align);

    ApplicationLog.WriteLine(string.Format("NeedlemanWunschAligner P1 : Final Score '{0}'.", expectedScore));
    ApplicationLog.WriteLine(string.Format("NeedlemanWunschAligner P1 : Aligned First Sequence is '{0}'.", expectedSequence1));
    ApplicationLog.WriteLine(string.Format("NeedlemanWunschAligner P1 : Aligned Second Sequence is '{0}'.", expectedSequence2));

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Builds a pairwise sequence alignment from the two sequences configured under
/// the AlignDnaAlgorithmNodeName node and checks its Count, FirstSequence,
/// SecondSequence, IsReadOnly, Documentation and PairwiseAlignedSequences members
/// against the configured values.
/// </summary>
public void ValidateSequenceAlignmentProperties()
{
    // Every input and expected value is read from the same configuration node.
    string node = Constants.AlignDnaAlgorithmNodeName;
    var xml = this.utilityObj.xmlUtil;

    string firstText = xml.GetTextValue(node, Constants.SequenceNode1);
    string secondText = xml.GetTextValue(node, Constants.SequenceNode2);
    IAlphabet alphabet = Utility.GetAlphabet(xml.GetTextValue(node, Constants.AlphabetNameNode));
    string expectedCount = xml.GetTextValue(node, Constants.SequenceCountNode);

    // Input sequences.
    ISequence first = new Sequence(alphabet, firstText);
    ISequence second = new Sequence(alphabet, secondText);

    // Wrap both inputs in one aligned pair and register it with the alignment.
    IPairwiseSequenceAlignment alignment = new PairwiseSequenceAlignment(first, second);
    alignment.Add(new PairwiseAlignedSequence
    {
        FirstSequence = first,
        SecondSequence = second
    });
    IList<IPairwiseSequenceAlignment> alignments = new List<IPairwiseSequenceAlignment> { alignment };

    // Property checks.
    string actualCount = alignment.Count.ToString((IFormatProvider)null);
    Assert.AreEqual(expectedCount, actualCount);

    char[] firstSymbols = alignment.FirstSequence.Select(symbol => (char)symbol).ToArray();
    Assert.AreEqual(firstText, new string(firstSymbols));

    char[] secondSymbols = alignment.SecondSequence.Select(symbol => (char)symbol).ToArray();
    Assert.AreEqual(secondText, new string(secondSymbols));

    Assert.IsFalse(alignment.IsReadOnly);
    Assert.IsNull(alignment.Documentation);

    string actualPairCount = alignment.PairwiseAlignedSequences.Count.ToString((IFormatProvider)null);
    Assert.AreEqual(expectedCount, actualPairCount);

    ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the IsRead Property");
    ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the Count Property");
    ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the Sequences Property");
}
/// <summary>
/// Runs MUMmerAligner.AlignSimple for the configured test case and compares the
/// produced alignments with the expected sequences and scores from the xml node.
/// </summary>
/// <param name="nodeName">Configuration node holding the inputs and expected values.</param>
/// <param name="isFilePath">True to parse the sequences from the configured FASTA files;
/// false to build them from the sequence strings stored in the node.</param>
/// <param name="isAlignList">True to call AlignSimple with a single list containing the
/// reference followed by the query; false to call AlignSimple(reference, queries).</param>
/// <param name="addParam">When PerformSimilarityMatrixChange, the aligner's similarity
/// matrix is replaced with Blosum50 before aligning.</param>
void ValidateMUMmerAlignGeneralTestCases(string nodeName, bool isFilePath, bool isAlignList,
    AdditionalParameters addParam)
{
    var xml = this.utilityObj.xmlUtil;

    ISequence referenceSeq;
    IList<ISequence> querySeqs;
    // Kept as IEnumerable so the same AlignSimple overload is selected below.
    IEnumerable<ISequence> alignList = null;

    if (!isFilePath)
    {
        // Inline sequences: reference and query text plus their alphabets come from the node.
        string referenceText = xml.GetTextValue(nodeName, Constants.SequenceNode);
        string referenceAlphabetName = xml.GetTextValue(nodeName, Constants.AlphabetNameNode);
        referenceSeq = new Sequence(Utility.GetAlphabet(referenceAlphabetName), referenceText);

        string queryText = xml.GetTextValue(nodeName, Constants.SearchSequenceNode);
        string queryAlphabetName = xml.GetTextValue(nodeName, Constants.SearchSequenceAlphabetNode);
        ISequence inlineQuery = new Sequence(Utility.GetAlphabet(queryAlphabetName), queryText);

        querySeqs = new List<ISequence>();
        if (isAlignList)
        {
            alignList = new List<ISequence> { referenceSeq, inlineQuery };
        }
        else
        {
            querySeqs.Add(inlineQuery);
        }
    }
    else
    {
        // Reference sequence: first entry of the configured FASTA file.
        string referencePath = xml.GetTextValue(nodeName, Constants.FilePathNode);
        Assert.IsNotNull(referencePath);
        Assert.IsTrue(File.Exists(referencePath));

        FastAParser fastaParser = new FastAParser();
        IEnumerable<ISequence> parsedReferences = fastaParser.Parse(referencePath);
        referenceSeq = parsedReferences.FirstOrDefault();
        Assert.IsNotNull(referenceSeq);

        // Query sequences: every entry of the configured search FASTA file.
        string queryPath = xml.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);
        Assert.IsNotNull(queryPath);
        Assert.IsTrue(File.Exists(queryPath));

        querySeqs = fastaParser.Parse(queryPath).ToList();
        ISequence firstQuery = querySeqs.First();

        if (isAlignList)
        {
            alignList = new List<ISequence> { referenceSeq, firstQuery };
        }
    }

    // Configure the aligner.
    string mumLength = xml.GetTextValue(nodeName, Constants.MUMAlignLengthNode);
    MUMmerAligner aligner = new MUMmerAligner
    {
        LengthOfMUM = long.Parse(mumLength, null),
        StoreMUMs = true
    };

    if (addParam == AdditionalParameters.PerformSimilarityMatrixChange)
    {
        aligner.SimilarityMatrix =
            new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum50);
    }

    // The gap open cost is applied for every value of addParam.
    aligner.GapOpenCost = int.Parse(xml.GetTextValue(nodeName, Constants.GapOpenCostNode), null);

    IList<IPairwiseSequenceAlignment> actual;
    if (isAlignList)
    {
        actual = aligner.AlignSimple(alignList);
    }
    else
    {
        actual = aligner.AlignSimple(referenceSeq, querySeqs);
    }

    // StoreMUMs was requested, so the MUMs property must have been populated.
    Assert.IsNotNull(aligner.MUMs);

    string expectedScore = xml.GetTextValue(nodeName, Constants.ScoreNodeName);
    string[] expectedSequences = xml.GetTextValues(nodeName, Constants.ExpectedSequencesNode);
    IList<IPairwiseSequenceAlignment> expected = new List<IPairwiseSequenceAlignment>();
    IAlphabet expectedAlphabet = referenceSeq.Alphabet;

    if (querySeqs.Count > 1)
    {
        // Two query sequences: the score node carries a comma separated pair of scores and
        // the expected sequences node carries two (first, second) pairs.
        string[] expectedScores = expectedScore.Split(',');

        IPairwiseSequenceAlignment firstExpected = new PairwiseSequenceAlignment();
        IPairwiseSequenceAlignment secondExpected = new PairwiseSequenceAlignment();

        firstExpected.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
        {
            FirstSequence = new Sequence(expectedAlphabet, expectedSequences[0]),
            SecondSequence = new Sequence(expectedAlphabet, expectedSequences[1]),
            Score = int.Parse(expectedScores[0], null),
            FirstOffset = Int32.MinValue,
            SecondOffset = Int32.MinValue,
        });
        expected.Add(firstExpected);

        secondExpected.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
        {
            FirstSequence = new Sequence(expectedAlphabet, expectedSequences[2]),
            SecondSequence = new Sequence(expectedAlphabet, expectedSequences[3]),
            Score = int.Parse(expectedScores[1], null),
            FirstOffset = Int32.MinValue,
            SecondOffset = Int32.MinValue,
        });
        expected.Add(secondExpected);
    }
    else
    {
        // Zero or one query sequence: a single expected pair with a single score.
        IPairwiseSequenceAlignment onlyExpected = new PairwiseSequenceAlignment();
        onlyExpected.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
        {
            FirstSequence = new Sequence(expectedAlphabet, expectedSequences[0]),
            SecondSequence = new Sequence(expectedAlphabet, expectedSequences[1]),
            Score = Convert.ToInt32(expectedScore, null),
            FirstOffset = Int32.MinValue,
            SecondOffset = Int32.MinValue,
        });
        expected.Add(onlyExpected);
    }

    Assert.IsTrue(CompareAlignment(actual, expected));
}
/// <summary>
/// Aligns every query sequence against the reference without keeping the MUMs of
/// earlier queries: the working MUM lists are overwritten for each query and the
/// alignment is produced immediately from the current MUMs and the gaps between them.
/// </summary>
/// <param name="referenceSequence">reference sequence</param>
/// <param name="querySequenceList">list of input sequences</param>
/// <returns>
/// A list of sequence alignments; the list is empty when validation fails, and
/// null is returned when either argument is null after validation succeeded.
/// </returns>
private IList<IPairwiseSequenceAlignment> AlignmentWithoutAccumulatedMUMs(
    ISequence referenceSequence,
    IList<ISequence> querySequenceList)
{
    IList<IPairwiseSequenceAlignment> alignments = new List<IPairwiseSequenceAlignment>();

    if (!Validate(referenceSequence, querySequenceList))
    {
        return alignments;
    }

    // Defensive null handling retained from the public entry points.
    if (referenceSequence == null || querySequenceList == null)
    {
        return null;
    }

    // Step 1: remember the reference and build its suffix tree once.
    _referenceSequence = referenceSequence;
    _suffixTree = BuildSuffixTree(_referenceSequence);

    foreach (ISequence query in querySequenceList)
    {
        // The reference is never aligned against itself.
        if (query.Equals(referenceSequence))
        {
            continue;
        }

        IPairwiseSequenceAlignment current = new PairwiseSequenceAlignment(referenceSequence, query);

        // Step 2: stream the query through the suffix tree to collect MUMs.
        _mumList = Streaming(_suffixTree, query, LengthOfMUM);

        // Step 3(a): order the MUMs.
        _sortedMumList = SortMum(_mumList);

        if (_sortedMumList.Count > 0)
        {
            // Step 3(b): reduce to the longest increasing subsequence.
            _finalMumList = CollectLongestIncreasingSubsequence(_sortedMumList);

            if (_finalMumList.Count > 0)
            {
                // Step 4: align the gaps between MUMs and assemble the aligned pair.
                current.PairwiseAlignedSequences.Add(ProcessGaps(referenceSequence, query));
            }

            alignments.Add(current);
            continue;
        }

        // No MUMs at all: fall back to a plain pairwise alignment of the two sequences.
        IList<IPairwiseSequenceAlignment> fallback = RunPairWise(referenceSequence, query);
        foreach (IPairwiseSequenceAlignment item in fallback)
        {
            alignments.Add(item);
        }
    }

    return alignments;
}
/// <summary>
/// Step (3) of the dynamic programming model: produces the alignment object by
/// creating one aligned pair per starting cell of the traceback/scoring matrix.
/// </summary>
/// <param name="startingCells">Cells from which each alignment is generated.</param>
/// <returns>
/// The alignment of the two input sequences; its metadata carries a "ScoreTable"
/// entry when IncludeScoreTable is set.
/// </returns>
private PairwiseSequenceAlignment CreateAlignment(IEnumerable<OptScoreMatrixCell> startingCells)
{
    var pairwiseAlignment = new PairwiseSequenceAlignment(_sequence1, _sequence2);
    var alignedPairs = pairwiseAlignment.PairwiseAlignedSequences;

    // One aligned pair per starting cell.
    foreach (var cell in startingCells)
    {
        var alignedPair = CreateAlignmentFromCell(cell);
        alignedPairs.Add(alignedPair);
    }

    // Attach the scoring table only on request.
    if (IncludeScoreTable)
    {
        pairwiseAlignment.Metadata["ScoreTable"] = GetScoreTable();
    }

    return pairwiseAlignment;
}
/// <summary>
/// Aligns every query sequence against the reference in two passes. The first pass
/// finds the MUMs of each query and stores them in _mums (raw) and _finalMums
/// (sorted and reduced to the longest increasing subsequence, or null when the sorted
/// list is empty). The second pass walks the stored MUMs and runs the pairwise
/// algorithm on the gaps to produce the alignments.
/// </summary>
/// <param name="referenceSequence">reference sequence</param>
/// <param name="querySequenceList">list of input sequences</param>
/// <returns>A list of sequence alignments; empty when validation fails.</returns>
private IList<IPairwiseSequenceAlignment> AlignmentWithAccumulatedMUMs(
    ISequence referenceSequence,
    IList<ISequence> querySequenceList)
{
    // Reset the accumulated MUMs for this run.
    _mums = new Dictionary<ISequence, IList<MaxUniqueMatch>>();
    _finalMums = new Dictionary<ISequence, IList<MaxUniqueMatch>>();

    if (Validate(referenceSequence, querySequenceList))
    {
        // Step1 : building suffix trees using reference sequence
        SequenceSuffixTree suffixTree = BuildSuffixTree(referenceSequence);

        // On each query sequence aligned with reference sequence
        foreach (ISequence sequence in querySequenceList)
        {
            if (sequence.Equals(referenceSequence))
            {
                continue;
            }

            // Step2 : streaming process is performed with the query sequence
            IList<MaxUniqueMatch> mumList = Streaming(suffixTree, sequence, LengthOfMUM);
            _mums.Add(sequence, mumList);

            // Step3(a) : sorted mum list based on reference sequence
            mumList = SortMum(mumList);

            if (mumList.Count > 0)
            {
                // Step3(b) : LIS using greedy cover algorithm
                mumList = CollectLongestIncreasingSubsequence(mumList);
            }
            else
            {
                // No usable MUMs: recorded as null, handled explicitly in the second pass.
                mumList = null;
            }

            _finalMums.Add(sequence, mumList);
        }
    }

    IList<IPairwiseSequenceAlignment> results = new List<IPairwiseSequenceAlignment>();

    if (MUMs != null && FinalMUMs != null)
    {
        // Getting reference sequence
        _referenceSequence = referenceSequence;

        // On each query sequence aligned with reference sequence
        foreach (var finalMum in FinalMUMs)
        {
            var sequence = finalMum.Key;
            _mumList = MUMs[sequence];
            _finalMumList = finalMum.Value;

            IPairwiseSequenceAlignment alignment =
                new PairwiseSequenceAlignment(referenceSequence, sequence);

            // The final list is null whenever the sorted MUM list was empty (see the first
            // pass), so it must be null-checked before its Count is read; a null final
            // list takes the same pairwise fallback as "no MUMs found".
            if (_mumList.Count > 0 && _finalMumList != null)
            {
                if (_finalMumList.Count > 0)
                {
                    // Step 4 : get all the gaps in each sequence and call
                    // pairwise alignment
                    alignment.PairwiseAlignedSequences.Add(ProcessGaps(referenceSequence, sequence));
                }

                results.Add(alignment);
            }
            else
            {
                IList<IPairwiseSequenceAlignment> sequenceAlignment =
                    RunPairWise(referenceSequence, sequence);

                foreach (IPairwiseSequenceAlignment pairwiseAlignment in sequenceAlignment)
                {
                    results.Add(pairwiseAlignment);
                }
            }
        }
    }

    return results;
}
/// <summary>
/// Validates PairwiseOverlapAlignment algorithm for the parameters passed: builds the
/// two input sequences, the similarity matrix and the gap costs from the xml node,
/// runs the requested Align/AlignSimple overload and compares the result with the
/// expected aligned sequences and score from the same node.
/// </summary>
/// <param name="nodeName">Xml node name</param>
/// <param name="isTextFile">True to read the input sequences from the configured FASTA
/// files; false to read them as strings from the node.</param>
/// <param name="caseType">Case type forwarded to GetSequenceWithCaseType when the input
/// sequences are created.</param>
/// <param name="additionalParameter">Selects the aligner overload: list of sequences,
/// two sequences, or all parameters passed explicitly.</param>
/// <param name="alignType">Is the Align type Simple or Align with Gap Extension cost?</param>
/// <param name="similarityMatrixParam">How the similarity matrix is created (from the
/// matrix file or as a diagonal matrix).</param>
private void ValidatePairwiseOverlapAlignment(string nodeName, bool isTextFile,
    SequenceCaseType caseType, AlignParameters additionalParameter,
    AlignmentType alignType, SimilarityMatrixParameters similarityMatrixParam)
{
    Sequence aInput = null;
    Sequence bInput = null;

    IAlphabet alphabet = Utility.GetAlphabet(
        this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    if (isTextFile)
    {
        // Read the xml file for getting both the files for aligning.
        string filePath1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
        string filePath2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

        var parser1 = new FastAParser();
        ISequence originalSequence1 = parser1.Parse(filePath1).ElementAt(0);
        ISequence originalSequence2 = parser1.Parse(filePath2).ElementAt(0);

        // Create input sequence for sequence string in different cases.
        GetSequenceWithCaseType(
            new string(originalSequence1.Select(a => (char) a).ToArray()),
            new string(originalSequence2.Select(a => (char) a).ToArray()),
            alphabet, caseType, out aInput, out bInput);
    }
    else
    {
        string originalSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
        string originalSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

        // Create input sequence for sequence string in different cases.
        GetSequenceWithCaseType(
            originalSequence1, originalSequence2, alphabet, caseType, out aInput, out bInput);
    }

    var aInputString = new string(aInput.Select(a => (char) a).ToArray());
    var bInputString = new string(bInput.Select(a => (char) a).ToArray());

    ApplicationLog.WriteLine(string.Format(null,
        "PairwiseOverlapAligner P2 : First sequence used is '{0}'.", aInputString));
    ApplicationLog.WriteLine(string.Format(null,
        "PairwiseOverlapAligner P2 : Second sequence used is '{0}'.", bInputString));

    // Create similarity matrix object for a given file.
    string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);

    SimilarityMatrix sm = null;
    switch (similarityMatrixParam)
    {
        case SimilarityMatrixParameters.DiagonalMatrix:
            string matchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode);
            string misMatchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MisMatchScoreNode);
            sm = new DiagonalSimilarityMatrix(int.Parse(matchValue, null), int.Parse(misMatchValue, null));
            break;

        case SimilarityMatrixParameters.TextReader:
        default:
            // Every non-diagonal option loads the matrix from the configured file. The
            // reader is disposed as soon as the matrix has been constructed so the file
            // handle is not leaked (previously the default branch never closed it).
            using (TextReader reader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(reader);
            }
            break;
    }

    int gapOpenCost = int.Parse(
        this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
    int gapExtensionCost = int.Parse(
        this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), null);

    // Create PairwiseOverlapAligner instance and set its values.
    // For AllParam the values are passed to the align call itself instead.
    var pairwiseOverlapObj = new PairwiseOverlapAligner();
    if (additionalParameter != AlignParameters.AllParam)
    {
        pairwiseOverlapObj.SimilarityMatrix = sm;
        pairwiseOverlapObj.GapOpenCost = gapOpenCost;
        pairwiseOverlapObj.GapExtensionCost = gapExtensionCost;
    }

    IList<IPairwiseSequenceAlignment> result = null;

    // Align the input sequences.
    switch (additionalParameter)
    {
        case AlignParameters.AlignList:
            var sequences = new List<ISequence>();
            sequences.Add(aInput);
            sequences.Add(bInput);
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = pairwiseOverlapObj.Align(sequences);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(sequences);
                    break;
            }
            break;

        case AlignParameters.AlignTwo:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = pairwiseOverlapObj.Align(aInput, bInput);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(aInput, bInput);
                    break;
            }
            break;

        case AlignParameters.AllParam:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = pairwiseOverlapObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    break;
            }
            break;

        default:
            break;
    }

    // Get the expected sequence and score from xml config.
    string expectedSequence1 = string.Empty;
    string expectedSequence2 = string.Empty;
    string expectedScore = string.Empty;

    switch (alignType)
    {
        case AlignmentType.Align:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.ExpectedGapExtensionScoreNode);
            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.ExpectedGapExtensionSequence1Node);
            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.ExpectedGapExtensionSequence2Node);
            break;
        default:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedScoreNode);
            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode1);
            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
            break;
    }

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();

    // Several expected alignments may be stored in one node value, separated by ';'.
    var seperators = new char[1] {';'};
    string[] expectedSequences1 = expectedSequence1.Split(seperators);
    string[] expectedSequences2 = expectedSequence2.Split(seperators);

    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    for (int i = 0; i < expectedSequences1.Length; i++)
    {
        // Every expected pair carries the same expected score.
        var alignedSeq = new PairwiseAlignedSequence
        {
            FirstSequence = new Sequence(alphabet, expectedSequences1[i]),
            SecondSequence = new Sequence(alphabet, expectedSequences2[i]),
            Score = Convert.ToInt32(expectedScore, null),
            FirstOffset = Int32.MinValue,
            SecondOffset = Int32.MinValue,
        };
        align.PairwiseAlignedSequences.Add(alignedSeq);
    }

    expectedOutput.Add(align);
    Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput,true));

    ApplicationLog.WriteLine(string.Format(null,
        "PairwiseOverlapAligner P2 : Final Score '{0}'.", expectedScore));
    ApplicationLog.WriteLine(string.Format(null,
        "PairwiseOverlapAligner P2 : Aligned First Sequence is '{0}'.", expectedSequence1));
    ApplicationLog.WriteLine(string.Format(null,
        "PairwiseOverlapAligner P2 : Aligned Second Sequence is '{0}'.", expectedSequence2));
}
/// <summary>
/// Launches the alignment algorithm: fills the block grid, computes the last (corner)
/// block together with its trace, then walks the trace back from every optimal score
/// cell to build the aligned sequences.
/// </summary>
/// <returns>
/// An empty list when there are no optimal score cells; otherwise a list holding one
/// alignment that contains an aligned pair for each optimal score cell whose traceback
/// reaches a Stop marker.
/// </returns>
public virtual List<IPairwiseSequenceAlignment> Align()
{
    InitializeCache();

    // Grid: visit the blocks one anti-diagonal at a time. The "- 2" bound leaves out the
    // last diagonal, i.e. the corner block, which is computed separately below.
    for (int diagonal = 0; diagonal < gridCols + gridRows - 2; diagonal++)
    {
        for (int blockRow = 0; blockRow < gridRows; blockRow++)
        {
            int blockCol = diagonal - blockRow;

            if ((blockCol >= 0) && (blockCol < gridCols))
            {
                // Blocks in the last grid row/column may be smaller than a full stride.
                int lastRow = (blockRow == gridRows - 1)
                    ? (int)(colHeight - Helper.BigMul(blockRow, gridStride) - 1)
                    : gridStride;
                int lastCol = (blockCol == gridCols - 1)
                    ? (int)(rowWidth - Helper.BigMul(blockCol, gridStride) - 1)
                    : gridStride;

                ComputeIntermediateBlock(blockRow, blockCol, lastRow, lastCol);
            }
        }
    }

    // Trace table for a single block, reused for every block that is traced back.
    sbyte[][] trace = new sbyte[gridStride + 1][];
    for (int i = 0; i <= gridStride; i++)
    {
        trace[i] = new sbyte[gridStride + 1];
    }

    // Last Block - grid calculation and Traceback combined.
    // The complete* variables remember which block (and up to which cell) 'trace'
    // currently describes, so it is only recomputed when a different region is needed.
    int completeTraceRow = gridRows - 1;
    int completeTraceCol = gridCols - 1;
    int completeLastRow = (int)(colHeight - Helper.BigMul(completeTraceRow, gridStride) - 1);
    int completeLastCol = (int)(rowWidth - Helper.BigMul(completeTraceCol, gridStride) - 1);
    ComputeCornerBlock(completeTraceRow, completeTraceCol, completeLastRow, completeLastCol, trace);

    // Traceback
    if (optScoreCells.Count == 0)
    {
        return new List<IPairwiseSequenceAlignment>();
    }
    else
    {
        PairwiseSequenceAlignment alignment = new PairwiseSequenceAlignment(sequenceI, sequenceJ);

        for (int alignmentCount = 0; alignmentCount < optScoreCells.Count; alignmentCount++)
        {
            PairwiseAlignedSequence result = new PairwiseAlignedSequence();
            result.Score = optScore;

            long alignmentRow = optScoreCells[alignmentCount].Item1;
            long alignmentCol = optScoreCells[alignmentCount].Item2;

            // Split the cell position into a block index and a position inside the block.
            int blockRow = (int)(alignmentRow / gridStride);
            int blockCol = (int)(alignmentCol / gridStride);
            int lastRow = (int)(alignmentRow - Helper.BigMul(blockRow, gridStride));
            int lastCol = (int)(alignmentCol - Helper.BigMul(blockCol, gridStride));

            result.Metadata["EndOffsets"] = new List<long> { alignmentRow - 1, alignmentCol - 1 };

            // The aligned symbols are collected back to front and reversed at the end.
            long alignmentLength = 0;
            byte[] sequence1 = new byte[colHeight + rowWidth];
            byte[] sequence2 = new byte[colHeight + rowWidth];
            int colGaps = 0;
            int rowGaps = 0;

            while ((blockRow >= 0) && (blockCol >= 0))
            {
                // Recompute the trace unless it already covers this block up to the
                // requested cell.
                if ((blockRow != completeTraceRow) || (blockCol != completeTraceCol) ||
                    (lastRow > completeLastRow) || (lastCol > completeLastCol))
                {
                    ComputeTraceBlock(blockRow, blockCol, lastRow, lastCol, trace);
                    completeTraceRow = blockRow;
                    completeTraceCol = blockCol;
                    completeLastRow = lastRow;
                    completeLastCol = lastCol;
                }

                // Sequence index of the position just before this block's first cell.
                // Helper.BigMul is used, as for every other block offset in this method,
                // so the product is not formed in 32-bit arithmetic where it could
                // overflow for very long sequences.
                // NOTE(review): assumes Helper.BigMul returns the 64-bit product - confirm.
                long startPositionI = Helper.BigMul(blockRow, gridStride) - 1;
                long startPositionJ = Helper.BigMul(blockCol, gridStride) - 1;

                // Follow the trace until the alignment start (Stop) or the block edge (Block).
                while ((trace[lastRow][lastCol] != SourceDirection.Stop) &&
                       (trace[lastRow][lastCol] != SourceDirection.Block))
                {
                    switch (trace[lastRow][lastCol])
                    {
                        case SourceDirection.Diagonal:
                            // diagonal, no gap, use both sequence residues
                            sequence1[alignmentLength] = sequenceI[startPositionI + lastRow];
                            sequence2[alignmentLength] = sequenceJ[startPositionJ + lastCol];
                            alignmentLength++;
                            lastRow--;
                            lastCol--;
                            break;

                        case SourceDirection.Up:
                            // up, gap in J
                            sequence1[alignmentLength] = sequenceI[startPositionI + lastRow];
                            sequence2[alignmentLength] = this.gapCode;
                            alignmentLength++;
                            lastRow--;
                            colGaps++;
                            break;

                        case SourceDirection.Left:
                            // left, gap in I
                            sequence1[alignmentLength] = this.gapCode;
                            sequence2[alignmentLength] = sequenceJ[startPositionJ + lastCol];
                            alignmentLength++;
                            lastCol--;
                            rowGaps++;
                            break;
                    }
                }

                if (trace[lastRow][lastCol] == SourceDirection.Stop)
                {
                    // Be nice, turn aligned solutions around so that they match the input sequences
                    byte[] alignedA = new byte[alignmentLength];
                    byte[] alignedB = new byte[alignmentLength];
                    for (long i = 0, j = alignmentLength - 1; i < alignmentLength; i++, j--)
                    {
                        alignedA[i] = sequence1[j];
                        alignedB[i] = sequence2[j];
                    }

                    // If alphabet of inputA is DnaAlphabet then alphabet of alignedA may be Dna or AmbiguousDna.
                    IAlphabet alphabet = Alphabets.AutoDetectAlphabet(
                        alignedA, 0, alignedA.GetLongLength(), sequenceI.Alphabet);
                    Sequence seq = new Sequence(alphabet, alignedA, false);
                    seq.ID = sequenceI.ID;
                    result.FirstSequence = seq;

                    alphabet = Alphabets.AutoDetectAlphabet(
                        alignedB, 0, alignedB.GetLongLength(), sequenceJ.Alphabet);
                    seq = new Sequence(alphabet, alignedB, false);
                    seq.ID = sequenceJ.ID;
                    result.SecondSequence = seq;

                    // Offset is start of alignment in input sequence with respect to other sequence.
                    if (lastCol >= lastRow)
                    {
                        result.FirstOffset = lastCol - lastRow;
                        result.SecondOffset = 0;
                    }
                    else
                    {
                        result.FirstOffset = 0;
                        result.SecondOffset = lastRow - lastCol;
                    }

                    result.Metadata["StartOffsets"] = new List<long> { lastRow, lastCol };
                    result.Metadata["Insertions"] = new List<long> { rowGaps, colGaps };
                    alignment.PairwiseAlignedSequences.Add(result);
                    break;
                }
                else
                {
                    // Block edge reached: continue in the neighbouring block, entering it
                    // at its far edge (index gridStride).
                    if (lastRow == 0 && lastCol == 0)
                    {
                        blockRow--;
                        blockCol--;
                        lastRow = gridStride;
                        lastCol = gridStride;
                    }
                    else
                    {
                        if (lastRow == 0)
                        {
                            blockRow--;
                            lastRow = gridStride;
                        }
                        else
                        {
                            blockCol--;
                            lastCol = gridStride;
                        }
                    }
                }
            }
        }

        return new List<IPairwiseSequenceAlignment>() { alignment };
    }
}
/// <summary>
/// Aligns "CCCAACCC" against "CCC" with PairwiseOverlapAligner.AlignSimple using a
/// diagonal similarity matrix (5 / -20) and a gap open cost of -10, logs the result,
/// and expects two alignments of "CCC" with score 15: one with second offset 0 and
/// one with second offset 5.
/// </summary>
public void PairwiseOverlapMultipleAlignments()
{
    Sequence longSequence = new Sequence(Alphabets.DNA, "CCCAACCC");
    Sequence shortSequence = new Sequence(Alphabets.DNA, "CCC");

    SimilarityMatrix matrix = new DiagonalSimilarityMatrix(5, -20);
    int gapOpen = -10;

    PairwiseOverlapAligner aligner = new PairwiseOverlapAligner
    {
        SimilarityMatrix = matrix,
        GapOpenCost = gapOpen
    };

    IList<IPairwiseSequenceAlignment> actual = aligner.AlignSimple(longSequence, shortSequence);

    // Log the aligner settings followed by the first aligned pair of every alignment.
    ApplicationLog.WriteLine(string.Format(
        (IFormatProvider)null,
        "{0}, Simple; Matrix {1}; GapOpenCost {2}",
        aligner.Name,
        aligner.SimilarityMatrix.Name,
        aligner.GapOpenCost));

    foreach (IPairwiseSequenceAlignment item in actual)
    {
        var firstPair = item.PairwiseAlignedSequences[0];

        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "score {0}", firstPair.Score));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 0 {0}", item.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 1 {0}", item.SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 0 {0}", firstPair.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 1 {0}", firstPair.SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "consesus {0}", firstPair.Consensus.ToString()));
    }

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();

    // First alignment
    expectedAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "CCC"),
        SecondSequence = new Sequence(Alphabets.DNA, "CCC"),
        Consensus = new Sequence(Alphabets.DNA, "CCC"),
        Score = 15,
        FirstOffset = 0,
        SecondOffset = 0
    });

    // Second alignment
    expectedAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "CCC"),
        SecondSequence = new Sequence(Alphabets.DNA, "CCC"),
        Consensus = new Sequence(Alphabets.DNA, "CCC"),
        Score = 15,
        FirstOffset = 0,
        SecondOffset = 5
    });

    IList<IPairwiseSequenceAlignment> expected =
        new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(CompareAlignment(actual, expected));
}
/// <summary>
/// Runs the MUMmer aligner for the configured test case and validates the score of the
/// first aligned pair and the aligned sequences against the values in the xml node.
/// </summary>
/// <param name="nodeName">Node name to be read from xml</param>
/// <param name="isFilePath">True to parse the reference and query sequences from the
/// configured FASTA files; false to build them from the sequence strings in the node.</param>
/// <param name="isSeqList">True to append the reference to the query list and call
/// Align(list); false to call AlignSimple(reference, queries).</param>
void ValidateMUMmerAlignGeneralTestCases(string nodeName, bool isFilePath, bool isSeqList)
{
    var xml = this.utilityObj.xmlUtil;

    ISequence referenceSeq;
    IList<ISequence> querySeqs;
    string referenceText;
    string queryText;

    if (!isFilePath)
    {
        // Reference sequence built from the text and alphabet stored in the node.
        referenceText = xml.GetTextValue(nodeName, Constants.SequenceNode);
        string referenceAlphabetName = xml.GetTextValue(nodeName, Constants.AlphabetNameNode);
        referenceSeq = new Sequence(Utility.GetAlphabet(referenceAlphabetName), referenceText);

        // Query sequence built the same way from the search sequence entries.
        queryText = xml.GetTextValue(nodeName, Constants.SearchSequenceNode);
        string queryAlphabetName = xml.GetTextValue(nodeName, Constants.SearchSequenceAlphabetNode);
        ISequence inlineQuery = new Sequence(Utility.GetAlphabet(queryAlphabetName), queryText);
        querySeqs = new List<ISequence> { inlineQuery };
    }
    else
    {
        // Reference sequence: first entry of the configured FASTA file.
        string referencePath = xml.GetTextValue(nodeName, Constants.FilePathNode);
        Assert.IsNotNull(referencePath);
        ApplicationLog.WriteLine(string.Format(null,
            "MUMmer BVT : Successfully validated the File Path '{0}'.", referencePath));

        FastAParser referenceParser = new FastAParser();
        IEnumerable<ISequence> parsedReferences = referenceParser.Parse(referencePath);
        referenceSeq = parsedReferences.FirstOrDefault();
        Assert.IsNotNull(referenceSeq);
        referenceText = referenceSeq.ConvertToString();
        referenceParser.Close();

        // Query sequences: every entry of the configured search FASTA file.
        string queryPath = xml.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);
        Assert.IsNotNull(queryPath);
        ApplicationLog.WriteLine(string.Format(null,
            "MUMmer BVT : Successfully validated the Search File Path '{0}'.", queryPath));

        FastAParser queryParser = new FastAParser();
        querySeqs = queryParser.Parse(queryPath).ToList();
        ISequence firstQuery = querySeqs.FirstOrDefault();
        Assert.IsNotNull(firstQuery);
        queryText = firstQuery.ConvertToString();
        queryParser.Close();
    }

    // Configure the aligner from the node.
    string mumLength = xml.GetTextValue(nodeName, Constants.MUMAlignLengthNode);
    var aligner = new Bio.Algorithms.MUMmer.MUMmerAligner();
    aligner.LengthOfMUM = long.Parse(mumLength, null);
    aligner.GapOpenCost = int.Parse(xml.GetTextValue(nodeName, Constants.GapOpenCostNode), null);

    IList<IPairwiseSequenceAlignment> actual;
    if (isSeqList)
    {
        // List mode: the reference is appended after the queries.
        querySeqs.Add(referenceSeq);
        actual = aligner.Align(querySeqs);
    }
    else
    {
        actual = aligner.AlignSimple(referenceSeq, querySeqs);
    }

    // Score of the first aligned pair.
    string expectedScore = xml.GetTextValue(nodeName, Constants.ScoreNodeName);
    string actualScore = actual[0].PairwiseAlignedSequences[0].Score.ToString((IFormatProvider)null);
    Assert.AreEqual(expectedScore, actualScore);
    ApplicationLog.WriteLine(string.Format(null,
        "MUMmer BVT : Successfully validated the score for the sequence '{0}' and '{1}'.",
        referenceText, queryText));

    // Aligned sequences.
    string[] expectedSequences = xml.GetTextValues(nodeName, Constants.ExpectedSequencesNode);

    IList<IPairwiseSequenceAlignment> expected = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[0]),
        SecondSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[1]),
        Score = Convert.ToInt32(expectedScore, null),
        FirstOffset = Int32.MinValue,
        SecondOffset = Int32.MinValue
    });
    expected.Add(expectedAlignment);

    Assert.IsTrue(CompareAlignment(actual, expected));
    ApplicationLog.WriteLine("MUMmer BVT : Successfully validated the aligned sequences.");
}
/// <summary>
/// Validates PairwiseOverlapAlignment algorithm for the parameters passed.
/// Inputs, scoring parameters and expected results are all read from the xml node
/// identified by <paramref name="nodeName"/>; the aligner output is then compared
/// against the expected aligned sequences and score.
/// </summary>
/// <param name="nodeName">Node Name in the xml.</param>
/// <param name="alignParam">parameter based on which certain validations are done.
/// Values whose name contains "Code" take the input sequences inline from the xml;
/// all other values read them from FastA files.</param>
/// <param name="similarityMatrixParam">Similarity Matrix Parameter.</param>
/// <param name="alignType">Alignment Type</param>
private void ValidatePairwiseOverlapAlignment(string nodeName, AlignParameters alignParam,
    SimilarityMatrixParameters similarityMatrixParam, AlignmentType alignType)
{
    ISequence aInput;
    ISequence bInput;

    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    if (alignParam.ToString().Contains("Code"))
    {
        string sequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
        string sequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

        aInput = new Sequence(alphabet, sequence1);
        bInput = new Sequence(alphabet, sequence2);
    }
    else
    {
        // Read the xml file for getting both the files for aligning.
        string filePath1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
        string filePath2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

        var parser1 = new FastAParser { Alphabet = alphabet };
        aInput = parser1.Parse(filePath1).ElementAt(0);
        bInput = parser1.Parse(filePath2).ElementAt(0);
    }

    string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);

    SimilarityMatrix sm;
    switch (similarityMatrixParam)
    {
        case SimilarityMatrixParameters.TextReader:
            using (TextReader reader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(reader);
            }
            break;
        case SimilarityMatrixParameters.DiagonalMatrix:
            string matchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode);
            string misMatchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MisMatchScoreNode);
            sm = new DiagonalSimilarityMatrix(int.Parse(matchValue, null), int.Parse(misMatchValue, null));
            break;
        default:
            // Dispose the reader once the matrix has been built so the file handle
            // is not left open for the remainder of the test run.
            using (StreamReader matrixReader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(matrixReader);
            }
            break;
    }

    int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
    int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), null);

    var pairwiseOverlapObj = new PairwiseOverlapAligner();

    // For AllParam the matrix and gap-open cost are handed to the Align/AlignSimple
    // overload directly instead of being set on the aligner instance.
    if (AlignParameters.AllParam != alignParam)
    {
        pairwiseOverlapObj.SimilarityMatrix = sm;
        pairwiseOverlapObj.GapOpenCost = gapOpenCost;
    }

    IList<IPairwiseSequenceAlignment> result = null;

    switch (alignParam)
    {
        case AlignParameters.AlignList:
        case AlignParameters.AlignListCode:
            var sequences = new List<ISequence> {aInput, bInput};
            switch (alignType)
            {
                case AlignmentType.Align:
                    pairwiseOverlapObj.GapExtensionCost = gapExtensionCost;
                    result = pairwiseOverlapObj.Align(sequences);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(sequences);
                    break;
            }
            break;
        case AlignParameters.AllParam:
        case AlignParameters.AllParamCode:
            switch (alignType)
            {
                case AlignmentType.Align:
                    pairwiseOverlapObj.GapExtensionCost = gapExtensionCost;
                    result = pairwiseOverlapObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    break;
            }
            break;
        case AlignParameters.AlignTwo:
        case AlignParameters.AlignTwoCode:
            switch (alignType)
            {
                case AlignmentType.Align:
                    pairwiseOverlapObj.GapExtensionCost = gapExtensionCost;
                    result = pairwiseOverlapObj.Align(aInput, bInput);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(aInput, bInput);
                    break;
            }
            break;
        default:
            break;
    }

    // Read the expected score and aligned sequences from the xml file.
    string expectedSequence1;
    string expectedSequence2;
    string expectedScore;

    switch (alignType)
    {
        case AlignmentType.Align:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionScoreNode);
            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence1Node);
            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence2Node);
            break;
        default:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedScoreNode);
            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode1);
            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
            break;
    }

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();

    // Several expected alignments may be packed into one xml value, separated by ';'.
    var seperators = new [] {';'};
    string[] expectedSequences1 = expectedSequence1.Split(seperators);
    string[] expectedSequences2 = expectedSequence2.Split(seperators);

    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    for (int i = 0; i < expectedSequences1.Length; i++)
    {
        PairwiseAlignedSequence alignedSeq = new PairwiseAlignedSequence
        {
            FirstSequence = new Sequence(alphabet, expectedSequences1[i]),
            SecondSequence = new Sequence(alphabet, expectedSequences2[i]),
            Score = Convert.ToInt32(expectedScore, null),
            FirstOffset = Int32.MinValue,
            SecondOffset = Int32.MinValue,
        };
        align.PairwiseAlignedSequences.Add(alignedSeq);
    }

    expectedOutput.Add(align);
    Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput, true));

    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P1 : Final Score '{0}'.", expectedScore));
    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P1 : Aligned First Sequence is '{0}'.", expectedSequence1));
    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P1 : Aligned Second Sequence is '{0}'.", expectedSequence2));
}
/// <summary>
/// Launches the alignment algorithm: processes the block grid, then traces back from
/// every cell recorded in optScoreCells to build the aligned sequences.
/// </summary>
/// <returns>
/// An empty list when optScoreCells is empty; otherwise a single-element list whose
/// alignment holds one PairwiseAlignedSequence per optimal cell whose traceback reached
/// a Stop marker.
/// </returns>
public virtual List <IPairwiseSequenceAlignment> Align()
{
    InitializeCache();

    // Grid: visit blocks along anti-diagonals (blockRow + blockCol == diagonal).
    // The final diagonal (the corner block) is excluded here and handled separately below.
    for (int diagonal = 0; diagonal < gridCols + gridRows - 2; diagonal++)
    {
        for (int blockRow = 0; blockRow < gridRows; blockRow++)
        {
            int blockCol = diagonal - blockRow;

            if ((blockCol >= 0) && (blockCol < gridCols))
            {
                // Blocks on the last grid row/column may be partial.
                int lastRow = (blockRow == gridRows - 1) ? (int)(colHeight - Math.BigMul(blockRow, gridStride) - 1) : gridStride;
                int lastCol = (blockCol == gridCols - 1) ? (int)(rowWidth - Math.BigMul(blockCol, gridStride) - 1) : gridStride;

                ComputeIntermediateBlock(blockRow, blockCol, lastRow, lastCol);
            }
        }
    }

    // Trace buffer for a single block, (gridStride + 1) x (gridStride + 1), reused for every block.
    sbyte[][] trace = new sbyte[gridStride + 1][];
    for (int i = 0; i <= gridStride; i++)
    {
        trace[i] = new sbyte[gridStride + 1];
    }

    // Last Block - grid calculation and Traceback combined
    int completeTraceRow = gridRows - 1;
    int completeTraceCol = gridCols - 1;
    int completeLastRow = (int)(colHeight - Math.BigMul(completeTraceRow, gridStride) - 1);
    int completeLastCol = (int)(rowWidth - Math.BigMul(completeTraceCol, gridStride) - 1);

    ComputeCornerBlock(completeTraceRow, completeTraceCol, completeLastRow, completeLastCol, trace);

    //Traceback
    if (optScoreCells.Count == 0)
    {
        return(new List <IPairwiseSequenceAlignment>());
    }
    else
    {
        PairwiseSequenceAlignment alignment = new PairwiseSequenceAlignment(sequenceI, sequenceJ);

        for (int alignmentCount = 0; alignmentCount < optScoreCells.Count; alignmentCount++)
        {
            PairwiseAlignedSequence result = new PairwiseAlignedSequence();
            result.Score = optScore;

            long alignmentRow = optScoreCells[alignmentCount].Item1;
            long alignmentCol = optScoreCells[alignmentCount].Item2;

            // Split the cell position into a block index and an offset inside that block.
            int blockRow = (int)(alignmentRow / gridStride);
            int blockCol = (int)(alignmentCol / gridStride);

            int lastRow = (int)(alignmentRow - Math.BigMul(blockRow, gridStride));
            int lastCol = (int)(alignmentCol - Math.BigMul(blockCol, gridStride));

            result.Metadata["EndOffsets"] = new List <long> { alignmentRow - 1, alignmentCol - 1 };

            long alignmentLength = 0;
            // The aligned strings are collected back-to-front and reversed once Stop is reached.
            byte[] sequence1 = new byte[colHeight + rowWidth];
            byte[] sequence2 = new byte[colHeight + rowWidth];

            int colGaps = 0;
            int rowGaps = 0;

            while ((blockRow >= 0) && (blockCol >= 0))
            {
                // Recompute the trace only if the buffer does not already cover this block
                // up to (lastRow, lastCol).
                if ((blockRow != completeTraceRow) || (blockCol != completeTraceCol) || (lastRow > completeLastRow) || (lastCol > completeLastCol))
                {
                    ComputeTraceBlock(blockRow, blockCol, lastRow, lastCol, trace);

                    completeTraceRow = blockRow;
                    completeTraceCol = blockCol;
                    completeLastRow = lastRow;
                    completeLastCol = lastCol;
                }

                // Math.BigMul keeps the product in 64-bit; a plain int multiplication
                // could overflow before being widened to long.
                long startPositionI = Math.BigMul(blockRow, gridStride) - 1;
                long startPositionJ = Math.BigMul(blockCol, gridStride) - 1;

                while ((trace[lastRow][lastCol] != SourceDirection.Stop) && (trace[lastRow][lastCol] != SourceDirection.Block))
                {
                    switch (trace[lastRow][lastCol])
                    {
                        case SourceDirection.Diagonal:
                            // diagonal, no gap, use both sequence residues
                            sequence1[alignmentLength] = sequenceI[startPositionI + lastRow];
                            sequence2[alignmentLength] = sequenceJ[startPositionJ + lastCol];
                            alignmentLength++;
                            lastRow--;
                            lastCol--;
                            break;

                        case SourceDirection.Up:
                            // up, gap in J
                            sequence1[alignmentLength] = sequenceI[startPositionI + lastRow];
                            sequence2[alignmentLength] = this.gapCode;
                            alignmentLength++;
                            lastRow--;
                            colGaps++;
                            break;

                        case SourceDirection.Left:
                            // left, gap in I
                            sequence1[alignmentLength] = this.gapCode;
                            sequence2[alignmentLength] = sequenceJ[startPositionJ + lastCol];
                            alignmentLength++;
                            lastCol--;
                            rowGaps++;
                            break;
                    }
                }

                if (trace[lastRow][lastCol] == SourceDirection.Stop)
                {
                    // Be nice, turn aligned solutions around so that they match the input sequences
                    byte[] alignedA = new byte[alignmentLength];
                    byte[] alignedB = new byte[alignmentLength];
                    for (long i = 0, j = alignmentLength - 1; i < alignmentLength; i++, j--)
                    {
                        alignedA[i] = sequence1[j];
                        alignedB[i] = sequence2[j];
                    }

                    // If alphabet of inputA is DnaAlphabet then alphabet of alignedA may be Dna or AmbiguousDna.
                    IAlphabet alphabet = Alphabets.AutoDetectAlphabet(alignedA, 0, alignedA.LongLength, sequenceI.Alphabet);
                    Sequence seq = new Sequence(alphabet, alignedA, false);
                    seq.ID = sequenceI.ID;
                    // seq.DisplayID = aInput.DisplayID;
                    result.FirstSequence = seq;

                    alphabet = Alphabets.AutoDetectAlphabet(alignedB, 0, alignedB.LongLength, sequenceJ.Alphabet);
                    seq = new Sequence(alphabet, alignedB, false);
                    seq.ID = sequenceJ.ID;
                    // seq.DisplayID = bInput.DisplayID;
                    result.SecondSequence = seq;

                    // Offset is start of alignment in input sequence with respect to other sequence.
                    // NOTE(review): lastRow/lastCol are offsets within the current block at this
                    // point, not positions in the full sequences - confirm this is intended when
                    // the traceback stops outside block (0, 0).
                    if (lastCol >= lastRow)
                    {
                        result.FirstOffset = lastCol - lastRow;
                        result.SecondOffset = 0;
                    }
                    else
                    {
                        result.FirstOffset = 0;
                        result.SecondOffset = lastRow - lastCol;
                    }
                    result.Metadata["StartOffsets"] = new List <long> { lastRow, lastCol };
                    result.Metadata["Insertions"] = new List <long> { rowGaps, colGaps };
                    alignment.PairwiseAlignedSequences.Add(result);

                    break;
                }
                else
                {
                    // Block marker reached: continue the traceback in the neighbouring block
                    // (diagonal, upper or left), entering it at its far edge.
                    if (lastRow == 0 && lastCol == 0)
                    {
                        blockRow--;
                        blockCol--;
                        lastRow = gridStride;
                        lastCol = gridStride;
                    }
                    else
                    {
                        if (lastRow == 0)
                        {
                            blockRow--;
                            lastRow = gridStride;
                        }
                        else
                        {
                            blockCol--;
                            lastCol = gridStride;
                        }
                    }
                }
            }
        }

        return(new List <IPairwiseSequenceAlignment>() { alignment });
    }
}
/// <summary>
/// Validates the SmithWatermanAligner for the parameters passed. Inputs, scoring
/// parameters and expected results are read from the xml node identified by
/// <paramref name="nodeName"/>; the aligner output is compared with the expected
/// aligned sequences and score.
/// </summary>
/// <param name="nodeName">Node name in the xml.</param>
/// <param name="isTextFile">True to read the input sequences from FastA files,
/// false to take them inline from the xml.</param>
/// <param name="caseType">Case type passed to GetSequenceWithCaseType when building the
/// inputs; also selects which expected-sequence nodes are read.</param>
/// <param name="additionalParameter">Selects which Align/AlignSimple overload is exercised.</param>
/// <param name="alignType">Align (uses the gap-extension expected nodes) or simple alignment.</param>
/// <param name="similarityMatrixParam">How the similarity matrix is constructed.</param>
private void ValidateSmithWatermanAlignment(string nodeName, bool isTextFile, SequenceCaseType caseType,
    AlignParameters additionalParameter, AlignmentType alignType, SimilarityMatrixParameters similarityMatrixParam)
{
    Sequence aInput, bInput;

    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    if (isTextFile)
    {
        // Read the xml file for getting both the files for aligning.
        string filePath1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
        string filePath2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

        // Parse the files and get the sequence.
        ISequence originalSequence1 = null;
        ISequence originalSequence2 = null;

        var parseObjectForFile1 = new FastAParser { Alphabet = alphabet };
        originalSequence1 = parseObjectForFile1.Parse(filePath1).ElementAt(0);
        originalSequence2 = parseObjectForFile1.Parse(filePath2).ElementAt(0);

        // Create input sequence for sequence string in different cases.
        GetSequenceWithCaseType(originalSequence1.ConvertToString(), originalSequence2.ConvertToString(),
            alphabet, caseType, out aInput, out bInput);
    }
    else
    {
        string originalSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
        string originalSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

        // Create input sequence for sequence string in different cases.
        GetSequenceWithCaseType(
            originalSequence1,
            originalSequence2,
            alphabet,
            caseType,
            out aInput,
            out bInput);
    }

    ApplicationLog.WriteLine(string.Format("SmithWatermanAligner P2 : First sequence used is '{0}'.", aInput.ConvertToString()));
    ApplicationLog.WriteLine(string.Format("SmithWatermanAligner P2 : Second sequence used is '{0}'.", bInput.ConvertToString()));

    // Create similarity matrix object for a given file.
    string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);

    SimilarityMatrix sm;
    switch (similarityMatrixParam)
    {
        case SimilarityMatrixParameters.TextReader:
            using (TextReader reader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(reader);
            }
            break;
        case SimilarityMatrixParameters.DiagonalMatrix:
            string matchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode);
            string misMatchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MisMatchScoreNode);
            sm = new DiagonalSimilarityMatrix(int.Parse(matchValue, null), int.Parse(misMatchValue, null));
            break;
        default:
            // Dispose the reader once the matrix has been built so the file handle
            // is not left open for the remainder of the test run.
            using (StreamReader matrixReader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(matrixReader);
            }
            break;
    }

    int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
    int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), null);

    // Create SmithWatermanAligner instance and set its values.
    // For AllParam the values are passed to the Align/AlignSimple overload instead.
    var smithWatermanObj = new SmithWatermanAligner();
    if (additionalParameter != AlignParameters.AllParam)
    {
        smithWatermanObj.SimilarityMatrix = sm;
        smithWatermanObj.GapOpenCost = gapOpenCost;
        smithWatermanObj.GapExtensionCost = gapExtensionCost;
    }

    IList<IPairwiseSequenceAlignment> result = null;

    // Align the input sequences.
    switch (additionalParameter)
    {
        case AlignParameters.AlignList:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = smithWatermanObj.Align(new List<ISequence> {aInput, bInput});
                    break;
                default:
                    result = smithWatermanObj.AlignSimple(new List<ISequence> {aInput, bInput});
                    break;
            }
            break;
        case AlignParameters.AlignTwo:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = smithWatermanObj.Align(aInput, bInput);
                    break;
                default:
                    result = smithWatermanObj.AlignSimple(aInput, bInput);
                    break;
            }
            break;
        case AlignParameters.AllParam:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = smithWatermanObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                    break;
                default:
                    result = smithWatermanObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    break;
            }
            break;
        default:
            break;
    }

    // Get the expected sequence and score from xml config.
    string expectedSequence1, expectedSequence2, expectedScore;

    switch (alignType)
    {
        case AlignmentType.Align:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionScoreNode);

            switch (caseType)
            {
                case SequenceCaseType.LowerCase:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence1InLower);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence2InLower);
                    break;
                default:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence1Node);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence2Node);
                    break;
            }
            break;
        default:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedScoreNode);

            switch (caseType)
            {
                case SequenceCaseType.LowerCase:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence1inLowerNode);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence2inLowerNode);
                    break;
                case SequenceCaseType.LowerUpperCase:
                    // Mixed case: first expected sequence from the lower-case node,
                    // second from the regular node.
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence1inLowerNode);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
                    break;
                default:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode1);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
                    break;
            }
            break;
    }

    // Match the alignment result with expected result.
    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();

    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    var alignedSeq = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(alphabet, expectedSequence1),
        SecondSequence = new Sequence(alphabet, expectedSequence2),
        Score = Convert.ToInt32(expectedScore, null),
        FirstOffset = Int32.MinValue,
        SecondOffset = Int32.MinValue,
    };
    align.PairwiseAlignedSequences.Add(alignedSeq);
    expectedOutput.Add(align);

    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Final Score '{0}'.", expectedScore));
    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Aligned First Sequence is '{0}'.", expectedSequence1));
    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Aligned Second Sequence is '{0}'.", expectedSequence2));

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Aligns a DNA reference against one query with MUMmerAligner (LengthOfMUM = 4,
/// NeedlemanWunsch as the pairwise algorithm) using a custom diagonal similarity
/// matrix, and checks the resulting alignment, consensus, score and offsets.
/// </summary>
public void TestMUMmer3MultipleMumWithCustomMatrix()
{
    string reference = "ATGCGCATCCCCTT";
    string search = "GCGCCCCCTA";

    Sequence referenceSeq = new Sequence(Alphabets.DNA, reference);
    Sequence searchSeq = new Sequence(Alphabets.DNA, search);

    List<ISequence> searchSeqs = new List<ISequence>();
    searchSeqs.Add(searchSeq);

    // Custom scoring: 3 for a match, -2 for any mismatch. A hand-filled 256x256
    // table with the same values used to be built here but was never handed to
    // the aligner, so it has been removed.
    DiagonalSimilarityMatrix matrix = new DiagonalSimilarityMatrix(3, -2);

    int gapOpenCost = -6;

    MUMmerAligner mummer = new MUMmerAligner();
    mummer.LengthOfMUM = 4;
    mummer.PairWiseAlgorithm = new NeedlemanWunschAligner();
    mummer.SimilarityMatrix = matrix;
    mummer.GapOpenCost = gapOpenCost;
    mummer.GapExtensionCost = -2;

    IList<IPairwiseSequenceAlignment> result = mummer.AlignSimple(referenceSeq, searchSeqs);

    // Check if output is not null
    Assert.AreNotEqual(null, result);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    PairwiseAlignedSequence alignedSeq = new PairwiseAlignedSequence();
    alignedSeq.FirstSequence = new Sequence(Alphabets.DNA, "ATGCGCATCCCCTT");
    alignedSeq.SecondSequence = new Sequence(Alphabets.DNA, "--GCGC--CCCCTA");
    alignedSeq.Consensus = new Sequence(AmbiguousDnaAlphabet.Instance, "ATGCGCATCCCCTW");
    alignedSeq.Score = 1;
    alignedSeq.FirstOffset = 0;
    alignedSeq.SecondOffset = 2;
    align.PairwiseAlignedSequences.Add(alignedSeq);
    expectedOutput.Add(align);

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Runs NeedlemanWunschAligner.AlignSimple on two DNA sequences with a diagonal
/// similarity matrix (match 2, mismatch -1) and a gap-open cost of -2, then compares
/// the result with the expected global alignment.
/// </summary>
public void NeedlemanWunschDnaSeqSimpleGap()
{
    // Configure the aligner.
    var needlemanWunsch = new NeedlemanWunschAligner();
    needlemanWunsch.SimilarityMatrix = new DiagonalSimilarityMatrix(2, -1);
    needlemanWunsch.GapOpenCost = -2;
    IPairwiseSequenceAligner aligner = needlemanWunsch;

    // Inputs.
    ISequence first = new Sequence(Alphabets.DNA, "GAATTCAGTTA");
    ISequence second = new Sequence(Alphabets.DNA, "GGATCGA");

    // Act.
    IList<IPairwiseSequenceAlignment> actual = aligner.AlignSimple(first, second);
    AlignmentHelpers.LogResult(aligner, actual);

    // Expected alignment.
    PairwiseAlignedSequence expectedPair = new PairwiseAlignedSequence();
    expectedPair.FirstSequence = new Sequence(Alphabets.DNA, "GAATTCAGTTA");
    expectedPair.SecondSequence = new Sequence(Alphabets.DNA, "GGAT-C-G--A");
    expectedPair.Consensus = new Sequence(AmbiguousDnaAlphabet.Instance, "GRATTCAGTTA");
    expectedPair.Score = 3;
    expectedPair.FirstOffset = 0;
    expectedPair.SecondOffset = 0;

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);

    IList<IPairwiseSequenceAlignment> expected = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(AlignmentHelpers.CompareAlignment(actual, expected));
}
/// <summary>
/// Aligns an RNA reference against one RNA query with MUMmerAligner (LengthOfMUM = 3,
/// NeedlemanWunsch as the pairwise algorithm, AmbiguousRna similarity matrix) and
/// verifies the resulting alignment against the expected one.
/// </summary>
public void TestMUMmerAlignerSingleMumRNA()
{
    const string reference = "AUGCUUUUCCCCCCC";
    const string search = "UAUAUUUUGG";

    MUMmerAligner mummer = new MUMmerAligner
    {
        LengthOfMUM = 3,
        PairWiseAlgorithm = new NeedlemanWunschAligner(),
        SimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna),
        GapOpenCost = -8,
        GapExtensionCost = -2
    };

    ISequence referenceSeq = new Sequence(Alphabets.RNA, reference);
    List<ISequence> searchSeqs = new List<ISequence> { new Sequence(Alphabets.RNA, search) };

    IList<IPairwiseSequenceAlignment> result = mummer.Align(referenceSeq, searchSeqs);

    // Check that at least one alignment was produced
    Assert.AreNotEqual(0, result.Count);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    align.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.RNA, "-AUGCUUUUCCCCCCC"),
        SecondSequence = new Sequence(Alphabets.RNA, "UAUA-UUUUGG-----"),
        Consensus = new Sequence(AmbiguousRnaAlphabet.Instance, "UAURCUUUUSSCCCCC"),
        Score = -14,
        FirstOffset = 1,
        SecondOffset = 0
    });
    expectedOutput.Add(align);

    // The comparison result must be asserted; previously it was computed and
    // discarded, so a wrong alignment could never fail this test.
    Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Aligns three reference sequences against three query sequences with
/// NucmerPairwiseAligner using a custom BreakLength of 2 (forward strand only) and
/// compares the output with the expected exact-match fragments for each query.
/// </summary>
public void TestNUCmer3CustomBreakLength()
{
    var referenceSeqs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "CAAAAGGGATTGCAAATGTTGGAGTGAATGCCATTACCTACCGGCTAGGAGGAGT") { ID = "R1" },
        new Sequence(Alphabets.DNA, "CCCCCCCCC") { ID = "R2" },
        new Sequence(Alphabets.DNA, "TTTTT") { ID = "R3" },
    };

    var searchSeqs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "CATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAA") { ID = "Q1" },
        new Sequence(Alphabets.DNA, "CAAAGTCTCTATCAGAATGCAGATGCAGATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGC") { ID = "Q2" },
        new Sequence(Alphabets.DNA, "AAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGC") { ID = "Q3" },
    };

    NucmerPairwiseAligner nucmer = new NucmerPairwiseAligner
    {
        MaximumSeparation = 0,
        MinimumScore = 2,
        SeparationFactor = 0.12F,
        LengthOfMUM = 5,
        BreakLength = 2,
        ForwardOnly = true
    };

    var rawAlignments = nucmer.Align(referenceSeqs, searchSeqs);
    var result = rawAlignments
        .Select(a => a as IPairwiseSequenceAlignment)
        .ToList();

    // Check if output is not null
    Assert.IsNotNull(result);

    // Every expected fragment is an exact match, so both aligned sequences and the
    // consensus carry the same bases.
    Func<string, int, int, int, PairwiseAlignedSequence> exactMatch =
        (bases, score, firstOffset, secondOffset) => new PairwiseAlignedSequence
        {
            FirstSequence = new Sequence(Alphabets.DNA, bases),
            SecondSequence = new Sequence(Alphabets.DNA, bases),
            Consensus = new Sequence(Alphabets.DNA, bases),
            Score = score,
            FirstOffset = firstOffset,
            SecondOffset = secondOffset
        };

    var expectedOutput = new List<IPairwiseSequenceAlignment>();

    // Expected fragments for the first query.
    IPairwiseSequenceAlignment expectedForQuery1 = new PairwiseSequenceAlignment();
    expectedForQuery1.PairwiseAlignedSequences.Add(exactMatch("AAAGGGA", 21, 8, 0));
    expectedForQuery1.PairwiseAlignedSequences.Add(exactMatch("CATTA", 15, 0, 31));
    expectedOutput.Add(expectedForQuery1);

    // Expected fragments for the second query.
    IPairwiseSequenceAlignment expectedForQuery2 = new PairwiseSequenceAlignment();
    expectedForQuery2.PairwiseAlignedSequences.Add(exactMatch("ATGTT", 15, 13, 0));
    expectedForQuery2.PairwiseAlignedSequences.Add(exactMatch("GAATGC", 18, 0, 11));
    expectedForQuery2.PairwiseAlignedSequences.Add(exactMatch("TTTTT", 15, 31, 0));
    expectedOutput.Add(expectedForQuery2);

    // Expected fragments for the third query.
    IPairwiseSequenceAlignment expectedForQuery3 = new PairwiseSequenceAlignment();
    expectedForQuery3.PairwiseAlignedSequences.Add(exactMatch("CAAAA", 15, 3, 0));
    expectedForQuery3.PairwiseAlignedSequences.Add(exactMatch("GGATT", 15, 45, 0));
    expectedForQuery3.PairwiseAlignedSequences.Add(exactMatch("GCAAA", 15, 0, 9));
    expectedForQuery3.PairwiseAlignedSequences.Add(exactMatch("TTACC", 15, 22, 0));
    expectedOutput.Add(expectedForQuery3);

    Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Aligns a DNA reference against one query with MUMmerAligner.AlignSimple
/// (LengthOfMUM = 4, NeedlemanWunsch as the pairwise algorithm, remaining settings
/// left untouched) and compares the result with the expected alignment.
/// </summary>
public void TestMUMmerAlignerMultipleMum()
{
    Sequence referenceSeq = new Sequence(Alphabets.DNA, "ATGCGCATCCCCTT");
    Sequence querySeq = new Sequence(Alphabets.DNA, "GCGCCCCCTA");
    List<ISequence> searchSeqs = new List<ISequence> { querySeq };

    MUMmerAligner mummer = new MUMmerAligner
    {
        LengthOfMUM = 4,
        PairWiseAlgorithm = new NeedlemanWunschAligner()
    };

    IList<IPairwiseSequenceAlignment> result = mummer.AlignSimple(referenceSeq, searchSeqs);

    // Check if output is not null
    Assert.AreNotEqual(null, result);

    PairwiseAlignedSequence expectedPair = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "ATGCGCATCCCCTT"),
        SecondSequence = new Sequence(Alphabets.DNA, "--GCGC--CCCCTA"),
        Consensus = new Sequence(AmbiguousDnaAlphabet.Instance, "ATGCGCATCCCCTW"),
        Score = -11,
        FirstOffset = 0,
        SecondOffset = 2
    };

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Runs SmithWatermanAligner.AlignSimple on two DNA sequences with a diagonal
/// similarity matrix (match 5, mismatch -20) and a gap-open cost of -5, expecting two
/// equally scored local alignments ("AAA" and "CCC") in a single result.
/// </summary>
public void SmithWatermanAlignerMultipleAlignments1()
{
    // Configure the aligner.
    var smithWaterman = new SmithWatermanAligner();
    smithWaterman.SimilarityMatrix = new DiagonalSimilarityMatrix(5, -20);
    smithWaterman.GapOpenCost = -5;
    IPairwiseSequenceAligner aligner = smithWaterman;

    // Inputs.
    ISequence first = new Sequence(Alphabets.DNA, "AAATTCCCAG");
    ISequence second = new Sequence(Alphabets.DNA, "AAAGCCC");

    // Act.
    IList<IPairwiseSequenceAlignment> actual = aligner.AlignSimple(first, second);
    AlignmentHelpers.LogResult(aligner, actual);

    // First alignment
    PairwiseAlignedSequence adenineRun = new PairwiseAlignedSequence();
    adenineRun.FirstSequence = new Sequence(Alphabets.DNA, "AAA");
    adenineRun.SecondSequence = new Sequence(Alphabets.DNA, "AAA");
    adenineRun.Consensus = new Sequence(Alphabets.DNA, "AAA");
    adenineRun.Score = 15;
    adenineRun.FirstOffset = 0;
    adenineRun.SecondOffset = 0;

    // Second alignment
    PairwiseAlignedSequence cytosineRun = new PairwiseAlignedSequence();
    cytosineRun.FirstSequence = new Sequence(Alphabets.DNA, "CCC");
    cytosineRun.SecondSequence = new Sequence(Alphabets.DNA, "CCC");
    cytosineRun.Consensus = new Sequence(Alphabets.DNA, "CCC");
    cytosineRun.Score = 15;
    cytosineRun.FirstOffset = 0;
    cytosineRun.SecondOffset = 1;

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment(first, second);
    expectedAlignment.PairwiseAlignedSequences.Add(adenineRun);
    expectedAlignment.PairwiseAlignedSequences.Add(cytosineRun);

    IList<IPairwiseSequenceAlignment> expected = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(AlignmentHelpers.CompareAlignment(actual, expected));
}
/// <summary>
/// This method is considered as main execute method which defines the
/// step by step algorithm. Derived class flows the defined flow by this
/// method.
/// Every reference sequence is paired with every query sequence to collect delta
/// alignments; the deltas are then grouped per query sequence, converted into
/// pairwise aligned sequences, scored and given a consensus.
/// </summary>
/// <param name="referenceSequenceList">Reference sequences.</param>
/// <param name="originalQuerySequences">List of input sequences.</param>
/// <returns>A list of sequence alignment, one entry per query sequence; empty when
/// no delta alignment was found.</returns>
/// <exception cref="ArgumentException">A reference or query sequence is shorter than
/// MinimumScore.</exception>
private IEnumerable<IPairwiseSequenceAlignment> Alignment(IEnumerable<ISequence> referenceSequenceList, IEnumerable<ISequence> originalQuerySequences)
{
    // Both inputs are walked several times below. Materialize them once so that a
    // deferred source is not re-evaluated on every pass, and so that the query
    // instances stored in the deltas are the same ones compared against later.
    IList<ISequence> references = referenceSequenceList as IList<ISequence> ?? referenceSequenceList.ToList();

    ConsensusResolver = new SimpleConsensusResolver(references.ElementAt(0).Alphabet);

    IEnumerable<ISequence> querySource = ForwardOnly
        ? originalQuerySequences
        : (ReverseOnly
            ? ReverseComplementSequenceList(originalQuerySequences)
            : AddReverseComplementsToSequenceList(originalQuerySequences));
    IList<ISequence> querySequenceList = querySource as IList<ISequence> ?? querySource.ToList();

    IList<IPairwiseSequenceAlignment> results = new List<IPairwiseSequenceAlignment>();

    var deltas = new List<DeltaAlignment>();

    foreach (ISequence refSequence in references)
    {
        this.nucmerAlgo = new NUCmer(refSequence);

        // Only forward settings that differ from their defaults.
        if (GapOpenCost != DefaultGapOpenCost)
        {
            this.nucmerAlgo.GapOpenCost = GapOpenCost;
        }

        if (GapExtensionCost != DefaultGapExtensionCost)
        {
            this.nucmerAlgo.GapExtensionCost = GapExtensionCost;
        }

        if (LengthOfMUM != DefaultLengthOfMUM)
        {
            this.nucmerAlgo.LengthOfMUM = LengthOfMUM;
        }

        // Override the ClusterBuilder properties that were changed from their defaults
        if (FixedSeparation != ClusterBuilder.DefaultFixedSeparation)
        {
            this.nucmerAlgo.FixedSeparation = FixedSeparation;
        }

        if (MaximumSeparation != ClusterBuilder.DefaultMaximumSeparation)
        {
            this.nucmerAlgo.MaximumSeparation = MaximumSeparation;
        }

        if (MinimumScore != ClusterBuilder.DefaultMinimumScore)
        {
            this.nucmerAlgo.MinimumScore = MinimumScore;
        }

        if (SeparationFactor != ClusterBuilder.DefaultSeparationFactor)
        {
            this.nucmerAlgo.SeparationFactor = SeparationFactor;
        }

        if (BreakLength != ModifiedSmithWaterman.DefaultBreakLength)
        {
            this.nucmerAlgo.BreakLength = BreakLength;
        }

        this.nucmerAlgo.ConsensusResolver = ConsensusResolver;

        if (SimilarityMatrix != null)
        {
            this.nucmerAlgo.SimilarityMatrix = SimilarityMatrix;
        }

        foreach (ISequence querySequence in querySequenceList)
        {
            // Check for parameters that would prevent an alignment from being returned.
            if (Math.Min(querySequence.Count, refSequence.Count) < MinimumScore)
            {
                var msg = "Bad parameter settings for NucmerPairwiseAligner. " +
                          "Tried to align a reference of length " + refSequence.Count.ToString() +
                          " to a sequence of length " + querySequence.Count.ToString() +
                          " while requiring a minimum score of MinimumScore = " + MinimumScore +
                          ". This will prevent any alignments from being returned.";
                throw new ArgumentException(msg);
            }

            IEnumerable<DeltaAlignment> deltaAlignment = this.nucmerAlgo.GetDeltaAlignments(
                querySequence, !MaxMatch, querySequence.IsMarkedAsReverseComplement());
            deltas.AddRange(deltaAlignment);
        }
    }

    if (deltas.Count > 0)
    {
        ISequence concatReference = references.ElementAt(0);

        //// concat all the sequences into one sequence
        if (references.Count > 1)
        {
            concatReference = ConcatSequence(references);
        }

        foreach (ISequence querySequence in querySequenceList)
        {
            List<DeltaAlignment> qDelta = deltas.Where(d => d.QuerySequence.Equals(querySequence)).ToList();

            IPairwiseSequenceAlignment sequenceAlignment = new PairwiseSequenceAlignment(concatReference, querySequence);

            // Convert delta alignments to sequence alignments
            IList<PairwiseAlignedSequence> alignments = ConvertDeltaToAlignment(qDelta);

            foreach (PairwiseAlignedSequence align in alignments)
            {
                // Calculate the score of alignment
                align.Score = CalculateScore(align.FirstSequence, align.SecondSequence);

                // Make Consensus
                align.Consensus = MakeConsensus(align.FirstSequence, align.SecondSequence);

                sequenceAlignment.PairwiseAlignedSequences.Add(align);
            }

            // An entry is added for every query, even when it has no aligned sequences.
            results.Add(sequenceAlignment);
        }
    }

    return results;
}