/// <summary>
/// Aligns two protein sequences with Smith-Waterman using BLOSUM62, a gap open cost of -8
/// and a gap extension cost of -1, and checks the result against a single expected alignment.
/// </summary>
public void SmithWatermanProteinSeqAffineGap()
{
    // Configure the aligner property by property, then use it through its interface.
    var smithWaterman = new SmithWatermanAligner();
    smithWaterman.SimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
    smithWaterman.GapOpenCost = -8;
    smithWaterman.GapExtensionCost = -1;
    IPairwiseSequenceAligner aligner = smithWaterman;

    ISequence first = new Sequence(Alphabets.Protein, "HEAGAWGHEE");
    ISequence second = new Sequence(Alphabets.Protein, "PAWHEAE");

    IList<IPairwiseSequenceAlignment> actual = aligner.Align(first, second);
    AlignmentHelpers.LogResult(aligner, actual);

    // Expected: one alignment holding one aligned pair with a single gap in the second sequence.
    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(
        new PairwiseAlignedSequence
        {
            FirstSequence = new Sequence(Alphabets.Protein, "AWGHE"),
            SecondSequence = new Sequence(Alphabets.Protein, "AW-HE"),
            Consensus = new Sequence(Alphabets.AmbiguousProtein, "AWGHE"),
            Score = 20,
            FirstOffset = 0,
            SecondOffset = 3
        });

    IList<IPairwiseSequenceAlignment> expected = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    bool matches = AlignmentHelpers.CompareAlignment(actual, expected);
    Assert.IsTrue(matches);
}
/// <summary>
/// Initializes static members of the SequenceAligners class.
/// Creates the built-in aligners, then appends every registered aligner whose
/// name (compared case-insensitively) is not already present in the list.
/// </summary>
static SequenceAligners()
{
    NUCmer = new NucmerPairwiseAligner();
    MUMmer = new MUMmerAligner();
    NeedlemanWunsch = new NeedlemanWunschAligner();
    SmithWaterman = new SmithWatermanAligner();

    var aligners = new List<ISequenceAligner>();
    aligners.Add(SmithWaterman);
    aligners.Add(NeedlemanWunsch);
    aligners.Add(MUMmer);
    aligners.Add(NUCmer);

    // Get the registered aligners
    IEnumerable<ISequenceAligner> registered = GetAligners();
    if (registered != null)
    {
        foreach (ISequenceAligner candidate in registered)
        {
            if (candidate == null)
            {
                continue;
            }

            // The check runs against the growing list, so a name that was just
            // added from the registered set also blocks later duplicates.
            bool nameTaken = aligners.Any(
                known => string.Compare(known.Name, candidate.Name, StringComparison.OrdinalIgnoreCase) == 0);
            if (!nameTaken)
            {
                aligners.Add(candidate);
            }
        }
    }

    All = aligners;
}
/// <summary>
/// Initializes static members of the SequenceAligners class.
/// Instantiates the four built-in aligners and publishes them, followed by any
/// registered aligner with a name not yet in the list (case-insensitive match), via All.
/// </summary>
static SequenceAligners()
{
    NUCmer = new NucmerPairwiseAligner();
    MUMmer = new MUMmerAligner();
    NeedlemanWunsch = new NeedlemanWunschAligner();
    SmithWaterman = new SmithWatermanAligner();

    var combined = new List<ISequenceAligner> { SmithWaterman, NeedlemanWunsch, MUMmer, NUCmer };

    // Get the registered aligners
    IEnumerable<ISequenceAligner> extras = GetAligners();
    if (extras == null)
    {
        All = combined;
        return;
    }

    foreach (ISequenceAligner extra in extras)
    {
        // Each candidate is tested against the list as it stands right now,
        // including registered aligners accepted earlier in this loop.
        bool isNew = extra != null
            && combined.All(existing => string.Compare(existing.Name, extra.Name, StringComparison.OrdinalIgnoreCase) != 0);
        if (isNew)
        {
            combined.Add(extra);
        }
    }

    All = combined;
}
/// <summary>
/// Aligns each usable path sequence against a padded section of the reference,
/// collects the indels found in every alignment, groups them by start position,
/// and returns one genotype call per start position with per-allele counts.
/// </summary>
/// <param name="paths">Paths to convert into indel data.</param>
/// <param name="graph">Graph supplying the k-mer length used for padding and data construction.</param>
/// <returns>One call per distinct indel start; an empty list when no path yields usable data.</returns>
public static List<ContinuousFrequencyIndelGenotype> CallIndelsFromPathCollection(DeBruijnPathList paths, DeBruijnGraph graph)
{
    var sequences = paths.Paths
        .Select(path => new IndelData(path, graph.KmerLength))
        .Where(data => data.OkayData)
        .ToList();
    if (sequences.Count == 0)
    {
        return new List<ContinuousFrequencyIndelGenotype>();
    }

    // Get the reference sequence.
    var regionStart = sequences.Min(data => data.LikelyStart) - IndelPathCollection.AlignmentPadding - graph.KmerLength;
    var regionEnd = sequences.Max(data => data.LikelyEnd) + IndelPathCollection.AlignmentPadding + graph.KmerLength;
    var reference = HaploGrepSharp.ReferenceGenome.GetReferenceSequenceSection(regionStart, regionEnd);

    // Setup the aligner with appropriate parameters.
    var aligner = new Bio.Algorithms.Alignment.SmithWatermanAligner();
    aligner.SimilarityMatrix = new Bio.SimilarityMatrices.DiagonalSimilarityMatrix(1, -1);
    aligner.GapOpenCost = -2;
    aligner.GapExtensionCost = -1;

    // Execute the alignment and go through and generate variants.
    var indelsBySequence = new Dictionary<IndelData, List<IndelLocation>>();
    foreach (var data in sequences)
    {
        // Note, do not change alignment order here.
        var alignment = aligner.Align(reference.Seq, data.Seq);
        var topHit = alignment[0].PairwiseAlignedSequences[0];
        indelsBySequence[data] = FindIndels(topHit);
    }

    // Now to group indels by unique starts and collect them.
    var groupedByStart = indelsBySequence.Values
        .SelectMany(found => found)
        .Distinct()
        .GroupBy(location => location.Start);

    var calls = new List<ContinuousFrequencyIndelGenotype>(10);

    // Note: Typically there will only be one Indel location per alignment
    foreach (var group in groupedByStart)
    {
        var alleles = group.ToList();
        var orientationCount = alleles.Select(location => location.DeletionOnReference).Distinct().Count();
        if (orientationCount > 1)
        {
            throw new NotImplementedException("Same location had indels present on both reference and reads.  This is an edge case not handled");
        }

        // Add a fake reference allele.
        var first = alleles[0];
        var referenceAllele = new IndelLocation(first.DeletionOnReference, first.Start, 0);
        referenceAllele.InsertedSequence = String.Empty;
        alleles.Add(referenceAllele);

        // sort by location
        alleles.Sort();

        // now add counts from each.
        double[] counts = new double[alleles.Count];
        foreach (var data in sequences)
        {
            var observed = indelsBySequence[data].FirstOrDefault(location => location.Start == first.Start);
            if (observed == null)
            {
                // No indel at this start: the count goes to slot 0.
                counts[0] += data.MinKmerCount;
                continue;
            }

            // Search from slot 1 onward, as slot 0 is used for sequences without an indel here.
            int slot = alleles.FindIndex(1, allele => allele.Equals(observed));
            if (slot < 0)
            {
                throw new InvalidProgramException("Point should never be reached");
            }

            counts[slot] += data.MinKmerCount;
        }

        var types = alleles.Select(allele => allele.InsertedSequence).ToList();
        var referencePosition = HaploGrepSharp.ReferenceGenome.ConvertTorCRSPosition(first.Start + regionStart);
        calls.Add(new ContinuousFrequencyIndelGenotype(first.DeletionOnReference, types, counts, referencePosition));
    }

    return calls;
}
/// <summary>
/// Runs AlignSimple on two DNA sequences with a diagonal matrix (match 5, mismatch -20)
/// and a gap open cost of -5, and expects two aligned pairs, each scoring 15.
/// </summary>
public void SmithWatermanAlignerMultipleAlignments1()
{
    var smithWaterman = new SmithWatermanAligner();
    smithWaterman.SimilarityMatrix = new DiagonalSimilarityMatrix(5, -20);
    smithWaterman.GapOpenCost = -5;
    IPairwiseSequenceAligner aligner = smithWaterman;

    ISequence first = new Sequence(Alphabets.DNA, "AAATTCCCAG");
    ISequence second = new Sequence(Alphabets.DNA, "AAAGCCC");

    IList<IPairwiseSequenceAlignment> actual = aligner.AlignSimple(first, second);
    AlignmentHelpers.LogResult(aligner, actual);

    // First alignment
    var leadingMatch = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "AAA"),
        SecondSequence = new Sequence(Alphabets.DNA, "AAA"),
        Consensus = new Sequence(Alphabets.DNA, "AAA"),
        Score = 15,
        FirstOffset = 0,
        SecondOffset = 0
    };

    // Second alignment
    var trailingMatch = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "CCC"),
        SecondSequence = new Sequence(Alphabets.DNA, "CCC"),
        Consensus = new Sequence(Alphabets.DNA, "CCC"),
        Score = 15,
        FirstOffset = 0,
        SecondOffset = 1
    };

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment(first, second);
    expectedAlignment.PairwiseAlignedSequences.Add(leadingMatch);
    expectedAlignment.PairwiseAlignedSequences.Add(trailingMatch);

    IList<IPairwiseSequenceAlignment> expected = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    bool matches = AlignmentHelpers.CompareAlignment(actual, expected);
    Assert.IsTrue(matches);
}
/// <summary>
/// Checks that using an invalid similarity matrix produces the expected error message.
/// The error is captured either while constructing the matrix (InvalidDataException)
/// or while aligning (ArgumentException), and its message is compared with the
/// expected message for the given invalid type.
/// </summary>
/// <param name="nodeName">XML config node the test data is read from.</param>
/// <param name="isTextFile">True to parse the inputs from FASTA files; false to read sequence strings from the config node.</param>
/// <param name="invalidType">Kind of invalid similarity matrix; NullSimilarityMatrix leaves the matrix null.</param>
/// <param name="additionalParameter">Selects the list, two-sequence, or all-parameter aligner overload.</param>
/// <param name="alignType">Align selects Align(); any other value selects AlignSimple().</param>
private void InValidateSmithWatermanAlignmentWithInvalidSimilarityMatrix(string nodeName, bool isTextFile, SimilarityMatrixInvalidTypes invalidType, AlignParameters additionalParameter, AlignmentType alignType)
{
    Sequence aInput = null;
    Sequence bInput = null;

    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    if (isTextFile)
    {
        // Read the xml file for getting both the files for aligning.
        string firstInputFilepath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
        string secondInputFilepath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

        // Parse the files and get the sequence.
        var parseObjectForFile1 = new FastAParser { Alphabet = alphabet };
        ISequence inputSequence1 = parseObjectForFile1.Parse(firstInputFilepath).ElementAt(0);
        ISequence inputSequence2 = parseObjectForFile1.Parse(secondInputFilepath).ElementAt(0);

        // Create input sequence for sequence string in different cases.
        GetSequenceWithCaseType(
            new string(inputSequence1.Select(a => (char) a).ToArray()),
            new string(inputSequence2.Select(a => (char) a).ToArray()),
            alphabet,
            SequenceCaseType.LowerCase,
            out aInput,
            out bInput);
    }
    else
    {
        string firstInputSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
        string secondInputSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

        // Create input sequence for sequence string in different cases.
        GetSequenceWithCaseType(firstInputSequence, secondInputSequence, alphabet, SequenceCaseType.LowerCase, out aInput, out bInput);
    }

    // string.Format (not string.Concat) so the '{0}' placeholder is actually substituted.
    ApplicationLog.WriteLine(string.Format(
        "SmithWatermanAligner P2 : First sequence used is '{0}'.",
        new string(aInput.Select(a => (char) a).ToArray())));
    ApplicationLog.WriteLine(string.Format(
        "SmithWatermanAligner P2 : Second sequence used is '{0}'.",
        new string(bInput.Select(a => (char) a).ToArray())));

    // Create similarity matrix object for a invalid file.
    string blosumFilePath = this.GetSimilarityMatrixFileWithInvalidType(nodeName, invalidType);
    Exception actualExpection = null;

    // For invalid similarity matrix data format; exception will be thrown while instantiating
    SimilarityMatrix sm = null;
    try
    {
        if (invalidType != SimilarityMatrixInvalidTypes.NullSimilarityMatrix)
        {
            // Dispose the reader even when the constructor throws.
            using (var reader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(reader);
            }
        }
    }
    catch (InvalidDataException ex)
    {
        actualExpection = ex;
    }

    // For non matching similarity matrix exception will be thrown while alignment
    if (actualExpection == null)
    {
        int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
        int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), null);

        // Create SmithWatermanAligner instance and set its values.
        var smithWatermanObj = new SmithWatermanAligner();
        if (additionalParameter != AlignParameters.AllParam)
        {
            smithWatermanObj.SimilarityMatrix = sm;
            smithWatermanObj.GapOpenCost = gapOpenCost;
            smithWatermanObj.GapExtensionCost = gapExtensionCost;
        }

        // Align the input sequences and catch the exception.
        bool useAlign = alignType == AlignmentType.Align;
        try
        {
            switch (additionalParameter)
            {
                case AlignParameters.AlignList:
                    if (useAlign)
                    {
                        smithWatermanObj.Align(new List<ISequence> {aInput, bInput});
                    }
                    else
                    {
                        smithWatermanObj.AlignSimple(new List<ISequence> {aInput, bInput});
                    }
                    break;
                case AlignParameters.AlignTwo:
                    if (useAlign)
                    {
                        smithWatermanObj.Align(aInput, bInput);
                    }
                    else
                    {
                        smithWatermanObj.AlignSimple(aInput, bInput);
                    }
                    break;
                case AlignParameters.AllParam:
                    if (useAlign)
                    {
                        smithWatermanObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                    }
                    else
                    {
                        smithWatermanObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    }
                    break;
                default:
                    break;
            }
        }
        catch (ArgumentException ex)
        {
            actualExpection = ex;
        }
    }

    // Validate that expected exception is thrown using error message.
    string expectedErrorMessage = this.GetExpectedErrorMeesageWithInvalidSimilarityMatrixType(nodeName, invalidType);

    // Fail with a clear message instead of a NullReferenceException when nothing was thrown.
    Assert.IsNotNull(actualExpection, "Expected an exception for the invalid similarity matrix, but none was thrown.");
    Assert.AreEqual(expectedErrorMessage, actualExpection.Message);

    ApplicationLog.WriteLine(string.Concat(
        "SmithWatermanAligner P2 : Expected Error message is thrown ",
        expectedErrorMessage));
}
/// <summary>
/// Checks that an invalid input sequence produces the expected error message.
/// The error is captured either while building the sequence or, if that succeeds,
/// while aligning the sequence against itself (ArgumentException).
/// </summary>
/// <param name="nodeName">XML config node the test data is read from.</param>
/// <param name="isTextFile">True to parse the sequence from a FASTA file; false to read the sequence string from the config node.</param>
/// <param name="invalidSequenceType">Used to pick the input file name when <paramref name="isTextFile"/> is true.</param>
/// <param name="additionalParameter">Selects the list, two-sequence, or all-parameter aligner overload.</param>
/// <param name="alignType">Align selects Align(); any other value selects AlignSimple().</param>
/// <param name="sequenceType">Used to look up the expected error message.</param>
private void InValidateSmithWatermanAlignmentWithInvalidSequence(string nodeName, bool isTextFile, InvalidSequenceType invalidSequenceType, AlignParameters additionalParameter, AlignmentType alignType, InvalidSequenceType sequenceType)
{
    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    Exception actualException = null;
    Sequence aInput = null;
    Sequence bInput = null;

    if (isTextFile)
    {
        // Read the xml file for getting both the files for aligning.
        string filepath = this.GetInputFileNameWithInvalidType(nodeName, invalidSequenceType);

        // Create input sequence for sequence string in different cases.
        try
        {
            // Parse the files and get the sequence.
            var parser = new FastAParser();
            IEnumerable<ISequence> seqs = parser.Parse(filepath);
            aInput = new Sequence(alphabet, new string(seqs.ElementAt(0).Select(a => (char) a).ToArray()));
        }
        catch (Exception ex)
        {
            actualException = ex;
        }
    }
    else
    {
        string originalSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.InvalidSequence1);

        // Create input sequence for sequence string in different cases.
        try
        {
            aInput = new Sequence(alphabet, originalSequence);
        }
        catch (ArgumentException ex)
        {
            actualException = ex;
        }
    }

    if (actualException == null)
    {
        bInput = aInput;

        // Create similarity matrix object for a given file.
        string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);
        SimilarityMatrix sm;

        // Dispose the reader once the matrix has been constructed.
        using (var reader = new StreamReader(blosumFilePath))
        {
            sm = new SimilarityMatrix(reader);
        }

        int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
        int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), null);

        // Create SmithWatermanAligner instance and set its values.
        var smithWatermanObj = new SmithWatermanAligner();
        if (additionalParameter != AlignParameters.AllParam)
        {
            smithWatermanObj.SimilarityMatrix = sm;
            smithWatermanObj.GapOpenCost = gapOpenCost;
            smithWatermanObj.GapExtensionCost = gapExtensionCost;
        }

        // Align the input sequences and catch the exception.
        bool useAlign = alignType == AlignmentType.Align;
        try
        {
            switch (additionalParameter)
            {
                case AlignParameters.AlignList:
                    if (useAlign)
                    {
                        smithWatermanObj.Align(new List<ISequence> {aInput, bInput});
                    }
                    else
                    {
                        smithWatermanObj.AlignSimple(new List<ISequence> {aInput, bInput});
                    }
                    break;
                case AlignParameters.AlignTwo:
                    if (useAlign)
                    {
                        smithWatermanObj.Align(aInput, bInput);
                    }
                    else
                    {
                        smithWatermanObj.AlignSimple(aInput, bInput);
                    }
                    break;
                case AlignParameters.AllParam:
                    if (useAlign)
                    {
                        smithWatermanObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                    }
                    else
                    {
                        smithWatermanObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    }
                    break;
                default:
                    break;
            }
        }
        catch (ArgumentException ex)
        {
            actualException = ex;
        }
    }

    // Validate Error messages for Invalid Sequence types.
    string expectedErrorMessage = this.GetExpectedErrorMeesageWithInvalidSequenceType(nodeName, sequenceType);

    // Fail with a clear message instead of a NullReferenceException when nothing was thrown.
    Assert.IsNotNull(actualException, "Expected an exception for the invalid sequence, but none was thrown.");
    Assert.AreEqual(expectedErrorMessage, actualException.Message);

    ApplicationLog.WriteLine(string.Concat(
        "SmithWatermanAligner P2 : Expected Error message is thrown ",
        expectedErrorMessage));
}
/// <summary>
/// Aligns a reference against a haplotype that is the reference with two runs removed
/// (3 and 14 bases), and checks that the aligned haplotype comes back with the gaps
/// at exactly the positions marked in <c>longHapOrg</c>.
/// </summary>
public void SmithWatermanGotohAlignSequenceWithMultiplePossibleIndelPositions()
{
    var longRef = "TGACCCCGAGGGGGCCCGGGGCCGCGTCCCTGGGCCCTCCCCA";
    var longHapOrg = "TGACCCCGAGGG---CCGGG--------------CCCTCCCCA";

    // The aligner input is the haplotype without its gap markers.
    var longHap = longHapOrg.Replace("-", "");

    var lr = new Sequence(DnaAlphabet.Instance, longRef);
    var lh = new Sequence(DnaAlphabet.Instance, longHap);

    var al = new SmithWatermanAligner
    {
        SimilarityMatrix = new DiagonalSimilarityMatrix(20, -15),
        GapOpenCost = -26,
        GapExtensionCost = -1
    };

    var res = al.Align(lr, lh);
    var results = res.First();
    var alignedSeq = results.AlignedSequences[0].Sequences[1].ToString();

    // Expected value goes first so a failure message labels the two strings correctly.
    Assert.AreEqual(longHapOrg, alignedSeq);
}
/// <summary>
/// Aligns two sequences read from the XML config (or from the FASTA files it names)
/// with SmithWatermanAligner and compares the result with the expected aligned
/// sequences and score stored under the same config node.
/// </summary>
/// <param name="nodeName">XML config node the test data is read from.</param>
/// <param name="isTextFile">True to parse the inputs from FASTA files; false to read sequence strings from the config node.</param>
/// <param name="caseType">Case type passed to GetSequenceWithCaseType; also selects which expected-sequence nodes are read.</param>
/// <param name="additionalParameter">Selects the list, two-sequence, or all-parameter aligner overload.</param>
/// <param name="alignType">Align selects Align(); any other value selects AlignSimple().</param>
/// <param name="similarityMatrixParam">DiagonalMatrix builds a DiagonalSimilarityMatrix; any other value loads the matrix from the blosum file.</param>
private void ValidateSmithWatermanAlignment(string nodeName, bool isTextFile, SequenceCaseType caseType, AlignParameters additionalParameter, AlignmentType alignType, SimilarityMatrixParameters similarityMatrixParam)
{
    Sequence aInput, bInput;
    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    if (isTextFile)
    {
        // Read the xml file for getting both the files for aligning.
        string filePath1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
        string filePath2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

        // Parse the files and get the sequence.
        var parseObjectForFile1 = new FastAParser { Alphabet = alphabet };
        ISequence parsedSequence1 = parseObjectForFile1.Parse(filePath1).ElementAt(0);
        ISequence parsedSequence2 = parseObjectForFile1.Parse(filePath2).ElementAt(0);

        // Create input sequence for sequence string in different cases.
        GetSequenceWithCaseType(
            parsedSequence1.ConvertToString(),
            parsedSequence2.ConvertToString(),
            alphabet,
            caseType,
            out aInput,
            out bInput);
    }
    else
    {
        string originalSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
        string originalSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

        // Create input sequence for sequence string in different cases.
        GetSequenceWithCaseType(originalSequence1, originalSequence2, alphabet, caseType, out aInput, out bInput);
    }

    ApplicationLog.WriteLine(string.Format("SmithWatermanAligner P2 : First sequence used is '{0}'.", aInput.ConvertToString()));
    ApplicationLog.WriteLine(string.Format("SmithWatermanAligner P2 : Second sequence used is '{0}'.", bInput.ConvertToString()));

    // Create similarity matrix object for a given file.
    string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);

    SimilarityMatrix sm;
    switch (similarityMatrixParam)
    {
        case SimilarityMatrixParameters.DiagonalMatrix:
            string matchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode);
            string misMatchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MisMatchScoreNode);
            sm = new DiagonalSimilarityMatrix(int.Parse(matchValue, null), int.Parse(misMatchValue, null));
            break;
        default:
            // TextReader and every other value load the matrix from file; the reader
            // is disposed on every path so the file handle is not leaked.
            using (TextReader reader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(reader);
            }
            break;
    }

    int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
    int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), null);

    // Create SmithWatermanAligner instance and set its values.
    var smithWatermanObj = new SmithWatermanAligner();
    if (additionalParameter != AlignParameters.AllParam)
    {
        smithWatermanObj.SimilarityMatrix = sm;
        smithWatermanObj.GapOpenCost = gapOpenCost;
        smithWatermanObj.GapExtensionCost = gapExtensionCost;
    }

    IList<IPairwiseSequenceAlignment> result = null;

    // Align the input sequences.
    switch (additionalParameter)
    {
        case AlignParameters.AlignList:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = smithWatermanObj.Align(new List<ISequence> {aInput, bInput});
                    break;
                default:
                    result = smithWatermanObj.AlignSimple(new List<ISequence> {aInput, bInput});
                    break;
            }
            break;
        case AlignParameters.AlignTwo:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = smithWatermanObj.Align(aInput, bInput);
                    break;
                default:
                    result = smithWatermanObj.AlignSimple(aInput, bInput);
                    break;
            }
            break;
        case AlignParameters.AllParam:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = smithWatermanObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                    break;
                default:
                    result = smithWatermanObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    break;
            }
            break;
        default:
            break;
    }

    // Get the expected sequence and score from xml config.
    string expectedSequence1, expectedSequence2, expectedScore;
    switch (alignType)
    {
        case AlignmentType.Align:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionScoreNode);
            switch (caseType)
            {
                case SequenceCaseType.LowerCase:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence1InLower);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence2InLower);
                    break;
                default:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence1Node);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence2Node);
                    break;
            }
            break;
        default:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedScoreNode);
            switch (caseType)
            {
                case SequenceCaseType.LowerCase:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence1inLowerNode);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence2inLowerNode);
                    break;
                case SequenceCaseType.LowerUpperCase:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence1inLowerNode);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
                    break;
                default:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode1);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
                    break;
            }
            break;
    }

    // Match the alignment result with expected result.
    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    var alignedSeq = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(alphabet, expectedSequence1),
        SecondSequence = new Sequence(alphabet, expectedSequence2),
        Score = Convert.ToInt32(expectedScore, null),
        FirstOffset = Int32.MinValue,
        SecondOffset = Int32.MinValue,
    };
    align.PairwiseAlignedSequences.Add(alignedSeq);
    expectedOutput.Add(align);

    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Final Score '{0}'.", expectedScore));
    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Aligned First Sequence is '{0}'.", expectedSequence1));
    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Aligned Second Sequence is '{0}'.", expectedSequence2));

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Builds a DNA sequence of all 'A' whose length squared exceeds Int32.MaxValue and
/// asserts that aligning it against itself throws ArgumentOutOfRangeException.
/// </summary>
public void SmithWatermanThrowsExceptionWhenTooLarge()
{
    // Just past the square root of Int32.MaxValue, so length * length exceeds Int32.MaxValue.
    int oversizedLength = (int)Math.Sqrt((double)Int32.MaxValue) + 5;

    // Fill the whole buffer with the symbol 'A'.
    var bases = new byte[oversizedLength];
    int position = 0;
    while (position < bases.Length)
    {
        bases[position] = (byte)'A';
        position++;
    }

    var oversized = new Sequence(DnaAlphabet.Instance, bases, false);
    var aligner = new SmithWatermanAligner();

    Assert.Throws(typeof(ArgumentOutOfRangeException), () => aligner.Align(oversized, oversized));
}
/// <summary>
/// Aligns two sequences read from the XML config (or from the FASTA files it names)
/// with SmithWatermanAligner and compares the result with the expected aligned
/// sequences and score stored under the same config node.
/// </summary>
/// <param name="nodeName">XML config node the test data is read from.</param>
/// <param name="alignParam">Selects the aligner overload; values whose name contains "Code" read the sequences as strings from the config instead of from files.</param>
/// <param name="similarityMatrixParam">DiagonalMatrix builds a DiagonalSimilarityMatrix; any other value loads the matrix from the blosum file.</param>
/// <param name="alignType">Align selects Align(); any other value selects AlignSimple().</param>
private void ValidateSmithWatermanAlignment(string nodeName, AlignParameters alignParam, SimilarityMatrixParameters similarityMatrixParam, AlignmentType alignType)
{
    ISequence aInput, bInput;
    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    // Parse the files and get the sequence.
    if (alignParam.ToString().Contains("Code"))
    {
        string sequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
        string sequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

        aInput = new Sequence(alphabet, sequence1);
        bInput = new Sequence(alphabet, sequence2);
    }
    else
    {
        // Read the xml file for getting both the files for aligning.
        string filePath1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
        string filePath2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

        var parseObjectForFile1 = new FastAParser { Alphabet = alphabet };
        ISequence originalSequence1 = parseObjectForFile1.Parse(filePath1).FirstOrDefault();
        Assert.IsNotNull(originalSequence1);
        aInput = new Sequence(alphabet, originalSequence1.ConvertToString());

        var parseObjectForFile2 = new FastAParser { Alphabet = alphabet };
        ISequence originalSequence2 = parseObjectForFile2.Parse(filePath2).FirstOrDefault();
        Assert.IsNotNull(originalSequence2);
        bInput = new Sequence(alphabet, originalSequence2.ConvertToString());
    }

    string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);

    SimilarityMatrix sm;
    switch (similarityMatrixParam)
    {
        case SimilarityMatrixParameters.DiagonalMatrix:
            string matchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode);
            string misMatchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MisMatchScoreNode);
            sm = new DiagonalSimilarityMatrix(int.Parse(matchValue, null), int.Parse(misMatchValue, null));
            break;
        default:
            // TextReader and every other value load the matrix from file; the reader
            // is disposed on every path so the file handle is not leaked.
            using (TextReader reader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(reader);
            }
            break;
    }

    int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
    int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), null);

    var smithWatermanObj = new SmithWatermanAligner();
    if (AlignParameters.AllParam != alignParam)
    {
        smithWatermanObj.SimilarityMatrix = sm;
        smithWatermanObj.GapOpenCost = gapOpenCost;
    }

    IList<IPairwiseSequenceAlignment> result = null;

    switch (alignParam)
    {
        case AlignParameters.AlignList:
        case AlignParameters.AlignListCode:
            var sequences = new List<ISequence> {aInput, bInput};
            switch (alignType)
            {
                case AlignmentType.Align:
                    smithWatermanObj.GapExtensionCost = gapExtensionCost;
                    result = smithWatermanObj.Align(sequences);
                    break;
                default:
                    result = smithWatermanObj.AlignSimple(sequences);
                    break;
            }
            break;
        case AlignParameters.AllParam:
        case AlignParameters.AllParamCode:
            switch (alignType)
            {
                case AlignmentType.Align:
                    smithWatermanObj.GapExtensionCost = gapExtensionCost;
                    result = smithWatermanObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                    break;
                default:
                    result = smithWatermanObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    break;
            }
            break;
        case AlignParameters.AlignTwo:
        case AlignParameters.AlignTwoCode:
            switch (alignType)
            {
                case AlignmentType.Align:
                    smithWatermanObj.GapExtensionCost = gapExtensionCost;
                    result = smithWatermanObj.Align(aInput, bInput);
                    break;
                default:
                    result = smithWatermanObj.AlignSimple(aInput, bInput);
                    break;
            }
            break;
        default:
            break;
    }

    // Read the expected score and aligned sequences from the xml config.
    string expectedSequence1, expectedSequence2, expectedScore;
    switch (alignType)
    {
        case AlignmentType.Align:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionScoreNode);
            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence1Node);
            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence2Node);
            break;
        default:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedScoreNode);
            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode1);
            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
            break;
    }

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    var alignedSeq = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(alphabet, expectedSequence1),
        SecondSequence = new Sequence(alphabet, expectedSequence2),
        Score = Convert.ToInt32(expectedScore, null),
        FirstOffset = Int32.MinValue,
        SecondOffset = Int32.MinValue,
    };
    align.PairwiseAlignedSequences.Add(alignedSeq);
    expectedOutput.Add(align);

    ApplicationLog.WriteLine(string.Format("SmithWatermanAligner P1 : Final Score '{0}'.", expectedScore));
    ApplicationLog.WriteLine(string.Format("SmithWatermanAligner P1 : Aligned First Sequence is '{0}'.", expectedSequence1));
    ApplicationLog.WriteLine(string.Format("SmithWatermanAligner P1 : Aligned Second Sequence is '{0}'.", expectedSequence2));

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Aligns the two sequences configured under the SmithWatermanAlignAlgorithm node
/// (read either from FASTA files or as strings) and compares the result with the
/// expected aligned sequences and score stored under the same node.
/// </summary>
/// <param name="isTextFile">True to parse the inputs from FASTA files; false to read sequence strings from the config node.</param>
/// <param name="alignParam">Selects the list, two-sequence, or all-parameter aligner overload.</param>
/// <param name="alignType">Align selects Align(); any other value selects AlignSimple().</param>
private void ValidateSmithWatermanAlignment(bool isTextFile, AlignmentParamType alignParam, AlignmentType alignType)
{
    ISequence aInput, bInput;

    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName, Constants.AlphabetNameNode));

    if (isTextFile)
    {
        // Read the xml file for getting both the files for aligning.
        string filePath1 = this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName, Constants.FilePathNode1);
        string filePath2 = this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName, Constants.FilePathNode2);

        // Parse the files and get the sequence.
        var parseObjectForFile1 = new FastAParser();
        parseObjectForFile1.Alphabet = alphabet;
        aInput = parseObjectForFile1.Parse(filePath1).First();

        var parseObjectForFile2 = new FastAParser();
        parseObjectForFile2.Alphabet = alphabet;
        bInput = parseObjectForFile2.Parse(filePath2).First();
    }
    else
    {
        // Read the input sequence strings from the xml config.
        string origSequence1 = this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName, Constants.SequenceNode1);
        string origSequence2 = this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName, Constants.SequenceNode2);
        aInput = new Sequence(alphabet, origSequence1);
        bInput = new Sequence(alphabet, origSequence2);
    }

    string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName, Constants.BlosumFilePathNode);

    // Dispose the reader once the matrix has been constructed so the file handle is not leaked.
    SimilarityMatrix sm;
    using (var reader = new StreamReader(blosumFilePath))
    {
        sm = new SimilarityMatrix(reader);
    }

    int gapOpenCost = int.Parse(
        this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName, Constants.GapOpenCostNode),
        null);
    int gapExtensionCost = int.Parse(
        this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName, Constants.GapExtensionCostNode),
        null);

    var smithWatermanObj = new SmithWatermanAligner();
    if (AlignmentParamType.AllParam != alignParam)
    {
        smithWatermanObj.SimilarityMatrix = sm;
        smithWatermanObj.GapOpenCost = gapOpenCost;
    }

    IList<IPairwiseSequenceAlignment> result = null;

    switch (alignParam)
    {
        case AlignmentParamType.AlignList:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = smithWatermanObj.Align(new List<ISequence> {aInput, bInput});
                    break;
                default:
                    result = smithWatermanObj.AlignSimple(new List<ISequence> {aInput, bInput});
                    break;
            }
            break;
        case AlignmentParamType.AlignTwo:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = smithWatermanObj.Align(aInput, bInput);
                    break;
                default:
                    result = smithWatermanObj.AlignSimple(aInput, bInput);
                    break;
            }
            break;
        case AlignmentParamType.AllParam:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = smithWatermanObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                    break;
                default:
                    result = smithWatermanObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    break;
            }
            break;
        default:
            break;
    }

    // Read the expected score and aligned sequences from the xml config.
    string expectedSequence1, expectedSequence2, expectedScore;
    switch (alignType)
    {
        case AlignmentType.Align:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(
                Constants.SmithWatermanAlignAlgorithmNodeName, Constants.ExpectedGapExtensionScoreNode);
            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(
                Constants.SmithWatermanAlignAlgorithmNodeName, Constants.ExpectedGapExtensionSequence1Node);
            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(
                Constants.SmithWatermanAlignAlgorithmNodeName, Constants.ExpectedGapExtensionSequence2Node);
            break;
        default:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(
                Constants.SmithWatermanAlignAlgorithmNodeName, Constants.ExpectedScoreNode);
            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(
                Constants.SmithWatermanAlignAlgorithmNodeName, Constants.ExpectedSequenceNode1);
            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(
                Constants.SmithWatermanAlignAlgorithmNodeName, Constants.ExpectedSequenceNode2);
            break;
    }

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    var alignedSeq = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(alphabet, expectedSequence1),
        SecondSequence = new Sequence(alphabet, expectedSequence2),
        Score = Convert.ToInt32(expectedScore, null),
        FirstOffset = Int32.MinValue,
        SecondOffset = Int32.MinValue,
    };
    align.PairwiseAlignedSequences.Add(alignedSeq);
    expectedOutput.Add(align);

    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner BVT : Final Score '{0}'.", expectedScore));
    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner BVT : Aligned First Sequence is '{0}'.", expectedSequence1));
    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner BVT : Aligned Second Sequence is '{0}'.", expectedSequence2));

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}