/// <summary>
/// Verifies that <c>VariantCaller.LeftAlignIndelsAndCallVariants</c> throws a
/// <see cref="FormatException"/> for two alignments: one whose query ends in a gap
/// column, and one whose query has a gap inside the leading run of A's.
/// </summary>
public static void TestExceptionThrownForUnclippedAlignment()
{
    // Each entry is { reference, query }; the cases are checked in this order.
    var rejectedCases = new[]
    {
        new[] { "ACAATATA", "ACAATAT-" },
        new[] { "AAACAATATA", "AA-CAATATA" },
    };

    foreach (var rejected in rejectedCases)
    {
        var reference = new Sequence(DnaAlphabet.Instance, rejected[0]);
        var query = new Sequence(DnaAlphabet.Instance, rejected[1]);

        var alignment = new PairwiseSequenceAlignment(reference, query);
        var alignedPair = new PairwiseAlignedSequence { FirstSequence = reference, SecondSequence = query };
        alignment.Add(alignedPair);

        Assert.Throws<FormatException>(() => VariantCaller.LeftAlignIndelsAndCallVariants(alignment, true));
    }
}
/// <summary>
/// Runs the Smith-Waterman aligner on two protein sequences with a BLOSUM62 matrix,
/// gap open cost -8 and gap extension cost -1, then compares the result with the
/// expected alignment.
/// </summary>
public void SmithWatermanProteinSeqAffineGap()
{
    IPairwiseSequenceAligner aligner = new SmithWatermanAligner
    {
        SimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62),
        GapOpenCost = -8,
        GapExtensionCost = -1,
    };
    ISequence firstInput = new Sequence(Alphabets.Protein, "HEAGAWGHEE");
    ISequence secondInput = new Sequence(Alphabets.Protein, "PAWHEAE");

    IList<IPairwiseSequenceAlignment> actual = aligner.Align(firstInput, secondInput);
    AlignmentHelpers.LogResult(aligner, actual);

    // Expected alignment.
    var expectedPair = new PairwiseAlignedSequence();
    expectedPair.FirstSequence = new Sequence(Alphabets.Protein, "AWGHE");
    expectedPair.SecondSequence = new Sequence(Alphabets.Protein, "AW-HE");
    expectedPair.Consensus = new Sequence(Alphabets.AmbiguousProtein, "AWGHE");
    expectedPair.Score = 20;
    expectedPair.FirstOffset = 0;
    expectedPair.SecondOffset = 3;

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);
    IList<IPairwiseSequenceAlignment> expected = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(AlignmentHelpers.CompareAlignment(actual, expected));
}
/// <summary>
/// Aligns one query sequence against two reference sequences with
/// <c>NucmerPairwiseAligner</c> and compares the output with two expected
/// aligned pairs.
/// </summary>
public void TestNUCmer3MultipleReferences()
{
    var references = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "ATGCGCATCCCC") { ID = "R1" },
        new Sequence(Alphabets.DNA, "TAGCT") { ID = "R11" },
    };
    var queries = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "CCGCGCCCCCTCAGCT") { ID = "Q1" },
    };

    var aligner = new NucmerPairwiseAligner
    {
        FixedSeparation = 0,
        MinimumScore = 2,
        SeparationFactor = -1,
        LengthOfMUM = 3,
    };

    IList<IPairwiseSequenceAlignment> result = aligner
        .Align(references, queries)
        .Select(a => a as IPairwiseSequenceAlignment)
        .ToList();

    // The result list itself must exist.
    Assert.AreNotEqual(null, result);

    // Both expected pairs belong to a single alignment object.
    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();

    expectedAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "GCGCATCCCC"),
        SecondSequence = new Sequence(Alphabets.DNA, "GCGC--CCCC"),
        Consensus = new Sequence(Alphabets.DNA, "GCGCATCCCC"),
        Score = -5,
        FirstOffset = 0,
        SecondOffset = 0,
    });

    expectedAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "AGCT"),
        SecondSequence = new Sequence(Alphabets.DNA, "AGCT"),
        Consensus = new Sequence(Alphabets.DNA, "AGCT"),
        Score = 12,
        FirstOffset = 11,
        SecondOffset = 0,
    });

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Converts <paramref name="alignedSequence"/> into an <see cref="AlignmentResult"/> instance.
/// </summary>
/// <param name="alignedSequence">The aligned pair whose two sequences are compared column by column.</param>
/// <param name="transitionWeights">Weights passed on to <c>CalculateScore</c> to compute the score.</param>
/// <returns>
/// A result holding both aligned strings, the indices of columns where the two
/// sequences differ, the score, the number of replacements and the number of gap symbols.
/// </returns>
public static AlignmentResult ToAlignmentResult(this PairwiseAlignedSequence alignedSequence, TransitionWeights transitionWeights)
{
    var firstSequence = alignedSequence.FirstSequence;
    var secondSequence = alignedSequence.SecondSequence;

    // Positions of every column whose two symbols differ (gap columns included).
    uint[] differingColumns = firstSequence
        .Zip(secondSequence)
        .Select((pair, position) => pair.First != pair.Second ? (uint?)position : null)
        .Where(position => position.HasValue)
        .Select(position => position.Value)
        .ToArray();

    var alignedStrings = new AlignedStrings(
        firstSequence.ToFullString(),
        secondSequence.ToFullString(),
        differingColumns);

    var score = alignedSequence.CalculateScore(transitionWeights);

    // A replacement is a column with two different, non-gap symbols.
    var replacementCount = 0;
    foreach (var (left, right) in firstSequence.Zip(secondSequence))
    {
        var hasGap = left == '-' || right == '-';
        if (!hasGap && left != right)
        {
            replacementCount++;
        }
    }

    // Gap symbols are counted across both sequences.
    var indelCount = firstSequence
        .Concat(secondSequence)
        .Where(symbol => symbol == '-')
        .Count();

    return new AlignmentResult(alignedStrings, score, (uint)replacementCount, (uint)indelCount);
}
/// <summary>
/// Runs <c>MUMmerAligner</c> (with Needleman-Wunsch as the pairwise algorithm) on
/// the reference and query FASTA files configured under <paramref name="nodeName"/>
/// and compares the alignment with the expected sequences and score from the same node.
/// </summary>
/// <param name="nodeName">Name of the XML configuration node holding the test data.</param>
private void ValidateMUMmerAlignGeneralTestCases(string nodeName)
{
    // Reference sequence: path comes from the configuration file.
    string referencePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode).TestDir();
    Assert.IsNotNull(referencePath);
    ApplicationLog.WriteLine(string.Format(null, "MUMmer P2 : Successfully validated the File Path '{0}'.", referencePath));

    ISequence referenceSeq = new FastAParser().Parse(referencePath).ElementAt(0);

    // Query sequences: path comes from the configuration file.
    string queryPath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode).TestDir();
    Assert.IsNotNull(queryPath);
    ApplicationLog.WriteLine(string.Format(null, "MUMmer P2 : Successfully validated the Search File Path '{0}'.", queryPath));

    IEnumerable<ISequence> querySeqs = new FastAParser().Parse(queryPath);

    string configuredMumLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);

    var aligner = new MUMmerAligner();
    aligner.LengthOfMUM = long.Parse(configuredMumLength, null);
    aligner.StoreMUMs = true;
    aligner.PairWiseAlgorithm = new NeedlemanWunschAligner();
    aligner.GapOpenCost = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);

    IList<IPairwiseSequenceAlignment> actual = aligner.Align(referenceSeq, querySeqs);

    // The MUMs property must be populated after aligning.
    Assert.IsNotNull(aligner.MUMs);

    string expectedScore = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ScoreNodeName);
    string[] expectedSequences = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ExpectedSequencesNode);

    var expectedPair = new PairwiseAlignedSequence();
    expectedPair.FirstSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[0]);
    expectedPair.SecondSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[1]);
    expectedPair.Score = Convert.ToInt32(expectedScore, null);
    expectedPair.FirstOffset = int.MinValue;
    expectedPair.SecondOffset = int.MinValue;

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);
    IList<IPairwiseSequenceAlignment> expected = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(AlignmentHelpers.CompareAlignment(actual, expected));
    ApplicationLog.WriteLine("MUMmer P2 : Successfully validated the aligned sequences.");
}
/// <summary>
/// Runs Smith-Waterman (simple gap, diagonal matrix 5/-4, gap cost -10) on two DNA
/// sequences, logs every returned alignment, and compares the result with an
/// expected alignment containing two aligned pairs ("AAAA" and "CCCC").
/// </summary>
public void SmithWatermanAlignerMultipleAlignments2()
{
    var first = new Sequence(Alphabets.DNA, "AAAAGGGGGGCCCC");
    var second = new Sequence(Alphabets.DNA, "AAAATTTTTTTCCCC");
    SimilarityMatrix matrix = new DiagonalSimilarityMatrix(5, -4);
    var aligner = new SmithWatermanAligner
    {
        SimilarityMatrix = matrix,
        GapOpenCost = -10,
    };

    IList<IPairwiseSequenceAlignment> result = aligner.AlignSimple(first, second);

    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "{0}, Simple; Matrix {1}; GapOpenCost {2}", aligner.Name, aligner.SimilarityMatrix.Name, aligner.GapOpenCost));
    foreach (IPairwiseSequenceAlignment alignment in result)
    {
        // Only the first aligned pair of each alignment is logged.
        var topPair = alignment.PairwiseAlignedSequences[0];
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "score {0}", topPair.Score));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 0     {0}", alignment.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 1     {0}", alignment.SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 0    {0}", topPair.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 1    {0}", topPair.SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "consesus    {0}", topPair.Consensus.ToString()));
    }

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment(first, second);

    // First expected pair.
    expectedAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "AAAA"),
        SecondSequence = new Sequence(Alphabets.DNA, "AAAA"),
        Consensus = new Sequence(Alphabets.DNA, "AAAA"),
        Score = 20,
        FirstOffset = 0,
        SecondOffset = 0,
    });

    // Second expected pair.
    expectedAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "CCCC"),
        SecondSequence = new Sequence(Alphabets.DNA, "CCCC"),
        Consensus = new Sequence(Alphabets.DNA, "CCCC"),
        Score = 20,
        FirstOffset = 1,
        SecondOffset = 0,
    });

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Aligns one query against one reference with <c>NucmerPairwiseAligner</c> in
/// forward-only mode and compares the output with two expected aligned pairs.
/// </summary>
public void TestNUCmer3SingleCluster()
{
    var references = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "AGAAAAGTTTTCA") { ID = "R1" },
    };
    var queries = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "TTTTGAGATAAAATC") { ID = "Q1" },
    };

    var aligner = new NucmerPairwiseAligner
    {
        FixedSeparation = 0,
        MinimumScore = 2,
        SeparationFactor = -1,
        LengthOfMUM = 3,
        ForwardOnly = true,
    };

    IList<IPairwiseSequenceAlignment> result = aligner
        .Align(references, queries)
        .Select(a => a as IPairwiseSequenceAlignment)
        .ToList();

    // The result list itself must exist.
    Assert.AreNotEqual(null, result);

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();

    expectedAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "AG--AAAA"),
        SecondSequence = new Sequence(Alphabets.DNA, "AGATAAAA"),
        Consensus = new Sequence(Alphabets.DNA, "AGATAAAA"),
        Score = -11,
        FirstOffset = 5,
        SecondOffset = 0,
    });

    expectedAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "TTTT"),
        SecondSequence = new Sequence(Alphabets.DNA, "TTTT"),
        Consensus = new Sequence(Alphabets.DNA, "TTTT"),
        Score = 12,
        FirstOffset = 0,
        SecondOffset = 7,
    });

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Runs <c>PairwiseOverlapAligner</c> with <c>UseEARTHToFillMatrix</c> enabled
/// (BLOSUM50, gap open -8, gap extension -1) on two protein sequences, logs each
/// returned alignment, and compares the result with the expected alignment.
/// </summary>
public void PairwiseOverlapProteinSeqAffineGapUseEarth()
{
    var first = new Sequence(Alphabets.Protein, "HEAGAWGHEE");
    var second = new Sequence(Alphabets.Protein, "PAWHEAE");
    var matrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum50);
    var aligner = new PairwiseOverlapAligner
    {
        SimilarityMatrix = matrix,
        GapOpenCost = -8,
        UseEARTHToFillMatrix = true,
        GapExtensionCost = -1,
    };

    IList<IPairwiseSequenceAlignment> result = aligner.Align(first, second);

    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "{0}, Affine; Matrix {1}; GapOpenCost {2}; GapExtenstionCost {3}", aligner.Name, aligner.SimilarityMatrix.Name, aligner.GapOpenCost, aligner.GapExtensionCost));
    foreach (IPairwiseSequenceAlignment alignment in result)
    {
        // Only the first aligned pair of each alignment is logged.
        var topPair = alignment.PairwiseAlignedSequences[0];
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "score {0}", topPair.Score));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 0     {0}", alignment.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 1     {0}", alignment.SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 0    {0}", topPair.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 1    {0}", topPair.SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "consesus    {0}", topPair.Consensus.ToString()));
    }

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.Protein, "GAWGHEE"),
        SecondSequence = new Sequence(Alphabets.Protein, "PAW-HEA"),
        Consensus = new Sequence(Alphabets.AmbiguousProtein, "XAWGHEX"),
        Score = 25,
        FirstOffset = 0,
        SecondOffset = 3,
    });

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Runs Smith-Waterman with a simple gap model (BLOSUM50, gap cost -8) on two
/// protein sequences, logs each returned alignment, and compares the result
/// with the expected alignment.
/// </summary>
public void SmithWatermanProteinSeqSimpleGap()
{
    var first = new Sequence(Alphabets.Protein, "HEAGAWGHEE");
    var second = new Sequence(Alphabets.Protein, "PAWHEAE");
    var matrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum50);
    var aligner = new SmithWatermanAligner
    {
        SimilarityMatrix = matrix,
        GapOpenCost = -8,
    };

    IList<IPairwiseSequenceAlignment> result = aligner.AlignSimple(first, second);

    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "{0}, Simple; Matrix {1}; GapOpenCost {2}", aligner.Name, aligner.SimilarityMatrix.Name, aligner.GapOpenCost));
    foreach (IPairwiseSequenceAlignment alignment in result)
    {
        // Only the first aligned pair of each alignment is logged.
        var topPair = alignment.PairwiseAlignedSequences[0];
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "score {0}", topPair.Score));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 0     {0}", alignment.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 1     {0}", alignment.SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 0    {0}", topPair.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 1    {0}", topPair.SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "consesus    {0}", topPair.Consensus.ToString()));
    }

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.Protein, "AWGHE"),
        SecondSequence = new Sequence(Alphabets.Protein, "AW-HE"),
        Consensus = new Sequence(Alphabets.Protein, "AWGHE"),
        Score = 28,
        FirstOffset = 0,
        SecondOffset = 3,
    });

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Runs Needleman-Wunsch with BLOSUM50, gap open cost -8 and gap extension cost -1
/// on two protein sequences, logs the first returned alignment, and compares the
/// result with the expected global alignment.
/// </summary>
public void NeedlemanWunschProteinSeqAffineGap()
{
    var first = new Sequence(Alphabets.Protein, "HEAGAWGHEE");
    var second = new Sequence(Alphabets.Protein, "PAWHEAE");
    var matrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum50);
    var aligner = new NeedlemanWunschAligner
    {
        SimilarityMatrix = matrix,
        GapOpenCost = -8,
        GapExtensionCost = -1,
    };

    IList<IPairwiseSequenceAlignment> result = aligner.Align(first, second);

    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "{0}, Affine; Matrix {1}; GapOpenCost {2}; GapExtenstionCost {3}", aligner.Name, aligner.SimilarityMatrix.Name, aligner.GapOpenCost, aligner.GapExtensionCost));

    // Only the first aligned pair of the first alignment is logged.
    IPairwiseSequenceAlignment firstAlignment = result[0];
    var topPair = firstAlignment.PairwiseAlignedSequences[0];
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "score {0}", topPair.Score));
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 0     {0}", firstAlignment.FirstSequence.ToString()));
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 1     {0}", firstAlignment.SecondSequence.ToString()));
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 0    {0}", topPair.FirstSequence.ToString()));
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 1    {0}", topPair.SecondSequence.ToString()));
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "consesus    {0}", topPair.Consensus));

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.Protein, "HEAGAWGHE-E"),
        SecondSequence = new Sequence(Alphabets.Protein, "---PAW-HEAE"),
        Consensus = new Sequence(AmbiguousProteinAlphabet.Instance, "HEAXAWGHEAE"),
        Score = 14,
        FirstOffset = 0,
        SecondOffset = 3,
    });

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Checks that <c>PairwiseAlignedSequence.ToString()</c> emits three lines
/// (the expected text lists "AWGHE", "AWGHE", "AW-HE"), each followed by the
/// platform newline.
/// </summary>
public void ValidatePairwiseAlignedSequenceToString()
{
    var alignedSeq = new PairwiseAlignedSequence();
    alignedSeq.FirstSequence = new Sequence(Alphabets.Protein, "AWGHE");
    alignedSeq.SecondSequence = new Sequence(Alphabets.Protein, "AW-HE");
    alignedSeq.Consensus = new Sequence(Alphabets.Protein, "AWGHE");
    alignedSeq.Score = 28;
    alignedSeq.FirstOffset = 0;
    alignedSeq.SecondOffset = 3;

    string actualString = alignedSeq.ToString();

    // Normalise the literal to the platform newline so the test is OS-independent.
    string expectedString = "AWGHE\r\nAWGHE\r\nAW-HE\r\n".Replace("\r\n", System.Environment.NewLine);

    // Expected value goes first so a failure message labels the two values correctly.
    Assert.AreEqual(expectedString, actualString);
}
/// <summary>
/// Checks that <c>PairwiseAlignedSequence.ToString()</c> emits three lines
/// (the expected text lists "AWGHE", "AWGHE", "AW-HE"), each followed by a newline.
/// </summary>
public void TestPairwiseAlignedSequenceToString()
{
    PairwiseAlignedSequence alignedSeq = new PairwiseAlignedSequence();
    alignedSeq.FirstSequence = new Sequence(Alphabets.Protein, "AWGHE");
    alignedSeq.SecondSequence = new Sequence(Alphabets.Protein, "AW-HE");
    alignedSeq.Consensus = new Sequence(Alphabets.Protein, "AWGHE");
    alignedSeq.Score = 28;
    alignedSeq.FirstOffset = 0;
    alignedSeq.SecondOffset = 3;

    string actualString = alignedSeq.ToString();

    // Normalise the literal to the platform newline, as the sibling ToString tests do.
    // This is a no-op where Environment.NewLine is "\r\n".
    // NOTE(review): assumes ToString() uses Environment.NewLine - confirm against the implementation.
    string expectedString = "AWGHE\r\nAWGHE\r\nAW-HE\r\n".Replace("\r\n", System.Environment.NewLine);

    // Expected value goes first so a failure message labels the two values correctly.
    Assert.AreEqual(expectedString, actualString);
}
/// <summary>
/// Feeds an alignment with three indels to
/// <c>VariantCaller.LeftAlignIndelsAndCallVariants</c>, then checks the returned
/// left-aligned sequences and the three indel variants reported.
/// </summary>
public static void TestLeftAlignmentStep()
{
    var reference = new Sequence(DnaAlphabet.Instance, "ACAATAAAAGCGCGCGCGCGTTACGTATAT--ATGGATAT");
    var query = new Sequence(DnaAlphabet.Instance, "ACAATAA-AGC--GCGC--GTTACGTATATATATGGATAT");
    var input = new PairwiseSequenceAlignment(reference, query);
    input.Add(new PairwiseAlignedSequence { FirstSequence = reference, SecondSequence = query });

    var outcome = VariantCaller.LeftAlignIndelsAndCallVariants(input, true);

    // Part 1: the left-aligned sequences.
    var leftAligned = outcome.Item1 as PairwiseSequenceAlignment;
    var leftAlignedReference = leftAligned.PairwiseAlignedSequences[0].FirstSequence.ConvertToString();
    var leftAlignedQuery = leftAligned.PairwiseAlignedSequences[0].SecondSequence.ConvertToString();
    Assert.AreEqual("ACAATAAAAGCGCGCGCGCGTTACG--TATATATGGATAT", leftAlignedReference);
    Assert.AreEqual("ACAAT-AAA----GCGCGCGTTACGTATATATATGGATAT", leftAlignedQuery);

    // Part 2: the called variants, described by parallel arrays indexed by variant.
    var calledVariants = outcome.Item2;
    Assert.AreEqual(3, calledVariants.Count);

    var expectedBases = new[] { "A", "GCGC", "TA" };
    var expectedHomopolymerBases = new[] { 'A', 'G', 'T' };
    var expectedInHomopolymer = new[] { true, false, false };
    var expectedLengths = new[] { 1, 4, 2 };
    var expectedStarts = new[] { 4, 8, 24 };
    var expectedKinds = new[] { IndelType.Deletion, IndelType.Deletion, IndelType.Insertion };

    for (int index = 0; index < expectedBases.Length; index++)
    {
        Assert.AreEqual(VariantType.INDEL, calledVariants[index].Type);
        var indel = calledVariants[index] as IndelVariant;
        Assert.AreEqual(expectedHomopolymerBases[index], indel.HomopolymerBase);
        Assert.AreEqual(expectedStarts[index], indel.StartPosition);
        Assert.AreEqual(expectedLengths[index], indel.Length);
        Assert.AreEqual(expectedBases[index], indel.InsertedOrDeletedBases);
        Assert.AreEqual(expectedInHomopolymer[index], indel.InHomopolymer);
        Assert.AreEqual(expectedKinds[index], indel.InsertionOrDeletion);
    }
}
/// <summary>
/// Round-trips a <c>PairwiseSequenceAlignment</c> through <c>BinaryFormatter</c>
/// (via the file "SequenceAlignment.data") and checks that the deserialized copy
/// is a distinct object with the same documentation, read-only flag, pair count,
/// score, consensus and sequences.
/// </summary>
/// <remarks>
/// Exceptions and assertion failures are left to propagate to the test runner so
/// that their message and stack trace are reported. The earlier catch-all turned
/// every failure into a message-less <c>Assert.Fail()</c>.
/// </remarks>
public void TestSequenceAlignmentWithBinaryFormatter()
{
    Sequence seq1 = new Sequence(Alphabets.DNA, "ACGACTTACG");
    Sequence seq2 = new Sequence(Alphabets.DNA, "TACGATCCGGAAA");
    Sequence seq3 = new Sequence(Alphabets.DNA, "ACGACTTACGATCCGGAAA");

    PairwiseAlignedSequence seqAlignment = new PairwiseAlignedSequence();
    seqAlignment.FirstSequence = seq1;
    seqAlignment.SecondSequence = seq2;
    seqAlignment.Score = 30;
    seqAlignment.Consensus = seq3;

    PairwiseSequenceAlignment alignment = new PairwiseSequenceAlignment();
    alignment.PairwiseAlignedSequences.Add(seqAlignment);
    alignment.Documentation = "Aligned seq1 and seq2";

    // `using` closes the stream on every exit path, including a failed assertion.
    using (Stream stream = File.Open("SequenceAlignment.data", FileMode.Create))
    {
        // NOTE(review): BinaryFormatter is obsolete and unsafe on untrusted data.
        // It is kept here because exercising it is the purpose of this test.
        BinaryFormatter formatter = new BinaryFormatter();
        formatter.Serialize(stream, alignment);

        // Rewind so that deserialization reads what was just written.
        stream.Seek(0, SeekOrigin.Begin);
        PairwiseSequenceAlignment deserializedseqAlignment = (PairwiseSequenceAlignment)formatter.Deserialize(stream);

        Assert.AreNotSame(alignment, deserializedseqAlignment);
        Assert.AreEqual(alignment.PairwiseAlignedSequences[0].Consensus.ToString(), deserializedseqAlignment.PairwiseAlignedSequences[0].Consensus.ToString());
        Assert.AreEqual(alignment.Documentation, deserializedseqAlignment.Documentation);
        Assert.AreEqual(alignment.IsReadOnly, deserializedseqAlignment.IsReadOnly);
        Assert.AreEqual(alignment.PairwiseAlignedSequences[0].Score, deserializedseqAlignment.PairwiseAlignedSequences[0].Score);
        Assert.AreEqual(alignment.PairwiseAlignedSequences.Count, deserializedseqAlignment.PairwiseAlignedSequences.Count);
        Assert.AreEqual(alignment.PairwiseAlignedSequences[0].FirstSequence.ToString(), deserializedseqAlignment.PairwiseAlignedSequences[0].FirstSequence.ToString());
        Assert.AreEqual(alignment.PairwiseAlignedSequences[0].SecondSequence.ToString(), deserializedseqAlignment.PairwiseAlignedSequences[0].SecondSequence.ToString());
    }
}
/// <summary>
/// Checks that <c>PairwiseSequenceAlignment.ToString()</c> for an alignment with
/// one aligned pair emits the pair's three lines followed by an empty line.
/// </summary>
public void ValidatePairwiseSequenceAlignmentToString()
{
    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    var alignedSeq = new PairwiseAlignedSequence();
    alignedSeq.FirstSequence = new Sequence(Alphabets.Protein, "AWGHE");
    alignedSeq.SecondSequence = new Sequence(Alphabets.Protein, "AW-HE");
    alignedSeq.Consensus = new Sequence(Alphabets.Protein, "AWGHE");
    alignedSeq.Score = 28;
    alignedSeq.FirstOffset = 0;
    alignedSeq.SecondOffset = 3;
    align.PairwiseAlignedSequences.Add(alignedSeq);

    string actualString = align.ToString();

    // Normalise the literal to the platform newline, as the sibling ToString tests do.
    // This is a no-op where Environment.NewLine is "\r\n".
    // NOTE(review): assumes ToString() uses Environment.NewLine - confirm against the implementation.
    string expectedString = "AWGHE\r\nAWGHE\r\nAW-HE\r\n\r\n".Replace("\r\n", System.Environment.NewLine);

    // Expected value goes first so a failure message labels the two values correctly.
    Assert.AreEqual(expectedString, actualString);
}
/// <summary>
/// Checks that <c>PairwiseSequenceAlignment.ToString()</c> for an alignment with
/// one aligned pair emits the pair's three lines followed by an empty line,
/// using the platform newline.
/// </summary>
public void TestPairwiseSequenceAlignmentToString()
{
    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    PairwiseAlignedSequence alignedSeq = new PairwiseAlignedSequence();
    alignedSeq.FirstSequence = new Sequence(Alphabets.Protein, "AWGHE");
    alignedSeq.SecondSequence = new Sequence(Alphabets.Protein, "AW-HE");
    alignedSeq.Consensus = new Sequence(Alphabets.Protein, "AWGHE");
    alignedSeq.Score = 28;
    alignedSeq.FirstOffset = 0;
    alignedSeq.SecondOffset = 3;
    align.PairwiseAlignedSequences.Add(alignedSeq);

    string actualString = align.ToString();

    // Normalise the literal to the platform newline so the test is OS-independent.
    string expectedString = "AWGHE\r\nAWGHE\r\nAW-HE\r\n\r\n".Replace("\r\n", Environment.NewLine);

    // Expected value goes first so a failure message labels the two values correctly.
    Assert.AreEqual(expectedString, actualString);
}
/// <summary>
/// Computes the value of the scoring function for a pair of aligned sequences.
/// </summary>
/// <param name="alignedSequence">The aligned pair; its two sequences are walked column by column.</param>
/// <param name="transitionWeights">
/// Supplies the match bonus, mismatch penalty, indel penalty and the optional gap opening penalty.
/// </param>
/// <returns>The sum of the per-column scores plus any gap opening penalties.</returns>
public static long CalculateScore(this PairwiseAlignedSequence alignedSequence, TransitionWeights transitionWeights)
{
    long total = 0;

    // True while the next gap column would start a new run of gap columns.
    var nextGapOpensRun = true;

    foreach (var (left, right) in alignedSequence.FirstSequence.Zip(alignedSequence.SecondSequence))
    {
        var columnHasGap = left == '-' || right == '-';

        // Per-column score. Equality is tested first, so two identical symbols
        // (including two gap symbols) score as a match.
        long columnScore;
        if (left == right)
        {
            columnScore = transitionWeights.MatchBonus;
        }
        else if (columnHasGap)
        {
            columnScore = transitionWeights.IndelPenalty;
        }
        else
        {
            columnScore = transitionWeights.MismatchPenalty;
        }

        if (!columnHasGap)
        {
            // A gap-free column ends the current run of gap columns.
            nextGapOpensRun = true;
        }
        else if (nextGapOpensRun && transitionWeights.GapOpeningPenalty.HasValue)
        {
            // The opening penalty is charged once, on the first column of a run.
            nextGapOpensRun = false;
            total += transitionWeights.GapOpeningPenalty.Value;
        }

        total += columnScore;
    }

    return total;
}
/// <summary>
/// Aligns one RNA query against an RNA reference with <c>MUMmerAligner</c>
/// (MUM length 3, Needleman-Wunsch, AmbiguousRna matrix, gap open -8,
/// gap extension -2) and compares the result with the expected alignment.
/// </summary>
public void TestMUMmerAlignerSingleMumRNA()
{
    var referenceSeq = new Sequence(Alphabets.RNA, "AUGCUUUUCCCCCCC");
    var queries = new List<ISequence> { new Sequence(Alphabets.RNA, "UAUAUUUUGG") };

    var aligner = new MUMmerAligner
    {
        LengthOfMUM = 3,
        PairWiseAlgorithm = new NeedlemanWunschAligner(),
        SimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna),
        GapOpenCost = -8,
        GapExtensionCost = -2,
    };

    IList<IPairwiseSequenceAlignment> result = aligner.Align(referenceSeq, queries);

    // At least one alignment must be returned.
    Assert.AreNotEqual(0, result.Count);

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.RNA, "-AUGCUUUUCCCCCCC"),
        SecondSequence = new Sequence(Alphabets.RNA, "UAU-AUUUU-----GG"),
        Consensus = new Sequence(AmbiguousRnaAlphabet.Instance, "UAUGMUUUUCCCCCSS"),
        Score = -14,
        FirstOffset = 1,
        SecondOffset = 0,
    });

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Runs Needleman-Wunsch with a simple gap model (diagonal matrix 2/-1, gap cost -2)
/// on two DNA sequences, logs the first returned alignment, and compares the
/// result with the expected global alignment.
/// </summary>
public void NeedlemanWunschDnaSeqSimpleGap()
{
    var first = new Sequence(Alphabets.DNA, "GAATTCAGTTA");
    var second = new Sequence(Alphabets.DNA, "GGATCGA");
    SimilarityMatrix matrix = new DiagonalSimilarityMatrix(2, -1);
    var aligner = new NeedlemanWunschAligner
    {
        SimilarityMatrix = matrix,
        GapOpenCost = -2,
    };

    IList<IPairwiseSequenceAlignment> result = aligner.AlignSimple(first, second);

    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "{0}, Simple; Matrix {1}; GapOpenCost {2}", aligner.Name, aligner.SimilarityMatrix.Name, aligner.GapOpenCost));

    // Only the first aligned pair of the first alignment is logged.
    IPairwiseSequenceAlignment firstAlignment = result[0];
    var topPair = firstAlignment.PairwiseAlignedSequences[0];
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "score {0}", topPair.Score));
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 0     {0}", firstAlignment.FirstSequence.ToString()));
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 1     {0}", firstAlignment.SecondSequence.ToString()));
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 0    {0}", topPair.FirstSequence.ToString()));
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 1    {0}", topPair.SecondSequence.ToString()));
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "consesus    {0}", topPair.Consensus));

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "GAATTCAGTTA"),
        SecondSequence = new Sequence(Alphabets.DNA, "GGA-TC-G--A"),
        Consensus = new Sequence(AmbiguousDnaAlphabet.Instance, "GRATTCAGTTA"),
        Score = 3,
        FirstOffset = 0,
        SecondOffset = 0,
    });

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Aligns one DNA query against a DNA reference with <c>MUMmerAligner</c>
/// (MUM length 3, Needleman-Wunsch, gap extension -2) and compares the result
/// with the expected alignment.
/// </summary>
public void TestMUMmerAlignerSingleMum()
{
    ISequence referenceSeq = new Sequence(Alphabets.DNA, "TTAATTTTAG");
    ISequence querySeq = new Sequence(Alphabets.DNA, "AGTTTAGAG");
    var queries = new List<ISequence>();
    queries.Add(querySeq);

    MUMmerAligner aligner = new MUMmerAligner();
    aligner.LengthOfMUM = 3;
    aligner.PairWiseAlgorithm = new NeedlemanWunschAligner();
    aligner.GapExtensionCost = -2;

    IList<IPairwiseSequenceAlignment> result = aligner.Align(referenceSeq, queries);

    // The result list itself must exist.
    Assert.AreNotEqual(null, result);

    PairwiseAlignedSequence expectedPair = new PairwiseAlignedSequence();
    expectedPair.FirstSequence = new Sequence(Alphabets.DNA, "TTAATTTTAG--");
    expectedPair.SecondSequence = new Sequence(Alphabets.DNA, "---AGTTTAGAG");
    expectedPair.Consensus = new Sequence(AmbiguousDnaAlphabet.Instance, "TTAAKTTTAGAG");
    expectedPair.Score = 5;
    expectedPair.FirstOffset = 0;
    expectedPair.SecondOffset = 3;

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);
    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Shifts the start of each indel in <paramref name="curIndels"/> as far left as
/// the surrounding bases allow, updating each <c>IndelLocation.Start</c> in place.
/// </summary>
/// <param name="aln">
/// The aligned pair. <c>FirstSequence</c> is treated as the reference and
/// <c>SecondSequence</c> as the haplotype/query.
/// </param>
/// <param name="curIndels">The indels to left-align. Their <c>Start</c> values are mutated.</param>
/// <returns>The same <paramref name="curIndels"/> list instance that was passed in.</returns>
private static List<IndelLocation> LeftAlignIndels(PairwiseAlignedSequence aln, List<IndelLocation> curIndels)
{
    var haplo = aln.SecondSequence;
    var refer = aln.FirstSequence;

    foreach (var indel in curIndels)
    {
        int start = indel.Start;
        int len = indel.Length;

        // `deleted` is the sequence selected by DeletionOnReference and
        // `notDeleted` is the other one.
        // NOTE(review): presumably `deleted` is the side carrying the gap symbols - confirm.
        ISequence deleted, notDeleted;
        if (indel.DeletionOnReference)
        {
            deleted = refer;
            notDeleted = haplo;
        }
        else
        {
            deleted = haplo;
            notDeleted = refer;
        }

        // Move one column left at a time while the indel stays inside the
        // alignment and does not start at column 0.
        while ((start + len) < notDeleted.Count && start > 0)
        {
            // The column just before the indel must match in both sequences...
            bool matchBefore = notDeleted[start - 1] == deleted[start - 1];

            // ...and that same base must equal the last base the indel currently
            // spans in the other sequence, so the shifted indel spells the same event.
            bool matchAfterShift = deleted[start - 1] == notDeleted[start + len - 1];

            if (!(matchAfterShift && matchBefore))
            {
                break;
            }

            indel.Start--;
            start--;
        }
    }

    return curIndels;
}
/// <summary>
/// Aligns one DNA query against a DNA reference with
/// <c>MUMmerAligner.AlignSimple</c> (MUM length 4, Needleman-Wunsch) and
/// compares the result with the expected alignment.
/// </summary>
public void TestMUMmerAlignerMultipleMum()
{
    var referenceSeq = new Sequence(Alphabets.DNA, "ATGCGCATCCCCTT");
    var queries = new List<ISequence> { new Sequence(Alphabets.DNA, "GCGCCCCCTA") };

    var aligner = new MUMmerAligner
    {
        LengthOfMUM = 4,
        PairWiseAlgorithm = new NeedlemanWunschAligner(),
    };

    IList<IPairwiseSequenceAlignment> result = aligner.AlignSimple(referenceSeq, queries);

    // The result list itself must exist.
    Assert.AreNotEqual(null, result);

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "ATGCGCATCCCCTT"),
        SecondSequence = new Sequence(Alphabets.DNA, "--GCGC--CCCCTA"),
        Consensus = new Sequence(AmbiguousDnaAlphabet.Instance, "ATGCGCATCCCCTW"),
        Score = -11,
        FirstOffset = 0,
        SecondOffset = 2,
    });

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Adds a pair of sequences (taken from the XML test configuration) to a
/// PairwiseSequenceAlignment and verifies that the same sequences are read back.
/// </summary>
public void SequenceAlignmentAddSequence()
{
    // Fetch the two input sequences and their alphabet from the XML configuration.
    string origSequence1 = this.utilityObj.xmlUtil.GetTextValue(Constants.AlignAlgorithmNodeName,
        Constants.SequenceNode1);
    string origSequence2 = this.utilityObj.xmlUtil.GetTextValue(Constants.AlignAlgorithmNodeName,
        Constants.SequenceNode2);
    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(Constants.AlignAlgorithmNodeName,
        Constants.AlphabetNameNode));

    ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment BVT : First sequence used is '{0}'.", origSequence1));
    ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment BVT : Second sequence used is '{0}'.", origSequence2));

    // Create two sequences.
    ISequence aInput = new Sequence(alphabet, origSequence1);
    ISequence bInput = new Sequence(alphabet, origSequence2);

    // Wrap them in an aligned pair and add that pair to a sequence alignment.
    IList<IPairwiseSequenceAlignment> sequenceAlignmentObj = new List<IPairwiseSequenceAlignment>();
    var alignSeq = new PairwiseAlignedSequence
    {
        FirstSequence = aInput,
        SecondSequence = bInput
    };
    IPairwiseSequenceAlignment seqAlignObj = new PairwiseSequenceAlignment();
    seqAlignObj.Add(alignSeq);
    sequenceAlignmentObj.Add(seqAlignObj);

    // Read the sequences back from the alignment.
    IList<PairwiseAlignedSequence> newAlignedSequences = sequenceAlignmentObj[0].PairwiseAlignedSequences;
    string readSequence1 = newAlignedSequences[0].FirstSequence.ConvertToString();
    string readSequence2 = newAlignedSequences[0].SecondSequence.ConvertToString();

    // Log what was actually read back (not the inputs) so a failing run is diagnosable.
    ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment BVT : First sequence read is '{0}'.", readSequence1));
    ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment BVT : Second sequence read is '{0}'.", readSequence2));

    // Expected value first, actual second, so failure messages are labelled correctly.
    Assert.AreEqual(origSequence1, readSequence1);
    Assert.AreEqual(origSequence2, readSequence2);
}
/// <summary>
/// Runs PairwiseOverlapAligner on two input sequences described by the XML test
/// configuration and compares the result with the expected alignment from the same
/// configuration node.
/// </summary>
/// <param name="isTextFile">
/// True to parse the inputs from the two FASTA files named in the configuration;
/// false to build them from the sequence strings stored in the configuration.
/// </param>
/// <param name="alignParam">Selects which overload is called: sequence list, two sequences, or all parameters.</param>
/// <param name="alignType">Align calls Align(...); any other value calls AlignSimple(...).</param>
void ValidatePairwiseOverlapAlignment(bool isTextFile, AlignmentParamType alignParam, AlignmentType alignType)
{
    string nodeName = Constants.PairwiseOverlapAlignAlgorithmNodeName;

    ISequence aInput;
    ISequence bInput;

    IAlphabet alphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    if (isTextFile)
    {
        // Read the paths of the two input files from the xml file.
        string filePath1 = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
        string filePath2 = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

        // Parse the files and take the first sequence of each.
        using (FastAParser parser1 = new FastAParser(filePath1))
        {
            parser1.Alphabet = alphabet;
            aInput = parser1.Parse().ElementAt(0);
        }

        using (FastAParser parser2 = new FastAParser(filePath2))
        {
            parser2.Alphabet = alphabet;
            bInput = parser2.Parse().ElementAt(0);
        }
    }
    else
    {
        // Read the two input sequences directly from the xml file.
        string origSequence1 = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
        string origSequence2 = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);
        aInput = new Sequence(alphabet, origSequence1);
        bInput = new Sequence(alphabet, origSequence2);
    }

    string aInputString = new string(aInput.Select(a => (char)a).ToArray());
    string bInputString = new string(bInput.Select(a => (char)a).ToArray());

    // Each message goes to both the application log and the console.
    string firstUsedMessage = string.Format((IFormatProvider)null,
        "PairwiseOverlapAligner BVT : First sequence used is '{0}'.", aInputString);
    string secondUsedMessage = string.Format((IFormatProvider)null,
        "PairwiseOverlapAligner BVT : Second sequence used is '{0}'.", bInputString);
    ApplicationLog.WriteLine(firstUsedMessage);
    ApplicationLog.WriteLine(secondUsedMessage);
    Console.WriteLine(firstUsedMessage);
    Console.WriteLine(secondUsedMessage);

    string blosumFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);
    SimilarityMatrix sm = new SimilarityMatrix(blosumFilePath);
    int gapOpenCost = int.Parse(
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), (IFormatProvider)null);
    int gapExtensionCost = int.Parse(
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), (IFormatProvider)null);

    PairwiseOverlapAligner pairwiseOverlapObj = new PairwiseOverlapAligner();
    if (AlignmentParamType.AllParam != alignParam)
    {
        // The AllParam overloads receive these values as arguments instead.
        // NOTE(review): GapExtensionCost is never assigned on the aligner here, although the
        // Align path is validated against the "gap extension" expected values - confirm that
        // relying on the aligner's default is intended.
        pairwiseOverlapObj.SimilarityMatrix = sm;
        pairwiseOverlapObj.GapOpenCost = gapOpenCost;
    }

    IList<IPairwiseSequenceAlignment> result = null;

    switch (alignParam)
    {
        case AlignmentParamType.AlignList:
            List<ISequence> sequences = new List<ISequence>();
            sequences.Add(aInput);
            sequences.Add(bInput);
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = pairwiseOverlapObj.Align(sequences);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(sequences);
                    break;
            }
            break;
        case AlignmentParamType.AlignTwo:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = pairwiseOverlapObj.Align(aInput, bInput);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(aInput, bInput);
                    break;
            }
            break;
        case AlignmentParamType.AllParam:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = pairwiseOverlapObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    break;
            }
            break;
        default:
            break;
    }

    // Read the expected score and aligned sequences from the xml file.
    string expectedSequence1;
    string expectedSequence2;
    string expectedScore;

    switch (alignType)
    {
        case AlignmentType.Align:
            expectedScore = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionScoreNode);
            expectedSequence1 = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence1Node);
            expectedSequence2 = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence2Node);
            break;
        default:
            expectedScore = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedScoreNode);
            expectedSequence1 = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode1);
            expectedSequence2 = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
            break;
    }

    // Several expected alignments may be stored in one node, separated by ';'.
    char[] seperators = new char[1] { ';' };
    string[] expectedSequences1 = expectedSequence1.Split(seperators);
    string[] expectedSequences2 = expectedSequence2.Split(seperators);

    // Every expected aligned pair carries the same score, so parse it once.
    // Split always yields at least one element, so the loop below runs at least once.
    int expectedScoreValue = Convert.ToInt32(expectedScore, (IFormatProvider)null);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    for (int i = 0; i < expectedSequences1.Length; i++)
    {
        PairwiseAlignedSequence alignedSeq = new PairwiseAlignedSequence();
        alignedSeq.FirstSequence = new Sequence(alphabet, expectedSequences1[i]);
        alignedSeq.SecondSequence = new Sequence(alphabet, expectedSequences2[i]);
        alignedSeq.Score = expectedScoreValue;
        align.PairwiseAlignedSequences.Add(alignedSeq);
    }

    expectedOutput.Add(align);
    Assert.IsTrue(CompareAlignment(result, expectedOutput));

    string scoreMessage = string.Format((IFormatProvider)null,
        "PairwiseOverlapAligner BVT : Final Score '{0}'.", expectedScore);
    string firstAlignedMessage = string.Format((IFormatProvider)null,
        "PairwiseOverlapAligner BVT : Aligned First Sequence is '{0}'.", expectedSequence1);
    string secondAlignedMessage = string.Format((IFormatProvider)null,
        "PairwiseOverlapAligner BVT : Aligned Second Sequence is '{0}'.", expectedSequence2);
    ApplicationLog.WriteLine(scoreMessage);
    ApplicationLog.WriteLine(firstAlignedMessage);
    ApplicationLog.WriteLine(secondAlignedMessage);
    Console.WriteLine(scoreMessage);
    Console.WriteLine(firstAlignedMessage);
    Console.WriteLine(secondAlignedMessage);
}
/// <summary>
/// Validates the exceptions raised by the general methods of the Sequence Alignment
/// class: mutating a read-only alignment, and calling GetObjectData with a null
/// serialization info argument.
/// </summary>
/// <param name="nodeName">Node Name in the xml.</param>
/// <param name="methodName">Name of the SequenceAlignment method to be validated</param>
static void InValidateSequenceAlignmentGeneralMethods(string nodeName, SeqAlignmentMethods methodName)
{
    // Read the input sequences, alphabet and expected error messages from the xml file.
    string origSequence1 = Utility._xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
    string origSequence2 = Utility._xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);
    IAlphabet alphabet = Utility.GetAlphabet(Utility._xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));
    string readOnlyException = Utility._xmlUtil.GetTextValue(nodeName, Constants.ReadOnlyExceptionNode);
    string expectedGetObjectDataException =
        Utility._xmlUtil.GetTextValue(nodeName, Constants.GetObjectDataNullErrorMessageNode);
    string actualError = null;
    StreamingContext context = new StreamingContext(StreamingContextStates.All);

    // Create two sequences
    ISequence aInput = new Sequence(alphabet, origSequence1);
    ISequence bInput = new Sequence(alphabet, origSequence2);

    // Add the sequences to the Sequence alignment object.
    IList<IPairwiseSequenceAlignment> sequenceAlignmentObj = new List<IPairwiseSequenceAlignment>();
    PairwiseAlignedSequence alignSeq = new PairwiseAlignedSequence();
    alignSeq.FirstSequence = aInput;
    alignSeq.SecondSequence = bInput;
    PairwiseSequenceAlignment seqAlignObj = new PairwiseSequenceAlignment(aInput, bInput);
    seqAlignObj.Add(alignSeq);
    sequenceAlignmentObj.Add(seqAlignObj);

    // Set SequenceAlignment IsReadOnly property to true.
    seqAlignObj.IsReadOnly = true;

    IList<PairwiseAlignedSequence> newAlignedSequences = sequenceAlignmentObj[0].PairwiseAlignedSequences;

    switch (methodName)
    {
        case SeqAlignmentMethods.Add:
            try
            {
                seqAlignObj.Add(newAlignedSequences[0]);
            }
            catch (NotSupportedException ex)
            {
                actualError = ex.Message;
            }

            // Validate Error message (stays null, and so fails, if nothing was thrown).
            Assert.AreEqual(readOnlyException, actualError);
            break;
        case SeqAlignmentMethods.Clear:
            try
            {
                seqAlignObj.Clear();
            }
            catch (NotSupportedException ex)
            {
                actualError = ex.Message;
            }

            // Validate Error message
            Assert.AreEqual(readOnlyException, actualError);
            break;
        case SeqAlignmentMethods.Remove:
            try
            {
                seqAlignObj.Remove(newAlignedSequences[0]);
            }
            catch (NotSupportedException ex)
            {
                actualError = ex.Message;
            }

            // Validate Error message
            Assert.AreEqual(readOnlyException, actualError);
            break;
        case SeqAlignmentMethods.AddSequence:
            try
            {
                seqAlignObj.AddSequence(newAlignedSequences[0]);
            }
            catch (NotSupportedException ex)
            {
                actualError = ex.Message;
            }

            // Validate Error message
            Assert.AreEqual(readOnlyException, actualError);
            break;
        case SeqAlignmentMethods.GetObjectData:
            try
            {
                seqAlignObj.GetObjectData(null, context);
            }
            catch (ArgumentNullException ex)
            {
                actualError = ex.Message;
            }

            // Fail with an assertion (rather than a NullReferenceException on Replace)
            // when the expected ArgumentNullException was not thrown.
            Assert.IsNotNull(actualError);

            // Validate Error message, ignoring line breaks in the exception text.
            Assert.AreEqual(expectedGetObjectDataException,
                actualError.Replace("\r", "").Replace("\n", ""));
            break;
        default:
            break;
    }

    ApplicationLog.WriteLine("SequenceAlignment P2 : Successfully validated the IsRead Property");
    Console.WriteLine("SequenceAlignment P2 : Successfully validated the IsRead Property");
}
/// <summary>
/// Validates Sequence Alignment test cases for the parameters passed: builds two
/// sequences from the configured inputs, stores them in a pairwise alignment and either
/// checks that they can be read back or checks the message of the exception raised.
/// </summary>
/// <param name="nodeName">Node Name in the xml.</param>
/// <param name="offset">Offset; the read-back validation only runs for OffsetValidation.Default.</param>
/// <param name="caseType">Case type (lower case, upper case or unchanged input).</param>
/// <param name="type">Sequence type</param>
static void ValidateGeneralSequenceAlignment(string nodeName, OffsetValidation offset,
    SequenceCaseType caseType, SequenceType type)
{
    // Read the alphabet and the two input sequences from the xml file.
    string firstInputSequence;
    string secondInputSequence;

    IAlphabet alphabet = Utility.GetAlphabet(Utility._xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));
    GetInputSequencesWithSequenceType(nodeName, type, out firstInputSequence, out secondInputSequence);

    ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment P2 : First sequence used is '{0}'.", firstInputSequence));
    ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment P2 : Second sequence used is '{0}'.", secondInputSequence));
    Console.WriteLine(string.Format(null, "SequenceAlignment P2 : First sequence used is '{0}'.", firstInputSequence));
    Console.WriteLine(string.Format(null, "SequenceAlignment P2 : Second sequence used is '{0}'.", secondInputSequence));

    // Create two sequences
    ISequence aInput = null;
    ISequence bInput = null;
    Exception actualException = null;

    switch (caseType)
    {
        case SequenceCaseType.LowerCase:
            aInput = new Sequence(alphabet, firstInputSequence.ToLower(CultureInfo.CurrentCulture));
            bInput = new Sequence(alphabet, secondInputSequence.ToLower(CultureInfo.CurrentCulture));
            break;
        case SequenceCaseType.UpperCase:
            aInput = new Sequence(alphabet, firstInputSequence.ToUpper(CultureInfo.CurrentCulture));
            bInput = new Sequence(alphabet, secondInputSequence.ToUpper(CultureInfo.CurrentCulture));
            break;
        case SequenceCaseType.Default:
            // Only this path tolerates a constructor failure; it is validated further below.
            try
            {
                aInput = new Sequence(alphabet, firstInputSequence);
                bInput = new Sequence(alphabet, secondInputSequence);
            }
            catch (Exception ex)
            {
                actualException = ex;
            }
            break;
    }

    // Add the first sequence to the Sequence alignment object.
    IList<IPairwiseSequenceAlignment> sequenceAlignmentObj = new List<IPairwiseSequenceAlignment>();

    try
    {
        PairwiseAlignedSequence alignSeq = new PairwiseAlignedSequence();
        alignSeq.FirstSequence = aInput;
        IPairwiseSequenceAlignment seqAlignObj = new PairwiseSequenceAlignment();
        seqAlignObj.Add(alignSeq);
        sequenceAlignmentObj.Add(seqAlignObj);
    }
    catch (Exception ex)
    {
        actualException = ex;
    }

    if (actualException == null)
    {
        if (offset == OffsetValidation.Default)
        {
            sequenceAlignmentObj[0].PairwiseAlignedSequences[0].SecondSequence = bInput;

            // Read the output back and validate the same.
            IList<PairwiseAlignedSequence> newAlignedSequences = sequenceAlignmentObj[0].PairwiseAlignedSequences;

            ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment P2 : First sequence read is '{0}'.", firstInputSequence));
            ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment P2 : Second sequence read is '{0}'.", secondInputSequence));
            Console.WriteLine(string.Format(null, "SequenceAlignment P2 : First sequence read is '{0}'.", firstInputSequence));
            Console.WriteLine(string.Format(null, "SequenceAlignment P2 : Second sequence read is '{0}'.", secondInputSequence));

            // Expected value first, actual second, so failure messages are labelled correctly.
            Assert.AreEqual(firstInputSequence, newAlignedSequences[0].FirstSequence.ToString());
            Assert.AreEqual(secondInputSequence, newAlignedSequences[0].SecondSequence.ToString());
        }
    }
    else
    {
        // Validate that expected exception is thrown using error message.
        string expectedErrorMessage = Utility._xmlUtil.GetTextValue(nodeName, Constants.ExpectedErrorMessage);
        Assert.AreEqual(expectedErrorMessage, actualException.Message);

        // The format string previously had no placeholder, so the message was never printed.
        string thrownMessage = string.Format(null,
            "SequenceAlignment P2 : Expected Error message is thrown '{0}'.", expectedErrorMessage);
        ApplicationLog.WriteLine(thrownMessage);
        Console.WriteLine(thrownMessage);
    }
}
/// <summary>
/// Locates every run of gap characters ('-') in either sequence of the alignment,
/// left-aligns the resulting indels, then fills in each indel's inserted/deleted bases
/// and rewrites its start position.
/// </summary>
/// <param name="aln">
/// Alignment to scan; the first sequence is used as the reference and the second as the
/// haplotype. Both are indexed up to the haplotype's Count, so they are assumed to have
/// the same aligned length.
/// </param>
/// <returns>The indels found, in order of position, with Start and InsertedSequence populated.</returns>
private static List<IndelLocation> FindIndels(PairwiseAlignedSequence aln)
{
    // Go through and find indels
    var haplo = aln.SecondSequence;
    var refer = aln.FirstSequence;
    var toReturn = new List<IndelLocation>();

    var count = haplo.Count;
    int i = 0;
    while (i < count)
    {
        // A haplotype gap takes precedence when both sequences have a gap in this column.
        bool haploGap = haplo[i] == '-';
        bool referGap = !haploGap && refer[i] == '-';
        if (!haploGap && !referGap)
        {
            i++;
            continue;
        }

        var gapped = haploGap ? haplo : refer;
        int start = i;

        // Advance to the first column after the gap run. The bound is checked before
        // indexing so a run that reaches the end of the alignment cannot overrun it.
        do
        {
            i++;
        }
        while (i < count && gapped[i] == '-');

        // i is deliberately not advanced past the column that ended the run: that column
        // is examined by the next iteration, so an indel in the other sequence that starts
        // immediately after this one is recorded with its correct start and length.
        toReturn.Add(new IndelLocation(referGap, start, i - start));
    }

    // left align them
    var lAlign = LeftAlignIndels(aln, toReturn);

    // now reposition them and get the missing sequence
    // be sure to account for any offsets introduced by past indels
    var refGapsSeen = 0;
    foreach (var laln in lAlign)
    {
        // The bases of the indel come from whichever sequence does not carry the gap.
        var relSeq = laln.DeletionOnReference ? haplo : refer;
        laln.InsertedSequence = new String(
            relSeq.Skip(laln.Start).Take(laln.Length).Select(x => (char)x).ToArray());

        laln.Start = laln.Start + (int)aln.SecondOffset - refGapsSeen;
        if (laln.DeletionOnReference)
        {
            // Gap columns in the reference do not consume reference positions.
            refGapsSeen += laln.Length;
        }
    }

    return lAlign;
}
/// <summary>
/// Aligns a single query against a reference with MUMmer using a non-default scoring
/// scheme (diagonal similarity matrix: +3 match, -2 mismatch; gap open -6, gap
/// extension -2) and compares the outcome with the expected alignment.
/// </summary>
public void TestMUMmer3MultipleMumWithCustomMatrix()
{
    string reference = "ATGCGCATCCCCTT";
    string search = "GCGCCCCCTA";

    Sequence referenceSeq = new Sequence(Alphabets.DNA, reference);
    Sequence searchSeq = new Sequence(Alphabets.DNA, search);

    List<ISequence> searchSeqs = new List<ISequence>();
    searchSeqs.Add(searchSeq);

    // The scoring is supplied through a diagonal matrix. A hand-filled 256x256 score
    // array that used to be built here was never handed to the aligner and has been removed.
    DiagonalSimilarityMatrix matrix = new DiagonalSimilarityMatrix(3, -2);

    int gapOpenCost = -6;

    MUMmerAligner mummer = new MUMmerAligner();
    mummer.LengthOfMUM = 4;
    mummer.PairWiseAlgorithm = new NeedlemanWunschAligner();
    mummer.SimilarityMatrix = matrix;
    mummer.GapOpenCost = gapOpenCost;
    mummer.GapExtensionCost = -2;

    IList<IPairwiseSequenceAlignment> result = mummer.AlignSimple(referenceSeq, searchSeqs);

    // Check if output is not null
    Assert.AreNotEqual(null, result);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    PairwiseAlignedSequence alignedSeq = new PairwiseAlignedSequence();
    alignedSeq.FirstSequence = new Sequence(Alphabets.DNA, "ATGCGCATCCCCTT");
    alignedSeq.SecondSequence = new Sequence(Alphabets.DNA, "--GCGC--CCCCTA");
    alignedSeq.Consensus = new Sequence(AmbiguousDnaAlphabet.Instance, "ATGCGCATCCCCTW");
    alignedSeq.Score = 1;
    alignedSeq.FirstOffset = 0;
    alignedSeq.SecondOffset = 2;
    align.PairwiseAlignedSequences.Add(alignedSeq);
    expectedOutput.Add(align);

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Walks the list of MUMs and aligns every gap around them - before the first MUM,
/// between consecutive MUMs and after the last MUM - accumulating the aligned
/// reference, query and consensus along with the total score and insertion counts.
/// </summary>
/// <param name="referenceSequence">Reference sequence.</param>
/// <param name="sequence">Query sequence.</param>
/// <param name="mums">List of MUMs.</param>
/// <returns>Aligned sequences.</returns>
private PairwiseAlignedSequence ProcessGaps(
    ISequence referenceSequence,
    ISequence sequence,
    IList<Match> mums)
{
    // Buffers that AlignGap appends to on every call.
    var alignedReference = new List<byte>();
    var alignedQuery = new List<byte>();
    var alignedConsensus = new List<byte>();

    var aligned = new PairwiseAlignedSequence();

    // Running totals: slot 0 and slot 1 mirror the two values AlignGap reports per call.
    var insertionTotals = new List<long>(2) { 0, 0 };
    List<long> stepInsertions;

    // Gap in front of the first MUM; a zero-length Match stands in for "no previous MUM".
    Match previous = mums.First();
    aligned.Score += this.AlignGap(
        referenceSequence,
        sequence,
        alignedReference,
        alignedQuery,
        alignedConsensus,
        new Match() { Length = 0 },
        previous,
        out stepInsertions);
    insertionTotals[0] += stepInsertions[0];
    insertionTotals[1] += stepInsertions[1];

    // Gaps between each pair of consecutive MUMs.
    for (int position = 1; position < mums.Count; position++)
    {
        Match current = mums[position];
        aligned.Score += this.AlignGap(
            referenceSequence,
            sequence,
            alignedReference,
            alignedQuery,
            alignedConsensus,
            previous,
            current,
            out stepInsertions);
        insertionTotals[0] += stepInsertions[0];
        insertionTotals[1] += stepInsertions[1];

        previous = current;
    }

    // Gap behind the last MUM; a zero-length Match stands in for "no next MUM".
    aligned.Score += this.AlignGap(
        referenceSequence,
        sequence,
        alignedReference,
        alignedQuery,
        alignedConsensus,
        previous,
        new Match() { Length = 0 },
        out stepInsertions);
    insertionTotals[0] += stepInsertions[0];
    insertionTotals[1] += stepInsertions[1];

    // Materialise the three results, detecting the alphabet of each from its content.
    byte[] referenceBytes = alignedReference.ToArray();
    IAlphabet referenceAlphabet = Alphabets.AutoDetectAlphabet(
        referenceBytes, 0, referenceBytes.LongLength, referenceSequence.Alphabet);
    aligned.FirstSequence = new Sequence(referenceAlphabet, referenceBytes)
    {
        ID = referenceSequence.ID,
        Metadata = referenceSequence.Metadata
    };

    byte[] queryBytes = alignedQuery.ToArray();
    IAlphabet queryAlphabet = Alphabets.AutoDetectAlphabet(
        queryBytes, 0, queryBytes.LongLength, sequence.Alphabet);
    aligned.SecondSequence = new Sequence(queryAlphabet, queryBytes)
    {
        ID = sequence.ID,
        Metadata = sequence.Metadata
    };

    byte[] consensusBytes = alignedConsensus.ToArray();
    IAlphabet consensusAlphabet = Alphabets.AutoDetectAlphabet(
        consensusBytes, 0, consensusBytes.LongLength, referenceSequence.Alphabet);
    aligned.Consensus = new Sequence(consensusAlphabet, consensusBytes);

    // Offsets are only computed for Needleman-Wunsch; they are not required for
    // Smith-Waterman, which produces fragmented alignments.
    // Offset is the starting position of alignment of sequence1 with respect to sequence2.
    if (this.PairWiseAlgorithm is NeedlemanWunschAligner)
    {
        aligned.FirstOffset = aligned.FirstSequence.IndexOfNonGap() - referenceSequence.IndexOfNonGap();
        aligned.SecondOffset = aligned.SecondSequence.IndexOfNonGap() - sequence.IndexOfNonGap();
    }

    // The recorded offsets span both input sequences from their first to their last symbol.
    var startOffsets = new List<long>(2) { 0, 0 };
    var endOffsets = new List<long>(2) { referenceSequence.Count - 1, sequence.Count - 1 };

    aligned.Metadata["StartOffsets"] = startOffsets;
    aligned.Metadata["EndOffsets"] = endOffsets;
    aligned.Metadata["Insertions"] = insertionTotals;

    // return the aligned sequence
    return aligned;
}
/// <summary>
/// Given two byte arrays representing a pairwise alignment, shift them so
/// that all deletions start as early as possible. For example:
///
/// <code>
/// TTTTAAAATTTT  -> Converts to ->  TTTTAAAATTTT
/// TTTTAA--TTTT                     TTTT--AATTTT
/// </code>
///
/// This function takes an IPairwiseSequenceAlignment and assumes that the first sequence is the reference and second
/// sequence is the query. It returns a new Pairwise sequence alignment with all of the indels left aligned as well as a list of variants.
/// </summary>
/// <param name="aln">Aln. The second sequence should be of type QualitativeSequence or Sequence</param>
/// <param name="callVariants">callVariants. If true, it will call variants, otherwise the second half of tuple will be null. </param>
/// <returns>
/// A tuple holding the new, left-aligned alignment and the list of called variants
/// (null when <paramref name="callVariants"/> is false).
/// </returns>
/// <exception cref="NullReferenceException">
/// <paramref name="aln"/> is null, or either sequence of its aligned pair is null.
/// </exception>
/// <exception cref="ArgumentException">
/// The alignment does not contain exactly one aligned pair, or the query is neither a
/// Sequence nor a QualitativeSequence.
/// </exception>
/// <exception cref="FormatException">
/// Expected by this file's tests when the alignment has a gap at either end
/// (presumably raised by AlignmentUtils.VerifyNoGapsOnEnds).
/// </exception>
public static Tuple<IPairwiseSequenceAlignment, List<Variant>> LeftAlignIndelsAndCallVariants(
    IPairwiseSequenceAlignment aln, bool callVariants = true)
{
    // NOTE(review): ArgumentNullException would be the conventional type for these guards;
    // NullReferenceException is kept so that existing callers observe the same exception type.
    if (aln == null)
    {
        throw new NullReferenceException("aln");
    }

    if (aln.PairwiseAlignedSequences == null || aln.PairwiseAlignedSequences.Count != 1)
    {
        throw new ArgumentException("The pairwise aligned sequence should only have one alignment");
    }

    var frstAln = aln.PairwiseAlignedSequences.First();
    var seq1 = frstAln.FirstSequence;
    var seq2 = frstAln.SecondSequence;
    if (seq1 == null)
    {
        throw new NullReferenceException("seq1");
    }
    else if (seq2 == null)
    {
        throw new NullReferenceException("seq2");
    }

    //TODO: Might implement an ambiguity check later.
#if FALSE
    if (seq1.Alphabet.HasAmbiguity || seq2.Alphabet.HasAmbiguity)
    {
        throw new ArgumentException("Cannot left align sequences with ambiguous symbols.");
    }
#endif

    // Note we have to copy unless we can guarantee the array will not be mutated.
    byte[] refseq = seq1.ToArray();
    ISequence newQuery;
    List<Variant> variants = null;

    var qualitativeQuery = seq2 as QualitativeSequence;
    var plainQuery = seq2 as Sequence;

    if (qualitativeQuery != null)
    {
        // Call variants for a qualitative sequence: pair each base with its quality value.
        var query = Enumerable.Zip(
            qualitativeQuery,
            qualitativeQuery.GetQualityScores(),
            (bp, qv) => new BPandQV(bp, (byte)qv, false)).ToArray();

        AlignmentUtils.LeftAlignIndels(refseq, query);
        AlignmentUtils.VerifyNoGapsOnEnds(refseq, query);
        if (callVariants)
        {
            variants = VariantCaller.CallVariants(refseq, query, seq2.IsMarkedAsReverseComplement());
        }

        var newQueryQS = new QualitativeSequence(
            qualitativeQuery.Alphabet,
            qualitativeQuery.FormatType,
            query.Select(z => z.BP).ToArray(),
            query.Select(p => p.QV).ToArray(),
            false);
        newQueryQS.Metadata = seq2.Metadata;
        newQuery = newQueryQS;
    }
    else if (plainQuery != null)
    {
        // For a sequence with no QV values, every base gets a quality value of 0.
        var query = plainQuery.Select(v => new BPandQV(v, 0, false)).ToArray();

        AlignmentUtils.LeftAlignIndels(refseq, query);
        AlignmentUtils.VerifyNoGapsOnEnds(refseq, query);

        // ISequence does not have a setable metadata
        var newQueryS = new Sequence(plainQuery.Alphabet, query.Select(z => z.BP).ToArray(), false);
        newQueryS.Metadata = seq2.Metadata;
        if (callVariants)
        {
            variants = VariantCaller.CallVariants(refseq, query, seq2.IsMarkedAsReverseComplement());
        }

        newQuery = newQueryS;
    }
    else
    {
        throw new ArgumentException("Can only left align indels if the query sequence is of type Sequence or QualitativeSequence.");
    }

    // variants stays null when callVariants is false, so it must be checked before it is
    // enumerated; otherwise a reference with an ID would trigger a NullReferenceException.
    if (variants != null && aln.FirstSequence != null && aln.FirstSequence.ID != null)
    {
        foreach (var v in variants)
        {
            v.RefName = aln.FirstSequence.ID;
        }
    }

    var newRef = new Sequence(seq1.Alphabet, refseq, false);
    newRef.ID = seq1.ID;
    newRef.Metadata = seq1.Metadata;

    newQuery.ID = seq2.ID;

    var newaln = new PairwiseSequenceAlignment(aln.FirstSequence, aln.SecondSequence);
    var pas = new PairwiseAlignedSequence();
    pas.FirstSequence = newRef;
    pas.SecondSequence = newQuery;
    newaln.Add(pas);

    return new Tuple<IPairwiseSequenceAlignment, List<Variant>>(newaln, variants);
}