/// <summary>
/// Checks that <c>VariantCaller.LeftAlignIndelsAndCallVariants</c> throws a
/// <see cref="FormatException"/> for each of two hand-built alignments whose
/// query sequence contains a gap at, or close to, one of its ends.
/// </summary>
public static void TestExceptionThrownForUnclippedAlignment()
{
    // Each entry is { reference, query }. The first query ends in a gap; the
    // second has a gap inside the run of A's at the start of the reference.
    string[][] unclippedPairs =
    {
        new[] { "ACAATATA", "ACAATAT-" },
        new[] { "AAACAATATA", "AA-CAATATA" }
    };

    foreach (string[] pair in unclippedPairs)
    {
        var reference = new Sequence(DnaAlphabet.Instance, pair[0]);
        var query = new Sequence(DnaAlphabet.Instance, pair[1]);

        var alignment = new PairwiseSequenceAlignment(reference, query);
        alignment.Add(new PairwiseAlignedSequence
        {
            FirstSequence = reference,
            SecondSequence = query
        });

        Assert.Throws<FormatException>(
            () => VariantCaller.LeftAlignIndelsAndCallVariants(alignment, true));
    }
}
/// <summary>
/// Runs <c>VariantCaller.LeftAlignIndelsAndCallVariants</c> on a hand-built
/// alignment and checks both the returned aligned strings and the three
/// indel variants reported for it.
/// </summary>
public static void TestLeftAlignmentStep()
{
    var reference = new Sequence(DnaAlphabet.Instance, "ACAATAAAAGCGCGCGCGCGTTACGTATAT--ATGGATAT");
    var query = new Sequence(DnaAlphabet.Instance, "ACAATAA-AGC--GCGC--GTTACGTATATATATGGATAT");

    var alignment = new PairwiseSequenceAlignment(reference, query);
    alignment.Add(new PairwiseAlignedSequence
    {
        FirstSequence = reference,
        SecondSequence = query
    });

    var result = VariantCaller.LeftAlignIndelsAndCallVariants(alignment, true);

    // Check the left alignment
    alignment = result.Item1 as PairwiseSequenceAlignment;
    var actualReference = alignment.PairwiseAlignedSequences[0].FirstSequence.ConvertToString();
    var actualQuery = alignment.PairwiseAlignedSequences[0].SecondSequence.ConvertToString();
    Assert.AreEqual("ACAATAAAAGCGCGCGCGCGTTACG--TATATATGGATAT", actualReference);
    Assert.AreEqual("ACAAT-AAA----GCGCGCGTTACGTATATATATGGATAT", actualQuery);

    // And it's hard, so we might as well check the variants
    var calledVariants = result.Item2;
    Assert.AreEqual(3, calledVariants.Count);

    // Expected properties of the three variants, index-aligned with calledVariants.
    string[] expectedBases = { "A", "GCGC", "TA" };
    char[] expectedHomopolymerBases = { 'A', 'G', 'T' };
    bool[] expectedInHomopolymer = { true, false, false };
    int[] expectedLengths = { 1, 4, 2 };
    int[] expectedStarts = { 4, 8, 24 };
    IndelType[] expectedTypes = { IndelType.Deletion, IndelType.Deletion, IndelType.Insertion };

    for (int idx = 0; idx < 3; idx++)
    {
        Assert.AreEqual(VariantType.INDEL, calledVariants[idx].Type);

        var indel = calledVariants[idx] as IndelVariant;
        Assert.AreEqual(expectedHomopolymerBases[idx], indel.HomopolymerBase);
        Assert.AreEqual(expectedStarts[idx], indel.StartPosition);
        Assert.AreEqual(expectedLengths[idx], indel.Length);
        Assert.AreEqual(expectedBases[idx], indel.InsertedOrDeletedBases);
        Assert.AreEqual(expectedInHomopolymer[idx], indel.InHomopolymer);
        Assert.AreEqual(expectedTypes[idx], indel.InsertionOrDeletion);
    }
}
/// <summary>
/// Adds a pair of sequences, whose text and alphabet name come from the XML
/// configuration, to a pairwise sequence alignment and validates that the
/// same sequences can be read back from it.
/// </summary>
public void SequenceAlignmentAddSequence()
{
    // Read the two input sequences and the alphabet name from the xml file.
    string origSequence1 = this.utilityObj.xmlUtil.GetTextValue(Constants.AlignAlgorithmNodeName, Constants.SequenceNode1);
    string origSequence2 = this.utilityObj.xmlUtil.GetTextValue(Constants.AlignAlgorithmNodeName, Constants.SequenceNode2);
    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(Constants.AlignAlgorithmNodeName, Constants.AlphabetNameNode));

    ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment BVT : First sequence used is '{0}'.", origSequence1));
    ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment BVT : Second sequence used is '{0}'.", origSequence2));

    // Create two sequences
    ISequence aInput = new Sequence(alphabet, origSequence1);
    ISequence bInput = new Sequence(alphabet, origSequence2);

    // Wrap the sequences in an aligned pair and add it to a new alignment object.
    IList<IPairwiseSequenceAlignment> sequenceAlignmentObj = new List<IPairwiseSequenceAlignment>();
    var alignSeq = new PairwiseAlignedSequence
    {
        FirstSequence = aInput,
        SecondSequence = bInput
    };
    IPairwiseSequenceAlignment seqAlignObj = new PairwiseSequenceAlignment();
    seqAlignObj.Add(alignSeq);
    sequenceAlignmentObj.Add(seqAlignObj);

    // Read the output back and validate the same.
    IList<PairwiseAlignedSequence> newAlignedSequences = sequenceAlignmentObj[0].PairwiseAlignedSequences;
    string readSequence1 = newAlignedSequences[0].FirstSequence.ConvertToString();
    string readSequence2 = newAlignedSequences[0].SecondSequence.ConvertToString();

    // Log what was actually read back (not the inputs) so that a mismatch is
    // visible in the log next to the "used" lines above.
    ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment BVT : First sequence read is '{0}'.", readSequence1));
    ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment BVT : Second sequence read is '{0}'.", readSequence2));

    // Expected value first, actual second, so failure messages are labelled correctly.
    Assert.AreEqual(origSequence1, readSequence1);
    Assert.AreEqual(origSequence2, readSequence2);
}
/// <summary>
/// Validate Sequence Alignment Class General methods exception.
/// Builds a pairwise sequence alignment, marks it read-only and checks that the
/// selected method fails with the error message configured in the xml.
/// </summary>
/// <param name="nodeName">Node Name in the xml.</param>
/// <param name="methodName">Name of the SequenceAlignment method to be validated</param>
static void InValidateSequenceAlignmentGeneralMethods(string nodeName, SeqAlignmentMethods methodName)
{
    // Read the input sequences, alphabet name and expected error messages from the xml file.
    string origSequence1 = Utility._xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
    string origSequence2 = Utility._xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);
    IAlphabet alphabet = Utility.GetAlphabet(Utility._xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));
    string readOnlyException = Utility._xmlUtil.GetTextValue(nodeName, Constants.ReadOnlyExceptionNode);
    string expectedGetObjectDataException = Utility._xmlUtil.GetTextValue(nodeName, Constants.GetObjectDataNullErrorMessageNode);
    string actualError = null;
    StreamingContext context = new StreamingContext(StreamingContextStates.All);

    // Create two sequences
    ISequence aInput = new Sequence(alphabet, origSequence1);
    ISequence bInput = new Sequence(alphabet, origSequence2);

    // Wrap the sequences in an aligned pair and add it to a new alignment object.
    IList<IPairwiseSequenceAlignment> sequenceAlignmentObj = new List<IPairwiseSequenceAlignment>();
    PairwiseAlignedSequence alignSeq = new PairwiseAlignedSequence();
    alignSeq.FirstSequence = aInput;
    alignSeq.SecondSequence = bInput;
    PairwiseSequenceAlignment seqAlignObj = new PairwiseSequenceAlignment(aInput, bInput);
    seqAlignObj.Add(alignSeq);
    sequenceAlignmentObj.Add(seqAlignObj);

    // Set SequenceAlignment IsReadOnly property to true.
    seqAlignObj.IsReadOnly = true;
    IList<PairwiseAlignedSequence> newAlignedSequences = sequenceAlignmentObj[0].PairwiseAlignedSequences;

    switch (methodName)
    {
        case SeqAlignmentMethods.Add:
            try
            {
                seqAlignObj.Add(newAlignedSequences[0]);
            }
            catch (NotSupportedException ex)
            {
                actualError = ex.Message;
            }

            // Validate Error message
            Assert.AreEqual(readOnlyException, actualError);
            break;

        case SeqAlignmentMethods.Clear:
            try
            {
                seqAlignObj.Clear();
            }
            catch (NotSupportedException ex)
            {
                actualError = ex.Message;
            }

            // Validate Error message
            Assert.AreEqual(readOnlyException, actualError);
            break;

        case SeqAlignmentMethods.Remove:
            try
            {
                seqAlignObj.Remove(newAlignedSequences[0]);
            }
            catch (NotSupportedException ex)
            {
                actualError = ex.Message;
            }

            // Validate Error message
            Assert.AreEqual(readOnlyException, actualError);
            break;

        case SeqAlignmentMethods.AddSequence:
            try
            {
                seqAlignObj.AddSequence(newAlignedSequences[0]);
            }
            catch (NotSupportedException ex)
            {
                actualError = ex.Message;
            }

            // Validate Error message
            Assert.AreEqual(readOnlyException, actualError);
            break;

        case SeqAlignmentMethods.GetObjectData:
            try
            {
                seqAlignObj.GetObjectData(null, context);
            }
            catch (ArgumentNullException ex)
            {
                actualError = ex.Message;
            }

            // Validate Error message. Line breaks are stripped before comparing.
            // When no exception was thrown actualError is still null; compare null
            // directly so the test fails on the assertion instead of crashing with
            // a NullReferenceException on Replace().
            Assert.AreEqual(
                expectedGetObjectDataException,
                actualError == null ? null : actualError.Replace("\r", "").Replace("\n", ""));
            break;

        default:
            break;
    }

    ApplicationLog.WriteLine("SequenceAlignment P2 : Successfully validated the IsRead Property");
    Console.WriteLine("SequenceAlignment P2 : Successfully validated the IsRead Property");
}
/// <summary>
/// Validates Sequence Alignment test cases for the parameters passed.
/// Builds two sequences from the configured input, adds them to a pairwise
/// sequence alignment and either validates the values read back or, when an
/// exception was raised along the way, validates its message against the
/// expected error message from the xml.
/// </summary>
/// <param name="nodeName">Node Name in the xml.</param>
/// <param name="offset">Offset; the read-back validation only runs for <c>OffsetValidation.Default</c>.</param>
/// <param name="caseType">Case type: whether the input is lower-cased, upper-cased or used unchanged.</param>
/// <param name="type">Sequence type, forwarded to <c>GetInputSequencesWithSequenceType</c>.</param>
static void ValidateGeneralSequenceAlignment(string nodeName, OffsetValidation offset, SequenceCaseType caseType, SequenceType type)
{
    // Read the alphabet name from the xml file and fetch both input sequences.
    string firstInputSequence = string.Empty;
    string secondInputSequence = string.Empty;
    IAlphabet alphabet = Utility.GetAlphabet(Utility._xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));
    GetInputSequencesWithSequenceType(nodeName, type, out firstInputSequence, out secondInputSequence);

    ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment P2 : First sequence used is '{0}'.", firstInputSequence));
    ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment P2 : Second sequence used is '{0}'.", secondInputSequence));
    Console.WriteLine(string.Format(null, "SequenceAlignment P2 : First sequence used is '{0}'.", firstInputSequence));
    Console.WriteLine(string.Format(null, "SequenceAlignment P2 : Second sequence used is '{0}'.", secondInputSequence));

    // Create two sequences
    ISequence aInput = null;
    ISequence bInput = null;
    Exception actualException = null;

    switch (caseType)
    {
        // Sequence symbols are machine data, not user-facing text, so the case
        // conversion uses the invariant culture to stay independent of the
        // machine's locale.
        case SequenceCaseType.LowerCase:
            aInput = new Sequence(alphabet, firstInputSequence.ToLower(CultureInfo.InvariantCulture));
            bInput = new Sequence(alphabet, secondInputSequence.ToLower(CultureInfo.InvariantCulture));
            break;

        case SequenceCaseType.UpperCase:
            aInput = new Sequence(alphabet, firstInputSequence.ToUpper(CultureInfo.InvariantCulture));
            bInput = new Sequence(alphabet, secondInputSequence.ToUpper(CultureInfo.InvariantCulture));
            break;

        case SequenceCaseType.Default:
            try
            {
                aInput = new Sequence(alphabet, firstInputSequence);
                bInput = new Sequence(alphabet, secondInputSequence);
            }
            catch (Exception ex)
            {
                // Remembered so the expected-error branch below can validate it.
                actualException = ex;
            }

            break;
    }

    // Add the first sequence to a new alignment object; the second sequence is
    // attached later, in the validation branch.
    IList<IPairwiseSequenceAlignment> sequenceAlignmentObj = new List<IPairwiseSequenceAlignment>();
    try
    {
        PairwiseAlignedSequence alignSeq = new PairwiseAlignedSequence();
        alignSeq.FirstSequence = aInput;
        IPairwiseSequenceAlignment seqAlignObj = new PairwiseSequenceAlignment();
        seqAlignObj.Add(alignSeq);
        sequenceAlignmentObj.Add(seqAlignObj);
    }
    catch (Exception ex)
    {
        actualException = ex;
    }

    if (actualException == null)
    {
        if (offset == OffsetValidation.Default)
        {
            sequenceAlignmentObj[0].PairwiseAlignedSequences[0].SecondSequence = bInput;

            // Read the output back and validate the same.
            IList<PairwiseAlignedSequence> newAlignedSequences = sequenceAlignmentObj[0].PairwiseAlignedSequences;
            string firstReadSequence = newAlignedSequences[0].FirstSequence.ToString();
            string secondReadSequence = newAlignedSequences[0].SecondSequence.ToString();

            // Log what was actually read back (not the inputs) so that a
            // mismatch is visible next to the "used" lines above.
            ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment P2 : First sequence read is '{0}'.", firstReadSequence));
            ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment P2 : Second sequence read is '{0}'.", secondReadSequence));
            Console.WriteLine(string.Format(null, "SequenceAlignment P2 : First sequence read is '{0}'.", firstReadSequence));
            Console.WriteLine(string.Format(null, "SequenceAlignment P2 : Second sequence read is '{0}'.", secondReadSequence));

            // Expected value first, actual second, so failure messages are labelled correctly.
            Assert.AreEqual(firstInputSequence, firstReadSequence);
            Assert.AreEqual(secondInputSequence, secondReadSequence);
        }
    }
    else
    {
        // Validate that expected exception is thrown using error message.
        string expectedErrorMessage = Utility._xmlUtil.GetTextValue(nodeName, Constants.ExpectedErrorMessage);
        Assert.AreEqual(expectedErrorMessage, actualException.Message);

        // The format string needs a placeholder, otherwise the message argument is dropped.
        ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment P2 : Expected Error message is thrown '{0}'.", expectedErrorMessage));
        Console.WriteLine(string.Format(null, "SequenceAlignment P2 : Expected Error message is thrown '{0}'.", expectedErrorMessage));
    }
}
/// <summary>
/// Given a pairwise alignment, shift the gaps so that all indels start as
/// early as possible. For example:
///
/// <code>
/// TTTTAAAATTTT  -> Converts to ->  TTTTAAAATTTT
/// TTTTAA--TTTT                     TTTT--AATTTT
/// </code>
///
/// This function takes a IPairwiseSequenceAlignment and assumes that the first sequence is the reference and second
/// sequence is the query. It returns a new Pairwise sequence alignment with all of the indels left aligned as well as a list of variants.
/// The input alignment's sequences are not modified; the reference bytes are copied before being shifted.
/// </summary>
/// <param name="aln">Aln. Must contain exactly one pairwise aligned sequence. The second sequence should be of type QualitativeSequence or Sequence</param>
/// <param name="callVariants">callVariants. If true, it will call variants, otherwise the second half of tuple will be null. </param>
/// <returns>
/// A tuple of the new, left-aligned alignment and the list of called variants
/// (null when <paramref name="callVariants"/> is false).
/// </returns>
/// <exception cref="NullReferenceException">
/// <paramref name="aln"/>, or either sequence of its single aligned pair, is null.
/// </exception>
/// <exception cref="ArgumentException">
/// The alignment does not contain exactly one aligned pair, or the query is
/// neither a Sequence nor a QualitativeSequence.
/// </exception>
public static Tuple<IPairwiseSequenceAlignment, List<Variant>> LeftAlignIndelsAndCallVariants(IPairwiseSequenceAlignment aln, bool callVariants = true)
{
    // NOTE(review): ArgumentNullException would be the conventional choice for
    // the null checks below; NullReferenceException is kept so that existing
    // callers catching it keep working.
    if (aln == null)
    {
        throw new NullReferenceException("aln");
    }

    if (aln.PairwiseAlignedSequences == null || aln.PairwiseAlignedSequences.Count != 1)
    {
        throw new ArgumentException("The pairwise aligned sequence should only have one alignment");
    }

    var frstAln = aln.PairwiseAlignedSequences.First();
    var seq1 = frstAln.FirstSequence;
    var seq2 = frstAln.SecondSequence;
    if (seq1 == null)
    {
        throw new NullReferenceException("seq1");
    }
    else if (seq2 == null)
    {
        throw new NullReferenceException("seq2");
    }

    //TODO: Might implement an ambiguity check later.
#if FALSE
    if (seq1.Alphabet.HasAmbiguity || seq2.Alphabet.HasAmbiguity)
    {
        throw new ArgumentException("Cannot left align sequences with ambiguous symbols.");
    }
#endif

    // Note we have to copy unless we can guarantee the array will not be mutated.
    byte[] refseq = seq1.ToArray();
    ISequence newQuery;
    List<Variant> variants = null;

    var qualitativeQuery = seq2 as QualitativeSequence;
    var plainQuery = seq2 as Sequence;

    if (qualitativeQuery != null)
    {
        // Call variants for a qualitative sequence: pair every base with its quality score.
        var query = Enumerable.Zip(
            qualitativeQuery,
            qualitativeQuery.GetQualityScores(),
            (bp, qv) => new BPandQV(bp, (byte)qv, false)).ToArray();

        AlignmentUtils.LeftAlignIndels(refseq, query);
        AlignmentUtils.VerifyNoGapsOnEnds(refseq, query);
        if (callVariants)
        {
            variants = VariantCaller.CallVariants(refseq, query, seq2.IsMarkedAsReverseComplement());
        }

        var newQueryQS = new QualitativeSequence(
            qualitativeQuery.Alphabet,
            qualitativeQuery.FormatType,
            query.Select(z => z.BP).ToArray(),
            query.Select(p => p.QV).ToArray(),
            false);
        newQueryQS.Metadata = seq2.Metadata;
        newQuery = newQueryQS;
    }
    else if (plainQuery != null)
    {
        // For a sequence with no QV values every base gets a quality of 0.
        var query = plainQuery.Select(v => new BPandQV(v, 0, false)).ToArray();

        AlignmentUtils.LeftAlignIndels(refseq, query);
        AlignmentUtils.VerifyNoGapsOnEnds(refseq, query);

        // ISequence does not have a setable metadata
        var newQueryS = new Sequence(plainQuery.Alphabet, query.Select(z => z.BP).ToArray(), false);
        newQueryS.Metadata = seq2.Metadata;
        if (callVariants)
        {
            variants = VariantCaller.CallVariants(refseq, query, seq2.IsMarkedAsReverseComplement());
        }

        newQuery = newQueryS;
    }
    else
    {
        throw new ArgumentException("Can only left align indels if the query sequence is of type Sequence or QualitativeSequence.");
    }

    // Tag each variant with the reference name. The variant list is null when
    // callVariants is false, so it has to be checked before it is enumerated.
    if (variants != null && aln.FirstSequence != null && aln.FirstSequence.ID != null)
    {
        foreach (var v in variants)
        {
            v.RefName = aln.FirstSequence.ID;
        }
    }

    // Build the result: new sequences carrying the shifted data plus the
    // original IDs and metadata, wrapped in a fresh alignment object.
    var newRef = new Sequence(seq1.Alphabet, refseq, false);
    newRef.ID = seq1.ID;
    newRef.Metadata = seq1.Metadata;
    newQuery.ID = seq2.ID;

    var newaln = new PairwiseSequenceAlignment(aln.FirstSequence, aln.SecondSequence);
    var pas = new PairwiseAlignedSequence();
    pas.FirstSequence = newRef;
    pas.SecondSequence = newQuery;
    newaln.Add(pas);

    return new Tuple<IPairwiseSequenceAlignment, List<Variant>>(newaln, variants);
}