예제 #1
0
        /// <summary>
        /// Given two byte arrays representing a pairwise alignment, shift them so 
        /// that all deletions start as early as possible.  For example:
        /// 
        /// <code>
        /// TTTTAAAATTTT  -> Converts to ->  TTTTAAAATTTT
        /// TTTTAA--TTTT                     TTTT--AATTTT
        /// </code>
        /// 
        /// This function takes a IPairwiseSequenceAlignment and assumes that the first sequence is the reference and second
        /// sequence is the query.  It returns a new Pairwise sequence alignment with all of the indels left aligned as well as a list of variants.
        /// </summary>
        /// <param name="aln">Aln. The second sequence should be of type QualitativeSequence or Sequence</param>
        /// <param name="callVariants">callVariants.  If true, it will call variants, otherwise the second half of tuple will be null. </param>
        public static Tuple<IPairwiseSequenceAlignment, List<Variant>> LeftAlignIndelsAndCallVariants(IPairwiseSequenceAlignment aln, bool callVariants = true) {

            if (aln == null) {
                throw new NullReferenceException ("aln");
            }
            if (aln.PairwiseAlignedSequences == null || aln.PairwiseAlignedSequences.Count != 1) {
                throw new ArgumentException ("The pairwise aligned sequence should only have one alignment");
            }
            var frstAln = aln.PairwiseAlignedSequences.First ();
            var seq1 = frstAln.FirstSequence;
            var seq2 = frstAln.SecondSequence;
            if (seq1 == null) {
                throw new NullReferenceException ("seq1");
            } else if (seq2 == null) {
                throw new NullReferenceException ("seq2");
            }

            //TODO: Might implement an ambiguity check later.
            #if FALSE
            if (seq1.Alphabet.HasAmbiguity || seq2.Alphabet.HasAmbiguity) {
                throw new ArgumentException ("Cannot left align sequences with ambiguous symbols.");
            }
            #endif

            // Note we have to copy unless we can guarantee the array will not be mutated.
            byte[] refseq = seq1.ToArray ();
            ISequence newQuery;
            List<Variant> variants = null;
            // Call variants for a qualitative sequence
            if (seq2 is QualitativeSequence) {
                var qs = seq2 as QualitativeSequence;
                var query = Enumerable.Zip (qs, qs.GetQualityScores (), (bp, qv) => new BPandQV (bp, (byte)qv, false)).ToArray ();
                AlignmentUtils.LeftAlignIndels (refseq, query);
                AlignmentUtils.VerifyNoGapsOnEnds (refseq, query);
                if (callVariants) {
                    variants = VariantCaller.CallVariants (refseq, query, seq2.IsMarkedAsReverseComplement());
                }
                var newQueryQS = new QualitativeSequence (qs.Alphabet, 
                    qs.FormatType,
                    query.Select (z => z.BP).ToArray (),
                    query.Select (p => p.QV).ToArray (),
                    false);
                newQueryQS.Metadata = seq2.Metadata;
                newQuery = newQueryQS;
                
            } else if (seq2 is Sequence) {  // For a sequence with no QV values.
                var qs = seq2 as Sequence;
                var query = qs.Select (v => new BPandQV (v, 0, false)).ToArray();
                AlignmentUtils.LeftAlignIndels (refseq, query);
                AlignmentUtils.VerifyNoGapsOnEnds (refseq, query);
                // ISequence does not have a setable metadata
                var newQueryS = new Sequence(qs.Alphabet, query.Select(z=>z.BP).ToArray(), false);
                newQueryS.Metadata = seq2.Metadata;
                if (callVariants) {
                    variants = VariantCaller.CallVariants (refseq, query, seq2.IsMarkedAsReverseComplement());
                }
                newQuery = newQueryS;
            } else {
                throw new ArgumentException ("Can only left align indels if the query sequence is of type Sequence or QualitativeSequence.");
            }

            if (aln.FirstSequence != null && aln.FirstSequence.ID != null) {
                foreach (var v in variants) {
                    v.RefName = aln.FirstSequence.ID;
                }
            }

            var newRef = new Sequence (seq1.Alphabet, refseq, false);
            newRef.ID = seq1.ID;
            newRef.Metadata = seq1.Metadata;

            newQuery.ID = seq2.ID;

            var newaln = new PairwiseSequenceAlignment (aln.FirstSequence, aln.SecondSequence);
            var pas = new PairwiseAlignedSequence ();
            pas.FirstSequence = newRef;
            pas.SecondSequence = newQuery;
            newaln.Add (pas);
            return new Tuple<IPairwiseSequenceAlignment, List<Variant>> (newaln, variants);
        }
예제 #2
0
        public static void TestExceptionThrownForUnclippedAlignment() {
            var refseq =   "ACAATATA";
            var queryseq = "ACAATAT-";

            var r = new Sequence (DnaAlphabet.Instance, refseq);
            var q = new Sequence (DnaAlphabet.Instance, queryseq);
            var aln = new PairwiseSequenceAlignment (r, q);
            var pas = new PairwiseAlignedSequence ();
            pas.FirstSequence = r;
            pas.SecondSequence = q;
            aln.Add (pas);
            Assert.Throws<FormatException> (() => VariantCaller.LeftAlignIndelsAndCallVariants (aln, true));

            refseq =   "AAACAATATA";
            queryseq = "AA-CAATATA";

            r = new Sequence (DnaAlphabet.Instance, refseq);
            q = new Sequence (DnaAlphabet.Instance, queryseq);
            aln = new PairwiseSequenceAlignment (r, q);
            pas = new PairwiseAlignedSequence ();
            pas.FirstSequence = r;
            pas.SecondSequence = q;
            aln.Add (pas);
            Assert.Throws<FormatException> (() => VariantCaller.LeftAlignIndelsAndCallVariants (aln, true));
        }
예제 #3
0
        public static void TestLeftAlignmentStep() {
            var refseq =   "ACAATAAAAGCGCGCGCGCGTTACGTATAT--ATGGATAT";
            var queryseq = "ACAATAA-AGC--GCGC--GTTACGTATATATATGGATAT";

            var r = new Sequence (DnaAlphabet.Instance, refseq);
            var q = new Sequence (DnaAlphabet.Instance, queryseq);
            var aln = new PairwiseSequenceAlignment (r, q);
            var pas = new PairwiseAlignedSequence ();
            pas.FirstSequence = r;
            pas.SecondSequence = q;
            aln.Add (pas);
            var tpl = VariantCaller.LeftAlignIndelsAndCallVariants (aln, true);

            // Check the left alignment
            aln = tpl.Item1 as PairwiseSequenceAlignment;
            var lar = aln.PairwiseAlignedSequences [0].FirstSequence.ConvertToString();
            var laq = aln.PairwiseAlignedSequences [0].SecondSequence.ConvertToString();
            var exprefseq =   "ACAATAAAAGCGCGCGCGCGTTACG--TATATATGGATAT";
            var expqueryseq = "ACAAT-AAA----GCGCGCGTTACGTATATATATGGATAT";
            Assert.AreEqual (exprefseq, lar);
            Assert.AreEqual (expqueryseq, laq);

            // And it's hard, so we might as well check the variants
            var variants = tpl.Item2;
            Assert.AreEqual (3, variants.Count);
            string[] bases = new string[] { "A", "GCGC", "TA" };
            char[] hpbases = new char[] { 'A', 'G', 'T' };
            bool[] inHp = new bool[] { true, false, false };
            int[] lengths = new int[] { 1, 4, 2 };
            int[] starts = new int[] { 4, 8, 24 };
            IndelType[] types = new IndelType[] { IndelType.Deletion, IndelType.Deletion, IndelType.Insertion };
            for (int i = 0; i < 3; i++) {
                Assert.AreEqual (VariantType.INDEL, variants [i].Type);
                var vi = variants [i] as IndelVariant;
                Assert.AreEqual (hpbases[i], vi.HomopolymerBase);
                Assert.AreEqual (starts [i], vi.StartPosition);
                Assert.AreEqual (lengths [i], vi.Length);
                Assert.AreEqual (bases [i], vi.InsertedOrDeletedBases);
                Assert.AreEqual (inHp [i], vi.InHomopolymer);
                Assert.AreEqual (types [i], vi.InsertionOrDeletion);

            }
        
        }
예제 #4
0
        private void ValidateGeneralSequenceAlignment(string nodeName, bool validateProperty)
        {
            // Read the xml file for getting both the files for aligning.
            string origSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
            string origSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);
            IAlphabet alphabet =
                Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

            ApplicationLog.WriteLine(string.Format("SequenceAlignment P1 : First sequence used is '{0}'.",
                                                   origSequence1));
            ApplicationLog.WriteLine(string.Format("SequenceAlignment P1 : Second sequence used is '{0}'.",
                                                   origSequence2));

            // Create two sequences
            ISequence aInput = new Sequence(alphabet, origSequence1);
            ISequence bInput = new Sequence(alphabet, origSequence2);

            // Add the sequences to the Sequence alignment object using AddSequence() method.
            IList<IPairwiseSequenceAlignment> sequenceAlignmentObj = new List<IPairwiseSequenceAlignment>();

            var alignSeq = new PairwiseAlignedSequence {FirstSequence = aInput, SecondSequence = bInput};
            IPairwiseSequenceAlignment seqAlignObj = new PairwiseSequenceAlignment();
            seqAlignObj.Add(alignSeq);
            sequenceAlignmentObj.Add(seqAlignObj);

            // Read the output back and validate the same.
            IList<PairwiseAlignedSequence> newAlignedSequences =
                sequenceAlignmentObj[0].PairwiseAlignedSequences;

            ApplicationLog.WriteLine(string.Format("SequenceAlignment P1 : First sequence read is '{0}'.",
                                                   origSequence1));
            ApplicationLog.WriteLine(string.Format("SequenceAlignment P1 : Second sequence read is '{0}'.",
                                                   origSequence2));

            if (validateProperty)
            {
                string score = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode);
                string seqCount = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceCountNode);

                Assert.IsFalse(sequenceAlignmentObj.IsReadOnly);
                Assert.AreEqual(sequenceAlignmentObj.Count.ToString((IFormatProvider) null), seqCount);
                Assert.AreEqual(
                    sequenceAlignmentObj[0].PairwiseAlignedSequences[0].Score.ToString((IFormatProvider) null), score);
                Assert.AreEqual(sequenceAlignmentObj.Count.ToString((IFormatProvider) null), seqCount);

                ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the IsRead Property");
                ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the Count Property");
                ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the Sequences Property");
            }
            else
            {
                Assert.AreEqual(new String(newAlignedSequences[0].FirstSequence.Select(a => (char) a).ToArray()),
                                origSequence1);
                Assert.AreEqual(new String(newAlignedSequences[0].SecondSequence.Select(a => (char) a).ToArray()),
                                origSequence2);
            }
        }
예제 #5
0
        public void ValidateSequenceAlignmentProperties()
        {
            // Read the xml file for getting both the files for aligning.
            string origSequence1 = this.utilityObj.xmlUtil.GetTextValue(Constants.AlignDnaAlgorithmNodeName,
                                                                   Constants.SequenceNode1);
            string origSequence2 = this.utilityObj.xmlUtil.GetTextValue(Constants.AlignDnaAlgorithmNodeName,
                                                                   Constants.SequenceNode2);
            IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(
                Constants.AlignDnaAlgorithmNodeName,
                Constants.AlphabetNameNode));
            string seqCount = this.utilityObj.xmlUtil.GetTextValue(
                Constants.AlignDnaAlgorithmNodeName,
                Constants.SequenceCountNode);

            // Create two sequences
            ISequence aInput = new Sequence(alphabet, origSequence1);
            ISequence bInput = new Sequence(alphabet, origSequence2);

            // Add the sequences to the Sequence alignment object using AddSequence() method.
            IList<IPairwiseSequenceAlignment> sequenceAlignmentObj = new List<IPairwiseSequenceAlignment>();

            var alignSeq = new PairwiseAlignedSequence();

            alignSeq.FirstSequence = aInput;
            alignSeq.SecondSequence = bInput;
            IPairwiseSequenceAlignment seqAlignObj = new PairwiseSequenceAlignment(aInput, bInput);
            seqAlignObj.Add(alignSeq);
            sequenceAlignmentObj.Add(seqAlignObj);

            // Validate all properties of sequence alignment class. 
            Assert.AreEqual(seqCount, seqAlignObj.Count.ToString((IFormatProvider) null));
            Assert.AreEqual(origSequence1, new string(seqAlignObj.FirstSequence.Select(a => (char) a).ToArray()));
            Assert.AreEqual(origSequence2, new string(seqAlignObj.SecondSequence.Select(a => (char) a).ToArray()));
            Assert.IsFalse(seqAlignObj.IsReadOnly);
            Assert.IsNull(seqAlignObj.Documentation);
            Assert.AreEqual(seqCount, seqAlignObj.PairwiseAlignedSequences.Count.ToString((IFormatProvider) null));

            ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the IsRead Property");
            ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the Count Property");
            ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the Sequences Property");
        }
예제 #6
0
        public void SequenceAlignmentAddSequence()
        {
            // Read the xml file for getting both the files for aligning.
            string origSequence1 = this.utilityObj.xmlUtil.GetTextValue(Constants.AlignAlgorithmNodeName, Constants.SequenceNode1);
            string origSequence2 = this.utilityObj.xmlUtil.GetTextValue(Constants.AlignAlgorithmNodeName, Constants.SequenceNode2);

            IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(Constants.AlignAlgorithmNodeName, Constants.AlphabetNameNode));

            ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment BVT : First sequence used is '{0}'.", origSequence1));
            ApplicationLog.WriteLine(string.Format(null,"SequenceAlignment BVT : Second sequence used is '{0}'.", origSequence2));

            // Create two sequences
            ISequence aInput = new Sequence(alphabet, origSequence1);
            ISequence bInput = new Sequence(alphabet, origSequence2);

            // Add the sequences to the Sequence alignment object using AddSequence() method.
            IList<IPairwiseSequenceAlignment> sequenceAlignmentObj =
                new List<IPairwiseSequenceAlignment>();

            var alignSeq = new PairwiseAlignedSequence {FirstSequence = aInput, SecondSequence = bInput};
            IPairwiseSequenceAlignment seqAlignObj = new PairwiseSequenceAlignment();
            seqAlignObj.Add(alignSeq);
            sequenceAlignmentObj.Add(seqAlignObj);

            // Read the output back and validate the same.
            IList<PairwiseAlignedSequence> newAlignedSequences = sequenceAlignmentObj[0].PairwiseAlignedSequences;

            ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment BVT : First sequence read is '{0}'.", origSequence1));
            ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment BVT : Second sequence read is '{0}'.", origSequence2));

            Assert.AreEqual(newAlignedSequences[0].FirstSequence.ConvertToString(), origSequence1);
            Assert.AreEqual(newAlignedSequences[0].SecondSequence.ConvertToString(), origSequence2);
        }